{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999603881956823, "eval_steps": 500, "global_step": 12622, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.922360863537334e-05, "grad_norm": 42.614111854296226, "learning_rate": 5.277044854881267e-08, "loss": 2.2873, "step": 1 }, { "epoch": 0.00015844721727074668, "grad_norm": 37.76236820883414, "learning_rate": 1.0554089709762534e-07, "loss": 2.1054, "step": 2 }, { "epoch": 0.00023767082590612002, "grad_norm": 44.20324100032348, "learning_rate": 1.5831134564643802e-07, "loss": 2.3843, "step": 3 }, { "epoch": 0.00031689443454149336, "grad_norm": 45.99189863287436, "learning_rate": 2.1108179419525068e-07, "loss": 2.3161, "step": 4 }, { "epoch": 0.0003961180431768667, "grad_norm": 61.44135033953449, "learning_rate": 2.6385224274406334e-07, "loss": 2.5541, "step": 5 }, { "epoch": 0.00047534165181224003, "grad_norm": 51.340025526685665, "learning_rate": 3.1662269129287605e-07, "loss": 2.4175, "step": 6 }, { "epoch": 0.0005545652604476134, "grad_norm": 40.24597610798052, "learning_rate": 3.693931398416887e-07, "loss": 2.2304, "step": 7 }, { "epoch": 0.0006337888690829867, "grad_norm": 35.26815455408599, "learning_rate": 4.2216358839050136e-07, "loss": 2.154, "step": 8 }, { "epoch": 0.00071301247771836, "grad_norm": 37.95145832540095, "learning_rate": 4.7493403693931397e-07, "loss": 2.3349, "step": 9 }, { "epoch": 0.0007922360863537334, "grad_norm": 32.52077001059761, "learning_rate": 5.277044854881267e-07, "loss": 2.1674, "step": 10 }, { "epoch": 0.0008714596949891067, "grad_norm": 32.695184250972915, "learning_rate": 5.804749340369393e-07, "loss": 2.2218, "step": 11 }, { "epoch": 0.0009506833036244801, "grad_norm": 31.569994275350567, "learning_rate": 6.332453825857521e-07, "loss": 2.1072, "step": 12 }, { "epoch": 0.0010299069122598535, "grad_norm": 36.87101598190986, "learning_rate": 6.860158311345646e-07, "loss": 2.1892, "step": 13 }, { "epoch": 0.0011091305208952267, "grad_norm": 31.42772724566191, "learning_rate": 7.387862796833774e-07, "loss": 1.9935, "step": 14 }, { "epoch": 0.0011883541295306002, "grad_norm": 33.810169392899475, "learning_rate": 7.915567282321901e-07, "loss": 1.9951, "step": 15 }, { "epoch": 0.0012675777381659734, "grad_norm": 34.022371788566474, "learning_rate": 8.443271767810027e-07, "loss": 1.8929, "step": 16 }, { "epoch": 0.0013468013468013469, "grad_norm": 27.522247882860785, "learning_rate": 8.970976253298154e-07, "loss": 1.743, "step": 17 }, { "epoch": 0.00142602495543672, "grad_norm": 26.687057865206754, "learning_rate": 9.498680738786279e-07, "loss": 1.7579, "step": 18 }, { "epoch": 0.0015052485640720936, "grad_norm": 25.8674338473116, "learning_rate": 1.0026385224274407e-06, "loss": 1.6363, "step": 19 }, { "epoch": 0.0015844721727074668, "grad_norm": 24.962962468925166, "learning_rate": 1.0554089709762534e-06, "loss": 1.5538, "step": 20 }, { "epoch": 0.0016636957813428402, "grad_norm": 19.201733561685682, "learning_rate": 1.108179419525066e-06, "loss": 1.4519, "step": 21 }, { "epoch": 0.0017429193899782135, "grad_norm": 17.008045145586003, "learning_rate": 1.1609498680738787e-06, "loss": 1.5598, "step": 22 }, { "epoch": 0.001822142998613587, "grad_norm": 22.925097803179106, "learning_rate": 1.2137203166226915e-06, "loss": 1.3887, "step": 23 }, { "epoch": 0.0019013666072489601, "grad_norm": 14.467758174340604, "learning_rate": 1.2664907651715042e-06, "loss": 1.4529, "step": 24 }, { "epoch": 0.0019805902158843334, "grad_norm": 18.069021547652184, "learning_rate": 1.3192612137203166e-06, "loss": 1.5061, "step": 25 }, { "epoch": 0.002059813824519707, "grad_norm": 13.54544464402972, "learning_rate": 1.3720316622691293e-06, "loss": 1.3593, "step": 26 }, { "epoch": 0.0021390374331550803, "grad_norm": 13.221854786587498, "learning_rate": 1.4248021108179422e-06, "loss": 1.3513, "step": 27 }, { "epoch": 0.0022182610417904535, "grad_norm": 15.201327664134368, "learning_rate": 1.4775725593667548e-06, "loss": 1.4593, "step": 28 }, { "epoch": 0.0022974846504258267, "grad_norm": 11.505659412870976, "learning_rate": 1.5303430079155673e-06, "loss": 1.3352, "step": 29 }, { "epoch": 0.0023767082590612004, "grad_norm": 11.135420445794026, "learning_rate": 1.5831134564643801e-06, "loss": 1.1387, "step": 30 }, { "epoch": 0.0024559318676965736, "grad_norm": 12.751311370173159, "learning_rate": 1.6358839050131928e-06, "loss": 1.2372, "step": 31 }, { "epoch": 0.002535155476331947, "grad_norm": 10.191505969079218, "learning_rate": 1.6886543535620054e-06, "loss": 1.2993, "step": 32 }, { "epoch": 0.00261437908496732, "grad_norm": 10.36869003526171, "learning_rate": 1.7414248021108183e-06, "loss": 1.1897, "step": 33 }, { "epoch": 0.0026936026936026937, "grad_norm": 11.106688395157834, "learning_rate": 1.7941952506596308e-06, "loss": 1.2186, "step": 34 }, { "epoch": 0.002772826302238067, "grad_norm": 9.811641497260164, "learning_rate": 1.8469656992084434e-06, "loss": 1.173, "step": 35 }, { "epoch": 0.00285204991087344, "grad_norm": 9.748182125181, "learning_rate": 1.8997361477572559e-06, "loss": 1.1051, "step": 36 }, { "epoch": 0.0029312735195088134, "grad_norm": 11.359302006106475, "learning_rate": 1.9525065963060687e-06, "loss": 1.1292, "step": 37 }, { "epoch": 0.003010497128144187, "grad_norm": 9.351484551662441, "learning_rate": 2.0052770448548814e-06, "loss": 1.1873, "step": 38 }, { "epoch": 0.0030897207367795603, "grad_norm": 9.400069186684982, "learning_rate": 2.058047493403694e-06, "loss": 1.2061, "step": 39 }, { "epoch": 0.0031689443454149336, "grad_norm": 10.5292163306882, "learning_rate": 2.1108179419525067e-06, "loss": 1.1456, "step": 40 }, { "epoch": 0.003248167954050307, "grad_norm": 8.75803640559494, "learning_rate": 2.1635883905013194e-06, "loss": 1.122, "step": 41 }, { "epoch": 0.0033273915626856805, "grad_norm": 9.958373416709335, "learning_rate": 2.216358839050132e-06, "loss": 1.0987, "step": 42 }, { "epoch": 0.0034066151713210537, "grad_norm": 10.295384189695442, "learning_rate": 2.2691292875989447e-06, "loss": 1.0441, "step": 43 }, { "epoch": 0.003485838779956427, "grad_norm": 9.364845821887274, "learning_rate": 2.3218997361477573e-06, "loss": 1.1967, "step": 44 }, { "epoch": 0.0035650623885918, "grad_norm": 10.836964308793071, "learning_rate": 2.37467018469657e-06, "loss": 1.1089, "step": 45 }, { "epoch": 0.003644285997227174, "grad_norm": 10.301230304850362, "learning_rate": 2.427440633245383e-06, "loss": 1.2325, "step": 46 }, { "epoch": 0.003723509605862547, "grad_norm": 11.32728636855559, "learning_rate": 2.4802110817941953e-06, "loss": 1.06, "step": 47 }, { "epoch": 0.0038027332144979203, "grad_norm": 8.608360270376286, "learning_rate": 2.5329815303430084e-06, "loss": 1.0562, "step": 48 }, { "epoch": 0.0038819568231332935, "grad_norm": 9.748035469890205, "learning_rate": 2.5857519788918206e-06, "loss": 1.0684, "step": 49 }, { "epoch": 0.003961180431768667, "grad_norm": 9.799999174312287, "learning_rate": 2.6385224274406333e-06, "loss": 0.9504, "step": 50 }, { "epoch": 0.00404040404040404, "grad_norm": 8.982776127017706, "learning_rate": 2.6912928759894464e-06, "loss": 1.0965, "step": 51 }, { "epoch": 0.004119627649039414, "grad_norm": 8.334820113759973, "learning_rate": 2.7440633245382586e-06, "loss": 1.0768, "step": 52 }, { "epoch": 0.004198851257674787, "grad_norm": 9.00233498749542, "learning_rate": 2.7968337730870717e-06, "loss": 1.0667, "step": 53 }, { "epoch": 0.0042780748663101605, "grad_norm": 8.639113973155974, "learning_rate": 2.8496042216358843e-06, "loss": 1.0581, "step": 54 }, { "epoch": 0.004357298474945534, "grad_norm": 8.878926254612908, "learning_rate": 2.9023746701846966e-06, "loss": 0.9961, "step": 55 }, { "epoch": 0.004436522083580907, "grad_norm": 8.162243884388658, "learning_rate": 2.9551451187335096e-06, "loss": 1.0752, "step": 56 }, { "epoch": 0.004515745692216281, "grad_norm": 8.330023679308326, "learning_rate": 3.0079155672823223e-06, "loss": 0.9054, "step": 57 }, { "epoch": 0.0045949693008516534, "grad_norm": 7.53471868214602, "learning_rate": 3.0606860158311345e-06, "loss": 1.0052, "step": 58 }, { "epoch": 0.004674192909487027, "grad_norm": 7.112656124939108, "learning_rate": 3.1134564643799476e-06, "loss": 0.9873, "step": 59 }, { "epoch": 0.004753416518122401, "grad_norm": 9.490342308709941, "learning_rate": 3.1662269129287603e-06, "loss": 0.9418, "step": 60 }, { "epoch": 0.004832640126757774, "grad_norm": 9.12196590712411, "learning_rate": 3.2189973614775725e-06, "loss": 1.0247, "step": 61 }, { "epoch": 0.004911863735393147, "grad_norm": 7.820198356920157, "learning_rate": 3.2717678100263856e-06, "loss": 0.9676, "step": 62 }, { "epoch": 0.004991087344028521, "grad_norm": 7.730956247882421, "learning_rate": 3.3245382585751982e-06, "loss": 0.915, "step": 63 }, { "epoch": 0.005070310952663894, "grad_norm": 8.7988146994701, "learning_rate": 3.377308707124011e-06, "loss": 0.9994, "step": 64 }, { "epoch": 0.005149534561299267, "grad_norm": 7.964992208166385, "learning_rate": 3.4300791556728235e-06, "loss": 0.9097, "step": 65 }, { "epoch": 0.00522875816993464, "grad_norm": 9.772442960771029, "learning_rate": 3.4828496042216366e-06, "loss": 0.9568, "step": 66 }, { "epoch": 0.005307981778570014, "grad_norm": 8.135360901673979, "learning_rate": 3.535620052770449e-06, "loss": 0.9513, "step": 67 }, { "epoch": 0.0053872053872053875, "grad_norm": 7.6709860091833715, "learning_rate": 3.5883905013192615e-06, "loss": 0.8889, "step": 68 }, { "epoch": 0.00546642899584076, "grad_norm": 7.443248507707116, "learning_rate": 3.6411609498680746e-06, "loss": 0.8934, "step": 69 }, { "epoch": 0.005545652604476134, "grad_norm": 8.896062469657917, "learning_rate": 3.693931398416887e-06, "loss": 0.8696, "step": 70 }, { "epoch": 0.005624876213111508, "grad_norm": 6.7033732684985425, "learning_rate": 3.7467018469656995e-06, "loss": 0.9287, "step": 71 }, { "epoch": 0.00570409982174688, "grad_norm": 8.28266478949728, "learning_rate": 3.7994722955145117e-06, "loss": 1.0932, "step": 72 }, { "epoch": 0.005783323430382254, "grad_norm": 7.480457460036021, "learning_rate": 3.852242744063324e-06, "loss": 0.9265, "step": 73 }, { "epoch": 0.005862547039017627, "grad_norm": 8.46194228991338, "learning_rate": 3.9050131926121375e-06, "loss": 0.8829, "step": 74 }, { "epoch": 0.0059417706476530005, "grad_norm": 7.108096457031844, "learning_rate": 3.95778364116095e-06, "loss": 0.8676, "step": 75 }, { "epoch": 0.006020994256288374, "grad_norm": 8.26289581235156, "learning_rate": 4.010554089709763e-06, "loss": 0.9526, "step": 76 }, { "epoch": 0.006100217864923747, "grad_norm": 7.426924959601872, "learning_rate": 4.063324538258576e-06, "loss": 0.8984, "step": 77 }, { "epoch": 0.006179441473559121, "grad_norm": 7.314131265022161, "learning_rate": 4.116094986807388e-06, "loss": 0.938, "step": 78 }, { "epoch": 0.006258665082194494, "grad_norm": 7.2349724550929215, "learning_rate": 4.168865435356201e-06, "loss": 0.9012, "step": 79 }, { "epoch": 0.006337888690829867, "grad_norm": 6.303291307253892, "learning_rate": 4.221635883905013e-06, "loss": 0.9582, "step": 80 }, { "epoch": 0.006417112299465241, "grad_norm": 8.165404763258302, "learning_rate": 4.274406332453826e-06, "loss": 0.8817, "step": 81 }, { "epoch": 0.006496335908100614, "grad_norm": 6.589957848159812, "learning_rate": 4.327176781002639e-06, "loss": 0.8781, "step": 82 }, { "epoch": 0.006575559516735987, "grad_norm": 7.044234476574764, "learning_rate": 4.379947229551452e-06, "loss": 0.8443, "step": 83 }, { "epoch": 0.006654783125371361, "grad_norm": 6.679333954068805, "learning_rate": 4.432717678100264e-06, "loss": 0.8553, "step": 84 }, { "epoch": 0.006734006734006734, "grad_norm": 7.208240387990633, "learning_rate": 4.485488126649077e-06, "loss": 0.887, "step": 85 }, { "epoch": 0.006813230342642107, "grad_norm": 6.654126823841316, "learning_rate": 4.538258575197889e-06, "loss": 0.8218, "step": 86 }, { "epoch": 0.006892453951277481, "grad_norm": 6.6573158796176966, "learning_rate": 4.5910290237467024e-06, "loss": 0.8883, "step": 87 }, { "epoch": 0.006971677559912854, "grad_norm": 7.475449982222273, "learning_rate": 4.643799472295515e-06, "loss": 0.9484, "step": 88 }, { "epoch": 0.0070509011685482275, "grad_norm": 7.226263349561209, "learning_rate": 4.696569920844328e-06, "loss": 0.9058, "step": 89 }, { "epoch": 0.0071301247771836, "grad_norm": 6.714612107470067, "learning_rate": 4.74934036939314e-06, "loss": 0.945, "step": 90 }, { "epoch": 0.007209348385818974, "grad_norm": 7.2642247561182955, "learning_rate": 4.802110817941953e-06, "loss": 0.8411, "step": 91 }, { "epoch": 0.007288571994454348, "grad_norm": 6.0516847173704695, "learning_rate": 4.854881266490766e-06, "loss": 0.9651, "step": 92 }, { "epoch": 0.00736779560308972, "grad_norm": 7.480869160126672, "learning_rate": 4.907651715039578e-06, "loss": 0.7841, "step": 93 }, { "epoch": 0.007447019211725094, "grad_norm": 8.225645152206116, "learning_rate": 4.960422163588391e-06, "loss": 0.8955, "step": 94 }, { "epoch": 0.007526242820360468, "grad_norm": 7.216394822081365, "learning_rate": 5.013192612137203e-06, "loss": 0.8966, "step": 95 }, { "epoch": 0.0076054664289958405, "grad_norm": 7.369662653573449, "learning_rate": 5.065963060686017e-06, "loss": 0.884, "step": 96 }, { "epoch": 0.007684690037631214, "grad_norm": 7.725513207950532, "learning_rate": 5.118733509234829e-06, "loss": 0.9601, "step": 97 }, { "epoch": 0.007763913646266587, "grad_norm": 7.6744067203539545, "learning_rate": 5.171503957783641e-06, "loss": 0.9021, "step": 98 }, { "epoch": 0.00784313725490196, "grad_norm": 6.291842395956368, "learning_rate": 5.224274406332454e-06, "loss": 0.8333, "step": 99 }, { "epoch": 0.007922360863537333, "grad_norm": 6.437414406114134, "learning_rate": 5.2770448548812665e-06, "loss": 0.8087, "step": 100 }, { "epoch": 0.008001584472172708, "grad_norm": 6.567236790812515, "learning_rate": 5.32981530343008e-06, "loss": 0.8172, "step": 101 }, { "epoch": 0.00808080808080808, "grad_norm": 6.062145791177876, "learning_rate": 5.382585751978893e-06, "loss": 0.8378, "step": 102 }, { "epoch": 0.008160031689443454, "grad_norm": 7.775648390585933, "learning_rate": 5.435356200527705e-06, "loss": 0.9035, "step": 103 }, { "epoch": 0.008239255298078828, "grad_norm": 6.451599166161916, "learning_rate": 5.488126649076517e-06, "loss": 0.948, "step": 104 }, { "epoch": 0.008318478906714201, "grad_norm": 7.411667507978405, "learning_rate": 5.540897097625331e-06, "loss": 0.8766, "step": 105 }, { "epoch": 0.008397702515349574, "grad_norm": 7.033119727954545, "learning_rate": 5.593667546174143e-06, "loss": 0.7836, "step": 106 }, { "epoch": 0.008476926123984948, "grad_norm": 6.369139602547609, "learning_rate": 5.6464379947229556e-06, "loss": 0.8344, "step": 107 }, { "epoch": 0.008556149732620321, "grad_norm": 8.42831073461348, "learning_rate": 5.699208443271769e-06, "loss": 0.8721, "step": 108 }, { "epoch": 0.008635373341255694, "grad_norm": 6.945334822024758, "learning_rate": 5.751978891820581e-06, "loss": 0.8424, "step": 109 }, { "epoch": 0.008714596949891068, "grad_norm": 7.3402728309411716, "learning_rate": 5.804749340369393e-06, "loss": 0.7852, "step": 110 }, { "epoch": 0.008793820558526441, "grad_norm": 7.621443431050536, "learning_rate": 5.857519788918207e-06, "loss": 0.8047, "step": 111 }, { "epoch": 0.008873044167161814, "grad_norm": 6.042150172429204, "learning_rate": 5.910290237467019e-06, "loss": 0.6894, "step": 112 }, { "epoch": 0.008952267775797187, "grad_norm": 8.284828432110238, "learning_rate": 5.9630606860158315e-06, "loss": 0.9772, "step": 113 }, { "epoch": 0.009031491384432561, "grad_norm": 6.82287369170654, "learning_rate": 6.015831134564645e-06, "loss": 0.7183, "step": 114 }, { "epoch": 0.009110714993067934, "grad_norm": 7.709064088472621, "learning_rate": 6.068601583113457e-06, "loss": 0.874, "step": 115 }, { "epoch": 0.009189938601703307, "grad_norm": 7.818614870566409, "learning_rate": 6.121372031662269e-06, "loss": 0.8342, "step": 116 }, { "epoch": 0.009269162210338681, "grad_norm": 6.510556273745667, "learning_rate": 6.174142480211083e-06, "loss": 0.9821, "step": 117 }, { "epoch": 0.009348385818974054, "grad_norm": 6.688508998061781, "learning_rate": 6.226912928759895e-06, "loss": 0.8985, "step": 118 }, { "epoch": 0.009427609427609427, "grad_norm": 6.16060199216206, "learning_rate": 6.2796833773087074e-06, "loss": 0.7694, "step": 119 }, { "epoch": 0.009506833036244802, "grad_norm": 6.637708679329666, "learning_rate": 6.3324538258575205e-06, "loss": 0.9272, "step": 120 }, { "epoch": 0.009586056644880174, "grad_norm": 7.117160429896391, "learning_rate": 6.385224274406333e-06, "loss": 0.8983, "step": 121 }, { "epoch": 0.009665280253515547, "grad_norm": 6.381316300883817, "learning_rate": 6.437994722955145e-06, "loss": 0.8254, "step": 122 }, { "epoch": 0.009744503862150922, "grad_norm": 7.535814059523654, "learning_rate": 6.490765171503959e-06, "loss": 0.8102, "step": 123 }, { "epoch": 0.009823727470786294, "grad_norm": 8.245421659934367, "learning_rate": 6.543535620052771e-06, "loss": 0.8976, "step": 124 }, { "epoch": 0.009902951079421667, "grad_norm": 8.035345095892026, "learning_rate": 6.596306068601583e-06, "loss": 0.872, "step": 125 }, { "epoch": 0.009982174688057042, "grad_norm": 7.768805602766118, "learning_rate": 6.6490765171503965e-06, "loss": 0.8721, "step": 126 }, { "epoch": 0.010061398296692415, "grad_norm": 6.8564534315544865, "learning_rate": 6.701846965699209e-06, "loss": 0.7111, "step": 127 }, { "epoch": 0.010140621905327787, "grad_norm": 6.035762422487148, "learning_rate": 6.754617414248022e-06, "loss": 0.7817, "step": 128 }, { "epoch": 0.01021984551396316, "grad_norm": 6.544879876408064, "learning_rate": 6.807387862796835e-06, "loss": 0.7646, "step": 129 }, { "epoch": 0.010299069122598535, "grad_norm": 6.374898342587209, "learning_rate": 6.860158311345647e-06, "loss": 0.8517, "step": 130 }, { "epoch": 0.010378292731233908, "grad_norm": 6.742524152542619, "learning_rate": 6.912928759894459e-06, "loss": 0.783, "step": 131 }, { "epoch": 0.01045751633986928, "grad_norm": 6.463683281336434, "learning_rate": 6.965699208443273e-06, "loss": 0.8553, "step": 132 }, { "epoch": 0.010536739948504655, "grad_norm": 6.006316276828489, "learning_rate": 7.0184696569920855e-06, "loss": 0.8629, "step": 133 }, { "epoch": 0.010615963557140028, "grad_norm": 6.9008903602171605, "learning_rate": 7.071240105540898e-06, "loss": 0.8117, "step": 134 }, { "epoch": 0.0106951871657754, "grad_norm": 6.200731497677948, "learning_rate": 7.124010554089711e-06, "loss": 0.9114, "step": 135 }, { "epoch": 0.010774410774410775, "grad_norm": 6.807630684088574, "learning_rate": 7.176781002638523e-06, "loss": 0.8792, "step": 136 }, { "epoch": 0.010853634383046148, "grad_norm": 5.3701401090661, "learning_rate": 7.229551451187335e-06, "loss": 0.8673, "step": 137 }, { "epoch": 0.01093285799168152, "grad_norm": 6.757375028297844, "learning_rate": 7.282321899736149e-06, "loss": 0.8845, "step": 138 }, { "epoch": 0.011012081600316895, "grad_norm": 7.1004230146825895, "learning_rate": 7.3350923482849614e-06, "loss": 0.8714, "step": 139 }, { "epoch": 0.011091305208952268, "grad_norm": 5.336040919868236, "learning_rate": 7.387862796833774e-06, "loss": 0.7554, "step": 140 }, { "epoch": 0.01117052881758764, "grad_norm": 6.988830275708981, "learning_rate": 7.440633245382587e-06, "loss": 0.8094, "step": 141 }, { "epoch": 0.011249752426223015, "grad_norm": 6.582171590449282, "learning_rate": 7.493403693931399e-06, "loss": 0.7634, "step": 142 }, { "epoch": 0.011328976034858388, "grad_norm": 5.987515827179504, "learning_rate": 7.546174142480211e-06, "loss": 0.7018, "step": 143 }, { "epoch": 0.01140819964349376, "grad_norm": 6.700327692091717, "learning_rate": 7.5989445910290234e-06, "loss": 0.8476, "step": 144 }, { "epoch": 0.011487423252129134, "grad_norm": 6.378921328384878, "learning_rate": 7.651715039577837e-06, "loss": 0.7756, "step": 145 }, { "epoch": 0.011566646860764508, "grad_norm": 6.2661110671266425, "learning_rate": 7.704485488126649e-06, "loss": 0.7612, "step": 146 }, { "epoch": 0.011645870469399881, "grad_norm": 6.020189423063324, "learning_rate": 7.757255936675462e-06, "loss": 0.8413, "step": 147 }, { "epoch": 0.011725094078035254, "grad_norm": 6.020660293827696, "learning_rate": 7.810026385224275e-06, "loss": 0.7821, "step": 148 }, { "epoch": 0.011804317686670628, "grad_norm": 6.255480130642274, "learning_rate": 7.862796833773088e-06, "loss": 0.8484, "step": 149 }, { "epoch": 0.011883541295306001, "grad_norm": 5.665212849234186, "learning_rate": 7.9155672823219e-06, "loss": 0.7354, "step": 150 }, { "epoch": 0.011962764903941374, "grad_norm": 6.09860841147306, "learning_rate": 7.968337730870712e-06, "loss": 0.6789, "step": 151 }, { "epoch": 0.012041988512576748, "grad_norm": 5.807336004435362, "learning_rate": 8.021108179419526e-06, "loss": 0.7493, "step": 152 }, { "epoch": 0.012121212121212121, "grad_norm": 8.018066053700938, "learning_rate": 8.073878627968339e-06, "loss": 0.8071, "step": 153 }, { "epoch": 0.012200435729847494, "grad_norm": 6.683925727218106, "learning_rate": 8.126649076517152e-06, "loss": 0.7496, "step": 154 }, { "epoch": 0.012279659338482869, "grad_norm": 5.214523793254702, "learning_rate": 8.179419525065963e-06, "loss": 0.6752, "step": 155 }, { "epoch": 0.012358882947118241, "grad_norm": 6.384856496160088, "learning_rate": 8.232189973614776e-06, "loss": 0.7791, "step": 156 }, { "epoch": 0.012438106555753614, "grad_norm": 5.7506919577732365, "learning_rate": 8.28496042216359e-06, "loss": 0.6719, "step": 157 }, { "epoch": 0.012517330164388989, "grad_norm": 6.451943905941833, "learning_rate": 8.337730870712402e-06, "loss": 0.8368, "step": 158 }, { "epoch": 0.012596553773024361, "grad_norm": 6.353285853795154, "learning_rate": 8.390501319261214e-06, "loss": 0.8077, "step": 159 }, { "epoch": 0.012675777381659734, "grad_norm": 6.3946120664157045, "learning_rate": 8.443271767810027e-06, "loss": 0.916, "step": 160 }, { "epoch": 0.012755000990295109, "grad_norm": 5.531993314272616, "learning_rate": 8.49604221635884e-06, "loss": 0.8075, "step": 161 }, { "epoch": 0.012834224598930482, "grad_norm": 5.750261961203504, "learning_rate": 8.548812664907651e-06, "loss": 0.786, "step": 162 }, { "epoch": 0.012913448207565854, "grad_norm": 5.983737631006511, "learning_rate": 8.601583113456466e-06, "loss": 0.6516, "step": 163 }, { "epoch": 0.012992671816201227, "grad_norm": 7.504346432602982, "learning_rate": 8.654353562005277e-06, "loss": 0.8069, "step": 164 }, { "epoch": 0.013071895424836602, "grad_norm": 6.0464892561402594, "learning_rate": 8.70712401055409e-06, "loss": 0.8819, "step": 165 }, { "epoch": 0.013151119033471975, "grad_norm": 5.83366448717492, "learning_rate": 8.759894459102904e-06, "loss": 0.6973, "step": 166 }, { "epoch": 0.013230342642107347, "grad_norm": 6.4472556260765925, "learning_rate": 8.812664907651715e-06, "loss": 0.8768, "step": 167 }, { "epoch": 0.013309566250742722, "grad_norm": 6.036974197208871, "learning_rate": 8.865435356200528e-06, "loss": 0.7416, "step": 168 }, { "epoch": 0.013388789859378095, "grad_norm": 5.852387747977291, "learning_rate": 8.918205804749341e-06, "loss": 0.6665, "step": 169 }, { "epoch": 0.013468013468013467, "grad_norm": 5.47985240394418, "learning_rate": 8.970976253298154e-06, "loss": 0.9516, "step": 170 }, { "epoch": 0.013547237076648842, "grad_norm": 6.241247727690077, "learning_rate": 9.023746701846966e-06, "loss": 0.8888, "step": 171 }, { "epoch": 0.013626460685284215, "grad_norm": 6.3090614251586885, "learning_rate": 9.076517150395779e-06, "loss": 0.8151, "step": 172 }, { "epoch": 0.013705684293919588, "grad_norm": 6.8806061487648025, "learning_rate": 9.129287598944592e-06, "loss": 0.822, "step": 173 }, { "epoch": 0.013784907902554962, "grad_norm": 5.836880430806096, "learning_rate": 9.182058047493405e-06, "loss": 0.7376, "step": 174 }, { "epoch": 0.013864131511190335, "grad_norm": 5.7922355557454575, "learning_rate": 9.234828496042218e-06, "loss": 0.7222, "step": 175 }, { "epoch": 0.013943355119825708, "grad_norm": 6.01423356467863, "learning_rate": 9.28759894459103e-06, "loss": 0.7947, "step": 176 }, { "epoch": 0.014022578728461082, "grad_norm": 5.3625604233349184, "learning_rate": 9.340369393139842e-06, "loss": 0.7288, "step": 177 }, { "epoch": 0.014101802337096455, "grad_norm": 5.8912869137431505, "learning_rate": 9.393139841688655e-06, "loss": 0.7991, "step": 178 }, { "epoch": 0.014181025945731828, "grad_norm": 6.226436224894201, "learning_rate": 9.445910290237469e-06, "loss": 0.778, "step": 179 }, { "epoch": 0.0142602495543672, "grad_norm": 6.126272177451517, "learning_rate": 9.49868073878628e-06, "loss": 0.6597, "step": 180 }, { "epoch": 0.014339473163002575, "grad_norm": 6.214182615091143, "learning_rate": 9.551451187335093e-06, "loss": 0.8443, "step": 181 }, { "epoch": 0.014418696771637948, "grad_norm": 5.978541650955973, "learning_rate": 9.604221635883906e-06, "loss": 0.7414, "step": 182 }, { "epoch": 0.01449792038027332, "grad_norm": 5.6190877624160755, "learning_rate": 9.656992084432717e-06, "loss": 0.6482, "step": 183 }, { "epoch": 0.014577143988908695, "grad_norm": 5.837583999021052, "learning_rate": 9.709762532981532e-06, "loss": 0.8733, "step": 184 }, { "epoch": 0.014656367597544068, "grad_norm": 5.650537160006914, "learning_rate": 9.762532981530344e-06, "loss": 0.7878, "step": 185 }, { "epoch": 0.01473559120617944, "grad_norm": 4.895363025247281, "learning_rate": 9.815303430079157e-06, "loss": 0.7125, "step": 186 }, { "epoch": 0.014814814814814815, "grad_norm": 6.1635072979440295, "learning_rate": 9.86807387862797e-06, "loss": 0.804, "step": 187 }, { "epoch": 0.014894038423450188, "grad_norm": 5.204900213880355, "learning_rate": 9.920844327176781e-06, "loss": 0.705, "step": 188 }, { "epoch": 0.014973262032085561, "grad_norm": 5.7582758917110555, "learning_rate": 9.973614775725594e-06, "loss": 0.7431, "step": 189 }, { "epoch": 0.015052485640720936, "grad_norm": 5.606321164149868, "learning_rate": 1.0026385224274406e-05, "loss": 0.8404, "step": 190 }, { "epoch": 0.015131709249356308, "grad_norm": 5.408619447702593, "learning_rate": 1.007915567282322e-05, "loss": 0.7144, "step": 191 }, { "epoch": 0.015210932857991681, "grad_norm": 5.8683474427102045, "learning_rate": 1.0131926121372034e-05, "loss": 0.6259, "step": 192 }, { "epoch": 0.015290156466627056, "grad_norm": 6.423863810489736, "learning_rate": 1.0184696569920845e-05, "loss": 0.7656, "step": 193 }, { "epoch": 0.015369380075262428, "grad_norm": 4.828903750610458, "learning_rate": 1.0237467018469658e-05, "loss": 0.6393, "step": 194 }, { "epoch": 0.015448603683897801, "grad_norm": 4.680933132914672, "learning_rate": 1.0290237467018471e-05, "loss": 0.6767, "step": 195 }, { "epoch": 0.015527827292533174, "grad_norm": 6.145591101536939, "learning_rate": 1.0343007915567282e-05, "loss": 0.7693, "step": 196 }, { "epoch": 0.015607050901168549, "grad_norm": 5.635191054312746, "learning_rate": 1.0395778364116096e-05, "loss": 0.7552, "step": 197 }, { "epoch": 0.01568627450980392, "grad_norm": 5.303888806565517, "learning_rate": 1.0448548812664909e-05, "loss": 0.7241, "step": 198 }, { "epoch": 0.015765498118439296, "grad_norm": 5.6327296377707485, "learning_rate": 1.050131926121372e-05, "loss": 0.7321, "step": 199 }, { "epoch": 0.015844721727074667, "grad_norm": 5.7751096911113535, "learning_rate": 1.0554089709762533e-05, "loss": 0.6401, "step": 200 }, { "epoch": 0.01592394533571004, "grad_norm": 5.080070634374656, "learning_rate": 1.0606860158311348e-05, "loss": 0.6558, "step": 201 }, { "epoch": 0.016003168944345416, "grad_norm": 5.354288260848981, "learning_rate": 1.065963060686016e-05, "loss": 0.7592, "step": 202 }, { "epoch": 0.016082392552980787, "grad_norm": 5.9386907334086665, "learning_rate": 1.0712401055408972e-05, "loss": 0.8064, "step": 203 }, { "epoch": 0.01616161616161616, "grad_norm": 5.292484411072734, "learning_rate": 1.0765171503957785e-05, "loss": 0.6699, "step": 204 }, { "epoch": 0.016240839770251536, "grad_norm": 5.665278837914741, "learning_rate": 1.0817941952506597e-05, "loss": 0.7521, "step": 205 }, { "epoch": 0.016320063378886907, "grad_norm": 5.740857851043832, "learning_rate": 1.087071240105541e-05, "loss": 0.7137, "step": 206 }, { "epoch": 0.01639928698752228, "grad_norm": 6.0454658670924335, "learning_rate": 1.0923482849604223e-05, "loss": 0.7445, "step": 207 }, { "epoch": 0.016478510596157656, "grad_norm": 5.23838587009887, "learning_rate": 1.0976253298153034e-05, "loss": 0.7024, "step": 208 }, { "epoch": 0.016557734204793027, "grad_norm": 6.205514996867051, "learning_rate": 1.1029023746701847e-05, "loss": 0.8582, "step": 209 }, { "epoch": 0.016636957813428402, "grad_norm": 5.309346411741108, "learning_rate": 1.1081794195250662e-05, "loss": 0.7522, "step": 210 }, { "epoch": 0.016716181422063776, "grad_norm": 5.5375116733389325, "learning_rate": 1.1134564643799472e-05, "loss": 0.6855, "step": 211 }, { "epoch": 0.016795405030699147, "grad_norm": 5.578528155883679, "learning_rate": 1.1187335092348287e-05, "loss": 0.6859, "step": 212 }, { "epoch": 0.016874628639334522, "grad_norm": 5.382253655791046, "learning_rate": 1.12401055408971e-05, "loss": 0.7339, "step": 213 }, { "epoch": 0.016953852247969897, "grad_norm": 5.528062539724129, "learning_rate": 1.1292875989445911e-05, "loss": 0.8214, "step": 214 }, { "epoch": 0.017033075856605268, "grad_norm": 5.859717168728105, "learning_rate": 1.1345646437994724e-05, "loss": 0.7755, "step": 215 }, { "epoch": 0.017112299465240642, "grad_norm": 5.1296682886393015, "learning_rate": 1.1398416886543537e-05, "loss": 0.8086, "step": 216 }, { "epoch": 0.017191523073876017, "grad_norm": 5.792792407400176, "learning_rate": 1.1451187335092349e-05, "loss": 0.6665, "step": 217 }, { "epoch": 0.017270746682511388, "grad_norm": 5.426129151374314, "learning_rate": 1.1503957783641162e-05, "loss": 0.6077, "step": 218 }, { "epoch": 0.017349970291146762, "grad_norm": 6.446499513139741, "learning_rate": 1.1556728232189975e-05, "loss": 0.6683, "step": 219 }, { "epoch": 0.017429193899782137, "grad_norm": 5.257821679228257, "learning_rate": 1.1609498680738786e-05, "loss": 0.7431, "step": 220 }, { "epoch": 0.017508417508417508, "grad_norm": 6.0486630424704, "learning_rate": 1.16622691292876e-05, "loss": 0.8161, "step": 221 }, { "epoch": 0.017587641117052882, "grad_norm": 5.391818266826954, "learning_rate": 1.1715039577836414e-05, "loss": 0.8968, "step": 222 }, { "epoch": 0.017666864725688253, "grad_norm": 5.214277082229823, "learning_rate": 1.1767810026385225e-05, "loss": 0.8127, "step": 223 }, { "epoch": 0.017746088334323628, "grad_norm": 5.947812857415303, "learning_rate": 1.1820580474934039e-05, "loss": 0.7495, "step": 224 }, { "epoch": 0.017825311942959002, "grad_norm": 6.530761964307894, "learning_rate": 1.1873350923482852e-05, "loss": 0.6612, "step": 225 }, { "epoch": 0.017904535551594374, "grad_norm": 6.345427295525233, "learning_rate": 1.1926121372031663e-05, "loss": 0.6558, "step": 226 }, { "epoch": 0.017983759160229748, "grad_norm": 5.698031713831435, "learning_rate": 1.1978891820580476e-05, "loss": 0.7203, "step": 227 }, { "epoch": 0.018062982768865123, "grad_norm": 5.12522619447126, "learning_rate": 1.203166226912929e-05, "loss": 0.7576, "step": 228 }, { "epoch": 0.018142206377500494, "grad_norm": 5.808288250755425, "learning_rate": 1.20844327176781e-05, "loss": 0.8105, "step": 229 }, { "epoch": 0.018221429986135868, "grad_norm": 4.9300203075498805, "learning_rate": 1.2137203166226914e-05, "loss": 0.7983, "step": 230 }, { "epoch": 0.018300653594771243, "grad_norm": 4.697187469691393, "learning_rate": 1.2189973614775727e-05, "loss": 0.7084, "step": 231 }, { "epoch": 0.018379877203406614, "grad_norm": 5.001773945258765, "learning_rate": 1.2242744063324538e-05, "loss": 0.7223, "step": 232 }, { "epoch": 0.01845910081204199, "grad_norm": 7.1355893597252855, "learning_rate": 1.2295514511873353e-05, "loss": 0.8413, "step": 233 }, { "epoch": 0.018538324420677363, "grad_norm": 6.348833763192268, "learning_rate": 1.2348284960422166e-05, "loss": 0.6731, "step": 234 }, { "epoch": 0.018617548029312734, "grad_norm": 4.946204827143474, "learning_rate": 1.2401055408970977e-05, "loss": 0.6908, "step": 235 }, { "epoch": 0.01869677163794811, "grad_norm": 5.493686807606341, "learning_rate": 1.245382585751979e-05, "loss": 0.7671, "step": 236 }, { "epoch": 0.018775995246583483, "grad_norm": 5.673914737037365, "learning_rate": 1.2506596306068604e-05, "loss": 0.7087, "step": 237 }, { "epoch": 0.018855218855218854, "grad_norm": 4.83293534885964, "learning_rate": 1.2559366754617415e-05, "loss": 0.7008, "step": 238 }, { "epoch": 0.01893444246385423, "grad_norm": 4.668620821999954, "learning_rate": 1.2612137203166228e-05, "loss": 0.7444, "step": 239 }, { "epoch": 0.019013666072489603, "grad_norm": 5.320602701600323, "learning_rate": 1.2664907651715041e-05, "loss": 0.7076, "step": 240 }, { "epoch": 0.019092889681124974, "grad_norm": 5.216235256752975, "learning_rate": 1.2717678100263852e-05, "loss": 0.669, "step": 241 }, { "epoch": 0.01917211328976035, "grad_norm": 8.852938876339596, "learning_rate": 1.2770448548812666e-05, "loss": 0.7982, "step": 242 }, { "epoch": 0.019251336898395723, "grad_norm": 7.639310926487093, "learning_rate": 1.282321899736148e-05, "loss": 0.645, "step": 243 }, { "epoch": 0.019330560507031094, "grad_norm": 5.191896526211173, "learning_rate": 1.287598944591029e-05, "loss": 0.6148, "step": 244 }, { "epoch": 0.01940978411566647, "grad_norm": 6.010869015407416, "learning_rate": 1.2928759894459105e-05, "loss": 0.8221, "step": 245 }, { "epoch": 0.019489007724301843, "grad_norm": 6.462023902750167, "learning_rate": 1.2981530343007918e-05, "loss": 0.8216, "step": 246 }, { "epoch": 0.019568231332937214, "grad_norm": 6.6283590992549595, "learning_rate": 1.303430079155673e-05, "loss": 0.8258, "step": 247 }, { "epoch": 0.01964745494157259, "grad_norm": 5.2723616814518275, "learning_rate": 1.3087071240105542e-05, "loss": 0.8107, "step": 248 }, { "epoch": 0.019726678550207963, "grad_norm": 5.489791824763702, "learning_rate": 1.3139841688654355e-05, "loss": 0.7367, "step": 249 }, { "epoch": 0.019805902158843335, "grad_norm": 5.400167981733446, "learning_rate": 1.3192612137203167e-05, "loss": 0.7817, "step": 250 }, { "epoch": 0.01988512576747871, "grad_norm": 4.944767634852149, "learning_rate": 1.324538258575198e-05, "loss": 0.6969, "step": 251 }, { "epoch": 0.019964349376114084, "grad_norm": 6.164358046513915, "learning_rate": 1.3298153034300793e-05, "loss": 0.7408, "step": 252 }, { "epoch": 0.020043572984749455, "grad_norm": 4.713538395438616, "learning_rate": 1.3350923482849604e-05, "loss": 0.7404, "step": 253 }, { "epoch": 0.02012279659338483, "grad_norm": 5.476222960909804, "learning_rate": 1.3403693931398417e-05, "loss": 0.7923, "step": 254 }, { "epoch": 0.020202020202020204, "grad_norm": 6.439441203627157, "learning_rate": 1.3456464379947232e-05, "loss": 0.6825, "step": 255 }, { "epoch": 0.020281243810655575, "grad_norm": 5.172281142180801, "learning_rate": 1.3509234828496044e-05, "loss": 0.8169, "step": 256 }, { "epoch": 0.02036046741929095, "grad_norm": 4.533213017046608, "learning_rate": 1.3562005277044857e-05, "loss": 0.694, "step": 257 }, { "epoch": 0.02043969102792632, "grad_norm": 6.029156818830929, "learning_rate": 1.361477572559367e-05, "loss": 0.8392, "step": 258 }, { "epoch": 0.020518914636561695, "grad_norm": 5.227043666231582, "learning_rate": 1.3667546174142481e-05, "loss": 0.7026, "step": 259 }, { "epoch": 0.02059813824519707, "grad_norm": 5.037218463230358, "learning_rate": 1.3720316622691294e-05, "loss": 0.8244, "step": 260 }, { "epoch": 0.02067736185383244, "grad_norm": 5.175478194531059, "learning_rate": 1.3773087071240107e-05, "loss": 0.8124, "step": 261 }, { "epoch": 0.020756585462467815, "grad_norm": 5.21225774868909, "learning_rate": 1.3825857519788919e-05, "loss": 0.6844, "step": 262 }, { "epoch": 0.02083580907110319, "grad_norm": 4.344408122167347, "learning_rate": 1.3878627968337732e-05, "loss": 0.7178, "step": 263 }, { "epoch": 0.02091503267973856, "grad_norm": 5.772588454479037, "learning_rate": 1.3931398416886547e-05, "loss": 0.9369, "step": 264 }, { "epoch": 0.020994256288373935, "grad_norm": 4.860320800887344, "learning_rate": 1.3984168865435356e-05, "loss": 0.7445, "step": 265 }, { "epoch": 0.02107347989700931, "grad_norm": 4.926487338802397, "learning_rate": 1.4036939313984171e-05, "loss": 0.6876, "step": 266 }, { "epoch": 0.02115270350564468, "grad_norm": 4.685217534157765, "learning_rate": 1.4089709762532984e-05, "loss": 0.7607, "step": 267 }, { "epoch": 0.021231927114280055, "grad_norm": 5.7889691146255995, "learning_rate": 1.4142480211081795e-05, "loss": 0.8241, "step": 268 }, { "epoch": 0.02131115072291543, "grad_norm": 5.173504532588536, "learning_rate": 1.4195250659630609e-05, "loss": 0.7805, "step": 269 }, { "epoch": 0.0213903743315508, "grad_norm": 5.062020683191384, "learning_rate": 1.4248021108179422e-05, "loss": 0.7023, "step": 270 }, { "epoch": 0.021469597940186175, "grad_norm": 5.125795245863302, "learning_rate": 1.4300791556728233e-05, "loss": 0.7066, "step": 271 }, { "epoch": 0.02154882154882155, "grad_norm": 5.221424203324531, "learning_rate": 1.4353562005277046e-05, "loss": 0.6957, "step": 272 }, { "epoch": 0.02162804515745692, "grad_norm": 4.751600484591223, "learning_rate": 1.440633245382586e-05, "loss": 0.725, "step": 273 }, { "epoch": 0.021707268766092296, "grad_norm": 5.704248185447199, "learning_rate": 1.445910290237467e-05, "loss": 0.7975, "step": 274 }, { "epoch": 0.02178649237472767, "grad_norm": 5.283878229259629, "learning_rate": 1.4511873350923484e-05, "loss": 0.7827, "step": 275 }, { "epoch": 0.02186571598336304, "grad_norm": 5.561430305745563, "learning_rate": 1.4564643799472298e-05, "loss": 0.8687, "step": 276 }, { "epoch": 0.021944939591998416, "grad_norm": 4.953562752325849, "learning_rate": 1.461741424802111e-05, "loss": 0.6568, "step": 277 }, { "epoch": 0.02202416320063379, "grad_norm": 4.989031450636796, "learning_rate": 1.4670184696569923e-05, "loss": 0.6796, "step": 278 }, { "epoch": 0.02210338680926916, "grad_norm": 5.0814915981541215, "learning_rate": 1.4722955145118736e-05, "loss": 0.6442, "step": 279 }, { "epoch": 0.022182610417904536, "grad_norm": 5.440612218933665, "learning_rate": 1.4775725593667547e-05, "loss": 0.8033, "step": 280 }, { "epoch": 0.02226183402653991, "grad_norm": 4.7036030181612265, "learning_rate": 1.482849604221636e-05, "loss": 0.7499, "step": 281 }, { "epoch": 0.02234105763517528, "grad_norm": 5.563795040266641, "learning_rate": 1.4881266490765173e-05, "loss": 0.7586, "step": 282 }, { "epoch": 0.022420281243810656, "grad_norm": 5.571378672857436, "learning_rate": 1.4934036939313985e-05, "loss": 0.751, "step": 283 }, { "epoch": 0.02249950485244603, "grad_norm": 4.5765927786250264, "learning_rate": 1.4986807387862798e-05, "loss": 0.6686, "step": 284 }, { "epoch": 0.0225787284610814, "grad_norm": 4.520184821154518, "learning_rate": 1.503957783641161e-05, "loss": 0.6698, "step": 285 }, { "epoch": 0.022657952069716776, "grad_norm": 4.719191921605493, "learning_rate": 1.5092348284960422e-05, "loss": 0.7684, "step": 286 }, { "epoch": 0.02273717567835215, "grad_norm": 5.282904894309323, "learning_rate": 1.5145118733509237e-05, "loss": 0.7937, "step": 287 }, { "epoch": 0.02281639928698752, "grad_norm": 4.85916128437978, "learning_rate": 1.5197889182058047e-05, "loss": 0.7639, "step": 288 }, { "epoch": 0.022895622895622896, "grad_norm": 5.087385286910722, "learning_rate": 1.5250659630606862e-05, "loss": 0.7772, "step": 289 }, { "epoch": 0.022974846504258267, "grad_norm": 5.385201890840065, "learning_rate": 1.5303430079155675e-05, "loss": 0.7612, "step": 290 }, { "epoch": 0.023054070112893642, "grad_norm": 4.7618693908420875, "learning_rate": 1.5356200527704484e-05, "loss": 0.7228, "step": 291 }, { "epoch": 0.023133293721529016, "grad_norm": 4.656091577931212, "learning_rate": 1.5408970976253298e-05, "loss": 0.7909, "step": 292 }, { "epoch": 0.023212517330164387, "grad_norm": 5.0321567242044365, "learning_rate": 1.5461741424802114e-05, "loss": 0.7067, "step": 293 }, { "epoch": 0.023291740938799762, "grad_norm": 4.77287153420482, "learning_rate": 1.5514511873350924e-05, "loss": 0.6899, "step": 294 }, { "epoch": 0.023370964547435136, "grad_norm": 4.558788078989551, "learning_rate": 1.5567282321899737e-05, "loss": 0.7837, "step": 295 }, { "epoch": 0.023450188156070507, "grad_norm": 5.216472651111241, "learning_rate": 1.562005277044855e-05, "loss": 0.6787, "step": 296 }, { "epoch": 0.023529411764705882, "grad_norm": 5.210615417423931, "learning_rate": 1.5672823218997363e-05, "loss": 0.7199, "step": 297 }, { "epoch": 0.023608635373341257, "grad_norm": 5.6984947886017965, "learning_rate": 1.5725593667546176e-05, "loss": 0.7916, "step": 298 }, { "epoch": 0.023687858981976628, "grad_norm": 5.532960252900681, "learning_rate": 1.577836411609499e-05, "loss": 0.8564, "step": 299 }, { "epoch": 0.023767082590612002, "grad_norm": 6.697260878045465, "learning_rate": 1.58311345646438e-05, "loss": 0.7996, "step": 300 }, { "epoch": 0.023846306199247377, "grad_norm": 4.432578004969917, "learning_rate": 1.5883905013192612e-05, "loss": 0.6621, "step": 301 }, { "epoch": 0.023925529807882748, "grad_norm": 5.53714351619017, "learning_rate": 1.5936675461741425e-05, "loss": 0.6781, "step": 302 }, { "epoch": 0.024004753416518122, "grad_norm": 4.485178636736496, "learning_rate": 1.5989445910290238e-05, "loss": 0.6729, "step": 303 }, { "epoch": 0.024083977025153497, "grad_norm": 4.8765460394107185, "learning_rate": 1.604221635883905e-05, "loss": 0.6071, "step": 304 }, { "epoch": 0.024163200633788868, "grad_norm": 5.1872808489810005, "learning_rate": 1.6094986807387864e-05, "loss": 0.7562, "step": 305 }, { "epoch": 0.024242424242424242, "grad_norm": 4.750901671483955, "learning_rate": 1.6147757255936677e-05, "loss": 0.6922, "step": 306 }, { "epoch": 0.024321647851059617, "grad_norm": 4.58393405278496, "learning_rate": 1.620052770448549e-05, "loss": 0.7805, "step": 307 }, { "epoch": 0.024400871459694988, "grad_norm": 4.631535325277142, "learning_rate": 1.6253298153034303e-05, "loss": 0.7336, "step": 308 }, { "epoch": 0.024480095068330363, "grad_norm": 5.669850117188683, "learning_rate": 1.6306068601583113e-05, "loss": 0.5998, "step": 309 }, { "epoch": 0.024559318676965737, "grad_norm": 4.712742532199791, "learning_rate": 1.6358839050131926e-05, "loss": 0.7182, "step": 310 }, { "epoch": 0.024638542285601108, "grad_norm": 4.878669935714314, "learning_rate": 1.641160949868074e-05, "loss": 0.7017, "step": 311 }, { "epoch": 0.024717765894236483, "grad_norm": 5.469294720818251, "learning_rate": 1.6464379947229552e-05, "loss": 0.686, "step": 312 }, { "epoch": 0.024796989502871857, "grad_norm": 5.505596073484324, "learning_rate": 1.6517150395778365e-05, "loss": 0.9775, "step": 313 }, { "epoch": 0.024876213111507228, "grad_norm": 5.606387481204577, "learning_rate": 1.656992084432718e-05, "loss": 0.7261, "step": 314 }, { "epoch": 0.024955436720142603, "grad_norm": 4.464959691277312, "learning_rate": 1.6622691292875988e-05, "loss": 0.6096, "step": 315 }, { "epoch": 0.025034660328777977, "grad_norm": 4.898467999914199, "learning_rate": 1.6675461741424805e-05, "loss": 0.8412, "step": 316 }, { "epoch": 0.02511388393741335, "grad_norm": 4.872228087709652, "learning_rate": 1.6728232189973618e-05, "loss": 0.6006, "step": 317 }, { "epoch": 0.025193107546048723, "grad_norm": 6.173465907759124, "learning_rate": 1.6781002638522427e-05, "loss": 0.6591, "step": 318 }, { "epoch": 0.025272331154684097, "grad_norm": 4.904345804183026, "learning_rate": 1.683377308707124e-05, "loss": 0.646, "step": 319 }, { "epoch": 0.02535155476331947, "grad_norm": 5.586871756586915, "learning_rate": 1.6886543535620054e-05, "loss": 0.5606, "step": 320 }, { "epoch": 0.025430778371954843, "grad_norm": 4.6174459074797, "learning_rate": 1.6939313984168867e-05, "loss": 0.7099, "step": 321 }, { "epoch": 0.025510001980590218, "grad_norm": 5.235131703024437, "learning_rate": 1.699208443271768e-05, "loss": 0.7065, "step": 322 }, { "epoch": 0.02558922558922559, "grad_norm": 4.826210405176726, "learning_rate": 1.7044854881266493e-05, "loss": 0.8279, "step": 323 }, { "epoch": 0.025668449197860963, "grad_norm": 4.54598953496401, "learning_rate": 1.7097625329815303e-05, "loss": 0.6764, "step": 324 }, { "epoch": 0.025747672806496334, "grad_norm": 4.424977462814265, "learning_rate": 1.7150395778364116e-05, "loss": 0.604, "step": 325 }, { "epoch": 0.02582689641513171, "grad_norm": 4.454436082874082, "learning_rate": 1.7203166226912932e-05, "loss": 0.7477, "step": 326 }, { "epoch": 0.025906120023767083, "grad_norm": 4.933371834743646, "learning_rate": 1.7255936675461742e-05, "loss": 0.8529, "step": 327 }, { "epoch": 0.025985343632402454, "grad_norm": 5.015914473675524, "learning_rate": 1.7308707124010555e-05, "loss": 0.7221, "step": 328 }, { "epoch": 0.02606456724103783, "grad_norm": 4.883274382223023, "learning_rate": 1.7361477572559368e-05, "loss": 0.7482, "step": 329 }, { "epoch": 0.026143790849673203, "grad_norm": 6.222847344775064, "learning_rate": 1.741424802110818e-05, "loss": 0.847, "step": 330 }, { "epoch": 0.026223014458308574, "grad_norm": 5.122229201964031, "learning_rate": 1.7467018469656994e-05, "loss": 0.7503, "step": 331 }, { "epoch": 0.02630223806694395, "grad_norm": 4.884576991437588, "learning_rate": 1.7519788918205807e-05, "loss": 0.698, "step": 332 }, { "epoch": 0.026381461675579324, "grad_norm": 4.360395504080726, "learning_rate": 1.7572559366754617e-05, "loss": 0.7492, "step": 333 }, { "epoch": 0.026460685284214695, "grad_norm": 4.20751253678441, "learning_rate": 1.762532981530343e-05, "loss": 0.6072, "step": 334 }, { "epoch": 0.02653990889285007, "grad_norm": 4.0010841153087044, "learning_rate": 1.7678100263852246e-05, "loss": 0.6294, "step": 335 }, { "epoch": 0.026619132501485444, "grad_norm": 5.0942419602954025, "learning_rate": 1.7730870712401056e-05, "loss": 0.7124, "step": 336 }, { "epoch": 0.026698356110120815, "grad_norm": 4.9025049718820926, "learning_rate": 1.778364116094987e-05, "loss": 0.7486, "step": 337 }, { "epoch": 0.02677757971875619, "grad_norm": 4.216820203802327, "learning_rate": 1.7836411609498682e-05, "loss": 0.6624, "step": 338 }, { "epoch": 0.026856803327391564, "grad_norm": 4.873402113608072, "learning_rate": 1.7889182058047495e-05, "loss": 0.6233, "step": 339 }, { "epoch": 0.026936026936026935, "grad_norm": 5.207898415655085, "learning_rate": 1.794195250659631e-05, "loss": 0.8605, "step": 340 }, { "epoch": 0.02701525054466231, "grad_norm": 4.279273943695959, "learning_rate": 1.799472295514512e-05, "loss": 0.8051, "step": 341 }, { "epoch": 0.027094474153297684, "grad_norm": 4.889154719893198, "learning_rate": 1.804749340369393e-05, "loss": 0.8153, "step": 342 }, { "epoch": 0.027173697761933055, "grad_norm": 4.304712492086262, "learning_rate": 1.8100263852242744e-05, "loss": 0.5527, "step": 343 }, { "epoch": 0.02725292137056843, "grad_norm": 4.757940775300428, "learning_rate": 1.8153034300791557e-05, "loss": 0.7414, "step": 344 }, { "epoch": 0.027332144979203804, "grad_norm": 4.745641576583929, "learning_rate": 1.820580474934037e-05, "loss": 0.7018, "step": 345 }, { "epoch": 0.027411368587839175, "grad_norm": 5.606855238901125, "learning_rate": 1.8258575197889184e-05, "loss": 0.711, "step": 346 }, { "epoch": 0.02749059219647455, "grad_norm": 4.589455511019485, "learning_rate": 1.8311345646437997e-05, "loss": 0.6657, "step": 347 }, { "epoch": 0.027569815805109924, "grad_norm": 4.380556690595863, "learning_rate": 1.836411609498681e-05, "loss": 0.6207, "step": 348 }, { "epoch": 0.027649039413745295, "grad_norm": 5.464281507136505, "learning_rate": 1.8416886543535623e-05, "loss": 0.7978, "step": 349 }, { "epoch": 0.02772826302238067, "grad_norm": 4.455830264641891, "learning_rate": 1.8469656992084436e-05, "loss": 0.647, "step": 350 }, { "epoch": 0.027807486631016044, "grad_norm": 4.415427815113544, "learning_rate": 1.8522427440633246e-05, "loss": 0.7126, "step": 351 }, { "epoch": 0.027886710239651415, "grad_norm": 4.423598563001037, "learning_rate": 1.857519788918206e-05, "loss": 0.7051, "step": 352 }, { "epoch": 0.02796593384828679, "grad_norm": 3.8569744021636385, "learning_rate": 1.8627968337730872e-05, "loss": 0.6308, "step": 353 }, { "epoch": 0.028045157456922164, "grad_norm": 4.415411292606307, "learning_rate": 1.8680738786279685e-05, "loss": 0.5519, "step": 354 }, { "epoch": 0.028124381065557535, "grad_norm": 5.0422917734338135, "learning_rate": 1.8733509234828498e-05, "loss": 0.7172, "step": 355 }, { "epoch": 0.02820360467419291, "grad_norm": 4.002764445493207, "learning_rate": 1.878627968337731e-05, "loss": 0.7165, "step": 356 }, { "epoch": 0.028282828282828285, "grad_norm": 4.599337876833972, "learning_rate": 1.883905013192612e-05, "loss": 0.7968, "step": 357 }, { "epoch": 0.028362051891463656, "grad_norm": 5.033776323410691, "learning_rate": 1.8891820580474937e-05, "loss": 0.7958, "step": 358 }, { "epoch": 0.02844127550009903, "grad_norm": 4.738923984100409, "learning_rate": 1.894459102902375e-05, "loss": 0.7353, "step": 359 }, { "epoch": 0.0285204991087344, "grad_norm": 4.246229627710574, "learning_rate": 1.899736147757256e-05, "loss": 0.7837, "step": 360 }, { "epoch": 0.028599722717369776, "grad_norm": 5.233523278268373, "learning_rate": 1.9050131926121373e-05, "loss": 0.7994, "step": 361 }, { "epoch": 0.02867894632600515, "grad_norm": 5.000247119050768, "learning_rate": 1.9102902374670186e-05, "loss": 0.8393, "step": 362 }, { "epoch": 0.02875816993464052, "grad_norm": 4.990627757505972, "learning_rate": 1.9155672823219e-05, "loss": 0.683, "step": 363 }, { "epoch": 0.028837393543275896, "grad_norm": 3.5378068556875655, "learning_rate": 1.9208443271767812e-05, "loss": 0.698, "step": 364 }, { "epoch": 0.02891661715191127, "grad_norm": 4.707473097246092, "learning_rate": 1.9261213720316625e-05, "loss": 0.7491, "step": 365 }, { "epoch": 0.02899584076054664, "grad_norm": 4.264970516367911, "learning_rate": 1.9313984168865435e-05, "loss": 0.6915, "step": 366 }, { "epoch": 0.029075064369182016, "grad_norm": 4.745594773515386, "learning_rate": 1.9366754617414248e-05, "loss": 0.6245, "step": 367 }, { "epoch": 0.02915428797781739, "grad_norm": 4.218060854608493, "learning_rate": 1.9419525065963065e-05, "loss": 0.7101, "step": 368 }, { "epoch": 0.02923351158645276, "grad_norm": 4.422163537716093, "learning_rate": 1.9472295514511874e-05, "loss": 0.6612, "step": 369 }, { "epoch": 0.029312735195088136, "grad_norm": 5.059970177902883, "learning_rate": 1.9525065963060687e-05, "loss": 0.6844, "step": 370 }, { "epoch": 0.02939195880372351, "grad_norm": 5.425596923779999, "learning_rate": 1.95778364116095e-05, "loss": 0.7581, "step": 371 }, { "epoch": 0.02947118241235888, "grad_norm": 4.448599851923129, "learning_rate": 1.9630606860158313e-05, "loss": 0.5906, "step": 372 }, { "epoch": 0.029550406020994256, "grad_norm": 5.867014061178323, "learning_rate": 1.9683377308707127e-05, "loss": 0.777, "step": 373 }, { "epoch": 0.02962962962962963, "grad_norm": 4.633622660915606, "learning_rate": 1.973614775725594e-05, "loss": 0.7659, "step": 374 }, { "epoch": 0.029708853238265002, "grad_norm": 5.2984904666256005, "learning_rate": 1.978891820580475e-05, "loss": 0.7074, "step": 375 }, { "epoch": 0.029788076846900376, "grad_norm": 4.861656259919689, "learning_rate": 1.9841688654353562e-05, "loss": 0.6616, "step": 376 }, { "epoch": 0.02986730045553575, "grad_norm": 5.545022792268187, "learning_rate": 1.9894459102902375e-05, "loss": 0.6232, "step": 377 }, { "epoch": 0.029946524064171122, "grad_norm": 4.911902408072508, "learning_rate": 1.994722955145119e-05, "loss": 0.8349, "step": 378 }, { "epoch": 0.030025747672806496, "grad_norm": 4.712000002428415, "learning_rate": 2e-05, "loss": 0.7714, "step": 379 }, { "epoch": 0.03010497128144187, "grad_norm": 5.0463324227267154, "learning_rate": 1.999999967077406e-05, "loss": 0.753, "step": 380 }, { "epoch": 0.030184194890077242, "grad_norm": 4.467950933977687, "learning_rate": 1.9999998683096255e-05, "loss": 0.6661, "step": 381 }, { "epoch": 0.030263418498712617, "grad_norm": 5.685728093385676, "learning_rate": 1.999999703696666e-05, "loss": 0.826, "step": 382 }, { "epoch": 0.03034264210734799, "grad_norm": 4.0431594610378285, "learning_rate": 1.999999473238537e-05, "loss": 0.659, "step": 383 }, { "epoch": 0.030421865715983362, "grad_norm": 4.2065187710344665, "learning_rate": 1.9999991769352545e-05, "loss": 0.716, "step": 384 }, { "epoch": 0.030501089324618737, "grad_norm": 4.699634931223346, "learning_rate": 1.9999988147868384e-05, "loss": 0.7331, "step": 385 }, { "epoch": 0.03058031293325411, "grad_norm": 3.830676038932044, "learning_rate": 1.9999983867933114e-05, "loss": 0.6457, "step": 386 }, { "epoch": 0.030659536541889482, "grad_norm": 4.187993752123971, "learning_rate": 1.999997892954703e-05, "loss": 0.6993, "step": 387 }, { "epoch": 0.030738760150524857, "grad_norm": 4.842800869373137, "learning_rate": 1.9999973332710443e-05, "loss": 0.7597, "step": 388 }, { "epoch": 0.03081798375916023, "grad_norm": 4.729110283875682, "learning_rate": 1.9999967077423732e-05, "loss": 0.5969, "step": 389 }, { "epoch": 0.030897207367795602, "grad_norm": 5.142680374559456, "learning_rate": 1.9999960163687307e-05, "loss": 0.6302, "step": 390 }, { "epoch": 0.030976430976430977, "grad_norm": 4.864329373176803, "learning_rate": 1.999995259150162e-05, "loss": 0.6236, "step": 391 }, { "epoch": 0.031055654585066348, "grad_norm": 4.1234032843182655, "learning_rate": 1.999994436086717e-05, "loss": 0.6444, "step": 392 }, { "epoch": 0.031134878193701723, "grad_norm": 4.281847243114499, "learning_rate": 1.9999935471784508e-05, "loss": 0.7837, "step": 393 }, { "epoch": 0.031214101802337097, "grad_norm": 4.213055829765473, "learning_rate": 1.9999925924254203e-05, "loss": 0.5674, "step": 394 }, { "epoch": 0.03129332541097247, "grad_norm": 4.714674138266144, "learning_rate": 1.9999915718276898e-05, "loss": 0.7518, "step": 395 }, { "epoch": 0.03137254901960784, "grad_norm": 3.7935243587876877, "learning_rate": 1.9999904853853256e-05, "loss": 0.649, "step": 396 }, { "epoch": 0.03145177262824322, "grad_norm": 4.260232514025721, "learning_rate": 1.9999893330983998e-05, "loss": 0.6109, "step": 397 }, { "epoch": 0.03153099623687859, "grad_norm": 4.537641317809541, "learning_rate": 1.999988114966988e-05, "loss": 0.7417, "step": 398 }, { "epoch": 0.031610219845513966, "grad_norm": 4.701561160728191, "learning_rate": 1.9999868309911704e-05, "loss": 0.7249, "step": 399 }, { "epoch": 0.031689443454149334, "grad_norm": 4.22285593155482, "learning_rate": 1.9999854811710317e-05, "loss": 0.7386, "step": 400 }, { "epoch": 0.03176866706278471, "grad_norm": 4.276842833278445, "learning_rate": 1.9999840655066608e-05, "loss": 0.7625, "step": 401 }, { "epoch": 0.03184789067142008, "grad_norm": 4.001443827946862, "learning_rate": 1.9999825839981506e-05, "loss": 0.7701, "step": 402 }, { "epoch": 0.03192711428005546, "grad_norm": 4.18069583531695, "learning_rate": 1.9999810366455986e-05, "loss": 0.6873, "step": 403 }, { "epoch": 0.03200633788869083, "grad_norm": 4.472852463492354, "learning_rate": 1.9999794234491075e-05, "loss": 0.6958, "step": 404 }, { "epoch": 0.03208556149732621, "grad_norm": 5.545645546601008, "learning_rate": 1.9999777444087826e-05, "loss": 0.7381, "step": 405 }, { "epoch": 0.032164785105961574, "grad_norm": 4.477348669868605, "learning_rate": 1.999975999524735e-05, "loss": 0.6911, "step": 406 }, { "epoch": 0.03224400871459695, "grad_norm": 4.318832171882154, "learning_rate": 1.9999741887970795e-05, "loss": 0.6146, "step": 407 }, { "epoch": 0.03232323232323232, "grad_norm": 3.4676427320653227, "learning_rate": 1.999972312225935e-05, "loss": 0.7041, "step": 408 }, { "epoch": 0.0324024559318677, "grad_norm": 4.035350874161928, "learning_rate": 1.999970369811425e-05, "loss": 0.7943, "step": 409 }, { "epoch": 0.03248167954050307, "grad_norm": 4.3804215167531755, "learning_rate": 1.9999683615536784e-05, "loss": 0.5909, "step": 410 }, { "epoch": 0.03256090314913844, "grad_norm": 3.607200114286588, "learning_rate": 1.9999662874528264e-05, "loss": 0.624, "step": 411 }, { "epoch": 0.032640126757773814, "grad_norm": 4.531218352807698, "learning_rate": 1.999964147509006e-05, "loss": 0.6523, "step": 412 }, { "epoch": 0.03271935036640919, "grad_norm": 4.204126764299906, "learning_rate": 1.999961941722358e-05, "loss": 0.6777, "step": 413 }, { "epoch": 0.03279857397504456, "grad_norm": 4.034517262908247, "learning_rate": 1.9999596700930274e-05, "loss": 0.5886, "step": 414 }, { "epoch": 0.03287779758367994, "grad_norm": 4.840497432146151, "learning_rate": 1.999957332621164e-05, "loss": 0.7662, "step": 415 }, { "epoch": 0.03295702119231531, "grad_norm": 4.423505255377689, "learning_rate": 1.999954929306922e-05, "loss": 0.6206, "step": 416 }, { "epoch": 0.03303624480095068, "grad_norm": 4.715010799490287, "learning_rate": 1.999952460150459e-05, "loss": 0.824, "step": 417 }, { "epoch": 0.033115468409586055, "grad_norm": 4.255583388027712, "learning_rate": 1.9999499251519388e-05, "loss": 0.7422, "step": 418 }, { "epoch": 0.03319469201822143, "grad_norm": 5.197441633535912, "learning_rate": 1.9999473243115268e-05, "loss": 0.8425, "step": 419 }, { "epoch": 0.033273915626856804, "grad_norm": 3.448163491789807, "learning_rate": 1.999944657629395e-05, "loss": 0.6634, "step": 420 }, { "epoch": 0.03335313923549218, "grad_norm": 4.0169855304030015, "learning_rate": 1.999941925105719e-05, "loss": 0.6716, "step": 421 }, { "epoch": 0.03343236284412755, "grad_norm": 3.740364010912455, "learning_rate": 1.9999391267406786e-05, "loss": 0.7694, "step": 422 }, { "epoch": 0.03351158645276292, "grad_norm": 3.549221462165083, "learning_rate": 1.9999362625344584e-05, "loss": 0.6881, "step": 423 }, { "epoch": 0.033590810061398295, "grad_norm": 4.830348989262113, "learning_rate": 1.9999333324872464e-05, "loss": 0.8412, "step": 424 }, { "epoch": 0.03367003367003367, "grad_norm": 3.7762810766178956, "learning_rate": 1.9999303365992357e-05, "loss": 0.735, "step": 425 }, { "epoch": 0.033749257278669044, "grad_norm": 3.9829759791080654, "learning_rate": 1.999927274870624e-05, "loss": 0.5417, "step": 426 }, { "epoch": 0.03382848088730442, "grad_norm": 4.075751711811119, "learning_rate": 1.9999241473016126e-05, "loss": 0.7003, "step": 427 }, { "epoch": 0.03390770449593979, "grad_norm": 4.0480284487019675, "learning_rate": 1.999920953892407e-05, "loss": 0.7324, "step": 428 }, { "epoch": 0.03398692810457516, "grad_norm": 4.730819648450008, "learning_rate": 1.9999176946432183e-05, "loss": 0.6864, "step": 429 }, { "epoch": 0.034066151713210535, "grad_norm": 4.297433006698665, "learning_rate": 1.9999143695542606e-05, "loss": 0.7106, "step": 430 }, { "epoch": 0.03414537532184591, "grad_norm": 4.459895139884524, "learning_rate": 1.9999109786257528e-05, "loss": 0.798, "step": 431 }, { "epoch": 0.034224598930481284, "grad_norm": 4.155135795233358, "learning_rate": 1.9999075218579184e-05, "loss": 0.6899, "step": 432 }, { "epoch": 0.03430382253911666, "grad_norm": 3.550121768386916, "learning_rate": 1.999903999250985e-05, "loss": 0.5274, "step": 433 }, { "epoch": 0.03438304614775203, "grad_norm": 4.178491310694602, "learning_rate": 1.9999004108051846e-05, "loss": 0.6853, "step": 434 }, { "epoch": 0.0344622697563874, "grad_norm": 3.6249342104934437, "learning_rate": 1.999896756520753e-05, "loss": 0.5693, "step": 435 }, { "epoch": 0.034541493365022775, "grad_norm": 3.710555922301624, "learning_rate": 1.9998930363979315e-05, "loss": 0.6222, "step": 436 }, { "epoch": 0.03462071697365815, "grad_norm": 4.282896553340935, "learning_rate": 1.999889250436965e-05, "loss": 0.6877, "step": 437 }, { "epoch": 0.034699940582293524, "grad_norm": 4.070929739318065, "learning_rate": 1.9998853986381018e-05, "loss": 0.6578, "step": 438 }, { "epoch": 0.0347791641909289, "grad_norm": 3.5806771607523715, "learning_rate": 1.9998814810015968e-05, "loss": 0.6339, "step": 439 }, { "epoch": 0.034858387799564274, "grad_norm": 4.275256492797494, "learning_rate": 1.9998774975277074e-05, "loss": 0.7398, "step": 440 }, { "epoch": 0.03493761140819964, "grad_norm": 4.198337935981686, "learning_rate": 1.9998734482166954e-05, "loss": 0.5238, "step": 441 }, { "epoch": 0.035016835016835016, "grad_norm": 4.222553552201761, "learning_rate": 1.9998693330688283e-05, "loss": 0.7514, "step": 442 }, { "epoch": 0.03509605862547039, "grad_norm": 4.03782342518217, "learning_rate": 1.9998651520843766e-05, "loss": 0.7049, "step": 443 }, { "epoch": 0.035175282234105765, "grad_norm": 3.4146854878819033, "learning_rate": 1.999860905263616e-05, "loss": 0.6022, "step": 444 }, { "epoch": 0.03525450584274114, "grad_norm": 4.528935499317078, "learning_rate": 1.9998565926068253e-05, "loss": 0.7426, "step": 445 }, { "epoch": 0.03533372945137651, "grad_norm": 3.7491079949721673, "learning_rate": 1.999852214114289e-05, "loss": 0.5667, "step": 446 }, { "epoch": 0.03541295306001188, "grad_norm": 3.6964639629032487, "learning_rate": 1.9998477697862956e-05, "loss": 0.6659, "step": 447 }, { "epoch": 0.035492176668647256, "grad_norm": 4.156133240212877, "learning_rate": 1.9998432596231373e-05, "loss": 0.6773, "step": 448 }, { "epoch": 0.03557140027728263, "grad_norm": 4.293205440872425, "learning_rate": 1.9998386836251116e-05, "loss": 0.6266, "step": 449 }, { "epoch": 0.035650623885918005, "grad_norm": 4.0772084133636985, "learning_rate": 1.9998340417925193e-05, "loss": 0.6588, "step": 450 }, { "epoch": 0.03572984749455338, "grad_norm": 4.329855059640332, "learning_rate": 1.9998293341256664e-05, "loss": 0.8068, "step": 451 }, { "epoch": 0.03580907110318875, "grad_norm": 4.053701004155578, "learning_rate": 1.9998245606248627e-05, "loss": 0.6841, "step": 452 }, { "epoch": 0.03588829471182412, "grad_norm": 4.202351721500618, "learning_rate": 1.999819721290422e-05, "loss": 0.6414, "step": 453 }, { "epoch": 0.035967518320459496, "grad_norm": 3.676834437809905, "learning_rate": 1.9998148161226645e-05, "loss": 0.5713, "step": 454 }, { "epoch": 0.03604674192909487, "grad_norm": 3.694595043871052, "learning_rate": 1.9998098451219115e-05, "loss": 0.6078, "step": 455 }, { "epoch": 0.036125965537730245, "grad_norm": 3.9757704962804667, "learning_rate": 1.999804808288491e-05, "loss": 0.7277, "step": 456 }, { "epoch": 0.03620518914636562, "grad_norm": 4.633541706729874, "learning_rate": 1.9997997056227347e-05, "loss": 0.8114, "step": 457 }, { "epoch": 0.03628441275500099, "grad_norm": 3.93767707947965, "learning_rate": 1.9997945371249784e-05, "loss": 0.6533, "step": 458 }, { "epoch": 0.03636363636363636, "grad_norm": 3.724852327104761, "learning_rate": 1.999789302795563e-05, "loss": 0.6025, "step": 459 }, { "epoch": 0.036442859972271736, "grad_norm": 4.195074369301747, "learning_rate": 1.999784002634832e-05, "loss": 0.7006, "step": 460 }, { "epoch": 0.03652208358090711, "grad_norm": 3.978391724157097, "learning_rate": 1.9997786366431354e-05, "loss": 0.7393, "step": 461 }, { "epoch": 0.036601307189542485, "grad_norm": 3.6045949315417776, "learning_rate": 1.9997732048208264e-05, "loss": 0.6342, "step": 462 }, { "epoch": 0.03668053079817786, "grad_norm": 4.037839325371844, "learning_rate": 1.9997677071682623e-05, "loss": 0.6058, "step": 463 }, { "epoch": 0.03675975440681323, "grad_norm": 4.278379001148626, "learning_rate": 1.9997621436858053e-05, "loss": 0.6623, "step": 464 }, { "epoch": 0.0368389780154486, "grad_norm": 3.4950522159289217, "learning_rate": 1.9997565143738216e-05, "loss": 0.5441, "step": 465 }, { "epoch": 0.03691820162408398, "grad_norm": 3.8381685733649866, "learning_rate": 1.999750819232682e-05, "loss": 0.6601, "step": 466 }, { "epoch": 0.03699742523271935, "grad_norm": 3.620568988823789, "learning_rate": 1.9997450582627614e-05, "loss": 0.6263, "step": 467 }, { "epoch": 0.037076648841354726, "grad_norm": 3.6614380431722573, "learning_rate": 1.9997392314644392e-05, "loss": 0.4856, "step": 468 }, { "epoch": 0.0371558724499901, "grad_norm": 4.473212402515391, "learning_rate": 1.999733338838099e-05, "loss": 0.5263, "step": 469 }, { "epoch": 0.03723509605862547, "grad_norm": 4.225398591977323, "learning_rate": 1.999727380384129e-05, "loss": 0.676, "step": 470 }, { "epoch": 0.03731431966726084, "grad_norm": 3.8574841104712903, "learning_rate": 1.999721356102921e-05, "loss": 0.5476, "step": 471 }, { "epoch": 0.03739354327589622, "grad_norm": 3.9291359486112425, "learning_rate": 1.9997152659948727e-05, "loss": 0.6404, "step": 472 }, { "epoch": 0.03747276688453159, "grad_norm": 4.012932102434357, "learning_rate": 1.9997091100603842e-05, "loss": 0.6552, "step": 473 }, { "epoch": 0.037551990493166966, "grad_norm": 3.9281726486511763, "learning_rate": 1.999702888299861e-05, "loss": 0.7265, "step": 474 }, { "epoch": 0.03763121410180234, "grad_norm": 3.7828949019621088, "learning_rate": 1.9996966007137125e-05, "loss": 0.7433, "step": 475 }, { "epoch": 0.03771043771043771, "grad_norm": 3.1150374498198112, "learning_rate": 1.9996902473023537e-05, "loss": 0.6051, "step": 476 }, { "epoch": 0.03778966131907308, "grad_norm": 3.677285993882122, "learning_rate": 1.999683828066202e-05, "loss": 0.6053, "step": 477 }, { "epoch": 0.03786888492770846, "grad_norm": 4.193161098209371, "learning_rate": 1.9996773430056806e-05, "loss": 0.7045, "step": 478 }, { "epoch": 0.03794810853634383, "grad_norm": 3.559327316006344, "learning_rate": 1.999670792121216e-05, "loss": 0.5634, "step": 479 }, { "epoch": 0.038027332144979206, "grad_norm": 3.4381744224022994, "learning_rate": 1.99966417541324e-05, "loss": 0.5958, "step": 480 }, { "epoch": 0.038106555753614574, "grad_norm": 3.859694920178025, "learning_rate": 1.9996574928821883e-05, "loss": 0.5779, "step": 481 }, { "epoch": 0.03818577936224995, "grad_norm": 3.7604500656206317, "learning_rate": 1.9996507445285003e-05, "loss": 0.6985, "step": 482 }, { "epoch": 0.03826500297088532, "grad_norm": 3.663635354080971, "learning_rate": 1.999643930352621e-05, "loss": 0.5569, "step": 483 }, { "epoch": 0.0383442265795207, "grad_norm": 3.7080033322823667, "learning_rate": 1.999637050354999e-05, "loss": 0.6284, "step": 484 }, { "epoch": 0.03842345018815607, "grad_norm": 4.031681695337317, "learning_rate": 1.9996301045360874e-05, "loss": 0.5587, "step": 485 }, { "epoch": 0.038502673796791446, "grad_norm": 3.5617223215792735, "learning_rate": 1.999623092896343e-05, "loss": 0.5596, "step": 486 }, { "epoch": 0.038581897405426814, "grad_norm": 4.643660776617633, "learning_rate": 1.9996160154362275e-05, "loss": 0.7314, "step": 487 }, { "epoch": 0.03866112101406219, "grad_norm": 3.6784759763962955, "learning_rate": 1.9996088721562076e-05, "loss": 0.6118, "step": 488 }, { "epoch": 0.03874034462269756, "grad_norm": 3.9519303685126803, "learning_rate": 1.9996016630567535e-05, "loss": 0.5629, "step": 489 }, { "epoch": 0.03881956823133294, "grad_norm": 3.517417534187261, "learning_rate": 1.9995943881383393e-05, "loss": 0.5969, "step": 490 }, { "epoch": 0.03889879183996831, "grad_norm": 3.5399321332043723, "learning_rate": 1.9995870474014444e-05, "loss": 0.6577, "step": 491 }, { "epoch": 0.03897801544860369, "grad_norm": 3.3975854379183206, "learning_rate": 1.9995796408465523e-05, "loss": 0.5913, "step": 492 }, { "epoch": 0.039057239057239054, "grad_norm": 4.795982077421927, "learning_rate": 1.9995721684741505e-05, "loss": 0.8022, "step": 493 }, { "epoch": 0.03913646266587443, "grad_norm": 3.756175720690799, "learning_rate": 1.9995646302847307e-05, "loss": 0.7024, "step": 494 }, { "epoch": 0.0392156862745098, "grad_norm": 3.5933857493175796, "learning_rate": 1.9995570262787903e-05, "loss": 0.5881, "step": 495 }, { "epoch": 0.03929490988314518, "grad_norm": 3.669507362553083, "learning_rate": 1.9995493564568286e-05, "loss": 0.6628, "step": 496 }, { "epoch": 0.03937413349178055, "grad_norm": 4.407602280926834, "learning_rate": 1.9995416208193518e-05, "loss": 0.6663, "step": 497 }, { "epoch": 0.03945335710041593, "grad_norm": 3.525706353779159, "learning_rate": 1.999533819366868e-05, "loss": 0.6049, "step": 498 }, { "epoch": 0.039532580709051295, "grad_norm": 3.6774942001914295, "learning_rate": 1.9995259520998927e-05, "loss": 0.6637, "step": 499 }, { "epoch": 0.03961180431768667, "grad_norm": 3.3815464260140033, "learning_rate": 1.9995180190189424e-05, "loss": 0.5997, "step": 500 }, { "epoch": 0.039691027926322044, "grad_norm": 3.6349553469130966, "learning_rate": 1.9995100201245397e-05, "loss": 0.5628, "step": 501 }, { "epoch": 0.03977025153495742, "grad_norm": 4.008880962208993, "learning_rate": 1.999501955417212e-05, "loss": 0.5247, "step": 502 }, { "epoch": 0.03984947514359279, "grad_norm": 4.113040679313265, "learning_rate": 1.999493824897489e-05, "loss": 0.6521, "step": 503 }, { "epoch": 0.03992869875222817, "grad_norm": 3.360324773968465, "learning_rate": 1.9994856285659073e-05, "loss": 0.6559, "step": 504 }, { "epoch": 0.040007922360863535, "grad_norm": 3.897122630625106, "learning_rate": 1.9994773664230064e-05, "loss": 0.6236, "step": 505 }, { "epoch": 0.04008714596949891, "grad_norm": 4.082730649124917, "learning_rate": 1.99946903846933e-05, "loss": 0.6917, "step": 506 }, { "epoch": 0.040166369578134284, "grad_norm": 4.375710444717418, "learning_rate": 1.9994606447054265e-05, "loss": 0.6463, "step": 507 }, { "epoch": 0.04024559318676966, "grad_norm": 3.9407990653496214, "learning_rate": 1.999452185131849e-05, "loss": 0.7824, "step": 508 }, { "epoch": 0.04032481679540503, "grad_norm": 3.874965037146742, "learning_rate": 1.9994436597491537e-05, "loss": 0.7309, "step": 509 }, { "epoch": 0.04040404040404041, "grad_norm": 4.22520270593105, "learning_rate": 1.9994350685579024e-05, "loss": 0.6914, "step": 510 }, { "epoch": 0.040483264012675775, "grad_norm": 3.5866394165801587, "learning_rate": 1.999426411558661e-05, "loss": 0.6996, "step": 511 }, { "epoch": 0.04056248762131115, "grad_norm": 3.8611810503765573, "learning_rate": 1.9994176887519994e-05, "loss": 0.6387, "step": 512 }, { "epoch": 0.040641711229946524, "grad_norm": 3.5121356727512403, "learning_rate": 1.9994089001384918e-05, "loss": 0.6522, "step": 513 }, { "epoch": 0.0407209348385819, "grad_norm": 3.4597192033887527, "learning_rate": 1.9994000457187167e-05, "loss": 0.593, "step": 514 }, { "epoch": 0.04080015844721727, "grad_norm": 3.474541699099251, "learning_rate": 1.999391125493258e-05, "loss": 0.6385, "step": 515 }, { "epoch": 0.04087938205585264, "grad_norm": 3.2548781364117816, "learning_rate": 1.9993821394627018e-05, "loss": 0.6725, "step": 516 }, { "epoch": 0.040958605664488015, "grad_norm": 3.9666359076360136, "learning_rate": 1.9993730876276407e-05, "loss": 0.6341, "step": 517 }, { "epoch": 0.04103782927312339, "grad_norm": 4.298069093873997, "learning_rate": 1.9993639699886707e-05, "loss": 0.8401, "step": 518 }, { "epoch": 0.041117052881758764, "grad_norm": 3.549914286472616, "learning_rate": 1.9993547865463916e-05, "loss": 0.6659, "step": 519 }, { "epoch": 0.04119627649039414, "grad_norm": 4.009238618845933, "learning_rate": 1.9993455373014087e-05, "loss": 0.6957, "step": 520 }, { "epoch": 0.04127550009902951, "grad_norm": 3.2311627282206965, "learning_rate": 1.99933622225433e-05, "loss": 0.5976, "step": 521 }, { "epoch": 0.04135472370766488, "grad_norm": 3.1454133738763277, "learning_rate": 1.9993268414057704e-05, "loss": 0.5969, "step": 522 }, { "epoch": 0.041433947316300256, "grad_norm": 3.785407899378014, "learning_rate": 1.9993173947563466e-05, "loss": 0.6444, "step": 523 }, { "epoch": 0.04151317092493563, "grad_norm": 4.960922683132975, "learning_rate": 1.9993078823066804e-05, "loss": 0.7575, "step": 524 }, { "epoch": 0.041592394533571005, "grad_norm": 3.2686721469199655, "learning_rate": 1.9992983040573986e-05, "loss": 0.653, "step": 525 }, { "epoch": 0.04167161814220638, "grad_norm": 3.6688999626644154, "learning_rate": 1.9992886600091318e-05, "loss": 0.619, "step": 526 }, { "epoch": 0.041750841750841754, "grad_norm": 3.559109787862381, "learning_rate": 1.9992789501625155e-05, "loss": 0.6763, "step": 527 }, { "epoch": 0.04183006535947712, "grad_norm": 3.986658913468185, "learning_rate": 1.9992691745181882e-05, "loss": 0.7319, "step": 528 }, { "epoch": 0.041909288968112496, "grad_norm": 3.641431065115546, "learning_rate": 1.9992593330767938e-05, "loss": 0.6168, "step": 529 }, { "epoch": 0.04198851257674787, "grad_norm": 3.4363018932511062, "learning_rate": 1.9992494258389805e-05, "loss": 0.583, "step": 530 }, { "epoch": 0.042067736185383245, "grad_norm": 3.8248432426142203, "learning_rate": 1.9992394528054006e-05, "loss": 0.6341, "step": 531 }, { "epoch": 0.04214695979401862, "grad_norm": 3.8247778936282466, "learning_rate": 1.9992294139767106e-05, "loss": 0.5237, "step": 532 }, { "epoch": 0.042226183402653994, "grad_norm": 3.4828614340990223, "learning_rate": 1.999219309353572e-05, "loss": 0.5949, "step": 533 }, { "epoch": 0.04230540701128936, "grad_norm": 3.733805629820094, "learning_rate": 1.9992091389366497e-05, "loss": 0.6166, "step": 534 }, { "epoch": 0.042384630619924736, "grad_norm": 3.623047965210853, "learning_rate": 1.9991989027266134e-05, "loss": 0.5641, "step": 535 }, { "epoch": 0.04246385422856011, "grad_norm": 3.7851844307402116, "learning_rate": 1.9991886007241375e-05, "loss": 0.6686, "step": 536 }, { "epoch": 0.042543077837195485, "grad_norm": 3.5008686577460226, "learning_rate": 1.9991782329298998e-05, "loss": 0.6061, "step": 537 }, { "epoch": 0.04262230144583086, "grad_norm": 3.697592505120355, "learning_rate": 1.9991677993445832e-05, "loss": 0.7002, "step": 538 }, { "epoch": 0.042701525054466234, "grad_norm": 3.819382943544156, "learning_rate": 1.9991572999688746e-05, "loss": 0.6522, "step": 539 }, { "epoch": 0.0427807486631016, "grad_norm": 3.3522736691510864, "learning_rate": 1.9991467348034653e-05, "loss": 0.6039, "step": 540 }, { "epoch": 0.042859972271736976, "grad_norm": 3.618967536556049, "learning_rate": 1.9991361038490515e-05, "loss": 0.644, "step": 541 }, { "epoch": 0.04293919588037235, "grad_norm": 3.37957259389003, "learning_rate": 1.9991254071063327e-05, "loss": 0.5794, "step": 542 }, { "epoch": 0.043018419489007725, "grad_norm": 3.73008674961396, "learning_rate": 1.9991146445760133e-05, "loss": 0.678, "step": 543 }, { "epoch": 0.0430976430976431, "grad_norm": 3.150033832160963, "learning_rate": 1.9991038162588018e-05, "loss": 0.6449, "step": 544 }, { "epoch": 0.043176866706278474, "grad_norm": 3.93201705920017, "learning_rate": 1.9990929221554117e-05, "loss": 0.551, "step": 545 }, { "epoch": 0.04325609031491384, "grad_norm": 3.1690786607972963, "learning_rate": 1.99908196226656e-05, "loss": 0.6372, "step": 546 }, { "epoch": 0.04333531392354922, "grad_norm": 4.31888228933249, "learning_rate": 1.9990709365929678e-05, "loss": 0.5972, "step": 547 }, { "epoch": 0.04341453753218459, "grad_norm": 3.323048595645411, "learning_rate": 1.999059845135362e-05, "loss": 0.6934, "step": 548 }, { "epoch": 0.043493761140819966, "grad_norm": 3.5673902357162652, "learning_rate": 1.9990486878944727e-05, "loss": 0.7046, "step": 549 }, { "epoch": 0.04357298474945534, "grad_norm": 3.6871286430597126, "learning_rate": 1.9990374648710343e-05, "loss": 0.6601, "step": 550 }, { "epoch": 0.04365220835809071, "grad_norm": 2.853772466079235, "learning_rate": 1.9990261760657858e-05, "loss": 0.5641, "step": 551 }, { "epoch": 0.04373143196672608, "grad_norm": 4.072027412899879, "learning_rate": 1.9990148214794713e-05, "loss": 0.5734, "step": 552 }, { "epoch": 0.04381065557536146, "grad_norm": 3.2089212935944107, "learning_rate": 1.999003401112837e-05, "loss": 0.5388, "step": 553 }, { "epoch": 0.04388987918399683, "grad_norm": 4.094850113621171, "learning_rate": 1.9989919149666356e-05, "loss": 0.769, "step": 554 }, { "epoch": 0.043969102792632206, "grad_norm": 3.461478011178914, "learning_rate": 1.998980363041624e-05, "loss": 0.6186, "step": 555 }, { "epoch": 0.04404832640126758, "grad_norm": 4.300151340714484, "learning_rate": 1.9989687453385617e-05, "loss": 0.6646, "step": 556 }, { "epoch": 0.04412755000990295, "grad_norm": 3.4734137803963687, "learning_rate": 1.9989570618582145e-05, "loss": 0.5299, "step": 557 }, { "epoch": 0.04420677361853832, "grad_norm": 4.066686199052264, "learning_rate": 1.9989453126013515e-05, "loss": 0.6054, "step": 558 }, { "epoch": 0.0442859972271737, "grad_norm": 3.9748322815357597, "learning_rate": 1.9989334975687462e-05, "loss": 0.615, "step": 559 }, { "epoch": 0.04436522083580907, "grad_norm": 4.005368167611016, "learning_rate": 1.9989216167611766e-05, "loss": 0.61, "step": 560 }, { "epoch": 0.044444444444444446, "grad_norm": 3.881254275697099, "learning_rate": 1.998909670179425e-05, "loss": 0.7321, "step": 561 }, { "epoch": 0.04452366805307982, "grad_norm": 3.2785677512469675, "learning_rate": 1.9988976578242785e-05, "loss": 0.5244, "step": 562 }, { "epoch": 0.04460289166171519, "grad_norm": 3.422106865180319, "learning_rate": 1.9988855796965275e-05, "loss": 0.5331, "step": 563 }, { "epoch": 0.04468211527035056, "grad_norm": 3.530076162896214, "learning_rate": 1.998873435796967e-05, "loss": 0.6064, "step": 564 }, { "epoch": 0.04476133887898594, "grad_norm": 3.4016962967487503, "learning_rate": 1.9988612261263972e-05, "loss": 0.6349, "step": 565 }, { "epoch": 0.04484056248762131, "grad_norm": 3.3263454727489443, "learning_rate": 1.9988489506856218e-05, "loss": 0.6439, "step": 566 }, { "epoch": 0.044919786096256686, "grad_norm": 3.2261381597517627, "learning_rate": 1.9988366094754493e-05, "loss": 0.4953, "step": 567 }, { "epoch": 0.04499900970489206, "grad_norm": 3.4719934634535368, "learning_rate": 1.9988242024966924e-05, "loss": 0.6628, "step": 568 }, { "epoch": 0.04507823331352743, "grad_norm": 3.8283800160440635, "learning_rate": 1.9988117297501674e-05, "loss": 0.4674, "step": 569 }, { "epoch": 0.0451574569221628, "grad_norm": 3.891449460148191, "learning_rate": 1.998799191236696e-05, "loss": 0.5612, "step": 570 }, { "epoch": 0.04523668053079818, "grad_norm": 3.8415122292573387, "learning_rate": 1.998786586957104e-05, "loss": 0.5787, "step": 571 }, { "epoch": 0.04531590413943355, "grad_norm": 3.279413849083167, "learning_rate": 1.998773916912221e-05, "loss": 0.4776, "step": 572 }, { "epoch": 0.04539512774806893, "grad_norm": 3.807566952571174, "learning_rate": 1.9987611811028814e-05, "loss": 0.7145, "step": 573 }, { "epoch": 0.0454743513567043, "grad_norm": 3.669444622209576, "learning_rate": 1.9987483795299236e-05, "loss": 0.6527, "step": 574 }, { "epoch": 0.04555357496533967, "grad_norm": 3.459414739368479, "learning_rate": 1.9987355121941907e-05, "loss": 0.5198, "step": 575 }, { "epoch": 0.04563279857397504, "grad_norm": 3.577146066788758, "learning_rate": 1.99872257909653e-05, "loss": 0.5355, "step": 576 }, { "epoch": 0.04571202218261042, "grad_norm": 3.814856415338765, "learning_rate": 1.9987095802377933e-05, "loss": 0.6196, "step": 577 }, { "epoch": 0.04579124579124579, "grad_norm": 4.620429956328929, "learning_rate": 1.9986965156188357e-05, "loss": 0.7046, "step": 578 }, { "epoch": 0.04587046939988117, "grad_norm": 4.285619576434249, "learning_rate": 1.9986833852405183e-05, "loss": 0.5899, "step": 579 }, { "epoch": 0.045949693008516534, "grad_norm": 3.595812363622929, "learning_rate": 1.9986701891037053e-05, "loss": 0.5648, "step": 580 }, { "epoch": 0.04602891661715191, "grad_norm": 4.715717315399082, "learning_rate": 1.9986569272092656e-05, "loss": 0.7187, "step": 581 }, { "epoch": 0.046108140225787284, "grad_norm": 3.9011693035201938, "learning_rate": 1.9986435995580725e-05, "loss": 0.7205, "step": 582 }, { "epoch": 0.04618736383442266, "grad_norm": 3.598522283966038, "learning_rate": 1.9986302061510036e-05, "loss": 0.6788, "step": 583 }, { "epoch": 0.04626658744305803, "grad_norm": 3.8073028481044813, "learning_rate": 1.9986167469889405e-05, "loss": 0.5908, "step": 584 }, { "epoch": 0.04634581105169341, "grad_norm": 2.891955514307686, "learning_rate": 1.9986032220727698e-05, "loss": 0.5999, "step": 585 }, { "epoch": 0.046425034660328775, "grad_norm": 3.876749628271864, "learning_rate": 1.9985896314033816e-05, "loss": 0.6567, "step": 586 }, { "epoch": 0.04650425826896415, "grad_norm": 3.6940316887176894, "learning_rate": 1.9985759749816715e-05, "loss": 0.6342, "step": 587 }, { "epoch": 0.046583481877599524, "grad_norm": 3.785463738453173, "learning_rate": 1.9985622528085382e-05, "loss": 0.7148, "step": 588 }, { "epoch": 0.0466627054862349, "grad_norm": 3.21042838436158, "learning_rate": 1.9985484648848854e-05, "loss": 0.6131, "step": 589 }, { "epoch": 0.04674192909487027, "grad_norm": 4.05802326939007, "learning_rate": 1.9985346112116207e-05, "loss": 0.5711, "step": 590 }, { "epoch": 0.04682115270350565, "grad_norm": 3.523847614097594, "learning_rate": 1.9985206917896563e-05, "loss": 0.6842, "step": 591 }, { "epoch": 0.046900376312141015, "grad_norm": 3.6119114711140603, "learning_rate": 1.9985067066199093e-05, "loss": 0.6983, "step": 592 }, { "epoch": 0.04697959992077639, "grad_norm": 3.315918989225194, "learning_rate": 1.9984926557033003e-05, "loss": 0.5243, "step": 593 }, { "epoch": 0.047058823529411764, "grad_norm": 3.470617520958609, "learning_rate": 1.998478539040754e-05, "loss": 0.57, "step": 594 }, { "epoch": 0.04713804713804714, "grad_norm": 3.608138347682558, "learning_rate": 1.9984643566332005e-05, "loss": 0.6612, "step": 595 }, { "epoch": 0.04721727074668251, "grad_norm": 3.390556418320335, "learning_rate": 1.9984501084815734e-05, "loss": 0.5658, "step": 596 }, { "epoch": 0.04729649435531789, "grad_norm": 3.5497709627847502, "learning_rate": 1.9984357945868106e-05, "loss": 0.5289, "step": 597 }, { "epoch": 0.047375717963953255, "grad_norm": 3.594448280790585, "learning_rate": 1.998421414949855e-05, "loss": 0.6217, "step": 598 }, { "epoch": 0.04745494157258863, "grad_norm": 4.123128545858547, "learning_rate": 1.9984069695716534e-05, "loss": 0.6952, "step": 599 }, { "epoch": 0.047534165181224004, "grad_norm": 3.079482379339533, "learning_rate": 1.998392458453157e-05, "loss": 0.6006, "step": 600 }, { "epoch": 0.04761338878985938, "grad_norm": 3.5932147243803314, "learning_rate": 1.998377881595321e-05, "loss": 0.7653, "step": 601 }, { "epoch": 0.04769261239849475, "grad_norm": 3.925361417911679, "learning_rate": 1.9983632389991056e-05, "loss": 0.6688, "step": 602 }, { "epoch": 0.04777183600713013, "grad_norm": 4.385638428834529, "learning_rate": 1.9983485306654745e-05, "loss": 0.642, "step": 603 }, { "epoch": 0.047851059615765495, "grad_norm": 3.0550077881772717, "learning_rate": 1.9983337565953968e-05, "loss": 0.6014, "step": 604 }, { "epoch": 0.04793028322440087, "grad_norm": 3.163657261536773, "learning_rate": 1.9983189167898446e-05, "loss": 0.4853, "step": 605 }, { "epoch": 0.048009506833036245, "grad_norm": 3.6297702170718176, "learning_rate": 1.998304011249795e-05, "loss": 0.5661, "step": 606 }, { "epoch": 0.04808873044167162, "grad_norm": 3.5235373603016606, "learning_rate": 1.9982890399762303e-05, "loss": 0.5475, "step": 607 }, { "epoch": 0.048167954050306994, "grad_norm": 3.843292708555788, "learning_rate": 1.9982740029701356e-05, "loss": 0.6951, "step": 608 }, { "epoch": 0.04824717765894237, "grad_norm": 4.3231238472977545, "learning_rate": 1.998258900232501e-05, "loss": 0.528, "step": 609 }, { "epoch": 0.048326401267577736, "grad_norm": 4.391070317986208, "learning_rate": 1.9982437317643218e-05, "loss": 0.6609, "step": 610 }, { "epoch": 0.04840562487621311, "grad_norm": 3.587332401529109, "learning_rate": 1.9982284975665952e-05, "loss": 0.485, "step": 611 }, { "epoch": 0.048484848484848485, "grad_norm": 3.6881210400497233, "learning_rate": 1.998213197640326e-05, "loss": 0.658, "step": 612 }, { "epoch": 0.04856407209348386, "grad_norm": 3.476604257152046, "learning_rate": 1.9981978319865204e-05, "loss": 0.4793, "step": 613 }, { "epoch": 0.048643295702119234, "grad_norm": 4.127252418881677, "learning_rate": 1.9981824006061904e-05, "loss": 0.5326, "step": 614 }, { "epoch": 0.0487225193107546, "grad_norm": 3.4441649802115095, "learning_rate": 1.998166903500353e-05, "loss": 0.5361, "step": 615 }, { "epoch": 0.048801742919389976, "grad_norm": 3.3247716254900244, "learning_rate": 1.998151340670027e-05, "loss": 0.5887, "step": 616 }, { "epoch": 0.04888096652802535, "grad_norm": 3.294291573099863, "learning_rate": 1.9981357121162385e-05, "loss": 0.5829, "step": 617 }, { "epoch": 0.048960190136660725, "grad_norm": 3.6509061621178254, "learning_rate": 1.998120017840016e-05, "loss": 0.5756, "step": 618 }, { "epoch": 0.0490394137452961, "grad_norm": 3.799488526771851, "learning_rate": 1.998104257842393e-05, "loss": 0.5147, "step": 619 }, { "epoch": 0.049118637353931474, "grad_norm": 3.637657796620207, "learning_rate": 1.9980884321244072e-05, "loss": 0.6066, "step": 620 }, { "epoch": 0.04919786096256684, "grad_norm": 3.5414243921427575, "learning_rate": 1.9980725406871007e-05, "loss": 0.6376, "step": 621 }, { "epoch": 0.049277084571202216, "grad_norm": 5.147506741814145, "learning_rate": 1.9980565835315196e-05, "loss": 0.7115, "step": 622 }, { "epoch": 0.04935630817983759, "grad_norm": 3.7449299160511225, "learning_rate": 1.9980405606587148e-05, "loss": 0.531, "step": 623 }, { "epoch": 0.049435531788472965, "grad_norm": 3.1465453382891417, "learning_rate": 1.9980244720697417e-05, "loss": 0.5825, "step": 624 }, { "epoch": 0.04951475539710834, "grad_norm": 3.7358004188853884, "learning_rate": 1.9980083177656588e-05, "loss": 0.5773, "step": 625 }, { "epoch": 0.049593979005743714, "grad_norm": 4.8536296010364754, "learning_rate": 1.9979920977475306e-05, "loss": 0.6305, "step": 626 }, { "epoch": 0.04967320261437908, "grad_norm": 2.9820039525426374, "learning_rate": 1.9979758120164248e-05, "loss": 0.5118, "step": 627 }, { "epoch": 0.049752426223014456, "grad_norm": 3.56312148323265, "learning_rate": 1.997959460573414e-05, "loss": 0.6544, "step": 628 }, { "epoch": 0.04983164983164983, "grad_norm": 2.8358001569516316, "learning_rate": 1.9979430434195742e-05, "loss": 0.5189, "step": 629 }, { "epoch": 0.049910873440285206, "grad_norm": 3.9870526074281405, "learning_rate": 1.9979265605559868e-05, "loss": 0.5472, "step": 630 }, { "epoch": 0.04999009704892058, "grad_norm": 3.645653150403932, "learning_rate": 1.997910011983737e-05, "loss": 0.5923, "step": 631 }, { "epoch": 0.050069320657555955, "grad_norm": 3.6778139708866933, "learning_rate": 1.997893397703915e-05, "loss": 0.7483, "step": 632 }, { "epoch": 0.05014854426619132, "grad_norm": 3.3697830000040154, "learning_rate": 1.997876717717614e-05, "loss": 0.6071, "step": 633 }, { "epoch": 0.0502277678748267, "grad_norm": 3.9377510963745896, "learning_rate": 1.9978599720259325e-05, "loss": 0.6231, "step": 634 }, { "epoch": 0.05030699148346207, "grad_norm": 3.049836859632232, "learning_rate": 1.9978431606299736e-05, "loss": 0.6054, "step": 635 }, { "epoch": 0.050386215092097446, "grad_norm": 3.4704182926753466, "learning_rate": 1.9978262835308437e-05, "loss": 0.5647, "step": 636 }, { "epoch": 0.05046543870073282, "grad_norm": 3.863271887491197, "learning_rate": 1.997809340729654e-05, "loss": 0.6242, "step": 637 }, { "epoch": 0.050544662309368195, "grad_norm": 3.9731140115433927, "learning_rate": 1.9977923322275206e-05, "loss": 0.7247, "step": 638 }, { "epoch": 0.05062388591800356, "grad_norm": 4.064037520685318, "learning_rate": 1.997775258025563e-05, "loss": 0.6728, "step": 639 }, { "epoch": 0.05070310952663894, "grad_norm": 3.7457814493593484, "learning_rate": 1.997758118124906e-05, "loss": 0.6004, "step": 640 }, { "epoch": 0.05078233313527431, "grad_norm": 3.629777712494507, "learning_rate": 1.997740912526678e-05, "loss": 0.552, "step": 641 }, { "epoch": 0.050861556743909686, "grad_norm": 4.825013274018131, "learning_rate": 1.9977236412320112e-05, "loss": 0.6208, "step": 642 }, { "epoch": 0.05094078035254506, "grad_norm": 3.4541197056587016, "learning_rate": 1.9977063042420438e-05, "loss": 0.6652, "step": 643 }, { "epoch": 0.051020003961180435, "grad_norm": 3.3331748995754293, "learning_rate": 1.9976889015579167e-05, "loss": 0.7036, "step": 644 }, { "epoch": 0.0510992275698158, "grad_norm": 3.5838952992165303, "learning_rate": 1.997671433180776e-05, "loss": 0.5595, "step": 645 }, { "epoch": 0.05117845117845118, "grad_norm": 3.1556606457071186, "learning_rate": 1.997653899111772e-05, "loss": 0.4995, "step": 646 }, { "epoch": 0.05125767478708655, "grad_norm": 3.760593567611662, "learning_rate": 1.9976362993520587e-05, "loss": 0.5686, "step": 647 }, { "epoch": 0.051336898395721926, "grad_norm": 5.766104592551594, "learning_rate": 1.9976186339027958e-05, "loss": 0.6967, "step": 648 }, { "epoch": 0.0514161220043573, "grad_norm": 4.661421527766419, "learning_rate": 1.9976009027651463e-05, "loss": 0.6744, "step": 649 }, { "epoch": 0.05149534561299267, "grad_norm": 3.8041767704286142, "learning_rate": 1.9975831059402774e-05, "loss": 0.6685, "step": 650 }, { "epoch": 0.05157456922162804, "grad_norm": 3.8501502113577852, "learning_rate": 1.9975652434293607e-05, "loss": 0.5533, "step": 651 }, { "epoch": 0.05165379283026342, "grad_norm": 3.1331313213684826, "learning_rate": 1.9975473152335726e-05, "loss": 0.4754, "step": 652 }, { "epoch": 0.05173301643889879, "grad_norm": 3.0091468362632083, "learning_rate": 1.9975293213540942e-05, "loss": 0.4827, "step": 653 }, { "epoch": 0.05181224004753417, "grad_norm": 4.045039447568997, "learning_rate": 1.9975112617921097e-05, "loss": 0.6001, "step": 654 }, { "epoch": 0.05189146365616954, "grad_norm": 4.096840240355661, "learning_rate": 1.997493136548808e-05, "loss": 0.6506, "step": 655 }, { "epoch": 0.05197068726480491, "grad_norm": 3.57659214395994, "learning_rate": 1.9974749456253834e-05, "loss": 0.52, "step": 656 }, { "epoch": 0.05204991087344028, "grad_norm": 3.044935910167429, "learning_rate": 1.9974566890230327e-05, "loss": 0.5726, "step": 657 }, { "epoch": 0.05212913448207566, "grad_norm": 3.152422843320445, "learning_rate": 1.9974383667429585e-05, "loss": 0.4847, "step": 658 }, { "epoch": 0.05220835809071103, "grad_norm": 3.6432092561424922, "learning_rate": 1.9974199787863674e-05, "loss": 0.7887, "step": 659 }, { "epoch": 0.05228758169934641, "grad_norm": 3.2339171936264464, "learning_rate": 1.99740152515447e-05, "loss": 0.5148, "step": 660 }, { "epoch": 0.05236680530798178, "grad_norm": 3.5111120243733254, "learning_rate": 1.9973830058484813e-05, "loss": 0.5545, "step": 661 }, { "epoch": 0.05244602891661715, "grad_norm": 3.4524811312893906, "learning_rate": 1.9973644208696208e-05, "loss": 0.5938, "step": 662 }, { "epoch": 0.052525252525252523, "grad_norm": 3.1500280594662735, "learning_rate": 1.9973457702191123e-05, "loss": 0.5447, "step": 663 }, { "epoch": 0.0526044761338879, "grad_norm": 3.499106629291908, "learning_rate": 1.9973270538981835e-05, "loss": 0.5781, "step": 664 }, { "epoch": 0.05268369974252327, "grad_norm": 3.3935092155293805, "learning_rate": 1.9973082719080673e-05, "loss": 0.6165, "step": 665 }, { "epoch": 0.05276292335115865, "grad_norm": 3.5242328602672406, "learning_rate": 1.9972894242499997e-05, "loss": 0.6048, "step": 666 }, { "epoch": 0.05284214695979402, "grad_norm": 4.19898474198839, "learning_rate": 1.9972705109252227e-05, "loss": 0.4411, "step": 667 }, { "epoch": 0.05292137056842939, "grad_norm": 3.6410434507080462, "learning_rate": 1.997251531934981e-05, "loss": 0.6123, "step": 668 }, { "epoch": 0.053000594177064764, "grad_norm": 3.280657444340508, "learning_rate": 1.997232487280524e-05, "loss": 0.5898, "step": 669 }, { "epoch": 0.05307981778570014, "grad_norm": 4.14228533618234, "learning_rate": 1.9972133769631065e-05, "loss": 0.5315, "step": 670 }, { "epoch": 0.05315904139433551, "grad_norm": 3.887517174916096, "learning_rate": 1.9971942009839862e-05, "loss": 0.5781, "step": 671 }, { "epoch": 0.05323826500297089, "grad_norm": 3.6687004662067664, "learning_rate": 1.997174959344426e-05, "loss": 0.4738, "step": 672 }, { "epoch": 0.05331748861160626, "grad_norm": 3.3939499875207657, "learning_rate": 1.9971556520456928e-05, "loss": 0.6866, "step": 673 }, { "epoch": 0.05339671222024163, "grad_norm": 3.512062688304022, "learning_rate": 1.997136279089058e-05, "loss": 0.4789, "step": 674 }, { "epoch": 0.053475935828877004, "grad_norm": 3.0518120209989705, "learning_rate": 1.9971168404757972e-05, "loss": 0.4802, "step": 675 }, { "epoch": 0.05355515943751238, "grad_norm": 3.8237950433416903, "learning_rate": 1.99709733620719e-05, "loss": 0.5675, "step": 676 }, { "epoch": 0.05363438304614775, "grad_norm": 3.195166886044514, "learning_rate": 1.9970777662845212e-05, "loss": 0.5459, "step": 677 }, { "epoch": 0.05371360665478313, "grad_norm": 3.6631282865603247, "learning_rate": 1.997058130709079e-05, "loss": 0.6171, "step": 678 }, { "epoch": 0.0537928302634185, "grad_norm": 3.538844194225932, "learning_rate": 1.9970384294821565e-05, "loss": 0.5658, "step": 679 }, { "epoch": 0.05387205387205387, "grad_norm": 2.9626158006583854, "learning_rate": 1.9970186626050507e-05, "loss": 0.4446, "step": 680 }, { "epoch": 0.053951277480689244, "grad_norm": 3.2659000156780404, "learning_rate": 1.9969988300790636e-05, "loss": 0.6007, "step": 681 }, { "epoch": 0.05403050108932462, "grad_norm": 3.4968292044822995, "learning_rate": 1.9969789319055007e-05, "loss": 0.5461, "step": 682 }, { "epoch": 0.05410972469795999, "grad_norm": 3.8729078974161215, "learning_rate": 1.996958968085672e-05, "loss": 0.6358, "step": 683 }, { "epoch": 0.05418894830659537, "grad_norm": 3.2081829398888586, "learning_rate": 1.9969389386208927e-05, "loss": 0.534, "step": 684 }, { "epoch": 0.054268171915230735, "grad_norm": 3.3877845870813723, "learning_rate": 1.9969188435124812e-05, "loss": 0.5699, "step": 685 }, { "epoch": 0.05434739552386611, "grad_norm": 3.84551200253368, "learning_rate": 1.9968986827617603e-05, "loss": 0.5622, "step": 686 }, { "epoch": 0.054426619132501484, "grad_norm": 3.4932566370322515, "learning_rate": 1.9968784563700586e-05, "loss": 0.5722, "step": 687 }, { "epoch": 0.05450584274113686, "grad_norm": 3.2637182722557143, "learning_rate": 1.9968581643387065e-05, "loss": 0.5292, "step": 688 }, { "epoch": 0.054585066349772234, "grad_norm": 3.5018617824780116, "learning_rate": 1.9968378066690414e-05, "loss": 0.5713, "step": 689 }, { "epoch": 0.05466428995840761, "grad_norm": 3.2199561612336227, "learning_rate": 1.996817383362403e-05, "loss": 0.5341, "step": 690 }, { "epoch": 0.054743513567042976, "grad_norm": 3.0004900966966797, "learning_rate": 1.996796894420136e-05, "loss": 0.6363, "step": 691 }, { "epoch": 0.05482273717567835, "grad_norm": 3.17012251363694, "learning_rate": 1.9967763398435904e-05, "loss": 0.5366, "step": 692 }, { "epoch": 0.054901960784313725, "grad_norm": 2.8231583747276043, "learning_rate": 1.9967557196341184e-05, "loss": 0.4645, "step": 693 }, { "epoch": 0.0549811843929491, "grad_norm": 3.1533863075108672, "learning_rate": 1.996735033793079e-05, "loss": 0.5379, "step": 694 }, { "epoch": 0.055060408001584474, "grad_norm": 3.3719384236897993, "learning_rate": 1.996714282321833e-05, "loss": 0.4714, "step": 695 }, { "epoch": 0.05513963161021985, "grad_norm": 3.050102698974, "learning_rate": 1.9966934652217477e-05, "loss": 0.5152, "step": 696 }, { "epoch": 0.055218855218855216, "grad_norm": 3.1816568381011243, "learning_rate": 1.9966725824941933e-05, "loss": 0.4703, "step": 697 }, { "epoch": 0.05529807882749059, "grad_norm": 3.725553056239348, "learning_rate": 1.9966516341405452e-05, "loss": 0.6012, "step": 698 }, { "epoch": 0.055377302436125965, "grad_norm": 3.340483571981605, "learning_rate": 1.9966306201621826e-05, "loss": 0.7178, "step": 699 }, { "epoch": 0.05545652604476134, "grad_norm": 3.672446409460007, "learning_rate": 1.996609540560489e-05, "loss": 0.629, "step": 700 }, { "epoch": 0.055535749653396714, "grad_norm": 3.2971947821087957, "learning_rate": 1.9965883953368527e-05, "loss": 0.5387, "step": 701 }, { "epoch": 0.05561497326203209, "grad_norm": 3.7545851473692924, "learning_rate": 1.9965671844926656e-05, "loss": 0.5285, "step": 702 }, { "epoch": 0.055694196870667456, "grad_norm": 3.7044687887216705, "learning_rate": 1.9965459080293247e-05, "loss": 0.6099, "step": 703 }, { "epoch": 0.05577342047930283, "grad_norm": 2.8440632568968223, "learning_rate": 1.9965245659482312e-05, "loss": 0.4754, "step": 704 }, { "epoch": 0.055852644087938205, "grad_norm": 3.3518752387605097, "learning_rate": 1.9965031582507896e-05, "loss": 0.5202, "step": 705 }, { "epoch": 0.05593186769657358, "grad_norm": 3.322222655996487, "learning_rate": 1.99648168493841e-05, "loss": 0.5195, "step": 706 }, { "epoch": 0.056011091305208954, "grad_norm": 3.604406890842087, "learning_rate": 1.996460146012506e-05, "loss": 0.7097, "step": 707 }, { "epoch": 0.05609031491384433, "grad_norm": 3.0958758769349424, "learning_rate": 1.996438541474496e-05, "loss": 0.6299, "step": 708 }, { "epoch": 0.056169538522479696, "grad_norm": 4.564409778153522, "learning_rate": 1.996416871325803e-05, "loss": 0.6776, "step": 709 }, { "epoch": 0.05624876213111507, "grad_norm": 3.217151554432102, "learning_rate": 1.9963951355678533e-05, "loss": 0.4426, "step": 710 }, { "epoch": 0.056327985739750445, "grad_norm": 3.893164198008113, "learning_rate": 1.996373334202078e-05, "loss": 0.7279, "step": 711 }, { "epoch": 0.05640720934838582, "grad_norm": 3.483196537256004, "learning_rate": 1.9963514672299135e-05, "loss": 0.5573, "step": 712 }, { "epoch": 0.056486432957021195, "grad_norm": 3.2607345588255865, "learning_rate": 1.9963295346527984e-05, "loss": 0.5353, "step": 713 }, { "epoch": 0.05656565656565657, "grad_norm": 3.9139375896392177, "learning_rate": 1.996307536472178e-05, "loss": 0.5619, "step": 714 }, { "epoch": 0.05664488017429194, "grad_norm": 3.442690740042697, "learning_rate": 1.9962854726894997e-05, "loss": 0.5902, "step": 715 }, { "epoch": 0.05672410378292731, "grad_norm": 3.6606012908133336, "learning_rate": 1.9962633433062174e-05, "loss": 0.5121, "step": 716 }, { "epoch": 0.056803327391562686, "grad_norm": 3.3497865631212824, "learning_rate": 1.996241148323787e-05, "loss": 0.4978, "step": 717 }, { "epoch": 0.05688255100019806, "grad_norm": 3.4827552644308226, "learning_rate": 1.996218887743671e-05, "loss": 0.5625, "step": 718 }, { "epoch": 0.056961774608833435, "grad_norm": 3.2127906527141388, "learning_rate": 1.996196561567335e-05, "loss": 0.5686, "step": 719 }, { "epoch": 0.0570409982174688, "grad_norm": 3.2648095261708776, "learning_rate": 1.996174169796248e-05, "loss": 0.5225, "step": 720 }, { "epoch": 0.05712022182610418, "grad_norm": 2.9951694124526034, "learning_rate": 1.996151712431886e-05, "loss": 0.4255, "step": 721 }, { "epoch": 0.05719944543473955, "grad_norm": 3.442800049571805, "learning_rate": 1.9961291894757267e-05, "loss": 0.5549, "step": 722 }, { "epoch": 0.057278669043374926, "grad_norm": 3.2800047603653195, "learning_rate": 1.9961066009292532e-05, "loss": 0.5902, "step": 723 }, { "epoch": 0.0573578926520103, "grad_norm": 3.0687037289043166, "learning_rate": 1.9960839467939534e-05, "loss": 0.4605, "step": 724 }, { "epoch": 0.057437116260645675, "grad_norm": 3.2619902936278753, "learning_rate": 1.996061227071318e-05, "loss": 0.599, "step": 725 }, { "epoch": 0.05751633986928104, "grad_norm": 2.771994014100968, "learning_rate": 1.996038441762844e-05, "loss": 0.4764, "step": 726 }, { "epoch": 0.05759556347791642, "grad_norm": 3.5187912842147724, "learning_rate": 1.9960155908700306e-05, "loss": 0.3719, "step": 727 }, { "epoch": 0.05767478708655179, "grad_norm": 3.808869301374037, "learning_rate": 1.9959926743943836e-05, "loss": 0.6481, "step": 728 }, { "epoch": 0.057754010695187166, "grad_norm": 3.1915056219699816, "learning_rate": 1.9959696923374113e-05, "loss": 0.4986, "step": 729 }, { "epoch": 0.05783323430382254, "grad_norm": 3.161022277767463, "learning_rate": 1.995946644700627e-05, "loss": 0.3726, "step": 730 }, { "epoch": 0.057912457912457915, "grad_norm": 3.583963080905145, "learning_rate": 1.9959235314855485e-05, "loss": 0.5534, "step": 731 }, { "epoch": 0.05799168152109328, "grad_norm": 4.065214801449088, "learning_rate": 1.9959003526936972e-05, "loss": 0.6399, "step": 732 }, { "epoch": 0.05807090512972866, "grad_norm": 3.6006608035634433, "learning_rate": 1.9958771083266e-05, "loss": 0.4556, "step": 733 }, { "epoch": 0.05815012873836403, "grad_norm": 3.809853857364322, "learning_rate": 1.995853798385787e-05, "loss": 0.6825, "step": 734 }, { "epoch": 0.058229352346999406, "grad_norm": 3.2408338330169797, "learning_rate": 1.9958304228727928e-05, "loss": 0.4771, "step": 735 }, { "epoch": 0.05830857595563478, "grad_norm": 3.1505271905137024, "learning_rate": 1.995806981789157e-05, "loss": 0.4676, "step": 736 }, { "epoch": 0.058387799564270156, "grad_norm": 3.7435750688790135, "learning_rate": 1.9957834751364232e-05, "loss": 0.5649, "step": 737 }, { "epoch": 0.05846702317290552, "grad_norm": 3.468380145982312, "learning_rate": 1.995759902916139e-05, "loss": 0.5544, "step": 738 }, { "epoch": 0.0585462467815409, "grad_norm": 3.1078384827281305, "learning_rate": 1.995736265129856e-05, "loss": 0.4375, "step": 739 }, { "epoch": 0.05862547039017627, "grad_norm": 3.299000059642858, "learning_rate": 1.9957125617791314e-05, "loss": 0.5883, "step": 740 }, { "epoch": 0.05870469399881165, "grad_norm": 3.1178855470679663, "learning_rate": 1.995688792865526e-05, "loss": 0.4847, "step": 741 }, { "epoch": 0.05878391760744702, "grad_norm": 2.7074170027979565, "learning_rate": 1.995664958390604e-05, "loss": 0.6599, "step": 742 }, { "epoch": 0.058863141216082396, "grad_norm": 3.3223520370253254, "learning_rate": 1.995641058355936e-05, "loss": 0.4712, "step": 743 }, { "epoch": 0.05894236482471776, "grad_norm": 3.1110034863320193, "learning_rate": 1.9956170927630946e-05, "loss": 0.428, "step": 744 }, { "epoch": 0.05902158843335314, "grad_norm": 3.102205220331527, "learning_rate": 1.9955930616136582e-05, "loss": 0.5479, "step": 745 }, { "epoch": 0.05910081204198851, "grad_norm": 3.677342014539697, "learning_rate": 1.995568964909209e-05, "loss": 0.5976, "step": 746 }, { "epoch": 0.05918003565062389, "grad_norm": 3.5375691834245573, "learning_rate": 1.995544802651334e-05, "loss": 0.557, "step": 747 }, { "epoch": 0.05925925925925926, "grad_norm": 2.985052620237613, "learning_rate": 1.995520574841624e-05, "loss": 0.5389, "step": 748 }, { "epoch": 0.059338482867894636, "grad_norm": 3.258792656618429, "learning_rate": 1.9954962814816744e-05, "loss": 0.595, "step": 749 }, { "epoch": 0.059417706476530004, "grad_norm": 3.4019492829900817, "learning_rate": 1.9954719225730847e-05, "loss": 0.4949, "step": 750 }, { "epoch": 0.05949693008516538, "grad_norm": 3.3758435744471678, "learning_rate": 1.995447498117459e-05, "loss": 0.4804, "step": 751 }, { "epoch": 0.05957615369380075, "grad_norm": 3.248604476746306, "learning_rate": 1.9954230081164047e-05, "loss": 0.5321, "step": 752 }, { "epoch": 0.05965537730243613, "grad_norm": 3.603087466697579, "learning_rate": 1.9953984525715354e-05, "loss": 0.6247, "step": 753 }, { "epoch": 0.0597346009110715, "grad_norm": 3.1764205510708643, "learning_rate": 1.9953738314844676e-05, "loss": 0.5138, "step": 754 }, { "epoch": 0.05981382451970687, "grad_norm": 3.785327413957289, "learning_rate": 1.9953491448568222e-05, "loss": 0.5865, "step": 755 }, { "epoch": 0.059893048128342244, "grad_norm": 3.0494167171533517, "learning_rate": 1.9953243926902254e-05, "loss": 0.5465, "step": 756 }, { "epoch": 0.05997227173697762, "grad_norm": 3.758585541255481, "learning_rate": 1.995299574986306e-05, "loss": 0.6202, "step": 757 }, { "epoch": 0.06005149534561299, "grad_norm": 2.8546698404153905, "learning_rate": 1.9952746917466988e-05, "loss": 0.5589, "step": 758 }, { "epoch": 0.06013071895424837, "grad_norm": 3.381098485240035, "learning_rate": 1.9952497429730423e-05, "loss": 0.5346, "step": 759 }, { "epoch": 0.06020994256288374, "grad_norm": 4.071612969289404, "learning_rate": 1.9952247286669787e-05, "loss": 0.5611, "step": 760 }, { "epoch": 0.06028916617151911, "grad_norm": 3.5487326277659577, "learning_rate": 1.995199648830156e-05, "loss": 0.5495, "step": 761 }, { "epoch": 0.060368389780154484, "grad_norm": 3.929517760998896, "learning_rate": 1.9951745034642245e-05, "loss": 0.582, "step": 762 }, { "epoch": 0.06044761338878986, "grad_norm": 3.572933592182264, "learning_rate": 1.995149292570841e-05, "loss": 0.697, "step": 763 }, { "epoch": 0.06052683699742523, "grad_norm": 3.3905931641718583, "learning_rate": 1.9951240161516643e-05, "loss": 0.4269, "step": 764 }, { "epoch": 0.06060606060606061, "grad_norm": 4.048560499346723, "learning_rate": 1.9950986742083594e-05, "loss": 0.7241, "step": 765 }, { "epoch": 0.06068528421469598, "grad_norm": 3.3746790643609756, "learning_rate": 1.9950732667425953e-05, "loss": 0.6714, "step": 766 }, { "epoch": 0.06076450782333135, "grad_norm": 3.0467545961335816, "learning_rate": 1.9950477937560442e-05, "loss": 0.5577, "step": 767 }, { "epoch": 0.060843731431966724, "grad_norm": 5.485705362789386, "learning_rate": 1.995022255250384e-05, "loss": 0.5648, "step": 768 }, { "epoch": 0.0609229550406021, "grad_norm": 2.9539704464487593, "learning_rate": 1.9949966512272964e-05, "loss": 0.569, "step": 769 }, { "epoch": 0.06100217864923747, "grad_norm": 4.318479392563151, "learning_rate": 1.994970981688466e-05, "loss": 0.6147, "step": 770 }, { "epoch": 0.06108140225787285, "grad_norm": 3.5923912297788547, "learning_rate": 1.9949452466355847e-05, "loss": 0.5515, "step": 771 }, { "epoch": 0.06116062586650822, "grad_norm": 3.303299276932963, "learning_rate": 1.9949194460703462e-05, "loss": 0.5222, "step": 772 }, { "epoch": 0.06123984947514359, "grad_norm": 3.088897216647192, "learning_rate": 1.9948935799944492e-05, "loss": 0.5554, "step": 773 }, { "epoch": 0.061319073083778965, "grad_norm": 3.2686023826405126, "learning_rate": 1.994867648409597e-05, "loss": 0.5565, "step": 774 }, { "epoch": 0.06139829669241434, "grad_norm": 3.2418057508706632, "learning_rate": 1.9948416513174976e-05, "loss": 0.6196, "step": 775 }, { "epoch": 0.061477520301049714, "grad_norm": 3.2647364379838946, "learning_rate": 1.994815588719862e-05, "loss": 0.5637, "step": 776 }, { "epoch": 0.06155674390968509, "grad_norm": 3.1972071587145496, "learning_rate": 1.9947894606184065e-05, "loss": 0.538, "step": 777 }, { "epoch": 0.06163596751832046, "grad_norm": 5.0454017184462705, "learning_rate": 1.9947632670148517e-05, "loss": 0.6505, "step": 778 }, { "epoch": 0.06171519112695583, "grad_norm": 4.580361567919214, "learning_rate": 1.9947370079109224e-05, "loss": 0.6131, "step": 779 }, { "epoch": 0.061794414735591205, "grad_norm": 3.5160028389933253, "learning_rate": 1.9947106833083474e-05, "loss": 0.6069, "step": 780 }, { "epoch": 0.06187363834422658, "grad_norm": 4.3939801486340535, "learning_rate": 1.9946842932088603e-05, "loss": 0.6809, "step": 781 }, { "epoch": 0.061952861952861954, "grad_norm": 3.5523256337609013, "learning_rate": 1.9946578376141985e-05, "loss": 0.5473, "step": 782 }, { "epoch": 0.06203208556149733, "grad_norm": 3.084117043302228, "learning_rate": 1.9946313165261042e-05, "loss": 0.4179, "step": 783 }, { "epoch": 0.062111309170132696, "grad_norm": 3.236057231283532, "learning_rate": 1.9946047299463234e-05, "loss": 0.392, "step": 784 }, { "epoch": 0.06219053277876807, "grad_norm": 4.1043521682677, "learning_rate": 1.994578077876607e-05, "loss": 0.546, "step": 785 }, { "epoch": 0.062269756387403445, "grad_norm": 2.875456920695374, "learning_rate": 1.9945513603187096e-05, "loss": 0.5148, "step": 786 }, { "epoch": 0.06234897999603882, "grad_norm": 3.3628937602042517, "learning_rate": 1.994524577274391e-05, "loss": 0.5202, "step": 787 }, { "epoch": 0.062428203604674194, "grad_norm": 3.2304470588568073, "learning_rate": 1.994497728745414e-05, "loss": 0.6103, "step": 788 }, { "epoch": 0.06250742721330957, "grad_norm": 3.6065857419719354, "learning_rate": 1.9944708147335466e-05, "loss": 0.5804, "step": 789 }, { "epoch": 0.06258665082194494, "grad_norm": 4.08051276743454, "learning_rate": 1.9944438352405614e-05, "loss": 0.6671, "step": 790 }, { "epoch": 0.06266587443058032, "grad_norm": 4.008142336960553, "learning_rate": 1.9944167902682345e-05, "loss": 0.5497, "step": 791 }, { "epoch": 0.06274509803921569, "grad_norm": 3.1747393804679223, "learning_rate": 1.994389679818347e-05, "loss": 0.4977, "step": 792 }, { "epoch": 0.06282432164785105, "grad_norm": 3.331522779491226, "learning_rate": 1.9943625038926834e-05, "loss": 0.6059, "step": 793 }, { "epoch": 0.06290354525648643, "grad_norm": 2.6405997147387015, "learning_rate": 1.9943352624930336e-05, "loss": 0.4948, "step": 794 }, { "epoch": 0.0629827688651218, "grad_norm": 2.873587286137695, "learning_rate": 1.9943079556211915e-05, "loss": 0.5497, "step": 795 }, { "epoch": 0.06306199247375718, "grad_norm": 3.7373979810372138, "learning_rate": 1.9942805832789548e-05, "loss": 0.4954, "step": 796 }, { "epoch": 0.06314121608239255, "grad_norm": 4.052674250578069, "learning_rate": 1.9942531454681254e-05, "loss": 0.5153, "step": 797 }, { "epoch": 0.06322043969102793, "grad_norm": 3.265300570351352, "learning_rate": 1.994225642190511e-05, "loss": 0.6229, "step": 798 }, { "epoch": 0.0632996632996633, "grad_norm": 3.5495566354263843, "learning_rate": 1.9941980734479214e-05, "loss": 0.643, "step": 799 }, { "epoch": 0.06337888690829867, "grad_norm": 3.8547303759832974, "learning_rate": 1.994170439242173e-05, "loss": 0.4973, "step": 800 }, { "epoch": 0.06345811051693405, "grad_norm": 4.494308152772487, "learning_rate": 1.9941427395750844e-05, "loss": 0.6769, "step": 801 }, { "epoch": 0.06353733412556942, "grad_norm": 3.1210903642974483, "learning_rate": 1.99411497444848e-05, "loss": 0.4375, "step": 802 }, { "epoch": 0.0636165577342048, "grad_norm": 2.9562743507116824, "learning_rate": 1.994087143864188e-05, "loss": 0.4864, "step": 803 }, { "epoch": 0.06369578134284017, "grad_norm": 3.0621586910450884, "learning_rate": 1.994059247824041e-05, "loss": 0.5716, "step": 804 }, { "epoch": 0.06377500495147553, "grad_norm": 3.174053051013673, "learning_rate": 1.994031286329875e-05, "loss": 0.4088, "step": 805 }, { "epoch": 0.06385422856011091, "grad_norm": 2.3628598277423674, "learning_rate": 1.9940032593835324e-05, "loss": 0.3996, "step": 806 }, { "epoch": 0.06393345216874628, "grad_norm": 3.5657883786748243, "learning_rate": 1.993975166986858e-05, "loss": 0.5882, "step": 807 }, { "epoch": 0.06401267577738166, "grad_norm": 3.106973540018159, "learning_rate": 1.9939470091417012e-05, "loss": 0.5068, "step": 808 }, { "epoch": 0.06409189938601703, "grad_norm": 2.8094472265240698, "learning_rate": 1.9939187858499166e-05, "loss": 0.6059, "step": 809 }, { "epoch": 0.06417112299465241, "grad_norm": 3.0464270934163484, "learning_rate": 1.9938904971133626e-05, "loss": 0.5343, "step": 810 }, { "epoch": 0.06425034660328778, "grad_norm": 3.318874701089729, "learning_rate": 1.9938621429339012e-05, "loss": 0.5146, "step": 811 }, { "epoch": 0.06432957021192315, "grad_norm": 3.2667968994129364, "learning_rate": 1.9938337233134e-05, "loss": 0.4356, "step": 812 }, { "epoch": 0.06440879382055853, "grad_norm": 3.3281028621426345, "learning_rate": 1.9938052382537304e-05, "loss": 0.4901, "step": 813 }, { "epoch": 0.0644880174291939, "grad_norm": 3.28552622467835, "learning_rate": 1.9937766877567676e-05, "loss": 0.5669, "step": 814 }, { "epoch": 0.06456724103782928, "grad_norm": 3.6196509856752423, "learning_rate": 1.9937480718243914e-05, "loss": 0.5781, "step": 815 }, { "epoch": 0.06464646464646465, "grad_norm": 3.602465658372545, "learning_rate": 1.9937193904584865e-05, "loss": 0.6029, "step": 816 }, { "epoch": 0.06472568825510001, "grad_norm": 3.0275404943149806, "learning_rate": 1.9936906436609413e-05, "loss": 0.5235, "step": 817 }, { "epoch": 0.0648049118637354, "grad_norm": 3.6567518441982276, "learning_rate": 1.9936618314336486e-05, "loss": 0.5523, "step": 818 }, { "epoch": 0.06488413547237076, "grad_norm": 3.600521347136226, "learning_rate": 1.9936329537785054e-05, "loss": 0.434, "step": 819 }, { "epoch": 0.06496335908100614, "grad_norm": 2.814730311151901, "learning_rate": 1.9936040106974132e-05, "loss": 0.5441, "step": 820 }, { "epoch": 0.06504258268964151, "grad_norm": 3.0705772882904383, "learning_rate": 1.9935750021922778e-05, "loss": 0.5583, "step": 821 }, { "epoch": 0.06512180629827688, "grad_norm": 3.153976794235165, "learning_rate": 1.993545928265009e-05, "loss": 0.5385, "step": 822 }, { "epoch": 0.06520102990691226, "grad_norm": 3.2671450413470784, "learning_rate": 1.993516788917522e-05, "loss": 0.5837, "step": 823 }, { "epoch": 0.06528025351554763, "grad_norm": 3.0541618783472986, "learning_rate": 1.9934875841517346e-05, "loss": 0.477, "step": 824 }, { "epoch": 0.06535947712418301, "grad_norm": 3.0794903304719106, "learning_rate": 1.9934583139695703e-05, "loss": 0.5423, "step": 825 }, { "epoch": 0.06543870073281838, "grad_norm": 3.3781063720373345, "learning_rate": 1.9934289783729564e-05, "loss": 0.5394, "step": 826 }, { "epoch": 0.06551792434145376, "grad_norm": 3.226646288433327, "learning_rate": 1.993399577363824e-05, "loss": 0.4867, "step": 827 }, { "epoch": 0.06559714795008913, "grad_norm": 3.8081904719350765, "learning_rate": 1.9933701109441093e-05, "loss": 0.5372, "step": 828 }, { "epoch": 0.0656763715587245, "grad_norm": 3.1738409548591284, "learning_rate": 1.993340579115753e-05, "loss": 0.5519, "step": 829 }, { "epoch": 0.06575559516735988, "grad_norm": 3.1668036877365227, "learning_rate": 1.993310981880699e-05, "loss": 0.6013, "step": 830 }, { "epoch": 0.06583481877599524, "grad_norm": 3.249838730162251, "learning_rate": 1.9932813192408964e-05, "loss": 0.5012, "step": 831 }, { "epoch": 0.06591404238463063, "grad_norm": 3.1605404828049664, "learning_rate": 1.9932515911982983e-05, "loss": 0.5964, "step": 832 }, { "epoch": 0.06599326599326599, "grad_norm": 3.3002585332126313, "learning_rate": 1.993221797754862e-05, "loss": 0.6101, "step": 833 }, { "epoch": 0.06607248960190136, "grad_norm": 3.544986135899622, "learning_rate": 1.9931919389125496e-05, "loss": 0.4522, "step": 834 }, { "epoch": 0.06615171321053674, "grad_norm": 3.1080756194695107, "learning_rate": 1.9931620146733264e-05, "loss": 0.555, "step": 835 }, { "epoch": 0.06623093681917211, "grad_norm": 2.9532720023537, "learning_rate": 1.993132025039164e-05, "loss": 0.5338, "step": 836 }, { "epoch": 0.06631016042780749, "grad_norm": 2.9387886959896665, "learning_rate": 1.9931019700120363e-05, "loss": 0.4974, "step": 837 }, { "epoch": 0.06638938403644286, "grad_norm": 2.8833467108706756, "learning_rate": 1.9930718495939222e-05, "loss": 0.4894, "step": 838 }, { "epoch": 0.06646860764507824, "grad_norm": 2.982357852199496, "learning_rate": 1.9930416637868053e-05, "loss": 0.4433, "step": 839 }, { "epoch": 0.06654783125371361, "grad_norm": 3.517244070083994, "learning_rate": 1.993011412592673e-05, "loss": 0.5498, "step": 840 }, { "epoch": 0.06662705486234898, "grad_norm": 3.6511980208377843, "learning_rate": 1.992981096013517e-05, "loss": 0.5865, "step": 841 }, { "epoch": 0.06670627847098436, "grad_norm": 3.0976463342317753, "learning_rate": 1.9929507140513342e-05, "loss": 0.6326, "step": 842 }, { "epoch": 0.06678550207961972, "grad_norm": 3.759032585907262, "learning_rate": 1.9929202667081246e-05, "loss": 0.4802, "step": 843 }, { "epoch": 0.0668647256882551, "grad_norm": 3.152741725864914, "learning_rate": 1.9928897539858926e-05, "loss": 0.4866, "step": 844 }, { "epoch": 0.06694394929689047, "grad_norm": 3.006762251009012, "learning_rate": 1.992859175886648e-05, "loss": 0.5257, "step": 845 }, { "epoch": 0.06702317290552584, "grad_norm": 3.1440988527317493, "learning_rate": 1.9928285324124038e-05, "loss": 0.4722, "step": 846 }, { "epoch": 0.06710239651416122, "grad_norm": 3.9346948563166375, "learning_rate": 1.9927978235651782e-05, "loss": 0.5036, "step": 847 }, { "epoch": 0.06718162012279659, "grad_norm": 3.261745891095376, "learning_rate": 1.992767049346993e-05, "loss": 0.5603, "step": 848 }, { "epoch": 0.06726084373143197, "grad_norm": 3.4979426665820537, "learning_rate": 1.9927362097598746e-05, "loss": 0.5236, "step": 849 }, { "epoch": 0.06734006734006734, "grad_norm": 3.701323033633284, "learning_rate": 1.9927053048058534e-05, "loss": 0.53, "step": 850 }, { "epoch": 0.06741929094870272, "grad_norm": 4.27900706688922, "learning_rate": 1.9926743344869645e-05, "loss": 0.5007, "step": 851 }, { "epoch": 0.06749851455733809, "grad_norm": 3.2862540823863315, "learning_rate": 1.992643298805247e-05, "loss": 0.55, "step": 852 }, { "epoch": 0.06757773816597346, "grad_norm": 4.161286276262577, "learning_rate": 1.9926121977627447e-05, "loss": 0.5335, "step": 853 }, { "epoch": 0.06765696177460884, "grad_norm": 3.3258757718395535, "learning_rate": 1.9925810313615052e-05, "loss": 0.4599, "step": 854 }, { "epoch": 0.0677361853832442, "grad_norm": 2.9489275773156565, "learning_rate": 1.9925497996035807e-05, "loss": 0.5385, "step": 855 }, { "epoch": 0.06781540899187959, "grad_norm": 3.016060849369381, "learning_rate": 1.992518502491028e-05, "loss": 0.5752, "step": 856 }, { "epoch": 0.06789463260051495, "grad_norm": 3.281559578229815, "learning_rate": 1.9924871400259074e-05, "loss": 0.5521, "step": 857 }, { "epoch": 0.06797385620915032, "grad_norm": 3.3400649850747537, "learning_rate": 1.9924557122102843e-05, "loss": 0.6003, "step": 858 }, { "epoch": 0.0680530798177857, "grad_norm": 3.0509399072498304, "learning_rate": 1.9924242190462276e-05, "loss": 0.6108, "step": 859 }, { "epoch": 0.06813230342642107, "grad_norm": 3.789874651458056, "learning_rate": 1.992392660535812e-05, "loss": 0.7623, "step": 860 }, { "epoch": 0.06821152703505645, "grad_norm": 3.2651667785669245, "learning_rate": 1.9923610366811142e-05, "loss": 0.695, "step": 861 }, { "epoch": 0.06829075064369182, "grad_norm": 3.4776044999931193, "learning_rate": 1.9923293474842175e-05, "loss": 0.5909, "step": 862 }, { "epoch": 0.06836997425232719, "grad_norm": 3.611249398263171, "learning_rate": 1.9922975929472076e-05, "loss": 0.5961, "step": 863 }, { "epoch": 0.06844919786096257, "grad_norm": 3.506222268871883, "learning_rate": 1.9922657730721758e-05, "loss": 0.5333, "step": 864 }, { "epoch": 0.06852842146959794, "grad_norm": 3.19384462797647, "learning_rate": 1.9922338878612177e-05, "loss": 0.6063, "step": 865 }, { "epoch": 0.06860764507823332, "grad_norm": 3.1499780815233187, "learning_rate": 1.9922019373164324e-05, "loss": 0.4373, "step": 866 }, { "epoch": 0.06868686868686869, "grad_norm": 3.220586048290242, "learning_rate": 1.9921699214399238e-05, "loss": 0.5253, "step": 867 }, { "epoch": 0.06876609229550407, "grad_norm": 3.1435798789486387, "learning_rate": 1.9921378402337996e-05, "loss": 0.5183, "step": 868 }, { "epoch": 0.06884531590413943, "grad_norm": 3.208414684842551, "learning_rate": 1.9921056937001725e-05, "loss": 0.5066, "step": 869 }, { "epoch": 0.0689245395127748, "grad_norm": 3.2094310440452922, "learning_rate": 1.9920734818411592e-05, "loss": 0.4729, "step": 870 }, { "epoch": 0.06900376312141018, "grad_norm": 3.331217135565873, "learning_rate": 1.9920412046588807e-05, "loss": 0.5208, "step": 871 }, { "epoch": 0.06908298673004555, "grad_norm": 2.769705825482414, "learning_rate": 1.992008862155462e-05, "loss": 0.5024, "step": 872 }, { "epoch": 0.06916221033868093, "grad_norm": 2.9648064214569754, "learning_rate": 1.9919764543330334e-05, "loss": 0.4394, "step": 873 }, { "epoch": 0.0692414339473163, "grad_norm": 3.016760835507678, "learning_rate": 1.9919439811937283e-05, "loss": 0.5889, "step": 874 }, { "epoch": 0.06932065755595167, "grad_norm": 4.31693011635622, "learning_rate": 1.991911442739685e-05, "loss": 0.5902, "step": 875 }, { "epoch": 0.06939988116458705, "grad_norm": 3.3614015321940096, "learning_rate": 1.9918788389730457e-05, "loss": 0.4873, "step": 876 }, { "epoch": 0.06947910477322242, "grad_norm": 3.405796265129666, "learning_rate": 1.9918461698959576e-05, "loss": 0.5877, "step": 877 }, { "epoch": 0.0695583283818578, "grad_norm": 2.7882997771111797, "learning_rate": 1.9918134355105717e-05, "loss": 0.4429, "step": 878 }, { "epoch": 0.06963755199049317, "grad_norm": 3.405965968471252, "learning_rate": 1.9917806358190434e-05, "loss": 0.4426, "step": 879 }, { "epoch": 0.06971677559912855, "grad_norm": 2.8137216742036624, "learning_rate": 1.9917477708235324e-05, "loss": 0.5395, "step": 880 }, { "epoch": 0.06979599920776391, "grad_norm": 3.59334653364238, "learning_rate": 1.9917148405262027e-05, "loss": 0.6111, "step": 881 }, { "epoch": 0.06987522281639928, "grad_norm": 3.3384551086839624, "learning_rate": 1.9916818449292223e-05, "loss": 0.5071, "step": 882 }, { "epoch": 0.06995444642503466, "grad_norm": 2.9900469898376936, "learning_rate": 1.9916487840347644e-05, "loss": 0.5367, "step": 883 }, { "epoch": 0.07003367003367003, "grad_norm": 3.2517739697263717, "learning_rate": 1.9916156578450052e-05, "loss": 0.5962, "step": 884 }, { "epoch": 0.07011289364230541, "grad_norm": 3.3366988670980895, "learning_rate": 1.9915824663621267e-05, "loss": 0.5497, "step": 885 }, { "epoch": 0.07019211725094078, "grad_norm": 2.9955746621282087, "learning_rate": 1.991549209588314e-05, "loss": 0.3867, "step": 886 }, { "epoch": 0.07027134085957615, "grad_norm": 2.896430710793849, "learning_rate": 1.9915158875257566e-05, "loss": 0.4654, "step": 887 }, { "epoch": 0.07035056446821153, "grad_norm": 3.3018953564276354, "learning_rate": 1.991482500176649e-05, "loss": 0.4525, "step": 888 }, { "epoch": 0.0704297880768469, "grad_norm": 3.821556077636759, "learning_rate": 1.9914490475431892e-05, "loss": 0.5595, "step": 889 }, { "epoch": 0.07050901168548228, "grad_norm": 3.1537818655838143, "learning_rate": 1.9914155296275804e-05, "loss": 0.4943, "step": 890 }, { "epoch": 0.07058823529411765, "grad_norm": 3.8677166672324432, "learning_rate": 1.9913819464320295e-05, "loss": 0.424, "step": 891 }, { "epoch": 0.07066745890275301, "grad_norm": 3.2623026992414483, "learning_rate": 1.9913482979587473e-05, "loss": 0.4779, "step": 892 }, { "epoch": 0.0707466825113884, "grad_norm": 3.8429588120053153, "learning_rate": 1.9913145842099503e-05, "loss": 0.5283, "step": 893 }, { "epoch": 0.07082590612002376, "grad_norm": 3.1054141220693596, "learning_rate": 1.9912808051878575e-05, "loss": 0.4845, "step": 894 }, { "epoch": 0.07090512972865914, "grad_norm": 3.2737740431901514, "learning_rate": 1.9912469608946932e-05, "loss": 0.4593, "step": 895 }, { "epoch": 0.07098435333729451, "grad_norm": 2.8873162494726703, "learning_rate": 1.9912130513326863e-05, "loss": 0.5449, "step": 896 }, { "epoch": 0.0710635769459299, "grad_norm": 3.5032690698395452, "learning_rate": 1.9911790765040697e-05, "loss": 0.6493, "step": 897 }, { "epoch": 0.07114280055456526, "grad_norm": 3.232292158847636, "learning_rate": 1.9911450364110798e-05, "loss": 0.5729, "step": 898 }, { "epoch": 0.07122202416320063, "grad_norm": 2.902377848042208, "learning_rate": 1.9911109310559583e-05, "loss": 0.4728, "step": 899 }, { "epoch": 0.07130124777183601, "grad_norm": 2.743137754102563, "learning_rate": 1.991076760440951e-05, "loss": 0.54, "step": 900 }, { "epoch": 0.07138047138047138, "grad_norm": 2.6029961026870536, "learning_rate": 1.991042524568308e-05, "loss": 0.5454, "step": 901 }, { "epoch": 0.07145969498910676, "grad_norm": 2.9678516620106565, "learning_rate": 1.991008223440283e-05, "loss": 0.533, "step": 902 }, { "epoch": 0.07153891859774213, "grad_norm": 3.3123247173520003, "learning_rate": 1.9909738570591352e-05, "loss": 0.4731, "step": 903 }, { "epoch": 0.0716181422063775, "grad_norm": 2.7849369854194173, "learning_rate": 1.990939425427127e-05, "loss": 0.5149, "step": 904 }, { "epoch": 0.07169736581501288, "grad_norm": 3.4621544705170715, "learning_rate": 1.9909049285465258e-05, "loss": 0.58, "step": 905 }, { "epoch": 0.07177658942364824, "grad_norm": 2.8339815850791545, "learning_rate": 1.990870366419603e-05, "loss": 0.5151, "step": 906 }, { "epoch": 0.07185581303228362, "grad_norm": 2.779417652718206, "learning_rate": 1.9908357390486342e-05, "loss": 0.5366, "step": 907 }, { "epoch": 0.07193503664091899, "grad_norm": 3.3778322101015066, "learning_rate": 1.9908010464358997e-05, "loss": 0.4649, "step": 908 }, { "epoch": 0.07201426024955437, "grad_norm": 3.7126540608521412, "learning_rate": 1.9907662885836836e-05, "loss": 0.6046, "step": 909 }, { "epoch": 0.07209348385818974, "grad_norm": 3.7301662647735974, "learning_rate": 1.9907314654942748e-05, "loss": 0.4478, "step": 910 }, { "epoch": 0.07217270746682511, "grad_norm": 3.505793056214211, "learning_rate": 1.990696577169966e-05, "loss": 0.6349, "step": 911 }, { "epoch": 0.07225193107546049, "grad_norm": 2.981444684453008, "learning_rate": 1.9906616236130543e-05, "loss": 0.3976, "step": 912 }, { "epoch": 0.07233115468409586, "grad_norm": 2.829423402072483, "learning_rate": 1.990626604825842e-05, "loss": 0.5443, "step": 913 }, { "epoch": 0.07241037829273124, "grad_norm": 3.7201609741816433, "learning_rate": 1.9905915208106342e-05, "loss": 0.4463, "step": 914 }, { "epoch": 0.07248960190136661, "grad_norm": 3.339315194927594, "learning_rate": 1.990556371569741e-05, "loss": 0.5617, "step": 915 }, { "epoch": 0.07256882551000197, "grad_norm": 3.060217869462058, "learning_rate": 1.990521157105477e-05, "loss": 0.421, "step": 916 }, { "epoch": 0.07264804911863736, "grad_norm": 2.9052077714352844, "learning_rate": 1.990485877420161e-05, "loss": 0.465, "step": 917 }, { "epoch": 0.07272727272727272, "grad_norm": 2.9157033711189317, "learning_rate": 1.990450532516116e-05, "loss": 0.5046, "step": 918 }, { "epoch": 0.0728064963359081, "grad_norm": 3.286068191469207, "learning_rate": 1.9904151223956688e-05, "loss": 0.5029, "step": 919 }, { "epoch": 0.07288571994454347, "grad_norm": 3.935350582813888, "learning_rate": 1.9903796470611515e-05, "loss": 0.4149, "step": 920 }, { "epoch": 0.07296494355317884, "grad_norm": 3.322013780640328, "learning_rate": 1.9903441065149e-05, "loss": 0.517, "step": 921 }, { "epoch": 0.07304416716181422, "grad_norm": 2.9753811644754613, "learning_rate": 1.990308500759254e-05, "loss": 0.5145, "step": 922 }, { "epoch": 0.07312339077044959, "grad_norm": 4.015814854715801, "learning_rate": 1.9902728297965586e-05, "loss": 0.6311, "step": 923 }, { "epoch": 0.07320261437908497, "grad_norm": 3.4987862810611134, "learning_rate": 1.990237093629162e-05, "loss": 0.5554, "step": 924 }, { "epoch": 0.07328183798772034, "grad_norm": 3.5241505027941304, "learning_rate": 1.9902012922594178e-05, "loss": 0.605, "step": 925 }, { "epoch": 0.07336106159635572, "grad_norm": 4.3719104981238255, "learning_rate": 1.990165425689683e-05, "loss": 0.537, "step": 926 }, { "epoch": 0.07344028520499109, "grad_norm": 3.69180102158796, "learning_rate": 1.9901294939223192e-05, "loss": 0.6398, "step": 927 }, { "epoch": 0.07351950881362646, "grad_norm": 3.7689137373263617, "learning_rate": 1.9900934969596925e-05, "loss": 0.5357, "step": 928 }, { "epoch": 0.07359873242226184, "grad_norm": 2.969961184715992, "learning_rate": 1.9900574348041728e-05, "loss": 0.385, "step": 929 }, { "epoch": 0.0736779560308972, "grad_norm": 2.875534751970222, "learning_rate": 1.990021307458135e-05, "loss": 0.5323, "step": 930 }, { "epoch": 0.07375717963953259, "grad_norm": 3.6147602203629576, "learning_rate": 1.989985114923958e-05, "loss": 0.6277, "step": 931 }, { "epoch": 0.07383640324816795, "grad_norm": 3.328021476059367, "learning_rate": 1.9899488572040244e-05, "loss": 0.4645, "step": 932 }, { "epoch": 0.07391562685680332, "grad_norm": 2.688794166016588, "learning_rate": 1.989912534300722e-05, "loss": 0.5001, "step": 933 }, { "epoch": 0.0739948504654387, "grad_norm": 3.190149884710966, "learning_rate": 1.9898761462164425e-05, "loss": 0.6568, "step": 934 }, { "epoch": 0.07407407407407407, "grad_norm": 2.8515037574618574, "learning_rate": 1.989839692953581e-05, "loss": 0.5239, "step": 935 }, { "epoch": 0.07415329768270945, "grad_norm": 2.7269253995201077, "learning_rate": 1.9898031745145397e-05, "loss": 0.5187, "step": 936 }, { "epoch": 0.07423252129134482, "grad_norm": 3.170772297887385, "learning_rate": 1.989766590901721e-05, "loss": 0.548, "step": 937 }, { "epoch": 0.0743117448999802, "grad_norm": 3.5797341627772616, "learning_rate": 1.9897299421175353e-05, "loss": 0.6615, "step": 938 }, { "epoch": 0.07439096850861557, "grad_norm": 3.738461780002007, "learning_rate": 1.989693228164395e-05, "loss": 0.6034, "step": 939 }, { "epoch": 0.07447019211725094, "grad_norm": 3.0912636712003367, "learning_rate": 1.989656449044718e-05, "loss": 0.5045, "step": 940 }, { "epoch": 0.07454941572588632, "grad_norm": 3.4284259629492757, "learning_rate": 1.9896196047609255e-05, "loss": 0.6413, "step": 941 }, { "epoch": 0.07462863933452168, "grad_norm": 3.3548505539139084, "learning_rate": 1.9895826953154437e-05, "loss": 0.5221, "step": 942 }, { "epoch": 0.07470786294315707, "grad_norm": 3.4262175625229734, "learning_rate": 1.9895457207107032e-05, "loss": 0.527, "step": 943 }, { "epoch": 0.07478708655179243, "grad_norm": 2.9715757178923794, "learning_rate": 1.9895086809491384e-05, "loss": 0.5269, "step": 944 }, { "epoch": 0.0748663101604278, "grad_norm": 3.010992141648601, "learning_rate": 1.989471576033188e-05, "loss": 0.5371, "step": 945 }, { "epoch": 0.07494553376906318, "grad_norm": 3.009349473795652, "learning_rate": 1.9894344059652953e-05, "loss": 0.4421, "step": 946 }, { "epoch": 0.07502475737769855, "grad_norm": 3.0029717342311972, "learning_rate": 1.989397170747908e-05, "loss": 0.6139, "step": 947 }, { "epoch": 0.07510398098633393, "grad_norm": 3.099087801001541, "learning_rate": 1.9893598703834773e-05, "loss": 0.5155, "step": 948 }, { "epoch": 0.0751832045949693, "grad_norm": 2.8075894256822416, "learning_rate": 1.98932250487446e-05, "loss": 0.5148, "step": 949 }, { "epoch": 0.07526242820360468, "grad_norm": 3.1471239863147398, "learning_rate": 1.989285074223316e-05, "loss": 0.532, "step": 950 }, { "epoch": 0.07534165181224005, "grad_norm": 2.91333039066608, "learning_rate": 1.98924757843251e-05, "loss": 0.5576, "step": 951 }, { "epoch": 0.07542087542087542, "grad_norm": 3.717685257657358, "learning_rate": 1.989210017504511e-05, "loss": 0.4792, "step": 952 }, { "epoch": 0.0755000990295108, "grad_norm": 2.969620364429761, "learning_rate": 1.989172391441792e-05, "loss": 0.5467, "step": 953 }, { "epoch": 0.07557932263814617, "grad_norm": 3.287101332916613, "learning_rate": 1.9891347002468307e-05, "loss": 0.6465, "step": 954 }, { "epoch": 0.07565854624678155, "grad_norm": 2.6125475489364436, "learning_rate": 1.9890969439221086e-05, "loss": 0.3559, "step": 955 }, { "epoch": 0.07573776985541691, "grad_norm": 3.097947753883366, "learning_rate": 1.989059122470112e-05, "loss": 0.5442, "step": 956 }, { "epoch": 0.07581699346405228, "grad_norm": 2.6040975305283522, "learning_rate": 1.9890212358933316e-05, "loss": 0.4987, "step": 957 }, { "epoch": 0.07589621707268766, "grad_norm": 3.3980837162093196, "learning_rate": 1.9889832841942613e-05, "loss": 0.5733, "step": 958 }, { "epoch": 0.07597544068132303, "grad_norm": 4.572176746607006, "learning_rate": 1.988945267375401e-05, "loss": 0.5469, "step": 959 }, { "epoch": 0.07605466428995841, "grad_norm": 3.4094797998905575, "learning_rate": 1.9889071854392528e-05, "loss": 0.4718, "step": 960 }, { "epoch": 0.07613388789859378, "grad_norm": 3.241185505042529, "learning_rate": 1.9888690383883247e-05, "loss": 0.4224, "step": 961 }, { "epoch": 0.07621311150722915, "grad_norm": 2.9933607711239905, "learning_rate": 1.9888308262251286e-05, "loss": 0.4875, "step": 962 }, { "epoch": 0.07629233511586453, "grad_norm": 2.8730272004508803, "learning_rate": 1.988792548952181e-05, "loss": 0.4977, "step": 963 }, { "epoch": 0.0763715587244999, "grad_norm": 3.362786263610291, "learning_rate": 1.9887542065720013e-05, "loss": 0.575, "step": 964 }, { "epoch": 0.07645078233313528, "grad_norm": 3.051133597509086, "learning_rate": 1.988715799087115e-05, "loss": 0.496, "step": 965 }, { "epoch": 0.07653000594177065, "grad_norm": 2.8379262902364992, "learning_rate": 1.9886773265000502e-05, "loss": 0.4711, "step": 966 }, { "epoch": 0.07660922955040603, "grad_norm": 2.662204304813247, "learning_rate": 1.9886387888133413e-05, "loss": 0.384, "step": 967 }, { "epoch": 0.0766884531590414, "grad_norm": 2.820729426169255, "learning_rate": 1.988600186029525e-05, "loss": 0.4521, "step": 968 }, { "epoch": 0.07676767676767676, "grad_norm": 3.0382179891294, "learning_rate": 1.988561518151143e-05, "loss": 0.4529, "step": 969 }, { "epoch": 0.07684690037631214, "grad_norm": 2.957282539174493, "learning_rate": 1.988522785180742e-05, "loss": 0.5485, "step": 970 }, { "epoch": 0.07692612398494751, "grad_norm": 3.005716280610578, "learning_rate": 1.9884839871208717e-05, "loss": 0.5376, "step": 971 }, { "epoch": 0.07700534759358289, "grad_norm": 2.6369837769277096, "learning_rate": 1.9884451239740877e-05, "loss": 0.4363, "step": 972 }, { "epoch": 0.07708457120221826, "grad_norm": 2.987326978594906, "learning_rate": 1.988406195742948e-05, "loss": 0.4927, "step": 973 }, { "epoch": 0.07716379481085363, "grad_norm": 2.9301629653764256, "learning_rate": 1.9883672024300163e-05, "loss": 0.422, "step": 974 }, { "epoch": 0.07724301841948901, "grad_norm": 2.8506892740950818, "learning_rate": 1.98832814403786e-05, "loss": 0.5169, "step": 975 }, { "epoch": 0.07732224202812438, "grad_norm": 3.4548599576908745, "learning_rate": 1.988289020569051e-05, "loss": 0.6048, "step": 976 }, { "epoch": 0.07740146563675976, "grad_norm": 3.522206007638793, "learning_rate": 1.9882498320261652e-05, "loss": 0.5529, "step": 977 }, { "epoch": 0.07748068924539513, "grad_norm": 3.465933933430114, "learning_rate": 1.9882105784117835e-05, "loss": 0.4822, "step": 978 }, { "epoch": 0.07755991285403051, "grad_norm": 4.146962473415494, "learning_rate": 1.98817125972849e-05, "loss": 0.5807, "step": 979 }, { "epoch": 0.07763913646266588, "grad_norm": 3.238348667770255, "learning_rate": 1.9881318759788738e-05, "loss": 0.5667, "step": 980 }, { "epoch": 0.07771836007130124, "grad_norm": 3.200935033788777, "learning_rate": 1.988092427165528e-05, "loss": 0.4223, "step": 981 }, { "epoch": 0.07779758367993662, "grad_norm": 3.677610045711899, "learning_rate": 1.98805291329105e-05, "loss": 0.6305, "step": 982 }, { "epoch": 0.07787680728857199, "grad_norm": 2.41384432750428, "learning_rate": 1.9880133343580423e-05, "loss": 0.5119, "step": 983 }, { "epoch": 0.07795603089720737, "grad_norm": 2.9954557863928466, "learning_rate": 1.9879736903691107e-05, "loss": 0.5795, "step": 984 }, { "epoch": 0.07803525450584274, "grad_norm": 2.962993648836908, "learning_rate": 1.9879339813268653e-05, "loss": 0.6063, "step": 985 }, { "epoch": 0.07811447811447811, "grad_norm": 2.9711648085899753, "learning_rate": 1.9878942072339208e-05, "loss": 0.4009, "step": 986 }, { "epoch": 0.07819370172311349, "grad_norm": 3.9742903438818415, "learning_rate": 1.987854368092896e-05, "loss": 0.6467, "step": 987 }, { "epoch": 0.07827292533174886, "grad_norm": 2.4292974257509137, "learning_rate": 1.9878144639064145e-05, "loss": 0.4372, "step": 988 }, { "epoch": 0.07835214894038424, "grad_norm": 2.8007721191078137, "learning_rate": 1.9877744946771034e-05, "loss": 0.3379, "step": 989 }, { "epoch": 0.0784313725490196, "grad_norm": 2.5542649721521156, "learning_rate": 1.987734460407595e-05, "loss": 0.4264, "step": 990 }, { "epoch": 0.07851059615765497, "grad_norm": 3.1371018904129824, "learning_rate": 1.9876943611005252e-05, "loss": 0.4664, "step": 991 }, { "epoch": 0.07858981976629036, "grad_norm": 3.1621062811895775, "learning_rate": 1.9876541967585337e-05, "loss": 0.5031, "step": 992 }, { "epoch": 0.07866904337492572, "grad_norm": 3.2344934277000315, "learning_rate": 1.987613967384266e-05, "loss": 0.4734, "step": 993 }, { "epoch": 0.0787482669835611, "grad_norm": 2.7557556922391435, "learning_rate": 1.9875736729803705e-05, "loss": 0.5568, "step": 994 }, { "epoch": 0.07882749059219647, "grad_norm": 3.567627748685407, "learning_rate": 1.9875333135495e-05, "loss": 0.5808, "step": 995 }, { "epoch": 0.07890671420083185, "grad_norm": 6.11760922392571, "learning_rate": 1.9874928890943134e-05, "loss": 0.6194, "step": 996 }, { "epoch": 0.07898593780946722, "grad_norm": 2.828729150471876, "learning_rate": 1.9874523996174714e-05, "loss": 0.5218, "step": 997 }, { "epoch": 0.07906516141810259, "grad_norm": 3.21582827951103, "learning_rate": 1.98741184512164e-05, "loss": 0.617, "step": 998 }, { "epoch": 0.07914438502673797, "grad_norm": 2.8138075594820897, "learning_rate": 1.9873712256094898e-05, "loss": 0.5541, "step": 999 }, { "epoch": 0.07922360863537334, "grad_norm": 2.8557096380024474, "learning_rate": 1.987330541083695e-05, "loss": 0.5396, "step": 1000 }, { "epoch": 0.07930283224400872, "grad_norm": 3.3567608766462964, "learning_rate": 1.9872897915469353e-05, "loss": 0.4047, "step": 1001 }, { "epoch": 0.07938205585264409, "grad_norm": 3.0708985217595823, "learning_rate": 1.987248977001893e-05, "loss": 0.5186, "step": 1002 }, { "epoch": 0.07946127946127945, "grad_norm": 3.3865895807018807, "learning_rate": 1.987208097451256e-05, "loss": 0.5101, "step": 1003 }, { "epoch": 0.07954050306991484, "grad_norm": 3.3070447899063113, "learning_rate": 1.987167152897716e-05, "loss": 0.535, "step": 1004 }, { "epoch": 0.0796197266785502, "grad_norm": 2.8231662483860056, "learning_rate": 1.987126143343969e-05, "loss": 0.4462, "step": 1005 }, { "epoch": 0.07969895028718559, "grad_norm": 3.316521112845417, "learning_rate": 1.987085068792715e-05, "loss": 0.5232, "step": 1006 }, { "epoch": 0.07977817389582095, "grad_norm": 3.540571595742248, "learning_rate": 1.9870439292466587e-05, "loss": 0.5254, "step": 1007 }, { "epoch": 0.07985739750445633, "grad_norm": 3.3575150202003354, "learning_rate": 1.9870027247085093e-05, "loss": 0.6666, "step": 1008 }, { "epoch": 0.0799366211130917, "grad_norm": 2.967612030596316, "learning_rate": 1.9869614551809793e-05, "loss": 0.4186, "step": 1009 }, { "epoch": 0.08001584472172707, "grad_norm": 3.2481661369813275, "learning_rate": 1.986920120666787e-05, "loss": 0.471, "step": 1010 }, { "epoch": 0.08009506833036245, "grad_norm": 3.1669031408264807, "learning_rate": 1.986878721168653e-05, "loss": 0.5178, "step": 1011 }, { "epoch": 0.08017429193899782, "grad_norm": 11.351114085480855, "learning_rate": 1.986837256689304e-05, "loss": 0.5286, "step": 1012 }, { "epoch": 0.0802535155476332, "grad_norm": 3.354968889104633, "learning_rate": 1.98679572723147e-05, "loss": 0.48, "step": 1013 }, { "epoch": 0.08033273915626857, "grad_norm": 4.615116408425043, "learning_rate": 1.9867541327978853e-05, "loss": 0.6214, "step": 1014 }, { "epoch": 0.08041196276490394, "grad_norm": 3.6421750123568346, "learning_rate": 1.986712473391289e-05, "loss": 0.5059, "step": 1015 }, { "epoch": 0.08049118637353932, "grad_norm": 2.861637563480363, "learning_rate": 1.986670749014424e-05, "loss": 0.5235, "step": 1016 }, { "epoch": 0.08057040998217468, "grad_norm": 3.306106734822648, "learning_rate": 1.9866289596700383e-05, "loss": 0.4339, "step": 1017 }, { "epoch": 0.08064963359081007, "grad_norm": 3.6107048802811463, "learning_rate": 1.9865871053608823e-05, "loss": 0.4927, "step": 1018 }, { "epoch": 0.08072885719944543, "grad_norm": 3.4573315071224844, "learning_rate": 1.9865451860897126e-05, "loss": 0.5167, "step": 1019 }, { "epoch": 0.08080808080808081, "grad_norm": 3.2852436051003857, "learning_rate": 1.98650320185929e-05, "loss": 0.5141, "step": 1020 }, { "epoch": 0.08088730441671618, "grad_norm": 3.5349466410761363, "learning_rate": 1.986461152672378e-05, "loss": 0.5828, "step": 1021 }, { "epoch": 0.08096652802535155, "grad_norm": 3.395641606236405, "learning_rate": 1.986419038531745e-05, "loss": 0.5388, "step": 1022 }, { "epoch": 0.08104575163398693, "grad_norm": 3.1130354848169786, "learning_rate": 1.9863768594401654e-05, "loss": 0.4576, "step": 1023 }, { "epoch": 0.0811249752426223, "grad_norm": 3.124040096105198, "learning_rate": 1.9863346154004155e-05, "loss": 0.5245, "step": 1024 }, { "epoch": 0.08120419885125768, "grad_norm": 2.7410260102914563, "learning_rate": 1.986292306415277e-05, "loss": 0.5119, "step": 1025 }, { "epoch": 0.08128342245989305, "grad_norm": 2.7015648118243254, "learning_rate": 1.9862499324875362e-05, "loss": 0.447, "step": 1026 }, { "epoch": 0.08136264606852842, "grad_norm": 3.0420700324094114, "learning_rate": 1.9862074936199827e-05, "loss": 0.4277, "step": 1027 }, { "epoch": 0.0814418696771638, "grad_norm": 3.2072228570001777, "learning_rate": 1.9861649898154107e-05, "loss": 0.5813, "step": 1028 }, { "epoch": 0.08152109328579916, "grad_norm": 3.350451358578708, "learning_rate": 1.98612242107662e-05, "loss": 0.4792, "step": 1029 }, { "epoch": 0.08160031689443455, "grad_norm": 2.788663558206002, "learning_rate": 1.9860797874064123e-05, "loss": 0.3798, "step": 1030 }, { "epoch": 0.08167954050306991, "grad_norm": 3.4858638818553125, "learning_rate": 1.9860370888075954e-05, "loss": 0.5046, "step": 1031 }, { "epoch": 0.08175876411170528, "grad_norm": 3.1440843840329604, "learning_rate": 1.9859943252829804e-05, "loss": 0.5357, "step": 1032 }, { "epoch": 0.08183798772034066, "grad_norm": 3.1891244014042974, "learning_rate": 1.9859514968353836e-05, "loss": 0.6391, "step": 1033 }, { "epoch": 0.08191721132897603, "grad_norm": 3.4270411698823375, "learning_rate": 1.985908603467625e-05, "loss": 0.4699, "step": 1034 }, { "epoch": 0.08199643493761141, "grad_norm": 3.169767999676323, "learning_rate": 1.985865645182529e-05, "loss": 0.4575, "step": 1035 }, { "epoch": 0.08207565854624678, "grad_norm": 3.568540642474215, "learning_rate": 1.9858226219829234e-05, "loss": 0.4753, "step": 1036 }, { "epoch": 0.08215488215488216, "grad_norm": 3.033743569390355, "learning_rate": 1.985779533871642e-05, "loss": 0.52, "step": 1037 }, { "epoch": 0.08223410576351753, "grad_norm": 3.0604077852827256, "learning_rate": 1.985736380851521e-05, "loss": 0.4779, "step": 1038 }, { "epoch": 0.0823133293721529, "grad_norm": 3.5317220145185138, "learning_rate": 1.9856931629254032e-05, "loss": 0.3752, "step": 1039 }, { "epoch": 0.08239255298078828, "grad_norm": 3.1526157342567362, "learning_rate": 1.9856498800961328e-05, "loss": 0.4543, "step": 1040 }, { "epoch": 0.08247177658942365, "grad_norm": 3.1841713648545813, "learning_rate": 1.9856065323665606e-05, "loss": 0.5466, "step": 1041 }, { "epoch": 0.08255100019805903, "grad_norm": 3.2133769632346745, "learning_rate": 1.9855631197395406e-05, "loss": 0.5136, "step": 1042 }, { "epoch": 0.0826302238066944, "grad_norm": 3.0391147020087055, "learning_rate": 1.985519642217932e-05, "loss": 0.4877, "step": 1043 }, { "epoch": 0.08270944741532976, "grad_norm": 3.6858143921540405, "learning_rate": 1.9854760998045964e-05, "loss": 0.5523, "step": 1044 }, { "epoch": 0.08278867102396514, "grad_norm": 4.016601076910318, "learning_rate": 1.9854324925024017e-05, "loss": 0.5545, "step": 1045 }, { "epoch": 0.08286789463260051, "grad_norm": 2.903257820892303, "learning_rate": 1.9853888203142184e-05, "loss": 0.4813, "step": 1046 }, { "epoch": 0.08294711824123589, "grad_norm": 2.840563751868195, "learning_rate": 1.9853450832429234e-05, "loss": 0.4203, "step": 1047 }, { "epoch": 0.08302634184987126, "grad_norm": 2.7881233364286544, "learning_rate": 1.9853012812913956e-05, "loss": 0.3934, "step": 1048 }, { "epoch": 0.08310556545850664, "grad_norm": 3.449584601555392, "learning_rate": 1.9852574144625193e-05, "loss": 0.5277, "step": 1049 }, { "epoch": 0.08318478906714201, "grad_norm": 3.157646155820125, "learning_rate": 1.985213482759183e-05, "loss": 0.4682, "step": 1050 }, { "epoch": 0.08326401267577738, "grad_norm": 3.27493690159151, "learning_rate": 1.9851694861842795e-05, "loss": 0.58, "step": 1051 }, { "epoch": 0.08334323628441276, "grad_norm": 2.866102262051014, "learning_rate": 1.9851254247407053e-05, "loss": 0.3921, "step": 1052 }, { "epoch": 0.08342245989304813, "grad_norm": 3.155110350052891, "learning_rate": 1.9850812984313626e-05, "loss": 0.5557, "step": 1053 }, { "epoch": 0.08350168350168351, "grad_norm": 2.8562844113253276, "learning_rate": 1.985037107259156e-05, "loss": 0.5239, "step": 1054 }, { "epoch": 0.08358090711031887, "grad_norm": 2.871566456820682, "learning_rate": 1.984992851226996e-05, "loss": 0.5293, "step": 1055 }, { "epoch": 0.08366013071895424, "grad_norm": 3.4374142642811276, "learning_rate": 1.9849485303377955e-05, "loss": 0.4608, "step": 1056 }, { "epoch": 0.08373935432758962, "grad_norm": 3.724586501248896, "learning_rate": 1.984904144594474e-05, "loss": 0.6747, "step": 1057 }, { "epoch": 0.08381857793622499, "grad_norm": 3.6301441190979298, "learning_rate": 1.9848596939999534e-05, "loss": 0.6223, "step": 1058 }, { "epoch": 0.08389780154486037, "grad_norm": 2.3854111436717997, "learning_rate": 1.984815178557161e-05, "loss": 0.398, "step": 1059 }, { "epoch": 0.08397702515349574, "grad_norm": 3.0993186048035857, "learning_rate": 1.9847705982690275e-05, "loss": 0.5106, "step": 1060 }, { "epoch": 0.08405624876213111, "grad_norm": 2.66489757437417, "learning_rate": 1.984725953138489e-05, "loss": 0.5062, "step": 1061 }, { "epoch": 0.08413547237076649, "grad_norm": 2.8364142644307138, "learning_rate": 1.9846812431684843e-05, "loss": 0.4424, "step": 1062 }, { "epoch": 0.08421469597940186, "grad_norm": 2.6253931479370594, "learning_rate": 1.9846364683619575e-05, "loss": 0.5658, "step": 1063 }, { "epoch": 0.08429391958803724, "grad_norm": 3.0014747590276283, "learning_rate": 1.9845916287218575e-05, "loss": 0.5445, "step": 1064 }, { "epoch": 0.0843731431966726, "grad_norm": 2.989609827158304, "learning_rate": 1.9845467242511362e-05, "loss": 0.5067, "step": 1065 }, { "epoch": 0.08445236680530799, "grad_norm": 3.3724948216425337, "learning_rate": 1.9845017549527502e-05, "loss": 0.5039, "step": 1066 }, { "epoch": 0.08453159041394336, "grad_norm": 2.945638085051838, "learning_rate": 1.984456720829661e-05, "loss": 0.4895, "step": 1067 }, { "epoch": 0.08461081402257872, "grad_norm": 3.845012491238431, "learning_rate": 1.9844116218848335e-05, "loss": 0.4923, "step": 1068 }, { "epoch": 0.0846900376312141, "grad_norm": 3.5786710825964576, "learning_rate": 1.9843664581212374e-05, "loss": 0.4495, "step": 1069 }, { "epoch": 0.08476926123984947, "grad_norm": 3.4840226581121625, "learning_rate": 1.9843212295418464e-05, "loss": 0.4915, "step": 1070 }, { "epoch": 0.08484848484848485, "grad_norm": 3.183119557311288, "learning_rate": 1.984275936149639e-05, "loss": 0.4792, "step": 1071 }, { "epoch": 0.08492770845712022, "grad_norm": 3.0949654824978765, "learning_rate": 1.984230577947597e-05, "loss": 0.4828, "step": 1072 }, { "epoch": 0.08500693206575559, "grad_norm": 2.80538534011389, "learning_rate": 1.9841851549387074e-05, "loss": 0.5201, "step": 1073 }, { "epoch": 0.08508615567439097, "grad_norm": 2.5706908572715985, "learning_rate": 1.9841396671259606e-05, "loss": 0.4986, "step": 1074 }, { "epoch": 0.08516537928302634, "grad_norm": 3.149705891963666, "learning_rate": 1.9840941145123524e-05, "loss": 0.4996, "step": 1075 }, { "epoch": 0.08524460289166172, "grad_norm": 2.873645897622677, "learning_rate": 1.984048497100882e-05, "loss": 0.5226, "step": 1076 }, { "epoch": 0.08532382650029709, "grad_norm": 2.5030867999637207, "learning_rate": 1.9840028148945526e-05, "loss": 0.5205, "step": 1077 }, { "epoch": 0.08540305010893247, "grad_norm": 3.18791226303514, "learning_rate": 1.983957067896373e-05, "loss": 0.3476, "step": 1078 }, { "epoch": 0.08548227371756784, "grad_norm": 2.787518424590658, "learning_rate": 1.9839112561093548e-05, "loss": 0.3596, "step": 1079 }, { "epoch": 0.0855614973262032, "grad_norm": 3.378661751130679, "learning_rate": 1.983865379536515e-05, "loss": 0.5327, "step": 1080 }, { "epoch": 0.08564072093483859, "grad_norm": 3.4768398111991488, "learning_rate": 1.9838194381808737e-05, "loss": 0.5766, "step": 1081 }, { "epoch": 0.08571994454347395, "grad_norm": 2.85385584536561, "learning_rate": 1.983773432045456e-05, "loss": 0.4246, "step": 1082 }, { "epoch": 0.08579916815210933, "grad_norm": 3.6371951984749638, "learning_rate": 1.9837273611332918e-05, "loss": 0.5942, "step": 1083 }, { "epoch": 0.0858783917607447, "grad_norm": 2.677380059161476, "learning_rate": 1.983681225447414e-05, "loss": 0.5097, "step": 1084 }, { "epoch": 0.08595761536938007, "grad_norm": 3.498117904952886, "learning_rate": 1.9836350249908606e-05, "loss": 0.6546, "step": 1085 }, { "epoch": 0.08603683897801545, "grad_norm": 2.3158569524200354, "learning_rate": 1.983588759766674e-05, "loss": 0.2811, "step": 1086 }, { "epoch": 0.08611606258665082, "grad_norm": 3.407167796845734, "learning_rate": 1.9835424297779002e-05, "loss": 0.4563, "step": 1087 }, { "epoch": 0.0861952861952862, "grad_norm": 2.969757801601191, "learning_rate": 1.98349603502759e-05, "loss": 0.4085, "step": 1088 }, { "epoch": 0.08627450980392157, "grad_norm": 2.9950622769790582, "learning_rate": 1.983449575518798e-05, "loss": 0.3886, "step": 1089 }, { "epoch": 0.08635373341255695, "grad_norm": 2.5152208402796448, "learning_rate": 1.983403051254584e-05, "loss": 0.3387, "step": 1090 }, { "epoch": 0.08643295702119232, "grad_norm": 3.004682481820772, "learning_rate": 1.9833564622380105e-05, "loss": 0.4687, "step": 1091 }, { "epoch": 0.08651218062982768, "grad_norm": 5.160912022023601, "learning_rate": 1.9833098084721455e-05, "loss": 0.4735, "step": 1092 }, { "epoch": 0.08659140423846307, "grad_norm": 2.810539005726432, "learning_rate": 1.9832630899600607e-05, "loss": 0.4921, "step": 1093 }, { "epoch": 0.08667062784709843, "grad_norm": 3.575300057152066, "learning_rate": 1.9832163067048335e-05, "loss": 0.4792, "step": 1094 }, { "epoch": 0.08674985145573381, "grad_norm": 3.2766962093931724, "learning_rate": 1.9831694587095428e-05, "loss": 0.4362, "step": 1095 }, { "epoch": 0.08682907506436918, "grad_norm": 3.771766802577481, "learning_rate": 1.983122545977274e-05, "loss": 0.5149, "step": 1096 }, { "epoch": 0.08690829867300455, "grad_norm": 2.91395083259566, "learning_rate": 1.983075568511116e-05, "loss": 0.5302, "step": 1097 }, { "epoch": 0.08698752228163993, "grad_norm": 3.0171089968982585, "learning_rate": 1.983028526314162e-05, "loss": 0.5589, "step": 1098 }, { "epoch": 0.0870667458902753, "grad_norm": 3.3110004246895763, "learning_rate": 1.98298141938951e-05, "loss": 0.5231, "step": 1099 }, { "epoch": 0.08714596949891068, "grad_norm": 2.4694917990492797, "learning_rate": 1.982934247740261e-05, "loss": 0.4208, "step": 1100 }, { "epoch": 0.08722519310754605, "grad_norm": 3.350969968362005, "learning_rate": 1.9828870113695217e-05, "loss": 0.5676, "step": 1101 }, { "epoch": 0.08730441671618142, "grad_norm": 3.409640407416842, "learning_rate": 1.9828397102804016e-05, "loss": 0.3566, "step": 1102 }, { "epoch": 0.0873836403248168, "grad_norm": 2.9697707973618397, "learning_rate": 1.982792344476016e-05, "loss": 0.4851, "step": 1103 }, { "epoch": 0.08746286393345216, "grad_norm": 3.348477739289384, "learning_rate": 1.982744913959483e-05, "loss": 0.5918, "step": 1104 }, { "epoch": 0.08754208754208755, "grad_norm": 2.8133201376495847, "learning_rate": 1.9826974187339267e-05, "loss": 0.5041, "step": 1105 }, { "epoch": 0.08762131115072291, "grad_norm": 2.917907605356377, "learning_rate": 1.9826498588024738e-05, "loss": 0.4162, "step": 1106 }, { "epoch": 0.0877005347593583, "grad_norm": 3.2671158388845085, "learning_rate": 1.982602234168255e-05, "loss": 0.5697, "step": 1107 }, { "epoch": 0.08777975836799366, "grad_norm": 3.184169428945255, "learning_rate": 1.9825545448344078e-05, "loss": 0.4887, "step": 1108 }, { "epoch": 0.08785898197662903, "grad_norm": 4.657450898572146, "learning_rate": 1.9825067908040716e-05, "loss": 0.4193, "step": 1109 }, { "epoch": 0.08793820558526441, "grad_norm": 3.2471336756698554, "learning_rate": 1.9824589720803906e-05, "loss": 0.4368, "step": 1110 }, { "epoch": 0.08801742919389978, "grad_norm": 3.113129707170449, "learning_rate": 1.9824110886665137e-05, "loss": 0.5021, "step": 1111 }, { "epoch": 0.08809665280253516, "grad_norm": 3.556680450494994, "learning_rate": 1.9823631405655933e-05, "loss": 0.458, "step": 1112 }, { "epoch": 0.08817587641117053, "grad_norm": 3.7669716447199537, "learning_rate": 1.9823151277807873e-05, "loss": 0.4883, "step": 1113 }, { "epoch": 0.0882551000198059, "grad_norm": 2.9182702928130095, "learning_rate": 1.9822670503152567e-05, "loss": 0.3853, "step": 1114 }, { "epoch": 0.08833432362844128, "grad_norm": 2.8541517457360435, "learning_rate": 1.982218908172167e-05, "loss": 0.3169, "step": 1115 }, { "epoch": 0.08841354723707665, "grad_norm": 3.7973525542670665, "learning_rate": 1.9821707013546885e-05, "loss": 0.6661, "step": 1116 }, { "epoch": 0.08849277084571203, "grad_norm": 2.5836900278744346, "learning_rate": 1.9821224298659953e-05, "loss": 0.455, "step": 1117 }, { "epoch": 0.0885719944543474, "grad_norm": 4.9682610621552685, "learning_rate": 1.9820740937092656e-05, "loss": 0.5624, "step": 1118 }, { "epoch": 0.08865121806298278, "grad_norm": 3.364726466311942, "learning_rate": 1.982025692887682e-05, "loss": 0.5158, "step": 1119 }, { "epoch": 0.08873044167161814, "grad_norm": 2.9285130067546015, "learning_rate": 1.9819772274044323e-05, "loss": 0.3909, "step": 1120 }, { "epoch": 0.08880966528025351, "grad_norm": 2.5780257901984354, "learning_rate": 1.9819286972627066e-05, "loss": 0.377, "step": 1121 }, { "epoch": 0.08888888888888889, "grad_norm": 2.7393603295608515, "learning_rate": 1.9818801024657014e-05, "loss": 0.476, "step": 1122 }, { "epoch": 0.08896811249752426, "grad_norm": 2.667074727959154, "learning_rate": 1.9818314430166158e-05, "loss": 0.4401, "step": 1123 }, { "epoch": 0.08904733610615964, "grad_norm": 3.07080161359432, "learning_rate": 1.981782718918654e-05, "loss": 0.4849, "step": 1124 }, { "epoch": 0.08912655971479501, "grad_norm": 2.693848335134863, "learning_rate": 1.981733930175024e-05, "loss": 0.3686, "step": 1125 }, { "epoch": 0.08920578332343038, "grad_norm": 2.790447841814945, "learning_rate": 1.9816850767889387e-05, "loss": 0.4372, "step": 1126 }, { "epoch": 0.08928500693206576, "grad_norm": 3.188075691790254, "learning_rate": 1.9816361587636143e-05, "loss": 0.5137, "step": 1127 }, { "epoch": 0.08936423054070113, "grad_norm": 3.4018445055391227, "learning_rate": 1.9815871761022727e-05, "loss": 0.5543, "step": 1128 }, { "epoch": 0.08944345414933651, "grad_norm": 2.9845546392497972, "learning_rate": 1.9815381288081382e-05, "loss": 0.4302, "step": 1129 }, { "epoch": 0.08952267775797187, "grad_norm": 3.962935201548869, "learning_rate": 1.9814890168844412e-05, "loss": 0.559, "step": 1130 }, { "epoch": 0.08960190136660724, "grad_norm": 2.690738336027175, "learning_rate": 1.981439840334415e-05, "loss": 0.4805, "step": 1131 }, { "epoch": 0.08968112497524262, "grad_norm": 3.009150897335099, "learning_rate": 1.9813905991612974e-05, "loss": 0.5308, "step": 1132 }, { "epoch": 0.08976034858387799, "grad_norm": 3.0239670839507853, "learning_rate": 1.9813412933683312e-05, "loss": 0.4036, "step": 1133 }, { "epoch": 0.08983957219251337, "grad_norm": 2.907786408112091, "learning_rate": 1.9812919229587626e-05, "loss": 0.3564, "step": 1134 }, { "epoch": 0.08991879580114874, "grad_norm": 3.0029111382877915, "learning_rate": 1.9812424879358424e-05, "loss": 0.4467, "step": 1135 }, { "epoch": 0.08999801940978412, "grad_norm": 3.1882872194797196, "learning_rate": 1.981192988302826e-05, "loss": 0.5825, "step": 1136 }, { "epoch": 0.09007724301841949, "grad_norm": 2.789041952722907, "learning_rate": 1.981143424062973e-05, "loss": 0.3841, "step": 1137 }, { "epoch": 0.09015646662705486, "grad_norm": 3.575546504062069, "learning_rate": 1.981093795219546e-05, "loss": 0.5407, "step": 1138 }, { "epoch": 0.09023569023569024, "grad_norm": 3.308899760154985, "learning_rate": 1.9810441017758132e-05, "loss": 0.4473, "step": 1139 }, { "epoch": 0.0903149138443256, "grad_norm": 2.7232902149269957, "learning_rate": 1.980994343735047e-05, "loss": 0.4297, "step": 1140 }, { "epoch": 0.09039413745296099, "grad_norm": 3.480343543569543, "learning_rate": 1.9809445211005235e-05, "loss": 0.4717, "step": 1141 }, { "epoch": 0.09047336106159636, "grad_norm": 2.9985786572621125, "learning_rate": 1.980894633875523e-05, "loss": 0.488, "step": 1142 }, { "epoch": 0.09055258467023172, "grad_norm": 2.875454160577531, "learning_rate": 1.980844682063331e-05, "loss": 0.4453, "step": 1143 }, { "epoch": 0.0906318082788671, "grad_norm": 2.991050857390452, "learning_rate": 1.980794665667236e-05, "loss": 0.5236, "step": 1144 }, { "epoch": 0.09071103188750247, "grad_norm": 2.7301328562405707, "learning_rate": 1.9807445846905316e-05, "loss": 0.5338, "step": 1145 }, { "epoch": 0.09079025549613785, "grad_norm": 2.8157463123758677, "learning_rate": 1.980694439136515e-05, "loss": 0.5377, "step": 1146 }, { "epoch": 0.09086947910477322, "grad_norm": 2.551010685786562, "learning_rate": 1.980644229008489e-05, "loss": 0.4777, "step": 1147 }, { "epoch": 0.0909487027134086, "grad_norm": 2.759879934873259, "learning_rate": 1.9805939543097586e-05, "loss": 0.4694, "step": 1148 }, { "epoch": 0.09102792632204397, "grad_norm": 2.8045017592137285, "learning_rate": 1.9805436150436352e-05, "loss": 0.4082, "step": 1149 }, { "epoch": 0.09110714993067934, "grad_norm": 2.6590287712701115, "learning_rate": 1.9804932112134323e-05, "loss": 0.416, "step": 1150 }, { "epoch": 0.09118637353931472, "grad_norm": 3.3508584458774107, "learning_rate": 1.9804427428224696e-05, "loss": 0.4654, "step": 1151 }, { "epoch": 0.09126559714795009, "grad_norm": 3.6605674652380555, "learning_rate": 1.9803922098740696e-05, "loss": 0.4626, "step": 1152 }, { "epoch": 0.09134482075658547, "grad_norm": 2.8853471445108805, "learning_rate": 1.98034161237156e-05, "loss": 0.5001, "step": 1153 }, { "epoch": 0.09142404436522084, "grad_norm": 2.805697083500706, "learning_rate": 1.9802909503182722e-05, "loss": 0.4854, "step": 1154 }, { "epoch": 0.0915032679738562, "grad_norm": 3.210169831192453, "learning_rate": 1.9802402237175426e-05, "loss": 0.4608, "step": 1155 }, { "epoch": 0.09158249158249158, "grad_norm": 2.4652169232103653, "learning_rate": 1.9801894325727104e-05, "loss": 0.3638, "step": 1156 }, { "epoch": 0.09166171519112695, "grad_norm": 3.13335446589247, "learning_rate": 1.980138576887121e-05, "loss": 0.6432, "step": 1157 }, { "epoch": 0.09174093879976233, "grad_norm": 2.499935202609419, "learning_rate": 1.980087656664122e-05, "loss": 0.3888, "step": 1158 }, { "epoch": 0.0918201624083977, "grad_norm": 2.911496172934588, "learning_rate": 1.9800366719070668e-05, "loss": 0.5239, "step": 1159 }, { "epoch": 0.09189938601703307, "grad_norm": 3.309044976537077, "learning_rate": 1.9799856226193125e-05, "loss": 0.4401, "step": 1160 }, { "epoch": 0.09197860962566845, "grad_norm": 3.003601119925414, "learning_rate": 1.97993450880422e-05, "loss": 0.4165, "step": 1161 }, { "epoch": 0.09205783323430382, "grad_norm": 2.949334265018345, "learning_rate": 1.9798833304651555e-05, "loss": 0.4954, "step": 1162 }, { "epoch": 0.0921370568429392, "grad_norm": 3.6785178209257974, "learning_rate": 1.9798320876054882e-05, "loss": 0.42, "step": 1163 }, { "epoch": 0.09221628045157457, "grad_norm": 3.235641668418051, "learning_rate": 1.9797807802285933e-05, "loss": 0.5237, "step": 1164 }, { "epoch": 0.09229550406020995, "grad_norm": 2.7879452355058136, "learning_rate": 1.979729408337848e-05, "loss": 0.4528, "step": 1165 }, { "epoch": 0.09237472766884532, "grad_norm": 3.081500480265576, "learning_rate": 1.9796779719366355e-05, "loss": 0.4927, "step": 1166 }, { "epoch": 0.09245395127748068, "grad_norm": 3.3545869585421304, "learning_rate": 1.9796264710283425e-05, "loss": 0.6186, "step": 1167 }, { "epoch": 0.09253317488611607, "grad_norm": 3.4452432551640264, "learning_rate": 1.9795749056163595e-05, "loss": 0.48, "step": 1168 }, { "epoch": 0.09261239849475143, "grad_norm": 2.5107690026929324, "learning_rate": 1.9795232757040827e-05, "loss": 0.3089, "step": 1169 }, { "epoch": 0.09269162210338681, "grad_norm": 2.96025874370993, "learning_rate": 1.9794715812949117e-05, "loss": 0.3997, "step": 1170 }, { "epoch": 0.09277084571202218, "grad_norm": 3.041303371007694, "learning_rate": 1.9794198223922496e-05, "loss": 0.5336, "step": 1171 }, { "epoch": 0.09285006932065755, "grad_norm": 2.6198516778156384, "learning_rate": 1.979367998999505e-05, "loss": 0.3426, "step": 1172 }, { "epoch": 0.09292929292929293, "grad_norm": 3.42753698324827, "learning_rate": 1.97931611112009e-05, "loss": 0.4556, "step": 1173 }, { "epoch": 0.0930085165379283, "grad_norm": 2.6469552661719633, "learning_rate": 1.9792641587574212e-05, "loss": 0.3358, "step": 1174 }, { "epoch": 0.09308774014656368, "grad_norm": 2.6983144734015796, "learning_rate": 1.9792121419149196e-05, "loss": 0.4735, "step": 1175 }, { "epoch": 0.09316696375519905, "grad_norm": 2.863517246247922, "learning_rate": 1.97916006059601e-05, "loss": 0.4365, "step": 1176 }, { "epoch": 0.09324618736383443, "grad_norm": 3.3008308287626313, "learning_rate": 1.979107914804122e-05, "loss": 0.4891, "step": 1177 }, { "epoch": 0.0933254109724698, "grad_norm": 3.0788316380030936, "learning_rate": 1.979055704542689e-05, "loss": 0.5466, "step": 1178 }, { "epoch": 0.09340463458110516, "grad_norm": 2.7879878542582306, "learning_rate": 1.9790034298151486e-05, "loss": 0.3846, "step": 1179 }, { "epoch": 0.09348385818974055, "grad_norm": 2.6921605687371724, "learning_rate": 1.9789510906249432e-05, "loss": 0.418, "step": 1180 }, { "epoch": 0.09356308179837591, "grad_norm": 3.339369111681325, "learning_rate": 1.9788986869755187e-05, "loss": 0.4613, "step": 1181 }, { "epoch": 0.0936423054070113, "grad_norm": 2.862343906737892, "learning_rate": 1.978846218870326e-05, "loss": 0.7313, "step": 1182 }, { "epoch": 0.09372152901564666, "grad_norm": 2.787712227607083, "learning_rate": 1.9787936863128195e-05, "loss": 0.3882, "step": 1183 }, { "epoch": 0.09380075262428203, "grad_norm": 2.988853974362241, "learning_rate": 1.9787410893064584e-05, "loss": 0.5426, "step": 1184 }, { "epoch": 0.09387997623291741, "grad_norm": 2.5836090903970055, "learning_rate": 1.978688427854706e-05, "loss": 0.4528, "step": 1185 }, { "epoch": 0.09395919984155278, "grad_norm": 2.9320522894001892, "learning_rate": 1.97863570196103e-05, "loss": 0.4741, "step": 1186 }, { "epoch": 0.09403842345018816, "grad_norm": 3.2130123005940767, "learning_rate": 1.9785829116289017e-05, "loss": 0.5473, "step": 1187 }, { "epoch": 0.09411764705882353, "grad_norm": 2.792945193691272, "learning_rate": 1.9785300568617973e-05, "loss": 0.4636, "step": 1188 }, { "epoch": 0.09419687066745891, "grad_norm": 2.7664361018067676, "learning_rate": 1.978477137663197e-05, "loss": 0.5127, "step": 1189 }, { "epoch": 0.09427609427609428, "grad_norm": 3.2046791554771996, "learning_rate": 1.9784241540365856e-05, "loss": 0.5447, "step": 1190 }, { "epoch": 0.09435531788472964, "grad_norm": 2.5187704934378017, "learning_rate": 1.9783711059854514e-05, "loss": 0.4364, "step": 1191 }, { "epoch": 0.09443454149336503, "grad_norm": 2.955054836243262, "learning_rate": 1.9783179935132874e-05, "loss": 0.4794, "step": 1192 }, { "epoch": 0.0945137651020004, "grad_norm": 3.305740028331367, "learning_rate": 1.978264816623591e-05, "loss": 0.4308, "step": 1193 }, { "epoch": 0.09459298871063578, "grad_norm": 2.519206562418568, "learning_rate": 1.9782115753198633e-05, "loss": 0.4597, "step": 1194 }, { "epoch": 0.09467221231927114, "grad_norm": 2.823003234561981, "learning_rate": 1.9781582696056105e-05, "loss": 0.4541, "step": 1195 }, { "epoch": 0.09475143592790651, "grad_norm": 2.997973325370504, "learning_rate": 1.9781048994843423e-05, "loss": 0.5152, "step": 1196 }, { "epoch": 0.09483065953654189, "grad_norm": 2.7588941737240047, "learning_rate": 1.9780514649595727e-05, "loss": 0.3504, "step": 1197 }, { "epoch": 0.09490988314517726, "grad_norm": 2.962805872853815, "learning_rate": 1.97799796603482e-05, "loss": 0.5334, "step": 1198 }, { "epoch": 0.09498910675381264, "grad_norm": 2.986469273864763, "learning_rate": 1.9779444027136075e-05, "loss": 0.5183, "step": 1199 }, { "epoch": 0.09506833036244801, "grad_norm": 2.693312992978399, "learning_rate": 1.977890774999461e-05, "loss": 0.4658, "step": 1200 }, { "epoch": 0.09514755397108338, "grad_norm": 2.933876446691811, "learning_rate": 1.977837082895913e-05, "loss": 0.4742, "step": 1201 }, { "epoch": 0.09522677757971876, "grad_norm": 3.0448273571919056, "learning_rate": 1.9777833264064977e-05, "loss": 0.3984, "step": 1202 }, { "epoch": 0.09530600118835413, "grad_norm": 2.919121030526456, "learning_rate": 1.9777295055347553e-05, "loss": 0.3911, "step": 1203 }, { "epoch": 0.0953852247969895, "grad_norm": 2.7335837293134273, "learning_rate": 1.9776756202842297e-05, "loss": 0.3838, "step": 1204 }, { "epoch": 0.09546444840562487, "grad_norm": 3.9222950670949848, "learning_rate": 1.9776216706584682e-05, "loss": 0.5776, "step": 1205 }, { "epoch": 0.09554367201426026, "grad_norm": 2.808846082632548, "learning_rate": 1.977567656661024e-05, "loss": 0.4147, "step": 1206 }, { "epoch": 0.09562289562289562, "grad_norm": 2.6027134733234436, "learning_rate": 1.9775135782954534e-05, "loss": 0.3953, "step": 1207 }, { "epoch": 0.09570211923153099, "grad_norm": 2.802753304304745, "learning_rate": 1.9774594355653175e-05, "loss": 0.4492, "step": 1208 }, { "epoch": 0.09578134284016637, "grad_norm": 2.9894080991048577, "learning_rate": 1.9774052284741804e-05, "loss": 0.4058, "step": 1209 }, { "epoch": 0.09586056644880174, "grad_norm": 3.2959667997472355, "learning_rate": 1.9773509570256124e-05, "loss": 0.5794, "step": 1210 }, { "epoch": 0.09593979005743712, "grad_norm": 3.4460465410906234, "learning_rate": 1.9772966212231863e-05, "loss": 0.4913, "step": 1211 }, { "epoch": 0.09601901366607249, "grad_norm": 3.2370611897191317, "learning_rate": 1.9772422210704803e-05, "loss": 0.495, "step": 1212 }, { "epoch": 0.09609823727470786, "grad_norm": 2.510565167325101, "learning_rate": 1.977187756571076e-05, "loss": 0.4402, "step": 1213 }, { "epoch": 0.09617746088334324, "grad_norm": 3.3067656355834982, "learning_rate": 1.9771332277285603e-05, "loss": 0.4603, "step": 1214 }, { "epoch": 0.0962566844919786, "grad_norm": 2.9415589484257945, "learning_rate": 1.977078634546523e-05, "loss": 0.5573, "step": 1215 }, { "epoch": 0.09633590810061399, "grad_norm": 3.0337215428094932, "learning_rate": 1.977023977028559e-05, "loss": 0.5792, "step": 1216 }, { "epoch": 0.09641513170924935, "grad_norm": 2.717568010051802, "learning_rate": 1.9769692551782672e-05, "loss": 0.5404, "step": 1217 }, { "epoch": 0.09649435531788474, "grad_norm": 2.851477156302294, "learning_rate": 1.976914468999251e-05, "loss": 0.4395, "step": 1218 }, { "epoch": 0.0965735789265201, "grad_norm": 3.739586339973208, "learning_rate": 1.9768596184951174e-05, "loss": 0.576, "step": 1219 }, { "epoch": 0.09665280253515547, "grad_norm": 2.9827582120586724, "learning_rate": 1.9768047036694785e-05, "loss": 0.565, "step": 1220 }, { "epoch": 0.09673202614379085, "grad_norm": 3.3495330648470376, "learning_rate": 1.9767497245259496e-05, "loss": 0.4631, "step": 1221 }, { "epoch": 0.09681124975242622, "grad_norm": 2.7190958499253717, "learning_rate": 1.9766946810681517e-05, "loss": 0.4555, "step": 1222 }, { "epoch": 0.0968904733610616, "grad_norm": 2.7463302295366, "learning_rate": 1.9766395732997082e-05, "loss": 0.4362, "step": 1223 }, { "epoch": 0.09696969696969697, "grad_norm": 3.0146532239628328, "learning_rate": 1.9765844012242482e-05, "loss": 0.4235, "step": 1224 }, { "epoch": 0.09704892057833234, "grad_norm": 3.3067482693186787, "learning_rate": 1.9765291648454042e-05, "loss": 0.4175, "step": 1225 }, { "epoch": 0.09712814418696772, "grad_norm": 3.043234149394331, "learning_rate": 1.9764738641668137e-05, "loss": 0.49, "step": 1226 }, { "epoch": 0.09720736779560309, "grad_norm": 3.091418048606579, "learning_rate": 1.9764184991921178e-05, "loss": 0.4488, "step": 1227 }, { "epoch": 0.09728659140423847, "grad_norm": 2.882853625788255, "learning_rate": 1.9763630699249615e-05, "loss": 0.3688, "step": 1228 }, { "epoch": 0.09736581501287384, "grad_norm": 3.0998745117146336, "learning_rate": 1.9763075763689956e-05, "loss": 0.3627, "step": 1229 }, { "epoch": 0.0974450386215092, "grad_norm": 2.5904061856839493, "learning_rate": 1.9762520185278734e-05, "loss": 0.3482, "step": 1230 }, { "epoch": 0.09752426223014458, "grad_norm": 3.017908058183374, "learning_rate": 1.9761963964052528e-05, "loss": 0.4379, "step": 1231 }, { "epoch": 0.09760348583877995, "grad_norm": 2.8388266105162, "learning_rate": 1.976140710004797e-05, "loss": 0.5507, "step": 1232 }, { "epoch": 0.09768270944741533, "grad_norm": 2.596324747927256, "learning_rate": 1.976084959330172e-05, "loss": 0.4643, "step": 1233 }, { "epoch": 0.0977619330560507, "grad_norm": 2.7009960126368, "learning_rate": 1.9760291443850496e-05, "loss": 0.4732, "step": 1234 }, { "epoch": 0.09784115666468608, "grad_norm": 3.1240752238483327, "learning_rate": 1.9759732651731037e-05, "loss": 0.4105, "step": 1235 }, { "epoch": 0.09792038027332145, "grad_norm": 3.356670884619534, "learning_rate": 1.975917321698015e-05, "loss": 0.4793, "step": 1236 }, { "epoch": 0.09799960388195682, "grad_norm": 2.720612851275538, "learning_rate": 1.9758613139634662e-05, "loss": 0.4929, "step": 1237 }, { "epoch": 0.0980788274905922, "grad_norm": 3.127615023122187, "learning_rate": 1.975805241973145e-05, "loss": 0.5664, "step": 1238 }, { "epoch": 0.09815805109922757, "grad_norm": 3.1065795642149774, "learning_rate": 1.9757491057307448e-05, "loss": 0.5476, "step": 1239 }, { "epoch": 0.09823727470786295, "grad_norm": 2.4365780324468327, "learning_rate": 1.9756929052399606e-05, "loss": 0.4285, "step": 1240 }, { "epoch": 0.09831649831649832, "grad_norm": 2.6205445849463302, "learning_rate": 1.9756366405044928e-05, "loss": 0.4204, "step": 1241 }, { "epoch": 0.09839572192513368, "grad_norm": 2.5536730629831377, "learning_rate": 1.9755803115280476e-05, "loss": 0.4855, "step": 1242 }, { "epoch": 0.09847494553376906, "grad_norm": 2.6571849723312058, "learning_rate": 1.9755239183143323e-05, "loss": 0.4306, "step": 1243 }, { "epoch": 0.09855416914240443, "grad_norm": 2.7918639991751464, "learning_rate": 1.9754674608670613e-05, "loss": 0.5285, "step": 1244 }, { "epoch": 0.09863339275103981, "grad_norm": 2.6216234041412934, "learning_rate": 1.9754109391899514e-05, "loss": 0.4769, "step": 1245 }, { "epoch": 0.09871261635967518, "grad_norm": 2.6346465405630406, "learning_rate": 1.975354353286725e-05, "loss": 0.4246, "step": 1246 }, { "epoch": 0.09879183996831056, "grad_norm": 2.6617286777674134, "learning_rate": 1.9752977031611072e-05, "loss": 0.4167, "step": 1247 }, { "epoch": 0.09887106357694593, "grad_norm": 3.3104649516555256, "learning_rate": 1.9752409888168285e-05, "loss": 0.5252, "step": 1248 }, { "epoch": 0.0989502871855813, "grad_norm": 2.6979592352141184, "learning_rate": 1.975184210257623e-05, "loss": 0.4282, "step": 1249 }, { "epoch": 0.09902951079421668, "grad_norm": 2.558992126951326, "learning_rate": 1.97512736748723e-05, "loss": 0.3617, "step": 1250 }, { "epoch": 0.09910873440285205, "grad_norm": 2.9826455564027654, "learning_rate": 1.975070460509392e-05, "loss": 0.4462, "step": 1251 }, { "epoch": 0.09918795801148743, "grad_norm": 2.7119674793199007, "learning_rate": 1.9750134893278553e-05, "loss": 0.4626, "step": 1252 }, { "epoch": 0.0992671816201228, "grad_norm": 2.737371496650108, "learning_rate": 1.974956453946372e-05, "loss": 0.3724, "step": 1253 }, { "epoch": 0.09934640522875816, "grad_norm": 2.9481517962377763, "learning_rate": 1.9748993543686973e-05, "loss": 0.4287, "step": 1254 }, { "epoch": 0.09942562883739355, "grad_norm": 2.7451605587656234, "learning_rate": 1.9748421905985915e-05, "loss": 0.461, "step": 1255 }, { "epoch": 0.09950485244602891, "grad_norm": 3.0646336125877243, "learning_rate": 1.9747849626398176e-05, "loss": 0.4566, "step": 1256 }, { "epoch": 0.0995840760546643, "grad_norm": 2.501200872654823, "learning_rate": 1.9747276704961447e-05, "loss": 0.3885, "step": 1257 }, { "epoch": 0.09966329966329966, "grad_norm": 2.876388194153617, "learning_rate": 1.9746703141713444e-05, "loss": 0.4522, "step": 1258 }, { "epoch": 0.09974252327193504, "grad_norm": 2.489857036328931, "learning_rate": 1.974612893669194e-05, "loss": 0.4146, "step": 1259 }, { "epoch": 0.09982174688057041, "grad_norm": 3.3931939435239302, "learning_rate": 1.974555408993474e-05, "loss": 0.4765, "step": 1260 }, { "epoch": 0.09990097048920578, "grad_norm": 3.257095038887568, "learning_rate": 1.9744978601479693e-05, "loss": 0.4967, "step": 1261 }, { "epoch": 0.09998019409784116, "grad_norm": 2.7452639496150155, "learning_rate": 1.97444024713647e-05, "loss": 0.4455, "step": 1262 }, { "epoch": 0.10005941770647653, "grad_norm": 2.7224516279761803, "learning_rate": 1.9743825699627687e-05, "loss": 0.4726, "step": 1263 }, { "epoch": 0.10013864131511191, "grad_norm": 2.8141386805136372, "learning_rate": 1.974324828630664e-05, "loss": 0.5204, "step": 1264 }, { "epoch": 0.10021786492374728, "grad_norm": 2.8844951089038564, "learning_rate": 1.974267023143957e-05, "loss": 0.4311, "step": 1265 }, { "epoch": 0.10029708853238264, "grad_norm": 3.543302427401054, "learning_rate": 1.974209153506455e-05, "loss": 0.6045, "step": 1266 }, { "epoch": 0.10037631214101803, "grad_norm": 3.1033939236310184, "learning_rate": 1.9741512197219675e-05, "loss": 0.4325, "step": 1267 }, { "epoch": 0.1004555357496534, "grad_norm": 3.004199142849806, "learning_rate": 1.9740932217943095e-05, "loss": 0.4457, "step": 1268 }, { "epoch": 0.10053475935828877, "grad_norm": 2.7598379335663807, "learning_rate": 1.9740351597272998e-05, "loss": 0.4384, "step": 1269 }, { "epoch": 0.10061398296692414, "grad_norm": 3.3023441102956275, "learning_rate": 1.9739770335247616e-05, "loss": 0.5227, "step": 1270 }, { "epoch": 0.10069320657555951, "grad_norm": 2.8851054191049954, "learning_rate": 1.9739188431905223e-05, "loss": 0.5745, "step": 1271 }, { "epoch": 0.10077243018419489, "grad_norm": 2.6875763903633914, "learning_rate": 1.9738605887284134e-05, "loss": 0.4829, "step": 1272 }, { "epoch": 0.10085165379283026, "grad_norm": 2.7641889321808772, "learning_rate": 1.9738022701422705e-05, "loss": 0.5405, "step": 1273 }, { "epoch": 0.10093087740146564, "grad_norm": 2.4921301903870243, "learning_rate": 1.973743887435934e-05, "loss": 0.3679, "step": 1274 }, { "epoch": 0.10101010101010101, "grad_norm": 2.951184153446708, "learning_rate": 1.9736854406132476e-05, "loss": 0.3986, "step": 1275 }, { "epoch": 0.10108932461873639, "grad_norm": 3.0518140539278154, "learning_rate": 1.9736269296780603e-05, "loss": 0.4344, "step": 1276 }, { "epoch": 0.10116854822737176, "grad_norm": 2.8974286033719823, "learning_rate": 1.9735683546342243e-05, "loss": 0.4743, "step": 1277 }, { "epoch": 0.10124777183600712, "grad_norm": 3.306914067951883, "learning_rate": 1.9735097154855968e-05, "loss": 0.4694, "step": 1278 }, { "epoch": 0.1013269954446425, "grad_norm": 2.835824077886425, "learning_rate": 1.9734510122360383e-05, "loss": 0.6355, "step": 1279 }, { "epoch": 0.10140621905327787, "grad_norm": 2.9306969134881222, "learning_rate": 1.973392244889415e-05, "loss": 0.4893, "step": 1280 }, { "epoch": 0.10148544266191326, "grad_norm": 2.634497960105454, "learning_rate": 1.9733334134495963e-05, "loss": 0.4683, "step": 1281 }, { "epoch": 0.10156466627054862, "grad_norm": 2.436570027417308, "learning_rate": 1.9732745179204553e-05, "loss": 0.4734, "step": 1282 }, { "epoch": 0.10164388987918399, "grad_norm": 2.6218428136824006, "learning_rate": 1.9732155583058705e-05, "loss": 0.4703, "step": 1283 }, { "epoch": 0.10172311348781937, "grad_norm": 3.512924796677076, "learning_rate": 1.973156534609724e-05, "loss": 0.484, "step": 1284 }, { "epoch": 0.10180233709645474, "grad_norm": 2.6546275957915006, "learning_rate": 1.973097446835902e-05, "loss": 0.4453, "step": 1285 }, { "epoch": 0.10188156070509012, "grad_norm": 2.4609997246849944, "learning_rate": 1.9730382949882955e-05, "loss": 0.3135, "step": 1286 }, { "epoch": 0.10196078431372549, "grad_norm": 3.120760233598349, "learning_rate": 1.9729790790707995e-05, "loss": 0.4991, "step": 1287 }, { "epoch": 0.10204000792236087, "grad_norm": 3.866788013434518, "learning_rate": 1.9729197990873127e-05, "loss": 0.3917, "step": 1288 }, { "epoch": 0.10211923153099624, "grad_norm": 2.554138894135903, "learning_rate": 1.9728604550417385e-05, "loss": 0.4203, "step": 1289 }, { "epoch": 0.1021984551396316, "grad_norm": 2.7615173897506966, "learning_rate": 1.9728010469379844e-05, "loss": 0.3992, "step": 1290 }, { "epoch": 0.10227767874826699, "grad_norm": 3.2799484048313845, "learning_rate": 1.972741574779962e-05, "loss": 0.5083, "step": 1291 }, { "epoch": 0.10235690235690235, "grad_norm": 2.53808381770956, "learning_rate": 1.9726820385715877e-05, "loss": 0.3927, "step": 1292 }, { "epoch": 0.10243612596553774, "grad_norm": 3.3855451710657025, "learning_rate": 1.9726224383167815e-05, "loss": 0.4632, "step": 1293 }, { "epoch": 0.1025153495741731, "grad_norm": 2.5786065014178354, "learning_rate": 1.9725627740194673e-05, "loss": 0.396, "step": 1294 }, { "epoch": 0.10259457318280847, "grad_norm": 2.5489295481440664, "learning_rate": 1.9725030456835745e-05, "loss": 0.3758, "step": 1295 }, { "epoch": 0.10267379679144385, "grad_norm": 3.414059813835967, "learning_rate": 1.9724432533130355e-05, "loss": 0.5763, "step": 1296 }, { "epoch": 0.10275302040007922, "grad_norm": 3.114689554867816, "learning_rate": 1.972383396911787e-05, "loss": 0.5306, "step": 1297 }, { "epoch": 0.1028322440087146, "grad_norm": 3.4382850312425535, "learning_rate": 1.9723234764837708e-05, "loss": 0.4194, "step": 1298 }, { "epoch": 0.10291146761734997, "grad_norm": 2.756221532025514, "learning_rate": 1.9722634920329323e-05, "loss": 0.4148, "step": 1299 }, { "epoch": 0.10299069122598534, "grad_norm": 2.6681005484047198, "learning_rate": 1.9722034435632207e-05, "loss": 0.5743, "step": 1300 }, { "epoch": 0.10306991483462072, "grad_norm": 3.221298897992381, "learning_rate": 1.972143331078591e-05, "loss": 0.5955, "step": 1301 }, { "epoch": 0.10314913844325609, "grad_norm": 2.768257942623405, "learning_rate": 1.972083154583e-05, "loss": 0.4405, "step": 1302 }, { "epoch": 0.10322836205189147, "grad_norm": 2.7263801265215974, "learning_rate": 1.972022914080411e-05, "loss": 0.3928, "step": 1303 }, { "epoch": 0.10330758566052684, "grad_norm": 2.7670407144370515, "learning_rate": 1.9719626095747897e-05, "loss": 0.4937, "step": 1304 }, { "epoch": 0.10338680926916222, "grad_norm": 2.2903908378339004, "learning_rate": 1.971902241070108e-05, "loss": 0.4047, "step": 1305 }, { "epoch": 0.10346603287779758, "grad_norm": 3.3008979763050204, "learning_rate": 1.9718418085703397e-05, "loss": 0.5761, "step": 1306 }, { "epoch": 0.10354525648643295, "grad_norm": 3.2484549345584584, "learning_rate": 1.971781312079465e-05, "loss": 0.4477, "step": 1307 }, { "epoch": 0.10362448009506833, "grad_norm": 2.3328468828519147, "learning_rate": 1.9717207516014664e-05, "loss": 0.4287, "step": 1308 }, { "epoch": 0.1037037037037037, "grad_norm": 2.6045631411131014, "learning_rate": 1.9716601271403322e-05, "loss": 0.3855, "step": 1309 }, { "epoch": 0.10378292731233908, "grad_norm": 2.8979261277228665, "learning_rate": 1.9715994387000537e-05, "loss": 0.5173, "step": 1310 }, { "epoch": 0.10386215092097445, "grad_norm": 2.890078085384356, "learning_rate": 1.9715386862846272e-05, "loss": 0.5025, "step": 1311 }, { "epoch": 0.10394137452960982, "grad_norm": 3.6272945895360142, "learning_rate": 1.971477869898053e-05, "loss": 0.464, "step": 1312 }, { "epoch": 0.1040205981382452, "grad_norm": 2.944361639139367, "learning_rate": 1.9714169895443357e-05, "loss": 0.4089, "step": 1313 }, { "epoch": 0.10409982174688057, "grad_norm": 3.3656323936878616, "learning_rate": 1.971356045227484e-05, "loss": 0.5242, "step": 1314 }, { "epoch": 0.10417904535551595, "grad_norm": 3.4605313243018108, "learning_rate": 1.97129503695151e-05, "loss": 0.4686, "step": 1315 }, { "epoch": 0.10425826896415132, "grad_norm": 3.0248871789387266, "learning_rate": 1.9712339647204313e-05, "loss": 0.4329, "step": 1316 }, { "epoch": 0.1043374925727867, "grad_norm": 2.287099410442511, "learning_rate": 1.97117282853827e-05, "loss": 0.2722, "step": 1317 }, { "epoch": 0.10441671618142206, "grad_norm": 3.4545845525213936, "learning_rate": 1.9711116284090506e-05, "loss": 0.5384, "step": 1318 }, { "epoch": 0.10449593979005743, "grad_norm": 3.2054629517779527, "learning_rate": 1.971050364336803e-05, "loss": 0.5123, "step": 1319 }, { "epoch": 0.10457516339869281, "grad_norm": 3.140498249635979, "learning_rate": 1.9709890363255617e-05, "loss": 0.3961, "step": 1320 }, { "epoch": 0.10465438700732818, "grad_norm": 2.8254917972564324, "learning_rate": 1.9709276443793638e-05, "loss": 0.5797, "step": 1321 }, { "epoch": 0.10473361061596356, "grad_norm": 2.8340757617909627, "learning_rate": 1.970866188502253e-05, "loss": 0.6164, "step": 1322 }, { "epoch": 0.10481283422459893, "grad_norm": 2.947562598547281, "learning_rate": 1.970804668698275e-05, "loss": 0.3962, "step": 1323 }, { "epoch": 0.1048920578332343, "grad_norm": 2.6833697600873663, "learning_rate": 1.970743084971481e-05, "loss": 0.5205, "step": 1324 }, { "epoch": 0.10497128144186968, "grad_norm": 2.2107233355507048, "learning_rate": 1.970681437325925e-05, "loss": 0.3524, "step": 1325 }, { "epoch": 0.10505050505050505, "grad_norm": 2.8771146522101234, "learning_rate": 1.9706197257656675e-05, "loss": 0.5506, "step": 1326 }, { "epoch": 0.10512972865914043, "grad_norm": 2.956514573657737, "learning_rate": 1.9705579502947712e-05, "loss": 0.3825, "step": 1327 }, { "epoch": 0.1052089522677758, "grad_norm": 2.991217178321492, "learning_rate": 1.9704961109173042e-05, "loss": 0.5809, "step": 1328 }, { "epoch": 0.10528817587641116, "grad_norm": 2.7483954060197235, "learning_rate": 1.9704342076373378e-05, "loss": 0.4556, "step": 1329 }, { "epoch": 0.10536739948504655, "grad_norm": 2.739167393451204, "learning_rate": 1.9703722404589484e-05, "loss": 0.401, "step": 1330 }, { "epoch": 0.10544662309368191, "grad_norm": 2.563836171318864, "learning_rate": 1.970310209386216e-05, "loss": 0.4047, "step": 1331 }, { "epoch": 0.1055258467023173, "grad_norm": 2.9131518200373416, "learning_rate": 1.9702481144232253e-05, "loss": 0.4113, "step": 1332 }, { "epoch": 0.10560507031095266, "grad_norm": 2.6988588195233487, "learning_rate": 1.9701859555740647e-05, "loss": 0.3987, "step": 1333 }, { "epoch": 0.10568429391958804, "grad_norm": 2.771397946408388, "learning_rate": 1.9701237328428272e-05, "loss": 0.4994, "step": 1334 }, { "epoch": 0.10576351752822341, "grad_norm": 3.0871542899897806, "learning_rate": 1.9700614462336096e-05, "loss": 0.5022, "step": 1335 }, { "epoch": 0.10584274113685878, "grad_norm": 2.934758055292221, "learning_rate": 1.9699990957505136e-05, "loss": 0.4916, "step": 1336 }, { "epoch": 0.10592196474549416, "grad_norm": 2.690723191029154, "learning_rate": 1.9699366813976443e-05, "loss": 0.4011, "step": 1337 }, { "epoch": 0.10600118835412953, "grad_norm": 2.7481322324087953, "learning_rate": 1.9698742031791118e-05, "loss": 0.5052, "step": 1338 }, { "epoch": 0.10608041196276491, "grad_norm": 2.933957788297982, "learning_rate": 1.96981166109903e-05, "loss": 0.3777, "step": 1339 }, { "epoch": 0.10615963557140028, "grad_norm": 2.6683514820307073, "learning_rate": 1.9697490551615162e-05, "loss": 0.4573, "step": 1340 }, { "epoch": 0.10623885918003564, "grad_norm": 2.9824026178728955, "learning_rate": 1.9696863853706937e-05, "loss": 0.3636, "step": 1341 }, { "epoch": 0.10631808278867103, "grad_norm": 2.7216684954457997, "learning_rate": 1.969623651730688e-05, "loss": 0.3133, "step": 1342 }, { "epoch": 0.1063973063973064, "grad_norm": 4.250271251525364, "learning_rate": 1.969560854245631e-05, "loss": 0.5336, "step": 1343 }, { "epoch": 0.10647653000594177, "grad_norm": 2.902405821987489, "learning_rate": 1.9694979929196566e-05, "loss": 0.403, "step": 1344 }, { "epoch": 0.10655575361457714, "grad_norm": 2.574935133721008, "learning_rate": 1.9694350677569043e-05, "loss": 0.3917, "step": 1345 }, { "epoch": 0.10663497722321252, "grad_norm": 3.1614680037023533, "learning_rate": 1.9693720787615174e-05, "loss": 0.4921, "step": 1346 }, { "epoch": 0.10671420083184789, "grad_norm": 3.552061231809605, "learning_rate": 1.9693090259376436e-05, "loss": 0.3219, "step": 1347 }, { "epoch": 0.10679342444048326, "grad_norm": 2.5293648415989236, "learning_rate": 1.9692459092894343e-05, "loss": 0.467, "step": 1348 }, { "epoch": 0.10687264804911864, "grad_norm": 2.895155207518888, "learning_rate": 1.969182728821046e-05, "loss": 0.5326, "step": 1349 }, { "epoch": 0.10695187165775401, "grad_norm": 2.8137066537947257, "learning_rate": 1.969119484536638e-05, "loss": 0.3978, "step": 1350 }, { "epoch": 0.10703109526638939, "grad_norm": 3.159236741602848, "learning_rate": 1.969056176440375e-05, "loss": 0.3872, "step": 1351 }, { "epoch": 0.10711031887502476, "grad_norm": 2.854905581531992, "learning_rate": 1.9689928045364258e-05, "loss": 0.3368, "step": 1352 }, { "epoch": 0.10718954248366012, "grad_norm": 2.7140487020464974, "learning_rate": 1.9689293688289627e-05, "loss": 0.4743, "step": 1353 }, { "epoch": 0.1072687660922955, "grad_norm": 2.934282122513982, "learning_rate": 1.968865869322163e-05, "loss": 0.3888, "step": 1354 }, { "epoch": 0.10734798970093087, "grad_norm": 3.0119871931050435, "learning_rate": 1.968802306020208e-05, "loss": 0.51, "step": 1355 }, { "epoch": 0.10742721330956626, "grad_norm": 2.8227366406959433, "learning_rate": 1.968738678927282e-05, "loss": 0.5394, "step": 1356 }, { "epoch": 0.10750643691820162, "grad_norm": 3.6029885179202052, "learning_rate": 1.9686749880475756e-05, "loss": 0.4084, "step": 1357 }, { "epoch": 0.107585660526837, "grad_norm": 3.034326363589282, "learning_rate": 1.9686112333852826e-05, "loss": 0.4646, "step": 1358 }, { "epoch": 0.10766488413547237, "grad_norm": 2.664156739194555, "learning_rate": 1.9685474149446e-05, "loss": 0.5023, "step": 1359 }, { "epoch": 0.10774410774410774, "grad_norm": 2.9981760489161426, "learning_rate": 1.9684835327297306e-05, "loss": 0.4371, "step": 1360 }, { "epoch": 0.10782333135274312, "grad_norm": 2.8022547037558785, "learning_rate": 1.9684195867448806e-05, "loss": 0.4122, "step": 1361 }, { "epoch": 0.10790255496137849, "grad_norm": 2.9128267225258946, "learning_rate": 1.9683555769942608e-05, "loss": 0.4876, "step": 1362 }, { "epoch": 0.10798177857001387, "grad_norm": 2.7603908869200477, "learning_rate": 1.968291503482086e-05, "loss": 0.3496, "step": 1363 }, { "epoch": 0.10806100217864924, "grad_norm": 2.980576722482925, "learning_rate": 1.968227366212574e-05, "loss": 0.4767, "step": 1364 }, { "epoch": 0.1081402257872846, "grad_norm": 2.6897521455535105, "learning_rate": 1.968163165189949e-05, "loss": 0.4636, "step": 1365 }, { "epoch": 0.10821944939591999, "grad_norm": 3.186484192734618, "learning_rate": 1.9680989004184383e-05, "loss": 0.3885, "step": 1366 }, { "epoch": 0.10829867300455535, "grad_norm": 2.8663366302588806, "learning_rate": 1.968034571902273e-05, "loss": 0.5326, "step": 1367 }, { "epoch": 0.10837789661319074, "grad_norm": 3.067361813403921, "learning_rate": 1.967970179645689e-05, "loss": 0.2948, "step": 1368 }, { "epoch": 0.1084571202218261, "grad_norm": 3.101527575569622, "learning_rate": 1.9679057236529266e-05, "loss": 0.4802, "step": 1369 }, { "epoch": 0.10853634383046147, "grad_norm": 3.1289935421466004, "learning_rate": 1.9678412039282292e-05, "loss": 0.587, "step": 1370 }, { "epoch": 0.10861556743909685, "grad_norm": 2.539607321168991, "learning_rate": 1.967776620475846e-05, "loss": 0.3966, "step": 1371 }, { "epoch": 0.10869479104773222, "grad_norm": 3.0787692872874817, "learning_rate": 1.9677119733000283e-05, "loss": 0.4989, "step": 1372 }, { "epoch": 0.1087740146563676, "grad_norm": 3.1317016650621956, "learning_rate": 1.967647262405034e-05, "loss": 0.4847, "step": 1373 }, { "epoch": 0.10885323826500297, "grad_norm": 2.705818741295503, "learning_rate": 1.967582487795123e-05, "loss": 0.3081, "step": 1374 }, { "epoch": 0.10893246187363835, "grad_norm": 2.670554827304836, "learning_rate": 1.967517649474561e-05, "loss": 0.3429, "step": 1375 }, { "epoch": 0.10901168548227372, "grad_norm": 3.007658030824364, "learning_rate": 1.9674527474476175e-05, "loss": 0.4141, "step": 1376 }, { "epoch": 0.10909090909090909, "grad_norm": 2.548967813017338, "learning_rate": 1.9673877817185656e-05, "loss": 0.3844, "step": 1377 }, { "epoch": 0.10917013269954447, "grad_norm": 2.9694557965998007, "learning_rate": 1.9673227522916827e-05, "loss": 0.4681, "step": 1378 }, { "epoch": 0.10924935630817983, "grad_norm": 2.5642802366939788, "learning_rate": 1.9672576591712517e-05, "loss": 0.4044, "step": 1379 }, { "epoch": 0.10932857991681522, "grad_norm": 2.508676900501366, "learning_rate": 1.9671925023615572e-05, "loss": 0.3884, "step": 1380 }, { "epoch": 0.10940780352545058, "grad_norm": 2.3718081104156945, "learning_rate": 1.9671272818668906e-05, "loss": 0.4117, "step": 1381 }, { "epoch": 0.10948702713408595, "grad_norm": 2.849798967159578, "learning_rate": 1.967061997691546e-05, "loss": 0.3316, "step": 1382 }, { "epoch": 0.10956625074272133, "grad_norm": 3.4316362850203928, "learning_rate": 1.966996649839822e-05, "loss": 0.4964, "step": 1383 }, { "epoch": 0.1096454743513567, "grad_norm": 2.7490391210747593, "learning_rate": 1.9669312383160217e-05, "loss": 0.468, "step": 1384 }, { "epoch": 0.10972469795999208, "grad_norm": 2.9271533233930738, "learning_rate": 1.966865763124452e-05, "loss": 0.5332, "step": 1385 }, { "epoch": 0.10980392156862745, "grad_norm": 2.8859706306788584, "learning_rate": 1.966800224269424e-05, "loss": 0.4232, "step": 1386 }, { "epoch": 0.10988314517726283, "grad_norm": 2.2786571318011406, "learning_rate": 1.9667346217552528e-05, "loss": 0.4454, "step": 1387 }, { "epoch": 0.1099623687858982, "grad_norm": 3.1330257578407377, "learning_rate": 1.9666689555862586e-05, "loss": 0.4288, "step": 1388 }, { "epoch": 0.11004159239453357, "grad_norm": 2.2193116635216708, "learning_rate": 1.966603225766765e-05, "loss": 0.3197, "step": 1389 }, { "epoch": 0.11012081600316895, "grad_norm": 2.503762279641449, "learning_rate": 1.9665374323011002e-05, "loss": 0.4297, "step": 1390 }, { "epoch": 0.11020003961180432, "grad_norm": 2.882519747928288, "learning_rate": 1.9664715751935958e-05, "loss": 0.3071, "step": 1391 }, { "epoch": 0.1102792632204397, "grad_norm": 2.7560061587342037, "learning_rate": 1.9664056544485887e-05, "loss": 0.4895, "step": 1392 }, { "epoch": 0.11035848682907506, "grad_norm": 3.029556398712317, "learning_rate": 1.9663396700704195e-05, "loss": 0.4292, "step": 1393 }, { "epoch": 0.11043771043771043, "grad_norm": 2.5938390277030217, "learning_rate": 1.9662736220634325e-05, "loss": 0.395, "step": 1394 }, { "epoch": 0.11051693404634581, "grad_norm": 2.8757926968452336, "learning_rate": 1.966207510431977e-05, "loss": 0.3962, "step": 1395 }, { "epoch": 0.11059615765498118, "grad_norm": 2.9582498728870465, "learning_rate": 1.966141335180406e-05, "loss": 0.3903, "step": 1396 }, { "epoch": 0.11067538126361656, "grad_norm": 2.559628817192077, "learning_rate": 1.966075096313077e-05, "loss": 0.4029, "step": 1397 }, { "epoch": 0.11075460487225193, "grad_norm": 2.8927067368568204, "learning_rate": 1.966008793834351e-05, "loss": 0.5218, "step": 1398 }, { "epoch": 0.1108338284808873, "grad_norm": 2.9418809327128352, "learning_rate": 1.9659424277485943e-05, "loss": 0.4159, "step": 1399 }, { "epoch": 0.11091305208952268, "grad_norm": 2.4917968755479225, "learning_rate": 1.9658759980601766e-05, "loss": 0.3447, "step": 1400 }, { "epoch": 0.11099227569815805, "grad_norm": 2.531783157451545, "learning_rate": 1.9658095047734718e-05, "loss": 0.4312, "step": 1401 }, { "epoch": 0.11107149930679343, "grad_norm": 2.253388942254369, "learning_rate": 1.965742947892858e-05, "loss": 0.2864, "step": 1402 }, { "epoch": 0.1111507229154288, "grad_norm": 3.0106414612026287, "learning_rate": 1.9656763274227188e-05, "loss": 0.3669, "step": 1403 }, { "epoch": 0.11122994652406418, "grad_norm": 2.7566736990307357, "learning_rate": 1.9656096433674393e-05, "loss": 0.3761, "step": 1404 }, { "epoch": 0.11130917013269954, "grad_norm": 2.815809055562609, "learning_rate": 1.965542895731411e-05, "loss": 0.4547, "step": 1405 }, { "epoch": 0.11138839374133491, "grad_norm": 2.900163182335524, "learning_rate": 1.965476084519029e-05, "loss": 0.434, "step": 1406 }, { "epoch": 0.1114676173499703, "grad_norm": 2.672358056016173, "learning_rate": 1.9654092097346925e-05, "loss": 0.475, "step": 1407 }, { "epoch": 0.11154684095860566, "grad_norm": 2.764266974356445, "learning_rate": 1.965342271382805e-05, "loss": 0.4598, "step": 1408 }, { "epoch": 0.11162606456724104, "grad_norm": 2.798055913816958, "learning_rate": 1.9652752694677735e-05, "loss": 0.3977, "step": 1409 }, { "epoch": 0.11170528817587641, "grad_norm": 2.753817231232736, "learning_rate": 1.9652082039940102e-05, "loss": 0.3721, "step": 1410 }, { "epoch": 0.11178451178451178, "grad_norm": 2.8710377716268978, "learning_rate": 1.965141074965931e-05, "loss": 0.542, "step": 1411 }, { "epoch": 0.11186373539314716, "grad_norm": 2.8060099901292803, "learning_rate": 1.965073882387956e-05, "loss": 0.4729, "step": 1412 }, { "epoch": 0.11194295900178253, "grad_norm": 2.628235033010763, "learning_rate": 1.9650066262645097e-05, "loss": 0.3152, "step": 1413 }, { "epoch": 0.11202218261041791, "grad_norm": 2.4459879445174417, "learning_rate": 1.96493930660002e-05, "loss": 0.4459, "step": 1414 }, { "epoch": 0.11210140621905328, "grad_norm": 2.963065102384047, "learning_rate": 1.9648719233989202e-05, "loss": 0.4337, "step": 1415 }, { "epoch": 0.11218062982768866, "grad_norm": 3.2144249945801198, "learning_rate": 1.9648044766656466e-05, "loss": 0.4277, "step": 1416 }, { "epoch": 0.11225985343632403, "grad_norm": 2.5725117331658365, "learning_rate": 1.9647369664046407e-05, "loss": 0.3982, "step": 1417 }, { "epoch": 0.11233907704495939, "grad_norm": 2.9849598213319606, "learning_rate": 1.9646693926203477e-05, "loss": 0.5583, "step": 1418 }, { "epoch": 0.11241830065359477, "grad_norm": 2.9912189161815195, "learning_rate": 1.964601755317217e-05, "loss": 0.3894, "step": 1419 }, { "epoch": 0.11249752426223014, "grad_norm": 3.536987419440896, "learning_rate": 1.9645340544997017e-05, "loss": 0.4096, "step": 1420 }, { "epoch": 0.11257674787086552, "grad_norm": 2.244945625177073, "learning_rate": 1.9644662901722603e-05, "loss": 0.4485, "step": 1421 }, { "epoch": 0.11265597147950089, "grad_norm": 2.80778540466449, "learning_rate": 1.9643984623393542e-05, "loss": 0.3135, "step": 1422 }, { "epoch": 0.11273519508813626, "grad_norm": 2.9179584183705236, "learning_rate": 1.96433057100545e-05, "loss": 0.4784, "step": 1423 }, { "epoch": 0.11281441869677164, "grad_norm": 3.0869873158919185, "learning_rate": 1.9642626161750176e-05, "loss": 0.4835, "step": 1424 }, { "epoch": 0.11289364230540701, "grad_norm": 3.3458155387685053, "learning_rate": 1.9641945978525318e-05, "loss": 0.3864, "step": 1425 }, { "epoch": 0.11297286591404239, "grad_norm": 2.888818373390241, "learning_rate": 1.9641265160424705e-05, "loss": 0.4938, "step": 1426 }, { "epoch": 0.11305208952267776, "grad_norm": 3.1532745532807893, "learning_rate": 1.9640583707493176e-05, "loss": 0.401, "step": 1427 }, { "epoch": 0.11313131313131314, "grad_norm": 2.3329892328999984, "learning_rate": 1.96399016197756e-05, "loss": 0.3788, "step": 1428 }, { "epoch": 0.1132105367399485, "grad_norm": 3.4879416768328237, "learning_rate": 1.9639218897316885e-05, "loss": 0.5169, "step": 1429 }, { "epoch": 0.11328976034858387, "grad_norm": 2.799181262316917, "learning_rate": 1.9638535540161988e-05, "loss": 0.4039, "step": 1430 }, { "epoch": 0.11336898395721925, "grad_norm": 2.6342776358790414, "learning_rate": 1.96378515483559e-05, "loss": 0.4094, "step": 1431 }, { "epoch": 0.11344820756585462, "grad_norm": 3.1289878899691264, "learning_rate": 1.9637166921943663e-05, "loss": 0.4729, "step": 1432 }, { "epoch": 0.11352743117449, "grad_norm": 2.573218788917751, "learning_rate": 1.963648166097036e-05, "loss": 0.3954, "step": 1433 }, { "epoch": 0.11360665478312537, "grad_norm": 2.7620209010719012, "learning_rate": 1.9635795765481102e-05, "loss": 0.4299, "step": 1434 }, { "epoch": 0.11368587839176074, "grad_norm": 2.626312333477079, "learning_rate": 1.9635109235521057e-05, "loss": 0.5158, "step": 1435 }, { "epoch": 0.11376510200039612, "grad_norm": 3.2288689351127853, "learning_rate": 1.963442207113543e-05, "loss": 0.5113, "step": 1436 }, { "epoch": 0.11384432560903149, "grad_norm": 2.47591228365586, "learning_rate": 1.9633734272369473e-05, "loss": 0.5116, "step": 1437 }, { "epoch": 0.11392354921766687, "grad_norm": 2.8861142580119985, "learning_rate": 1.9633045839268464e-05, "loss": 0.415, "step": 1438 }, { "epoch": 0.11400277282630224, "grad_norm": 2.420946925143882, "learning_rate": 1.9632356771877735e-05, "loss": 0.4016, "step": 1439 }, { "epoch": 0.1140819964349376, "grad_norm": 2.4014167852764605, "learning_rate": 1.9631667070242667e-05, "loss": 0.402, "step": 1440 }, { "epoch": 0.11416122004357299, "grad_norm": 2.9566102577274544, "learning_rate": 1.963097673440866e-05, "loss": 0.4598, "step": 1441 }, { "epoch": 0.11424044365220835, "grad_norm": 2.7312277270750154, "learning_rate": 1.9630285764421183e-05, "loss": 0.4335, "step": 1442 }, { "epoch": 0.11431966726084374, "grad_norm": 2.3040746735708577, "learning_rate": 1.9629594160325725e-05, "loss": 0.3639, "step": 1443 }, { "epoch": 0.1143988908694791, "grad_norm": 2.620302294334293, "learning_rate": 1.9628901922167823e-05, "loss": 0.4921, "step": 1444 }, { "epoch": 0.11447811447811448, "grad_norm": 2.911555999784591, "learning_rate": 1.9628209049993064e-05, "loss": 0.4379, "step": 1445 }, { "epoch": 0.11455733808674985, "grad_norm": 2.405405100917414, "learning_rate": 1.9627515543847068e-05, "loss": 0.4453, "step": 1446 }, { "epoch": 0.11463656169538522, "grad_norm": 2.9958855340989547, "learning_rate": 1.9626821403775494e-05, "loss": 0.3848, "step": 1447 }, { "epoch": 0.1147157853040206, "grad_norm": 2.8002215684340372, "learning_rate": 1.9626126629824056e-05, "loss": 0.5275, "step": 1448 }, { "epoch": 0.11479500891265597, "grad_norm": 2.4953018630760733, "learning_rate": 1.9625431222038494e-05, "loss": 0.4933, "step": 1449 }, { "epoch": 0.11487423252129135, "grad_norm": 2.6107891181250578, "learning_rate": 1.9624735180464602e-05, "loss": 0.5438, "step": 1450 }, { "epoch": 0.11495345612992672, "grad_norm": 2.687200084638096, "learning_rate": 1.962403850514821e-05, "loss": 0.5916, "step": 1451 }, { "epoch": 0.11503267973856209, "grad_norm": 2.4708752560975555, "learning_rate": 1.962334119613519e-05, "loss": 0.3683, "step": 1452 }, { "epoch": 0.11511190334719747, "grad_norm": 2.623431885926357, "learning_rate": 1.9622643253471457e-05, "loss": 0.3992, "step": 1453 }, { "epoch": 0.11519112695583283, "grad_norm": 2.4053221775577085, "learning_rate": 1.9621944677202966e-05, "loss": 0.4795, "step": 1454 }, { "epoch": 0.11527035056446822, "grad_norm": 2.173221512238794, "learning_rate": 1.9621245467375715e-05, "loss": 0.3732, "step": 1455 }, { "epoch": 0.11534957417310358, "grad_norm": 3.1533565901733893, "learning_rate": 1.9620545624035748e-05, "loss": 0.6043, "step": 1456 }, { "epoch": 0.11542879778173896, "grad_norm": 2.8528162712667067, "learning_rate": 1.961984514722914e-05, "loss": 0.3557, "step": 1457 }, { "epoch": 0.11550802139037433, "grad_norm": 2.701074418393308, "learning_rate": 1.9619144037002015e-05, "loss": 0.3548, "step": 1458 }, { "epoch": 0.1155872449990097, "grad_norm": 2.7501869065389246, "learning_rate": 1.9618442293400544e-05, "loss": 0.4194, "step": 1459 }, { "epoch": 0.11566646860764508, "grad_norm": 2.410240153815325, "learning_rate": 1.9617739916470926e-05, "loss": 0.4575, "step": 1460 }, { "epoch": 0.11574569221628045, "grad_norm": 3.6046265194177494, "learning_rate": 1.9617036906259416e-05, "loss": 0.5225, "step": 1461 }, { "epoch": 0.11582491582491583, "grad_norm": 2.550163598794831, "learning_rate": 1.9616333262812298e-05, "loss": 0.4023, "step": 1462 }, { "epoch": 0.1159041394335512, "grad_norm": 2.5970453273011374, "learning_rate": 1.9615628986175902e-05, "loss": 0.3944, "step": 1463 }, { "epoch": 0.11598336304218657, "grad_norm": 2.808728434483951, "learning_rate": 1.9614924076396605e-05, "loss": 0.3999, "step": 1464 }, { "epoch": 0.11606258665082195, "grad_norm": 2.173829958604981, "learning_rate": 1.9614218533520827e-05, "loss": 0.3606, "step": 1465 }, { "epoch": 0.11614181025945731, "grad_norm": 2.624440721747477, "learning_rate": 1.9613512357595014e-05, "loss": 0.336, "step": 1466 }, { "epoch": 0.1162210338680927, "grad_norm": 2.667638287547519, "learning_rate": 1.9612805548665673e-05, "loss": 0.4324, "step": 1467 }, { "epoch": 0.11630025747672806, "grad_norm": 2.576089489561868, "learning_rate": 1.961209810677934e-05, "loss": 0.4475, "step": 1468 }, { "epoch": 0.11637948108536343, "grad_norm": 2.370723185882638, "learning_rate": 1.9611390031982595e-05, "loss": 0.4733, "step": 1469 }, { "epoch": 0.11645870469399881, "grad_norm": 2.4566831658223487, "learning_rate": 1.9610681324322068e-05, "loss": 0.3275, "step": 1470 }, { "epoch": 0.11653792830263418, "grad_norm": 2.656934667342211, "learning_rate": 1.9609971983844412e-05, "loss": 0.4251, "step": 1471 }, { "epoch": 0.11661715191126956, "grad_norm": 2.3176020192483584, "learning_rate": 1.9609262010596346e-05, "loss": 0.3214, "step": 1472 }, { "epoch": 0.11669637551990493, "grad_norm": 3.0402397686574827, "learning_rate": 1.9608551404624613e-05, "loss": 0.442, "step": 1473 }, { "epoch": 0.11677559912854031, "grad_norm": 2.7463527142119166, "learning_rate": 1.9607840165976003e-05, "loss": 0.4034, "step": 1474 }, { "epoch": 0.11685482273717568, "grad_norm": 2.7781366558517346, "learning_rate": 1.960712829469735e-05, "loss": 0.4868, "step": 1475 }, { "epoch": 0.11693404634581105, "grad_norm": 2.4108285495916157, "learning_rate": 1.9606415790835523e-05, "loss": 0.4523, "step": 1476 }, { "epoch": 0.11701326995444643, "grad_norm": 2.5225314182628664, "learning_rate": 1.9605702654437438e-05, "loss": 0.3294, "step": 1477 }, { "epoch": 0.1170924935630818, "grad_norm": 3.102802518620715, "learning_rate": 1.9604988885550056e-05, "loss": 0.3664, "step": 1478 }, { "epoch": 0.11717171717171718, "grad_norm": 2.6643491304090507, "learning_rate": 1.960427448422037e-05, "loss": 0.4698, "step": 1479 }, { "epoch": 0.11725094078035254, "grad_norm": 2.440914712785853, "learning_rate": 1.9603559450495423e-05, "loss": 0.3997, "step": 1480 }, { "epoch": 0.11733016438898791, "grad_norm": 2.1709628354591963, "learning_rate": 1.9602843784422297e-05, "loss": 0.3029, "step": 1481 }, { "epoch": 0.1174093879976233, "grad_norm": 2.938762120514386, "learning_rate": 1.9602127486048112e-05, "loss": 0.366, "step": 1482 }, { "epoch": 0.11748861160625866, "grad_norm": 2.7088157343684562, "learning_rate": 1.9601410555420035e-05, "loss": 0.3394, "step": 1483 }, { "epoch": 0.11756783521489404, "grad_norm": 2.7259827114603365, "learning_rate": 1.9600692992585275e-05, "loss": 0.3933, "step": 1484 }, { "epoch": 0.11764705882352941, "grad_norm": 2.7765140482764608, "learning_rate": 1.959997479759107e-05, "loss": 0.3928, "step": 1485 }, { "epoch": 0.11772628243216479, "grad_norm": 2.9128258085526997, "learning_rate": 1.959925597048472e-05, "loss": 0.4938, "step": 1486 }, { "epoch": 0.11780550604080016, "grad_norm": 2.4718269763967533, "learning_rate": 1.9598536511313553e-05, "loss": 0.4091, "step": 1487 }, { "epoch": 0.11788472964943553, "grad_norm": 2.6090546215990957, "learning_rate": 1.9597816420124945e-05, "loss": 0.5564, "step": 1488 }, { "epoch": 0.11796395325807091, "grad_norm": 2.819158025579447, "learning_rate": 1.95970956969663e-05, "loss": 0.4879, "step": 1489 }, { "epoch": 0.11804317686670628, "grad_norm": 2.4412115292987266, "learning_rate": 1.9596374341885093e-05, "loss": 0.4954, "step": 1490 }, { "epoch": 0.11812240047534166, "grad_norm": 2.412572553373767, "learning_rate": 1.95956523549288e-05, "loss": 0.4117, "step": 1491 }, { "epoch": 0.11820162408397702, "grad_norm": 2.686183097520435, "learning_rate": 1.9594929736144978e-05, "loss": 0.393, "step": 1492 }, { "epoch": 0.11828084769261239, "grad_norm": 2.8484783873782673, "learning_rate": 1.9594206485581196e-05, "loss": 0.4983, "step": 1493 }, { "epoch": 0.11836007130124777, "grad_norm": 2.156358279266858, "learning_rate": 1.959348260328508e-05, "loss": 0.3206, "step": 1494 }, { "epoch": 0.11843929490988314, "grad_norm": 3.0164801487012616, "learning_rate": 1.95927580893043e-05, "loss": 0.543, "step": 1495 }, { "epoch": 0.11851851851851852, "grad_norm": 3.0554858763887154, "learning_rate": 1.9592032943686554e-05, "loss": 0.4883, "step": 1496 }, { "epoch": 0.11859774212715389, "grad_norm": 2.8308645228412845, "learning_rate": 1.9591307166479595e-05, "loss": 0.4305, "step": 1497 }, { "epoch": 0.11867696573578927, "grad_norm": 2.9918225036819264, "learning_rate": 1.959058075773121e-05, "loss": 0.4614, "step": 1498 }, { "epoch": 0.11875618934442464, "grad_norm": 2.5533857011462917, "learning_rate": 1.9589853717489228e-05, "loss": 0.4102, "step": 1499 }, { "epoch": 0.11883541295306001, "grad_norm": 2.4311466966052957, "learning_rate": 1.958912604580152e-05, "loss": 0.3459, "step": 1500 }, { "epoch": 0.11891463656169539, "grad_norm": 2.5739636174761595, "learning_rate": 1.9588397742716004e-05, "loss": 0.4279, "step": 1501 }, { "epoch": 0.11899386017033076, "grad_norm": 2.9141249303258627, "learning_rate": 1.9587668808280632e-05, "loss": 0.5002, "step": 1502 }, { "epoch": 0.11907308377896614, "grad_norm": 2.2176890890593217, "learning_rate": 1.9586939242543402e-05, "loss": 0.4051, "step": 1503 }, { "epoch": 0.1191523073876015, "grad_norm": 2.6294707532205988, "learning_rate": 1.9586209045552355e-05, "loss": 0.3959, "step": 1504 }, { "epoch": 0.11923153099623687, "grad_norm": 2.897063341641378, "learning_rate": 1.9585478217355563e-05, "loss": 0.4248, "step": 1505 }, { "epoch": 0.11931075460487225, "grad_norm": 2.704944763166334, "learning_rate": 1.9584746758001156e-05, "loss": 0.5395, "step": 1506 }, { "epoch": 0.11938997821350762, "grad_norm": 2.5999050620912527, "learning_rate": 1.9584014667537293e-05, "loss": 0.4858, "step": 1507 }, { "epoch": 0.119469201822143, "grad_norm": 2.8910020273790193, "learning_rate": 1.9583281946012183e-05, "loss": 0.5208, "step": 1508 }, { "epoch": 0.11954842543077837, "grad_norm": 2.632441685311661, "learning_rate": 1.9582548593474064e-05, "loss": 0.5518, "step": 1509 }, { "epoch": 0.11962764903941374, "grad_norm": 2.436644670108184, "learning_rate": 1.9581814609971232e-05, "loss": 0.3156, "step": 1510 }, { "epoch": 0.11970687264804912, "grad_norm": 2.5432163617969645, "learning_rate": 1.958107999555201e-05, "loss": 0.3777, "step": 1511 }, { "epoch": 0.11978609625668449, "grad_norm": 2.8925782162268394, "learning_rate": 1.958034475026477e-05, "loss": 0.4169, "step": 1512 }, { "epoch": 0.11986531986531987, "grad_norm": 2.6287630641747377, "learning_rate": 1.957960887415793e-05, "loss": 0.3504, "step": 1513 }, { "epoch": 0.11994454347395524, "grad_norm": 2.454226161394671, "learning_rate": 1.9578872367279937e-05, "loss": 0.4155, "step": 1514 }, { "epoch": 0.12002376708259062, "grad_norm": 3.0506713653642277, "learning_rate": 1.957813522967929e-05, "loss": 0.4063, "step": 1515 }, { "epoch": 0.12010299069122599, "grad_norm": 2.5683044099764687, "learning_rate": 1.9577397461404527e-05, "loss": 0.277, "step": 1516 }, { "epoch": 0.12018221429986135, "grad_norm": 2.972139986830908, "learning_rate": 1.957665906250422e-05, "loss": 0.5399, "step": 1517 }, { "epoch": 0.12026143790849673, "grad_norm": 2.110434332360107, "learning_rate": 1.9575920033027002e-05, "loss": 0.3962, "step": 1518 }, { "epoch": 0.1203406615171321, "grad_norm": 2.5344095372311695, "learning_rate": 1.9575180373021516e-05, "loss": 0.4118, "step": 1519 }, { "epoch": 0.12041988512576748, "grad_norm": 2.4862295925655236, "learning_rate": 1.9574440082536482e-05, "loss": 0.3404, "step": 1520 }, { "epoch": 0.12049910873440285, "grad_norm": 2.783541127818214, "learning_rate": 1.9573699161620635e-05, "loss": 0.489, "step": 1521 }, { "epoch": 0.12057833234303822, "grad_norm": 2.5850462976897415, "learning_rate": 1.9572957610322766e-05, "loss": 0.2879, "step": 1522 }, { "epoch": 0.1206575559516736, "grad_norm": 2.824212076990093, "learning_rate": 1.95722154286917e-05, "loss": 0.383, "step": 1523 }, { "epoch": 0.12073677956030897, "grad_norm": 2.3777158598526493, "learning_rate": 1.9571472616776304e-05, "loss": 0.4038, "step": 1524 }, { "epoch": 0.12081600316894435, "grad_norm": 3.14402479540086, "learning_rate": 1.9570729174625493e-05, "loss": 0.4047, "step": 1525 }, { "epoch": 0.12089522677757972, "grad_norm": 2.613857130982444, "learning_rate": 1.956998510228822e-05, "loss": 0.3779, "step": 1526 }, { "epoch": 0.1209744503862151, "grad_norm": 2.774024440056841, "learning_rate": 1.956924039981347e-05, "loss": 0.4627, "step": 1527 }, { "epoch": 0.12105367399485047, "grad_norm": 3.0863234715010406, "learning_rate": 1.956849506725029e-05, "loss": 0.4621, "step": 1528 }, { "epoch": 0.12113289760348583, "grad_norm": 2.6450002690119665, "learning_rate": 1.9567749104647746e-05, "loss": 0.4696, "step": 1529 }, { "epoch": 0.12121212121212122, "grad_norm": 2.7470851536957377, "learning_rate": 1.9567002512054964e-05, "loss": 0.3729, "step": 1530 }, { "epoch": 0.12129134482075658, "grad_norm": 2.684730202510088, "learning_rate": 1.9566255289521096e-05, "loss": 0.4945, "step": 1531 }, { "epoch": 0.12137056842939196, "grad_norm": 2.2363869955224485, "learning_rate": 1.956550743709535e-05, "loss": 0.2615, "step": 1532 }, { "epoch": 0.12144979203802733, "grad_norm": 2.9806460645100357, "learning_rate": 1.9564758954826964e-05, "loss": 0.3809, "step": 1533 }, { "epoch": 0.1215290156466627, "grad_norm": 2.5849828693392407, "learning_rate": 1.9564009842765225e-05, "loss": 0.3779, "step": 1534 }, { "epoch": 0.12160823925529808, "grad_norm": 2.643059762973339, "learning_rate": 1.956326010095946e-05, "loss": 0.5008, "step": 1535 }, { "epoch": 0.12168746286393345, "grad_norm": 2.6165263974282422, "learning_rate": 1.9562509729459024e-05, "loss": 0.3935, "step": 1536 }, { "epoch": 0.12176668647256883, "grad_norm": 2.753884796295939, "learning_rate": 1.956175872831334e-05, "loss": 0.5977, "step": 1537 }, { "epoch": 0.1218459100812042, "grad_norm": 2.5690423000474807, "learning_rate": 1.9561007097571853e-05, "loss": 0.4264, "step": 1538 }, { "epoch": 0.12192513368983957, "grad_norm": 2.6789834265427874, "learning_rate": 1.9560254837284053e-05, "loss": 0.3393, "step": 1539 }, { "epoch": 0.12200435729847495, "grad_norm": 2.9376042031960847, "learning_rate": 1.955950194749947e-05, "loss": 0.4775, "step": 1540 }, { "epoch": 0.12208358090711031, "grad_norm": 2.815447831132903, "learning_rate": 1.9558748428267682e-05, "loss": 0.4363, "step": 1541 }, { "epoch": 0.1221628045157457, "grad_norm": 2.5318662855694347, "learning_rate": 1.9557994279638307e-05, "loss": 0.3841, "step": 1542 }, { "epoch": 0.12224202812438106, "grad_norm": 2.4755649117929472, "learning_rate": 1.9557239501660995e-05, "loss": 0.4157, "step": 1543 }, { "epoch": 0.12232125173301645, "grad_norm": 2.7443704456283284, "learning_rate": 1.955648409438545e-05, "loss": 0.4058, "step": 1544 }, { "epoch": 0.12240047534165181, "grad_norm": 2.8926291368017867, "learning_rate": 1.955572805786141e-05, "loss": 0.4513, "step": 1545 }, { "epoch": 0.12247969895028718, "grad_norm": 2.5296947102822434, "learning_rate": 1.9554971392138655e-05, "loss": 0.4033, "step": 1546 }, { "epoch": 0.12255892255892256, "grad_norm": 3.137970896321621, "learning_rate": 1.955421409726701e-05, "loss": 0.4648, "step": 1547 }, { "epoch": 0.12263814616755793, "grad_norm": 2.598940583766311, "learning_rate": 1.9553456173296342e-05, "loss": 0.5123, "step": 1548 }, { "epoch": 0.12271736977619331, "grad_norm": 2.845098533377211, "learning_rate": 1.9552697620276547e-05, "loss": 0.4893, "step": 1549 }, { "epoch": 0.12279659338482868, "grad_norm": 2.171640858053366, "learning_rate": 1.9551938438257583e-05, "loss": 0.2753, "step": 1550 }, { "epoch": 0.12287581699346405, "grad_norm": 2.8101364938004822, "learning_rate": 1.9551178627289436e-05, "loss": 0.413, "step": 1551 }, { "epoch": 0.12295504060209943, "grad_norm": 2.4082972044518405, "learning_rate": 1.9550418187422127e-05, "loss": 0.2552, "step": 1552 }, { "epoch": 0.1230342642107348, "grad_norm": 2.979666631985347, "learning_rate": 1.954965711870574e-05, "loss": 0.4414, "step": 1553 }, { "epoch": 0.12311348781937018, "grad_norm": 2.395588349726931, "learning_rate": 1.954889542119038e-05, "loss": 0.4616, "step": 1554 }, { "epoch": 0.12319271142800554, "grad_norm": 2.7724609630018824, "learning_rate": 1.9548133094926203e-05, "loss": 0.3376, "step": 1555 }, { "epoch": 0.12327193503664093, "grad_norm": 3.9366385584127586, "learning_rate": 1.9547370139963406e-05, "loss": 0.4891, "step": 1556 }, { "epoch": 0.1233511586452763, "grad_norm": 2.9095013900698232, "learning_rate": 1.954660655635222e-05, "loss": 0.4781, "step": 1557 }, { "epoch": 0.12343038225391166, "grad_norm": 3.298047953183691, "learning_rate": 1.954584234414293e-05, "loss": 0.5337, "step": 1558 }, { "epoch": 0.12350960586254704, "grad_norm": 2.8024751242673442, "learning_rate": 1.954507750338585e-05, "loss": 0.3895, "step": 1559 }, { "epoch": 0.12358882947118241, "grad_norm": 2.6821293799796058, "learning_rate": 1.954431203413135e-05, "loss": 0.4845, "step": 1560 }, { "epoch": 0.12366805307981779, "grad_norm": 2.780344685065403, "learning_rate": 1.9543545936429824e-05, "loss": 0.4859, "step": 1561 }, { "epoch": 0.12374727668845316, "grad_norm": 2.9414187280778115, "learning_rate": 1.954277921033172e-05, "loss": 0.5408, "step": 1562 }, { "epoch": 0.12382650029708853, "grad_norm": 2.788911029571584, "learning_rate": 1.954201185588752e-05, "loss": 0.5358, "step": 1563 }, { "epoch": 0.12390572390572391, "grad_norm": 2.6190080183801183, "learning_rate": 1.9541243873147752e-05, "loss": 0.3719, "step": 1564 }, { "epoch": 0.12398494751435928, "grad_norm": 2.886293663927001, "learning_rate": 1.9540475262162988e-05, "loss": 0.6145, "step": 1565 }, { "epoch": 0.12406417112299466, "grad_norm": 2.496323677689286, "learning_rate": 1.9539706022983827e-05, "loss": 0.3729, "step": 1566 }, { "epoch": 0.12414339473163002, "grad_norm": 2.9856334209437323, "learning_rate": 1.9538936155660934e-05, "loss": 0.4992, "step": 1567 }, { "epoch": 0.12422261834026539, "grad_norm": 2.4619775946089995, "learning_rate": 1.953816566024499e-05, "loss": 0.5855, "step": 1568 }, { "epoch": 0.12430184194890077, "grad_norm": 2.9789060122626276, "learning_rate": 1.9537394536786734e-05, "loss": 0.4244, "step": 1569 }, { "epoch": 0.12438106555753614, "grad_norm": 2.9107892936738478, "learning_rate": 1.9536622785336936e-05, "loss": 0.429, "step": 1570 }, { "epoch": 0.12446028916617152, "grad_norm": 2.575065227488331, "learning_rate": 1.953585040594642e-05, "loss": 0.3026, "step": 1571 }, { "epoch": 0.12453951277480689, "grad_norm": 3.122138835238216, "learning_rate": 1.9535077398666034e-05, "loss": 0.3784, "step": 1572 }, { "epoch": 0.12461873638344227, "grad_norm": 2.742184685925629, "learning_rate": 1.953430376354668e-05, "loss": 0.3151, "step": 1573 }, { "epoch": 0.12469795999207764, "grad_norm": 2.9297185263841867, "learning_rate": 1.9533529500639302e-05, "loss": 0.4937, "step": 1574 }, { "epoch": 0.12477718360071301, "grad_norm": 2.4532738059092907, "learning_rate": 1.9532754609994878e-05, "loss": 0.2943, "step": 1575 }, { "epoch": 0.12485640720934839, "grad_norm": 2.899836302875545, "learning_rate": 1.953197909166443e-05, "loss": 0.4488, "step": 1576 }, { "epoch": 0.12493563081798376, "grad_norm": 2.4029920674779537, "learning_rate": 1.9531202945699027e-05, "loss": 0.2845, "step": 1577 }, { "epoch": 0.12501485442661914, "grad_norm": 2.8428512910017134, "learning_rate": 1.953042617214977e-05, "loss": 0.3614, "step": 1578 }, { "epoch": 0.12509407803525452, "grad_norm": 2.798713468090119, "learning_rate": 1.9529648771067805e-05, "loss": 0.3813, "step": 1579 }, { "epoch": 0.12517330164388987, "grad_norm": 2.556467631821324, "learning_rate": 1.9528870742504328e-05, "loss": 0.3817, "step": 1580 }, { "epoch": 0.12525252525252525, "grad_norm": 3.1781166725157557, "learning_rate": 1.9528092086510556e-05, "loss": 0.4631, "step": 1581 }, { "epoch": 0.12533174886116064, "grad_norm": 2.9061494862759623, "learning_rate": 1.9527312803137767e-05, "loss": 0.4058, "step": 1582 }, { "epoch": 0.125410972469796, "grad_norm": 2.8860317624467933, "learning_rate": 1.9526532892437275e-05, "loss": 0.5292, "step": 1583 }, { "epoch": 0.12549019607843137, "grad_norm": 3.1236873627040262, "learning_rate": 1.9525752354460433e-05, "loss": 0.542, "step": 1584 }, { "epoch": 0.12556941968706675, "grad_norm": 2.779541996887809, "learning_rate": 1.9524971189258627e-05, "loss": 0.4801, "step": 1585 }, { "epoch": 0.1256486432957021, "grad_norm": 2.9289218259670027, "learning_rate": 1.9524189396883307e-05, "loss": 0.3938, "step": 1586 }, { "epoch": 0.1257278669043375, "grad_norm": 2.745061426545248, "learning_rate": 1.9523406977385937e-05, "loss": 0.4475, "step": 1587 }, { "epoch": 0.12580709051297287, "grad_norm": 2.036368753501112, "learning_rate": 1.9522623930818043e-05, "loss": 0.3534, "step": 1588 }, { "epoch": 0.12588631412160825, "grad_norm": 2.6342558060261028, "learning_rate": 1.9521840257231183e-05, "loss": 0.4457, "step": 1589 }, { "epoch": 0.1259655377302436, "grad_norm": 2.7610086518946564, "learning_rate": 1.9521055956676956e-05, "loss": 0.4416, "step": 1590 }, { "epoch": 0.12604476133887899, "grad_norm": 2.237689734924548, "learning_rate": 1.9520271029207008e-05, "loss": 0.3523, "step": 1591 }, { "epoch": 0.12612398494751437, "grad_norm": 2.6380819504247723, "learning_rate": 1.9519485474873027e-05, "loss": 0.4576, "step": 1592 }, { "epoch": 0.12620320855614972, "grad_norm": 2.599429333634746, "learning_rate": 1.9518699293726727e-05, "loss": 0.4203, "step": 1593 }, { "epoch": 0.1262824321647851, "grad_norm": 2.9331156090809025, "learning_rate": 1.9517912485819878e-05, "loss": 0.3572, "step": 1594 }, { "epoch": 0.12636165577342048, "grad_norm": 2.6930673687534266, "learning_rate": 1.9517125051204292e-05, "loss": 0.4401, "step": 1595 }, { "epoch": 0.12644087938205587, "grad_norm": 2.9577575468001593, "learning_rate": 1.9516336989931813e-05, "loss": 0.5192, "step": 1596 }, { "epoch": 0.12652010299069122, "grad_norm": 3.1088600669992417, "learning_rate": 1.9515548302054335e-05, "loss": 0.463, "step": 1597 }, { "epoch": 0.1265993265993266, "grad_norm": 2.2750800320333333, "learning_rate": 1.9514758987623784e-05, "loss": 0.2897, "step": 1598 }, { "epoch": 0.12667855020796198, "grad_norm": 2.7515800863371873, "learning_rate": 1.9513969046692137e-05, "loss": 0.4902, "step": 1599 }, { "epoch": 0.12675777381659734, "grad_norm": 3.2977570850013342, "learning_rate": 1.951317847931141e-05, "loss": 0.4044, "step": 1600 }, { "epoch": 0.12683699742523272, "grad_norm": 2.928104060953201, "learning_rate": 1.9512387285533655e-05, "loss": 0.3933, "step": 1601 }, { "epoch": 0.1269162210338681, "grad_norm": 3.2747184680547807, "learning_rate": 1.951159546541096e-05, "loss": 0.5765, "step": 1602 }, { "epoch": 0.12699544464250345, "grad_norm": 2.3706795108547953, "learning_rate": 1.9510803018995477e-05, "loss": 0.3136, "step": 1603 }, { "epoch": 0.12707466825113883, "grad_norm": 2.9030221606700337, "learning_rate": 1.9510009946339377e-05, "loss": 0.4393, "step": 1604 }, { "epoch": 0.12715389185977422, "grad_norm": 2.3529714777259283, "learning_rate": 1.9509216247494882e-05, "loss": 0.3389, "step": 1605 }, { "epoch": 0.1272331154684096, "grad_norm": 3.25468941664847, "learning_rate": 1.950842192251425e-05, "loss": 0.4556, "step": 1606 }, { "epoch": 0.12731233907704495, "grad_norm": 2.170992913070096, "learning_rate": 1.950762697144979e-05, "loss": 0.332, "step": 1607 }, { "epoch": 0.12739156268568033, "grad_norm": 2.2553042551492033, "learning_rate": 1.950683139435384e-05, "loss": 0.2714, "step": 1608 }, { "epoch": 0.1274707862943157, "grad_norm": 2.9878863394480524, "learning_rate": 1.9506035191278784e-05, "loss": 0.5143, "step": 1609 }, { "epoch": 0.12755000990295107, "grad_norm": 2.3776207347682705, "learning_rate": 1.9505238362277054e-05, "loss": 0.3995, "step": 1610 }, { "epoch": 0.12762923351158645, "grad_norm": 2.510010197043507, "learning_rate": 1.9504440907401113e-05, "loss": 0.3674, "step": 1611 }, { "epoch": 0.12770845712022183, "grad_norm": 2.5749818492694234, "learning_rate": 1.9503642826703468e-05, "loss": 0.3398, "step": 1612 }, { "epoch": 0.1277876807288572, "grad_norm": 2.403313331210437, "learning_rate": 1.950284412023668e-05, "loss": 0.3517, "step": 1613 }, { "epoch": 0.12786690433749257, "grad_norm": 3.0503242942353617, "learning_rate": 1.9502044788053322e-05, "loss": 0.4798, "step": 1614 }, { "epoch": 0.12794612794612795, "grad_norm": 2.7721261153916323, "learning_rate": 1.9501244830206037e-05, "loss": 0.4057, "step": 1615 }, { "epoch": 0.12802535155476333, "grad_norm": 2.8444677105161924, "learning_rate": 1.9500444246747502e-05, "loss": 0.3998, "step": 1616 }, { "epoch": 0.12810457516339868, "grad_norm": 2.6802089568829945, "learning_rate": 1.9499643037730422e-05, "loss": 0.409, "step": 1617 }, { "epoch": 0.12818379877203406, "grad_norm": 2.8102940931801452, "learning_rate": 1.949884120320756e-05, "loss": 0.3982, "step": 1618 }, { "epoch": 0.12826302238066944, "grad_norm": 3.231899657674601, "learning_rate": 1.949803874323171e-05, "loss": 0.3676, "step": 1619 }, { "epoch": 0.12834224598930483, "grad_norm": 2.92260270105415, "learning_rate": 1.949723565785571e-05, "loss": 0.4368, "step": 1620 }, { "epoch": 0.12842146959794018, "grad_norm": 2.5592811402494506, "learning_rate": 1.9496431947132438e-05, "loss": 0.4609, "step": 1621 }, { "epoch": 0.12850069320657556, "grad_norm": 2.6221869318504423, "learning_rate": 1.9495627611114817e-05, "loss": 0.3139, "step": 1622 }, { "epoch": 0.12857991681521094, "grad_norm": 3.663064416554614, "learning_rate": 1.949482264985581e-05, "loss": 0.5551, "step": 1623 }, { "epoch": 0.1286591404238463, "grad_norm": 2.67366172507629, "learning_rate": 1.9494017063408415e-05, "loss": 0.4868, "step": 1624 }, { "epoch": 0.12873836403248168, "grad_norm": 2.689543415717479, "learning_rate": 1.9493210851825682e-05, "loss": 0.3989, "step": 1625 }, { "epoch": 0.12881758764111706, "grad_norm": 3.3765266523424255, "learning_rate": 1.949240401516069e-05, "loss": 0.4312, "step": 1626 }, { "epoch": 0.1288968112497524, "grad_norm": 2.1715257505230205, "learning_rate": 1.9491596553466568e-05, "loss": 0.3475, "step": 1627 }, { "epoch": 0.1289760348583878, "grad_norm": 2.6979449124694304, "learning_rate": 1.9490788466796483e-05, "loss": 0.4145, "step": 1628 }, { "epoch": 0.12905525846702318, "grad_norm": 2.2746830664393527, "learning_rate": 1.9489979755203646e-05, "loss": 0.3342, "step": 1629 }, { "epoch": 0.12913448207565856, "grad_norm": 2.648442571428701, "learning_rate": 1.9489170418741306e-05, "loss": 0.4784, "step": 1630 }, { "epoch": 0.1292137056842939, "grad_norm": 2.603482192866092, "learning_rate": 1.948836045746275e-05, "loss": 0.3643, "step": 1631 }, { "epoch": 0.1292929292929293, "grad_norm": 2.7804665993177875, "learning_rate": 1.9487549871421316e-05, "loss": 0.4675, "step": 1632 }, { "epoch": 0.12937215290156467, "grad_norm": 3.347643506302992, "learning_rate": 1.9486738660670373e-05, "loss": 0.4955, "step": 1633 }, { "epoch": 0.12945137651020003, "grad_norm": 2.382435796668833, "learning_rate": 1.9485926825263334e-05, "loss": 0.3684, "step": 1634 }, { "epoch": 0.1295306001188354, "grad_norm": 2.8124658289034428, "learning_rate": 1.948511436525366e-05, "loss": 0.4708, "step": 1635 }, { "epoch": 0.1296098237274708, "grad_norm": 2.397738474960787, "learning_rate": 1.9484301280694845e-05, "loss": 0.3054, "step": 1636 }, { "epoch": 0.12968904733610617, "grad_norm": 2.901551190259369, "learning_rate": 1.9483487571640424e-05, "loss": 0.5474, "step": 1637 }, { "epoch": 0.12976827094474153, "grad_norm": 2.511534172202647, "learning_rate": 1.948267323814398e-05, "loss": 0.3418, "step": 1638 }, { "epoch": 0.1298474945533769, "grad_norm": 2.636660392767567, "learning_rate": 1.948185828025913e-05, "loss": 0.3109, "step": 1639 }, { "epoch": 0.1299267181620123, "grad_norm": 2.4929883609443038, "learning_rate": 1.9481042698039534e-05, "loss": 0.389, "step": 1640 }, { "epoch": 0.13000594177064764, "grad_norm": 2.4043485541317664, "learning_rate": 1.94802264915389e-05, "loss": 0.3345, "step": 1641 }, { "epoch": 0.13008516537928302, "grad_norm": 3.0446967598026498, "learning_rate": 1.9479409660810965e-05, "loss": 0.4212, "step": 1642 }, { "epoch": 0.1301643889879184, "grad_norm": 2.4660348140404094, "learning_rate": 1.9478592205909517e-05, "loss": 0.3405, "step": 1643 }, { "epoch": 0.13024361259655376, "grad_norm": 2.9326105002394227, "learning_rate": 1.947777412688838e-05, "loss": 0.3936, "step": 1644 }, { "epoch": 0.13032283620518914, "grad_norm": 3.3642632036398368, "learning_rate": 1.947695542380142e-05, "loss": 0.3913, "step": 1645 }, { "epoch": 0.13040205981382452, "grad_norm": 2.902126061765541, "learning_rate": 1.9476136096702546e-05, "loss": 0.3567, "step": 1646 }, { "epoch": 0.1304812834224599, "grad_norm": 3.505122731530886, "learning_rate": 1.9475316145645706e-05, "loss": 0.4399, "step": 1647 }, { "epoch": 0.13056050703109526, "grad_norm": 3.362321420296482, "learning_rate": 1.947449557068489e-05, "loss": 0.3739, "step": 1648 }, { "epoch": 0.13063973063973064, "grad_norm": 2.7043004382627194, "learning_rate": 1.947367437187413e-05, "loss": 0.4691, "step": 1649 }, { "epoch": 0.13071895424836602, "grad_norm": 3.09994203153213, "learning_rate": 1.9472852549267496e-05, "loss": 0.484, "step": 1650 }, { "epoch": 0.13079817785700137, "grad_norm": 3.3495132778616186, "learning_rate": 1.9472030102919102e-05, "loss": 0.4564, "step": 1651 }, { "epoch": 0.13087740146563676, "grad_norm": 2.334095499135991, "learning_rate": 1.9471207032883103e-05, "loss": 0.4249, "step": 1652 }, { "epoch": 0.13095662507427214, "grad_norm": 2.5976780141713265, "learning_rate": 1.9470383339213693e-05, "loss": 0.4537, "step": 1653 }, { "epoch": 0.13103584868290752, "grad_norm": 2.855276637433223, "learning_rate": 1.946955902196511e-05, "loss": 0.3884, "step": 1654 }, { "epoch": 0.13111507229154287, "grad_norm": 2.2793206621657167, "learning_rate": 1.9468734081191627e-05, "loss": 0.3887, "step": 1655 }, { "epoch": 0.13119429590017825, "grad_norm": 2.7978508259665733, "learning_rate": 1.9467908516947568e-05, "loss": 0.5249, "step": 1656 }, { "epoch": 0.13127351950881364, "grad_norm": 2.9427997156011787, "learning_rate": 1.946708232928729e-05, "loss": 0.459, "step": 1657 }, { "epoch": 0.131352743117449, "grad_norm": 2.577970086491782, "learning_rate": 1.9466255518265193e-05, "loss": 0.4381, "step": 1658 }, { "epoch": 0.13143196672608437, "grad_norm": 3.2167569523486668, "learning_rate": 1.946542808393572e-05, "loss": 0.4795, "step": 1659 }, { "epoch": 0.13151119033471975, "grad_norm": 2.842328280690265, "learning_rate": 1.946460002635335e-05, "loss": 0.401, "step": 1660 }, { "epoch": 0.13159041394335513, "grad_norm": 2.381339253336541, "learning_rate": 1.946377134557261e-05, "loss": 0.4599, "step": 1661 }, { "epoch": 0.1316696375519905, "grad_norm": 2.4829991752808667, "learning_rate": 1.9462942041648062e-05, "loss": 0.343, "step": 1662 }, { "epoch": 0.13174886116062587, "grad_norm": 2.564416558503891, "learning_rate": 1.9462112114634316e-05, "loss": 0.4138, "step": 1663 }, { "epoch": 0.13182808476926125, "grad_norm": 2.587157250168123, "learning_rate": 1.9461281564586014e-05, "loss": 0.3455, "step": 1664 }, { "epoch": 0.1319073083778966, "grad_norm": 2.2235393603415066, "learning_rate": 1.9460450391557847e-05, "loss": 0.3886, "step": 1665 }, { "epoch": 0.13198653198653199, "grad_norm": 2.272000375557651, "learning_rate": 1.945961859560454e-05, "loss": 0.3728, "step": 1666 }, { "epoch": 0.13206575559516737, "grad_norm": 2.737644215712852, "learning_rate": 1.9458786176780868e-05, "loss": 0.4289, "step": 1667 }, { "epoch": 0.13214497920380272, "grad_norm": 2.477519689542186, "learning_rate": 1.945795313514164e-05, "loss": 0.3687, "step": 1668 }, { "epoch": 0.1322242028124381, "grad_norm": 3.315891742066584, "learning_rate": 1.9457119470741707e-05, "loss": 0.4817, "step": 1669 }, { "epoch": 0.13230342642107348, "grad_norm": 3.751583010646154, "learning_rate": 1.9456285183635958e-05, "loss": 0.4694, "step": 1670 }, { "epoch": 0.13238265002970886, "grad_norm": 2.4207879338478726, "learning_rate": 1.9455450273879332e-05, "loss": 0.3309, "step": 1671 }, { "epoch": 0.13246187363834422, "grad_norm": 3.62041416957926, "learning_rate": 1.94546147415268e-05, "loss": 0.4837, "step": 1672 }, { "epoch": 0.1325410972469796, "grad_norm": 2.518393835869722, "learning_rate": 1.9453778586633386e-05, "loss": 0.4374, "step": 1673 }, { "epoch": 0.13262032085561498, "grad_norm": 2.972761889764143, "learning_rate": 1.9452941809254136e-05, "loss": 0.515, "step": 1674 }, { "epoch": 0.13269954446425034, "grad_norm": 2.4852762300429463, "learning_rate": 1.9452104409444153e-05, "loss": 0.4269, "step": 1675 }, { "epoch": 0.13277876807288572, "grad_norm": 2.691225002242367, "learning_rate": 1.9451266387258576e-05, "loss": 0.3798, "step": 1676 }, { "epoch": 0.1328579916815211, "grad_norm": 2.66315078919408, "learning_rate": 1.9450427742752583e-05, "loss": 0.444, "step": 1677 }, { "epoch": 0.13293721529015648, "grad_norm": 2.172922479297124, "learning_rate": 1.9449588475981394e-05, "loss": 0.4054, "step": 1678 }, { "epoch": 0.13301643889879183, "grad_norm": 2.3766279143702116, "learning_rate": 1.9448748587000277e-05, "loss": 0.3742, "step": 1679 }, { "epoch": 0.13309566250742721, "grad_norm": 2.659481511765234, "learning_rate": 1.944790807586453e-05, "loss": 0.4284, "step": 1680 }, { "epoch": 0.1331748861160626, "grad_norm": 2.403462541581427, "learning_rate": 1.9447066942629495e-05, "loss": 0.2833, "step": 1681 }, { "epoch": 0.13325410972469795, "grad_norm": 2.4337194417494956, "learning_rate": 1.9446225187350558e-05, "loss": 0.4756, "step": 1682 }, { "epoch": 0.13333333333333333, "grad_norm": 2.6078413867928507, "learning_rate": 1.9445382810083143e-05, "loss": 0.4559, "step": 1683 }, { "epoch": 0.1334125569419687, "grad_norm": 1.9835587571797169, "learning_rate": 1.944453981088272e-05, "loss": 0.3094, "step": 1684 }, { "epoch": 0.13349178055060407, "grad_norm": 2.356588714443372, "learning_rate": 1.9443696189804793e-05, "loss": 0.4032, "step": 1685 }, { "epoch": 0.13357100415923945, "grad_norm": 2.2430736075244946, "learning_rate": 1.9442851946904914e-05, "loss": 0.3803, "step": 1686 }, { "epoch": 0.13365022776787483, "grad_norm": 3.0731427134840255, "learning_rate": 1.9442007082238673e-05, "loss": 0.3625, "step": 1687 }, { "epoch": 0.1337294513765102, "grad_norm": 2.3297474213002585, "learning_rate": 1.944116159586169e-05, "loss": 0.3436, "step": 1688 }, { "epoch": 0.13380867498514556, "grad_norm": 2.431890377062831, "learning_rate": 1.944031548782965e-05, "loss": 0.3478, "step": 1689 }, { "epoch": 0.13388789859378095, "grad_norm": 2.849904722734544, "learning_rate": 1.9439468758198258e-05, "loss": 0.352, "step": 1690 }, { "epoch": 0.13396712220241633, "grad_norm": 3.39934604143279, "learning_rate": 1.943862140702327e-05, "loss": 0.3997, "step": 1691 }, { "epoch": 0.13404634581105168, "grad_norm": 2.562701418400086, "learning_rate": 1.9437773434360476e-05, "loss": 0.3949, "step": 1692 }, { "epoch": 0.13412556941968706, "grad_norm": 2.909763695066894, "learning_rate": 1.943692484026571e-05, "loss": 0.4224, "step": 1693 }, { "epoch": 0.13420479302832244, "grad_norm": 3.0401964946470614, "learning_rate": 1.9436075624794853e-05, "loss": 0.3896, "step": 1694 }, { "epoch": 0.13428401663695783, "grad_norm": 3.884386828365048, "learning_rate": 1.9435225788003822e-05, "loss": 0.4134, "step": 1695 }, { "epoch": 0.13436324024559318, "grad_norm": 2.933286761944894, "learning_rate": 1.943437532994857e-05, "loss": 0.4776, "step": 1696 }, { "epoch": 0.13444246385422856, "grad_norm": 2.787306440849998, "learning_rate": 1.9433524250685098e-05, "loss": 0.5181, "step": 1697 }, { "epoch": 0.13452168746286394, "grad_norm": 2.509240646798951, "learning_rate": 1.9432672550269446e-05, "loss": 0.3262, "step": 1698 }, { "epoch": 0.1346009110714993, "grad_norm": 2.7167072597122974, "learning_rate": 1.943182022875769e-05, "loss": 0.3762, "step": 1699 }, { "epoch": 0.13468013468013468, "grad_norm": 2.669643460412162, "learning_rate": 1.9430967286205962e-05, "loss": 0.3957, "step": 1700 }, { "epoch": 0.13475935828877006, "grad_norm": 2.4270850822620114, "learning_rate": 1.9430113722670412e-05, "loss": 0.3467, "step": 1701 }, { "epoch": 0.13483858189740544, "grad_norm": 2.7138244790146815, "learning_rate": 1.942925953820725e-05, "loss": 0.403, "step": 1702 }, { "epoch": 0.1349178055060408, "grad_norm": 3.0766988709084515, "learning_rate": 1.9428404732872716e-05, "loss": 0.3518, "step": 1703 }, { "epoch": 0.13499702911467618, "grad_norm": 2.9382266391598213, "learning_rate": 1.94275493067231e-05, "loss": 0.5382, "step": 1704 }, { "epoch": 0.13507625272331156, "grad_norm": 2.9597048824604255, "learning_rate": 1.9426693259814725e-05, "loss": 0.4127, "step": 1705 }, { "epoch": 0.1351554763319469, "grad_norm": 2.5322215454521766, "learning_rate": 1.9425836592203954e-05, "loss": 0.3423, "step": 1706 }, { "epoch": 0.1352346999405823, "grad_norm": 2.7049015822029037, "learning_rate": 1.94249793039472e-05, "loss": 0.4701, "step": 1707 }, { "epoch": 0.13531392354921767, "grad_norm": 2.4246078231505903, "learning_rate": 1.9424121395100907e-05, "loss": 0.3667, "step": 1708 }, { "epoch": 0.13539314715785303, "grad_norm": 2.6928624533661862, "learning_rate": 1.9423262865721567e-05, "loss": 0.3661, "step": 1709 }, { "epoch": 0.1354723707664884, "grad_norm": 2.6926214334991236, "learning_rate": 1.9422403715865708e-05, "loss": 0.4549, "step": 1710 }, { "epoch": 0.1355515943751238, "grad_norm": 2.8664509011884736, "learning_rate": 1.9421543945589904e-05, "loss": 0.3646, "step": 1711 }, { "epoch": 0.13563081798375917, "grad_norm": 2.5495186171854427, "learning_rate": 1.9420683554950765e-05, "loss": 0.368, "step": 1712 }, { "epoch": 0.13571004159239453, "grad_norm": 3.2178015112401224, "learning_rate": 1.9419822544004942e-05, "loss": 0.5219, "step": 1713 }, { "epoch": 0.1357892652010299, "grad_norm": 3.280884238267083, "learning_rate": 1.941896091280913e-05, "loss": 0.5009, "step": 1714 }, { "epoch": 0.1358684888096653, "grad_norm": 2.826399007686927, "learning_rate": 1.9418098661420064e-05, "loss": 0.413, "step": 1715 }, { "epoch": 0.13594771241830064, "grad_norm": 2.3069129821670855, "learning_rate": 1.9417235789894517e-05, "loss": 0.576, "step": 1716 }, { "epoch": 0.13602693602693602, "grad_norm": 2.559751687671898, "learning_rate": 1.9416372298289306e-05, "loss": 0.4126, "step": 1717 }, { "epoch": 0.1361061596355714, "grad_norm": 2.4776879979271964, "learning_rate": 1.941550818666129e-05, "loss": 0.3482, "step": 1718 }, { "epoch": 0.1361853832442068, "grad_norm": 2.8479681770747467, "learning_rate": 1.941464345506736e-05, "loss": 0.4583, "step": 1719 }, { "epoch": 0.13626460685284214, "grad_norm": 2.1690098167702314, "learning_rate": 1.9413778103564462e-05, "loss": 0.5257, "step": 1720 }, { "epoch": 0.13634383046147752, "grad_norm": 2.455941090465394, "learning_rate": 1.9412912132209573e-05, "loss": 0.4485, "step": 1721 }, { "epoch": 0.1364230540701129, "grad_norm": 2.5830636468674864, "learning_rate": 1.941204554105971e-05, "loss": 0.3915, "step": 1722 }, { "epoch": 0.13650227767874826, "grad_norm": 2.376180009234982, "learning_rate": 1.941117833017194e-05, "loss": 0.3951, "step": 1723 }, { "epoch": 0.13658150128738364, "grad_norm": 2.6244711090192974, "learning_rate": 1.9410310499603356e-05, "loss": 0.4578, "step": 1724 }, { "epoch": 0.13666072489601902, "grad_norm": 2.261573384408937, "learning_rate": 1.9409442049411104e-05, "loss": 0.306, "step": 1725 }, { "epoch": 0.13673994850465437, "grad_norm": 2.8011175232833274, "learning_rate": 1.9408572979652373e-05, "loss": 0.4511, "step": 1726 }, { "epoch": 0.13681917211328976, "grad_norm": 2.748409319210236, "learning_rate": 1.940770329038438e-05, "loss": 0.3832, "step": 1727 }, { "epoch": 0.13689839572192514, "grad_norm": 2.6968365789827042, "learning_rate": 1.9406832981664392e-05, "loss": 0.3206, "step": 1728 }, { "epoch": 0.13697761933056052, "grad_norm": 2.420111617525425, "learning_rate": 1.9405962053549717e-05, "loss": 0.4374, "step": 1729 }, { "epoch": 0.13705684293919587, "grad_norm": 2.6371164388381176, "learning_rate": 1.9405090506097698e-05, "loss": 0.4315, "step": 1730 }, { "epoch": 0.13713606654783125, "grad_norm": 2.547212877630584, "learning_rate": 1.9404218339365724e-05, "loss": 0.3484, "step": 1731 }, { "epoch": 0.13721529015646663, "grad_norm": 3.5284779174582153, "learning_rate": 1.940334555341122e-05, "loss": 0.4274, "step": 1732 }, { "epoch": 0.137294513765102, "grad_norm": 2.5627729261977255, "learning_rate": 1.940247214829166e-05, "loss": 0.3342, "step": 1733 }, { "epoch": 0.13737373737373737, "grad_norm": 2.909823770122146, "learning_rate": 1.9401598124064552e-05, "loss": 0.4361, "step": 1734 }, { "epoch": 0.13745296098237275, "grad_norm": 2.3163164996527628, "learning_rate": 1.9400723480787446e-05, "loss": 0.3918, "step": 1735 }, { "epoch": 0.13753218459100813, "grad_norm": 2.3744330735184627, "learning_rate": 1.9399848218517927e-05, "loss": 0.3415, "step": 1736 }, { "epoch": 0.1376114081996435, "grad_norm": 2.0240339542048384, "learning_rate": 1.9398972337313634e-05, "loss": 0.3365, "step": 1737 }, { "epoch": 0.13769063180827887, "grad_norm": 2.1905630441425576, "learning_rate": 1.939809583723224e-05, "loss": 0.2921, "step": 1738 }, { "epoch": 0.13776985541691425, "grad_norm": 2.8982878046600637, "learning_rate": 1.9397218718331455e-05, "loss": 0.5758, "step": 1739 }, { "epoch": 0.1378490790255496, "grad_norm": 2.325300339897712, "learning_rate": 1.939634098066903e-05, "loss": 0.4574, "step": 1740 }, { "epoch": 0.13792830263418498, "grad_norm": 2.4353157449939604, "learning_rate": 1.9395462624302768e-05, "loss": 0.4399, "step": 1741 }, { "epoch": 0.13800752624282037, "grad_norm": 2.3425346700048, "learning_rate": 1.93945836492905e-05, "loss": 0.404, "step": 1742 }, { "epoch": 0.13808674985145572, "grad_norm": 2.8871416616390677, "learning_rate": 1.93937040556901e-05, "loss": 0.549, "step": 1743 }, { "epoch": 0.1381659734600911, "grad_norm": 4.0136586054759755, "learning_rate": 1.939282384355949e-05, "loss": 0.512, "step": 1744 }, { "epoch": 0.13824519706872648, "grad_norm": 2.5005383446804115, "learning_rate": 1.9391943012956623e-05, "loss": 0.4017, "step": 1745 }, { "epoch": 0.13832442067736186, "grad_norm": 2.3596636785761893, "learning_rate": 1.93910615639395e-05, "loss": 0.362, "step": 1746 }, { "epoch": 0.13840364428599722, "grad_norm": 2.79467014377652, "learning_rate": 1.9390179496566162e-05, "loss": 0.3851, "step": 1747 }, { "epoch": 0.1384828678946326, "grad_norm": 3.2382132850629, "learning_rate": 1.938929681089469e-05, "loss": 0.4707, "step": 1748 }, { "epoch": 0.13856209150326798, "grad_norm": 3.409663158696584, "learning_rate": 1.9388413506983196e-05, "loss": 0.488, "step": 1749 }, { "epoch": 0.13864131511190333, "grad_norm": 2.9866191563452777, "learning_rate": 1.938752958488985e-05, "loss": 0.4674, "step": 1750 }, { "epoch": 0.13872053872053872, "grad_norm": 2.4722404261927147, "learning_rate": 1.9386645044672848e-05, "loss": 0.329, "step": 1751 }, { "epoch": 0.1387997623291741, "grad_norm": 2.552647734846144, "learning_rate": 1.9385759886390433e-05, "loss": 0.3518, "step": 1752 }, { "epoch": 0.13887898593780948, "grad_norm": 2.9725759388961737, "learning_rate": 1.9384874110100897e-05, "loss": 0.459, "step": 1753 }, { "epoch": 0.13895820954644483, "grad_norm": 2.806089119703009, "learning_rate": 1.9383987715862554e-05, "loss": 0.3819, "step": 1754 }, { "epoch": 0.13903743315508021, "grad_norm": 2.6964792416535515, "learning_rate": 1.9383100703733774e-05, "loss": 0.5298, "step": 1755 }, { "epoch": 0.1391166567637156, "grad_norm": 3.028829052005943, "learning_rate": 1.9382213073772962e-05, "loss": 0.4913, "step": 1756 }, { "epoch": 0.13919588037235095, "grad_norm": 3.1683897779788777, "learning_rate": 1.938132482603856e-05, "loss": 0.5255, "step": 1757 }, { "epoch": 0.13927510398098633, "grad_norm": 2.850752841125001, "learning_rate": 1.9380435960589065e-05, "loss": 0.4175, "step": 1758 }, { "epoch": 0.1393543275896217, "grad_norm": 2.4336769845841766, "learning_rate": 1.937954647748299e-05, "loss": 0.4143, "step": 1759 }, { "epoch": 0.1394335511982571, "grad_norm": 2.8209896320010595, "learning_rate": 1.9378656376778914e-05, "loss": 0.4958, "step": 1760 }, { "epoch": 0.13951277480689245, "grad_norm": 3.3084398717923187, "learning_rate": 1.9377765658535445e-05, "loss": 0.408, "step": 1761 }, { "epoch": 0.13959199841552783, "grad_norm": 2.675740052142721, "learning_rate": 1.937687432281123e-05, "loss": 0.2555, "step": 1762 }, { "epoch": 0.1396712220241632, "grad_norm": 2.05647363861636, "learning_rate": 1.9375982369664958e-05, "loss": 0.3897, "step": 1763 }, { "epoch": 0.13975044563279856, "grad_norm": 2.74097400063134, "learning_rate": 1.937508979915536e-05, "loss": 0.4509, "step": 1764 }, { "epoch": 0.13982966924143395, "grad_norm": 2.859127455478302, "learning_rate": 1.9374196611341212e-05, "loss": 0.6438, "step": 1765 }, { "epoch": 0.13990889285006933, "grad_norm": 2.7073035930658094, "learning_rate": 1.937330280628132e-05, "loss": 0.5711, "step": 1766 }, { "epoch": 0.13998811645870468, "grad_norm": 2.6455247709703817, "learning_rate": 1.937240838403454e-05, "loss": 0.4085, "step": 1767 }, { "epoch": 0.14006734006734006, "grad_norm": 2.457411513078964, "learning_rate": 1.9371513344659764e-05, "loss": 0.3975, "step": 1768 }, { "epoch": 0.14014656367597544, "grad_norm": 2.245731219545626, "learning_rate": 1.937061768821593e-05, "loss": 0.358, "step": 1769 }, { "epoch": 0.14022578728461083, "grad_norm": 2.6889545638155368, "learning_rate": 1.936972141476201e-05, "loss": 0.305, "step": 1770 }, { "epoch": 0.14030501089324618, "grad_norm": 2.783009301793553, "learning_rate": 1.936882452435702e-05, "loss": 0.3615, "step": 1771 }, { "epoch": 0.14038423450188156, "grad_norm": 2.614667162035841, "learning_rate": 1.936792701706001e-05, "loss": 0.3377, "step": 1772 }, { "epoch": 0.14046345811051694, "grad_norm": 2.4262050626102862, "learning_rate": 1.9367028892930088e-05, "loss": 0.318, "step": 1773 }, { "epoch": 0.1405426817191523, "grad_norm": 3.3041205906260798, "learning_rate": 1.9366130152026378e-05, "loss": 0.4841, "step": 1774 }, { "epoch": 0.14062190532778768, "grad_norm": 2.2797815061366866, "learning_rate": 1.936523079440807e-05, "loss": 0.332, "step": 1775 }, { "epoch": 0.14070112893642306, "grad_norm": 2.7699321107417787, "learning_rate": 1.936433082013437e-05, "loss": 0.4473, "step": 1776 }, { "epoch": 0.14078035254505844, "grad_norm": 2.1374182325870565, "learning_rate": 1.936343022926455e-05, "loss": 0.3788, "step": 1777 }, { "epoch": 0.1408595761536938, "grad_norm": 3.0339973699361042, "learning_rate": 1.93625290218579e-05, "loss": 0.5335, "step": 1778 }, { "epoch": 0.14093879976232918, "grad_norm": 2.6758153115011516, "learning_rate": 1.9361627197973767e-05, "loss": 0.3508, "step": 1779 }, { "epoch": 0.14101802337096456, "grad_norm": 2.385172483226919, "learning_rate": 1.9360724757671525e-05, "loss": 0.3692, "step": 1780 }, { "epoch": 0.1410972469795999, "grad_norm": 2.2394871546720396, "learning_rate": 1.93598217010106e-05, "loss": 0.4537, "step": 1781 }, { "epoch": 0.1411764705882353, "grad_norm": 2.434345583261253, "learning_rate": 1.9358918028050453e-05, "loss": 0.4931, "step": 1782 }, { "epoch": 0.14125569419687067, "grad_norm": 2.501755616044473, "learning_rate": 1.9358013738850586e-05, "loss": 0.3767, "step": 1783 }, { "epoch": 0.14133491780550603, "grad_norm": 2.3754157703780248, "learning_rate": 1.935710883347054e-05, "loss": 0.4095, "step": 1784 }, { "epoch": 0.1414141414141414, "grad_norm": 2.6069791431240463, "learning_rate": 1.9356203311969903e-05, "loss": 0.4818, "step": 1785 }, { "epoch": 0.1414933650227768, "grad_norm": 2.2688673897499085, "learning_rate": 1.9355297174408298e-05, "loss": 0.3397, "step": 1786 }, { "epoch": 0.14157258863141217, "grad_norm": 2.1285235170660766, "learning_rate": 1.9354390420845387e-05, "loss": 0.3791, "step": 1787 }, { "epoch": 0.14165181224004753, "grad_norm": 3.20407914228043, "learning_rate": 1.9353483051340876e-05, "loss": 0.4441, "step": 1788 }, { "epoch": 0.1417310358486829, "grad_norm": 2.7316191709489, "learning_rate": 1.9352575065954515e-05, "loss": 0.5762, "step": 1789 }, { "epoch": 0.1418102594573183, "grad_norm": 2.670139626233861, "learning_rate": 1.9351666464746087e-05, "loss": 0.4172, "step": 1790 }, { "epoch": 0.14188948306595364, "grad_norm": 2.5409369456840296, "learning_rate": 1.935075724777542e-05, "loss": 0.4056, "step": 1791 }, { "epoch": 0.14196870667458902, "grad_norm": 2.876611771770737, "learning_rate": 1.9349847415102378e-05, "loss": 0.4431, "step": 1792 }, { "epoch": 0.1420479302832244, "grad_norm": 2.363433386652227, "learning_rate": 1.9348936966786874e-05, "loss": 0.3403, "step": 1793 }, { "epoch": 0.1421271538918598, "grad_norm": 3.1314688480524193, "learning_rate": 1.9348025902888858e-05, "loss": 0.4836, "step": 1794 }, { "epoch": 0.14220637750049514, "grad_norm": 2.902301365139742, "learning_rate": 1.9347114223468316e-05, "loss": 0.383, "step": 1795 }, { "epoch": 0.14228560110913052, "grad_norm": 3.229547997557393, "learning_rate": 1.9346201928585273e-05, "loss": 0.5752, "step": 1796 }, { "epoch": 0.1423648247177659, "grad_norm": 2.631268096111741, "learning_rate": 1.9345289018299807e-05, "loss": 0.3044, "step": 1797 }, { "epoch": 0.14244404832640126, "grad_norm": 2.367723781863915, "learning_rate": 1.9344375492672024e-05, "loss": 0.3397, "step": 1798 }, { "epoch": 0.14252327193503664, "grad_norm": 2.570068539571693, "learning_rate": 1.934346135176208e-05, "loss": 0.2447, "step": 1799 }, { "epoch": 0.14260249554367202, "grad_norm": 2.6413623735013214, "learning_rate": 1.9342546595630162e-05, "loss": 0.4542, "step": 1800 }, { "epoch": 0.1426817191523074, "grad_norm": 2.6938621139400984, "learning_rate": 1.9341631224336503e-05, "loss": 0.4423, "step": 1801 }, { "epoch": 0.14276094276094276, "grad_norm": 2.508217181521343, "learning_rate": 1.934071523794138e-05, "loss": 0.4365, "step": 1802 }, { "epoch": 0.14284016636957814, "grad_norm": 2.891330407700625, "learning_rate": 1.9339798636505102e-05, "loss": 0.3714, "step": 1803 }, { "epoch": 0.14291938997821352, "grad_norm": 2.2090716715124077, "learning_rate": 1.9338881420088023e-05, "loss": 0.4381, "step": 1804 }, { "epoch": 0.14299861358684887, "grad_norm": 2.7368042797276817, "learning_rate": 1.933796358875054e-05, "loss": 0.491, "step": 1805 }, { "epoch": 0.14307783719548425, "grad_norm": 2.7439895557239486, "learning_rate": 1.9337045142553085e-05, "loss": 0.4179, "step": 1806 }, { "epoch": 0.14315706080411963, "grad_norm": 2.7852492841905265, "learning_rate": 1.9336126081556134e-05, "loss": 0.4837, "step": 1807 }, { "epoch": 0.143236284412755, "grad_norm": 2.515799825706865, "learning_rate": 1.9335206405820208e-05, "loss": 0.3615, "step": 1808 }, { "epoch": 0.14331550802139037, "grad_norm": 2.5820336848485357, "learning_rate": 1.933428611540585e-05, "loss": 0.4055, "step": 1809 }, { "epoch": 0.14339473163002575, "grad_norm": 2.981222838263862, "learning_rate": 1.9333365210373668e-05, "loss": 0.4564, "step": 1810 }, { "epoch": 0.14347395523866113, "grad_norm": 2.2107166964307927, "learning_rate": 1.93324436907843e-05, "loss": 0.3019, "step": 1811 }, { "epoch": 0.1435531788472965, "grad_norm": 2.5502536517646, "learning_rate": 1.9331521556698415e-05, "loss": 0.4435, "step": 1812 }, { "epoch": 0.14363240245593187, "grad_norm": 2.5045531843154345, "learning_rate": 1.9330598808176736e-05, "loss": 0.4225, "step": 1813 }, { "epoch": 0.14371162606456725, "grad_norm": 2.7058029849479355, "learning_rate": 1.9329675445280024e-05, "loss": 0.3797, "step": 1814 }, { "epoch": 0.1437908496732026, "grad_norm": 2.21236360613346, "learning_rate": 1.9328751468069075e-05, "loss": 0.272, "step": 1815 }, { "epoch": 0.14387007328183798, "grad_norm": 2.3096842202082035, "learning_rate": 1.932782687660473e-05, "loss": 0.4776, "step": 1816 }, { "epoch": 0.14394929689047337, "grad_norm": 2.6397581305612223, "learning_rate": 1.9326901670947868e-05, "loss": 0.4297, "step": 1817 }, { "epoch": 0.14402852049910875, "grad_norm": 2.5240663320757832, "learning_rate": 1.9325975851159406e-05, "loss": 0.3381, "step": 1818 }, { "epoch": 0.1441077441077441, "grad_norm": 2.6027349216933007, "learning_rate": 1.932504941730031e-05, "loss": 0.3218, "step": 1819 }, { "epoch": 0.14418696771637948, "grad_norm": 2.91259323593562, "learning_rate": 1.932412236943158e-05, "loss": 0.4759, "step": 1820 }, { "epoch": 0.14426619132501486, "grad_norm": 2.4509157236949175, "learning_rate": 1.9323194707614253e-05, "loss": 0.3445, "step": 1821 }, { "epoch": 0.14434541493365022, "grad_norm": 2.221462385128824, "learning_rate": 1.932226643190942e-05, "loss": 0.4278, "step": 1822 }, { "epoch": 0.1444246385422856, "grad_norm": 2.8548104438241957, "learning_rate": 1.9321337542378193e-05, "loss": 0.5667, "step": 1823 }, { "epoch": 0.14450386215092098, "grad_norm": 2.7113131051064085, "learning_rate": 1.9320408039081745e-05, "loss": 0.3562, "step": 1824 }, { "epoch": 0.14458308575955633, "grad_norm": 2.613420516914703, "learning_rate": 1.9319477922081273e-05, "loss": 0.3635, "step": 1825 }, { "epoch": 0.14466230936819172, "grad_norm": 2.602287139400813, "learning_rate": 1.9318547191438018e-05, "loss": 0.3518, "step": 1826 }, { "epoch": 0.1447415329768271, "grad_norm": 2.490658825983515, "learning_rate": 1.9317615847213274e-05, "loss": 0.3429, "step": 1827 }, { "epoch": 0.14482075658546248, "grad_norm": 2.915452927581254, "learning_rate": 1.931668388946836e-05, "loss": 0.3886, "step": 1828 }, { "epoch": 0.14489998019409783, "grad_norm": 2.526407207891424, "learning_rate": 1.9315751318264636e-05, "loss": 0.5159, "step": 1829 }, { "epoch": 0.14497920380273321, "grad_norm": 2.955168164747815, "learning_rate": 1.9314818133663516e-05, "loss": 0.4846, "step": 1830 }, { "epoch": 0.1450584274113686, "grad_norm": 3.5121858068678686, "learning_rate": 1.9313884335726443e-05, "loss": 0.4437, "step": 1831 }, { "epoch": 0.14513765102000395, "grad_norm": 2.4542056913989954, "learning_rate": 1.93129499245149e-05, "loss": 0.4038, "step": 1832 }, { "epoch": 0.14521687462863933, "grad_norm": 2.614970627669613, "learning_rate": 1.9312014900090416e-05, "loss": 0.4505, "step": 1833 }, { "epoch": 0.1452960982372747, "grad_norm": 2.94459223807154, "learning_rate": 1.931107926251456e-05, "loss": 0.3997, "step": 1834 }, { "epoch": 0.1453753218459101, "grad_norm": 2.0741903242743276, "learning_rate": 1.931014301184893e-05, "loss": 0.3576, "step": 1835 }, { "epoch": 0.14545454545454545, "grad_norm": 3.0413031471553604, "learning_rate": 1.9309206148155188e-05, "loss": 0.4369, "step": 1836 }, { "epoch": 0.14553376906318083, "grad_norm": 3.022673240121045, "learning_rate": 1.930826867149501e-05, "loss": 0.3604, "step": 1837 }, { "epoch": 0.1456129926718162, "grad_norm": 2.6291402627308327, "learning_rate": 1.9307330581930127e-05, "loss": 0.4243, "step": 1838 }, { "epoch": 0.14569221628045156, "grad_norm": 2.5038010753974613, "learning_rate": 1.930639187952231e-05, "loss": 0.394, "step": 1839 }, { "epoch": 0.14577143988908695, "grad_norm": 2.4377826475629245, "learning_rate": 1.930545256433337e-05, "loss": 0.3946, "step": 1840 }, { "epoch": 0.14585066349772233, "grad_norm": 2.5884114901224793, "learning_rate": 1.930451263642515e-05, "loss": 0.4143, "step": 1841 }, { "epoch": 0.14592988710635768, "grad_norm": 2.4214111746774387, "learning_rate": 1.9303572095859545e-05, "loss": 0.4102, "step": 1842 }, { "epoch": 0.14600911071499306, "grad_norm": 2.521537916787601, "learning_rate": 1.9302630942698487e-05, "loss": 0.3341, "step": 1843 }, { "epoch": 0.14608833432362844, "grad_norm": 2.8937175398340416, "learning_rate": 1.9301689177003938e-05, "loss": 0.4283, "step": 1844 }, { "epoch": 0.14616755793226383, "grad_norm": 2.110164510539122, "learning_rate": 1.9300746798837913e-05, "loss": 0.3956, "step": 1845 }, { "epoch": 0.14624678154089918, "grad_norm": 3.300560078020215, "learning_rate": 1.9299803808262466e-05, "loss": 0.4582, "step": 1846 }, { "epoch": 0.14632600514953456, "grad_norm": 2.5135784194558655, "learning_rate": 1.9298860205339685e-05, "loss": 0.338, "step": 1847 }, { "epoch": 0.14640522875816994, "grad_norm": 2.5355473042736016, "learning_rate": 1.9297915990131704e-05, "loss": 0.3368, "step": 1848 }, { "epoch": 0.1464844523668053, "grad_norm": 2.753276065349817, "learning_rate": 1.9296971162700696e-05, "loss": 0.435, "step": 1849 }, { "epoch": 0.14656367597544068, "grad_norm": 2.3609144108329203, "learning_rate": 1.9296025723108867e-05, "loss": 0.3793, "step": 1850 }, { "epoch": 0.14664289958407606, "grad_norm": 2.7500007886832107, "learning_rate": 1.9295079671418474e-05, "loss": 0.4281, "step": 1851 }, { "epoch": 0.14672212319271144, "grad_norm": 2.2373448308827384, "learning_rate": 1.929413300769181e-05, "loss": 0.3577, "step": 1852 }, { "epoch": 0.1468013468013468, "grad_norm": 2.607880903530943, "learning_rate": 1.9293185731991212e-05, "loss": 0.4729, "step": 1853 }, { "epoch": 0.14688057040998218, "grad_norm": 2.409573353371404, "learning_rate": 1.9292237844379043e-05, "loss": 0.4331, "step": 1854 }, { "epoch": 0.14695979401861756, "grad_norm": 2.5813942984074627, "learning_rate": 1.929128934491773e-05, "loss": 0.3301, "step": 1855 }, { "epoch": 0.1470390176272529, "grad_norm": 2.3618287719994586, "learning_rate": 1.929034023366972e-05, "loss": 0.3427, "step": 1856 }, { "epoch": 0.1471182412358883, "grad_norm": 2.345557554489996, "learning_rate": 1.92893905106975e-05, "loss": 0.3543, "step": 1857 }, { "epoch": 0.14719746484452367, "grad_norm": 2.541322116913921, "learning_rate": 1.9288440176063617e-05, "loss": 0.3835, "step": 1858 }, { "epoch": 0.14727668845315905, "grad_norm": 2.9599252365726505, "learning_rate": 1.9287489229830645e-05, "loss": 0.5104, "step": 1859 }, { "epoch": 0.1473559120617944, "grad_norm": 2.824437379384795, "learning_rate": 1.9286537672061192e-05, "loss": 0.426, "step": 1860 }, { "epoch": 0.1474351356704298, "grad_norm": 2.3642509807595546, "learning_rate": 1.9285585502817917e-05, "loss": 0.3357, "step": 1861 }, { "epoch": 0.14751435927906517, "grad_norm": 2.9052100940915575, "learning_rate": 1.9284632722163515e-05, "loss": 0.3626, "step": 1862 }, { "epoch": 0.14759358288770053, "grad_norm": 2.6413430336799437, "learning_rate": 1.9283679330160726e-05, "loss": 0.4311, "step": 1863 }, { "epoch": 0.1476728064963359, "grad_norm": 2.69947293412902, "learning_rate": 1.9282725326872324e-05, "loss": 0.2961, "step": 1864 }, { "epoch": 0.1477520301049713, "grad_norm": 2.3077475718603653, "learning_rate": 1.9281770712361123e-05, "loss": 0.3365, "step": 1865 }, { "epoch": 0.14783125371360664, "grad_norm": 2.405100634374392, "learning_rate": 1.928081548668998e-05, "loss": 0.4454, "step": 1866 }, { "epoch": 0.14791047732224202, "grad_norm": 2.319488157281669, "learning_rate": 1.9279859649921797e-05, "loss": 0.4218, "step": 1867 }, { "epoch": 0.1479897009308774, "grad_norm": 2.179623154226663, "learning_rate": 1.9278903202119508e-05, "loss": 0.419, "step": 1868 }, { "epoch": 0.1480689245395128, "grad_norm": 2.416402890079171, "learning_rate": 1.9277946143346086e-05, "loss": 0.4061, "step": 1869 }, { "epoch": 0.14814814814814814, "grad_norm": 2.532753755277741, "learning_rate": 1.9276988473664557e-05, "loss": 0.4336, "step": 1870 }, { "epoch": 0.14822737175678352, "grad_norm": 2.5448983829791927, "learning_rate": 1.9276030193137974e-05, "loss": 0.4713, "step": 1871 }, { "epoch": 0.1483065953654189, "grad_norm": 2.344155104525831, "learning_rate": 1.927507130182944e-05, "loss": 0.2774, "step": 1872 }, { "epoch": 0.14838581897405426, "grad_norm": 2.5045687788943547, "learning_rate": 1.9274111799802084e-05, "loss": 0.358, "step": 1873 }, { "epoch": 0.14846504258268964, "grad_norm": 2.8864732873718055, "learning_rate": 1.9273151687119093e-05, "loss": 0.413, "step": 1874 }, { "epoch": 0.14854426619132502, "grad_norm": 3.242439974526763, "learning_rate": 1.927219096384368e-05, "loss": 0.3413, "step": 1875 }, { "epoch": 0.1486234897999604, "grad_norm": 2.9901869445980767, "learning_rate": 1.9271229630039107e-05, "loss": 0.4011, "step": 1876 }, { "epoch": 0.14870271340859575, "grad_norm": 2.218276497437702, "learning_rate": 1.9270267685768676e-05, "loss": 0.3244, "step": 1877 }, { "epoch": 0.14878193701723114, "grad_norm": 2.810244733400113, "learning_rate": 1.9269305131095722e-05, "loss": 0.3259, "step": 1878 }, { "epoch": 0.14886116062586652, "grad_norm": 2.971104829346572, "learning_rate": 1.9268341966083627e-05, "loss": 0.4164, "step": 1879 }, { "epoch": 0.14894038423450187, "grad_norm": 3.1301648101400805, "learning_rate": 1.9267378190795812e-05, "loss": 0.4199, "step": 1880 }, { "epoch": 0.14901960784313725, "grad_norm": 2.6280063689633972, "learning_rate": 1.9266413805295732e-05, "loss": 0.3553, "step": 1881 }, { "epoch": 0.14909883145177263, "grad_norm": 2.3529598745416735, "learning_rate": 1.9265448809646893e-05, "loss": 0.2887, "step": 1882 }, { "epoch": 0.149178055060408, "grad_norm": 2.9183526875864683, "learning_rate": 1.9264483203912826e-05, "loss": 0.3801, "step": 1883 }, { "epoch": 0.14925727866904337, "grad_norm": 2.7442967551169826, "learning_rate": 1.9263516988157123e-05, "loss": 0.4737, "step": 1884 }, { "epoch": 0.14933650227767875, "grad_norm": 2.920579505008127, "learning_rate": 1.92625501624434e-05, "loss": 0.4385, "step": 1885 }, { "epoch": 0.14941572588631413, "grad_norm": 2.0719634192726017, "learning_rate": 1.9261582726835316e-05, "loss": 0.3508, "step": 1886 }, { "epoch": 0.1494949494949495, "grad_norm": 2.368460922799676, "learning_rate": 1.926061468139657e-05, "loss": 0.3515, "step": 1887 }, { "epoch": 0.14957417310358487, "grad_norm": 2.51784280748681, "learning_rate": 1.9259646026190913e-05, "loss": 0.3316, "step": 1888 }, { "epoch": 0.14965339671222025, "grad_norm": 2.3549242722517256, "learning_rate": 1.9258676761282117e-05, "loss": 0.2845, "step": 1889 }, { "epoch": 0.1497326203208556, "grad_norm": 2.118485941256803, "learning_rate": 1.9257706886734e-05, "loss": 0.3105, "step": 1890 }, { "epoch": 0.14981184392949098, "grad_norm": 2.412451448432033, "learning_rate": 1.9256736402610437e-05, "loss": 0.3228, "step": 1891 }, { "epoch": 0.14989106753812637, "grad_norm": 2.6183425802412166, "learning_rate": 1.9255765308975322e-05, "loss": 0.3694, "step": 1892 }, { "epoch": 0.14997029114676175, "grad_norm": 2.849067470828875, "learning_rate": 1.9254793605892596e-05, "loss": 0.4644, "step": 1893 }, { "epoch": 0.1500495147553971, "grad_norm": 2.886134886523467, "learning_rate": 1.9253821293426242e-05, "loss": 0.3898, "step": 1894 }, { "epoch": 0.15012873836403248, "grad_norm": 2.2099469061875645, "learning_rate": 1.9252848371640284e-05, "loss": 0.4072, "step": 1895 }, { "epoch": 0.15020796197266786, "grad_norm": 2.4480325323851693, "learning_rate": 1.925187484059878e-05, "loss": 0.3571, "step": 1896 }, { "epoch": 0.15028718558130322, "grad_norm": 2.8235513613292342, "learning_rate": 1.9250900700365837e-05, "loss": 0.4745, "step": 1897 }, { "epoch": 0.1503664091899386, "grad_norm": 2.207864618771977, "learning_rate": 1.9249925951005593e-05, "loss": 0.2997, "step": 1898 }, { "epoch": 0.15044563279857398, "grad_norm": 2.3071061681709044, "learning_rate": 1.9248950592582235e-05, "loss": 0.402, "step": 1899 }, { "epoch": 0.15052485640720936, "grad_norm": 2.2341515895652466, "learning_rate": 1.9247974625159983e-05, "loss": 0.31, "step": 1900 }, { "epoch": 0.15060408001584472, "grad_norm": 2.3209978738997252, "learning_rate": 1.92469980488031e-05, "loss": 0.4352, "step": 1901 }, { "epoch": 0.1506833036244801, "grad_norm": 2.8130193032203645, "learning_rate": 1.924602086357589e-05, "loss": 0.4978, "step": 1902 }, { "epoch": 0.15076252723311548, "grad_norm": 2.577839814331642, "learning_rate": 1.9245043069542696e-05, "loss": 0.3681, "step": 1903 }, { "epoch": 0.15084175084175083, "grad_norm": 2.427918404756589, "learning_rate": 1.92440646667679e-05, "loss": 0.3145, "step": 1904 }, { "epoch": 0.1509209744503862, "grad_norm": 2.555686083023233, "learning_rate": 1.9243085655315924e-05, "loss": 0.5014, "step": 1905 }, { "epoch": 0.1510001980590216, "grad_norm": 2.9635544974587296, "learning_rate": 1.924210603525123e-05, "loss": 0.4371, "step": 1906 }, { "epoch": 0.15107942166765695, "grad_norm": 2.596129947065641, "learning_rate": 1.924112580663833e-05, "loss": 0.3645, "step": 1907 }, { "epoch": 0.15115864527629233, "grad_norm": 2.372571955362532, "learning_rate": 1.9240144969541754e-05, "loss": 0.4112, "step": 1908 }, { "epoch": 0.1512378688849277, "grad_norm": 2.5385848730304374, "learning_rate": 1.9239163524026097e-05, "loss": 0.3696, "step": 1909 }, { "epoch": 0.1513170924935631, "grad_norm": 2.4157411808313247, "learning_rate": 1.9238181470155978e-05, "loss": 0.3655, "step": 1910 }, { "epoch": 0.15139631610219845, "grad_norm": 2.6058752101688807, "learning_rate": 1.923719880799606e-05, "loss": 0.4488, "step": 1911 }, { "epoch": 0.15147553971083383, "grad_norm": 2.795193109191069, "learning_rate": 1.9236215537611044e-05, "loss": 0.4071, "step": 1912 }, { "epoch": 0.1515547633194692, "grad_norm": 2.683267018921931, "learning_rate": 1.923523165906568e-05, "loss": 0.4357, "step": 1913 }, { "epoch": 0.15163398692810456, "grad_norm": 2.752040545520521, "learning_rate": 1.923424717242475e-05, "loss": 0.4069, "step": 1914 }, { "epoch": 0.15171321053673995, "grad_norm": 2.6445715990298897, "learning_rate": 1.923326207775307e-05, "loss": 0.4138, "step": 1915 }, { "epoch": 0.15179243414537533, "grad_norm": 2.2271638580208126, "learning_rate": 1.9232276375115517e-05, "loss": 0.45, "step": 1916 }, { "epoch": 0.1518716577540107, "grad_norm": 2.5305914712532647, "learning_rate": 1.9231290064576985e-05, "loss": 0.353, "step": 1917 }, { "epoch": 0.15195088136264606, "grad_norm": 2.158368376520064, "learning_rate": 1.923030314620242e-05, "loss": 0.365, "step": 1918 }, { "epoch": 0.15203010497128144, "grad_norm": 3.1284334951262402, "learning_rate": 1.9229315620056805e-05, "loss": 0.4513, "step": 1919 }, { "epoch": 0.15210932857991682, "grad_norm": 2.8823217180333307, "learning_rate": 1.9228327486205166e-05, "loss": 0.4524, "step": 1920 }, { "epoch": 0.15218855218855218, "grad_norm": 2.5224470056536603, "learning_rate": 1.9227338744712565e-05, "loss": 0.4435, "step": 1921 }, { "epoch": 0.15226777579718756, "grad_norm": 2.415218772986, "learning_rate": 1.9226349395644106e-05, "loss": 0.4549, "step": 1922 }, { "epoch": 0.15234699940582294, "grad_norm": 2.9917163657098205, "learning_rate": 1.9225359439064934e-05, "loss": 0.416, "step": 1923 }, { "epoch": 0.1524262230144583, "grad_norm": 2.596585523806836, "learning_rate": 1.9224368875040235e-05, "loss": 0.4176, "step": 1924 }, { "epoch": 0.15250544662309368, "grad_norm": 2.4960749946423584, "learning_rate": 1.922337770363523e-05, "loss": 0.3505, "step": 1925 }, { "epoch": 0.15258467023172906, "grad_norm": 2.4648872949395764, "learning_rate": 1.922238592491518e-05, "loss": 0.3567, "step": 1926 }, { "epoch": 0.15266389384036444, "grad_norm": 2.291997432617008, "learning_rate": 1.9221393538945397e-05, "loss": 0.4393, "step": 1927 }, { "epoch": 0.1527431174489998, "grad_norm": 2.581662550513519, "learning_rate": 1.9220400545791216e-05, "loss": 0.365, "step": 1928 }, { "epoch": 0.15282234105763517, "grad_norm": 2.7329104459559397, "learning_rate": 1.9219406945518028e-05, "loss": 0.44, "step": 1929 }, { "epoch": 0.15290156466627056, "grad_norm": 2.3173582707939318, "learning_rate": 1.921841273819125e-05, "loss": 0.3395, "step": 1930 }, { "epoch": 0.1529807882749059, "grad_norm": 3.836270283575409, "learning_rate": 1.9217417923876352e-05, "loss": 0.595, "step": 1931 }, { "epoch": 0.1530600118835413, "grad_norm": 2.7094393505181924, "learning_rate": 1.9216422502638836e-05, "loss": 0.3966, "step": 1932 }, { "epoch": 0.15313923549217667, "grad_norm": 2.1733271354677064, "learning_rate": 1.9215426474544242e-05, "loss": 0.2855, "step": 1933 }, { "epoch": 0.15321845910081205, "grad_norm": 2.454972338653038, "learning_rate": 1.9214429839658156e-05, "loss": 0.2843, "step": 1934 }, { "epoch": 0.1532976827094474, "grad_norm": 2.289194714120497, "learning_rate": 1.9213432598046205e-05, "loss": 0.3554, "step": 1935 }, { "epoch": 0.1533769063180828, "grad_norm": 2.462188477398111, "learning_rate": 1.9212434749774048e-05, "loss": 0.3417, "step": 1936 }, { "epoch": 0.15345612992671817, "grad_norm": 2.48518064749135, "learning_rate": 1.921143629490739e-05, "loss": 0.3711, "step": 1937 }, { "epoch": 0.15353535353535352, "grad_norm": 2.3725140889180025, "learning_rate": 1.9210437233511974e-05, "loss": 0.3047, "step": 1938 }, { "epoch": 0.1536145771439889, "grad_norm": 3.2169359464900036, "learning_rate": 1.9209437565653587e-05, "loss": 0.3353, "step": 1939 }, { "epoch": 0.1536938007526243, "grad_norm": 2.59904687906021, "learning_rate": 1.9208437291398045e-05, "loss": 0.4435, "step": 1940 }, { "epoch": 0.15377302436125967, "grad_norm": 2.607748724392275, "learning_rate": 1.920743641081122e-05, "loss": 0.4006, "step": 1941 }, { "epoch": 0.15385224796989502, "grad_norm": 2.043277301946978, "learning_rate": 1.920643492395901e-05, "loss": 0.3249, "step": 1942 }, { "epoch": 0.1539314715785304, "grad_norm": 2.9051574925978847, "learning_rate": 1.9205432830907353e-05, "loss": 0.42, "step": 1943 }, { "epoch": 0.15401069518716579, "grad_norm": 2.549680541977789, "learning_rate": 1.9204430131722243e-05, "loss": 0.3844, "step": 1944 }, { "epoch": 0.15408991879580114, "grad_norm": 2.2514664299702836, "learning_rate": 1.9203426826469695e-05, "loss": 0.3962, "step": 1945 }, { "epoch": 0.15416914240443652, "grad_norm": 2.3550168976942243, "learning_rate": 1.9202422915215777e-05, "loss": 0.3624, "step": 1946 }, { "epoch": 0.1542483660130719, "grad_norm": 2.750621721649528, "learning_rate": 1.920141839802659e-05, "loss": 0.3769, "step": 1947 }, { "epoch": 0.15432758962170726, "grad_norm": 2.7096270626917836, "learning_rate": 1.9200413274968276e-05, "loss": 0.3509, "step": 1948 }, { "epoch": 0.15440681323034264, "grad_norm": 2.5011842807184403, "learning_rate": 1.9199407546107014e-05, "loss": 0.4091, "step": 1949 }, { "epoch": 0.15448603683897802, "grad_norm": 2.400068797552773, "learning_rate": 1.919840121150903e-05, "loss": 0.3399, "step": 1950 }, { "epoch": 0.1545652604476134, "grad_norm": 3.365692975849877, "learning_rate": 1.9197394271240587e-05, "loss": 0.5116, "step": 1951 }, { "epoch": 0.15464448405624875, "grad_norm": 2.348411823614479, "learning_rate": 1.919638672536799e-05, "loss": 0.4461, "step": 1952 }, { "epoch": 0.15472370766488414, "grad_norm": 2.326574033758665, "learning_rate": 1.9195378573957574e-05, "loss": 0.3582, "step": 1953 }, { "epoch": 0.15480293127351952, "grad_norm": 3.197937525427283, "learning_rate": 1.9194369817075725e-05, "loss": 0.5215, "step": 1954 }, { "epoch": 0.15488215488215487, "grad_norm": 2.364082698442043, "learning_rate": 1.9193360454788864e-05, "loss": 0.4119, "step": 1955 }, { "epoch": 0.15496137849079025, "grad_norm": 2.261356086273808, "learning_rate": 1.919235048716345e-05, "loss": 0.3367, "step": 1956 }, { "epoch": 0.15504060209942563, "grad_norm": 1.9733795165007209, "learning_rate": 1.919133991426599e-05, "loss": 0.339, "step": 1957 }, { "epoch": 0.15511982570806102, "grad_norm": 2.5969446728972927, "learning_rate": 1.919032873616302e-05, "loss": 0.5067, "step": 1958 }, { "epoch": 0.15519904931669637, "grad_norm": 2.185435591931016, "learning_rate": 1.918931695292113e-05, "loss": 0.4309, "step": 1959 }, { "epoch": 0.15527827292533175, "grad_norm": 2.188221493170469, "learning_rate": 1.918830456460693e-05, "loss": 0.42, "step": 1960 }, { "epoch": 0.15535749653396713, "grad_norm": 2.550535303965114, "learning_rate": 1.9187291571287088e-05, "loss": 0.3554, "step": 1961 }, { "epoch": 0.15543672014260249, "grad_norm": 2.2301417324382613, "learning_rate": 1.91862779730283e-05, "loss": 0.3438, "step": 1962 }, { "epoch": 0.15551594375123787, "grad_norm": 2.740303270086453, "learning_rate": 1.918526376989731e-05, "loss": 0.4272, "step": 1963 }, { "epoch": 0.15559516735987325, "grad_norm": 2.5375538918440874, "learning_rate": 1.9184248961960895e-05, "loss": 0.3971, "step": 1964 }, { "epoch": 0.1556743909685086, "grad_norm": 2.658960699154366, "learning_rate": 1.918323354928588e-05, "loss": 0.4898, "step": 1965 }, { "epoch": 0.15575361457714398, "grad_norm": 2.0416244203848697, "learning_rate": 1.918221753193912e-05, "loss": 0.3569, "step": 1966 }, { "epoch": 0.15583283818577937, "grad_norm": 2.9353735087713404, "learning_rate": 1.9181200909987524e-05, "loss": 0.4745, "step": 1967 }, { "epoch": 0.15591206179441475, "grad_norm": 2.3663236729472055, "learning_rate": 1.918018368349802e-05, "loss": 0.3946, "step": 1968 }, { "epoch": 0.1559912854030501, "grad_norm": 2.5952464620638684, "learning_rate": 1.9179165852537596e-05, "loss": 0.4383, "step": 1969 }, { "epoch": 0.15607050901168548, "grad_norm": 2.65250779277471, "learning_rate": 1.9178147417173265e-05, "loss": 0.5501, "step": 1970 }, { "epoch": 0.15614973262032086, "grad_norm": 2.5965675177727428, "learning_rate": 1.917712837747209e-05, "loss": 0.4203, "step": 1971 }, { "epoch": 0.15622895622895622, "grad_norm": 2.725024577997951, "learning_rate": 1.917610873350117e-05, "loss": 0.3939, "step": 1972 }, { "epoch": 0.1563081798375916, "grad_norm": 2.7905166985781857, "learning_rate": 1.917508848532764e-05, "loss": 0.3775, "step": 1973 }, { "epoch": 0.15638740344622698, "grad_norm": 2.408246525866466, "learning_rate": 1.9174067633018682e-05, "loss": 0.3487, "step": 1974 }, { "epoch": 0.15646662705486236, "grad_norm": 2.288284118588807, "learning_rate": 1.9173046176641515e-05, "loss": 0.3322, "step": 1975 }, { "epoch": 0.15654585066349772, "grad_norm": 2.5430380247700914, "learning_rate": 1.917202411626339e-05, "loss": 0.3408, "step": 1976 }, { "epoch": 0.1566250742721331, "grad_norm": 2.4220785156936127, "learning_rate": 1.9171001451951616e-05, "loss": 0.4192, "step": 1977 }, { "epoch": 0.15670429788076848, "grad_norm": 4.208517632278412, "learning_rate": 1.916997818377352e-05, "loss": 0.4149, "step": 1978 }, { "epoch": 0.15678352148940383, "grad_norm": 2.970158632130674, "learning_rate": 1.9168954311796487e-05, "loss": 0.383, "step": 1979 }, { "epoch": 0.1568627450980392, "grad_norm": 2.0977795879296144, "learning_rate": 1.9167929836087932e-05, "loss": 0.3751, "step": 1980 }, { "epoch": 0.1569419687066746, "grad_norm": 2.490154249518521, "learning_rate": 1.9166904756715307e-05, "loss": 0.3586, "step": 1981 }, { "epoch": 0.15702119231530995, "grad_norm": 2.5337492480961212, "learning_rate": 1.9165879073746112e-05, "loss": 0.5593, "step": 1982 }, { "epoch": 0.15710041592394533, "grad_norm": 2.1043513379897876, "learning_rate": 1.9164852787247887e-05, "loss": 0.4316, "step": 1983 }, { "epoch": 0.1571796395325807, "grad_norm": 2.395381249523691, "learning_rate": 1.91638258972882e-05, "loss": 0.3593, "step": 1984 }, { "epoch": 0.1572588631412161, "grad_norm": 2.4304424087456695, "learning_rate": 1.916279840393467e-05, "loss": 0.4216, "step": 1985 }, { "epoch": 0.15733808674985145, "grad_norm": 3.044013056940546, "learning_rate": 1.916177030725496e-05, "loss": 0.507, "step": 1986 }, { "epoch": 0.15741731035848683, "grad_norm": 2.5012033696965146, "learning_rate": 1.9160741607316755e-05, "loss": 0.3416, "step": 1987 }, { "epoch": 0.1574965339671222, "grad_norm": 2.49684165356504, "learning_rate": 1.9159712304187795e-05, "loss": 0.3868, "step": 1988 }, { "epoch": 0.15757575757575756, "grad_norm": 2.4806195817284142, "learning_rate": 1.9158682397935852e-05, "loss": 0.3231, "step": 1989 }, { "epoch": 0.15765498118439294, "grad_norm": 3.0325071001164683, "learning_rate": 1.9157651888628744e-05, "loss": 0.4461, "step": 1990 }, { "epoch": 0.15773420479302833, "grad_norm": 2.320859475975259, "learning_rate": 1.915662077633432e-05, "loss": 0.411, "step": 1991 }, { "epoch": 0.1578134284016637, "grad_norm": 2.298453393391117, "learning_rate": 1.915558906112048e-05, "loss": 0.3277, "step": 1992 }, { "epoch": 0.15789265201029906, "grad_norm": 1.9772050916982755, "learning_rate": 1.915455674305515e-05, "loss": 0.2862, "step": 1993 }, { "epoch": 0.15797187561893444, "grad_norm": 2.513249010052859, "learning_rate": 1.9153523822206312e-05, "loss": 0.3768, "step": 1994 }, { "epoch": 0.15805109922756982, "grad_norm": 2.4778682765515008, "learning_rate": 1.9152490298641973e-05, "loss": 0.4127, "step": 1995 }, { "epoch": 0.15813032283620518, "grad_norm": 2.4736841771935825, "learning_rate": 1.9151456172430186e-05, "loss": 0.3561, "step": 1996 }, { "epoch": 0.15820954644484056, "grad_norm": 2.3262542182228967, "learning_rate": 1.9150421443639045e-05, "loss": 0.3611, "step": 1997 }, { "epoch": 0.15828877005347594, "grad_norm": 2.66421357961457, "learning_rate": 1.9149386112336682e-05, "loss": 0.4409, "step": 1998 }, { "epoch": 0.15836799366211132, "grad_norm": 2.293721974298357, "learning_rate": 1.9148350178591264e-05, "loss": 0.3153, "step": 1999 }, { "epoch": 0.15844721727074668, "grad_norm": 2.4187314984910717, "learning_rate": 1.914731364247101e-05, "loss": 0.4574, "step": 2000 }, { "epoch": 0.15852644087938206, "grad_norm": 2.492077352643004, "learning_rate": 1.914627650404416e-05, "loss": 0.3937, "step": 2001 }, { "epoch": 0.15860566448801744, "grad_norm": 3.8429708832477933, "learning_rate": 1.9145238763379016e-05, "loss": 0.4076, "step": 2002 }, { "epoch": 0.1586848880966528, "grad_norm": 2.37446947715965, "learning_rate": 1.9144200420543905e-05, "loss": 0.3978, "step": 2003 }, { "epoch": 0.15876411170528817, "grad_norm": 2.2525181852610587, "learning_rate": 1.9143161475607194e-05, "loss": 0.3748, "step": 2004 }, { "epoch": 0.15884333531392356, "grad_norm": 3.2333314540265965, "learning_rate": 1.9142121928637292e-05, "loss": 0.4077, "step": 2005 }, { "epoch": 0.1589225589225589, "grad_norm": 2.2886793279480915, "learning_rate": 1.914108177970265e-05, "loss": 0.3586, "step": 2006 }, { "epoch": 0.1590017825311943, "grad_norm": 2.9240298932930706, "learning_rate": 1.914004102887176e-05, "loss": 0.4101, "step": 2007 }, { "epoch": 0.15908100613982967, "grad_norm": 2.645957628733759, "learning_rate": 1.9138999676213146e-05, "loss": 0.4604, "step": 2008 }, { "epoch": 0.15916022974846505, "grad_norm": 2.8304492123260023, "learning_rate": 1.9137957721795376e-05, "loss": 0.4334, "step": 2009 }, { "epoch": 0.1592394533571004, "grad_norm": 2.682277593664758, "learning_rate": 1.913691516568706e-05, "loss": 0.4744, "step": 2010 }, { "epoch": 0.1593186769657358, "grad_norm": 2.8318036336564925, "learning_rate": 1.9135872007956846e-05, "loss": 0.3819, "step": 2011 }, { "epoch": 0.15939790057437117, "grad_norm": 2.5515605354784734, "learning_rate": 1.9134828248673415e-05, "loss": 0.3293, "step": 2012 }, { "epoch": 0.15947712418300652, "grad_norm": 2.5218977385903383, "learning_rate": 1.9133783887905502e-05, "loss": 0.4383, "step": 2013 }, { "epoch": 0.1595563477916419, "grad_norm": 2.42101144537026, "learning_rate": 1.913273892572187e-05, "loss": 0.3175, "step": 2014 }, { "epoch": 0.1596355714002773, "grad_norm": 2.8670695029270354, "learning_rate": 1.9131693362191318e-05, "loss": 0.3706, "step": 2015 }, { "epoch": 0.15971479500891267, "grad_norm": 2.965626169572073, "learning_rate": 1.91306471973827e-05, "loss": 0.4582, "step": 2016 }, { "epoch": 0.15979401861754802, "grad_norm": 2.5223138223433463, "learning_rate": 1.91296004313649e-05, "loss": 0.4081, "step": 2017 }, { "epoch": 0.1598732422261834, "grad_norm": 2.648496364884265, "learning_rate": 1.9128553064206835e-05, "loss": 0.4031, "step": 2018 }, { "epoch": 0.15995246583481879, "grad_norm": 2.71057088173164, "learning_rate": 1.9127505095977483e-05, "loss": 0.5089, "step": 2019 }, { "epoch": 0.16003168944345414, "grad_norm": 2.8047621605232114, "learning_rate": 1.9126456526745833e-05, "loss": 0.4594, "step": 2020 }, { "epoch": 0.16011091305208952, "grad_norm": 2.858064942449386, "learning_rate": 1.9125407356580932e-05, "loss": 0.3752, "step": 2021 }, { "epoch": 0.1601901366607249, "grad_norm": 2.84549067250781, "learning_rate": 1.9124357585551872e-05, "loss": 0.4845, "step": 2022 }, { "epoch": 0.16026936026936026, "grad_norm": 2.498779682999885, "learning_rate": 1.9123307213727764e-05, "loss": 0.3665, "step": 2023 }, { "epoch": 0.16034858387799564, "grad_norm": 2.47476938925415, "learning_rate": 1.9122256241177776e-05, "loss": 0.4031, "step": 2024 }, { "epoch": 0.16042780748663102, "grad_norm": 2.642160064942329, "learning_rate": 1.9121204667971107e-05, "loss": 0.4218, "step": 2025 }, { "epoch": 0.1605070310952664, "grad_norm": 2.615368504755733, "learning_rate": 1.9120152494177e-05, "loss": 0.3612, "step": 2026 }, { "epoch": 0.16058625470390175, "grad_norm": 2.5313924368608807, "learning_rate": 1.9119099719864735e-05, "loss": 0.4081, "step": 2027 }, { "epoch": 0.16066547831253714, "grad_norm": 3.8175552796999437, "learning_rate": 1.911804634510363e-05, "loss": 0.3725, "step": 2028 }, { "epoch": 0.16074470192117252, "grad_norm": 2.9951060809210617, "learning_rate": 1.911699236996305e-05, "loss": 0.4088, "step": 2029 }, { "epoch": 0.16082392552980787, "grad_norm": 2.348003594052534, "learning_rate": 1.911593779451239e-05, "loss": 0.3109, "step": 2030 }, { "epoch": 0.16090314913844325, "grad_norm": 2.757184007426351, "learning_rate": 1.911488261882109e-05, "loss": 0.4856, "step": 2031 }, { "epoch": 0.16098237274707863, "grad_norm": 2.303383573113819, "learning_rate": 1.911382684295862e-05, "loss": 0.3683, "step": 2032 }, { "epoch": 0.16106159635571402, "grad_norm": 2.671306628313457, "learning_rate": 1.911277046699451e-05, "loss": 0.4193, "step": 2033 }, { "epoch": 0.16114081996434937, "grad_norm": 2.458050905598702, "learning_rate": 1.9111713490998316e-05, "loss": 0.2779, "step": 2034 }, { "epoch": 0.16122004357298475, "grad_norm": 2.148801434175658, "learning_rate": 1.911065591503963e-05, "loss": 0.4058, "step": 2035 }, { "epoch": 0.16129926718162013, "grad_norm": 2.5667405398210543, "learning_rate": 1.9109597739188088e-05, "loss": 0.4142, "step": 2036 }, { "epoch": 0.16137849079025549, "grad_norm": 2.914776632242434, "learning_rate": 1.9108538963513366e-05, "loss": 0.4438, "step": 2037 }, { "epoch": 0.16145771439889087, "grad_norm": 2.8905125445208593, "learning_rate": 1.9107479588085182e-05, "loss": 0.3398, "step": 2038 }, { "epoch": 0.16153693800752625, "grad_norm": 2.006768054386868, "learning_rate": 1.910641961297329e-05, "loss": 0.4016, "step": 2039 }, { "epoch": 0.16161616161616163, "grad_norm": 2.552066249231819, "learning_rate": 1.9105359038247484e-05, "loss": 0.3846, "step": 2040 }, { "epoch": 0.16169538522479698, "grad_norm": 2.3096835855174214, "learning_rate": 1.9104297863977595e-05, "loss": 0.4205, "step": 2041 }, { "epoch": 0.16177460883343237, "grad_norm": 2.596706776733995, "learning_rate": 1.9103236090233507e-05, "loss": 0.4578, "step": 2042 }, { "epoch": 0.16185383244206775, "grad_norm": 2.3372339557634927, "learning_rate": 1.9102173717085114e-05, "loss": 0.3605, "step": 2043 }, { "epoch": 0.1619330560507031, "grad_norm": 2.5420338526568846, "learning_rate": 1.9101110744602384e-05, "loss": 0.4239, "step": 2044 }, { "epoch": 0.16201227965933848, "grad_norm": 2.537370175925376, "learning_rate": 1.9100047172855306e-05, "loss": 0.4747, "step": 2045 }, { "epoch": 0.16209150326797386, "grad_norm": 2.295719027597248, "learning_rate": 1.9098983001913903e-05, "loss": 0.3365, "step": 2046 }, { "epoch": 0.16217072687660922, "grad_norm": 2.4511334477056392, "learning_rate": 1.909791823184825e-05, "loss": 0.3897, "step": 2047 }, { "epoch": 0.1622499504852446, "grad_norm": 2.2627039506298825, "learning_rate": 1.909685286272846e-05, "loss": 0.4409, "step": 2048 }, { "epoch": 0.16232917409387998, "grad_norm": 2.4580596605401714, "learning_rate": 1.9095786894624685e-05, "loss": 0.3955, "step": 2049 }, { "epoch": 0.16240839770251536, "grad_norm": 2.6876106053561037, "learning_rate": 1.9094720327607102e-05, "loss": 0.4521, "step": 2050 }, { "epoch": 0.16248762131115072, "grad_norm": 2.688260383973597, "learning_rate": 1.909365316174595e-05, "loss": 0.4015, "step": 2051 }, { "epoch": 0.1625668449197861, "grad_norm": 1.8927562871984127, "learning_rate": 1.9092585397111492e-05, "loss": 0.2599, "step": 2052 }, { "epoch": 0.16264606852842148, "grad_norm": 2.7526013589756793, "learning_rate": 1.9091517033774038e-05, "loss": 0.4724, "step": 2053 }, { "epoch": 0.16272529213705683, "grad_norm": 2.1804564163925293, "learning_rate": 1.9090448071803932e-05, "loss": 0.3649, "step": 2054 }, { "epoch": 0.1628045157456922, "grad_norm": 2.397642986835983, "learning_rate": 1.908937851127156e-05, "loss": 0.3842, "step": 2055 }, { "epoch": 0.1628837393543276, "grad_norm": 2.413831701072522, "learning_rate": 1.908830835224735e-05, "loss": 0.3511, "step": 2056 }, { "epoch": 0.16296296296296298, "grad_norm": 2.162875520520643, "learning_rate": 1.9087237594801762e-05, "loss": 0.3863, "step": 2057 }, { "epoch": 0.16304218657159833, "grad_norm": 2.4008368626074748, "learning_rate": 1.9086166239005305e-05, "loss": 0.5084, "step": 2058 }, { "epoch": 0.1631214101802337, "grad_norm": 2.5468590631097077, "learning_rate": 1.908509428492852e-05, "loss": 0.3354, "step": 2059 }, { "epoch": 0.1632006337888691, "grad_norm": 2.543397628910795, "learning_rate": 1.9084021732641994e-05, "loss": 0.3894, "step": 2060 }, { "epoch": 0.16327985739750445, "grad_norm": 2.4492414147101824, "learning_rate": 1.9082948582216344e-05, "loss": 0.4175, "step": 2061 }, { "epoch": 0.16335908100613983, "grad_norm": 3.218981075025703, "learning_rate": 1.9081874833722234e-05, "loss": 0.459, "step": 2062 }, { "epoch": 0.1634383046147752, "grad_norm": 2.87191996109704, "learning_rate": 1.908080048723037e-05, "loss": 0.4823, "step": 2063 }, { "epoch": 0.16351752822341056, "grad_norm": 2.2306528898608136, "learning_rate": 1.9079725542811484e-05, "loss": 0.3937, "step": 2064 }, { "epoch": 0.16359675183204594, "grad_norm": 2.041495451703393, "learning_rate": 1.907865000053636e-05, "loss": 0.3763, "step": 2065 }, { "epoch": 0.16367597544068133, "grad_norm": 1.8736622485455503, "learning_rate": 1.9077573860475815e-05, "loss": 0.3738, "step": 2066 }, { "epoch": 0.1637551990493167, "grad_norm": 2.6715963215775957, "learning_rate": 1.9076497122700713e-05, "loss": 0.3496, "step": 2067 }, { "epoch": 0.16383442265795206, "grad_norm": 1.979250690921824, "learning_rate": 1.9075419787281948e-05, "loss": 0.4179, "step": 2068 }, { "epoch": 0.16391364626658744, "grad_norm": 2.422544721645588, "learning_rate": 1.9074341854290458e-05, "loss": 0.3265, "step": 2069 }, { "epoch": 0.16399286987522282, "grad_norm": 1.8309182594501825, "learning_rate": 1.907326332379722e-05, "loss": 0.3067, "step": 2070 }, { "epoch": 0.16407209348385818, "grad_norm": 1.9939881887508093, "learning_rate": 1.9072184195873248e-05, "loss": 0.2872, "step": 2071 }, { "epoch": 0.16415131709249356, "grad_norm": 2.091185918907336, "learning_rate": 1.9071104470589603e-05, "loss": 0.3555, "step": 2072 }, { "epoch": 0.16423054070112894, "grad_norm": 3.1267310089949523, "learning_rate": 1.9070024148017375e-05, "loss": 0.4318, "step": 2073 }, { "epoch": 0.16430976430976432, "grad_norm": 2.505969102780564, "learning_rate": 1.9068943228227695e-05, "loss": 0.3401, "step": 2074 }, { "epoch": 0.16438898791839968, "grad_norm": 2.937497177671296, "learning_rate": 1.9067861711291744e-05, "loss": 0.3575, "step": 2075 }, { "epoch": 0.16446821152703506, "grad_norm": 2.564068401025119, "learning_rate": 1.906677959728073e-05, "loss": 0.3903, "step": 2076 }, { "epoch": 0.16454743513567044, "grad_norm": 2.4717480204506606, "learning_rate": 1.9065696886265906e-05, "loss": 0.4541, "step": 2077 }, { "epoch": 0.1646266587443058, "grad_norm": 2.265608078191137, "learning_rate": 1.9064613578318564e-05, "loss": 0.2936, "step": 2078 }, { "epoch": 0.16470588235294117, "grad_norm": 2.5264154209087755, "learning_rate": 1.9063529673510036e-05, "loss": 0.2668, "step": 2079 }, { "epoch": 0.16478510596157656, "grad_norm": 2.6262399213341205, "learning_rate": 1.9062445171911688e-05, "loss": 0.4439, "step": 2080 }, { "epoch": 0.1648643295702119, "grad_norm": 2.685429770487024, "learning_rate": 1.9061360073594933e-05, "loss": 0.3894, "step": 2081 }, { "epoch": 0.1649435531788473, "grad_norm": 3.0024177468917532, "learning_rate": 1.9060274378631215e-05, "loss": 0.4441, "step": 2082 }, { "epoch": 0.16502277678748267, "grad_norm": 2.7851514300737694, "learning_rate": 1.9059188087092025e-05, "loss": 0.43, "step": 2083 }, { "epoch": 0.16510200039611805, "grad_norm": 2.5017197289095403, "learning_rate": 1.905810119904889e-05, "loss": 0.2752, "step": 2084 }, { "epoch": 0.1651812240047534, "grad_norm": 2.413077150062562, "learning_rate": 1.9057013714573375e-05, "loss": 0.4302, "step": 2085 }, { "epoch": 0.1652604476133888, "grad_norm": 2.4092520752561426, "learning_rate": 1.9055925633737088e-05, "loss": 0.3911, "step": 2086 }, { "epoch": 0.16533967122202417, "grad_norm": 2.4886958012007856, "learning_rate": 1.905483695661167e-05, "loss": 0.4181, "step": 2087 }, { "epoch": 0.16541889483065952, "grad_norm": 2.147771132208899, "learning_rate": 1.905374768326881e-05, "loss": 0.3424, "step": 2088 }, { "epoch": 0.1654981184392949, "grad_norm": 2.736285722084298, "learning_rate": 1.9052657813780226e-05, "loss": 0.4367, "step": 2089 }, { "epoch": 0.1655773420479303, "grad_norm": 2.086534234254006, "learning_rate": 1.9051567348217686e-05, "loss": 0.3098, "step": 2090 }, { "epoch": 0.16565656565656567, "grad_norm": 2.3589462520488502, "learning_rate": 1.905047628665299e-05, "loss": 0.2758, "step": 2091 }, { "epoch": 0.16573578926520102, "grad_norm": 2.384522005928775, "learning_rate": 1.9049384629157974e-05, "loss": 0.376, "step": 2092 }, { "epoch": 0.1658150128738364, "grad_norm": 2.3333173360236246, "learning_rate": 1.9048292375804527e-05, "loss": 0.5036, "step": 2093 }, { "epoch": 0.16589423648247179, "grad_norm": 2.761465650581943, "learning_rate": 1.9047199526664565e-05, "loss": 0.4701, "step": 2094 }, { "epoch": 0.16597346009110714, "grad_norm": 2.390273717896687, "learning_rate": 1.9046106081810047e-05, "loss": 0.4178, "step": 2095 }, { "epoch": 0.16605268369974252, "grad_norm": 2.485548426032065, "learning_rate": 1.9045012041312966e-05, "loss": 0.4411, "step": 2096 }, { "epoch": 0.1661319073083779, "grad_norm": 1.9644997048672619, "learning_rate": 1.904391740524537e-05, "loss": 0.3021, "step": 2097 }, { "epoch": 0.16621113091701328, "grad_norm": 2.3746144407645713, "learning_rate": 1.9042822173679325e-05, "loss": 0.3881, "step": 2098 }, { "epoch": 0.16629035452564864, "grad_norm": 2.2568019717306687, "learning_rate": 1.9041726346686952e-05, "loss": 0.3511, "step": 2099 }, { "epoch": 0.16636957813428402, "grad_norm": 2.457961894509813, "learning_rate": 1.9040629924340406e-05, "loss": 0.4055, "step": 2100 }, { "epoch": 0.1664488017429194, "grad_norm": 2.3332681598428158, "learning_rate": 1.903953290671188e-05, "loss": 0.4422, "step": 2101 }, { "epoch": 0.16652802535155475, "grad_norm": 2.0983274627979642, "learning_rate": 1.903843529387361e-05, "loss": 0.4128, "step": 2102 }, { "epoch": 0.16660724896019014, "grad_norm": 2.2835278490024002, "learning_rate": 1.903733708589786e-05, "loss": 0.4479, "step": 2103 }, { "epoch": 0.16668647256882552, "grad_norm": 2.746919399303464, "learning_rate": 1.9036238282856952e-05, "loss": 0.5907, "step": 2104 }, { "epoch": 0.16676569617746087, "grad_norm": 2.3865614186491126, "learning_rate": 1.903513888482323e-05, "loss": 0.4483, "step": 2105 }, { "epoch": 0.16684491978609625, "grad_norm": 1.7891039212143733, "learning_rate": 1.903403889186909e-05, "loss": 0.3021, "step": 2106 }, { "epoch": 0.16692414339473163, "grad_norm": 2.4818601510672025, "learning_rate": 1.903293830406696e-05, "loss": 0.4789, "step": 2107 }, { "epoch": 0.16700336700336701, "grad_norm": 2.2380740442567095, "learning_rate": 1.9031837121489303e-05, "loss": 0.4511, "step": 2108 }, { "epoch": 0.16708259061200237, "grad_norm": 2.1029352008475595, "learning_rate": 1.903073534420863e-05, "loss": 0.3873, "step": 2109 }, { "epoch": 0.16716181422063775, "grad_norm": 2.4541343693784317, "learning_rate": 1.9029632972297488e-05, "loss": 0.3209, "step": 2110 }, { "epoch": 0.16724103782927313, "grad_norm": 2.499412314769988, "learning_rate": 1.9028530005828462e-05, "loss": 0.3712, "step": 2111 }, { "epoch": 0.16732026143790849, "grad_norm": 2.3803087447164053, "learning_rate": 1.9027426444874177e-05, "loss": 0.4894, "step": 2112 }, { "epoch": 0.16739948504654387, "grad_norm": 2.139522428014468, "learning_rate": 1.90263222895073e-05, "loss": 0.3657, "step": 2113 }, { "epoch": 0.16747870865517925, "grad_norm": 2.5857360157692844, "learning_rate": 1.902521753980053e-05, "loss": 0.4387, "step": 2114 }, { "epoch": 0.16755793226381463, "grad_norm": 2.5254515697665045, "learning_rate": 1.9024112195826614e-05, "loss": 0.4116, "step": 2115 }, { "epoch": 0.16763715587244998, "grad_norm": 2.330962944128933, "learning_rate": 1.902300625765833e-05, "loss": 0.353, "step": 2116 }, { "epoch": 0.16771637948108536, "grad_norm": 2.429531634297885, "learning_rate": 1.9021899725368498e-05, "loss": 0.3004, "step": 2117 }, { "epoch": 0.16779560308972075, "grad_norm": 3.105557690144621, "learning_rate": 1.902079259902998e-05, "loss": 0.3659, "step": 2118 }, { "epoch": 0.1678748266983561, "grad_norm": 2.0809041765481817, "learning_rate": 1.901968487871568e-05, "loss": 0.3142, "step": 2119 }, { "epoch": 0.16795405030699148, "grad_norm": 2.1673918217544332, "learning_rate": 1.9018576564498527e-05, "loss": 0.3441, "step": 2120 }, { "epoch": 0.16803327391562686, "grad_norm": 2.8895498973788722, "learning_rate": 1.9017467656451498e-05, "loss": 0.4644, "step": 2121 }, { "epoch": 0.16811249752426222, "grad_norm": 2.653218764317275, "learning_rate": 1.9016358154647618e-05, "loss": 0.4446, "step": 2122 }, { "epoch": 0.1681917211328976, "grad_norm": 2.276938814563543, "learning_rate": 1.9015248059159937e-05, "loss": 0.3865, "step": 2123 }, { "epoch": 0.16827094474153298, "grad_norm": 2.7172014187309372, "learning_rate": 1.901413737006155e-05, "loss": 0.4451, "step": 2124 }, { "epoch": 0.16835016835016836, "grad_norm": 2.6093729567236896, "learning_rate": 1.901302608742559e-05, "loss": 0.4226, "step": 2125 }, { "epoch": 0.16842939195880371, "grad_norm": 2.595846609277253, "learning_rate": 1.9011914211325225e-05, "loss": 0.4025, "step": 2126 }, { "epoch": 0.1685086155674391, "grad_norm": 1.9634697524463316, "learning_rate": 1.9010801741833678e-05, "loss": 0.3354, "step": 2127 }, { "epoch": 0.16858783917607448, "grad_norm": 3.349044899521629, "learning_rate": 1.900968867902419e-05, "loss": 0.403, "step": 2128 }, { "epoch": 0.16866706278470983, "grad_norm": 2.4891659613813073, "learning_rate": 1.900857502297006e-05, "loss": 0.3356, "step": 2129 }, { "epoch": 0.1687462863933452, "grad_norm": 1.8493124973294865, "learning_rate": 1.9007460773744605e-05, "loss": 0.2588, "step": 2130 }, { "epoch": 0.1688255100019806, "grad_norm": 2.1532833458909346, "learning_rate": 1.90063459314212e-05, "loss": 0.2615, "step": 2131 }, { "epoch": 0.16890473361061598, "grad_norm": 2.533837726728809, "learning_rate": 1.9005230496073256e-05, "loss": 0.2674, "step": 2132 }, { "epoch": 0.16898395721925133, "grad_norm": 2.9587320464331577, "learning_rate": 1.900411446777421e-05, "loss": 0.4194, "step": 2133 }, { "epoch": 0.1690631808278867, "grad_norm": 2.2784747424848435, "learning_rate": 1.900299784659755e-05, "loss": 0.3482, "step": 2134 }, { "epoch": 0.1691424044365221, "grad_norm": 2.768012413760472, "learning_rate": 1.9001880632616806e-05, "loss": 0.4818, "step": 2135 }, { "epoch": 0.16922162804515745, "grad_norm": 3.7781618071929057, "learning_rate": 1.9000762825905535e-05, "loss": 0.4172, "step": 2136 }, { "epoch": 0.16930085165379283, "grad_norm": 2.496340060068725, "learning_rate": 1.899964442653734e-05, "loss": 0.3051, "step": 2137 }, { "epoch": 0.1693800752624282, "grad_norm": 2.528331472675459, "learning_rate": 1.8998525434585862e-05, "loss": 0.4017, "step": 2138 }, { "epoch": 0.1694592988710636, "grad_norm": 2.2919283342131784, "learning_rate": 1.8997405850124786e-05, "loss": 0.3101, "step": 2139 }, { "epoch": 0.16953852247969894, "grad_norm": 2.434449683924331, "learning_rate": 1.8996285673227826e-05, "loss": 0.3701, "step": 2140 }, { "epoch": 0.16961774608833433, "grad_norm": 2.5664662080142318, "learning_rate": 1.899516490396874e-05, "loss": 0.4562, "step": 2141 }, { "epoch": 0.1696969696969697, "grad_norm": 2.373680078928652, "learning_rate": 1.8994043542421328e-05, "loss": 0.3602, "step": 2142 }, { "epoch": 0.16977619330560506, "grad_norm": 2.9418582009739844, "learning_rate": 1.8992921588659424e-05, "loss": 0.3693, "step": 2143 }, { "epoch": 0.16985541691424044, "grad_norm": 1.9707006698811431, "learning_rate": 1.8991799042756906e-05, "loss": 0.2413, "step": 2144 }, { "epoch": 0.16993464052287582, "grad_norm": 2.4198743180837017, "learning_rate": 1.8990675904787688e-05, "loss": 0.3169, "step": 2145 }, { "epoch": 0.17001386413151118, "grad_norm": 2.4385245086665845, "learning_rate": 1.898955217482572e-05, "loss": 0.405, "step": 2146 }, { "epoch": 0.17009308774014656, "grad_norm": 2.448066099258772, "learning_rate": 1.8988427852944997e-05, "loss": 0.3468, "step": 2147 }, { "epoch": 0.17017231134878194, "grad_norm": 2.706294096472709, "learning_rate": 1.898730293921955e-05, "loss": 0.4089, "step": 2148 }, { "epoch": 0.17025153495741732, "grad_norm": 2.8815788548623886, "learning_rate": 1.8986177433723446e-05, "loss": 0.418, "step": 2149 }, { "epoch": 0.17033075856605268, "grad_norm": 2.371487072909641, "learning_rate": 1.89850513365308e-05, "loss": 0.401, "step": 2150 }, { "epoch": 0.17040998217468806, "grad_norm": 3.104191787096029, "learning_rate": 1.8983924647715756e-05, "loss": 0.4464, "step": 2151 }, { "epoch": 0.17048920578332344, "grad_norm": 2.440025377327329, "learning_rate": 1.89827973673525e-05, "loss": 0.2731, "step": 2152 }, { "epoch": 0.1705684293919588, "grad_norm": 2.8971508373914756, "learning_rate": 1.8981669495515264e-05, "loss": 0.4354, "step": 2153 }, { "epoch": 0.17064765300059417, "grad_norm": 3.247004717659025, "learning_rate": 1.8980541032278302e-05, "loss": 0.5439, "step": 2154 }, { "epoch": 0.17072687660922956, "grad_norm": 2.374871292005657, "learning_rate": 1.8979411977715928e-05, "loss": 0.4213, "step": 2155 }, { "epoch": 0.17080610021786494, "grad_norm": 2.2545037196460367, "learning_rate": 1.8978282331902483e-05, "loss": 0.2908, "step": 2156 }, { "epoch": 0.1708853238265003, "grad_norm": 2.619256686868994, "learning_rate": 1.8977152094912346e-05, "loss": 0.441, "step": 2157 }, { "epoch": 0.17096454743513567, "grad_norm": 2.192957677833369, "learning_rate": 1.897602126681994e-05, "loss": 0.3166, "step": 2158 }, { "epoch": 0.17104377104377105, "grad_norm": 2.430374293376807, "learning_rate": 1.897488984769972e-05, "loss": 0.3759, "step": 2159 }, { "epoch": 0.1711229946524064, "grad_norm": 2.7285783174520675, "learning_rate": 1.8973757837626193e-05, "loss": 0.3116, "step": 2160 }, { "epoch": 0.1712022182610418, "grad_norm": 2.1713937899303315, "learning_rate": 1.8972625236673887e-05, "loss": 0.2753, "step": 2161 }, { "epoch": 0.17128144186967717, "grad_norm": 2.4081495295913737, "learning_rate": 1.8971492044917386e-05, "loss": 0.3759, "step": 2162 }, { "epoch": 0.17136066547831252, "grad_norm": 2.7599536453866804, "learning_rate": 1.8970358262431297e-05, "loss": 0.5082, "step": 2163 }, { "epoch": 0.1714398890869479, "grad_norm": 2.556122205339184, "learning_rate": 1.8969223889290283e-05, "loss": 0.4345, "step": 2164 }, { "epoch": 0.1715191126955833, "grad_norm": 2.895613537402155, "learning_rate": 1.8968088925569032e-05, "loss": 0.4889, "step": 2165 }, { "epoch": 0.17159833630421867, "grad_norm": 2.2890530778345846, "learning_rate": 1.896695337134228e-05, "loss": 0.335, "step": 2166 }, { "epoch": 0.17167755991285402, "grad_norm": 2.5929684615372124, "learning_rate": 1.8965817226684794e-05, "loss": 0.3215, "step": 2167 }, { "epoch": 0.1717567835214894, "grad_norm": 2.4875316299779775, "learning_rate": 1.896468049167138e-05, "loss": 0.3408, "step": 2168 }, { "epoch": 0.17183600713012478, "grad_norm": 2.1899809524907665, "learning_rate": 1.896354316637689e-05, "loss": 0.2778, "step": 2169 }, { "epoch": 0.17191523073876014, "grad_norm": 2.3699029896496926, "learning_rate": 1.8962405250876218e-05, "loss": 0.3449, "step": 2170 }, { "epoch": 0.17199445434739552, "grad_norm": 2.358263386433618, "learning_rate": 1.896126674524428e-05, "loss": 0.2964, "step": 2171 }, { "epoch": 0.1720736779560309, "grad_norm": 2.7779522918396675, "learning_rate": 1.896012764955605e-05, "loss": 0.3479, "step": 2172 }, { "epoch": 0.17215290156466628, "grad_norm": 2.8200480690484158, "learning_rate": 1.8958987963886526e-05, "loss": 0.322, "step": 2173 }, { "epoch": 0.17223212517330164, "grad_norm": 2.2987813864920645, "learning_rate": 1.8957847688310752e-05, "loss": 0.3801, "step": 2174 }, { "epoch": 0.17231134878193702, "grad_norm": 2.9524822890598577, "learning_rate": 1.8956706822903812e-05, "loss": 0.294, "step": 2175 }, { "epoch": 0.1723905723905724, "grad_norm": 2.417615406569931, "learning_rate": 1.8955565367740824e-05, "loss": 0.3939, "step": 2176 }, { "epoch": 0.17246979599920775, "grad_norm": 2.2878973867876553, "learning_rate": 1.8954423322896944e-05, "loss": 0.3393, "step": 2177 }, { "epoch": 0.17254901960784313, "grad_norm": 2.7467332554696555, "learning_rate": 1.895328068844738e-05, "loss": 0.4135, "step": 2178 }, { "epoch": 0.17262824321647852, "grad_norm": 2.410082829360762, "learning_rate": 1.8952137464467358e-05, "loss": 0.3861, "step": 2179 }, { "epoch": 0.1727074668251139, "grad_norm": 2.3148305943350236, "learning_rate": 1.895099365103216e-05, "loss": 0.3363, "step": 2180 }, { "epoch": 0.17278669043374925, "grad_norm": 2.4451622488844698, "learning_rate": 1.89498492482171e-05, "loss": 0.3457, "step": 2181 }, { "epoch": 0.17286591404238463, "grad_norm": 2.9123090342896596, "learning_rate": 1.8948704256097533e-05, "loss": 0.4391, "step": 2182 }, { "epoch": 0.17294513765102001, "grad_norm": 2.5961962897146953, "learning_rate": 1.8947558674748844e-05, "loss": 0.3523, "step": 2183 }, { "epoch": 0.17302436125965537, "grad_norm": 2.3554909079110096, "learning_rate": 1.8946412504246474e-05, "loss": 0.3494, "step": 2184 }, { "epoch": 0.17310358486829075, "grad_norm": 2.540965403058671, "learning_rate": 1.8945265744665886e-05, "loss": 0.3714, "step": 2185 }, { "epoch": 0.17318280847692613, "grad_norm": 2.429718294333805, "learning_rate": 1.8944118396082594e-05, "loss": 0.4051, "step": 2186 }, { "epoch": 0.17326203208556148, "grad_norm": 2.3319168615465395, "learning_rate": 1.8942970458572138e-05, "loss": 0.3159, "step": 2187 }, { "epoch": 0.17334125569419687, "grad_norm": 2.180703276575142, "learning_rate": 1.894182193221011e-05, "loss": 0.3446, "step": 2188 }, { "epoch": 0.17342047930283225, "grad_norm": 3.544600079813891, "learning_rate": 1.894067281707213e-05, "loss": 0.4013, "step": 2189 }, { "epoch": 0.17349970291146763, "grad_norm": 2.077860658659168, "learning_rate": 1.893952311323387e-05, "loss": 0.2531, "step": 2190 }, { "epoch": 0.17357892652010298, "grad_norm": 2.7539813496483565, "learning_rate": 1.8938372820771024e-05, "loss": 0.4322, "step": 2191 }, { "epoch": 0.17365815012873836, "grad_norm": 2.3017730244002834, "learning_rate": 1.8937221939759334e-05, "loss": 0.2896, "step": 2192 }, { "epoch": 0.17373737373737375, "grad_norm": 2.2789548140396674, "learning_rate": 1.8936070470274587e-05, "loss": 0.3167, "step": 2193 }, { "epoch": 0.1738165973460091, "grad_norm": 2.4857092572182737, "learning_rate": 1.8934918412392596e-05, "loss": 0.3634, "step": 2194 }, { "epoch": 0.17389582095464448, "grad_norm": 3.0808422718461514, "learning_rate": 1.893376576618922e-05, "loss": 0.4165, "step": 2195 }, { "epoch": 0.17397504456327986, "grad_norm": 2.106262606885841, "learning_rate": 1.8932612531740354e-05, "loss": 0.3718, "step": 2196 }, { "epoch": 0.17405426817191524, "grad_norm": 2.5529701921922117, "learning_rate": 1.893145870912193e-05, "loss": 0.369, "step": 2197 }, { "epoch": 0.1741334917805506, "grad_norm": 2.442736798790252, "learning_rate": 1.8930304298409933e-05, "loss": 0.3621, "step": 2198 }, { "epoch": 0.17421271538918598, "grad_norm": 2.5063383000144634, "learning_rate": 1.8929149299680364e-05, "loss": 0.2891, "step": 2199 }, { "epoch": 0.17429193899782136, "grad_norm": 1.7475169515077713, "learning_rate": 1.8927993713009275e-05, "loss": 0.2974, "step": 2200 }, { "epoch": 0.17437116260645671, "grad_norm": 2.4189101364032757, "learning_rate": 1.892683753847276e-05, "loss": 0.3523, "step": 2201 }, { "epoch": 0.1744503862150921, "grad_norm": 2.303026791129677, "learning_rate": 1.892568077614695e-05, "loss": 0.3423, "step": 2202 }, { "epoch": 0.17452960982372748, "grad_norm": 2.7473646135768695, "learning_rate": 1.892452342610801e-05, "loss": 0.4762, "step": 2203 }, { "epoch": 0.17460883343236283, "grad_norm": 2.7201715222550864, "learning_rate": 1.892336548843214e-05, "loss": 0.3626, "step": 2204 }, { "epoch": 0.1746880570409982, "grad_norm": 2.281882051748451, "learning_rate": 1.892220696319559e-05, "loss": 0.2948, "step": 2205 }, { "epoch": 0.1747672806496336, "grad_norm": 2.3124394676615516, "learning_rate": 1.8921047850474645e-05, "loss": 0.4231, "step": 2206 }, { "epoch": 0.17484650425826898, "grad_norm": 2.1894654960596003, "learning_rate": 1.891988815034562e-05, "loss": 0.2889, "step": 2207 }, { "epoch": 0.17492572786690433, "grad_norm": 2.7231170288819846, "learning_rate": 1.891872786288488e-05, "loss": 0.5903, "step": 2208 }, { "epoch": 0.1750049514755397, "grad_norm": 2.4238463547996543, "learning_rate": 1.8917566988168826e-05, "loss": 0.3282, "step": 2209 }, { "epoch": 0.1750841750841751, "grad_norm": 1.9409698911102269, "learning_rate": 1.8916405526273894e-05, "loss": 0.355, "step": 2210 }, { "epoch": 0.17516339869281045, "grad_norm": 2.6014900331580413, "learning_rate": 1.8915243477276563e-05, "loss": 0.4701, "step": 2211 }, { "epoch": 0.17524262230144583, "grad_norm": 2.2879987364227308, "learning_rate": 1.8914080841253348e-05, "loss": 0.3437, "step": 2212 }, { "epoch": 0.1753218459100812, "grad_norm": 2.7801715899583885, "learning_rate": 1.8912917618280796e-05, "loss": 0.5025, "step": 2213 }, { "epoch": 0.1754010695187166, "grad_norm": 2.694736890213809, "learning_rate": 1.8911753808435508e-05, "loss": 0.4684, "step": 2214 }, { "epoch": 0.17548029312735194, "grad_norm": 2.616118825405062, "learning_rate": 1.891058941179411e-05, "loss": 0.2805, "step": 2215 }, { "epoch": 0.17555951673598733, "grad_norm": 2.2962292147394767, "learning_rate": 1.8909424428433278e-05, "loss": 0.3663, "step": 2216 }, { "epoch": 0.1756387403446227, "grad_norm": 2.300167652656391, "learning_rate": 1.8908258858429716e-05, "loss": 0.3352, "step": 2217 }, { "epoch": 0.17571796395325806, "grad_norm": 2.238963644423776, "learning_rate": 1.890709270186017e-05, "loss": 0.3778, "step": 2218 }, { "epoch": 0.17579718756189344, "grad_norm": 2.0216601390263413, "learning_rate": 1.890592595880143e-05, "loss": 0.3113, "step": 2219 }, { "epoch": 0.17587641117052882, "grad_norm": 2.3600720490721416, "learning_rate": 1.890475862933032e-05, "loss": 0.5143, "step": 2220 }, { "epoch": 0.17595563477916418, "grad_norm": 2.481295917569171, "learning_rate": 1.8903590713523698e-05, "loss": 0.3345, "step": 2221 }, { "epoch": 0.17603485838779956, "grad_norm": 1.9133074466708255, "learning_rate": 1.8902422211458466e-05, "loss": 0.2761, "step": 2222 }, { "epoch": 0.17611408199643494, "grad_norm": 2.360993772606194, "learning_rate": 1.890125312321157e-05, "loss": 0.3967, "step": 2223 }, { "epoch": 0.17619330560507032, "grad_norm": 2.209250537372684, "learning_rate": 1.8900083448859986e-05, "loss": 0.3227, "step": 2224 }, { "epoch": 0.17627252921370568, "grad_norm": 2.216775376139844, "learning_rate": 1.8898913188480733e-05, "loss": 0.2768, "step": 2225 }, { "epoch": 0.17635175282234106, "grad_norm": 2.985214414290217, "learning_rate": 1.8897742342150863e-05, "loss": 0.5473, "step": 2226 }, { "epoch": 0.17643097643097644, "grad_norm": 2.54720611375589, "learning_rate": 1.8896570909947477e-05, "loss": 0.4999, "step": 2227 }, { "epoch": 0.1765102000396118, "grad_norm": 2.5616891684490266, "learning_rate": 1.88953988919477e-05, "loss": 0.3606, "step": 2228 }, { "epoch": 0.17658942364824717, "grad_norm": 2.7624486173525127, "learning_rate": 1.8894226288228707e-05, "loss": 0.3502, "step": 2229 }, { "epoch": 0.17666864725688255, "grad_norm": 2.899884035817668, "learning_rate": 1.8893053098867714e-05, "loss": 0.3123, "step": 2230 }, { "epoch": 0.17674787086551794, "grad_norm": 2.413198494708979, "learning_rate": 1.889187932394196e-05, "loss": 0.4752, "step": 2231 }, { "epoch": 0.1768270944741533, "grad_norm": 1.8774862286167893, "learning_rate": 1.889070496352874e-05, "loss": 0.3285, "step": 2232 }, { "epoch": 0.17690631808278867, "grad_norm": 2.8019297147960502, "learning_rate": 1.888953001770538e-05, "loss": 0.4447, "step": 2233 }, { "epoch": 0.17698554169142405, "grad_norm": 3.089390450629726, "learning_rate": 1.8888354486549238e-05, "loss": 0.4364, "step": 2234 }, { "epoch": 0.1770647653000594, "grad_norm": 2.7763340171829545, "learning_rate": 1.888717837013772e-05, "loss": 0.3412, "step": 2235 }, { "epoch": 0.1771439889086948, "grad_norm": 2.350382712116652, "learning_rate": 1.8886001668548273e-05, "loss": 0.3596, "step": 2236 }, { "epoch": 0.17722321251733017, "grad_norm": 2.0589733456646813, "learning_rate": 1.8884824381858368e-05, "loss": 0.348, "step": 2237 }, { "epoch": 0.17730243612596555, "grad_norm": 2.713390016532201, "learning_rate": 1.888364651014553e-05, "loss": 0.3167, "step": 2238 }, { "epoch": 0.1773816597346009, "grad_norm": 2.580833062371776, "learning_rate": 1.888246805348732e-05, "loss": 0.3798, "step": 2239 }, { "epoch": 0.1774608833432363, "grad_norm": 2.097769257939288, "learning_rate": 1.8881289011961323e-05, "loss": 0.383, "step": 2240 }, { "epoch": 0.17754010695187167, "grad_norm": 2.4511582730125356, "learning_rate": 1.8880109385645184e-05, "loss": 0.4192, "step": 2241 }, { "epoch": 0.17761933056050702, "grad_norm": 2.152868131719102, "learning_rate": 1.8878929174616566e-05, "loss": 0.2988, "step": 2242 }, { "epoch": 0.1776985541691424, "grad_norm": 3.348306824672599, "learning_rate": 1.887774837895318e-05, "loss": 0.3431, "step": 2243 }, { "epoch": 0.17777777777777778, "grad_norm": 2.073670096785361, "learning_rate": 1.887656699873279e-05, "loss": 0.4473, "step": 2244 }, { "epoch": 0.17785700138641314, "grad_norm": 2.1205626412960403, "learning_rate": 1.887538503403317e-05, "loss": 0.3981, "step": 2245 }, { "epoch": 0.17793622499504852, "grad_norm": 2.1057075517848145, "learning_rate": 1.8874202484932148e-05, "loss": 0.541, "step": 2246 }, { "epoch": 0.1780154486036839, "grad_norm": 2.2622129274146556, "learning_rate": 1.8873019351507596e-05, "loss": 0.4459, "step": 2247 }, { "epoch": 0.17809467221231928, "grad_norm": 2.2590690386233345, "learning_rate": 1.887183563383741e-05, "loss": 0.4301, "step": 2248 }, { "epoch": 0.17817389582095464, "grad_norm": 1.8527160438370396, "learning_rate": 1.8870651331999542e-05, "loss": 0.3188, "step": 2249 }, { "epoch": 0.17825311942959002, "grad_norm": 2.1888037910866567, "learning_rate": 1.886946644607196e-05, "loss": 0.3387, "step": 2250 }, { "epoch": 0.1783323430382254, "grad_norm": 2.203207946909501, "learning_rate": 1.8868280976132697e-05, "loss": 0.3214, "step": 2251 }, { "epoch": 0.17841156664686075, "grad_norm": 2.162461292130499, "learning_rate": 1.8867094922259798e-05, "loss": 0.2963, "step": 2252 }, { "epoch": 0.17849079025549613, "grad_norm": 2.2357105821769334, "learning_rate": 1.8865908284531368e-05, "loss": 0.3015, "step": 2253 }, { "epoch": 0.17857001386413152, "grad_norm": 2.098565615184502, "learning_rate": 1.8864721063025536e-05, "loss": 0.3427, "step": 2254 }, { "epoch": 0.1786492374727669, "grad_norm": 2.544108450267634, "learning_rate": 1.8863533257820475e-05, "loss": 0.2758, "step": 2255 }, { "epoch": 0.17872846108140225, "grad_norm": 2.563509390950677, "learning_rate": 1.8862344868994395e-05, "loss": 0.4012, "step": 2256 }, { "epoch": 0.17880768469003763, "grad_norm": 2.056589261982362, "learning_rate": 1.8861155896625553e-05, "loss": 0.3323, "step": 2257 }, { "epoch": 0.17888690829867301, "grad_norm": 2.4927181003160865, "learning_rate": 1.885996634079223e-05, "loss": 0.3375, "step": 2258 }, { "epoch": 0.17896613190730837, "grad_norm": 2.1622531747641003, "learning_rate": 1.8858776201572758e-05, "loss": 0.3627, "step": 2259 }, { "epoch": 0.17904535551594375, "grad_norm": 2.1178658779004795, "learning_rate": 1.8857585479045493e-05, "loss": 0.2167, "step": 2260 }, { "epoch": 0.17912457912457913, "grad_norm": 2.3911010311555936, "learning_rate": 1.8856394173288848e-05, "loss": 0.4988, "step": 2261 }, { "epoch": 0.17920380273321448, "grad_norm": 3.090916014336323, "learning_rate": 1.8855202284381264e-05, "loss": 0.4275, "step": 2262 }, { "epoch": 0.17928302634184987, "grad_norm": 2.521138369068937, "learning_rate": 1.8854009812401213e-05, "loss": 0.3247, "step": 2263 }, { "epoch": 0.17936224995048525, "grad_norm": 2.5078213967329748, "learning_rate": 1.885281675742722e-05, "loss": 0.4314, "step": 2264 }, { "epoch": 0.17944147355912063, "grad_norm": 2.0203068400035735, "learning_rate": 1.885162311953784e-05, "loss": 0.2707, "step": 2265 }, { "epoch": 0.17952069716775598, "grad_norm": 2.3394484789226575, "learning_rate": 1.885042889881167e-05, "loss": 0.3076, "step": 2266 }, { "epoch": 0.17959992077639136, "grad_norm": 2.976526413132003, "learning_rate": 1.8849234095327343e-05, "loss": 0.5041, "step": 2267 }, { "epoch": 0.17967914438502675, "grad_norm": 3.192980426468377, "learning_rate": 1.884803870916353e-05, "loss": 0.356, "step": 2268 }, { "epoch": 0.1797583679936621, "grad_norm": 2.266720195603325, "learning_rate": 1.884684274039894e-05, "loss": 0.3867, "step": 2269 }, { "epoch": 0.17983759160229748, "grad_norm": 2.630003140698048, "learning_rate": 1.8845646189112327e-05, "loss": 0.4206, "step": 2270 }, { "epoch": 0.17991681521093286, "grad_norm": 2.19533126041278, "learning_rate": 1.8844449055382473e-05, "loss": 0.3231, "step": 2271 }, { "epoch": 0.17999603881956824, "grad_norm": 2.291971238372196, "learning_rate": 1.8843251339288207e-05, "loss": 0.2977, "step": 2272 }, { "epoch": 0.1800752624282036, "grad_norm": 2.139047899374045, "learning_rate": 1.884205304090839e-05, "loss": 0.3601, "step": 2273 }, { "epoch": 0.18015448603683898, "grad_norm": 2.744523817004491, "learning_rate": 1.8840854160321926e-05, "loss": 0.2631, "step": 2274 }, { "epoch": 0.18023370964547436, "grad_norm": 2.7548359030785297, "learning_rate": 1.8839654697607756e-05, "loss": 0.3208, "step": 2275 }, { "epoch": 0.18031293325410971, "grad_norm": 2.4774999927677137, "learning_rate": 1.8838454652844857e-05, "loss": 0.3377, "step": 2276 }, { "epoch": 0.1803921568627451, "grad_norm": 1.9707352356292724, "learning_rate": 1.8837254026112245e-05, "loss": 0.2485, "step": 2277 }, { "epoch": 0.18047138047138048, "grad_norm": 1.9098460905264407, "learning_rate": 1.883605281748898e-05, "loss": 0.2836, "step": 2278 }, { "epoch": 0.18055060408001586, "grad_norm": 1.8641038253620796, "learning_rate": 1.8834851027054152e-05, "loss": 0.2612, "step": 2279 }, { "epoch": 0.1806298276886512, "grad_norm": 2.8081035775012375, "learning_rate": 1.8833648654886898e-05, "loss": 0.4917, "step": 2280 }, { "epoch": 0.1807090512972866, "grad_norm": 2.041431150392968, "learning_rate": 1.883244570106638e-05, "loss": 0.2784, "step": 2281 }, { "epoch": 0.18078827490592198, "grad_norm": 1.9540642497672804, "learning_rate": 1.8831242165671816e-05, "loss": 0.3058, "step": 2282 }, { "epoch": 0.18086749851455733, "grad_norm": 2.2666690188234, "learning_rate": 1.8830038048782445e-05, "loss": 0.3771, "step": 2283 }, { "epoch": 0.1809467221231927, "grad_norm": 2.447525472996404, "learning_rate": 1.8828833350477556e-05, "loss": 0.3348, "step": 2284 }, { "epoch": 0.1810259457318281, "grad_norm": 3.0817296113041257, "learning_rate": 1.8827628070836477e-05, "loss": 0.5346, "step": 2285 }, { "epoch": 0.18110516934046345, "grad_norm": 1.7488797341407631, "learning_rate": 1.8826422209938563e-05, "loss": 0.2251, "step": 2286 }, { "epoch": 0.18118439294909883, "grad_norm": 2.312363645373812, "learning_rate": 1.8825215767863215e-05, "loss": 0.3802, "step": 2287 }, { "epoch": 0.1812636165577342, "grad_norm": 2.6113495062677794, "learning_rate": 1.8824008744689873e-05, "loss": 0.3817, "step": 2288 }, { "epoch": 0.1813428401663696, "grad_norm": 2.7492437202208304, "learning_rate": 1.8822801140498014e-05, "loss": 0.3853, "step": 2289 }, { "epoch": 0.18142206377500494, "grad_norm": 3.72839857079176, "learning_rate": 1.8821592955367154e-05, "loss": 0.5297, "step": 2290 }, { "epoch": 0.18150128738364033, "grad_norm": 2.5601664171180807, "learning_rate": 1.8820384189376845e-05, "loss": 0.4437, "step": 2291 }, { "epoch": 0.1815805109922757, "grad_norm": 2.0999948908668307, "learning_rate": 1.8819174842606675e-05, "loss": 0.3295, "step": 2292 }, { "epoch": 0.18165973460091106, "grad_norm": 3.4630688727512533, "learning_rate": 1.8817964915136277e-05, "loss": 0.2814, "step": 2293 }, { "epoch": 0.18173895820954644, "grad_norm": 2.3417772119653333, "learning_rate": 1.881675440704532e-05, "loss": 0.3478, "step": 2294 }, { "epoch": 0.18181818181818182, "grad_norm": 2.3543112301846043, "learning_rate": 1.881554331841351e-05, "loss": 0.3024, "step": 2295 }, { "epoch": 0.1818974054268172, "grad_norm": 2.48172645963912, "learning_rate": 1.881433164932059e-05, "loss": 0.4151, "step": 2296 }, { "epoch": 0.18197662903545256, "grad_norm": 2.4838331653823995, "learning_rate": 1.881311939984634e-05, "loss": 0.3585, "step": 2297 }, { "epoch": 0.18205585264408794, "grad_norm": 2.5104491727078537, "learning_rate": 1.8811906570070583e-05, "loss": 0.3031, "step": 2298 }, { "epoch": 0.18213507625272332, "grad_norm": 2.582212854514169, "learning_rate": 1.8810693160073184e-05, "loss": 0.3555, "step": 2299 }, { "epoch": 0.18221429986135868, "grad_norm": 2.260466920406567, "learning_rate": 1.880947916993403e-05, "loss": 0.3147, "step": 2300 }, { "epoch": 0.18229352346999406, "grad_norm": 2.108711585961772, "learning_rate": 1.8808264599733065e-05, "loss": 0.2336, "step": 2301 }, { "epoch": 0.18237274707862944, "grad_norm": 1.8129311270852229, "learning_rate": 1.8807049449550254e-05, "loss": 0.1859, "step": 2302 }, { "epoch": 0.1824519706872648, "grad_norm": 2.8514702441521216, "learning_rate": 1.8805833719465617e-05, "loss": 0.4239, "step": 2303 }, { "epoch": 0.18253119429590017, "grad_norm": 2.6258942978508775, "learning_rate": 1.88046174095592e-05, "loss": 0.4009, "step": 2304 }, { "epoch": 0.18261041790453555, "grad_norm": 2.376692009004102, "learning_rate": 1.880340051991109e-05, "loss": 0.2973, "step": 2305 }, { "epoch": 0.18268964151317094, "grad_norm": 2.81701758688857, "learning_rate": 1.8802183050601417e-05, "loss": 0.3888, "step": 2306 }, { "epoch": 0.1827688651218063, "grad_norm": 2.6988120734982197, "learning_rate": 1.8800965001710342e-05, "loss": 0.4857, "step": 2307 }, { "epoch": 0.18284808873044167, "grad_norm": 2.434844528570647, "learning_rate": 1.879974637331807e-05, "loss": 0.402, "step": 2308 }, { "epoch": 0.18292731233907705, "grad_norm": 3.2364920599774787, "learning_rate": 1.879852716550484e-05, "loss": 0.439, "step": 2309 }, { "epoch": 0.1830065359477124, "grad_norm": 2.2624829721422395, "learning_rate": 1.8797307378350935e-05, "loss": 0.396, "step": 2310 }, { "epoch": 0.1830857595563478, "grad_norm": 2.418464608583417, "learning_rate": 1.8796087011936665e-05, "loss": 0.4137, "step": 2311 }, { "epoch": 0.18316498316498317, "grad_norm": 2.4378665971974747, "learning_rate": 1.8794866066342394e-05, "loss": 0.3857, "step": 2312 }, { "epoch": 0.18324420677361855, "grad_norm": 2.1559454965374893, "learning_rate": 1.879364454164851e-05, "loss": 0.2897, "step": 2313 }, { "epoch": 0.1833234303822539, "grad_norm": 2.7326341608881095, "learning_rate": 1.879242243793544e-05, "loss": 0.3999, "step": 2314 }, { "epoch": 0.18340265399088929, "grad_norm": 2.5845062026744614, "learning_rate": 1.8791199755283664e-05, "loss": 0.3615, "step": 2315 }, { "epoch": 0.18348187759952467, "grad_norm": 2.383931557532952, "learning_rate": 1.878997649377368e-05, "loss": 0.3871, "step": 2316 }, { "epoch": 0.18356110120816002, "grad_norm": 2.510233308100274, "learning_rate": 1.8788752653486045e-05, "loss": 0.4941, "step": 2317 }, { "epoch": 0.1836403248167954, "grad_norm": 3.645298019919298, "learning_rate": 1.878752823450133e-05, "loss": 0.4638, "step": 2318 }, { "epoch": 0.18371954842543078, "grad_norm": 2.5640338416982487, "learning_rate": 1.878630323690017e-05, "loss": 0.2658, "step": 2319 }, { "epoch": 0.18379877203406614, "grad_norm": 2.277676082097039, "learning_rate": 1.8785077660763217e-05, "loss": 0.2998, "step": 2320 }, { "epoch": 0.18387799564270152, "grad_norm": 2.793564639000829, "learning_rate": 1.8783851506171166e-05, "loss": 0.3348, "step": 2321 }, { "epoch": 0.1839572192513369, "grad_norm": 2.4186368481788665, "learning_rate": 1.8782624773204764e-05, "loss": 0.3054, "step": 2322 }, { "epoch": 0.18403644285997228, "grad_norm": 2.5380074555152135, "learning_rate": 1.8781397461944777e-05, "loss": 0.3612, "step": 2323 }, { "epoch": 0.18411566646860764, "grad_norm": 2.2511183841820523, "learning_rate": 1.8780169572472024e-05, "loss": 0.3667, "step": 2324 }, { "epoch": 0.18419489007724302, "grad_norm": 2.089634176256761, "learning_rate": 1.8778941104867347e-05, "loss": 0.2162, "step": 2325 }, { "epoch": 0.1842741136858784, "grad_norm": 2.334529141607381, "learning_rate": 1.8777712059211643e-05, "loss": 0.4318, "step": 2326 }, { "epoch": 0.18435333729451375, "grad_norm": 2.462494951280783, "learning_rate": 1.8776482435585836e-05, "loss": 0.3748, "step": 2327 }, { "epoch": 0.18443256090314913, "grad_norm": 4.004588331062914, "learning_rate": 1.877525223407089e-05, "loss": 0.4338, "step": 2328 }, { "epoch": 0.18451178451178452, "grad_norm": 2.3269944007926417, "learning_rate": 1.877402145474781e-05, "loss": 0.3882, "step": 2329 }, { "epoch": 0.1845910081204199, "grad_norm": 2.759584402642482, "learning_rate": 1.877279009769763e-05, "loss": 0.5195, "step": 2330 }, { "epoch": 0.18467023172905525, "grad_norm": 2.5557437988805485, "learning_rate": 1.8771558163001438e-05, "loss": 0.4687, "step": 2331 }, { "epoch": 0.18474945533769063, "grad_norm": 2.0378673196831887, "learning_rate": 1.8770325650740347e-05, "loss": 0.3179, "step": 2332 }, { "epoch": 0.184828678946326, "grad_norm": 2.386000362403704, "learning_rate": 1.876909256099551e-05, "loss": 0.3602, "step": 2333 }, { "epoch": 0.18490790255496137, "grad_norm": 3.0234490585000353, "learning_rate": 1.876785889384812e-05, "loss": 0.4049, "step": 2334 }, { "epoch": 0.18498712616359675, "grad_norm": 3.097328032456957, "learning_rate": 1.8766624649379415e-05, "loss": 0.4067, "step": 2335 }, { "epoch": 0.18506634977223213, "grad_norm": 2.859826786833267, "learning_rate": 1.8765389827670657e-05, "loss": 0.4865, "step": 2336 }, { "epoch": 0.1851455733808675, "grad_norm": 2.1015460161226436, "learning_rate": 1.8764154428803155e-05, "loss": 0.2837, "step": 2337 }, { "epoch": 0.18522479698950287, "grad_norm": 2.2995081930660937, "learning_rate": 1.8762918452858256e-05, "loss": 0.2956, "step": 2338 }, { "epoch": 0.18530402059813825, "grad_norm": 2.35434895152673, "learning_rate": 1.876168189991734e-05, "loss": 0.2762, "step": 2339 }, { "epoch": 0.18538324420677363, "grad_norm": 2.451243285460605, "learning_rate": 1.876044477006183e-05, "loss": 0.2849, "step": 2340 }, { "epoch": 0.18546246781540898, "grad_norm": 2.23536515794613, "learning_rate": 1.8759207063373183e-05, "loss": 0.3969, "step": 2341 }, { "epoch": 0.18554169142404436, "grad_norm": 2.566599801976133, "learning_rate": 1.87579687799329e-05, "loss": 0.3268, "step": 2342 }, { "epoch": 0.18562091503267975, "grad_norm": 2.4754747113106124, "learning_rate": 1.875672991982251e-05, "loss": 0.3663, "step": 2343 }, { "epoch": 0.1857001386413151, "grad_norm": 2.74166198690715, "learning_rate": 1.875549048312359e-05, "loss": 0.3339, "step": 2344 }, { "epoch": 0.18577936224995048, "grad_norm": 2.975816090283354, "learning_rate": 1.8754250469917753e-05, "loss": 0.5422, "step": 2345 }, { "epoch": 0.18585858585858586, "grad_norm": 2.6011827108643235, "learning_rate": 1.8753009880286647e-05, "loss": 0.4457, "step": 2346 }, { "epoch": 0.18593780946722124, "grad_norm": 2.5964538856244768, "learning_rate": 1.8751768714311952e-05, "loss": 0.3786, "step": 2347 }, { "epoch": 0.1860170330758566, "grad_norm": 2.3410431498333955, "learning_rate": 1.87505269720754e-05, "loss": 0.4045, "step": 2348 }, { "epoch": 0.18609625668449198, "grad_norm": 2.4402011232336496, "learning_rate": 1.8749284653658754e-05, "loss": 0.3779, "step": 2349 }, { "epoch": 0.18617548029312736, "grad_norm": 2.366642652619114, "learning_rate": 1.874804175914381e-05, "loss": 0.3724, "step": 2350 }, { "epoch": 0.1862547039017627, "grad_norm": 1.9239428344717986, "learning_rate": 1.8746798288612405e-05, "loss": 0.2864, "step": 2351 }, { "epoch": 0.1863339275103981, "grad_norm": 2.362876582429057, "learning_rate": 1.8745554242146428e-05, "loss": 0.358, "step": 2352 }, { "epoch": 0.18641315111903348, "grad_norm": 2.356237795697956, "learning_rate": 1.874430961982778e-05, "loss": 0.3782, "step": 2353 }, { "epoch": 0.18649237472766886, "grad_norm": 2.1905368125781353, "learning_rate": 1.874306442173842e-05, "loss": 0.3575, "step": 2354 }, { "epoch": 0.1865715983363042, "grad_norm": 2.191471676289847, "learning_rate": 1.8741818647960337e-05, "loss": 0.3142, "step": 2355 }, { "epoch": 0.1866508219449396, "grad_norm": 2.525277479361757, "learning_rate": 1.8740572298575558e-05, "loss": 0.3111, "step": 2356 }, { "epoch": 0.18673004555357497, "grad_norm": 2.262008817578584, "learning_rate": 1.8739325373666152e-05, "loss": 0.3561, "step": 2357 }, { "epoch": 0.18680926916221033, "grad_norm": 2.1024282379196855, "learning_rate": 1.8738077873314218e-05, "loss": 0.3291, "step": 2358 }, { "epoch": 0.1868884927708457, "grad_norm": 2.4822728521593334, "learning_rate": 1.8736829797601903e-05, "loss": 0.4646, "step": 2359 }, { "epoch": 0.1869677163794811, "grad_norm": 3.07982275805283, "learning_rate": 1.8735581146611387e-05, "loss": 0.4493, "step": 2360 }, { "epoch": 0.18704693998811645, "grad_norm": 2.775561776895125, "learning_rate": 1.873433192042488e-05, "loss": 0.3567, "step": 2361 }, { "epoch": 0.18712616359675183, "grad_norm": 2.0416057745576763, "learning_rate": 1.8733082119124646e-05, "loss": 0.336, "step": 2362 }, { "epoch": 0.1872053872053872, "grad_norm": 2.6729126196305213, "learning_rate": 1.8731831742792974e-05, "loss": 0.4414, "step": 2363 }, { "epoch": 0.1872846108140226, "grad_norm": 2.3773759184808925, "learning_rate": 1.87305807915122e-05, "loss": 0.375, "step": 2364 }, { "epoch": 0.18736383442265794, "grad_norm": 2.4057512840315294, "learning_rate": 1.8729329265364685e-05, "loss": 0.3645, "step": 2365 }, { "epoch": 0.18744305803129332, "grad_norm": 2.7158335366717257, "learning_rate": 1.8728077164432844e-05, "loss": 0.4029, "step": 2366 }, { "epoch": 0.1875222816399287, "grad_norm": 2.5252002446889295, "learning_rate": 1.872682448879912e-05, "loss": 0.3037, "step": 2367 }, { "epoch": 0.18760150524856406, "grad_norm": 2.491869821849235, "learning_rate": 1.8725571238545992e-05, "loss": 0.3009, "step": 2368 }, { "epoch": 0.18768072885719944, "grad_norm": 2.1272868834953473, "learning_rate": 1.872431741375598e-05, "loss": 0.343, "step": 2369 }, { "epoch": 0.18775995246583482, "grad_norm": 1.8435376199308053, "learning_rate": 1.872306301451165e-05, "loss": 0.2152, "step": 2370 }, { "epoch": 0.1878391760744702, "grad_norm": 2.0131326363249626, "learning_rate": 1.872180804089559e-05, "loss": 0.2593, "step": 2371 }, { "epoch": 0.18791839968310556, "grad_norm": 2.3018050316226475, "learning_rate": 1.8720552492990438e-05, "loss": 0.3328, "step": 2372 }, { "epoch": 0.18799762329174094, "grad_norm": 2.6609049398218088, "learning_rate": 1.8719296370878866e-05, "loss": 0.3521, "step": 2373 }, { "epoch": 0.18807684690037632, "grad_norm": 2.6650809589334727, "learning_rate": 1.871803967464358e-05, "loss": 0.3447, "step": 2374 }, { "epoch": 0.18815607050901167, "grad_norm": 2.451500628574491, "learning_rate": 1.8716782404367333e-05, "loss": 0.2894, "step": 2375 }, { "epoch": 0.18823529411764706, "grad_norm": 1.9325107608589824, "learning_rate": 1.8715524560132906e-05, "loss": 0.3222, "step": 2376 }, { "epoch": 0.18831451772628244, "grad_norm": 2.3753138122376685, "learning_rate": 1.8714266142023124e-05, "loss": 0.3854, "step": 2377 }, { "epoch": 0.18839374133491782, "grad_norm": 2.2117819364013958, "learning_rate": 1.8713007150120846e-05, "loss": 0.32, "step": 2378 }, { "epoch": 0.18847296494355317, "grad_norm": 2.4390039965109773, "learning_rate": 1.871174758450897e-05, "loss": 0.341, "step": 2379 }, { "epoch": 0.18855218855218855, "grad_norm": 3.0573172772389596, "learning_rate": 1.8710487445270436e-05, "loss": 0.4679, "step": 2380 }, { "epoch": 0.18863141216082394, "grad_norm": 2.3143663208533547, "learning_rate": 1.8709226732488216e-05, "loss": 0.3594, "step": 2381 }, { "epoch": 0.1887106357694593, "grad_norm": 2.8772245854603917, "learning_rate": 1.8707965446245317e-05, "loss": 0.3446, "step": 2382 }, { "epoch": 0.18878985937809467, "grad_norm": 4.06400445909464, "learning_rate": 1.87067035866248e-05, "loss": 0.2771, "step": 2383 }, { "epoch": 0.18886908298673005, "grad_norm": 1.999736035035748, "learning_rate": 1.8705441153709742e-05, "loss": 0.307, "step": 2384 }, { "epoch": 0.1889483065953654, "grad_norm": 2.3349303937758736, "learning_rate": 1.8704178147583273e-05, "loss": 0.3565, "step": 2385 }, { "epoch": 0.1890275302040008, "grad_norm": 1.6776105169326794, "learning_rate": 1.8702914568328555e-05, "loss": 0.3258, "step": 2386 }, { "epoch": 0.18910675381263617, "grad_norm": 2.101714509347008, "learning_rate": 1.8701650416028788e-05, "loss": 0.2515, "step": 2387 }, { "epoch": 0.18918597742127155, "grad_norm": 2.29702783638012, "learning_rate": 1.870038569076721e-05, "loss": 0.3166, "step": 2388 }, { "epoch": 0.1892652010299069, "grad_norm": 2.3366008944905476, "learning_rate": 1.86991203926271e-05, "loss": 0.2731, "step": 2389 }, { "epoch": 0.18934442463854229, "grad_norm": 2.522877680230459, "learning_rate": 1.8697854521691767e-05, "loss": 0.3838, "step": 2390 }, { "epoch": 0.18942364824717767, "grad_norm": 2.837417775984766, "learning_rate": 1.8696588078044566e-05, "loss": 0.4043, "step": 2391 }, { "epoch": 0.18950287185581302, "grad_norm": 2.569208471978124, "learning_rate": 1.8695321061768886e-05, "loss": 0.4068, "step": 2392 }, { "epoch": 0.1895820954644484, "grad_norm": 2.324114348310896, "learning_rate": 1.8694053472948154e-05, "loss": 0.3178, "step": 2393 }, { "epoch": 0.18966131907308378, "grad_norm": 2.4716474656941796, "learning_rate": 1.8692785311665835e-05, "loss": 0.3222, "step": 2394 }, { "epoch": 0.18974054268171917, "grad_norm": 2.2117536725780305, "learning_rate": 1.8691516578005426e-05, "loss": 0.3132, "step": 2395 }, { "epoch": 0.18981976629035452, "grad_norm": 2.281402927838633, "learning_rate": 1.8690247272050474e-05, "loss": 0.2456, "step": 2396 }, { "epoch": 0.1898989898989899, "grad_norm": 2.0716927024484857, "learning_rate": 1.8688977393884555e-05, "loss": 0.273, "step": 2397 }, { "epoch": 0.18997821350762528, "grad_norm": 2.192567436763538, "learning_rate": 1.868770694359128e-05, "loss": 0.3838, "step": 2398 }, { "epoch": 0.19005743711626064, "grad_norm": 2.3353910833870333, "learning_rate": 1.868643592125431e-05, "loss": 0.3555, "step": 2399 }, { "epoch": 0.19013666072489602, "grad_norm": 2.9816592791128227, "learning_rate": 1.8685164326957327e-05, "loss": 0.5093, "step": 2400 }, { "epoch": 0.1902158843335314, "grad_norm": 3.248589257719648, "learning_rate": 1.8683892160784066e-05, "loss": 0.3436, "step": 2401 }, { "epoch": 0.19029510794216675, "grad_norm": 2.3672979239434735, "learning_rate": 1.868261942281829e-05, "loss": 0.4247, "step": 2402 }, { "epoch": 0.19037433155080213, "grad_norm": 2.449880558955819, "learning_rate": 1.86813461131438e-05, "loss": 0.4516, "step": 2403 }, { "epoch": 0.19045355515943752, "grad_norm": 1.9781986952862538, "learning_rate": 1.8680072231844445e-05, "loss": 0.3328, "step": 2404 }, { "epoch": 0.1905327787680729, "grad_norm": 1.682442082315602, "learning_rate": 1.8678797779004096e-05, "loss": 0.2546, "step": 2405 }, { "epoch": 0.19061200237670825, "grad_norm": 2.358540070133724, "learning_rate": 1.8677522754706677e-05, "loss": 0.4219, "step": 2406 }, { "epoch": 0.19069122598534363, "grad_norm": 2.279122575981621, "learning_rate": 1.8676247159036132e-05, "loss": 0.3462, "step": 2407 }, { "epoch": 0.190770449593979, "grad_norm": 2.46748142707905, "learning_rate": 1.8674970992076465e-05, "loss": 0.3002, "step": 2408 }, { "epoch": 0.19084967320261437, "grad_norm": 2.7487199920077523, "learning_rate": 1.8673694253911696e-05, "loss": 0.5119, "step": 2409 }, { "epoch": 0.19092889681124975, "grad_norm": 2.459701113653219, "learning_rate": 1.8672416944625896e-05, "loss": 0.4144, "step": 2410 }, { "epoch": 0.19100812041988513, "grad_norm": 2.2355157013713747, "learning_rate": 1.867113906430317e-05, "loss": 0.3779, "step": 2411 }, { "epoch": 0.1910873440285205, "grad_norm": 2.008764280295245, "learning_rate": 1.8669860613027657e-05, "loss": 0.2847, "step": 2412 }, { "epoch": 0.19116656763715587, "grad_norm": 2.830324642302351, "learning_rate": 1.8668581590883544e-05, "loss": 0.5248, "step": 2413 }, { "epoch": 0.19124579124579125, "grad_norm": 2.316528929771331, "learning_rate": 1.8667301997955038e-05, "loss": 0.4244, "step": 2414 }, { "epoch": 0.19132501485442663, "grad_norm": 2.1455708077769295, "learning_rate": 1.8666021834326404e-05, "loss": 0.4063, "step": 2415 }, { "epoch": 0.19140423846306198, "grad_norm": 2.578496252157606, "learning_rate": 1.866474110008193e-05, "loss": 0.4278, "step": 2416 }, { "epoch": 0.19148346207169736, "grad_norm": 2.2288754110195566, "learning_rate": 1.8663459795305946e-05, "loss": 0.4112, "step": 2417 }, { "epoch": 0.19156268568033274, "grad_norm": 2.277262606405243, "learning_rate": 1.866217792008282e-05, "loss": 0.3687, "step": 2418 }, { "epoch": 0.1916419092889681, "grad_norm": 1.8630297645419311, "learning_rate": 1.866089547449696e-05, "loss": 0.2948, "step": 2419 }, { "epoch": 0.19172113289760348, "grad_norm": 2.5857719677134807, "learning_rate": 1.8659612458632802e-05, "loss": 0.3685, "step": 2420 }, { "epoch": 0.19180035650623886, "grad_norm": 2.1077816478372227, "learning_rate": 1.8658328872574833e-05, "loss": 0.3134, "step": 2421 }, { "epoch": 0.19187958011487424, "grad_norm": 2.2558696868498163, "learning_rate": 1.8657044716407573e-05, "loss": 0.3903, "step": 2422 }, { "epoch": 0.1919588037235096, "grad_norm": 2.5260330779322144, "learning_rate": 1.865575999021557e-05, "loss": 0.4757, "step": 2423 }, { "epoch": 0.19203802733214498, "grad_norm": 2.585946758475045, "learning_rate": 1.8654474694083416e-05, "loss": 0.4314, "step": 2424 }, { "epoch": 0.19211725094078036, "grad_norm": 2.5235702116602683, "learning_rate": 1.8653188828095754e-05, "loss": 0.3479, "step": 2425 }, { "epoch": 0.1921964745494157, "grad_norm": 2.338783130966399, "learning_rate": 1.865190239233724e-05, "loss": 0.3642, "step": 2426 }, { "epoch": 0.1922756981580511, "grad_norm": 2.2613101907663795, "learning_rate": 1.8650615386892587e-05, "loss": 0.3137, "step": 2427 }, { "epoch": 0.19235492176668648, "grad_norm": 2.481183082822829, "learning_rate": 1.8649327811846533e-05, "loss": 0.4983, "step": 2428 }, { "epoch": 0.19243414537532186, "grad_norm": 2.056116658196089, "learning_rate": 1.8648039667283857e-05, "loss": 0.395, "step": 2429 }, { "epoch": 0.1925133689839572, "grad_norm": 2.187373035714514, "learning_rate": 1.8646750953289384e-05, "loss": 0.3235, "step": 2430 }, { "epoch": 0.1925925925925926, "grad_norm": 1.953239087467633, "learning_rate": 1.8645461669947966e-05, "loss": 0.27, "step": 2431 }, { "epoch": 0.19267181620122797, "grad_norm": 2.2855907480080697, "learning_rate": 1.8644171817344497e-05, "loss": 0.3153, "step": 2432 }, { "epoch": 0.19275103980986333, "grad_norm": 2.5142407429696254, "learning_rate": 1.8642881395563904e-05, "loss": 0.2761, "step": 2433 }, { "epoch": 0.1928302634184987, "grad_norm": 2.232408290132552, "learning_rate": 1.864159040469116e-05, "loss": 0.4019, "step": 2434 }, { "epoch": 0.1929094870271341, "grad_norm": 2.6768788194814483, "learning_rate": 1.864029884481127e-05, "loss": 0.3774, "step": 2435 }, { "epoch": 0.19298871063576947, "grad_norm": 2.4057338437133695, "learning_rate": 1.8639006716009275e-05, "loss": 0.4702, "step": 2436 }, { "epoch": 0.19306793424440483, "grad_norm": 2.406634426736634, "learning_rate": 1.8637714018370255e-05, "loss": 0.3872, "step": 2437 }, { "epoch": 0.1931471578530402, "grad_norm": 2.1804927207402356, "learning_rate": 1.8636420751979328e-05, "loss": 0.4127, "step": 2438 }, { "epoch": 0.1932263814616756, "grad_norm": 2.2325083599641893, "learning_rate": 1.863512691692165e-05, "loss": 0.4142, "step": 2439 }, { "epoch": 0.19330560507031094, "grad_norm": 2.360300792783522, "learning_rate": 1.863383251328242e-05, "loss": 0.3718, "step": 2440 }, { "epoch": 0.19338482867894632, "grad_norm": 1.9676691602914926, "learning_rate": 1.8632537541146856e-05, "loss": 0.2545, "step": 2441 }, { "epoch": 0.1934640522875817, "grad_norm": 2.544070344504953, "learning_rate": 1.8631242000600235e-05, "loss": 0.421, "step": 2442 }, { "epoch": 0.19354327589621706, "grad_norm": 2.317720778716317, "learning_rate": 1.8629945891727856e-05, "loss": 0.4134, "step": 2443 }, { "epoch": 0.19362249950485244, "grad_norm": 2.1669465368125818, "learning_rate": 1.8628649214615066e-05, "loss": 0.3168, "step": 2444 }, { "epoch": 0.19370172311348782, "grad_norm": 2.0639083871105828, "learning_rate": 1.8627351969347246e-05, "loss": 0.3443, "step": 2445 }, { "epoch": 0.1937809467221232, "grad_norm": 2.2376669330107153, "learning_rate": 1.8626054156009807e-05, "loss": 0.3529, "step": 2446 }, { "epoch": 0.19386017033075856, "grad_norm": 2.550901319390686, "learning_rate": 1.862475577468821e-05, "loss": 0.3534, "step": 2447 }, { "epoch": 0.19393939393939394, "grad_norm": 2.3583170168567658, "learning_rate": 1.8623456825467948e-05, "loss": 0.3823, "step": 2448 }, { "epoch": 0.19401861754802932, "grad_norm": 2.2929430344925135, "learning_rate": 1.8622157308434544e-05, "loss": 0.4592, "step": 2449 }, { "epoch": 0.19409784115666467, "grad_norm": 2.6646782989089504, "learning_rate": 1.8620857223673567e-05, "loss": 0.4302, "step": 2450 }, { "epoch": 0.19417706476530006, "grad_norm": 1.81841435235266, "learning_rate": 1.8619556571270624e-05, "loss": 0.2961, "step": 2451 }, { "epoch": 0.19425628837393544, "grad_norm": 2.1546680016047928, "learning_rate": 1.8618255351311355e-05, "loss": 0.3418, "step": 2452 }, { "epoch": 0.19433551198257082, "grad_norm": 2.150348235408283, "learning_rate": 1.8616953563881444e-05, "loss": 0.352, "step": 2453 }, { "epoch": 0.19441473559120617, "grad_norm": 2.0046820761363846, "learning_rate": 1.8615651209066598e-05, "loss": 0.3235, "step": 2454 }, { "epoch": 0.19449395919984155, "grad_norm": 2.5348173653153623, "learning_rate": 1.8614348286952577e-05, "loss": 0.3452, "step": 2455 }, { "epoch": 0.19457318280847694, "grad_norm": 2.3274243520308007, "learning_rate": 1.8613044797625173e-05, "loss": 0.3892, "step": 2456 }, { "epoch": 0.1946524064171123, "grad_norm": 2.0989791578857155, "learning_rate": 1.861174074117021e-05, "loss": 0.3405, "step": 2457 }, { "epoch": 0.19473163002574767, "grad_norm": 2.1919308956371184, "learning_rate": 1.8610436117673557e-05, "loss": 0.3096, "step": 2458 }, { "epoch": 0.19481085363438305, "grad_norm": 2.2882205739796944, "learning_rate": 1.8609130927221116e-05, "loss": 0.405, "step": 2459 }, { "epoch": 0.1948900772430184, "grad_norm": 2.096111072736974, "learning_rate": 1.8607825169898827e-05, "loss": 0.4088, "step": 2460 }, { "epoch": 0.1949693008516538, "grad_norm": 2.5457410371902127, "learning_rate": 1.8606518845792672e-05, "loss": 0.5546, "step": 2461 }, { "epoch": 0.19504852446028917, "grad_norm": 1.848592772388562, "learning_rate": 1.860521195498866e-05, "loss": 0.3491, "step": 2462 }, { "epoch": 0.19512774806892455, "grad_norm": 2.0752605803301467, "learning_rate": 1.8603904497572846e-05, "loss": 0.3093, "step": 2463 }, { "epoch": 0.1952069716775599, "grad_norm": 2.285386555839289, "learning_rate": 1.8602596473631323e-05, "loss": 0.4335, "step": 2464 }, { "epoch": 0.19528619528619529, "grad_norm": 1.9504041251275086, "learning_rate": 1.8601287883250215e-05, "loss": 0.3306, "step": 2465 }, { "epoch": 0.19536541889483067, "grad_norm": 2.814250993235811, "learning_rate": 1.8599978726515685e-05, "loss": 0.3126, "step": 2466 }, { "epoch": 0.19544464250346602, "grad_norm": 2.699794693071824, "learning_rate": 1.8598669003513934e-05, "loss": 0.5214, "step": 2467 }, { "epoch": 0.1955238661121014, "grad_norm": 2.138989129402344, "learning_rate": 1.8597358714331207e-05, "loss": 0.3157, "step": 2468 }, { "epoch": 0.19560308972073678, "grad_norm": 2.269082248599281, "learning_rate": 1.8596047859053776e-05, "loss": 0.3847, "step": 2469 }, { "epoch": 0.19568231332937217, "grad_norm": 2.1483245298195115, "learning_rate": 1.8594736437767954e-05, "loss": 0.4204, "step": 2470 }, { "epoch": 0.19576153693800752, "grad_norm": 2.4622250635289364, "learning_rate": 1.8593424450560094e-05, "loss": 0.4287, "step": 2471 }, { "epoch": 0.1958407605466429, "grad_norm": 2.2783626156378216, "learning_rate": 1.8592111897516583e-05, "loss": 0.414, "step": 2472 }, { "epoch": 0.19591998415527828, "grad_norm": 2.813603039056177, "learning_rate": 1.8590798778723843e-05, "loss": 0.3991, "step": 2473 }, { "epoch": 0.19599920776391364, "grad_norm": 2.266553035044402, "learning_rate": 1.8589485094268344e-05, "loss": 0.3105, "step": 2474 }, { "epoch": 0.19607843137254902, "grad_norm": 1.8631401649992443, "learning_rate": 1.858817084423658e-05, "loss": 0.3321, "step": 2475 }, { "epoch": 0.1961576549811844, "grad_norm": 2.6001028866504354, "learning_rate": 1.8586856028715087e-05, "loss": 0.4129, "step": 2476 }, { "epoch": 0.19623687858981978, "grad_norm": 1.9225639528899896, "learning_rate": 1.8585540647790445e-05, "loss": 0.3477, "step": 2477 }, { "epoch": 0.19631610219845513, "grad_norm": 1.9029896696961794, "learning_rate": 1.858422470154926e-05, "loss": 0.3582, "step": 2478 }, { "epoch": 0.19639532580709052, "grad_norm": 2.371662228739464, "learning_rate": 1.8582908190078184e-05, "loss": 0.5111, "step": 2479 }, { "epoch": 0.1964745494157259, "grad_norm": 2.3458971637684, "learning_rate": 1.8581591113463903e-05, "loss": 0.4875, "step": 2480 }, { "epoch": 0.19655377302436125, "grad_norm": 2.220020587214232, "learning_rate": 1.858027347179314e-05, "loss": 0.358, "step": 2481 }, { "epoch": 0.19663299663299663, "grad_norm": 2.016025618145945, "learning_rate": 1.8578955265152652e-05, "loss": 0.3057, "step": 2482 }, { "epoch": 0.196712220241632, "grad_norm": 2.390973899894166, "learning_rate": 1.857763649362924e-05, "loss": 0.4794, "step": 2483 }, { "epoch": 0.19679144385026737, "grad_norm": 2.0811221237668005, "learning_rate": 1.857631715730974e-05, "loss": 0.3887, "step": 2484 }, { "epoch": 0.19687066745890275, "grad_norm": 2.3012484162313824, "learning_rate": 1.857499725628102e-05, "loss": 0.3006, "step": 2485 }, { "epoch": 0.19694989106753813, "grad_norm": 2.4683935517071616, "learning_rate": 1.8573676790629988e-05, "loss": 0.2737, "step": 2486 }, { "epoch": 0.1970291146761735, "grad_norm": 2.2284228307033946, "learning_rate": 1.8572355760443597e-05, "loss": 0.2367, "step": 2487 }, { "epoch": 0.19710833828480886, "grad_norm": 2.216729456039309, "learning_rate": 1.8571034165808826e-05, "loss": 0.2884, "step": 2488 }, { "epoch": 0.19718756189344425, "grad_norm": 2.2143382473000313, "learning_rate": 1.85697120068127e-05, "loss": 0.1766, "step": 2489 }, { "epoch": 0.19726678550207963, "grad_norm": 2.732087908688094, "learning_rate": 1.8568389283542263e-05, "loss": 0.3801, "step": 2490 }, { "epoch": 0.19734600911071498, "grad_norm": 3.081517991139325, "learning_rate": 1.8567065996084628e-05, "loss": 0.4109, "step": 2491 }, { "epoch": 0.19742523271935036, "grad_norm": 2.181387457142246, "learning_rate": 1.8565742144526917e-05, "loss": 0.3455, "step": 2492 }, { "epoch": 0.19750445632798574, "grad_norm": 2.1877078020664866, "learning_rate": 1.85644177289563e-05, "loss": 0.392, "step": 2493 }, { "epoch": 0.19758367993662113, "grad_norm": 2.3123444763880387, "learning_rate": 1.856309274945999e-05, "loss": 0.3186, "step": 2494 }, { "epoch": 0.19766290354525648, "grad_norm": 2.2855869416309016, "learning_rate": 1.8561767206125223e-05, "loss": 0.3503, "step": 2495 }, { "epoch": 0.19774212715389186, "grad_norm": 2.6475461584697335, "learning_rate": 1.856044109903928e-05, "loss": 0.4552, "step": 2496 }, { "epoch": 0.19782135076252724, "grad_norm": 2.315308822615627, "learning_rate": 1.8559114428289482e-05, "loss": 0.4657, "step": 2497 }, { "epoch": 0.1979005743711626, "grad_norm": 2.082621432472806, "learning_rate": 1.8557787193963184e-05, "loss": 0.4042, "step": 2498 }, { "epoch": 0.19797979797979798, "grad_norm": 2.2891809452758407, "learning_rate": 1.8556459396147777e-05, "loss": 0.4412, "step": 2499 }, { "epoch": 0.19805902158843336, "grad_norm": 2.8126784423414466, "learning_rate": 1.8555131034930686e-05, "loss": 0.4436, "step": 2500 }, { "epoch": 0.1981382451970687, "grad_norm": 1.920461562424893, "learning_rate": 1.8553802110399385e-05, "loss": 0.3126, "step": 2501 }, { "epoch": 0.1982174688057041, "grad_norm": 2.3405925530387157, "learning_rate": 1.8552472622641372e-05, "loss": 0.3209, "step": 2502 }, { "epoch": 0.19829669241433948, "grad_norm": 2.6909419205423575, "learning_rate": 1.8551142571744188e-05, "loss": 0.5276, "step": 2503 }, { "epoch": 0.19837591602297486, "grad_norm": 1.953917517484155, "learning_rate": 1.854981195779541e-05, "loss": 0.2665, "step": 2504 }, { "epoch": 0.1984551396316102, "grad_norm": 2.21961047385004, "learning_rate": 1.8548480780882658e-05, "loss": 0.3823, "step": 2505 }, { "epoch": 0.1985343632402456, "grad_norm": 2.5403097204187475, "learning_rate": 1.8547149041093574e-05, "loss": 0.3974, "step": 2506 }, { "epoch": 0.19861358684888097, "grad_norm": 2.1253184318112375, "learning_rate": 1.8545816738515855e-05, "loss": 0.3815, "step": 2507 }, { "epoch": 0.19869281045751633, "grad_norm": 1.8509488428343006, "learning_rate": 1.854448387323722e-05, "loss": 0.3224, "step": 2508 }, { "epoch": 0.1987720340661517, "grad_norm": 2.0945640602614763, "learning_rate": 1.8543150445345443e-05, "loss": 0.3317, "step": 2509 }, { "epoch": 0.1988512576747871, "grad_norm": 1.9832166063174255, "learning_rate": 1.854181645492831e-05, "loss": 0.2794, "step": 2510 }, { "epoch": 0.19893048128342247, "grad_norm": 2.489453753213978, "learning_rate": 1.8540481902073664e-05, "loss": 0.3726, "step": 2511 }, { "epoch": 0.19900970489205783, "grad_norm": 2.3006469390899102, "learning_rate": 1.8539146786869385e-05, "loss": 0.3611, "step": 2512 }, { "epoch": 0.1990889285006932, "grad_norm": 2.358837949652743, "learning_rate": 1.8537811109403372e-05, "loss": 0.2882, "step": 2513 }, { "epoch": 0.1991681521093286, "grad_norm": 2.8427295733863596, "learning_rate": 1.853647486976358e-05, "loss": 0.4833, "step": 2514 }, { "epoch": 0.19924737571796394, "grad_norm": 2.0815328712729495, "learning_rate": 1.8535138068037995e-05, "loss": 0.3174, "step": 2515 }, { "epoch": 0.19932659932659932, "grad_norm": 2.491501696031602, "learning_rate": 1.8533800704314633e-05, "loss": 0.471, "step": 2516 }, { "epoch": 0.1994058229352347, "grad_norm": 2.1810681624932373, "learning_rate": 1.8532462778681558e-05, "loss": 0.3122, "step": 2517 }, { "epoch": 0.1994850465438701, "grad_norm": 2.0987487623904424, "learning_rate": 1.8531124291226866e-05, "loss": 0.3213, "step": 2518 }, { "epoch": 0.19956427015250544, "grad_norm": 2.5940693606713463, "learning_rate": 1.8529785242038688e-05, "loss": 0.3382, "step": 2519 }, { "epoch": 0.19964349376114082, "grad_norm": 1.9084487218273467, "learning_rate": 1.8528445631205195e-05, "loss": 0.3098, "step": 2520 }, { "epoch": 0.1997227173697762, "grad_norm": 2.22697748874517, "learning_rate": 1.852710545881459e-05, "loss": 0.4005, "step": 2521 }, { "epoch": 0.19980194097841156, "grad_norm": 1.94871077943574, "learning_rate": 1.8525764724955123e-05, "loss": 0.3308, "step": 2522 }, { "epoch": 0.19988116458704694, "grad_norm": 2.1648646873883606, "learning_rate": 1.8524423429715072e-05, "loss": 0.2976, "step": 2523 }, { "epoch": 0.19996038819568232, "grad_norm": 2.289324311627118, "learning_rate": 1.8523081573182754e-05, "loss": 0.3277, "step": 2524 }, { "epoch": 0.20003961180431767, "grad_norm": 2.6359868580396957, "learning_rate": 1.8521739155446527e-05, "loss": 0.4441, "step": 2525 }, { "epoch": 0.20011883541295306, "grad_norm": 2.0463999849180667, "learning_rate": 1.852039617659478e-05, "loss": 0.3373, "step": 2526 }, { "epoch": 0.20019805902158844, "grad_norm": 2.233538096791076, "learning_rate": 1.851905263671594e-05, "loss": 0.3732, "step": 2527 }, { "epoch": 0.20027728263022382, "grad_norm": 3.07992933976717, "learning_rate": 1.8517708535898477e-05, "loss": 0.3118, "step": 2528 }, { "epoch": 0.20035650623885917, "grad_norm": 2.206639505911278, "learning_rate": 1.851636387423089e-05, "loss": 0.2737, "step": 2529 }, { "epoch": 0.20043572984749455, "grad_norm": 2.088856315478698, "learning_rate": 1.8515018651801723e-05, "loss": 0.3001, "step": 2530 }, { "epoch": 0.20051495345612994, "grad_norm": 2.173784435536571, "learning_rate": 1.8513672868699547e-05, "loss": 0.3735, "step": 2531 }, { "epoch": 0.2005941770647653, "grad_norm": 2.0865718183931508, "learning_rate": 1.851232652501298e-05, "loss": 0.2852, "step": 2532 }, { "epoch": 0.20067340067340067, "grad_norm": 2.4405662133864245, "learning_rate": 1.851097962083067e-05, "loss": 0.4349, "step": 2533 }, { "epoch": 0.20075262428203605, "grad_norm": 1.8240503780746695, "learning_rate": 1.85096321562413e-05, "loss": 0.2592, "step": 2534 }, { "epoch": 0.20083184789067143, "grad_norm": 2.0317250338114023, "learning_rate": 1.8508284131333604e-05, "loss": 0.3196, "step": 2535 }, { "epoch": 0.2009110714993068, "grad_norm": 2.6578832137432653, "learning_rate": 1.850693554619633e-05, "loss": 0.4377, "step": 2536 }, { "epoch": 0.20099029510794217, "grad_norm": 2.2538147570599394, "learning_rate": 1.8505586400918288e-05, "loss": 0.3298, "step": 2537 }, { "epoch": 0.20106951871657755, "grad_norm": 2.7358544242777882, "learning_rate": 1.8504236695588308e-05, "loss": 0.3239, "step": 2538 }, { "epoch": 0.2011487423252129, "grad_norm": 2.7866894854942323, "learning_rate": 1.8502886430295262e-05, "loss": 0.4308, "step": 2539 }, { "epoch": 0.20122796593384829, "grad_norm": 2.403766386329718, "learning_rate": 1.8501535605128054e-05, "loss": 0.3067, "step": 2540 }, { "epoch": 0.20130718954248367, "grad_norm": 2.172612609192193, "learning_rate": 1.8500184220175636e-05, "loss": 0.4173, "step": 2541 }, { "epoch": 0.20138641315111902, "grad_norm": 2.183681647657208, "learning_rate": 1.8498832275526988e-05, "loss": 0.4232, "step": 2542 }, { "epoch": 0.2014656367597544, "grad_norm": 2.1545552923233307, "learning_rate": 1.8497479771271125e-05, "loss": 0.3003, "step": 2543 }, { "epoch": 0.20154486036838978, "grad_norm": 2.7155129320167153, "learning_rate": 1.8496126707497112e-05, "loss": 0.3735, "step": 2544 }, { "epoch": 0.20162408397702516, "grad_norm": 2.446669786535323, "learning_rate": 1.849477308429403e-05, "loss": 0.3324, "step": 2545 }, { "epoch": 0.20170330758566052, "grad_norm": 2.987567324241434, "learning_rate": 1.8493418901751016e-05, "loss": 0.4053, "step": 2546 }, { "epoch": 0.2017825311942959, "grad_norm": 2.0451748233881775, "learning_rate": 1.849206415995724e-05, "loss": 0.3628, "step": 2547 }, { "epoch": 0.20186175480293128, "grad_norm": 2.366937693151244, "learning_rate": 1.8490708859001896e-05, "loss": 0.3862, "step": 2548 }, { "epoch": 0.20194097841156664, "grad_norm": 2.285537809785544, "learning_rate": 1.8489352998974227e-05, "loss": 0.4229, "step": 2549 }, { "epoch": 0.20202020202020202, "grad_norm": 2.14664097818107, "learning_rate": 1.8487996579963515e-05, "loss": 0.3176, "step": 2550 }, { "epoch": 0.2020994256288374, "grad_norm": 2.6709319714445683, "learning_rate": 1.8486639602059066e-05, "loss": 0.3936, "step": 2551 }, { "epoch": 0.20217864923747278, "grad_norm": 2.2299067893671043, "learning_rate": 1.8485282065350237e-05, "loss": 0.3677, "step": 2552 }, { "epoch": 0.20225787284610813, "grad_norm": 1.8376533793627137, "learning_rate": 1.848392396992641e-05, "loss": 0.2929, "step": 2553 }, { "epoch": 0.20233709645474351, "grad_norm": 2.326285525705798, "learning_rate": 1.8482565315877013e-05, "loss": 0.4047, "step": 2554 }, { "epoch": 0.2024163200633789, "grad_norm": 2.2071973868350843, "learning_rate": 1.8481206103291506e-05, "loss": 0.3289, "step": 2555 }, { "epoch": 0.20249554367201425, "grad_norm": 2.253218749715048, "learning_rate": 1.8479846332259388e-05, "loss": 0.3552, "step": 2556 }, { "epoch": 0.20257476728064963, "grad_norm": 2.4826969943083292, "learning_rate": 1.847848600287019e-05, "loss": 0.4362, "step": 2557 }, { "epoch": 0.202653990889285, "grad_norm": 2.4615813938979074, "learning_rate": 1.8477125115213484e-05, "loss": 0.4523, "step": 2558 }, { "epoch": 0.20273321449792037, "grad_norm": 2.589911331391582, "learning_rate": 1.8475763669378878e-05, "loss": 0.3923, "step": 2559 }, { "epoch": 0.20281243810655575, "grad_norm": 2.1544316122631684, "learning_rate": 1.8474401665456016e-05, "loss": 0.3036, "step": 2560 }, { "epoch": 0.20289166171519113, "grad_norm": 2.200846607013613, "learning_rate": 1.8473039103534583e-05, "loss": 0.4175, "step": 2561 }, { "epoch": 0.2029708853238265, "grad_norm": 2.4356438861821474, "learning_rate": 1.8471675983704295e-05, "loss": 0.4491, "step": 2562 }, { "epoch": 0.20305010893246186, "grad_norm": 2.29837279168617, "learning_rate": 1.8470312306054903e-05, "loss": 0.3452, "step": 2563 }, { "epoch": 0.20312933254109725, "grad_norm": 2.0487267473067896, "learning_rate": 1.8468948070676205e-05, "loss": 0.3094, "step": 2564 }, { "epoch": 0.20320855614973263, "grad_norm": 2.1381754994275597, "learning_rate": 1.8467583277658026e-05, "loss": 0.2855, "step": 2565 }, { "epoch": 0.20328777975836798, "grad_norm": 1.8764463167573255, "learning_rate": 1.8466217927090232e-05, "loss": 0.2959, "step": 2566 }, { "epoch": 0.20336700336700336, "grad_norm": 2.5526234060684847, "learning_rate": 1.8464852019062726e-05, "loss": 0.3627, "step": 2567 }, { "epoch": 0.20344622697563874, "grad_norm": 2.1899386174730573, "learning_rate": 1.846348555366544e-05, "loss": 0.3836, "step": 2568 }, { "epoch": 0.20352545058427413, "grad_norm": 2.5673985506546115, "learning_rate": 1.8462118530988356e-05, "loss": 0.4674, "step": 2569 }, { "epoch": 0.20360467419290948, "grad_norm": 2.6110808937235697, "learning_rate": 1.8460750951121487e-05, "loss": 0.4123, "step": 2570 }, { "epoch": 0.20368389780154486, "grad_norm": 1.9865053476855534, "learning_rate": 1.8459382814154874e-05, "loss": 0.3951, "step": 2571 }, { "epoch": 0.20376312141018024, "grad_norm": 2.4539083543726097, "learning_rate": 1.845801412017861e-05, "loss": 0.3154, "step": 2572 }, { "epoch": 0.2038423450188156, "grad_norm": 2.498386015589712, "learning_rate": 1.845664486928281e-05, "loss": 0.3749, "step": 2573 }, { "epoch": 0.20392156862745098, "grad_norm": 2.1661368931050053, "learning_rate": 1.8455275061557643e-05, "loss": 0.2674, "step": 2574 }, { "epoch": 0.20400079223608636, "grad_norm": 1.9942421387612144, "learning_rate": 1.845390469709329e-05, "loss": 0.3599, "step": 2575 }, { "epoch": 0.20408001584472174, "grad_norm": 2.14985491413104, "learning_rate": 1.8452533775979992e-05, "loss": 0.3147, "step": 2576 }, { "epoch": 0.2041592394533571, "grad_norm": 2.3317594207362267, "learning_rate": 1.845116229830802e-05, "loss": 0.3676, "step": 2577 }, { "epoch": 0.20423846306199248, "grad_norm": 2.155006085273215, "learning_rate": 1.8449790264167672e-05, "loss": 0.2258, "step": 2578 }, { "epoch": 0.20431768667062786, "grad_norm": 2.6071663515527987, "learning_rate": 1.8448417673649292e-05, "loss": 0.4777, "step": 2579 }, { "epoch": 0.2043969102792632, "grad_norm": 2.239493857358544, "learning_rate": 1.844704452684326e-05, "loss": 0.3044, "step": 2580 }, { "epoch": 0.2044761338878986, "grad_norm": 2.226009112781301, "learning_rate": 1.844567082383999e-05, "loss": 0.358, "step": 2581 }, { "epoch": 0.20455535749653397, "grad_norm": 2.1352822392390842, "learning_rate": 1.8444296564729935e-05, "loss": 0.4358, "step": 2582 }, { "epoch": 0.20463458110516933, "grad_norm": 2.165123326896601, "learning_rate": 1.8442921749603586e-05, "loss": 0.3361, "step": 2583 }, { "epoch": 0.2047138047138047, "grad_norm": 2.2842604106043507, "learning_rate": 1.8441546378551457e-05, "loss": 0.3988, "step": 2584 }, { "epoch": 0.2047930283224401, "grad_norm": 3.228491038171374, "learning_rate": 1.8440170451664122e-05, "loss": 0.3863, "step": 2585 }, { "epoch": 0.20487225193107547, "grad_norm": 2.4373413126721912, "learning_rate": 1.8438793969032175e-05, "loss": 0.4361, "step": 2586 }, { "epoch": 0.20495147553971083, "grad_norm": 2.2254259951936906, "learning_rate": 1.8437416930746248e-05, "loss": 0.3859, "step": 2587 }, { "epoch": 0.2050306991483462, "grad_norm": 1.8439603395194666, "learning_rate": 1.8436039336897015e-05, "loss": 0.2175, "step": 2588 }, { "epoch": 0.2051099227569816, "grad_norm": 2.5136251343204172, "learning_rate": 1.8434661187575183e-05, "loss": 0.4304, "step": 2589 }, { "epoch": 0.20518914636561694, "grad_norm": 1.883767199381963, "learning_rate": 1.8433282482871497e-05, "loss": 0.3192, "step": 2590 }, { "epoch": 0.20526836997425232, "grad_norm": 2.319317007307682, "learning_rate": 1.8431903222876737e-05, "loss": 0.3958, "step": 2591 }, { "epoch": 0.2053475935828877, "grad_norm": 2.0192310345324946, "learning_rate": 1.8430523407681723e-05, "loss": 0.3304, "step": 2592 }, { "epoch": 0.2054268171915231, "grad_norm": 2.4550122509585743, "learning_rate": 1.8429143037377305e-05, "loss": 0.4448, "step": 2593 }, { "epoch": 0.20550604080015844, "grad_norm": 2.6209542854199355, "learning_rate": 1.8427762112054378e-05, "loss": 0.4104, "step": 2594 }, { "epoch": 0.20558526440879382, "grad_norm": 1.7827263733795262, "learning_rate": 1.842638063180387e-05, "loss": 0.2895, "step": 2595 }, { "epoch": 0.2056644880174292, "grad_norm": 2.1731669035069814, "learning_rate": 1.8424998596716743e-05, "loss": 0.4229, "step": 2596 }, { "epoch": 0.20574371162606456, "grad_norm": 2.0356240598075344, "learning_rate": 1.8423616006883994e-05, "loss": 0.1909, "step": 2597 }, { "epoch": 0.20582293523469994, "grad_norm": 2.4375123859882986, "learning_rate": 1.8422232862396663e-05, "loss": 0.3283, "step": 2598 }, { "epoch": 0.20590215884333532, "grad_norm": 2.3089164850513773, "learning_rate": 1.8420849163345824e-05, "loss": 0.3994, "step": 2599 }, { "epoch": 0.20598138245197067, "grad_norm": 2.8526619245294014, "learning_rate": 1.8419464909822585e-05, "loss": 0.3482, "step": 2600 }, { "epoch": 0.20606060606060606, "grad_norm": 2.2822456832216225, "learning_rate": 1.8418080101918095e-05, "loss": 0.3764, "step": 2601 }, { "epoch": 0.20613982966924144, "grad_norm": 1.7550603316833326, "learning_rate": 1.8416694739723535e-05, "loss": 0.3156, "step": 2602 }, { "epoch": 0.20621905327787682, "grad_norm": 2.3386797685966805, "learning_rate": 1.841530882333012e-05, "loss": 0.4354, "step": 2603 }, { "epoch": 0.20629827688651217, "grad_norm": 2.439938355488765, "learning_rate": 1.8413922352829118e-05, "loss": 0.3275, "step": 2604 }, { "epoch": 0.20637750049514755, "grad_norm": 1.8912032583509386, "learning_rate": 1.8412535328311813e-05, "loss": 0.25, "step": 2605 }, { "epoch": 0.20645672410378293, "grad_norm": 1.951746034926776, "learning_rate": 1.8411147749869536e-05, "loss": 0.3341, "step": 2606 }, { "epoch": 0.2065359477124183, "grad_norm": 2.211967105953523, "learning_rate": 1.840975961759365e-05, "loss": 0.3588, "step": 2607 }, { "epoch": 0.20661517132105367, "grad_norm": 2.129109282217052, "learning_rate": 1.8408370931575556e-05, "loss": 0.3472, "step": 2608 }, { "epoch": 0.20669439492968905, "grad_norm": 1.9980283945785957, "learning_rate": 1.84069816919067e-05, "loss": 0.3169, "step": 2609 }, { "epoch": 0.20677361853832443, "grad_norm": 2.366220442774412, "learning_rate": 1.8405591898678546e-05, "loss": 0.334, "step": 2610 }, { "epoch": 0.2068528421469598, "grad_norm": 2.590989486280597, "learning_rate": 1.8404201551982612e-05, "loss": 0.4114, "step": 2611 }, { "epoch": 0.20693206575559517, "grad_norm": 2.7065415039991283, "learning_rate": 1.8402810651910444e-05, "loss": 0.3114, "step": 2612 }, { "epoch": 0.20701128936423055, "grad_norm": 2.420248316470613, "learning_rate": 1.840141919855363e-05, "loss": 0.4917, "step": 2613 }, { "epoch": 0.2070905129728659, "grad_norm": 2.3910321768992016, "learning_rate": 1.8400027192003782e-05, "loss": 0.4136, "step": 2614 }, { "epoch": 0.20716973658150128, "grad_norm": 2.317256882536081, "learning_rate": 1.8398634632352562e-05, "loss": 0.432, "step": 2615 }, { "epoch": 0.20724896019013667, "grad_norm": 2.1674740524169103, "learning_rate": 1.8397241519691667e-05, "loss": 0.3308, "step": 2616 }, { "epoch": 0.20732818379877205, "grad_norm": 2.0220465250143294, "learning_rate": 1.839584785411282e-05, "loss": 0.3448, "step": 2617 }, { "epoch": 0.2074074074074074, "grad_norm": 2.0931024764127857, "learning_rate": 1.839445363570779e-05, "loss": 0.3316, "step": 2618 }, { "epoch": 0.20748663101604278, "grad_norm": 2.1880844704372215, "learning_rate": 1.8393058864568383e-05, "loss": 0.3456, "step": 2619 }, { "epoch": 0.20756585462467816, "grad_norm": 1.977257775751363, "learning_rate": 1.839166354078643e-05, "loss": 0.3164, "step": 2620 }, { "epoch": 0.20764507823331352, "grad_norm": 2.05211555291372, "learning_rate": 1.8390267664453815e-05, "loss": 0.395, "step": 2621 }, { "epoch": 0.2077243018419489, "grad_norm": 2.1628169614419046, "learning_rate": 1.8388871235662442e-05, "loss": 0.2661, "step": 2622 }, { "epoch": 0.20780352545058428, "grad_norm": 2.1323652417115495, "learning_rate": 1.8387474254504265e-05, "loss": 0.3184, "step": 2623 }, { "epoch": 0.20788274905921963, "grad_norm": 2.5508570251721467, "learning_rate": 1.8386076721071265e-05, "loss": 0.4939, "step": 2624 }, { "epoch": 0.20796197266785502, "grad_norm": 2.8045404773786577, "learning_rate": 1.8384678635455467e-05, "loss": 0.3343, "step": 2625 }, { "epoch": 0.2080411962764904, "grad_norm": 1.8651823508151928, "learning_rate": 1.838327999774892e-05, "loss": 0.2498, "step": 2626 }, { "epoch": 0.20812041988512578, "grad_norm": 2.1930794777905933, "learning_rate": 1.838188080804373e-05, "loss": 0.3193, "step": 2627 }, { "epoch": 0.20819964349376113, "grad_norm": 2.6666841957853724, "learning_rate": 1.8380481066432014e-05, "loss": 0.3227, "step": 2628 }, { "epoch": 0.20827886710239651, "grad_norm": 2.0500333012469394, "learning_rate": 1.8379080773005947e-05, "loss": 0.3185, "step": 2629 }, { "epoch": 0.2083580907110319, "grad_norm": 1.8323762996745598, "learning_rate": 1.8377679927857727e-05, "loss": 0.2804, "step": 2630 }, { "epoch": 0.20843731431966725, "grad_norm": 1.5661517308983972, "learning_rate": 1.8376278531079594e-05, "loss": 0.2732, "step": 2631 }, { "epoch": 0.20851653792830263, "grad_norm": 2.2268710652453936, "learning_rate": 1.8374876582763828e-05, "loss": 0.357, "step": 2632 }, { "epoch": 0.208595761536938, "grad_norm": 2.297070724373449, "learning_rate": 1.8373474083002732e-05, "loss": 0.393, "step": 2633 }, { "epoch": 0.2086749851455734, "grad_norm": 1.9198036488834096, "learning_rate": 1.837207103188866e-05, "loss": 0.2875, "step": 2634 }, { "epoch": 0.20875420875420875, "grad_norm": 2.1961277032385236, "learning_rate": 1.8370667429513992e-05, "loss": 0.3546, "step": 2635 }, { "epoch": 0.20883343236284413, "grad_norm": 2.4534474572110674, "learning_rate": 1.8369263275971153e-05, "loss": 0.4266, "step": 2636 }, { "epoch": 0.2089126559714795, "grad_norm": 2.0447295786211988, "learning_rate": 1.8367858571352603e-05, "loss": 0.3419, "step": 2637 }, { "epoch": 0.20899187958011486, "grad_norm": 2.430849448300121, "learning_rate": 1.8366453315750822e-05, "loss": 0.3886, "step": 2638 }, { "epoch": 0.20907110318875025, "grad_norm": 2.199666297789227, "learning_rate": 1.8365047509258346e-05, "loss": 0.3537, "step": 2639 }, { "epoch": 0.20915032679738563, "grad_norm": 1.8918731842530536, "learning_rate": 1.8363641151967747e-05, "loss": 0.3759, "step": 2640 }, { "epoch": 0.20922955040602098, "grad_norm": 2.081939893913907, "learning_rate": 1.836223424397162e-05, "loss": 0.2967, "step": 2641 }, { "epoch": 0.20930877401465636, "grad_norm": 2.4497323812173732, "learning_rate": 1.8360826785362603e-05, "loss": 0.2896, "step": 2642 }, { "epoch": 0.20938799762329174, "grad_norm": 2.276983591209558, "learning_rate": 1.835941877623337e-05, "loss": 0.3864, "step": 2643 }, { "epoch": 0.20946722123192713, "grad_norm": 2.1956607346184143, "learning_rate": 1.835801021667664e-05, "loss": 0.3027, "step": 2644 }, { "epoch": 0.20954644484056248, "grad_norm": 2.3536200318001326, "learning_rate": 1.8356601106785148e-05, "loss": 0.3932, "step": 2645 }, { "epoch": 0.20962566844919786, "grad_norm": 2.1621747899113823, "learning_rate": 1.8355191446651687e-05, "loss": 0.2965, "step": 2646 }, { "epoch": 0.20970489205783324, "grad_norm": 2.1551953744640744, "learning_rate": 1.8353781236369065e-05, "loss": 0.3712, "step": 2647 }, { "epoch": 0.2097841156664686, "grad_norm": 2.598357265924713, "learning_rate": 1.8352370476030147e-05, "loss": 0.3331, "step": 2648 }, { "epoch": 0.20986333927510398, "grad_norm": 3.2250553483696787, "learning_rate": 1.8350959165727826e-05, "loss": 0.3816, "step": 2649 }, { "epoch": 0.20994256288373936, "grad_norm": 2.1365979100227723, "learning_rate": 1.8349547305555023e-05, "loss": 0.2894, "step": 2650 }, { "epoch": 0.21002178649237474, "grad_norm": 2.644424194721846, "learning_rate": 1.8348134895604708e-05, "loss": 0.3396, "step": 2651 }, { "epoch": 0.2101010101010101, "grad_norm": 2.109756127932805, "learning_rate": 1.8346721935969878e-05, "loss": 0.3661, "step": 2652 }, { "epoch": 0.21018023370964548, "grad_norm": 2.3028644071572724, "learning_rate": 1.8345308426743568e-05, "loss": 0.3585, "step": 2653 }, { "epoch": 0.21025945731828086, "grad_norm": 2.522542063004414, "learning_rate": 1.8343894368018854e-05, "loss": 0.3958, "step": 2654 }, { "epoch": 0.2103386809269162, "grad_norm": 2.124994261044452, "learning_rate": 1.8342479759888844e-05, "loss": 0.2394, "step": 2655 }, { "epoch": 0.2104179045355516, "grad_norm": 2.430304254237179, "learning_rate": 1.8341064602446686e-05, "loss": 0.3612, "step": 2656 }, { "epoch": 0.21049712814418697, "grad_norm": 1.8303348210674257, "learning_rate": 1.8339648895785556e-05, "loss": 0.2152, "step": 2657 }, { "epoch": 0.21057635175282233, "grad_norm": 2.4233598407210133, "learning_rate": 1.8338232639998672e-05, "loss": 0.2722, "step": 2658 }, { "epoch": 0.2106555753614577, "grad_norm": 2.1245333910050364, "learning_rate": 1.8336815835179295e-05, "loss": 0.2951, "step": 2659 }, { "epoch": 0.2107347989700931, "grad_norm": 2.0940444104124336, "learning_rate": 1.8335398481420705e-05, "loss": 0.366, "step": 2660 }, { "epoch": 0.21081402257872847, "grad_norm": 2.513197961247772, "learning_rate": 1.8333980578816234e-05, "loss": 0.5175, "step": 2661 }, { "epoch": 0.21089324618736383, "grad_norm": 2.0638517247404704, "learning_rate": 1.8332562127459242e-05, "loss": 0.3637, "step": 2662 }, { "epoch": 0.2109724697959992, "grad_norm": 2.6062995492531855, "learning_rate": 1.833114312744313e-05, "loss": 0.3722, "step": 2663 }, { "epoch": 0.2110516934046346, "grad_norm": 2.2418245928242495, "learning_rate": 1.8329723578861328e-05, "loss": 0.2643, "step": 2664 }, { "epoch": 0.21113091701326994, "grad_norm": 1.9513762458939297, "learning_rate": 1.8328303481807306e-05, "loss": 0.2953, "step": 2665 }, { "epoch": 0.21121014062190532, "grad_norm": 2.041780750713817, "learning_rate": 1.832688283637458e-05, "loss": 0.3284, "step": 2666 }, { "epoch": 0.2112893642305407, "grad_norm": 1.8743691510752067, "learning_rate": 1.8325461642656676e-05, "loss": 0.2557, "step": 2667 }, { "epoch": 0.2113685878391761, "grad_norm": 2.545594139253551, "learning_rate": 1.832403990074719e-05, "loss": 0.3291, "step": 2668 }, { "epoch": 0.21144781144781144, "grad_norm": 2.5102765646842475, "learning_rate": 1.8322617610739726e-05, "loss": 0.3089, "step": 2669 }, { "epoch": 0.21152703505644682, "grad_norm": 2.153122294702497, "learning_rate": 1.8321194772727938e-05, "loss": 0.3374, "step": 2670 }, { "epoch": 0.2116062586650822, "grad_norm": 2.0620687951065313, "learning_rate": 1.8319771386805514e-05, "loss": 0.3148, "step": 2671 }, { "epoch": 0.21168548227371756, "grad_norm": 2.609068027853725, "learning_rate": 1.8318347453066176e-05, "loss": 0.281, "step": 2672 }, { "epoch": 0.21176470588235294, "grad_norm": 2.327885318425126, "learning_rate": 1.8316922971603685e-05, "loss": 0.324, "step": 2673 }, { "epoch": 0.21184392949098832, "grad_norm": 2.5567265620509247, "learning_rate": 1.8315497942511836e-05, "loss": 0.4571, "step": 2674 }, { "epoch": 0.2119231530996237, "grad_norm": 1.9530602351507904, "learning_rate": 1.8314072365884455e-05, "loss": 0.3368, "step": 2675 }, { "epoch": 0.21200237670825905, "grad_norm": 2.183172551614286, "learning_rate": 1.831264624181542e-05, "loss": 0.381, "step": 2676 }, { "epoch": 0.21208160031689444, "grad_norm": 2.1165451898629657, "learning_rate": 1.8311219570398618e-05, "loss": 0.3704, "step": 2677 }, { "epoch": 0.21216082392552982, "grad_norm": 2.173447764552139, "learning_rate": 1.8309792351728006e-05, "loss": 0.3065, "step": 2678 }, { "epoch": 0.21224004753416517, "grad_norm": 2.509060173114008, "learning_rate": 1.830836458589755e-05, "loss": 0.3491, "step": 2679 }, { "epoch": 0.21231927114280055, "grad_norm": 2.657889401903535, "learning_rate": 1.8306936273001258e-05, "loss": 0.337, "step": 2680 }, { "epoch": 0.21239849475143593, "grad_norm": 2.3382739629251303, "learning_rate": 1.830550741313319e-05, "loss": 0.3242, "step": 2681 }, { "epoch": 0.2124777183600713, "grad_norm": 1.9207616662604226, "learning_rate": 1.830407800638742e-05, "loss": 0.2679, "step": 2682 }, { "epoch": 0.21255694196870667, "grad_norm": 2.7518944547251483, "learning_rate": 1.830264805285807e-05, "loss": 0.3123, "step": 2683 }, { "epoch": 0.21263616557734205, "grad_norm": 2.188738456911267, "learning_rate": 1.8301217552639294e-05, "loss": 0.2878, "step": 2684 }, { "epoch": 0.21271538918597743, "grad_norm": 2.341328680782817, "learning_rate": 1.8299786505825286e-05, "loss": 0.351, "step": 2685 }, { "epoch": 0.2127946127946128, "grad_norm": 2.084815946338467, "learning_rate": 1.8298354912510273e-05, "loss": 0.2899, "step": 2686 }, { "epoch": 0.21287383640324817, "grad_norm": 2.2783993367167557, "learning_rate": 1.8296922772788522e-05, "loss": 0.4847, "step": 2687 }, { "epoch": 0.21295306001188355, "grad_norm": 2.2558782307655316, "learning_rate": 1.8295490086754325e-05, "loss": 0.3419, "step": 2688 }, { "epoch": 0.2130322836205189, "grad_norm": 2.4362783331473783, "learning_rate": 1.829405685450202e-05, "loss": 0.3236, "step": 2689 }, { "epoch": 0.21311150722915428, "grad_norm": 2.2056672430638447, "learning_rate": 1.8292623076125983e-05, "loss": 0.3833, "step": 2690 }, { "epoch": 0.21319073083778967, "grad_norm": 1.9879736236867172, "learning_rate": 1.8291188751720615e-05, "loss": 0.2964, "step": 2691 }, { "epoch": 0.21326995444642505, "grad_norm": 1.9903371697394152, "learning_rate": 1.828975388138036e-05, "loss": 0.3504, "step": 2692 }, { "epoch": 0.2133491780550604, "grad_norm": 2.3339548547217586, "learning_rate": 1.8288318465199705e-05, "loss": 0.2651, "step": 2693 }, { "epoch": 0.21342840166369578, "grad_norm": 2.827813814974156, "learning_rate": 1.8286882503273157e-05, "loss": 0.3639, "step": 2694 }, { "epoch": 0.21350762527233116, "grad_norm": 4.897325653438817, "learning_rate": 1.828544599569527e-05, "loss": 0.3537, "step": 2695 }, { "epoch": 0.21358684888096652, "grad_norm": 2.2427380222423396, "learning_rate": 1.8284008942560634e-05, "loss": 0.4186, "step": 2696 }, { "epoch": 0.2136660724896019, "grad_norm": 2.002453051781248, "learning_rate": 1.8282571343963865e-05, "loss": 0.3463, "step": 2697 }, { "epoch": 0.21374529609823728, "grad_norm": 2.185418342588789, "learning_rate": 1.8281133199999628e-05, "loss": 0.2677, "step": 2698 }, { "epoch": 0.21382451970687263, "grad_norm": 2.9173466222075426, "learning_rate": 1.8279694510762616e-05, "loss": 0.4729, "step": 2699 }, { "epoch": 0.21390374331550802, "grad_norm": 2.1494535842839606, "learning_rate": 1.8278255276347563e-05, "loss": 0.3377, "step": 2700 }, { "epoch": 0.2139829669241434, "grad_norm": 1.9491211564441508, "learning_rate": 1.8276815496849227e-05, "loss": 0.2439, "step": 2701 }, { "epoch": 0.21406219053277878, "grad_norm": 2.84181437827925, "learning_rate": 1.827537517236242e-05, "loss": 0.4375, "step": 2702 }, { "epoch": 0.21414141414141413, "grad_norm": 2.2023445326109456, "learning_rate": 1.8273934302981975e-05, "loss": 0.338, "step": 2703 }, { "epoch": 0.21422063775004951, "grad_norm": 1.9480787022280412, "learning_rate": 1.8272492888802767e-05, "loss": 0.2912, "step": 2704 }, { "epoch": 0.2142998613586849, "grad_norm": 1.9597184234300726, "learning_rate": 1.8271050929919707e-05, "loss": 0.3007, "step": 2705 }, { "epoch": 0.21437908496732025, "grad_norm": 2.765426265653849, "learning_rate": 1.8269608426427743e-05, "loss": 0.4354, "step": 2706 }, { "epoch": 0.21445830857595563, "grad_norm": 2.4772794757151813, "learning_rate": 1.8268165378421852e-05, "loss": 0.3696, "step": 2707 }, { "epoch": 0.214537532184591, "grad_norm": 1.9931273474661464, "learning_rate": 1.826672178599706e-05, "loss": 0.2954, "step": 2708 }, { "epoch": 0.2146167557932264, "grad_norm": 2.5104670234474016, "learning_rate": 1.826527764924841e-05, "loss": 0.4035, "step": 2709 }, { "epoch": 0.21469597940186175, "grad_norm": 2.451612315875279, "learning_rate": 1.8263832968271e-05, "loss": 0.3751, "step": 2710 }, { "epoch": 0.21477520301049713, "grad_norm": 1.8990796504331235, "learning_rate": 1.826238774315995e-05, "loss": 0.2747, "step": 2711 }, { "epoch": 0.2148544266191325, "grad_norm": 2.279804520537856, "learning_rate": 1.8260941974010425e-05, "loss": 0.2956, "step": 2712 }, { "epoch": 0.21493365022776786, "grad_norm": 1.9915139382310376, "learning_rate": 1.825949566091762e-05, "loss": 0.2541, "step": 2713 }, { "epoch": 0.21501287383640325, "grad_norm": 2.1412302963252112, "learning_rate": 1.8258048803976763e-05, "loss": 0.3052, "step": 2714 }, { "epoch": 0.21509209744503863, "grad_norm": 2.6115177209962672, "learning_rate": 1.8256601403283133e-05, "loss": 0.3075, "step": 2715 }, { "epoch": 0.215171321053674, "grad_norm": 2.740814523403143, "learning_rate": 1.8255153458932028e-05, "loss": 0.3103, "step": 2716 }, { "epoch": 0.21525054466230936, "grad_norm": 2.4482295912055134, "learning_rate": 1.825370497101879e-05, "loss": 0.3356, "step": 2717 }, { "epoch": 0.21532976827094474, "grad_norm": 2.866688289624525, "learning_rate": 1.825225593963879e-05, "loss": 0.372, "step": 2718 }, { "epoch": 0.21540899187958013, "grad_norm": 1.9197172492573251, "learning_rate": 1.8250806364887446e-05, "loss": 0.3426, "step": 2719 }, { "epoch": 0.21548821548821548, "grad_norm": 2.1916516118073965, "learning_rate": 1.8249356246860205e-05, "loss": 0.2859, "step": 2720 }, { "epoch": 0.21556743909685086, "grad_norm": 2.427429680619192, "learning_rate": 1.8247905585652545e-05, "loss": 0.4012, "step": 2721 }, { "epoch": 0.21564666270548624, "grad_norm": 1.9188622740558436, "learning_rate": 1.824645438135999e-05, "loss": 0.2689, "step": 2722 }, { "epoch": 0.2157258863141216, "grad_norm": 2.161860602553763, "learning_rate": 1.8245002634078095e-05, "loss": 0.3892, "step": 2723 }, { "epoch": 0.21580510992275698, "grad_norm": 2.607534296474651, "learning_rate": 1.8243550343902447e-05, "loss": 0.3281, "step": 2724 }, { "epoch": 0.21588433353139236, "grad_norm": 2.068957315285945, "learning_rate": 1.8242097510928672e-05, "loss": 0.3697, "step": 2725 }, { "epoch": 0.21596355714002774, "grad_norm": 1.8549841237336069, "learning_rate": 1.824064413525244e-05, "loss": 0.3334, "step": 2726 }, { "epoch": 0.2160427807486631, "grad_norm": 2.1747686167069418, "learning_rate": 1.823919021696944e-05, "loss": 0.3331, "step": 2727 }, { "epoch": 0.21612200435729848, "grad_norm": 2.4904249370413116, "learning_rate": 1.8237735756175408e-05, "loss": 0.4155, "step": 2728 }, { "epoch": 0.21620122796593386, "grad_norm": 2.0184441427647966, "learning_rate": 1.8236280752966115e-05, "loss": 0.2879, "step": 2729 }, { "epoch": 0.2162804515745692, "grad_norm": 2.9282617704471354, "learning_rate": 1.8234825207437365e-05, "loss": 0.3292, "step": 2730 }, { "epoch": 0.2163596751832046, "grad_norm": 2.3388655214942036, "learning_rate": 1.8233369119685e-05, "loss": 0.322, "step": 2731 }, { "epoch": 0.21643889879183997, "grad_norm": 2.4859934978397122, "learning_rate": 1.8231912489804893e-05, "loss": 0.3553, "step": 2732 }, { "epoch": 0.21651812240047535, "grad_norm": 2.3926347070693432, "learning_rate": 1.8230455317892957e-05, "loss": 0.2291, "step": 2733 }, { "epoch": 0.2165973460091107, "grad_norm": 2.0529055491989032, "learning_rate": 1.822899760404514e-05, "loss": 0.3131, "step": 2734 }, { "epoch": 0.2166765696177461, "grad_norm": 2.1458705751821596, "learning_rate": 1.822753934835743e-05, "loss": 0.4049, "step": 2735 }, { "epoch": 0.21675579322638147, "grad_norm": 2.6818148428180306, "learning_rate": 1.822608055092584e-05, "loss": 0.4103, "step": 2736 }, { "epoch": 0.21683501683501682, "grad_norm": 2.432146998159694, "learning_rate": 1.8224621211846426e-05, "loss": 0.5275, "step": 2737 }, { "epoch": 0.2169142404436522, "grad_norm": 2.3261395407093617, "learning_rate": 1.8223161331215285e-05, "loss": 0.3637, "step": 2738 }, { "epoch": 0.2169934640522876, "grad_norm": 2.231923908541737, "learning_rate": 1.822170090912853e-05, "loss": 0.362, "step": 2739 }, { "epoch": 0.21707268766092294, "grad_norm": 1.629742574444992, "learning_rate": 1.8220239945682337e-05, "loss": 0.193, "step": 2740 }, { "epoch": 0.21715191126955832, "grad_norm": 1.7399483003186347, "learning_rate": 1.8218778440972893e-05, "loss": 0.4342, "step": 2741 }, { "epoch": 0.2172311348781937, "grad_norm": 1.7398891607799012, "learning_rate": 1.8217316395096438e-05, "loss": 0.3162, "step": 2742 }, { "epoch": 0.21731035848682909, "grad_norm": 1.906386816586062, "learning_rate": 1.8215853808149237e-05, "loss": 0.2695, "step": 2743 }, { "epoch": 0.21738958209546444, "grad_norm": 2.133144989915411, "learning_rate": 1.8214390680227588e-05, "loss": 0.304, "step": 2744 }, { "epoch": 0.21746880570409982, "grad_norm": 2.32089456726155, "learning_rate": 1.8212927011427847e-05, "loss": 0.4325, "step": 2745 }, { "epoch": 0.2175480293127352, "grad_norm": 2.1732248436578185, "learning_rate": 1.8211462801846375e-05, "loss": 0.3641, "step": 2746 }, { "epoch": 0.21762725292137056, "grad_norm": 1.943990570786058, "learning_rate": 1.820999805157959e-05, "loss": 0.3231, "step": 2747 }, { "epoch": 0.21770647653000594, "grad_norm": 2.176463650257162, "learning_rate": 1.8208532760723937e-05, "loss": 0.3067, "step": 2748 }, { "epoch": 0.21778570013864132, "grad_norm": 2.248850988933716, "learning_rate": 1.82070669293759e-05, "loss": 0.3617, "step": 2749 }, { "epoch": 0.2178649237472767, "grad_norm": 2.3425363068354446, "learning_rate": 1.8205600557631995e-05, "loss": 0.3593, "step": 2750 }, { "epoch": 0.21794414735591205, "grad_norm": 2.451017582759792, "learning_rate": 1.8204133645588774e-05, "loss": 0.3984, "step": 2751 }, { "epoch": 0.21802337096454744, "grad_norm": 3.1209178731826217, "learning_rate": 1.8202666193342834e-05, "loss": 0.3782, "step": 2752 }, { "epoch": 0.21810259457318282, "grad_norm": 2.5570878697802053, "learning_rate": 1.8201198200990787e-05, "loss": 0.3137, "step": 2753 }, { "epoch": 0.21818181818181817, "grad_norm": 1.8973210705038372, "learning_rate": 1.8199729668629303e-05, "loss": 0.3512, "step": 2754 }, { "epoch": 0.21826104179045355, "grad_norm": 2.0668755393102156, "learning_rate": 1.8198260596355077e-05, "loss": 0.2919, "step": 2755 }, { "epoch": 0.21834026539908893, "grad_norm": 2.5307994720131815, "learning_rate": 1.8196790984264835e-05, "loss": 0.4297, "step": 2756 }, { "epoch": 0.21841948900772432, "grad_norm": 2.786472183984388, "learning_rate": 1.8195320832455347e-05, "loss": 0.3708, "step": 2757 }, { "epoch": 0.21849871261635967, "grad_norm": 2.114873282141943, "learning_rate": 1.819385014102342e-05, "loss": 0.3173, "step": 2758 }, { "epoch": 0.21857793622499505, "grad_norm": 2.090792708046673, "learning_rate": 1.8192378910065882e-05, "loss": 0.3205, "step": 2759 }, { "epoch": 0.21865715983363043, "grad_norm": 2.00289789808227, "learning_rate": 1.8190907139679614e-05, "loss": 0.3112, "step": 2760 }, { "epoch": 0.21873638344226579, "grad_norm": 2.807704205118221, "learning_rate": 1.8189434829961525e-05, "loss": 0.3694, "step": 2761 }, { "epoch": 0.21881560705090117, "grad_norm": 1.887187878775249, "learning_rate": 1.8187961981008554e-05, "loss": 0.2792, "step": 2762 }, { "epoch": 0.21889483065953655, "grad_norm": 2.1504083305840505, "learning_rate": 1.8186488592917686e-05, "loss": 0.3135, "step": 2763 }, { "epoch": 0.2189740542681719, "grad_norm": 2.740866856024987, "learning_rate": 1.8185014665785936e-05, "loss": 0.4019, "step": 2764 }, { "epoch": 0.21905327787680728, "grad_norm": 2.211419492581595, "learning_rate": 1.8183540199710354e-05, "loss": 0.3296, "step": 2765 }, { "epoch": 0.21913250148544267, "grad_norm": 2.293345951867348, "learning_rate": 1.8182065194788024e-05, "loss": 0.3739, "step": 2766 }, { "epoch": 0.21921172509407805, "grad_norm": 2.777503593857627, "learning_rate": 1.8180589651116073e-05, "loss": 0.425, "step": 2767 }, { "epoch": 0.2192909487027134, "grad_norm": 2.0841800283325607, "learning_rate": 1.8179113568791656e-05, "loss": 0.3845, "step": 2768 }, { "epoch": 0.21937017231134878, "grad_norm": 2.5131529791407425, "learning_rate": 1.8177636947911964e-05, "loss": 0.5106, "step": 2769 }, { "epoch": 0.21944939591998416, "grad_norm": 2.2825466722925296, "learning_rate": 1.817615978857423e-05, "loss": 0.3962, "step": 2770 }, { "epoch": 0.21952861952861952, "grad_norm": 2.7829011487305313, "learning_rate": 1.8174682090875713e-05, "loss": 0.4594, "step": 2771 }, { "epoch": 0.2196078431372549, "grad_norm": 2.0260529137262693, "learning_rate": 1.8173203854913714e-05, "loss": 0.3251, "step": 2772 }, { "epoch": 0.21968706674589028, "grad_norm": 2.0936671791159642, "learning_rate": 1.817172508078557e-05, "loss": 0.3519, "step": 2773 }, { "epoch": 0.21976629035452566, "grad_norm": 2.0715936918818487, "learning_rate": 1.817024576858865e-05, "loss": 0.3339, "step": 2774 }, { "epoch": 0.21984551396316102, "grad_norm": 2.51740792084349, "learning_rate": 1.8168765918420358e-05, "loss": 0.3975, "step": 2775 }, { "epoch": 0.2199247375717964, "grad_norm": 2.2851669092555076, "learning_rate": 1.8167285530378134e-05, "loss": 0.3576, "step": 2776 }, { "epoch": 0.22000396118043178, "grad_norm": 1.9428991776563813, "learning_rate": 1.8165804604559455e-05, "loss": 0.3135, "step": 2777 }, { "epoch": 0.22008318478906713, "grad_norm": 1.9706812462745862, "learning_rate": 1.816432314106184e-05, "loss": 0.2432, "step": 2778 }, { "epoch": 0.2201624083977025, "grad_norm": 2.0641478005522074, "learning_rate": 1.8162841139982827e-05, "loss": 0.3255, "step": 2779 }, { "epoch": 0.2202416320063379, "grad_norm": 2.0162538964558623, "learning_rate": 1.816135860142e-05, "loss": 0.3206, "step": 2780 }, { "epoch": 0.22032085561497325, "grad_norm": 1.8413196277632444, "learning_rate": 1.8159875525470984e-05, "loss": 0.3019, "step": 2781 }, { "epoch": 0.22040007922360863, "grad_norm": 1.9624042378974955, "learning_rate": 1.815839191223342e-05, "loss": 0.2958, "step": 2782 }, { "epoch": 0.220479302832244, "grad_norm": 2.5618488688306167, "learning_rate": 1.815690776180501e-05, "loss": 0.4318, "step": 2783 }, { "epoch": 0.2205585264408794, "grad_norm": 2.446665133912094, "learning_rate": 1.815542307428347e-05, "loss": 0.4155, "step": 2784 }, { "epoch": 0.22063775004951475, "grad_norm": 2.424049778856075, "learning_rate": 1.8153937849766567e-05, "loss": 0.2669, "step": 2785 }, { "epoch": 0.22071697365815013, "grad_norm": 2.1478646318804846, "learning_rate": 1.8152452088352084e-05, "loss": 0.372, "step": 2786 }, { "epoch": 0.2207961972667855, "grad_norm": 2.3955112163845107, "learning_rate": 1.8150965790137863e-05, "loss": 0.3584, "step": 2787 }, { "epoch": 0.22087542087542086, "grad_norm": 2.661875222645604, "learning_rate": 1.814947895522176e-05, "loss": 0.3316, "step": 2788 }, { "epoch": 0.22095464448405625, "grad_norm": 2.325175855490434, "learning_rate": 1.8147991583701685e-05, "loss": 0.39, "step": 2789 }, { "epoch": 0.22103386809269163, "grad_norm": 2.199277321313363, "learning_rate": 1.8146503675675568e-05, "loss": 0.356, "step": 2790 }, { "epoch": 0.221113091701327, "grad_norm": 2.310229606721889, "learning_rate": 1.814501523124138e-05, "loss": 0.3727, "step": 2791 }, { "epoch": 0.22119231530996236, "grad_norm": 2.417042694775412, "learning_rate": 1.8143526250497134e-05, "loss": 0.3515, "step": 2792 }, { "epoch": 0.22127153891859774, "grad_norm": 2.3438622600553893, "learning_rate": 1.8142036733540868e-05, "loss": 0.3176, "step": 2793 }, { "epoch": 0.22135076252723312, "grad_norm": 2.8023696645605662, "learning_rate": 1.814054668047066e-05, "loss": 0.4563, "step": 2794 }, { "epoch": 0.22142998613586848, "grad_norm": 2.0967926876237972, "learning_rate": 1.8139056091384623e-05, "loss": 0.3873, "step": 2795 }, { "epoch": 0.22150920974450386, "grad_norm": 1.9080582301906857, "learning_rate": 1.8137564966380905e-05, "loss": 0.2884, "step": 2796 }, { "epoch": 0.22158843335313924, "grad_norm": 2.5659845759948277, "learning_rate": 1.813607330555769e-05, "loss": 0.4875, "step": 2797 }, { "epoch": 0.2216676569617746, "grad_norm": 2.4479632119932115, "learning_rate": 1.8134581109013193e-05, "loss": 0.3686, "step": 2798 }, { "epoch": 0.22174688057040998, "grad_norm": 2.1075284303966244, "learning_rate": 1.8133088376845675e-05, "loss": 0.3733, "step": 2799 }, { "epoch": 0.22182610417904536, "grad_norm": 2.1122372908019966, "learning_rate": 1.8131595109153416e-05, "loss": 0.3249, "step": 2800 }, { "epoch": 0.22190532778768074, "grad_norm": 2.4272495035187176, "learning_rate": 1.813010130603475e-05, "loss": 0.3467, "step": 2801 }, { "epoch": 0.2219845513963161, "grad_norm": 2.4179265379896786, "learning_rate": 1.812860696758803e-05, "loss": 0.3454, "step": 2802 }, { "epoch": 0.22206377500495147, "grad_norm": 2.1448270930339586, "learning_rate": 1.8127112093911655e-05, "loss": 0.33, "step": 2803 }, { "epoch": 0.22214299861358686, "grad_norm": 2.104691498733774, "learning_rate": 1.8125616685104055e-05, "loss": 0.3561, "step": 2804 }, { "epoch": 0.2222222222222222, "grad_norm": 2.885218233425823, "learning_rate": 1.8124120741263692e-05, "loss": 0.4748, "step": 2805 }, { "epoch": 0.2223014458308576, "grad_norm": 2.6782122074087766, "learning_rate": 1.812262426248907e-05, "loss": 0.3291, "step": 2806 }, { "epoch": 0.22238066943949297, "grad_norm": 1.9251078060252946, "learning_rate": 1.8121127248878726e-05, "loss": 0.2945, "step": 2807 }, { "epoch": 0.22245989304812835, "grad_norm": 2.5882451306476177, "learning_rate": 1.8119629700531228e-05, "loss": 0.4597, "step": 2808 }, { "epoch": 0.2225391166567637, "grad_norm": 1.913229999193114, "learning_rate": 1.8118131617545183e-05, "loss": 0.3435, "step": 2809 }, { "epoch": 0.2226183402653991, "grad_norm": 3.2774521290088243, "learning_rate": 1.8116633000019233e-05, "loss": 0.3352, "step": 2810 }, { "epoch": 0.22269756387403447, "grad_norm": 2.1927178790263553, "learning_rate": 1.8115133848052052e-05, "loss": 0.3588, "step": 2811 }, { "epoch": 0.22277678748266982, "grad_norm": 1.8271179319107111, "learning_rate": 1.8113634161742356e-05, "loss": 0.2917, "step": 2812 }, { "epoch": 0.2228560110913052, "grad_norm": 1.9939738517158243, "learning_rate": 1.8112133941188892e-05, "loss": 0.2463, "step": 2813 }, { "epoch": 0.2229352346999406, "grad_norm": 2.115788530061031, "learning_rate": 1.811063318649044e-05, "loss": 0.3847, "step": 2814 }, { "epoch": 0.22301445830857597, "grad_norm": 2.3259541606821053, "learning_rate": 1.8109131897745823e-05, "loss": 0.5999, "step": 2815 }, { "epoch": 0.22309368191721132, "grad_norm": 1.9979988228489234, "learning_rate": 1.8107630075053883e-05, "loss": 0.2689, "step": 2816 }, { "epoch": 0.2231729055258467, "grad_norm": 2.576057003769713, "learning_rate": 1.810612771851352e-05, "loss": 0.4017, "step": 2817 }, { "epoch": 0.22325212913448209, "grad_norm": 2.099998067135014, "learning_rate": 1.8104624828223644e-05, "loss": 0.2889, "step": 2818 }, { "epoch": 0.22333135274311744, "grad_norm": 2.064592884476095, "learning_rate": 1.8103121404283222e-05, "loss": 0.2691, "step": 2819 }, { "epoch": 0.22341057635175282, "grad_norm": 2.699722248009324, "learning_rate": 1.8101617446791248e-05, "loss": 0.4082, "step": 2820 }, { "epoch": 0.2234897999603882, "grad_norm": 2.3072058492940672, "learning_rate": 1.8100112955846746e-05, "loss": 0.3894, "step": 2821 }, { "epoch": 0.22356902356902356, "grad_norm": 1.8478529110795325, "learning_rate": 1.8098607931548782e-05, "loss": 0.2974, "step": 2822 }, { "epoch": 0.22364824717765894, "grad_norm": 2.1023917488988726, "learning_rate": 1.8097102373996453e-05, "loss": 0.2879, "step": 2823 }, { "epoch": 0.22372747078629432, "grad_norm": 2.4842397385833155, "learning_rate": 1.809559628328889e-05, "loss": 0.3728, "step": 2824 }, { "epoch": 0.2238066943949297, "grad_norm": 2.748658895467443, "learning_rate": 1.8094089659525274e-05, "loss": 0.2689, "step": 2825 }, { "epoch": 0.22388591800356505, "grad_norm": 2.432126920062806, "learning_rate": 1.8092582502804793e-05, "loss": 0.3129, "step": 2826 }, { "epoch": 0.22396514161220044, "grad_norm": 2.480648062131945, "learning_rate": 1.8091074813226696e-05, "loss": 0.3336, "step": 2827 }, { "epoch": 0.22404436522083582, "grad_norm": 2.439638467305275, "learning_rate": 1.8089566590890253e-05, "loss": 0.2972, "step": 2828 }, { "epoch": 0.22412358882947117, "grad_norm": 1.9591642001963525, "learning_rate": 1.8088057835894775e-05, "loss": 0.2406, "step": 2829 }, { "epoch": 0.22420281243810655, "grad_norm": 1.9939341737486795, "learning_rate": 1.8086548548339604e-05, "loss": 0.3465, "step": 2830 }, { "epoch": 0.22428203604674193, "grad_norm": 2.522427302150872, "learning_rate": 1.8085038728324123e-05, "loss": 0.4351, "step": 2831 }, { "epoch": 0.22436125965537732, "grad_norm": 2.4295206811675967, "learning_rate": 1.8083528375947744e-05, "loss": 0.3199, "step": 2832 }, { "epoch": 0.22444048326401267, "grad_norm": 2.316738940300317, "learning_rate": 1.808201749130992e-05, "loss": 0.4538, "step": 2833 }, { "epoch": 0.22451970687264805, "grad_norm": 2.2986382210777183, "learning_rate": 1.8080506074510128e-05, "loss": 0.3835, "step": 2834 }, { "epoch": 0.22459893048128343, "grad_norm": 1.905129539710133, "learning_rate": 1.8078994125647896e-05, "loss": 0.2711, "step": 2835 }, { "epoch": 0.22467815408991879, "grad_norm": 1.8255445852897605, "learning_rate": 1.807748164482277e-05, "loss": 0.2794, "step": 2836 }, { "epoch": 0.22475737769855417, "grad_norm": 2.0904691221940537, "learning_rate": 1.8075968632134343e-05, "loss": 0.3683, "step": 2837 }, { "epoch": 0.22483660130718955, "grad_norm": 1.905015642312692, "learning_rate": 1.8074455087682247e-05, "loss": 0.275, "step": 2838 }, { "epoch": 0.2249158249158249, "grad_norm": 2.2645497651893267, "learning_rate": 1.8072941011566133e-05, "loss": 0.3043, "step": 2839 }, { "epoch": 0.22499504852446028, "grad_norm": 2.3235871738430007, "learning_rate": 1.8071426403885698e-05, "loss": 0.3646, "step": 2840 }, { "epoch": 0.22507427213309567, "grad_norm": 2.2783148377895204, "learning_rate": 1.8069911264740667e-05, "loss": 0.4315, "step": 2841 }, { "epoch": 0.22515349574173105, "grad_norm": 1.6764015652476059, "learning_rate": 1.8068395594230815e-05, "loss": 0.2442, "step": 2842 }, { "epoch": 0.2252327193503664, "grad_norm": 2.1534012111542484, "learning_rate": 1.8066879392455932e-05, "loss": 0.2912, "step": 2843 }, { "epoch": 0.22531194295900178, "grad_norm": 1.8884219546090963, "learning_rate": 1.8065362659515856e-05, "loss": 0.3183, "step": 2844 }, { "epoch": 0.22539116656763716, "grad_norm": 1.5850970041712427, "learning_rate": 1.806384539551046e-05, "loss": 0.1986, "step": 2845 }, { "epoch": 0.22547039017627252, "grad_norm": 2.0475579202181295, "learning_rate": 1.8062327600539643e-05, "loss": 0.4377, "step": 2846 }, { "epoch": 0.2255496137849079, "grad_norm": 1.9607631637609906, "learning_rate": 1.8060809274703352e-05, "loss": 0.2975, "step": 2847 }, { "epoch": 0.22562883739354328, "grad_norm": 2.2140828779319746, "learning_rate": 1.805929041810155e-05, "loss": 0.2717, "step": 2848 }, { "epoch": 0.22570806100217866, "grad_norm": 1.7974567746149128, "learning_rate": 1.8057771030834255e-05, "loss": 0.2751, "step": 2849 }, { "epoch": 0.22578728461081402, "grad_norm": 2.386475211878843, "learning_rate": 1.8056251113001508e-05, "loss": 0.3615, "step": 2850 }, { "epoch": 0.2258665082194494, "grad_norm": 2.49752307099072, "learning_rate": 1.8054730664703393e-05, "loss": 0.3746, "step": 2851 }, { "epoch": 0.22594573182808478, "grad_norm": 2.7967442450321873, "learning_rate": 1.8053209686040017e-05, "loss": 0.3949, "step": 2852 }, { "epoch": 0.22602495543672013, "grad_norm": 1.619050510468112, "learning_rate": 1.8051688177111532e-05, "loss": 0.2148, "step": 2853 }, { "epoch": 0.2261041790453555, "grad_norm": 1.7740582455891074, "learning_rate": 1.805016613801813e-05, "loss": 0.2236, "step": 2854 }, { "epoch": 0.2261834026539909, "grad_norm": 2.4837249942069204, "learning_rate": 1.8048643568860015e-05, "loss": 0.4577, "step": 2855 }, { "epoch": 0.22626262626262628, "grad_norm": 2.2016832637598642, "learning_rate": 1.804712046973745e-05, "loss": 0.3829, "step": 2856 }, { "epoch": 0.22634184987126163, "grad_norm": 2.438095061444324, "learning_rate": 1.8045596840750722e-05, "loss": 0.4635, "step": 2857 }, { "epoch": 0.226421073479897, "grad_norm": 2.3786191331942166, "learning_rate": 1.804407268200016e-05, "loss": 0.5126, "step": 2858 }, { "epoch": 0.2265002970885324, "grad_norm": 1.7382877157962437, "learning_rate": 1.8042547993586114e-05, "loss": 0.2779, "step": 2859 }, { "epoch": 0.22657952069716775, "grad_norm": 2.2580177520879516, "learning_rate": 1.8041022775608977e-05, "loss": 0.348, "step": 2860 }, { "epoch": 0.22665874430580313, "grad_norm": 2.1685683161357328, "learning_rate": 1.803949702816919e-05, "loss": 0.426, "step": 2861 }, { "epoch": 0.2267379679144385, "grad_norm": 2.1460350431188933, "learning_rate": 1.80379707513672e-05, "loss": 0.3913, "step": 2862 }, { "epoch": 0.22681719152307386, "grad_norm": 2.138814703615336, "learning_rate": 1.8036443945303514e-05, "loss": 0.3307, "step": 2863 }, { "epoch": 0.22689641513170924, "grad_norm": 2.5653723267752233, "learning_rate": 1.8034916610078665e-05, "loss": 0.3779, "step": 2864 }, { "epoch": 0.22697563874034463, "grad_norm": 2.4518404752264624, "learning_rate": 1.8033388745793218e-05, "loss": 0.3643, "step": 2865 }, { "epoch": 0.22705486234898, "grad_norm": 2.62512043388419, "learning_rate": 1.8031860352547777e-05, "loss": 0.5207, "step": 2866 }, { "epoch": 0.22713408595761536, "grad_norm": 2.3470155365932794, "learning_rate": 1.8030331430442974e-05, "loss": 0.421, "step": 2867 }, { "epoch": 0.22721330956625074, "grad_norm": 1.8310345204256284, "learning_rate": 1.8028801979579487e-05, "loss": 0.3591, "step": 2868 }, { "epoch": 0.22729253317488612, "grad_norm": 3.251820785918148, "learning_rate": 1.8027272000058028e-05, "loss": 0.3722, "step": 2869 }, { "epoch": 0.22737175678352148, "grad_norm": 2.0047297137284215, "learning_rate": 1.8025741491979326e-05, "loss": 0.3068, "step": 2870 }, { "epoch": 0.22745098039215686, "grad_norm": 2.103550627420197, "learning_rate": 1.8024210455444168e-05, "loss": 0.3744, "step": 2871 }, { "epoch": 0.22753020400079224, "grad_norm": 2.3269590023557325, "learning_rate": 1.8022678890553364e-05, "loss": 0.3646, "step": 2872 }, { "epoch": 0.22760942760942762, "grad_norm": 2.0459352564615263, "learning_rate": 1.8021146797407752e-05, "loss": 0.3454, "step": 2873 }, { "epoch": 0.22768865121806298, "grad_norm": 1.9884501680386073, "learning_rate": 1.801961417610822e-05, "loss": 0.3498, "step": 2874 }, { "epoch": 0.22776787482669836, "grad_norm": 2.473846253022974, "learning_rate": 1.801808102675568e-05, "loss": 0.4022, "step": 2875 }, { "epoch": 0.22784709843533374, "grad_norm": 1.5653616473294338, "learning_rate": 1.801654734945109e-05, "loss": 0.2476, "step": 2876 }, { "epoch": 0.2279263220439691, "grad_norm": 2.468943502463552, "learning_rate": 1.801501314429543e-05, "loss": 0.333, "step": 2877 }, { "epoch": 0.22800554565260447, "grad_norm": 1.8586663229271703, "learning_rate": 1.801347841138972e-05, "loss": 0.3301, "step": 2878 }, { "epoch": 0.22808476926123986, "grad_norm": 2.293706848926581, "learning_rate": 1.8011943150835013e-05, "loss": 0.3425, "step": 2879 }, { "epoch": 0.2281639928698752, "grad_norm": 2.32557040060527, "learning_rate": 1.80104073627324e-05, "loss": 0.3143, "step": 2880 }, { "epoch": 0.2282432164785106, "grad_norm": 2.542357365686963, "learning_rate": 1.8008871047183005e-05, "loss": 0.3799, "step": 2881 }, { "epoch": 0.22832244008714597, "grad_norm": 1.6391160903639728, "learning_rate": 1.800733420428799e-05, "loss": 0.2466, "step": 2882 }, { "epoch": 0.22840166369578135, "grad_norm": 2.2224153235736717, "learning_rate": 1.8005796834148545e-05, "loss": 0.246, "step": 2883 }, { "epoch": 0.2284808873044167, "grad_norm": 2.1309638642878945, "learning_rate": 1.8004258936865902e-05, "loss": 0.3976, "step": 2884 }, { "epoch": 0.2285601109130521, "grad_norm": 2.4139720687491937, "learning_rate": 1.800272051254132e-05, "loss": 0.3553, "step": 2885 }, { "epoch": 0.22863933452168747, "grad_norm": 2.435499357951839, "learning_rate": 1.80011815612761e-05, "loss": 0.3988, "step": 2886 }, { "epoch": 0.22871855813032282, "grad_norm": 1.879262567029515, "learning_rate": 1.7999642083171576e-05, "loss": 0.2874, "step": 2887 }, { "epoch": 0.2287977817389582, "grad_norm": 2.023245868222001, "learning_rate": 1.799810207832911e-05, "loss": 0.2938, "step": 2888 }, { "epoch": 0.2288770053475936, "grad_norm": 2.6685608718201426, "learning_rate": 1.7996561546850105e-05, "loss": 0.3572, "step": 2889 }, { "epoch": 0.22895622895622897, "grad_norm": 2.2130170651991192, "learning_rate": 1.7995020488836e-05, "loss": 0.233, "step": 2890 }, { "epoch": 0.22903545256486432, "grad_norm": 2.154666875445534, "learning_rate": 1.799347890438827e-05, "loss": 0.3709, "step": 2891 }, { "epoch": 0.2291146761734997, "grad_norm": 2.0188603554095077, "learning_rate": 1.799193679360841e-05, "loss": 0.3068, "step": 2892 }, { "epoch": 0.22919389978213509, "grad_norm": 1.818362308464351, "learning_rate": 1.799039415659797e-05, "loss": 0.2834, "step": 2893 }, { "epoch": 0.22927312339077044, "grad_norm": 2.235979602354852, "learning_rate": 1.798885099345852e-05, "loss": 0.3462, "step": 2894 }, { "epoch": 0.22935234699940582, "grad_norm": 2.4960063989619035, "learning_rate": 1.7987307304291676e-05, "loss": 0.3284, "step": 2895 }, { "epoch": 0.2294315706080412, "grad_norm": 2.7434744305484506, "learning_rate": 1.7985763089199073e-05, "loss": 0.3514, "step": 2896 }, { "epoch": 0.22951079421667656, "grad_norm": 1.9845034730050122, "learning_rate": 1.79842183482824e-05, "loss": 0.2858, "step": 2897 }, { "epoch": 0.22959001782531194, "grad_norm": 1.9666854595762389, "learning_rate": 1.7982673081643364e-05, "loss": 0.216, "step": 2898 }, { "epoch": 0.22966924143394732, "grad_norm": 1.9169888753826316, "learning_rate": 1.7981127289383718e-05, "loss": 0.2515, "step": 2899 }, { "epoch": 0.2297484650425827, "grad_norm": 2.332755771807327, "learning_rate": 1.797958097160524e-05, "loss": 0.3947, "step": 2900 }, { "epoch": 0.22982768865121805, "grad_norm": 2.158902626130824, "learning_rate": 1.797803412840975e-05, "loss": 0.3433, "step": 2901 }, { "epoch": 0.22990691225985344, "grad_norm": 1.9914693487620776, "learning_rate": 1.7976486759899103e-05, "loss": 0.3463, "step": 2902 }, { "epoch": 0.22998613586848882, "grad_norm": 2.2033191304166104, "learning_rate": 1.797493886617518e-05, "loss": 0.3565, "step": 2903 }, { "epoch": 0.23006535947712417, "grad_norm": 2.0778455581291357, "learning_rate": 1.797339044733991e-05, "loss": 0.3573, "step": 2904 }, { "epoch": 0.23014458308575955, "grad_norm": 2.4994002279753267, "learning_rate": 1.797184150349524e-05, "loss": 0.3831, "step": 2905 }, { "epoch": 0.23022380669439493, "grad_norm": 1.9710331465542317, "learning_rate": 1.7970292034743172e-05, "loss": 0.2739, "step": 2906 }, { "epoch": 0.23030303030303031, "grad_norm": 2.231762668543122, "learning_rate": 1.7968742041185718e-05, "loss": 0.3545, "step": 2907 }, { "epoch": 0.23038225391166567, "grad_norm": 2.4829822610232313, "learning_rate": 1.7967191522924946e-05, "loss": 0.3644, "step": 2908 }, { "epoch": 0.23046147752030105, "grad_norm": 2.4002838104282223, "learning_rate": 1.7965640480062945e-05, "loss": 0.4051, "step": 2909 }, { "epoch": 0.23054070112893643, "grad_norm": 2.2386224198187556, "learning_rate": 1.796408891270185e-05, "loss": 0.3959, "step": 2910 }, { "epoch": 0.23061992473757179, "grad_norm": 2.4165159631742323, "learning_rate": 1.7962536820943822e-05, "loss": 0.3215, "step": 2911 }, { "epoch": 0.23069914834620717, "grad_norm": 2.71360000118413, "learning_rate": 1.7960984204891055e-05, "loss": 0.471, "step": 2912 }, { "epoch": 0.23077837195484255, "grad_norm": 2.3426652598697677, "learning_rate": 1.7959431064645786e-05, "loss": 0.3228, "step": 2913 }, { "epoch": 0.23085759556347793, "grad_norm": 2.193463315427334, "learning_rate": 1.7957877400310275e-05, "loss": 0.4157, "step": 2914 }, { "epoch": 0.23093681917211328, "grad_norm": 2.136276471583766, "learning_rate": 1.7956323211986833e-05, "loss": 0.3445, "step": 2915 }, { "epoch": 0.23101604278074866, "grad_norm": 1.7871354320514736, "learning_rate": 1.795476849977779e-05, "loss": 0.3168, "step": 2916 }, { "epoch": 0.23109526638938405, "grad_norm": 2.011408081414906, "learning_rate": 1.7953213263785513e-05, "loss": 0.3476, "step": 2917 }, { "epoch": 0.2311744899980194, "grad_norm": 2.0562473538107606, "learning_rate": 1.7951657504112416e-05, "loss": 0.3409, "step": 2918 }, { "epoch": 0.23125371360665478, "grad_norm": 2.1752303766096164, "learning_rate": 1.795010122086093e-05, "loss": 0.4139, "step": 2919 }, { "epoch": 0.23133293721529016, "grad_norm": 1.8107573650843167, "learning_rate": 1.7948544414133534e-05, "loss": 0.32, "step": 2920 }, { "epoch": 0.23141216082392552, "grad_norm": 1.5762747209352221, "learning_rate": 1.7946987084032733e-05, "loss": 0.2817, "step": 2921 }, { "epoch": 0.2314913844325609, "grad_norm": 2.0999905667464174, "learning_rate": 1.794542923066107e-05, "loss": 0.3092, "step": 2922 }, { "epoch": 0.23157060804119628, "grad_norm": 2.6584197408651806, "learning_rate": 1.7943870854121126e-05, "loss": 0.3684, "step": 2923 }, { "epoch": 0.23164983164983166, "grad_norm": 1.9513772617706642, "learning_rate": 1.794231195451551e-05, "loss": 0.2871, "step": 2924 }, { "epoch": 0.23172905525846701, "grad_norm": 4.495199451645402, "learning_rate": 1.7940752531946867e-05, "loss": 0.3833, "step": 2925 }, { "epoch": 0.2318082788671024, "grad_norm": 2.1564859049950815, "learning_rate": 1.793919258651788e-05, "loss": 0.3519, "step": 2926 }, { "epoch": 0.23188750247573778, "grad_norm": 1.979307986257514, "learning_rate": 1.7937632118331255e-05, "loss": 0.3346, "step": 2927 }, { "epoch": 0.23196672608437313, "grad_norm": 2.261647990004656, "learning_rate": 1.7936071127489755e-05, "loss": 0.4053, "step": 2928 }, { "epoch": 0.2320459496930085, "grad_norm": 2.0955501333327318, "learning_rate": 1.7934509614096156e-05, "loss": 0.285, "step": 2929 }, { "epoch": 0.2321251733016439, "grad_norm": 1.9447321125912682, "learning_rate": 1.7932947578253273e-05, "loss": 0.2999, "step": 2930 }, { "epoch": 0.23220439691027928, "grad_norm": 2.3961018801608054, "learning_rate": 1.793138502006397e-05, "loss": 0.3123, "step": 2931 }, { "epoch": 0.23228362051891463, "grad_norm": 2.248735843211351, "learning_rate": 1.792982193963112e-05, "loss": 0.2409, "step": 2932 }, { "epoch": 0.23236284412755, "grad_norm": 2.1466220519787744, "learning_rate": 1.7928258337057657e-05, "loss": 0.3785, "step": 2933 }, { "epoch": 0.2324420677361854, "grad_norm": 2.3267080861466907, "learning_rate": 1.792669421244653e-05, "loss": 0.3328, "step": 2934 }, { "epoch": 0.23252129134482075, "grad_norm": 2.191770307734201, "learning_rate": 1.7925129565900728e-05, "loss": 0.3971, "step": 2935 }, { "epoch": 0.23260051495345613, "grad_norm": 2.241527801496312, "learning_rate": 1.792356439752328e-05, "loss": 0.3226, "step": 2936 }, { "epoch": 0.2326797385620915, "grad_norm": 2.235423542564452, "learning_rate": 1.792199870741724e-05, "loss": 0.3113, "step": 2937 }, { "epoch": 0.23275896217072686, "grad_norm": 2.2830173602699246, "learning_rate": 1.79204324956857e-05, "loss": 0.3799, "step": 2938 }, { "epoch": 0.23283818577936224, "grad_norm": 2.084657360357404, "learning_rate": 1.7918865762431794e-05, "loss": 0.2937, "step": 2939 }, { "epoch": 0.23291740938799763, "grad_norm": 2.3708023727606014, "learning_rate": 1.7917298507758684e-05, "loss": 0.2781, "step": 2940 }, { "epoch": 0.232996632996633, "grad_norm": 1.9726311656189268, "learning_rate": 1.7915730731769558e-05, "loss": 0.2762, "step": 2941 }, { "epoch": 0.23307585660526836, "grad_norm": 2.559575870281985, "learning_rate": 1.7914162434567653e-05, "loss": 0.4123, "step": 2942 }, { "epoch": 0.23315508021390374, "grad_norm": 2.2646105556653824, "learning_rate": 1.791259361625623e-05, "loss": 0.4099, "step": 2943 }, { "epoch": 0.23323430382253912, "grad_norm": 1.975575846869981, "learning_rate": 1.7911024276938595e-05, "loss": 0.3123, "step": 2944 }, { "epoch": 0.23331352743117448, "grad_norm": 2.2585021786528845, "learning_rate": 1.7909454416718075e-05, "loss": 0.3184, "step": 2945 }, { "epoch": 0.23339275103980986, "grad_norm": 2.0979119072745047, "learning_rate": 1.790788403569804e-05, "loss": 0.3558, "step": 2946 }, { "epoch": 0.23347197464844524, "grad_norm": 2.191737209229685, "learning_rate": 1.7906313133981887e-05, "loss": 0.3658, "step": 2947 }, { "epoch": 0.23355119825708062, "grad_norm": 2.040410647235041, "learning_rate": 1.7904741711673064e-05, "loss": 0.3607, "step": 2948 }, { "epoch": 0.23363042186571598, "grad_norm": 1.83934056089197, "learning_rate": 1.790316976887503e-05, "loss": 0.2585, "step": 2949 }, { "epoch": 0.23370964547435136, "grad_norm": 2.19022147701637, "learning_rate": 1.7901597305691294e-05, "loss": 0.305, "step": 2950 }, { "epoch": 0.23378886908298674, "grad_norm": 2.200737740426933, "learning_rate": 1.7900024322225394e-05, "loss": 0.3319, "step": 2951 }, { "epoch": 0.2338680926916221, "grad_norm": 2.024506765875974, "learning_rate": 1.789845081858091e-05, "loss": 0.393, "step": 2952 }, { "epoch": 0.23394731630025747, "grad_norm": 2.130022689939545, "learning_rate": 1.7896876794861443e-05, "loss": 0.3281, "step": 2953 }, { "epoch": 0.23402653990889286, "grad_norm": 1.9970757082441841, "learning_rate": 1.7895302251170636e-05, "loss": 0.3131, "step": 2954 }, { "epoch": 0.23410576351752824, "grad_norm": 2.630158114518891, "learning_rate": 1.789372718761216e-05, "loss": 0.3403, "step": 2955 }, { "epoch": 0.2341849871261636, "grad_norm": 2.426182844574413, "learning_rate": 1.7892151604289738e-05, "loss": 0.4409, "step": 2956 }, { "epoch": 0.23426421073479897, "grad_norm": 2.5788186681048573, "learning_rate": 1.7890575501307105e-05, "loss": 0.3494, "step": 2957 }, { "epoch": 0.23434343434343435, "grad_norm": 2.2183709054173018, "learning_rate": 1.7888998878768045e-05, "loss": 0.3267, "step": 2958 }, { "epoch": 0.2344226579520697, "grad_norm": 1.8539802798757794, "learning_rate": 1.7887421736776364e-05, "loss": 0.2039, "step": 2959 }, { "epoch": 0.2345018815607051, "grad_norm": 2.0528093366755304, "learning_rate": 1.7885844075435915e-05, "loss": 0.3669, "step": 2960 }, { "epoch": 0.23458110516934047, "grad_norm": 2.6408979751390036, "learning_rate": 1.788426589485058e-05, "loss": 0.3789, "step": 2961 }, { "epoch": 0.23466032877797582, "grad_norm": 2.314651527578581, "learning_rate": 1.788268719512427e-05, "loss": 0.3333, "step": 2962 }, { "epoch": 0.2347395523866112, "grad_norm": 1.9330681335377402, "learning_rate": 1.788110797636094e-05, "loss": 0.3406, "step": 2963 }, { "epoch": 0.2348187759952466, "grad_norm": 2.151811889862483, "learning_rate": 1.7879528238664567e-05, "loss": 0.3437, "step": 2964 }, { "epoch": 0.23489799960388197, "grad_norm": 2.103264771537712, "learning_rate": 1.7877947982139177e-05, "loss": 0.3799, "step": 2965 }, { "epoch": 0.23497722321251732, "grad_norm": 2.2414401511680517, "learning_rate": 1.7876367206888817e-05, "loss": 0.383, "step": 2966 }, { "epoch": 0.2350564468211527, "grad_norm": 2.026698573553762, "learning_rate": 1.7874785913017575e-05, "loss": 0.2826, "step": 2967 }, { "epoch": 0.23513567042978809, "grad_norm": 2.0734253480544504, "learning_rate": 1.7873204100629572e-05, "loss": 0.3227, "step": 2968 }, { "epoch": 0.23521489403842344, "grad_norm": 2.240976789977317, "learning_rate": 1.7871621769828965e-05, "loss": 0.433, "step": 2969 }, { "epoch": 0.23529411764705882, "grad_norm": 2.3453524362665417, "learning_rate": 1.7870038920719935e-05, "loss": 0.2354, "step": 2970 }, { "epoch": 0.2353733412556942, "grad_norm": 2.4563098030517856, "learning_rate": 1.7868455553406713e-05, "loss": 0.467, "step": 2971 }, { "epoch": 0.23545256486432958, "grad_norm": 2.0869761006063157, "learning_rate": 1.7866871667993554e-05, "loss": 0.3801, "step": 2972 }, { "epoch": 0.23553178847296494, "grad_norm": 2.103244513662612, "learning_rate": 1.786528726458475e-05, "loss": 0.3371, "step": 2973 }, { "epoch": 0.23561101208160032, "grad_norm": 1.763642524301725, "learning_rate": 1.786370234328462e-05, "loss": 0.3112, "step": 2974 }, { "epoch": 0.2356902356902357, "grad_norm": 2.259186435823224, "learning_rate": 1.7862116904197534e-05, "loss": 0.2928, "step": 2975 }, { "epoch": 0.23576945929887105, "grad_norm": 2.194096374360557, "learning_rate": 1.7860530947427878e-05, "loss": 0.4277, "step": 2976 }, { "epoch": 0.23584868290750644, "grad_norm": 2.4190719272387464, "learning_rate": 1.785894447308008e-05, "loss": 0.4561, "step": 2977 }, { "epoch": 0.23592790651614182, "grad_norm": 2.204531321281794, "learning_rate": 1.7857357481258603e-05, "loss": 0.2377, "step": 2978 }, { "epoch": 0.23600713012477717, "grad_norm": 1.7940348682390632, "learning_rate": 1.7855769972067944e-05, "loss": 0.2291, "step": 2979 }, { "epoch": 0.23608635373341255, "grad_norm": 2.155516029656373, "learning_rate": 1.785418194561263e-05, "loss": 0.3892, "step": 2980 }, { "epoch": 0.23616557734204793, "grad_norm": 2.2386092723063276, "learning_rate": 1.7852593401997232e-05, "loss": 0.3022, "step": 2981 }, { "epoch": 0.23624480095068331, "grad_norm": 2.469883286408481, "learning_rate": 1.785100434132634e-05, "loss": 0.3573, "step": 2982 }, { "epoch": 0.23632402455931867, "grad_norm": 1.8210031716987176, "learning_rate": 1.7849414763704587e-05, "loss": 0.3088, "step": 2983 }, { "epoch": 0.23640324816795405, "grad_norm": 1.959552410119995, "learning_rate": 1.7847824669236643e-05, "loss": 0.2877, "step": 2984 }, { "epoch": 0.23648247177658943, "grad_norm": 2.4431766471431065, "learning_rate": 1.7846234058027207e-05, "loss": 0.3203, "step": 2985 }, { "epoch": 0.23656169538522479, "grad_norm": 2.118968639306495, "learning_rate": 1.7844642930181008e-05, "loss": 0.3997, "step": 2986 }, { "epoch": 0.23664091899386017, "grad_norm": 2.204189394400891, "learning_rate": 1.7843051285802823e-05, "loss": 0.4061, "step": 2987 }, { "epoch": 0.23672014260249555, "grad_norm": 2.232015235732415, "learning_rate": 1.7841459124997445e-05, "loss": 0.364, "step": 2988 }, { "epoch": 0.23679936621113093, "grad_norm": 1.9942646681901783, "learning_rate": 1.7839866447869717e-05, "loss": 0.3405, "step": 2989 }, { "epoch": 0.23687858981976628, "grad_norm": 2.3141503160082544, "learning_rate": 1.7838273254524505e-05, "loss": 0.2969, "step": 2990 }, { "epoch": 0.23695781342840166, "grad_norm": 2.670787782891005, "learning_rate": 1.7836679545066712e-05, "loss": 0.3358, "step": 2991 }, { "epoch": 0.23703703703703705, "grad_norm": 2.306282591921337, "learning_rate": 1.7835085319601283e-05, "loss": 0.3328, "step": 2992 }, { "epoch": 0.2371162606456724, "grad_norm": 2.715330238933233, "learning_rate": 1.783349057823318e-05, "loss": 0.3557, "step": 2993 }, { "epoch": 0.23719548425430778, "grad_norm": 2.3837024061971426, "learning_rate": 1.783189532106742e-05, "loss": 0.4084, "step": 2994 }, { "epoch": 0.23727470786294316, "grad_norm": 2.1521504224066206, "learning_rate": 1.783029954820904e-05, "loss": 0.4635, "step": 2995 }, { "epoch": 0.23735393147157854, "grad_norm": 2.1020779633639086, "learning_rate": 1.7828703259763107e-05, "loss": 0.2583, "step": 2996 }, { "epoch": 0.2374331550802139, "grad_norm": 2.3296842155950053, "learning_rate": 1.782710645583473e-05, "loss": 0.3461, "step": 2997 }, { "epoch": 0.23751237868884928, "grad_norm": 2.220311054136137, "learning_rate": 1.7825509136529065e-05, "loss": 0.3565, "step": 2998 }, { "epoch": 0.23759160229748466, "grad_norm": 2.16336576802573, "learning_rate": 1.782391130195127e-05, "loss": 0.3771, "step": 2999 }, { "epoch": 0.23767082590612001, "grad_norm": 2.5511785599205323, "learning_rate": 1.7822312952206565e-05, "loss": 0.3897, "step": 3000 }, { "epoch": 0.2377500495147554, "grad_norm": 1.9360200712131914, "learning_rate": 1.782071408740019e-05, "loss": 0.3399, "step": 3001 }, { "epoch": 0.23782927312339078, "grad_norm": 2.3419628859501405, "learning_rate": 1.781911470763742e-05, "loss": 0.3839, "step": 3002 }, { "epoch": 0.23790849673202613, "grad_norm": 2.202891523914208, "learning_rate": 1.7817514813023577e-05, "loss": 0.4076, "step": 3003 }, { "epoch": 0.2379877203406615, "grad_norm": 3.0492547362517453, "learning_rate": 1.781591440366399e-05, "loss": 0.3353, "step": 3004 }, { "epoch": 0.2380669439492969, "grad_norm": 2.058581745043511, "learning_rate": 1.7814313479664054e-05, "loss": 0.3043, "step": 3005 }, { "epoch": 0.23814616755793228, "grad_norm": 2.1482913033857245, "learning_rate": 1.781271204112917e-05, "loss": 0.3708, "step": 3006 }, { "epoch": 0.23822539116656763, "grad_norm": 2.594643206242862, "learning_rate": 1.7811110088164797e-05, "loss": 0.2876, "step": 3007 }, { "epoch": 0.238304614775203, "grad_norm": 2.43691657155438, "learning_rate": 1.7809507620876406e-05, "loss": 0.275, "step": 3008 }, { "epoch": 0.2383838383838384, "grad_norm": 2.4233811864736032, "learning_rate": 1.7807904639369512e-05, "loss": 0.4782, "step": 3009 }, { "epoch": 0.23846306199247375, "grad_norm": 1.708587861395923, "learning_rate": 1.7806301143749672e-05, "loss": 0.2687, "step": 3010 }, { "epoch": 0.23854228560110913, "grad_norm": 1.8973464408153016, "learning_rate": 1.780469713412246e-05, "loss": 0.2567, "step": 3011 }, { "epoch": 0.2386215092097445, "grad_norm": 2.3199391900538306, "learning_rate": 1.78030926105935e-05, "loss": 0.3851, "step": 3012 }, { "epoch": 0.2387007328183799, "grad_norm": 2.0066302029158525, "learning_rate": 1.7801487573268433e-05, "loss": 0.3211, "step": 3013 }, { "epoch": 0.23877995642701524, "grad_norm": 2.30231325136663, "learning_rate": 1.7799882022252948e-05, "loss": 0.3048, "step": 3014 }, { "epoch": 0.23885918003565063, "grad_norm": 2.746329081825597, "learning_rate": 1.7798275957652764e-05, "loss": 0.3981, "step": 3015 }, { "epoch": 0.238938403644286, "grad_norm": 2.2620152695474403, "learning_rate": 1.779666937957363e-05, "loss": 0.3382, "step": 3016 }, { "epoch": 0.23901762725292136, "grad_norm": 2.1045241580740015, "learning_rate": 1.7795062288121335e-05, "loss": 0.3244, "step": 3017 }, { "epoch": 0.23909685086155674, "grad_norm": 2.4100173773549582, "learning_rate": 1.7793454683401692e-05, "loss": 0.3069, "step": 3018 }, { "epoch": 0.23917607447019212, "grad_norm": 2.1408494116591266, "learning_rate": 1.779184656552056e-05, "loss": 0.2665, "step": 3019 }, { "epoch": 0.23925529807882748, "grad_norm": 2.0235463672632874, "learning_rate": 1.7790237934583824e-05, "loss": 0.2889, "step": 3020 }, { "epoch": 0.23933452168746286, "grad_norm": 2.365147754866758, "learning_rate": 1.7788628790697404e-05, "loss": 0.3066, "step": 3021 }, { "epoch": 0.23941374529609824, "grad_norm": 2.0202504708071753, "learning_rate": 1.7787019133967252e-05, "loss": 0.299, "step": 3022 }, { "epoch": 0.23949296890473362, "grad_norm": 2.7561703045597525, "learning_rate": 1.778540896449936e-05, "loss": 0.43, "step": 3023 }, { "epoch": 0.23957219251336898, "grad_norm": 2.1886780721159167, "learning_rate": 1.778379828239975e-05, "loss": 0.3512, "step": 3024 }, { "epoch": 0.23965141612200436, "grad_norm": 2.534728288934522, "learning_rate": 1.778218708777448e-05, "loss": 0.241, "step": 3025 }, { "epoch": 0.23973063973063974, "grad_norm": 1.9806599458068723, "learning_rate": 1.7780575380729626e-05, "loss": 0.3173, "step": 3026 }, { "epoch": 0.2398098633392751, "grad_norm": 2.1423951082006987, "learning_rate": 1.777896316137133e-05, "loss": 0.2625, "step": 3027 }, { "epoch": 0.23988908694791047, "grad_norm": 2.0676769322150794, "learning_rate": 1.7777350429805734e-05, "loss": 0.3346, "step": 3028 }, { "epoch": 0.23996831055654586, "grad_norm": 2.7227403699979034, "learning_rate": 1.777573718613904e-05, "loss": 0.3865, "step": 3029 }, { "epoch": 0.24004753416518124, "grad_norm": 2.090600867992233, "learning_rate": 1.7774123430477464e-05, "loss": 0.3416, "step": 3030 }, { "epoch": 0.2401267577738166, "grad_norm": 2.2450937188770856, "learning_rate": 1.7772509162927266e-05, "loss": 0.3113, "step": 3031 }, { "epoch": 0.24020598138245197, "grad_norm": 2.2750319212185977, "learning_rate": 1.7770894383594737e-05, "loss": 0.3003, "step": 3032 }, { "epoch": 0.24028520499108735, "grad_norm": 2.243129801107735, "learning_rate": 1.7769279092586205e-05, "loss": 0.3222, "step": 3033 }, { "epoch": 0.2403644285997227, "grad_norm": 1.8822340273952287, "learning_rate": 1.776766329000803e-05, "loss": 0.2315, "step": 3034 }, { "epoch": 0.2404436522083581, "grad_norm": 2.035766777504126, "learning_rate": 1.7766046975966603e-05, "loss": 0.2886, "step": 3035 }, { "epoch": 0.24052287581699347, "grad_norm": 2.507059382117977, "learning_rate": 1.7764430150568347e-05, "loss": 0.371, "step": 3036 }, { "epoch": 0.24060209942562882, "grad_norm": 2.068633899948601, "learning_rate": 1.776281281391973e-05, "loss": 0.365, "step": 3037 }, { "epoch": 0.2406813230342642, "grad_norm": 2.063740568498424, "learning_rate": 1.776119496612724e-05, "loss": 0.3588, "step": 3038 }, { "epoch": 0.2407605466428996, "grad_norm": 2.0993081533658153, "learning_rate": 1.7759576607297405e-05, "loss": 0.3467, "step": 3039 }, { "epoch": 0.24083977025153497, "grad_norm": 2.218139493602942, "learning_rate": 1.7757957737536785e-05, "loss": 0.3084, "step": 3040 }, { "epoch": 0.24091899386017032, "grad_norm": 2.4767433228236504, "learning_rate": 1.775633835695198e-05, "loss": 0.4666, "step": 3041 }, { "epoch": 0.2409982174688057, "grad_norm": 2.3302377873706135, "learning_rate": 1.7754718465649618e-05, "loss": 0.3576, "step": 3042 }, { "epoch": 0.24107744107744108, "grad_norm": 2.1386286466831876, "learning_rate": 1.7753098063736355e-05, "loss": 0.3528, "step": 3043 }, { "epoch": 0.24115666468607644, "grad_norm": 2.3938446628579446, "learning_rate": 1.775147715131889e-05, "loss": 0.3669, "step": 3044 }, { "epoch": 0.24123588829471182, "grad_norm": 2.151310289353262, "learning_rate": 1.7749855728503952e-05, "loss": 0.266, "step": 3045 }, { "epoch": 0.2413151119033472, "grad_norm": 2.3082344293082038, "learning_rate": 1.7748233795398308e-05, "loss": 0.3789, "step": 3046 }, { "epoch": 0.24139433551198258, "grad_norm": 2.2287592132944476, "learning_rate": 1.7746611352108744e-05, "loss": 0.3057, "step": 3047 }, { "epoch": 0.24147355912061794, "grad_norm": 2.2708771080614985, "learning_rate": 1.7744988398742102e-05, "loss": 0.3123, "step": 3048 }, { "epoch": 0.24155278272925332, "grad_norm": 1.9809143706483787, "learning_rate": 1.7743364935405238e-05, "loss": 0.406, "step": 3049 }, { "epoch": 0.2416320063378887, "grad_norm": 2.455023056700634, "learning_rate": 1.7741740962205053e-05, "loss": 0.2639, "step": 3050 }, { "epoch": 0.24171122994652405, "grad_norm": 1.9763886126881152, "learning_rate": 1.7740116479248474e-05, "loss": 0.3797, "step": 3051 }, { "epoch": 0.24179045355515943, "grad_norm": 2.2670810089222986, "learning_rate": 1.773849148664247e-05, "loss": 0.3623, "step": 3052 }, { "epoch": 0.24186967716379482, "grad_norm": 2.278500974717792, "learning_rate": 1.773686598449404e-05, "loss": 0.3534, "step": 3053 }, { "epoch": 0.2419489007724302, "grad_norm": 2.3668784101903046, "learning_rate": 1.7735239972910208e-05, "loss": 0.3174, "step": 3054 }, { "epoch": 0.24202812438106555, "grad_norm": 2.3356926312568165, "learning_rate": 1.7733613451998043e-05, "loss": 0.2694, "step": 3055 }, { "epoch": 0.24210734798970093, "grad_norm": 2.3631825641365802, "learning_rate": 1.7731986421864645e-05, "loss": 0.4334, "step": 3056 }, { "epoch": 0.24218657159833631, "grad_norm": 2.2723942755041717, "learning_rate": 1.7730358882617148e-05, "loss": 0.4883, "step": 3057 }, { "epoch": 0.24226579520697167, "grad_norm": 1.8995735427260922, "learning_rate": 1.772873083436271e-05, "loss": 0.282, "step": 3058 }, { "epoch": 0.24234501881560705, "grad_norm": 1.7995097592245324, "learning_rate": 1.7727102277208538e-05, "loss": 0.2518, "step": 3059 }, { "epoch": 0.24242424242424243, "grad_norm": 2.093629870468095, "learning_rate": 1.772547321126186e-05, "loss": 0.2826, "step": 3060 }, { "epoch": 0.24250346603287778, "grad_norm": 2.0709187442149433, "learning_rate": 1.7723843636629945e-05, "loss": 0.2707, "step": 3061 }, { "epoch": 0.24258268964151317, "grad_norm": 2.1257309057385902, "learning_rate": 1.772221355342009e-05, "loss": 0.2861, "step": 3062 }, { "epoch": 0.24266191325014855, "grad_norm": 1.9676148121551202, "learning_rate": 1.7720582961739628e-05, "loss": 0.367, "step": 3063 }, { "epoch": 0.24274113685878393, "grad_norm": 1.7386659213282003, "learning_rate": 1.771895186169593e-05, "loss": 0.2479, "step": 3064 }, { "epoch": 0.24282036046741928, "grad_norm": 1.9995906052795158, "learning_rate": 1.7717320253396393e-05, "loss": 0.2695, "step": 3065 }, { "epoch": 0.24289958407605466, "grad_norm": 2.194517693241196, "learning_rate": 1.771568813694845e-05, "loss": 0.3283, "step": 3066 }, { "epoch": 0.24297880768469005, "grad_norm": 2.3772568656152013, "learning_rate": 1.771405551245957e-05, "loss": 0.2864, "step": 3067 }, { "epoch": 0.2430580312933254, "grad_norm": 2.2628394684525666, "learning_rate": 1.771242238003725e-05, "loss": 0.4026, "step": 3068 }, { "epoch": 0.24313725490196078, "grad_norm": 2.203306792565748, "learning_rate": 1.7710788739789025e-05, "loss": 0.2789, "step": 3069 }, { "epoch": 0.24321647851059616, "grad_norm": 2.147749712669287, "learning_rate": 1.7709154591822466e-05, "loss": 0.3099, "step": 3070 }, { "epoch": 0.24329570211923154, "grad_norm": 2.3707556466582265, "learning_rate": 1.770751993624517e-05, "loss": 0.3582, "step": 3071 }, { "epoch": 0.2433749257278669, "grad_norm": 2.295430963197724, "learning_rate": 1.770588477316477e-05, "loss": 0.307, "step": 3072 }, { "epoch": 0.24345414933650228, "grad_norm": 2.340305414586974, "learning_rate": 1.770424910268894e-05, "loss": 0.3244, "step": 3073 }, { "epoch": 0.24353337294513766, "grad_norm": 2.6355225182577775, "learning_rate": 1.7702612924925377e-05, "loss": 0.3358, "step": 3074 }, { "epoch": 0.24361259655377301, "grad_norm": 2.295335817741724, "learning_rate": 1.7700976239981815e-05, "loss": 0.3329, "step": 3075 }, { "epoch": 0.2436918201624084, "grad_norm": 2.0403956444284344, "learning_rate": 1.769933904796602e-05, "loss": 0.2732, "step": 3076 }, { "epoch": 0.24377104377104378, "grad_norm": 2.3723541641072567, "learning_rate": 1.76977013489858e-05, "loss": 0.3892, "step": 3077 }, { "epoch": 0.24385026737967913, "grad_norm": 1.8792625787878923, "learning_rate": 1.7696063143148982e-05, "loss": 0.2877, "step": 3078 }, { "epoch": 0.2439294909883145, "grad_norm": 2.046997372938021, "learning_rate": 1.7694424430563436e-05, "loss": 0.3901, "step": 3079 }, { "epoch": 0.2440087145969499, "grad_norm": 1.9778716937543714, "learning_rate": 1.769278521133707e-05, "loss": 0.2999, "step": 3080 }, { "epoch": 0.24408793820558528, "grad_norm": 1.7475003812751837, "learning_rate": 1.769114548557781e-05, "loss": 0.2259, "step": 3081 }, { "epoch": 0.24416716181422063, "grad_norm": 1.8632732087521158, "learning_rate": 1.768950525339362e-05, "loss": 0.2879, "step": 3082 }, { "epoch": 0.244246385422856, "grad_norm": 1.751176214358662, "learning_rate": 1.7687864514892516e-05, "loss": 0.2833, "step": 3083 }, { "epoch": 0.2443256090314914, "grad_norm": 2.5221530820825264, "learning_rate": 1.7686223270182524e-05, "loss": 0.3853, "step": 3084 }, { "epoch": 0.24440483264012675, "grad_norm": 2.2674015377016343, "learning_rate": 1.7684581519371714e-05, "loss": 0.3143, "step": 3085 }, { "epoch": 0.24448405624876213, "grad_norm": 2.296367686739741, "learning_rate": 1.768293926256819e-05, "loss": 0.4126, "step": 3086 }, { "epoch": 0.2445632798573975, "grad_norm": 1.9070545827895027, "learning_rate": 1.7681296499880077e-05, "loss": 0.295, "step": 3087 }, { "epoch": 0.2446425034660329, "grad_norm": 1.949526017970783, "learning_rate": 1.767965323141555e-05, "loss": 0.2968, "step": 3088 }, { "epoch": 0.24472172707466824, "grad_norm": 1.9152766318108931, "learning_rate": 1.7678009457282816e-05, "loss": 0.2791, "step": 3089 }, { "epoch": 0.24480095068330363, "grad_norm": 2.409317888612591, "learning_rate": 1.7676365177590097e-05, "loss": 0.2913, "step": 3090 }, { "epoch": 0.244880174291939, "grad_norm": 2.0151659724570608, "learning_rate": 1.7674720392445672e-05, "loss": 0.2866, "step": 3091 }, { "epoch": 0.24495939790057436, "grad_norm": 2.128844799648101, "learning_rate": 1.7673075101957837e-05, "loss": 0.4401, "step": 3092 }, { "epoch": 0.24503862150920974, "grad_norm": 2.589473703920038, "learning_rate": 1.7671429306234924e-05, "loss": 0.3183, "step": 3093 }, { "epoch": 0.24511784511784512, "grad_norm": 1.9695204570266884, "learning_rate": 1.7669783005385305e-05, "loss": 0.2936, "step": 3094 }, { "epoch": 0.2451970687264805, "grad_norm": 2.1502707856048096, "learning_rate": 1.766813619951738e-05, "loss": 0.3922, "step": 3095 }, { "epoch": 0.24527629233511586, "grad_norm": 2.004752127042267, "learning_rate": 1.7666488888739587e-05, "loss": 0.3082, "step": 3096 }, { "epoch": 0.24535551594375124, "grad_norm": 2.4496955910922007, "learning_rate": 1.7664841073160383e-05, "loss": 0.4009, "step": 3097 }, { "epoch": 0.24543473955238662, "grad_norm": 2.0343759382155935, "learning_rate": 1.766319275288828e-05, "loss": 0.38, "step": 3098 }, { "epoch": 0.24551396316102198, "grad_norm": 2.0328476675122316, "learning_rate": 1.7661543928031802e-05, "loss": 0.2678, "step": 3099 }, { "epoch": 0.24559318676965736, "grad_norm": 2.072807095377211, "learning_rate": 1.7659894598699527e-05, "loss": 0.3471, "step": 3100 }, { "epoch": 0.24567241037829274, "grad_norm": 1.9629062381503406, "learning_rate": 1.765824476500005e-05, "loss": 0.3397, "step": 3101 }, { "epoch": 0.2457516339869281, "grad_norm": 2.2431491849824563, "learning_rate": 1.7656594427041997e-05, "loss": 0.3726, "step": 3102 }, { "epoch": 0.24583085759556347, "grad_norm": 2.5833704438715817, "learning_rate": 1.765494358493405e-05, "loss": 0.3105, "step": 3103 }, { "epoch": 0.24591008120419885, "grad_norm": 1.7363134813358179, "learning_rate": 1.7653292238784897e-05, "loss": 0.3231, "step": 3104 }, { "epoch": 0.24598930481283424, "grad_norm": 2.0652764244106976, "learning_rate": 1.7651640388703275e-05, "loss": 0.3944, "step": 3105 }, { "epoch": 0.2460685284214696, "grad_norm": 1.9505530580115473, "learning_rate": 1.7649988034797952e-05, "loss": 0.3854, "step": 3106 }, { "epoch": 0.24614775203010497, "grad_norm": 2.435976544033609, "learning_rate": 1.7648335177177725e-05, "loss": 0.2442, "step": 3107 }, { "epoch": 0.24622697563874035, "grad_norm": 2.2550878285340716, "learning_rate": 1.764668181595143e-05, "loss": 0.3357, "step": 3108 }, { "epoch": 0.2463061992473757, "grad_norm": 2.2145810238031007, "learning_rate": 1.764502795122793e-05, "loss": 0.5642, "step": 3109 }, { "epoch": 0.2463854228560111, "grad_norm": 2.7841251270083096, "learning_rate": 1.7643373583116123e-05, "loss": 0.3687, "step": 3110 }, { "epoch": 0.24646464646464647, "grad_norm": 2.4664735729556955, "learning_rate": 1.7641718711724947e-05, "loss": 0.3189, "step": 3111 }, { "epoch": 0.24654387007328185, "grad_norm": 1.7583655464796075, "learning_rate": 1.764006333716336e-05, "loss": 0.2496, "step": 3112 }, { "epoch": 0.2466230936819172, "grad_norm": 2.0988095088574545, "learning_rate": 1.7638407459540364e-05, "loss": 0.3793, "step": 3113 }, { "epoch": 0.2467023172905526, "grad_norm": 2.385704840297801, "learning_rate": 1.7636751078964995e-05, "loss": 0.3551, "step": 3114 }, { "epoch": 0.24678154089918797, "grad_norm": 2.0017480505349097, "learning_rate": 1.763509419554631e-05, "loss": 0.309, "step": 3115 }, { "epoch": 0.24686076450782332, "grad_norm": 2.0401873798694736, "learning_rate": 1.763343680939341e-05, "loss": 0.2781, "step": 3116 }, { "epoch": 0.2469399881164587, "grad_norm": 2.029527798423958, "learning_rate": 1.7631778920615427e-05, "loss": 0.3596, "step": 3117 }, { "epoch": 0.24701921172509408, "grad_norm": 2.3325494116650636, "learning_rate": 1.7630120529321518e-05, "loss": 0.3624, "step": 3118 }, { "epoch": 0.24709843533372944, "grad_norm": 1.8482318735712697, "learning_rate": 1.7628461635620895e-05, "loss": 0.2841, "step": 3119 }, { "epoch": 0.24717765894236482, "grad_norm": 1.9049012351755752, "learning_rate": 1.7626802239622772e-05, "loss": 0.3095, "step": 3120 }, { "epoch": 0.2472568825510002, "grad_norm": 2.2047090684422312, "learning_rate": 1.7625142341436423e-05, "loss": 0.3357, "step": 3121 }, { "epoch": 0.24733610615963558, "grad_norm": 2.0725570285729544, "learning_rate": 1.762348194117114e-05, "loss": 0.2257, "step": 3122 }, { "epoch": 0.24741532976827094, "grad_norm": 2.134085836909251, "learning_rate": 1.7621821038936257e-05, "loss": 0.3571, "step": 3123 }, { "epoch": 0.24749455337690632, "grad_norm": 2.1048871477271964, "learning_rate": 1.7620159634841127e-05, "loss": 0.3935, "step": 3124 }, { "epoch": 0.2475737769855417, "grad_norm": 2.495189390379055, "learning_rate": 1.761849772899515e-05, "loss": 0.3773, "step": 3125 }, { "epoch": 0.24765300059417705, "grad_norm": 2.4230035380106365, "learning_rate": 1.7616835321507757e-05, "loss": 0.3151, "step": 3126 }, { "epoch": 0.24773222420281243, "grad_norm": 1.9173416923560322, "learning_rate": 1.761517241248841e-05, "loss": 0.1984, "step": 3127 }, { "epoch": 0.24781144781144782, "grad_norm": 2.1033695195367734, "learning_rate": 1.76135090020466e-05, "loss": 0.2874, "step": 3128 }, { "epoch": 0.2478906714200832, "grad_norm": 2.187014033601868, "learning_rate": 1.7611845090291858e-05, "loss": 0.4129, "step": 3129 }, { "epoch": 0.24796989502871855, "grad_norm": 2.2368177220868253, "learning_rate": 1.761018067733374e-05, "loss": 0.3864, "step": 3130 }, { "epoch": 0.24804911863735393, "grad_norm": 2.200626684729094, "learning_rate": 1.7608515763281843e-05, "loss": 0.351, "step": 3131 }, { "epoch": 0.24812834224598931, "grad_norm": 2.970326823529292, "learning_rate": 1.760685034824579e-05, "loss": 0.3877, "step": 3132 }, { "epoch": 0.24820756585462467, "grad_norm": 2.2838672524593404, "learning_rate": 1.760518443233525e-05, "loss": 0.3372, "step": 3133 }, { "epoch": 0.24828678946326005, "grad_norm": 2.135225685427978, "learning_rate": 1.7603518015659905e-05, "loss": 0.4513, "step": 3134 }, { "epoch": 0.24836601307189543, "grad_norm": 1.7243109491541246, "learning_rate": 1.7601851098329484e-05, "loss": 0.3649, "step": 3135 }, { "epoch": 0.24844523668053078, "grad_norm": 2.1079321137847726, "learning_rate": 1.7600183680453745e-05, "loss": 0.3258, "step": 3136 }, { "epoch": 0.24852446028916617, "grad_norm": 2.2520346222407084, "learning_rate": 1.7598515762142484e-05, "loss": 0.2979, "step": 3137 }, { "epoch": 0.24860368389780155, "grad_norm": 2.20246326597006, "learning_rate": 1.759684734350552e-05, "loss": 0.4521, "step": 3138 }, { "epoch": 0.24868290750643693, "grad_norm": 1.9578842163574657, "learning_rate": 1.759517842465271e-05, "loss": 0.2906, "step": 3139 }, { "epoch": 0.24876213111507228, "grad_norm": 1.9412696546710464, "learning_rate": 1.759350900569395e-05, "loss": 0.3337, "step": 3140 }, { "epoch": 0.24884135472370766, "grad_norm": 2.0308340112722765, "learning_rate": 1.759183908673916e-05, "loss": 0.3341, "step": 3141 }, { "epoch": 0.24892057833234305, "grad_norm": 2.194955702738754, "learning_rate": 1.759016866789829e-05, "loss": 0.2867, "step": 3142 }, { "epoch": 0.2489998019409784, "grad_norm": 2.6247322127504145, "learning_rate": 1.7588497749281338e-05, "loss": 0.3331, "step": 3143 }, { "epoch": 0.24907902554961378, "grad_norm": 2.373571487890507, "learning_rate": 1.7586826330998324e-05, "loss": 0.332, "step": 3144 }, { "epoch": 0.24915824915824916, "grad_norm": 2.101409320869915, "learning_rate": 1.7585154413159304e-05, "loss": 0.3089, "step": 3145 }, { "epoch": 0.24923747276688454, "grad_norm": 2.584851109657674, "learning_rate": 1.758348199587436e-05, "loss": 0.3053, "step": 3146 }, { "epoch": 0.2493166963755199, "grad_norm": 1.9170160278008994, "learning_rate": 1.7581809079253616e-05, "loss": 0.297, "step": 3147 }, { "epoch": 0.24939591998415528, "grad_norm": 2.276804387586002, "learning_rate": 1.7580135663407226e-05, "loss": 0.4009, "step": 3148 }, { "epoch": 0.24947514359279066, "grad_norm": 2.269897095319302, "learning_rate": 1.7578461748445374e-05, "loss": 0.3911, "step": 3149 }, { "epoch": 0.24955436720142601, "grad_norm": 2.194189906855524, "learning_rate": 1.7576787334478283e-05, "loss": 0.3068, "step": 3150 }, { "epoch": 0.2496335908100614, "grad_norm": 1.8365439412557574, "learning_rate": 1.7575112421616203e-05, "loss": 0.2874, "step": 3151 }, { "epoch": 0.24971281441869678, "grad_norm": 2.3217891060378557, "learning_rate": 1.757343700996942e-05, "loss": 0.3028, "step": 3152 }, { "epoch": 0.24979203802733216, "grad_norm": 2.3486808110748414, "learning_rate": 1.757176109964825e-05, "loss": 0.3674, "step": 3153 }, { "epoch": 0.2498712616359675, "grad_norm": 2.492330136611301, "learning_rate": 1.7570084690763042e-05, "loss": 0.4028, "step": 3154 }, { "epoch": 0.2499504852446029, "grad_norm": 1.991474236755262, "learning_rate": 1.7568407783424187e-05, "loss": 0.3106, "step": 3155 }, { "epoch": 0.2500297088532383, "grad_norm": 2.0594099059834066, "learning_rate": 1.7566730377742093e-05, "loss": 0.3264, "step": 3156 }, { "epoch": 0.25010893246187366, "grad_norm": 1.9855669044250501, "learning_rate": 1.7565052473827213e-05, "loss": 0.283, "step": 3157 }, { "epoch": 0.25018815607050904, "grad_norm": 1.9828299502358042, "learning_rate": 1.7563374071790028e-05, "loss": 0.2596, "step": 3158 }, { "epoch": 0.25026737967914436, "grad_norm": 2.528195858559667, "learning_rate": 1.7561695171741054e-05, "loss": 0.3769, "step": 3159 }, { "epoch": 0.25034660328777975, "grad_norm": 1.788766226959051, "learning_rate": 1.7560015773790837e-05, "loss": 0.3699, "step": 3160 }, { "epoch": 0.2504258268964151, "grad_norm": 2.210841811229164, "learning_rate": 1.7558335878049955e-05, "loss": 0.4443, "step": 3161 }, { "epoch": 0.2505050505050505, "grad_norm": 1.860121709352534, "learning_rate": 1.7556655484629028e-05, "loss": 0.3621, "step": 3162 }, { "epoch": 0.2505842741136859, "grad_norm": 2.663115144540321, "learning_rate": 1.7554974593638697e-05, "loss": 0.3224, "step": 3163 }, { "epoch": 0.25066349772232127, "grad_norm": 1.670794074242099, "learning_rate": 1.755329320518964e-05, "loss": 0.3041, "step": 3164 }, { "epoch": 0.25074272133095665, "grad_norm": 2.152913748232404, "learning_rate": 1.7551611319392573e-05, "loss": 0.375, "step": 3165 }, { "epoch": 0.250821944939592, "grad_norm": 1.5217735878440846, "learning_rate": 1.7549928936358232e-05, "loss": 0.2081, "step": 3166 }, { "epoch": 0.25090116854822736, "grad_norm": 2.3463524928828225, "learning_rate": 1.75482460561974e-05, "loss": 0.3585, "step": 3167 }, { "epoch": 0.25098039215686274, "grad_norm": 2.572257685272147, "learning_rate": 1.7546562679020884e-05, "loss": 0.2567, "step": 3168 }, { "epoch": 0.2510596157654981, "grad_norm": 2.392915260597958, "learning_rate": 1.7544878804939528e-05, "loss": 0.2652, "step": 3169 }, { "epoch": 0.2511388393741335, "grad_norm": 1.9444629666606124, "learning_rate": 1.7543194434064208e-05, "loss": 0.3158, "step": 3170 }, { "epoch": 0.2512180629827689, "grad_norm": 1.8543573155822384, "learning_rate": 1.754150956650583e-05, "loss": 0.3228, "step": 3171 }, { "epoch": 0.2512972865914042, "grad_norm": 2.066453649492031, "learning_rate": 1.753982420237533e-05, "loss": 0.3562, "step": 3172 }, { "epoch": 0.2513765102000396, "grad_norm": 2.3144003054391384, "learning_rate": 1.753813834178369e-05, "loss": 0.4167, "step": 3173 }, { "epoch": 0.251455733808675, "grad_norm": 1.8389229039143093, "learning_rate": 1.753645198484191e-05, "loss": 0.2271, "step": 3174 }, { "epoch": 0.25153495741731036, "grad_norm": 1.7919746495512738, "learning_rate": 1.753476513166103e-05, "loss": 0.3353, "step": 3175 }, { "epoch": 0.25161418102594574, "grad_norm": 1.7598198919129016, "learning_rate": 1.7533077782352123e-05, "loss": 0.3617, "step": 3176 }, { "epoch": 0.2516934046345811, "grad_norm": 1.8798753175127672, "learning_rate": 1.753138993702629e-05, "loss": 0.2751, "step": 3177 }, { "epoch": 0.2517726282432165, "grad_norm": 2.189914940785449, "learning_rate": 1.752970159579467e-05, "loss": 0.3447, "step": 3178 }, { "epoch": 0.2518518518518518, "grad_norm": 2.114285745548243, "learning_rate": 1.7528012758768426e-05, "loss": 0.3189, "step": 3179 }, { "epoch": 0.2519310754604872, "grad_norm": 2.2412589062856867, "learning_rate": 1.7526323426058767e-05, "loss": 0.3778, "step": 3180 }, { "epoch": 0.2520102990691226, "grad_norm": 2.1606364122220993, "learning_rate": 1.7524633597776923e-05, "loss": 0.4242, "step": 3181 }, { "epoch": 0.25208952267775797, "grad_norm": 1.9567124486250131, "learning_rate": 1.7522943274034165e-05, "loss": 0.3176, "step": 3182 }, { "epoch": 0.25216874628639335, "grad_norm": 1.8611110382284815, "learning_rate": 1.752125245494179e-05, "loss": 0.3322, "step": 3183 }, { "epoch": 0.25224796989502873, "grad_norm": 1.9982039060348933, "learning_rate": 1.751956114061113e-05, "loss": 0.3835, "step": 3184 }, { "epoch": 0.2523271935036641, "grad_norm": 2.8968457584128906, "learning_rate": 1.751786933115355e-05, "loss": 0.4097, "step": 3185 }, { "epoch": 0.25240641711229944, "grad_norm": 2.2111841014606206, "learning_rate": 1.751617702668045e-05, "loss": 0.2665, "step": 3186 }, { "epoch": 0.2524856407209348, "grad_norm": 2.425423174013597, "learning_rate": 1.751448422730326e-05, "loss": 0.3385, "step": 3187 }, { "epoch": 0.2525648643295702, "grad_norm": 2.180026919812749, "learning_rate": 1.7512790933133435e-05, "loss": 0.3916, "step": 3188 }, { "epoch": 0.2526440879382056, "grad_norm": 2.0805844298936225, "learning_rate": 1.7511097144282482e-05, "loss": 0.3591, "step": 3189 }, { "epoch": 0.25272331154684097, "grad_norm": 2.2950658830861177, "learning_rate": 1.7509402860861923e-05, "loss": 0.3946, "step": 3190 }, { "epoch": 0.25280253515547635, "grad_norm": 2.3868853083364723, "learning_rate": 1.7507708082983313e-05, "loss": 0.3427, "step": 3191 }, { "epoch": 0.25288175876411173, "grad_norm": 2.402135313830431, "learning_rate": 1.7506012810758254e-05, "loss": 0.4316, "step": 3192 }, { "epoch": 0.25296098237274706, "grad_norm": 1.932140722868322, "learning_rate": 1.750431704429837e-05, "loss": 0.3912, "step": 3193 }, { "epoch": 0.25304020598138244, "grad_norm": 2.0683311609142434, "learning_rate": 1.7502620783715316e-05, "loss": 0.3283, "step": 3194 }, { "epoch": 0.2531194295900178, "grad_norm": 2.458022152617356, "learning_rate": 1.7500924029120782e-05, "loss": 0.3316, "step": 3195 }, { "epoch": 0.2531986531986532, "grad_norm": 2.165077050442694, "learning_rate": 1.7499226780626494e-05, "loss": 0.3481, "step": 3196 }, { "epoch": 0.2532778768072886, "grad_norm": 2.0504403696436744, "learning_rate": 1.7497529038344208e-05, "loss": 0.2968, "step": 3197 }, { "epoch": 0.25335710041592396, "grad_norm": 2.0919997919238678, "learning_rate": 1.7495830802385707e-05, "loss": 0.3292, "step": 3198 }, { "epoch": 0.25343632402455935, "grad_norm": 2.1356896504056566, "learning_rate": 1.7494132072862818e-05, "loss": 0.3231, "step": 3199 }, { "epoch": 0.25351554763319467, "grad_norm": 2.0703937912551407, "learning_rate": 1.7492432849887387e-05, "loss": 0.3043, "step": 3200 }, { "epoch": 0.25359477124183005, "grad_norm": 2.283695389893035, "learning_rate": 1.749073313357131e-05, "loss": 0.3684, "step": 3201 }, { "epoch": 0.25367399485046543, "grad_norm": 1.6313716388147497, "learning_rate": 1.7489032924026496e-05, "loss": 0.2225, "step": 3202 }, { "epoch": 0.2537532184591008, "grad_norm": 1.7962770489180542, "learning_rate": 1.74873322213649e-05, "loss": 0.2627, "step": 3203 }, { "epoch": 0.2538324420677362, "grad_norm": 2.105269362629646, "learning_rate": 1.7485631025698504e-05, "loss": 0.3818, "step": 3204 }, { "epoch": 0.2539116656763716, "grad_norm": 2.4373811253813904, "learning_rate": 1.7483929337139326e-05, "loss": 0.3456, "step": 3205 }, { "epoch": 0.2539908892850069, "grad_norm": 1.8757043676973533, "learning_rate": 1.748222715579941e-05, "loss": 0.2276, "step": 3206 }, { "epoch": 0.2540701128936423, "grad_norm": 2.212246242481772, "learning_rate": 1.7480524481790835e-05, "loss": 0.3669, "step": 3207 }, { "epoch": 0.25414933650227767, "grad_norm": 1.9697219569320832, "learning_rate": 1.7478821315225717e-05, "loss": 0.2792, "step": 3208 }, { "epoch": 0.25422856011091305, "grad_norm": 2.428866513284939, "learning_rate": 1.7477117656216206e-05, "loss": 0.5144, "step": 3209 }, { "epoch": 0.25430778371954843, "grad_norm": 1.9880183386562895, "learning_rate": 1.7475413504874474e-05, "loss": 0.2765, "step": 3210 }, { "epoch": 0.2543870073281838, "grad_norm": 2.3965006100719215, "learning_rate": 1.7473708861312727e-05, "loss": 0.3426, "step": 3211 }, { "epoch": 0.2544662309368192, "grad_norm": 2.14779450836371, "learning_rate": 1.7472003725643215e-05, "loss": 0.3278, "step": 3212 }, { "epoch": 0.2545454545454545, "grad_norm": 1.9644775960544885, "learning_rate": 1.747029809797821e-05, "loss": 0.2905, "step": 3213 }, { "epoch": 0.2546246781540899, "grad_norm": 1.7695218249493199, "learning_rate": 1.7468591978430024e-05, "loss": 0.2559, "step": 3214 }, { "epoch": 0.2547039017627253, "grad_norm": 1.9945889070636953, "learning_rate": 1.746688536711099e-05, "loss": 0.2843, "step": 3215 }, { "epoch": 0.25478312537136066, "grad_norm": 2.0425649140663507, "learning_rate": 1.7465178264133482e-05, "loss": 0.3341, "step": 3216 }, { "epoch": 0.25486234897999605, "grad_norm": 2.0042442532245652, "learning_rate": 1.7463470669609907e-05, "loss": 0.3296, "step": 3217 }, { "epoch": 0.2549415725886314, "grad_norm": 2.081881892438971, "learning_rate": 1.74617625836527e-05, "loss": 0.3071, "step": 3218 }, { "epoch": 0.2550207961972668, "grad_norm": 2.231791062220781, "learning_rate": 1.746005400637433e-05, "loss": 0.3477, "step": 3219 }, { "epoch": 0.25510001980590213, "grad_norm": 1.9695744281777843, "learning_rate": 1.74583449378873e-05, "loss": 0.2839, "step": 3220 }, { "epoch": 0.2551792434145375, "grad_norm": 1.7860489643023751, "learning_rate": 1.7456635378304143e-05, "loss": 0.3131, "step": 3221 }, { "epoch": 0.2552584670231729, "grad_norm": 2.24608401418775, "learning_rate": 1.7454925327737426e-05, "loss": 0.3816, "step": 3222 }, { "epoch": 0.2553376906318083, "grad_norm": 2.0872890517461014, "learning_rate": 1.7453214786299746e-05, "loss": 0.3019, "step": 3223 }, { "epoch": 0.25541691424044366, "grad_norm": 2.0836811010193275, "learning_rate": 1.7451503754103735e-05, "loss": 0.3271, "step": 3224 }, { "epoch": 0.25549613784907904, "grad_norm": 2.894784140181905, "learning_rate": 1.7449792231262056e-05, "loss": 0.3491, "step": 3225 }, { "epoch": 0.2555753614577144, "grad_norm": 2.443183334778466, "learning_rate": 1.7448080217887403e-05, "loss": 0.3695, "step": 3226 }, { "epoch": 0.25565458506634975, "grad_norm": 2.2047669206530878, "learning_rate": 1.7446367714092508e-05, "loss": 0.3523, "step": 3227 }, { "epoch": 0.25573380867498513, "grad_norm": 1.8939969596676325, "learning_rate": 1.7444654719990128e-05, "loss": 0.2027, "step": 3228 }, { "epoch": 0.2558130322836205, "grad_norm": 2.105530670544525, "learning_rate": 1.7442941235693058e-05, "loss": 0.4269, "step": 3229 }, { "epoch": 0.2558922558922559, "grad_norm": 2.1493057193863403, "learning_rate": 1.744122726131412e-05, "loss": 0.3556, "step": 3230 }, { "epoch": 0.2559714795008913, "grad_norm": 2.5560916004327727, "learning_rate": 1.7439512796966165e-05, "loss": 0.384, "step": 3231 }, { "epoch": 0.25605070310952666, "grad_norm": 2.0765856221477232, "learning_rate": 1.7437797842762098e-05, "loss": 0.3012, "step": 3232 }, { "epoch": 0.25612992671816204, "grad_norm": 2.2611887844681755, "learning_rate": 1.743608239881483e-05, "loss": 0.2891, "step": 3233 }, { "epoch": 0.25620915032679736, "grad_norm": 1.5392627594504231, "learning_rate": 1.7434366465237312e-05, "loss": 0.2233, "step": 3234 }, { "epoch": 0.25628837393543275, "grad_norm": 2.0866414541682583, "learning_rate": 1.7432650042142535e-05, "loss": 0.3982, "step": 3235 }, { "epoch": 0.2563675975440681, "grad_norm": 2.43862585588995, "learning_rate": 1.743093312964352e-05, "loss": 0.4161, "step": 3236 }, { "epoch": 0.2564468211527035, "grad_norm": 2.207354380002613, "learning_rate": 1.742921572785331e-05, "loss": 0.4492, "step": 3237 }, { "epoch": 0.2565260447613389, "grad_norm": 1.7832572052544342, "learning_rate": 1.7427497836884995e-05, "loss": 0.3152, "step": 3238 }, { "epoch": 0.25660526836997427, "grad_norm": 2.0479439293624937, "learning_rate": 1.7425779456851683e-05, "loss": 0.272, "step": 3239 }, { "epoch": 0.25668449197860965, "grad_norm": 2.1496191465684693, "learning_rate": 1.7424060587866526e-05, "loss": 0.4464, "step": 3240 }, { "epoch": 0.256763715587245, "grad_norm": 2.535979390949642, "learning_rate": 1.74223412300427e-05, "loss": 0.3754, "step": 3241 }, { "epoch": 0.25684293919588036, "grad_norm": 1.8068336484004952, "learning_rate": 1.7420621383493423e-05, "loss": 0.2138, "step": 3242 }, { "epoch": 0.25692216280451574, "grad_norm": 2.1913731656884368, "learning_rate": 1.7418901048331927e-05, "loss": 0.3604, "step": 3243 }, { "epoch": 0.2570013864131511, "grad_norm": 2.3797284152895735, "learning_rate": 1.7417180224671497e-05, "loss": 0.3969, "step": 3244 }, { "epoch": 0.2570806100217865, "grad_norm": 1.8754716905679043, "learning_rate": 1.741545891262544e-05, "loss": 0.3134, "step": 3245 }, { "epoch": 0.2571598336304219, "grad_norm": 2.302959913561448, "learning_rate": 1.7413737112307092e-05, "loss": 0.4423, "step": 3246 }, { "epoch": 0.2572390572390572, "grad_norm": 1.7052146413781168, "learning_rate": 1.741201482382983e-05, "loss": 0.3037, "step": 3247 }, { "epoch": 0.2573182808476926, "grad_norm": 1.943526165275058, "learning_rate": 1.7410292047307054e-05, "loss": 0.3148, "step": 3248 }, { "epoch": 0.257397504456328, "grad_norm": 1.8051129337523533, "learning_rate": 1.7408568782852204e-05, "loss": 0.258, "step": 3249 }, { "epoch": 0.25747672806496336, "grad_norm": 2.330297661958351, "learning_rate": 1.7406845030578747e-05, "loss": 0.4081, "step": 3250 }, { "epoch": 0.25755595167359874, "grad_norm": 2.1945372742787206, "learning_rate": 1.7405120790600185e-05, "loss": 0.2748, "step": 3251 }, { "epoch": 0.2576351752822341, "grad_norm": 2.038801311904655, "learning_rate": 1.740339606303005e-05, "loss": 0.315, "step": 3252 }, { "epoch": 0.2577143988908695, "grad_norm": 1.5262391755761546, "learning_rate": 1.7401670847981906e-05, "loss": 0.2739, "step": 3253 }, { "epoch": 0.2577936224995048, "grad_norm": 1.98721405649299, "learning_rate": 1.7399945145569353e-05, "loss": 0.3137, "step": 3254 }, { "epoch": 0.2578728461081402, "grad_norm": 2.1136067175226882, "learning_rate": 1.7398218955906017e-05, "loss": 0.323, "step": 3255 }, { "epoch": 0.2579520697167756, "grad_norm": 2.1009780117040586, "learning_rate": 1.7396492279105562e-05, "loss": 0.3522, "step": 3256 }, { "epoch": 0.25803129332541097, "grad_norm": 1.8529069187609732, "learning_rate": 1.7394765115281678e-05, "loss": 0.3032, "step": 3257 }, { "epoch": 0.25811051693404635, "grad_norm": 2.260794264150886, "learning_rate": 1.7393037464548094e-05, "loss": 0.2797, "step": 3258 }, { "epoch": 0.25818974054268173, "grad_norm": 2.2931706300882664, "learning_rate": 1.7391309327018566e-05, "loss": 0.2881, "step": 3259 }, { "epoch": 0.2582689641513171, "grad_norm": 1.6868311582189306, "learning_rate": 1.7389580702806884e-05, "loss": 0.2794, "step": 3260 }, { "epoch": 0.25834818775995244, "grad_norm": 2.392280611340802, "learning_rate": 1.7387851592026868e-05, "loss": 0.3857, "step": 3261 }, { "epoch": 0.2584274113685878, "grad_norm": 2.0771443378287326, "learning_rate": 1.738612199479237e-05, "loss": 0.3751, "step": 3262 }, { "epoch": 0.2585066349772232, "grad_norm": 2.413822918861285, "learning_rate": 1.7384391911217283e-05, "loss": 0.4064, "step": 3263 }, { "epoch": 0.2585858585858586, "grad_norm": 2.062026400168057, "learning_rate": 1.738266134141552e-05, "loss": 0.4244, "step": 3264 }, { "epoch": 0.25866508219449397, "grad_norm": 2.4641907316013016, "learning_rate": 1.738093028550103e-05, "loss": 0.4091, "step": 3265 }, { "epoch": 0.25874430580312935, "grad_norm": 2.2061444371890433, "learning_rate": 1.7379198743587794e-05, "loss": 0.3615, "step": 3266 }, { "epoch": 0.25882352941176473, "grad_norm": 2.0898042935744257, "learning_rate": 1.7377466715789828e-05, "loss": 0.271, "step": 3267 }, { "epoch": 0.25890275302040006, "grad_norm": 1.9304710185047331, "learning_rate": 1.7375734202221174e-05, "loss": 0.2474, "step": 3268 }, { "epoch": 0.25898197662903544, "grad_norm": 1.9235989854758635, "learning_rate": 1.7374001202995918e-05, "loss": 0.2316, "step": 3269 }, { "epoch": 0.2590612002376708, "grad_norm": 1.8484448547070276, "learning_rate": 1.7372267718228163e-05, "loss": 0.2325, "step": 3270 }, { "epoch": 0.2591404238463062, "grad_norm": 2.5725275746219034, "learning_rate": 1.7370533748032047e-05, "loss": 0.2746, "step": 3271 }, { "epoch": 0.2592196474549416, "grad_norm": 1.8631633864043184, "learning_rate": 1.7368799292521754e-05, "loss": 0.3036, "step": 3272 }, { "epoch": 0.25929887106357696, "grad_norm": 2.1290448621478633, "learning_rate": 1.736706435181148e-05, "loss": 0.4418, "step": 3273 }, { "epoch": 0.25937809467221234, "grad_norm": 1.9321493077490677, "learning_rate": 1.736532892601547e-05, "loss": 0.3132, "step": 3274 }, { "epoch": 0.25945731828084767, "grad_norm": 2.187271121697484, "learning_rate": 1.7363593015247987e-05, "loss": 0.3549, "step": 3275 }, { "epoch": 0.25953654188948305, "grad_norm": 1.8224622208624737, "learning_rate": 1.7361856619623338e-05, "loss": 0.2881, "step": 3276 }, { "epoch": 0.25961576549811843, "grad_norm": 1.6987950303485146, "learning_rate": 1.736011973925585e-05, "loss": 0.2133, "step": 3277 }, { "epoch": 0.2596949891067538, "grad_norm": 2.3325877398108625, "learning_rate": 1.7358382374259895e-05, "loss": 0.3981, "step": 3278 }, { "epoch": 0.2597742127153892, "grad_norm": 3.2234901755688954, "learning_rate": 1.7356644524749867e-05, "loss": 0.4248, "step": 3279 }, { "epoch": 0.2598534363240246, "grad_norm": 1.9142990830082351, "learning_rate": 1.7354906190840194e-05, "loss": 0.3691, "step": 3280 }, { "epoch": 0.25993265993265996, "grad_norm": 2.0571695158798238, "learning_rate": 1.7353167372645337e-05, "loss": 0.3494, "step": 3281 }, { "epoch": 0.2600118835412953, "grad_norm": 1.8267685585052116, "learning_rate": 1.735142807027979e-05, "loss": 0.3425, "step": 3282 }, { "epoch": 0.26009110714993067, "grad_norm": 2.2844424109868924, "learning_rate": 1.734968828385808e-05, "loss": 0.3369, "step": 3283 }, { "epoch": 0.26017033075856605, "grad_norm": 1.8169862733311553, "learning_rate": 1.7347948013494758e-05, "loss": 0.2921, "step": 3284 }, { "epoch": 0.26024955436720143, "grad_norm": 1.9677950581977128, "learning_rate": 1.7346207259304415e-05, "loss": 0.3779, "step": 3285 }, { "epoch": 0.2603287779758368, "grad_norm": 1.8586838384070026, "learning_rate": 1.7344466021401673e-05, "loss": 0.3858, "step": 3286 }, { "epoch": 0.2604080015844722, "grad_norm": 2.102587123892838, "learning_rate": 1.734272429990118e-05, "loss": 0.2525, "step": 3287 }, { "epoch": 0.2604872251931075, "grad_norm": 2.0240899817033053, "learning_rate": 1.7340982094917627e-05, "loss": 0.3671, "step": 3288 }, { "epoch": 0.2605664488017429, "grad_norm": 2.1359480032432083, "learning_rate": 1.7339239406565723e-05, "loss": 0.2414, "step": 3289 }, { "epoch": 0.2606456724103783, "grad_norm": 1.8244353527446417, "learning_rate": 1.733749623496022e-05, "loss": 0.2628, "step": 3290 }, { "epoch": 0.26072489601901366, "grad_norm": 1.995623870348072, "learning_rate": 1.7335752580215898e-05, "loss": 0.2407, "step": 3291 }, { "epoch": 0.26080411962764904, "grad_norm": 1.9265589608442888, "learning_rate": 1.733400844244756e-05, "loss": 0.3299, "step": 3292 }, { "epoch": 0.2608833432362844, "grad_norm": 2.0373546699445115, "learning_rate": 1.733226382177006e-05, "loss": 0.277, "step": 3293 }, { "epoch": 0.2609625668449198, "grad_norm": 2.443805810877364, "learning_rate": 1.7330518718298263e-05, "loss": 0.352, "step": 3294 }, { "epoch": 0.26104179045355513, "grad_norm": 2.102354642128771, "learning_rate": 1.7328773132147086e-05, "loss": 0.335, "step": 3295 }, { "epoch": 0.2611210140621905, "grad_norm": 2.1423854862913654, "learning_rate": 1.732702706343146e-05, "loss": 0.388, "step": 3296 }, { "epoch": 0.2612002376708259, "grad_norm": 2.0592330857010164, "learning_rate": 1.7325280512266357e-05, "loss": 0.3831, "step": 3297 }, { "epoch": 0.2612794612794613, "grad_norm": 2.0217267599216697, "learning_rate": 1.7323533478766777e-05, "loss": 0.2938, "step": 3298 }, { "epoch": 0.26135868488809666, "grad_norm": 2.504916276778093, "learning_rate": 1.732178596304776e-05, "loss": 0.3584, "step": 3299 }, { "epoch": 0.26143790849673204, "grad_norm": 1.8943535790300179, "learning_rate": 1.7320037965224365e-05, "loss": 0.2428, "step": 3300 }, { "epoch": 0.2615171321053674, "grad_norm": 1.7202663161805765, "learning_rate": 1.731828948541169e-05, "loss": 0.2775, "step": 3301 }, { "epoch": 0.26159635571400275, "grad_norm": 2.067244447280836, "learning_rate": 1.731654052372487e-05, "loss": 0.2909, "step": 3302 }, { "epoch": 0.26167557932263813, "grad_norm": 1.8371874674562263, "learning_rate": 1.731479108027906e-05, "loss": 0.368, "step": 3303 }, { "epoch": 0.2617548029312735, "grad_norm": 1.9860994597058739, "learning_rate": 1.7313041155189454e-05, "loss": 0.4676, "step": 3304 }, { "epoch": 0.2618340265399089, "grad_norm": 1.9991120807496603, "learning_rate": 1.7311290748571273e-05, "loss": 0.3268, "step": 3305 }, { "epoch": 0.2619132501485443, "grad_norm": 2.0800115964919854, "learning_rate": 1.7309539860539783e-05, "loss": 0.306, "step": 3306 }, { "epoch": 0.26199247375717966, "grad_norm": 1.7009326575219386, "learning_rate": 1.7307788491210257e-05, "loss": 0.2824, "step": 3307 }, { "epoch": 0.26207169736581504, "grad_norm": 2.1062715591386185, "learning_rate": 1.7306036640698024e-05, "loss": 0.2678, "step": 3308 }, { "epoch": 0.26215092097445036, "grad_norm": 2.185510246191739, "learning_rate": 1.7304284309118436e-05, "loss": 0.4491, "step": 3309 }, { "epoch": 0.26223014458308574, "grad_norm": 2.1919386935161085, "learning_rate": 1.7302531496586866e-05, "loss": 0.3958, "step": 3310 }, { "epoch": 0.2623093681917211, "grad_norm": 1.934797454249626, "learning_rate": 1.730077820321874e-05, "loss": 0.3354, "step": 3311 }, { "epoch": 0.2623885918003565, "grad_norm": 1.799694206858261, "learning_rate": 1.7299024429129497e-05, "loss": 0.2744, "step": 3312 }, { "epoch": 0.2624678154089919, "grad_norm": 2.038853195647611, "learning_rate": 1.7297270174434613e-05, "loss": 0.3197, "step": 3313 }, { "epoch": 0.26254703901762727, "grad_norm": 2.116760797685011, "learning_rate": 1.7295515439249608e-05, "loss": 0.3337, "step": 3314 }, { "epoch": 0.26262626262626265, "grad_norm": 2.4294043123625544, "learning_rate": 1.7293760223690008e-05, "loss": 0.4728, "step": 3315 }, { "epoch": 0.262705486234898, "grad_norm": 1.8635766098439808, "learning_rate": 1.729200452787139e-05, "loss": 0.3491, "step": 3316 }, { "epoch": 0.26278470984353336, "grad_norm": 1.7346554430970518, "learning_rate": 1.729024835190937e-05, "loss": 0.2755, "step": 3317 }, { "epoch": 0.26286393345216874, "grad_norm": 1.942318264523939, "learning_rate": 1.7288491695919567e-05, "loss": 0.2305, "step": 3318 }, { "epoch": 0.2629431570608041, "grad_norm": 2.3059979514013382, "learning_rate": 1.728673456001766e-05, "loss": 0.2954, "step": 3319 }, { "epoch": 0.2630223806694395, "grad_norm": 1.9388207109981859, "learning_rate": 1.728497694431934e-05, "loss": 0.2637, "step": 3320 }, { "epoch": 0.2631016042780749, "grad_norm": 2.0770342811848495, "learning_rate": 1.7283218848940344e-05, "loss": 0.3618, "step": 3321 }, { "epoch": 0.26318082788671027, "grad_norm": 2.1631574225529735, "learning_rate": 1.728146027399643e-05, "loss": 0.3476, "step": 3322 }, { "epoch": 0.2632600514953456, "grad_norm": 2.038833819636668, "learning_rate": 1.7279701219603394e-05, "loss": 0.3631, "step": 3323 }, { "epoch": 0.263339275103981, "grad_norm": 2.132490739011031, "learning_rate": 1.727794168587706e-05, "loss": 0.2823, "step": 3324 }, { "epoch": 0.26341849871261636, "grad_norm": 2.0932111609772344, "learning_rate": 1.7276181672933287e-05, "loss": 0.3566, "step": 3325 }, { "epoch": 0.26349772232125174, "grad_norm": 2.158072948934231, "learning_rate": 1.7274421180887958e-05, "loss": 0.3005, "step": 3326 }, { "epoch": 0.2635769459298871, "grad_norm": 2.0368869102115577, "learning_rate": 1.7272660209857e-05, "loss": 0.3139, "step": 3327 }, { "epoch": 0.2636561695385225, "grad_norm": 2.215593103630561, "learning_rate": 1.727089875995636e-05, "loss": 0.3541, "step": 3328 }, { "epoch": 0.2637353931471578, "grad_norm": 2.697015147530889, "learning_rate": 1.726913683130202e-05, "loss": 0.4182, "step": 3329 }, { "epoch": 0.2638146167557932, "grad_norm": 2.5116736595514166, "learning_rate": 1.7267374424009998e-05, "loss": 0.3195, "step": 3330 }, { "epoch": 0.2638938403644286, "grad_norm": 2.3595958089491433, "learning_rate": 1.726561153819634e-05, "loss": 0.3875, "step": 3331 }, { "epoch": 0.26397306397306397, "grad_norm": 1.901354202275452, "learning_rate": 1.7263848173977122e-05, "loss": 0.2701, "step": 3332 }, { "epoch": 0.26405228758169935, "grad_norm": 2.522415618531674, "learning_rate": 1.726208433146845e-05, "loss": 0.3083, "step": 3333 }, { "epoch": 0.26413151119033473, "grad_norm": 1.9211384918597951, "learning_rate": 1.726032001078647e-05, "loss": 0.2899, "step": 3334 }, { "epoch": 0.2642107347989701, "grad_norm": 1.9647184220259637, "learning_rate": 1.725855521204735e-05, "loss": 0.2816, "step": 3335 }, { "epoch": 0.26428995840760544, "grad_norm": 2.165193659503709, "learning_rate": 1.7256789935367296e-05, "loss": 0.2738, "step": 3336 }, { "epoch": 0.2643691820162408, "grad_norm": 1.9607703731106676, "learning_rate": 1.7255024180862546e-05, "loss": 0.2549, "step": 3337 }, { "epoch": 0.2644484056248762, "grad_norm": 2.0194548077496033, "learning_rate": 1.7253257948649357e-05, "loss": 0.2944, "step": 3338 }, { "epoch": 0.2645276292335116, "grad_norm": 1.8841420416239127, "learning_rate": 1.7251491238844038e-05, "loss": 0.2976, "step": 3339 }, { "epoch": 0.26460685284214697, "grad_norm": 1.7858477313144323, "learning_rate": 1.7249724051562905e-05, "loss": 0.2512, "step": 3340 }, { "epoch": 0.26468607645078235, "grad_norm": 2.128248672173283, "learning_rate": 1.7247956386922334e-05, "loss": 0.2976, "step": 3341 }, { "epoch": 0.26476530005941773, "grad_norm": 2.2043898618326576, "learning_rate": 1.7246188245038705e-05, "loss": 0.3567, "step": 3342 }, { "epoch": 0.26484452366805306, "grad_norm": 2.35282305092463, "learning_rate": 1.7244419626028454e-05, "loss": 0.4477, "step": 3343 }, { "epoch": 0.26492374727668844, "grad_norm": 1.617635459364415, "learning_rate": 1.724265053000802e-05, "loss": 0.2292, "step": 3344 }, { "epoch": 0.2650029708853238, "grad_norm": 2.125265322866548, "learning_rate": 1.7240880957093903e-05, "loss": 0.2552, "step": 3345 }, { "epoch": 0.2650821944939592, "grad_norm": 1.7969324592980613, "learning_rate": 1.7239110907402615e-05, "loss": 0.2744, "step": 3346 }, { "epoch": 0.2651614181025946, "grad_norm": 2.391008221371728, "learning_rate": 1.72373403810507e-05, "loss": 0.3207, "step": 3347 }, { "epoch": 0.26524064171122996, "grad_norm": 2.1596446537617835, "learning_rate": 1.7235569378154752e-05, "loss": 0.2656, "step": 3348 }, { "epoch": 0.26531986531986534, "grad_norm": 1.893349642320206, "learning_rate": 1.7233797898831376e-05, "loss": 0.3016, "step": 3349 }, { "epoch": 0.26539908892850067, "grad_norm": 2.0026866978649767, "learning_rate": 1.7232025943197213e-05, "loss": 0.286, "step": 3350 }, { "epoch": 0.26547831253713605, "grad_norm": 1.7355436574030136, "learning_rate": 1.723025351136894e-05, "loss": 0.2244, "step": 3351 }, { "epoch": 0.26555753614577143, "grad_norm": 2.2079376495757885, "learning_rate": 1.722848060346326e-05, "loss": 0.3482, "step": 3352 }, { "epoch": 0.2656367597544068, "grad_norm": 1.6954301381879433, "learning_rate": 1.7226707219596918e-05, "loss": 0.3574, "step": 3353 }, { "epoch": 0.2657159833630422, "grad_norm": 2.777001468568871, "learning_rate": 1.7224933359886676e-05, "loss": 0.4344, "step": 3354 }, { "epoch": 0.2657952069716776, "grad_norm": 2.9241073423726687, "learning_rate": 1.7223159024449338e-05, "loss": 0.4517, "step": 3355 }, { "epoch": 0.26587443058031296, "grad_norm": 2.399511530399849, "learning_rate": 1.7221384213401732e-05, "loss": 0.3225, "step": 3356 }, { "epoch": 0.2659536541889483, "grad_norm": 1.728416325304639, "learning_rate": 1.7219608926860726e-05, "loss": 0.2689, "step": 3357 }, { "epoch": 0.26603287779758367, "grad_norm": 1.735648579420402, "learning_rate": 1.721783316494321e-05, "loss": 0.3004, "step": 3358 }, { "epoch": 0.26611210140621905, "grad_norm": 2.3216093410646064, "learning_rate": 1.7216056927766106e-05, "loss": 0.3843, "step": 3359 }, { "epoch": 0.26619132501485443, "grad_norm": 2.761399788877409, "learning_rate": 1.721428021544638e-05, "loss": 0.3857, "step": 3360 }, { "epoch": 0.2662705486234898, "grad_norm": 2.2120445712875902, "learning_rate": 1.7212503028101012e-05, "loss": 0.4201, "step": 3361 }, { "epoch": 0.2663497722321252, "grad_norm": 2.4005627922450055, "learning_rate": 1.721072536584702e-05, "loss": 0.4349, "step": 3362 }, { "epoch": 0.2664289958407606, "grad_norm": 1.8909743152480871, "learning_rate": 1.7208947228801464e-05, "loss": 0.267, "step": 3363 }, { "epoch": 0.2665082194493959, "grad_norm": 1.8126766367599259, "learning_rate": 1.7207168617081418e-05, "loss": 0.2903, "step": 3364 }, { "epoch": 0.2665874430580313, "grad_norm": 1.740841195438343, "learning_rate": 1.7205389530804e-05, "loss": 0.2232, "step": 3365 }, { "epoch": 0.26666666666666666, "grad_norm": 1.8321824172234051, "learning_rate": 1.7203609970086347e-05, "loss": 0.264, "step": 3366 }, { "epoch": 0.26674589027530204, "grad_norm": 2.359089679248261, "learning_rate": 1.720182993504564e-05, "loss": 0.3437, "step": 3367 }, { "epoch": 0.2668251138839374, "grad_norm": 1.9946362292912054, "learning_rate": 1.7200049425799087e-05, "loss": 0.3399, "step": 3368 }, { "epoch": 0.2669043374925728, "grad_norm": 1.775687053524371, "learning_rate": 1.7198268442463923e-05, "loss": 0.3073, "step": 3369 }, { "epoch": 0.26698356110120813, "grad_norm": 1.6446176168511335, "learning_rate": 1.719648698515742e-05, "loss": 0.2388, "step": 3370 }, { "epoch": 0.2670627847098435, "grad_norm": 2.169796884925276, "learning_rate": 1.7194705053996873e-05, "loss": 0.2728, "step": 3371 }, { "epoch": 0.2671420083184789, "grad_norm": 1.853749628602967, "learning_rate": 1.719292264909962e-05, "loss": 0.3543, "step": 3372 }, { "epoch": 0.2672212319271143, "grad_norm": 2.2220969340241745, "learning_rate": 1.7191139770583015e-05, "loss": 0.3358, "step": 3373 }, { "epoch": 0.26730045553574966, "grad_norm": 1.9016013931004871, "learning_rate": 1.7189356418564463e-05, "loss": 0.3122, "step": 3374 }, { "epoch": 0.26737967914438504, "grad_norm": 1.811533174902071, "learning_rate": 1.7187572593161382e-05, "loss": 0.2395, "step": 3375 }, { "epoch": 0.2674589027530204, "grad_norm": 1.9815976462937936, "learning_rate": 1.7185788294491232e-05, "loss": 0.3322, "step": 3376 }, { "epoch": 0.26753812636165575, "grad_norm": 1.8483588231747987, "learning_rate": 1.7184003522671497e-05, "loss": 0.2476, "step": 3377 }, { "epoch": 0.26761734997029113, "grad_norm": 1.858329517675542, "learning_rate": 1.7182218277819697e-05, "loss": 0.2063, "step": 3378 }, { "epoch": 0.2676965735789265, "grad_norm": 2.0331968250360397, "learning_rate": 1.718043256005338e-05, "loss": 0.3271, "step": 3379 }, { "epoch": 0.2677757971875619, "grad_norm": 1.8281770863363087, "learning_rate": 1.717864636949013e-05, "loss": 0.2449, "step": 3380 }, { "epoch": 0.2678550207961973, "grad_norm": 1.9349374504083263, "learning_rate": 1.7176859706247563e-05, "loss": 0.2794, "step": 3381 }, { "epoch": 0.26793424440483266, "grad_norm": 1.8730970808507403, "learning_rate": 1.717507257044331e-05, "loss": 0.361, "step": 3382 }, { "epoch": 0.26801346801346804, "grad_norm": 1.8493380523649967, "learning_rate": 1.717328496219506e-05, "loss": 0.1875, "step": 3383 }, { "epoch": 0.26809269162210336, "grad_norm": 2.473703148725078, "learning_rate": 1.7171496881620507e-05, "loss": 0.4314, "step": 3384 }, { "epoch": 0.26817191523073874, "grad_norm": 1.8371546951078674, "learning_rate": 1.716970832883739e-05, "loss": 0.2863, "step": 3385 }, { "epoch": 0.2682511388393741, "grad_norm": 2.2521562254570346, "learning_rate": 1.716791930396348e-05, "loss": 0.3446, "step": 3386 }, { "epoch": 0.2683303624480095, "grad_norm": 2.1877287676789647, "learning_rate": 1.716612980711657e-05, "loss": 0.3864, "step": 3387 }, { "epoch": 0.2684095860566449, "grad_norm": 2.216438671179258, "learning_rate": 1.7164339838414496e-05, "loss": 0.2763, "step": 3388 }, { "epoch": 0.26848880966528027, "grad_norm": 2.173025537378721, "learning_rate": 1.7162549397975118e-05, "loss": 0.2324, "step": 3389 }, { "epoch": 0.26856803327391565, "grad_norm": 1.7275553775691295, "learning_rate": 1.7160758485916325e-05, "loss": 0.206, "step": 3390 }, { "epoch": 0.268647256882551, "grad_norm": 2.109874916464338, "learning_rate": 1.715896710235604e-05, "loss": 0.3108, "step": 3391 }, { "epoch": 0.26872648049118636, "grad_norm": 2.063356303116072, "learning_rate": 1.715717524741222e-05, "loss": 0.3225, "step": 3392 }, { "epoch": 0.26880570409982174, "grad_norm": 2.074422457560864, "learning_rate": 1.7155382921202844e-05, "loss": 0.4281, "step": 3393 }, { "epoch": 0.2688849277084571, "grad_norm": 2.3564315078913594, "learning_rate": 1.7153590123845938e-05, "loss": 0.4054, "step": 3394 }, { "epoch": 0.2689641513170925, "grad_norm": 2.015368707310253, "learning_rate": 1.715179685545954e-05, "loss": 0.3914, "step": 3395 }, { "epoch": 0.2690433749257279, "grad_norm": 1.8676094100668932, "learning_rate": 1.7150003116161734e-05, "loss": 0.2481, "step": 3396 }, { "epoch": 0.26912259853436327, "grad_norm": 1.9978192501374776, "learning_rate": 1.714820890607062e-05, "loss": 0.3082, "step": 3397 }, { "epoch": 0.2692018221429986, "grad_norm": 2.0549641598061825, "learning_rate": 1.714641422530435e-05, "loss": 0.4326, "step": 3398 }, { "epoch": 0.269281045751634, "grad_norm": 1.7291000095708726, "learning_rate": 1.7144619073981088e-05, "loss": 0.2368, "step": 3399 }, { "epoch": 0.26936026936026936, "grad_norm": 1.5685858177501182, "learning_rate": 1.7142823452219036e-05, "loss": 0.2623, "step": 3400 }, { "epoch": 0.26943949296890474, "grad_norm": 1.8022665585665223, "learning_rate": 1.714102736013643e-05, "loss": 0.2953, "step": 3401 }, { "epoch": 0.2695187165775401, "grad_norm": 1.7148333665239874, "learning_rate": 1.7139230797851537e-05, "loss": 0.2899, "step": 3402 }, { "epoch": 0.2695979401861755, "grad_norm": 2.0486075979032226, "learning_rate": 1.7137433765482644e-05, "loss": 0.3429, "step": 3403 }, { "epoch": 0.2696771637948109, "grad_norm": 2.254638670894289, "learning_rate": 1.713563626314808e-05, "loss": 0.4674, "step": 3404 }, { "epoch": 0.2697563874034462, "grad_norm": 1.9844839294437284, "learning_rate": 1.71338382909662e-05, "loss": 0.2142, "step": 3405 }, { "epoch": 0.2698356110120816, "grad_norm": 1.9967467790777143, "learning_rate": 1.71320398490554e-05, "loss": 0.3291, "step": 3406 }, { "epoch": 0.26991483462071697, "grad_norm": 1.750876537251268, "learning_rate": 1.713024093753409e-05, "loss": 0.2859, "step": 3407 }, { "epoch": 0.26999405822935235, "grad_norm": 2.0531637004843155, "learning_rate": 1.7128441556520723e-05, "loss": 0.3012, "step": 3408 }, { "epoch": 0.27007328183798773, "grad_norm": 2.029446732194121, "learning_rate": 1.7126641706133782e-05, "loss": 0.2994, "step": 3409 }, { "epoch": 0.2701525054466231, "grad_norm": 1.825503137882742, "learning_rate": 1.7124841386491774e-05, "loss": 0.325, "step": 3410 }, { "epoch": 0.27023172905525844, "grad_norm": 1.8663217216949353, "learning_rate": 1.7123040597713242e-05, "loss": 0.2081, "step": 3411 }, { "epoch": 0.2703109526638938, "grad_norm": 2.1946547168324386, "learning_rate": 1.7121239339916763e-05, "loss": 0.2858, "step": 3412 }, { "epoch": 0.2703901762725292, "grad_norm": 1.785401676136347, "learning_rate": 1.7119437613220936e-05, "loss": 0.2338, "step": 3413 }, { "epoch": 0.2704693998811646, "grad_norm": 2.0546556830274647, "learning_rate": 1.71176354177444e-05, "loss": 0.3663, "step": 3414 }, { "epoch": 0.27054862348979997, "grad_norm": 1.9598841949012182, "learning_rate": 1.711583275360582e-05, "loss": 0.2318, "step": 3415 }, { "epoch": 0.27062784709843535, "grad_norm": 1.7839842431481236, "learning_rate": 1.711402962092389e-05, "loss": 0.2699, "step": 3416 }, { "epoch": 0.27070707070707073, "grad_norm": 1.9928719154903598, "learning_rate": 1.7112226019817345e-05, "loss": 0.2359, "step": 3417 }, { "epoch": 0.27078629431570606, "grad_norm": 2.515487231914646, "learning_rate": 1.7110421950404935e-05, "loss": 0.3807, "step": 3418 }, { "epoch": 0.27086551792434144, "grad_norm": 2.063975860768467, "learning_rate": 1.710861741280545e-05, "loss": 0.3594, "step": 3419 }, { "epoch": 0.2709447415329768, "grad_norm": 1.9150625017258538, "learning_rate": 1.710681240713772e-05, "loss": 0.2379, "step": 3420 }, { "epoch": 0.2710239651416122, "grad_norm": 2.449227820414286, "learning_rate": 1.7105006933520584e-05, "loss": 0.2713, "step": 3421 }, { "epoch": 0.2711031887502476, "grad_norm": 2.0171076153944782, "learning_rate": 1.710320099207293e-05, "loss": 0.229, "step": 3422 }, { "epoch": 0.27118241235888296, "grad_norm": 1.7603283630385689, "learning_rate": 1.7101394582913667e-05, "loss": 0.2705, "step": 3423 }, { "epoch": 0.27126163596751834, "grad_norm": 1.8706520400199922, "learning_rate": 1.709958770616174e-05, "loss": 0.2379, "step": 3424 }, { "epoch": 0.27134085957615367, "grad_norm": 1.6082111530086896, "learning_rate": 1.7097780361936128e-05, "loss": 0.1881, "step": 3425 }, { "epoch": 0.27142008318478905, "grad_norm": 1.4648527812333807, "learning_rate": 1.709597255035583e-05, "loss": 0.2438, "step": 3426 }, { "epoch": 0.27149930679342443, "grad_norm": 1.9096550438093076, "learning_rate": 1.709416427153988e-05, "loss": 0.3507, "step": 3427 }, { "epoch": 0.2715785304020598, "grad_norm": 2.006438980160681, "learning_rate": 1.7092355525607352e-05, "loss": 0.3203, "step": 3428 }, { "epoch": 0.2716577540106952, "grad_norm": 2.116227302303852, "learning_rate": 1.7090546312677335e-05, "loss": 0.2967, "step": 3429 }, { "epoch": 0.2717369776193306, "grad_norm": 2.0026112783102747, "learning_rate": 1.7088736632868964e-05, "loss": 0.3262, "step": 3430 }, { "epoch": 0.27181620122796596, "grad_norm": 2.1452967188624887, "learning_rate": 1.7086926486301393e-05, "loss": 0.2768, "step": 3431 }, { "epoch": 0.2718954248366013, "grad_norm": 2.417920698203675, "learning_rate": 1.7085115873093814e-05, "loss": 0.3042, "step": 3432 }, { "epoch": 0.27197464844523667, "grad_norm": 1.7361940364004071, "learning_rate": 1.7083304793365445e-05, "loss": 0.2848, "step": 3433 }, { "epoch": 0.27205387205387205, "grad_norm": 2.0124176725836906, "learning_rate": 1.7081493247235537e-05, "loss": 0.3293, "step": 3434 }, { "epoch": 0.27213309566250743, "grad_norm": 2.1813643158396885, "learning_rate": 1.7079681234823374e-05, "loss": 0.3486, "step": 3435 }, { "epoch": 0.2722123192711428, "grad_norm": 2.034921035954899, "learning_rate": 1.7077868756248265e-05, "loss": 0.377, "step": 3436 }, { "epoch": 0.2722915428797782, "grad_norm": 2.3344973685258665, "learning_rate": 1.7076055811629556e-05, "loss": 0.4017, "step": 3437 }, { "epoch": 0.2723707664884136, "grad_norm": 2.3327574830878017, "learning_rate": 1.7074242401086623e-05, "loss": 0.3413, "step": 3438 }, { "epoch": 0.2724499900970489, "grad_norm": 2.1733727982323003, "learning_rate": 1.7072428524738865e-05, "loss": 0.3579, "step": 3439 }, { "epoch": 0.2725292137056843, "grad_norm": 1.6974348724238708, "learning_rate": 1.707061418270572e-05, "loss": 0.3138, "step": 3440 }, { "epoch": 0.27260843731431966, "grad_norm": 1.9907008970891409, "learning_rate": 1.706879937510665e-05, "loss": 0.3513, "step": 3441 }, { "epoch": 0.27268766092295504, "grad_norm": 2.1046107430848573, "learning_rate": 1.7066984102061155e-05, "loss": 0.2928, "step": 3442 }, { "epoch": 0.2727668845315904, "grad_norm": 2.1076791732420763, "learning_rate": 1.706516836368876e-05, "loss": 0.3933, "step": 3443 }, { "epoch": 0.2728461081402258, "grad_norm": 1.8503842878987184, "learning_rate": 1.7063352160109026e-05, "loss": 0.3153, "step": 3444 }, { "epoch": 0.27292533174886113, "grad_norm": 1.8954500335377484, "learning_rate": 1.7061535491441538e-05, "loss": 0.422, "step": 3445 }, { "epoch": 0.2730045553574965, "grad_norm": 2.8329061978914694, "learning_rate": 1.7059718357805915e-05, "loss": 0.4148, "step": 3446 }, { "epoch": 0.2730837789661319, "grad_norm": 1.9058951118772942, "learning_rate": 1.705790075932181e-05, "loss": 0.3352, "step": 3447 }, { "epoch": 0.2731630025747673, "grad_norm": 2.1506428320789692, "learning_rate": 1.7056082696108896e-05, "loss": 0.3541, "step": 3448 }, { "epoch": 0.27324222618340266, "grad_norm": 2.29124772955635, "learning_rate": 1.7054264168286892e-05, "loss": 0.2718, "step": 3449 }, { "epoch": 0.27332144979203804, "grad_norm": 1.8496851493923643, "learning_rate": 1.7052445175975533e-05, "loss": 0.2461, "step": 3450 }, { "epoch": 0.2734006734006734, "grad_norm": 1.8939713897535773, "learning_rate": 1.7050625719294593e-05, "loss": 0.3335, "step": 3451 }, { "epoch": 0.27347989700930875, "grad_norm": 1.9409859724876397, "learning_rate": 1.7048805798363876e-05, "loss": 0.2514, "step": 3452 }, { "epoch": 0.27355912061794413, "grad_norm": 2.290938856530663, "learning_rate": 1.7046985413303215e-05, "loss": 0.3822, "step": 3453 }, { "epoch": 0.2736383442265795, "grad_norm": 2.0332328663757298, "learning_rate": 1.7045164564232474e-05, "loss": 0.3248, "step": 3454 }, { "epoch": 0.2737175678352149, "grad_norm": 2.7222764485818844, "learning_rate": 1.704334325127154e-05, "loss": 0.2372, "step": 3455 }, { "epoch": 0.2737967914438503, "grad_norm": 1.9436490829624704, "learning_rate": 1.704152147454035e-05, "loss": 0.241, "step": 3456 }, { "epoch": 0.27387601505248566, "grad_norm": 1.8552986674436382, "learning_rate": 1.7039699234158846e-05, "loss": 0.3725, "step": 3457 }, { "epoch": 0.27395523866112104, "grad_norm": 1.803009481218911, "learning_rate": 1.7037876530247025e-05, "loss": 0.2819, "step": 3458 }, { "epoch": 0.27403446226975636, "grad_norm": 2.2327289539249717, "learning_rate": 1.7036053362924896e-05, "loss": 0.2886, "step": 3459 }, { "epoch": 0.27411368587839174, "grad_norm": 1.7654127900726622, "learning_rate": 1.7034229732312512e-05, "loss": 0.3399, "step": 3460 }, { "epoch": 0.2741929094870271, "grad_norm": 2.001618149518056, "learning_rate": 1.703240563852994e-05, "loss": 0.3341, "step": 3461 }, { "epoch": 0.2742721330956625, "grad_norm": 1.8275278815146694, "learning_rate": 1.70305810816973e-05, "loss": 0.2905, "step": 3462 }, { "epoch": 0.2743513567042979, "grad_norm": 1.9754697629304463, "learning_rate": 1.7028756061934722e-05, "loss": 0.2721, "step": 3463 }, { "epoch": 0.27443058031293327, "grad_norm": 1.9224899832519573, "learning_rate": 1.702693057936238e-05, "loss": 0.3465, "step": 3464 }, { "epoch": 0.27450980392156865, "grad_norm": 2.047121478483813, "learning_rate": 1.702510463410047e-05, "loss": 0.375, "step": 3465 }, { "epoch": 0.274589027530204, "grad_norm": 1.9609023286637646, "learning_rate": 1.7023278226269222e-05, "loss": 0.2976, "step": 3466 }, { "epoch": 0.27466825113883936, "grad_norm": 2.2701917237976623, "learning_rate": 1.7021451355988895e-05, "loss": 0.3709, "step": 3467 }, { "epoch": 0.27474747474747474, "grad_norm": 1.6581893154818668, "learning_rate": 1.7019624023379784e-05, "loss": 0.2215, "step": 3468 }, { "epoch": 0.2748266983561101, "grad_norm": 2.1512060926941223, "learning_rate": 1.7017796228562206e-05, "loss": 0.4825, "step": 3469 }, { "epoch": 0.2749059219647455, "grad_norm": 1.9605498989527748, "learning_rate": 1.7015967971656513e-05, "loss": 0.3221, "step": 3470 }, { "epoch": 0.2749851455733809, "grad_norm": 1.7094513779059308, "learning_rate": 1.7014139252783092e-05, "loss": 0.2783, "step": 3471 }, { "epoch": 0.27506436918201627, "grad_norm": 1.715976794116535, "learning_rate": 1.7012310072062348e-05, "loss": 0.2278, "step": 3472 }, { "epoch": 0.2751435927906516, "grad_norm": 1.904943137397442, "learning_rate": 1.7010480429614726e-05, "loss": 0.2834, "step": 3473 }, { "epoch": 0.275222816399287, "grad_norm": 2.402731313763493, "learning_rate": 1.70086503255607e-05, "loss": 0.2803, "step": 3474 }, { "epoch": 0.27530204000792236, "grad_norm": 2.219686760281645, "learning_rate": 1.7006819760020773e-05, "loss": 0.3458, "step": 3475 }, { "epoch": 0.27538126361655774, "grad_norm": 1.9100238600024793, "learning_rate": 1.700498873311548e-05, "loss": 0.2849, "step": 3476 }, { "epoch": 0.2754604872251931, "grad_norm": 2.1779120871548026, "learning_rate": 1.7003157244965387e-05, "loss": 0.3716, "step": 3477 }, { "epoch": 0.2755397108338285, "grad_norm": 1.7042861256404576, "learning_rate": 1.700132529569109e-05, "loss": 0.3119, "step": 3478 }, { "epoch": 0.2756189344424639, "grad_norm": 1.9961802261549253, "learning_rate": 1.69994928854132e-05, "loss": 0.3712, "step": 3479 }, { "epoch": 0.2756981580510992, "grad_norm": 2.2530353620611097, "learning_rate": 1.6997660014252392e-05, "loss": 0.3452, "step": 3480 }, { "epoch": 0.2757773816597346, "grad_norm": 2.1210282783750998, "learning_rate": 1.699582668232934e-05, "loss": 0.3903, "step": 3481 }, { "epoch": 0.27585660526836997, "grad_norm": 2.1661304360605502, "learning_rate": 1.6993992889764758e-05, "loss": 0.291, "step": 3482 }, { "epoch": 0.27593582887700535, "grad_norm": 2.0454860038864764, "learning_rate": 1.69921586366794e-05, "loss": 0.3169, "step": 3483 }, { "epoch": 0.27601505248564073, "grad_norm": 2.061118649960762, "learning_rate": 1.6990323923194042e-05, "loss": 0.3644, "step": 3484 }, { "epoch": 0.2760942760942761, "grad_norm": 1.981208434375285, "learning_rate": 1.698848874942949e-05, "loss": 0.3909, "step": 3485 }, { "epoch": 0.27617349970291144, "grad_norm": 2.045603357192902, "learning_rate": 1.698665311550658e-05, "loss": 0.2978, "step": 3486 }, { "epoch": 0.2762527233115468, "grad_norm": 1.9548500614536992, "learning_rate": 1.6984817021546177e-05, "loss": 0.2961, "step": 3487 }, { "epoch": 0.2763319469201822, "grad_norm": 2.4851479274008104, "learning_rate": 1.6982980467669183e-05, "loss": 0.4116, "step": 3488 }, { "epoch": 0.2764111705288176, "grad_norm": 2.1283429585508493, "learning_rate": 1.6981143453996524e-05, "loss": 0.2032, "step": 3489 }, { "epoch": 0.27649039413745297, "grad_norm": 1.9797279961550915, "learning_rate": 1.697930598064916e-05, "loss": 0.3148, "step": 3490 }, { "epoch": 0.27656961774608835, "grad_norm": 2.2805286707108, "learning_rate": 1.697746804774808e-05, "loss": 0.3736, "step": 3491 }, { "epoch": 0.27664884135472373, "grad_norm": 1.9757464079684457, "learning_rate": 1.6975629655414304e-05, "loss": 0.2633, "step": 3492 }, { "epoch": 0.27672806496335906, "grad_norm": 2.0193121205070956, "learning_rate": 1.6973790803768875e-05, "loss": 0.3611, "step": 3493 }, { "epoch": 0.27680728857199444, "grad_norm": 1.8007626832785526, "learning_rate": 1.6971951492932882e-05, "loss": 0.2225, "step": 3494 }, { "epoch": 0.2768865121806298, "grad_norm": 1.966187112911452, "learning_rate": 1.697011172302743e-05, "loss": 0.3494, "step": 3495 }, { "epoch": 0.2769657357892652, "grad_norm": 2.2795464880490215, "learning_rate": 1.696827149417366e-05, "loss": 0.3866, "step": 3496 }, { "epoch": 0.2770449593979006, "grad_norm": 1.8932000201309704, "learning_rate": 1.696643080649274e-05, "loss": 0.2862, "step": 3497 }, { "epoch": 0.27712418300653596, "grad_norm": 2.3551986748591407, "learning_rate": 1.696458966010587e-05, "loss": 0.4194, "step": 3498 }, { "epoch": 0.27720340661517134, "grad_norm": 2.1182925030402417, "learning_rate": 1.6962748055134283e-05, "loss": 0.329, "step": 3499 }, { "epoch": 0.27728263022380667, "grad_norm": 2.2859125334664787, "learning_rate": 1.696090599169924e-05, "loss": 0.3628, "step": 3500 }, { "epoch": 0.27736185383244205, "grad_norm": 2.4958878560634714, "learning_rate": 1.695906346992203e-05, "loss": 0.3302, "step": 3501 }, { "epoch": 0.27744107744107743, "grad_norm": 2.2141659017644715, "learning_rate": 1.6957220489923978e-05, "loss": 0.2901, "step": 3502 }, { "epoch": 0.2775203010497128, "grad_norm": 1.7824419867104577, "learning_rate": 1.695537705182643e-05, "loss": 0.2348, "step": 3503 }, { "epoch": 0.2775995246583482, "grad_norm": 2.3098994777762134, "learning_rate": 1.695353315575077e-05, "loss": 0.2679, "step": 3504 }, { "epoch": 0.2776787482669836, "grad_norm": 1.8635940905020172, "learning_rate": 1.6951688801818413e-05, "loss": 0.2092, "step": 3505 }, { "epoch": 0.27775797187561896, "grad_norm": 2.073096902943524, "learning_rate": 1.6949843990150798e-05, "loss": 0.3515, "step": 3506 }, { "epoch": 0.2778371954842543, "grad_norm": 1.8106237182660103, "learning_rate": 1.6947998720869394e-05, "loss": 0.3101, "step": 3507 }, { "epoch": 0.27791641909288967, "grad_norm": 1.9537670109286247, "learning_rate": 1.6946152994095705e-05, "loss": 0.2823, "step": 3508 }, { "epoch": 0.27799564270152505, "grad_norm": 1.8528504962789316, "learning_rate": 1.6944306809951264e-05, "loss": 0.2335, "step": 3509 }, { "epoch": 0.27807486631016043, "grad_norm": 2.248412889345186, "learning_rate": 1.694246016855764e-05, "loss": 0.3095, "step": 3510 }, { "epoch": 0.2781540899187958, "grad_norm": 1.6960367712117062, "learning_rate": 1.694061307003641e-05, "loss": 0.2585, "step": 3511 }, { "epoch": 0.2782333135274312, "grad_norm": 2.045105421530441, "learning_rate": 1.693876551450921e-05, "loss": 0.33, "step": 3512 }, { "epoch": 0.2783125371360666, "grad_norm": 1.9009183683308206, "learning_rate": 1.693691750209769e-05, "loss": 0.275, "step": 3513 }, { "epoch": 0.2783917607447019, "grad_norm": 1.8786818326446126, "learning_rate": 1.6935069032923525e-05, "loss": 0.3702, "step": 3514 }, { "epoch": 0.2784709843533373, "grad_norm": 2.21713110251996, "learning_rate": 1.6933220107108438e-05, "loss": 0.4924, "step": 3515 }, { "epoch": 0.27855020796197266, "grad_norm": 2.2291293085755255, "learning_rate": 1.6931370724774166e-05, "loss": 0.2728, "step": 3516 }, { "epoch": 0.27862943157060804, "grad_norm": 1.8046522243481335, "learning_rate": 1.6929520886042486e-05, "loss": 0.2754, "step": 3517 }, { "epoch": 0.2787086551792434, "grad_norm": 1.7487421348460166, "learning_rate": 1.6927670591035195e-05, "loss": 0.3436, "step": 3518 }, { "epoch": 0.2787878787878788, "grad_norm": 2.019776805383985, "learning_rate": 1.692581983987413e-05, "loss": 0.3243, "step": 3519 }, { "epoch": 0.2788671023965142, "grad_norm": 2.110360785216861, "learning_rate": 1.6923968632681155e-05, "loss": 0.3165, "step": 3520 }, { "epoch": 0.2789463260051495, "grad_norm": 2.0722510875379916, "learning_rate": 1.6922116969578163e-05, "loss": 0.1952, "step": 3521 }, { "epoch": 0.2790255496137849, "grad_norm": 2.093271655456249, "learning_rate": 1.692026485068707e-05, "loss": 0.3892, "step": 3522 }, { "epoch": 0.2791047732224203, "grad_norm": 2.2802937614085166, "learning_rate": 1.6918412276129837e-05, "loss": 0.3902, "step": 3523 }, { "epoch": 0.27918399683105566, "grad_norm": 1.8380856505009215, "learning_rate": 1.691655924602845e-05, "loss": 0.3321, "step": 3524 }, { "epoch": 0.27926322043969104, "grad_norm": 2.577987441071288, "learning_rate": 1.6914705760504913e-05, "loss": 0.4003, "step": 3525 }, { "epoch": 0.2793424440483264, "grad_norm": 1.8455243983091896, "learning_rate": 1.6912851819681272e-05, "loss": 0.2099, "step": 3526 }, { "epoch": 0.27942166765696175, "grad_norm": 2.574855512597409, "learning_rate": 1.69109974236796e-05, "loss": 0.334, "step": 3527 }, { "epoch": 0.27950089126559713, "grad_norm": 2.189702092016546, "learning_rate": 1.6909142572622003e-05, "loss": 0.2827, "step": 3528 }, { "epoch": 0.2795801148742325, "grad_norm": 2.0898676765935984, "learning_rate": 1.6907287266630614e-05, "loss": 0.351, "step": 3529 }, { "epoch": 0.2796593384828679, "grad_norm": 2.4892231804154767, "learning_rate": 1.6905431505827595e-05, "loss": 0.287, "step": 3530 }, { "epoch": 0.2797385620915033, "grad_norm": 1.9382705291067408, "learning_rate": 1.6903575290335136e-05, "loss": 0.2526, "step": 3531 }, { "epoch": 0.27981778570013865, "grad_norm": 1.9735049122877046, "learning_rate": 1.690171862027546e-05, "loss": 0.2289, "step": 3532 }, { "epoch": 0.27989700930877404, "grad_norm": 4.74144127134824, "learning_rate": 1.6899861495770827e-05, "loss": 0.3942, "step": 3533 }, { "epoch": 0.27997623291740936, "grad_norm": 2.504025500575295, "learning_rate": 1.689800391694351e-05, "loss": 0.2323, "step": 3534 }, { "epoch": 0.28005545652604474, "grad_norm": 2.363072999087013, "learning_rate": 1.689614588391583e-05, "loss": 0.3983, "step": 3535 }, { "epoch": 0.2801346801346801, "grad_norm": 1.9585205896537063, "learning_rate": 1.689428739681012e-05, "loss": 0.2833, "step": 3536 }, { "epoch": 0.2802139037433155, "grad_norm": 4.073854755311286, "learning_rate": 1.6892428455748762e-05, "loss": 0.2544, "step": 3537 }, { "epoch": 0.2802931273519509, "grad_norm": 2.100093931788127, "learning_rate": 1.6890569060854156e-05, "loss": 0.219, "step": 3538 }, { "epoch": 0.28037235096058627, "grad_norm": 2.893602540009141, "learning_rate": 1.6888709212248728e-05, "loss": 0.3959, "step": 3539 }, { "epoch": 0.28045157456922165, "grad_norm": 2.501694315304918, "learning_rate": 1.6886848910054947e-05, "loss": 0.4967, "step": 3540 }, { "epoch": 0.280530798177857, "grad_norm": 2.301233341164083, "learning_rate": 1.6884988154395304e-05, "loss": 0.3129, "step": 3541 }, { "epoch": 0.28061002178649236, "grad_norm": 2.2660350398429236, "learning_rate": 1.688312694539232e-05, "loss": 0.4105, "step": 3542 }, { "epoch": 0.28068924539512774, "grad_norm": 2.5343703470780365, "learning_rate": 1.6881265283168543e-05, "loss": 0.432, "step": 3543 }, { "epoch": 0.2807684690037631, "grad_norm": 2.064622891027119, "learning_rate": 1.6879403167846556e-05, "loss": 0.3034, "step": 3544 }, { "epoch": 0.2808476926123985, "grad_norm": 1.7379244947562247, "learning_rate": 1.6877540599548977e-05, "loss": 0.409, "step": 3545 }, { "epoch": 0.2809269162210339, "grad_norm": 2.1850962461437002, "learning_rate": 1.6875677578398442e-05, "loss": 0.3902, "step": 3546 }, { "epoch": 0.28100613982966927, "grad_norm": 2.6374808658118476, "learning_rate": 1.6873814104517617e-05, "loss": 0.375, "step": 3547 }, { "epoch": 0.2810853634383046, "grad_norm": 2.0622907379385906, "learning_rate": 1.6871950178029216e-05, "loss": 0.2744, "step": 3548 }, { "epoch": 0.28116458704694, "grad_norm": 2.121968929902448, "learning_rate": 1.6870085799055956e-05, "loss": 0.3439, "step": 3549 }, { "epoch": 0.28124381065557535, "grad_norm": 2.386560246097312, "learning_rate": 1.6868220967720604e-05, "loss": 0.2962, "step": 3550 }, { "epoch": 0.28132303426421074, "grad_norm": 2.109175502399617, "learning_rate": 1.686635568414595e-05, "loss": 0.4228, "step": 3551 }, { "epoch": 0.2814022578728461, "grad_norm": 2.106910782322327, "learning_rate": 1.686448994845481e-05, "loss": 0.3233, "step": 3552 }, { "epoch": 0.2814814814814815, "grad_norm": 2.3877406921161426, "learning_rate": 1.6862623760770038e-05, "loss": 0.5141, "step": 3553 }, { "epoch": 0.2815607050901169, "grad_norm": 1.8639037639411236, "learning_rate": 1.6860757121214513e-05, "loss": 0.2009, "step": 3554 }, { "epoch": 0.2816399286987522, "grad_norm": 2.1914663337553226, "learning_rate": 1.685889002991114e-05, "loss": 0.4505, "step": 3555 }, { "epoch": 0.2817191523073876, "grad_norm": 2.232641874872952, "learning_rate": 1.6857022486982865e-05, "loss": 0.3326, "step": 3556 }, { "epoch": 0.28179837591602297, "grad_norm": 2.5640197816652854, "learning_rate": 1.6855154492552656e-05, "loss": 0.3061, "step": 3557 }, { "epoch": 0.28187759952465835, "grad_norm": 1.668174839635691, "learning_rate": 1.6853286046743505e-05, "loss": 0.2929, "step": 3558 }, { "epoch": 0.28195682313329373, "grad_norm": 1.8556588588344165, "learning_rate": 1.6851417149678442e-05, "loss": 0.3338, "step": 3559 }, { "epoch": 0.2820360467419291, "grad_norm": 1.987759747378164, "learning_rate": 1.684954780148053e-05, "loss": 0.3289, "step": 3560 }, { "epoch": 0.2821152703505645, "grad_norm": 1.83485460760186, "learning_rate": 1.684767800227285e-05, "loss": 0.2603, "step": 3561 }, { "epoch": 0.2821944939591998, "grad_norm": 1.7141895985015019, "learning_rate": 1.6845807752178528e-05, "loss": 0.3035, "step": 3562 }, { "epoch": 0.2822737175678352, "grad_norm": 2.375681914953145, "learning_rate": 1.68439370513207e-05, "loss": 0.2527, "step": 3563 }, { "epoch": 0.2823529411764706, "grad_norm": 1.7910327977988199, "learning_rate": 1.6842065899822548e-05, "loss": 0.3252, "step": 3564 }, { "epoch": 0.28243216478510597, "grad_norm": 2.2763206240222145, "learning_rate": 1.6840194297807283e-05, "loss": 0.3886, "step": 3565 }, { "epoch": 0.28251138839374135, "grad_norm": 2.1690502030834433, "learning_rate": 1.6838322245398135e-05, "loss": 0.3165, "step": 3566 }, { "epoch": 0.28259061200237673, "grad_norm": 2.3205598436667763, "learning_rate": 1.6836449742718367e-05, "loss": 0.3334, "step": 3567 }, { "epoch": 0.28266983561101205, "grad_norm": 2.322813610233888, "learning_rate": 1.6834576789891282e-05, "loss": 0.3761, "step": 3568 }, { "epoch": 0.28274905921964744, "grad_norm": 2.0865471060637093, "learning_rate": 1.68327033870402e-05, "loss": 0.415, "step": 3569 }, { "epoch": 0.2828282828282828, "grad_norm": 1.9750981776735403, "learning_rate": 1.6830829534288475e-05, "loss": 0.2678, "step": 3570 }, { "epoch": 0.2829075064369182, "grad_norm": 2.1200493395005857, "learning_rate": 1.6828955231759495e-05, "loss": 0.2851, "step": 3571 }, { "epoch": 0.2829867300455536, "grad_norm": 2.0286185811001203, "learning_rate": 1.682708047957667e-05, "loss": 0.3328, "step": 3572 }, { "epoch": 0.28306595365418896, "grad_norm": 2.02119392039722, "learning_rate": 1.682520527786345e-05, "loss": 0.3359, "step": 3573 }, { "epoch": 0.28314517726282434, "grad_norm": 1.998318348475542, "learning_rate": 1.6823329626743298e-05, "loss": 0.2564, "step": 3574 }, { "epoch": 0.28322440087145967, "grad_norm": 1.96852727328102, "learning_rate": 1.6821453526339727e-05, "loss": 0.2832, "step": 3575 }, { "epoch": 0.28330362448009505, "grad_norm": 2.224176902081358, "learning_rate": 1.6819576976776262e-05, "loss": 0.3629, "step": 3576 }, { "epoch": 0.28338284808873043, "grad_norm": 2.3555540355175495, "learning_rate": 1.6817699978176464e-05, "loss": 0.431, "step": 3577 }, { "epoch": 0.2834620716973658, "grad_norm": 2.0133080493539444, "learning_rate": 1.681582253066393e-05, "loss": 0.3447, "step": 3578 }, { "epoch": 0.2835412953060012, "grad_norm": 1.884766970419607, "learning_rate": 1.681394463436228e-05, "loss": 0.2601, "step": 3579 }, { "epoch": 0.2836205189146366, "grad_norm": 2.4606668834815237, "learning_rate": 1.6812066289395157e-05, "loss": 0.4795, "step": 3580 }, { "epoch": 0.28369974252327196, "grad_norm": 1.910508258048931, "learning_rate": 1.681018749588625e-05, "loss": 0.2632, "step": 3581 }, { "epoch": 0.2837789661319073, "grad_norm": 2.3432004006672256, "learning_rate": 1.6808308253959263e-05, "loss": 0.2895, "step": 3582 }, { "epoch": 0.28385818974054267, "grad_norm": 2.059271374398235, "learning_rate": 1.680642856373794e-05, "loss": 0.3668, "step": 3583 }, { "epoch": 0.28393741334917805, "grad_norm": 1.9232457541921084, "learning_rate": 1.680454842534604e-05, "loss": 0.329, "step": 3584 }, { "epoch": 0.28401663695781343, "grad_norm": 2.193321808622056, "learning_rate": 1.6802667838907374e-05, "loss": 0.3295, "step": 3585 }, { "epoch": 0.2840958605664488, "grad_norm": 2.306313480534004, "learning_rate": 1.680078680454576e-05, "loss": 0.4148, "step": 3586 }, { "epoch": 0.2841750841750842, "grad_norm": 1.7855198358027773, "learning_rate": 1.6798905322385063e-05, "loss": 0.2983, "step": 3587 }, { "epoch": 0.2842543077837196, "grad_norm": 2.1383344587956556, "learning_rate": 1.6797023392549157e-05, "loss": 0.4068, "step": 3588 }, { "epoch": 0.2843335313923549, "grad_norm": 1.8516225810619964, "learning_rate": 1.679514101516197e-05, "loss": 0.2654, "step": 3589 }, { "epoch": 0.2844127550009903, "grad_norm": 1.8688152557506754, "learning_rate": 1.6793258190347445e-05, "loss": 0.3171, "step": 3590 }, { "epoch": 0.28449197860962566, "grad_norm": 1.854097322738989, "learning_rate": 1.679137491822955e-05, "loss": 0.2939, "step": 3591 }, { "epoch": 0.28457120221826104, "grad_norm": 2.3661591801846695, "learning_rate": 1.6789491198932302e-05, "loss": 0.2824, "step": 3592 }, { "epoch": 0.2846504258268964, "grad_norm": 2.1640603983847244, "learning_rate": 1.6787607032579724e-05, "loss": 0.3046, "step": 3593 }, { "epoch": 0.2847296494355318, "grad_norm": 1.7442931811303524, "learning_rate": 1.678572241929588e-05, "loss": 0.2556, "step": 3594 }, { "epoch": 0.2848088730441672, "grad_norm": 2.121506717533791, "learning_rate": 1.6783837359204868e-05, "loss": 0.4281, "step": 3595 }, { "epoch": 0.2848880966528025, "grad_norm": 2.210396454838994, "learning_rate": 1.6781951852430813e-05, "loss": 0.3138, "step": 3596 }, { "epoch": 0.2849673202614379, "grad_norm": 1.65898651711375, "learning_rate": 1.6780065899097853e-05, "loss": 0.2239, "step": 3597 }, { "epoch": 0.2850465438700733, "grad_norm": 2.0225418164027547, "learning_rate": 1.677817949933018e-05, "loss": 0.3401, "step": 3598 }, { "epoch": 0.28512576747870866, "grad_norm": 1.8000689437663295, "learning_rate": 1.6776292653252e-05, "loss": 0.2647, "step": 3599 }, { "epoch": 0.28520499108734404, "grad_norm": 2.217499317459855, "learning_rate": 1.6774405360987556e-05, "loss": 0.3322, "step": 3600 }, { "epoch": 0.2852842146959794, "grad_norm": 2.675302464292596, "learning_rate": 1.6772517622661115e-05, "loss": 0.2487, "step": 3601 }, { "epoch": 0.2853634383046148, "grad_norm": 1.7460731376420404, "learning_rate": 1.6770629438396973e-05, "loss": 0.2797, "step": 3602 }, { "epoch": 0.28544266191325013, "grad_norm": 1.8089699555304126, "learning_rate": 1.676874080831947e-05, "loss": 0.3693, "step": 3603 }, { "epoch": 0.2855218855218855, "grad_norm": 2.1212143885633505, "learning_rate": 1.676685173255294e-05, "loss": 0.3087, "step": 3604 }, { "epoch": 0.2856011091305209, "grad_norm": 2.0775186518972526, "learning_rate": 1.6764962211221796e-05, "loss": 0.3754, "step": 3605 }, { "epoch": 0.2856803327391563, "grad_norm": 2.0970190248751686, "learning_rate": 1.6763072244450435e-05, "loss": 0.336, "step": 3606 }, { "epoch": 0.28575955634779165, "grad_norm": 1.898807057326851, "learning_rate": 1.676118183236331e-05, "loss": 0.2389, "step": 3607 }, { "epoch": 0.28583877995642704, "grad_norm": 1.8566474916308082, "learning_rate": 1.6759290975084894e-05, "loss": 0.2653, "step": 3608 }, { "epoch": 0.28591800356506236, "grad_norm": 1.809177876070745, "learning_rate": 1.675739967273969e-05, "loss": 0.239, "step": 3609 }, { "epoch": 0.28599722717369774, "grad_norm": 1.797582595699639, "learning_rate": 1.675550792545223e-05, "loss": 0.3648, "step": 3610 }, { "epoch": 0.2860764507823331, "grad_norm": 1.68539868626444, "learning_rate": 1.6753615733347085e-05, "loss": 0.3728, "step": 3611 }, { "epoch": 0.2861556743909685, "grad_norm": 2.322625480430677, "learning_rate": 1.6751723096548834e-05, "loss": 0.4333, "step": 3612 }, { "epoch": 0.2862348979996039, "grad_norm": 1.5795178072558425, "learning_rate": 1.6749830015182106e-05, "loss": 0.2479, "step": 3613 }, { "epoch": 0.28631412160823927, "grad_norm": 1.9395875810966259, "learning_rate": 1.6747936489371552e-05, "loss": 0.3846, "step": 3614 }, { "epoch": 0.28639334521687465, "grad_norm": 2.1451983105362333, "learning_rate": 1.674604251924185e-05, "loss": 0.3302, "step": 3615 }, { "epoch": 0.28647256882551, "grad_norm": 2.3806039299292823, "learning_rate": 1.6744148104917705e-05, "loss": 0.3794, "step": 3616 }, { "epoch": 0.28655179243414536, "grad_norm": 2.159648060985913, "learning_rate": 1.6742253246523856e-05, "loss": 0.2889, "step": 3617 }, { "epoch": 0.28663101604278074, "grad_norm": 1.7232120736412737, "learning_rate": 1.6740357944185074e-05, "loss": 0.2689, "step": 3618 }, { "epoch": 0.2867102396514161, "grad_norm": 1.494482510615317, "learning_rate": 1.6738462198026154e-05, "loss": 0.2683, "step": 3619 }, { "epoch": 0.2867894632600515, "grad_norm": 1.8418227970753027, "learning_rate": 1.6736566008171925e-05, "loss": 0.3004, "step": 3620 }, { "epoch": 0.2868686868686869, "grad_norm": 2.1292358677841037, "learning_rate": 1.6734669374747237e-05, "loss": 0.3283, "step": 3621 }, { "epoch": 0.28694791047732227, "grad_norm": 1.9393627711781438, "learning_rate": 1.6732772297876975e-05, "loss": 0.2932, "step": 3622 }, { "epoch": 0.2870271340859576, "grad_norm": 1.7090163594453147, "learning_rate": 1.6730874777686053e-05, "loss": 0.2739, "step": 3623 }, { "epoch": 0.287106357694593, "grad_norm": 1.7680539257104662, "learning_rate": 1.6728976814299413e-05, "loss": 0.2821, "step": 3624 }, { "epoch": 0.28718558130322835, "grad_norm": 1.8415989304597915, "learning_rate": 1.6727078407842028e-05, "loss": 0.2834, "step": 3625 }, { "epoch": 0.28726480491186374, "grad_norm": 2.4013207874511817, "learning_rate": 1.67251795584389e-05, "loss": 0.3087, "step": 3626 }, { "epoch": 0.2873440285204991, "grad_norm": 1.8725678102029302, "learning_rate": 1.6723280266215057e-05, "loss": 0.2968, "step": 3627 }, { "epoch": 0.2874232521291345, "grad_norm": 1.6671936102910183, "learning_rate": 1.672138053129556e-05, "loss": 0.2754, "step": 3628 }, { "epoch": 0.2875024757377699, "grad_norm": 2.0201350153265984, "learning_rate": 1.6719480353805493e-05, "loss": 0.3754, "step": 3629 }, { "epoch": 0.2875816993464052, "grad_norm": 2.154100983446601, "learning_rate": 1.671757973386998e-05, "loss": 0.3122, "step": 3630 }, { "epoch": 0.2876609229550406, "grad_norm": 1.9757988376227502, "learning_rate": 1.6715678671614162e-05, "loss": 0.3703, "step": 3631 }, { "epoch": 0.28774014656367597, "grad_norm": 1.785060455468134, "learning_rate": 1.6713777167163215e-05, "loss": 0.2668, "step": 3632 }, { "epoch": 0.28781937017231135, "grad_norm": 1.8800928803189738, "learning_rate": 1.6711875220642352e-05, "loss": 0.3937, "step": 3633 }, { "epoch": 0.28789859378094673, "grad_norm": 1.8377484951390568, "learning_rate": 1.6709972832176797e-05, "loss": 0.2776, "step": 3634 }, { "epoch": 0.2879778173895821, "grad_norm": 2.32823713372016, "learning_rate": 1.670807000189182e-05, "loss": 0.3597, "step": 3635 }, { "epoch": 0.2880570409982175, "grad_norm": 2.0583496697391555, "learning_rate": 1.6706166729912712e-05, "loss": 0.2811, "step": 3636 }, { "epoch": 0.2881362646068528, "grad_norm": 2.401963084165021, "learning_rate": 1.670426301636479e-05, "loss": 0.3441, "step": 3637 }, { "epoch": 0.2882154882154882, "grad_norm": 1.8959326868716981, "learning_rate": 1.6702358861373408e-05, "loss": 0.3094, "step": 3638 }, { "epoch": 0.2882947118241236, "grad_norm": 2.033529054018751, "learning_rate": 1.6700454265063943e-05, "loss": 0.2835, "step": 3639 }, { "epoch": 0.28837393543275897, "grad_norm": 2.2388221935456665, "learning_rate": 1.6698549227561805e-05, "loss": 0.2869, "step": 3640 }, { "epoch": 0.28845315904139435, "grad_norm": 2.2217667724950734, "learning_rate": 1.6696643748992434e-05, "loss": 0.276, "step": 3641 }, { "epoch": 0.28853238265002973, "grad_norm": 2.041045805619277, "learning_rate": 1.6694737829481292e-05, "loss": 0.2934, "step": 3642 }, { "epoch": 0.2886116062586651, "grad_norm": 2.2057607575055673, "learning_rate": 1.669283146915388e-05, "loss": 0.3095, "step": 3643 }, { "epoch": 0.28869082986730044, "grad_norm": 2.1933911590540935, "learning_rate": 1.6690924668135718e-05, "loss": 0.289, "step": 3644 }, { "epoch": 0.2887700534759358, "grad_norm": 1.704166912810074, "learning_rate": 1.668901742655236e-05, "loss": 0.2769, "step": 3645 }, { "epoch": 0.2888492770845712, "grad_norm": 2.2530365913442916, "learning_rate": 1.6687109744529394e-05, "loss": 0.2988, "step": 3646 }, { "epoch": 0.2889285006932066, "grad_norm": 2.3520705511536604, "learning_rate": 1.6685201622192422e-05, "loss": 0.3506, "step": 3647 }, { "epoch": 0.28900772430184196, "grad_norm": 1.9000939170637945, "learning_rate": 1.6683293059667096e-05, "loss": 0.2916, "step": 3648 }, { "epoch": 0.28908694791047734, "grad_norm": 2.1541870784007275, "learning_rate": 1.6681384057079076e-05, "loss": 0.3038, "step": 3649 }, { "epoch": 0.28916617151911267, "grad_norm": 2.0023161956333912, "learning_rate": 1.6679474614554066e-05, "loss": 0.3088, "step": 3650 }, { "epoch": 0.28924539512774805, "grad_norm": 2.1054650941987907, "learning_rate": 1.667756473221779e-05, "loss": 0.2875, "step": 3651 }, { "epoch": 0.28932461873638343, "grad_norm": 2.123819177462837, "learning_rate": 1.667565441019601e-05, "loss": 0.3562, "step": 3652 }, { "epoch": 0.2894038423450188, "grad_norm": 1.569177301847685, "learning_rate": 1.6673743648614507e-05, "loss": 0.2562, "step": 3653 }, { "epoch": 0.2894830659536542, "grad_norm": 1.780503024684975, "learning_rate": 1.66718324475991e-05, "loss": 0.3112, "step": 3654 }, { "epoch": 0.2895622895622896, "grad_norm": 2.2597996091076364, "learning_rate": 1.6669920807275622e-05, "loss": 0.4138, "step": 3655 }, { "epoch": 0.28964151317092496, "grad_norm": 2.2363210299760174, "learning_rate": 1.666800872776996e-05, "loss": 0.353, "step": 3656 }, { "epoch": 0.2897207367795603, "grad_norm": 1.7204035314898236, "learning_rate": 1.6666096209208e-05, "loss": 0.2918, "step": 3657 }, { "epoch": 0.28979996038819567, "grad_norm": 2.364315481744238, "learning_rate": 1.6664183251715687e-05, "loss": 0.4291, "step": 3658 }, { "epoch": 0.28987918399683105, "grad_norm": 2.050546981189933, "learning_rate": 1.666226985541897e-05, "loss": 0.2882, "step": 3659 }, { "epoch": 0.28995840760546643, "grad_norm": 1.8062537696302947, "learning_rate": 1.666035602044384e-05, "loss": 0.2331, "step": 3660 }, { "epoch": 0.2900376312141018, "grad_norm": 2.297512308887384, "learning_rate": 1.665844174691631e-05, "loss": 0.2945, "step": 3661 }, { "epoch": 0.2901168548227372, "grad_norm": 2.373566087823118, "learning_rate": 1.6656527034962433e-05, "loss": 0.4073, "step": 3662 }, { "epoch": 0.2901960784313726, "grad_norm": 2.0224546324253048, "learning_rate": 1.665461188470828e-05, "loss": 0.248, "step": 3663 }, { "epoch": 0.2902753020400079, "grad_norm": 2.031895320341666, "learning_rate": 1.6652696296279954e-05, "loss": 0.311, "step": 3664 }, { "epoch": 0.2903545256486433, "grad_norm": 1.76040940252944, "learning_rate": 1.6650780269803587e-05, "loss": 0.2771, "step": 3665 }, { "epoch": 0.29043374925727866, "grad_norm": 2.0534493421980953, "learning_rate": 1.664886380540534e-05, "loss": 0.3281, "step": 3666 }, { "epoch": 0.29051297286591404, "grad_norm": 1.8487819573246012, "learning_rate": 1.664694690321141e-05, "loss": 0.3537, "step": 3667 }, { "epoch": 0.2905921964745494, "grad_norm": 2.4302198185415307, "learning_rate": 1.6645029563348e-05, "loss": 0.3363, "step": 3668 }, { "epoch": 0.2906714200831848, "grad_norm": 1.8870363659888052, "learning_rate": 1.6643111785941374e-05, "loss": 0.2672, "step": 3669 }, { "epoch": 0.2907506436918202, "grad_norm": 2.3411402197049536, "learning_rate": 1.66411935711178e-05, "loss": 0.3507, "step": 3670 }, { "epoch": 0.2908298673004555, "grad_norm": 1.991495491258046, "learning_rate": 1.6639274919003582e-05, "loss": 0.2544, "step": 3671 }, { "epoch": 0.2909090909090909, "grad_norm": 1.8100244496728577, "learning_rate": 1.6637355829725057e-05, "loss": 0.3495, "step": 3672 }, { "epoch": 0.2909883145177263, "grad_norm": 1.915294909409222, "learning_rate": 1.663543630340859e-05, "loss": 0.2296, "step": 3673 }, { "epoch": 0.29106753812636166, "grad_norm": 1.7755773252871612, "learning_rate": 1.6633516340180568e-05, "loss": 0.3525, "step": 3674 }, { "epoch": 0.29114676173499704, "grad_norm": 1.8516287853410067, "learning_rate": 1.6631595940167416e-05, "loss": 0.3264, "step": 3675 }, { "epoch": 0.2912259853436324, "grad_norm": 2.0458824774406827, "learning_rate": 1.662967510349558e-05, "loss": 0.346, "step": 3676 }, { "epoch": 0.2913052089522678, "grad_norm": 1.9528731325446507, "learning_rate": 1.6627753830291536e-05, "loss": 0.2949, "step": 3677 }, { "epoch": 0.29138443256090313, "grad_norm": 1.9595910522122675, "learning_rate": 1.6625832120681795e-05, "loss": 0.3277, "step": 3678 }, { "epoch": 0.2914636561695385, "grad_norm": 1.7109544432578585, "learning_rate": 1.6623909974792888e-05, "loss": 0.323, "step": 3679 }, { "epoch": 0.2915428797781739, "grad_norm": 1.9111618608442615, "learning_rate": 1.6621987392751385e-05, "loss": 0.3251, "step": 3680 }, { "epoch": 0.2916221033868093, "grad_norm": 1.9444067990918976, "learning_rate": 1.6620064374683874e-05, "loss": 0.2339, "step": 3681 }, { "epoch": 0.29170132699544465, "grad_norm": 1.927498540419202, "learning_rate": 1.6618140920716976e-05, "loss": 0.2686, "step": 3682 }, { "epoch": 0.29178055060408004, "grad_norm": 1.8758224916618815, "learning_rate": 1.6616217030977345e-05, "loss": 0.3062, "step": 3683 }, { "epoch": 0.29185977421271536, "grad_norm": 1.9389046409615305, "learning_rate": 1.6614292705591658e-05, "loss": 0.3173, "step": 3684 }, { "epoch": 0.29193899782135074, "grad_norm": 1.6636008108224105, "learning_rate": 1.6612367944686617e-05, "loss": 0.2208, "step": 3685 }, { "epoch": 0.2920182214299861, "grad_norm": 1.9698010936256172, "learning_rate": 1.6610442748388972e-05, "loss": 0.2766, "step": 3686 }, { "epoch": 0.2920974450386215, "grad_norm": 2.4567989572923685, "learning_rate": 1.6608517116825473e-05, "loss": 0.2604, "step": 3687 }, { "epoch": 0.2921766686472569, "grad_norm": 2.2638678610882215, "learning_rate": 1.6606591050122924e-05, "loss": 0.3228, "step": 3688 }, { "epoch": 0.29225589225589227, "grad_norm": 2.2446395650968203, "learning_rate": 1.660466454840814e-05, "loss": 0.3117, "step": 3689 }, { "epoch": 0.29233511586452765, "grad_norm": 1.847599156969952, "learning_rate": 1.6602737611807975e-05, "loss": 0.2522, "step": 3690 }, { "epoch": 0.292414339473163, "grad_norm": 2.2433120141663165, "learning_rate": 1.660081024044931e-05, "loss": 0.2587, "step": 3691 }, { "epoch": 0.29249356308179836, "grad_norm": 2.2521143359537272, "learning_rate": 1.659888243445905e-05, "loss": 0.3491, "step": 3692 }, { "epoch": 0.29257278669043374, "grad_norm": 2.152709080731402, "learning_rate": 1.6596954193964136e-05, "loss": 0.281, "step": 3693 }, { "epoch": 0.2926520102990691, "grad_norm": 1.9102095064229225, "learning_rate": 1.659502551909153e-05, "loss": 0.2164, "step": 3694 }, { "epoch": 0.2927312339077045, "grad_norm": 1.8576841611316988, "learning_rate": 1.6593096409968227e-05, "loss": 0.3501, "step": 3695 }, { "epoch": 0.2928104575163399, "grad_norm": 2.143174160634584, "learning_rate": 1.6591166866721247e-05, "loss": 0.2585, "step": 3696 }, { "epoch": 0.29288968112497527, "grad_norm": 1.8322238417572019, "learning_rate": 1.658923688947765e-05, "loss": 0.3295, "step": 3697 }, { "epoch": 0.2929689047336106, "grad_norm": 2.188756027766657, "learning_rate": 1.6587306478364502e-05, "loss": 0.2511, "step": 3698 }, { "epoch": 0.293048128342246, "grad_norm": 2.0800860540775936, "learning_rate": 1.658537563350892e-05, "loss": 0.2951, "step": 3699 }, { "epoch": 0.29312735195088135, "grad_norm": 2.120419441630339, "learning_rate": 1.6583444355038042e-05, "loss": 0.3684, "step": 3700 }, { "epoch": 0.29320657555951674, "grad_norm": 1.5402254572628864, "learning_rate": 1.6581512643079028e-05, "loss": 0.1987, "step": 3701 }, { "epoch": 0.2932857991681521, "grad_norm": 1.8139736902630164, "learning_rate": 1.657958049775908e-05, "loss": 0.2584, "step": 3702 }, { "epoch": 0.2933650227767875, "grad_norm": 1.6039216041841664, "learning_rate": 1.6577647919205407e-05, "loss": 0.232, "step": 3703 }, { "epoch": 0.2934442463854229, "grad_norm": 2.230475177625442, "learning_rate": 1.6575714907545272e-05, "loss": 0.3204, "step": 3704 }, { "epoch": 0.2935234699940582, "grad_norm": 1.9072050700736505, "learning_rate": 1.6573781462905954e-05, "loss": 0.2088, "step": 3705 }, { "epoch": 0.2936026936026936, "grad_norm": 2.052871951760179, "learning_rate": 1.6571847585414754e-05, "loss": 0.291, "step": 3706 }, { "epoch": 0.29368191721132897, "grad_norm": 2.04222020295031, "learning_rate": 1.6569913275199013e-05, "loss": 0.3721, "step": 3707 }, { "epoch": 0.29376114081996435, "grad_norm": 1.9967492969852367, "learning_rate": 1.6567978532386094e-05, "loss": 0.2808, "step": 3708 }, { "epoch": 0.29384036442859973, "grad_norm": 2.1316194578421475, "learning_rate": 1.6566043357103393e-05, "loss": 0.2834, "step": 3709 }, { "epoch": 0.2939195880372351, "grad_norm": 2.151667388197471, "learning_rate": 1.656410774947833e-05, "loss": 0.3011, "step": 3710 }, { "epoch": 0.2939988116458705, "grad_norm": 2.0424733100685235, "learning_rate": 1.6562171709638355e-05, "loss": 0.3393, "step": 3711 }, { "epoch": 0.2940780352545058, "grad_norm": 2.2135287829523933, "learning_rate": 1.656023523771095e-05, "loss": 0.3026, "step": 3712 }, { "epoch": 0.2941572588631412, "grad_norm": 1.9148831504492885, "learning_rate": 1.655829833382362e-05, "loss": 0.2804, "step": 3713 }, { "epoch": 0.2942364824717766, "grad_norm": 2.185569236254536, "learning_rate": 1.6556360998103903e-05, "loss": 0.4382, "step": 3714 }, { "epoch": 0.29431570608041197, "grad_norm": 1.9581195776980467, "learning_rate": 1.655442323067936e-05, "loss": 0.2919, "step": 3715 }, { "epoch": 0.29439492968904735, "grad_norm": 2.3721499609991166, "learning_rate": 1.6552485031677586e-05, "loss": 0.3242, "step": 3716 }, { "epoch": 0.29447415329768273, "grad_norm": 1.9665744121968807, "learning_rate": 1.65505464012262e-05, "loss": 0.267, "step": 3717 }, { "epoch": 0.2945533769063181, "grad_norm": 2.041080822940283, "learning_rate": 1.6548607339452853e-05, "loss": 0.2669, "step": 3718 }, { "epoch": 0.29463260051495344, "grad_norm": 1.9415721706418003, "learning_rate": 1.6546667846485224e-05, "loss": 0.3438, "step": 3719 }, { "epoch": 0.2947118241235888, "grad_norm": 1.7190684876015117, "learning_rate": 1.6544727922451014e-05, "loss": 0.2639, "step": 3720 }, { "epoch": 0.2947910477322242, "grad_norm": 2.1085594563983614, "learning_rate": 1.654278756747796e-05, "loss": 0.3244, "step": 3721 }, { "epoch": 0.2948702713408596, "grad_norm": 1.5993923912005714, "learning_rate": 1.6540846781693837e-05, "loss": 0.2671, "step": 3722 }, { "epoch": 0.29494949494949496, "grad_norm": 1.9621948229440052, "learning_rate": 1.6538905565226416e-05, "loss": 0.2675, "step": 3723 }, { "epoch": 0.29502871855813034, "grad_norm": 1.6014012615104973, "learning_rate": 1.6536963918203532e-05, "loss": 0.2627, "step": 3724 }, { "epoch": 0.29510794216676567, "grad_norm": 2.1318802196688704, "learning_rate": 1.6535021840753026e-05, "loss": 0.3222, "step": 3725 }, { "epoch": 0.29518716577540105, "grad_norm": 1.7809032779562033, "learning_rate": 1.6533079333002775e-05, "loss": 0.4058, "step": 3726 }, { "epoch": 0.29526638938403643, "grad_norm": 1.926384773417309, "learning_rate": 1.6531136395080687e-05, "loss": 0.3357, "step": 3727 }, { "epoch": 0.2953456129926718, "grad_norm": 1.8914574602303056, "learning_rate": 1.6529193027114692e-05, "loss": 0.3571, "step": 3728 }, { "epoch": 0.2954248366013072, "grad_norm": 2.068030663470857, "learning_rate": 1.6527249229232754e-05, "loss": 0.3875, "step": 3729 }, { "epoch": 0.2955040602099426, "grad_norm": 1.6786627830455285, "learning_rate": 1.652530500156286e-05, "loss": 0.2264, "step": 3730 }, { "epoch": 0.29558328381857796, "grad_norm": 2.136329576259284, "learning_rate": 1.652336034423303e-05, "loss": 0.4239, "step": 3731 }, { "epoch": 0.2956625074272133, "grad_norm": 1.7843910142418054, "learning_rate": 1.6521415257371312e-05, "loss": 0.2254, "step": 3732 }, { "epoch": 0.29574173103584867, "grad_norm": 2.548053482969655, "learning_rate": 1.6519469741105777e-05, "loss": 0.396, "step": 3733 }, { "epoch": 0.29582095464448405, "grad_norm": 1.8438518991444643, "learning_rate": 1.6517523795564527e-05, "loss": 0.3635, "step": 3734 }, { "epoch": 0.29590017825311943, "grad_norm": 1.8387940819084987, "learning_rate": 1.6515577420875698e-05, "loss": 0.2378, "step": 3735 }, { "epoch": 0.2959794018617548, "grad_norm": 1.8352681078412778, "learning_rate": 1.6513630617167446e-05, "loss": 0.3685, "step": 3736 }, { "epoch": 0.2960586254703902, "grad_norm": 1.8922883255051839, "learning_rate": 1.6511683384567957e-05, "loss": 0.262, "step": 3737 }, { "epoch": 0.2961378490790256, "grad_norm": 1.9879691389536898, "learning_rate": 1.6509735723205453e-05, "loss": 0.2826, "step": 3738 }, { "epoch": 0.2962170726876609, "grad_norm": 2.562638275860836, "learning_rate": 1.6507787633208173e-05, "loss": 0.425, "step": 3739 }, { "epoch": 0.2962962962962963, "grad_norm": 1.8254178860060655, "learning_rate": 1.650583911470439e-05, "loss": 0.2448, "step": 3740 }, { "epoch": 0.29637551990493166, "grad_norm": 2.1200297511576824, "learning_rate": 1.6503890167822406e-05, "loss": 0.3824, "step": 3741 }, { "epoch": 0.29645474351356704, "grad_norm": 2.2952838979787935, "learning_rate": 1.6501940792690547e-05, "loss": 0.2587, "step": 3742 }, { "epoch": 0.2965339671222024, "grad_norm": 2.10791466760142, "learning_rate": 1.6499990989437177e-05, "loss": 0.354, "step": 3743 }, { "epoch": 0.2966131907308378, "grad_norm": 2.179039323045411, "learning_rate": 1.6498040758190673e-05, "loss": 0.2955, "step": 3744 }, { "epoch": 0.2966924143394732, "grad_norm": 1.8339928189125239, "learning_rate": 1.6496090099079452e-05, "loss": 0.2792, "step": 3745 }, { "epoch": 0.2967716379481085, "grad_norm": 1.7664076519134926, "learning_rate": 1.6494139012231954e-05, "loss": 0.2318, "step": 3746 }, { "epoch": 0.2968508615567439, "grad_norm": 2.476074006188833, "learning_rate": 1.6492187497776654e-05, "loss": 0.4235, "step": 3747 }, { "epoch": 0.2969300851653793, "grad_norm": 1.8606425938848785, "learning_rate": 1.6490235555842044e-05, "loss": 0.2253, "step": 3748 }, { "epoch": 0.29700930877401466, "grad_norm": 2.2264343648593554, "learning_rate": 1.6488283186556648e-05, "loss": 0.2951, "step": 3749 }, { "epoch": 0.29708853238265004, "grad_norm": 2.184795345147084, "learning_rate": 1.6486330390049027e-05, "loss": 0.4208, "step": 3750 }, { "epoch": 0.2971677559912854, "grad_norm": 2.941243300926836, "learning_rate": 1.648437716644776e-05, "loss": 0.2681, "step": 3751 }, { "epoch": 0.2972469795999208, "grad_norm": 2.3336301897213256, "learning_rate": 1.6482423515881455e-05, "loss": 0.4012, "step": 3752 }, { "epoch": 0.29732620320855613, "grad_norm": 1.9028148726439165, "learning_rate": 1.6480469438478756e-05, "loss": 0.2722, "step": 3753 }, { "epoch": 0.2974054268171915, "grad_norm": 2.21444199732445, "learning_rate": 1.6478514934368326e-05, "loss": 0.3385, "step": 3754 }, { "epoch": 0.2974846504258269, "grad_norm": 2.1803562965814844, "learning_rate": 1.647656000367886e-05, "loss": 0.2866, "step": 3755 }, { "epoch": 0.2975638740344623, "grad_norm": 2.3484328394467147, "learning_rate": 1.647460464653908e-05, "loss": 0.3471, "step": 3756 }, { "epoch": 0.29764309764309765, "grad_norm": 2.068600645448547, "learning_rate": 1.6472648863077737e-05, "loss": 0.2878, "step": 3757 }, { "epoch": 0.29772232125173304, "grad_norm": 1.8594561751261733, "learning_rate": 1.6470692653423614e-05, "loss": 0.2747, "step": 3758 }, { "epoch": 0.2978015448603684, "grad_norm": 1.9276118550341557, "learning_rate": 1.6468736017705515e-05, "loss": 0.386, "step": 3759 }, { "epoch": 0.29788076846900374, "grad_norm": 2.3310174829201693, "learning_rate": 1.646677895605227e-05, "loss": 0.2957, "step": 3760 }, { "epoch": 0.2979599920776391, "grad_norm": 2.1831717495372347, "learning_rate": 1.6464821468592748e-05, "loss": 0.362, "step": 3761 }, { "epoch": 0.2980392156862745, "grad_norm": 1.803552518183649, "learning_rate": 1.646286355545584e-05, "loss": 0.2752, "step": 3762 }, { "epoch": 0.2981184392949099, "grad_norm": 1.7538268891622637, "learning_rate": 1.6460905216770467e-05, "loss": 0.278, "step": 3763 }, { "epoch": 0.29819766290354527, "grad_norm": 2.091933796751879, "learning_rate": 1.6458946452665573e-05, "loss": 0.3804, "step": 3764 }, { "epoch": 0.29827688651218065, "grad_norm": 1.8890602600421795, "learning_rate": 1.6456987263270132e-05, "loss": 0.3151, "step": 3765 }, { "epoch": 0.298356110120816, "grad_norm": 2.1169401638108316, "learning_rate": 1.645502764871315e-05, "loss": 0.4476, "step": 3766 }, { "epoch": 0.29843533372945136, "grad_norm": 2.01799098792038, "learning_rate": 1.6453067609123656e-05, "loss": 0.3729, "step": 3767 }, { "epoch": 0.29851455733808674, "grad_norm": 1.8230427343156717, "learning_rate": 1.6451107144630708e-05, "loss": 0.3823, "step": 3768 }, { "epoch": 0.2985937809467221, "grad_norm": 2.084563839398148, "learning_rate": 1.6449146255363395e-05, "loss": 0.2655, "step": 3769 }, { "epoch": 0.2986730045553575, "grad_norm": 1.6726094374073701, "learning_rate": 1.6447184941450833e-05, "loss": 0.209, "step": 3770 }, { "epoch": 0.2987522281639929, "grad_norm": 1.7142935177218888, "learning_rate": 1.644522320302217e-05, "loss": 0.3637, "step": 3771 }, { "epoch": 0.29883145177262826, "grad_norm": 2.0405684388935055, "learning_rate": 1.6443261040206566e-05, "loss": 0.2907, "step": 3772 }, { "epoch": 0.2989106753812636, "grad_norm": 2.405996813502771, "learning_rate": 1.6441298453133224e-05, "loss": 0.4359, "step": 3773 }, { "epoch": 0.298989898989899, "grad_norm": 2.0765645921548774, "learning_rate": 1.6439335441931376e-05, "loss": 0.3118, "step": 3774 }, { "epoch": 0.29906912259853435, "grad_norm": 1.6769594478279577, "learning_rate": 1.6437372006730276e-05, "loss": 0.2352, "step": 3775 }, { "epoch": 0.29914834620716974, "grad_norm": 2.144648243792654, "learning_rate": 1.64354081476592e-05, "loss": 0.3472, "step": 3776 }, { "epoch": 0.2992275698158051, "grad_norm": 1.8940517249820277, "learning_rate": 1.643344386484746e-05, "loss": 0.3558, "step": 3777 }, { "epoch": 0.2993067934244405, "grad_norm": 1.9081919128302902, "learning_rate": 1.64314791584244e-05, "loss": 0.3174, "step": 3778 }, { "epoch": 0.2993860170330759, "grad_norm": 2.1805329539112086, "learning_rate": 1.6429514028519383e-05, "loss": 0.2623, "step": 3779 }, { "epoch": 0.2994652406417112, "grad_norm": 1.5074215594426112, "learning_rate": 1.6427548475261807e-05, "loss": 0.2614, "step": 3780 }, { "epoch": 0.2995444642503466, "grad_norm": 2.062324044594821, "learning_rate": 1.642558249878109e-05, "loss": 0.2816, "step": 3781 }, { "epoch": 0.29962368785898197, "grad_norm": 2.176309277440953, "learning_rate": 1.642361609920668e-05, "loss": 0.2924, "step": 3782 }, { "epoch": 0.29970291146761735, "grad_norm": 1.5885254544296088, "learning_rate": 1.6421649276668065e-05, "loss": 0.1966, "step": 3783 }, { "epoch": 0.29978213507625273, "grad_norm": 2.109245576191315, "learning_rate": 1.641968203129474e-05, "loss": 0.326, "step": 3784 }, { "epoch": 0.2998613586848881, "grad_norm": 1.7084069328818037, "learning_rate": 1.641771436321624e-05, "loss": 0.239, "step": 3785 }, { "epoch": 0.2999405822935235, "grad_norm": 2.208359135301702, "learning_rate": 1.6415746272562133e-05, "loss": 0.2518, "step": 3786 }, { "epoch": 0.3000198059021588, "grad_norm": 2.1678587978477712, "learning_rate": 1.6413777759462005e-05, "loss": 0.3437, "step": 3787 }, { "epoch": 0.3000990295107942, "grad_norm": 2.112349183380899, "learning_rate": 1.6411808824045472e-05, "loss": 0.3893, "step": 3788 }, { "epoch": 0.3001782531194296, "grad_norm": 2.1863989847052876, "learning_rate": 1.640983946644218e-05, "loss": 0.3599, "step": 3789 }, { "epoch": 0.30025747672806496, "grad_norm": 2.1255369291140402, "learning_rate": 1.64078696867818e-05, "loss": 0.2255, "step": 3790 }, { "epoch": 0.30033670033670035, "grad_norm": 2.007314992038718, "learning_rate": 1.6405899485194034e-05, "loss": 0.2372, "step": 3791 }, { "epoch": 0.3004159239453357, "grad_norm": 2.101018394324457, "learning_rate": 1.640392886180861e-05, "loss": 0.3408, "step": 3792 }, { "epoch": 0.3004951475539711, "grad_norm": 2.028528609212532, "learning_rate": 1.6401957816755286e-05, "loss": 0.2749, "step": 3793 }, { "epoch": 0.30057437116260644, "grad_norm": 1.9926545159975135, "learning_rate": 1.6399986350163844e-05, "loss": 0.3149, "step": 3794 }, { "epoch": 0.3006535947712418, "grad_norm": 2.055560333375988, "learning_rate": 1.6398014462164093e-05, "loss": 0.2956, "step": 3795 }, { "epoch": 0.3007328183798772, "grad_norm": 2.0950262286077628, "learning_rate": 1.6396042152885874e-05, "loss": 0.2574, "step": 3796 }, { "epoch": 0.3008120419885126, "grad_norm": 1.9932789912831277, "learning_rate": 1.639406942245906e-05, "loss": 0.2998, "step": 3797 }, { "epoch": 0.30089126559714796, "grad_norm": 2.5109742138400444, "learning_rate": 1.639209627101354e-05, "loss": 0.4138, "step": 3798 }, { "epoch": 0.30097048920578334, "grad_norm": 1.9310931595035867, "learning_rate": 1.6390122698679234e-05, "loss": 0.2583, "step": 3799 }, { "epoch": 0.3010497128144187, "grad_norm": 1.9254912693992525, "learning_rate": 1.6388148705586097e-05, "loss": 0.31, "step": 3800 }, { "epoch": 0.30112893642305405, "grad_norm": 1.8593630247204218, "learning_rate": 1.6386174291864106e-05, "loss": 0.2727, "step": 3801 }, { "epoch": 0.30120816003168943, "grad_norm": 2.1329915531258012, "learning_rate": 1.6384199457643264e-05, "loss": 0.3383, "step": 3802 }, { "epoch": 0.3012873836403248, "grad_norm": 1.824526030686308, "learning_rate": 1.6382224203053607e-05, "loss": 0.242, "step": 3803 }, { "epoch": 0.3013666072489602, "grad_norm": 1.8891658764309938, "learning_rate": 1.6380248528225197e-05, "loss": 0.2673, "step": 3804 }, { "epoch": 0.3014458308575956, "grad_norm": 2.113830019227714, "learning_rate": 1.6378272433288122e-05, "loss": 0.2756, "step": 3805 }, { "epoch": 0.30152505446623096, "grad_norm": 1.9963541226080466, "learning_rate": 1.6376295918372495e-05, "loss": 0.384, "step": 3806 }, { "epoch": 0.3016042780748663, "grad_norm": 2.198575301925684, "learning_rate": 1.6374318983608464e-05, "loss": 0.4783, "step": 3807 }, { "epoch": 0.30168350168350166, "grad_norm": 2.0902659992128267, "learning_rate": 1.63723416291262e-05, "loss": 0.3417, "step": 3808 }, { "epoch": 0.30176272529213705, "grad_norm": 1.9773956755356854, "learning_rate": 1.63703638550559e-05, "loss": 0.2949, "step": 3809 }, { "epoch": 0.3018419489007724, "grad_norm": 2.143615139345269, "learning_rate": 1.6368385661527795e-05, "loss": 0.3105, "step": 3810 }, { "epoch": 0.3019211725094078, "grad_norm": 1.9329996022195897, "learning_rate": 1.6366407048672135e-05, "loss": 0.2392, "step": 3811 }, { "epoch": 0.3020003961180432, "grad_norm": 2.576074000705922, "learning_rate": 1.6364428016619202e-05, "loss": 0.2952, "step": 3812 }, { "epoch": 0.30207961972667857, "grad_norm": 1.8022116453183235, "learning_rate": 1.636244856549931e-05, "loss": 0.2539, "step": 3813 }, { "epoch": 0.3021588433353139, "grad_norm": 1.9554280598292493, "learning_rate": 1.6360468695442797e-05, "loss": 0.3017, "step": 3814 }, { "epoch": 0.3022380669439493, "grad_norm": 2.1463715114075987, "learning_rate": 1.6358488406580023e-05, "loss": 0.3389, "step": 3815 }, { "epoch": 0.30231729055258466, "grad_norm": 1.9529828410318093, "learning_rate": 1.635650769904138e-05, "loss": 0.3936, "step": 3816 }, { "epoch": 0.30239651416122004, "grad_norm": 2.2422501080785056, "learning_rate": 1.6354526572957292e-05, "loss": 0.4553, "step": 3817 }, { "epoch": 0.3024757377698554, "grad_norm": 2.0914354626421225, "learning_rate": 1.6352545028458206e-05, "loss": 0.3765, "step": 3818 }, { "epoch": 0.3025549613784908, "grad_norm": 1.772903823894528, "learning_rate": 1.6350563065674596e-05, "loss": 0.2531, "step": 3819 }, { "epoch": 0.3026341849871262, "grad_norm": 1.8396313762919148, "learning_rate": 1.6348580684736962e-05, "loss": 0.344, "step": 3820 }, { "epoch": 0.3027134085957615, "grad_norm": 1.8627998950990017, "learning_rate": 1.6346597885775843e-05, "loss": 0.1968, "step": 3821 }, { "epoch": 0.3027926322043969, "grad_norm": 1.8869015693376312, "learning_rate": 1.6344614668921787e-05, "loss": 0.3799, "step": 3822 }, { "epoch": 0.3028718558130323, "grad_norm": 1.8189324039238206, "learning_rate": 1.6342631034305386e-05, "loss": 0.3318, "step": 3823 }, { "epoch": 0.30295107942166766, "grad_norm": 1.871169467204066, "learning_rate": 1.634064698205725e-05, "loss": 0.2926, "step": 3824 }, { "epoch": 0.30303030303030304, "grad_norm": 1.7389531760214991, "learning_rate": 1.6338662512308013e-05, "loss": 0.2721, "step": 3825 }, { "epoch": 0.3031095266389384, "grad_norm": 1.7984709446641185, "learning_rate": 1.6336677625188357e-05, "loss": 0.2766, "step": 3826 }, { "epoch": 0.3031887502475738, "grad_norm": 1.8646547821936554, "learning_rate": 1.6334692320828968e-05, "loss": 0.2688, "step": 3827 }, { "epoch": 0.3032679738562091, "grad_norm": 1.7287855373854593, "learning_rate": 1.6332706599360568e-05, "loss": 0.2644, "step": 3828 }, { "epoch": 0.3033471974648445, "grad_norm": 1.9694960688593486, "learning_rate": 1.633072046091391e-05, "loss": 0.3001, "step": 3829 }, { "epoch": 0.3034264210734799, "grad_norm": 1.6664756303722617, "learning_rate": 1.6328733905619775e-05, "loss": 0.2419, "step": 3830 }, { "epoch": 0.30350564468211527, "grad_norm": 2.274382029681345, "learning_rate": 1.632674693360896e-05, "loss": 0.3167, "step": 3831 }, { "epoch": 0.30358486829075065, "grad_norm": 1.9274798710578476, "learning_rate": 1.6324759545012306e-05, "loss": 0.3752, "step": 3832 }, { "epoch": 0.30366409189938603, "grad_norm": 1.897628306665924, "learning_rate": 1.6322771739960664e-05, "loss": 0.275, "step": 3833 }, { "epoch": 0.3037433155080214, "grad_norm": 1.5999477754897036, "learning_rate": 1.6320783518584926e-05, "loss": 0.2875, "step": 3834 }, { "epoch": 0.30382253911665674, "grad_norm": 1.8498667124553432, "learning_rate": 1.631879488101601e-05, "loss": 0.4053, "step": 3835 }, { "epoch": 0.3039017627252921, "grad_norm": 1.995589468587847, "learning_rate": 1.6316805827384856e-05, "loss": 0.2893, "step": 3836 }, { "epoch": 0.3039809863339275, "grad_norm": 2.803428337819996, "learning_rate": 1.631481635782243e-05, "loss": 0.3647, "step": 3837 }, { "epoch": 0.3040602099425629, "grad_norm": 1.6973795930868885, "learning_rate": 1.631282647245973e-05, "loss": 0.2832, "step": 3838 }, { "epoch": 0.30413943355119827, "grad_norm": 2.449413131235091, "learning_rate": 1.6310836171427788e-05, "loss": 0.272, "step": 3839 }, { "epoch": 0.30421865715983365, "grad_norm": 2.160290289403697, "learning_rate": 1.6308845454857647e-05, "loss": 0.3483, "step": 3840 }, { "epoch": 0.30429788076846903, "grad_norm": 1.4587392739605265, "learning_rate": 1.6306854322880386e-05, "loss": 0.2479, "step": 3841 }, { "epoch": 0.30437710437710436, "grad_norm": 1.9545761659315637, "learning_rate": 1.630486277562712e-05, "loss": 0.361, "step": 3842 }, { "epoch": 0.30445632798573974, "grad_norm": 2.2952174193052786, "learning_rate": 1.6302870813228974e-05, "loss": 0.3846, "step": 3843 }, { "epoch": 0.3045355515943751, "grad_norm": 1.959285552471317, "learning_rate": 1.6300878435817115e-05, "loss": 0.3198, "step": 3844 }, { "epoch": 0.3046147752030105, "grad_norm": 2.0954925150260677, "learning_rate": 1.6298885643522724e-05, "loss": 0.3323, "step": 3845 }, { "epoch": 0.3046939988116459, "grad_norm": 1.5629762722753553, "learning_rate": 1.6296892436477024e-05, "loss": 0.2023, "step": 3846 }, { "epoch": 0.30477322242028126, "grad_norm": 1.880133908214487, "learning_rate": 1.6294898814811258e-05, "loss": 0.2758, "step": 3847 }, { "epoch": 0.3048524460289166, "grad_norm": 2.136160045692224, "learning_rate": 1.629290477865669e-05, "loss": 0.3238, "step": 3848 }, { "epoch": 0.30493166963755197, "grad_norm": 2.160497272247849, "learning_rate": 1.6290910328144627e-05, "loss": 0.3461, "step": 3849 }, { "epoch": 0.30501089324618735, "grad_norm": 2.421538222789173, "learning_rate": 1.6288915463406386e-05, "loss": 0.2588, "step": 3850 }, { "epoch": 0.30509011685482273, "grad_norm": 1.9690331361034967, "learning_rate": 1.6286920184573324e-05, "loss": 0.3828, "step": 3851 }, { "epoch": 0.3051693404634581, "grad_norm": 1.7584469761187513, "learning_rate": 1.6284924491776815e-05, "loss": 0.2607, "step": 3852 }, { "epoch": 0.3052485640720935, "grad_norm": 2.243330961403839, "learning_rate": 1.6282928385148273e-05, "loss": 0.4174, "step": 3853 }, { "epoch": 0.3053277876807289, "grad_norm": 1.8479340977378784, "learning_rate": 1.6280931864819125e-05, "loss": 0.2729, "step": 3854 }, { "epoch": 0.3054070112893642, "grad_norm": 2.1521448731130217, "learning_rate": 1.6278934930920834e-05, "loss": 0.4796, "step": 3855 }, { "epoch": 0.3054862348979996, "grad_norm": 1.6216592532715903, "learning_rate": 1.6276937583584895e-05, "loss": 0.2868, "step": 3856 }, { "epoch": 0.30556545850663497, "grad_norm": 1.7803228248602432, "learning_rate": 1.6274939822942818e-05, "loss": 0.3352, "step": 3857 }, { "epoch": 0.30564468211527035, "grad_norm": 2.167989785038989, "learning_rate": 1.6272941649126146e-05, "loss": 0.3192, "step": 3858 }, { "epoch": 0.30572390572390573, "grad_norm": 2.2836151215559877, "learning_rate": 1.627094306226645e-05, "loss": 0.4233, "step": 3859 }, { "epoch": 0.3058031293325411, "grad_norm": 1.7829398001596515, "learning_rate": 1.6268944062495324e-05, "loss": 0.4216, "step": 3860 }, { "epoch": 0.3058823529411765, "grad_norm": 2.3748966718491284, "learning_rate": 1.62669446499444e-05, "loss": 0.3166, "step": 3861 }, { "epoch": 0.3059615765498118, "grad_norm": 1.957610300936025, "learning_rate": 1.6264944824745326e-05, "loss": 0.355, "step": 3862 }, { "epoch": 0.3060408001584472, "grad_norm": 1.9293191358750181, "learning_rate": 1.6262944587029777e-05, "loss": 0.3151, "step": 3863 }, { "epoch": 0.3061200237670826, "grad_norm": 1.9989718286163354, "learning_rate": 1.6260943936929462e-05, "loss": 0.3056, "step": 3864 }, { "epoch": 0.30619924737571796, "grad_norm": 2.1499105687490885, "learning_rate": 1.6258942874576117e-05, "loss": 0.3503, "step": 3865 }, { "epoch": 0.30627847098435335, "grad_norm": 2.8254402425089244, "learning_rate": 1.62569414001015e-05, "loss": 0.3933, "step": 3866 }, { "epoch": 0.3063576945929887, "grad_norm": 2.1122167124721005, "learning_rate": 1.6254939513637397e-05, "loss": 0.2725, "step": 3867 }, { "epoch": 0.3064369182016241, "grad_norm": 1.40117053660708, "learning_rate": 1.6252937215315622e-05, "loss": 0.2355, "step": 3868 }, { "epoch": 0.30651614181025943, "grad_norm": 2.6993801709257004, "learning_rate": 1.6250934505268025e-05, "loss": 0.2807, "step": 3869 }, { "epoch": 0.3065953654188948, "grad_norm": 2.0841346484136425, "learning_rate": 1.6248931383626464e-05, "loss": 0.3336, "step": 3870 }, { "epoch": 0.3066745890275302, "grad_norm": 2.2435399692401563, "learning_rate": 1.6246927850522837e-05, "loss": 0.3394, "step": 3871 }, { "epoch": 0.3067538126361656, "grad_norm": 1.9419838099242361, "learning_rate": 1.624492390608907e-05, "loss": 0.2795, "step": 3872 }, { "epoch": 0.30683303624480096, "grad_norm": 2.26851489961783, "learning_rate": 1.6242919550457116e-05, "loss": 0.2961, "step": 3873 }, { "epoch": 0.30691225985343634, "grad_norm": 1.8683617538131634, "learning_rate": 1.6240914783758946e-05, "loss": 0.2729, "step": 3874 }, { "epoch": 0.3069914834620717, "grad_norm": 2.577863566015914, "learning_rate": 1.6238909606126568e-05, "loss": 0.4546, "step": 3875 }, { "epoch": 0.30707070707070705, "grad_norm": 1.614941693031965, "learning_rate": 1.6236904017692016e-05, "loss": 0.2562, "step": 3876 }, { "epoch": 0.30714993067934243, "grad_norm": 1.7471133075748322, "learning_rate": 1.6234898018587336e-05, "loss": 0.1715, "step": 3877 }, { "epoch": 0.3072291542879778, "grad_norm": 2.0440620824160054, "learning_rate": 1.6232891608944627e-05, "loss": 0.4638, "step": 3878 }, { "epoch": 0.3073083778966132, "grad_norm": 2.211795620200163, "learning_rate": 1.6230884788895998e-05, "loss": 0.324, "step": 3879 }, { "epoch": 0.3073876015052486, "grad_norm": 2.123843471148046, "learning_rate": 1.622887755857358e-05, "loss": 0.4222, "step": 3880 }, { "epoch": 0.30746682511388396, "grad_norm": 2.3514130537858247, "learning_rate": 1.6226869918109553e-05, "loss": 0.3576, "step": 3881 }, { "epoch": 0.30754604872251934, "grad_norm": 1.9322503985319897, "learning_rate": 1.62248618676361e-05, "loss": 0.3345, "step": 3882 }, { "epoch": 0.30762527233115466, "grad_norm": 2.412672439022028, "learning_rate": 1.6222853407285447e-05, "loss": 0.3246, "step": 3883 }, { "epoch": 0.30770449593979005, "grad_norm": 1.8907797274226992, "learning_rate": 1.622084453718984e-05, "loss": 0.2687, "step": 3884 }, { "epoch": 0.3077837195484254, "grad_norm": 1.9689228976989726, "learning_rate": 1.621883525748155e-05, "loss": 0.421, "step": 3885 }, { "epoch": 0.3078629431570608, "grad_norm": 1.7495566011511918, "learning_rate": 1.6216825568292885e-05, "loss": 0.2069, "step": 3886 }, { "epoch": 0.3079421667656962, "grad_norm": 2.6398760013938696, "learning_rate": 1.6214815469756165e-05, "loss": 0.3931, "step": 3887 }, { "epoch": 0.30802139037433157, "grad_norm": 1.9719950123547463, "learning_rate": 1.6212804962003757e-05, "loss": 0.2859, "step": 3888 }, { "epoch": 0.3081006139829669, "grad_norm": 1.940671676172438, "learning_rate": 1.6210794045168033e-05, "loss": 0.4319, "step": 3889 }, { "epoch": 0.3081798375916023, "grad_norm": 2.152809329782499, "learning_rate": 1.6208782719381403e-05, "loss": 0.3564, "step": 3890 }, { "epoch": 0.30825906120023766, "grad_norm": 1.9770811974953582, "learning_rate": 1.6206770984776307e-05, "loss": 0.3088, "step": 3891 }, { "epoch": 0.30833828480887304, "grad_norm": 1.9141487175381255, "learning_rate": 1.620475884148521e-05, "loss": 0.3333, "step": 3892 }, { "epoch": 0.3084175084175084, "grad_norm": 1.7703408511480312, "learning_rate": 1.6202746289640594e-05, "loss": 0.2357, "step": 3893 }, { "epoch": 0.3084967320261438, "grad_norm": 1.9516593716941046, "learning_rate": 1.620073332937498e-05, "loss": 0.3622, "step": 3894 }, { "epoch": 0.3085759556347792, "grad_norm": 1.9480693322190266, "learning_rate": 1.6198719960820917e-05, "loss": 0.2391, "step": 3895 }, { "epoch": 0.3086551792434145, "grad_norm": 1.674640942011194, "learning_rate": 1.619670618411097e-05, "loss": 0.3304, "step": 3896 }, { "epoch": 0.3087344028520499, "grad_norm": 1.7951846188338794, "learning_rate": 1.6194691999377736e-05, "loss": 0.2428, "step": 3897 }, { "epoch": 0.3088136264606853, "grad_norm": 2.1378042822837577, "learning_rate": 1.619267740675384e-05, "loss": 0.3088, "step": 3898 }, { "epoch": 0.30889285006932066, "grad_norm": 1.5547112546564361, "learning_rate": 1.6190662406371937e-05, "loss": 0.2087, "step": 3899 }, { "epoch": 0.30897207367795604, "grad_norm": 1.9005705215609991, "learning_rate": 1.6188646998364703e-05, "loss": 0.3378, "step": 3900 }, { "epoch": 0.3090512972865914, "grad_norm": 1.8001390377069098, "learning_rate": 1.6186631182864835e-05, "loss": 0.2601, "step": 3901 }, { "epoch": 0.3091305208952268, "grad_norm": 2.177327509178617, "learning_rate": 1.6184614960005078e-05, "loss": 0.2658, "step": 3902 }, { "epoch": 0.3092097445038621, "grad_norm": 2.5743060656250085, "learning_rate": 1.6182598329918185e-05, "loss": 0.2972, "step": 3903 }, { "epoch": 0.3092889681124975, "grad_norm": 1.9088422755694918, "learning_rate": 1.6180581292736938e-05, "loss": 0.2194, "step": 3904 }, { "epoch": 0.3093681917211329, "grad_norm": 1.671705802554036, "learning_rate": 1.617856384859415e-05, "loss": 0.2853, "step": 3905 }, { "epoch": 0.30944741532976827, "grad_norm": 1.7661791742375041, "learning_rate": 1.6176545997622662e-05, "loss": 0.1881, "step": 3906 }, { "epoch": 0.30952663893840365, "grad_norm": 1.783243911099805, "learning_rate": 1.6174527739955345e-05, "loss": 0.3599, "step": 3907 }, { "epoch": 0.30960586254703903, "grad_norm": 1.8924379009781485, "learning_rate": 1.6172509075725084e-05, "loss": 0.2983, "step": 3908 }, { "epoch": 0.3096850861556744, "grad_norm": 1.9509820560545892, "learning_rate": 1.61704900050648e-05, "loss": 0.2865, "step": 3909 }, { "epoch": 0.30976430976430974, "grad_norm": 2.156195049673379, "learning_rate": 1.616847052810744e-05, "loss": 0.4085, "step": 3910 }, { "epoch": 0.3098435333729451, "grad_norm": 1.9165157866369644, "learning_rate": 1.6166450644985975e-05, "loss": 0.3005, "step": 3911 }, { "epoch": 0.3099227569815805, "grad_norm": 1.6751328481165728, "learning_rate": 1.6164430355833407e-05, "loss": 0.261, "step": 3912 }, { "epoch": 0.3100019805902159, "grad_norm": 1.9158640865268861, "learning_rate": 1.616240966078276e-05, "loss": 0.293, "step": 3913 }, { "epoch": 0.31008120419885127, "grad_norm": 2.0599838634663152, "learning_rate": 1.616038855996709e-05, "loss": 0.2297, "step": 3914 }, { "epoch": 0.31016042780748665, "grad_norm": 2.4528871772281358, "learning_rate": 1.6158367053519476e-05, "loss": 0.2641, "step": 3915 }, { "epoch": 0.31023965141612203, "grad_norm": 1.9193754673955372, "learning_rate": 1.6156345141573022e-05, "loss": 0.3411, "step": 3916 }, { "epoch": 0.31031887502475736, "grad_norm": 2.358475031590979, "learning_rate": 1.6154322824260865e-05, "loss": 0.4313, "step": 3917 }, { "epoch": 0.31039809863339274, "grad_norm": 2.0051862836262684, "learning_rate": 1.615230010171616e-05, "loss": 0.3042, "step": 3918 }, { "epoch": 0.3104773222420281, "grad_norm": 1.8257439127886896, "learning_rate": 1.61502769740721e-05, "loss": 0.345, "step": 3919 }, { "epoch": 0.3105565458506635, "grad_norm": 1.7222303158122967, "learning_rate": 1.6148253441461887e-05, "loss": 0.2583, "step": 3920 }, { "epoch": 0.3106357694592989, "grad_norm": 1.8354109064706357, "learning_rate": 1.6146229504018777e-05, "loss": 0.3348, "step": 3921 }, { "epoch": 0.31071499306793426, "grad_norm": 1.9938417547079281, "learning_rate": 1.6144205161876023e-05, "loss": 0.3578, "step": 3922 }, { "epoch": 0.3107942166765696, "grad_norm": 1.9080262819362557, "learning_rate": 1.6142180415166926e-05, "loss": 0.2521, "step": 3923 }, { "epoch": 0.31087344028520497, "grad_norm": 2.191058427817248, "learning_rate": 1.61401552640248e-05, "loss": 0.3704, "step": 3924 }, { "epoch": 0.31095266389384035, "grad_norm": 1.94213503263778, "learning_rate": 1.6138129708582996e-05, "loss": 0.2344, "step": 3925 }, { "epoch": 0.31103188750247573, "grad_norm": 1.6868327814697108, "learning_rate": 1.6136103748974885e-05, "loss": 0.2173, "step": 3926 }, { "epoch": 0.3111111111111111, "grad_norm": 2.2202989322696167, "learning_rate": 1.6134077385333867e-05, "loss": 0.2989, "step": 3927 }, { "epoch": 0.3111903347197465, "grad_norm": 1.6816314287567677, "learning_rate": 1.613205061779337e-05, "loss": 0.2768, "step": 3928 }, { "epoch": 0.3112695583283819, "grad_norm": 1.9382536324508184, "learning_rate": 1.6130023446486844e-05, "loss": 0.3009, "step": 3929 }, { "epoch": 0.3113487819370172, "grad_norm": 2.24667938949898, "learning_rate": 1.612799587154777e-05, "loss": 0.3958, "step": 3930 }, { "epoch": 0.3114280055456526, "grad_norm": 1.7925720803306824, "learning_rate": 1.6125967893109657e-05, "loss": 0.2506, "step": 3931 }, { "epoch": 0.31150722915428797, "grad_norm": 2.0923067316498587, "learning_rate": 1.6123939511306028e-05, "loss": 0.2696, "step": 3932 }, { "epoch": 0.31158645276292335, "grad_norm": 1.8367898449378253, "learning_rate": 1.6121910726270453e-05, "loss": 0.3175, "step": 3933 }, { "epoch": 0.31166567637155873, "grad_norm": 2.1286210925687064, "learning_rate": 1.6119881538136514e-05, "loss": 0.3189, "step": 3934 }, { "epoch": 0.3117448999801941, "grad_norm": 1.7267476988139265, "learning_rate": 1.611785194703782e-05, "loss": 0.287, "step": 3935 }, { "epoch": 0.3118241235888295, "grad_norm": 1.7876459912399263, "learning_rate": 1.6115821953108015e-05, "loss": 0.2529, "step": 3936 }, { "epoch": 0.3119033471974648, "grad_norm": 2.1558750922097656, "learning_rate": 1.611379155648076e-05, "loss": 0.3349, "step": 3937 }, { "epoch": 0.3119825708061002, "grad_norm": 1.7851203493784387, "learning_rate": 1.611176075728975e-05, "loss": 0.257, "step": 3938 }, { "epoch": 0.3120617944147356, "grad_norm": 2.283855374530786, "learning_rate": 1.61097295556687e-05, "loss": 0.3111, "step": 3939 }, { "epoch": 0.31214101802337096, "grad_norm": 1.7939502898337396, "learning_rate": 1.610769795175136e-05, "loss": 0.241, "step": 3940 }, { "epoch": 0.31222024163200635, "grad_norm": 1.6231241676865509, "learning_rate": 1.6105665945671497e-05, "loss": 0.1799, "step": 3941 }, { "epoch": 0.3122994652406417, "grad_norm": 2.352700261299547, "learning_rate": 1.610363353756291e-05, "loss": 0.3384, "step": 3942 }, { "epoch": 0.3123786888492771, "grad_norm": 1.4658020470523758, "learning_rate": 1.6101600727559423e-05, "loss": 0.1937, "step": 3943 }, { "epoch": 0.31245791245791243, "grad_norm": 2.1586367091409446, "learning_rate": 1.6099567515794886e-05, "loss": 0.2896, "step": 3944 }, { "epoch": 0.3125371360665478, "grad_norm": 1.837981925044546, "learning_rate": 1.609753390240318e-05, "loss": 0.1941, "step": 3945 }, { "epoch": 0.3126163596751832, "grad_norm": 1.8579657389698425, "learning_rate": 1.6095499887518204e-05, "loss": 0.2319, "step": 3946 }, { "epoch": 0.3126955832838186, "grad_norm": 1.8232280004988357, "learning_rate": 1.6093465471273894e-05, "loss": 0.374, "step": 3947 }, { "epoch": 0.31277480689245396, "grad_norm": 1.8727934165329296, "learning_rate": 1.60914306538042e-05, "loss": 0.2962, "step": 3948 }, { "epoch": 0.31285403050108934, "grad_norm": 2.5372307999468893, "learning_rate": 1.6089395435243105e-05, "loss": 0.4109, "step": 3949 }, { "epoch": 0.3129332541097247, "grad_norm": 1.9742757797484873, "learning_rate": 1.6087359815724623e-05, "loss": 0.3543, "step": 3950 }, { "epoch": 0.31301247771836005, "grad_norm": 1.8996848433813027, "learning_rate": 1.6085323795382785e-05, "loss": 0.2593, "step": 3951 }, { "epoch": 0.31309170132699543, "grad_norm": 1.8096764407408223, "learning_rate": 1.608328737435166e-05, "loss": 0.2937, "step": 3952 }, { "epoch": 0.3131709249356308, "grad_norm": 1.9350111810145432, "learning_rate": 1.608125055276533e-05, "loss": 0.2628, "step": 3953 }, { "epoch": 0.3132501485442662, "grad_norm": 2.1173831719090765, "learning_rate": 1.607921333075791e-05, "loss": 0.2997, "step": 3954 }, { "epoch": 0.3133293721529016, "grad_norm": 3.1258977917392827, "learning_rate": 1.607717570846355e-05, "loss": 0.4694, "step": 3955 }, { "epoch": 0.31340859576153696, "grad_norm": 1.9002696716607135, "learning_rate": 1.6075137686016408e-05, "loss": 0.2501, "step": 3956 }, { "epoch": 0.31348781937017234, "grad_norm": 2.116947415993789, "learning_rate": 1.6073099263550677e-05, "loss": 0.2908, "step": 3957 }, { "epoch": 0.31356704297880766, "grad_norm": 1.8899645168620067, "learning_rate": 1.6071060441200587e-05, "loss": 0.2369, "step": 3958 }, { "epoch": 0.31364626658744305, "grad_norm": 2.308064173911316, "learning_rate": 1.6069021219100375e-05, "loss": 0.3566, "step": 3959 }, { "epoch": 0.3137254901960784, "grad_norm": 1.7959416113274134, "learning_rate": 1.606698159738432e-05, "loss": 0.2439, "step": 3960 }, { "epoch": 0.3138047138047138, "grad_norm": 1.8275297598982878, "learning_rate": 1.606494157618672e-05, "loss": 0.3451, "step": 3961 }, { "epoch": 0.3138839374133492, "grad_norm": 1.5524191180813574, "learning_rate": 1.60629011556419e-05, "loss": 0.2685, "step": 3962 }, { "epoch": 0.31396316102198457, "grad_norm": 1.9748588124929398, "learning_rate": 1.6060860335884208e-05, "loss": 0.2453, "step": 3963 }, { "epoch": 0.3140423846306199, "grad_norm": 2.053769344164576, "learning_rate": 1.605881911704803e-05, "loss": 0.3235, "step": 3964 }, { "epoch": 0.3141216082392553, "grad_norm": 2.224374890468355, "learning_rate": 1.6056777499267764e-05, "loss": 0.3611, "step": 3965 }, { "epoch": 0.31420083184789066, "grad_norm": 1.789999684107788, "learning_rate": 1.6054735482677842e-05, "loss": 0.3265, "step": 3966 }, { "epoch": 0.31428005545652604, "grad_norm": 1.798907961784022, "learning_rate": 1.6052693067412724e-05, "loss": 0.2314, "step": 3967 }, { "epoch": 0.3143592790651614, "grad_norm": 1.6865712013244745, "learning_rate": 1.605065025360689e-05, "loss": 0.2424, "step": 3968 }, { "epoch": 0.3144385026737968, "grad_norm": 1.8333734531802057, "learning_rate": 1.6048607041394856e-05, "loss": 0.2395, "step": 3969 }, { "epoch": 0.3145177262824322, "grad_norm": 1.7631459514094503, "learning_rate": 1.6046563430911148e-05, "loss": 0.2464, "step": 3970 }, { "epoch": 0.3145969498910675, "grad_norm": 2.3856367608051667, "learning_rate": 1.6044519422290333e-05, "loss": 0.3064, "step": 3971 }, { "epoch": 0.3146761734997029, "grad_norm": 1.7766065061498073, "learning_rate": 1.6042475015666995e-05, "loss": 0.2692, "step": 3972 }, { "epoch": 0.3147553971083383, "grad_norm": 1.6779049584215282, "learning_rate": 1.604043021117575e-05, "loss": 0.2584, "step": 3973 }, { "epoch": 0.31483462071697366, "grad_norm": 1.8893508351771968, "learning_rate": 1.603838500895125e-05, "loss": 0.2939, "step": 3974 }, { "epoch": 0.31491384432560904, "grad_norm": 2.3084413715650163, "learning_rate": 1.6036339409128146e-05, "loss": 0.3975, "step": 3975 }, { "epoch": 0.3149930679342444, "grad_norm": 2.1433616849251456, "learning_rate": 1.603429341184114e-05, "loss": 0.2674, "step": 3976 }, { "epoch": 0.3150722915428798, "grad_norm": 2.230723648052661, "learning_rate": 1.6032247017224944e-05, "loss": 0.2977, "step": 3977 }, { "epoch": 0.3151515151515151, "grad_norm": 1.9179401943762864, "learning_rate": 1.603020022541431e-05, "loss": 0.211, "step": 3978 }, { "epoch": 0.3152307387601505, "grad_norm": 1.9089046256172308, "learning_rate": 1.6028153036544005e-05, "loss": 0.34, "step": 3979 }, { "epoch": 0.3153099623687859, "grad_norm": 1.9960431333738415, "learning_rate": 1.6026105450748826e-05, "loss": 0.3704, "step": 3980 }, { "epoch": 0.31538918597742127, "grad_norm": 2.4076080649003684, "learning_rate": 1.6024057468163604e-05, "loss": 0.1812, "step": 3981 }, { "epoch": 0.31546840958605665, "grad_norm": 1.8427481180961933, "learning_rate": 1.602200908892318e-05, "loss": 0.248, "step": 3982 }, { "epoch": 0.31554763319469203, "grad_norm": 2.1798891995769956, "learning_rate": 1.6019960313162436e-05, "loss": 0.2334, "step": 3983 }, { "epoch": 0.3156268568033274, "grad_norm": 1.974442811407452, "learning_rate": 1.601791114101627e-05, "loss": 0.2711, "step": 3984 }, { "epoch": 0.31570608041196274, "grad_norm": 2.175938830014821, "learning_rate": 1.6015861572619612e-05, "loss": 0.3338, "step": 3985 }, { "epoch": 0.3157853040205981, "grad_norm": 2.6222181513096694, "learning_rate": 1.6013811608107415e-05, "loss": 0.3487, "step": 3986 }, { "epoch": 0.3158645276292335, "grad_norm": 2.20789515760567, "learning_rate": 1.6011761247614664e-05, "loss": 0.2957, "step": 3987 }, { "epoch": 0.3159437512378689, "grad_norm": 1.8943652597393255, "learning_rate": 1.600971049127636e-05, "loss": 0.2472, "step": 3988 }, { "epoch": 0.31602297484650427, "grad_norm": 2.024566237756117, "learning_rate": 1.6007659339227534e-05, "loss": 0.2642, "step": 3989 }, { "epoch": 0.31610219845513965, "grad_norm": 1.944313898948442, "learning_rate": 1.6005607791603247e-05, "loss": 0.2307, "step": 3990 }, { "epoch": 0.31618142206377503, "grad_norm": 1.9427822102582424, "learning_rate": 1.6003555848538586e-05, "loss": 0.3385, "step": 3991 }, { "epoch": 0.31626064567241036, "grad_norm": 4.066136665150959, "learning_rate": 1.600150351016866e-05, "loss": 0.2736, "step": 3992 }, { "epoch": 0.31633986928104574, "grad_norm": 1.92154477044616, "learning_rate": 1.5999450776628607e-05, "loss": 0.3779, "step": 3993 }, { "epoch": 0.3164190928896811, "grad_norm": 1.9335814823070883, "learning_rate": 1.5997397648053587e-05, "loss": 0.3789, "step": 3994 }, { "epoch": 0.3164983164983165, "grad_norm": 1.728191793993343, "learning_rate": 1.599534412457879e-05, "loss": 0.2616, "step": 3995 }, { "epoch": 0.3165775401069519, "grad_norm": 2.81413102449859, "learning_rate": 1.5993290206339426e-05, "loss": 0.3215, "step": 3996 }, { "epoch": 0.31665676371558726, "grad_norm": 2.3641176582933587, "learning_rate": 1.5991235893470745e-05, "loss": 0.3937, "step": 3997 }, { "epoch": 0.31673598732422265, "grad_norm": 1.9101103998982618, "learning_rate": 1.5989181186108003e-05, "loss": 0.3124, "step": 3998 }, { "epoch": 0.31681521093285797, "grad_norm": 1.7561473454114287, "learning_rate": 1.59871260843865e-05, "loss": 0.2864, "step": 3999 }, { "epoch": 0.31689443454149335, "grad_norm": 1.8793343555809265, "learning_rate": 1.5985070588441556e-05, "loss": 0.3111, "step": 4000 }, { "epoch": 0.31697365815012873, "grad_norm": 1.7717228214469674, "learning_rate": 1.598301469840851e-05, "loss": 0.3065, "step": 4001 }, { "epoch": 0.3170528817587641, "grad_norm": 2.151955626117451, "learning_rate": 1.598095841442273e-05, "loss": 0.2503, "step": 4002 }, { "epoch": 0.3171321053673995, "grad_norm": 1.9747632321716406, "learning_rate": 1.5978901736619624e-05, "loss": 0.2794, "step": 4003 }, { "epoch": 0.3172113289760349, "grad_norm": 1.90056694755564, "learning_rate": 1.5976844665134607e-05, "loss": 0.3375, "step": 4004 }, { "epoch": 0.3172905525846702, "grad_norm": 1.8978748209876632, "learning_rate": 1.5974787200103124e-05, "loss": 0.3145, "step": 4005 }, { "epoch": 0.3173697761933056, "grad_norm": 2.1567993648396797, "learning_rate": 1.5972729341660653e-05, "loss": 0.396, "step": 4006 }, { "epoch": 0.31744899980194097, "grad_norm": 1.9267914672126427, "learning_rate": 1.597067108994269e-05, "loss": 0.2758, "step": 4007 }, { "epoch": 0.31752822341057635, "grad_norm": 1.5611573646467702, "learning_rate": 1.5968612445084773e-05, "loss": 0.2299, "step": 4008 }, { "epoch": 0.31760744701921173, "grad_norm": 2.361271594463766, "learning_rate": 1.596655340722244e-05, "loss": 0.3738, "step": 4009 }, { "epoch": 0.3176866706278471, "grad_norm": 2.068419813892609, "learning_rate": 1.5964493976491278e-05, "loss": 0.2954, "step": 4010 }, { "epoch": 0.3177658942364825, "grad_norm": 1.870546204783706, "learning_rate": 1.5962434153026884e-05, "loss": 0.2908, "step": 4011 }, { "epoch": 0.3178451178451178, "grad_norm": 1.9475016898043347, "learning_rate": 1.596037393696489e-05, "loss": 0.3117, "step": 4012 }, { "epoch": 0.3179243414537532, "grad_norm": 2.2225880283071118, "learning_rate": 1.5958313328440954e-05, "loss": 0.2465, "step": 4013 }, { "epoch": 0.3180035650623886, "grad_norm": 2.4470109336711148, "learning_rate": 1.595625232759076e-05, "loss": 0.5933, "step": 4014 }, { "epoch": 0.31808278867102396, "grad_norm": 1.3762857094268033, "learning_rate": 1.595419093455e-05, "loss": 0.2015, "step": 4015 }, { "epoch": 0.31816201227965935, "grad_norm": 2.3033690119405095, "learning_rate": 1.5952129149454422e-05, "loss": 0.4467, "step": 4016 }, { "epoch": 0.3182412358882947, "grad_norm": 1.7167589765461668, "learning_rate": 1.595006697243978e-05, "loss": 0.2681, "step": 4017 }, { "epoch": 0.3183204594969301, "grad_norm": 1.9150054326315642, "learning_rate": 1.5948004403641853e-05, "loss": 0.298, "step": 4018 }, { "epoch": 0.31839968310556543, "grad_norm": 2.3981098079412804, "learning_rate": 1.594594144319646e-05, "loss": 0.3773, "step": 4019 }, { "epoch": 0.3184789067142008, "grad_norm": 2.0250177617565144, "learning_rate": 1.594387809123943e-05, "loss": 0.2536, "step": 4020 }, { "epoch": 0.3185581303228362, "grad_norm": 1.9640868143463204, "learning_rate": 1.594181434790663e-05, "loss": 0.2815, "step": 4021 }, { "epoch": 0.3186373539314716, "grad_norm": 2.139947631377795, "learning_rate": 1.5939750213333948e-05, "loss": 0.3134, "step": 4022 }, { "epoch": 0.31871657754010696, "grad_norm": 1.801002495716628, "learning_rate": 1.593768568765729e-05, "loss": 0.2418, "step": 4023 }, { "epoch": 0.31879580114874234, "grad_norm": 1.8599436405834358, "learning_rate": 1.5935620771012603e-05, "loss": 0.3237, "step": 4024 }, { "epoch": 0.3188750247573777, "grad_norm": 2.0256314102735753, "learning_rate": 1.5933555463535846e-05, "loss": 0.2543, "step": 4025 }, { "epoch": 0.31895424836601305, "grad_norm": 2.052342543353026, "learning_rate": 1.5931489765363014e-05, "loss": 0.2433, "step": 4026 }, { "epoch": 0.31903347197464843, "grad_norm": 2.5196943939498806, "learning_rate": 1.592942367663012e-05, "loss": 0.3964, "step": 4027 }, { "epoch": 0.3191126955832838, "grad_norm": 2.8422374135487796, "learning_rate": 1.5927357197473207e-05, "loss": 0.3734, "step": 4028 }, { "epoch": 0.3191919191919192, "grad_norm": 2.126178226411484, "learning_rate": 1.5925290328028346e-05, "loss": 0.2419, "step": 4029 }, { "epoch": 0.3192711428005546, "grad_norm": 2.110679173449746, "learning_rate": 1.5923223068431626e-05, "loss": 0.3075, "step": 4030 }, { "epoch": 0.31935036640918996, "grad_norm": 2.0639564225578164, "learning_rate": 1.592115541881917e-05, "loss": 0.3479, "step": 4031 }, { "epoch": 0.31942959001782534, "grad_norm": 2.371948417427405, "learning_rate": 1.5919087379327116e-05, "loss": 0.3406, "step": 4032 }, { "epoch": 0.31950881362646066, "grad_norm": 1.6725567352261204, "learning_rate": 1.5917018950091642e-05, "loss": 0.2522, "step": 4033 }, { "epoch": 0.31958803723509605, "grad_norm": 1.8444118252975636, "learning_rate": 1.591495013124894e-05, "loss": 0.3204, "step": 4034 }, { "epoch": 0.3196672608437314, "grad_norm": 2.122041391801781, "learning_rate": 1.591288092293523e-05, "loss": 0.3741, "step": 4035 }, { "epoch": 0.3197464844523668, "grad_norm": 2.1362290050951946, "learning_rate": 1.5910811325286768e-05, "loss": 0.4184, "step": 4036 }, { "epoch": 0.3198257080610022, "grad_norm": 1.6414471857008746, "learning_rate": 1.5908741338439818e-05, "loss": 0.237, "step": 4037 }, { "epoch": 0.31990493166963757, "grad_norm": 2.49435095596472, "learning_rate": 1.5906670962530683e-05, "loss": 0.3213, "step": 4038 }, { "epoch": 0.31998415527827295, "grad_norm": 1.6601537654362195, "learning_rate": 1.5904600197695684e-05, "loss": 0.2623, "step": 4039 }, { "epoch": 0.3200633788869083, "grad_norm": 1.944171958200773, "learning_rate": 1.5902529044071173e-05, "loss": 0.3613, "step": 4040 }, { "epoch": 0.32014260249554366, "grad_norm": 2.0236805023828808, "learning_rate": 1.590045750179353e-05, "loss": 0.3689, "step": 4041 }, { "epoch": 0.32022182610417904, "grad_norm": 1.6275474590223904, "learning_rate": 1.5898385570999146e-05, "loss": 0.2395, "step": 4042 }, { "epoch": 0.3203010497128144, "grad_norm": 2.2014302463912436, "learning_rate": 1.589631325182446e-05, "loss": 0.3119, "step": 4043 }, { "epoch": 0.3203802733214498, "grad_norm": 2.43230973569851, "learning_rate": 1.589424054440591e-05, "loss": 0.2708, "step": 4044 }, { "epoch": 0.3204594969300852, "grad_norm": 1.681017091662089, "learning_rate": 1.5892167448879984e-05, "loss": 0.2336, "step": 4045 }, { "epoch": 0.3205387205387205, "grad_norm": 2.0078855698082174, "learning_rate": 1.5890093965383186e-05, "loss": 0.3314, "step": 4046 }, { "epoch": 0.3206179441473559, "grad_norm": 2.049858088648156, "learning_rate": 1.588802009405204e-05, "loss": 0.3397, "step": 4047 }, { "epoch": 0.3206971677559913, "grad_norm": 2.1167523309896317, "learning_rate": 1.5885945835023104e-05, "loss": 0.4135, "step": 4048 }, { "epoch": 0.32077639136462666, "grad_norm": 2.0031193597246815, "learning_rate": 1.5883871188432955e-05, "loss": 0.3587, "step": 4049 }, { "epoch": 0.32085561497326204, "grad_norm": 1.762456007263147, "learning_rate": 1.5881796154418196e-05, "loss": 0.2377, "step": 4050 }, { "epoch": 0.3209348385818974, "grad_norm": 2.2895347703732685, "learning_rate": 1.5879720733115464e-05, "loss": 0.2599, "step": 4051 }, { "epoch": 0.3210140621905328, "grad_norm": 1.8329954948344045, "learning_rate": 1.5877644924661412e-05, "loss": 0.248, "step": 4052 }, { "epoch": 0.3210932857991681, "grad_norm": 1.9270899962526025, "learning_rate": 1.5875568729192728e-05, "loss": 0.227, "step": 4053 }, { "epoch": 0.3211725094078035, "grad_norm": 1.6463588484945015, "learning_rate": 1.587349214684611e-05, "loss": 0.244, "step": 4054 }, { "epoch": 0.3212517330164389, "grad_norm": 1.943624732892277, "learning_rate": 1.5871415177758297e-05, "loss": 0.2808, "step": 4055 }, { "epoch": 0.32133095662507427, "grad_norm": 2.0175144306246535, "learning_rate": 1.5869337822066043e-05, "loss": 0.3496, "step": 4056 }, { "epoch": 0.32141018023370965, "grad_norm": 2.0645349013474847, "learning_rate": 1.586726007990614e-05, "loss": 0.277, "step": 4057 }, { "epoch": 0.32148940384234503, "grad_norm": 1.9668490925635078, "learning_rate": 1.586518195141539e-05, "loss": 0.246, "step": 4058 }, { "epoch": 0.3215686274509804, "grad_norm": 2.076758760248237, "learning_rate": 1.5863103436730627e-05, "loss": 0.2806, "step": 4059 }, { "epoch": 0.32164785105961574, "grad_norm": 1.8178453858181338, "learning_rate": 1.586102453598872e-05, "loss": 0.2752, "step": 4060 }, { "epoch": 0.3217270746682511, "grad_norm": 1.8722637282137147, "learning_rate": 1.5858945249326545e-05, "loss": 0.3395, "step": 4061 }, { "epoch": 0.3218062982768865, "grad_norm": 2.526002572404788, "learning_rate": 1.5856865576881016e-05, "loss": 0.342, "step": 4062 }, { "epoch": 0.3218855218855219, "grad_norm": 2.6755086579777267, "learning_rate": 1.5854785518789074e-05, "loss": 0.3573, "step": 4063 }, { "epoch": 0.32196474549415727, "grad_norm": 1.5681079518616239, "learning_rate": 1.5852705075187674e-05, "loss": 0.2815, "step": 4064 }, { "epoch": 0.32204396910279265, "grad_norm": 1.7370720057240607, "learning_rate": 1.5850624246213805e-05, "loss": 0.3012, "step": 4065 }, { "epoch": 0.32212319271142803, "grad_norm": 1.8082971477500807, "learning_rate": 1.5848543032004483e-05, "loss": 0.3721, "step": 4066 }, { "epoch": 0.32220241632006336, "grad_norm": 2.1193927527194285, "learning_rate": 1.5846461432696744e-05, "loss": 0.2599, "step": 4067 }, { "epoch": 0.32228163992869874, "grad_norm": 2.2325038799031134, "learning_rate": 1.5844379448427648e-05, "loss": 0.4903, "step": 4068 }, { "epoch": 0.3223608635373341, "grad_norm": 1.392150212968567, "learning_rate": 1.5842297079334293e-05, "loss": 0.1691, "step": 4069 }, { "epoch": 0.3224400871459695, "grad_norm": 1.8015553694689401, "learning_rate": 1.5840214325553782e-05, "loss": 0.2973, "step": 4070 }, { "epoch": 0.3225193107546049, "grad_norm": 2.1733442007276733, "learning_rate": 1.583813118722326e-05, "loss": 0.3981, "step": 4071 }, { "epoch": 0.32259853436324026, "grad_norm": 2.614894611398543, "learning_rate": 1.583604766447989e-05, "loss": 0.3827, "step": 4072 }, { "epoch": 0.32267775797187565, "grad_norm": 2.119499121308234, "learning_rate": 1.5833963757460863e-05, "loss": 0.2996, "step": 4073 }, { "epoch": 0.32275698158051097, "grad_norm": 2.037985274175979, "learning_rate": 1.5831879466303393e-05, "loss": 0.276, "step": 4074 }, { "epoch": 0.32283620518914635, "grad_norm": 2.723679634710538, "learning_rate": 1.5829794791144723e-05, "loss": 0.3793, "step": 4075 }, { "epoch": 0.32291542879778173, "grad_norm": 1.9970506313199945, "learning_rate": 1.5827709732122115e-05, "loss": 0.3441, "step": 4076 }, { "epoch": 0.3229946524064171, "grad_norm": 1.9622586147111902, "learning_rate": 1.5825624289372864e-05, "loss": 0.3718, "step": 4077 }, { "epoch": 0.3230738760150525, "grad_norm": 2.143431287772388, "learning_rate": 1.5823538463034283e-05, "loss": 0.2206, "step": 4078 }, { "epoch": 0.3231530996236879, "grad_norm": 1.903792384129505, "learning_rate": 1.5821452253243718e-05, "loss": 0.3107, "step": 4079 }, { "epoch": 0.32323232323232326, "grad_norm": 1.9027351850903347, "learning_rate": 1.581936566013853e-05, "loss": 0.4254, "step": 4080 }, { "epoch": 0.3233115468409586, "grad_norm": 1.740506514681122, "learning_rate": 1.5817278683856117e-05, "loss": 0.2343, "step": 4081 }, { "epoch": 0.32339077044959397, "grad_norm": 2.0469800874352138, "learning_rate": 1.5815191324533893e-05, "loss": 0.2973, "step": 4082 }, { "epoch": 0.32346999405822935, "grad_norm": 2.2171681165223487, "learning_rate": 1.58131035823093e-05, "loss": 0.3047, "step": 4083 }, { "epoch": 0.32354921766686473, "grad_norm": 1.689756820326841, "learning_rate": 1.581101545731981e-05, "loss": 0.2921, "step": 4084 }, { "epoch": 0.3236284412755001, "grad_norm": 2.3031657463495305, "learning_rate": 1.580892694970291e-05, "loss": 0.3372, "step": 4085 }, { "epoch": 0.3237076648841355, "grad_norm": 1.7919402536648075, "learning_rate": 1.580683805959612e-05, "loss": 0.2199, "step": 4086 }, { "epoch": 0.3237868884927708, "grad_norm": 1.5639764339074862, "learning_rate": 1.5804748787136987e-05, "loss": 0.2603, "step": 4087 }, { "epoch": 0.3238661121014062, "grad_norm": 1.6868211992951307, "learning_rate": 1.5802659132463076e-05, "loss": 0.2501, "step": 4088 }, { "epoch": 0.3239453357100416, "grad_norm": 1.769418634947462, "learning_rate": 1.5800569095711983e-05, "loss": 0.1865, "step": 4089 }, { "epoch": 0.32402455931867696, "grad_norm": 2.0179952751308132, "learning_rate": 1.5798478677021327e-05, "loss": 0.3892, "step": 4090 }, { "epoch": 0.32410378292731234, "grad_norm": 2.078649795694238, "learning_rate": 1.5796387876528746e-05, "loss": 0.368, "step": 4091 }, { "epoch": 0.3241830065359477, "grad_norm": 2.6619235752115444, "learning_rate": 1.579429669437192e-05, "loss": 0.2971, "step": 4092 }, { "epoch": 0.3242622301445831, "grad_norm": 2.2926714265516663, "learning_rate": 1.579220513068853e-05, "loss": 0.332, "step": 4093 }, { "epoch": 0.32434145375321843, "grad_norm": 1.8536661196517776, "learning_rate": 1.5790113185616305e-05, "loss": 0.2657, "step": 4094 }, { "epoch": 0.3244206773618538, "grad_norm": 1.9288459995228031, "learning_rate": 1.5788020859292987e-05, "loss": 0.2645, "step": 4095 }, { "epoch": 0.3244999009704892, "grad_norm": 1.8808167657942876, "learning_rate": 1.5785928151856345e-05, "loss": 0.2979, "step": 4096 }, { "epoch": 0.3245791245791246, "grad_norm": 1.412144293332204, "learning_rate": 1.5783835063444176e-05, "loss": 0.2129, "step": 4097 }, { "epoch": 0.32465834818775996, "grad_norm": 1.9839658100546034, "learning_rate": 1.57817415941943e-05, "loss": 0.3353, "step": 4098 }, { "epoch": 0.32473757179639534, "grad_norm": 1.8842000444305722, "learning_rate": 1.5779647744244556e-05, "loss": 0.3133, "step": 4099 }, { "epoch": 0.3248167954050307, "grad_norm": 1.9080653898707225, "learning_rate": 1.577755351373282e-05, "loss": 0.3901, "step": 4100 }, { "epoch": 0.32489601901366605, "grad_norm": 1.7806153644427043, "learning_rate": 1.5775458902796982e-05, "loss": 0.2714, "step": 4101 }, { "epoch": 0.32497524262230143, "grad_norm": 1.7365063142102544, "learning_rate": 1.577336391157497e-05, "loss": 0.357, "step": 4102 }, { "epoch": 0.3250544662309368, "grad_norm": 2.1679652293564127, "learning_rate": 1.5771268540204724e-05, "loss": 0.3341, "step": 4103 }, { "epoch": 0.3251336898395722, "grad_norm": 1.5722481058956101, "learning_rate": 1.576917278882421e-05, "loss": 0.229, "step": 4104 }, { "epoch": 0.3252129134482076, "grad_norm": 1.844090865812453, "learning_rate": 1.576707665757143e-05, "loss": 0.3012, "step": 4105 }, { "epoch": 0.32529213705684296, "grad_norm": 1.9913564865988536, "learning_rate": 1.5764980146584402e-05, "loss": 0.2775, "step": 4106 }, { "epoch": 0.32537136066547834, "grad_norm": 1.8636738078139399, "learning_rate": 1.5762883256001168e-05, "loss": 0.273, "step": 4107 }, { "epoch": 0.32545058427411366, "grad_norm": 1.630847762775362, "learning_rate": 1.57607859859598e-05, "loss": 0.2966, "step": 4108 }, { "epoch": 0.32552980788274904, "grad_norm": 2.253427638357135, "learning_rate": 1.5758688336598397e-05, "loss": 0.3349, "step": 4109 }, { "epoch": 0.3256090314913844, "grad_norm": 1.744316884058336, "learning_rate": 1.5756590308055075e-05, "loss": 0.2538, "step": 4110 }, { "epoch": 0.3256882551000198, "grad_norm": 2.0056713052688036, "learning_rate": 1.5754491900467982e-05, "loss": 0.3635, "step": 4111 }, { "epoch": 0.3257674787086552, "grad_norm": 2.0928956574864035, "learning_rate": 1.5752393113975282e-05, "loss": 0.3495, "step": 4112 }, { "epoch": 0.32584670231729057, "grad_norm": 1.9696345383364964, "learning_rate": 1.5750293948715178e-05, "loss": 0.2408, "step": 4113 }, { "epoch": 0.32592592592592595, "grad_norm": 1.7039144209183519, "learning_rate": 1.5748194404825885e-05, "loss": 0.3456, "step": 4114 }, { "epoch": 0.3260051495345613, "grad_norm": 1.750849112988832, "learning_rate": 1.574609448244565e-05, "loss": 0.267, "step": 4115 }, { "epoch": 0.32608437314319666, "grad_norm": 1.5537283801467563, "learning_rate": 1.574399418171274e-05, "loss": 0.2862, "step": 4116 }, { "epoch": 0.32616359675183204, "grad_norm": 2.4039450094435617, "learning_rate": 1.5741893502765452e-05, "loss": 0.369, "step": 4117 }, { "epoch": 0.3262428203604674, "grad_norm": 1.7095700085066374, "learning_rate": 1.5739792445742103e-05, "loss": 0.2441, "step": 4118 }, { "epoch": 0.3263220439691028, "grad_norm": 1.914552416246257, "learning_rate": 1.573769101078104e-05, "loss": 0.2993, "step": 4119 }, { "epoch": 0.3264012675777382, "grad_norm": 2.075327053752495, "learning_rate": 1.573558919802064e-05, "loss": 0.2773, "step": 4120 }, { "epoch": 0.32648049118637357, "grad_norm": 1.8089891261128201, "learning_rate": 1.573348700759928e-05, "loss": 0.2803, "step": 4121 }, { "epoch": 0.3265597147950089, "grad_norm": 2.7520214383524437, "learning_rate": 1.573138443965539e-05, "loss": 0.3152, "step": 4122 }, { "epoch": 0.3266389384036443, "grad_norm": 2.0706597457899774, "learning_rate": 1.572928149432741e-05, "loss": 0.3849, "step": 4123 }, { "epoch": 0.32671816201227966, "grad_norm": 1.7301278473448791, "learning_rate": 1.5727178171753817e-05, "loss": 0.2783, "step": 4124 }, { "epoch": 0.32679738562091504, "grad_norm": 2.1039699424191656, "learning_rate": 1.57250744720731e-05, "loss": 0.2097, "step": 4125 }, { "epoch": 0.3268766092295504, "grad_norm": 2.2591565211240154, "learning_rate": 1.572297039542377e-05, "loss": 0.3362, "step": 4126 }, { "epoch": 0.3269558328381858, "grad_norm": 1.926279140824297, "learning_rate": 1.572086594194438e-05, "loss": 0.3088, "step": 4127 }, { "epoch": 0.3270350564468211, "grad_norm": 1.8434933979819819, "learning_rate": 1.571876111177349e-05, "loss": 0.3095, "step": 4128 }, { "epoch": 0.3271142800554565, "grad_norm": 1.6554772085052598, "learning_rate": 1.571665590504971e-05, "loss": 0.2262, "step": 4129 }, { "epoch": 0.3271935036640919, "grad_norm": 1.7829938502914777, "learning_rate": 1.5714550321911636e-05, "loss": 0.2941, "step": 4130 }, { "epoch": 0.32727272727272727, "grad_norm": 1.8118777365841372, "learning_rate": 1.5712444362497917e-05, "loss": 0.2147, "step": 4131 }, { "epoch": 0.32735195088136265, "grad_norm": 2.1045371624305473, "learning_rate": 1.5710338026947227e-05, "loss": 0.3798, "step": 4132 }, { "epoch": 0.32743117448999803, "grad_norm": 1.6913427320247267, "learning_rate": 1.5708231315398255e-05, "loss": 0.261, "step": 4133 }, { "epoch": 0.3275103980986334, "grad_norm": 1.755451253773995, "learning_rate": 1.570612422798972e-05, "loss": 0.2415, "step": 4134 }, { "epoch": 0.32758962170726874, "grad_norm": 1.9250741575553094, "learning_rate": 1.5704016764860358e-05, "loss": 0.2959, "step": 4135 }, { "epoch": 0.3276688453159041, "grad_norm": 1.6107620157410916, "learning_rate": 1.5701908926148933e-05, "loss": 0.2067, "step": 4136 }, { "epoch": 0.3277480689245395, "grad_norm": 1.9702381507118036, "learning_rate": 1.5699800711994247e-05, "loss": 0.2861, "step": 4137 }, { "epoch": 0.3278272925331749, "grad_norm": 2.1229368030089724, "learning_rate": 1.569769212253511e-05, "loss": 0.2939, "step": 4138 }, { "epoch": 0.32790651614181027, "grad_norm": 1.893017523021293, "learning_rate": 1.569558315791036e-05, "loss": 0.3275, "step": 4139 }, { "epoch": 0.32798573975044565, "grad_norm": 1.6485804663534345, "learning_rate": 1.5693473818258866e-05, "loss": 0.2983, "step": 4140 }, { "epoch": 0.32806496335908103, "grad_norm": 2.0349170447246565, "learning_rate": 1.5691364103719515e-05, "loss": 0.3413, "step": 4141 }, { "epoch": 0.32814418696771636, "grad_norm": 2.176856751640671, "learning_rate": 1.5689254014431225e-05, "loss": 0.3351, "step": 4142 }, { "epoch": 0.32822341057635174, "grad_norm": 1.906142663821326, "learning_rate": 1.5687143550532932e-05, "loss": 0.2591, "step": 4143 }, { "epoch": 0.3283026341849871, "grad_norm": 2.5868170438887073, "learning_rate": 1.56850327121636e-05, "loss": 0.2514, "step": 4144 }, { "epoch": 0.3283818577936225, "grad_norm": 2.058362372247087, "learning_rate": 1.568292149946222e-05, "loss": 0.3288, "step": 4145 }, { "epoch": 0.3284610814022579, "grad_norm": 2.034689932872934, "learning_rate": 1.56808099125678e-05, "loss": 0.2298, "step": 4146 }, { "epoch": 0.32854030501089326, "grad_norm": 1.893909594325405, "learning_rate": 1.5678697951619386e-05, "loss": 0.2916, "step": 4147 }, { "epoch": 0.32861952861952864, "grad_norm": 2.1359174516830683, "learning_rate": 1.5676585616756037e-05, "loss": 0.3771, "step": 4148 }, { "epoch": 0.32869875222816397, "grad_norm": 2.2077493460693307, "learning_rate": 1.5674472908116834e-05, "loss": 0.3048, "step": 4149 }, { "epoch": 0.32877797583679935, "grad_norm": 1.8999387749329557, "learning_rate": 1.5672359825840895e-05, "loss": 0.2521, "step": 4150 }, { "epoch": 0.32885719944543473, "grad_norm": 1.9518996024157405, "learning_rate": 1.567024637006736e-05, "loss": 0.2551, "step": 4151 }, { "epoch": 0.3289364230540701, "grad_norm": 1.9548221225523328, "learning_rate": 1.566813254093538e-05, "loss": 0.3271, "step": 4152 }, { "epoch": 0.3290156466627055, "grad_norm": 1.7712907745007795, "learning_rate": 1.566601833858415e-05, "loss": 0.283, "step": 4153 }, { "epoch": 0.3290948702713409, "grad_norm": 1.9249634518316, "learning_rate": 1.566390376315287e-05, "loss": 0.2552, "step": 4154 }, { "epoch": 0.32917409387997626, "grad_norm": 2.1857893391887737, "learning_rate": 1.5661788814780782e-05, "loss": 0.3248, "step": 4155 }, { "epoch": 0.3292533174886116, "grad_norm": 1.7952313859059046, "learning_rate": 1.5659673493607144e-05, "loss": 0.2421, "step": 4156 }, { "epoch": 0.32933254109724697, "grad_norm": 2.284554172395247, "learning_rate": 1.565755779977124e-05, "loss": 0.3253, "step": 4157 }, { "epoch": 0.32941176470588235, "grad_norm": 1.7305090606284783, "learning_rate": 1.5655441733412376e-05, "loss": 0.2471, "step": 4158 }, { "epoch": 0.32949098831451773, "grad_norm": 1.4725869928264486, "learning_rate": 1.5653325294669884e-05, "loss": 0.2099, "step": 4159 }, { "epoch": 0.3295702119231531, "grad_norm": 1.726508259815418, "learning_rate": 1.565120848368313e-05, "loss": 0.2285, "step": 4160 }, { "epoch": 0.3296494355317885, "grad_norm": 1.6955974922527628, "learning_rate": 1.5649091300591482e-05, "loss": 0.2198, "step": 4161 }, { "epoch": 0.3297286591404238, "grad_norm": 1.485382215649945, "learning_rate": 1.564697374553436e-05, "loss": 0.1918, "step": 4162 }, { "epoch": 0.3298078827490592, "grad_norm": 1.9590472179203442, "learning_rate": 1.5644855818651184e-05, "loss": 0.3356, "step": 4163 }, { "epoch": 0.3298871063576946, "grad_norm": 2.0848972358316553, "learning_rate": 1.564273752008141e-05, "loss": 0.491, "step": 4164 }, { "epoch": 0.32996632996632996, "grad_norm": 1.5195430850276355, "learning_rate": 1.5640618849964528e-05, "loss": 0.2071, "step": 4165 }, { "epoch": 0.33004555357496534, "grad_norm": 2.8200981666878717, "learning_rate": 1.5638499808440036e-05, "loss": 0.2827, "step": 4166 }, { "epoch": 0.3301247771836007, "grad_norm": 1.4756598003004047, "learning_rate": 1.563638039564746e-05, "loss": 0.1947, "step": 4167 }, { "epoch": 0.3302040007922361, "grad_norm": 1.7680420416192881, "learning_rate": 1.5634260611726355e-05, "loss": 0.2517, "step": 4168 }, { "epoch": 0.33028322440087143, "grad_norm": 2.7309634235464935, "learning_rate": 1.5632140456816302e-05, "loss": 0.3589, "step": 4169 }, { "epoch": 0.3303624480095068, "grad_norm": 1.8039379695711888, "learning_rate": 1.5630019931056894e-05, "loss": 0.3873, "step": 4170 }, { "epoch": 0.3304416716181422, "grad_norm": 2.0306410386562277, "learning_rate": 1.5627899034587768e-05, "loss": 0.248, "step": 4171 }, { "epoch": 0.3305208952267776, "grad_norm": 1.8613563957747847, "learning_rate": 1.562577776754857e-05, "loss": 0.3148, "step": 4172 }, { "epoch": 0.33060011883541296, "grad_norm": 2.0268976858785335, "learning_rate": 1.5623656130078976e-05, "loss": 0.3096, "step": 4173 }, { "epoch": 0.33067934244404834, "grad_norm": 1.9642267780551177, "learning_rate": 1.5621534122318682e-05, "loss": 0.4011, "step": 4174 }, { "epoch": 0.3307585660526837, "grad_norm": 2.2275136002737694, "learning_rate": 1.5619411744407416e-05, "loss": 0.2974, "step": 4175 }, { "epoch": 0.33083778966131905, "grad_norm": 2.0573585547048574, "learning_rate": 1.561728899648493e-05, "loss": 0.4184, "step": 4176 }, { "epoch": 0.33091701326995443, "grad_norm": 1.784065174703472, "learning_rate": 1.561516587869099e-05, "loss": 0.2368, "step": 4177 }, { "epoch": 0.3309962368785898, "grad_norm": 2.345085993296679, "learning_rate": 1.5613042391165395e-05, "loss": 0.5117, "step": 4178 }, { "epoch": 0.3310754604872252, "grad_norm": 1.8843346096104885, "learning_rate": 1.5610918534047964e-05, "loss": 0.3124, "step": 4179 }, { "epoch": 0.3311546840958606, "grad_norm": 2.102157227950788, "learning_rate": 1.5608794307478546e-05, "loss": 0.3268, "step": 4180 }, { "epoch": 0.33123390770449596, "grad_norm": 2.086338685799572, "learning_rate": 1.5606669711597017e-05, "loss": 0.2938, "step": 4181 }, { "epoch": 0.33131313131313134, "grad_norm": 1.7652348594152758, "learning_rate": 1.560454474654326e-05, "loss": 0.2812, "step": 4182 }, { "epoch": 0.33139235492176666, "grad_norm": 1.8252978613925877, "learning_rate": 1.56024194124572e-05, "loss": 0.263, "step": 4183 }, { "epoch": 0.33147157853040204, "grad_norm": 1.864031163813262, "learning_rate": 1.5600293709478776e-05, "loss": 0.2456, "step": 4184 }, { "epoch": 0.3315508021390374, "grad_norm": 1.8790595816595552, "learning_rate": 1.559816763774796e-05, "loss": 0.2499, "step": 4185 }, { "epoch": 0.3316300257476728, "grad_norm": 1.3063733944967173, "learning_rate": 1.559604119740474e-05, "loss": 0.1497, "step": 4186 }, { "epoch": 0.3317092493563082, "grad_norm": 2.011374018149606, "learning_rate": 1.5593914388589136e-05, "loss": 0.2942, "step": 4187 }, { "epoch": 0.33178847296494357, "grad_norm": 1.9226572666156472, "learning_rate": 1.559178721144119e-05, "loss": 0.3166, "step": 4188 }, { "epoch": 0.33186769657357895, "grad_norm": 2.7010134848217198, "learning_rate": 1.5589659666100952e-05, "loss": 0.3039, "step": 4189 }, { "epoch": 0.3319469201822143, "grad_norm": 1.6929861645498105, "learning_rate": 1.5587531752708528e-05, "loss": 0.2536, "step": 4190 }, { "epoch": 0.33202614379084966, "grad_norm": 2.0686873432262693, "learning_rate": 1.558540347140402e-05, "loss": 0.3263, "step": 4191 }, { "epoch": 0.33210536739948504, "grad_norm": 2.151177216151281, "learning_rate": 1.558327482232757e-05, "loss": 0.2321, "step": 4192 }, { "epoch": 0.3321845910081204, "grad_norm": 2.021348978810011, "learning_rate": 1.558114580561934e-05, "loss": 0.2866, "step": 4193 }, { "epoch": 0.3322638146167558, "grad_norm": 1.9616187055423595, "learning_rate": 1.557901642141951e-05, "loss": 0.2853, "step": 4194 }, { "epoch": 0.3323430382253912, "grad_norm": 1.82481141229278, "learning_rate": 1.5576886669868297e-05, "loss": 0.3359, "step": 4195 }, { "epoch": 0.33242226183402657, "grad_norm": 1.9340850627275143, "learning_rate": 1.5574756551105926e-05, "loss": 0.3128, "step": 4196 }, { "epoch": 0.3325014854426619, "grad_norm": 1.6995045651117409, "learning_rate": 1.5572626065272666e-05, "loss": 0.2094, "step": 4197 }, { "epoch": 0.3325807090512973, "grad_norm": 1.5882197868249708, "learning_rate": 1.557049521250879e-05, "loss": 0.2524, "step": 4198 }, { "epoch": 0.33265993265993266, "grad_norm": 2.107135541455425, "learning_rate": 1.5568363992954607e-05, "loss": 0.3428, "step": 4199 }, { "epoch": 0.33273915626856804, "grad_norm": 2.1211769764202546, "learning_rate": 1.556623240675045e-05, "loss": 0.2182, "step": 4200 }, { "epoch": 0.3328183798772034, "grad_norm": 1.8211730408897344, "learning_rate": 1.556410045403667e-05, "loss": 0.28, "step": 4201 }, { "epoch": 0.3328976034858388, "grad_norm": 2.0872544797002566, "learning_rate": 1.556196813495365e-05, "loss": 0.3396, "step": 4202 }, { "epoch": 0.3329768270944741, "grad_norm": 1.6635062661309072, "learning_rate": 1.555983544964179e-05, "loss": 0.2766, "step": 4203 }, { "epoch": 0.3330560507031095, "grad_norm": 1.694082658103534, "learning_rate": 1.555770239824152e-05, "loss": 0.2121, "step": 4204 }, { "epoch": 0.3331352743117449, "grad_norm": 2.605382853380774, "learning_rate": 1.5555568980893284e-05, "loss": 0.3387, "step": 4205 }, { "epoch": 0.33321449792038027, "grad_norm": 1.800015559639178, "learning_rate": 1.5553435197737566e-05, "loss": 0.2996, "step": 4206 }, { "epoch": 0.33329372152901565, "grad_norm": 1.9240469006806957, "learning_rate": 1.5551301048914863e-05, "loss": 0.3536, "step": 4207 }, { "epoch": 0.33337294513765103, "grad_norm": 2.361148170414222, "learning_rate": 1.5549166534565695e-05, "loss": 0.3056, "step": 4208 }, { "epoch": 0.3334521687462864, "grad_norm": 1.5466109690125924, "learning_rate": 1.554703165483061e-05, "loss": 0.2417, "step": 4209 }, { "epoch": 0.33353139235492174, "grad_norm": 2.296150315066984, "learning_rate": 1.5544896409850183e-05, "loss": 0.2576, "step": 4210 }, { "epoch": 0.3336106159635571, "grad_norm": 2.0513319074031404, "learning_rate": 1.554276079976501e-05, "loss": 0.2914, "step": 4211 }, { "epoch": 0.3336898395721925, "grad_norm": 1.9963253522224222, "learning_rate": 1.5540624824715703e-05, "loss": 0.2474, "step": 4212 }, { "epoch": 0.3337690631808279, "grad_norm": 1.509464447307471, "learning_rate": 1.5538488484842914e-05, "loss": 0.2196, "step": 4213 }, { "epoch": 0.33384828678946327, "grad_norm": 1.6846127024733197, "learning_rate": 1.553635178028731e-05, "loss": 0.2665, "step": 4214 }, { "epoch": 0.33392751039809865, "grad_norm": 1.988712110831499, "learning_rate": 1.5534214711189574e-05, "loss": 0.3297, "step": 4215 }, { "epoch": 0.33400673400673403, "grad_norm": 2.27134904472091, "learning_rate": 1.5532077277690435e-05, "loss": 0.3467, "step": 4216 }, { "epoch": 0.33408595761536936, "grad_norm": 1.7186316802985433, "learning_rate": 1.552993947993062e-05, "loss": 0.2703, "step": 4217 }, { "epoch": 0.33416518122400474, "grad_norm": 2.039824739504798, "learning_rate": 1.5527801318050904e-05, "loss": 0.2278, "step": 4218 }, { "epoch": 0.3342444048326401, "grad_norm": 1.5954402937053078, "learning_rate": 1.5525662792192066e-05, "loss": 0.1945, "step": 4219 }, { "epoch": 0.3343236284412755, "grad_norm": 1.6366701666823844, "learning_rate": 1.5523523902494927e-05, "loss": 0.327, "step": 4220 }, { "epoch": 0.3344028520499109, "grad_norm": 2.081910836589598, "learning_rate": 1.552138464910031e-05, "loss": 0.2706, "step": 4221 }, { "epoch": 0.33448207565854626, "grad_norm": 2.349504732929474, "learning_rate": 1.5519245032149083e-05, "loss": 0.3777, "step": 4222 }, { "epoch": 0.33456129926718164, "grad_norm": 1.6512342749360909, "learning_rate": 1.5517105051782127e-05, "loss": 0.1573, "step": 4223 }, { "epoch": 0.33464052287581697, "grad_norm": 1.7863680575581886, "learning_rate": 1.551496470814035e-05, "loss": 0.2866, "step": 4224 }, { "epoch": 0.33471974648445235, "grad_norm": 1.5758976090552037, "learning_rate": 1.5512824001364686e-05, "loss": 0.2174, "step": 4225 }, { "epoch": 0.33479897009308773, "grad_norm": 1.9475637167806332, "learning_rate": 1.5510682931596083e-05, "loss": 0.2782, "step": 4226 }, { "epoch": 0.3348781937017231, "grad_norm": 2.159770847930568, "learning_rate": 1.550854149897553e-05, "loss": 0.389, "step": 4227 }, { "epoch": 0.3349574173103585, "grad_norm": 1.8354856213760793, "learning_rate": 1.5506399703644017e-05, "loss": 0.2698, "step": 4228 }, { "epoch": 0.3350366409189939, "grad_norm": 1.9009134784812198, "learning_rate": 1.5504257545742585e-05, "loss": 0.3655, "step": 4229 }, { "epoch": 0.33511586452762926, "grad_norm": 1.8220364471119024, "learning_rate": 1.5502115025412275e-05, "loss": 0.3391, "step": 4230 }, { "epoch": 0.3351950881362646, "grad_norm": 2.025915788875568, "learning_rate": 1.5499972142794167e-05, "loss": 0.3257, "step": 4231 }, { "epoch": 0.33527431174489997, "grad_norm": 1.6023690089589089, "learning_rate": 1.5497828898029358e-05, "loss": 0.2379, "step": 4232 }, { "epoch": 0.33535353535353535, "grad_norm": 1.5530329381540535, "learning_rate": 1.5495685291258967e-05, "loss": 0.2621, "step": 4233 }, { "epoch": 0.33543275896217073, "grad_norm": 1.6338481623741345, "learning_rate": 1.5493541322624145e-05, "loss": 0.2335, "step": 4234 }, { "epoch": 0.3355119825708061, "grad_norm": 1.6827318394040827, "learning_rate": 1.5491396992266065e-05, "loss": 0.2813, "step": 4235 }, { "epoch": 0.3355912061794415, "grad_norm": 1.979935126907643, "learning_rate": 1.548925230032591e-05, "loss": 0.3657, "step": 4236 }, { "epoch": 0.3356704297880769, "grad_norm": 1.6773241521139335, "learning_rate": 1.5487107246944902e-05, "loss": 0.2955, "step": 4237 }, { "epoch": 0.3357496533967122, "grad_norm": 2.100257671973178, "learning_rate": 1.548496183226429e-05, "loss": 0.3872, "step": 4238 }, { "epoch": 0.3358288770053476, "grad_norm": 1.6312452651279, "learning_rate": 1.548281605642533e-05, "loss": 0.2521, "step": 4239 }, { "epoch": 0.33590810061398296, "grad_norm": 1.9946315754549027, "learning_rate": 1.5480669919569313e-05, "loss": 0.2326, "step": 4240 }, { "epoch": 0.33598732422261834, "grad_norm": 1.8239993388651272, "learning_rate": 1.5478523421837553e-05, "loss": 0.2659, "step": 4241 }, { "epoch": 0.3360665478312537, "grad_norm": 1.9536843460951268, "learning_rate": 1.5476376563371392e-05, "loss": 0.2384, "step": 4242 }, { "epoch": 0.3361457714398891, "grad_norm": 1.51697535987197, "learning_rate": 1.547422934431218e-05, "loss": 0.2301, "step": 4243 }, { "epoch": 0.33622499504852443, "grad_norm": 2.1046197056263956, "learning_rate": 1.5472081764801307e-05, "loss": 0.4041, "step": 4244 }, { "epoch": 0.3363042186571598, "grad_norm": 1.2886288483487907, "learning_rate": 1.546993382498018e-05, "loss": 0.2088, "step": 4245 }, { "epoch": 0.3363834422657952, "grad_norm": 1.8452170013003963, "learning_rate": 1.546778552499023e-05, "loss": 0.2787, "step": 4246 }, { "epoch": 0.3364626658744306, "grad_norm": 2.0496775864564563, "learning_rate": 1.5465636864972914e-05, "loss": 0.3261, "step": 4247 }, { "epoch": 0.33654188948306596, "grad_norm": 2.0760124324458773, "learning_rate": 1.5463487845069708e-05, "loss": 0.4144, "step": 4248 }, { "epoch": 0.33662111309170134, "grad_norm": 1.8105885126090164, "learning_rate": 1.546133846542212e-05, "loss": 0.3485, "step": 4249 }, { "epoch": 0.3367003367003367, "grad_norm": 2.1672221876893065, "learning_rate": 1.5459188726171666e-05, "loss": 0.1811, "step": 4250 }, { "epoch": 0.33677956030897205, "grad_norm": 2.3069163413501994, "learning_rate": 1.5457038627459905e-05, "loss": 0.3412, "step": 4251 }, { "epoch": 0.33685878391760743, "grad_norm": 1.7577940423762994, "learning_rate": 1.545488816942841e-05, "loss": 0.1951, "step": 4252 }, { "epoch": 0.3369380075262428, "grad_norm": 2.123441585135997, "learning_rate": 1.5452737352218773e-05, "loss": 0.355, "step": 4253 }, { "epoch": 0.3370172311348782, "grad_norm": 2.183493113262432, "learning_rate": 1.545058617597262e-05, "loss": 0.3443, "step": 4254 }, { "epoch": 0.3370964547435136, "grad_norm": 2.064295495283359, "learning_rate": 1.544843464083159e-05, "loss": 0.2968, "step": 4255 }, { "epoch": 0.33717567835214896, "grad_norm": 2.1302273046599756, "learning_rate": 1.544628274693736e-05, "loss": 0.3033, "step": 4256 }, { "epoch": 0.33725490196078434, "grad_norm": 1.5397902364478384, "learning_rate": 1.5444130494431612e-05, "loss": 0.1992, "step": 4257 }, { "epoch": 0.33733412556941966, "grad_norm": 1.9645034232813676, "learning_rate": 1.544197788345607e-05, "loss": 0.2881, "step": 4258 }, { "epoch": 0.33741334917805504, "grad_norm": 2.039604624926172, "learning_rate": 1.543982491415247e-05, "loss": 0.3182, "step": 4259 }, { "epoch": 0.3374925727866904, "grad_norm": 1.8970238967569129, "learning_rate": 1.5437671586662575e-05, "loss": 0.2663, "step": 4260 }, { "epoch": 0.3375717963953258, "grad_norm": 1.9660679800310723, "learning_rate": 1.543551790112817e-05, "loss": 0.3307, "step": 4261 }, { "epoch": 0.3376510200039612, "grad_norm": 1.5030433319851695, "learning_rate": 1.5433363857691067e-05, "loss": 0.174, "step": 4262 }, { "epoch": 0.33773024361259657, "grad_norm": 1.807588601157472, "learning_rate": 1.5431209456493093e-05, "loss": 0.2646, "step": 4263 }, { "epoch": 0.33780946722123195, "grad_norm": 1.8680580415839958, "learning_rate": 1.542905469767611e-05, "loss": 0.2956, "step": 4264 }, { "epoch": 0.3378886908298673, "grad_norm": 1.523110883624214, "learning_rate": 1.5426899581382e-05, "loss": 0.1813, "step": 4265 }, { "epoch": 0.33796791443850266, "grad_norm": 2.245064697129695, "learning_rate": 1.5424744107752666e-05, "loss": 0.2906, "step": 4266 }, { "epoch": 0.33804713804713804, "grad_norm": 2.1559301863917124, "learning_rate": 1.542258827693003e-05, "loss": 0.2919, "step": 4267 }, { "epoch": 0.3381263616557734, "grad_norm": 1.742575540923704, "learning_rate": 1.542043208905605e-05, "loss": 0.1956, "step": 4268 }, { "epoch": 0.3382055852644088, "grad_norm": 1.616308647585813, "learning_rate": 1.5418275544272702e-05, "loss": 0.263, "step": 4269 }, { "epoch": 0.3382848088730442, "grad_norm": 2.527817253462671, "learning_rate": 1.541611864272198e-05, "loss": 0.358, "step": 4270 }, { "epoch": 0.33836403248167957, "grad_norm": 1.8537353435793598, "learning_rate": 1.5413961384545902e-05, "loss": 0.3152, "step": 4271 }, { "epoch": 0.3384432560903149, "grad_norm": 2.2402571345985494, "learning_rate": 1.541180376988652e-05, "loss": 0.3091, "step": 4272 }, { "epoch": 0.3385224796989503, "grad_norm": 2.2787702443716054, "learning_rate": 1.54096457988859e-05, "loss": 0.4223, "step": 4273 }, { "epoch": 0.33860170330758566, "grad_norm": 1.7161948501520208, "learning_rate": 1.540748747168613e-05, "loss": 0.2491, "step": 4274 }, { "epoch": 0.33868092691622104, "grad_norm": 1.8799437774761005, "learning_rate": 1.5405328788429333e-05, "loss": 0.2264, "step": 4275 }, { "epoch": 0.3387601505248564, "grad_norm": 2.258720491672299, "learning_rate": 1.5403169749257644e-05, "loss": 0.4304, "step": 4276 }, { "epoch": 0.3388393741334918, "grad_norm": 1.854586281164233, "learning_rate": 1.5401010354313222e-05, "loss": 0.2844, "step": 4277 }, { "epoch": 0.3389185977421272, "grad_norm": 2.0277851794350585, "learning_rate": 1.539885060373826e-05, "loss": 0.3632, "step": 4278 }, { "epoch": 0.3389978213507625, "grad_norm": 1.683993755207749, "learning_rate": 1.539669049767496e-05, "loss": 0.2834, "step": 4279 }, { "epoch": 0.3390770449593979, "grad_norm": 2.2606856110645066, "learning_rate": 1.539453003626556e-05, "loss": 0.2712, "step": 4280 }, { "epoch": 0.33915626856803327, "grad_norm": 1.7078151134332757, "learning_rate": 1.5392369219652313e-05, "loss": 0.3186, "step": 4281 }, { "epoch": 0.33923549217666865, "grad_norm": 1.6712324007049093, "learning_rate": 1.53902080479775e-05, "loss": 0.2478, "step": 4282 }, { "epoch": 0.33931471578530403, "grad_norm": 2.1278755825112388, "learning_rate": 1.5388046521383424e-05, "loss": 0.3337, "step": 4283 }, { "epoch": 0.3393939393939394, "grad_norm": 1.6972863548899855, "learning_rate": 1.538588464001241e-05, "loss": 0.2332, "step": 4284 }, { "epoch": 0.33947316300257474, "grad_norm": 2.2271294790647103, "learning_rate": 1.5383722404006808e-05, "loss": 0.4383, "step": 4285 }, { "epoch": 0.3395523866112101, "grad_norm": 2.237778411149077, "learning_rate": 1.5381559813508986e-05, "loss": 0.4502, "step": 4286 }, { "epoch": 0.3396316102198455, "grad_norm": 1.5086851347808645, "learning_rate": 1.537939686866135e-05, "loss": 0.2157, "step": 4287 }, { "epoch": 0.3397108338284809, "grad_norm": 1.8727890550763813, "learning_rate": 1.5377233569606312e-05, "loss": 0.2876, "step": 4288 }, { "epoch": 0.33979005743711627, "grad_norm": 1.7398716979858997, "learning_rate": 1.5375069916486318e-05, "loss": 0.3142, "step": 4289 }, { "epoch": 0.33986928104575165, "grad_norm": 1.7613656628020053, "learning_rate": 1.5372905909443833e-05, "loss": 0.2358, "step": 4290 }, { "epoch": 0.33994850465438703, "grad_norm": 1.601557636102539, "learning_rate": 1.5370741548621343e-05, "loss": 0.3144, "step": 4291 }, { "epoch": 0.34002772826302236, "grad_norm": 1.9591043075721617, "learning_rate": 1.5368576834161372e-05, "loss": 0.2454, "step": 4292 }, { "epoch": 0.34010695187165774, "grad_norm": 1.8706199646952146, "learning_rate": 1.536641176620644e-05, "loss": 0.342, "step": 4293 }, { "epoch": 0.3401861754802931, "grad_norm": 1.9926457981504575, "learning_rate": 1.536424634489912e-05, "loss": 0.2907, "step": 4294 }, { "epoch": 0.3402653990889285, "grad_norm": 2.328294810366131, "learning_rate": 1.536208057038199e-05, "loss": 0.4167, "step": 4295 }, { "epoch": 0.3403446226975639, "grad_norm": 1.502990605492509, "learning_rate": 1.535991444279765e-05, "loss": 0.2031, "step": 4296 }, { "epoch": 0.34042384630619926, "grad_norm": 1.651332549206242, "learning_rate": 1.535774796228874e-05, "loss": 0.2222, "step": 4297 }, { "epoch": 0.34050306991483464, "grad_norm": 1.7861513747887263, "learning_rate": 1.5355581128997904e-05, "loss": 0.2726, "step": 4298 }, { "epoch": 0.34058229352346997, "grad_norm": 2.226154747167074, "learning_rate": 1.5353413943067818e-05, "loss": 0.3108, "step": 4299 }, { "epoch": 0.34066151713210535, "grad_norm": 1.6325333448809107, "learning_rate": 1.5351246404641183e-05, "loss": 0.1681, "step": 4300 }, { "epoch": 0.34074074074074073, "grad_norm": 2.1551814409247325, "learning_rate": 1.5349078513860728e-05, "loss": 0.2381, "step": 4301 }, { "epoch": 0.3408199643493761, "grad_norm": 2.0248073267510542, "learning_rate": 1.534691027086918e-05, "loss": 0.3461, "step": 4302 }, { "epoch": 0.3408991879580115, "grad_norm": 1.7560626074293617, "learning_rate": 1.5344741675809328e-05, "loss": 0.2805, "step": 4303 }, { "epoch": 0.3409784115666469, "grad_norm": 1.8682179154893879, "learning_rate": 1.534257272882395e-05, "loss": 0.2586, "step": 4304 }, { "epoch": 0.34105763517528226, "grad_norm": 1.7019759340309528, "learning_rate": 1.5340403430055864e-05, "loss": 0.2146, "step": 4305 }, { "epoch": 0.3411368587839176, "grad_norm": 1.7510238402428289, "learning_rate": 1.533823377964791e-05, "loss": 0.314, "step": 4306 }, { "epoch": 0.34121608239255297, "grad_norm": 1.840822756088021, "learning_rate": 1.5336063777742944e-05, "loss": 0.2424, "step": 4307 }, { "epoch": 0.34129530600118835, "grad_norm": 1.9454782748104735, "learning_rate": 1.5333893424483856e-05, "loss": 0.3007, "step": 4308 }, { "epoch": 0.34137452960982373, "grad_norm": 1.8657968760586143, "learning_rate": 1.5331722720013555e-05, "loss": 0.2632, "step": 4309 }, { "epoch": 0.3414537532184591, "grad_norm": 1.7602105159920918, "learning_rate": 1.532955166447496e-05, "loss": 0.3302, "step": 4310 }, { "epoch": 0.3415329768270945, "grad_norm": 2.4262218483881566, "learning_rate": 1.5327380258011037e-05, "loss": 0.3591, "step": 4311 }, { "epoch": 0.3416122004357299, "grad_norm": 1.8887095956710382, "learning_rate": 1.5325208500764756e-05, "loss": 0.2521, "step": 4312 }, { "epoch": 0.3416914240443652, "grad_norm": 1.976389398706188, "learning_rate": 1.532303639287912e-05, "loss": 0.2855, "step": 4313 }, { "epoch": 0.3417706476530006, "grad_norm": 1.9927260923008052, "learning_rate": 1.532086393449715e-05, "loss": 0.3559, "step": 4314 }, { "epoch": 0.34184987126163596, "grad_norm": 2.208702772151743, "learning_rate": 1.531869112576189e-05, "loss": 0.2779, "step": 4315 }, { "epoch": 0.34192909487027134, "grad_norm": 2.1544575997026607, "learning_rate": 1.5316517966816414e-05, "loss": 0.2869, "step": 4316 }, { "epoch": 0.3420083184789067, "grad_norm": 1.7013860031784205, "learning_rate": 1.5314344457803812e-05, "loss": 0.2025, "step": 4317 }, { "epoch": 0.3420875420875421, "grad_norm": 1.644489043748052, "learning_rate": 1.5312170598867195e-05, "loss": 0.2501, "step": 4318 }, { "epoch": 0.3421667656961775, "grad_norm": 1.843467781325001, "learning_rate": 1.5309996390149708e-05, "loss": 0.2452, "step": 4319 }, { "epoch": 0.3422459893048128, "grad_norm": 2.1051344368196356, "learning_rate": 1.5307821831794506e-05, "loss": 0.3663, "step": 4320 }, { "epoch": 0.3423252129134482, "grad_norm": 1.9273496119563407, "learning_rate": 1.5305646923944776e-05, "loss": 0.2697, "step": 4321 }, { "epoch": 0.3424044365220836, "grad_norm": 1.884762212074546, "learning_rate": 1.5303471666743727e-05, "loss": 0.277, "step": 4322 }, { "epoch": 0.34248366013071896, "grad_norm": 1.5493395238928411, "learning_rate": 1.5301296060334588e-05, "loss": 0.2337, "step": 4323 }, { "epoch": 0.34256288373935434, "grad_norm": 2.08668779338647, "learning_rate": 1.529912010486061e-05, "loss": 0.2796, "step": 4324 }, { "epoch": 0.3426421073479897, "grad_norm": 2.1830172210967707, "learning_rate": 1.5296943800465068e-05, "loss": 0.3285, "step": 4325 }, { "epoch": 0.34272133095662505, "grad_norm": 1.8116074492912733, "learning_rate": 1.529476714729127e-05, "loss": 0.32, "step": 4326 }, { "epoch": 0.34280055456526043, "grad_norm": 1.6807150398042805, "learning_rate": 1.529259014548253e-05, "loss": 0.2676, "step": 4327 }, { "epoch": 0.3428797781738958, "grad_norm": 1.7394546432822156, "learning_rate": 1.5290412795182193e-05, "loss": 0.2546, "step": 4328 }, { "epoch": 0.3429590017825312, "grad_norm": 2.1086588191264313, "learning_rate": 1.528823509653363e-05, "loss": 0.4513, "step": 4329 }, { "epoch": 0.3430382253911666, "grad_norm": 1.5969371528609895, "learning_rate": 1.5286057049680236e-05, "loss": 0.3001, "step": 4330 }, { "epoch": 0.34311744899980196, "grad_norm": 2.27881170248216, "learning_rate": 1.5283878654765414e-05, "loss": 0.254, "step": 4331 }, { "epoch": 0.34319667260843734, "grad_norm": 1.4007672243605134, "learning_rate": 1.5281699911932612e-05, "loss": 0.173, "step": 4332 }, { "epoch": 0.34327589621707266, "grad_norm": 1.55334928524067, "learning_rate": 1.527952082132528e-05, "loss": 0.3496, "step": 4333 }, { "epoch": 0.34335511982570804, "grad_norm": 1.8427773871932651, "learning_rate": 1.5277341383086906e-05, "loss": 0.3233, "step": 4334 }, { "epoch": 0.3434343434343434, "grad_norm": 1.5660673113715498, "learning_rate": 1.5275161597360996e-05, "loss": 0.2783, "step": 4335 }, { "epoch": 0.3435135670429788, "grad_norm": 2.289739566600019, "learning_rate": 1.5272981464291077e-05, "loss": 0.3417, "step": 4336 }, { "epoch": 0.3435927906516142, "grad_norm": 1.957723131340051, "learning_rate": 1.5270800984020705e-05, "loss": 0.2761, "step": 4337 }, { "epoch": 0.34367201426024957, "grad_norm": 2.054671639153292, "learning_rate": 1.5268620156693444e-05, "loss": 0.3202, "step": 4338 }, { "epoch": 0.34375123786888495, "grad_norm": 1.7408085922341083, "learning_rate": 1.52664389824529e-05, "loss": 0.2798, "step": 4339 }, { "epoch": 0.3438304614775203, "grad_norm": 1.6112369798064616, "learning_rate": 1.5264257461442687e-05, "loss": 0.1713, "step": 4340 }, { "epoch": 0.34390968508615566, "grad_norm": 2.2249323887993793, "learning_rate": 1.526207559380645e-05, "loss": 0.3886, "step": 4341 }, { "epoch": 0.34398890869479104, "grad_norm": 1.9314707057397802, "learning_rate": 1.5259893379687855e-05, "loss": 0.2596, "step": 4342 }, { "epoch": 0.3440681323034264, "grad_norm": 2.040388022390409, "learning_rate": 1.525771081923059e-05, "loss": 0.3465, "step": 4343 }, { "epoch": 0.3441473559120618, "grad_norm": 2.2032934823475507, "learning_rate": 1.525552791257837e-05, "loss": 0.4216, "step": 4344 }, { "epoch": 0.3442265795206972, "grad_norm": 1.738034897592833, "learning_rate": 1.525334465987492e-05, "loss": 0.2187, "step": 4345 }, { "epoch": 0.34430580312933257, "grad_norm": 1.8480308926002667, "learning_rate": 1.5251161061264003e-05, "loss": 0.2848, "step": 4346 }, { "epoch": 0.3443850267379679, "grad_norm": 1.7362872484399905, "learning_rate": 1.5248977116889396e-05, "loss": 0.2908, "step": 4347 }, { "epoch": 0.3444642503466033, "grad_norm": 1.975588266543425, "learning_rate": 1.5246792826894906e-05, "loss": 0.3602, "step": 4348 }, { "epoch": 0.34454347395523865, "grad_norm": 1.9725950094402869, "learning_rate": 1.5244608191424352e-05, "loss": 0.3373, "step": 4349 }, { "epoch": 0.34462269756387404, "grad_norm": 1.5802875107101795, "learning_rate": 1.5242423210621584e-05, "loss": 0.1813, "step": 4350 }, { "epoch": 0.3447019211725094, "grad_norm": 1.7529626213171856, "learning_rate": 1.5240237884630471e-05, "loss": 0.4385, "step": 4351 }, { "epoch": 0.3447811447811448, "grad_norm": 1.8639229728251556, "learning_rate": 1.5238052213594912e-05, "loss": 0.3419, "step": 4352 }, { "epoch": 0.3448603683897802, "grad_norm": 2.056434018432276, "learning_rate": 1.5235866197658812e-05, "loss": 0.3015, "step": 4353 }, { "epoch": 0.3449395919984155, "grad_norm": 1.764657805949268, "learning_rate": 1.5233679836966122e-05, "loss": 0.3147, "step": 4354 }, { "epoch": 0.3450188156070509, "grad_norm": 2.0679386147054832, "learning_rate": 1.5231493131660794e-05, "loss": 0.2619, "step": 4355 }, { "epoch": 0.34509803921568627, "grad_norm": 1.888886802634934, "learning_rate": 1.5229306081886818e-05, "loss": 0.2476, "step": 4356 }, { "epoch": 0.34517726282432165, "grad_norm": 2.045020138735299, "learning_rate": 1.5227118687788198e-05, "loss": 0.2444, "step": 4357 }, { "epoch": 0.34525648643295703, "grad_norm": 2.1061354082554176, "learning_rate": 1.5224930949508964e-05, "loss": 0.2349, "step": 4358 }, { "epoch": 0.3453357100415924, "grad_norm": 1.6833212123636772, "learning_rate": 1.5222742867193167e-05, "loss": 0.2171, "step": 4359 }, { "epoch": 0.3454149336502278, "grad_norm": 2.2059804935763085, "learning_rate": 1.5220554440984882e-05, "loss": 0.2891, "step": 4360 }, { "epoch": 0.3454941572588631, "grad_norm": 1.9399908906967271, "learning_rate": 1.5218365671028207e-05, "loss": 0.2718, "step": 4361 }, { "epoch": 0.3455733808674985, "grad_norm": 2.064133781808747, "learning_rate": 1.5216176557467265e-05, "loss": 0.4346, "step": 4362 }, { "epoch": 0.3456526044761339, "grad_norm": 2.341288935705729, "learning_rate": 1.521398710044619e-05, "loss": 0.2904, "step": 4363 }, { "epoch": 0.34573182808476927, "grad_norm": 1.480486934605892, "learning_rate": 1.5211797300109154e-05, "loss": 0.1788, "step": 4364 }, { "epoch": 0.34581105169340465, "grad_norm": 1.8980110514113189, "learning_rate": 1.5209607156600346e-05, "loss": 0.2404, "step": 4365 }, { "epoch": 0.34589027530204003, "grad_norm": 1.918180580012612, "learning_rate": 1.520741667006397e-05, "loss": 0.2985, "step": 4366 }, { "epoch": 0.34596949891067535, "grad_norm": 1.7157258831670843, "learning_rate": 1.5205225840644264e-05, "loss": 0.2572, "step": 4367 }, { "epoch": 0.34604872251931074, "grad_norm": 1.4616233109176535, "learning_rate": 1.5203034668485486e-05, "loss": 0.2215, "step": 4368 }, { "epoch": 0.3461279461279461, "grad_norm": 1.7973744840780148, "learning_rate": 1.5200843153731905e-05, "loss": 0.2964, "step": 4369 }, { "epoch": 0.3462071697365815, "grad_norm": 1.8689465298530563, "learning_rate": 1.519865129652783e-05, "loss": 0.2368, "step": 4370 }, { "epoch": 0.3462863933452169, "grad_norm": 2.146848217514277, "learning_rate": 1.5196459097017582e-05, "loss": 0.2507, "step": 4371 }, { "epoch": 0.34636561695385226, "grad_norm": 2.4399665904302963, "learning_rate": 1.5194266555345505e-05, "loss": 0.2897, "step": 4372 }, { "epoch": 0.34644484056248764, "grad_norm": 1.970614575243572, "learning_rate": 1.5192073671655969e-05, "loss": 0.3587, "step": 4373 }, { "epoch": 0.34652406417112297, "grad_norm": 2.0283610101737573, "learning_rate": 1.5189880446093366e-05, "loss": 0.2925, "step": 4374 }, { "epoch": 0.34660328777975835, "grad_norm": 1.6896989147708572, "learning_rate": 1.5187686878802108e-05, "loss": 0.2192, "step": 4375 }, { "epoch": 0.34668251138839373, "grad_norm": 2.3203107076858167, "learning_rate": 1.5185492969926627e-05, "loss": 0.3606, "step": 4376 }, { "epoch": 0.3467617349970291, "grad_norm": 1.7853082041339545, "learning_rate": 1.5183298719611388e-05, "loss": 0.3665, "step": 4377 }, { "epoch": 0.3468409586056645, "grad_norm": 1.6594019539959624, "learning_rate": 1.5181104128000868e-05, "loss": 0.2264, "step": 4378 }, { "epoch": 0.3469201822142999, "grad_norm": 1.9559896403703443, "learning_rate": 1.517890919523957e-05, "loss": 0.2923, "step": 4379 }, { "epoch": 0.34699940582293526, "grad_norm": 1.880034336558596, "learning_rate": 1.517671392147202e-05, "loss": 0.3446, "step": 4380 }, { "epoch": 0.3470786294315706, "grad_norm": 2.689484204475275, "learning_rate": 1.517451830684277e-05, "loss": 0.4122, "step": 4381 }, { "epoch": 0.34715785304020597, "grad_norm": 2.182067699308231, "learning_rate": 1.5172322351496385e-05, "loss": 0.3123, "step": 4382 }, { "epoch": 0.34723707664884135, "grad_norm": 2.1286155319351714, "learning_rate": 1.517012605557746e-05, "loss": 0.3076, "step": 4383 }, { "epoch": 0.34731630025747673, "grad_norm": 1.861668082962604, "learning_rate": 1.5167929419230616e-05, "loss": 0.3237, "step": 4384 }, { "epoch": 0.3473955238661121, "grad_norm": 1.66810975360935, "learning_rate": 1.516573244260048e-05, "loss": 0.2895, "step": 4385 }, { "epoch": 0.3474747474747475, "grad_norm": 1.8512312432650195, "learning_rate": 1.5163535125831724e-05, "loss": 0.3088, "step": 4386 }, { "epoch": 0.3475539710833829, "grad_norm": 2.0341052801180446, "learning_rate": 1.5161337469069024e-05, "loss": 0.3371, "step": 4387 }, { "epoch": 0.3476331946920182, "grad_norm": 1.976628083975725, "learning_rate": 1.5159139472457086e-05, "loss": 0.3395, "step": 4388 }, { "epoch": 0.3477124183006536, "grad_norm": 1.696058103457667, "learning_rate": 1.5156941136140637e-05, "loss": 0.2882, "step": 4389 }, { "epoch": 0.34779164190928896, "grad_norm": 2.107799749146764, "learning_rate": 1.5154742460264426e-05, "loss": 0.297, "step": 4390 }, { "epoch": 0.34787086551792434, "grad_norm": 1.4747042291311936, "learning_rate": 1.515254344497323e-05, "loss": 0.2135, "step": 4391 }, { "epoch": 0.3479500891265597, "grad_norm": 2.88685922740951, "learning_rate": 1.5150344090411841e-05, "loss": 0.3602, "step": 4392 }, { "epoch": 0.3480293127351951, "grad_norm": 1.706895664844948, "learning_rate": 1.5148144396725072e-05, "loss": 0.1824, "step": 4393 }, { "epoch": 0.3481085363438305, "grad_norm": 1.915496284971201, "learning_rate": 1.514594436405777e-05, "loss": 0.226, "step": 4394 }, { "epoch": 0.3481877599524658, "grad_norm": 2.150580405982988, "learning_rate": 1.5143743992554791e-05, "loss": 0.3376, "step": 4395 }, { "epoch": 0.3482669835611012, "grad_norm": 1.5297964883482145, "learning_rate": 1.514154328236102e-05, "loss": 0.2836, "step": 4396 }, { "epoch": 0.3483462071697366, "grad_norm": 1.7733065070222984, "learning_rate": 1.5139342233621364e-05, "loss": 0.2042, "step": 4397 }, { "epoch": 0.34842543077837196, "grad_norm": 1.7394350376309586, "learning_rate": 1.5137140846480752e-05, "loss": 0.1713, "step": 4398 }, { "epoch": 0.34850465438700734, "grad_norm": 1.9405996273018045, "learning_rate": 1.5134939121084129e-05, "loss": 0.2823, "step": 4399 }, { "epoch": 0.3485838779956427, "grad_norm": 2.338470253130088, "learning_rate": 1.5132737057576476e-05, "loss": 0.4525, "step": 4400 }, { "epoch": 0.34866310160427805, "grad_norm": 1.6704759171154522, "learning_rate": 1.5130534656102783e-05, "loss": 0.2541, "step": 4401 }, { "epoch": 0.34874232521291343, "grad_norm": 1.8266411948811352, "learning_rate": 1.512833191680807e-05, "loss": 0.3236, "step": 4402 }, { "epoch": 0.3488215488215488, "grad_norm": 2.061409850804772, "learning_rate": 1.5126128839837378e-05, "loss": 0.2578, "step": 4403 }, { "epoch": 0.3489007724301842, "grad_norm": 1.5343577896229241, "learning_rate": 1.5123925425335766e-05, "loss": 0.2321, "step": 4404 }, { "epoch": 0.3489799960388196, "grad_norm": 2.005253722607864, "learning_rate": 1.5121721673448319e-05, "loss": 0.2829, "step": 4405 }, { "epoch": 0.34905921964745495, "grad_norm": 2.089500827857083, "learning_rate": 1.5119517584320146e-05, "loss": 0.3455, "step": 4406 }, { "epoch": 0.34913844325609034, "grad_norm": 1.8841284410038224, "learning_rate": 1.5117313158096371e-05, "loss": 0.259, "step": 4407 }, { "epoch": 0.34921766686472566, "grad_norm": 1.699858850561031, "learning_rate": 1.511510839492215e-05, "loss": 0.3421, "step": 4408 }, { "epoch": 0.34929689047336104, "grad_norm": 1.7271775514665164, "learning_rate": 1.5112903294942651e-05, "loss": 0.1925, "step": 4409 }, { "epoch": 0.3493761140819964, "grad_norm": 2.2917828207714432, "learning_rate": 1.5110697858303072e-05, "loss": 0.3069, "step": 4410 }, { "epoch": 0.3494553376906318, "grad_norm": 2.0199844096252155, "learning_rate": 1.5108492085148632e-05, "loss": 0.3332, "step": 4411 }, { "epoch": 0.3495345612992672, "grad_norm": 2.9004469049775934, "learning_rate": 1.5106285975624568e-05, "loss": 0.3829, "step": 4412 }, { "epoch": 0.34961378490790257, "grad_norm": 1.6794746118887558, "learning_rate": 1.5104079529876143e-05, "loss": 0.1862, "step": 4413 }, { "epoch": 0.34969300851653795, "grad_norm": 2.670433659965118, "learning_rate": 1.510187274804864e-05, "loss": 0.2534, "step": 4414 }, { "epoch": 0.3497722321251733, "grad_norm": 1.9237606531490077, "learning_rate": 1.5099665630287365e-05, "loss": 0.3361, "step": 4415 }, { "epoch": 0.34985145573380866, "grad_norm": 2.3536509688690233, "learning_rate": 1.5097458176737647e-05, "loss": 0.28, "step": 4416 }, { "epoch": 0.34993067934244404, "grad_norm": 1.9985007451799834, "learning_rate": 1.5095250387544833e-05, "loss": 0.2482, "step": 4417 }, { "epoch": 0.3500099029510794, "grad_norm": 1.4664203288167474, "learning_rate": 1.5093042262854297e-05, "loss": 0.1871, "step": 4418 }, { "epoch": 0.3500891265597148, "grad_norm": 2.071109023299152, "learning_rate": 1.509083380281144e-05, "loss": 0.2448, "step": 4419 }, { "epoch": 0.3501683501683502, "grad_norm": 1.6111722903949248, "learning_rate": 1.5088625007561668e-05, "loss": 0.3055, "step": 4420 }, { "epoch": 0.35024757377698557, "grad_norm": 2.402300155827407, "learning_rate": 1.5086415877250424e-05, "loss": 0.3575, "step": 4421 }, { "epoch": 0.3503267973856209, "grad_norm": 2.1828640785355224, "learning_rate": 1.5084206412023172e-05, "loss": 0.2519, "step": 4422 }, { "epoch": 0.3504060209942563, "grad_norm": 1.87727170332503, "learning_rate": 1.5081996612025387e-05, "loss": 0.3044, "step": 4423 }, { "epoch": 0.35048524460289165, "grad_norm": 1.8376061913158677, "learning_rate": 1.5079786477402581e-05, "loss": 0.2773, "step": 4424 }, { "epoch": 0.35056446821152704, "grad_norm": 1.9599922868668826, "learning_rate": 1.5077576008300278e-05, "loss": 0.4042, "step": 4425 }, { "epoch": 0.3506436918201624, "grad_norm": 1.886436161981459, "learning_rate": 1.5075365204864025e-05, "loss": 0.3172, "step": 4426 }, { "epoch": 0.3507229154287978, "grad_norm": 2.158573459895322, "learning_rate": 1.5073154067239396e-05, "loss": 0.4641, "step": 4427 }, { "epoch": 0.3508021390374332, "grad_norm": 2.163469986816108, "learning_rate": 1.507094259557198e-05, "loss": 0.281, "step": 4428 }, { "epoch": 0.3508813626460685, "grad_norm": 2.196740423828265, "learning_rate": 1.5068730790007395e-05, "loss": 0.3008, "step": 4429 }, { "epoch": 0.3509605862547039, "grad_norm": 1.799638147572534, "learning_rate": 1.5066518650691277e-05, "loss": 0.2728, "step": 4430 }, { "epoch": 0.35103980986333927, "grad_norm": 2.1098419869514276, "learning_rate": 1.5064306177769284e-05, "loss": 0.3632, "step": 4431 }, { "epoch": 0.35111903347197465, "grad_norm": 1.801827280728131, "learning_rate": 1.5062093371387097e-05, "loss": 0.3705, "step": 4432 }, { "epoch": 0.35119825708061003, "grad_norm": 1.752504445294334, "learning_rate": 1.5059880231690418e-05, "loss": 0.1788, "step": 4433 }, { "epoch": 0.3512774806892454, "grad_norm": 1.83183042013958, "learning_rate": 1.5057666758824974e-05, "loss": 0.3134, "step": 4434 }, { "epoch": 0.3513567042978808, "grad_norm": 2.5327598128970923, "learning_rate": 1.5055452952936512e-05, "loss": 0.2955, "step": 4435 }, { "epoch": 0.3514359279065161, "grad_norm": 2.4047093866945946, "learning_rate": 1.5053238814170792e-05, "loss": 0.2791, "step": 4436 }, { "epoch": 0.3515151515151515, "grad_norm": 1.728766487939823, "learning_rate": 1.5051024342673614e-05, "loss": 0.2045, "step": 4437 }, { "epoch": 0.3515943751237869, "grad_norm": 2.4855068979562174, "learning_rate": 1.5048809538590789e-05, "loss": 0.3207, "step": 4438 }, { "epoch": 0.35167359873242227, "grad_norm": 2.158786644866198, "learning_rate": 1.5046594402068147e-05, "loss": 0.3248, "step": 4439 }, { "epoch": 0.35175282234105765, "grad_norm": 2.3381376925169883, "learning_rate": 1.5044378933251546e-05, "loss": 0.323, "step": 4440 }, { "epoch": 0.35183204594969303, "grad_norm": 1.7281068557815027, "learning_rate": 1.5042163132286867e-05, "loss": 0.2607, "step": 4441 }, { "epoch": 0.35191126955832835, "grad_norm": 1.7203874216493409, "learning_rate": 1.5039946999320004e-05, "loss": 0.2534, "step": 4442 }, { "epoch": 0.35199049316696374, "grad_norm": 1.5575732626889371, "learning_rate": 1.5037730534496882e-05, "loss": 0.2317, "step": 4443 }, { "epoch": 0.3520697167755991, "grad_norm": 1.8546706015814542, "learning_rate": 1.5035513737963445e-05, "loss": 0.2794, "step": 4444 }, { "epoch": 0.3521489403842345, "grad_norm": 1.7444183718993096, "learning_rate": 1.5033296609865658e-05, "loss": 0.1767, "step": 4445 }, { "epoch": 0.3522281639928699, "grad_norm": 1.5804079466245837, "learning_rate": 1.503107915034951e-05, "loss": 0.2115, "step": 4446 }, { "epoch": 0.35230738760150526, "grad_norm": 1.6798556614448925, "learning_rate": 1.5028861359561005e-05, "loss": 0.3022, "step": 4447 }, { "epoch": 0.35238661121014064, "grad_norm": 1.643809781537248, "learning_rate": 1.5026643237646176e-05, "loss": 0.2498, "step": 4448 }, { "epoch": 0.35246583481877597, "grad_norm": 1.8262600883655196, "learning_rate": 1.5024424784751079e-05, "loss": 0.3141, "step": 4449 }, { "epoch": 0.35254505842741135, "grad_norm": 1.9265408123435968, "learning_rate": 1.5022206001021784e-05, "loss": 0.2125, "step": 4450 }, { "epoch": 0.35262428203604673, "grad_norm": 1.8989136232334514, "learning_rate": 1.501998688660439e-05, "loss": 0.2919, "step": 4451 }, { "epoch": 0.3527035056446821, "grad_norm": 1.6806567197407747, "learning_rate": 1.5017767441645015e-05, "loss": 0.261, "step": 4452 }, { "epoch": 0.3527827292533175, "grad_norm": 2.37236928160262, "learning_rate": 1.5015547666289798e-05, "loss": 0.2752, "step": 4453 }, { "epoch": 0.3528619528619529, "grad_norm": 2.543361733152592, "learning_rate": 1.50133275606849e-05, "loss": 0.4694, "step": 4454 }, { "epoch": 0.35294117647058826, "grad_norm": 2.6183208684242345, "learning_rate": 1.5011107124976505e-05, "loss": 0.2857, "step": 4455 }, { "epoch": 0.3530204000792236, "grad_norm": 1.8982486209875373, "learning_rate": 1.5008886359310815e-05, "loss": 0.2949, "step": 4456 }, { "epoch": 0.35309962368785897, "grad_norm": 1.6758397899792723, "learning_rate": 1.5006665263834062e-05, "loss": 0.1777, "step": 4457 }, { "epoch": 0.35317884729649435, "grad_norm": 1.9026406711685655, "learning_rate": 1.5004443838692492e-05, "loss": 0.3559, "step": 4458 }, { "epoch": 0.35325807090512973, "grad_norm": 1.6327646019744073, "learning_rate": 1.5002222084032374e-05, "loss": 0.2262, "step": 4459 }, { "epoch": 0.3533372945137651, "grad_norm": 2.0556888262304147, "learning_rate": 1.5000000000000002e-05, "loss": 0.2983, "step": 4460 }, { "epoch": 0.3534165181224005, "grad_norm": 2.0223199864331547, "learning_rate": 1.4997777586741689e-05, "loss": 0.306, "step": 4461 }, { "epoch": 0.3534957417310359, "grad_norm": 1.9646526168339682, "learning_rate": 1.4995554844403767e-05, "loss": 0.4299, "step": 4462 }, { "epoch": 0.3535749653396712, "grad_norm": 2.0637272688595463, "learning_rate": 1.4993331773132598e-05, "loss": 0.3072, "step": 4463 }, { "epoch": 0.3536541889483066, "grad_norm": 1.8858414970509891, "learning_rate": 1.4991108373074557e-05, "loss": 0.2434, "step": 4464 }, { "epoch": 0.35373341255694196, "grad_norm": 1.7065698382424745, "learning_rate": 1.4988884644376045e-05, "loss": 0.2395, "step": 4465 }, { "epoch": 0.35381263616557734, "grad_norm": 1.5618804363379024, "learning_rate": 1.4986660587183485e-05, "loss": 0.2366, "step": 4466 }, { "epoch": 0.3538918597742127, "grad_norm": 1.6103705110655664, "learning_rate": 1.498443620164332e-05, "loss": 0.3091, "step": 4467 }, { "epoch": 0.3539710833828481, "grad_norm": 2.3187816422481795, "learning_rate": 1.4982211487902015e-05, "loss": 0.4032, "step": 4468 }, { "epoch": 0.3540503069914835, "grad_norm": 1.8366383171331269, "learning_rate": 1.4979986446106054e-05, "loss": 0.3065, "step": 4469 }, { "epoch": 0.3541295306001188, "grad_norm": 2.730340448726131, "learning_rate": 1.4977761076401949e-05, "loss": 0.315, "step": 4470 }, { "epoch": 0.3542087542087542, "grad_norm": 1.861506882875236, "learning_rate": 1.4975535378936228e-05, "loss": 0.2781, "step": 4471 }, { "epoch": 0.3542879778173896, "grad_norm": 1.5370290927428905, "learning_rate": 1.4973309353855443e-05, "loss": 0.2145, "step": 4472 }, { "epoch": 0.35436720142602496, "grad_norm": 1.5470037733901187, "learning_rate": 1.497108300130617e-05, "loss": 0.2517, "step": 4473 }, { "epoch": 0.35444642503466034, "grad_norm": 2.1706644129758903, "learning_rate": 1.4968856321434997e-05, "loss": 0.3411, "step": 4474 }, { "epoch": 0.3545256486432957, "grad_norm": 1.452155532639976, "learning_rate": 1.4966629314388548e-05, "loss": 0.136, "step": 4475 }, { "epoch": 0.3546048722519311, "grad_norm": 1.8777592765536582, "learning_rate": 1.4964401980313452e-05, "loss": 0.3524, "step": 4476 }, { "epoch": 0.35468409586056643, "grad_norm": 2.187966976579896, "learning_rate": 1.4962174319356372e-05, "loss": 0.3678, "step": 4477 }, { "epoch": 0.3547633194692018, "grad_norm": 1.8574633617091538, "learning_rate": 1.4959946331663995e-05, "loss": 0.3337, "step": 4478 }, { "epoch": 0.3548425430778372, "grad_norm": 1.8259251177057856, "learning_rate": 1.4957718017383013e-05, "loss": 0.2727, "step": 4479 }, { "epoch": 0.3549217666864726, "grad_norm": 1.7524440633439209, "learning_rate": 1.4955489376660157e-05, "loss": 0.3102, "step": 4480 }, { "epoch": 0.35500099029510795, "grad_norm": 2.063226226559272, "learning_rate": 1.4953260409642172e-05, "loss": 0.3671, "step": 4481 }, { "epoch": 0.35508021390374334, "grad_norm": 2.1118865278626946, "learning_rate": 1.4951031116475819e-05, "loss": 0.2145, "step": 4482 }, { "epoch": 0.35515943751237866, "grad_norm": 1.5880068378403644, "learning_rate": 1.4948801497307893e-05, "loss": 0.2357, "step": 4483 }, { "epoch": 0.35523866112101404, "grad_norm": 1.7385274370948502, "learning_rate": 1.4946571552285196e-05, "loss": 0.3444, "step": 4484 }, { "epoch": 0.3553178847296494, "grad_norm": 1.8917244831295035, "learning_rate": 1.4944341281554566e-05, "loss": 0.2726, "step": 4485 }, { "epoch": 0.3553971083382848, "grad_norm": 2.1572495752829277, "learning_rate": 1.4942110685262854e-05, "loss": 0.3252, "step": 4486 }, { "epoch": 0.3554763319469202, "grad_norm": 1.9851630007469503, "learning_rate": 1.493987976355693e-05, "loss": 0.3795, "step": 4487 }, { "epoch": 0.35555555555555557, "grad_norm": 1.4817547742601904, "learning_rate": 1.4937648516583696e-05, "loss": 0.2436, "step": 4488 }, { "epoch": 0.35563477916419095, "grad_norm": 1.9726909153526209, "learning_rate": 1.4935416944490066e-05, "loss": 0.4018, "step": 4489 }, { "epoch": 0.3557140027728263, "grad_norm": 1.8912975937246552, "learning_rate": 1.4933185047422976e-05, "loss": 0.2751, "step": 4490 }, { "epoch": 0.35579322638146166, "grad_norm": 2.2215615622354146, "learning_rate": 1.493095282552939e-05, "loss": 0.4122, "step": 4491 }, { "epoch": 0.35587244999009704, "grad_norm": 1.756047117177278, "learning_rate": 1.4928720278956284e-05, "loss": 0.2804, "step": 4492 }, { "epoch": 0.3559516735987324, "grad_norm": 1.939696498642266, "learning_rate": 1.4926487407850667e-05, "loss": 0.2614, "step": 4493 }, { "epoch": 0.3560308972073678, "grad_norm": 1.6043403065342479, "learning_rate": 1.4924254212359557e-05, "loss": 0.2459, "step": 4494 }, { "epoch": 0.3561101208160032, "grad_norm": 1.847794508376427, "learning_rate": 1.492202069263e-05, "loss": 0.1965, "step": 4495 }, { "epoch": 0.35618934442463857, "grad_norm": 1.866015536038721, "learning_rate": 1.4919786848809061e-05, "loss": 0.2716, "step": 4496 }, { "epoch": 0.3562685680332739, "grad_norm": 1.9062728149950978, "learning_rate": 1.4917552681043837e-05, "loss": 0.2454, "step": 4497 }, { "epoch": 0.3563477916419093, "grad_norm": 2.076902047483076, "learning_rate": 1.4915318189481425e-05, "loss": 0.3539, "step": 4498 }, { "epoch": 0.35642701525054465, "grad_norm": 2.1394462677614765, "learning_rate": 1.4913083374268965e-05, "loss": 0.28, "step": 4499 }, { "epoch": 0.35650623885918004, "grad_norm": 1.9708424852547899, "learning_rate": 1.4910848235553604e-05, "loss": 0.3289, "step": 4500 }, { "epoch": 0.3565854624678154, "grad_norm": 1.5964853174329938, "learning_rate": 1.4908612773482514e-05, "loss": 0.2311, "step": 4501 }, { "epoch": 0.3566646860764508, "grad_norm": 1.8500186435390105, "learning_rate": 1.4906376988202893e-05, "loss": 0.2334, "step": 4502 }, { "epoch": 0.3567439096850862, "grad_norm": 1.9059740606972333, "learning_rate": 1.4904140879861957e-05, "loss": 0.2515, "step": 4503 }, { "epoch": 0.3568231332937215, "grad_norm": 1.8348869138817459, "learning_rate": 1.490190444860694e-05, "loss": 0.2107, "step": 4504 }, { "epoch": 0.3569023569023569, "grad_norm": 1.707738179716157, "learning_rate": 1.48996676945851e-05, "loss": 0.2349, "step": 4505 }, { "epoch": 0.35698158051099227, "grad_norm": 2.2254491853794054, "learning_rate": 1.4897430617943718e-05, "loss": 0.3567, "step": 4506 }, { "epoch": 0.35706080411962765, "grad_norm": 1.248338011244278, "learning_rate": 1.4895193218830098e-05, "loss": 0.1435, "step": 4507 }, { "epoch": 0.35714002772826303, "grad_norm": 2.243778706100023, "learning_rate": 1.4892955497391556e-05, "loss": 0.2788, "step": 4508 }, { "epoch": 0.3572192513368984, "grad_norm": 2.0744338656622316, "learning_rate": 1.4890717453775438e-05, "loss": 0.2514, "step": 4509 }, { "epoch": 0.3572984749455338, "grad_norm": 1.4472308399580844, "learning_rate": 1.488847908812911e-05, "loss": 0.1623, "step": 4510 }, { "epoch": 0.3573776985541691, "grad_norm": 1.7644779971883402, "learning_rate": 1.4886240400599954e-05, "loss": 0.3246, "step": 4511 }, { "epoch": 0.3574569221628045, "grad_norm": 1.7329541023791304, "learning_rate": 1.488400139133538e-05, "loss": 0.2265, "step": 4512 }, { "epoch": 0.3575361457714399, "grad_norm": 1.7129722573748578, "learning_rate": 1.4881762060482814e-05, "loss": 0.2959, "step": 4513 }, { "epoch": 0.35761536938007527, "grad_norm": 1.8951230646421218, "learning_rate": 1.4879522408189706e-05, "loss": 0.2604, "step": 4514 }, { "epoch": 0.35769459298871065, "grad_norm": 1.7288392584592716, "learning_rate": 1.4877282434603527e-05, "loss": 0.2367, "step": 4515 }, { "epoch": 0.35777381659734603, "grad_norm": 1.992538903721756, "learning_rate": 1.4875042139871768e-05, "loss": 0.3523, "step": 4516 }, { "epoch": 0.3578530402059814, "grad_norm": 2.02740501299765, "learning_rate": 1.487280152414194e-05, "loss": 0.3351, "step": 4517 }, { "epoch": 0.35793226381461674, "grad_norm": 1.8306707156593898, "learning_rate": 1.4870560587561578e-05, "loss": 0.2539, "step": 4518 }, { "epoch": 0.3580114874232521, "grad_norm": 2.317142037857916, "learning_rate": 1.4868319330278236e-05, "loss": 0.3641, "step": 4519 }, { "epoch": 0.3580907110318875, "grad_norm": 1.4492466031203324, "learning_rate": 1.4866077752439495e-05, "loss": 0.1789, "step": 4520 }, { "epoch": 0.3581699346405229, "grad_norm": 2.0823960158601955, "learning_rate": 1.4863835854192945e-05, "loss": 0.2656, "step": 4521 }, { "epoch": 0.35824915824915826, "grad_norm": 1.9778597739014039, "learning_rate": 1.4861593635686207e-05, "loss": 0.3041, "step": 4522 }, { "epoch": 0.35832838185779364, "grad_norm": 1.4878587377277102, "learning_rate": 1.485935109706692e-05, "loss": 0.2841, "step": 4523 }, { "epoch": 0.35840760546642897, "grad_norm": 1.898570776013274, "learning_rate": 1.4857108238482747e-05, "loss": 0.2735, "step": 4524 }, { "epoch": 0.35848682907506435, "grad_norm": 1.9923333177254, "learning_rate": 1.4854865060081367e-05, "loss": 0.3702, "step": 4525 }, { "epoch": 0.35856605268369973, "grad_norm": 1.9271745699492897, "learning_rate": 1.4852621562010484e-05, "loss": 0.3176, "step": 4526 }, { "epoch": 0.3586452762923351, "grad_norm": 1.6935219452465744, "learning_rate": 1.4850377744417816e-05, "loss": 0.2658, "step": 4527 }, { "epoch": 0.3587244999009705, "grad_norm": 1.6334071754581398, "learning_rate": 1.4848133607451116e-05, "loss": 0.2764, "step": 4528 }, { "epoch": 0.3588037235096059, "grad_norm": 1.770940147150993, "learning_rate": 1.4845889151258144e-05, "loss": 0.2752, "step": 4529 }, { "epoch": 0.35888294711824126, "grad_norm": 1.7400020480243055, "learning_rate": 1.484364437598669e-05, "loss": 0.265, "step": 4530 }, { "epoch": 0.3589621707268766, "grad_norm": 1.8936786294272283, "learning_rate": 1.4841399281784558e-05, "loss": 0.2692, "step": 4531 }, { "epoch": 0.35904139433551197, "grad_norm": 1.8265624261751408, "learning_rate": 1.4839153868799583e-05, "loss": 0.2403, "step": 4532 }, { "epoch": 0.35912061794414735, "grad_norm": 1.8339537583849919, "learning_rate": 1.4836908137179607e-05, "loss": 0.2834, "step": 4533 }, { "epoch": 0.35919984155278273, "grad_norm": 2.1927511822249226, "learning_rate": 1.4834662087072502e-05, "loss": 0.2485, "step": 4534 }, { "epoch": 0.3592790651614181, "grad_norm": 2.6198828901448343, "learning_rate": 1.4832415718626166e-05, "loss": 0.2249, "step": 4535 }, { "epoch": 0.3593582887700535, "grad_norm": 1.8791975996335242, "learning_rate": 1.4830169031988502e-05, "loss": 0.3104, "step": 4536 }, { "epoch": 0.3594375123786889, "grad_norm": 1.7408388895555706, "learning_rate": 1.482792202730745e-05, "loss": 0.2941, "step": 4537 }, { "epoch": 0.3595167359873242, "grad_norm": 2.131712923392794, "learning_rate": 1.4825674704730966e-05, "loss": 0.3036, "step": 4538 }, { "epoch": 0.3595959595959596, "grad_norm": 2.2786987947911403, "learning_rate": 1.4823427064407018e-05, "loss": 0.2108, "step": 4539 }, { "epoch": 0.35967518320459496, "grad_norm": 1.5969629787024902, "learning_rate": 1.4821179106483609e-05, "loss": 0.2157, "step": 4540 }, { "epoch": 0.35975440681323034, "grad_norm": 1.8511205429928075, "learning_rate": 1.4818930831108755e-05, "loss": 0.2415, "step": 4541 }, { "epoch": 0.3598336304218657, "grad_norm": 2.1315218058671355, "learning_rate": 1.481668223843049e-05, "loss": 0.3733, "step": 4542 }, { "epoch": 0.3599128540305011, "grad_norm": 1.847512443968618, "learning_rate": 1.481443332859688e-05, "loss": 0.2486, "step": 4543 }, { "epoch": 0.3599920776391365, "grad_norm": 1.9910027128388537, "learning_rate": 1.4812184101755997e-05, "loss": 0.3474, "step": 4544 }, { "epoch": 0.3600713012477718, "grad_norm": 3.2582475850998707, "learning_rate": 1.480993455805595e-05, "loss": 0.3684, "step": 4545 }, { "epoch": 0.3601505248564072, "grad_norm": 1.785677876391657, "learning_rate": 1.480768469764485e-05, "loss": 0.3508, "step": 4546 }, { "epoch": 0.3602297484650426, "grad_norm": 2.1111082556414735, "learning_rate": 1.480543452067085e-05, "loss": 0.4027, "step": 4547 }, { "epoch": 0.36030897207367796, "grad_norm": 1.6313097464088209, "learning_rate": 1.480318402728211e-05, "loss": 0.2332, "step": 4548 }, { "epoch": 0.36038819568231334, "grad_norm": 1.8559683155422102, "learning_rate": 1.480093321762681e-05, "loss": 0.35, "step": 4549 }, { "epoch": 0.3604674192909487, "grad_norm": 1.7853318963964082, "learning_rate": 1.4798682091853161e-05, "loss": 0.1915, "step": 4550 }, { "epoch": 0.3605466428995841, "grad_norm": 2.071621887291487, "learning_rate": 1.4796430650109383e-05, "loss": 0.339, "step": 4551 }, { "epoch": 0.36062586650821943, "grad_norm": 1.7452459123553987, "learning_rate": 1.4794178892543727e-05, "loss": 0.2623, "step": 4552 }, { "epoch": 0.3607050901168548, "grad_norm": 1.9857757949360042, "learning_rate": 1.4791926819304462e-05, "loss": 0.2993, "step": 4553 }, { "epoch": 0.3607843137254902, "grad_norm": 1.9043763234461848, "learning_rate": 1.4789674430539868e-05, "loss": 0.2841, "step": 4554 }, { "epoch": 0.3608635373341256, "grad_norm": 1.9947000003906217, "learning_rate": 1.4787421726398263e-05, "loss": 0.3286, "step": 4555 }, { "epoch": 0.36094276094276095, "grad_norm": 2.3353144966346284, "learning_rate": 1.4785168707027972e-05, "loss": 0.4169, "step": 4556 }, { "epoch": 0.36102198455139634, "grad_norm": 2.1079237478369803, "learning_rate": 1.4782915372577347e-05, "loss": 0.2736, "step": 4557 }, { "epoch": 0.3611012081600317, "grad_norm": 1.8403067529174804, "learning_rate": 1.4780661723194757e-05, "loss": 0.2704, "step": 4558 }, { "epoch": 0.36118043176866704, "grad_norm": 1.9144111954723666, "learning_rate": 1.4778407759028599e-05, "loss": 0.2462, "step": 4559 }, { "epoch": 0.3612596553773024, "grad_norm": 2.0896006232969815, "learning_rate": 1.4776153480227278e-05, "loss": 0.2951, "step": 4560 }, { "epoch": 0.3613388789859378, "grad_norm": 2.175656361755795, "learning_rate": 1.4773898886939235e-05, "loss": 0.2725, "step": 4561 }, { "epoch": 0.3614181025945732, "grad_norm": 2.1968586535941044, "learning_rate": 1.4771643979312917e-05, "loss": 0.329, "step": 4562 }, { "epoch": 0.36149732620320857, "grad_norm": 1.893851266459502, "learning_rate": 1.4769388757496806e-05, "loss": 0.1702, "step": 4563 }, { "epoch": 0.36157654981184395, "grad_norm": 1.8709173094430427, "learning_rate": 1.4767133221639394e-05, "loss": 0.325, "step": 4564 }, { "epoch": 0.3616557734204793, "grad_norm": 2.0964495692159018, "learning_rate": 1.4764877371889194e-05, "loss": 0.3104, "step": 4565 }, { "epoch": 0.36173499702911466, "grad_norm": 1.5181118840211347, "learning_rate": 1.476262120839475e-05, "loss": 0.2408, "step": 4566 }, { "epoch": 0.36181422063775004, "grad_norm": 1.9308876705992082, "learning_rate": 1.4760364731304614e-05, "loss": 0.2622, "step": 4567 }, { "epoch": 0.3618934442463854, "grad_norm": 1.6309956987385346, "learning_rate": 1.4758107940767368e-05, "loss": 0.2814, "step": 4568 }, { "epoch": 0.3619726678550208, "grad_norm": 1.8227012989468465, "learning_rate": 1.4755850836931607e-05, "loss": 0.283, "step": 4569 }, { "epoch": 0.3620518914636562, "grad_norm": 1.8517746035684717, "learning_rate": 1.475359341994595e-05, "loss": 0.345, "step": 4570 }, { "epoch": 0.36213111507229157, "grad_norm": 1.829998598390889, "learning_rate": 1.4751335689959044e-05, "loss": 0.2462, "step": 4571 }, { "epoch": 0.3622103386809269, "grad_norm": 2.0421939088794057, "learning_rate": 1.4749077647119542e-05, "loss": 0.2188, "step": 4572 }, { "epoch": 0.3622895622895623, "grad_norm": 2.0200479575612413, "learning_rate": 1.474681929157613e-05, "loss": 0.3439, "step": 4573 }, { "epoch": 0.36236878589819765, "grad_norm": 1.8543730087865458, "learning_rate": 1.4744560623477502e-05, "loss": 0.2511, "step": 4574 }, { "epoch": 0.36244800950683304, "grad_norm": 1.9421722383662212, "learning_rate": 1.4742301642972392e-05, "loss": 0.3163, "step": 4575 }, { "epoch": 0.3625272331154684, "grad_norm": 2.0974607886007433, "learning_rate": 1.4740042350209536e-05, "loss": 0.2961, "step": 4576 }, { "epoch": 0.3626064567241038, "grad_norm": 2.1036012226703718, "learning_rate": 1.4737782745337696e-05, "loss": 0.3351, "step": 4577 }, { "epoch": 0.3626856803327392, "grad_norm": 2.3285039526764435, "learning_rate": 1.4735522828505663e-05, "loss": 0.3722, "step": 4578 }, { "epoch": 0.3627649039413745, "grad_norm": 1.851927494391605, "learning_rate": 1.4733262599862234e-05, "loss": 0.2979, "step": 4579 }, { "epoch": 0.3628441275500099, "grad_norm": 1.6369910505642769, "learning_rate": 1.4731002059556242e-05, "loss": 0.2057, "step": 4580 }, { "epoch": 0.36292335115864527, "grad_norm": 1.9433350948670887, "learning_rate": 1.4728741207736525e-05, "loss": 0.3834, "step": 4581 }, { "epoch": 0.36300257476728065, "grad_norm": 1.6151885165582778, "learning_rate": 1.4726480044551953e-05, "loss": 0.2589, "step": 4582 }, { "epoch": 0.36308179837591603, "grad_norm": 2.027856639065916, "learning_rate": 1.4724218570151415e-05, "loss": 0.3408, "step": 4583 }, { "epoch": 0.3631610219845514, "grad_norm": 1.9773296362166783, "learning_rate": 1.4721956784683813e-05, "loss": 0.4183, "step": 4584 }, { "epoch": 0.3632402455931868, "grad_norm": 1.9726032512494887, "learning_rate": 1.4719694688298078e-05, "loss": 0.3214, "step": 4585 }, { "epoch": 0.3633194692018221, "grad_norm": 2.0096875940382386, "learning_rate": 1.4717432281143161e-05, "loss": 0.2344, "step": 4586 }, { "epoch": 0.3633986928104575, "grad_norm": 1.8675973209031467, "learning_rate": 1.4715169563368021e-05, "loss": 0.3268, "step": 4587 }, { "epoch": 0.3634779164190929, "grad_norm": 1.8619405637117874, "learning_rate": 1.4712906535121658e-05, "loss": 0.1834, "step": 4588 }, { "epoch": 0.36355714002772826, "grad_norm": 1.9622734340502865, "learning_rate": 1.4710643196553074e-05, "loss": 0.3811, "step": 4589 }, { "epoch": 0.36363636363636365, "grad_norm": 1.2464322813911541, "learning_rate": 1.4708379547811302e-05, "loss": 0.1781, "step": 4590 }, { "epoch": 0.36371558724499903, "grad_norm": 1.97120190389002, "learning_rate": 1.4706115589045396e-05, "loss": 0.2922, "step": 4591 }, { "epoch": 0.3637948108536344, "grad_norm": 1.8130444421048555, "learning_rate": 1.4703851320404416e-05, "loss": 0.3521, "step": 4592 }, { "epoch": 0.36387403446226974, "grad_norm": 1.7903272120508689, "learning_rate": 1.4701586742037464e-05, "loss": 0.2122, "step": 4593 }, { "epoch": 0.3639532580709051, "grad_norm": 2.0312416747431756, "learning_rate": 1.4699321854093649e-05, "loss": 0.3273, "step": 4594 }, { "epoch": 0.3640324816795405, "grad_norm": 1.8251285075242487, "learning_rate": 1.46970566567221e-05, "loss": 0.278, "step": 4595 }, { "epoch": 0.3641117052881759, "grad_norm": 1.5354544779957986, "learning_rate": 1.469479115007197e-05, "loss": 0.3274, "step": 4596 }, { "epoch": 0.36419092889681126, "grad_norm": 2.280352908558222, "learning_rate": 1.4692525334292434e-05, "loss": 0.1861, "step": 4597 }, { "epoch": 0.36427015250544664, "grad_norm": 1.7668855525305438, "learning_rate": 1.4690259209532682e-05, "loss": 0.2326, "step": 4598 }, { "epoch": 0.364349376114082, "grad_norm": 2.2111396426990777, "learning_rate": 1.468799277594193e-05, "loss": 0.3252, "step": 4599 }, { "epoch": 0.36442859972271735, "grad_norm": 1.8981381327900542, "learning_rate": 1.4685726033669412e-05, "loss": 0.3434, "step": 4600 }, { "epoch": 0.36450782333135273, "grad_norm": 1.943164954252058, "learning_rate": 1.468345898286438e-05, "loss": 0.2665, "step": 4601 }, { "epoch": 0.3645870469399881, "grad_norm": 1.5919574835797072, "learning_rate": 1.468119162367611e-05, "loss": 0.2047, "step": 4602 }, { "epoch": 0.3646662705486235, "grad_norm": 1.7437721486295135, "learning_rate": 1.4678923956253894e-05, "loss": 0.3533, "step": 4603 }, { "epoch": 0.3647454941572589, "grad_norm": 2.0121970439790986, "learning_rate": 1.4676655980747052e-05, "loss": 0.3026, "step": 4604 }, { "epoch": 0.36482471776589426, "grad_norm": 1.4807536834732176, "learning_rate": 1.4674387697304914e-05, "loss": 0.2085, "step": 4605 }, { "epoch": 0.3649039413745296, "grad_norm": 1.6945669425668628, "learning_rate": 1.4672119106076838e-05, "loss": 0.419, "step": 4606 }, { "epoch": 0.36498316498316496, "grad_norm": 2.556056779793669, "learning_rate": 1.4669850207212202e-05, "loss": 0.3179, "step": 4607 }, { "epoch": 0.36506238859180035, "grad_norm": 1.8481477313822434, "learning_rate": 1.4667581000860395e-05, "loss": 0.2396, "step": 4608 }, { "epoch": 0.36514161220043573, "grad_norm": 2.8868924312330466, "learning_rate": 1.4665311487170844e-05, "loss": 0.5238, "step": 4609 }, { "epoch": 0.3652208358090711, "grad_norm": 2.107044608719778, "learning_rate": 1.4663041666292978e-05, "loss": 0.3859, "step": 4610 }, { "epoch": 0.3653000594177065, "grad_norm": 1.9488982503648078, "learning_rate": 1.4660771538376253e-05, "loss": 0.3047, "step": 4611 }, { "epoch": 0.3653792830263419, "grad_norm": 1.5307612171191352, "learning_rate": 1.4658501103570149e-05, "loss": 0.2338, "step": 4612 }, { "epoch": 0.3654585066349772, "grad_norm": 1.5640540786814416, "learning_rate": 1.4656230362024166e-05, "loss": 0.2163, "step": 4613 }, { "epoch": 0.3655377302436126, "grad_norm": 1.6627371105261337, "learning_rate": 1.4653959313887813e-05, "loss": 0.307, "step": 4614 }, { "epoch": 0.36561695385224796, "grad_norm": 1.918676718822198, "learning_rate": 1.4651687959310636e-05, "loss": 0.2164, "step": 4615 }, { "epoch": 0.36569617746088334, "grad_norm": 2.0657209258516387, "learning_rate": 1.4649416298442187e-05, "loss": 0.3128, "step": 4616 }, { "epoch": 0.3657754010695187, "grad_norm": 1.8261691355512197, "learning_rate": 1.4647144331432049e-05, "loss": 0.3541, "step": 4617 }, { "epoch": 0.3658546246781541, "grad_norm": 1.83896566157039, "learning_rate": 1.4644872058429816e-05, "loss": 0.2391, "step": 4618 }, { "epoch": 0.3659338482867895, "grad_norm": 2.0076747941817863, "learning_rate": 1.4642599479585106e-05, "loss": 0.3385, "step": 4619 }, { "epoch": 0.3660130718954248, "grad_norm": 2.3144193022940023, "learning_rate": 1.4640326595047561e-05, "loss": 0.3623, "step": 4620 }, { "epoch": 0.3660922955040602, "grad_norm": 2.0255817411378847, "learning_rate": 1.4638053404966836e-05, "loss": 0.3224, "step": 4621 }, { "epoch": 0.3661715191126956, "grad_norm": 1.748214080107897, "learning_rate": 1.4635779909492614e-05, "loss": 0.2633, "step": 4622 }, { "epoch": 0.36625074272133096, "grad_norm": 1.762716927418055, "learning_rate": 1.4633506108774588e-05, "loss": 0.2537, "step": 4623 }, { "epoch": 0.36632996632996634, "grad_norm": 2.134580736957998, "learning_rate": 1.4631232002962481e-05, "loss": 0.2609, "step": 4624 }, { "epoch": 0.3664091899386017, "grad_norm": 1.4654683207780617, "learning_rate": 1.462895759220603e-05, "loss": 0.2321, "step": 4625 }, { "epoch": 0.3664884135472371, "grad_norm": 1.8979989375459447, "learning_rate": 1.4626682876654998e-05, "loss": 0.2743, "step": 4626 }, { "epoch": 0.36656763715587243, "grad_norm": 1.936896031552917, "learning_rate": 1.4624407856459154e-05, "loss": 0.3495, "step": 4627 }, { "epoch": 0.3666468607645078, "grad_norm": 2.05955466442178, "learning_rate": 1.4622132531768309e-05, "loss": 0.3189, "step": 4628 }, { "epoch": 0.3667260843731432, "grad_norm": 1.8753091597874505, "learning_rate": 1.4619856902732279e-05, "loss": 0.2919, "step": 4629 }, { "epoch": 0.36680530798177857, "grad_norm": 1.892511585392787, "learning_rate": 1.4617580969500895e-05, "loss": 0.2447, "step": 4630 }, { "epoch": 0.36688453159041395, "grad_norm": 1.8837761972181513, "learning_rate": 1.461530473222403e-05, "loss": 0.2432, "step": 4631 }, { "epoch": 0.36696375519904934, "grad_norm": 1.4786091567383612, "learning_rate": 1.4613028191051548e-05, "loss": 0.234, "step": 4632 }, { "epoch": 0.3670429788076847, "grad_norm": 2.2305164677214977, "learning_rate": 1.4610751346133361e-05, "loss": 0.3248, "step": 4633 }, { "epoch": 0.36712220241632004, "grad_norm": 1.9363515121712283, "learning_rate": 1.4608474197619383e-05, "loss": 0.314, "step": 4634 }, { "epoch": 0.3672014260249554, "grad_norm": 1.654846397734255, "learning_rate": 1.4606196745659551e-05, "loss": 0.2172, "step": 4635 }, { "epoch": 0.3672806496335908, "grad_norm": 1.471941295936231, "learning_rate": 1.460391899040383e-05, "loss": 0.2592, "step": 4636 }, { "epoch": 0.3673598732422262, "grad_norm": 1.9640710056443629, "learning_rate": 1.4601640932002194e-05, "loss": 0.2894, "step": 4637 }, { "epoch": 0.36743909685086157, "grad_norm": 1.4775331718198763, "learning_rate": 1.4599362570604645e-05, "loss": 0.2402, "step": 4638 }, { "epoch": 0.36751832045949695, "grad_norm": 1.7897415267168406, "learning_rate": 1.4597083906361203e-05, "loss": 0.3734, "step": 4639 }, { "epoch": 0.3675975440681323, "grad_norm": 2.074932247402187, "learning_rate": 1.4594804939421903e-05, "loss": 0.3158, "step": 4640 }, { "epoch": 0.36767676767676766, "grad_norm": 1.737471564022473, "learning_rate": 1.4592525669936808e-05, "loss": 0.2409, "step": 4641 }, { "epoch": 0.36775599128540304, "grad_norm": 2.195945092172361, "learning_rate": 1.4590246098055995e-05, "loss": 0.4206, "step": 4642 }, { "epoch": 0.3678352148940384, "grad_norm": 1.7421794391496206, "learning_rate": 1.4587966223929562e-05, "loss": 0.2932, "step": 4643 }, { "epoch": 0.3679144385026738, "grad_norm": 1.6049634386404827, "learning_rate": 1.458568604770763e-05, "loss": 0.2902, "step": 4644 }, { "epoch": 0.3679936621113092, "grad_norm": 1.5299809026000064, "learning_rate": 1.458340556954034e-05, "loss": 0.279, "step": 4645 }, { "epoch": 0.36807288571994456, "grad_norm": 1.8258500233407735, "learning_rate": 1.4581124789577841e-05, "loss": 0.287, "step": 4646 }, { "epoch": 0.3681521093285799, "grad_norm": 1.6579619781481048, "learning_rate": 1.4578843707970323e-05, "loss": 0.297, "step": 4647 }, { "epoch": 0.36823133293721527, "grad_norm": 2.4866456878273557, "learning_rate": 1.4576562324867975e-05, "loss": 0.2564, "step": 4648 }, { "epoch": 0.36831055654585065, "grad_norm": 1.7970297668660742, "learning_rate": 1.457428064042102e-05, "loss": 0.335, "step": 4649 }, { "epoch": 0.36838978015448604, "grad_norm": 1.4325466255727297, "learning_rate": 1.45719986547797e-05, "loss": 0.1693, "step": 4650 }, { "epoch": 0.3684690037631214, "grad_norm": 1.705408056581669, "learning_rate": 1.4569716368094262e-05, "loss": 0.2615, "step": 4651 }, { "epoch": 0.3685482273717568, "grad_norm": 1.8508482085674984, "learning_rate": 1.456743378051499e-05, "loss": 0.2815, "step": 4652 }, { "epoch": 0.3686274509803922, "grad_norm": 1.8742340972489975, "learning_rate": 1.456515089219218e-05, "loss": 0.2094, "step": 4653 }, { "epoch": 0.3687066745890275, "grad_norm": 2.2385941481780076, "learning_rate": 1.456286770327615e-05, "loss": 0.417, "step": 4654 }, { "epoch": 0.3687858981976629, "grad_norm": 1.4993760016931719, "learning_rate": 1.456058421391724e-05, "loss": 0.1901, "step": 4655 }, { "epoch": 0.36886512180629827, "grad_norm": 1.8644129594122525, "learning_rate": 1.45583004242658e-05, "loss": 0.3158, "step": 4656 }, { "epoch": 0.36894434541493365, "grad_norm": 1.9542568327216117, "learning_rate": 1.4556016334472211e-05, "loss": 0.2579, "step": 4657 }, { "epoch": 0.36902356902356903, "grad_norm": 1.7588025672766292, "learning_rate": 1.455373194468687e-05, "loss": 0.2339, "step": 4658 }, { "epoch": 0.3691027926322044, "grad_norm": 1.7140319811474112, "learning_rate": 1.4551447255060192e-05, "loss": 0.2637, "step": 4659 }, { "epoch": 0.3691820162408398, "grad_norm": 1.3164749056508211, "learning_rate": 1.4549162265742608e-05, "loss": 0.2048, "step": 4660 }, { "epoch": 0.3692612398494751, "grad_norm": 1.8639998795903583, "learning_rate": 1.4546876976884583e-05, "loss": 0.2791, "step": 4661 }, { "epoch": 0.3693404634581105, "grad_norm": 1.9381892678910597, "learning_rate": 1.4544591388636584e-05, "loss": 0.255, "step": 4662 }, { "epoch": 0.3694196870667459, "grad_norm": 1.3953220826390549, "learning_rate": 1.454230550114911e-05, "loss": 0.1501, "step": 4663 }, { "epoch": 0.36949891067538126, "grad_norm": 2.0610398191583843, "learning_rate": 1.4540019314572678e-05, "loss": 0.2809, "step": 4664 }, { "epoch": 0.36957813428401665, "grad_norm": 1.7742172622260766, "learning_rate": 1.4537732829057816e-05, "loss": 0.3152, "step": 4665 }, { "epoch": 0.369657357892652, "grad_norm": 2.125656945669275, "learning_rate": 1.4535446044755082e-05, "loss": 0.3574, "step": 4666 }, { "epoch": 0.3697365815012874, "grad_norm": 2.198456694493325, "learning_rate": 1.4533158961815048e-05, "loss": 0.3725, "step": 4667 }, { "epoch": 0.36981580510992274, "grad_norm": 1.4773839870182743, "learning_rate": 1.4530871580388311e-05, "loss": 0.1353, "step": 4668 }, { "epoch": 0.3698950287185581, "grad_norm": 1.6611877169714782, "learning_rate": 1.4528583900625481e-05, "loss": 0.2634, "step": 4669 }, { "epoch": 0.3699742523271935, "grad_norm": 1.7018209933075727, "learning_rate": 1.4526295922677189e-05, "loss": 0.3748, "step": 4670 }, { "epoch": 0.3700534759358289, "grad_norm": 2.4185178534746403, "learning_rate": 1.4524007646694091e-05, "loss": 0.3315, "step": 4671 }, { "epoch": 0.37013269954446426, "grad_norm": 1.7903678767947944, "learning_rate": 1.4521719072826858e-05, "loss": 0.1874, "step": 4672 }, { "epoch": 0.37021192315309964, "grad_norm": 1.4288781545608105, "learning_rate": 1.451943020122618e-05, "loss": 0.1809, "step": 4673 }, { "epoch": 0.370291146761735, "grad_norm": 1.6422288313024853, "learning_rate": 1.4517141032042773e-05, "loss": 0.2245, "step": 4674 }, { "epoch": 0.37037037037037035, "grad_norm": 1.8735284688445157, "learning_rate": 1.4514851565427362e-05, "loss": 0.2704, "step": 4675 }, { "epoch": 0.37044959397900573, "grad_norm": 2.2479902989191114, "learning_rate": 1.4512561801530699e-05, "loss": 0.2887, "step": 4676 }, { "epoch": 0.3705288175876411, "grad_norm": 2.3373645804027436, "learning_rate": 1.4510271740503555e-05, "loss": 0.3139, "step": 4677 }, { "epoch": 0.3706080411962765, "grad_norm": 1.5650395432174686, "learning_rate": 1.4507981382496716e-05, "loss": 0.2086, "step": 4678 }, { "epoch": 0.3706872648049119, "grad_norm": 2.3625039464800826, "learning_rate": 1.4505690727660997e-05, "loss": 0.3312, "step": 4679 }, { "epoch": 0.37076648841354726, "grad_norm": 1.8036702932799606, "learning_rate": 1.4503399776147223e-05, "loss": 0.265, "step": 4680 }, { "epoch": 0.3708457120221826, "grad_norm": 1.9720782826214083, "learning_rate": 1.4501108528106243e-05, "loss": 0.3586, "step": 4681 }, { "epoch": 0.37092493563081796, "grad_norm": 1.8281158547092875, "learning_rate": 1.4498816983688926e-05, "loss": 0.2726, "step": 4682 }, { "epoch": 0.37100415923945335, "grad_norm": 2.111812174999127, "learning_rate": 1.4496525143046154e-05, "loss": 0.3488, "step": 4683 }, { "epoch": 0.3710833828480887, "grad_norm": 1.8729364466817486, "learning_rate": 1.4494233006328837e-05, "loss": 0.286, "step": 4684 }, { "epoch": 0.3711626064567241, "grad_norm": 1.7379818319196876, "learning_rate": 1.4491940573687906e-05, "loss": 0.2436, "step": 4685 }, { "epoch": 0.3712418300653595, "grad_norm": 1.6843416581137713, "learning_rate": 1.44896478452743e-05, "loss": 0.2432, "step": 4686 }, { "epoch": 0.37132105367399487, "grad_norm": 2.0286483827739548, "learning_rate": 1.4487354821238983e-05, "loss": 0.2842, "step": 4687 }, { "epoch": 0.3714002772826302, "grad_norm": 1.911045750236119, "learning_rate": 1.4485061501732949e-05, "loss": 0.2678, "step": 4688 }, { "epoch": 0.3714795008912656, "grad_norm": 1.7492314580876391, "learning_rate": 1.448276788690719e-05, "loss": 0.2592, "step": 4689 }, { "epoch": 0.37155872449990096, "grad_norm": 1.7136385436125254, "learning_rate": 1.4480473976912737e-05, "loss": 0.2438, "step": 4690 }, { "epoch": 0.37163794810853634, "grad_norm": 1.7401190917926839, "learning_rate": 1.4478179771900634e-05, "loss": 0.2423, "step": 4691 }, { "epoch": 0.3717171717171717, "grad_norm": 1.8271774715508877, "learning_rate": 1.4475885272021936e-05, "loss": 0.346, "step": 4692 }, { "epoch": 0.3717963953258071, "grad_norm": 1.781601678505233, "learning_rate": 1.4473590477427735e-05, "loss": 0.2917, "step": 4693 }, { "epoch": 0.3718756189344425, "grad_norm": 1.9430399152505764, "learning_rate": 1.4471295388269121e-05, "loss": 0.3292, "step": 4694 }, { "epoch": 0.3719548425430778, "grad_norm": 1.9534291727129052, "learning_rate": 1.4469000004697224e-05, "loss": 0.2956, "step": 4695 }, { "epoch": 0.3720340661517132, "grad_norm": 2.183933025080302, "learning_rate": 1.446670432686318e-05, "loss": 0.3236, "step": 4696 }, { "epoch": 0.3721132897603486, "grad_norm": 2.1682657162645893, "learning_rate": 1.4464408354918145e-05, "loss": 0.2877, "step": 4697 }, { "epoch": 0.37219251336898396, "grad_norm": 1.4817786600749772, "learning_rate": 1.4462112089013304e-05, "loss": 0.2326, "step": 4698 }, { "epoch": 0.37227173697761934, "grad_norm": 1.8533209906568295, "learning_rate": 1.4459815529299851e-05, "loss": 0.2531, "step": 4699 }, { "epoch": 0.3723509605862547, "grad_norm": 1.8799966813142532, "learning_rate": 1.4457518675929008e-05, "loss": 0.2968, "step": 4700 }, { "epoch": 0.3724301841948901, "grad_norm": 1.8644717360882417, "learning_rate": 1.4455221529052006e-05, "loss": 0.2205, "step": 4701 }, { "epoch": 0.3725094078035254, "grad_norm": 2.0868630337187097, "learning_rate": 1.4452924088820101e-05, "loss": 0.3263, "step": 4702 }, { "epoch": 0.3725886314121608, "grad_norm": 2.0433028399364175, "learning_rate": 1.4450626355384573e-05, "loss": 0.2889, "step": 4703 }, { "epoch": 0.3726678550207962, "grad_norm": 1.7147792563781357, "learning_rate": 1.4448328328896717e-05, "loss": 0.2512, "step": 4704 }, { "epoch": 0.37274707862943157, "grad_norm": 2.100717531941935, "learning_rate": 1.444603000950784e-05, "loss": 0.2273, "step": 4705 }, { "epoch": 0.37282630223806695, "grad_norm": 1.9873458870634582, "learning_rate": 1.4443731397369283e-05, "loss": 0.2479, "step": 4706 }, { "epoch": 0.37290552584670233, "grad_norm": 2.0897216126976477, "learning_rate": 1.4441432492632395e-05, "loss": 0.3453, "step": 4707 }, { "epoch": 0.3729847494553377, "grad_norm": 1.85767304640378, "learning_rate": 1.4439133295448547e-05, "loss": 0.2637, "step": 4708 }, { "epoch": 0.37306397306397304, "grad_norm": 1.8669765375254737, "learning_rate": 1.4436833805969133e-05, "loss": 0.2248, "step": 4709 }, { "epoch": 0.3731431966726084, "grad_norm": 1.5468480808168354, "learning_rate": 1.4434534024345558e-05, "loss": 0.2213, "step": 4710 }, { "epoch": 0.3732224202812438, "grad_norm": 2.1651278677154173, "learning_rate": 1.4432233950729257e-05, "loss": 0.2346, "step": 4711 }, { "epoch": 0.3733016438898792, "grad_norm": 1.8622206801954768, "learning_rate": 1.442993358527168e-05, "loss": 0.2661, "step": 4712 }, { "epoch": 0.37338086749851457, "grad_norm": 1.860985314496505, "learning_rate": 1.4427632928124288e-05, "loss": 0.2264, "step": 4713 }, { "epoch": 0.37346009110714995, "grad_norm": 1.738303261273715, "learning_rate": 1.4425331979438573e-05, "loss": 0.2249, "step": 4714 }, { "epoch": 0.37353931471578533, "grad_norm": 1.3493321298988545, "learning_rate": 1.4423030739366042e-05, "loss": 0.1953, "step": 4715 }, { "epoch": 0.37361853832442066, "grad_norm": 1.757152477188081, "learning_rate": 1.4420729208058217e-05, "loss": 0.2797, "step": 4716 }, { "epoch": 0.37369776193305604, "grad_norm": 2.4867732454480835, "learning_rate": 1.4418427385666647e-05, "loss": 0.3569, "step": 4717 }, { "epoch": 0.3737769855416914, "grad_norm": 1.7485682772478601, "learning_rate": 1.4416125272342891e-05, "loss": 0.3015, "step": 4718 }, { "epoch": 0.3738562091503268, "grad_norm": 1.6998442982286588, "learning_rate": 1.4413822868238537e-05, "loss": 0.2688, "step": 4719 }, { "epoch": 0.3739354327589622, "grad_norm": 2.422891857454065, "learning_rate": 1.4411520173505184e-05, "loss": 0.2982, "step": 4720 }, { "epoch": 0.37401465636759756, "grad_norm": 2.020050060082948, "learning_rate": 1.4409217188294456e-05, "loss": 0.2554, "step": 4721 }, { "epoch": 0.3740938799762329, "grad_norm": 1.8270816579010787, "learning_rate": 1.440691391275799e-05, "loss": 0.2659, "step": 4722 }, { "epoch": 0.37417310358486827, "grad_norm": 2.0408968489461228, "learning_rate": 1.440461034704745e-05, "loss": 0.2785, "step": 4723 }, { "epoch": 0.37425232719350365, "grad_norm": 2.276670163471484, "learning_rate": 1.4402306491314508e-05, "loss": 0.382, "step": 4724 }, { "epoch": 0.37433155080213903, "grad_norm": 1.7914286178273469, "learning_rate": 1.4400002345710871e-05, "loss": 0.1882, "step": 4725 }, { "epoch": 0.3744107744107744, "grad_norm": 1.8696138522642984, "learning_rate": 1.4397697910388248e-05, "loss": 0.2113, "step": 4726 }, { "epoch": 0.3744899980194098, "grad_norm": 1.9916567042877682, "learning_rate": 1.4395393185498381e-05, "loss": 0.3261, "step": 4727 }, { "epoch": 0.3745692216280452, "grad_norm": 2.1764238125591273, "learning_rate": 1.4393088171193021e-05, "loss": 0.3606, "step": 4728 }, { "epoch": 0.3746484452366805, "grad_norm": 2.287074179969357, "learning_rate": 1.439078286762394e-05, "loss": 0.296, "step": 4729 }, { "epoch": 0.3747276688453159, "grad_norm": 2.069702512234331, "learning_rate": 1.4388477274942936e-05, "loss": 0.3394, "step": 4730 }, { "epoch": 0.37480689245395127, "grad_norm": 1.7618470036155942, "learning_rate": 1.438617139330182e-05, "loss": 0.2939, "step": 4731 }, { "epoch": 0.37488611606258665, "grad_norm": 1.927425830780056, "learning_rate": 1.4383865222852423e-05, "loss": 0.3757, "step": 4732 }, { "epoch": 0.37496533967122203, "grad_norm": 1.6818755923497462, "learning_rate": 1.4381558763746593e-05, "loss": 0.2663, "step": 4733 }, { "epoch": 0.3750445632798574, "grad_norm": 2.0275449152454845, "learning_rate": 1.4379252016136203e-05, "loss": 0.2412, "step": 4734 }, { "epoch": 0.3751237868884928, "grad_norm": 2.365124968042268, "learning_rate": 1.4376944980173138e-05, "loss": 0.3016, "step": 4735 }, { "epoch": 0.3752030104971281, "grad_norm": 1.726110119950647, "learning_rate": 1.4374637656009309e-05, "loss": 0.2693, "step": 4736 }, { "epoch": 0.3752822341057635, "grad_norm": 1.825486942827154, "learning_rate": 1.4372330043796636e-05, "loss": 0.2709, "step": 4737 }, { "epoch": 0.3753614577143989, "grad_norm": 1.8866113495498562, "learning_rate": 1.437002214368707e-05, "loss": 0.3569, "step": 4738 }, { "epoch": 0.37544068132303426, "grad_norm": 1.8885572190682287, "learning_rate": 1.4367713955832575e-05, "loss": 0.2985, "step": 4739 }, { "epoch": 0.37551990493166965, "grad_norm": 1.8054682288349528, "learning_rate": 1.4365405480385129e-05, "loss": 0.2478, "step": 4740 }, { "epoch": 0.375599128540305, "grad_norm": 1.706464839265661, "learning_rate": 1.4363096717496738e-05, "loss": 0.3153, "step": 4741 }, { "epoch": 0.3756783521489404, "grad_norm": 1.9111858773258044, "learning_rate": 1.4360787667319423e-05, "loss": 0.2685, "step": 4742 }, { "epoch": 0.37575757575757573, "grad_norm": 1.8988461695719265, "learning_rate": 1.4358478330005222e-05, "loss": 0.25, "step": 4743 }, { "epoch": 0.3758367993662111, "grad_norm": 2.05609388435826, "learning_rate": 1.4356168705706195e-05, "loss": 0.3652, "step": 4744 }, { "epoch": 0.3759160229748465, "grad_norm": 1.418560946975717, "learning_rate": 1.4353858794574418e-05, "loss": 0.1918, "step": 4745 }, { "epoch": 0.3759952465834819, "grad_norm": 1.8745809635321844, "learning_rate": 1.435154859676199e-05, "loss": 0.1911, "step": 4746 }, { "epoch": 0.37607447019211726, "grad_norm": 1.8668818527695277, "learning_rate": 1.4349238112421025e-05, "loss": 0.3081, "step": 4747 }, { "epoch": 0.37615369380075264, "grad_norm": 2.0402156683640653, "learning_rate": 1.4346927341703659e-05, "loss": 0.1871, "step": 4748 }, { "epoch": 0.376232917409388, "grad_norm": 2.024151905844245, "learning_rate": 1.4344616284762038e-05, "loss": 0.2528, "step": 4749 }, { "epoch": 0.37631214101802335, "grad_norm": 1.8115906364725716, "learning_rate": 1.4342304941748347e-05, "loss": 0.2524, "step": 4750 }, { "epoch": 0.37639136462665873, "grad_norm": 1.730835837178477, "learning_rate": 1.4339993312814765e-05, "loss": 0.2416, "step": 4751 }, { "epoch": 0.3764705882352941, "grad_norm": 2.4746287356539263, "learning_rate": 1.4337681398113508e-05, "loss": 0.3894, "step": 4752 }, { "epoch": 0.3765498118439295, "grad_norm": 1.6848954803166556, "learning_rate": 1.4335369197796803e-05, "loss": 0.3042, "step": 4753 }, { "epoch": 0.3766290354525649, "grad_norm": 1.6223749855599328, "learning_rate": 1.4333056712016893e-05, "loss": 0.2643, "step": 4754 }, { "epoch": 0.37670825906120026, "grad_norm": 1.5619388055588996, "learning_rate": 1.4330743940926052e-05, "loss": 0.321, "step": 4755 }, { "epoch": 0.37678748266983564, "grad_norm": 1.8907211588109332, "learning_rate": 1.4328430884676559e-05, "loss": 0.3613, "step": 4756 }, { "epoch": 0.37686670627847096, "grad_norm": 1.8064758379779098, "learning_rate": 1.432611754342072e-05, "loss": 0.2049, "step": 4757 }, { "epoch": 0.37694592988710635, "grad_norm": 2.2558741911370808, "learning_rate": 1.4323803917310857e-05, "loss": 0.2703, "step": 4758 }, { "epoch": 0.3770251534957417, "grad_norm": 1.6493248676518513, "learning_rate": 1.4321490006499309e-05, "loss": 0.3129, "step": 4759 }, { "epoch": 0.3771043771043771, "grad_norm": 2.023673583331004, "learning_rate": 1.4319175811138439e-05, "loss": 0.3841, "step": 4760 }, { "epoch": 0.3771836007130125, "grad_norm": 1.5846736652531113, "learning_rate": 1.4316861331380624e-05, "loss": 0.2293, "step": 4761 }, { "epoch": 0.37726282432164787, "grad_norm": 1.737152960625183, "learning_rate": 1.431454656737826e-05, "loss": 0.2553, "step": 4762 }, { "epoch": 0.3773420479302832, "grad_norm": 1.847626636025447, "learning_rate": 1.4312231519283768e-05, "loss": 0.3948, "step": 4763 }, { "epoch": 0.3774212715389186, "grad_norm": 1.9922604383254103, "learning_rate": 1.4309916187249578e-05, "loss": 0.2632, "step": 4764 }, { "epoch": 0.37750049514755396, "grad_norm": 1.6642609860673572, "learning_rate": 1.4307600571428143e-05, "loss": 0.2273, "step": 4765 }, { "epoch": 0.37757971875618934, "grad_norm": 2.3994548871589045, "learning_rate": 1.4305284671971943e-05, "loss": 0.2685, "step": 4766 }, { "epoch": 0.3776589423648247, "grad_norm": 1.9601416463291454, "learning_rate": 1.4302968489033462e-05, "loss": 0.2729, "step": 4767 }, { "epoch": 0.3777381659734601, "grad_norm": 1.5918009311669763, "learning_rate": 1.4300652022765207e-05, "loss": 0.1623, "step": 4768 }, { "epoch": 0.3778173895820955, "grad_norm": 1.7491420675037992, "learning_rate": 1.429833527331971e-05, "loss": 0.2347, "step": 4769 }, { "epoch": 0.3778966131907308, "grad_norm": 1.857020044890144, "learning_rate": 1.4296018240849518e-05, "loss": 0.2439, "step": 4770 }, { "epoch": 0.3779758367993662, "grad_norm": 2.1939072313416497, "learning_rate": 1.4293700925507199e-05, "loss": 0.3053, "step": 4771 }, { "epoch": 0.3780550604080016, "grad_norm": 1.6770552256889275, "learning_rate": 1.429138332744533e-05, "loss": 0.2479, "step": 4772 }, { "epoch": 0.37813428401663696, "grad_norm": 2.2720296172370005, "learning_rate": 1.428906544681652e-05, "loss": 0.4448, "step": 4773 }, { "epoch": 0.37821350762527234, "grad_norm": 1.9610731961200303, "learning_rate": 1.4286747283773388e-05, "loss": 0.2748, "step": 4774 }, { "epoch": 0.3782927312339077, "grad_norm": 2.214411496713684, "learning_rate": 1.4284428838468572e-05, "loss": 0.3452, "step": 4775 }, { "epoch": 0.3783719548425431, "grad_norm": 1.6701313928344728, "learning_rate": 1.4282110111054733e-05, "loss": 0.2299, "step": 4776 }, { "epoch": 0.3784511784511784, "grad_norm": 1.8452207662857405, "learning_rate": 1.4279791101684547e-05, "loss": 0.2722, "step": 4777 }, { "epoch": 0.3785304020598138, "grad_norm": 1.8936329879851312, "learning_rate": 1.427747181051071e-05, "loss": 0.3366, "step": 4778 }, { "epoch": 0.3786096256684492, "grad_norm": 2.093726002352405, "learning_rate": 1.4275152237685938e-05, "loss": 0.2198, "step": 4779 }, { "epoch": 0.37868884927708457, "grad_norm": 1.7039406740894012, "learning_rate": 1.4272832383362962e-05, "loss": 0.2576, "step": 4780 }, { "epoch": 0.37876807288571995, "grad_norm": 2.0676157160633637, "learning_rate": 1.427051224769453e-05, "loss": 0.2289, "step": 4781 }, { "epoch": 0.37884729649435533, "grad_norm": 1.9191436982745378, "learning_rate": 1.4268191830833417e-05, "loss": 0.3325, "step": 4782 }, { "epoch": 0.3789265201029907, "grad_norm": 1.7454611128809623, "learning_rate": 1.426587113293241e-05, "loss": 0.2648, "step": 4783 }, { "epoch": 0.37900574371162604, "grad_norm": 1.7793678504511723, "learning_rate": 1.4263550154144313e-05, "loss": 0.2631, "step": 4784 }, { "epoch": 0.3790849673202614, "grad_norm": 1.5371724572623175, "learning_rate": 1.4261228894621955e-05, "loss": 0.2263, "step": 4785 }, { "epoch": 0.3791641909288968, "grad_norm": 1.3992223728756408, "learning_rate": 1.4258907354518177e-05, "loss": 0.2742, "step": 4786 }, { "epoch": 0.3792434145375322, "grad_norm": 2.1303990309180105, "learning_rate": 1.4256585533985842e-05, "loss": 0.2588, "step": 4787 }, { "epoch": 0.37932263814616757, "grad_norm": 1.8591226424121843, "learning_rate": 1.425426343317783e-05, "loss": 0.2327, "step": 4788 }, { "epoch": 0.37940186175480295, "grad_norm": 1.9874139774638357, "learning_rate": 1.4251941052247044e-05, "loss": 0.2767, "step": 4789 }, { "epoch": 0.37948108536343833, "grad_norm": 1.4409303108247693, "learning_rate": 1.4249618391346399e-05, "loss": 0.2136, "step": 4790 }, { "epoch": 0.37956030897207366, "grad_norm": 1.7374835963702855, "learning_rate": 1.4247295450628826e-05, "loss": 0.3302, "step": 4791 }, { "epoch": 0.37963953258070904, "grad_norm": 2.066763210743459, "learning_rate": 1.4244972230247287e-05, "loss": 0.2477, "step": 4792 }, { "epoch": 0.3797187561893444, "grad_norm": 1.667379683610971, "learning_rate": 1.4242648730354756e-05, "loss": 0.2205, "step": 4793 }, { "epoch": 0.3797979797979798, "grad_norm": 2.4761630960969256, "learning_rate": 1.4240324951104213e-05, "loss": 0.3087, "step": 4794 }, { "epoch": 0.3798772034066152, "grad_norm": 1.9259460121966194, "learning_rate": 1.4238000892648682e-05, "loss": 0.2996, "step": 4795 }, { "epoch": 0.37995642701525056, "grad_norm": 1.80462871105025, "learning_rate": 1.423567655514118e-05, "loss": 0.2886, "step": 4796 }, { "epoch": 0.38003565062388595, "grad_norm": 1.9088454542305258, "learning_rate": 1.4233351938734758e-05, "loss": 0.2883, "step": 4797 }, { "epoch": 0.38011487423252127, "grad_norm": 1.8562698626926064, "learning_rate": 1.4231027043582483e-05, "loss": 0.2988, "step": 4798 }, { "epoch": 0.38019409784115665, "grad_norm": 1.9023307467539372, "learning_rate": 1.4228701869837433e-05, "loss": 0.1947, "step": 4799 }, { "epoch": 0.38027332144979203, "grad_norm": 1.5730118458882212, "learning_rate": 1.4226376417652713e-05, "loss": 0.2352, "step": 4800 }, { "epoch": 0.3803525450584274, "grad_norm": 2.5466487394800588, "learning_rate": 1.4224050687181442e-05, "loss": 0.4451, "step": 4801 }, { "epoch": 0.3804317686670628, "grad_norm": 1.7512508606591768, "learning_rate": 1.4221724678576756e-05, "loss": 0.261, "step": 4802 }, { "epoch": 0.3805109922756982, "grad_norm": 1.9443566246310116, "learning_rate": 1.421939839199182e-05, "loss": 0.2422, "step": 4803 }, { "epoch": 0.3805902158843335, "grad_norm": 2.23398072236846, "learning_rate": 1.4217071827579796e-05, "loss": 0.3715, "step": 4804 }, { "epoch": 0.3806694394929689, "grad_norm": 1.7566686596767662, "learning_rate": 1.4214744985493884e-05, "loss": 0.2581, "step": 4805 }, { "epoch": 0.38074866310160427, "grad_norm": 1.8365037645980977, "learning_rate": 1.4212417865887299e-05, "loss": 0.2675, "step": 4806 }, { "epoch": 0.38082788671023965, "grad_norm": 2.222094531196926, "learning_rate": 1.4210090468913263e-05, "loss": 0.2966, "step": 4807 }, { "epoch": 0.38090711031887503, "grad_norm": 1.674970284916282, "learning_rate": 1.4207762794725026e-05, "loss": 0.1844, "step": 4808 }, { "epoch": 0.3809863339275104, "grad_norm": 1.8912679943198152, "learning_rate": 1.4205434843475859e-05, "loss": 0.3335, "step": 4809 }, { "epoch": 0.3810655575361458, "grad_norm": 1.4026111151498115, "learning_rate": 1.420310661531904e-05, "loss": 0.2587, "step": 4810 }, { "epoch": 0.3811447811447811, "grad_norm": 2.0979991976115833, "learning_rate": 1.4200778110407873e-05, "loss": 0.2792, "step": 4811 }, { "epoch": 0.3812240047534165, "grad_norm": 1.8532419798592212, "learning_rate": 1.4198449328895685e-05, "loss": 0.227, "step": 4812 }, { "epoch": 0.3813032283620519, "grad_norm": 1.8545119139565978, "learning_rate": 1.4196120270935807e-05, "loss": 0.2658, "step": 4813 }, { "epoch": 0.38138245197068726, "grad_norm": 2.174031050158867, "learning_rate": 1.4193790936681602e-05, "loss": 0.3955, "step": 4814 }, { "epoch": 0.38146167557932265, "grad_norm": 1.5039943437621284, "learning_rate": 1.4191461326286442e-05, "loss": 0.2524, "step": 4815 }, { "epoch": 0.381540899187958, "grad_norm": 2.1090737438577083, "learning_rate": 1.4189131439903721e-05, "loss": 0.3839, "step": 4816 }, { "epoch": 0.3816201227965934, "grad_norm": 1.7986950020087995, "learning_rate": 1.4186801277686852e-05, "loss": 0.321, "step": 4817 }, { "epoch": 0.38169934640522873, "grad_norm": 1.7847700997923661, "learning_rate": 1.4184470839789265e-05, "loss": 0.1687, "step": 4818 }, { "epoch": 0.3817785700138641, "grad_norm": 1.72693061428541, "learning_rate": 1.4182140126364404e-05, "loss": 0.216, "step": 4819 }, { "epoch": 0.3818577936224995, "grad_norm": 1.842941970286251, "learning_rate": 1.4179809137565742e-05, "loss": 0.2566, "step": 4820 }, { "epoch": 0.3819370172311349, "grad_norm": 1.8199027279628275, "learning_rate": 1.417747787354676e-05, "loss": 0.3312, "step": 4821 }, { "epoch": 0.38201624083977026, "grad_norm": 1.8969625008135695, "learning_rate": 1.4175146334460963e-05, "loss": 0.3397, "step": 4822 }, { "epoch": 0.38209546444840564, "grad_norm": 1.6402500927911516, "learning_rate": 1.4172814520461867e-05, "loss": 0.2617, "step": 4823 }, { "epoch": 0.382174688057041, "grad_norm": 1.9468199653542566, "learning_rate": 1.4170482431703012e-05, "loss": 0.2846, "step": 4824 }, { "epoch": 0.38225391166567635, "grad_norm": 1.8824592388014352, "learning_rate": 1.4168150068337958e-05, "loss": 0.2284, "step": 4825 }, { "epoch": 0.38233313527431173, "grad_norm": 1.8205163241914524, "learning_rate": 1.4165817430520276e-05, "loss": 0.2875, "step": 4826 }, { "epoch": 0.3824123588829471, "grad_norm": 1.6660189023837875, "learning_rate": 1.4163484518403561e-05, "loss": 0.297, "step": 4827 }, { "epoch": 0.3824915824915825, "grad_norm": 2.3646889682065226, "learning_rate": 1.4161151332141426e-05, "loss": 0.2898, "step": 4828 }, { "epoch": 0.3825708061002179, "grad_norm": 1.640092236248722, "learning_rate": 1.4158817871887497e-05, "loss": 0.2715, "step": 4829 }, { "epoch": 0.38265002970885326, "grad_norm": 1.4300968509993024, "learning_rate": 1.4156484137795424e-05, "loss": 0.2185, "step": 4830 }, { "epoch": 0.38272925331748864, "grad_norm": 1.5217152938992407, "learning_rate": 1.4154150130018867e-05, "loss": 0.3335, "step": 4831 }, { "epoch": 0.38280847692612396, "grad_norm": 1.8261276611801172, "learning_rate": 1.4151815848711512e-05, "loss": 0.2915, "step": 4832 }, { "epoch": 0.38288770053475935, "grad_norm": 1.9081419485416535, "learning_rate": 1.4149481294027063e-05, "loss": 0.2701, "step": 4833 }, { "epoch": 0.3829669241433947, "grad_norm": 1.6952495514126464, "learning_rate": 1.4147146466119235e-05, "loss": 0.2624, "step": 4834 }, { "epoch": 0.3830461477520301, "grad_norm": 1.3043963096200666, "learning_rate": 1.4144811365141769e-05, "loss": 0.1968, "step": 4835 }, { "epoch": 0.3831253713606655, "grad_norm": 1.7918340112956868, "learning_rate": 1.4142475991248417e-05, "loss": 0.317, "step": 4836 }, { "epoch": 0.38320459496930087, "grad_norm": 1.9627614844999453, "learning_rate": 1.4140140344592952e-05, "loss": 0.38, "step": 4837 }, { "epoch": 0.3832838185779362, "grad_norm": 1.9409581200822585, "learning_rate": 1.413780442532917e-05, "loss": 0.3079, "step": 4838 }, { "epoch": 0.3833630421865716, "grad_norm": 1.950199498223228, "learning_rate": 1.4135468233610872e-05, "loss": 0.2871, "step": 4839 }, { "epoch": 0.38344226579520696, "grad_norm": 1.600547648732173, "learning_rate": 1.4133131769591893e-05, "loss": 0.2282, "step": 4840 }, { "epoch": 0.38352148940384234, "grad_norm": 1.8133758667906779, "learning_rate": 1.4130795033426073e-05, "loss": 0.2406, "step": 4841 }, { "epoch": 0.3836007130124777, "grad_norm": 1.8636043586584148, "learning_rate": 1.4128458025267276e-05, "loss": 0.3167, "step": 4842 }, { "epoch": 0.3836799366211131, "grad_norm": 1.571216578107831, "learning_rate": 1.4126120745269382e-05, "loss": 0.266, "step": 4843 }, { "epoch": 0.3837591602297485, "grad_norm": 1.5994983578524533, "learning_rate": 1.4123783193586294e-05, "loss": 0.2493, "step": 4844 }, { "epoch": 0.3838383838383838, "grad_norm": 1.7438582773507751, "learning_rate": 1.4121445370371922e-05, "loss": 0.2571, "step": 4845 }, { "epoch": 0.3839176074470192, "grad_norm": 1.5970861836363022, "learning_rate": 1.4119107275780203e-05, "loss": 0.2394, "step": 4846 }, { "epoch": 0.3839968310556546, "grad_norm": 1.7682876578657611, "learning_rate": 1.4116768909965092e-05, "loss": 0.2029, "step": 4847 }, { "epoch": 0.38407605466428996, "grad_norm": 1.5758710726761345, "learning_rate": 1.4114430273080558e-05, "loss": 0.1753, "step": 4848 }, { "epoch": 0.38415527827292534, "grad_norm": 1.980330054356402, "learning_rate": 1.4112091365280585e-05, "loss": 0.3266, "step": 4849 }, { "epoch": 0.3842345018815607, "grad_norm": 1.6570488296570627, "learning_rate": 1.4109752186719181e-05, "loss": 0.3021, "step": 4850 }, { "epoch": 0.3843137254901961, "grad_norm": 1.8031285082403183, "learning_rate": 1.4107412737550372e-05, "loss": 0.2704, "step": 4851 }, { "epoch": 0.3843929490988314, "grad_norm": 2.0120707769336503, "learning_rate": 1.4105073017928199e-05, "loss": 0.299, "step": 4852 }, { "epoch": 0.3844721727074668, "grad_norm": 1.8520540178085845, "learning_rate": 1.4102733028006719e-05, "loss": 0.3679, "step": 4853 }, { "epoch": 0.3845513963161022, "grad_norm": 1.5037155668938742, "learning_rate": 1.410039276794001e-05, "loss": 0.2195, "step": 4854 }, { "epoch": 0.38463061992473757, "grad_norm": 1.7267930169537224, "learning_rate": 1.4098052237882168e-05, "loss": 0.2238, "step": 4855 }, { "epoch": 0.38470984353337295, "grad_norm": 2.101015355534599, "learning_rate": 1.4095711437987303e-05, "loss": 0.2565, "step": 4856 }, { "epoch": 0.38478906714200833, "grad_norm": 1.4687022751446832, "learning_rate": 1.4093370368409546e-05, "loss": 0.1753, "step": 4857 }, { "epoch": 0.3848682907506437, "grad_norm": 1.9474337981934573, "learning_rate": 1.409102902930305e-05, "loss": 0.2036, "step": 4858 }, { "epoch": 0.38494751435927904, "grad_norm": 2.2218361081809004, "learning_rate": 1.4088687420821974e-05, "loss": 0.3247, "step": 4859 }, { "epoch": 0.3850267379679144, "grad_norm": 2.1607974022322174, "learning_rate": 1.4086345543120508e-05, "loss": 0.277, "step": 4860 }, { "epoch": 0.3851059615765498, "grad_norm": 1.9326311646466325, "learning_rate": 1.4084003396352848e-05, "loss": 0.3242, "step": 4861 }, { "epoch": 0.3851851851851852, "grad_norm": 2.4904645284789497, "learning_rate": 1.4081660980673215e-05, "loss": 0.2546, "step": 4862 }, { "epoch": 0.38526440879382057, "grad_norm": 2.0836945441265953, "learning_rate": 1.4079318296235846e-05, "loss": 0.2172, "step": 4863 }, { "epoch": 0.38534363240245595, "grad_norm": 1.942911499323709, "learning_rate": 1.4076975343194996e-05, "loss": 0.2149, "step": 4864 }, { "epoch": 0.38542285601109133, "grad_norm": 2.337131037974696, "learning_rate": 1.4074632121704941e-05, "loss": 0.415, "step": 4865 }, { "epoch": 0.38550207961972666, "grad_norm": 1.8623991270514797, "learning_rate": 1.4072288631919962e-05, "loss": 0.2941, "step": 4866 }, { "epoch": 0.38558130322836204, "grad_norm": 1.671605506172729, "learning_rate": 1.406994487399437e-05, "loss": 0.2679, "step": 4867 }, { "epoch": 0.3856605268369974, "grad_norm": 1.661847975657666, "learning_rate": 1.4067600848082496e-05, "loss": 0.236, "step": 4868 }, { "epoch": 0.3857397504456328, "grad_norm": 1.6534594221970502, "learning_rate": 1.4065256554338675e-05, "loss": 0.2034, "step": 4869 }, { "epoch": 0.3858189740542682, "grad_norm": 1.6770660393433066, "learning_rate": 1.406291199291727e-05, "loss": 0.3157, "step": 4870 }, { "epoch": 0.38589819766290356, "grad_norm": 1.9520223591967838, "learning_rate": 1.4060567163972663e-05, "loss": 0.394, "step": 4871 }, { "epoch": 0.38597742127153895, "grad_norm": 1.6138797248703345, "learning_rate": 1.4058222067659244e-05, "loss": 0.274, "step": 4872 }, { "epoch": 0.38605664488017427, "grad_norm": 1.5375729277822336, "learning_rate": 1.405587670413143e-05, "loss": 0.2322, "step": 4873 }, { "epoch": 0.38613586848880965, "grad_norm": 2.139941920626899, "learning_rate": 1.405353107354365e-05, "loss": 0.2984, "step": 4874 }, { "epoch": 0.38621509209744503, "grad_norm": 3.22239182199651, "learning_rate": 1.4051185176050353e-05, "loss": 0.3102, "step": 4875 }, { "epoch": 0.3862943157060804, "grad_norm": 1.7545156430624596, "learning_rate": 1.4048839011806006e-05, "loss": 0.2105, "step": 4876 }, { "epoch": 0.3863735393147158, "grad_norm": 1.9145457092868745, "learning_rate": 1.404649258096509e-05, "loss": 0.3642, "step": 4877 }, { "epoch": 0.3864527629233512, "grad_norm": 2.0238697466619557, "learning_rate": 1.4044145883682108e-05, "loss": 0.242, "step": 4878 }, { "epoch": 0.3865319865319865, "grad_norm": 2.3406179509660565, "learning_rate": 1.4041798920111582e-05, "loss": 0.2447, "step": 4879 }, { "epoch": 0.3866112101406219, "grad_norm": 1.783521302038437, "learning_rate": 1.4039451690408042e-05, "loss": 0.2776, "step": 4880 }, { "epoch": 0.38669043374925727, "grad_norm": 1.562636401133143, "learning_rate": 1.4037104194726048e-05, "loss": 0.2216, "step": 4881 }, { "epoch": 0.38676965735789265, "grad_norm": 2.0934294543682626, "learning_rate": 1.4034756433220164e-05, "loss": 0.2941, "step": 4882 }, { "epoch": 0.38684888096652803, "grad_norm": 1.8277893187308183, "learning_rate": 1.4032408406044986e-05, "loss": 0.2246, "step": 4883 }, { "epoch": 0.3869281045751634, "grad_norm": 1.6428382442927187, "learning_rate": 1.4030060113355118e-05, "loss": 0.2189, "step": 4884 }, { "epoch": 0.3870073281837988, "grad_norm": 1.6434971900372488, "learning_rate": 1.402771155530518e-05, "loss": 0.2749, "step": 4885 }, { "epoch": 0.3870865517924341, "grad_norm": 1.7541482124622305, "learning_rate": 1.4025362732049816e-05, "loss": 0.2346, "step": 4886 }, { "epoch": 0.3871657754010695, "grad_norm": 2.0755705580105093, "learning_rate": 1.4023013643743688e-05, "loss": 0.2427, "step": 4887 }, { "epoch": 0.3872449990097049, "grad_norm": 1.6388321994048747, "learning_rate": 1.4020664290541465e-05, "loss": 0.1948, "step": 4888 }, { "epoch": 0.38732422261834026, "grad_norm": 1.7288886140854272, "learning_rate": 1.4018314672597848e-05, "loss": 0.3013, "step": 4889 }, { "epoch": 0.38740344622697565, "grad_norm": 1.6625003403145617, "learning_rate": 1.4015964790067545e-05, "loss": 0.2453, "step": 4890 }, { "epoch": 0.387482669835611, "grad_norm": 1.4656638157368234, "learning_rate": 1.401361464310528e-05, "loss": 0.1906, "step": 4891 }, { "epoch": 0.3875618934442464, "grad_norm": 2.1323091963914482, "learning_rate": 1.4011264231865807e-05, "loss": 0.325, "step": 4892 }, { "epoch": 0.38764111705288173, "grad_norm": 1.7364929363055688, "learning_rate": 1.4008913556503885e-05, "loss": 0.2628, "step": 4893 }, { "epoch": 0.3877203406615171, "grad_norm": 1.6629464357090071, "learning_rate": 1.4006562617174292e-05, "loss": 0.2416, "step": 4894 }, { "epoch": 0.3877995642701525, "grad_norm": 1.397683060163063, "learning_rate": 1.4004211414031831e-05, "loss": 0.2043, "step": 4895 }, { "epoch": 0.3878787878787879, "grad_norm": 2.0659803703039263, "learning_rate": 1.4001859947231316e-05, "loss": 0.2598, "step": 4896 }, { "epoch": 0.38795801148742326, "grad_norm": 2.1824148040858717, "learning_rate": 1.3999508216927578e-05, "loss": 0.2318, "step": 4897 }, { "epoch": 0.38803723509605864, "grad_norm": 2.442391324930562, "learning_rate": 1.399715622327547e-05, "loss": 0.3181, "step": 4898 }, { "epoch": 0.388116458704694, "grad_norm": 1.675419768814645, "learning_rate": 1.3994803966429854e-05, "loss": 0.2692, "step": 4899 }, { "epoch": 0.38819568231332935, "grad_norm": 1.8459992373106835, "learning_rate": 1.3992451446545624e-05, "loss": 0.2429, "step": 4900 }, { "epoch": 0.38827490592196473, "grad_norm": 1.99070140567718, "learning_rate": 1.3990098663777674e-05, "loss": 0.2673, "step": 4901 }, { "epoch": 0.3883541295306001, "grad_norm": 1.4336572724242114, "learning_rate": 1.3987745618280925e-05, "loss": 0.2015, "step": 4902 }, { "epoch": 0.3884333531392355, "grad_norm": 1.8377236867934492, "learning_rate": 1.3985392310210318e-05, "loss": 0.3081, "step": 4903 }, { "epoch": 0.3885125767478709, "grad_norm": 1.8703203058869515, "learning_rate": 1.39830387397208e-05, "loss": 0.2521, "step": 4904 }, { "epoch": 0.38859180035650626, "grad_norm": 1.6717876731784511, "learning_rate": 1.3980684906967348e-05, "loss": 0.2553, "step": 4905 }, { "epoch": 0.38867102396514164, "grad_norm": 1.910780058119731, "learning_rate": 1.3978330812104947e-05, "loss": 0.3801, "step": 4906 }, { "epoch": 0.38875024757377696, "grad_norm": 2.363934119931117, "learning_rate": 1.3975976455288607e-05, "loss": 0.3791, "step": 4907 }, { "epoch": 0.38882947118241235, "grad_norm": 1.6532092308011639, "learning_rate": 1.397362183667335e-05, "loss": 0.2529, "step": 4908 }, { "epoch": 0.3889086947910477, "grad_norm": 2.4691674335767217, "learning_rate": 1.3971266956414211e-05, "loss": 0.276, "step": 4909 }, { "epoch": 0.3889879183996831, "grad_norm": 1.937524828043644, "learning_rate": 1.3968911814666252e-05, "loss": 0.2142, "step": 4910 }, { "epoch": 0.3890671420083185, "grad_norm": 1.4871096935617476, "learning_rate": 1.3966556411584548e-05, "loss": 0.228, "step": 4911 }, { "epoch": 0.38914636561695387, "grad_norm": 1.4732108921826366, "learning_rate": 1.396420074732419e-05, "loss": 0.2159, "step": 4912 }, { "epoch": 0.38922558922558925, "grad_norm": 1.6393131903376357, "learning_rate": 1.396184482204029e-05, "loss": 0.2829, "step": 4913 }, { "epoch": 0.3893048128342246, "grad_norm": 2.1166159582836257, "learning_rate": 1.3959488635887967e-05, "loss": 0.2827, "step": 4914 }, { "epoch": 0.38938403644285996, "grad_norm": 2.0861565135264044, "learning_rate": 1.3957132189022373e-05, "loss": 0.3116, "step": 4915 }, { "epoch": 0.38946326005149534, "grad_norm": 1.7402310602116924, "learning_rate": 1.3954775481598665e-05, "loss": 0.2775, "step": 4916 }, { "epoch": 0.3895424836601307, "grad_norm": 1.7299115685930881, "learning_rate": 1.3952418513772016e-05, "loss": 0.3166, "step": 4917 }, { "epoch": 0.3896217072687661, "grad_norm": 1.8177487417502542, "learning_rate": 1.3950061285697629e-05, "loss": 0.2581, "step": 4918 }, { "epoch": 0.3897009308774015, "grad_norm": 2.0557599088322576, "learning_rate": 1.3947703797530716e-05, "loss": 0.2265, "step": 4919 }, { "epoch": 0.3897801544860368, "grad_norm": 2.426335853471312, "learning_rate": 1.3945346049426498e-05, "loss": 0.3799, "step": 4920 }, { "epoch": 0.3898593780946722, "grad_norm": 1.39889361741148, "learning_rate": 1.3942988041540226e-05, "loss": 0.1728, "step": 4921 }, { "epoch": 0.3899386017033076, "grad_norm": 1.8011418967853858, "learning_rate": 1.394062977402717e-05, "loss": 0.1954, "step": 4922 }, { "epoch": 0.39001782531194296, "grad_norm": 1.8324520880123258, "learning_rate": 1.3938271247042601e-05, "loss": 0.2852, "step": 4923 }, { "epoch": 0.39009704892057834, "grad_norm": 1.70950428343249, "learning_rate": 1.3935912460741818e-05, "loss": 0.2401, "step": 4924 }, { "epoch": 0.3901762725292137, "grad_norm": 1.82562064446481, "learning_rate": 1.3933553415280142e-05, "loss": 0.2978, "step": 4925 }, { "epoch": 0.3902554961378491, "grad_norm": 1.8792160520532144, "learning_rate": 1.3931194110812896e-05, "loss": 0.3616, "step": 4926 }, { "epoch": 0.3903347197464844, "grad_norm": 2.086651976264605, "learning_rate": 1.3928834547495438e-05, "loss": 0.3373, "step": 4927 }, { "epoch": 0.3904139433551198, "grad_norm": 1.6151752229474026, "learning_rate": 1.3926474725483125e-05, "loss": 0.2864, "step": 4928 }, { "epoch": 0.3904931669637552, "grad_norm": 1.4121292564210077, "learning_rate": 1.3924114644931346e-05, "loss": 0.1935, "step": 4929 }, { "epoch": 0.39057239057239057, "grad_norm": 1.7320009973356407, "learning_rate": 1.3921754305995501e-05, "loss": 0.2852, "step": 4930 }, { "epoch": 0.39065161418102595, "grad_norm": 1.9151171410205767, "learning_rate": 1.3919393708831004e-05, "loss": 0.3141, "step": 4931 }, { "epoch": 0.39073083778966133, "grad_norm": 2.6039328518843243, "learning_rate": 1.3917032853593289e-05, "loss": 0.4421, "step": 4932 }, { "epoch": 0.3908100613982967, "grad_norm": 2.250046779436568, "learning_rate": 1.3914671740437811e-05, "loss": 0.2321, "step": 4933 }, { "epoch": 0.39088928500693204, "grad_norm": 1.566608093385484, "learning_rate": 1.3912310369520032e-05, "loss": 0.2671, "step": 4934 }, { "epoch": 0.3909685086155674, "grad_norm": 2.0292065487561755, "learning_rate": 1.3909948740995442e-05, "loss": 0.346, "step": 4935 }, { "epoch": 0.3910477322242028, "grad_norm": 1.9078504796860136, "learning_rate": 1.3907586855019538e-05, "loss": 0.3763, "step": 4936 }, { "epoch": 0.3911269558328382, "grad_norm": 1.8957842081482812, "learning_rate": 1.3905224711747844e-05, "loss": 0.3024, "step": 4937 }, { "epoch": 0.39120617944147357, "grad_norm": 1.6406305258588172, "learning_rate": 1.3902862311335896e-05, "loss": 0.2617, "step": 4938 }, { "epoch": 0.39128540305010895, "grad_norm": 1.4694186094641686, "learning_rate": 1.390049965393924e-05, "loss": 0.1937, "step": 4939 }, { "epoch": 0.39136462665874433, "grad_norm": 2.177495606638902, "learning_rate": 1.3898136739713451e-05, "loss": 0.2414, "step": 4940 }, { "epoch": 0.39144385026737966, "grad_norm": 1.2611322495580886, "learning_rate": 1.3895773568814118e-05, "loss": 0.1604, "step": 4941 }, { "epoch": 0.39152307387601504, "grad_norm": 1.6107051489866262, "learning_rate": 1.3893410141396835e-05, "loss": 0.337, "step": 4942 }, { "epoch": 0.3916022974846504, "grad_norm": 1.993077001132843, "learning_rate": 1.3891046457617233e-05, "loss": 0.2665, "step": 4943 }, { "epoch": 0.3916815210932858, "grad_norm": 1.6475760015632661, "learning_rate": 1.388868251763094e-05, "loss": 0.2571, "step": 4944 }, { "epoch": 0.3917607447019212, "grad_norm": 1.573292172431593, "learning_rate": 1.3886318321593614e-05, "loss": 0.247, "step": 4945 }, { "epoch": 0.39183996831055656, "grad_norm": 1.8105642431196092, "learning_rate": 1.388395386966093e-05, "loss": 0.2616, "step": 4946 }, { "epoch": 0.39191919191919194, "grad_norm": 2.189272330131032, "learning_rate": 1.388158916198857e-05, "loss": 0.2842, "step": 4947 }, { "epoch": 0.39199841552782727, "grad_norm": 1.8707833903357227, "learning_rate": 1.3879224198732239e-05, "loss": 0.2423, "step": 4948 }, { "epoch": 0.39207763913646265, "grad_norm": 1.6230767377699655, "learning_rate": 1.3876858980047665e-05, "loss": 0.2408, "step": 4949 }, { "epoch": 0.39215686274509803, "grad_norm": 1.2470554971599215, "learning_rate": 1.3874493506090578e-05, "loss": 0.1702, "step": 4950 }, { "epoch": 0.3922360863537334, "grad_norm": 1.6786225725593404, "learning_rate": 1.3872127777016739e-05, "loss": 0.2646, "step": 4951 }, { "epoch": 0.3923153099623688, "grad_norm": 2.0102372643919417, "learning_rate": 1.3869761792981915e-05, "loss": 0.2323, "step": 4952 }, { "epoch": 0.3923945335710042, "grad_norm": 2.183189800344821, "learning_rate": 1.3867395554141899e-05, "loss": 0.3992, "step": 4953 }, { "epoch": 0.39247375717963956, "grad_norm": 1.6514654279951932, "learning_rate": 1.3865029060652493e-05, "loss": 0.2408, "step": 4954 }, { "epoch": 0.3925529807882749, "grad_norm": 2.023566919321999, "learning_rate": 1.3862662312669518e-05, "loss": 0.2358, "step": 4955 }, { "epoch": 0.39263220439691027, "grad_norm": 1.3332265181540435, "learning_rate": 1.386029531034882e-05, "loss": 0.1371, "step": 4956 }, { "epoch": 0.39271142800554565, "grad_norm": 1.6769897130765947, "learning_rate": 1.385792805384625e-05, "loss": 0.2453, "step": 4957 }, { "epoch": 0.39279065161418103, "grad_norm": 1.9500711749279414, "learning_rate": 1.3855560543317679e-05, "loss": 0.3715, "step": 4958 }, { "epoch": 0.3928698752228164, "grad_norm": 1.7401008467798307, "learning_rate": 1.3853192778919e-05, "loss": 0.1431, "step": 4959 }, { "epoch": 0.3929490988314518, "grad_norm": 1.6368249447133931, "learning_rate": 1.3850824760806115e-05, "loss": 0.3095, "step": 4960 }, { "epoch": 0.3930283224400871, "grad_norm": 2.5225017538825147, "learning_rate": 1.384845648913495e-05, "loss": 0.3605, "step": 4961 }, { "epoch": 0.3931075460487225, "grad_norm": 1.8799307507834746, "learning_rate": 1.3846087964061442e-05, "loss": 0.2772, "step": 4962 }, { "epoch": 0.3931867696573579, "grad_norm": 1.8581621904477863, "learning_rate": 1.3843719185741548e-05, "loss": 0.3554, "step": 4963 }, { "epoch": 0.39326599326599326, "grad_norm": 1.9002845174617342, "learning_rate": 1.3841350154331239e-05, "loss": 0.3284, "step": 4964 }, { "epoch": 0.39334521687462864, "grad_norm": 1.8682381687798193, "learning_rate": 1.383898086998651e-05, "loss": 0.2141, "step": 4965 }, { "epoch": 0.393424440483264, "grad_norm": 1.5478800222173965, "learning_rate": 1.3836611332863356e-05, "loss": 0.2497, "step": 4966 }, { "epoch": 0.3935036640918994, "grad_norm": 2.103979040883339, "learning_rate": 1.383424154311781e-05, "loss": 0.2163, "step": 4967 }, { "epoch": 0.39358288770053473, "grad_norm": 2.1554333998972344, "learning_rate": 1.383187150090591e-05, "loss": 0.3397, "step": 4968 }, { "epoch": 0.3936621113091701, "grad_norm": 2.1009666264119833, "learning_rate": 1.3829501206383704e-05, "loss": 0.2637, "step": 4969 }, { "epoch": 0.3937413349178055, "grad_norm": 1.80394897108238, "learning_rate": 1.3827130659707275e-05, "loss": 0.2668, "step": 4970 }, { "epoch": 0.3938205585264409, "grad_norm": 2.222688601615571, "learning_rate": 1.3824759861032704e-05, "loss": 0.3653, "step": 4971 }, { "epoch": 0.39389978213507626, "grad_norm": 2.0281326596183153, "learning_rate": 1.38223888105161e-05, "loss": 0.2443, "step": 4972 }, { "epoch": 0.39397900574371164, "grad_norm": 1.5918307594741923, "learning_rate": 1.3820017508313587e-05, "loss": 0.2118, "step": 4973 }, { "epoch": 0.394058229352347, "grad_norm": 1.7021430232938264, "learning_rate": 1.3817645954581301e-05, "loss": 0.2281, "step": 4974 }, { "epoch": 0.39413745296098235, "grad_norm": 2.1696884719380205, "learning_rate": 1.3815274149475395e-05, "loss": 0.3571, "step": 4975 }, { "epoch": 0.39421667656961773, "grad_norm": 1.8886359409169726, "learning_rate": 1.3812902093152047e-05, "loss": 0.2554, "step": 4976 }, { "epoch": 0.3942959001782531, "grad_norm": 1.5331896532041256, "learning_rate": 1.3810529785767444e-05, "loss": 0.2454, "step": 4977 }, { "epoch": 0.3943751237868885, "grad_norm": 2.4534832929597057, "learning_rate": 1.3808157227477788e-05, "loss": 0.2577, "step": 4978 }, { "epoch": 0.3944543473955239, "grad_norm": 1.983776372229426, "learning_rate": 1.3805784418439303e-05, "loss": 0.3195, "step": 4979 }, { "epoch": 0.39453357100415926, "grad_norm": 1.8166724279710076, "learning_rate": 1.3803411358808222e-05, "loss": 0.3308, "step": 4980 }, { "epoch": 0.39461279461279464, "grad_norm": 2.1646010012152463, "learning_rate": 1.3801038048740811e-05, "loss": 0.3745, "step": 4981 }, { "epoch": 0.39469201822142996, "grad_norm": 1.7175245046194172, "learning_rate": 1.379866448839333e-05, "loss": 0.2866, "step": 4982 }, { "epoch": 0.39477124183006534, "grad_norm": 1.4705239764687448, "learning_rate": 1.379629067792207e-05, "loss": 0.2032, "step": 4983 }, { "epoch": 0.3948504654387007, "grad_norm": 1.7308900507702025, "learning_rate": 1.3793916617483338e-05, "loss": 0.3028, "step": 4984 }, { "epoch": 0.3949296890473361, "grad_norm": 1.688387068915595, "learning_rate": 1.379154230723345e-05, "loss": 0.3031, "step": 4985 }, { "epoch": 0.3950089126559715, "grad_norm": 1.468650843878097, "learning_rate": 1.3789167747328746e-05, "loss": 0.2904, "step": 4986 }, { "epoch": 0.39508813626460687, "grad_norm": 1.6078849983467876, "learning_rate": 1.3786792937925576e-05, "loss": 0.2307, "step": 4987 }, { "epoch": 0.39516735987324225, "grad_norm": 1.6730174771707977, "learning_rate": 1.3784417879180314e-05, "loss": 0.322, "step": 4988 }, { "epoch": 0.3952465834818776, "grad_norm": 1.360165546854493, "learning_rate": 1.3782042571249343e-05, "loss": 0.2055, "step": 4989 }, { "epoch": 0.39532580709051296, "grad_norm": 1.6439129519005697, "learning_rate": 1.3779667014289067e-05, "loss": 0.2929, "step": 4990 }, { "epoch": 0.39540503069914834, "grad_norm": 1.9872628282521692, "learning_rate": 1.3777291208455902e-05, "loss": 0.279, "step": 4991 }, { "epoch": 0.3954842543077837, "grad_norm": 1.9328544820408649, "learning_rate": 1.3774915153906292e-05, "loss": 0.3648, "step": 4992 }, { "epoch": 0.3955634779164191, "grad_norm": 2.2624746801900093, "learning_rate": 1.377253885079668e-05, "loss": 0.3193, "step": 4993 }, { "epoch": 0.3956427015250545, "grad_norm": 2.113686190311988, "learning_rate": 1.3770162299283535e-05, "loss": 0.3202, "step": 4994 }, { "epoch": 0.39572192513368987, "grad_norm": 1.8139399997624082, "learning_rate": 1.3767785499523347e-05, "loss": 0.3553, "step": 4995 }, { "epoch": 0.3958011487423252, "grad_norm": 1.878926983384108, "learning_rate": 1.376540845167261e-05, "loss": 0.2716, "step": 4996 }, { "epoch": 0.3958803723509606, "grad_norm": 1.5634011543611877, "learning_rate": 1.3763031155887847e-05, "loss": 0.1886, "step": 4997 }, { "epoch": 0.39595959595959596, "grad_norm": 1.4121320013549208, "learning_rate": 1.3760653612325588e-05, "loss": 0.1379, "step": 4998 }, { "epoch": 0.39603881956823134, "grad_norm": 1.8214632320985995, "learning_rate": 1.3758275821142382e-05, "loss": 0.3369, "step": 4999 }, { "epoch": 0.3961180431768667, "grad_norm": 1.7973093353773184, "learning_rate": 1.3755897782494803e-05, "loss": 0.249, "step": 5000 }, { "epoch": 0.3961972667855021, "grad_norm": 1.5617405475102666, "learning_rate": 1.375351949653942e-05, "loss": 0.2417, "step": 5001 }, { "epoch": 0.3962764903941374, "grad_norm": 1.7471023862147215, "learning_rate": 1.375114096343284e-05, "loss": 0.2877, "step": 5002 }, { "epoch": 0.3963557140027728, "grad_norm": 1.64337971421, "learning_rate": 1.3748762183331681e-05, "loss": 0.2191, "step": 5003 }, { "epoch": 0.3964349376114082, "grad_norm": 1.600424142825091, "learning_rate": 1.3746383156392566e-05, "loss": 0.2636, "step": 5004 }, { "epoch": 0.39651416122004357, "grad_norm": 1.7907144513952513, "learning_rate": 1.374400388277215e-05, "loss": 0.2417, "step": 5005 }, { "epoch": 0.39659338482867895, "grad_norm": 1.5228632327356368, "learning_rate": 1.3741624362627091e-05, "loss": 0.1961, "step": 5006 }, { "epoch": 0.39667260843731433, "grad_norm": 1.459007370078432, "learning_rate": 1.373924459611407e-05, "loss": 0.2281, "step": 5007 }, { "epoch": 0.3967518320459497, "grad_norm": 1.741468124079045, "learning_rate": 1.3736864583389789e-05, "loss": 0.3022, "step": 5008 }, { "epoch": 0.39683105565458504, "grad_norm": 1.8512721155465253, "learning_rate": 1.373448432461095e-05, "loss": 0.2493, "step": 5009 }, { "epoch": 0.3969102792632204, "grad_norm": 1.794947096640278, "learning_rate": 1.373210381993429e-05, "loss": 0.2856, "step": 5010 }, { "epoch": 0.3969895028718558, "grad_norm": 1.4903863045899304, "learning_rate": 1.3729723069516554e-05, "loss": 0.233, "step": 5011 }, { "epoch": 0.3970687264804912, "grad_norm": 1.4396502435930123, "learning_rate": 1.3727342073514497e-05, "loss": 0.1945, "step": 5012 }, { "epoch": 0.39714795008912657, "grad_norm": 1.7427975283031958, "learning_rate": 1.3724960832084902e-05, "loss": 0.3438, "step": 5013 }, { "epoch": 0.39722717369776195, "grad_norm": 1.799696050369368, "learning_rate": 1.3722579345384558e-05, "loss": 0.2438, "step": 5014 }, { "epoch": 0.39730639730639733, "grad_norm": 1.6612991846892513, "learning_rate": 1.3720197613570272e-05, "loss": 0.2728, "step": 5015 }, { "epoch": 0.39738562091503266, "grad_norm": 1.5931453059655685, "learning_rate": 1.3717815636798879e-05, "loss": 0.181, "step": 5016 }, { "epoch": 0.39746484452366804, "grad_norm": 1.7223887228104913, "learning_rate": 1.3715433415227212e-05, "loss": 0.2378, "step": 5017 }, { "epoch": 0.3975440681323034, "grad_norm": 1.7525502529847075, "learning_rate": 1.3713050949012134e-05, "loss": 0.2196, "step": 5018 }, { "epoch": 0.3976232917409388, "grad_norm": 1.8684258047792184, "learning_rate": 1.3710668238310519e-05, "loss": 0.3166, "step": 5019 }, { "epoch": 0.3977025153495742, "grad_norm": 1.9444041543805182, "learning_rate": 1.3708285283279252e-05, "loss": 0.2784, "step": 5020 }, { "epoch": 0.39778173895820956, "grad_norm": 2.0956591174241432, "learning_rate": 1.3705902084075244e-05, "loss": 0.3477, "step": 5021 }, { "epoch": 0.39786096256684494, "grad_norm": 2.1816546635619587, "learning_rate": 1.3703518640855414e-05, "loss": 0.357, "step": 5022 }, { "epoch": 0.39794018617548027, "grad_norm": 1.5634312168214664, "learning_rate": 1.37011349537767e-05, "loss": 0.3307, "step": 5023 }, { "epoch": 0.39801940978411565, "grad_norm": 1.944784286465532, "learning_rate": 1.3698751022996061e-05, "loss": 0.2913, "step": 5024 }, { "epoch": 0.39809863339275103, "grad_norm": 2.0047690184686378, "learning_rate": 1.3696366848670464e-05, "loss": 0.3413, "step": 5025 }, { "epoch": 0.3981778570013864, "grad_norm": 1.7360612612633144, "learning_rate": 1.3693982430956896e-05, "loss": 0.2573, "step": 5026 }, { "epoch": 0.3982570806100218, "grad_norm": 1.4386543667099085, "learning_rate": 1.369159777001236e-05, "loss": 0.194, "step": 5027 }, { "epoch": 0.3983363042186572, "grad_norm": 1.5948091834397193, "learning_rate": 1.368921286599387e-05, "loss": 0.3183, "step": 5028 }, { "epoch": 0.39841552782729256, "grad_norm": 1.8623985200814455, "learning_rate": 1.368682771905847e-05, "loss": 0.273, "step": 5029 }, { "epoch": 0.3984947514359279, "grad_norm": 1.8322071548196106, "learning_rate": 1.3684442329363199e-05, "loss": 0.3161, "step": 5030 }, { "epoch": 0.39857397504456327, "grad_norm": 1.9974121310115824, "learning_rate": 1.368205669706513e-05, "loss": 0.2946, "step": 5031 }, { "epoch": 0.39865319865319865, "grad_norm": 1.849566686430218, "learning_rate": 1.3679670822321347e-05, "loss": 0.4029, "step": 5032 }, { "epoch": 0.39873242226183403, "grad_norm": 1.6552099342540687, "learning_rate": 1.3677284705288943e-05, "loss": 0.2915, "step": 5033 }, { "epoch": 0.3988116458704694, "grad_norm": 1.9924908492626303, "learning_rate": 1.3674898346125036e-05, "loss": 0.2559, "step": 5034 }, { "epoch": 0.3988908694791048, "grad_norm": 2.1660942439728603, "learning_rate": 1.3672511744986756e-05, "loss": 0.3949, "step": 5035 }, { "epoch": 0.3989700930877402, "grad_norm": 1.7103848494247698, "learning_rate": 1.3670124902031248e-05, "loss": 0.3066, "step": 5036 }, { "epoch": 0.3990493166963755, "grad_norm": 1.4422331394262926, "learning_rate": 1.3667737817415679e-05, "loss": 0.1716, "step": 5037 }, { "epoch": 0.3991285403050109, "grad_norm": 1.5424422592379177, "learning_rate": 1.3665350491297215e-05, "loss": 0.216, "step": 5038 }, { "epoch": 0.39920776391364626, "grad_norm": 1.6087064335496908, "learning_rate": 1.3662962923833063e-05, "loss": 0.2535, "step": 5039 }, { "epoch": 0.39928698752228164, "grad_norm": 1.7486998482405602, "learning_rate": 1.3660575115180427e-05, "loss": 0.2823, "step": 5040 }, { "epoch": 0.399366211130917, "grad_norm": 1.5491830491544707, "learning_rate": 1.3658187065496533e-05, "loss": 0.2773, "step": 5041 }, { "epoch": 0.3994454347395524, "grad_norm": 1.7637106301081635, "learning_rate": 1.365579877493862e-05, "loss": 0.3368, "step": 5042 }, { "epoch": 0.39952465834818773, "grad_norm": 1.2304973791940805, "learning_rate": 1.3653410243663953e-05, "loss": 0.18, "step": 5043 }, { "epoch": 0.3996038819568231, "grad_norm": 2.137849827852629, "learning_rate": 1.3651021471829797e-05, "loss": 0.328, "step": 5044 }, { "epoch": 0.3996831055654585, "grad_norm": 1.6510817194895882, "learning_rate": 1.3648632459593444e-05, "loss": 0.342, "step": 5045 }, { "epoch": 0.3997623291740939, "grad_norm": 1.7749633383057926, "learning_rate": 1.3646243207112204e-05, "loss": 0.2752, "step": 5046 }, { "epoch": 0.39984155278272926, "grad_norm": 1.7896738519741786, "learning_rate": 1.3643853714543389e-05, "loss": 0.2211, "step": 5047 }, { "epoch": 0.39992077639136464, "grad_norm": 1.798033371382471, "learning_rate": 1.3641463982044343e-05, "loss": 0.2527, "step": 5048 }, { "epoch": 0.4, "grad_norm": 1.6738869725704306, "learning_rate": 1.3639074009772412e-05, "loss": 0.3087, "step": 5049 }, { "epoch": 0.40007922360863535, "grad_norm": 1.9034250653277713, "learning_rate": 1.3636683797884971e-05, "loss": 0.256, "step": 5050 }, { "epoch": 0.40015844721727073, "grad_norm": 2.3394754442178867, "learning_rate": 1.36342933465394e-05, "loss": 0.2338, "step": 5051 }, { "epoch": 0.4002376708259061, "grad_norm": 1.9322442787888647, "learning_rate": 1.3631902655893096e-05, "loss": 0.2931, "step": 5052 }, { "epoch": 0.4003168944345415, "grad_norm": 1.8460460956167999, "learning_rate": 1.3629511726103482e-05, "loss": 0.3765, "step": 5053 }, { "epoch": 0.4003961180431769, "grad_norm": 1.9128762740705467, "learning_rate": 1.3627120557327982e-05, "loss": 0.3223, "step": 5054 }, { "epoch": 0.40047534165181226, "grad_norm": 2.121292997722571, "learning_rate": 1.3624729149724047e-05, "loss": 0.3657, "step": 5055 }, { "epoch": 0.40055456526044764, "grad_norm": 2.062850495139018, "learning_rate": 1.362233750344914e-05, "loss": 0.2723, "step": 5056 }, { "epoch": 0.40063378886908296, "grad_norm": 1.5442796276853339, "learning_rate": 1.3619945618660735e-05, "loss": 0.1732, "step": 5057 }, { "epoch": 0.40071301247771834, "grad_norm": 1.5892232204371144, "learning_rate": 1.3617553495516332e-05, "loss": 0.2607, "step": 5058 }, { "epoch": 0.4007922360863537, "grad_norm": 1.6667206027788657, "learning_rate": 1.3615161134173435e-05, "loss": 0.201, "step": 5059 }, { "epoch": 0.4008714596949891, "grad_norm": 1.7295000969213772, "learning_rate": 1.3612768534789573e-05, "loss": 0.2719, "step": 5060 }, { "epoch": 0.4009506833036245, "grad_norm": 2.139889963806682, "learning_rate": 1.3610375697522287e-05, "loss": 0.2277, "step": 5061 }, { "epoch": 0.40102990691225987, "grad_norm": 1.8895629784509678, "learning_rate": 1.3607982622529135e-05, "loss": 0.3163, "step": 5062 }, { "epoch": 0.40110913052089525, "grad_norm": 1.5585929795452302, "learning_rate": 1.3605589309967686e-05, "loss": 0.1831, "step": 5063 }, { "epoch": 0.4011883541295306, "grad_norm": 2.760265621703642, "learning_rate": 1.3603195759995531e-05, "loss": 0.2304, "step": 5064 }, { "epoch": 0.40126757773816596, "grad_norm": 1.6261390232067319, "learning_rate": 1.3600801972770272e-05, "loss": 0.275, "step": 5065 }, { "epoch": 0.40134680134680134, "grad_norm": 1.6511663987681449, "learning_rate": 1.3598407948449528e-05, "loss": 0.2163, "step": 5066 }, { "epoch": 0.4014260249554367, "grad_norm": 2.0763610174348415, "learning_rate": 1.3596013687190936e-05, "loss": 0.3251, "step": 5067 }, { "epoch": 0.4015052485640721, "grad_norm": 1.8276755914270248, "learning_rate": 1.3593619189152146e-05, "loss": 0.2484, "step": 5068 }, { "epoch": 0.4015844721727075, "grad_norm": 2.3228716331882953, "learning_rate": 1.3591224454490824e-05, "loss": 0.4665, "step": 5069 }, { "epoch": 0.40166369578134287, "grad_norm": 1.711428802085721, "learning_rate": 1.3588829483364652e-05, "loss": 0.2556, "step": 5070 }, { "epoch": 0.4017429193899782, "grad_norm": 1.8971546017599332, "learning_rate": 1.3586434275931324e-05, "loss": 0.2798, "step": 5071 }, { "epoch": 0.4018221429986136, "grad_norm": 1.7116477180305871, "learning_rate": 1.358403883234856e-05, "loss": 0.2013, "step": 5072 }, { "epoch": 0.40190136660724896, "grad_norm": 2.2459854696588915, "learning_rate": 1.358164315277408e-05, "loss": 0.3362, "step": 5073 }, { "epoch": 0.40198059021588434, "grad_norm": 2.038729214693376, "learning_rate": 1.3579247237365634e-05, "loss": 0.2928, "step": 5074 }, { "epoch": 0.4020598138245197, "grad_norm": 1.8371381398537223, "learning_rate": 1.357685108628098e-05, "loss": 0.2402, "step": 5075 }, { "epoch": 0.4021390374331551, "grad_norm": 1.7092859616420752, "learning_rate": 1.3574454699677893e-05, "loss": 0.2464, "step": 5076 }, { "epoch": 0.4022182610417904, "grad_norm": 1.762109324137486, "learning_rate": 1.357205807771416e-05, "loss": 0.2883, "step": 5077 }, { "epoch": 0.4022974846504258, "grad_norm": 1.7286172338398547, "learning_rate": 1.3569661220547596e-05, "loss": 0.2199, "step": 5078 }, { "epoch": 0.4023767082590612, "grad_norm": 2.0910441215810405, "learning_rate": 1.3567264128336013e-05, "loss": 0.2181, "step": 5079 }, { "epoch": 0.40245593186769657, "grad_norm": 1.6132572458514118, "learning_rate": 1.3564866801237254e-05, "loss": 0.2482, "step": 5080 }, { "epoch": 0.40253515547633195, "grad_norm": 1.3883172375145247, "learning_rate": 1.3562469239409166e-05, "loss": 0.1241, "step": 5081 }, { "epoch": 0.40261437908496733, "grad_norm": 1.7380360527567251, "learning_rate": 1.3560071443009622e-05, "loss": 0.2233, "step": 5082 }, { "epoch": 0.4026936026936027, "grad_norm": 1.6011516627954177, "learning_rate": 1.3557673412196504e-05, "loss": 0.25, "step": 5083 }, { "epoch": 0.40277282630223804, "grad_norm": 1.637948566671406, "learning_rate": 1.3555275147127709e-05, "loss": 0.2378, "step": 5084 }, { "epoch": 0.4028520499108734, "grad_norm": 1.6529579288993497, "learning_rate": 1.3552876647961151e-05, "loss": 0.2397, "step": 5085 }, { "epoch": 0.4029312735195088, "grad_norm": 1.9768099106877912, "learning_rate": 1.3550477914854766e-05, "loss": 0.3139, "step": 5086 }, { "epoch": 0.4030104971281442, "grad_norm": 1.7441395851358676, "learning_rate": 1.3548078947966487e-05, "loss": 0.1639, "step": 5087 }, { "epoch": 0.40308972073677957, "grad_norm": 1.4630896070550634, "learning_rate": 1.3545679747454286e-05, "loss": 0.1754, "step": 5088 }, { "epoch": 0.40316894434541495, "grad_norm": 1.5083975274614427, "learning_rate": 1.3543280313476135e-05, "loss": 0.2311, "step": 5089 }, { "epoch": 0.40324816795405033, "grad_norm": 1.8381713048763741, "learning_rate": 1.3540880646190022e-05, "loss": 0.2806, "step": 5090 }, { "epoch": 0.40332739156268566, "grad_norm": 2.004847224048754, "learning_rate": 1.353848074575396e-05, "loss": 0.3118, "step": 5091 }, { "epoch": 0.40340661517132104, "grad_norm": 1.3312190726986168, "learning_rate": 1.3536080612325963e-05, "loss": 0.207, "step": 5092 }, { "epoch": 0.4034858387799564, "grad_norm": 1.8958888059753587, "learning_rate": 1.3533680246064073e-05, "loss": 0.3282, "step": 5093 }, { "epoch": 0.4035650623885918, "grad_norm": 1.7526014917456818, "learning_rate": 1.3531279647126342e-05, "loss": 0.2669, "step": 5094 }, { "epoch": 0.4036442859972272, "grad_norm": 1.8792939581519124, "learning_rate": 1.352887881567084e-05, "loss": 0.2395, "step": 5095 }, { "epoch": 0.40372350960586256, "grad_norm": 1.834515236887629, "learning_rate": 1.3526477751855645e-05, "loss": 0.3298, "step": 5096 }, { "epoch": 0.40380273321449794, "grad_norm": 1.3616851076933911, "learning_rate": 1.3524076455838859e-05, "loss": 0.1685, "step": 5097 }, { "epoch": 0.40388195682313327, "grad_norm": 1.9097697367325288, "learning_rate": 1.3521674927778594e-05, "loss": 0.2278, "step": 5098 }, { "epoch": 0.40396118043176865, "grad_norm": 2.0662021147785494, "learning_rate": 1.3519273167832982e-05, "loss": 0.3435, "step": 5099 }, { "epoch": 0.40404040404040403, "grad_norm": 2.0486283907732865, "learning_rate": 1.3516871176160166e-05, "loss": 0.3015, "step": 5100 }, { "epoch": 0.4041196276490394, "grad_norm": 1.7394115483550667, "learning_rate": 1.3514468952918303e-05, "loss": 0.237, "step": 5101 }, { "epoch": 0.4041988512576748, "grad_norm": 1.6005302850908452, "learning_rate": 1.3512066498265572e-05, "loss": 0.3126, "step": 5102 }, { "epoch": 0.4042780748663102, "grad_norm": 1.7243835979696784, "learning_rate": 1.3509663812360161e-05, "loss": 0.2343, "step": 5103 }, { "epoch": 0.40435729847494556, "grad_norm": 1.9792194218805517, "learning_rate": 1.3507260895360274e-05, "loss": 0.2746, "step": 5104 }, { "epoch": 0.4044365220835809, "grad_norm": 1.7719273628646164, "learning_rate": 1.3504857747424133e-05, "loss": 0.2343, "step": 5105 }, { "epoch": 0.40451574569221627, "grad_norm": 1.7110158744769208, "learning_rate": 1.3502454368709973e-05, "loss": 0.2644, "step": 5106 }, { "epoch": 0.40459496930085165, "grad_norm": 1.8312623245381916, "learning_rate": 1.3500050759376052e-05, "loss": 0.3027, "step": 5107 }, { "epoch": 0.40467419290948703, "grad_norm": 1.5730144082378932, "learning_rate": 1.3497646919580623e-05, "loss": 0.2614, "step": 5108 }, { "epoch": 0.4047534165181224, "grad_norm": 1.9684226159090965, "learning_rate": 1.3495242849481973e-05, "loss": 0.2642, "step": 5109 }, { "epoch": 0.4048326401267578, "grad_norm": 2.256453771822014, "learning_rate": 1.3492838549238406e-05, "loss": 0.3131, "step": 5110 }, { "epoch": 0.4049118637353932, "grad_norm": 1.6807385618522714, "learning_rate": 1.349043401900822e-05, "loss": 0.3136, "step": 5111 }, { "epoch": 0.4049910873440285, "grad_norm": 1.787421441463756, "learning_rate": 1.348802925894975e-05, "loss": 0.2853, "step": 5112 }, { "epoch": 0.4050703109526639, "grad_norm": 1.7384424910122578, "learning_rate": 1.348562426922134e-05, "loss": 0.3134, "step": 5113 }, { "epoch": 0.40514953456129926, "grad_norm": 2.1141066046581334, "learning_rate": 1.3483219049981343e-05, "loss": 0.2466, "step": 5114 }, { "epoch": 0.40522875816993464, "grad_norm": 1.8931192920210242, "learning_rate": 1.348081360138813e-05, "loss": 0.2765, "step": 5115 }, { "epoch": 0.40530798177857, "grad_norm": 1.5003005993759302, "learning_rate": 1.347840792360009e-05, "loss": 0.2283, "step": 5116 }, { "epoch": 0.4053872053872054, "grad_norm": 1.7212036394274006, "learning_rate": 1.3476002016775626e-05, "loss": 0.2982, "step": 5117 }, { "epoch": 0.40546642899584073, "grad_norm": 1.6347946516884355, "learning_rate": 1.3473595881073154e-05, "loss": 0.2245, "step": 5118 }, { "epoch": 0.4055456526044761, "grad_norm": 1.778637944123748, "learning_rate": 1.3471189516651108e-05, "loss": 0.2585, "step": 5119 }, { "epoch": 0.4056248762131115, "grad_norm": 1.9866194928007983, "learning_rate": 1.3468782923667936e-05, "loss": 0.2625, "step": 5120 }, { "epoch": 0.4057040998217469, "grad_norm": 1.7201189995184551, "learning_rate": 1.3466376102282098e-05, "loss": 0.292, "step": 5121 }, { "epoch": 0.40578332343038226, "grad_norm": 1.855959980595934, "learning_rate": 1.3463969052652073e-05, "loss": 0.2453, "step": 5122 }, { "epoch": 0.40586254703901764, "grad_norm": 1.9838084063096186, "learning_rate": 1.3461561774936352e-05, "loss": 0.2398, "step": 5123 }, { "epoch": 0.405941770647653, "grad_norm": 1.690781306775711, "learning_rate": 1.3459154269293443e-05, "loss": 0.2557, "step": 5124 }, { "epoch": 0.40602099425628835, "grad_norm": 1.903089976280466, "learning_rate": 1.3456746535881872e-05, "loss": 0.3484, "step": 5125 }, { "epoch": 0.40610021786492373, "grad_norm": 3.9955197655170127, "learning_rate": 1.3454338574860175e-05, "loss": 0.2926, "step": 5126 }, { "epoch": 0.4061794414735591, "grad_norm": 2.049455391765512, "learning_rate": 1.3451930386386902e-05, "loss": 0.2596, "step": 5127 }, { "epoch": 0.4062586650821945, "grad_norm": 2.2948576440661386, "learning_rate": 1.3449521970620624e-05, "loss": 0.2584, "step": 5128 }, { "epoch": 0.4063378886908299, "grad_norm": 1.6667577359747496, "learning_rate": 1.3447113327719923e-05, "loss": 0.2166, "step": 5129 }, { "epoch": 0.40641711229946526, "grad_norm": 1.8738589842280058, "learning_rate": 1.3444704457843393e-05, "loss": 0.2446, "step": 5130 }, { "epoch": 0.40649633590810064, "grad_norm": 1.628671397406964, "learning_rate": 1.3442295361149651e-05, "loss": 0.2584, "step": 5131 }, { "epoch": 0.40657555951673596, "grad_norm": 1.913003810064006, "learning_rate": 1.3439886037797326e-05, "loss": 0.1983, "step": 5132 }, { "epoch": 0.40665478312537134, "grad_norm": 1.9936332626504387, "learning_rate": 1.3437476487945051e-05, "loss": 0.2907, "step": 5133 }, { "epoch": 0.4067340067340067, "grad_norm": 1.9936659279781714, "learning_rate": 1.3435066711751494e-05, "loss": 0.2472, "step": 5134 }, { "epoch": 0.4068132303426421, "grad_norm": 1.693690545361268, "learning_rate": 1.343265670937532e-05, "loss": 0.228, "step": 5135 }, { "epoch": 0.4068924539512775, "grad_norm": 1.5215631469756292, "learning_rate": 1.3430246480975218e-05, "loss": 0.2413, "step": 5136 }, { "epoch": 0.40697167755991287, "grad_norm": 2.354871344353882, "learning_rate": 1.3427836026709892e-05, "loss": 0.385, "step": 5137 }, { "epoch": 0.40705090116854825, "grad_norm": 1.8539774936856872, "learning_rate": 1.3425425346738057e-05, "loss": 0.2372, "step": 5138 }, { "epoch": 0.4071301247771836, "grad_norm": 1.7400157364653104, "learning_rate": 1.3423014441218444e-05, "loss": 0.2652, "step": 5139 }, { "epoch": 0.40720934838581896, "grad_norm": 1.6583150102594297, "learning_rate": 1.3420603310309805e-05, "loss": 0.2619, "step": 5140 }, { "epoch": 0.40728857199445434, "grad_norm": 1.4663055947715418, "learning_rate": 1.3418191954170892e-05, "loss": 0.1812, "step": 5141 }, { "epoch": 0.4073677956030897, "grad_norm": 1.6256358494549412, "learning_rate": 1.341578037296049e-05, "loss": 0.1928, "step": 5142 }, { "epoch": 0.4074470192117251, "grad_norm": 1.8235862741093718, "learning_rate": 1.3413368566837384e-05, "loss": 0.3374, "step": 5143 }, { "epoch": 0.4075262428203605, "grad_norm": 1.7652030353751302, "learning_rate": 1.341095653596038e-05, "loss": 0.2203, "step": 5144 }, { "epoch": 0.40760546642899587, "grad_norm": 1.7748994600164159, "learning_rate": 1.3408544280488305e-05, "loss": 0.2924, "step": 5145 }, { "epoch": 0.4076846900376312, "grad_norm": 1.6447995004524583, "learning_rate": 1.3406131800579985e-05, "loss": 0.2269, "step": 5146 }, { "epoch": 0.4077639136462666, "grad_norm": 1.5126775232309466, "learning_rate": 1.3403719096394276e-05, "loss": 0.2859, "step": 5147 }, { "epoch": 0.40784313725490196, "grad_norm": 1.7659093894467255, "learning_rate": 1.3401306168090047e-05, "loss": 0.2616, "step": 5148 }, { "epoch": 0.40792236086353734, "grad_norm": 2.1350416405799484, "learning_rate": 1.3398893015826166e-05, "loss": 0.3399, "step": 5149 }, { "epoch": 0.4080015844721727, "grad_norm": 1.794507080313387, "learning_rate": 1.3396479639761541e-05, "loss": 0.3145, "step": 5150 }, { "epoch": 0.4080808080808081, "grad_norm": 2.0923423201487714, "learning_rate": 1.3394066040055071e-05, "loss": 0.3554, "step": 5151 }, { "epoch": 0.4081600316894435, "grad_norm": 1.8967020436756747, "learning_rate": 1.3391652216865682e-05, "loss": 0.3651, "step": 5152 }, { "epoch": 0.4082392552980788, "grad_norm": 1.6751382737272622, "learning_rate": 1.3389238170352318e-05, "loss": 0.3178, "step": 5153 }, { "epoch": 0.4083184789067142, "grad_norm": 1.2631128234870763, "learning_rate": 1.3386823900673926e-05, "loss": 0.174, "step": 5154 }, { "epoch": 0.40839770251534957, "grad_norm": 1.6265536160393501, "learning_rate": 1.3384409407989475e-05, "loss": 0.2561, "step": 5155 }, { "epoch": 0.40847692612398495, "grad_norm": 1.6957487309972814, "learning_rate": 1.3381994692457956e-05, "loss": 0.1714, "step": 5156 }, { "epoch": 0.40855614973262033, "grad_norm": 1.7574147686109933, "learning_rate": 1.3379579754238354e-05, "loss": 0.204, "step": 5157 }, { "epoch": 0.4086353733412557, "grad_norm": 2.097078066509515, "learning_rate": 1.3377164593489687e-05, "loss": 0.2867, "step": 5158 }, { "epoch": 0.40871459694989104, "grad_norm": 1.6220661313385847, "learning_rate": 1.3374749210370983e-05, "loss": 0.2124, "step": 5159 }, { "epoch": 0.4087938205585264, "grad_norm": 1.6110651182051976, "learning_rate": 1.3372333605041282e-05, "loss": 0.267, "step": 5160 }, { "epoch": 0.4088730441671618, "grad_norm": 1.974748168586152, "learning_rate": 1.3369917777659638e-05, "loss": 0.3461, "step": 5161 }, { "epoch": 0.4089522677757972, "grad_norm": 1.363031162050341, "learning_rate": 1.3367501728385124e-05, "loss": 0.1786, "step": 5162 }, { "epoch": 0.40903149138443257, "grad_norm": 1.395920425938681, "learning_rate": 1.3365085457376823e-05, "loss": 0.1945, "step": 5163 }, { "epoch": 0.40911071499306795, "grad_norm": 1.5738025260062642, "learning_rate": 1.336266896479384e-05, "loss": 0.2452, "step": 5164 }, { "epoch": 0.40918993860170333, "grad_norm": 2.021218829076842, "learning_rate": 1.3360252250795282e-05, "loss": 0.2913, "step": 5165 }, { "epoch": 0.40926916221033866, "grad_norm": 1.6007496199491629, "learning_rate": 1.3357835315540281e-05, "loss": 0.2754, "step": 5166 }, { "epoch": 0.40934838581897404, "grad_norm": 2.333871675387061, "learning_rate": 1.3355418159187988e-05, "loss": 0.2185, "step": 5167 }, { "epoch": 0.4094276094276094, "grad_norm": 1.9545293396520047, "learning_rate": 1.335300078189755e-05, "loss": 0.343, "step": 5168 }, { "epoch": 0.4095068330362448, "grad_norm": 1.546073710643735, "learning_rate": 1.3350583183828143e-05, "loss": 0.1742, "step": 5169 }, { "epoch": 0.4095860566448802, "grad_norm": 1.6717718479486692, "learning_rate": 1.3348165365138956e-05, "loss": 0.2436, "step": 5170 }, { "epoch": 0.40966528025351556, "grad_norm": 1.4706808735228825, "learning_rate": 1.3345747325989188e-05, "loss": 0.2193, "step": 5171 }, { "epoch": 0.40974450386215094, "grad_norm": 1.8568205353400227, "learning_rate": 1.3343329066538064e-05, "loss": 0.2955, "step": 5172 }, { "epoch": 0.40982372747078627, "grad_norm": 1.8285885832835829, "learning_rate": 1.3340910586944805e-05, "loss": 0.2064, "step": 5173 }, { "epoch": 0.40990295107942165, "grad_norm": 2.050711287397145, "learning_rate": 1.3338491887368656e-05, "loss": 0.2372, "step": 5174 }, { "epoch": 0.40998217468805703, "grad_norm": 1.9055540603478667, "learning_rate": 1.3336072967968882e-05, "loss": 0.2918, "step": 5175 }, { "epoch": 0.4100613982966924, "grad_norm": 1.5891300854150547, "learning_rate": 1.3333653828904755e-05, "loss": 0.2148, "step": 5176 }, { "epoch": 0.4101406219053278, "grad_norm": 1.7067643837485824, "learning_rate": 1.3331234470335566e-05, "loss": 0.3335, "step": 5177 }, { "epoch": 0.4102198455139632, "grad_norm": 1.331720194791185, "learning_rate": 1.3328814892420613e-05, "loss": 0.1647, "step": 5178 }, { "epoch": 0.41029906912259856, "grad_norm": 1.834686906446113, "learning_rate": 1.3326395095319218e-05, "loss": 0.3405, "step": 5179 }, { "epoch": 0.4103782927312339, "grad_norm": 1.5930543906984322, "learning_rate": 1.3323975079190713e-05, "loss": 0.2549, "step": 5180 }, { "epoch": 0.41045751633986927, "grad_norm": 1.8510150638600589, "learning_rate": 1.332155484419444e-05, "loss": 0.3297, "step": 5181 }, { "epoch": 0.41053673994850465, "grad_norm": 1.7059094686450418, "learning_rate": 1.3319134390489765e-05, "loss": 0.2846, "step": 5182 }, { "epoch": 0.41061596355714003, "grad_norm": 1.965460334486044, "learning_rate": 1.3316713718236061e-05, "loss": 0.3457, "step": 5183 }, { "epoch": 0.4106951871657754, "grad_norm": 1.812780032392159, "learning_rate": 1.3314292827592716e-05, "loss": 0.3125, "step": 5184 }, { "epoch": 0.4107744107744108, "grad_norm": 1.607173129636733, "learning_rate": 1.3311871718719137e-05, "loss": 0.1976, "step": 5185 }, { "epoch": 0.4108536343830462, "grad_norm": 1.6835184596520958, "learning_rate": 1.330945039177474e-05, "loss": 0.2003, "step": 5186 }, { "epoch": 0.4109328579916815, "grad_norm": 1.6440472358397658, "learning_rate": 1.3307028846918958e-05, "loss": 0.1903, "step": 5187 }, { "epoch": 0.4110120816003169, "grad_norm": 1.8080144299430012, "learning_rate": 1.3304607084311246e-05, "loss": 0.29, "step": 5188 }, { "epoch": 0.41109130520895226, "grad_norm": 1.987029536338346, "learning_rate": 1.3302185104111049e-05, "loss": 0.3819, "step": 5189 }, { "epoch": 0.41117052881758764, "grad_norm": 2.2764370943464747, "learning_rate": 1.3299762906477855e-05, "loss": 0.3204, "step": 5190 }, { "epoch": 0.411249752426223, "grad_norm": 1.6812041309588308, "learning_rate": 1.3297340491571153e-05, "loss": 0.2808, "step": 5191 }, { "epoch": 0.4113289760348584, "grad_norm": 1.8309150405418233, "learning_rate": 1.3294917859550444e-05, "loss": 0.2545, "step": 5192 }, { "epoch": 0.4114081996434938, "grad_norm": 2.1501783197844184, "learning_rate": 1.3292495010575249e-05, "loss": 0.2171, "step": 5193 }, { "epoch": 0.4114874232521291, "grad_norm": 1.436142715549541, "learning_rate": 1.3290071944805099e-05, "loss": 0.1733, "step": 5194 }, { "epoch": 0.4115666468607645, "grad_norm": 1.9903326398661199, "learning_rate": 1.3287648662399544e-05, "loss": 0.3297, "step": 5195 }, { "epoch": 0.4116458704693999, "grad_norm": 2.0961113365532027, "learning_rate": 1.3285225163518141e-05, "loss": 0.3485, "step": 5196 }, { "epoch": 0.41172509407803526, "grad_norm": 2.1688706093461314, "learning_rate": 1.328280144832047e-05, "loss": 0.2803, "step": 5197 }, { "epoch": 0.41180431768667064, "grad_norm": 2.093353150979585, "learning_rate": 1.3280377516966118e-05, "loss": 0.2104, "step": 5198 }, { "epoch": 0.411883541295306, "grad_norm": 2.196762821673357, "learning_rate": 1.3277953369614696e-05, "loss": 0.1586, "step": 5199 }, { "epoch": 0.41196276490394135, "grad_norm": 1.6668383747799789, "learning_rate": 1.3275529006425808e-05, "loss": 0.2006, "step": 5200 }, { "epoch": 0.41204198851257673, "grad_norm": 1.711982844670911, "learning_rate": 1.3273104427559102e-05, "loss": 0.1579, "step": 5201 }, { "epoch": 0.4121212121212121, "grad_norm": 1.6314340648189252, "learning_rate": 1.3270679633174219e-05, "loss": 0.2625, "step": 5202 }, { "epoch": 0.4122004357298475, "grad_norm": 1.5120713409937838, "learning_rate": 1.3268254623430817e-05, "loss": 0.1082, "step": 5203 }, { "epoch": 0.4122796593384829, "grad_norm": 1.7489700885427808, "learning_rate": 1.3265829398488576e-05, "loss": 0.237, "step": 5204 }, { "epoch": 0.41235888294711825, "grad_norm": 1.5012984104698848, "learning_rate": 1.3263403958507181e-05, "loss": 0.1677, "step": 5205 }, { "epoch": 0.41243810655575364, "grad_norm": 1.3957259372424156, "learning_rate": 1.326097830364634e-05, "loss": 0.2393, "step": 5206 }, { "epoch": 0.41251733016438896, "grad_norm": 1.8903925559142896, "learning_rate": 1.3258552434065768e-05, "loss": 0.3144, "step": 5207 }, { "epoch": 0.41259655377302434, "grad_norm": 2.0445936490573486, "learning_rate": 1.3256126349925195e-05, "loss": 0.277, "step": 5208 }, { "epoch": 0.4126757773816597, "grad_norm": 1.5232667035235046, "learning_rate": 1.3253700051384371e-05, "loss": 0.3095, "step": 5209 }, { "epoch": 0.4127550009902951, "grad_norm": 1.7996338482003338, "learning_rate": 1.3251273538603056e-05, "loss": 0.239, "step": 5210 }, { "epoch": 0.4128342245989305, "grad_norm": 2.0306475842675744, "learning_rate": 1.3248846811741021e-05, "loss": 0.3208, "step": 5211 }, { "epoch": 0.41291344820756587, "grad_norm": 1.7522197744915726, "learning_rate": 1.3246419870958056e-05, "loss": 0.2064, "step": 5212 }, { "epoch": 0.41299267181620125, "grad_norm": 1.8621301770693548, "learning_rate": 1.3243992716413962e-05, "loss": 0.2948, "step": 5213 }, { "epoch": 0.4130718954248366, "grad_norm": 1.8690129098080024, "learning_rate": 1.324156534826856e-05, "loss": 0.2599, "step": 5214 }, { "epoch": 0.41315111903347196, "grad_norm": 1.8768059540239044, "learning_rate": 1.3239137766681675e-05, "loss": 0.1781, "step": 5215 }, { "epoch": 0.41323034264210734, "grad_norm": 2.045483786114196, "learning_rate": 1.3236709971813153e-05, "loss": 0.238, "step": 5216 }, { "epoch": 0.4133095662507427, "grad_norm": 1.7166613052071729, "learning_rate": 1.3234281963822856e-05, "loss": 0.2379, "step": 5217 }, { "epoch": 0.4133887898593781, "grad_norm": 2.114992324498812, "learning_rate": 1.3231853742870652e-05, "loss": 0.3007, "step": 5218 }, { "epoch": 0.4134680134680135, "grad_norm": 1.8623630779606364, "learning_rate": 1.322942530911643e-05, "loss": 0.2617, "step": 5219 }, { "epoch": 0.41354723707664887, "grad_norm": 1.4836575719854659, "learning_rate": 1.3226996662720094e-05, "loss": 0.1843, "step": 5220 }, { "epoch": 0.4136264606852842, "grad_norm": 1.8596948321747533, "learning_rate": 1.322456780384155e-05, "loss": 0.3164, "step": 5221 }, { "epoch": 0.4137056842939196, "grad_norm": 1.6464179775636327, "learning_rate": 1.3222138732640732e-05, "loss": 0.1999, "step": 5222 }, { "epoch": 0.41378490790255495, "grad_norm": 1.937607391187395, "learning_rate": 1.3219709449277584e-05, "loss": 0.2381, "step": 5223 }, { "epoch": 0.41386413151119034, "grad_norm": 1.922053920233908, "learning_rate": 1.3217279953912061e-05, "loss": 0.2975, "step": 5224 }, { "epoch": 0.4139433551198257, "grad_norm": 1.6567096135748143, "learning_rate": 1.3214850246704134e-05, "loss": 0.2623, "step": 5225 }, { "epoch": 0.4140225787284611, "grad_norm": 1.4989643447832592, "learning_rate": 1.3212420327813789e-05, "loss": 0.2222, "step": 5226 }, { "epoch": 0.4141018023370965, "grad_norm": 1.9283219327322054, "learning_rate": 1.3209990197401016e-05, "loss": 0.2744, "step": 5227 }, { "epoch": 0.4141810259457318, "grad_norm": 1.7374988023831457, "learning_rate": 1.3207559855625842e-05, "loss": 0.2573, "step": 5228 }, { "epoch": 0.4142602495543672, "grad_norm": 1.8155447949094714, "learning_rate": 1.3205129302648282e-05, "loss": 0.3124, "step": 5229 }, { "epoch": 0.41433947316300257, "grad_norm": 2.101997272370507, "learning_rate": 1.3202698538628376e-05, "loss": 0.2681, "step": 5230 }, { "epoch": 0.41441869677163795, "grad_norm": 2.0657793079428997, "learning_rate": 1.3200267563726187e-05, "loss": 0.3074, "step": 5231 }, { "epoch": 0.41449792038027333, "grad_norm": 2.3618194154949776, "learning_rate": 1.3197836378101773e-05, "loss": 0.4118, "step": 5232 }, { "epoch": 0.4145771439889087, "grad_norm": 1.817317713696645, "learning_rate": 1.3195404981915223e-05, "loss": 0.2826, "step": 5233 }, { "epoch": 0.4146563675975441, "grad_norm": 1.814930338028792, "learning_rate": 1.3192973375326635e-05, "loss": 0.329, "step": 5234 }, { "epoch": 0.4147355912061794, "grad_norm": 1.9300993049829744, "learning_rate": 1.3190541558496106e-05, "loss": 0.3374, "step": 5235 }, { "epoch": 0.4148148148148148, "grad_norm": 1.7259491977036778, "learning_rate": 1.318810953158377e-05, "loss": 0.3181, "step": 5236 }, { "epoch": 0.4148940384234502, "grad_norm": 1.8701968392638888, "learning_rate": 1.3185677294749763e-05, "loss": 0.2945, "step": 5237 }, { "epoch": 0.41497326203208557, "grad_norm": 1.6894679515437134, "learning_rate": 1.3183244848154232e-05, "loss": 0.3069, "step": 5238 }, { "epoch": 0.41505248564072095, "grad_norm": 1.9965917243085374, "learning_rate": 1.3180812191957346e-05, "loss": 0.3633, "step": 5239 }, { "epoch": 0.41513170924935633, "grad_norm": 1.4216688900832601, "learning_rate": 1.3178379326319284e-05, "loss": 0.2976, "step": 5240 }, { "epoch": 0.41521093285799165, "grad_norm": 1.8794530865962518, "learning_rate": 1.3175946251400234e-05, "loss": 0.2979, "step": 5241 }, { "epoch": 0.41529015646662704, "grad_norm": 1.2856985918775345, "learning_rate": 1.3173512967360406e-05, "loss": 0.1585, "step": 5242 }, { "epoch": 0.4153693800752624, "grad_norm": 1.6765815028562654, "learning_rate": 1.317107947436002e-05, "loss": 0.332, "step": 5243 }, { "epoch": 0.4154486036838978, "grad_norm": 1.5236883384010385, "learning_rate": 1.3168645772559308e-05, "loss": 0.2816, "step": 5244 }, { "epoch": 0.4155278272925332, "grad_norm": 1.5648584965519263, "learning_rate": 1.3166211862118519e-05, "loss": 0.2192, "step": 5245 }, { "epoch": 0.41560705090116856, "grad_norm": 1.5256879916086248, "learning_rate": 1.3163777743197912e-05, "loss": 0.2118, "step": 5246 }, { "epoch": 0.41568627450980394, "grad_norm": 1.5205413116315978, "learning_rate": 1.3161343415957767e-05, "loss": 0.2379, "step": 5247 }, { "epoch": 0.41576549811843927, "grad_norm": 1.4177960353245953, "learning_rate": 1.3158908880558366e-05, "loss": 0.1392, "step": 5248 }, { "epoch": 0.41584472172707465, "grad_norm": 1.7540228890352578, "learning_rate": 1.3156474137160015e-05, "loss": 0.2691, "step": 5249 }, { "epoch": 0.41592394533571003, "grad_norm": 2.2833412720352544, "learning_rate": 1.3154039185923034e-05, "loss": 0.3523, "step": 5250 }, { "epoch": 0.4160031689443454, "grad_norm": 2.235976518031537, "learning_rate": 1.3151604027007744e-05, "loss": 0.4049, "step": 5251 }, { "epoch": 0.4160823925529808, "grad_norm": 1.4438007122077707, "learning_rate": 1.3149168660574495e-05, "loss": 0.2135, "step": 5252 }, { "epoch": 0.4161616161616162, "grad_norm": 1.8661355462098546, "learning_rate": 1.3146733086783646e-05, "loss": 0.3075, "step": 5253 }, { "epoch": 0.41624083977025156, "grad_norm": 1.775842709710853, "learning_rate": 1.3144297305795559e-05, "loss": 0.2227, "step": 5254 }, { "epoch": 0.4163200633788869, "grad_norm": 1.656949959918874, "learning_rate": 1.3141861317770628e-05, "loss": 0.2423, "step": 5255 }, { "epoch": 0.41639928698752227, "grad_norm": 1.8036213302740065, "learning_rate": 1.3139425122869244e-05, "loss": 0.3101, "step": 5256 }, { "epoch": 0.41647851059615765, "grad_norm": 1.6799140032312851, "learning_rate": 1.3136988721251823e-05, "loss": 0.2708, "step": 5257 }, { "epoch": 0.41655773420479303, "grad_norm": 2.296680665774159, "learning_rate": 1.3134552113078788e-05, "loss": 0.3344, "step": 5258 }, { "epoch": 0.4166369578134284, "grad_norm": 2.0155753623581893, "learning_rate": 1.3132115298510579e-05, "loss": 0.3025, "step": 5259 }, { "epoch": 0.4167161814220638, "grad_norm": 1.9435443817118128, "learning_rate": 1.312967827770765e-05, "loss": 0.3456, "step": 5260 }, { "epoch": 0.4167954050306992, "grad_norm": 1.8889477335751863, "learning_rate": 1.3127241050830463e-05, "loss": 0.3628, "step": 5261 }, { "epoch": 0.4168746286393345, "grad_norm": 1.804585188377334, "learning_rate": 1.3124803618039501e-05, "loss": 0.2484, "step": 5262 }, { "epoch": 0.4169538522479699, "grad_norm": 1.689622961679414, "learning_rate": 1.3122365979495259e-05, "loss": 0.2295, "step": 5263 }, { "epoch": 0.41703307585660526, "grad_norm": 1.6133616251922587, "learning_rate": 1.3119928135358238e-05, "loss": 0.2047, "step": 5264 }, { "epoch": 0.41711229946524064, "grad_norm": 1.788420824189975, "learning_rate": 1.3117490085788963e-05, "loss": 0.2654, "step": 5265 }, { "epoch": 0.417191523073876, "grad_norm": 1.9769138275297493, "learning_rate": 1.3115051830947966e-05, "loss": 0.365, "step": 5266 }, { "epoch": 0.4172707466825114, "grad_norm": 1.7116061003029828, "learning_rate": 1.3112613370995792e-05, "loss": 0.2633, "step": 5267 }, { "epoch": 0.4173499702911468, "grad_norm": 1.8468645056452868, "learning_rate": 1.3110174706093007e-05, "loss": 0.3064, "step": 5268 }, { "epoch": 0.4174291938997821, "grad_norm": 2.0785124181840455, "learning_rate": 1.3107735836400184e-05, "loss": 0.2956, "step": 5269 }, { "epoch": 0.4175084175084175, "grad_norm": 1.9209309419494196, "learning_rate": 1.3105296762077906e-05, "loss": 0.2608, "step": 5270 }, { "epoch": 0.4175876411170529, "grad_norm": 1.5873708085738265, "learning_rate": 1.3102857483286781e-05, "loss": 0.1823, "step": 5271 }, { "epoch": 0.41766686472568826, "grad_norm": 2.023338451498811, "learning_rate": 1.310041800018742e-05, "loss": 0.3227, "step": 5272 }, { "epoch": 0.41774608833432364, "grad_norm": 1.7275235130921136, "learning_rate": 1.3097978312940453e-05, "loss": 0.2771, "step": 5273 }, { "epoch": 0.417825311942959, "grad_norm": 1.5499425958351882, "learning_rate": 1.309553842170652e-05, "loss": 0.2284, "step": 5274 }, { "epoch": 0.4179045355515944, "grad_norm": 1.7397350796249147, "learning_rate": 1.3093098326646277e-05, "loss": 0.1957, "step": 5275 }, { "epoch": 0.41798375916022973, "grad_norm": 1.4392425689293202, "learning_rate": 1.3090658027920391e-05, "loss": 0.1714, "step": 5276 }, { "epoch": 0.4180629827688651, "grad_norm": 1.9745192965774891, "learning_rate": 1.3088217525689546e-05, "loss": 0.3665, "step": 5277 }, { "epoch": 0.4181422063775005, "grad_norm": 1.701401164972941, "learning_rate": 1.3085776820114435e-05, "loss": 0.2309, "step": 5278 }, { "epoch": 0.4182214299861359, "grad_norm": 1.8861890401580752, "learning_rate": 1.3083335911355768e-05, "loss": 0.3023, "step": 5279 }, { "epoch": 0.41830065359477125, "grad_norm": 1.639537165885871, "learning_rate": 1.3080894799574271e-05, "loss": 0.2693, "step": 5280 }, { "epoch": 0.41837987720340664, "grad_norm": 1.7811459961214888, "learning_rate": 1.3078453484930674e-05, "loss": 0.2919, "step": 5281 }, { "epoch": 0.41845910081204196, "grad_norm": 1.6712278031507863, "learning_rate": 1.3076011967585727e-05, "loss": 0.2327, "step": 5282 }, { "epoch": 0.41853832442067734, "grad_norm": 1.7417391340591746, "learning_rate": 1.3073570247700192e-05, "loss": 0.3444, "step": 5283 }, { "epoch": 0.4186175480293127, "grad_norm": 1.931846836836233, "learning_rate": 1.3071128325434845e-05, "loss": 0.3352, "step": 5284 }, { "epoch": 0.4186967716379481, "grad_norm": 1.4572105965174231, "learning_rate": 1.3068686200950475e-05, "loss": 0.1631, "step": 5285 }, { "epoch": 0.4187759952465835, "grad_norm": 1.3912395463488743, "learning_rate": 1.3066243874407886e-05, "loss": 0.2103, "step": 5286 }, { "epoch": 0.41885521885521887, "grad_norm": 1.5245415605188581, "learning_rate": 1.306380134596789e-05, "loss": 0.1921, "step": 5287 }, { "epoch": 0.41893444246385425, "grad_norm": 1.7795315791154995, "learning_rate": 1.306135861579132e-05, "loss": 0.2995, "step": 5288 }, { "epoch": 0.4190136660724896, "grad_norm": 1.534087796584696, "learning_rate": 1.3058915684039013e-05, "loss": 0.2132, "step": 5289 }, { "epoch": 0.41909288968112496, "grad_norm": 1.5299809687136736, "learning_rate": 1.3056472550871829e-05, "loss": 0.193, "step": 5290 }, { "epoch": 0.41917211328976034, "grad_norm": 1.65777487310565, "learning_rate": 1.3054029216450632e-05, "loss": 0.3148, "step": 5291 }, { "epoch": 0.4192513368983957, "grad_norm": 2.090520647761956, "learning_rate": 1.3051585680936305e-05, "loss": 0.3395, "step": 5292 }, { "epoch": 0.4193305605070311, "grad_norm": 1.743699918943753, "learning_rate": 1.304914194448975e-05, "loss": 0.1986, "step": 5293 }, { "epoch": 0.4194097841156665, "grad_norm": 1.9008619441110146, "learning_rate": 1.3046698007271864e-05, "loss": 0.2787, "step": 5294 }, { "epoch": 0.41948900772430187, "grad_norm": 1.5416233467110712, "learning_rate": 1.3044253869443575e-05, "loss": 0.2747, "step": 5295 }, { "epoch": 0.4195682313329372, "grad_norm": 1.4642403972621758, "learning_rate": 1.3041809531165819e-05, "loss": 0.1994, "step": 5296 }, { "epoch": 0.4196474549415726, "grad_norm": 2.320887547839888, "learning_rate": 1.3039364992599538e-05, "loss": 0.3794, "step": 5297 }, { "epoch": 0.41972667855020795, "grad_norm": 1.862682209757833, "learning_rate": 1.30369202539057e-05, "loss": 0.2795, "step": 5298 }, { "epoch": 0.41980590215884334, "grad_norm": 1.4791216553717002, "learning_rate": 1.3034475315245273e-05, "loss": 0.2739, "step": 5299 }, { "epoch": 0.4198851257674787, "grad_norm": 2.349124545979662, "learning_rate": 1.303203017677925e-05, "loss": 0.2666, "step": 5300 }, { "epoch": 0.4199643493761141, "grad_norm": 1.8222694978081466, "learning_rate": 1.302958483866863e-05, "loss": 0.3268, "step": 5301 }, { "epoch": 0.4200435729847495, "grad_norm": 1.5554712641977537, "learning_rate": 1.3027139301074423e-05, "loss": 0.2758, "step": 5302 }, { "epoch": 0.4201227965933848, "grad_norm": 1.5951832067539373, "learning_rate": 1.3024693564157658e-05, "loss": 0.2158, "step": 5303 }, { "epoch": 0.4202020202020202, "grad_norm": 2.3035560622154607, "learning_rate": 1.3022247628079381e-05, "loss": 0.3497, "step": 5304 }, { "epoch": 0.42028124381065557, "grad_norm": 2.016302820851935, "learning_rate": 1.3019801493000634e-05, "loss": 0.2421, "step": 5305 }, { "epoch": 0.42036046741929095, "grad_norm": 1.625619973075902, "learning_rate": 1.3017355159082495e-05, "loss": 0.1999, "step": 5306 }, { "epoch": 0.42043969102792633, "grad_norm": 1.3797597210355135, "learning_rate": 1.3014908626486032e-05, "loss": 0.2551, "step": 5307 }, { "epoch": 0.4205189146365617, "grad_norm": 1.4962566611498553, "learning_rate": 1.3012461895372343e-05, "loss": 0.2045, "step": 5308 }, { "epoch": 0.4205981382451971, "grad_norm": 1.7202227734224333, "learning_rate": 1.3010014965902535e-05, "loss": 0.2211, "step": 5309 }, { "epoch": 0.4206773618538324, "grad_norm": 1.995332339798482, "learning_rate": 1.3007567838237725e-05, "loss": 0.2348, "step": 5310 }, { "epoch": 0.4207565854624678, "grad_norm": 1.818271065911864, "learning_rate": 1.3005120512539042e-05, "loss": 0.286, "step": 5311 }, { "epoch": 0.4208358090711032, "grad_norm": 1.7061833414089553, "learning_rate": 1.300267298896764e-05, "loss": 0.2617, "step": 5312 }, { "epoch": 0.42091503267973857, "grad_norm": 2.0295291395244774, "learning_rate": 1.3000225267684663e-05, "loss": 0.2941, "step": 5313 }, { "epoch": 0.42099425628837395, "grad_norm": 1.8220830654245137, "learning_rate": 1.2997777348851288e-05, "loss": 0.2978, "step": 5314 }, { "epoch": 0.42107347989700933, "grad_norm": 1.9948853356414633, "learning_rate": 1.2995329232628702e-05, "loss": 0.2662, "step": 5315 }, { "epoch": 0.42115270350564465, "grad_norm": 1.5622268042548941, "learning_rate": 1.2992880919178097e-05, "loss": 0.2519, "step": 5316 }, { "epoch": 0.42123192711428004, "grad_norm": 1.7482158606983982, "learning_rate": 1.2990432408660682e-05, "loss": 0.1897, "step": 5317 }, { "epoch": 0.4213111507229154, "grad_norm": 1.5901910918658888, "learning_rate": 1.2987983701237688e-05, "loss": 0.2713, "step": 5318 }, { "epoch": 0.4213903743315508, "grad_norm": 2.4458224658396515, "learning_rate": 1.298553479707034e-05, "loss": 0.3555, "step": 5319 }, { "epoch": 0.4214695979401862, "grad_norm": 1.6720320572727707, "learning_rate": 1.2983085696319892e-05, "loss": 0.28, "step": 5320 }, { "epoch": 0.42154882154882156, "grad_norm": 1.5459557603633542, "learning_rate": 1.2980636399147606e-05, "loss": 0.1933, "step": 5321 }, { "epoch": 0.42162804515745694, "grad_norm": 1.8197302528710626, "learning_rate": 1.2978186905714752e-05, "loss": 0.2153, "step": 5322 }, { "epoch": 0.42170726876609227, "grad_norm": 1.9509194573752373, "learning_rate": 1.2975737216182625e-05, "loss": 0.2517, "step": 5323 }, { "epoch": 0.42178649237472765, "grad_norm": 1.3747995294809678, "learning_rate": 1.2973287330712516e-05, "loss": 0.1617, "step": 5324 }, { "epoch": 0.42186571598336303, "grad_norm": 1.6873013316156398, "learning_rate": 1.2970837249465746e-05, "loss": 0.2788, "step": 5325 }, { "epoch": 0.4219449395919984, "grad_norm": 1.9206875941699928, "learning_rate": 1.2968386972603635e-05, "loss": 0.2904, "step": 5326 }, { "epoch": 0.4220241632006338, "grad_norm": 1.8821599668747608, "learning_rate": 1.2965936500287526e-05, "loss": 0.2267, "step": 5327 }, { "epoch": 0.4221033868092692, "grad_norm": 1.6882944969178815, "learning_rate": 1.2963485832678772e-05, "loss": 0.2162, "step": 5328 }, { "epoch": 0.42218261041790456, "grad_norm": 1.9782034352060445, "learning_rate": 1.2961034969938732e-05, "loss": 0.1963, "step": 5329 }, { "epoch": 0.4222618340265399, "grad_norm": 1.8705005015044316, "learning_rate": 1.2958583912228785e-05, "loss": 0.3257, "step": 5330 }, { "epoch": 0.42234105763517527, "grad_norm": 1.506027358609764, "learning_rate": 1.295613265971033e-05, "loss": 0.1935, "step": 5331 }, { "epoch": 0.42242028124381065, "grad_norm": 1.8642259177441336, "learning_rate": 1.2953681212544757e-05, "loss": 0.2818, "step": 5332 }, { "epoch": 0.42249950485244603, "grad_norm": 1.915602154327558, "learning_rate": 1.2951229570893493e-05, "loss": 0.2535, "step": 5333 }, { "epoch": 0.4225787284610814, "grad_norm": 1.9270586802060459, "learning_rate": 1.2948777734917961e-05, "loss": 0.2655, "step": 5334 }, { "epoch": 0.4226579520697168, "grad_norm": 1.8091362621772333, "learning_rate": 1.2946325704779602e-05, "loss": 0.2943, "step": 5335 }, { "epoch": 0.4227371756783522, "grad_norm": 1.9385980196787909, "learning_rate": 1.2943873480639875e-05, "loss": 0.2716, "step": 5336 }, { "epoch": 0.4228163992869875, "grad_norm": 1.6522502876088858, "learning_rate": 1.294142106266024e-05, "loss": 0.3378, "step": 5337 }, { "epoch": 0.4228956228956229, "grad_norm": 2.312137753778925, "learning_rate": 1.2938968451002183e-05, "loss": 0.3319, "step": 5338 }, { "epoch": 0.42297484650425826, "grad_norm": 1.7757960413210792, "learning_rate": 1.2936515645827198e-05, "loss": 0.359, "step": 5339 }, { "epoch": 0.42305407011289364, "grad_norm": 1.6152680203144114, "learning_rate": 1.2934062647296783e-05, "loss": 0.1985, "step": 5340 }, { "epoch": 0.423133293721529, "grad_norm": 1.7620697388118594, "learning_rate": 1.2931609455572462e-05, "loss": 0.2423, "step": 5341 }, { "epoch": 0.4232125173301644, "grad_norm": 1.8255664656678905, "learning_rate": 1.2929156070815765e-05, "loss": 0.2393, "step": 5342 }, { "epoch": 0.4232917409387998, "grad_norm": 1.9061765497731176, "learning_rate": 1.2926702493188235e-05, "loss": 0.3265, "step": 5343 }, { "epoch": 0.4233709645474351, "grad_norm": 1.7186140003029269, "learning_rate": 1.292424872285143e-05, "loss": 0.2088, "step": 5344 }, { "epoch": 0.4234501881560705, "grad_norm": 1.935200215980244, "learning_rate": 1.2921794759966913e-05, "loss": 0.2368, "step": 5345 }, { "epoch": 0.4235294117647059, "grad_norm": 1.644584974229887, "learning_rate": 1.2919340604696272e-05, "loss": 0.237, "step": 5346 }, { "epoch": 0.42360863537334126, "grad_norm": 1.707562127909152, "learning_rate": 1.29168862572011e-05, "loss": 0.3126, "step": 5347 }, { "epoch": 0.42368785898197664, "grad_norm": 1.5626803481090905, "learning_rate": 1.2914431717643e-05, "loss": 0.3265, "step": 5348 }, { "epoch": 0.423767082590612, "grad_norm": 1.8831074350709485, "learning_rate": 1.2911976986183598e-05, "loss": 0.301, "step": 5349 }, { "epoch": 0.4238463061992474, "grad_norm": 1.8146522411546355, "learning_rate": 1.2909522062984524e-05, "loss": 0.2458, "step": 5350 }, { "epoch": 0.42392552980788273, "grad_norm": 1.4448526173348721, "learning_rate": 1.290706694820742e-05, "loss": 0.2366, "step": 5351 }, { "epoch": 0.4240047534165181, "grad_norm": 1.3324539222264242, "learning_rate": 1.2904611642013945e-05, "loss": 0.1628, "step": 5352 }, { "epoch": 0.4240839770251535, "grad_norm": 1.7934542794522415, "learning_rate": 1.2902156144565769e-05, "loss": 0.2851, "step": 5353 }, { "epoch": 0.4241632006337889, "grad_norm": 1.3289472061397964, "learning_rate": 1.2899700456024576e-05, "loss": 0.2118, "step": 5354 }, { "epoch": 0.42424242424242425, "grad_norm": 1.8400555231629665, "learning_rate": 1.2897244576552062e-05, "loss": 0.3162, "step": 5355 }, { "epoch": 0.42432164785105964, "grad_norm": 1.666685062296082, "learning_rate": 1.289478850630993e-05, "loss": 0.2224, "step": 5356 }, { "epoch": 0.42440087145969496, "grad_norm": 1.5062152873832801, "learning_rate": 1.2892332245459904e-05, "loss": 0.2847, "step": 5357 }, { "epoch": 0.42448009506833034, "grad_norm": 1.473074964787706, "learning_rate": 1.288987579416372e-05, "loss": 0.1833, "step": 5358 }, { "epoch": 0.4245593186769657, "grad_norm": 1.5985653513362799, "learning_rate": 1.2887419152583117e-05, "loss": 0.2457, "step": 5359 }, { "epoch": 0.4246385422856011, "grad_norm": 2.153596681911628, "learning_rate": 1.2884962320879857e-05, "loss": 0.2969, "step": 5360 }, { "epoch": 0.4247177658942365, "grad_norm": 2.0702961719461226, "learning_rate": 1.2882505299215711e-05, "loss": 0.3443, "step": 5361 }, { "epoch": 0.42479698950287187, "grad_norm": 1.702787559197427, "learning_rate": 1.288004808775246e-05, "loss": 0.197, "step": 5362 }, { "epoch": 0.42487621311150725, "grad_norm": 1.4922726383266287, "learning_rate": 1.28775906866519e-05, "loss": 0.1376, "step": 5363 }, { "epoch": 0.4249554367201426, "grad_norm": 1.87135215453094, "learning_rate": 1.2875133096075839e-05, "loss": 0.2684, "step": 5364 }, { "epoch": 0.42503466032877796, "grad_norm": 1.8583033954717476, "learning_rate": 1.2872675316186096e-05, "loss": 0.2831, "step": 5365 }, { "epoch": 0.42511388393741334, "grad_norm": 2.1650304660574746, "learning_rate": 1.2870217347144511e-05, "loss": 0.3172, "step": 5366 }, { "epoch": 0.4251931075460487, "grad_norm": 1.6590298553305656, "learning_rate": 1.2867759189112921e-05, "loss": 0.2436, "step": 5367 }, { "epoch": 0.4252723311546841, "grad_norm": 1.7659124444747074, "learning_rate": 1.2865300842253188e-05, "loss": 0.1691, "step": 5368 }, { "epoch": 0.4253515547633195, "grad_norm": 1.7124052522714717, "learning_rate": 1.2862842306727181e-05, "loss": 0.2355, "step": 5369 }, { "epoch": 0.42543077837195487, "grad_norm": 1.880332769584433, "learning_rate": 1.2860383582696783e-05, "loss": 0.2713, "step": 5370 }, { "epoch": 0.4255100019805902, "grad_norm": 1.8693987209668768, "learning_rate": 1.2857924670323892e-05, "loss": 0.2671, "step": 5371 }, { "epoch": 0.4255892255892256, "grad_norm": 2.7753845810807927, "learning_rate": 1.2855465569770407e-05, "loss": 0.2372, "step": 5372 }, { "epoch": 0.42566844919786095, "grad_norm": 1.8058623258430284, "learning_rate": 1.2853006281198257e-05, "loss": 0.2261, "step": 5373 }, { "epoch": 0.42574767280649634, "grad_norm": 2.2840985239287512, "learning_rate": 1.2850546804769372e-05, "loss": 0.3987, "step": 5374 }, { "epoch": 0.4258268964151317, "grad_norm": 1.746917631561016, "learning_rate": 1.2848087140645695e-05, "loss": 0.3313, "step": 5375 }, { "epoch": 0.4259061200237671, "grad_norm": 1.524329647115645, "learning_rate": 1.2845627288989186e-05, "loss": 0.1749, "step": 5376 }, { "epoch": 0.4259853436324025, "grad_norm": 1.842257345403571, "learning_rate": 1.284316724996181e-05, "loss": 0.2819, "step": 5377 }, { "epoch": 0.4260645672410378, "grad_norm": 2.158195232222975, "learning_rate": 1.2840707023725552e-05, "loss": 0.2363, "step": 5378 }, { "epoch": 0.4261437908496732, "grad_norm": 1.4442132356995627, "learning_rate": 1.2838246610442406e-05, "loss": 0.2089, "step": 5379 }, { "epoch": 0.42622301445830857, "grad_norm": 1.5426906647674505, "learning_rate": 1.2835786010274376e-05, "loss": 0.2405, "step": 5380 }, { "epoch": 0.42630223806694395, "grad_norm": 1.4825124278536477, "learning_rate": 1.283332522338348e-05, "loss": 0.1868, "step": 5381 }, { "epoch": 0.42638146167557933, "grad_norm": 1.8528798207041872, "learning_rate": 1.2830864249931756e-05, "loss": 0.3421, "step": 5382 }, { "epoch": 0.4264606852842147, "grad_norm": 1.5470101560190086, "learning_rate": 1.2828403090081238e-05, "loss": 0.2653, "step": 5383 }, { "epoch": 0.4265399088928501, "grad_norm": 1.9035733284706517, "learning_rate": 1.282594174399399e-05, "loss": 0.2839, "step": 5384 }, { "epoch": 0.4266191325014854, "grad_norm": 2.0533093535192948, "learning_rate": 1.2823480211832073e-05, "loss": 0.2062, "step": 5385 }, { "epoch": 0.4266983561101208, "grad_norm": 1.582668620664453, "learning_rate": 1.2821018493757569e-05, "loss": 0.2318, "step": 5386 }, { "epoch": 0.4267775797187562, "grad_norm": 1.940917753614369, "learning_rate": 1.2818556589932575e-05, "loss": 0.3238, "step": 5387 }, { "epoch": 0.42685680332739157, "grad_norm": 1.461519927659968, "learning_rate": 1.2816094500519188e-05, "loss": 0.245, "step": 5388 }, { "epoch": 0.42693602693602695, "grad_norm": 1.9187965375954472, "learning_rate": 1.2813632225679528e-05, "loss": 0.2883, "step": 5389 }, { "epoch": 0.42701525054466233, "grad_norm": 1.5228257338008833, "learning_rate": 1.281116976557573e-05, "loss": 0.2327, "step": 5390 }, { "epoch": 0.4270944741532977, "grad_norm": 1.5815830243191027, "learning_rate": 1.2808707120369923e-05, "loss": 0.2248, "step": 5391 }, { "epoch": 0.42717369776193304, "grad_norm": 1.8103231911112754, "learning_rate": 1.280624429022427e-05, "loss": 0.1637, "step": 5392 }, { "epoch": 0.4272529213705684, "grad_norm": 2.022183976038517, "learning_rate": 1.2803781275300933e-05, "loss": 0.3333, "step": 5393 }, { "epoch": 0.4273321449792038, "grad_norm": 1.5021408611429654, "learning_rate": 1.2801318075762088e-05, "loss": 0.2548, "step": 5394 }, { "epoch": 0.4274113685878392, "grad_norm": 1.4639210729849936, "learning_rate": 1.2798854691769927e-05, "loss": 0.2182, "step": 5395 }, { "epoch": 0.42749059219647456, "grad_norm": 2.297049941147285, "learning_rate": 1.2796391123486654e-05, "loss": 0.184, "step": 5396 }, { "epoch": 0.42756981580510994, "grad_norm": 1.8712805500727123, "learning_rate": 1.2793927371074477e-05, "loss": 0.272, "step": 5397 }, { "epoch": 0.42764903941374527, "grad_norm": 1.5633081657549015, "learning_rate": 1.279146343469563e-05, "loss": 0.2169, "step": 5398 }, { "epoch": 0.42772826302238065, "grad_norm": 1.5156355401866126, "learning_rate": 1.2788999314512347e-05, "loss": 0.148, "step": 5399 }, { "epoch": 0.42780748663101603, "grad_norm": 1.5359488123733942, "learning_rate": 1.2786535010686879e-05, "loss": 0.1755, "step": 5400 }, { "epoch": 0.4278867102396514, "grad_norm": 1.4993321739975185, "learning_rate": 1.2784070523381487e-05, "loss": 0.1866, "step": 5401 }, { "epoch": 0.4279659338482868, "grad_norm": 1.731990208789258, "learning_rate": 1.2781605852758448e-05, "loss": 0.1538, "step": 5402 }, { "epoch": 0.4280451574569222, "grad_norm": 2.032828969180398, "learning_rate": 1.2779140998980048e-05, "loss": 0.2935, "step": 5403 }, { "epoch": 0.42812438106555756, "grad_norm": 1.826500074746919, "learning_rate": 1.2776675962208585e-05, "loss": 0.2619, "step": 5404 }, { "epoch": 0.4282036046741929, "grad_norm": 1.7775231965261296, "learning_rate": 1.2774210742606368e-05, "loss": 0.2486, "step": 5405 }, { "epoch": 0.42828282828282827, "grad_norm": 1.7240917875847057, "learning_rate": 1.2771745340335726e-05, "loss": 0.3003, "step": 5406 }, { "epoch": 0.42836205189146365, "grad_norm": 1.4335769548113746, "learning_rate": 1.276927975555899e-05, "loss": 0.2083, "step": 5407 }, { "epoch": 0.42844127550009903, "grad_norm": 2.0101894665021036, "learning_rate": 1.2766813988438505e-05, "loss": 0.2552, "step": 5408 }, { "epoch": 0.4285204991087344, "grad_norm": 1.8716626060809927, "learning_rate": 1.2764348039136634e-05, "loss": 0.2651, "step": 5409 }, { "epoch": 0.4285997227173698, "grad_norm": 2.055183409829232, "learning_rate": 1.2761881907815744e-05, "loss": 0.3857, "step": 5410 }, { "epoch": 0.4286789463260052, "grad_norm": 1.8964593392670914, "learning_rate": 1.275941559463822e-05, "loss": 0.2867, "step": 5411 }, { "epoch": 0.4287581699346405, "grad_norm": 1.6525673120776647, "learning_rate": 1.2756949099766458e-05, "loss": 0.2624, "step": 5412 }, { "epoch": 0.4288373935432759, "grad_norm": 1.4822272390514033, "learning_rate": 1.2754482423362861e-05, "loss": 0.2376, "step": 5413 }, { "epoch": 0.42891661715191126, "grad_norm": 1.8093507181095638, "learning_rate": 1.2752015565589852e-05, "loss": 0.2831, "step": 5414 }, { "epoch": 0.42899584076054664, "grad_norm": 1.6626516576781947, "learning_rate": 1.2749548526609858e-05, "loss": 0.2935, "step": 5415 }, { "epoch": 0.429075064369182, "grad_norm": 1.690352518528334, "learning_rate": 1.2747081306585325e-05, "loss": 0.2507, "step": 5416 }, { "epoch": 0.4291542879778174, "grad_norm": 1.6597317607978157, "learning_rate": 1.2744613905678707e-05, "loss": 0.2959, "step": 5417 }, { "epoch": 0.4292335115864528, "grad_norm": 1.5780426260009828, "learning_rate": 1.2742146324052466e-05, "loss": 0.1623, "step": 5418 }, { "epoch": 0.4293127351950881, "grad_norm": 1.9898694340271916, "learning_rate": 1.273967856186909e-05, "loss": 0.3279, "step": 5419 }, { "epoch": 0.4293919588037235, "grad_norm": 1.8811669881933684, "learning_rate": 1.2737210619291058e-05, "loss": 0.2585, "step": 5420 }, { "epoch": 0.4294711824123589, "grad_norm": 1.68178818467249, "learning_rate": 1.2734742496480878e-05, "loss": 0.2954, "step": 5421 }, { "epoch": 0.42955040602099426, "grad_norm": 1.93435763884173, "learning_rate": 1.2732274193601066e-05, "loss": 0.3486, "step": 5422 }, { "epoch": 0.42962962962962964, "grad_norm": 1.6292207452604424, "learning_rate": 1.2729805710814142e-05, "loss": 0.3197, "step": 5423 }, { "epoch": 0.429708853238265, "grad_norm": 1.4927795912425283, "learning_rate": 1.2727337048282649e-05, "loss": 0.2438, "step": 5424 }, { "epoch": 0.4297880768469004, "grad_norm": 1.745199533843985, "learning_rate": 1.2724868206169134e-05, "loss": 0.1919, "step": 5425 }, { "epoch": 0.42986730045553573, "grad_norm": 1.7559047035876254, "learning_rate": 1.2722399184636158e-05, "loss": 0.2468, "step": 5426 }, { "epoch": 0.4299465240641711, "grad_norm": 1.8885260071622678, "learning_rate": 1.2719929983846298e-05, "loss": 0.1916, "step": 5427 }, { "epoch": 0.4300257476728065, "grad_norm": 1.4265432023812874, "learning_rate": 1.2717460603962132e-05, "loss": 0.2754, "step": 5428 }, { "epoch": 0.4301049712814419, "grad_norm": 1.9253587278463462, "learning_rate": 1.2714991045146265e-05, "loss": 0.3184, "step": 5429 }, { "epoch": 0.43018419489007725, "grad_norm": 2.7356114423023006, "learning_rate": 1.2712521307561298e-05, "loss": 0.2386, "step": 5430 }, { "epoch": 0.43026341849871264, "grad_norm": 1.7161370398496638, "learning_rate": 1.2710051391369857e-05, "loss": 0.212, "step": 5431 }, { "epoch": 0.430342642107348, "grad_norm": 2.126274607968103, "learning_rate": 1.270758129673457e-05, "loss": 0.259, "step": 5432 }, { "epoch": 0.43042186571598334, "grad_norm": 1.9176940533748545, "learning_rate": 1.2705111023818083e-05, "loss": 0.3205, "step": 5433 }, { "epoch": 0.4305010893246187, "grad_norm": 1.7948539985560719, "learning_rate": 1.2702640572783051e-05, "loss": 0.1773, "step": 5434 }, { "epoch": 0.4305803129332541, "grad_norm": 2.3653275001351006, "learning_rate": 1.2700169943792143e-05, "loss": 0.3302, "step": 5435 }, { "epoch": 0.4306595365418895, "grad_norm": 1.7949426445358412, "learning_rate": 1.2697699137008038e-05, "loss": 0.161, "step": 5436 }, { "epoch": 0.43073876015052487, "grad_norm": 1.5846959939957417, "learning_rate": 1.2695228152593419e-05, "loss": 0.2435, "step": 5437 }, { "epoch": 0.43081798375916025, "grad_norm": 1.6607361026155776, "learning_rate": 1.2692756990710998e-05, "loss": 0.2532, "step": 5438 }, { "epoch": 0.4308972073677956, "grad_norm": 1.8561217870204176, "learning_rate": 1.269028565152349e-05, "loss": 0.2077, "step": 5439 }, { "epoch": 0.43097643097643096, "grad_norm": 1.639455907986524, "learning_rate": 1.2687814135193613e-05, "loss": 0.2168, "step": 5440 }, { "epoch": 0.43105565458506634, "grad_norm": 1.5721960135252286, "learning_rate": 1.2685342441884107e-05, "loss": 0.2295, "step": 5441 }, { "epoch": 0.4311348781937017, "grad_norm": 2.3975404497159305, "learning_rate": 1.2682870571757724e-05, "loss": 0.3942, "step": 5442 }, { "epoch": 0.4312141018023371, "grad_norm": 2.119407524807117, "learning_rate": 1.2680398524977222e-05, "loss": 0.3352, "step": 5443 }, { "epoch": 0.4312933254109725, "grad_norm": 2.2495309558705543, "learning_rate": 1.2677926301705376e-05, "loss": 0.2969, "step": 5444 }, { "epoch": 0.43137254901960786, "grad_norm": 2.0352016589185546, "learning_rate": 1.2675453902104967e-05, "loss": 0.3301, "step": 5445 }, { "epoch": 0.4314517726282432, "grad_norm": 1.919627350714918, "learning_rate": 1.2672981326338793e-05, "loss": 0.2662, "step": 5446 }, { "epoch": 0.4315309962368786, "grad_norm": 1.7635225274372561, "learning_rate": 1.267050857456966e-05, "loss": 0.3038, "step": 5447 }, { "epoch": 0.43161021984551395, "grad_norm": 2.2901971036837097, "learning_rate": 1.2668035646960384e-05, "loss": 0.2443, "step": 5448 }, { "epoch": 0.43168944345414934, "grad_norm": 1.6261062971369695, "learning_rate": 1.2665562543673803e-05, "loss": 0.2792, "step": 5449 }, { "epoch": 0.4317686670627847, "grad_norm": 1.5629127154479712, "learning_rate": 1.2663089264872751e-05, "loss": 0.2346, "step": 5450 }, { "epoch": 0.4318478906714201, "grad_norm": 1.4355018568123559, "learning_rate": 1.2660615810720087e-05, "loss": 0.2289, "step": 5451 }, { "epoch": 0.4319271142800555, "grad_norm": 1.6112527287999885, "learning_rate": 1.2658142181378675e-05, "loss": 0.2412, "step": 5452 }, { "epoch": 0.4320063378886908, "grad_norm": 1.5799455883585092, "learning_rate": 1.2655668377011387e-05, "loss": 0.2263, "step": 5453 }, { "epoch": 0.4320855614973262, "grad_norm": 1.71696534381602, "learning_rate": 1.2653194397781117e-05, "loss": 0.3079, "step": 5454 }, { "epoch": 0.43216478510596157, "grad_norm": 1.4774875816103368, "learning_rate": 1.2650720243850762e-05, "loss": 0.2892, "step": 5455 }, { "epoch": 0.43224400871459695, "grad_norm": 1.9751794764373034, "learning_rate": 1.2648245915383233e-05, "loss": 0.3497, "step": 5456 }, { "epoch": 0.43232323232323233, "grad_norm": 1.9365298520538066, "learning_rate": 1.2645771412541455e-05, "loss": 0.306, "step": 5457 }, { "epoch": 0.4324024559318677, "grad_norm": 1.688136530431482, "learning_rate": 1.2643296735488355e-05, "loss": 0.2445, "step": 5458 }, { "epoch": 0.4324816795405031, "grad_norm": 1.7619735651018633, "learning_rate": 1.2640821884386887e-05, "loss": 0.3825, "step": 5459 }, { "epoch": 0.4325609031491384, "grad_norm": 2.0817750274592832, "learning_rate": 1.2638346859400006e-05, "loss": 0.2964, "step": 5460 }, { "epoch": 0.4326401267577738, "grad_norm": 1.7423077822766184, "learning_rate": 1.2635871660690677e-05, "loss": 0.2889, "step": 5461 }, { "epoch": 0.4327193503664092, "grad_norm": 1.8896748711458533, "learning_rate": 1.2633396288421884e-05, "loss": 0.3743, "step": 5462 }, { "epoch": 0.43279857397504456, "grad_norm": 1.9597792123374402, "learning_rate": 1.2630920742756616e-05, "loss": 0.2499, "step": 5463 }, { "epoch": 0.43287779758367995, "grad_norm": 1.7990870222222786, "learning_rate": 1.2628445023857875e-05, "loss": 0.3014, "step": 5464 }, { "epoch": 0.43295702119231533, "grad_norm": 1.572313647538007, "learning_rate": 1.2625969131888677e-05, "loss": 0.2474, "step": 5465 }, { "epoch": 0.4330362448009507, "grad_norm": 1.7151617500919933, "learning_rate": 1.2623493067012047e-05, "loss": 0.2932, "step": 5466 }, { "epoch": 0.43311546840958604, "grad_norm": 1.167642517070261, "learning_rate": 1.2621016829391022e-05, "loss": 0.1457, "step": 5467 }, { "epoch": 0.4331946920182214, "grad_norm": 1.2933591746811266, "learning_rate": 1.2618540419188654e-05, "loss": 0.2202, "step": 5468 }, { "epoch": 0.4332739156268568, "grad_norm": 2.0055816596212512, "learning_rate": 1.2616063836567994e-05, "loss": 0.2337, "step": 5469 }, { "epoch": 0.4333531392354922, "grad_norm": 1.5751077740266293, "learning_rate": 1.2613587081692118e-05, "loss": 0.2615, "step": 5470 }, { "epoch": 0.43343236284412756, "grad_norm": 2.2820851958304584, "learning_rate": 1.2611110154724113e-05, "loss": 0.4322, "step": 5471 }, { "epoch": 0.43351158645276294, "grad_norm": 1.8239004930029694, "learning_rate": 1.2608633055827064e-05, "loss": 0.2882, "step": 5472 }, { "epoch": 0.4335908100613983, "grad_norm": 1.5706907778246673, "learning_rate": 1.260615578516408e-05, "loss": 0.194, "step": 5473 }, { "epoch": 0.43367003367003365, "grad_norm": 1.8305559062055323, "learning_rate": 1.260367834289828e-05, "loss": 0.2851, "step": 5474 }, { "epoch": 0.43374925727866903, "grad_norm": 1.7282616822156323, "learning_rate": 1.2601200729192789e-05, "loss": 0.3158, "step": 5475 }, { "epoch": 0.4338284808873044, "grad_norm": 1.329400155830024, "learning_rate": 1.2598722944210746e-05, "loss": 0.2117, "step": 5476 }, { "epoch": 0.4339077044959398, "grad_norm": 2.166495569146077, "learning_rate": 1.25962449881153e-05, "loss": 0.365, "step": 5477 }, { "epoch": 0.4339869281045752, "grad_norm": 1.653593835744873, "learning_rate": 1.2593766861069615e-05, "loss": 0.251, "step": 5478 }, { "epoch": 0.43406615171321056, "grad_norm": 1.812698542999159, "learning_rate": 1.2591288563236864e-05, "loss": 0.2841, "step": 5479 }, { "epoch": 0.4341453753218459, "grad_norm": 1.956474221697521, "learning_rate": 1.2588810094780227e-05, "loss": 0.3164, "step": 5480 }, { "epoch": 0.43422459893048126, "grad_norm": 2.2571850808896183, "learning_rate": 1.2586331455862902e-05, "loss": 0.2218, "step": 5481 }, { "epoch": 0.43430382253911665, "grad_norm": 1.6317531260474232, "learning_rate": 1.2583852646648097e-05, "loss": 0.2784, "step": 5482 }, { "epoch": 0.434383046147752, "grad_norm": 2.3992499432315633, "learning_rate": 1.2581373667299026e-05, "loss": 0.2842, "step": 5483 }, { "epoch": 0.4344622697563874, "grad_norm": 1.8156317465830156, "learning_rate": 1.257889451797892e-05, "loss": 0.2902, "step": 5484 }, { "epoch": 0.4345414933650228, "grad_norm": 1.867246536200824, "learning_rate": 1.257641519885102e-05, "loss": 0.2513, "step": 5485 }, { "epoch": 0.43462071697365817, "grad_norm": 4.821655929546751, "learning_rate": 1.2573935710078576e-05, "loss": 0.2758, "step": 5486 }, { "epoch": 0.4346999405822935, "grad_norm": 1.900495069813588, "learning_rate": 1.2571456051824851e-05, "loss": 0.27, "step": 5487 }, { "epoch": 0.4347791641909289, "grad_norm": 1.7776921465600903, "learning_rate": 1.2568976224253115e-05, "loss": 0.1825, "step": 5488 }, { "epoch": 0.43485838779956426, "grad_norm": 1.9167540797299247, "learning_rate": 1.256649622752666e-05, "loss": 0.3305, "step": 5489 }, { "epoch": 0.43493761140819964, "grad_norm": 1.443967542835639, "learning_rate": 1.2564016061808774e-05, "loss": 0.1745, "step": 5490 }, { "epoch": 0.435016835016835, "grad_norm": 1.8918603333841295, "learning_rate": 1.2561535727262769e-05, "loss": 0.2952, "step": 5491 }, { "epoch": 0.4350960586254704, "grad_norm": 1.7909017470976067, "learning_rate": 1.2559055224051963e-05, "loss": 0.2702, "step": 5492 }, { "epoch": 0.4351752822341058, "grad_norm": 1.7642961151086562, "learning_rate": 1.2556574552339682e-05, "loss": 0.2888, "step": 5493 }, { "epoch": 0.4352545058427411, "grad_norm": 2.232147306807697, "learning_rate": 1.2554093712289267e-05, "loss": 0.4624, "step": 5494 }, { "epoch": 0.4353337294513765, "grad_norm": 1.994304661816742, "learning_rate": 1.2551612704064074e-05, "loss": 0.2933, "step": 5495 }, { "epoch": 0.4354129530600119, "grad_norm": 1.863028396711072, "learning_rate": 1.2549131527827458e-05, "loss": 0.341, "step": 5496 }, { "epoch": 0.43549217666864726, "grad_norm": 1.9004633400564483, "learning_rate": 1.2546650183742801e-05, "loss": 0.2501, "step": 5497 }, { "epoch": 0.43557140027728264, "grad_norm": 1.7787182114905022, "learning_rate": 1.254416867197348e-05, "loss": 0.2681, "step": 5498 }, { "epoch": 0.435650623885918, "grad_norm": 2.1338241241751894, "learning_rate": 1.2541686992682896e-05, "loss": 0.351, "step": 5499 }, { "epoch": 0.4357298474945534, "grad_norm": 2.1444470836773273, "learning_rate": 1.2539205146034452e-05, "loss": 0.3059, "step": 5500 }, { "epoch": 0.4358090711031887, "grad_norm": 1.804899149245901, "learning_rate": 1.2536723132191566e-05, "loss": 0.2822, "step": 5501 }, { "epoch": 0.4358882947118241, "grad_norm": 1.6909439641368984, "learning_rate": 1.2534240951317669e-05, "loss": 0.3134, "step": 5502 }, { "epoch": 0.4359675183204595, "grad_norm": 1.6813275567337902, "learning_rate": 1.25317586035762e-05, "loss": 0.2804, "step": 5503 }, { "epoch": 0.43604674192909487, "grad_norm": 1.9269916240950935, "learning_rate": 1.2529276089130607e-05, "loss": 0.2858, "step": 5504 }, { "epoch": 0.43612596553773025, "grad_norm": 1.4661097092602278, "learning_rate": 1.2526793408144355e-05, "loss": 0.2363, "step": 5505 }, { "epoch": 0.43620518914636564, "grad_norm": 2.2589710537496006, "learning_rate": 1.2524310560780914e-05, "loss": 0.3209, "step": 5506 }, { "epoch": 0.436284412755001, "grad_norm": 1.7050742570119874, "learning_rate": 1.2521827547203773e-05, "loss": 0.2347, "step": 5507 }, { "epoch": 0.43636363636363634, "grad_norm": 1.4272216551852428, "learning_rate": 1.2519344367576418e-05, "loss": 0.1991, "step": 5508 }, { "epoch": 0.4364428599722717, "grad_norm": 1.7641149987040359, "learning_rate": 1.2516861022062361e-05, "loss": 0.2518, "step": 5509 }, { "epoch": 0.4365220835809071, "grad_norm": 1.7226810933862249, "learning_rate": 1.2514377510825113e-05, "loss": 0.245, "step": 5510 }, { "epoch": 0.4366013071895425, "grad_norm": 1.817967480069139, "learning_rate": 1.2511893834028209e-05, "loss": 0.3702, "step": 5511 }, { "epoch": 0.43668053079817787, "grad_norm": 1.468023453662494, "learning_rate": 1.2509409991835178e-05, "loss": 0.2824, "step": 5512 }, { "epoch": 0.43675975440681325, "grad_norm": 1.8088047701744943, "learning_rate": 1.2506925984409574e-05, "loss": 0.2614, "step": 5513 }, { "epoch": 0.43683897801544863, "grad_norm": 1.9148143300817841, "learning_rate": 1.250444181191496e-05, "loss": 0.2179, "step": 5514 }, { "epoch": 0.43691820162408396, "grad_norm": 1.4062051180754462, "learning_rate": 1.2501957474514898e-05, "loss": 0.2315, "step": 5515 }, { "epoch": 0.43699742523271934, "grad_norm": 1.5843020885835293, "learning_rate": 1.249947297237298e-05, "loss": 0.2017, "step": 5516 }, { "epoch": 0.4370766488413547, "grad_norm": 1.4593567429079943, "learning_rate": 1.249698830565279e-05, "loss": 0.3039, "step": 5517 }, { "epoch": 0.4371558724499901, "grad_norm": 1.6725745706863753, "learning_rate": 1.2494503474517935e-05, "loss": 0.1894, "step": 5518 }, { "epoch": 0.4372350960586255, "grad_norm": 1.5462750484566714, "learning_rate": 1.2492018479132033e-05, "loss": 0.2277, "step": 5519 }, { "epoch": 0.43731431966726086, "grad_norm": 2.1372395529647394, "learning_rate": 1.2489533319658703e-05, "loss": 0.2408, "step": 5520 }, { "epoch": 0.4373935432758962, "grad_norm": 2.101696212519979, "learning_rate": 1.2487047996261578e-05, "loss": 0.3196, "step": 5521 }, { "epoch": 0.43747276688453157, "grad_norm": 2.085075491133594, "learning_rate": 1.2484562509104316e-05, "loss": 0.3495, "step": 5522 }, { "epoch": 0.43755199049316695, "grad_norm": 1.596447449650139, "learning_rate": 1.2482076858350564e-05, "loss": 0.2183, "step": 5523 }, { "epoch": 0.43763121410180233, "grad_norm": 2.0405537203790436, "learning_rate": 1.2479591044163997e-05, "loss": 0.3685, "step": 5524 }, { "epoch": 0.4377104377104377, "grad_norm": 1.6272053409271723, "learning_rate": 1.2477105066708286e-05, "loss": 0.2153, "step": 5525 }, { "epoch": 0.4377896613190731, "grad_norm": 2.058652815114697, "learning_rate": 1.2474618926147129e-05, "loss": 0.2604, "step": 5526 }, { "epoch": 0.4378688849277085, "grad_norm": 1.4614896895086462, "learning_rate": 1.2472132622644222e-05, "loss": 0.2066, "step": 5527 }, { "epoch": 0.4379481085363438, "grad_norm": 2.300909346858426, "learning_rate": 1.2469646156363276e-05, "loss": 0.3558, "step": 5528 }, { "epoch": 0.4380273321449792, "grad_norm": 2.2419840792969365, "learning_rate": 1.2467159527468014e-05, "loss": 0.3688, "step": 5529 }, { "epoch": 0.43810655575361457, "grad_norm": 2.0755338587433956, "learning_rate": 1.246467273612217e-05, "loss": 0.3243, "step": 5530 }, { "epoch": 0.43818577936224995, "grad_norm": 1.8103273346738842, "learning_rate": 1.2462185782489484e-05, "loss": 0.2803, "step": 5531 }, { "epoch": 0.43826500297088533, "grad_norm": 1.917407555372304, "learning_rate": 1.2459698666733712e-05, "loss": 0.3254, "step": 5532 }, { "epoch": 0.4383442265795207, "grad_norm": 1.5443887552858382, "learning_rate": 1.2457211389018619e-05, "loss": 0.2175, "step": 5533 }, { "epoch": 0.4384234501881561, "grad_norm": 1.7551780932551353, "learning_rate": 1.2454723949507978e-05, "loss": 0.2349, "step": 5534 }, { "epoch": 0.4385026737967914, "grad_norm": 1.7325451357549508, "learning_rate": 1.2452236348365579e-05, "loss": 0.2432, "step": 5535 }, { "epoch": 0.4385818974054268, "grad_norm": 1.5421334749096918, "learning_rate": 1.244974858575521e-05, "loss": 0.2165, "step": 5536 }, { "epoch": 0.4386611210140622, "grad_norm": 1.7874818317400485, "learning_rate": 1.2447260661840688e-05, "loss": 0.2635, "step": 5537 }, { "epoch": 0.43874034462269756, "grad_norm": 1.7830562624803026, "learning_rate": 1.2444772576785828e-05, "loss": 0.2868, "step": 5538 }, { "epoch": 0.43881956823133295, "grad_norm": 1.7947636442883355, "learning_rate": 1.2442284330754456e-05, "loss": 0.2936, "step": 5539 }, { "epoch": 0.4388987918399683, "grad_norm": 1.6460127837170502, "learning_rate": 1.2439795923910413e-05, "loss": 0.2217, "step": 5540 }, { "epoch": 0.4389780154486037, "grad_norm": 1.8546204948439255, "learning_rate": 1.2437307356417547e-05, "loss": 0.304, "step": 5541 }, { "epoch": 0.43905723905723903, "grad_norm": 1.6205382497888166, "learning_rate": 1.2434818628439718e-05, "loss": 0.2257, "step": 5542 }, { "epoch": 0.4391364626658744, "grad_norm": 1.9151888255326355, "learning_rate": 1.24323297401408e-05, "loss": 0.2797, "step": 5543 }, { "epoch": 0.4392156862745098, "grad_norm": 2.0800274853519447, "learning_rate": 1.2429840691684672e-05, "loss": 0.3313, "step": 5544 }, { "epoch": 0.4392949098831452, "grad_norm": 1.587651224950868, "learning_rate": 1.2427351483235224e-05, "loss": 0.2514, "step": 5545 }, { "epoch": 0.43937413349178056, "grad_norm": 2.575196396627111, "learning_rate": 1.2424862114956367e-05, "loss": 0.3126, "step": 5546 }, { "epoch": 0.43945335710041594, "grad_norm": 2.1599715381853, "learning_rate": 1.2422372587012001e-05, "loss": 0.3151, "step": 5547 }, { "epoch": 0.4395325807090513, "grad_norm": 1.8588654981225188, "learning_rate": 1.2419882899566056e-05, "loss": 0.2655, "step": 5548 }, { "epoch": 0.43961180431768665, "grad_norm": 1.7703227924498588, "learning_rate": 1.241739305278247e-05, "loss": 0.3364, "step": 5549 }, { "epoch": 0.43969102792632203, "grad_norm": 1.9765304445076906, "learning_rate": 1.2414903046825178e-05, "loss": 0.2676, "step": 5550 }, { "epoch": 0.4397702515349574, "grad_norm": 1.61452327389998, "learning_rate": 1.2412412881858142e-05, "loss": 0.236, "step": 5551 }, { "epoch": 0.4398494751435928, "grad_norm": 1.7669374352760172, "learning_rate": 1.240992255804533e-05, "loss": 0.1895, "step": 5552 }, { "epoch": 0.4399286987522282, "grad_norm": 2.105520387711574, "learning_rate": 1.2407432075550707e-05, "loss": 0.2739, "step": 5553 }, { "epoch": 0.44000792236086356, "grad_norm": 1.2661266224547834, "learning_rate": 1.2404941434538269e-05, "loss": 0.1389, "step": 5554 }, { "epoch": 0.4400871459694989, "grad_norm": 1.9841313451169875, "learning_rate": 1.2402450635172008e-05, "loss": 0.3841, "step": 5555 }, { "epoch": 0.44016636957813426, "grad_norm": 1.34577365985844, "learning_rate": 1.2399959677615932e-05, "loss": 0.1794, "step": 5556 }, { "epoch": 0.44024559318676965, "grad_norm": 1.7286740148924424, "learning_rate": 1.239746856203406e-05, "loss": 0.2663, "step": 5557 }, { "epoch": 0.440324816795405, "grad_norm": 2.067470759268853, "learning_rate": 1.239497728859042e-05, "loss": 0.3059, "step": 5558 }, { "epoch": 0.4404040404040404, "grad_norm": 1.5996789879842293, "learning_rate": 1.2392485857449048e-05, "loss": 0.2873, "step": 5559 }, { "epoch": 0.4404832640126758, "grad_norm": 1.925739671400025, "learning_rate": 1.2389994268773995e-05, "loss": 0.2891, "step": 5560 }, { "epoch": 0.44056248762131117, "grad_norm": 1.8705449086688861, "learning_rate": 1.238750252272932e-05, "loss": 0.2841, "step": 5561 }, { "epoch": 0.4406417112299465, "grad_norm": 1.7993176556123558, "learning_rate": 1.2385010619479093e-05, "loss": 0.2858, "step": 5562 }, { "epoch": 0.4407209348385819, "grad_norm": 1.6503914610901576, "learning_rate": 1.2382518559187389e-05, "loss": 0.2867, "step": 5563 }, { "epoch": 0.44080015844721726, "grad_norm": 1.562713994438325, "learning_rate": 1.23800263420183e-05, "loss": 0.2182, "step": 5564 }, { "epoch": 0.44087938205585264, "grad_norm": 1.5569650355680327, "learning_rate": 1.2377533968135934e-05, "loss": 0.2551, "step": 5565 }, { "epoch": 0.440958605664488, "grad_norm": 1.530833590028577, "learning_rate": 1.2375041437704394e-05, "loss": 0.1657, "step": 5566 }, { "epoch": 0.4410378292731234, "grad_norm": 1.4333135753332196, "learning_rate": 1.2372548750887805e-05, "loss": 0.2477, "step": 5567 }, { "epoch": 0.4411170528817588, "grad_norm": 1.6057533183158326, "learning_rate": 1.2370055907850293e-05, "loss": 0.2813, "step": 5568 }, { "epoch": 0.4411962764903941, "grad_norm": 1.8208680548108085, "learning_rate": 1.2367562908756005e-05, "loss": 0.2195, "step": 5569 }, { "epoch": 0.4412755000990295, "grad_norm": 1.4857069970802577, "learning_rate": 1.2365069753769092e-05, "loss": 0.1942, "step": 5570 }, { "epoch": 0.4413547237076649, "grad_norm": 1.738289976258963, "learning_rate": 1.2362576443053716e-05, "loss": 0.2732, "step": 5571 }, { "epoch": 0.44143394731630026, "grad_norm": 1.8382833584529485, "learning_rate": 1.2360082976774049e-05, "loss": 0.3294, "step": 5572 }, { "epoch": 0.44151317092493564, "grad_norm": 1.269740545350416, "learning_rate": 1.2357589355094275e-05, "loss": 0.149, "step": 5573 }, { "epoch": 0.441592394533571, "grad_norm": 1.4254127836215902, "learning_rate": 1.2355095578178582e-05, "loss": 0.2864, "step": 5574 }, { "epoch": 0.4416716181422064, "grad_norm": 1.550504855024614, "learning_rate": 1.2352601646191182e-05, "loss": 0.2744, "step": 5575 }, { "epoch": 0.4417508417508417, "grad_norm": 1.8465370653031214, "learning_rate": 1.235010755929628e-05, "loss": 0.2366, "step": 5576 }, { "epoch": 0.4418300653594771, "grad_norm": 2.52399882626482, "learning_rate": 1.2347613317658105e-05, "loss": 0.2743, "step": 5577 }, { "epoch": 0.4419092889681125, "grad_norm": 1.9979630358777374, "learning_rate": 1.234511892144089e-05, "loss": 0.3342, "step": 5578 }, { "epoch": 0.44198851257674787, "grad_norm": 1.8292849580158084, "learning_rate": 1.2342624370808876e-05, "loss": 0.3391, "step": 5579 }, { "epoch": 0.44206773618538325, "grad_norm": 1.8840703845049853, "learning_rate": 1.2340129665926319e-05, "loss": 0.2061, "step": 5580 }, { "epoch": 0.44214695979401863, "grad_norm": 1.4131557429062231, "learning_rate": 1.2337634806957486e-05, "loss": 0.199, "step": 5581 }, { "epoch": 0.442226183402654, "grad_norm": 1.4812768691978588, "learning_rate": 1.2335139794066645e-05, "loss": 0.2136, "step": 5582 }, { "epoch": 0.44230540701128934, "grad_norm": 1.3928135927593728, "learning_rate": 1.2332644627418088e-05, "loss": 0.2, "step": 5583 }, { "epoch": 0.4423846306199247, "grad_norm": 1.3945721608753796, "learning_rate": 1.2330149307176105e-05, "loss": 0.2361, "step": 5584 }, { "epoch": 0.4424638542285601, "grad_norm": 1.7030500572131864, "learning_rate": 1.2327653833505005e-05, "loss": 0.2669, "step": 5585 }, { "epoch": 0.4425430778371955, "grad_norm": 1.8683991648700715, "learning_rate": 1.2325158206569095e-05, "loss": 0.3727, "step": 5586 }, { "epoch": 0.44262230144583087, "grad_norm": 1.803765743201922, "learning_rate": 1.232266242653271e-05, "loss": 0.2772, "step": 5587 }, { "epoch": 0.44270152505446625, "grad_norm": 1.7128991807011351, "learning_rate": 1.2320166493560176e-05, "loss": 0.2883, "step": 5588 }, { "epoch": 0.44278074866310163, "grad_norm": 2.1271880829026184, "learning_rate": 1.2317670407815844e-05, "loss": 0.3474, "step": 5589 }, { "epoch": 0.44285997227173696, "grad_norm": 2.0133423031209925, "learning_rate": 1.2315174169464068e-05, "loss": 0.2137, "step": 5590 }, { "epoch": 0.44293919588037234, "grad_norm": 1.7251046875037204, "learning_rate": 1.2312677778669211e-05, "loss": 0.2447, "step": 5591 }, { "epoch": 0.4430184194890077, "grad_norm": 1.771664297739277, "learning_rate": 1.2310181235595652e-05, "loss": 0.2438, "step": 5592 }, { "epoch": 0.4430976430976431, "grad_norm": 1.5754437258239973, "learning_rate": 1.2307684540407775e-05, "loss": 0.2102, "step": 5593 }, { "epoch": 0.4431768667062785, "grad_norm": 1.6464045481880836, "learning_rate": 1.230518769326997e-05, "loss": 0.217, "step": 5594 }, { "epoch": 0.44325609031491386, "grad_norm": 1.307725877865682, "learning_rate": 1.2302690694346654e-05, "loss": 0.136, "step": 5595 }, { "epoch": 0.4433353139235492, "grad_norm": 1.6689501118796468, "learning_rate": 1.230019354380223e-05, "loss": 0.2133, "step": 5596 }, { "epoch": 0.44341453753218457, "grad_norm": 1.5316433588924565, "learning_rate": 1.2297696241801133e-05, "loss": 0.2113, "step": 5597 }, { "epoch": 0.44349376114081995, "grad_norm": 1.5760364514371232, "learning_rate": 1.2295198788507794e-05, "loss": 0.239, "step": 5598 }, { "epoch": 0.44357298474945533, "grad_norm": 1.7627453645705313, "learning_rate": 1.2292701184086656e-05, "loss": 0.26, "step": 5599 }, { "epoch": 0.4436522083580907, "grad_norm": 1.353336064216166, "learning_rate": 1.2290203428702178e-05, "loss": 0.2254, "step": 5600 }, { "epoch": 0.4437314319667261, "grad_norm": 1.990131106787289, "learning_rate": 1.2287705522518824e-05, "loss": 0.2373, "step": 5601 }, { "epoch": 0.4438106555753615, "grad_norm": 2.0707576472903306, "learning_rate": 1.228520746570107e-05, "loss": 0.2317, "step": 5602 }, { "epoch": 0.4438898791839968, "grad_norm": 1.9003787282143014, "learning_rate": 1.22827092584134e-05, "loss": 0.2423, "step": 5603 }, { "epoch": 0.4439691027926322, "grad_norm": 2.0097484417478255, "learning_rate": 1.2280210900820309e-05, "loss": 0.2623, "step": 5604 }, { "epoch": 0.44404832640126757, "grad_norm": 1.4604997957290904, "learning_rate": 1.22777123930863e-05, "loss": 0.1813, "step": 5605 }, { "epoch": 0.44412755000990295, "grad_norm": 1.586622149428566, "learning_rate": 1.227521373537589e-05, "loss": 0.217, "step": 5606 }, { "epoch": 0.44420677361853833, "grad_norm": 1.8423769598148934, "learning_rate": 1.2272714927853604e-05, "loss": 0.2672, "step": 5607 }, { "epoch": 0.4442859972271737, "grad_norm": 1.8180083283009185, "learning_rate": 1.2270215970683977e-05, "loss": 0.2677, "step": 5608 }, { "epoch": 0.4443652208358091, "grad_norm": 1.678765782490066, "learning_rate": 1.226771686403155e-05, "loss": 0.2714, "step": 5609 }, { "epoch": 0.4444444444444444, "grad_norm": 1.3530826818979518, "learning_rate": 1.2265217608060879e-05, "loss": 0.2218, "step": 5610 }, { "epoch": 0.4445236680530798, "grad_norm": 1.7687839103939773, "learning_rate": 1.226271820293653e-05, "loss": 0.2488, "step": 5611 }, { "epoch": 0.4446028916617152, "grad_norm": 1.4532043185049228, "learning_rate": 1.2260218648823073e-05, "loss": 0.2979, "step": 5612 }, { "epoch": 0.44468211527035056, "grad_norm": 1.5051170978763282, "learning_rate": 1.2257718945885096e-05, "loss": 0.2067, "step": 5613 }, { "epoch": 0.44476133887898595, "grad_norm": 1.599107926559045, "learning_rate": 1.2255219094287186e-05, "loss": 0.2974, "step": 5614 }, { "epoch": 0.4448405624876213, "grad_norm": 1.6996675287274634, "learning_rate": 1.225271909419395e-05, "loss": 0.2359, "step": 5615 }, { "epoch": 0.4449197860962567, "grad_norm": 1.5911728489437782, "learning_rate": 1.2250218945770005e-05, "loss": 0.2298, "step": 5616 }, { "epoch": 0.44499900970489203, "grad_norm": 1.7819883921466069, "learning_rate": 1.2247718649179966e-05, "loss": 0.3332, "step": 5617 }, { "epoch": 0.4450782333135274, "grad_norm": 1.7027583867730316, "learning_rate": 1.2245218204588474e-05, "loss": 0.2488, "step": 5618 }, { "epoch": 0.4451574569221628, "grad_norm": 1.7773787609735372, "learning_rate": 1.2242717612160163e-05, "loss": 0.2883, "step": 5619 }, { "epoch": 0.4452366805307982, "grad_norm": 1.9673029481501914, "learning_rate": 1.2240216872059687e-05, "loss": 0.3515, "step": 5620 }, { "epoch": 0.44531590413943356, "grad_norm": 1.621800032743037, "learning_rate": 1.2237715984451713e-05, "loss": 0.2755, "step": 5621 }, { "epoch": 0.44539512774806894, "grad_norm": 1.6726240888379609, "learning_rate": 1.2235214949500906e-05, "loss": 0.2304, "step": 5622 }, { "epoch": 0.4454743513567043, "grad_norm": 1.6827391371357892, "learning_rate": 1.223271376737195e-05, "loss": 0.3081, "step": 5623 }, { "epoch": 0.44555357496533965, "grad_norm": 1.7660659715012286, "learning_rate": 1.2230212438229539e-05, "loss": 0.2384, "step": 5624 }, { "epoch": 0.44563279857397503, "grad_norm": 1.4269374380961006, "learning_rate": 1.2227710962238367e-05, "loss": 0.2524, "step": 5625 }, { "epoch": 0.4457120221826104, "grad_norm": 1.8563296437003585, "learning_rate": 1.2225209339563144e-05, "loss": 0.2315, "step": 5626 }, { "epoch": 0.4457912457912458, "grad_norm": 1.5813856049921018, "learning_rate": 1.22227075703686e-05, "loss": 0.2439, "step": 5627 }, { "epoch": 0.4458704693998812, "grad_norm": 1.5684642176959884, "learning_rate": 1.2220205654819453e-05, "loss": 0.2004, "step": 5628 }, { "epoch": 0.44594969300851656, "grad_norm": 1.9540375860434265, "learning_rate": 1.2217703593080445e-05, "loss": 0.3284, "step": 5629 }, { "epoch": 0.44602891661715194, "grad_norm": 1.6334800298115226, "learning_rate": 1.221520138531633e-05, "loss": 0.2809, "step": 5630 }, { "epoch": 0.44610814022578726, "grad_norm": 1.4658006142445195, "learning_rate": 1.2212699031691861e-05, "loss": 0.1689, "step": 5631 }, { "epoch": 0.44618736383442265, "grad_norm": 1.7900767546554626, "learning_rate": 1.221019653237181e-05, "loss": 0.2753, "step": 5632 }, { "epoch": 0.446266587443058, "grad_norm": 1.7836698246748153, "learning_rate": 1.2207693887520949e-05, "loss": 0.2829, "step": 5633 }, { "epoch": 0.4463458110516934, "grad_norm": 1.8930673819747978, "learning_rate": 1.2205191097304067e-05, "loss": 0.1892, "step": 5634 }, { "epoch": 0.4464250346603288, "grad_norm": 1.667504374561492, "learning_rate": 1.2202688161885967e-05, "loss": 0.2881, "step": 5635 }, { "epoch": 0.44650425826896417, "grad_norm": 1.8539069139441045, "learning_rate": 1.2200185081431446e-05, "loss": 0.2925, "step": 5636 }, { "epoch": 0.4465834818775995, "grad_norm": 1.9632896204663735, "learning_rate": 1.2197681856105326e-05, "loss": 0.2926, "step": 5637 }, { "epoch": 0.4466627054862349, "grad_norm": 1.4986191288150035, "learning_rate": 1.219517848607243e-05, "loss": 0.242, "step": 5638 }, { "epoch": 0.44674192909487026, "grad_norm": 1.625662930972558, "learning_rate": 1.2192674971497593e-05, "loss": 0.2402, "step": 5639 }, { "epoch": 0.44682115270350564, "grad_norm": 1.2274533186976864, "learning_rate": 1.219017131254566e-05, "loss": 0.188, "step": 5640 }, { "epoch": 0.446900376312141, "grad_norm": 1.5926323311077184, "learning_rate": 1.2187667509381484e-05, "loss": 0.2242, "step": 5641 }, { "epoch": 0.4469795999207764, "grad_norm": 1.2706132917955917, "learning_rate": 1.2185163562169928e-05, "loss": 0.1539, "step": 5642 }, { "epoch": 0.4470588235294118, "grad_norm": 1.837584806937904, "learning_rate": 1.2182659471075868e-05, "loss": 0.2427, "step": 5643 }, { "epoch": 0.4471380471380471, "grad_norm": 1.4811924937857484, "learning_rate": 1.2180155236264182e-05, "loss": 0.1885, "step": 5644 }, { "epoch": 0.4472172707466825, "grad_norm": 1.445345043292019, "learning_rate": 1.2177650857899767e-05, "loss": 0.2325, "step": 5645 }, { "epoch": 0.4472964943553179, "grad_norm": 1.64068043823466, "learning_rate": 1.217514633614752e-05, "loss": 0.266, "step": 5646 }, { "epoch": 0.44737571796395326, "grad_norm": 1.8801734881053611, "learning_rate": 1.217264167117235e-05, "loss": 0.3213, "step": 5647 }, { "epoch": 0.44745494157258864, "grad_norm": 2.2525293814258585, "learning_rate": 1.2170136863139183e-05, "loss": 0.2368, "step": 5648 }, { "epoch": 0.447534165181224, "grad_norm": 1.7790331323131856, "learning_rate": 1.2167631912212942e-05, "loss": 0.3457, "step": 5649 }, { "epoch": 0.4476133887898594, "grad_norm": 1.936715387902646, "learning_rate": 1.2165126818558572e-05, "loss": 0.1923, "step": 5650 }, { "epoch": 0.4476926123984947, "grad_norm": 1.9259934773737695, "learning_rate": 1.2162621582341021e-05, "loss": 0.2649, "step": 5651 }, { "epoch": 0.4477718360071301, "grad_norm": 1.6704567895652418, "learning_rate": 1.2160116203725243e-05, "loss": 0.2096, "step": 5652 }, { "epoch": 0.4478510596157655, "grad_norm": 1.6943965762721673, "learning_rate": 1.2157610682876206e-05, "loss": 0.2351, "step": 5653 }, { "epoch": 0.44793028322440087, "grad_norm": 1.81810060944767, "learning_rate": 1.2155105019958888e-05, "loss": 0.1738, "step": 5654 }, { "epoch": 0.44800950683303625, "grad_norm": 2.0973218627662207, "learning_rate": 1.2152599215138274e-05, "loss": 0.2384, "step": 5655 }, { "epoch": 0.44808873044167163, "grad_norm": 1.910169325577818, "learning_rate": 1.215009326857936e-05, "loss": 0.2089, "step": 5656 }, { "epoch": 0.448167954050307, "grad_norm": 2.256710950714623, "learning_rate": 1.2147587180447149e-05, "loss": 0.2798, "step": 5657 }, { "epoch": 0.44824717765894234, "grad_norm": 1.565782817578264, "learning_rate": 1.2145080950906656e-05, "loss": 0.2186, "step": 5658 }, { "epoch": 0.4483264012675777, "grad_norm": 1.525088456470306, "learning_rate": 1.2142574580122903e-05, "loss": 0.1943, "step": 5659 }, { "epoch": 0.4484056248762131, "grad_norm": 1.8443753190917436, "learning_rate": 1.2140068068260923e-05, "loss": 0.2354, "step": 5660 }, { "epoch": 0.4484848484848485, "grad_norm": 1.5512746842008314, "learning_rate": 1.2137561415485761e-05, "loss": 0.2582, "step": 5661 }, { "epoch": 0.44856407209348387, "grad_norm": 1.8603636018452885, "learning_rate": 1.2135054621962464e-05, "loss": 0.2451, "step": 5662 }, { "epoch": 0.44864329570211925, "grad_norm": 1.889618114956987, "learning_rate": 1.2132547687856093e-05, "loss": 0.2357, "step": 5663 }, { "epoch": 0.44872251931075463, "grad_norm": 1.9815243497588506, "learning_rate": 1.2130040613331717e-05, "loss": 0.3195, "step": 5664 }, { "epoch": 0.44880174291938996, "grad_norm": 1.5243803797839164, "learning_rate": 1.2127533398554417e-05, "loss": 0.1674, "step": 5665 }, { "epoch": 0.44888096652802534, "grad_norm": 1.418760243918266, "learning_rate": 1.2125026043689278e-05, "loss": 0.1771, "step": 5666 }, { "epoch": 0.4489601901366607, "grad_norm": 1.7412611872463941, "learning_rate": 1.2122518548901401e-05, "loss": 0.1763, "step": 5667 }, { "epoch": 0.4490394137452961, "grad_norm": 1.8353027474063524, "learning_rate": 1.2120010914355888e-05, "loss": 0.2377, "step": 5668 }, { "epoch": 0.4491186373539315, "grad_norm": 1.918201063336504, "learning_rate": 1.2117503140217858e-05, "loss": 0.3078, "step": 5669 }, { "epoch": 0.44919786096256686, "grad_norm": 1.7551579801259165, "learning_rate": 1.2114995226652437e-05, "loss": 0.2168, "step": 5670 }, { "epoch": 0.44927708457120225, "grad_norm": 1.674301397411181, "learning_rate": 1.2112487173824755e-05, "loss": 0.2645, "step": 5671 }, { "epoch": 0.44935630817983757, "grad_norm": 1.4922935738061887, "learning_rate": 1.2109978981899956e-05, "loss": 0.2545, "step": 5672 }, { "epoch": 0.44943553178847295, "grad_norm": 2.0760449593188306, "learning_rate": 1.2107470651043198e-05, "loss": 0.3298, "step": 5673 }, { "epoch": 0.44951475539710833, "grad_norm": 1.9440791398158375, "learning_rate": 1.2104962181419635e-05, "loss": 0.3052, "step": 5674 }, { "epoch": 0.4495939790057437, "grad_norm": 1.750501798235069, "learning_rate": 1.2102453573194442e-05, "loss": 0.2402, "step": 5675 }, { "epoch": 0.4496732026143791, "grad_norm": 1.6263665516047727, "learning_rate": 1.2099944826532796e-05, "loss": 0.2278, "step": 5676 }, { "epoch": 0.4497524262230145, "grad_norm": 1.8257200513512624, "learning_rate": 1.2097435941599886e-05, "loss": 0.2198, "step": 5677 }, { "epoch": 0.4498316498316498, "grad_norm": 1.8320465927526974, "learning_rate": 1.2094926918560917e-05, "loss": 0.1903, "step": 5678 }, { "epoch": 0.4499108734402852, "grad_norm": 2.3613795721421393, "learning_rate": 1.2092417757581085e-05, "loss": 0.4167, "step": 5679 }, { "epoch": 0.44999009704892057, "grad_norm": 1.8109900768597607, "learning_rate": 1.2089908458825614e-05, "loss": 0.2132, "step": 5680 }, { "epoch": 0.45006932065755595, "grad_norm": 1.5151629803787015, "learning_rate": 1.2087399022459729e-05, "loss": 0.1851, "step": 5681 }, { "epoch": 0.45014854426619133, "grad_norm": 1.6994388633856963, "learning_rate": 1.208488944864866e-05, "loss": 0.183, "step": 5682 }, { "epoch": 0.4502277678748267, "grad_norm": 1.8816540048226704, "learning_rate": 1.2082379737557655e-05, "loss": 0.3413, "step": 5683 }, { "epoch": 0.4503069914834621, "grad_norm": 1.787224448096987, "learning_rate": 1.2079869889351961e-05, "loss": 0.2554, "step": 5684 }, { "epoch": 0.4503862150920974, "grad_norm": 1.661965270157595, "learning_rate": 1.2077359904196841e-05, "loss": 0.2392, "step": 5685 }, { "epoch": 0.4504654387007328, "grad_norm": 1.5009075526942932, "learning_rate": 1.2074849782257572e-05, "loss": 0.2269, "step": 5686 }, { "epoch": 0.4505446623093682, "grad_norm": 2.013573695623766, "learning_rate": 1.2072339523699426e-05, "loss": 0.3267, "step": 5687 }, { "epoch": 0.45062388591800356, "grad_norm": 2.023128416523469, "learning_rate": 1.2069829128687693e-05, "loss": 0.3402, "step": 5688 }, { "epoch": 0.45070310952663895, "grad_norm": 2.007862560420196, "learning_rate": 1.2067318597387672e-05, "loss": 0.2908, "step": 5689 }, { "epoch": 0.4507823331352743, "grad_norm": 2.0846805600088927, "learning_rate": 1.2064807929964668e-05, "loss": 0.2684, "step": 5690 }, { "epoch": 0.4508615567439097, "grad_norm": 1.467914372033032, "learning_rate": 1.2062297126584e-05, "loss": 0.1973, "step": 5691 }, { "epoch": 0.45094078035254503, "grad_norm": 1.6352678572822281, "learning_rate": 1.2059786187410984e-05, "loss": 0.2524, "step": 5692 }, { "epoch": 0.4510200039611804, "grad_norm": 1.7934860117601583, "learning_rate": 1.2057275112610962e-05, "loss": 0.2316, "step": 5693 }, { "epoch": 0.4510992275698158, "grad_norm": 2.0735360665530935, "learning_rate": 1.2054763902349273e-05, "loss": 0.3287, "step": 5694 }, { "epoch": 0.4511784511784512, "grad_norm": 1.6926272461466183, "learning_rate": 1.2052252556791267e-05, "loss": 0.2669, "step": 5695 }, { "epoch": 0.45125767478708656, "grad_norm": 1.8476100541327651, "learning_rate": 1.2049741076102307e-05, "loss": 0.2875, "step": 5696 }, { "epoch": 0.45133689839572194, "grad_norm": 1.8394593907200811, "learning_rate": 1.2047229460447759e-05, "loss": 0.3065, "step": 5697 }, { "epoch": 0.4514161220043573, "grad_norm": 1.6570746450863916, "learning_rate": 1.2044717709993e-05, "loss": 0.249, "step": 5698 }, { "epoch": 0.45149534561299265, "grad_norm": 1.6468534738421132, "learning_rate": 1.2042205824903419e-05, "loss": 0.3011, "step": 5699 }, { "epoch": 0.45157456922162803, "grad_norm": 1.5826133488286724, "learning_rate": 1.203969380534441e-05, "loss": 0.2141, "step": 5700 }, { "epoch": 0.4516537928302634, "grad_norm": 1.448858358123047, "learning_rate": 1.2037181651481378e-05, "loss": 0.1877, "step": 5701 }, { "epoch": 0.4517330164388988, "grad_norm": 1.4060736416732036, "learning_rate": 1.2034669363479741e-05, "loss": 0.1723, "step": 5702 }, { "epoch": 0.4518122400475342, "grad_norm": 2.194651302555289, "learning_rate": 1.2032156941504913e-05, "loss": 0.3147, "step": 5703 }, { "epoch": 0.45189146365616956, "grad_norm": 2.434042439309015, "learning_rate": 1.2029644385722327e-05, "loss": 0.4223, "step": 5704 }, { "epoch": 0.45197068726480494, "grad_norm": 1.7223276273115296, "learning_rate": 1.2027131696297429e-05, "loss": 0.1779, "step": 5705 }, { "epoch": 0.45204991087344026, "grad_norm": 1.8690637313818925, "learning_rate": 1.202461887339566e-05, "loss": 0.3313, "step": 5706 }, { "epoch": 0.45212913448207565, "grad_norm": 1.2984302609785088, "learning_rate": 1.2022105917182478e-05, "loss": 0.1613, "step": 5707 }, { "epoch": 0.452208358090711, "grad_norm": 1.5134060683355437, "learning_rate": 1.2019592827823354e-05, "loss": 0.2111, "step": 5708 }, { "epoch": 0.4522875816993464, "grad_norm": 1.7825850411321524, "learning_rate": 1.2017079605483758e-05, "loss": 0.2614, "step": 5709 }, { "epoch": 0.4523668053079818, "grad_norm": 1.3710241477174698, "learning_rate": 1.201456625032918e-05, "loss": 0.1453, "step": 5710 }, { "epoch": 0.45244602891661717, "grad_norm": 1.9447854544056213, "learning_rate": 1.2012052762525104e-05, "loss": 0.2591, "step": 5711 }, { "epoch": 0.45252525252525255, "grad_norm": 1.9027330082779925, "learning_rate": 1.2009539142237034e-05, "loss": 0.2847, "step": 5712 }, { "epoch": 0.4526044761338879, "grad_norm": 1.7255140795593775, "learning_rate": 1.2007025389630484e-05, "loss": 0.2531, "step": 5713 }, { "epoch": 0.45268369974252326, "grad_norm": 1.6766496571648108, "learning_rate": 1.2004511504870966e-05, "loss": 0.3097, "step": 5714 }, { "epoch": 0.45276292335115864, "grad_norm": 1.4525698942967835, "learning_rate": 1.2001997488124011e-05, "loss": 0.2155, "step": 5715 }, { "epoch": 0.452842146959794, "grad_norm": 1.4313806650051693, "learning_rate": 1.1999483339555159e-05, "loss": 0.2239, "step": 5716 }, { "epoch": 0.4529213705684294, "grad_norm": 2.0563171218989345, "learning_rate": 1.1996969059329944e-05, "loss": 0.221, "step": 5717 }, { "epoch": 0.4530005941770648, "grad_norm": 2.0020093390544575, "learning_rate": 1.1994454647613928e-05, "loss": 0.2808, "step": 5718 }, { "epoch": 0.4530798177857001, "grad_norm": 1.6310472674684602, "learning_rate": 1.199194010457267e-05, "loss": 0.2575, "step": 5719 }, { "epoch": 0.4531590413943355, "grad_norm": 1.6684955833381414, "learning_rate": 1.1989425430371739e-05, "loss": 0.2416, "step": 5720 }, { "epoch": 0.4532382650029709, "grad_norm": 1.7084574873374, "learning_rate": 1.198691062517672e-05, "loss": 0.2816, "step": 5721 }, { "epoch": 0.45331748861160626, "grad_norm": 1.8304227424179893, "learning_rate": 1.1984395689153195e-05, "loss": 0.1627, "step": 5722 }, { "epoch": 0.45339671222024164, "grad_norm": 1.4105149923080635, "learning_rate": 1.1981880622466759e-05, "loss": 0.2155, "step": 5723 }, { "epoch": 0.453475935828877, "grad_norm": 1.6962681680234089, "learning_rate": 1.1979365425283022e-05, "loss": 0.271, "step": 5724 }, { "epoch": 0.4535551594375124, "grad_norm": 1.8799150847503883, "learning_rate": 1.1976850097767598e-05, "loss": 0.2485, "step": 5725 }, { "epoch": 0.4536343830461477, "grad_norm": 1.7389361906903575, "learning_rate": 1.1974334640086104e-05, "loss": 0.2526, "step": 5726 }, { "epoch": 0.4537136066547831, "grad_norm": 1.8988759161283906, "learning_rate": 1.1971819052404177e-05, "loss": 0.1956, "step": 5727 }, { "epoch": 0.4537928302634185, "grad_norm": 1.798914781621013, "learning_rate": 1.196930333488745e-05, "loss": 0.19, "step": 5728 }, { "epoch": 0.45387205387205387, "grad_norm": 2.107663434616877, "learning_rate": 1.1966787487701577e-05, "loss": 0.3069, "step": 5729 }, { "epoch": 0.45395127748068925, "grad_norm": 2.2034162652334, "learning_rate": 1.1964271511012208e-05, "loss": 0.1783, "step": 5730 }, { "epoch": 0.45403050108932463, "grad_norm": 2.3184036760138027, "learning_rate": 1.1961755404985015e-05, "loss": 0.3463, "step": 5731 }, { "epoch": 0.45410972469796, "grad_norm": 2.1791121681031655, "learning_rate": 1.1959239169785668e-05, "loss": 0.3954, "step": 5732 }, { "epoch": 0.45418894830659534, "grad_norm": 1.6628015567935317, "learning_rate": 1.1956722805579846e-05, "loss": 0.2001, "step": 5733 }, { "epoch": 0.4542681719152307, "grad_norm": 2.077869327403923, "learning_rate": 1.1954206312533246e-05, "loss": 0.2385, "step": 5734 }, { "epoch": 0.4543473955238661, "grad_norm": 2.2045005215714255, "learning_rate": 1.1951689690811558e-05, "loss": 0.2856, "step": 5735 }, { "epoch": 0.4544266191325015, "grad_norm": 1.8371638284667358, "learning_rate": 1.1949172940580498e-05, "loss": 0.3068, "step": 5736 }, { "epoch": 0.45450584274113687, "grad_norm": 1.8522300201654076, "learning_rate": 1.1946656062005781e-05, "loss": 0.2227, "step": 5737 }, { "epoch": 0.45458506634977225, "grad_norm": 1.7320302182338698, "learning_rate": 1.1944139055253126e-05, "loss": 0.2321, "step": 5738 }, { "epoch": 0.45466428995840763, "grad_norm": 1.5701091947575414, "learning_rate": 1.1941621920488271e-05, "loss": 0.2887, "step": 5739 }, { "epoch": 0.45474351356704296, "grad_norm": 1.8035475755157526, "learning_rate": 1.1939104657876953e-05, "loss": 0.3246, "step": 5740 }, { "epoch": 0.45482273717567834, "grad_norm": 1.506252713456613, "learning_rate": 1.1936587267584924e-05, "loss": 0.2165, "step": 5741 }, { "epoch": 0.4549019607843137, "grad_norm": 1.5025311652810647, "learning_rate": 1.193406974977794e-05, "loss": 0.2393, "step": 5742 }, { "epoch": 0.4549811843929491, "grad_norm": 1.55815729235976, "learning_rate": 1.1931552104621776e-05, "loss": 0.2478, "step": 5743 }, { "epoch": 0.4550604080015845, "grad_norm": 1.6653861728121144, "learning_rate": 1.1929034332282192e-05, "loss": 0.2436, "step": 5744 }, { "epoch": 0.45513963161021986, "grad_norm": 1.6945743023892634, "learning_rate": 1.1926516432924984e-05, "loss": 0.2689, "step": 5745 }, { "epoch": 0.45521885521885525, "grad_norm": 1.5850622647354944, "learning_rate": 1.1923998406715937e-05, "loss": 0.1872, "step": 5746 }, { "epoch": 0.45529807882749057, "grad_norm": 1.9430325418481773, "learning_rate": 1.1921480253820852e-05, "loss": 0.2715, "step": 5747 }, { "epoch": 0.45537730243612595, "grad_norm": 1.8173887897524583, "learning_rate": 1.1918961974405539e-05, "loss": 0.2754, "step": 5748 }, { "epoch": 0.45545652604476133, "grad_norm": 1.280313438106088, "learning_rate": 1.1916443568635812e-05, "loss": 0.1448, "step": 5749 }, { "epoch": 0.4555357496533967, "grad_norm": 1.8065914795032998, "learning_rate": 1.1913925036677497e-05, "loss": 0.2318, "step": 5750 }, { "epoch": 0.4556149732620321, "grad_norm": 1.3941230617962175, "learning_rate": 1.191140637869643e-05, "loss": 0.1908, "step": 5751 }, { "epoch": 0.4556941968706675, "grad_norm": 1.8012365446164473, "learning_rate": 1.1908887594858447e-05, "loss": 0.3145, "step": 5752 }, { "epoch": 0.45577342047930286, "grad_norm": 2.003655272890045, "learning_rate": 1.1906368685329403e-05, "loss": 0.3109, "step": 5753 }, { "epoch": 0.4558526440879382, "grad_norm": 1.7791427389185093, "learning_rate": 1.1903849650275154e-05, "loss": 0.2439, "step": 5754 }, { "epoch": 0.45593186769657357, "grad_norm": 1.8198868020074712, "learning_rate": 1.1901330489861564e-05, "loss": 0.3041, "step": 5755 }, { "epoch": 0.45601109130520895, "grad_norm": 1.8983324369961625, "learning_rate": 1.1898811204254515e-05, "loss": 0.2702, "step": 5756 }, { "epoch": 0.45609031491384433, "grad_norm": 1.6998132353877888, "learning_rate": 1.189629179361988e-05, "loss": 0.2099, "step": 5757 }, { "epoch": 0.4561695385224797, "grad_norm": 1.770023711048656, "learning_rate": 1.1893772258123554e-05, "loss": 0.2208, "step": 5758 }, { "epoch": 0.4562487621311151, "grad_norm": 1.5521802466294234, "learning_rate": 1.1891252597931441e-05, "loss": 0.1861, "step": 5759 }, { "epoch": 0.4563279857397504, "grad_norm": 2.0872186412203098, "learning_rate": 1.1888732813209442e-05, "loss": 0.3252, "step": 5760 }, { "epoch": 0.4564072093483858, "grad_norm": 1.7383376525179475, "learning_rate": 1.1886212904123477e-05, "loss": 0.2736, "step": 5761 }, { "epoch": 0.4564864329570212, "grad_norm": 1.4851146181572743, "learning_rate": 1.1883692870839466e-05, "loss": 0.2334, "step": 5762 }, { "epoch": 0.45656565656565656, "grad_norm": 1.5889724507244454, "learning_rate": 1.1881172713523346e-05, "loss": 0.1323, "step": 5763 }, { "epoch": 0.45664488017429194, "grad_norm": 2.1243043322113913, "learning_rate": 1.1878652432341053e-05, "loss": 0.2817, "step": 5764 }, { "epoch": 0.4567241037829273, "grad_norm": 1.7035316138862764, "learning_rate": 1.1876132027458535e-05, "loss": 0.2476, "step": 5765 }, { "epoch": 0.4568033273915627, "grad_norm": 1.758939142378042, "learning_rate": 1.1873611499041752e-05, "loss": 0.2034, "step": 5766 }, { "epoch": 0.45688255100019803, "grad_norm": 1.6959868516346108, "learning_rate": 1.1871090847256667e-05, "loss": 0.2186, "step": 5767 }, { "epoch": 0.4569617746088334, "grad_norm": 1.632204906295364, "learning_rate": 1.1868570072269252e-05, "loss": 0.2214, "step": 5768 }, { "epoch": 0.4570409982174688, "grad_norm": 1.8184560474306608, "learning_rate": 1.186604917424549e-05, "loss": 0.2635, "step": 5769 }, { "epoch": 0.4571202218261042, "grad_norm": 1.693087665826069, "learning_rate": 1.1863528153351369e-05, "loss": 0.2084, "step": 5770 }, { "epoch": 0.45719944543473956, "grad_norm": 2.068639291248907, "learning_rate": 1.1861007009752884e-05, "loss": 0.3096, "step": 5771 }, { "epoch": 0.45727866904337494, "grad_norm": 1.6351346272512304, "learning_rate": 1.1858485743616044e-05, "loss": 0.2158, "step": 5772 }, { "epoch": 0.4573578926520103, "grad_norm": 2.0681240053978005, "learning_rate": 1.185596435510686e-05, "loss": 0.2142, "step": 5773 }, { "epoch": 0.45743711626064565, "grad_norm": 1.687055194181285, "learning_rate": 1.1853442844391354e-05, "loss": 0.2101, "step": 5774 }, { "epoch": 0.45751633986928103, "grad_norm": 2.1951786792761205, "learning_rate": 1.1850921211635554e-05, "loss": 0.3035, "step": 5775 }, { "epoch": 0.4575955634779164, "grad_norm": 1.6212772413129573, "learning_rate": 1.1848399457005496e-05, "loss": 0.2268, "step": 5776 }, { "epoch": 0.4576747870865518, "grad_norm": 2.1000464880750704, "learning_rate": 1.1845877580667232e-05, "loss": 0.1641, "step": 5777 }, { "epoch": 0.4577540106951872, "grad_norm": 1.8810821539518814, "learning_rate": 1.1843355582786806e-05, "loss": 0.2251, "step": 5778 }, { "epoch": 0.45783323430382256, "grad_norm": 2.395428251726005, "learning_rate": 1.1840833463530289e-05, "loss": 0.3123, "step": 5779 }, { "epoch": 0.45791245791245794, "grad_norm": 1.7391295158909688, "learning_rate": 1.1838311223063745e-05, "loss": 0.3127, "step": 5780 }, { "epoch": 0.45799168152109326, "grad_norm": 1.967443189627691, "learning_rate": 1.1835788861553252e-05, "loss": 0.3024, "step": 5781 }, { "epoch": 0.45807090512972864, "grad_norm": 1.8133512046860787, "learning_rate": 1.1833266379164894e-05, "loss": 0.2106, "step": 5782 }, { "epoch": 0.458150128738364, "grad_norm": 1.4623703715386362, "learning_rate": 1.183074377606477e-05, "loss": 0.2107, "step": 5783 }, { "epoch": 0.4582293523469994, "grad_norm": 1.9096201908070127, "learning_rate": 1.1828221052418973e-05, "loss": 0.2938, "step": 5784 }, { "epoch": 0.4583085759556348, "grad_norm": 1.6753449934392568, "learning_rate": 1.182569820839362e-05, "loss": 0.202, "step": 5785 }, { "epoch": 0.45838779956427017, "grad_norm": 1.8242711819879487, "learning_rate": 1.1823175244154823e-05, "loss": 0.2697, "step": 5786 }, { "epoch": 0.45846702317290555, "grad_norm": 1.4081247640611776, "learning_rate": 1.1820652159868706e-05, "loss": 0.2109, "step": 5787 }, { "epoch": 0.4585462467815409, "grad_norm": 1.529074088770382, "learning_rate": 1.1818128955701409e-05, "loss": 0.1721, "step": 5788 }, { "epoch": 0.45862547039017626, "grad_norm": 1.4828488754206717, "learning_rate": 1.1815605631819066e-05, "loss": 0.2021, "step": 5789 }, { "epoch": 0.45870469399881164, "grad_norm": 1.806729503536785, "learning_rate": 1.181308218838783e-05, "loss": 0.3213, "step": 5790 }, { "epoch": 0.458783917607447, "grad_norm": 1.5487741005707878, "learning_rate": 1.1810558625573856e-05, "loss": 0.1864, "step": 5791 }, { "epoch": 0.4588631412160824, "grad_norm": 1.7392990868517602, "learning_rate": 1.1808034943543308e-05, "loss": 0.2683, "step": 5792 }, { "epoch": 0.4589423648247178, "grad_norm": 1.6541046452481614, "learning_rate": 1.1805511142462355e-05, "loss": 0.1611, "step": 5793 }, { "epoch": 0.4590215884333531, "grad_norm": 1.5821801754425744, "learning_rate": 1.1802987222497186e-05, "loss": 0.2217, "step": 5794 }, { "epoch": 0.4591008120419885, "grad_norm": 1.8139946303305532, "learning_rate": 1.1800463183813982e-05, "loss": 0.2898, "step": 5795 }, { "epoch": 0.4591800356506239, "grad_norm": 1.8933410343974801, "learning_rate": 1.1797939026578941e-05, "loss": 0.2746, "step": 5796 }, { "epoch": 0.45925925925925926, "grad_norm": 1.8992641843732025, "learning_rate": 1.1795414750958265e-05, "loss": 0.3166, "step": 5797 }, { "epoch": 0.45933848286789464, "grad_norm": 1.7989903007294956, "learning_rate": 1.1792890357118165e-05, "loss": 0.317, "step": 5798 }, { "epoch": 0.45941770647653, "grad_norm": 1.5819186653080919, "learning_rate": 1.1790365845224866e-05, "loss": 0.2095, "step": 5799 }, { "epoch": 0.4594969300851654, "grad_norm": 1.7439990149458584, "learning_rate": 1.1787841215444588e-05, "loss": 0.3441, "step": 5800 }, { "epoch": 0.4595761536938007, "grad_norm": 2.035666682458812, "learning_rate": 1.1785316467943568e-05, "loss": 0.2876, "step": 5801 }, { "epoch": 0.4596553773024361, "grad_norm": 1.6470474336274994, "learning_rate": 1.1782791602888052e-05, "loss": 0.2947, "step": 5802 }, { "epoch": 0.4597346009110715, "grad_norm": 2.111066612780628, "learning_rate": 1.1780266620444285e-05, "loss": 0.1849, "step": 5803 }, { "epoch": 0.45981382451970687, "grad_norm": 1.4749315806855938, "learning_rate": 1.1777741520778529e-05, "loss": 0.1852, "step": 5804 }, { "epoch": 0.45989304812834225, "grad_norm": 1.808560130929128, "learning_rate": 1.1775216304057046e-05, "loss": 0.3892, "step": 5805 }, { "epoch": 0.45997227173697763, "grad_norm": 1.3150678294117604, "learning_rate": 1.1772690970446113e-05, "loss": 0.161, "step": 5806 }, { "epoch": 0.460051495345613, "grad_norm": 1.5040489890375237, "learning_rate": 1.177016552011201e-05, "loss": 0.2207, "step": 5807 }, { "epoch": 0.46013071895424834, "grad_norm": 1.3479858677554062, "learning_rate": 1.176763995322102e-05, "loss": 0.1882, "step": 5808 }, { "epoch": 0.4602099425628837, "grad_norm": 1.2297739170073987, "learning_rate": 1.1765114269939448e-05, "loss": 0.2293, "step": 5809 }, { "epoch": 0.4602891661715191, "grad_norm": 1.5554855153406992, "learning_rate": 1.1762588470433593e-05, "loss": 0.2815, "step": 5810 }, { "epoch": 0.4603683897801545, "grad_norm": 1.7346957038526587, "learning_rate": 1.176006255486977e-05, "loss": 0.2792, "step": 5811 }, { "epoch": 0.46044761338878987, "grad_norm": 1.871493236041805, "learning_rate": 1.1757536523414297e-05, "loss": 0.2401, "step": 5812 }, { "epoch": 0.46052683699742525, "grad_norm": 1.775567559196208, "learning_rate": 1.1755010376233498e-05, "loss": 0.3088, "step": 5813 }, { "epoch": 0.46060606060606063, "grad_norm": 1.5104027255890675, "learning_rate": 1.175248411349371e-05, "loss": 0.1875, "step": 5814 }, { "epoch": 0.46068528421469596, "grad_norm": 1.678974569795767, "learning_rate": 1.1749957735361279e-05, "loss": 0.2723, "step": 5815 }, { "epoch": 0.46076450782333134, "grad_norm": 1.8460194062530695, "learning_rate": 1.174743124200255e-05, "loss": 0.2761, "step": 5816 }, { "epoch": 0.4608437314319667, "grad_norm": 1.505189384180577, "learning_rate": 1.1744904633583883e-05, "loss": 0.1988, "step": 5817 }, { "epoch": 0.4609229550406021, "grad_norm": 1.839112409772032, "learning_rate": 1.1742377910271638e-05, "loss": 0.2902, "step": 5818 }, { "epoch": 0.4610021786492375, "grad_norm": 1.9124512564277765, "learning_rate": 1.1739851072232195e-05, "loss": 0.157, "step": 5819 }, { "epoch": 0.46108140225787286, "grad_norm": 1.353703020989209, "learning_rate": 1.1737324119631927e-05, "loss": 0.1562, "step": 5820 }, { "epoch": 0.46116062586650824, "grad_norm": 2.017406980376989, "learning_rate": 1.173479705263723e-05, "loss": 0.3418, "step": 5821 }, { "epoch": 0.46123984947514357, "grad_norm": 1.3243514393275548, "learning_rate": 1.1732269871414492e-05, "loss": 0.1615, "step": 5822 }, { "epoch": 0.46131907308377895, "grad_norm": 1.6905790301471357, "learning_rate": 1.1729742576130119e-05, "loss": 0.292, "step": 5823 }, { "epoch": 0.46139829669241433, "grad_norm": 1.9603246118344408, "learning_rate": 1.1727215166950519e-05, "loss": 0.2565, "step": 5824 }, { "epoch": 0.4614775203010497, "grad_norm": 1.917623176336931, "learning_rate": 1.172468764404211e-05, "loss": 0.2333, "step": 5825 }, { "epoch": 0.4615567439096851, "grad_norm": 1.9398954303630949, "learning_rate": 1.172216000757132e-05, "loss": 0.2284, "step": 5826 }, { "epoch": 0.4616359675183205, "grad_norm": 1.710060950505006, "learning_rate": 1.1719632257704581e-05, "loss": 0.2126, "step": 5827 }, { "epoch": 0.46171519112695586, "grad_norm": 1.684134186464337, "learning_rate": 1.171710439460833e-05, "loss": 0.2009, "step": 5828 }, { "epoch": 0.4617944147355912, "grad_norm": 1.7573281326112857, "learning_rate": 1.1714576418449017e-05, "loss": 0.2503, "step": 5829 }, { "epoch": 0.46187363834422657, "grad_norm": 1.8429373792776358, "learning_rate": 1.1712048329393097e-05, "loss": 0.3519, "step": 5830 }, { "epoch": 0.46195286195286195, "grad_norm": 1.2035485057140314, "learning_rate": 1.1709520127607035e-05, "loss": 0.1374, "step": 5831 }, { "epoch": 0.46203208556149733, "grad_norm": 1.4395536329198, "learning_rate": 1.1706991813257295e-05, "loss": 0.173, "step": 5832 }, { "epoch": 0.4621113091701327, "grad_norm": 1.5777412670779698, "learning_rate": 1.1704463386510358e-05, "loss": 0.2446, "step": 5833 }, { "epoch": 0.4621905327787681, "grad_norm": 1.2441828370826344, "learning_rate": 1.170193484753271e-05, "loss": 0.1502, "step": 5834 }, { "epoch": 0.4622697563874034, "grad_norm": 1.8554216741937737, "learning_rate": 1.169940619649084e-05, "loss": 0.3251, "step": 5835 }, { "epoch": 0.4623489799960388, "grad_norm": 2.1197263971540337, "learning_rate": 1.1696877433551248e-05, "loss": 0.3231, "step": 5836 }, { "epoch": 0.4624282036046742, "grad_norm": 1.28691621339965, "learning_rate": 1.1694348558880447e-05, "loss": 0.1736, "step": 5837 }, { "epoch": 0.46250742721330956, "grad_norm": 1.6761553157715703, "learning_rate": 1.1691819572644941e-05, "loss": 0.2948, "step": 5838 }, { "epoch": 0.46258665082194494, "grad_norm": 1.6913002529398702, "learning_rate": 1.1689290475011258e-05, "loss": 0.3126, "step": 5839 }, { "epoch": 0.4626658744305803, "grad_norm": 1.6965682391039474, "learning_rate": 1.1686761266145926e-05, "loss": 0.2546, "step": 5840 }, { "epoch": 0.4627450980392157, "grad_norm": 1.5356746583630596, "learning_rate": 1.1684231946215478e-05, "loss": 0.1902, "step": 5841 }, { "epoch": 0.46282432164785103, "grad_norm": 1.4642926616693295, "learning_rate": 1.1681702515386466e-05, "loss": 0.2615, "step": 5842 }, { "epoch": 0.4629035452564864, "grad_norm": 1.463419818752926, "learning_rate": 1.167917297382543e-05, "loss": 0.2114, "step": 5843 }, { "epoch": 0.4629827688651218, "grad_norm": 1.76563390471955, "learning_rate": 1.1676643321698934e-05, "loss": 0.223, "step": 5844 }, { "epoch": 0.4630619924737572, "grad_norm": 1.629574698200466, "learning_rate": 1.1674113559173548e-05, "loss": 0.2726, "step": 5845 }, { "epoch": 0.46314121608239256, "grad_norm": 1.7164989306621083, "learning_rate": 1.1671583686415833e-05, "loss": 0.2551, "step": 5846 }, { "epoch": 0.46322043969102794, "grad_norm": 2.1183135356475864, "learning_rate": 1.1669053703592381e-05, "loss": 0.3137, "step": 5847 }, { "epoch": 0.4632996632996633, "grad_norm": 1.8691419432454603, "learning_rate": 1.1666523610869769e-05, "loss": 0.2881, "step": 5848 }, { "epoch": 0.46337888690829865, "grad_norm": 1.7760078898439706, "learning_rate": 1.1663993408414597e-05, "loss": 0.2845, "step": 5849 }, { "epoch": 0.46345811051693403, "grad_norm": 1.6125545218339132, "learning_rate": 1.1661463096393468e-05, "loss": 0.1401, "step": 5850 }, { "epoch": 0.4635373341255694, "grad_norm": 1.549884293010929, "learning_rate": 1.1658932674972985e-05, "loss": 0.2693, "step": 5851 }, { "epoch": 0.4636165577342048, "grad_norm": 1.6752525928036788, "learning_rate": 1.1656402144319772e-05, "loss": 0.1787, "step": 5852 }, { "epoch": 0.4636957813428402, "grad_norm": 1.4066489473205002, "learning_rate": 1.1653871504600445e-05, "loss": 0.1911, "step": 5853 }, { "epoch": 0.46377500495147556, "grad_norm": 2.0172052296744876, "learning_rate": 1.1651340755981634e-05, "loss": 0.3093, "step": 5854 }, { "epoch": 0.46385422856011094, "grad_norm": 1.7226386063640242, "learning_rate": 1.1648809898629987e-05, "loss": 0.2696, "step": 5855 }, { "epoch": 0.46393345216874626, "grad_norm": 1.673781646983445, "learning_rate": 1.1646278932712138e-05, "loss": 0.311, "step": 5856 }, { "epoch": 0.46401267577738164, "grad_norm": 1.5792945001326468, "learning_rate": 1.1643747858394743e-05, "loss": 0.2205, "step": 5857 }, { "epoch": 0.464091899386017, "grad_norm": 1.3162905433832417, "learning_rate": 1.1641216675844461e-05, "loss": 0.1358, "step": 5858 }, { "epoch": 0.4641711229946524, "grad_norm": 1.793623245600044, "learning_rate": 1.1638685385227958e-05, "loss": 0.2384, "step": 5859 }, { "epoch": 0.4642503466032878, "grad_norm": 2.2250733701941896, "learning_rate": 1.1636153986711906e-05, "loss": 0.3525, "step": 5860 }, { "epoch": 0.46432957021192317, "grad_norm": 1.2837086196666392, "learning_rate": 1.163362248046299e-05, "loss": 0.194, "step": 5861 }, { "epoch": 0.46440879382055855, "grad_norm": 1.6959796571071342, "learning_rate": 1.1631090866647891e-05, "loss": 0.2472, "step": 5862 }, { "epoch": 0.4644880174291939, "grad_norm": 1.4252640370044873, "learning_rate": 1.1628559145433308e-05, "loss": 0.1967, "step": 5863 }, { "epoch": 0.46456724103782926, "grad_norm": 1.4949362407868994, "learning_rate": 1.1626027316985942e-05, "loss": 0.2233, "step": 5864 }, { "epoch": 0.46464646464646464, "grad_norm": 1.7117151143093574, "learning_rate": 1.1623495381472499e-05, "loss": 0.2572, "step": 5865 }, { "epoch": 0.4647256882551, "grad_norm": 1.3120864080302828, "learning_rate": 1.16209633390597e-05, "loss": 0.1218, "step": 5866 }, { "epoch": 0.4648049118637354, "grad_norm": 1.3455892955024618, "learning_rate": 1.161843118991426e-05, "loss": 0.1705, "step": 5867 }, { "epoch": 0.4648841354723708, "grad_norm": 1.7686439665846643, "learning_rate": 1.1615898934202917e-05, "loss": 0.2905, "step": 5868 }, { "epoch": 0.46496335908100617, "grad_norm": 1.6428213140958778, "learning_rate": 1.1613366572092404e-05, "loss": 0.325, "step": 5869 }, { "epoch": 0.4650425826896415, "grad_norm": 1.5073607155567734, "learning_rate": 1.1610834103749465e-05, "loss": 0.2388, "step": 5870 }, { "epoch": 0.4651218062982769, "grad_norm": 1.654840474182109, "learning_rate": 1.1608301529340848e-05, "loss": 0.2729, "step": 5871 }, { "epoch": 0.46520102990691226, "grad_norm": 1.3695519826063307, "learning_rate": 1.1605768849033318e-05, "loss": 0.2058, "step": 5872 }, { "epoch": 0.46528025351554764, "grad_norm": 1.6496276394374094, "learning_rate": 1.1603236062993635e-05, "loss": 0.2051, "step": 5873 }, { "epoch": 0.465359477124183, "grad_norm": 1.5837955769256125, "learning_rate": 1.1600703171388572e-05, "loss": 0.296, "step": 5874 }, { "epoch": 0.4654387007328184, "grad_norm": 1.5858161667344535, "learning_rate": 1.1598170174384907e-05, "loss": 0.2137, "step": 5875 }, { "epoch": 0.4655179243414537, "grad_norm": 2.2044829966044737, "learning_rate": 1.1595637072149424e-05, "loss": 0.3454, "step": 5876 }, { "epoch": 0.4655971479500891, "grad_norm": 1.5751336853180717, "learning_rate": 1.159310386484892e-05, "loss": 0.244, "step": 5877 }, { "epoch": 0.4656763715587245, "grad_norm": 1.4961983415381441, "learning_rate": 1.159057055265019e-05, "loss": 0.2294, "step": 5878 }, { "epoch": 0.46575559516735987, "grad_norm": 2.2327542940296157, "learning_rate": 1.1588037135720043e-05, "loss": 0.293, "step": 5879 }, { "epoch": 0.46583481877599525, "grad_norm": 1.5540430246777075, "learning_rate": 1.1585503614225292e-05, "loss": 0.2275, "step": 5880 }, { "epoch": 0.46591404238463063, "grad_norm": 1.7450219670311224, "learning_rate": 1.1582969988332757e-05, "loss": 0.1906, "step": 5881 }, { "epoch": 0.465993265993266, "grad_norm": 1.5810265163893842, "learning_rate": 1.1580436258209266e-05, "loss": 0.2842, "step": 5882 }, { "epoch": 0.46607248960190134, "grad_norm": 1.4339155912305483, "learning_rate": 1.1577902424021653e-05, "loss": 0.2104, "step": 5883 }, { "epoch": 0.4661517132105367, "grad_norm": 1.8259804906940587, "learning_rate": 1.1575368485936752e-05, "loss": 0.3006, "step": 5884 }, { "epoch": 0.4662309368191721, "grad_norm": 1.7650234823052482, "learning_rate": 1.1572834444121424e-05, "loss": 0.318, "step": 5885 }, { "epoch": 0.4663101604278075, "grad_norm": 1.6265532601487545, "learning_rate": 1.157030029874251e-05, "loss": 0.2348, "step": 5886 }, { "epoch": 0.46638938403644287, "grad_norm": 1.858229168855876, "learning_rate": 1.1567766049966882e-05, "loss": 0.3115, "step": 5887 }, { "epoch": 0.46646860764507825, "grad_norm": 1.6417037865221684, "learning_rate": 1.1565231697961398e-05, "loss": 0.2361, "step": 5888 }, { "epoch": 0.46654783125371363, "grad_norm": 1.7781177373037809, "learning_rate": 1.1562697242892939e-05, "loss": 0.2121, "step": 5889 }, { "epoch": 0.46662705486234896, "grad_norm": 1.5478601069683544, "learning_rate": 1.156016268492839e-05, "loss": 0.2505, "step": 5890 }, { "epoch": 0.46670627847098434, "grad_norm": 1.6655163893944778, "learning_rate": 1.155762802423463e-05, "loss": 0.2772, "step": 5891 }, { "epoch": 0.4667855020796197, "grad_norm": 1.3441881432412468, "learning_rate": 1.1555093260978562e-05, "loss": 0.1345, "step": 5892 }, { "epoch": 0.4668647256882551, "grad_norm": 1.6913774385342362, "learning_rate": 1.1552558395327087e-05, "loss": 0.2196, "step": 5893 }, { "epoch": 0.4669439492968905, "grad_norm": 1.5443723649287289, "learning_rate": 1.155002342744711e-05, "loss": 0.1774, "step": 5894 }, { "epoch": 0.46702317290552586, "grad_norm": 1.8948337041519354, "learning_rate": 1.1547488357505549e-05, "loss": 0.2881, "step": 5895 }, { "epoch": 0.46710239651416124, "grad_norm": 1.447661717384583, "learning_rate": 1.1544953185669327e-05, "loss": 0.2316, "step": 5896 }, { "epoch": 0.46718162012279657, "grad_norm": 1.4724782805804155, "learning_rate": 1.154241791210537e-05, "loss": 0.1881, "step": 5897 }, { "epoch": 0.46726084373143195, "grad_norm": 1.4247219818288188, "learning_rate": 1.1539882536980616e-05, "loss": 0.1581, "step": 5898 }, { "epoch": 0.46734006734006733, "grad_norm": 1.9894801439545307, "learning_rate": 1.1537347060462007e-05, "loss": 0.2828, "step": 5899 }, { "epoch": 0.4674192909487027, "grad_norm": 1.61977154934877, "learning_rate": 1.1534811482716487e-05, "loss": 0.224, "step": 5900 }, { "epoch": 0.4674985145573381, "grad_norm": 1.7842222259088774, "learning_rate": 1.1532275803911021e-05, "loss": 0.2586, "step": 5901 }, { "epoch": 0.4675777381659735, "grad_norm": 1.8640470406904812, "learning_rate": 1.1529740024212566e-05, "loss": 0.2545, "step": 5902 }, { "epoch": 0.46765696177460886, "grad_norm": 1.6857648464660526, "learning_rate": 1.1527204143788086e-05, "loss": 0.2761, "step": 5903 }, { "epoch": 0.4677361853832442, "grad_norm": 2.12366331228866, "learning_rate": 1.1524668162804566e-05, "loss": 0.2602, "step": 5904 }, { "epoch": 0.46781540899187957, "grad_norm": 2.053666085077014, "learning_rate": 1.1522132081428982e-05, "loss": 0.2526, "step": 5905 }, { "epoch": 0.46789463260051495, "grad_norm": 1.7862168777528808, "learning_rate": 1.1519595899828325e-05, "loss": 0.3094, "step": 5906 }, { "epoch": 0.46797385620915033, "grad_norm": 1.479279108155285, "learning_rate": 1.151705961816959e-05, "loss": 0.3455, "step": 5907 }, { "epoch": 0.4680530798177857, "grad_norm": 2.078889612531891, "learning_rate": 1.151452323661978e-05, "loss": 0.2586, "step": 5908 }, { "epoch": 0.4681323034264211, "grad_norm": 1.9682274819897958, "learning_rate": 1.15119867553459e-05, "loss": 0.2668, "step": 5909 }, { "epoch": 0.4682115270350565, "grad_norm": 1.5883886563625909, "learning_rate": 1.150945017451497e-05, "loss": 0.2138, "step": 5910 }, { "epoch": 0.4682907506436918, "grad_norm": 1.423357455823298, "learning_rate": 1.1506913494294005e-05, "loss": 0.1925, "step": 5911 }, { "epoch": 0.4683699742523272, "grad_norm": 1.7328076410115028, "learning_rate": 1.1504376714850041e-05, "loss": 0.3198, "step": 5912 }, { "epoch": 0.46844919786096256, "grad_norm": 1.7696019392216598, "learning_rate": 1.1501839836350106e-05, "loss": 0.2678, "step": 5913 }, { "epoch": 0.46852842146959794, "grad_norm": 1.559742855009872, "learning_rate": 1.1499302858961245e-05, "loss": 0.2619, "step": 5914 }, { "epoch": 0.4686076450782333, "grad_norm": 1.4716090861215645, "learning_rate": 1.1496765782850507e-05, "loss": 0.1983, "step": 5915 }, { "epoch": 0.4686868686868687, "grad_norm": 1.6597534022761087, "learning_rate": 1.149422860818494e-05, "loss": 0.2713, "step": 5916 }, { "epoch": 0.46876609229550403, "grad_norm": 3.4453795695685083, "learning_rate": 1.1491691335131614e-05, "loss": 0.3166, "step": 5917 }, { "epoch": 0.4688453159041394, "grad_norm": 1.5004831478370855, "learning_rate": 1.148915396385759e-05, "loss": 0.1918, "step": 5918 }, { "epoch": 0.4689245395127748, "grad_norm": 1.6768284148802883, "learning_rate": 1.1486616494529939e-05, "loss": 0.1988, "step": 5919 }, { "epoch": 0.4690037631214102, "grad_norm": 1.5487896672243708, "learning_rate": 1.1484078927315749e-05, "loss": 0.2471, "step": 5920 }, { "epoch": 0.46908298673004556, "grad_norm": 1.6786426945015251, "learning_rate": 1.1481541262382102e-05, "loss": 0.1906, "step": 5921 }, { "epoch": 0.46916221033868094, "grad_norm": 1.3558633459947318, "learning_rate": 1.1479003499896089e-05, "loss": 0.1621, "step": 5922 }, { "epoch": 0.4692414339473163, "grad_norm": 2.1234782517139617, "learning_rate": 1.1476465640024814e-05, "loss": 0.2068, "step": 5923 }, { "epoch": 0.46932065755595165, "grad_norm": 1.5551094339599982, "learning_rate": 1.147392768293538e-05, "loss": 0.2171, "step": 5924 }, { "epoch": 0.46939988116458703, "grad_norm": 1.8937768193968965, "learning_rate": 1.1471389628794902e-05, "loss": 0.2821, "step": 5925 }, { "epoch": 0.4694791047732224, "grad_norm": 1.9901206275209276, "learning_rate": 1.1468851477770495e-05, "loss": 0.2766, "step": 5926 }, { "epoch": 0.4695583283818578, "grad_norm": 1.69783825328348, "learning_rate": 1.1466313230029284e-05, "loss": 0.2788, "step": 5927 }, { "epoch": 0.4696375519904932, "grad_norm": 2.0116566575937473, "learning_rate": 1.1463774885738408e-05, "loss": 0.2903, "step": 5928 }, { "epoch": 0.46971677559912856, "grad_norm": 1.519640718995892, "learning_rate": 1.1461236445064993e-05, "loss": 0.2035, "step": 5929 }, { "epoch": 0.46979599920776394, "grad_norm": 1.5073273085434886, "learning_rate": 1.1458697908176194e-05, "loss": 0.2594, "step": 5930 }, { "epoch": 0.46987522281639926, "grad_norm": 1.5409873585497507, "learning_rate": 1.1456159275239153e-05, "loss": 0.217, "step": 5931 }, { "epoch": 0.46995444642503464, "grad_norm": 2.2870477656879604, "learning_rate": 1.1453620546421032e-05, "loss": 0.2058, "step": 5932 }, { "epoch": 0.47003367003367, "grad_norm": 1.7464011723006994, "learning_rate": 1.1451081721888992e-05, "loss": 0.2147, "step": 5933 }, { "epoch": 0.4701128936423054, "grad_norm": 1.7945781383608712, "learning_rate": 1.1448542801810203e-05, "loss": 0.2824, "step": 5934 }, { "epoch": 0.4701921172509408, "grad_norm": 2.1291540456483418, "learning_rate": 1.144600378635184e-05, "loss": 0.232, "step": 5935 }, { "epoch": 0.47027134085957617, "grad_norm": 1.8419887876146865, "learning_rate": 1.1443464675681089e-05, "loss": 0.2496, "step": 5936 }, { "epoch": 0.47035056446821155, "grad_norm": 1.613601013308607, "learning_rate": 1.1440925469965129e-05, "loss": 0.2427, "step": 5937 }, { "epoch": 0.4704297880768469, "grad_norm": 2.019869067885617, "learning_rate": 1.1438386169371164e-05, "loss": 0.3589, "step": 5938 }, { "epoch": 0.47050901168548226, "grad_norm": 1.9286486108054686, "learning_rate": 1.143584677406639e-05, "loss": 0.3202, "step": 5939 }, { "epoch": 0.47058823529411764, "grad_norm": 1.7291428537190483, "learning_rate": 1.1433307284218014e-05, "loss": 0.321, "step": 5940 }, { "epoch": 0.470667458902753, "grad_norm": 1.3536171989075028, "learning_rate": 1.1430767699993247e-05, "loss": 0.1746, "step": 5941 }, { "epoch": 0.4707466825113884, "grad_norm": 1.615695800394676, "learning_rate": 1.1428228021559316e-05, "loss": 0.2482, "step": 5942 }, { "epoch": 0.4708259061200238, "grad_norm": 1.3942045357144677, "learning_rate": 1.142568824908344e-05, "loss": 0.1579, "step": 5943 }, { "epoch": 0.47090512972865917, "grad_norm": 1.5336238529301847, "learning_rate": 1.1423148382732854e-05, "loss": 0.2179, "step": 5944 }, { "epoch": 0.4709843533372945, "grad_norm": 1.5990051697855165, "learning_rate": 1.1420608422674793e-05, "loss": 0.1587, "step": 5945 }, { "epoch": 0.4710635769459299, "grad_norm": 1.6327537852486975, "learning_rate": 1.1418068369076503e-05, "loss": 0.1887, "step": 5946 }, { "epoch": 0.47114280055456526, "grad_norm": 1.5468378558771658, "learning_rate": 1.1415528222105237e-05, "loss": 0.2608, "step": 5947 }, { "epoch": 0.47122202416320064, "grad_norm": 1.698229243888137, "learning_rate": 1.1412987981928245e-05, "loss": 0.2603, "step": 5948 }, { "epoch": 0.471301247771836, "grad_norm": 1.9253088422397535, "learning_rate": 1.1410447648712795e-05, "loss": 0.3264, "step": 5949 }, { "epoch": 0.4713804713804714, "grad_norm": 1.2722499408306336, "learning_rate": 1.1407907222626156e-05, "loss": 0.1986, "step": 5950 }, { "epoch": 0.4714596949891068, "grad_norm": 2.007166895575673, "learning_rate": 1.1405366703835596e-05, "loss": 0.3448, "step": 5951 }, { "epoch": 0.4715389185977421, "grad_norm": 1.5887415609918336, "learning_rate": 1.1402826092508405e-05, "loss": 0.2881, "step": 5952 }, { "epoch": 0.4716181422063775, "grad_norm": 1.5177232071861904, "learning_rate": 1.1400285388811862e-05, "loss": 0.1921, "step": 5953 }, { "epoch": 0.47169736581501287, "grad_norm": 1.884934397406955, "learning_rate": 1.1397744592913268e-05, "loss": 0.4125, "step": 5954 }, { "epoch": 0.47177658942364825, "grad_norm": 1.5871815107255094, "learning_rate": 1.1395203704979915e-05, "loss": 0.2222, "step": 5955 }, { "epoch": 0.47185581303228363, "grad_norm": 1.6146511416264648, "learning_rate": 1.1392662725179114e-05, "loss": 0.215, "step": 5956 }, { "epoch": 0.471935036640919, "grad_norm": 1.5308400016523158, "learning_rate": 1.139012165367817e-05, "loss": 0.224, "step": 5957 }, { "epoch": 0.47201426024955434, "grad_norm": 1.8252632599542646, "learning_rate": 1.1387580490644408e-05, "loss": 0.2122, "step": 5958 }, { "epoch": 0.4720934838581897, "grad_norm": 1.6595038069429957, "learning_rate": 1.1385039236245143e-05, "loss": 0.2207, "step": 5959 }, { "epoch": 0.4721727074668251, "grad_norm": 1.722176620058178, "learning_rate": 1.1382497890647712e-05, "loss": 0.335, "step": 5960 }, { "epoch": 0.4722519310754605, "grad_norm": 1.5006086489666106, "learning_rate": 1.1379956454019445e-05, "loss": 0.1928, "step": 5961 }, { "epoch": 0.47233115468409587, "grad_norm": 1.4487327488831339, "learning_rate": 1.1377414926527688e-05, "loss": 0.2295, "step": 5962 }, { "epoch": 0.47241037829273125, "grad_norm": 1.522165827725116, "learning_rate": 1.1374873308339784e-05, "loss": 0.2508, "step": 5963 }, { "epoch": 0.47248960190136663, "grad_norm": 1.7449739248545317, "learning_rate": 1.1372331599623088e-05, "loss": 0.292, "step": 5964 }, { "epoch": 0.47256882551000196, "grad_norm": 2.524450810797442, "learning_rate": 1.136978980054496e-05, "loss": 0.358, "step": 5965 }, { "epoch": 0.47264804911863734, "grad_norm": 1.684912949719457, "learning_rate": 1.1367247911272765e-05, "loss": 0.2318, "step": 5966 }, { "epoch": 0.4727272727272727, "grad_norm": 1.511757310609556, "learning_rate": 1.1364705931973872e-05, "loss": 0.2249, "step": 5967 }, { "epoch": 0.4728064963359081, "grad_norm": 1.4414956899813705, "learning_rate": 1.1362163862815663e-05, "loss": 0.2209, "step": 5968 }, { "epoch": 0.4728857199445435, "grad_norm": 1.7214169030508515, "learning_rate": 1.1359621703965516e-05, "loss": 0.2885, "step": 5969 }, { "epoch": 0.47296494355317886, "grad_norm": 1.9327022011563855, "learning_rate": 1.135707945559082e-05, "loss": 0.1959, "step": 5970 }, { "epoch": 0.47304416716181424, "grad_norm": 1.8496948642432174, "learning_rate": 1.1354537117858975e-05, "loss": 0.2989, "step": 5971 }, { "epoch": 0.47312339077044957, "grad_norm": 1.6580687628513424, "learning_rate": 1.1351994690937377e-05, "loss": 0.2198, "step": 5972 }, { "epoch": 0.47320261437908495, "grad_norm": 1.4220018593357153, "learning_rate": 1.1349452174993437e-05, "loss": 0.2145, "step": 5973 }, { "epoch": 0.47328183798772033, "grad_norm": 1.8503012229022804, "learning_rate": 1.1346909570194558e-05, "loss": 0.2079, "step": 5974 }, { "epoch": 0.4733610615963557, "grad_norm": 1.6450962999278003, "learning_rate": 1.134436687670817e-05, "loss": 0.2154, "step": 5975 }, { "epoch": 0.4734402852049911, "grad_norm": 1.526154492673174, "learning_rate": 1.134182409470169e-05, "loss": 0.308, "step": 5976 }, { "epoch": 0.4735195088136265, "grad_norm": 1.6582651436337914, "learning_rate": 1.133928122434255e-05, "loss": 0.2011, "step": 5977 }, { "epoch": 0.47359873242226186, "grad_norm": 1.634210934977372, "learning_rate": 1.1336738265798187e-05, "loss": 0.2046, "step": 5978 }, { "epoch": 0.4736779560308972, "grad_norm": 1.5620389453324508, "learning_rate": 1.1334195219236039e-05, "loss": 0.1813, "step": 5979 }, { "epoch": 0.47375717963953257, "grad_norm": 1.6976847037543041, "learning_rate": 1.1331652084823554e-05, "loss": 0.208, "step": 5980 }, { "epoch": 0.47383640324816795, "grad_norm": 1.5696445005943434, "learning_rate": 1.1329108862728192e-05, "loss": 0.2, "step": 5981 }, { "epoch": 0.47391562685680333, "grad_norm": 2.1081620043564055, "learning_rate": 1.1326565553117404e-05, "loss": 0.3016, "step": 5982 }, { "epoch": 0.4739948504654387, "grad_norm": 1.5018030278889556, "learning_rate": 1.1324022156158654e-05, "loss": 0.178, "step": 5983 }, { "epoch": 0.4740740740740741, "grad_norm": 1.5284495533440647, "learning_rate": 1.132147867201942e-05, "loss": 0.2049, "step": 5984 }, { "epoch": 0.4741532976827095, "grad_norm": 1.5992429866247275, "learning_rate": 1.1318935100867172e-05, "loss": 0.2148, "step": 5985 }, { "epoch": 0.4742325212913448, "grad_norm": 1.3763802567318808, "learning_rate": 1.1316391442869394e-05, "loss": 0.1736, "step": 5986 }, { "epoch": 0.4743117448999802, "grad_norm": 1.8334936431812883, "learning_rate": 1.1313847698193577e-05, "loss": 0.2462, "step": 5987 }, { "epoch": 0.47439096850861556, "grad_norm": 1.6878794250562925, "learning_rate": 1.1311303867007207e-05, "loss": 0.2422, "step": 5988 }, { "epoch": 0.47447019211725094, "grad_norm": 1.468401306601615, "learning_rate": 1.1308759949477786e-05, "loss": 0.2444, "step": 5989 }, { "epoch": 0.4745494157258863, "grad_norm": 1.664097701219563, "learning_rate": 1.1306215945772823e-05, "loss": 0.2615, "step": 5990 }, { "epoch": 0.4746286393345217, "grad_norm": 1.6991171924424213, "learning_rate": 1.1303671856059824e-05, "loss": 0.2034, "step": 5991 }, { "epoch": 0.4747078629431571, "grad_norm": 1.7967240057784828, "learning_rate": 1.1301127680506305e-05, "loss": 0.2052, "step": 5992 }, { "epoch": 0.4747870865517924, "grad_norm": 2.017587927012344, "learning_rate": 1.1298583419279792e-05, "loss": 0.2841, "step": 5993 }, { "epoch": 0.4748663101604278, "grad_norm": 1.7319780197345693, "learning_rate": 1.1296039072547804e-05, "loss": 0.2915, "step": 5994 }, { "epoch": 0.4749455337690632, "grad_norm": 1.6398799752152888, "learning_rate": 1.1293494640477885e-05, "loss": 0.2171, "step": 5995 }, { "epoch": 0.47502475737769856, "grad_norm": 1.5886829074291113, "learning_rate": 1.1290950123237564e-05, "loss": 0.2214, "step": 5996 }, { "epoch": 0.47510398098633394, "grad_norm": 1.8176377926816734, "learning_rate": 1.128840552099439e-05, "loss": 0.4143, "step": 5997 }, { "epoch": 0.4751832045949693, "grad_norm": 1.3686792953141944, "learning_rate": 1.1285860833915914e-05, "loss": 0.1308, "step": 5998 }, { "epoch": 0.47526242820360465, "grad_norm": 1.4152549698160461, "learning_rate": 1.1283316062169685e-05, "loss": 0.1647, "step": 5999 }, { "epoch": 0.47534165181224003, "grad_norm": 2.0255083140573436, "learning_rate": 1.1280771205923269e-05, "loss": 0.2668, "step": 6000 }, { "epoch": 0.4754208754208754, "grad_norm": 1.9224526679175966, "learning_rate": 1.1278226265344234e-05, "loss": 0.2819, "step": 6001 }, { "epoch": 0.4755000990295108, "grad_norm": 2.536595853740814, "learning_rate": 1.127568124060015e-05, "loss": 0.3714, "step": 6002 }, { "epoch": 0.4755793226381462, "grad_norm": 1.5779914578874135, "learning_rate": 1.1273136131858595e-05, "loss": 0.1928, "step": 6003 }, { "epoch": 0.47565854624678156, "grad_norm": 1.7261912670840855, "learning_rate": 1.1270590939287149e-05, "loss": 0.2882, "step": 6004 }, { "epoch": 0.47573776985541694, "grad_norm": 1.9174987661928171, "learning_rate": 1.1268045663053404e-05, "loss": 0.2894, "step": 6005 }, { "epoch": 0.47581699346405226, "grad_norm": 1.3406157997038364, "learning_rate": 1.1265500303324954e-05, "loss": 0.0995, "step": 6006 }, { "epoch": 0.47589621707268764, "grad_norm": 1.9482578310256058, "learning_rate": 1.12629548602694e-05, "loss": 0.2356, "step": 6007 }, { "epoch": 0.475975440681323, "grad_norm": 1.6795715555418553, "learning_rate": 1.1260409334054342e-05, "loss": 0.3422, "step": 6008 }, { "epoch": 0.4760546642899584, "grad_norm": 1.741939231866596, "learning_rate": 1.1257863724847398e-05, "loss": 0.1875, "step": 6009 }, { "epoch": 0.4761338878985938, "grad_norm": 1.6760448139876805, "learning_rate": 1.1255318032816175e-05, "loss": 0.2755, "step": 6010 }, { "epoch": 0.47621311150722917, "grad_norm": 1.4716612549658066, "learning_rate": 1.1252772258128303e-05, "loss": 0.2064, "step": 6011 }, { "epoch": 0.47629233511586455, "grad_norm": 1.239437971740309, "learning_rate": 1.1250226400951408e-05, "loss": 0.214, "step": 6012 }, { "epoch": 0.4763715587244999, "grad_norm": 2.3771186436242586, "learning_rate": 1.1247680461453114e-05, "loss": 0.1483, "step": 6013 }, { "epoch": 0.47645078233313526, "grad_norm": 1.5484783692359536, "learning_rate": 1.1245134439801073e-05, "loss": 0.205, "step": 6014 }, { "epoch": 0.47653000594177064, "grad_norm": 1.4540898940185139, "learning_rate": 1.1242588336162916e-05, "loss": 0.2185, "step": 6015 }, { "epoch": 0.476609229550406, "grad_norm": 1.9212478192320468, "learning_rate": 1.1240042150706296e-05, "loss": 0.2741, "step": 6016 }, { "epoch": 0.4766884531590414, "grad_norm": 1.6486229658306872, "learning_rate": 1.1237495883598868e-05, "loss": 0.2884, "step": 6017 }, { "epoch": 0.4767676767676768, "grad_norm": 1.7329416286713635, "learning_rate": 1.1234949535008289e-05, "loss": 0.195, "step": 6018 }, { "epoch": 0.47684690037631217, "grad_norm": 2.0867858501158154, "learning_rate": 1.1232403105102226e-05, "loss": 0.2635, "step": 6019 }, { "epoch": 0.4769261239849475, "grad_norm": 1.7178091288967594, "learning_rate": 1.122985659404835e-05, "loss": 0.2619, "step": 6020 }, { "epoch": 0.4770053475935829, "grad_norm": 1.4308391139353018, "learning_rate": 1.1227310002014332e-05, "loss": 0.1804, "step": 6021 }, { "epoch": 0.47708457120221825, "grad_norm": 2.0982399160570777, "learning_rate": 1.1224763329167859e-05, "loss": 0.2396, "step": 6022 }, { "epoch": 0.47716379481085364, "grad_norm": 1.822052514734417, "learning_rate": 1.122221657567661e-05, "loss": 0.2148, "step": 6023 }, { "epoch": 0.477243018419489, "grad_norm": 1.5489465808995513, "learning_rate": 1.1219669741708282e-05, "loss": 0.1746, "step": 6024 }, { "epoch": 0.4773222420281244, "grad_norm": 1.869029439141864, "learning_rate": 1.121712282743057e-05, "loss": 0.2004, "step": 6025 }, { "epoch": 0.4774014656367598, "grad_norm": 2.5212482111338117, "learning_rate": 1.1214575833011178e-05, "loss": 0.3236, "step": 6026 }, { "epoch": 0.4774806892453951, "grad_norm": 2.152290271707943, "learning_rate": 1.121202875861781e-05, "loss": 0.3529, "step": 6027 }, { "epoch": 0.4775599128540305, "grad_norm": 1.4701241056569854, "learning_rate": 1.1209481604418182e-05, "loss": 0.2313, "step": 6028 }, { "epoch": 0.47763913646266587, "grad_norm": 1.63276959640511, "learning_rate": 1.1206934370580009e-05, "loss": 0.1793, "step": 6029 }, { "epoch": 0.47771836007130125, "grad_norm": 2.013942105110664, "learning_rate": 1.1204387057271016e-05, "loss": 0.3182, "step": 6030 }, { "epoch": 0.47779758367993663, "grad_norm": 1.6732255072232611, "learning_rate": 1.1201839664658929e-05, "loss": 0.2155, "step": 6031 }, { "epoch": 0.477876807288572, "grad_norm": 2.133456711702031, "learning_rate": 1.1199292192911482e-05, "loss": 0.2971, "step": 6032 }, { "epoch": 0.47795603089720734, "grad_norm": 1.7458659587843834, "learning_rate": 1.1196744642196417e-05, "loss": 0.3109, "step": 6033 }, { "epoch": 0.4780352545058427, "grad_norm": 2.1189440187055797, "learning_rate": 1.1194197012681473e-05, "loss": 0.2344, "step": 6034 }, { "epoch": 0.4781144781144781, "grad_norm": 1.4454238642304105, "learning_rate": 1.1191649304534405e-05, "loss": 0.241, "step": 6035 }, { "epoch": 0.4781937017231135, "grad_norm": 1.3915525569937994, "learning_rate": 1.1189101517922961e-05, "loss": 0.2091, "step": 6036 }, { "epoch": 0.47827292533174887, "grad_norm": 1.5003487423239634, "learning_rate": 1.1186553653014906e-05, "loss": 0.2044, "step": 6037 }, { "epoch": 0.47835214894038425, "grad_norm": 1.4418556987633147, "learning_rate": 1.1184005709978002e-05, "loss": 0.2062, "step": 6038 }, { "epoch": 0.47843137254901963, "grad_norm": 1.4113814976968677, "learning_rate": 1.118145768898002e-05, "loss": 0.1691, "step": 6039 }, { "epoch": 0.47851059615765495, "grad_norm": 2.358713378776793, "learning_rate": 1.1178909590188731e-05, "loss": 0.3484, "step": 6040 }, { "epoch": 0.47858981976629034, "grad_norm": 1.7370461053292707, "learning_rate": 1.117636141377192e-05, "loss": 0.2604, "step": 6041 }, { "epoch": 0.4786690433749257, "grad_norm": 1.7287466122509574, "learning_rate": 1.117381315989737e-05, "loss": 0.193, "step": 6042 }, { "epoch": 0.4787482669835611, "grad_norm": 1.7814864273110071, "learning_rate": 1.117126482873287e-05, "loss": 0.2757, "step": 6043 }, { "epoch": 0.4788274905921965, "grad_norm": 1.6614476707629502, "learning_rate": 1.1168716420446219e-05, "loss": 0.2218, "step": 6044 }, { "epoch": 0.47890671420083186, "grad_norm": 1.782252623660112, "learning_rate": 1.1166167935205214e-05, "loss": 0.2748, "step": 6045 }, { "epoch": 0.47898593780946724, "grad_norm": 1.716919730758376, "learning_rate": 1.1163619373177663e-05, "loss": 0.2348, "step": 6046 }, { "epoch": 0.47906516141810257, "grad_norm": 1.6608149656562998, "learning_rate": 1.1161070734531375e-05, "loss": 0.2386, "step": 6047 }, { "epoch": 0.47914438502673795, "grad_norm": 1.6732170941344755, "learning_rate": 1.1158522019434163e-05, "loss": 0.2691, "step": 6048 }, { "epoch": 0.47922360863537333, "grad_norm": 1.849098697206598, "learning_rate": 1.1155973228053854e-05, "loss": 0.25, "step": 6049 }, { "epoch": 0.4793028322440087, "grad_norm": 1.8325764324480083, "learning_rate": 1.1153424360558268e-05, "loss": 0.3329, "step": 6050 }, { "epoch": 0.4793820558526441, "grad_norm": 1.5785672134828717, "learning_rate": 1.115087541711524e-05, "loss": 0.2506, "step": 6051 }, { "epoch": 0.4794612794612795, "grad_norm": 1.8442646354038115, "learning_rate": 1.1148326397892601e-05, "loss": 0.2594, "step": 6052 }, { "epoch": 0.47954050306991486, "grad_norm": 1.667486586375052, "learning_rate": 1.1145777303058197e-05, "loss": 0.2036, "step": 6053 }, { "epoch": 0.4796197266785502, "grad_norm": 1.026187163872373, "learning_rate": 1.1143228132779867e-05, "loss": 0.1148, "step": 6054 }, { "epoch": 0.47969895028718557, "grad_norm": 1.7192609302667536, "learning_rate": 1.1140678887225468e-05, "loss": 0.2826, "step": 6055 }, { "epoch": 0.47977817389582095, "grad_norm": 1.4925997087287428, "learning_rate": 1.1138129566562853e-05, "loss": 0.226, "step": 6056 }, { "epoch": 0.47985739750445633, "grad_norm": 1.6097119124164594, "learning_rate": 1.1135580170959881e-05, "loss": 0.2118, "step": 6057 }, { "epoch": 0.4799366211130917, "grad_norm": 1.950613791592059, "learning_rate": 1.1133030700584419e-05, "loss": 0.26, "step": 6058 }, { "epoch": 0.4800158447217271, "grad_norm": 1.9227954063980182, "learning_rate": 1.1130481155604336e-05, "loss": 0.2271, "step": 6059 }, { "epoch": 0.4800950683303625, "grad_norm": 2.3867979844179055, "learning_rate": 1.1127931536187511e-05, "loss": 0.275, "step": 6060 }, { "epoch": 0.4801742919389978, "grad_norm": 1.678605624774557, "learning_rate": 1.1125381842501819e-05, "loss": 0.2486, "step": 6061 }, { "epoch": 0.4802535155476332, "grad_norm": 2.0743673676492382, "learning_rate": 1.1122832074715149e-05, "loss": 0.2294, "step": 6062 }, { "epoch": 0.48033273915626856, "grad_norm": 2.457692304184814, "learning_rate": 1.1120282232995389e-05, "loss": 0.2926, "step": 6063 }, { "epoch": 0.48041196276490394, "grad_norm": 1.9550436873200134, "learning_rate": 1.1117732317510437e-05, "loss": 0.3064, "step": 6064 }, { "epoch": 0.4804911863735393, "grad_norm": 1.7388410603182607, "learning_rate": 1.111518232842819e-05, "loss": 0.2295, "step": 6065 }, { "epoch": 0.4805704099821747, "grad_norm": 1.915879146878589, "learning_rate": 1.1112632265916548e-05, "loss": 0.2852, "step": 6066 }, { "epoch": 0.4806496335908101, "grad_norm": 1.7567653643174015, "learning_rate": 1.1110082130143427e-05, "loss": 0.2075, "step": 6067 }, { "epoch": 0.4807288571994454, "grad_norm": 2.1064041973573646, "learning_rate": 1.1107531921276742e-05, "loss": 0.2111, "step": 6068 }, { "epoch": 0.4808080808080808, "grad_norm": 1.527472576445534, "learning_rate": 1.1104981639484404e-05, "loss": 0.1828, "step": 6069 }, { "epoch": 0.4808873044167162, "grad_norm": 1.589179987319218, "learning_rate": 1.1102431284934345e-05, "loss": 0.1624, "step": 6070 }, { "epoch": 0.48096652802535156, "grad_norm": 2.2218854768320124, "learning_rate": 1.1099880857794491e-05, "loss": 0.3398, "step": 6071 }, { "epoch": 0.48104575163398694, "grad_norm": 2.1902785408532455, "learning_rate": 1.1097330358232775e-05, "loss": 0.3716, "step": 6072 }, { "epoch": 0.4811249752426223, "grad_norm": 1.314182865750346, "learning_rate": 1.1094779786417133e-05, "loss": 0.1787, "step": 6073 }, { "epoch": 0.48120419885125765, "grad_norm": 1.7772959445541636, "learning_rate": 1.1092229142515512e-05, "loss": 0.2461, "step": 6074 }, { "epoch": 0.48128342245989303, "grad_norm": 1.6795739735856607, "learning_rate": 1.1089678426695854e-05, "loss": 0.227, "step": 6075 }, { "epoch": 0.4813626460685284, "grad_norm": 1.400188539829857, "learning_rate": 1.1087127639126118e-05, "loss": 0.1749, "step": 6076 }, { "epoch": 0.4814418696771638, "grad_norm": 1.1970582619896746, "learning_rate": 1.1084576779974257e-05, "loss": 0.1604, "step": 6077 }, { "epoch": 0.4815210932857992, "grad_norm": 1.5901052988698938, "learning_rate": 1.1082025849408231e-05, "loss": 0.2058, "step": 6078 }, { "epoch": 0.48160031689443455, "grad_norm": 1.6262617370031065, "learning_rate": 1.1079474847596014e-05, "loss": 0.3286, "step": 6079 }, { "epoch": 0.48167954050306994, "grad_norm": 2.1410384017366617, "learning_rate": 1.1076923774705568e-05, "loss": 0.2211, "step": 6080 }, { "epoch": 0.48175876411170526, "grad_norm": 1.4889292409739858, "learning_rate": 1.1074372630904878e-05, "loss": 0.2556, "step": 6081 }, { "epoch": 0.48183798772034064, "grad_norm": 1.4150729657725276, "learning_rate": 1.1071821416361917e-05, "loss": 0.1733, "step": 6082 }, { "epoch": 0.481917211328976, "grad_norm": 1.3223431311529998, "learning_rate": 1.106927013124467e-05, "loss": 0.1653, "step": 6083 }, { "epoch": 0.4819964349376114, "grad_norm": 1.5384518921365442, "learning_rate": 1.1066718775721135e-05, "loss": 0.1959, "step": 6084 }, { "epoch": 0.4820756585462468, "grad_norm": 1.554693220212927, "learning_rate": 1.1064167349959299e-05, "loss": 0.1866, "step": 6085 }, { "epoch": 0.48215488215488217, "grad_norm": 1.5100450086196078, "learning_rate": 1.1061615854127165e-05, "loss": 0.1854, "step": 6086 }, { "epoch": 0.48223410576351755, "grad_norm": 1.704183664910275, "learning_rate": 1.1059064288392733e-05, "loss": 0.2573, "step": 6087 }, { "epoch": 0.4823133293721529, "grad_norm": 1.5950789238437713, "learning_rate": 1.1056512652924014e-05, "loss": 0.1999, "step": 6088 }, { "epoch": 0.48239255298078826, "grad_norm": 1.470918572830471, "learning_rate": 1.1053960947889021e-05, "loss": 0.2014, "step": 6089 }, { "epoch": 0.48247177658942364, "grad_norm": 1.926235075911013, "learning_rate": 1.1051409173455771e-05, "loss": 0.2001, "step": 6090 }, { "epoch": 0.482551000198059, "grad_norm": 1.6414267216068597, "learning_rate": 1.1048857329792284e-05, "loss": 0.1506, "step": 6091 }, { "epoch": 0.4826302238066944, "grad_norm": 1.9329758126442254, "learning_rate": 1.1046305417066594e-05, "loss": 0.2904, "step": 6092 }, { "epoch": 0.4827094474153298, "grad_norm": 2.0943448014296564, "learning_rate": 1.1043753435446722e-05, "loss": 0.3623, "step": 6093 }, { "epoch": 0.48278867102396517, "grad_norm": 1.5800268648588092, "learning_rate": 1.104120138510071e-05, "loss": 0.2229, "step": 6094 }, { "epoch": 0.4828678946326005, "grad_norm": 1.6608987084190456, "learning_rate": 1.1038649266196597e-05, "loss": 0.2665, "step": 6095 }, { "epoch": 0.4829471182412359, "grad_norm": 1.5531043338342743, "learning_rate": 1.1036097078902428e-05, "loss": 0.2502, "step": 6096 }, { "epoch": 0.48302634184987125, "grad_norm": 1.655272962009836, "learning_rate": 1.1033544823386248e-05, "loss": 0.2664, "step": 6097 }, { "epoch": 0.48310556545850664, "grad_norm": 1.7081587714660573, "learning_rate": 1.103099249981612e-05, "loss": 0.1943, "step": 6098 }, { "epoch": 0.483184789067142, "grad_norm": 1.460811831054638, "learning_rate": 1.1028440108360092e-05, "loss": 0.2384, "step": 6099 }, { "epoch": 0.4832640126757774, "grad_norm": 2.474806249133684, "learning_rate": 1.1025887649186236e-05, "loss": 0.2603, "step": 6100 }, { "epoch": 0.4833432362844128, "grad_norm": 1.5206067052244827, "learning_rate": 1.1023335122462611e-05, "loss": 0.2223, "step": 6101 }, { "epoch": 0.4834224598930481, "grad_norm": 1.588879697676813, "learning_rate": 1.102078252835729e-05, "loss": 0.2085, "step": 6102 }, { "epoch": 0.4835016835016835, "grad_norm": 1.8732091504836876, "learning_rate": 1.1018229867038358e-05, "loss": 0.2595, "step": 6103 }, { "epoch": 0.48358090711031887, "grad_norm": 2.0376741281047543, "learning_rate": 1.1015677138673882e-05, "loss": 0.1894, "step": 6104 }, { "epoch": 0.48366013071895425, "grad_norm": 1.6908396734478435, "learning_rate": 1.1013124343431955e-05, "loss": 0.2631, "step": 6105 }, { "epoch": 0.48373935432758963, "grad_norm": 1.6279099511622805, "learning_rate": 1.1010571481480668e-05, "loss": 0.2899, "step": 6106 }, { "epoch": 0.483818577936225, "grad_norm": 1.549894099346484, "learning_rate": 1.1008018552988109e-05, "loss": 0.188, "step": 6107 }, { "epoch": 0.4838978015448604, "grad_norm": 1.6707462565369033, "learning_rate": 1.1005465558122382e-05, "loss": 0.2479, "step": 6108 }, { "epoch": 0.4839770251534957, "grad_norm": 1.6452858939134958, "learning_rate": 1.1002912497051582e-05, "loss": 0.1465, "step": 6109 }, { "epoch": 0.4840562487621311, "grad_norm": 1.6808560749976296, "learning_rate": 1.1000359369943818e-05, "loss": 0.192, "step": 6110 }, { "epoch": 0.4841354723707665, "grad_norm": 1.8481182705182186, "learning_rate": 1.099780617696721e-05, "loss": 0.2779, "step": 6111 }, { "epoch": 0.48421469597940187, "grad_norm": 1.573521682443844, "learning_rate": 1.099525291828986e-05, "loss": 0.1774, "step": 6112 }, { "epoch": 0.48429391958803725, "grad_norm": 1.425349550144392, "learning_rate": 1.0992699594079896e-05, "loss": 0.2797, "step": 6113 }, { "epoch": 0.48437314319667263, "grad_norm": 2.009487270160793, "learning_rate": 1.0990146204505444e-05, "loss": 0.261, "step": 6114 }, { "epoch": 0.48445236680530795, "grad_norm": 2.1488999585702393, "learning_rate": 1.0987592749734624e-05, "loss": 0.2596, "step": 6115 }, { "epoch": 0.48453159041394334, "grad_norm": 2.3107121820592575, "learning_rate": 1.0985039229935575e-05, "loss": 0.2907, "step": 6116 }, { "epoch": 0.4846108140225787, "grad_norm": 1.746708747349439, "learning_rate": 1.098248564527643e-05, "loss": 0.2346, "step": 6117 }, { "epoch": 0.4846900376312141, "grad_norm": 1.3520786326928806, "learning_rate": 1.0979931995925335e-05, "loss": 0.163, "step": 6118 }, { "epoch": 0.4847692612398495, "grad_norm": 1.703245748902048, "learning_rate": 1.0977378282050436e-05, "loss": 0.2197, "step": 6119 }, { "epoch": 0.48484848484848486, "grad_norm": 1.843487097522145, "learning_rate": 1.0974824503819877e-05, "loss": 0.2457, "step": 6120 }, { "epoch": 0.48492770845712024, "grad_norm": 1.709515263640577, "learning_rate": 1.0972270661401812e-05, "loss": 0.2491, "step": 6121 }, { "epoch": 0.48500693206575557, "grad_norm": 1.7716834428574988, "learning_rate": 1.0969716754964408e-05, "loss": 0.2244, "step": 6122 }, { "epoch": 0.48508615567439095, "grad_norm": 1.8040739729171662, "learning_rate": 1.0967162784675818e-05, "loss": 0.1685, "step": 6123 }, { "epoch": 0.48516537928302633, "grad_norm": 1.6617700510457143, "learning_rate": 1.0964608750704215e-05, "loss": 0.2344, "step": 6124 }, { "epoch": 0.4852446028916617, "grad_norm": 1.4949449581981267, "learning_rate": 1.0962054653217764e-05, "loss": 0.2284, "step": 6125 }, { "epoch": 0.4853238265002971, "grad_norm": 1.5779260366131818, "learning_rate": 1.0959500492384646e-05, "loss": 0.2089, "step": 6126 }, { "epoch": 0.4854030501089325, "grad_norm": 1.5850483205953467, "learning_rate": 1.0956946268373034e-05, "loss": 0.2093, "step": 6127 }, { "epoch": 0.48548227371756786, "grad_norm": 1.6595133946745808, "learning_rate": 1.0954391981351117e-05, "loss": 0.214, "step": 6128 }, { "epoch": 0.4855614973262032, "grad_norm": 1.300716622134671, "learning_rate": 1.0951837631487081e-05, "loss": 0.139, "step": 6129 }, { "epoch": 0.48564072093483857, "grad_norm": 1.7663575799646405, "learning_rate": 1.0949283218949117e-05, "loss": 0.3728, "step": 6130 }, { "epoch": 0.48571994454347395, "grad_norm": 1.5893907586242662, "learning_rate": 1.094672874390542e-05, "loss": 0.2762, "step": 6131 }, { "epoch": 0.48579916815210933, "grad_norm": 1.5263839071998777, "learning_rate": 1.094417420652419e-05, "loss": 0.264, "step": 6132 }, { "epoch": 0.4858783917607447, "grad_norm": 1.7614765577610025, "learning_rate": 1.0941619606973633e-05, "loss": 0.2399, "step": 6133 }, { "epoch": 0.4859576153693801, "grad_norm": 1.3299154787209122, "learning_rate": 1.0939064945421953e-05, "loss": 0.2019, "step": 6134 }, { "epoch": 0.4860368389780155, "grad_norm": 1.69326226197989, "learning_rate": 1.0936510222037368e-05, "loss": 0.2284, "step": 6135 }, { "epoch": 0.4861160625866508, "grad_norm": 1.3867158307142968, "learning_rate": 1.0933955436988088e-05, "loss": 0.1856, "step": 6136 }, { "epoch": 0.4861952861952862, "grad_norm": 2.09728385427811, "learning_rate": 1.0931400590442337e-05, "loss": 0.2487, "step": 6137 }, { "epoch": 0.48627450980392156, "grad_norm": 2.4672236442999984, "learning_rate": 1.0928845682568344e-05, "loss": 0.3023, "step": 6138 }, { "epoch": 0.48635373341255694, "grad_norm": 1.3444689656720372, "learning_rate": 1.0926290713534324e-05, "loss": 0.2568, "step": 6139 }, { "epoch": 0.4864329570211923, "grad_norm": 1.778188754536545, "learning_rate": 1.0923735683508521e-05, "loss": 0.217, "step": 6140 }, { "epoch": 0.4865121806298277, "grad_norm": 1.780770270725503, "learning_rate": 1.092118059265917e-05, "loss": 0.2628, "step": 6141 }, { "epoch": 0.4865914042384631, "grad_norm": 1.4176869598579112, "learning_rate": 1.0918625441154508e-05, "loss": 0.1981, "step": 6142 }, { "epoch": 0.4866706278470984, "grad_norm": 1.9258852800950363, "learning_rate": 1.091607022916278e-05, "loss": 0.3447, "step": 6143 }, { "epoch": 0.4867498514557338, "grad_norm": 1.3135352212781661, "learning_rate": 1.0913514956852236e-05, "loss": 0.1858, "step": 6144 }, { "epoch": 0.4868290750643692, "grad_norm": 1.5677065770661087, "learning_rate": 1.0910959624391127e-05, "loss": 0.2227, "step": 6145 }, { "epoch": 0.48690829867300456, "grad_norm": 1.9099063126692972, "learning_rate": 1.090840423194771e-05, "loss": 0.2988, "step": 6146 }, { "epoch": 0.48698752228163994, "grad_norm": 1.6363094178026687, "learning_rate": 1.0905848779690246e-05, "loss": 0.2852, "step": 6147 }, { "epoch": 0.4870667458902753, "grad_norm": 1.4844642554778813, "learning_rate": 1.0903293267786998e-05, "loss": 0.1707, "step": 6148 }, { "epoch": 0.4871459694989107, "grad_norm": 1.8019475876848121, "learning_rate": 1.0900737696406235e-05, "loss": 0.2145, "step": 6149 }, { "epoch": 0.48722519310754603, "grad_norm": 1.8317413813612966, "learning_rate": 1.0898182065716227e-05, "loss": 0.2077, "step": 6150 }, { "epoch": 0.4873044167161814, "grad_norm": 1.5146055599726223, "learning_rate": 1.0895626375885255e-05, "loss": 0.2438, "step": 6151 }, { "epoch": 0.4873836403248168, "grad_norm": 1.5366255100365689, "learning_rate": 1.0893070627081595e-05, "loss": 0.1401, "step": 6152 }, { "epoch": 0.4874628639334522, "grad_norm": 1.777147234266726, "learning_rate": 1.089051481947353e-05, "loss": 0.2652, "step": 6153 }, { "epoch": 0.48754208754208755, "grad_norm": 1.8967143317189588, "learning_rate": 1.0887958953229349e-05, "loss": 0.2517, "step": 6154 }, { "epoch": 0.48762131115072294, "grad_norm": 1.1405015618207417, "learning_rate": 1.0885403028517345e-05, "loss": 0.1332, "step": 6155 }, { "epoch": 0.48770053475935826, "grad_norm": 1.6299641167288337, "learning_rate": 1.0882847045505809e-05, "loss": 0.3274, "step": 6156 }, { "epoch": 0.48777975836799364, "grad_norm": 1.4423745116920934, "learning_rate": 1.0880291004363047e-05, "loss": 0.223, "step": 6157 }, { "epoch": 0.487858981976629, "grad_norm": 1.6379693450780084, "learning_rate": 1.0877734905257354e-05, "loss": 0.1759, "step": 6158 }, { "epoch": 0.4879382055852644, "grad_norm": 1.2827861539072318, "learning_rate": 1.0875178748357045e-05, "loss": 0.1468, "step": 6159 }, { "epoch": 0.4880174291938998, "grad_norm": 1.593309498979362, "learning_rate": 1.0872622533830423e-05, "loss": 0.2166, "step": 6160 }, { "epoch": 0.48809665280253517, "grad_norm": 1.6965804035508694, "learning_rate": 1.0870066261845807e-05, "loss": 0.2574, "step": 6161 }, { "epoch": 0.48817587641117055, "grad_norm": 1.4087665406499363, "learning_rate": 1.0867509932571517e-05, "loss": 0.2055, "step": 6162 }, { "epoch": 0.4882551000198059, "grad_norm": 1.6566225045751006, "learning_rate": 1.0864953546175867e-05, "loss": 0.2288, "step": 6163 }, { "epoch": 0.48833432362844126, "grad_norm": 2.0026294389901156, "learning_rate": 1.0862397102827189e-05, "loss": 0.3402, "step": 6164 }, { "epoch": 0.48841354723707664, "grad_norm": 1.4476077521092667, "learning_rate": 1.0859840602693813e-05, "loss": 0.2276, "step": 6165 }, { "epoch": 0.488492770845712, "grad_norm": 1.327480250661797, "learning_rate": 1.0857284045944071e-05, "loss": 0.1654, "step": 6166 }, { "epoch": 0.4885719944543474, "grad_norm": 1.4607350372368566, "learning_rate": 1.0854727432746302e-05, "loss": 0.2383, "step": 6167 }, { "epoch": 0.4886512180629828, "grad_norm": 1.5882571358781843, "learning_rate": 1.0852170763268838e-05, "loss": 0.1409, "step": 6168 }, { "epoch": 0.48873044167161817, "grad_norm": 1.6425249451096904, "learning_rate": 1.0849614037680032e-05, "loss": 0.2884, "step": 6169 }, { "epoch": 0.4888096652802535, "grad_norm": 1.7509850350459937, "learning_rate": 1.0847057256148234e-05, "loss": 0.2006, "step": 6170 }, { "epoch": 0.4888888888888889, "grad_norm": 1.3753872618805856, "learning_rate": 1.0844500418841788e-05, "loss": 0.2015, "step": 6171 }, { "epoch": 0.48896811249752425, "grad_norm": 2.269210263720901, "learning_rate": 1.0841943525929053e-05, "loss": 0.3382, "step": 6172 }, { "epoch": 0.48904733610615964, "grad_norm": 1.7117425630790397, "learning_rate": 1.0839386577578389e-05, "loss": 0.2681, "step": 6173 }, { "epoch": 0.489126559714795, "grad_norm": 1.5168418983762082, "learning_rate": 1.0836829573958155e-05, "loss": 0.1915, "step": 6174 }, { "epoch": 0.4892057833234304, "grad_norm": 1.5214665522613349, "learning_rate": 1.083427251523672e-05, "loss": 0.2751, "step": 6175 }, { "epoch": 0.4892850069320658, "grad_norm": 1.823119457155279, "learning_rate": 1.0831715401582458e-05, "loss": 0.193, "step": 6176 }, { "epoch": 0.4893642305407011, "grad_norm": 1.4058285661576775, "learning_rate": 1.0829158233163737e-05, "loss": 0.204, "step": 6177 }, { "epoch": 0.4894434541493365, "grad_norm": 1.6781061166192746, "learning_rate": 1.0826601010148935e-05, "loss": 0.3637, "step": 6178 }, { "epoch": 0.48952267775797187, "grad_norm": 1.822493057629355, "learning_rate": 1.0824043732706435e-05, "loss": 0.2831, "step": 6179 }, { "epoch": 0.48960190136660725, "grad_norm": 1.827192957488779, "learning_rate": 1.0821486401004618e-05, "loss": 0.2764, "step": 6180 }, { "epoch": 0.48968112497524263, "grad_norm": 1.3201956023986317, "learning_rate": 1.0818929015211877e-05, "loss": 0.2089, "step": 6181 }, { "epoch": 0.489760348583878, "grad_norm": 1.591364500543472, "learning_rate": 1.0816371575496598e-05, "loss": 0.1856, "step": 6182 }, { "epoch": 0.4898395721925134, "grad_norm": 1.5548064694852972, "learning_rate": 1.081381408202718e-05, "loss": 0.2305, "step": 6183 }, { "epoch": 0.4899187958011487, "grad_norm": 1.8984068867259387, "learning_rate": 1.0811256534972024e-05, "loss": 0.2974, "step": 6184 }, { "epoch": 0.4899980194097841, "grad_norm": 1.6690255863176764, "learning_rate": 1.0808698934499524e-05, "loss": 0.2095, "step": 6185 }, { "epoch": 0.4900772430184195, "grad_norm": 1.6384072389275626, "learning_rate": 1.0806141280778093e-05, "loss": 0.2383, "step": 6186 }, { "epoch": 0.49015646662705487, "grad_norm": 1.3964898991375696, "learning_rate": 1.0803583573976137e-05, "loss": 0.1816, "step": 6187 }, { "epoch": 0.49023569023569025, "grad_norm": 1.4869059356215997, "learning_rate": 1.0801025814262068e-05, "loss": 0.1526, "step": 6188 }, { "epoch": 0.49031491384432563, "grad_norm": 1.6555208196856617, "learning_rate": 1.0798468001804305e-05, "loss": 0.2435, "step": 6189 }, { "epoch": 0.490394137452961, "grad_norm": 1.707388444778532, "learning_rate": 1.0795910136771266e-05, "loss": 0.2595, "step": 6190 }, { "epoch": 0.49047336106159634, "grad_norm": 1.7413373290567598, "learning_rate": 1.0793352219331371e-05, "loss": 0.2422, "step": 6191 }, { "epoch": 0.4905525846702317, "grad_norm": 1.6546228157343745, "learning_rate": 1.0790794249653056e-05, "loss": 0.2713, "step": 6192 }, { "epoch": 0.4906318082788671, "grad_norm": 2.086706856315569, "learning_rate": 1.0788236227904738e-05, "loss": 0.2364, "step": 6193 }, { "epoch": 0.4907110318875025, "grad_norm": 1.234681659125135, "learning_rate": 1.0785678154254865e-05, "loss": 0.1477, "step": 6194 }, { "epoch": 0.49079025549613786, "grad_norm": 1.6177756384041073, "learning_rate": 1.0783120028871858e-05, "loss": 0.1776, "step": 6195 }, { "epoch": 0.49086947910477324, "grad_norm": 1.3683553128734272, "learning_rate": 1.0780561851924168e-05, "loss": 0.1628, "step": 6196 }, { "epoch": 0.49094870271340857, "grad_norm": 2.4870664651019707, "learning_rate": 1.0778003623580237e-05, "loss": 0.2969, "step": 6197 }, { "epoch": 0.49102792632204395, "grad_norm": 1.8429099995761549, "learning_rate": 1.077544534400851e-05, "loss": 0.2589, "step": 6198 }, { "epoch": 0.49110714993067933, "grad_norm": 1.8614813397662913, "learning_rate": 1.0772887013377438e-05, "loss": 0.3398, "step": 6199 }, { "epoch": 0.4911863735393147, "grad_norm": 1.8831971955690425, "learning_rate": 1.0770328631855476e-05, "loss": 0.3528, "step": 6200 }, { "epoch": 0.4912655971479501, "grad_norm": 1.8056050666799466, "learning_rate": 1.0767770199611078e-05, "loss": 0.2457, "step": 6201 }, { "epoch": 0.4913448207565855, "grad_norm": 1.3202956840548274, "learning_rate": 1.076521171681271e-05, "loss": 0.1962, "step": 6202 }, { "epoch": 0.49142404436522086, "grad_norm": 1.6439221517095415, "learning_rate": 1.0762653183628831e-05, "loss": 0.2307, "step": 6203 }, { "epoch": 0.4915032679738562, "grad_norm": 1.6337769615802755, "learning_rate": 1.0760094600227908e-05, "loss": 0.3183, "step": 6204 }, { "epoch": 0.49158249158249157, "grad_norm": 1.5978413299762448, "learning_rate": 1.0757535966778416e-05, "loss": 0.1749, "step": 6205 }, { "epoch": 0.49166171519112695, "grad_norm": 1.8764726026369707, "learning_rate": 1.0754977283448824e-05, "loss": 0.2052, "step": 6206 }, { "epoch": 0.49174093879976233, "grad_norm": 1.9308170808395688, "learning_rate": 1.0752418550407611e-05, "loss": 0.365, "step": 6207 }, { "epoch": 0.4918201624083977, "grad_norm": 1.7649882877851126, "learning_rate": 1.0749859767823256e-05, "loss": 0.3075, "step": 6208 }, { "epoch": 0.4918993860170331, "grad_norm": 1.5342336819764126, "learning_rate": 1.0747300935864245e-05, "loss": 0.2787, "step": 6209 }, { "epoch": 0.4919786096256685, "grad_norm": 1.6878091554591756, "learning_rate": 1.074474205469906e-05, "loss": 0.2376, "step": 6210 }, { "epoch": 0.4920578332343038, "grad_norm": 1.74027552563882, "learning_rate": 1.0742183124496197e-05, "loss": 0.2972, "step": 6211 }, { "epoch": 0.4921370568429392, "grad_norm": 1.8617598576763665, "learning_rate": 1.0739624145424146e-05, "loss": 0.2453, "step": 6212 }, { "epoch": 0.49221628045157456, "grad_norm": 2.043202923732088, "learning_rate": 1.0737065117651404e-05, "loss": 0.2416, "step": 6213 }, { "epoch": 0.49229550406020994, "grad_norm": 1.566968725000622, "learning_rate": 1.0734506041346468e-05, "loss": 0.2192, "step": 6214 }, { "epoch": 0.4923747276688453, "grad_norm": 1.9960030115157306, "learning_rate": 1.0731946916677847e-05, "loss": 0.3274, "step": 6215 }, { "epoch": 0.4924539512774807, "grad_norm": 1.5474557160628075, "learning_rate": 1.0729387743814041e-05, "loss": 0.1648, "step": 6216 }, { "epoch": 0.4925331748861161, "grad_norm": 1.8497404877238057, "learning_rate": 1.0726828522923563e-05, "loss": 0.2269, "step": 6217 }, { "epoch": 0.4926123984947514, "grad_norm": 1.4643826219838463, "learning_rate": 1.0724269254174921e-05, "loss": 0.2405, "step": 6218 }, { "epoch": 0.4926916221033868, "grad_norm": 1.917819643524853, "learning_rate": 1.0721709937736638e-05, "loss": 0.3066, "step": 6219 }, { "epoch": 0.4927708457120222, "grad_norm": 1.5417699427322982, "learning_rate": 1.0719150573777226e-05, "loss": 0.1755, "step": 6220 }, { "epoch": 0.49285006932065756, "grad_norm": 1.7728099455613477, "learning_rate": 1.071659116246521e-05, "loss": 0.2175, "step": 6221 }, { "epoch": 0.49292929292929294, "grad_norm": 1.8008396342929096, "learning_rate": 1.0714031703969112e-05, "loss": 0.2176, "step": 6222 }, { "epoch": 0.4930085165379283, "grad_norm": 1.736444090475803, "learning_rate": 1.0711472198457462e-05, "loss": 0.2584, "step": 6223 }, { "epoch": 0.4930877401465637, "grad_norm": 2.22861107242774, "learning_rate": 1.0708912646098795e-05, "loss": 0.2427, "step": 6224 }, { "epoch": 0.49316696375519903, "grad_norm": 1.8247391087058051, "learning_rate": 1.0706353047061638e-05, "loss": 0.2421, "step": 6225 }, { "epoch": 0.4932461873638344, "grad_norm": 1.8661070820992816, "learning_rate": 1.070379340151453e-05, "loss": 0.229, "step": 6226 }, { "epoch": 0.4933254109724698, "grad_norm": 1.3987662440318664, "learning_rate": 1.0701233709626018e-05, "loss": 0.1682, "step": 6227 }, { "epoch": 0.4934046345811052, "grad_norm": 1.5510543220236561, "learning_rate": 1.0698673971564637e-05, "loss": 0.1478, "step": 6228 }, { "epoch": 0.49348385818974055, "grad_norm": 1.6480913304124614, "learning_rate": 1.0696114187498938e-05, "loss": 0.2309, "step": 6229 }, { "epoch": 0.49356308179837594, "grad_norm": 1.5842404649955728, "learning_rate": 1.0693554357597469e-05, "loss": 0.205, "step": 6230 }, { "epoch": 0.4936423054070113, "grad_norm": 2.406755385993806, "learning_rate": 1.069099448202878e-05, "loss": 0.1801, "step": 6231 }, { "epoch": 0.49372152901564664, "grad_norm": 1.9988435795022914, "learning_rate": 1.0688434560961434e-05, "loss": 0.2474, "step": 6232 }, { "epoch": 0.493800752624282, "grad_norm": 1.950887030012949, "learning_rate": 1.068587459456398e-05, "loss": 0.2217, "step": 6233 }, { "epoch": 0.4938799762329174, "grad_norm": 1.6089132951068137, "learning_rate": 1.0683314583004986e-05, "loss": 0.1949, "step": 6234 }, { "epoch": 0.4939591998415528, "grad_norm": 1.8548905958510753, "learning_rate": 1.0680754526453017e-05, "loss": 0.2357, "step": 6235 }, { "epoch": 0.49403842345018817, "grad_norm": 1.943167464036002, "learning_rate": 1.0678194425076633e-05, "loss": 0.3554, "step": 6236 }, { "epoch": 0.49411764705882355, "grad_norm": 1.700182530393419, "learning_rate": 1.0675634279044416e-05, "loss": 0.189, "step": 6237 }, { "epoch": 0.4941968706674589, "grad_norm": 1.6672715305641543, "learning_rate": 1.0673074088524926e-05, "loss": 0.2305, "step": 6238 }, { "epoch": 0.49427609427609426, "grad_norm": 1.7138439404596937, "learning_rate": 1.067051385368675e-05, "loss": 0.1855, "step": 6239 }, { "epoch": 0.49435531788472964, "grad_norm": 2.02775499115976, "learning_rate": 1.0667953574698461e-05, "loss": 0.2236, "step": 6240 }, { "epoch": 0.494434541493365, "grad_norm": 1.520874435187763, "learning_rate": 1.0665393251728645e-05, "loss": 0.1385, "step": 6241 }, { "epoch": 0.4945137651020004, "grad_norm": 1.274025128194307, "learning_rate": 1.0662832884945884e-05, "loss": 0.1908, "step": 6242 }, { "epoch": 0.4945929887106358, "grad_norm": 1.2739953099257084, "learning_rate": 1.0660272474518767e-05, "loss": 0.1984, "step": 6243 }, { "epoch": 0.49467221231927117, "grad_norm": 1.7728928823269645, "learning_rate": 1.0657712020615885e-05, "loss": 0.2525, "step": 6244 }, { "epoch": 0.4947514359279065, "grad_norm": 1.7285672362809472, "learning_rate": 1.0655151523405831e-05, "loss": 0.2871, "step": 6245 }, { "epoch": 0.4948306595365419, "grad_norm": 1.3269595406192067, "learning_rate": 1.06525909830572e-05, "loss": 0.1822, "step": 6246 }, { "epoch": 0.49490988314517725, "grad_norm": 1.300431502959002, "learning_rate": 1.0650030399738594e-05, "loss": 0.1724, "step": 6247 }, { "epoch": 0.49498910675381264, "grad_norm": 1.2556205744629074, "learning_rate": 1.0647469773618617e-05, "loss": 0.1525, "step": 6248 }, { "epoch": 0.495068330362448, "grad_norm": 1.4014797610476046, "learning_rate": 1.0644909104865869e-05, "loss": 0.2244, "step": 6249 }, { "epoch": 0.4951475539710834, "grad_norm": 1.8979961708367814, "learning_rate": 1.0642348393648956e-05, "loss": 0.2852, "step": 6250 }, { "epoch": 0.4952267775797188, "grad_norm": 1.6449565023773456, "learning_rate": 1.0639787640136497e-05, "loss": 0.2212, "step": 6251 }, { "epoch": 0.4953060011883541, "grad_norm": 1.6999320369453943, "learning_rate": 1.0637226844497096e-05, "loss": 0.2247, "step": 6252 }, { "epoch": 0.4953852247969895, "grad_norm": 1.581359536928781, "learning_rate": 1.0634666006899375e-05, "loss": 0.1958, "step": 6253 }, { "epoch": 0.49546444840562487, "grad_norm": 1.4700221070014095, "learning_rate": 1.0632105127511952e-05, "loss": 0.1691, "step": 6254 }, { "epoch": 0.49554367201426025, "grad_norm": 1.7447688111101973, "learning_rate": 1.0629544206503445e-05, "loss": 0.3034, "step": 6255 }, { "epoch": 0.49562289562289563, "grad_norm": 2.0028160642807524, "learning_rate": 1.0626983244042486e-05, "loss": 0.1967, "step": 6256 }, { "epoch": 0.495702119231531, "grad_norm": 1.9759915357404259, "learning_rate": 1.0624422240297694e-05, "loss": 0.3106, "step": 6257 }, { "epoch": 0.4957813428401664, "grad_norm": 2.2473552691511673, "learning_rate": 1.0621861195437703e-05, "loss": 0.2636, "step": 6258 }, { "epoch": 0.4958605664488017, "grad_norm": 1.7080124839243644, "learning_rate": 1.0619300109631146e-05, "loss": 0.2223, "step": 6259 }, { "epoch": 0.4959397900574371, "grad_norm": 1.9493290404083532, "learning_rate": 1.0616738983046652e-05, "loss": 0.2912, "step": 6260 }, { "epoch": 0.4960190136660725, "grad_norm": 1.774180884373201, "learning_rate": 1.0614177815852866e-05, "loss": 0.2662, "step": 6261 }, { "epoch": 0.49609823727470787, "grad_norm": 2.104038298922806, "learning_rate": 1.0611616608218429e-05, "loss": 0.2715, "step": 6262 }, { "epoch": 0.49617746088334325, "grad_norm": 2.0133323885762953, "learning_rate": 1.0609055360311978e-05, "loss": 0.291, "step": 6263 }, { "epoch": 0.49625668449197863, "grad_norm": 1.6995673928053876, "learning_rate": 1.0606494072302164e-05, "loss": 0.1957, "step": 6264 }, { "epoch": 0.496335908100614, "grad_norm": 1.4371850695635577, "learning_rate": 1.0603932744357632e-05, "loss": 0.243, "step": 6265 }, { "epoch": 0.49641513170924934, "grad_norm": 1.5461293396673728, "learning_rate": 1.0601371376647034e-05, "loss": 0.2754, "step": 6266 }, { "epoch": 0.4964943553178847, "grad_norm": 2.272889394052342, "learning_rate": 1.0598809969339028e-05, "loss": 0.389, "step": 6267 }, { "epoch": 0.4965735789265201, "grad_norm": 1.9938155741621413, "learning_rate": 1.0596248522602264e-05, "loss": 0.2671, "step": 6268 }, { "epoch": 0.4966528025351555, "grad_norm": 1.5923380031549277, "learning_rate": 1.0593687036605402e-05, "loss": 0.161, "step": 6269 }, { "epoch": 0.49673202614379086, "grad_norm": 1.5391093068331152, "learning_rate": 1.0591125511517108e-05, "loss": 0.1889, "step": 6270 }, { "epoch": 0.49681124975242624, "grad_norm": 1.6143157315481598, "learning_rate": 1.0588563947506043e-05, "loss": 0.2365, "step": 6271 }, { "epoch": 0.49689047336106157, "grad_norm": 1.6028930820603855, "learning_rate": 1.0586002344740875e-05, "loss": 0.2258, "step": 6272 }, { "epoch": 0.49696969696969695, "grad_norm": 1.8658451845775084, "learning_rate": 1.0583440703390271e-05, "loss": 0.2944, "step": 6273 }, { "epoch": 0.49704892057833233, "grad_norm": 1.8139205198244914, "learning_rate": 1.0580879023622903e-05, "loss": 0.2128, "step": 6274 }, { "epoch": 0.4971281441869677, "grad_norm": 1.6876639323764948, "learning_rate": 1.0578317305607451e-05, "loss": 0.2267, "step": 6275 }, { "epoch": 0.4972073677956031, "grad_norm": 1.6145051881163168, "learning_rate": 1.057575554951258e-05, "loss": 0.2353, "step": 6276 }, { "epoch": 0.4972865914042385, "grad_norm": 1.541671363012306, "learning_rate": 1.0573193755506982e-05, "loss": 0.1943, "step": 6277 }, { "epoch": 0.49736581501287386, "grad_norm": 1.7805912243183721, "learning_rate": 1.0570631923759331e-05, "loss": 0.2255, "step": 6278 }, { "epoch": 0.4974450386215092, "grad_norm": 1.6582149202006344, "learning_rate": 1.0568070054438314e-05, "loss": 0.2408, "step": 6279 }, { "epoch": 0.49752426223014456, "grad_norm": 1.542038882128615, "learning_rate": 1.0565508147712618e-05, "loss": 0.2329, "step": 6280 }, { "epoch": 0.49760348583877995, "grad_norm": 1.9793328516101398, "learning_rate": 1.056294620375093e-05, "loss": 0.3322, "step": 6281 }, { "epoch": 0.49768270944741533, "grad_norm": 2.1944197763631634, "learning_rate": 1.0560384222721943e-05, "loss": 0.2469, "step": 6282 }, { "epoch": 0.4977619330560507, "grad_norm": 1.7035911251232576, "learning_rate": 1.0557822204794353e-05, "loss": 0.2668, "step": 6283 }, { "epoch": 0.4978411566646861, "grad_norm": 1.4342672728497798, "learning_rate": 1.0555260150136852e-05, "loss": 0.2024, "step": 6284 }, { "epoch": 0.4979203802733215, "grad_norm": 1.6415210042146333, "learning_rate": 1.0552698058918146e-05, "loss": 0.1334, "step": 6285 }, { "epoch": 0.4979996038819568, "grad_norm": 1.406911814066835, "learning_rate": 1.055013593130693e-05, "loss": 0.2135, "step": 6286 }, { "epoch": 0.4980788274905922, "grad_norm": 1.518470094211511, "learning_rate": 1.0547573767471913e-05, "loss": 0.2229, "step": 6287 }, { "epoch": 0.49815805109922756, "grad_norm": 1.4365778785115295, "learning_rate": 1.0545011567581794e-05, "loss": 0.1693, "step": 6288 }, { "epoch": 0.49823727470786294, "grad_norm": 1.7249480014507745, "learning_rate": 1.0542449331805287e-05, "loss": 0.2155, "step": 6289 }, { "epoch": 0.4983164983164983, "grad_norm": 1.6063150206212504, "learning_rate": 1.05398870603111e-05, "loss": 0.2411, "step": 6290 }, { "epoch": 0.4983957219251337, "grad_norm": 1.5541308965441618, "learning_rate": 1.0537324753267952e-05, "loss": 0.1631, "step": 6291 }, { "epoch": 0.4984749455337691, "grad_norm": 1.6414487467018528, "learning_rate": 1.053476241084455e-05, "loss": 0.2322, "step": 6292 }, { "epoch": 0.4985541691424044, "grad_norm": 1.2846477326502914, "learning_rate": 1.0532200033209618e-05, "loss": 0.1725, "step": 6293 }, { "epoch": 0.4986333927510398, "grad_norm": 1.3955416200753925, "learning_rate": 1.0529637620531876e-05, "loss": 0.1839, "step": 6294 }, { "epoch": 0.4987126163596752, "grad_norm": 2.0885884034288345, "learning_rate": 1.0527075172980043e-05, "loss": 0.2279, "step": 6295 }, { "epoch": 0.49879183996831056, "grad_norm": 1.9742302365033042, "learning_rate": 1.0524512690722848e-05, "loss": 0.3424, "step": 6296 }, { "epoch": 0.49887106357694594, "grad_norm": 1.8781039276787217, "learning_rate": 1.0521950173929017e-05, "loss": 0.3458, "step": 6297 }, { "epoch": 0.4989502871855813, "grad_norm": 1.717886744370777, "learning_rate": 1.0519387622767274e-05, "loss": 0.2295, "step": 6298 }, { "epoch": 0.4990295107942167, "grad_norm": 1.5564022869339005, "learning_rate": 1.051682503740636e-05, "loss": 0.2135, "step": 6299 }, { "epoch": 0.49910873440285203, "grad_norm": 1.6977806638522623, "learning_rate": 1.0514262418015e-05, "loss": 0.3145, "step": 6300 }, { "epoch": 0.4991879580114874, "grad_norm": 1.7819218782039914, "learning_rate": 1.0511699764761935e-05, "loss": 0.2523, "step": 6301 }, { "epoch": 0.4992671816201228, "grad_norm": 2.1345146772996775, "learning_rate": 1.0509137077815906e-05, "loss": 0.3132, "step": 6302 }, { "epoch": 0.4993464052287582, "grad_norm": 1.804156608044116, "learning_rate": 1.0506574357345647e-05, "loss": 0.2572, "step": 6303 }, { "epoch": 0.49942562883739355, "grad_norm": 1.0471112911205103, "learning_rate": 1.0504011603519904e-05, "loss": 0.116, "step": 6304 }, { "epoch": 0.49950485244602894, "grad_norm": 2.028595283466379, "learning_rate": 1.0501448816507425e-05, "loss": 0.2445, "step": 6305 }, { "epoch": 0.4995840760546643, "grad_norm": 1.8165845922227482, "learning_rate": 1.0498885996476952e-05, "loss": 0.2396, "step": 6306 }, { "epoch": 0.49966329966329964, "grad_norm": 2.015380461207272, "learning_rate": 1.0496323143597237e-05, "loss": 0.2768, "step": 6307 }, { "epoch": 0.499742523271935, "grad_norm": 1.6462508798604103, "learning_rate": 1.049376025803703e-05, "loss": 0.2161, "step": 6308 }, { "epoch": 0.4998217468805704, "grad_norm": 1.3749269335494156, "learning_rate": 1.0491197339965087e-05, "loss": 0.2323, "step": 6309 }, { "epoch": 0.4999009704892058, "grad_norm": 1.3425668212128603, "learning_rate": 1.0488634389550166e-05, "loss": 0.1674, "step": 6310 }, { "epoch": 0.49998019409784117, "grad_norm": 1.6571866474508175, "learning_rate": 1.0486071406961017e-05, "loss": 0.1912, "step": 6311 }, { "epoch": 0.5000594177064765, "grad_norm": 1.827655913078546, "learning_rate": 1.0483508392366404e-05, "loss": 0.2727, "step": 6312 }, { "epoch": 0.5001386413151119, "grad_norm": 1.7156013230434588, "learning_rate": 1.0480945345935094e-05, "loss": 0.2837, "step": 6313 }, { "epoch": 0.5002178649237473, "grad_norm": 1.846517908626761, "learning_rate": 1.0478382267835843e-05, "loss": 0.2715, "step": 6314 }, { "epoch": 0.5002970885323826, "grad_norm": 1.5305723207481021, "learning_rate": 1.0475819158237426e-05, "loss": 0.2572, "step": 6315 }, { "epoch": 0.5003763121410181, "grad_norm": 1.9127396577335278, "learning_rate": 1.0473256017308601e-05, "loss": 0.2569, "step": 6316 }, { "epoch": 0.5004555357496534, "grad_norm": 1.7553345457736045, "learning_rate": 1.047069284521815e-05, "loss": 0.2083, "step": 6317 }, { "epoch": 0.5005347593582887, "grad_norm": 1.660106564245848, "learning_rate": 1.0468129642134837e-05, "loss": 0.2752, "step": 6318 }, { "epoch": 0.5006139829669242, "grad_norm": 2.41784987591571, "learning_rate": 1.046556640822744e-05, "loss": 0.2469, "step": 6319 }, { "epoch": 0.5006932065755595, "grad_norm": 1.4166745255418107, "learning_rate": 1.0463003143664734e-05, "loss": 0.2187, "step": 6320 }, { "epoch": 0.5007724301841949, "grad_norm": 1.5265887074114244, "learning_rate": 1.0460439848615502e-05, "loss": 0.2423, "step": 6321 }, { "epoch": 0.5008516537928303, "grad_norm": 1.3372642398829884, "learning_rate": 1.0457876523248518e-05, "loss": 0.1326, "step": 6322 }, { "epoch": 0.5009308774014657, "grad_norm": 1.8058364796939765, "learning_rate": 1.0455313167732573e-05, "loss": 0.2854, "step": 6323 }, { "epoch": 0.501010101010101, "grad_norm": 1.5342150322869643, "learning_rate": 1.0452749782236443e-05, "loss": 0.2121, "step": 6324 }, { "epoch": 0.5010893246187363, "grad_norm": 1.3407703992101323, "learning_rate": 1.0450186366928917e-05, "loss": 0.1819, "step": 6325 }, { "epoch": 0.5011685482273718, "grad_norm": 1.3182866296450149, "learning_rate": 1.044762292197879e-05, "loss": 0.1994, "step": 6326 }, { "epoch": 0.5012477718360071, "grad_norm": 1.7854218945014915, "learning_rate": 1.0445059447554844e-05, "loss": 0.2794, "step": 6327 }, { "epoch": 0.5013269954446425, "grad_norm": 1.6298876965130429, "learning_rate": 1.0442495943825874e-05, "loss": 0.2383, "step": 6328 }, { "epoch": 0.5014062190532779, "grad_norm": 1.784303345781785, "learning_rate": 1.0439932410960678e-05, "loss": 0.2529, "step": 6329 }, { "epoch": 0.5014854426619133, "grad_norm": 1.3647047954385347, "learning_rate": 1.0437368849128046e-05, "loss": 0.1602, "step": 6330 }, { "epoch": 0.5015646662705486, "grad_norm": 1.6015511226112418, "learning_rate": 1.043480525849678e-05, "loss": 0.1831, "step": 6331 }, { "epoch": 0.501643889879184, "grad_norm": 1.4893033260344972, "learning_rate": 1.0432241639235686e-05, "loss": 0.2519, "step": 6332 }, { "epoch": 0.5017231134878194, "grad_norm": 1.5525755437047382, "learning_rate": 1.0429677991513554e-05, "loss": 0.1652, "step": 6333 }, { "epoch": 0.5018023370964547, "grad_norm": 1.6681135791132164, "learning_rate": 1.0427114315499196e-05, "loss": 0.2545, "step": 6334 }, { "epoch": 0.5018815607050902, "grad_norm": 2.090941206219267, "learning_rate": 1.0424550611361412e-05, "loss": 0.2507, "step": 6335 }, { "epoch": 0.5019607843137255, "grad_norm": 1.909480806462905, "learning_rate": 1.0421986879269017e-05, "loss": 0.308, "step": 6336 }, { "epoch": 0.5020400079223608, "grad_norm": 2.4079728072568067, "learning_rate": 1.0419423119390815e-05, "loss": 0.2683, "step": 6337 }, { "epoch": 0.5021192315309962, "grad_norm": 1.5609549837679797, "learning_rate": 1.041685933189562e-05, "loss": 0.1649, "step": 6338 }, { "epoch": 0.5021984551396316, "grad_norm": 1.5757878775776288, "learning_rate": 1.041429551695224e-05, "loss": 0.2232, "step": 6339 }, { "epoch": 0.502277678748267, "grad_norm": 1.5012421595006589, "learning_rate": 1.0411731674729497e-05, "loss": 0.238, "step": 6340 }, { "epoch": 0.5023569023569023, "grad_norm": 1.362074463931762, "learning_rate": 1.0409167805396202e-05, "loss": 0.1397, "step": 6341 }, { "epoch": 0.5024361259655378, "grad_norm": 1.7840272250382005, "learning_rate": 1.040660390912118e-05, "loss": 0.3561, "step": 6342 }, { "epoch": 0.5025153495741731, "grad_norm": 1.79990790691543, "learning_rate": 1.0404039986073244e-05, "loss": 0.2492, "step": 6343 }, { "epoch": 0.5025945731828084, "grad_norm": 1.7865429314491876, "learning_rate": 1.0401476036421219e-05, "loss": 0.2447, "step": 6344 }, { "epoch": 0.5026737967914439, "grad_norm": 1.6012233207399875, "learning_rate": 1.039891206033393e-05, "loss": 0.171, "step": 6345 }, { "epoch": 0.5027530204000792, "grad_norm": 1.7536637726753614, "learning_rate": 1.0396348057980202e-05, "loss": 0.2535, "step": 6346 }, { "epoch": 0.5028322440087146, "grad_norm": 1.6610435307688316, "learning_rate": 1.0393784029528858e-05, "loss": 0.2158, "step": 6347 }, { "epoch": 0.50291146761735, "grad_norm": 1.6140842049935724, "learning_rate": 1.0391219975148734e-05, "loss": 0.3368, "step": 6348 }, { "epoch": 0.5029906912259854, "grad_norm": 1.666984643226144, "learning_rate": 1.0388655895008654e-05, "loss": 0.214, "step": 6349 }, { "epoch": 0.5030699148346207, "grad_norm": 1.840109797374884, "learning_rate": 1.0386091789277458e-05, "loss": 0.2698, "step": 6350 }, { "epoch": 0.503149138443256, "grad_norm": 1.7346545437050827, "learning_rate": 1.038352765812397e-05, "loss": 0.2612, "step": 6351 }, { "epoch": 0.5032283620518915, "grad_norm": 1.3521963542225746, "learning_rate": 1.0380963501717034e-05, "loss": 0.2983, "step": 6352 }, { "epoch": 0.5033075856605268, "grad_norm": 2.1323357641259952, "learning_rate": 1.0378399320225486e-05, "loss": 0.2248, "step": 6353 }, { "epoch": 0.5033868092691622, "grad_norm": 1.416993577424726, "learning_rate": 1.037583511381816e-05, "loss": 0.1824, "step": 6354 }, { "epoch": 0.5034660328777976, "grad_norm": 1.6675615195126514, "learning_rate": 1.0373270882663899e-05, "loss": 0.3483, "step": 6355 }, { "epoch": 0.503545256486433, "grad_norm": 1.7897076886151495, "learning_rate": 1.0370706626931553e-05, "loss": 0.283, "step": 6356 }, { "epoch": 0.5036244800950683, "grad_norm": 1.429445325993827, "learning_rate": 1.0368142346789954e-05, "loss": 0.216, "step": 6357 }, { "epoch": 0.5037037037037037, "grad_norm": 1.8603280964120084, "learning_rate": 1.0365578042407956e-05, "loss": 0.2864, "step": 6358 }, { "epoch": 0.5037829273123391, "grad_norm": 1.7968005399613283, "learning_rate": 1.03630137139544e-05, "loss": 0.2399, "step": 6359 }, { "epoch": 0.5038621509209744, "grad_norm": 1.7691777966579632, "learning_rate": 1.0360449361598137e-05, "loss": 0.2415, "step": 6360 }, { "epoch": 0.5039413745296099, "grad_norm": 1.7441744503718601, "learning_rate": 1.0357884985508022e-05, "loss": 0.2448, "step": 6361 }, { "epoch": 0.5040205981382452, "grad_norm": 2.1249670969669716, "learning_rate": 1.03553205858529e-05, "loss": 0.3841, "step": 6362 }, { "epoch": 0.5040998217468806, "grad_norm": 2.2016152688141992, "learning_rate": 1.0352756162801626e-05, "loss": 0.3419, "step": 6363 }, { "epoch": 0.5041790453555159, "grad_norm": 1.5333408621875049, "learning_rate": 1.035019171652306e-05, "loss": 0.2508, "step": 6364 }, { "epoch": 0.5042582689641513, "grad_norm": 1.247294378243989, "learning_rate": 1.0347627247186053e-05, "loss": 0.177, "step": 6365 }, { "epoch": 0.5043374925727867, "grad_norm": 1.7516479384347678, "learning_rate": 1.0345062754959463e-05, "loss": 0.3175, "step": 6366 }, { "epoch": 0.504416716181422, "grad_norm": 1.6041067932546424, "learning_rate": 1.0342498240012153e-05, "loss": 0.2357, "step": 6367 }, { "epoch": 0.5044959397900575, "grad_norm": 1.3959411332554874, "learning_rate": 1.0339933702512978e-05, "loss": 0.2041, "step": 6368 }, { "epoch": 0.5045751633986928, "grad_norm": 1.3170003386496107, "learning_rate": 1.0337369142630808e-05, "loss": 0.1721, "step": 6369 }, { "epoch": 0.5046543870073282, "grad_norm": 1.3809195771000535, "learning_rate": 1.0334804560534504e-05, "loss": 0.21, "step": 6370 }, { "epoch": 0.5047336106159636, "grad_norm": 1.5716623222593202, "learning_rate": 1.0332239956392926e-05, "loss": 0.2344, "step": 6371 }, { "epoch": 0.5048128342245989, "grad_norm": 1.7380172335496857, "learning_rate": 1.032967533037495e-05, "loss": 0.2536, "step": 6372 }, { "epoch": 0.5048920578332343, "grad_norm": 1.4957415004431371, "learning_rate": 1.0327110682649436e-05, "loss": 0.2212, "step": 6373 }, { "epoch": 0.5049712814418696, "grad_norm": 1.517018916348201, "learning_rate": 1.0324546013385258e-05, "loss": 0.1815, "step": 6374 }, { "epoch": 0.5050505050505051, "grad_norm": 2.2578757051998917, "learning_rate": 1.0321981322751291e-05, "loss": 0.2644, "step": 6375 }, { "epoch": 0.5051297286591404, "grad_norm": 1.6413500872988736, "learning_rate": 1.03194166109164e-05, "loss": 0.2902, "step": 6376 }, { "epoch": 0.5052089522677758, "grad_norm": 1.6785424842887002, "learning_rate": 1.0316851878049465e-05, "loss": 0.1986, "step": 6377 }, { "epoch": 0.5052881758764112, "grad_norm": 1.671676268887419, "learning_rate": 1.0314287124319353e-05, "loss": 0.2984, "step": 6378 }, { "epoch": 0.5053673994850465, "grad_norm": 1.499333670173624, "learning_rate": 1.031172234989495e-05, "loss": 0.2448, "step": 6379 }, { "epoch": 0.5054466230936819, "grad_norm": 1.8918815385312764, "learning_rate": 1.030915755494513e-05, "loss": 0.3522, "step": 6380 }, { "epoch": 0.5055258467023173, "grad_norm": 1.8474202065018004, "learning_rate": 1.030659273963877e-05, "loss": 0.3227, "step": 6381 }, { "epoch": 0.5056050703109527, "grad_norm": 1.7072562151528647, "learning_rate": 1.0304027904144756e-05, "loss": 0.2689, "step": 6382 }, { "epoch": 0.505684293919588, "grad_norm": 1.7229229482924233, "learning_rate": 1.0301463048631968e-05, "loss": 0.3086, "step": 6383 }, { "epoch": 0.5057635175282235, "grad_norm": 1.5392113989401297, "learning_rate": 1.0298898173269285e-05, "loss": 0.1874, "step": 6384 }, { "epoch": 0.5058427411368588, "grad_norm": 1.7649293517453457, "learning_rate": 1.0296333278225599e-05, "loss": 0.1865, "step": 6385 }, { "epoch": 0.5059219647454941, "grad_norm": 1.3623992651479446, "learning_rate": 1.0293768363669791e-05, "loss": 0.1731, "step": 6386 }, { "epoch": 0.5060011883541295, "grad_norm": 2.0554672693123064, "learning_rate": 1.0291203429770749e-05, "loss": 0.241, "step": 6387 }, { "epoch": 0.5060804119627649, "grad_norm": 1.8977700782921658, "learning_rate": 1.0288638476697365e-05, "loss": 0.2363, "step": 6388 }, { "epoch": 0.5061596355714003, "grad_norm": 1.8018294158848267, "learning_rate": 1.0286073504618524e-05, "loss": 0.2462, "step": 6389 }, { "epoch": 0.5062388591800356, "grad_norm": 1.5086785074066884, "learning_rate": 1.0283508513703118e-05, "loss": 0.2675, "step": 6390 }, { "epoch": 0.5063180827886711, "grad_norm": 1.7316826770681022, "learning_rate": 1.0280943504120045e-05, "loss": 0.2275, "step": 6391 }, { "epoch": 0.5063973063973064, "grad_norm": 1.6406645468220442, "learning_rate": 1.027837847603819e-05, "loss": 0.24, "step": 6392 }, { "epoch": 0.5064765300059417, "grad_norm": 1.7896699181154339, "learning_rate": 1.0275813429626456e-05, "loss": 0.1906, "step": 6393 }, { "epoch": 0.5065557536145772, "grad_norm": 1.6423856572343716, "learning_rate": 1.027324836505373e-05, "loss": 0.3236, "step": 6394 }, { "epoch": 0.5066349772232125, "grad_norm": 1.7250301833281598, "learning_rate": 1.0270683282488913e-05, "loss": 0.2207, "step": 6395 }, { "epoch": 0.5067142008318479, "grad_norm": 1.5731810279847769, "learning_rate": 1.026811818210091e-05, "loss": 0.1528, "step": 6396 }, { "epoch": 0.5067934244404833, "grad_norm": 1.4487655673473139, "learning_rate": 1.0265553064058612e-05, "loss": 0.1318, "step": 6397 }, { "epoch": 0.5068726480491187, "grad_norm": 1.5064157461612981, "learning_rate": 1.0262987928530921e-05, "loss": 0.206, "step": 6398 }, { "epoch": 0.506951871657754, "grad_norm": 1.6435504834148675, "learning_rate": 1.0260422775686743e-05, "loss": 0.3464, "step": 6399 }, { "epoch": 0.5070310952663893, "grad_norm": 1.761862850803852, "learning_rate": 1.0257857605694976e-05, "loss": 0.1726, "step": 6400 }, { "epoch": 0.5071103188750248, "grad_norm": 1.5048964862319045, "learning_rate": 1.025529241872453e-05, "loss": 0.1936, "step": 6401 }, { "epoch": 0.5071895424836601, "grad_norm": 1.394518329988283, "learning_rate": 1.0252727214944302e-05, "loss": 0.1708, "step": 6402 }, { "epoch": 0.5072687660922955, "grad_norm": 1.7545048642906012, "learning_rate": 1.0250161994523205e-05, "loss": 0.2023, "step": 6403 }, { "epoch": 0.5073479897009309, "grad_norm": 1.5467169383848067, "learning_rate": 1.0247596757630147e-05, "loss": 0.1865, "step": 6404 }, { "epoch": 0.5074272133095663, "grad_norm": 1.6518271012331747, "learning_rate": 1.0245031504434032e-05, "loss": 0.203, "step": 6405 }, { "epoch": 0.5075064369182016, "grad_norm": 2.216923039993798, "learning_rate": 1.024246623510377e-05, "loss": 0.3118, "step": 6406 }, { "epoch": 0.507585660526837, "grad_norm": 2.011569551005349, "learning_rate": 1.0239900949808274e-05, "loss": 0.3412, "step": 6407 }, { "epoch": 0.5076648841354724, "grad_norm": 1.552065811383536, "learning_rate": 1.0237335648716456e-05, "loss": 0.2009, "step": 6408 }, { "epoch": 0.5077441077441077, "grad_norm": 2.092905558406769, "learning_rate": 1.0234770331997224e-05, "loss": 0.2606, "step": 6409 }, { "epoch": 0.5078233313527432, "grad_norm": 1.334211172803728, "learning_rate": 1.02322049998195e-05, "loss": 0.2531, "step": 6410 }, { "epoch": 0.5079025549613785, "grad_norm": 1.4945791457771147, "learning_rate": 1.022963965235219e-05, "loss": 0.1663, "step": 6411 }, { "epoch": 0.5079817785700138, "grad_norm": 1.6558749508619934, "learning_rate": 1.0227074289764216e-05, "loss": 0.2476, "step": 6412 }, { "epoch": 0.5080610021786492, "grad_norm": 1.7620882004001486, "learning_rate": 1.0224508912224491e-05, "loss": 0.2317, "step": 6413 }, { "epoch": 0.5081402257872846, "grad_norm": 1.7345229090289764, "learning_rate": 1.0221943519901935e-05, "loss": 0.2235, "step": 6414 }, { "epoch": 0.50821944939592, "grad_norm": 1.6271777181032696, "learning_rate": 1.0219378112965468e-05, "loss": 0.2814, "step": 6415 }, { "epoch": 0.5082986730045553, "grad_norm": 1.468712072624976, "learning_rate": 1.0216812691584005e-05, "loss": 0.1843, "step": 6416 }, { "epoch": 0.5083778966131908, "grad_norm": 1.4048311497207364, "learning_rate": 1.021424725592647e-05, "loss": 0.1714, "step": 6417 }, { "epoch": 0.5084571202218261, "grad_norm": 1.5878285725090504, "learning_rate": 1.0211681806161787e-05, "loss": 0.2162, "step": 6418 }, { "epoch": 0.5085363438304614, "grad_norm": 2.0226357300503603, "learning_rate": 1.0209116342458872e-05, "loss": 0.2872, "step": 6419 }, { "epoch": 0.5086155674390969, "grad_norm": 1.6647916303425558, "learning_rate": 1.0206550864986656e-05, "loss": 0.2014, "step": 6420 }, { "epoch": 0.5086947910477322, "grad_norm": 1.7010751940968944, "learning_rate": 1.0203985373914056e-05, "loss": 0.2903, "step": 6421 }, { "epoch": 0.5087740146563676, "grad_norm": 1.871187061792545, "learning_rate": 1.0201419869410001e-05, "loss": 0.242, "step": 6422 }, { "epoch": 0.508853238265003, "grad_norm": 1.558602497284484, "learning_rate": 1.0198854351643416e-05, "loss": 0.2132, "step": 6423 }, { "epoch": 0.5089324618736384, "grad_norm": 1.8563580399794568, "learning_rate": 1.0196288820783232e-05, "loss": 0.2735, "step": 6424 }, { "epoch": 0.5090116854822737, "grad_norm": 1.42370795225079, "learning_rate": 1.0193723276998371e-05, "loss": 0.2022, "step": 6425 }, { "epoch": 0.509090909090909, "grad_norm": 1.9281495104563846, "learning_rate": 1.0191157720457765e-05, "loss": 0.292, "step": 6426 }, { "epoch": 0.5091701326995445, "grad_norm": 1.5901155472651172, "learning_rate": 1.0188592151330343e-05, "loss": 0.1882, "step": 6427 }, { "epoch": 0.5092493563081798, "grad_norm": 1.5890069662185857, "learning_rate": 1.0186026569785037e-05, "loss": 0.1839, "step": 6428 }, { "epoch": 0.5093285799168152, "grad_norm": 1.8886193273457028, "learning_rate": 1.0183460975990773e-05, "loss": 0.3836, "step": 6429 }, { "epoch": 0.5094078035254506, "grad_norm": 2.281804303827039, "learning_rate": 1.0180895370116488e-05, "loss": 0.2251, "step": 6430 }, { "epoch": 0.509487027134086, "grad_norm": 1.5691845024170556, "learning_rate": 1.0178329752331116e-05, "loss": 0.2219, "step": 6431 }, { "epoch": 0.5095662507427213, "grad_norm": 1.508113269730381, "learning_rate": 1.0175764122803584e-05, "loss": 0.2213, "step": 6432 }, { "epoch": 0.5096454743513567, "grad_norm": 1.3733557420376317, "learning_rate": 1.017319848170283e-05, "loss": 0.2069, "step": 6433 }, { "epoch": 0.5097246979599921, "grad_norm": 1.6388112085454838, "learning_rate": 1.0170632829197792e-05, "loss": 0.282, "step": 6434 }, { "epoch": 0.5098039215686274, "grad_norm": 1.573158355579066, "learning_rate": 1.0168067165457403e-05, "loss": 0.1994, "step": 6435 }, { "epoch": 0.5098831451772629, "grad_norm": 1.7446573717928093, "learning_rate": 1.01655014906506e-05, "loss": 0.2512, "step": 6436 }, { "epoch": 0.5099623687858982, "grad_norm": 1.9753472245369228, "learning_rate": 1.016293580494632e-05, "loss": 0.2256, "step": 6437 }, { "epoch": 0.5100415923945336, "grad_norm": 1.8120790617315887, "learning_rate": 1.0160370108513497e-05, "loss": 0.2942, "step": 6438 }, { "epoch": 0.5101208160031689, "grad_norm": 1.5463563927654693, "learning_rate": 1.015780440152108e-05, "loss": 0.196, "step": 6439 }, { "epoch": 0.5102000396118043, "grad_norm": 1.993879172981582, "learning_rate": 1.0155238684138e-05, "loss": 0.2418, "step": 6440 }, { "epoch": 0.5102792632204397, "grad_norm": 1.473014134080035, "learning_rate": 1.0152672956533198e-05, "loss": 0.1642, "step": 6441 }, { "epoch": 0.510358486829075, "grad_norm": 1.5195950134969411, "learning_rate": 1.015010721887562e-05, "loss": 0.1991, "step": 6442 }, { "epoch": 0.5104377104377105, "grad_norm": 1.4990310244649965, "learning_rate": 1.0147541471334204e-05, "loss": 0.1822, "step": 6443 }, { "epoch": 0.5105169340463458, "grad_norm": 1.5539345408551732, "learning_rate": 1.0144975714077889e-05, "loss": 0.2007, "step": 6444 }, { "epoch": 0.5105961576549812, "grad_norm": 1.886014078203292, "learning_rate": 1.0142409947275621e-05, "loss": 0.3137, "step": 6445 }, { "epoch": 0.5106753812636166, "grad_norm": 1.9082891268777655, "learning_rate": 1.0139844171096345e-05, "loss": 0.2721, "step": 6446 }, { "epoch": 0.5107546048722519, "grad_norm": 1.7820543694869206, "learning_rate": 1.0137278385709004e-05, "loss": 0.2137, "step": 6447 }, { "epoch": 0.5108338284808873, "grad_norm": 1.4216125943418878, "learning_rate": 1.0134712591282539e-05, "loss": 0.2321, "step": 6448 }, { "epoch": 0.5109130520895226, "grad_norm": 1.4828076788256601, "learning_rate": 1.0132146787985898e-05, "loss": 0.222, "step": 6449 }, { "epoch": 0.5109922756981581, "grad_norm": 1.6226094682086722, "learning_rate": 1.0129580975988029e-05, "loss": 0.2279, "step": 6450 }, { "epoch": 0.5110714993067934, "grad_norm": 1.4555474390002656, "learning_rate": 1.0127015155457875e-05, "loss": 0.2145, "step": 6451 }, { "epoch": 0.5111507229154288, "grad_norm": 1.957626993857304, "learning_rate": 1.0124449326564383e-05, "loss": 0.2467, "step": 6452 }, { "epoch": 0.5112299465240642, "grad_norm": 1.5258791194181638, "learning_rate": 1.0121883489476505e-05, "loss": 0.2057, "step": 6453 }, { "epoch": 0.5113091701326995, "grad_norm": 2.258191570728336, "learning_rate": 1.0119317644363182e-05, "loss": 0.2725, "step": 6454 }, { "epoch": 0.5113883937413349, "grad_norm": 1.4826078654861197, "learning_rate": 1.0116751791393371e-05, "loss": 0.1997, "step": 6455 }, { "epoch": 0.5114676173499703, "grad_norm": 1.927519351867951, "learning_rate": 1.011418593073601e-05, "loss": 0.2836, "step": 6456 }, { "epoch": 0.5115468409586057, "grad_norm": 1.5548591916658565, "learning_rate": 1.0111620062560059e-05, "loss": 0.1915, "step": 6457 }, { "epoch": 0.511626064567241, "grad_norm": 1.630621271094443, "learning_rate": 1.0109054187034463e-05, "loss": 0.2337, "step": 6458 }, { "epoch": 0.5117052881758765, "grad_norm": 1.5688562493583613, "learning_rate": 1.0106488304328175e-05, "loss": 0.2321, "step": 6459 }, { "epoch": 0.5117845117845118, "grad_norm": 1.5215635271519548, "learning_rate": 1.010392241461014e-05, "loss": 0.1617, "step": 6460 }, { "epoch": 0.5118637353931471, "grad_norm": 1.6455981623625469, "learning_rate": 1.010135651804932e-05, "loss": 0.251, "step": 6461 }, { "epoch": 0.5119429590017825, "grad_norm": 1.7908248150126298, "learning_rate": 1.0098790614814658e-05, "loss": 0.2544, "step": 6462 }, { "epoch": 0.5120221826104179, "grad_norm": 1.6203023694841237, "learning_rate": 1.009622470507511e-05, "loss": 0.3074, "step": 6463 }, { "epoch": 0.5121014062190533, "grad_norm": 1.748613987489867, "learning_rate": 1.0093658788999628e-05, "loss": 0.2823, "step": 6464 }, { "epoch": 0.5121806298276886, "grad_norm": 1.7982651670894434, "learning_rate": 1.0091092866757164e-05, "loss": 0.2229, "step": 6465 }, { "epoch": 0.5122598534363241, "grad_norm": 1.7470309445326715, "learning_rate": 1.0088526938516676e-05, "loss": 0.1567, "step": 6466 }, { "epoch": 0.5123390770449594, "grad_norm": 1.368737317056197, "learning_rate": 1.0085961004447114e-05, "loss": 0.203, "step": 6467 }, { "epoch": 0.5124183006535947, "grad_norm": 1.6142680754896392, "learning_rate": 1.0083395064717429e-05, "loss": 0.2096, "step": 6468 }, { "epoch": 0.5124975242622302, "grad_norm": 1.398851658263601, "learning_rate": 1.0080829119496587e-05, "loss": 0.2239, "step": 6469 }, { "epoch": 0.5125767478708655, "grad_norm": 1.476244120589003, "learning_rate": 1.0078263168953532e-05, "loss": 0.2374, "step": 6470 }, { "epoch": 0.5126559714795009, "grad_norm": 1.8834724833440701, "learning_rate": 1.0075697213257227e-05, "loss": 0.2132, "step": 6471 }, { "epoch": 0.5127351950881363, "grad_norm": 1.5999307775822558, "learning_rate": 1.0073131252576622e-05, "loss": 0.201, "step": 6472 }, { "epoch": 0.5128144186967717, "grad_norm": 1.6201269162385037, "learning_rate": 1.0070565287080676e-05, "loss": 0.2692, "step": 6473 }, { "epoch": 0.512893642305407, "grad_norm": 2.179901360762801, "learning_rate": 1.0067999316938348e-05, "loss": 0.3759, "step": 6474 }, { "epoch": 0.5129728659140423, "grad_norm": 1.6245384222977508, "learning_rate": 1.006543334231859e-05, "loss": 0.2696, "step": 6475 }, { "epoch": 0.5130520895226778, "grad_norm": 1.6588856658611313, "learning_rate": 1.0062867363390361e-05, "loss": 0.2018, "step": 6476 }, { "epoch": 0.5131313131313131, "grad_norm": 1.4462668490416088, "learning_rate": 1.0060301380322622e-05, "loss": 0.274, "step": 6477 }, { "epoch": 0.5132105367399485, "grad_norm": 1.7701758979904199, "learning_rate": 1.0057735393284322e-05, "loss": 0.2409, "step": 6478 }, { "epoch": 0.5132897603485839, "grad_norm": 1.8788610060010669, "learning_rate": 1.0055169402444429e-05, "loss": 0.2684, "step": 6479 }, { "epoch": 0.5133689839572193, "grad_norm": 1.651947366616138, "learning_rate": 1.0052603407971892e-05, "loss": 0.2606, "step": 6480 }, { "epoch": 0.5134482075658546, "grad_norm": 1.4559351542456724, "learning_rate": 1.0050037410035676e-05, "loss": 0.1859, "step": 6481 }, { "epoch": 0.51352743117449, "grad_norm": 1.7119476031427558, "learning_rate": 1.004747140880474e-05, "loss": 0.2641, "step": 6482 }, { "epoch": 0.5136066547831254, "grad_norm": 1.9305920148387345, "learning_rate": 1.0044905404448037e-05, "loss": 0.2798, "step": 6483 }, { "epoch": 0.5136858783917607, "grad_norm": 1.438286786375271, "learning_rate": 1.0042339397134528e-05, "loss": 0.2061, "step": 6484 }, { "epoch": 0.5137651020003962, "grad_norm": 1.6862281209284795, "learning_rate": 1.0039773387033178e-05, "loss": 0.2195, "step": 6485 }, { "epoch": 0.5138443256090315, "grad_norm": 1.9216246369267767, "learning_rate": 1.0037207374312936e-05, "loss": 0.2356, "step": 6486 }, { "epoch": 0.5139235492176669, "grad_norm": 2.1234420696718534, "learning_rate": 1.003464135914277e-05, "loss": 0.2915, "step": 6487 }, { "epoch": 0.5140027728263022, "grad_norm": 1.9906207243768332, "learning_rate": 1.0032075341691639e-05, "loss": 0.276, "step": 6488 }, { "epoch": 0.5140819964349376, "grad_norm": 1.6081362312171994, "learning_rate": 1.0029509322128499e-05, "loss": 0.1747, "step": 6489 }, { "epoch": 0.514161220043573, "grad_norm": 2.2938857165541737, "learning_rate": 1.0026943300622313e-05, "loss": 0.2342, "step": 6490 }, { "epoch": 0.5142404436522083, "grad_norm": 1.3324365747926863, "learning_rate": 1.0024377277342038e-05, "loss": 0.1751, "step": 6491 }, { "epoch": 0.5143196672608438, "grad_norm": 1.8221635347817944, "learning_rate": 1.002181125245664e-05, "loss": 0.3477, "step": 6492 }, { "epoch": 0.5143988908694791, "grad_norm": 1.6905355523014922, "learning_rate": 1.0019245226135075e-05, "loss": 0.3393, "step": 6493 }, { "epoch": 0.5144781144781144, "grad_norm": 1.6086216908061008, "learning_rate": 1.0016679198546304e-05, "loss": 0.2739, "step": 6494 }, { "epoch": 0.5145573380867499, "grad_norm": 1.648337491237927, "learning_rate": 1.0014113169859285e-05, "loss": 0.2466, "step": 6495 }, { "epoch": 0.5146365616953852, "grad_norm": 1.9889232952540248, "learning_rate": 1.0011547140242987e-05, "loss": 0.2221, "step": 6496 }, { "epoch": 0.5147157853040206, "grad_norm": 1.2471124576251928, "learning_rate": 1.0008981109866363e-05, "loss": 0.1429, "step": 6497 }, { "epoch": 0.514795008912656, "grad_norm": 1.1779611503791618, "learning_rate": 1.0006415078898377e-05, "loss": 0.1462, "step": 6498 }, { "epoch": 0.5148742325212914, "grad_norm": 1.7394107681046125, "learning_rate": 1.0003849047507987e-05, "loss": 0.2431, "step": 6499 }, { "epoch": 0.5149534561299267, "grad_norm": 1.6497103485621865, "learning_rate": 1.0001283015864157e-05, "loss": 0.2454, "step": 6500 }, { "epoch": 0.515032679738562, "grad_norm": 2.154975406255334, "learning_rate": 9.998716984135847e-06, "loss": 0.3811, "step": 6501 }, { "epoch": 0.5151119033471975, "grad_norm": 1.8248567788152668, "learning_rate": 9.996150952492018e-06, "loss": 0.3131, "step": 6502 }, { "epoch": 0.5151911269558328, "grad_norm": 1.5164802093096938, "learning_rate": 9.993584921101628e-06, "loss": 0.2696, "step": 6503 }, { "epoch": 0.5152703505644682, "grad_norm": 1.5755242780437846, "learning_rate": 9.991018890133642e-06, "loss": 0.1882, "step": 6504 }, { "epoch": 0.5153495741731036, "grad_norm": 1.6795659349338317, "learning_rate": 9.988452859757017e-06, "loss": 0.2888, "step": 6505 }, { "epoch": 0.515428797781739, "grad_norm": 1.3401355952455904, "learning_rate": 9.985886830140717e-06, "loss": 0.1058, "step": 6506 }, { "epoch": 0.5155080213903743, "grad_norm": 1.9767542541509284, "learning_rate": 9.983320801453702e-06, "loss": 0.2207, "step": 6507 }, { "epoch": 0.5155872449990097, "grad_norm": 1.8604908163040235, "learning_rate": 9.98075477386493e-06, "loss": 0.2235, "step": 6508 }, { "epoch": 0.5156664686076451, "grad_norm": 1.8188640367413023, "learning_rate": 9.978188747543364e-06, "loss": 0.2067, "step": 6509 }, { "epoch": 0.5157456922162804, "grad_norm": 1.945383034910453, "learning_rate": 9.975622722657965e-06, "loss": 0.2682, "step": 6510 }, { "epoch": 0.5158249158249159, "grad_norm": 1.6440759120641035, "learning_rate": 9.973056699377692e-06, "loss": 0.2008, "step": 6511 }, { "epoch": 0.5159041394335512, "grad_norm": 1.5296992790532355, "learning_rate": 9.970490677871506e-06, "loss": 0.2041, "step": 6512 }, { "epoch": 0.5159833630421866, "grad_norm": 2.2574940211746375, "learning_rate": 9.967924658308366e-06, "loss": 0.286, "step": 6513 }, { "epoch": 0.5160625866508219, "grad_norm": 1.5644936848412478, "learning_rate": 9.965358640857231e-06, "loss": 0.1476, "step": 6514 }, { "epoch": 0.5161418102594573, "grad_norm": 1.853619464609191, "learning_rate": 9.962792625687067e-06, "loss": 0.2685, "step": 6515 }, { "epoch": 0.5162210338680927, "grad_norm": 1.8784718728183394, "learning_rate": 9.960226612966828e-06, "loss": 0.2651, "step": 6516 }, { "epoch": 0.516300257476728, "grad_norm": 1.584619426456885, "learning_rate": 9.957660602865477e-06, "loss": 0.2345, "step": 6517 }, { "epoch": 0.5163794810853635, "grad_norm": 1.9929728898305445, "learning_rate": 9.955094595551968e-06, "loss": 0.2416, "step": 6518 }, { "epoch": 0.5164587046939988, "grad_norm": 1.505456797458076, "learning_rate": 9.952528591195265e-06, "loss": 0.1944, "step": 6519 }, { "epoch": 0.5165379283026342, "grad_norm": 2.1043779710972372, "learning_rate": 9.949962589964327e-06, "loss": 0.2284, "step": 6520 }, { "epoch": 0.5166171519112696, "grad_norm": 1.8986433601644745, "learning_rate": 9.94739659202811e-06, "loss": 0.2095, "step": 6521 }, { "epoch": 0.5166963755199049, "grad_norm": 1.603464216578882, "learning_rate": 9.944830597555573e-06, "loss": 0.206, "step": 6522 }, { "epoch": 0.5167755991285403, "grad_norm": 1.365060663800126, "learning_rate": 9.94226460671568e-06, "loss": 0.1536, "step": 6523 }, { "epoch": 0.5168548227371756, "grad_norm": 1.570949045137009, "learning_rate": 9.939698619677383e-06, "loss": 0.2344, "step": 6524 }, { "epoch": 0.5169340463458111, "grad_norm": 1.58363180586898, "learning_rate": 9.937132636609642e-06, "loss": 0.1936, "step": 6525 }, { "epoch": 0.5170132699544464, "grad_norm": 1.6625818977250058, "learning_rate": 9.934566657681412e-06, "loss": 0.2312, "step": 6526 }, { "epoch": 0.5170924935630818, "grad_norm": 1.632393397693878, "learning_rate": 9.932000683061654e-06, "loss": 0.2811, "step": 6527 }, { "epoch": 0.5171717171717172, "grad_norm": 2.325660313764123, "learning_rate": 9.929434712919327e-06, "loss": 0.2905, "step": 6528 }, { "epoch": 0.5172509407803525, "grad_norm": 1.9542836586370809, "learning_rate": 9.926868747423381e-06, "loss": 0.3113, "step": 6529 }, { "epoch": 0.5173301643889879, "grad_norm": 1.4193671361261082, "learning_rate": 9.924302786742775e-06, "loss": 0.1913, "step": 6530 }, { "epoch": 0.5174093879976233, "grad_norm": 2.0511049777232726, "learning_rate": 9.92173683104647e-06, "loss": 0.2859, "step": 6531 }, { "epoch": 0.5174886116062587, "grad_norm": 1.2591616892251185, "learning_rate": 9.919170880503416e-06, "loss": 0.1377, "step": 6532 }, { "epoch": 0.517567835214894, "grad_norm": 1.7607508610627813, "learning_rate": 9.916604935282573e-06, "loss": 0.2959, "step": 6533 }, { "epoch": 0.5176470588235295, "grad_norm": 1.7008477722434605, "learning_rate": 9.914038995552891e-06, "loss": 0.2384, "step": 6534 }, { "epoch": 0.5177262824321648, "grad_norm": 1.5155639025430583, "learning_rate": 9.911473061483326e-06, "loss": 0.1866, "step": 6535 }, { "epoch": 0.5178055060408001, "grad_norm": 1.2256016833439958, "learning_rate": 9.908907133242838e-06, "loss": 0.1537, "step": 6536 }, { "epoch": 0.5178847296494355, "grad_norm": 1.771213535735738, "learning_rate": 9.906341211000375e-06, "loss": 0.2421, "step": 6537 }, { "epoch": 0.5179639532580709, "grad_norm": 1.7659075221832585, "learning_rate": 9.903775294924892e-06, "loss": 0.1875, "step": 6538 }, { "epoch": 0.5180431768667063, "grad_norm": 1.3359416558093027, "learning_rate": 9.901209385185345e-06, "loss": 0.1674, "step": 6539 }, { "epoch": 0.5181224004753416, "grad_norm": 1.9456109291691057, "learning_rate": 9.898643481950683e-06, "loss": 0.2839, "step": 6540 }, { "epoch": 0.5182016240839771, "grad_norm": 1.526768999538284, "learning_rate": 9.89607758538986e-06, "loss": 0.2095, "step": 6541 }, { "epoch": 0.5182808476926124, "grad_norm": 1.941398953329438, "learning_rate": 9.893511695671828e-06, "loss": 0.2512, "step": 6542 }, { "epoch": 0.5183600713012477, "grad_norm": 1.5815811107710815, "learning_rate": 9.890945812965538e-06, "loss": 0.1874, "step": 6543 }, { "epoch": 0.5184392949098832, "grad_norm": 1.7060226776628804, "learning_rate": 9.888379937439944e-06, "loss": 0.2101, "step": 6544 }, { "epoch": 0.5185185185185185, "grad_norm": 1.5443405070785197, "learning_rate": 9.885814069263991e-06, "loss": 0.1507, "step": 6545 }, { "epoch": 0.5185977421271539, "grad_norm": 1.823041231258554, "learning_rate": 9.883248208606632e-06, "loss": 0.2526, "step": 6546 }, { "epoch": 0.5186769657357893, "grad_norm": 1.6248133547875379, "learning_rate": 9.880682355636821e-06, "loss": 0.2107, "step": 6547 }, { "epoch": 0.5187561893444247, "grad_norm": 1.8220732762870397, "learning_rate": 9.878116510523498e-06, "loss": 0.2527, "step": 6548 }, { "epoch": 0.51883541295306, "grad_norm": 1.3542736862929772, "learning_rate": 9.87555067343562e-06, "loss": 0.1861, "step": 6549 }, { "epoch": 0.5189146365616953, "grad_norm": 2.4472864387334843, "learning_rate": 9.872984844542128e-06, "loss": 0.2237, "step": 6550 }, { "epoch": 0.5189938601703308, "grad_norm": 2.00422345654755, "learning_rate": 9.870419024011973e-06, "loss": 0.3057, "step": 6551 }, { "epoch": 0.5190730837789661, "grad_norm": 1.71440329717328, "learning_rate": 9.867853212014104e-06, "loss": 0.1759, "step": 6552 }, { "epoch": 0.5191523073876015, "grad_norm": 1.4883916148667324, "learning_rate": 9.865287408717464e-06, "loss": 0.2014, "step": 6553 }, { "epoch": 0.5192315309962369, "grad_norm": 1.5453193744707012, "learning_rate": 9.862721614291e-06, "loss": 0.2129, "step": 6554 }, { "epoch": 0.5193107546048723, "grad_norm": 1.712344328056902, "learning_rate": 9.860155828903658e-06, "loss": 0.2344, "step": 6555 }, { "epoch": 0.5193899782135076, "grad_norm": 1.7040747603656183, "learning_rate": 9.85759005272438e-06, "loss": 0.197, "step": 6556 }, { "epoch": 0.519469201822143, "grad_norm": 1.8715707541439077, "learning_rate": 9.855024285922114e-06, "loss": 0.2829, "step": 6557 }, { "epoch": 0.5195484254307784, "grad_norm": 1.4763358867820018, "learning_rate": 9.8524585286658e-06, "loss": 0.2671, "step": 6558 }, { "epoch": 0.5196276490394137, "grad_norm": 1.5999989200747373, "learning_rate": 9.84989278112438e-06, "loss": 0.1599, "step": 6559 }, { "epoch": 0.5197068726480492, "grad_norm": 1.8424211993894914, "learning_rate": 9.847327043466802e-06, "loss": 0.1662, "step": 6560 }, { "epoch": 0.5197860962566845, "grad_norm": 1.552895134645817, "learning_rate": 9.844761315862002e-06, "loss": 0.2166, "step": 6561 }, { "epoch": 0.5198653198653199, "grad_norm": 1.4998254529502497, "learning_rate": 9.842195598478922e-06, "loss": 0.2063, "step": 6562 }, { "epoch": 0.5199445434739552, "grad_norm": 1.7673422779636208, "learning_rate": 9.839629891486503e-06, "loss": 0.1904, "step": 6563 }, { "epoch": 0.5200237670825906, "grad_norm": 1.4665955444457102, "learning_rate": 9.83706419505368e-06, "loss": 0.2428, "step": 6564 }, { "epoch": 0.520102990691226, "grad_norm": 1.6208884192042912, "learning_rate": 9.834498509349402e-06, "loss": 0.2357, "step": 6565 }, { "epoch": 0.5201822142998613, "grad_norm": 1.5932399929774363, "learning_rate": 9.831932834542598e-06, "loss": 0.252, "step": 6566 }, { "epoch": 0.5202614379084968, "grad_norm": 2.015415844296962, "learning_rate": 9.829367170802208e-06, "loss": 0.3385, "step": 6567 }, { "epoch": 0.5203406615171321, "grad_norm": 1.6745329730599214, "learning_rate": 9.82680151829717e-06, "loss": 0.2413, "step": 6568 }, { "epoch": 0.5204198851257675, "grad_norm": 1.8267065809484035, "learning_rate": 9.824235877196418e-06, "loss": 0.2205, "step": 6569 }, { "epoch": 0.5204991087344029, "grad_norm": 1.2384389776442932, "learning_rate": 9.821670247668887e-06, "loss": 0.2188, "step": 6570 }, { "epoch": 0.5205783323430382, "grad_norm": 1.8113765211854829, "learning_rate": 9.819104629883513e-06, "loss": 0.1699, "step": 6571 }, { "epoch": 0.5206575559516736, "grad_norm": 1.672072546281759, "learning_rate": 9.816539024009227e-06, "loss": 0.2521, "step": 6572 }, { "epoch": 0.520736779560309, "grad_norm": 1.657661842027711, "learning_rate": 9.813973430214965e-06, "loss": 0.1934, "step": 6573 }, { "epoch": 0.5208160031689444, "grad_norm": 1.8574733120630307, "learning_rate": 9.811407848669657e-06, "loss": 0.3483, "step": 6574 }, { "epoch": 0.5208952267775797, "grad_norm": 1.7650044155731195, "learning_rate": 9.808842279542235e-06, "loss": 0.237, "step": 6575 }, { "epoch": 0.520974450386215, "grad_norm": 1.7481276613970684, "learning_rate": 9.80627672300163e-06, "loss": 0.1959, "step": 6576 }, { "epoch": 0.5210536739948505, "grad_norm": 1.8051628508398276, "learning_rate": 9.80371117921677e-06, "loss": 0.2802, "step": 6577 }, { "epoch": 0.5211328976034858, "grad_norm": 1.2362706753648025, "learning_rate": 9.801145648356585e-06, "loss": 0.1639, "step": 6578 }, { "epoch": 0.5212121212121212, "grad_norm": 1.4328823513851627, "learning_rate": 9.798580130590004e-06, "loss": 0.2086, "step": 6579 }, { "epoch": 0.5212913448207566, "grad_norm": 1.7484169598594526, "learning_rate": 9.79601462608595e-06, "loss": 0.2593, "step": 6580 }, { "epoch": 0.521370568429392, "grad_norm": 1.5034539086792649, "learning_rate": 9.79344913501335e-06, "loss": 0.2579, "step": 6581 }, { "epoch": 0.5214497920380273, "grad_norm": 1.7124036603017196, "learning_rate": 9.790883657541133e-06, "loss": 0.219, "step": 6582 }, { "epoch": 0.5215290156466627, "grad_norm": 1.9862851399739558, "learning_rate": 9.788318193838218e-06, "loss": 0.233, "step": 6583 }, { "epoch": 0.5216082392552981, "grad_norm": 1.483207523021381, "learning_rate": 9.785752744073534e-06, "loss": 0.1491, "step": 6584 }, { "epoch": 0.5216874628639334, "grad_norm": 2.2097305945009804, "learning_rate": 9.783187308416e-06, "loss": 0.3521, "step": 6585 }, { "epoch": 0.5217666864725689, "grad_norm": 1.4592939064313366, "learning_rate": 9.780621887034537e-06, "loss": 0.2081, "step": 6586 }, { "epoch": 0.5218459100812042, "grad_norm": 1.558753586449951, "learning_rate": 9.778056480098068e-06, "loss": 0.2058, "step": 6587 }, { "epoch": 0.5219251336898396, "grad_norm": 1.813196324028363, "learning_rate": 9.775491087775514e-06, "loss": 0.2254, "step": 6588 }, { "epoch": 0.5220043572984749, "grad_norm": 1.6105331121955642, "learning_rate": 9.772925710235789e-06, "loss": 0.2324, "step": 6589 }, { "epoch": 0.5220835809071103, "grad_norm": 1.9637127379313561, "learning_rate": 9.770360347647817e-06, "loss": 0.3305, "step": 6590 }, { "epoch": 0.5221628045157457, "grad_norm": 1.5397209821030116, "learning_rate": 9.767795000180507e-06, "loss": 0.1261, "step": 6591 }, { "epoch": 0.522242028124381, "grad_norm": 1.817566382058787, "learning_rate": 9.76522966800278e-06, "loss": 0.2491, "step": 6592 }, { "epoch": 0.5223212517330165, "grad_norm": 2.018775193405404, "learning_rate": 9.76266435128355e-06, "loss": 0.386, "step": 6593 }, { "epoch": 0.5224004753416518, "grad_norm": 1.8274604206698914, "learning_rate": 9.76009905019173e-06, "loss": 0.2382, "step": 6594 }, { "epoch": 0.5224796989502872, "grad_norm": 1.7260849909373308, "learning_rate": 9.757533764896235e-06, "loss": 0.1918, "step": 6595 }, { "epoch": 0.5225589225589226, "grad_norm": 1.318506626886228, "learning_rate": 9.754968495565973e-06, "loss": 0.1565, "step": 6596 }, { "epoch": 0.5226381461675579, "grad_norm": 1.687609626616036, "learning_rate": 9.752403242369857e-06, "loss": 0.2252, "step": 6597 }, { "epoch": 0.5227173697761933, "grad_norm": 1.6038874822009532, "learning_rate": 9.749838005476798e-06, "loss": 0.1796, "step": 6598 }, { "epoch": 0.5227965933848286, "grad_norm": 2.264460955607835, "learning_rate": 9.7472727850557e-06, "loss": 0.2401, "step": 6599 }, { "epoch": 0.5228758169934641, "grad_norm": 1.7600127058743176, "learning_rate": 9.744707581275473e-06, "loss": 0.2514, "step": 6600 }, { "epoch": 0.5229550406020994, "grad_norm": 1.6454397006077333, "learning_rate": 9.742142394305026e-06, "loss": 0.2406, "step": 6601 }, { "epoch": 0.5230342642107348, "grad_norm": 1.6536626576896352, "learning_rate": 9.739577224313258e-06, "loss": 0.2099, "step": 6602 }, { "epoch": 0.5231134878193702, "grad_norm": 1.2611062806032343, "learning_rate": 9.737012071469082e-06, "loss": 0.1673, "step": 6603 }, { "epoch": 0.5231927114280055, "grad_norm": 1.6670450104890164, "learning_rate": 9.734446935941392e-06, "loss": 0.1761, "step": 6604 }, { "epoch": 0.5232719350366409, "grad_norm": 1.5716250864543366, "learning_rate": 9.731881817899092e-06, "loss": 0.1819, "step": 6605 }, { "epoch": 0.5233511586452763, "grad_norm": 1.5426166047086236, "learning_rate": 9.729316717511088e-06, "loss": 0.2412, "step": 6606 }, { "epoch": 0.5234303822539117, "grad_norm": 1.9558353963362065, "learning_rate": 9.726751634946272e-06, "loss": 0.2241, "step": 6607 }, { "epoch": 0.523509605862547, "grad_norm": 1.834414036964593, "learning_rate": 9.724186570373548e-06, "loss": 0.2642, "step": 6608 }, { "epoch": 0.5235888294711825, "grad_norm": 1.5297948985591396, "learning_rate": 9.721621523961812e-06, "loss": 0.1945, "step": 6609 }, { "epoch": 0.5236680530798178, "grad_norm": 1.6316690372842118, "learning_rate": 9.719056495879958e-06, "loss": 0.1967, "step": 6610 }, { "epoch": 0.5237472766884531, "grad_norm": 1.712998210634295, "learning_rate": 9.716491486296883e-06, "loss": 0.2877, "step": 6611 }, { "epoch": 0.5238265002970885, "grad_norm": 1.5298057464968875, "learning_rate": 9.71392649538148e-06, "loss": 0.2295, "step": 6612 }, { "epoch": 0.5239057239057239, "grad_norm": 1.8130814438376366, "learning_rate": 9.711361523302638e-06, "loss": 0.2246, "step": 6613 }, { "epoch": 0.5239849475143593, "grad_norm": 1.5509073841618803, "learning_rate": 9.708796570229253e-06, "loss": 0.186, "step": 6614 }, { "epoch": 0.5240641711229946, "grad_norm": 1.4937879691190525, "learning_rate": 9.706231636330212e-06, "loss": 0.1683, "step": 6615 }, { "epoch": 0.5241433947316301, "grad_norm": 1.7531062992220119, "learning_rate": 9.703666721774403e-06, "loss": 0.2508, "step": 6616 }, { "epoch": 0.5242226183402654, "grad_norm": 1.575594756517319, "learning_rate": 9.701101826730718e-06, "loss": 0.1831, "step": 6617 }, { "epoch": 0.5243018419489007, "grad_norm": 1.559798698958206, "learning_rate": 9.698536951368035e-06, "loss": 0.2373, "step": 6618 }, { "epoch": 0.5243810655575362, "grad_norm": 1.2948045633210499, "learning_rate": 9.695972095855248e-06, "loss": 0.1585, "step": 6619 }, { "epoch": 0.5244602891661715, "grad_norm": 1.5587486981796865, "learning_rate": 9.693407260361231e-06, "loss": 0.1669, "step": 6620 }, { "epoch": 0.5245395127748069, "grad_norm": 1.9306859801846217, "learning_rate": 9.690842445054873e-06, "loss": 0.2645, "step": 6621 }, { "epoch": 0.5246187363834423, "grad_norm": 1.489265346961083, "learning_rate": 9.688277650105053e-06, "loss": 0.25, "step": 6622 }, { "epoch": 0.5246979599920777, "grad_norm": 1.5738148767507962, "learning_rate": 9.685712875680649e-06, "loss": 0.1943, "step": 6623 }, { "epoch": 0.524777183600713, "grad_norm": 1.6541461425395876, "learning_rate": 9.683148121950539e-06, "loss": 0.2048, "step": 6624 }, { "epoch": 0.5248564072093483, "grad_norm": 1.2756810027881804, "learning_rate": 9.680583389083602e-06, "loss": 0.1904, "step": 6625 }, { "epoch": 0.5249356308179838, "grad_norm": 1.6290058645319117, "learning_rate": 9.67801867724871e-06, "loss": 0.2947, "step": 6626 }, { "epoch": 0.5250148544266191, "grad_norm": 1.5592634810850434, "learning_rate": 9.675453986614743e-06, "loss": 0.2482, "step": 6627 }, { "epoch": 0.5250940780352545, "grad_norm": 1.7993948906587451, "learning_rate": 9.672889317350565e-06, "loss": 0.2261, "step": 6628 }, { "epoch": 0.5251733016438899, "grad_norm": 1.5006090749941114, "learning_rate": 9.670324669625053e-06, "loss": 0.1606, "step": 6629 }, { "epoch": 0.5252525252525253, "grad_norm": 1.675191994429445, "learning_rate": 9.667760043607077e-06, "loss": 0.2044, "step": 6630 }, { "epoch": 0.5253317488611606, "grad_norm": 1.3882031316943497, "learning_rate": 9.6651954394655e-06, "loss": 0.2, "step": 6631 }, { "epoch": 0.525410972469796, "grad_norm": 1.750119177385897, "learning_rate": 9.662630857369194e-06, "loss": 0.2777, "step": 6632 }, { "epoch": 0.5254901960784314, "grad_norm": 1.5913265087833688, "learning_rate": 9.660066297487024e-06, "loss": 0.1686, "step": 6633 }, { "epoch": 0.5255694196870667, "grad_norm": 1.2583668423759147, "learning_rate": 9.65750175998785e-06, "loss": 0.1765, "step": 6634 }, { "epoch": 0.5256486432957022, "grad_norm": 1.5942490905504325, "learning_rate": 9.65493724504054e-06, "loss": 0.2177, "step": 6635 }, { "epoch": 0.5257278669043375, "grad_norm": 1.93664542991259, "learning_rate": 9.65237275281395e-06, "loss": 0.2293, "step": 6636 }, { "epoch": 0.5258070905129729, "grad_norm": 1.9715943510944687, "learning_rate": 9.64980828347694e-06, "loss": 0.2478, "step": 6637 }, { "epoch": 0.5258863141216082, "grad_norm": 1.6583933411282301, "learning_rate": 9.647243837198375e-06, "loss": 0.2722, "step": 6638 }, { "epoch": 0.5259655377302436, "grad_norm": 1.7626576783291303, "learning_rate": 9.644679414147102e-06, "loss": 0.231, "step": 6639 }, { "epoch": 0.526044761338879, "grad_norm": 1.8629875495914667, "learning_rate": 9.64211501449198e-06, "loss": 0.1849, "step": 6640 }, { "epoch": 0.5261239849475143, "grad_norm": 1.8247654282660322, "learning_rate": 9.639550638401863e-06, "loss": 0.2797, "step": 6641 }, { "epoch": 0.5262032085561498, "grad_norm": 1.8397659809916322, "learning_rate": 9.6369862860456e-06, "loss": 0.2822, "step": 6642 }, { "epoch": 0.5262824321647851, "grad_norm": 1.4548770472068142, "learning_rate": 9.634421957592048e-06, "loss": 0.2004, "step": 6643 }, { "epoch": 0.5263616557734205, "grad_norm": 2.611513313692533, "learning_rate": 9.631857653210048e-06, "loss": 0.2488, "step": 6644 }, { "epoch": 0.5264408793820559, "grad_norm": 1.8321557010262473, "learning_rate": 9.629293373068449e-06, "loss": 0.252, "step": 6645 }, { "epoch": 0.5265201029906912, "grad_norm": 1.709438947012418, "learning_rate": 9.626729117336101e-06, "loss": 0.2659, "step": 6646 }, { "epoch": 0.5265993265993266, "grad_norm": 1.572182796128045, "learning_rate": 9.624164886181841e-06, "loss": 0.1842, "step": 6647 }, { "epoch": 0.526678550207962, "grad_norm": 1.3683525626723403, "learning_rate": 9.621600679774516e-06, "loss": 0.231, "step": 6648 }, { "epoch": 0.5267577738165974, "grad_norm": 1.7854714537760052, "learning_rate": 9.619036498282968e-06, "loss": 0.3232, "step": 6649 }, { "epoch": 0.5268369974252327, "grad_norm": 1.4651685607183893, "learning_rate": 9.61647234187603e-06, "loss": 0.1906, "step": 6650 }, { "epoch": 0.526916221033868, "grad_norm": 1.8337084587744106, "learning_rate": 9.613908210722546e-06, "loss": 0.2134, "step": 6651 }, { "epoch": 0.5269954446425035, "grad_norm": 1.5941261922743575, "learning_rate": 9.611344104991346e-06, "loss": 0.2399, "step": 6652 }, { "epoch": 0.5270746682511388, "grad_norm": 1.511790159172521, "learning_rate": 9.608780024851266e-06, "loss": 0.2017, "step": 6653 }, { "epoch": 0.5271538918597742, "grad_norm": 1.5008775012960016, "learning_rate": 9.606215970471142e-06, "loss": 0.1346, "step": 6654 }, { "epoch": 0.5272331154684096, "grad_norm": 1.7168626710340569, "learning_rate": 9.6036519420198e-06, "loss": 0.2133, "step": 6655 }, { "epoch": 0.527312339077045, "grad_norm": 1.5667632846405024, "learning_rate": 9.601087939666071e-06, "loss": 0.1479, "step": 6656 }, { "epoch": 0.5273915626856803, "grad_norm": 1.7926740422311847, "learning_rate": 9.598523963578785e-06, "loss": 0.2803, "step": 6657 }, { "epoch": 0.5274707862943157, "grad_norm": 1.6059482067695043, "learning_rate": 9.595960013926761e-06, "loss": 0.1931, "step": 6658 }, { "epoch": 0.5275500099029511, "grad_norm": 1.5127165446548374, "learning_rate": 9.593396090878823e-06, "loss": 0.1644, "step": 6659 }, { "epoch": 0.5276292335115864, "grad_norm": 1.4892583823446155, "learning_rate": 9.590832194603801e-06, "loss": 0.1836, "step": 6660 }, { "epoch": 0.5277084571202219, "grad_norm": 2.497602606422469, "learning_rate": 9.588268325270506e-06, "loss": 0.3302, "step": 6661 }, { "epoch": 0.5277876807288572, "grad_norm": 1.362954333513193, "learning_rate": 9.585704483047761e-06, "loss": 0.1366, "step": 6662 }, { "epoch": 0.5278669043374926, "grad_norm": 1.6089875736780563, "learning_rate": 9.583140668104387e-06, "loss": 0.2361, "step": 6663 }, { "epoch": 0.5279461279461279, "grad_norm": 1.773462069836003, "learning_rate": 9.58057688060919e-06, "loss": 0.2527, "step": 6664 }, { "epoch": 0.5280253515547633, "grad_norm": 1.8801863525211449, "learning_rate": 9.578013120730987e-06, "loss": 0.2627, "step": 6665 }, { "epoch": 0.5281045751633987, "grad_norm": 1.5152957647957932, "learning_rate": 9.575449388638592e-06, "loss": 0.1907, "step": 6666 }, { "epoch": 0.528183798772034, "grad_norm": 1.8036361744117901, "learning_rate": 9.57288568450081e-06, "loss": 0.3015, "step": 6667 }, { "epoch": 0.5282630223806695, "grad_norm": 1.8838291322583762, "learning_rate": 9.570322008486453e-06, "loss": 0.2451, "step": 6668 }, { "epoch": 0.5283422459893048, "grad_norm": 1.5034239764256554, "learning_rate": 9.567758360764321e-06, "loss": 0.2089, "step": 6669 }, { "epoch": 0.5284214695979402, "grad_norm": 1.6195194920710483, "learning_rate": 9.565194741503221e-06, "loss": 0.2313, "step": 6670 }, { "epoch": 0.5285006932065756, "grad_norm": 1.5463357780081222, "learning_rate": 9.562631150871959e-06, "loss": 0.2487, "step": 6671 }, { "epoch": 0.5285799168152109, "grad_norm": 1.4321370214688554, "learning_rate": 9.560067589039327e-06, "loss": 0.1816, "step": 6672 }, { "epoch": 0.5286591404238463, "grad_norm": 1.4905937188873857, "learning_rate": 9.55750405617413e-06, "loss": 0.1569, "step": 6673 }, { "epoch": 0.5287383640324816, "grad_norm": 2.3833644922495125, "learning_rate": 9.554940552445161e-06, "loss": 0.2216, "step": 6674 }, { "epoch": 0.5288175876411171, "grad_norm": 1.661038410211544, "learning_rate": 9.552377078021215e-06, "loss": 0.2827, "step": 6675 }, { "epoch": 0.5288968112497524, "grad_norm": 1.3943359011090264, "learning_rate": 9.549813633071085e-06, "loss": 0.2179, "step": 6676 }, { "epoch": 0.5289760348583878, "grad_norm": 1.7915763129053264, "learning_rate": 9.54725021776356e-06, "loss": 0.2427, "step": 6677 }, { "epoch": 0.5290552584670232, "grad_norm": 2.2491499294279307, "learning_rate": 9.54468683226743e-06, "loss": 0.2505, "step": 6678 }, { "epoch": 0.5291344820756585, "grad_norm": 1.4029439571383027, "learning_rate": 9.542123476751484e-06, "loss": 0.1428, "step": 6679 }, { "epoch": 0.5292137056842939, "grad_norm": 1.632703332472555, "learning_rate": 9.5395601513845e-06, "loss": 0.1874, "step": 6680 }, { "epoch": 0.5292929292929293, "grad_norm": 2.1049649496709404, "learning_rate": 9.536996856335269e-06, "loss": 0.2929, "step": 6681 }, { "epoch": 0.5293721529015647, "grad_norm": 1.8296071181548896, "learning_rate": 9.534433591772562e-06, "loss": 0.2617, "step": 6682 }, { "epoch": 0.5294513765102, "grad_norm": 1.9450150476803743, "learning_rate": 9.531870357865165e-06, "loss": 0.2145, "step": 6683 }, { "epoch": 0.5295306001188355, "grad_norm": 1.47473320876335, "learning_rate": 9.529307154781855e-06, "loss": 0.1979, "step": 6684 }, { "epoch": 0.5296098237274708, "grad_norm": 1.338591728638656, "learning_rate": 9.5267439826914e-06, "loss": 0.1645, "step": 6685 }, { "epoch": 0.5296890473361061, "grad_norm": 1.931847108495125, "learning_rate": 9.524180841762577e-06, "loss": 0.2417, "step": 6686 }, { "epoch": 0.5297682709447415, "grad_norm": 2.210729370265415, "learning_rate": 9.52161773216416e-06, "loss": 0.2576, "step": 6687 }, { "epoch": 0.5298474945533769, "grad_norm": 1.6437102160453194, "learning_rate": 9.519054654064909e-06, "loss": 0.2029, "step": 6688 }, { "epoch": 0.5299267181620123, "grad_norm": 1.5214474428938953, "learning_rate": 9.5164916076336e-06, "loss": 0.188, "step": 6689 }, { "epoch": 0.5300059417706476, "grad_norm": 1.845370636968282, "learning_rate": 9.513928593038987e-06, "loss": 0.2865, "step": 6690 }, { "epoch": 0.5300851653792831, "grad_norm": 1.8787258609209871, "learning_rate": 9.51136561044984e-06, "loss": 0.2375, "step": 6691 }, { "epoch": 0.5301643889879184, "grad_norm": 1.5151714693359617, "learning_rate": 9.508802660034915e-06, "loss": 0.1878, "step": 6692 }, { "epoch": 0.5302436125965537, "grad_norm": 2.1548326623262173, "learning_rate": 9.506239741962971e-06, "loss": 0.3541, "step": 6693 }, { "epoch": 0.5303228362051892, "grad_norm": 1.7527924388536071, "learning_rate": 9.503676856402764e-06, "loss": 0.2741, "step": 6694 }, { "epoch": 0.5304020598138245, "grad_norm": 1.2962090526177372, "learning_rate": 9.50111400352305e-06, "loss": 0.1803, "step": 6695 }, { "epoch": 0.5304812834224599, "grad_norm": 1.5194447912503397, "learning_rate": 9.498551183492578e-06, "loss": 0.1909, "step": 6696 }, { "epoch": 0.5305605070310953, "grad_norm": 1.412741052417984, "learning_rate": 9.495988396480097e-06, "loss": 0.247, "step": 6697 }, { "epoch": 0.5306397306397307, "grad_norm": 1.8234868886306743, "learning_rate": 9.493425642654356e-06, "loss": 0.3234, "step": 6698 }, { "epoch": 0.530718954248366, "grad_norm": 1.4803751124426618, "learning_rate": 9.490862922184096e-06, "loss": 0.2625, "step": 6699 }, { "epoch": 0.5307981778570013, "grad_norm": 1.6934213292679765, "learning_rate": 9.488300235238067e-06, "loss": 0.2256, "step": 6700 }, { "epoch": 0.5308774014656368, "grad_norm": 1.8482265898384123, "learning_rate": 9.485737581985002e-06, "loss": 0.1978, "step": 6701 }, { "epoch": 0.5309566250742721, "grad_norm": 1.148427836684831, "learning_rate": 9.483174962593644e-06, "loss": 0.0733, "step": 6702 }, { "epoch": 0.5310358486829075, "grad_norm": 1.5065220042849732, "learning_rate": 9.480612377232728e-06, "loss": 0.1749, "step": 6703 }, { "epoch": 0.5311150722915429, "grad_norm": 1.3931876475465617, "learning_rate": 9.478049826070988e-06, "loss": 0.2005, "step": 6704 }, { "epoch": 0.5311942959001783, "grad_norm": 1.547844036626027, "learning_rate": 9.475487309277156e-06, "loss": 0.2169, "step": 6705 }, { "epoch": 0.5312735195088136, "grad_norm": 1.3564708885872676, "learning_rate": 9.472924827019959e-06, "loss": 0.1944, "step": 6706 }, { "epoch": 0.531352743117449, "grad_norm": 1.4215847582519736, "learning_rate": 9.470362379468125e-06, "loss": 0.1887, "step": 6707 }, { "epoch": 0.5314319667260844, "grad_norm": 1.6639037965063888, "learning_rate": 9.467799966790384e-06, "loss": 0.1971, "step": 6708 }, { "epoch": 0.5315111903347197, "grad_norm": 1.7872959741549863, "learning_rate": 9.465237589155452e-06, "loss": 0.2035, "step": 6709 }, { "epoch": 0.5315904139433552, "grad_norm": 1.5266494139563354, "learning_rate": 9.462675246732051e-06, "loss": 0.1816, "step": 6710 }, { "epoch": 0.5316696375519905, "grad_norm": 1.7236073652852932, "learning_rate": 9.460112939688901e-06, "loss": 0.2813, "step": 6711 }, { "epoch": 0.5317488611606259, "grad_norm": 1.7651528915167407, "learning_rate": 9.457550668194714e-06, "loss": 0.2247, "step": 6712 }, { "epoch": 0.5318280847692612, "grad_norm": 2.0036443265858663, "learning_rate": 9.45498843241821e-06, "loss": 0.2509, "step": 6713 }, { "epoch": 0.5319073083778966, "grad_norm": 1.5789654364978454, "learning_rate": 9.452426232528092e-06, "loss": 0.2065, "step": 6714 }, { "epoch": 0.531986531986532, "grad_norm": 1.4504747767975756, "learning_rate": 9.449864068693072e-06, "loss": 0.1802, "step": 6715 }, { "epoch": 0.5320657555951673, "grad_norm": 1.6378494980445526, "learning_rate": 9.447301941081856e-06, "loss": 0.2352, "step": 6716 }, { "epoch": 0.5321449792038028, "grad_norm": 1.6869247085174373, "learning_rate": 9.444739849863146e-06, "loss": 0.2835, "step": 6717 }, { "epoch": 0.5322242028124381, "grad_norm": 1.4819390163273016, "learning_rate": 9.442177795205647e-06, "loss": 0.177, "step": 6718 }, { "epoch": 0.5323034264210735, "grad_norm": 1.6452706542873483, "learning_rate": 9.439615777278059e-06, "loss": 0.1653, "step": 6719 }, { "epoch": 0.5323826500297089, "grad_norm": 1.5842742962511123, "learning_rate": 9.437053796249071e-06, "loss": 0.1877, "step": 6720 }, { "epoch": 0.5324618736383442, "grad_norm": 1.5325389961654612, "learning_rate": 9.434491852287385e-06, "loss": 0.1717, "step": 6721 }, { "epoch": 0.5325410972469796, "grad_norm": 1.4712055393041057, "learning_rate": 9.431929945561688e-06, "loss": 0.1968, "step": 6722 }, { "epoch": 0.532620320855615, "grad_norm": 1.847194933031637, "learning_rate": 9.429368076240669e-06, "loss": 0.2141, "step": 6723 }, { "epoch": 0.5326995444642504, "grad_norm": 2.010953931471911, "learning_rate": 9.42680624449302e-06, "loss": 0.2813, "step": 6724 }, { "epoch": 0.5327787680728857, "grad_norm": 1.9905392212703936, "learning_rate": 9.42424445048742e-06, "loss": 0.2785, "step": 6725 }, { "epoch": 0.5328579916815211, "grad_norm": 1.5519898076389937, "learning_rate": 9.42168269439255e-06, "loss": 0.1744, "step": 6726 }, { "epoch": 0.5329372152901565, "grad_norm": 1.2489815884023632, "learning_rate": 9.419120976377098e-06, "loss": 0.1867, "step": 6727 }, { "epoch": 0.5330164388987918, "grad_norm": 1.556647804415713, "learning_rate": 9.41655929660973e-06, "loss": 0.2544, "step": 6728 }, { "epoch": 0.5330956625074272, "grad_norm": 1.4873696218984018, "learning_rate": 9.413997655259126e-06, "loss": 0.2269, "step": 6729 }, { "epoch": 0.5331748861160626, "grad_norm": 1.5828225641722773, "learning_rate": 9.411436052493957e-06, "loss": 0.2174, "step": 6730 }, { "epoch": 0.533254109724698, "grad_norm": 1.438560811196052, "learning_rate": 9.40887448848289e-06, "loss": 0.2025, "step": 6731 }, { "epoch": 0.5333333333333333, "grad_norm": 1.7749281631140923, "learning_rate": 9.406312963394598e-06, "loss": 0.1845, "step": 6732 }, { "epoch": 0.5334125569419687, "grad_norm": 1.7563477771745766, "learning_rate": 9.403751477397738e-06, "loss": 0.3005, "step": 6733 }, { "epoch": 0.5334917805506041, "grad_norm": 1.5707082099560055, "learning_rate": 9.401190030660975e-06, "loss": 0.2546, "step": 6734 }, { "epoch": 0.5335710041592394, "grad_norm": 1.6766375692726976, "learning_rate": 9.398628623352969e-06, "loss": 0.1494, "step": 6735 }, { "epoch": 0.5336502277678749, "grad_norm": 1.3285417095349021, "learning_rate": 9.396067255642373e-06, "loss": 0.1746, "step": 6736 }, { "epoch": 0.5337294513765102, "grad_norm": 1.4059534324614433, "learning_rate": 9.39350592769784e-06, "loss": 0.2389, "step": 6737 }, { "epoch": 0.5338086749851456, "grad_norm": 1.355034071935565, "learning_rate": 9.390944639688027e-06, "loss": 0.1767, "step": 6738 }, { "epoch": 0.5338878985937809, "grad_norm": 1.5922845269765298, "learning_rate": 9.388383391781576e-06, "loss": 0.1984, "step": 6739 }, { "epoch": 0.5339671222024163, "grad_norm": 1.719551087336993, "learning_rate": 9.385822184147136e-06, "loss": 0.2512, "step": 6740 }, { "epoch": 0.5340463458110517, "grad_norm": 1.7142518922085583, "learning_rate": 9.383261016953351e-06, "loss": 0.2326, "step": 6741 }, { "epoch": 0.534125569419687, "grad_norm": 1.763326968766568, "learning_rate": 9.38069989036886e-06, "loss": 0.2904, "step": 6742 }, { "epoch": 0.5342047930283225, "grad_norm": 1.3857171277720168, "learning_rate": 9.3781388045623e-06, "loss": 0.1528, "step": 6743 }, { "epoch": 0.5342840166369578, "grad_norm": 1.8505391177845503, "learning_rate": 9.37557775970231e-06, "loss": 0.2994, "step": 6744 }, { "epoch": 0.5343632402455932, "grad_norm": 1.8492972044462455, "learning_rate": 9.373016755957519e-06, "loss": 0.2947, "step": 6745 }, { "epoch": 0.5344424638542286, "grad_norm": 1.7409432756076915, "learning_rate": 9.370455793496558e-06, "loss": 0.2772, "step": 6746 }, { "epoch": 0.5345216874628639, "grad_norm": 2.028965044254343, "learning_rate": 9.367894872488053e-06, "loss": 0.3436, "step": 6747 }, { "epoch": 0.5346009110714993, "grad_norm": 3.0423782411705593, "learning_rate": 9.365333993100628e-06, "loss": 0.194, "step": 6748 }, { "epoch": 0.5346801346801346, "grad_norm": 1.4699543873430614, "learning_rate": 9.362773155502909e-06, "loss": 0.2351, "step": 6749 }, { "epoch": 0.5347593582887701, "grad_norm": 1.9852768916268493, "learning_rate": 9.360212359863508e-06, "loss": 0.318, "step": 6750 }, { "epoch": 0.5348385818974054, "grad_norm": 1.265955168851745, "learning_rate": 9.357651606351047e-06, "loss": 0.1414, "step": 6751 }, { "epoch": 0.5349178055060408, "grad_norm": 1.3497356084849454, "learning_rate": 9.355090895134138e-06, "loss": 0.1965, "step": 6752 }, { "epoch": 0.5349970291146762, "grad_norm": 1.566259114016372, "learning_rate": 9.352530226381388e-06, "loss": 0.2507, "step": 6753 }, { "epoch": 0.5350762527233115, "grad_norm": 1.442853681254746, "learning_rate": 9.349969600261408e-06, "loss": 0.1829, "step": 6754 }, { "epoch": 0.5351554763319469, "grad_norm": 1.6751769314191742, "learning_rate": 9.347409016942803e-06, "loss": 0.2513, "step": 6755 }, { "epoch": 0.5352346999405823, "grad_norm": 2.0313269456821863, "learning_rate": 9.344848476594172e-06, "loss": 0.3085, "step": 6756 }, { "epoch": 0.5353139235492177, "grad_norm": 1.5124093873092133, "learning_rate": 9.342287979384118e-06, "loss": 0.1808, "step": 6757 }, { "epoch": 0.535393147157853, "grad_norm": 1.7329358612448034, "learning_rate": 9.339727525481234e-06, "loss": 0.2653, "step": 6758 }, { "epoch": 0.5354723707664885, "grad_norm": 1.6803824824168774, "learning_rate": 9.33716711505412e-06, "loss": 0.2128, "step": 6759 }, { "epoch": 0.5355515943751238, "grad_norm": 1.7964456294819626, "learning_rate": 9.334606748271357e-06, "loss": 0.1935, "step": 6760 }, { "epoch": 0.5356308179837591, "grad_norm": 1.551025873097992, "learning_rate": 9.33204642530154e-06, "loss": 0.193, "step": 6761 }, { "epoch": 0.5357100415923945, "grad_norm": 1.4885285664233845, "learning_rate": 9.329486146313254e-06, "loss": 0.1571, "step": 6762 }, { "epoch": 0.5357892652010299, "grad_norm": 1.7748101804275802, "learning_rate": 9.326925911475075e-06, "loss": 0.212, "step": 6763 }, { "epoch": 0.5358684888096653, "grad_norm": 1.723281768491179, "learning_rate": 9.324365720955589e-06, "loss": 0.2359, "step": 6764 }, { "epoch": 0.5359477124183006, "grad_norm": 1.516767592161019, "learning_rate": 9.321805574923369e-06, "loss": 0.1937, "step": 6765 }, { "epoch": 0.5360269360269361, "grad_norm": 1.5222957327946214, "learning_rate": 9.319245473546987e-06, "loss": 0.1482, "step": 6766 }, { "epoch": 0.5361061596355714, "grad_norm": 1.531027295440473, "learning_rate": 9.316685416995017e-06, "loss": 0.2105, "step": 6767 }, { "epoch": 0.5361853832442067, "grad_norm": 1.5008528912792312, "learning_rate": 9.314125405436023e-06, "loss": 0.1662, "step": 6768 }, { "epoch": 0.5362646068528422, "grad_norm": 1.3968376128273732, "learning_rate": 9.311565439038571e-06, "loss": 0.1696, "step": 6769 }, { "epoch": 0.5363438304614775, "grad_norm": 1.7978002793367236, "learning_rate": 9.309005517971222e-06, "loss": 0.2559, "step": 6770 }, { "epoch": 0.5364230540701129, "grad_norm": 1.538462798294783, "learning_rate": 9.306445642402534e-06, "loss": 0.1735, "step": 6771 }, { "epoch": 0.5365022776787483, "grad_norm": 1.8106540947748968, "learning_rate": 9.303885812501064e-06, "loss": 0.2323, "step": 6772 }, { "epoch": 0.5365815012873837, "grad_norm": 1.4969367501225739, "learning_rate": 9.301326028435367e-06, "loss": 0.1896, "step": 6773 }, { "epoch": 0.536660724896019, "grad_norm": 2.9130970804528684, "learning_rate": 9.298766290373986e-06, "loss": 0.2045, "step": 6774 }, { "epoch": 0.5367399485046543, "grad_norm": 1.6689027093276674, "learning_rate": 9.296206598485471e-06, "loss": 0.2609, "step": 6775 }, { "epoch": 0.5368191721132898, "grad_norm": 1.884687721845564, "learning_rate": 9.293646952938365e-06, "loss": 0.2781, "step": 6776 }, { "epoch": 0.5368983957219251, "grad_norm": 1.475400807052087, "learning_rate": 9.291087353901208e-06, "loss": 0.2238, "step": 6777 }, { "epoch": 0.5369776193305605, "grad_norm": 1.6800887026100833, "learning_rate": 9.28852780154254e-06, "loss": 0.1718, "step": 6778 }, { "epoch": 0.5370568429391959, "grad_norm": 1.594646250244442, "learning_rate": 9.285968296030891e-06, "loss": 0.1928, "step": 6779 }, { "epoch": 0.5371360665478313, "grad_norm": 1.6684572489481007, "learning_rate": 9.283408837534793e-06, "loss": 0.2109, "step": 6780 }, { "epoch": 0.5372152901564666, "grad_norm": 1.5809270240124893, "learning_rate": 9.280849426222778e-06, "loss": 0.2158, "step": 6781 }, { "epoch": 0.537294513765102, "grad_norm": 1.5205443732991, "learning_rate": 9.278290062263364e-06, "loss": 0.1733, "step": 6782 }, { "epoch": 0.5373737373737374, "grad_norm": 1.3913000406741118, "learning_rate": 9.27573074582508e-06, "loss": 0.1599, "step": 6783 }, { "epoch": 0.5374529609823727, "grad_norm": 1.368914089211404, "learning_rate": 9.27317147707644e-06, "loss": 0.1134, "step": 6784 }, { "epoch": 0.5375321845910082, "grad_norm": 1.7023713108481129, "learning_rate": 9.270612256185962e-06, "loss": 0.2073, "step": 6785 }, { "epoch": 0.5376114081996435, "grad_norm": 1.9357893966901718, "learning_rate": 9.268053083322157e-06, "loss": 0.2758, "step": 6786 }, { "epoch": 0.5376906318082789, "grad_norm": 1.6010082869327436, "learning_rate": 9.265493958653533e-06, "loss": 0.1937, "step": 6787 }, { "epoch": 0.5377698554169142, "grad_norm": 1.6361248143081104, "learning_rate": 9.262934882348599e-06, "loss": 0.1854, "step": 6788 }, { "epoch": 0.5378490790255496, "grad_norm": 1.6057876065624228, "learning_rate": 9.260375854575857e-06, "loss": 0.1508, "step": 6789 }, { "epoch": 0.537928302634185, "grad_norm": 1.517449055211345, "learning_rate": 9.257816875503805e-06, "loss": 0.2046, "step": 6790 }, { "epoch": 0.5380075262428203, "grad_norm": 1.780544364691931, "learning_rate": 9.255257945300941e-06, "loss": 0.2048, "step": 6791 }, { "epoch": 0.5380867498514558, "grad_norm": 1.3008160343959099, "learning_rate": 9.252699064135759e-06, "loss": 0.1145, "step": 6792 }, { "epoch": 0.5381659734600911, "grad_norm": 1.6095789214980698, "learning_rate": 9.250140232176746e-06, "loss": 0.1947, "step": 6793 }, { "epoch": 0.5382451970687265, "grad_norm": 1.5748436730638056, "learning_rate": 9.247581449592392e-06, "loss": 0.2151, "step": 6794 }, { "epoch": 0.5383244206773619, "grad_norm": 1.911311198120432, "learning_rate": 9.245022716551178e-06, "loss": 0.2478, "step": 6795 }, { "epoch": 0.5384036442859972, "grad_norm": 1.5673164170569804, "learning_rate": 9.242464033221584e-06, "loss": 0.2216, "step": 6796 }, { "epoch": 0.5384828678946326, "grad_norm": 2.007826362531275, "learning_rate": 9.239905399772092e-06, "loss": 0.3116, "step": 6797 }, { "epoch": 0.538562091503268, "grad_norm": 2.073217020398983, "learning_rate": 9.237346816371169e-06, "loss": 0.2895, "step": 6798 }, { "epoch": 0.5386413151119034, "grad_norm": 2.5203239544483353, "learning_rate": 9.234788283187291e-06, "loss": 0.2933, "step": 6799 }, { "epoch": 0.5387205387205387, "grad_norm": 1.4939708121844684, "learning_rate": 9.23222980038892e-06, "loss": 0.2006, "step": 6800 }, { "epoch": 0.5387997623291741, "grad_norm": 2.242616962081198, "learning_rate": 9.229671368144524e-06, "loss": 0.3136, "step": 6801 }, { "epoch": 0.5388789859378095, "grad_norm": 1.7522087979966592, "learning_rate": 9.227112986622562e-06, "loss": 0.2659, "step": 6802 }, { "epoch": 0.5389582095464448, "grad_norm": 1.3821852784587902, "learning_rate": 9.224554655991492e-06, "loss": 0.1914, "step": 6803 }, { "epoch": 0.5390374331550802, "grad_norm": 2.1661008444848857, "learning_rate": 9.221996376419763e-06, "loss": 0.3001, "step": 6804 }, { "epoch": 0.5391166567637156, "grad_norm": 1.5375078321927154, "learning_rate": 9.219438148075834e-06, "loss": 0.2076, "step": 6805 }, { "epoch": 0.539195880372351, "grad_norm": 1.7228165284179944, "learning_rate": 9.216879971128142e-06, "loss": 0.2797, "step": 6806 }, { "epoch": 0.5392751039809863, "grad_norm": 1.6069938870513174, "learning_rate": 9.21432184574514e-06, "loss": 0.1893, "step": 6807 }, { "epoch": 0.5393543275896218, "grad_norm": 1.4410638900423316, "learning_rate": 9.21176377209526e-06, "loss": 0.2318, "step": 6808 }, { "epoch": 0.5394335511982571, "grad_norm": 1.7264312231655994, "learning_rate": 9.209205750346945e-06, "loss": 0.2414, "step": 6809 }, { "epoch": 0.5395127748068924, "grad_norm": 1.967709664441678, "learning_rate": 9.206647780668629e-06, "loss": 0.2487, "step": 6810 }, { "epoch": 0.5395919984155279, "grad_norm": 1.5089670342628774, "learning_rate": 9.204089863228736e-06, "loss": 0.2166, "step": 6811 }, { "epoch": 0.5396712220241632, "grad_norm": 1.4059351759289564, "learning_rate": 9.201531998195697e-06, "loss": 0.1653, "step": 6812 }, { "epoch": 0.5397504456327986, "grad_norm": 1.542161201926313, "learning_rate": 9.198974185737934e-06, "loss": 0.187, "step": 6813 }, { "epoch": 0.5398296692414339, "grad_norm": 1.6291370136864782, "learning_rate": 9.196416426023868e-06, "loss": 0.2021, "step": 6814 }, { "epoch": 0.5399088928500693, "grad_norm": 1.6932647921587172, "learning_rate": 9.193858719221912e-06, "loss": 0.2505, "step": 6815 }, { "epoch": 0.5399881164587047, "grad_norm": 1.6916988956884456, "learning_rate": 9.19130106550048e-06, "loss": 0.1859, "step": 6816 }, { "epoch": 0.54006734006734, "grad_norm": 1.39481475664272, "learning_rate": 9.188743465027981e-06, "loss": 0.1397, "step": 6817 }, { "epoch": 0.5401465636759755, "grad_norm": 1.520532635046863, "learning_rate": 9.186185917972821e-06, "loss": 0.1564, "step": 6818 }, { "epoch": 0.5402257872846108, "grad_norm": 1.8713052680090447, "learning_rate": 9.183628424503405e-06, "loss": 0.2362, "step": 6819 }, { "epoch": 0.5403050108932462, "grad_norm": 1.617227033114138, "learning_rate": 9.181070984788127e-06, "loss": 0.2031, "step": 6820 }, { "epoch": 0.5403842345018816, "grad_norm": 1.5983723180223535, "learning_rate": 9.178513598995384e-06, "loss": 0.2095, "step": 6821 }, { "epoch": 0.5404634581105169, "grad_norm": 1.6445190312669158, "learning_rate": 9.17595626729357e-06, "loss": 0.2308, "step": 6822 }, { "epoch": 0.5405426817191523, "grad_norm": 1.6140072254229159, "learning_rate": 9.17339898985107e-06, "loss": 0.2284, "step": 6823 }, { "epoch": 0.5406219053277876, "grad_norm": 1.8511931150082803, "learning_rate": 9.170841766836268e-06, "loss": 0.2776, "step": 6824 }, { "epoch": 0.5407011289364231, "grad_norm": 1.8773750824975899, "learning_rate": 9.168284598417547e-06, "loss": 0.282, "step": 6825 }, { "epoch": 0.5407803525450584, "grad_norm": 1.2955582505278562, "learning_rate": 9.165727484763283e-06, "loss": 0.1764, "step": 6826 }, { "epoch": 0.5408595761536938, "grad_norm": 1.709261923249498, "learning_rate": 9.16317042604185e-06, "loss": 0.312, "step": 6827 }, { "epoch": 0.5409387997623292, "grad_norm": 1.5455408348596222, "learning_rate": 9.160613422421616e-06, "loss": 0.2232, "step": 6828 }, { "epoch": 0.5410180233709645, "grad_norm": 1.683470175310256, "learning_rate": 9.158056474070952e-06, "loss": 0.1741, "step": 6829 }, { "epoch": 0.5410972469795999, "grad_norm": 1.390657859679486, "learning_rate": 9.155499581158217e-06, "loss": 0.1521, "step": 6830 }, { "epoch": 0.5411764705882353, "grad_norm": 1.342939265783351, "learning_rate": 9.152942743851771e-06, "loss": 0.1481, "step": 6831 }, { "epoch": 0.5412556941968707, "grad_norm": 1.5404597635699995, "learning_rate": 9.15038596231997e-06, "loss": 0.1657, "step": 6832 }, { "epoch": 0.541334917805506, "grad_norm": 1.7452959716462286, "learning_rate": 9.147829236731164e-06, "loss": 0.2441, "step": 6833 }, { "epoch": 0.5414141414141415, "grad_norm": 1.8456251227756069, "learning_rate": 9.145272567253703e-06, "loss": 0.2412, "step": 6834 }, { "epoch": 0.5414933650227768, "grad_norm": 1.6205294600626676, "learning_rate": 9.142715954055932e-06, "loss": 0.2302, "step": 6835 }, { "epoch": 0.5415725886314121, "grad_norm": 1.293886952678985, "learning_rate": 9.140159397306188e-06, "loss": 0.1517, "step": 6836 }, { "epoch": 0.5416518122400475, "grad_norm": 1.2160053073034036, "learning_rate": 9.137602897172814e-06, "loss": 0.1257, "step": 6837 }, { "epoch": 0.5417310358486829, "grad_norm": 1.4257844619134687, "learning_rate": 9.135046453824136e-06, "loss": 0.1576, "step": 6838 }, { "epoch": 0.5418102594573183, "grad_norm": 1.7166193055659036, "learning_rate": 9.132490067428488e-06, "loss": 0.2489, "step": 6839 }, { "epoch": 0.5418894830659536, "grad_norm": 1.685833293793044, "learning_rate": 9.129933738154196e-06, "loss": 0.2176, "step": 6840 }, { "epoch": 0.5419687066745891, "grad_norm": 1.8485552936770095, "learning_rate": 9.12737746616958e-06, "loss": 0.2867, "step": 6841 }, { "epoch": 0.5420479302832244, "grad_norm": 1.4195525431059022, "learning_rate": 9.124821251642959e-06, "loss": 0.1862, "step": 6842 }, { "epoch": 0.5421271538918597, "grad_norm": 1.7481022330162914, "learning_rate": 9.122265094742648e-06, "loss": 0.2348, "step": 6843 }, { "epoch": 0.5422063775004952, "grad_norm": 1.520956774038658, "learning_rate": 9.119708995636957e-06, "loss": 0.2061, "step": 6844 }, { "epoch": 0.5422856011091305, "grad_norm": 1.6501130389696557, "learning_rate": 9.117152954494195e-06, "loss": 0.2284, "step": 6845 }, { "epoch": 0.5423648247177659, "grad_norm": 1.4134937676704153, "learning_rate": 9.114596971482658e-06, "loss": 0.2372, "step": 6846 }, { "epoch": 0.5424440483264013, "grad_norm": 1.48107579491038, "learning_rate": 9.112041046770653e-06, "loss": 0.2205, "step": 6847 }, { "epoch": 0.5425232719350367, "grad_norm": 1.5267428398691307, "learning_rate": 9.109485180526474e-06, "loss": 0.2025, "step": 6848 }, { "epoch": 0.542602495543672, "grad_norm": 1.5472428285904707, "learning_rate": 9.106929372918408e-06, "loss": 0.2239, "step": 6849 }, { "epoch": 0.5426817191523073, "grad_norm": 1.6485598906539451, "learning_rate": 9.104373624114746e-06, "loss": 0.1905, "step": 6850 }, { "epoch": 0.5427609427609428, "grad_norm": 1.7167078014689015, "learning_rate": 9.101817934283775e-06, "loss": 0.2033, "step": 6851 }, { "epoch": 0.5428401663695781, "grad_norm": 1.7845452277875142, "learning_rate": 9.099262303593768e-06, "loss": 0.2484, "step": 6852 }, { "epoch": 0.5429193899782135, "grad_norm": 1.7053247342526823, "learning_rate": 9.096706732213005e-06, "loss": 0.274, "step": 6853 }, { "epoch": 0.5429986135868489, "grad_norm": 1.8035295520221963, "learning_rate": 9.094151220309757e-06, "loss": 0.2479, "step": 6854 }, { "epoch": 0.5430778371954843, "grad_norm": 2.2689328059763216, "learning_rate": 9.091595768052291e-06, "loss": 0.3611, "step": 6855 }, { "epoch": 0.5431570608041196, "grad_norm": 1.5834899513868252, "learning_rate": 9.089040375608876e-06, "loss": 0.1943, "step": 6856 }, { "epoch": 0.543236284412755, "grad_norm": 2.14135252106152, "learning_rate": 9.086485043147768e-06, "loss": 0.4022, "step": 6857 }, { "epoch": 0.5433155080213904, "grad_norm": 1.7820685706167885, "learning_rate": 9.083929770837222e-06, "loss": 0.2008, "step": 6858 }, { "epoch": 0.5433947316300257, "grad_norm": 1.6906391788519166, "learning_rate": 9.081374558845496e-06, "loss": 0.1718, "step": 6859 }, { "epoch": 0.5434739552386612, "grad_norm": 1.4666154262466298, "learning_rate": 9.078819407340833e-06, "loss": 0.2146, "step": 6860 }, { "epoch": 0.5435531788472965, "grad_norm": 1.9349018814647516, "learning_rate": 9.07626431649148e-06, "loss": 0.2326, "step": 6861 }, { "epoch": 0.5436324024559319, "grad_norm": 1.9896868511337484, "learning_rate": 9.073709286465678e-06, "loss": 0.2692, "step": 6862 }, { "epoch": 0.5437116260645672, "grad_norm": 1.9799856737716437, "learning_rate": 9.071154317431661e-06, "loss": 0.2285, "step": 6863 }, { "epoch": 0.5437908496732026, "grad_norm": 1.6326176647506145, "learning_rate": 9.068599409557664e-06, "loss": 0.223, "step": 6864 }, { "epoch": 0.543870073281838, "grad_norm": 1.5168782514780899, "learning_rate": 9.066044563011914e-06, "loss": 0.2702, "step": 6865 }, { "epoch": 0.5439492968904733, "grad_norm": 1.716583370934031, "learning_rate": 9.063489777962634e-06, "loss": 0.3145, "step": 6866 }, { "epoch": 0.5440285204991088, "grad_norm": 1.287990390476046, "learning_rate": 9.06093505457805e-06, "loss": 0.1591, "step": 6867 }, { "epoch": 0.5441077441077441, "grad_norm": 1.5966533475058962, "learning_rate": 9.058380393026369e-06, "loss": 0.2272, "step": 6868 }, { "epoch": 0.5441869677163795, "grad_norm": 1.7230728770952775, "learning_rate": 9.055825793475814e-06, "loss": 0.2604, "step": 6869 }, { "epoch": 0.5442661913250149, "grad_norm": 2.036746860379108, "learning_rate": 9.053271256094582e-06, "loss": 0.2219, "step": 6870 }, { "epoch": 0.5443454149336502, "grad_norm": 1.8948099519392991, "learning_rate": 9.050716781050885e-06, "loss": 0.2507, "step": 6871 }, { "epoch": 0.5444246385422856, "grad_norm": 1.3430327041262002, "learning_rate": 9.04816236851292e-06, "loss": 0.1968, "step": 6872 }, { "epoch": 0.544503862150921, "grad_norm": 1.621941606237805, "learning_rate": 9.045608018648884e-06, "loss": 0.2082, "step": 6873 }, { "epoch": 0.5445830857595564, "grad_norm": 1.5529975101368307, "learning_rate": 9.043053731626964e-06, "loss": 0.1695, "step": 6874 }, { "epoch": 0.5446623093681917, "grad_norm": 1.6020556779178161, "learning_rate": 9.040499507615356e-06, "loss": 0.2584, "step": 6875 }, { "epoch": 0.5447415329768271, "grad_norm": 1.7088753541123156, "learning_rate": 9.037945346782236e-06, "loss": 0.1704, "step": 6876 }, { "epoch": 0.5448207565854625, "grad_norm": 1.3635660552877806, "learning_rate": 9.035391249295788e-06, "loss": 0.2199, "step": 6877 }, { "epoch": 0.5448999801940978, "grad_norm": 1.6698307081465469, "learning_rate": 9.032837215324183e-06, "loss": 0.2058, "step": 6878 }, { "epoch": 0.5449792038027332, "grad_norm": 1.61733190614819, "learning_rate": 9.030283245035594e-06, "loss": 0.274, "step": 6879 }, { "epoch": 0.5450584274113686, "grad_norm": 1.4739765052193636, "learning_rate": 9.027729338598188e-06, "loss": 0.2064, "step": 6880 }, { "epoch": 0.545137651020004, "grad_norm": 1.3168806467659693, "learning_rate": 9.025175496180125e-06, "loss": 0.1464, "step": 6881 }, { "epoch": 0.5452168746286393, "grad_norm": 1.5843242248814735, "learning_rate": 9.022621717949566e-06, "loss": 0.1816, "step": 6882 }, { "epoch": 0.5452960982372748, "grad_norm": 1.6291385199715893, "learning_rate": 9.020068004074665e-06, "loss": 0.1827, "step": 6883 }, { "epoch": 0.5453753218459101, "grad_norm": 2.1919884741023283, "learning_rate": 9.01751435472357e-06, "loss": 0.2886, "step": 6884 }, { "epoch": 0.5454545454545454, "grad_norm": 1.8100975323825783, "learning_rate": 9.014960770064429e-06, "loss": 0.2819, "step": 6885 }, { "epoch": 0.5455337690631809, "grad_norm": 1.6905134710212093, "learning_rate": 9.012407250265377e-06, "loss": 0.2282, "step": 6886 }, { "epoch": 0.5456129926718162, "grad_norm": 2.0570904728909833, "learning_rate": 9.009853795494558e-06, "loss": 0.2597, "step": 6887 }, { "epoch": 0.5456922162804516, "grad_norm": 1.4600820729356667, "learning_rate": 9.007300405920105e-06, "loss": 0.24, "step": 6888 }, { "epoch": 0.5457714398890869, "grad_norm": 1.7745493208224687, "learning_rate": 9.00474708171014e-06, "loss": 0.2247, "step": 6889 }, { "epoch": 0.5458506634977223, "grad_norm": 1.6931209458367236, "learning_rate": 9.002193823032791e-06, "loss": 0.1876, "step": 6890 }, { "epoch": 0.5459298871063577, "grad_norm": 1.5057381067310711, "learning_rate": 8.999640630056183e-06, "loss": 0.2032, "step": 6891 }, { "epoch": 0.546009110714993, "grad_norm": 1.7450983974246, "learning_rate": 8.997087502948423e-06, "loss": 0.2253, "step": 6892 }, { "epoch": 0.5460883343236285, "grad_norm": 1.4917284206479513, "learning_rate": 8.994534441877625e-06, "loss": 0.2274, "step": 6893 }, { "epoch": 0.5461675579322638, "grad_norm": 2.001259630946284, "learning_rate": 8.991981447011896e-06, "loss": 0.2456, "step": 6894 }, { "epoch": 0.5462467815408992, "grad_norm": 1.5615594623000488, "learning_rate": 8.989428518519336e-06, "loss": 0.2113, "step": 6895 }, { "epoch": 0.5463260051495346, "grad_norm": 2.1911577341485944, "learning_rate": 8.986875656568047e-06, "loss": 0.2718, "step": 6896 }, { "epoch": 0.5464052287581699, "grad_norm": 1.7277958646873435, "learning_rate": 8.984322861326122e-06, "loss": 0.2703, "step": 6897 }, { "epoch": 0.5464844523668053, "grad_norm": 1.774571443720264, "learning_rate": 8.981770132961649e-06, "loss": 0.2203, "step": 6898 }, { "epoch": 0.5465636759754406, "grad_norm": 1.5073142137142648, "learning_rate": 8.979217471642712e-06, "loss": 0.2412, "step": 6899 }, { "epoch": 0.5466428995840761, "grad_norm": 1.3002400252869717, "learning_rate": 8.976664877537395e-06, "loss": 0.1795, "step": 6900 }, { "epoch": 0.5467221231927114, "grad_norm": 1.8578127862288634, "learning_rate": 8.974112350813771e-06, "loss": 0.2882, "step": 6901 }, { "epoch": 0.5468013468013468, "grad_norm": 1.4904534882014997, "learning_rate": 8.971559891639913e-06, "loss": 0.2295, "step": 6902 }, { "epoch": 0.5468805704099822, "grad_norm": 1.3041952674075796, "learning_rate": 8.969007500183886e-06, "loss": 0.1451, "step": 6903 }, { "epoch": 0.5469597940186175, "grad_norm": 1.8156540611505874, "learning_rate": 8.966455176613754e-06, "loss": 0.2618, "step": 6904 }, { "epoch": 0.5470390176272529, "grad_norm": 1.2825538006730177, "learning_rate": 8.963902921097579e-06, "loss": 0.2486, "step": 6905 }, { "epoch": 0.5471182412358883, "grad_norm": 1.8631402156611554, "learning_rate": 8.961350733803406e-06, "loss": 0.1946, "step": 6906 }, { "epoch": 0.5471974648445237, "grad_norm": 1.6470375724717945, "learning_rate": 8.958798614899291e-06, "loss": 0.2371, "step": 6907 }, { "epoch": 0.547276688453159, "grad_norm": 1.294652547885192, "learning_rate": 8.956246564553282e-06, "loss": 0.2098, "step": 6908 }, { "epoch": 0.5473559120617945, "grad_norm": 1.2678980500082684, "learning_rate": 8.95369458293341e-06, "loss": 0.1126, "step": 6909 }, { "epoch": 0.5474351356704298, "grad_norm": 1.5585670775659421, "learning_rate": 8.951142670207718e-06, "loss": 0.2345, "step": 6910 }, { "epoch": 0.5475143592790651, "grad_norm": 1.7024952271500027, "learning_rate": 8.948590826544232e-06, "loss": 0.2969, "step": 6911 }, { "epoch": 0.5475935828877005, "grad_norm": 1.6387397227618559, "learning_rate": 8.94603905211098e-06, "loss": 0.1659, "step": 6912 }, { "epoch": 0.5476728064963359, "grad_norm": 2.1061774324591918, "learning_rate": 8.943487347075988e-06, "loss": 0.314, "step": 6913 }, { "epoch": 0.5477520301049713, "grad_norm": 1.6830690489349167, "learning_rate": 8.94093571160727e-06, "loss": 0.2152, "step": 6914 }, { "epoch": 0.5478312537136066, "grad_norm": 1.5511579142019574, "learning_rate": 8.938384145872838e-06, "loss": 0.1727, "step": 6915 }, { "epoch": 0.5479104773222421, "grad_norm": 1.911199163525638, "learning_rate": 8.935832650040703e-06, "loss": 0.1795, "step": 6916 }, { "epoch": 0.5479897009308774, "grad_norm": 1.950852220217877, "learning_rate": 8.933281224278867e-06, "loss": 0.2546, "step": 6917 }, { "epoch": 0.5480689245395127, "grad_norm": 1.7313546153517896, "learning_rate": 8.930729868755333e-06, "loss": 0.2049, "step": 6918 }, { "epoch": 0.5481481481481482, "grad_norm": 1.4208556376706853, "learning_rate": 8.928178583638088e-06, "loss": 0.1915, "step": 6919 }, { "epoch": 0.5482273717567835, "grad_norm": 1.8168622063001627, "learning_rate": 8.925627369095125e-06, "loss": 0.2143, "step": 6920 }, { "epoch": 0.5483065953654189, "grad_norm": 1.5562270733643324, "learning_rate": 8.923076225294434e-06, "loss": 0.2199, "step": 6921 }, { "epoch": 0.5483858189740543, "grad_norm": 1.4541212312269738, "learning_rate": 8.920525152403989e-06, "loss": 0.1766, "step": 6922 }, { "epoch": 0.5484650425826897, "grad_norm": 1.7193693110797912, "learning_rate": 8.917974150591772e-06, "loss": 0.3321, "step": 6923 }, { "epoch": 0.548544266191325, "grad_norm": 1.6081911826509625, "learning_rate": 8.915423220025747e-06, "loss": 0.1312, "step": 6924 }, { "epoch": 0.5486234897999603, "grad_norm": 1.5676540063857207, "learning_rate": 8.912872360873885e-06, "loss": 0.2028, "step": 6925 }, { "epoch": 0.5487027134085958, "grad_norm": 1.5704225650977457, "learning_rate": 8.91032157330415e-06, "loss": 0.1777, "step": 6926 }, { "epoch": 0.5487819370172311, "grad_norm": 1.745454871079652, "learning_rate": 8.907770857484493e-06, "loss": 0.2213, "step": 6927 }, { "epoch": 0.5488611606258665, "grad_norm": 1.6703369921751172, "learning_rate": 8.90522021358287e-06, "loss": 0.2198, "step": 6928 }, { "epoch": 0.5489403842345019, "grad_norm": 2.0965192803406456, "learning_rate": 8.90266964176723e-06, "loss": 0.2699, "step": 6929 }, { "epoch": 0.5490196078431373, "grad_norm": 1.3559635350774237, "learning_rate": 8.90011914220551e-06, "loss": 0.2283, "step": 6930 }, { "epoch": 0.5490988314517726, "grad_norm": 1.5489031735078498, "learning_rate": 8.897568715065658e-06, "loss": 0.1436, "step": 6931 }, { "epoch": 0.549178055060408, "grad_norm": 1.852828862896294, "learning_rate": 8.895018360515597e-06, "loss": 0.2383, "step": 6932 }, { "epoch": 0.5492572786690434, "grad_norm": 1.7942279606738023, "learning_rate": 8.892468078723262e-06, "loss": 0.1639, "step": 6933 }, { "epoch": 0.5493365022776787, "grad_norm": 2.163698070692759, "learning_rate": 8.889917869856576e-06, "loss": 0.3001, "step": 6934 }, { "epoch": 0.5494157258863142, "grad_norm": 1.4347570990915213, "learning_rate": 8.887367734083454e-06, "loss": 0.1299, "step": 6935 }, { "epoch": 0.5494949494949495, "grad_norm": 1.6020948842533544, "learning_rate": 8.884817671571815e-06, "loss": 0.2203, "step": 6936 }, { "epoch": 0.5495741731035849, "grad_norm": 1.7404160690046266, "learning_rate": 8.882267682489566e-06, "loss": 0.2077, "step": 6937 }, { "epoch": 0.5496533967122202, "grad_norm": 1.4280478928277809, "learning_rate": 8.879717767004613e-06, "loss": 0.1662, "step": 6938 }, { "epoch": 0.5497326203208556, "grad_norm": 1.5522585438454959, "learning_rate": 8.877167925284855e-06, "loss": 0.1782, "step": 6939 }, { "epoch": 0.549811843929491, "grad_norm": 1.625170206623002, "learning_rate": 8.874618157498183e-06, "loss": 0.1876, "step": 6940 }, { "epoch": 0.5498910675381263, "grad_norm": 1.550319006406342, "learning_rate": 8.872068463812492e-06, "loss": 0.2436, "step": 6941 }, { "epoch": 0.5499702911467618, "grad_norm": 1.7177067006925795, "learning_rate": 8.869518844395667e-06, "loss": 0.2373, "step": 6942 }, { "epoch": 0.5500495147553971, "grad_norm": 1.6049615452117618, "learning_rate": 8.866969299415585e-06, "loss": 0.2415, "step": 6943 }, { "epoch": 0.5501287383640325, "grad_norm": 1.2897454364585075, "learning_rate": 8.864419829040122e-06, "loss": 0.1625, "step": 6944 }, { "epoch": 0.5502079619726679, "grad_norm": 1.6597303844577715, "learning_rate": 8.86187043343715e-06, "loss": 0.1362, "step": 6945 }, { "epoch": 0.5502871855813032, "grad_norm": 1.849790792496832, "learning_rate": 8.859321112774535e-06, "loss": 0.1893, "step": 6946 }, { "epoch": 0.5503664091899386, "grad_norm": 1.58852716633234, "learning_rate": 8.856771867220135e-06, "loss": 0.2134, "step": 6947 }, { "epoch": 0.550445632798574, "grad_norm": 1.7915293270654693, "learning_rate": 8.854222696941807e-06, "loss": 0.2057, "step": 6948 }, { "epoch": 0.5505248564072094, "grad_norm": 1.574054080189603, "learning_rate": 8.8516736021074e-06, "loss": 0.1613, "step": 6949 }, { "epoch": 0.5506040800158447, "grad_norm": 1.8713321733407189, "learning_rate": 8.849124582884762e-06, "loss": 0.2005, "step": 6950 }, { "epoch": 0.5506833036244801, "grad_norm": 1.7987161318970615, "learning_rate": 8.846575639441732e-06, "loss": 0.2935, "step": 6951 }, { "epoch": 0.5507625272331155, "grad_norm": 2.041162608096013, "learning_rate": 8.844026771946148e-06, "loss": 0.3758, "step": 6952 }, { "epoch": 0.5508417508417508, "grad_norm": 1.3627367382965248, "learning_rate": 8.841477980565838e-06, "loss": 0.217, "step": 6953 }, { "epoch": 0.5509209744503862, "grad_norm": 2.2267755660566944, "learning_rate": 8.838929265468627e-06, "loss": 0.2653, "step": 6954 }, { "epoch": 0.5510001980590216, "grad_norm": 1.711149475187765, "learning_rate": 8.836380626822339e-06, "loss": 0.2022, "step": 6955 }, { "epoch": 0.551079421667657, "grad_norm": 1.8631335363885488, "learning_rate": 8.833832064794787e-06, "loss": 0.3493, "step": 6956 }, { "epoch": 0.5511586452762923, "grad_norm": 1.7089585466617916, "learning_rate": 8.831283579553781e-06, "loss": 0.1445, "step": 6957 }, { "epoch": 0.5512378688849278, "grad_norm": 1.4259605261245403, "learning_rate": 8.828735171267131e-06, "loss": 0.2295, "step": 6958 }, { "epoch": 0.5513170924935631, "grad_norm": 1.127324555335012, "learning_rate": 8.82618684010263e-06, "loss": 0.1599, "step": 6959 }, { "epoch": 0.5513963161021984, "grad_norm": 1.6127379805024942, "learning_rate": 8.823638586228081e-06, "loss": 0.2846, "step": 6960 }, { "epoch": 0.5514755397108339, "grad_norm": 1.7341644378888477, "learning_rate": 8.82109040981127e-06, "loss": 0.2213, "step": 6961 }, { "epoch": 0.5515547633194692, "grad_norm": 1.606602164675874, "learning_rate": 8.818542311019982e-06, "loss": 0.2139, "step": 6962 }, { "epoch": 0.5516339869281046, "grad_norm": 1.751798026497039, "learning_rate": 8.815994290022e-06, "loss": 0.2841, "step": 6963 }, { "epoch": 0.5517132105367399, "grad_norm": 1.5172786185685385, "learning_rate": 8.813446346985095e-06, "loss": 0.1577, "step": 6964 }, { "epoch": 0.5517924341453754, "grad_norm": 1.861339583970913, "learning_rate": 8.810898482077038e-06, "loss": 0.2756, "step": 6965 }, { "epoch": 0.5518716577540107, "grad_norm": 1.5778570150187614, "learning_rate": 8.808350695465597e-06, "loss": 0.2155, "step": 6966 }, { "epoch": 0.551950881362646, "grad_norm": 1.7950250162561092, "learning_rate": 8.805802987318527e-06, "loss": 0.2523, "step": 6967 }, { "epoch": 0.5520301049712815, "grad_norm": 1.2794079673728658, "learning_rate": 8.803255357803584e-06, "loss": 0.1913, "step": 6968 }, { "epoch": 0.5521093285799168, "grad_norm": 1.5682419708075892, "learning_rate": 8.800707807088521e-06, "loss": 0.1828, "step": 6969 }, { "epoch": 0.5521885521885522, "grad_norm": 2.1114238654780615, "learning_rate": 8.798160335341078e-06, "loss": 0.2739, "step": 6970 }, { "epoch": 0.5522677757971876, "grad_norm": 1.8964277903416056, "learning_rate": 8.795612942728989e-06, "loss": 0.2621, "step": 6971 }, { "epoch": 0.5523469994058229, "grad_norm": 1.4650625724696595, "learning_rate": 8.793065629419996e-06, "loss": 0.2222, "step": 6972 }, { "epoch": 0.5524262230144583, "grad_norm": 1.5752744680573632, "learning_rate": 8.790518395581823e-06, "loss": 0.2224, "step": 6973 }, { "epoch": 0.5525054466230936, "grad_norm": 2.050986661584186, "learning_rate": 8.787971241382193e-06, "loss": 0.2312, "step": 6974 }, { "epoch": 0.5525846702317291, "grad_norm": 1.4061768629540126, "learning_rate": 8.785424166988827e-06, "loss": 0.1915, "step": 6975 }, { "epoch": 0.5526638938403644, "grad_norm": 1.4577425464026734, "learning_rate": 8.782877172569433e-06, "loss": 0.1579, "step": 6976 }, { "epoch": 0.5527431174489998, "grad_norm": 1.5977697023573567, "learning_rate": 8.78033025829172e-06, "loss": 0.2339, "step": 6977 }, { "epoch": 0.5528223410576352, "grad_norm": 1.5270255423035342, "learning_rate": 8.777783424323396e-06, "loss": 0.1964, "step": 6978 }, { "epoch": 0.5529015646662705, "grad_norm": 1.4756424020295873, "learning_rate": 8.775236670832146e-06, "loss": 0.1957, "step": 6979 }, { "epoch": 0.5529807882749059, "grad_norm": 1.6871241826511918, "learning_rate": 8.772689997985674e-06, "loss": 0.2115, "step": 6980 }, { "epoch": 0.5530600118835413, "grad_norm": 1.774865193215159, "learning_rate": 8.770143405951657e-06, "loss": 0.2942, "step": 6981 }, { "epoch": 0.5531392354921767, "grad_norm": 1.2867774873892495, "learning_rate": 8.76759689489778e-06, "loss": 0.1798, "step": 6982 }, { "epoch": 0.553218459100812, "grad_norm": 1.5293140409656585, "learning_rate": 8.765050464991716e-06, "loss": 0.2086, "step": 6983 }, { "epoch": 0.5532976827094475, "grad_norm": 1.5471963562459794, "learning_rate": 8.762504116401137e-06, "loss": 0.2097, "step": 6984 }, { "epoch": 0.5533769063180828, "grad_norm": 1.6418277040399487, "learning_rate": 8.759957849293707e-06, "loss": 0.1733, "step": 6985 }, { "epoch": 0.5534561299267181, "grad_norm": 1.9486942542674106, "learning_rate": 8.75741166383709e-06, "loss": 0.2954, "step": 6986 }, { "epoch": 0.5535353535353535, "grad_norm": 1.6323040056592095, "learning_rate": 8.754865560198932e-06, "loss": 0.202, "step": 6987 }, { "epoch": 0.5536145771439889, "grad_norm": 1.632650043921622, "learning_rate": 8.752319538546888e-06, "loss": 0.2318, "step": 6988 }, { "epoch": 0.5536938007526243, "grad_norm": 1.9049238118255067, "learning_rate": 8.749773599048597e-06, "loss": 0.3178, "step": 6989 }, { "epoch": 0.5537730243612596, "grad_norm": 1.2387705762804189, "learning_rate": 8.747227741871698e-06, "loss": 0.1208, "step": 6990 }, { "epoch": 0.5538522479698951, "grad_norm": 1.4827469680734027, "learning_rate": 8.744681967183826e-06, "loss": 0.2283, "step": 6991 }, { "epoch": 0.5539314715785304, "grad_norm": 2.5238770672702913, "learning_rate": 8.742136275152606e-06, "loss": 0.2698, "step": 6992 }, { "epoch": 0.5540106951871657, "grad_norm": 1.458531118939311, "learning_rate": 8.73959066594566e-06, "loss": 0.2072, "step": 6993 }, { "epoch": 0.5540899187958012, "grad_norm": 1.4685900241815824, "learning_rate": 8.737045139730605e-06, "loss": 0.2103, "step": 6994 }, { "epoch": 0.5541691424044365, "grad_norm": 1.088024988992133, "learning_rate": 8.734499696675048e-06, "loss": 0.1675, "step": 6995 }, { "epoch": 0.5542483660130719, "grad_norm": 1.454133301750155, "learning_rate": 8.731954336946599e-06, "loss": 0.1447, "step": 6996 }, { "epoch": 0.5543275896217073, "grad_norm": 1.6261155433945458, "learning_rate": 8.729409060712855e-06, "loss": 0.2502, "step": 6997 }, { "epoch": 0.5544068132303427, "grad_norm": 1.5350964635663378, "learning_rate": 8.726863868141408e-06, "loss": 0.1963, "step": 6998 }, { "epoch": 0.554486036838978, "grad_norm": 1.6261410328562709, "learning_rate": 8.724318759399853e-06, "loss": 0.2401, "step": 6999 }, { "epoch": 0.5545652604476133, "grad_norm": 1.6105373044716385, "learning_rate": 8.721773734655768e-06, "loss": 0.1873, "step": 7000 }, { "epoch": 0.5546444840562488, "grad_norm": 1.5434667181935557, "learning_rate": 8.719228794076733e-06, "loss": 0.1313, "step": 7001 }, { "epoch": 0.5547237076648841, "grad_norm": 1.4115933195861863, "learning_rate": 8.716683937830318e-06, "loss": 0.208, "step": 7002 }, { "epoch": 0.5548029312735195, "grad_norm": 1.7185539381379835, "learning_rate": 8.71413916608409e-06, "loss": 0.2173, "step": 7003 }, { "epoch": 0.5548821548821549, "grad_norm": 1.7608151033548984, "learning_rate": 8.711594479005614e-06, "loss": 0.1918, "step": 7004 }, { "epoch": 0.5549613784907903, "grad_norm": 1.545132372450632, "learning_rate": 8.709049876762438e-06, "loss": 0.1531, "step": 7005 }, { "epoch": 0.5550406020994256, "grad_norm": 1.8616259398056916, "learning_rate": 8.706505359522119e-06, "loss": 0.2884, "step": 7006 }, { "epoch": 0.555119825708061, "grad_norm": 1.599147292505235, "learning_rate": 8.703960927452197e-06, "loss": 0.1667, "step": 7007 }, { "epoch": 0.5551990493166964, "grad_norm": 1.8157891353523499, "learning_rate": 8.701416580720212e-06, "loss": 0.3096, "step": 7008 }, { "epoch": 0.5552782729253317, "grad_norm": 1.8618291776958673, "learning_rate": 8.698872319493698e-06, "loss": 0.2502, "step": 7009 }, { "epoch": 0.5553574965339672, "grad_norm": 1.5154651043474558, "learning_rate": 8.69632814394018e-06, "loss": 0.1671, "step": 7010 }, { "epoch": 0.5554367201426025, "grad_norm": 1.5029504510030596, "learning_rate": 8.693784054227179e-06, "loss": 0.2085, "step": 7011 }, { "epoch": 0.5555159437512379, "grad_norm": 2.4916637363742713, "learning_rate": 8.691240050522215e-06, "loss": 0.3906, "step": 7012 }, { "epoch": 0.5555951673598732, "grad_norm": 1.8632398798315006, "learning_rate": 8.688696132992797e-06, "loss": 0.251, "step": 7013 }, { "epoch": 0.5556743909685086, "grad_norm": 1.668005116987391, "learning_rate": 8.686152301806427e-06, "loss": 0.299, "step": 7014 }, { "epoch": 0.555753614577144, "grad_norm": 1.5763005914047759, "learning_rate": 8.683608557130608e-06, "loss": 0.1862, "step": 7015 }, { "epoch": 0.5558328381857793, "grad_norm": 1.8785291200939969, "learning_rate": 8.681064899132831e-06, "loss": 0.2573, "step": 7016 }, { "epoch": 0.5559120617944148, "grad_norm": 1.1427925614939018, "learning_rate": 8.678521327980585e-06, "loss": 0.1651, "step": 7017 }, { "epoch": 0.5559912854030501, "grad_norm": 1.3265957407081017, "learning_rate": 8.675977843841347e-06, "loss": 0.1765, "step": 7018 }, { "epoch": 0.5560705090116855, "grad_norm": 1.532299598003422, "learning_rate": 8.673434446882601e-06, "loss": 0.1854, "step": 7019 }, { "epoch": 0.5561497326203209, "grad_norm": 1.36563762271964, "learning_rate": 8.670891137271814e-06, "loss": 0.1601, "step": 7020 }, { "epoch": 0.5562289562289562, "grad_norm": 1.5720984646446576, "learning_rate": 8.668347915176448e-06, "loss": 0.1959, "step": 7021 }, { "epoch": 0.5563081798375916, "grad_norm": 1.647567756585152, "learning_rate": 8.665804780763963e-06, "loss": 0.2458, "step": 7022 }, { "epoch": 0.556387403446227, "grad_norm": 1.518473675760264, "learning_rate": 8.663261734201818e-06, "loss": 0.2018, "step": 7023 }, { "epoch": 0.5564666270548624, "grad_norm": 1.6392680624778466, "learning_rate": 8.660718775657453e-06, "loss": 0.2453, "step": 7024 }, { "epoch": 0.5565458506634977, "grad_norm": 1.626149944744027, "learning_rate": 8.658175905298314e-06, "loss": 0.225, "step": 7025 }, { "epoch": 0.5566250742721331, "grad_norm": 1.6966603918627359, "learning_rate": 8.655633123291833e-06, "loss": 0.1817, "step": 7026 }, { "epoch": 0.5567042978807685, "grad_norm": 1.5520737574007002, "learning_rate": 8.653090429805442e-06, "loss": 0.235, "step": 7027 }, { "epoch": 0.5567835214894038, "grad_norm": 1.505457128164782, "learning_rate": 8.650547825006568e-06, "loss": 0.1618, "step": 7028 }, { "epoch": 0.5568627450980392, "grad_norm": 1.425360727244507, "learning_rate": 8.648005309062623e-06, "loss": 0.2026, "step": 7029 }, { "epoch": 0.5569419687066746, "grad_norm": 1.362019184205673, "learning_rate": 8.645462882141026e-06, "loss": 0.1895, "step": 7030 }, { "epoch": 0.55702119231531, "grad_norm": 1.3744701799224086, "learning_rate": 8.64292054440918e-06, "loss": 0.1814, "step": 7031 }, { "epoch": 0.5571004159239453, "grad_norm": 1.5469267017965995, "learning_rate": 8.640378296034486e-06, "loss": 0.1479, "step": 7032 }, { "epoch": 0.5571796395325808, "grad_norm": 1.8600546026971057, "learning_rate": 8.63783613718434e-06, "loss": 0.288, "step": 7033 }, { "epoch": 0.5572588631412161, "grad_norm": 1.634389970043728, "learning_rate": 8.63529406802613e-06, "loss": 0.2219, "step": 7034 }, { "epoch": 0.5573380867498514, "grad_norm": 1.8232736260784956, "learning_rate": 8.632752088727237e-06, "loss": 0.1966, "step": 7035 }, { "epoch": 0.5574173103584869, "grad_norm": 1.2814330212891232, "learning_rate": 8.63021019945504e-06, "loss": 0.1842, "step": 7036 }, { "epoch": 0.5574965339671222, "grad_norm": 1.6824010340234732, "learning_rate": 8.627668400376914e-06, "loss": 0.1764, "step": 7037 }, { "epoch": 0.5575757575757576, "grad_norm": 2.0168679427511496, "learning_rate": 8.625126691660216e-06, "loss": 0.2556, "step": 7038 }, { "epoch": 0.5576549811843929, "grad_norm": 1.6774431748932268, "learning_rate": 8.622585073472314e-06, "loss": 0.213, "step": 7039 }, { "epoch": 0.5577342047930284, "grad_norm": 1.9162775200372715, "learning_rate": 8.620043545980554e-06, "loss": 0.3046, "step": 7040 }, { "epoch": 0.5578134284016637, "grad_norm": 2.0185677142736975, "learning_rate": 8.61750210935229e-06, "loss": 0.2078, "step": 7041 }, { "epoch": 0.557892652010299, "grad_norm": 1.7387826411982605, "learning_rate": 8.614960763754857e-06, "loss": 0.2617, "step": 7042 }, { "epoch": 0.5579718756189345, "grad_norm": 1.5293351639663046, "learning_rate": 8.612419509355593e-06, "loss": 0.1471, "step": 7043 }, { "epoch": 0.5580510992275698, "grad_norm": 1.5035641910072244, "learning_rate": 8.60987834632183e-06, "loss": 0.2019, "step": 7044 }, { "epoch": 0.5581303228362052, "grad_norm": 1.7479100345026106, "learning_rate": 8.607337274820888e-06, "loss": 0.2621, "step": 7045 }, { "epoch": 0.5582095464448406, "grad_norm": 1.5016324035738737, "learning_rate": 8.604796295020085e-06, "loss": 0.2455, "step": 7046 }, { "epoch": 0.558288770053476, "grad_norm": 1.8322583558460637, "learning_rate": 8.602255407086736e-06, "loss": 0.2273, "step": 7047 }, { "epoch": 0.5583679936621113, "grad_norm": 2.0342300687960817, "learning_rate": 8.599714611188141e-06, "loss": 0.284, "step": 7048 }, { "epoch": 0.5584472172707466, "grad_norm": 1.458983267594101, "learning_rate": 8.5971739074916e-06, "loss": 0.1706, "step": 7049 }, { "epoch": 0.5585264408793821, "grad_norm": 1.7067220816106976, "learning_rate": 8.594633296164409e-06, "loss": 0.2222, "step": 7050 }, { "epoch": 0.5586056644880174, "grad_norm": 1.5358251518042219, "learning_rate": 8.59209277737385e-06, "loss": 0.1735, "step": 7051 }, { "epoch": 0.5586848880966528, "grad_norm": 1.726884728030867, "learning_rate": 8.58955235128721e-06, "loss": 0.2544, "step": 7052 }, { "epoch": 0.5587641117052882, "grad_norm": 1.8539779357253885, "learning_rate": 8.58701201807176e-06, "loss": 0.2531, "step": 7053 }, { "epoch": 0.5588433353139235, "grad_norm": 1.7504385909576572, "learning_rate": 8.584471777894768e-06, "loss": 0.1592, "step": 7054 }, { "epoch": 0.5589225589225589, "grad_norm": 1.5961772254361881, "learning_rate": 8.581931630923499e-06, "loss": 0.175, "step": 7055 }, { "epoch": 0.5590017825311943, "grad_norm": 1.6783653790384048, "learning_rate": 8.57939157732521e-06, "loss": 0.1604, "step": 7056 }, { "epoch": 0.5590810061398297, "grad_norm": 2.2506319325283575, "learning_rate": 8.576851617267151e-06, "loss": 0.2192, "step": 7057 }, { "epoch": 0.559160229748465, "grad_norm": 1.5135739549219434, "learning_rate": 8.574311750916565e-06, "loss": 0.1792, "step": 7058 }, { "epoch": 0.5592394533571005, "grad_norm": 1.5576204767986435, "learning_rate": 8.571771978440689e-06, "loss": 0.3164, "step": 7059 }, { "epoch": 0.5593186769657358, "grad_norm": 1.5030744712196413, "learning_rate": 8.569232300006756e-06, "loss": 0.1651, "step": 7060 }, { "epoch": 0.5593979005743711, "grad_norm": 1.6299736578697477, "learning_rate": 8.566692715781992e-06, "loss": 0.1602, "step": 7061 }, { "epoch": 0.5594771241830065, "grad_norm": 1.6943766912092884, "learning_rate": 8.564153225933616e-06, "loss": 0.2802, "step": 7062 }, { "epoch": 0.5595563477916419, "grad_norm": 1.279686639516914, "learning_rate": 8.56161383062884e-06, "loss": 0.188, "step": 7063 }, { "epoch": 0.5596355714002773, "grad_norm": 1.5576386856168152, "learning_rate": 8.559074530034875e-06, "loss": 0.1752, "step": 7064 }, { "epoch": 0.5597147950089126, "grad_norm": 1.2931759137893757, "learning_rate": 8.556535324318916e-06, "loss": 0.1506, "step": 7065 }, { "epoch": 0.5597940186175481, "grad_norm": 1.5379778082888391, "learning_rate": 8.553996213648164e-06, "loss": 0.1985, "step": 7066 }, { "epoch": 0.5598732422261834, "grad_norm": 1.4897685073988978, "learning_rate": 8.551457198189799e-06, "loss": 0.1567, "step": 7067 }, { "epoch": 0.5599524658348187, "grad_norm": 1.2151206521891214, "learning_rate": 8.54891827811101e-06, "loss": 0.0967, "step": 7068 }, { "epoch": 0.5600316894434542, "grad_norm": 1.89789025465256, "learning_rate": 8.546379453578972e-06, "loss": 0.211, "step": 7069 }, { "epoch": 0.5601109130520895, "grad_norm": 2.259174037915149, "learning_rate": 8.543840724760848e-06, "loss": 0.2666, "step": 7070 }, { "epoch": 0.5601901366607249, "grad_norm": 1.393532075492857, "learning_rate": 8.541302091823809e-06, "loss": 0.1897, "step": 7071 }, { "epoch": 0.5602693602693603, "grad_norm": 1.7319833635416846, "learning_rate": 8.538763554935008e-06, "loss": 0.1904, "step": 7072 }, { "epoch": 0.5603485838779957, "grad_norm": 1.3014958372636867, "learning_rate": 8.536225114261597e-06, "loss": 0.1715, "step": 7073 }, { "epoch": 0.560427807486631, "grad_norm": 1.7671573332060513, "learning_rate": 8.533686769970717e-06, "loss": 0.2346, "step": 7074 }, { "epoch": 0.5605070310952663, "grad_norm": 1.5706183684872626, "learning_rate": 8.531148522229509e-06, "loss": 0.1548, "step": 7075 }, { "epoch": 0.5605862547039018, "grad_norm": 1.7312126931837069, "learning_rate": 8.528610371205102e-06, "loss": 0.2395, "step": 7076 }, { "epoch": 0.5606654783125371, "grad_norm": 2.362450875890349, "learning_rate": 8.526072317064623e-06, "loss": 0.289, "step": 7077 }, { "epoch": 0.5607447019211725, "grad_norm": 1.6111896914971335, "learning_rate": 8.52353435997519e-06, "loss": 0.2691, "step": 7078 }, { "epoch": 0.5608239255298079, "grad_norm": 1.6134377068100971, "learning_rate": 8.520996500103915e-06, "loss": 0.1893, "step": 7079 }, { "epoch": 0.5609031491384433, "grad_norm": 1.9219918345241058, "learning_rate": 8.518458737617903e-06, "loss": 0.2737, "step": 7080 }, { "epoch": 0.5609823727470786, "grad_norm": 1.538529187539912, "learning_rate": 8.515921072684255e-06, "loss": 0.1759, "step": 7081 }, { "epoch": 0.561061596355714, "grad_norm": 1.4713101838348777, "learning_rate": 8.513383505470065e-06, "loss": 0.2206, "step": 7082 }, { "epoch": 0.5611408199643494, "grad_norm": 1.7520911411183, "learning_rate": 8.510846036142415e-06, "loss": 0.3054, "step": 7083 }, { "epoch": 0.5612200435729847, "grad_norm": 1.1626877692646231, "learning_rate": 8.50830866486839e-06, "loss": 0.1594, "step": 7084 }, { "epoch": 0.5612992671816202, "grad_norm": 1.5193398768990873, "learning_rate": 8.505771391815061e-06, "loss": 0.2715, "step": 7085 }, { "epoch": 0.5613784907902555, "grad_norm": 1.5537944908146006, "learning_rate": 8.503234217149496e-06, "loss": 0.2298, "step": 7086 }, { "epoch": 0.5614577143988909, "grad_norm": 1.7183227787774709, "learning_rate": 8.500697141038758e-06, "loss": 0.2214, "step": 7087 }, { "epoch": 0.5615369380075262, "grad_norm": 1.3589841961446756, "learning_rate": 8.498160163649896e-06, "loss": 0.1803, "step": 7088 }, { "epoch": 0.5616161616161616, "grad_norm": 1.3748286419187215, "learning_rate": 8.495623285149962e-06, "loss": 0.2183, "step": 7089 }, { "epoch": 0.561695385224797, "grad_norm": 1.3126464107669036, "learning_rate": 8.493086505705998e-06, "loss": 0.1808, "step": 7090 }, { "epoch": 0.5617746088334323, "grad_norm": 1.479596500850297, "learning_rate": 8.490549825485036e-06, "loss": 0.262, "step": 7091 }, { "epoch": 0.5618538324420678, "grad_norm": 1.6719535508226904, "learning_rate": 8.488013244654103e-06, "loss": 0.1625, "step": 7092 }, { "epoch": 0.5619330560507031, "grad_norm": 1.6234137226584349, "learning_rate": 8.485476763380224e-06, "loss": 0.245, "step": 7093 }, { "epoch": 0.5620122796593385, "grad_norm": 1.562111667493653, "learning_rate": 8.482940381830412e-06, "loss": 0.1742, "step": 7094 }, { "epoch": 0.5620915032679739, "grad_norm": 1.6377421872221636, "learning_rate": 8.480404100171677e-06, "loss": 0.2491, "step": 7095 }, { "epoch": 0.5621707268766092, "grad_norm": 1.3929033722375574, "learning_rate": 8.47786791857102e-06, "loss": 0.208, "step": 7096 }, { "epoch": 0.5622499504852446, "grad_norm": 1.5094049320700702, "learning_rate": 8.475331837195435e-06, "loss": 0.2318, "step": 7097 }, { "epoch": 0.56232917409388, "grad_norm": 1.9417872528439555, "learning_rate": 8.472795856211916e-06, "loss": 0.2516, "step": 7098 }, { "epoch": 0.5624083977025154, "grad_norm": 1.508063606285105, "learning_rate": 8.470259975787438e-06, "loss": 0.2251, "step": 7099 }, { "epoch": 0.5624876213111507, "grad_norm": 1.7031937074744565, "learning_rate": 8.46772419608898e-06, "loss": 0.1813, "step": 7100 }, { "epoch": 0.5625668449197861, "grad_norm": 2.1771050434669417, "learning_rate": 8.465188517283514e-06, "loss": 0.2041, "step": 7101 }, { "epoch": 0.5626460685284215, "grad_norm": 1.7235678854995575, "learning_rate": 8.462652939537996e-06, "loss": 0.2245, "step": 7102 }, { "epoch": 0.5627252921370568, "grad_norm": 2.0493938854449705, "learning_rate": 8.460117463019387e-06, "loss": 0.1878, "step": 7103 }, { "epoch": 0.5628045157456922, "grad_norm": 1.454055133373079, "learning_rate": 8.457582087894631e-06, "loss": 0.1699, "step": 7104 }, { "epoch": 0.5628837393543276, "grad_norm": 1.7948045167220608, "learning_rate": 8.455046814330674e-06, "loss": 0.2897, "step": 7105 }, { "epoch": 0.562962962962963, "grad_norm": 1.488977244845906, "learning_rate": 8.452511642494453e-06, "loss": 0.2535, "step": 7106 }, { "epoch": 0.5630421865715983, "grad_norm": 1.7550587822166357, "learning_rate": 8.449976572552891e-06, "loss": 0.3068, "step": 7107 }, { "epoch": 0.5631214101802338, "grad_norm": 1.1724516498937883, "learning_rate": 8.447441604672913e-06, "loss": 0.1294, "step": 7108 }, { "epoch": 0.5632006337888691, "grad_norm": 1.35873742081911, "learning_rate": 8.444906739021438e-06, "loss": 0.1959, "step": 7109 }, { "epoch": 0.5632798573975044, "grad_norm": 1.849551135944485, "learning_rate": 8.442371975765368e-06, "loss": 0.1839, "step": 7110 }, { "epoch": 0.5633590810061399, "grad_norm": 1.7115123747383312, "learning_rate": 8.439837315071612e-06, "loss": 0.238, "step": 7111 }, { "epoch": 0.5634383046147752, "grad_norm": 1.5275704121290707, "learning_rate": 8.43730275710706e-06, "loss": 0.259, "step": 7112 }, { "epoch": 0.5635175282234106, "grad_norm": 1.2485642708862594, "learning_rate": 8.434768302038602e-06, "loss": 0.1226, "step": 7113 }, { "epoch": 0.5635967518320459, "grad_norm": 1.6221255556295489, "learning_rate": 8.432233950033122e-06, "loss": 0.2209, "step": 7114 }, { "epoch": 0.5636759754406814, "grad_norm": 1.8749249896524045, "learning_rate": 8.42969970125749e-06, "loss": 0.2987, "step": 7115 }, { "epoch": 0.5637551990493167, "grad_norm": 1.358266941421217, "learning_rate": 8.427165555878577e-06, "loss": 0.1657, "step": 7116 }, { "epoch": 0.563834422657952, "grad_norm": 1.6746509953691089, "learning_rate": 8.424631514063247e-06, "loss": 0.1594, "step": 7117 }, { "epoch": 0.5639136462665875, "grad_norm": 1.8553999194635002, "learning_rate": 8.422097575978349e-06, "loss": 0.2838, "step": 7118 }, { "epoch": 0.5639928698752228, "grad_norm": 1.1729592551162313, "learning_rate": 8.419563741790735e-06, "loss": 0.0975, "step": 7119 }, { "epoch": 0.5640720934838582, "grad_norm": 1.469682119112714, "learning_rate": 8.417030011667241e-06, "loss": 0.1765, "step": 7120 }, { "epoch": 0.5641513170924936, "grad_norm": 1.9009284070085266, "learning_rate": 8.414496385774706e-06, "loss": 0.3081, "step": 7121 }, { "epoch": 0.564230540701129, "grad_norm": 1.2972915281593267, "learning_rate": 8.411962864279957e-06, "loss": 0.1161, "step": 7122 }, { "epoch": 0.5643097643097643, "grad_norm": 1.421175009953936, "learning_rate": 8.409429447349811e-06, "loss": 0.1959, "step": 7123 }, { "epoch": 0.5643889879183996, "grad_norm": 1.5795193011973268, "learning_rate": 8.406896135151081e-06, "loss": 0.2066, "step": 7124 }, { "epoch": 0.5644682115270351, "grad_norm": 1.7249032655909373, "learning_rate": 8.40436292785058e-06, "loss": 0.1953, "step": 7125 }, { "epoch": 0.5645474351356704, "grad_norm": 1.6477759025681094, "learning_rate": 8.401829825615098e-06, "loss": 0.2457, "step": 7126 }, { "epoch": 0.5646266587443058, "grad_norm": 1.703867785688905, "learning_rate": 8.399296828611433e-06, "loss": 0.1727, "step": 7127 }, { "epoch": 0.5647058823529412, "grad_norm": 1.4537347447742368, "learning_rate": 8.396763937006369e-06, "loss": 0.1968, "step": 7128 }, { "epoch": 0.5647851059615765, "grad_norm": 1.7044491792562788, "learning_rate": 8.394231150966685e-06, "loss": 0.2567, "step": 7129 }, { "epoch": 0.5648643295702119, "grad_norm": 1.7494152612326808, "learning_rate": 8.391698470659154e-06, "loss": 0.239, "step": 7130 }, { "epoch": 0.5649435531788473, "grad_norm": 1.7824215198518916, "learning_rate": 8.38916589625054e-06, "loss": 0.1976, "step": 7131 }, { "epoch": 0.5650227767874827, "grad_norm": 1.6691651293977405, "learning_rate": 8.3866334279076e-06, "loss": 0.2897, "step": 7132 }, { "epoch": 0.565102000396118, "grad_norm": 1.6804170572525348, "learning_rate": 8.384101065797087e-06, "loss": 0.2608, "step": 7133 }, { "epoch": 0.5651812240047535, "grad_norm": 1.5717283029091211, "learning_rate": 8.381568810085745e-06, "loss": 0.1569, "step": 7134 }, { "epoch": 0.5652604476133888, "grad_norm": 1.8698896197112826, "learning_rate": 8.379036660940306e-06, "loss": 0.2896, "step": 7135 }, { "epoch": 0.5653396712220241, "grad_norm": 1.359905107395929, "learning_rate": 8.376504618527505e-06, "loss": 0.1529, "step": 7136 }, { "epoch": 0.5654188948306595, "grad_norm": 2.1172315685875507, "learning_rate": 8.373972683014063e-06, "loss": 0.2671, "step": 7137 }, { "epoch": 0.5654981184392949, "grad_norm": 1.8685949512210573, "learning_rate": 8.371440854566696e-06, "loss": 0.1816, "step": 7138 }, { "epoch": 0.5655773420479303, "grad_norm": 1.4443374683745343, "learning_rate": 8.368909133352114e-06, "loss": 0.2118, "step": 7139 }, { "epoch": 0.5656565656565656, "grad_norm": 1.8430231806352138, "learning_rate": 8.366377519537015e-06, "loss": 0.2176, "step": 7140 }, { "epoch": 0.5657357892652011, "grad_norm": 1.448194028148172, "learning_rate": 8.363846013288096e-06, "loss": 0.1763, "step": 7141 }, { "epoch": 0.5658150128738364, "grad_norm": 1.5906568744506333, "learning_rate": 8.361314614772047e-06, "loss": 0.2464, "step": 7142 }, { "epoch": 0.5658942364824717, "grad_norm": 1.586304249770699, "learning_rate": 8.358783324155542e-06, "loss": 0.1753, "step": 7143 }, { "epoch": 0.5659734600911072, "grad_norm": 1.5927055966297055, "learning_rate": 8.35625214160526e-06, "loss": 0.202, "step": 7144 }, { "epoch": 0.5660526836997425, "grad_norm": 1.8085719363079078, "learning_rate": 8.353721067287865e-06, "loss": 0.2004, "step": 7145 }, { "epoch": 0.5661319073083779, "grad_norm": 2.121741541407398, "learning_rate": 8.351190101370016e-06, "loss": 0.1954, "step": 7146 }, { "epoch": 0.5662111309170133, "grad_norm": 1.7456032842756144, "learning_rate": 8.348659244018367e-06, "loss": 0.2393, "step": 7147 }, { "epoch": 0.5662903545256487, "grad_norm": 2.033597040511318, "learning_rate": 8.34612849539956e-06, "loss": 0.2559, "step": 7148 }, { "epoch": 0.566369578134284, "grad_norm": 1.6191205875804606, "learning_rate": 8.343597855680231e-06, "loss": 0.186, "step": 7149 }, { "epoch": 0.5664488017429193, "grad_norm": 1.285906488415901, "learning_rate": 8.341067325027017e-06, "loss": 0.1488, "step": 7150 }, { "epoch": 0.5665280253515548, "grad_norm": 1.8129363283333348, "learning_rate": 8.338536903606535e-06, "loss": 0.1845, "step": 7151 }, { "epoch": 0.5666072489601901, "grad_norm": 1.4764238521334778, "learning_rate": 8.336006591585406e-06, "loss": 0.1514, "step": 7152 }, { "epoch": 0.5666864725688255, "grad_norm": 1.5697217819577614, "learning_rate": 8.333476389130234e-06, "loss": 0.2412, "step": 7153 }, { "epoch": 0.5667656961774609, "grad_norm": 1.3757906492537588, "learning_rate": 8.330946296407622e-06, "loss": 0.17, "step": 7154 }, { "epoch": 0.5668449197860963, "grad_norm": 1.417879707074937, "learning_rate": 8.328416313584169e-06, "loss": 0.1365, "step": 7155 }, { "epoch": 0.5669241433947316, "grad_norm": 1.3279243842852761, "learning_rate": 8.325886440826457e-06, "loss": 0.1422, "step": 7156 }, { "epoch": 0.567003367003367, "grad_norm": 1.5803829358564379, "learning_rate": 8.323356678301067e-06, "loss": 0.164, "step": 7157 }, { "epoch": 0.5670825906120024, "grad_norm": 1.926349933651766, "learning_rate": 8.320827026174572e-06, "loss": 0.2229, "step": 7158 }, { "epoch": 0.5671618142206377, "grad_norm": 1.8016315606923126, "learning_rate": 8.318297484613538e-06, "loss": 0.2279, "step": 7159 }, { "epoch": 0.5672410378292732, "grad_norm": 1.8206023360645793, "learning_rate": 8.315768053784524e-06, "loss": 0.2158, "step": 7160 }, { "epoch": 0.5673202614379085, "grad_norm": 1.8935170887687283, "learning_rate": 8.313238733854076e-06, "loss": 0.2595, "step": 7161 }, { "epoch": 0.5673994850465439, "grad_norm": 1.6527108055516293, "learning_rate": 8.310709524988743e-06, "loss": 0.1807, "step": 7162 }, { "epoch": 0.5674787086551792, "grad_norm": 2.237203453728549, "learning_rate": 8.308180427355062e-06, "loss": 0.3659, "step": 7163 }, { "epoch": 0.5675579322638146, "grad_norm": 1.9876975880569285, "learning_rate": 8.305651441119558e-06, "loss": 0.1707, "step": 7164 }, { "epoch": 0.56763715587245, "grad_norm": 2.0343381389879127, "learning_rate": 8.303122566448754e-06, "loss": 0.1755, "step": 7165 }, { "epoch": 0.5677163794810853, "grad_norm": 1.6880433066022393, "learning_rate": 8.300593803509163e-06, "loss": 0.2335, "step": 7166 }, { "epoch": 0.5677956030897208, "grad_norm": 1.799216347148959, "learning_rate": 8.298065152467293e-06, "loss": 0.2435, "step": 7167 }, { "epoch": 0.5678748266983561, "grad_norm": 1.4092505555146648, "learning_rate": 8.295536613489645e-06, "loss": 0.192, "step": 7168 }, { "epoch": 0.5679540503069915, "grad_norm": 1.567213366180453, "learning_rate": 8.293008186742708e-06, "loss": 0.2056, "step": 7169 }, { "epoch": 0.5680332739156269, "grad_norm": 1.50515292093294, "learning_rate": 8.290479872392969e-06, "loss": 0.1641, "step": 7170 }, { "epoch": 0.5681124975242622, "grad_norm": 1.4631926228585892, "learning_rate": 8.287951670606905e-06, "loss": 0.1985, "step": 7171 }, { "epoch": 0.5681917211328976, "grad_norm": 1.5906471413197742, "learning_rate": 8.285423581550985e-06, "loss": 0.2045, "step": 7172 }, { "epoch": 0.568270944741533, "grad_norm": 1.481088353499964, "learning_rate": 8.282895605391674e-06, "loss": 0.1759, "step": 7173 }, { "epoch": 0.5683501683501684, "grad_norm": 1.6193342793107977, "learning_rate": 8.280367742295424e-06, "loss": 0.2213, "step": 7174 }, { "epoch": 0.5684293919588037, "grad_norm": 1.921044475193414, "learning_rate": 8.277839992428683e-06, "loss": 0.2878, "step": 7175 }, { "epoch": 0.5685086155674391, "grad_norm": 1.5584366648187773, "learning_rate": 8.275312355957893e-06, "loss": 0.21, "step": 7176 }, { "epoch": 0.5685878391760745, "grad_norm": 1.5789958123311176, "learning_rate": 8.272784833049485e-06, "loss": 0.2029, "step": 7177 }, { "epoch": 0.5686670627847098, "grad_norm": 1.637634148940145, "learning_rate": 8.270257423869885e-06, "loss": 0.2191, "step": 7178 }, { "epoch": 0.5687462863933452, "grad_norm": 1.3934641068793392, "learning_rate": 8.267730128585511e-06, "loss": 0.1662, "step": 7179 }, { "epoch": 0.5688255100019806, "grad_norm": 2.1443933550161853, "learning_rate": 8.265202947362772e-06, "loss": 0.3144, "step": 7180 }, { "epoch": 0.568904733610616, "grad_norm": 1.7775579931099792, "learning_rate": 8.262675880368074e-06, "loss": 0.226, "step": 7181 }, { "epoch": 0.5689839572192513, "grad_norm": 1.7075282905209899, "learning_rate": 8.260148927767807e-06, "loss": 0.2915, "step": 7182 }, { "epoch": 0.5690631808278868, "grad_norm": 2.0249950980817255, "learning_rate": 8.257622089728362e-06, "loss": 0.3307, "step": 7183 }, { "epoch": 0.5691424044365221, "grad_norm": 1.7824973409860467, "learning_rate": 8.255095366416122e-06, "loss": 0.2789, "step": 7184 }, { "epoch": 0.5692216280451574, "grad_norm": 1.6721339940478186, "learning_rate": 8.25256875799745e-06, "loss": 0.1847, "step": 7185 }, { "epoch": 0.5693008516537928, "grad_norm": 1.5233904389011332, "learning_rate": 8.250042264638721e-06, "loss": 0.2198, "step": 7186 }, { "epoch": 0.5693800752624282, "grad_norm": 1.5901738527888205, "learning_rate": 8.24751588650629e-06, "loss": 0.2453, "step": 7187 }, { "epoch": 0.5694592988710636, "grad_norm": 1.4224255506150771, "learning_rate": 8.244989623766502e-06, "loss": 0.2052, "step": 7188 }, { "epoch": 0.5695385224796989, "grad_norm": 1.7414226383896743, "learning_rate": 8.242463476585707e-06, "loss": 0.2237, "step": 7189 }, { "epoch": 0.5696177460883344, "grad_norm": 1.713689844712856, "learning_rate": 8.239937445130232e-06, "loss": 0.203, "step": 7190 }, { "epoch": 0.5696969696969697, "grad_norm": 1.6590205727067242, "learning_rate": 8.237411529566407e-06, "loss": 0.2355, "step": 7191 }, { "epoch": 0.569776193305605, "grad_norm": 1.8908245667395156, "learning_rate": 8.234885730060554e-06, "loss": 0.2464, "step": 7192 }, { "epoch": 0.5698554169142405, "grad_norm": 1.6626577760630519, "learning_rate": 8.232360046778982e-06, "loss": 0.2008, "step": 7193 }, { "epoch": 0.5699346405228758, "grad_norm": 1.525945061799335, "learning_rate": 8.229834479887992e-06, "loss": 0.1848, "step": 7194 }, { "epoch": 0.5700138641315112, "grad_norm": 1.2094967268299146, "learning_rate": 8.227309029553889e-06, "loss": 0.1132, "step": 7195 }, { "epoch": 0.5700930877401466, "grad_norm": 1.8439241349566875, "learning_rate": 8.224783695942954e-06, "loss": 0.1555, "step": 7196 }, { "epoch": 0.570172311348782, "grad_norm": 1.7294648874207226, "learning_rate": 8.222258479221473e-06, "loss": 0.218, "step": 7197 }, { "epoch": 0.5702515349574173, "grad_norm": 1.7039752622642823, "learning_rate": 8.219733379555715e-06, "loss": 0.1981, "step": 7198 }, { "epoch": 0.5703307585660526, "grad_norm": 1.4493054380279036, "learning_rate": 8.217208397111948e-06, "loss": 0.1712, "step": 7199 }, { "epoch": 0.5704099821746881, "grad_norm": 1.6909015430438719, "learning_rate": 8.21468353205643e-06, "loss": 0.2229, "step": 7200 }, { "epoch": 0.5704892057833234, "grad_norm": 1.692378896432889, "learning_rate": 8.212158784555412e-06, "loss": 0.27, "step": 7201 }, { "epoch": 0.5705684293919588, "grad_norm": 1.537232110106939, "learning_rate": 8.209634154775134e-06, "loss": 0.1495, "step": 7202 }, { "epoch": 0.5706476530005942, "grad_norm": 1.7115009443493503, "learning_rate": 8.207109642881836e-06, "loss": 0.2137, "step": 7203 }, { "epoch": 0.5707268766092296, "grad_norm": 1.7248471314916907, "learning_rate": 8.20458524904174e-06, "loss": 0.2512, "step": 7204 }, { "epoch": 0.5708061002178649, "grad_norm": 1.5700271970662605, "learning_rate": 8.202060973421064e-06, "loss": 0.1893, "step": 7205 }, { "epoch": 0.5708853238265003, "grad_norm": 1.846018527733847, "learning_rate": 8.199536816186025e-06, "loss": 0.2002, "step": 7206 }, { "epoch": 0.5709645474351357, "grad_norm": 1.785997674820738, "learning_rate": 8.197012777502819e-06, "loss": 0.2979, "step": 7207 }, { "epoch": 0.571043771043771, "grad_norm": 1.466090700295504, "learning_rate": 8.194488857537646e-06, "loss": 0.1688, "step": 7208 }, { "epoch": 0.5711229946524065, "grad_norm": 1.6848150936657436, "learning_rate": 8.191965056456699e-06, "loss": 0.2262, "step": 7209 }, { "epoch": 0.5712022182610418, "grad_norm": 1.6258597244150454, "learning_rate": 8.18944137442615e-06, "loss": 0.2622, "step": 7210 }, { "epoch": 0.5712814418696771, "grad_norm": 2.0162694466437845, "learning_rate": 8.186917811612173e-06, "loss": 0.3038, "step": 7211 }, { "epoch": 0.5713606654783125, "grad_norm": 1.4975929344426002, "learning_rate": 8.184394368180937e-06, "loss": 0.1557, "step": 7212 }, { "epoch": 0.5714398890869479, "grad_norm": 1.3128093497968392, "learning_rate": 8.181871044298594e-06, "loss": 0.1773, "step": 7213 }, { "epoch": 0.5715191126955833, "grad_norm": 1.3339295785788323, "learning_rate": 8.179347840131297e-06, "loss": 0.1427, "step": 7214 }, { "epoch": 0.5715983363042186, "grad_norm": 1.704253023293362, "learning_rate": 8.176824755845183e-06, "loss": 0.2402, "step": 7215 }, { "epoch": 0.5716775599128541, "grad_norm": 1.5998406704208354, "learning_rate": 8.174301791606384e-06, "loss": 0.2053, "step": 7216 }, { "epoch": 0.5717567835214894, "grad_norm": 1.579364527988466, "learning_rate": 8.171778947581032e-06, "loss": 0.2245, "step": 7217 }, { "epoch": 0.5718360071301247, "grad_norm": 1.5894443403673484, "learning_rate": 8.169256223935236e-06, "loss": 0.2011, "step": 7218 }, { "epoch": 0.5719152307387602, "grad_norm": 1.6457889995542956, "learning_rate": 8.166733620835107e-06, "loss": 0.2605, "step": 7219 }, { "epoch": 0.5719944543473955, "grad_norm": 1.4879413333941212, "learning_rate": 8.164211138446753e-06, "loss": 0.2863, "step": 7220 }, { "epoch": 0.5720736779560309, "grad_norm": 1.620795512615661, "learning_rate": 8.161688776936259e-06, "loss": 0.2255, "step": 7221 }, { "epoch": 0.5721529015646662, "grad_norm": 1.5600584576401233, "learning_rate": 8.159166536469717e-06, "loss": 0.2198, "step": 7222 }, { "epoch": 0.5722321251733017, "grad_norm": 1.6844428399419666, "learning_rate": 8.156644417213196e-06, "loss": 0.2155, "step": 7223 }, { "epoch": 0.572311348781937, "grad_norm": 1.1161114589068044, "learning_rate": 8.154122419332772e-06, "loss": 0.1417, "step": 7224 }, { "epoch": 0.5723905723905723, "grad_norm": 1.5656457317724726, "learning_rate": 8.151600542994506e-06, "loss": 0.2177, "step": 7225 }, { "epoch": 0.5724697959992078, "grad_norm": 1.5475943800810086, "learning_rate": 8.149078788364451e-06, "loss": 0.188, "step": 7226 }, { "epoch": 0.5725490196078431, "grad_norm": 1.854962748245374, "learning_rate": 8.14655715560865e-06, "loss": 0.1981, "step": 7227 }, { "epoch": 0.5726282432164785, "grad_norm": 1.5940628017538636, "learning_rate": 8.144035644893143e-06, "loss": 0.2541, "step": 7228 }, { "epoch": 0.5727074668251139, "grad_norm": 1.2999784417169014, "learning_rate": 8.141514256383957e-06, "loss": 0.1764, "step": 7229 }, { "epoch": 0.5727866904337493, "grad_norm": 1.381332867582748, "learning_rate": 8.138992990247119e-06, "loss": 0.1961, "step": 7230 }, { "epoch": 0.5728659140423846, "grad_norm": 1.697555672404761, "learning_rate": 8.136471846648633e-06, "loss": 0.2601, "step": 7231 }, { "epoch": 0.57294513765102, "grad_norm": 1.7537931243838112, "learning_rate": 8.133950825754511e-06, "loss": 0.2834, "step": 7232 }, { "epoch": 0.5730243612596554, "grad_norm": 1.77155229334891, "learning_rate": 8.13142992773075e-06, "loss": 0.2091, "step": 7233 }, { "epoch": 0.5731035848682907, "grad_norm": 1.6048050727374819, "learning_rate": 8.128909152743334e-06, "loss": 0.2545, "step": 7234 }, { "epoch": 0.5731828084769262, "grad_norm": 1.5544839678344629, "learning_rate": 8.12638850095825e-06, "loss": 0.1799, "step": 7235 }, { "epoch": 0.5732620320855615, "grad_norm": 1.8944010759220822, "learning_rate": 8.123867972541466e-06, "loss": 0.2255, "step": 7236 }, { "epoch": 0.5733412556941969, "grad_norm": 2.0141425187757465, "learning_rate": 8.12134756765895e-06, "loss": 0.2448, "step": 7237 }, { "epoch": 0.5734204793028322, "grad_norm": 1.553231612125875, "learning_rate": 8.118827286476658e-06, "loss": 0.2796, "step": 7238 }, { "epoch": 0.5734997029114676, "grad_norm": 1.8101664552954098, "learning_rate": 8.116307129160535e-06, "loss": 0.2773, "step": 7239 }, { "epoch": 0.573578926520103, "grad_norm": 1.5021324965085898, "learning_rate": 8.113787095876525e-06, "loss": 0.2124, "step": 7240 }, { "epoch": 0.5736581501287383, "grad_norm": 1.4706068828036591, "learning_rate": 8.11126718679056e-06, "loss": 0.2022, "step": 7241 }, { "epoch": 0.5737373737373738, "grad_norm": 1.4912690632029677, "learning_rate": 8.10874740206856e-06, "loss": 0.2498, "step": 7242 }, { "epoch": 0.5738165973460091, "grad_norm": 1.675693886890341, "learning_rate": 8.106227741876447e-06, "loss": 0.3023, "step": 7243 }, { "epoch": 0.5738958209546445, "grad_norm": 1.7662578223830618, "learning_rate": 8.103708206380123e-06, "loss": 0.2428, "step": 7244 }, { "epoch": 0.5739750445632799, "grad_norm": 1.6891028802295858, "learning_rate": 8.101188795745489e-06, "loss": 0.1655, "step": 7245 }, { "epoch": 0.5740542681719152, "grad_norm": 1.470241860262505, "learning_rate": 8.098669510138438e-06, "loss": 0.2048, "step": 7246 }, { "epoch": 0.5741334917805506, "grad_norm": 1.3763211298664193, "learning_rate": 8.09615034972485e-06, "loss": 0.2379, "step": 7247 }, { "epoch": 0.574212715389186, "grad_norm": 1.9837225704042876, "learning_rate": 8.093631314670598e-06, "loss": 0.2625, "step": 7248 }, { "epoch": 0.5742919389978214, "grad_norm": 1.528437224140775, "learning_rate": 8.091112405141555e-06, "loss": 0.1805, "step": 7249 }, { "epoch": 0.5743711626064567, "grad_norm": 1.7958254538166991, "learning_rate": 8.088593621303573e-06, "loss": 0.2255, "step": 7250 }, { "epoch": 0.5744503862150921, "grad_norm": 1.3239041738970985, "learning_rate": 8.086074963322505e-06, "loss": 0.1891, "step": 7251 }, { "epoch": 0.5745296098237275, "grad_norm": 1.6670753342873152, "learning_rate": 8.083556431364191e-06, "loss": 0.2595, "step": 7252 }, { "epoch": 0.5746088334323628, "grad_norm": 1.4312438991694492, "learning_rate": 8.081038025594464e-06, "loss": 0.2067, "step": 7253 }, { "epoch": 0.5746880570409982, "grad_norm": 1.5986057118444013, "learning_rate": 8.078519746179153e-06, "loss": 0.2013, "step": 7254 }, { "epoch": 0.5747672806496336, "grad_norm": 1.3820754267029534, "learning_rate": 8.076001593284066e-06, "loss": 0.1804, "step": 7255 }, { "epoch": 0.574846504258269, "grad_norm": 1.6083103562027041, "learning_rate": 8.073483567075018e-06, "loss": 0.226, "step": 7256 }, { "epoch": 0.5749257278669043, "grad_norm": 1.3260904503125175, "learning_rate": 8.070965667717809e-06, "loss": 0.1593, "step": 7257 }, { "epoch": 0.5750049514755398, "grad_norm": 1.6781814899787393, "learning_rate": 8.06844789537823e-06, "loss": 0.255, "step": 7258 }, { "epoch": 0.5750841750841751, "grad_norm": 1.7037600283909466, "learning_rate": 8.065930250222061e-06, "loss": 0.1972, "step": 7259 }, { "epoch": 0.5751633986928104, "grad_norm": 1.2477962924815693, "learning_rate": 8.063412732415077e-06, "loss": 0.1675, "step": 7260 }, { "epoch": 0.5752426223014458, "grad_norm": 1.644909653105973, "learning_rate": 8.060895342123049e-06, "loss": 0.2057, "step": 7261 }, { "epoch": 0.5753218459100812, "grad_norm": 1.7224419201316707, "learning_rate": 8.058378079511732e-06, "loss": 0.2513, "step": 7262 }, { "epoch": 0.5754010695187166, "grad_norm": 1.3664927723470475, "learning_rate": 8.055860944746876e-06, "loss": 0.2052, "step": 7263 }, { "epoch": 0.5754802931273519, "grad_norm": 1.670790160406618, "learning_rate": 8.05334393799422e-06, "loss": 0.24, "step": 7264 }, { "epoch": 0.5755595167359874, "grad_norm": 1.2908889373837937, "learning_rate": 8.050827059419502e-06, "loss": 0.1549, "step": 7265 }, { "epoch": 0.5756387403446227, "grad_norm": 2.09136560361488, "learning_rate": 8.04831030918844e-06, "loss": 0.3494, "step": 7266 }, { "epoch": 0.575717963953258, "grad_norm": 1.5644870970355587, "learning_rate": 8.045793687466757e-06, "loss": 0.198, "step": 7267 }, { "epoch": 0.5757971875618935, "grad_norm": 1.4583632507235909, "learning_rate": 8.043277194420155e-06, "loss": 0.1843, "step": 7268 }, { "epoch": 0.5758764111705288, "grad_norm": 1.8484708684703344, "learning_rate": 8.040760830214334e-06, "loss": 0.2101, "step": 7269 }, { "epoch": 0.5759556347791642, "grad_norm": 1.2917664096857764, "learning_rate": 8.038244595014986e-06, "loss": 0.1735, "step": 7270 }, { "epoch": 0.5760348583877996, "grad_norm": 1.5158154177678604, "learning_rate": 8.03572848898779e-06, "loss": 0.1895, "step": 7271 }, { "epoch": 0.576114081996435, "grad_norm": 1.4357997985443058, "learning_rate": 8.033212512298422e-06, "loss": 0.2056, "step": 7272 }, { "epoch": 0.5761933056050703, "grad_norm": 1.9207206252555802, "learning_rate": 8.03069666511255e-06, "loss": 0.2706, "step": 7273 }, { "epoch": 0.5762725292137056, "grad_norm": 1.7526368059601876, "learning_rate": 8.028180947595823e-06, "loss": 0.1974, "step": 7274 }, { "epoch": 0.5763517528223411, "grad_norm": 1.6521947712187373, "learning_rate": 8.025665359913897e-06, "loss": 0.179, "step": 7275 }, { "epoch": 0.5764309764309764, "grad_norm": 1.9111428246261795, "learning_rate": 8.023149902232404e-06, "loss": 0.2338, "step": 7276 }, { "epoch": 0.5765102000396118, "grad_norm": 1.3963015069276818, "learning_rate": 8.020634574716976e-06, "loss": 0.1491, "step": 7277 }, { "epoch": 0.5765894236482472, "grad_norm": 1.4852121021266091, "learning_rate": 8.018119377533243e-06, "loss": 0.1889, "step": 7278 }, { "epoch": 0.5766686472568826, "grad_norm": 1.6877131783706179, "learning_rate": 8.015604310846807e-06, "loss": 0.2071, "step": 7279 }, { "epoch": 0.5767478708655179, "grad_norm": 1.6177034124873833, "learning_rate": 8.013089374823281e-06, "loss": 0.1991, "step": 7280 }, { "epoch": 0.5768270944741533, "grad_norm": 2.064247297204755, "learning_rate": 8.010574569628263e-06, "loss": 0.2441, "step": 7281 }, { "epoch": 0.5769063180827887, "grad_norm": 1.3653454892402546, "learning_rate": 8.008059895427334e-06, "loss": 0.1704, "step": 7282 }, { "epoch": 0.576985541691424, "grad_norm": 1.3672169177523648, "learning_rate": 8.005545352386077e-06, "loss": 0.1614, "step": 7283 }, { "epoch": 0.5770647653000595, "grad_norm": 1.4658636744979527, "learning_rate": 8.003030940670061e-06, "loss": 0.1417, "step": 7284 }, { "epoch": 0.5771439889086948, "grad_norm": 1.5060100916104144, "learning_rate": 8.000516660444848e-06, "loss": 0.243, "step": 7285 }, { "epoch": 0.5772232125173302, "grad_norm": 1.6372504794503895, "learning_rate": 7.99800251187599e-06, "loss": 0.1427, "step": 7286 }, { "epoch": 0.5773024361259655, "grad_norm": 2.0943432662912187, "learning_rate": 7.995488495129039e-06, "loss": 0.2679, "step": 7287 }, { "epoch": 0.5773816597346009, "grad_norm": 1.9847692729449438, "learning_rate": 7.992974610369521e-06, "loss": 0.2482, "step": 7288 }, { "epoch": 0.5774608833432363, "grad_norm": 1.5031415487500104, "learning_rate": 7.990460857762969e-06, "loss": 0.1762, "step": 7289 }, { "epoch": 0.5775401069518716, "grad_norm": 1.1995105282650207, "learning_rate": 7.987947237474903e-06, "loss": 0.2128, "step": 7290 }, { "epoch": 0.5776193305605071, "grad_norm": 1.679879937596472, "learning_rate": 7.985433749670825e-06, "loss": 0.2252, "step": 7291 }, { "epoch": 0.5776985541691424, "grad_norm": 1.6089326408370244, "learning_rate": 7.982920394516247e-06, "loss": 0.2108, "step": 7292 }, { "epoch": 0.5777777777777777, "grad_norm": 1.8065095948417769, "learning_rate": 7.98040717217665e-06, "loss": 0.2501, "step": 7293 }, { "epoch": 0.5778570013864132, "grad_norm": 1.5732673240721722, "learning_rate": 7.977894082817524e-06, "loss": 0.2463, "step": 7294 }, { "epoch": 0.5779362249950485, "grad_norm": 1.506557784267785, "learning_rate": 7.975381126604346e-06, "loss": 0.1568, "step": 7295 }, { "epoch": 0.5780154486036839, "grad_norm": 1.4889185041643265, "learning_rate": 7.972868303702576e-06, "loss": 0.1961, "step": 7296 }, { "epoch": 0.5780946722123192, "grad_norm": 1.6347677387781698, "learning_rate": 7.970355614277674e-06, "loss": 0.2082, "step": 7297 }, { "epoch": 0.5781738958209547, "grad_norm": 2.649268010522181, "learning_rate": 7.967843058495092e-06, "loss": 0.2781, "step": 7298 }, { "epoch": 0.57825311942959, "grad_norm": 1.5538571208514451, "learning_rate": 7.965330636520262e-06, "loss": 0.2095, "step": 7299 }, { "epoch": 0.5783323430382253, "grad_norm": 1.4633856760820738, "learning_rate": 7.962818348518623e-06, "loss": 0.257, "step": 7300 }, { "epoch": 0.5784115666468608, "grad_norm": 1.43828529775794, "learning_rate": 7.960306194655593e-06, "loss": 0.1819, "step": 7301 }, { "epoch": 0.5784907902554961, "grad_norm": 1.6101635965560042, "learning_rate": 7.957794175096585e-06, "loss": 0.2171, "step": 7302 }, { "epoch": 0.5785700138641315, "grad_norm": 2.0250001290962496, "learning_rate": 7.955282290007006e-06, "loss": 0.2732, "step": 7303 }, { "epoch": 0.5786492374727669, "grad_norm": 1.4420376384620313, "learning_rate": 7.952770539552246e-06, "loss": 0.1931, "step": 7304 }, { "epoch": 0.5787284610814023, "grad_norm": 1.3618466857212324, "learning_rate": 7.950258923897695e-06, "loss": 0.1772, "step": 7305 }, { "epoch": 0.5788076846900376, "grad_norm": 1.551858486834539, "learning_rate": 7.947747443208735e-06, "loss": 0.219, "step": 7306 }, { "epoch": 0.578886908298673, "grad_norm": 1.5917687245250431, "learning_rate": 7.945236097650729e-06, "loss": 0.1581, "step": 7307 }, { "epoch": 0.5789661319073084, "grad_norm": 1.5082789832828323, "learning_rate": 7.942724887389041e-06, "loss": 0.1771, "step": 7308 }, { "epoch": 0.5790453555159437, "grad_norm": 1.677984045977772, "learning_rate": 7.940213812589018e-06, "loss": 0.2054, "step": 7309 }, { "epoch": 0.5791245791245792, "grad_norm": 1.5881251260978109, "learning_rate": 7.937702873416005e-06, "loss": 0.2177, "step": 7310 }, { "epoch": 0.5792038027332145, "grad_norm": 1.6570658660141848, "learning_rate": 7.935192070035335e-06, "loss": 0.1636, "step": 7311 }, { "epoch": 0.5792830263418499, "grad_norm": 2.450106985192867, "learning_rate": 7.932681402612332e-06, "loss": 0.4511, "step": 7312 }, { "epoch": 0.5793622499504852, "grad_norm": 1.5040911320701662, "learning_rate": 7.93017087131231e-06, "loss": 0.1476, "step": 7313 }, { "epoch": 0.5794414735591206, "grad_norm": 1.6720726936121675, "learning_rate": 7.927660476300578e-06, "loss": 0.1799, "step": 7314 }, { "epoch": 0.579520697167756, "grad_norm": 1.4983948771718287, "learning_rate": 7.925150217742431e-06, "loss": 0.2251, "step": 7315 }, { "epoch": 0.5795999207763913, "grad_norm": 1.8777163764540121, "learning_rate": 7.92264009580316e-06, "loss": 0.2383, "step": 7316 }, { "epoch": 0.5796791443850268, "grad_norm": 1.9503488963376103, "learning_rate": 7.920130110648044e-06, "loss": 0.2333, "step": 7317 }, { "epoch": 0.5797583679936621, "grad_norm": 1.396599470150119, "learning_rate": 7.917620262442349e-06, "loss": 0.16, "step": 7318 }, { "epoch": 0.5798375916022975, "grad_norm": 1.3316024969176476, "learning_rate": 7.915110551351344e-06, "loss": 0.1445, "step": 7319 }, { "epoch": 0.5799168152109329, "grad_norm": 1.8475021004353738, "learning_rate": 7.912600977540275e-06, "loss": 0.2572, "step": 7320 }, { "epoch": 0.5799960388195682, "grad_norm": 1.8854731577534638, "learning_rate": 7.910091541174388e-06, "loss": 0.3087, "step": 7321 }, { "epoch": 0.5800752624282036, "grad_norm": 2.035559672891875, "learning_rate": 7.907582242418916e-06, "loss": 0.2637, "step": 7322 }, { "epoch": 0.580154486036839, "grad_norm": 1.7967319312949754, "learning_rate": 7.905073081439087e-06, "loss": 0.2327, "step": 7323 }, { "epoch": 0.5802337096454744, "grad_norm": 1.3853013185902623, "learning_rate": 7.902564058400116e-06, "loss": 0.1705, "step": 7324 }, { "epoch": 0.5803129332541097, "grad_norm": 1.6040940919908941, "learning_rate": 7.900055173467207e-06, "loss": 0.2192, "step": 7325 }, { "epoch": 0.5803921568627451, "grad_norm": 1.6915223518830884, "learning_rate": 7.897546426805561e-06, "loss": 0.2627, "step": 7326 }, { "epoch": 0.5804713804713805, "grad_norm": 1.4365202102715322, "learning_rate": 7.89503781858037e-06, "loss": 0.189, "step": 7327 }, { "epoch": 0.5805506040800158, "grad_norm": 1.656623708831888, "learning_rate": 7.892529348956805e-06, "loss": 0.2493, "step": 7328 }, { "epoch": 0.5806298276886512, "grad_norm": 0.9988737429943114, "learning_rate": 7.890021018100045e-06, "loss": 0.1021, "step": 7329 }, { "epoch": 0.5807090512972866, "grad_norm": 1.3282268789970066, "learning_rate": 7.887512826175247e-06, "loss": 0.2134, "step": 7330 }, { "epoch": 0.580788274905922, "grad_norm": 1.4715313871158777, "learning_rate": 7.885004773347565e-06, "loss": 0.1625, "step": 7331 }, { "epoch": 0.5808674985145573, "grad_norm": 2.3555701996219423, "learning_rate": 7.882496859782145e-06, "loss": 0.2184, "step": 7332 }, { "epoch": 0.5809467221231928, "grad_norm": 1.2667015407256914, "learning_rate": 7.879989085644114e-06, "loss": 0.1296, "step": 7333 }, { "epoch": 0.5810259457318281, "grad_norm": 1.7741974740350812, "learning_rate": 7.877481451098602e-06, "loss": 0.2255, "step": 7334 }, { "epoch": 0.5811051693404634, "grad_norm": 1.3740913128849466, "learning_rate": 7.874973956310726e-06, "loss": 0.1703, "step": 7335 }, { "epoch": 0.5811843929490988, "grad_norm": 1.9566208543741046, "learning_rate": 7.872466601445587e-06, "loss": 0.3091, "step": 7336 }, { "epoch": 0.5812636165577342, "grad_norm": 1.3321241556772043, "learning_rate": 7.869959386668286e-06, "loss": 0.1637, "step": 7337 }, { "epoch": 0.5813428401663696, "grad_norm": 1.6632698510891268, "learning_rate": 7.86745231214391e-06, "loss": 0.176, "step": 7338 }, { "epoch": 0.5814220637750049, "grad_norm": 1.6438140441934932, "learning_rate": 7.864945378037538e-06, "loss": 0.2706, "step": 7339 }, { "epoch": 0.5815012873836404, "grad_norm": 1.5787218788023105, "learning_rate": 7.862438584514242e-06, "loss": 0.1937, "step": 7340 }, { "epoch": 0.5815805109922757, "grad_norm": 1.3080732064679614, "learning_rate": 7.859931931739077e-06, "loss": 0.2035, "step": 7341 }, { "epoch": 0.581659734600911, "grad_norm": 1.486490302479561, "learning_rate": 7.857425419877097e-06, "loss": 0.1695, "step": 7342 }, { "epoch": 0.5817389582095465, "grad_norm": 1.4044690756786709, "learning_rate": 7.854919049093345e-06, "loss": 0.1458, "step": 7343 }, { "epoch": 0.5818181818181818, "grad_norm": 1.45693372678055, "learning_rate": 7.852412819552853e-06, "loss": 0.1476, "step": 7344 }, { "epoch": 0.5818974054268172, "grad_norm": 1.4199161809622265, "learning_rate": 7.849906731420642e-06, "loss": 0.1673, "step": 7345 }, { "epoch": 0.5819766290354526, "grad_norm": 1.0830407164113578, "learning_rate": 7.847400784861727e-06, "loss": 0.102, "step": 7346 }, { "epoch": 0.582055852644088, "grad_norm": 1.2206867799938337, "learning_rate": 7.844894980041112e-06, "loss": 0.1164, "step": 7347 }, { "epoch": 0.5821350762527233, "grad_norm": 1.347524174502622, "learning_rate": 7.842389317123795e-06, "loss": 0.1571, "step": 7348 }, { "epoch": 0.5822142998613586, "grad_norm": 1.4373286737459365, "learning_rate": 7.839883796274758e-06, "loss": 0.1447, "step": 7349 }, { "epoch": 0.5822935234699941, "grad_norm": 1.5678046119059164, "learning_rate": 7.83737841765898e-06, "loss": 0.2645, "step": 7350 }, { "epoch": 0.5823727470786294, "grad_norm": 1.8735561538804082, "learning_rate": 7.834873181441426e-06, "loss": 0.2379, "step": 7351 }, { "epoch": 0.5824519706872648, "grad_norm": 1.4910033748164775, "learning_rate": 7.832368087787056e-06, "loss": 0.1843, "step": 7352 }, { "epoch": 0.5825311942959002, "grad_norm": 1.4822589257996637, "learning_rate": 7.82986313686082e-06, "loss": 0.1491, "step": 7353 }, { "epoch": 0.5826104179045356, "grad_norm": 1.4546380221478576, "learning_rate": 7.82735832882765e-06, "loss": 0.1845, "step": 7354 }, { "epoch": 0.5826896415131709, "grad_norm": 2.228102250388726, "learning_rate": 7.824853663852482e-06, "loss": 0.2356, "step": 7355 }, { "epoch": 0.5827688651218063, "grad_norm": 1.74278369204629, "learning_rate": 7.822349142100236e-06, "loss": 0.1763, "step": 7356 }, { "epoch": 0.5828480887304417, "grad_norm": 1.2361151491785414, "learning_rate": 7.819844763735818e-06, "loss": 0.1507, "step": 7357 }, { "epoch": 0.582927312339077, "grad_norm": 1.6643136497569972, "learning_rate": 7.817340528924132e-06, "loss": 0.2407, "step": 7358 }, { "epoch": 0.5830065359477125, "grad_norm": 1.21829976363329, "learning_rate": 7.814836437830074e-06, "loss": 0.1637, "step": 7359 }, { "epoch": 0.5830857595563478, "grad_norm": 1.597439023932669, "learning_rate": 7.812332490618521e-06, "loss": 0.1991, "step": 7360 }, { "epoch": 0.5831649831649832, "grad_norm": 1.4610659664827095, "learning_rate": 7.809828687454343e-06, "loss": 0.1949, "step": 7361 }, { "epoch": 0.5832442067736185, "grad_norm": 1.2423700761020409, "learning_rate": 7.807325028502412e-06, "loss": 0.1424, "step": 7362 }, { "epoch": 0.5833234303822539, "grad_norm": 1.808372202672393, "learning_rate": 7.804821513927574e-06, "loss": 0.2757, "step": 7363 }, { "epoch": 0.5834026539908893, "grad_norm": 1.6933673668947338, "learning_rate": 7.802318143894678e-06, "loss": 0.208, "step": 7364 }, { "epoch": 0.5834818775995246, "grad_norm": 1.2728386526262256, "learning_rate": 7.799814918568559e-06, "loss": 0.1439, "step": 7365 }, { "epoch": 0.5835611012081601, "grad_norm": 1.4067736456268327, "learning_rate": 7.797311838114038e-06, "loss": 0.1763, "step": 7366 }, { "epoch": 0.5836403248167954, "grad_norm": 1.4137022613901862, "learning_rate": 7.794808902695935e-06, "loss": 0.1583, "step": 7367 }, { "epoch": 0.5837195484254307, "grad_norm": 1.5527576114466286, "learning_rate": 7.792306112479055e-06, "loss": 0.2426, "step": 7368 }, { "epoch": 0.5837987720340662, "grad_norm": 1.4868732528214235, "learning_rate": 7.789803467628196e-06, "loss": 0.2171, "step": 7369 }, { "epoch": 0.5838779956427015, "grad_norm": 1.2531848604775941, "learning_rate": 7.787300968308144e-06, "loss": 0.1809, "step": 7370 }, { "epoch": 0.5839572192513369, "grad_norm": 1.6035137846042486, "learning_rate": 7.784798614683675e-06, "loss": 0.1858, "step": 7371 }, { "epoch": 0.5840364428599722, "grad_norm": 1.5782136919648733, "learning_rate": 7.782296406919557e-06, "loss": 0.1892, "step": 7372 }, { "epoch": 0.5841156664686077, "grad_norm": 1.2940119101465892, "learning_rate": 7.779794345180552e-06, "loss": 0.1582, "step": 7373 }, { "epoch": 0.584194890077243, "grad_norm": 1.7977074156018713, "learning_rate": 7.777292429631405e-06, "loss": 0.1343, "step": 7374 }, { "epoch": 0.5842741136858783, "grad_norm": 1.8275288543711155, "learning_rate": 7.774790660436857e-06, "loss": 0.3028, "step": 7375 }, { "epoch": 0.5843533372945138, "grad_norm": 1.4315002334367006, "learning_rate": 7.772289037761639e-06, "loss": 0.2209, "step": 7376 }, { "epoch": 0.5844325609031491, "grad_norm": 1.5079418253540777, "learning_rate": 7.769787561770466e-06, "loss": 0.1877, "step": 7377 }, { "epoch": 0.5845117845117845, "grad_norm": 2.078985270683973, "learning_rate": 7.767286232628054e-06, "loss": 0.3406, "step": 7378 }, { "epoch": 0.5845910081204199, "grad_norm": 1.7108565953951784, "learning_rate": 7.764785050499098e-06, "loss": 0.1984, "step": 7379 }, { "epoch": 0.5846702317290553, "grad_norm": 1.4422617027044293, "learning_rate": 7.76228401554829e-06, "loss": 0.1533, "step": 7380 }, { "epoch": 0.5847494553376906, "grad_norm": 1.521485936719373, "learning_rate": 7.759783127940315e-06, "loss": 0.1655, "step": 7381 }, { "epoch": 0.584828678946326, "grad_norm": 1.444930411295526, "learning_rate": 7.757282387839842e-06, "loss": 0.2408, "step": 7382 }, { "epoch": 0.5849079025549614, "grad_norm": 1.5006625285954482, "learning_rate": 7.75478179541153e-06, "loss": 0.2169, "step": 7383 }, { "epoch": 0.5849871261635967, "grad_norm": 1.6704140909628131, "learning_rate": 7.752281350820037e-06, "loss": 0.2663, "step": 7384 }, { "epoch": 0.5850663497722322, "grad_norm": 1.8925580776486521, "learning_rate": 7.749781054229998e-06, "loss": 0.2998, "step": 7385 }, { "epoch": 0.5851455733808675, "grad_norm": 1.6760707482436805, "learning_rate": 7.747280905806051e-06, "loss": 0.2296, "step": 7386 }, { "epoch": 0.5852247969895029, "grad_norm": 1.575618260689309, "learning_rate": 7.744780905712818e-06, "loss": 0.1792, "step": 7387 }, { "epoch": 0.5853040205981382, "grad_norm": 1.4684360621138008, "learning_rate": 7.742281054114909e-06, "loss": 0.218, "step": 7388 }, { "epoch": 0.5853832442067736, "grad_norm": 1.813178613724032, "learning_rate": 7.73978135117693e-06, "loss": 0.2412, "step": 7389 }, { "epoch": 0.585462467815409, "grad_norm": 1.7942912971991012, "learning_rate": 7.737281797063473e-06, "loss": 0.1901, "step": 7390 }, { "epoch": 0.5855416914240443, "grad_norm": 1.5534491332276268, "learning_rate": 7.734782391939123e-06, "loss": 0.1962, "step": 7391 }, { "epoch": 0.5856209150326798, "grad_norm": 1.2370068229019497, "learning_rate": 7.732283135968452e-06, "loss": 0.1165, "step": 7392 }, { "epoch": 0.5857001386413151, "grad_norm": 1.38219487024793, "learning_rate": 7.729784029316025e-06, "loss": 0.1801, "step": 7393 }, { "epoch": 0.5857793622499505, "grad_norm": 1.6395266423136008, "learning_rate": 7.7272850721464e-06, "loss": 0.2612, "step": 7394 }, { "epoch": 0.5858585858585859, "grad_norm": 2.569825377769722, "learning_rate": 7.724786264624112e-06, "loss": 0.3263, "step": 7395 }, { "epoch": 0.5859378094672212, "grad_norm": 1.5540901626600006, "learning_rate": 7.722287606913703e-06, "loss": 0.2244, "step": 7396 }, { "epoch": 0.5860170330758566, "grad_norm": 1.2766898903574357, "learning_rate": 7.719789099179696e-06, "loss": 0.1477, "step": 7397 }, { "epoch": 0.586096256684492, "grad_norm": 1.7034877399301822, "learning_rate": 7.717290741586602e-06, "loss": 0.2134, "step": 7398 }, { "epoch": 0.5861754802931274, "grad_norm": 1.5668080866324068, "learning_rate": 7.714792534298934e-06, "loss": 0.2433, "step": 7399 }, { "epoch": 0.5862547039017627, "grad_norm": 2.1071109200674845, "learning_rate": 7.712294477481177e-06, "loss": 0.3131, "step": 7400 }, { "epoch": 0.5863339275103981, "grad_norm": 1.6374636452367801, "learning_rate": 7.709796571297823e-06, "loss": 0.1872, "step": 7401 }, { "epoch": 0.5864131511190335, "grad_norm": 1.752874226437224, "learning_rate": 7.707298815913346e-06, "loss": 0.2336, "step": 7402 }, { "epoch": 0.5864923747276688, "grad_norm": 1.5787076973281444, "learning_rate": 7.70480121149221e-06, "loss": 0.2516, "step": 7403 }, { "epoch": 0.5865715983363042, "grad_norm": 1.2778506845834356, "learning_rate": 7.702303758198868e-06, "loss": 0.1574, "step": 7404 }, { "epoch": 0.5866508219449396, "grad_norm": 1.263129459817484, "learning_rate": 7.699806456197771e-06, "loss": 0.1393, "step": 7405 }, { "epoch": 0.586730045553575, "grad_norm": 2.0536170410407393, "learning_rate": 7.697309305653348e-06, "loss": 0.2962, "step": 7406 }, { "epoch": 0.5868092691622103, "grad_norm": 1.6066563887601932, "learning_rate": 7.694812306730031e-06, "loss": 0.2089, "step": 7407 }, { "epoch": 0.5868884927708458, "grad_norm": 1.724127569304111, "learning_rate": 7.69231545959223e-06, "loss": 0.273, "step": 7408 }, { "epoch": 0.5869677163794811, "grad_norm": 1.772746835436174, "learning_rate": 7.689818764404351e-06, "loss": 0.2648, "step": 7409 }, { "epoch": 0.5870469399881164, "grad_norm": 1.400817299921641, "learning_rate": 7.687322221330794e-06, "loss": 0.1265, "step": 7410 }, { "epoch": 0.5871261635967518, "grad_norm": 1.7113843309178138, "learning_rate": 7.684825830535935e-06, "loss": 0.1714, "step": 7411 }, { "epoch": 0.5872053872053872, "grad_norm": 1.7658430467096589, "learning_rate": 7.682329592184158e-06, "loss": 0.205, "step": 7412 }, { "epoch": 0.5872846108140226, "grad_norm": 1.61182488260892, "learning_rate": 7.679833506439826e-06, "loss": 0.2136, "step": 7413 }, { "epoch": 0.5873638344226579, "grad_norm": 1.92473727839742, "learning_rate": 7.677337573467294e-06, "loss": 0.2135, "step": 7414 }, { "epoch": 0.5874430580312934, "grad_norm": 1.927164374383882, "learning_rate": 7.674841793430907e-06, "loss": 0.279, "step": 7415 }, { "epoch": 0.5875222816399287, "grad_norm": 2.1716251837227083, "learning_rate": 7.672346166494999e-06, "loss": 0.307, "step": 7416 }, { "epoch": 0.587601505248564, "grad_norm": 1.709594680815539, "learning_rate": 7.669850692823895e-06, "loss": 0.2193, "step": 7417 }, { "epoch": 0.5876807288571995, "grad_norm": 1.8776004149572332, "learning_rate": 7.667355372581913e-06, "loss": 0.2316, "step": 7418 }, { "epoch": 0.5877599524658348, "grad_norm": 2.0678547868731956, "learning_rate": 7.664860205933356e-06, "loss": 0.241, "step": 7419 }, { "epoch": 0.5878391760744702, "grad_norm": 1.8401528725309737, "learning_rate": 7.662365193042516e-06, "loss": 0.1509, "step": 7420 }, { "epoch": 0.5879183996831056, "grad_norm": 1.5633666386116454, "learning_rate": 7.659870334073683e-06, "loss": 0.1895, "step": 7421 }, { "epoch": 0.587997623291741, "grad_norm": 1.3746797454269115, "learning_rate": 7.657375629191126e-06, "loss": 0.1762, "step": 7422 }, { "epoch": 0.5880768469003763, "grad_norm": 1.771937331025057, "learning_rate": 7.654881078559112e-06, "loss": 0.2074, "step": 7423 }, { "epoch": 0.5881560705090116, "grad_norm": 1.5697221652671167, "learning_rate": 7.652386682341895e-06, "loss": 0.1974, "step": 7424 }, { "epoch": 0.5882352941176471, "grad_norm": 1.6771166483152264, "learning_rate": 7.64989244070372e-06, "loss": 0.2224, "step": 7425 }, { "epoch": 0.5883145177262824, "grad_norm": 1.5833939615961354, "learning_rate": 7.647398353808822e-06, "loss": 0.165, "step": 7426 }, { "epoch": 0.5883937413349178, "grad_norm": 1.3801981553068066, "learning_rate": 7.644904421821418e-06, "loss": 0.1534, "step": 7427 }, { "epoch": 0.5884729649435532, "grad_norm": 1.4833152072980584, "learning_rate": 7.642410644905726e-06, "loss": 0.1964, "step": 7428 }, { "epoch": 0.5885521885521886, "grad_norm": 1.1463566073384621, "learning_rate": 7.639917023225953e-06, "loss": 0.1464, "step": 7429 }, { "epoch": 0.5886314121608239, "grad_norm": 1.8298440492273966, "learning_rate": 7.637423556946284e-06, "loss": 0.1884, "step": 7430 }, { "epoch": 0.5887106357694593, "grad_norm": 1.482646746607733, "learning_rate": 7.63493024623091e-06, "loss": 0.1772, "step": 7431 }, { "epoch": 0.5887898593780947, "grad_norm": 1.5964567960701876, "learning_rate": 7.632437091243996e-06, "loss": 0.2114, "step": 7432 }, { "epoch": 0.58886908298673, "grad_norm": 1.4102131564329259, "learning_rate": 7.629944092149707e-06, "loss": 0.2064, "step": 7433 }, { "epoch": 0.5889483065953655, "grad_norm": 1.3930073216985237, "learning_rate": 7.627451249112199e-06, "loss": 0.1364, "step": 7434 }, { "epoch": 0.5890275302040008, "grad_norm": 1.395804725903595, "learning_rate": 7.624958562295607e-06, "loss": 0.1558, "step": 7435 }, { "epoch": 0.5891067538126362, "grad_norm": 1.4641264576087305, "learning_rate": 7.622466031864066e-06, "loss": 0.1363, "step": 7436 }, { "epoch": 0.5891859774212715, "grad_norm": 1.240265737552144, "learning_rate": 7.6199736579817005e-06, "loss": 0.1672, "step": 7437 }, { "epoch": 0.5892652010299069, "grad_norm": 1.7226526469263839, "learning_rate": 7.617481440812617e-06, "loss": 0.2194, "step": 7438 }, { "epoch": 0.5893444246385423, "grad_norm": 1.8455411006227738, "learning_rate": 7.614989380520914e-06, "loss": 0.2776, "step": 7439 }, { "epoch": 0.5894236482471776, "grad_norm": 1.907638351279368, "learning_rate": 7.612497477270686e-06, "loss": 0.1984, "step": 7440 }, { "epoch": 0.5895028718558131, "grad_norm": 1.6683352005687586, "learning_rate": 7.610005731226009e-06, "loss": 0.2036, "step": 7441 }, { "epoch": 0.5895820954644484, "grad_norm": 1.7231457261407273, "learning_rate": 7.607514142550955e-06, "loss": 0.3299, "step": 7442 }, { "epoch": 0.5896613190730838, "grad_norm": 1.6616231689487826, "learning_rate": 7.605022711409585e-06, "loss": 0.1898, "step": 7443 }, { "epoch": 0.5897405426817192, "grad_norm": 1.5384747991129377, "learning_rate": 7.602531437965943e-06, "loss": 0.1878, "step": 7444 }, { "epoch": 0.5898197662903545, "grad_norm": 1.7914997127146324, "learning_rate": 7.6000403223840714e-06, "loss": 0.2259, "step": 7445 }, { "epoch": 0.5898989898989899, "grad_norm": 1.675488354038786, "learning_rate": 7.597549364827997e-06, "loss": 0.2158, "step": 7446 }, { "epoch": 0.5899782135076252, "grad_norm": 1.7693188811033118, "learning_rate": 7.595058565461736e-06, "loss": 0.2245, "step": 7447 }, { "epoch": 0.5900574371162607, "grad_norm": 1.8404426526266984, "learning_rate": 7.5925679244492985e-06, "loss": 0.2779, "step": 7448 }, { "epoch": 0.590136660724896, "grad_norm": 1.6301558992273812, "learning_rate": 7.5900774419546775e-06, "loss": 0.247, "step": 7449 }, { "epoch": 0.5902158843335313, "grad_norm": 1.491175797758845, "learning_rate": 7.58758711814186e-06, "loss": 0.1344, "step": 7450 }, { "epoch": 0.5902951079421668, "grad_norm": 1.696416510881321, "learning_rate": 7.585096953174827e-06, "loss": 0.2807, "step": 7451 }, { "epoch": 0.5903743315508021, "grad_norm": 1.6755142588541274, "learning_rate": 7.582606947217537e-06, "loss": 0.1924, "step": 7452 }, { "epoch": 0.5904535551594375, "grad_norm": 1.7556816542604936, "learning_rate": 7.580117100433947e-06, "loss": 0.3028, "step": 7453 }, { "epoch": 0.5905327787680729, "grad_norm": 1.4336986850350117, "learning_rate": 7.577627412988005e-06, "loss": 0.1626, "step": 7454 }, { "epoch": 0.5906120023767083, "grad_norm": 1.4900141317463638, "learning_rate": 7.57513788504364e-06, "loss": 0.245, "step": 7455 }, { "epoch": 0.5906912259853436, "grad_norm": 2.2121266232198917, "learning_rate": 7.572648516764778e-06, "loss": 0.223, "step": 7456 }, { "epoch": 0.590770449593979, "grad_norm": 1.3869070717614518, "learning_rate": 7.570159308315331e-06, "loss": 0.1779, "step": 7457 }, { "epoch": 0.5908496732026144, "grad_norm": 1.6095382062402306, "learning_rate": 7.5676702598592025e-06, "loss": 0.2495, "step": 7458 }, { "epoch": 0.5909288968112497, "grad_norm": 1.6965519774818403, "learning_rate": 7.5651813715602855e-06, "loss": 0.2093, "step": 7459 }, { "epoch": 0.5910081204198852, "grad_norm": 1.4744062962455062, "learning_rate": 7.562692643582456e-06, "loss": 0.1582, "step": 7460 }, { "epoch": 0.5910873440285205, "grad_norm": 1.6782265245942305, "learning_rate": 7.56020407608959e-06, "loss": 0.245, "step": 7461 }, { "epoch": 0.5911665676371559, "grad_norm": 2.030536778917779, "learning_rate": 7.557715669245547e-06, "loss": 0.2352, "step": 7462 }, { "epoch": 0.5912457912457912, "grad_norm": 1.8017533290790013, "learning_rate": 7.555227423214174e-06, "loss": 0.1653, "step": 7463 }, { "epoch": 0.5913250148544266, "grad_norm": 1.6817661670520199, "learning_rate": 7.552739338159314e-06, "loss": 0.1248, "step": 7464 }, { "epoch": 0.591404238463062, "grad_norm": 1.410988868893279, "learning_rate": 7.550251414244791e-06, "loss": 0.1793, "step": 7465 }, { "epoch": 0.5914834620716973, "grad_norm": 2.194095789629852, "learning_rate": 7.5477636516344255e-06, "loss": 0.2322, "step": 7466 }, { "epoch": 0.5915626856803328, "grad_norm": 1.4728707036576527, "learning_rate": 7.545276050492025e-06, "loss": 0.2031, "step": 7467 }, { "epoch": 0.5916419092889681, "grad_norm": 1.814372102446906, "learning_rate": 7.542788610981384e-06, "loss": 0.2969, "step": 7468 }, { "epoch": 0.5917211328976035, "grad_norm": 1.8870305552345494, "learning_rate": 7.540301333266289e-06, "loss": 0.2752, "step": 7469 }, { "epoch": 0.5918003565062389, "grad_norm": 1.2628947142486275, "learning_rate": 7.537814217510518e-06, "loss": 0.1571, "step": 7470 }, { "epoch": 0.5918795801148742, "grad_norm": 1.4571467798044746, "learning_rate": 7.535327263877832e-06, "loss": 0.214, "step": 7471 }, { "epoch": 0.5919588037235096, "grad_norm": 1.3676288149330387, "learning_rate": 7.532840472531988e-06, "loss": 0.1923, "step": 7472 }, { "epoch": 0.592038027332145, "grad_norm": 1.8942295958228004, "learning_rate": 7.530353843636726e-06, "loss": 0.2364, "step": 7473 }, { "epoch": 0.5921172509407804, "grad_norm": 1.47835187108296, "learning_rate": 7.52786737735578e-06, "loss": 0.2148, "step": 7474 }, { "epoch": 0.5921964745494157, "grad_norm": 1.4904041274472746, "learning_rate": 7.525381073852874e-06, "loss": 0.1552, "step": 7475 }, { "epoch": 0.5922756981580511, "grad_norm": 1.6395740358422257, "learning_rate": 7.522894933291715e-06, "loss": 0.2664, "step": 7476 }, { "epoch": 0.5923549217666865, "grad_norm": 1.4791070926762029, "learning_rate": 7.5204089558360076e-06, "loss": 0.2207, "step": 7477 }, { "epoch": 0.5924341453753218, "grad_norm": 1.4312737917623066, "learning_rate": 7.517923141649439e-06, "loss": 0.2194, "step": 7478 }, { "epoch": 0.5925133689839572, "grad_norm": 1.6109664535306307, "learning_rate": 7.515437490895688e-06, "loss": 0.2778, "step": 7479 }, { "epoch": 0.5925925925925926, "grad_norm": 1.3401836430719771, "learning_rate": 7.5129520037384225e-06, "loss": 0.2119, "step": 7480 }, { "epoch": 0.592671816201228, "grad_norm": 1.7954808505865363, "learning_rate": 7.5104666803413015e-06, "loss": 0.2537, "step": 7481 }, { "epoch": 0.5927510398098633, "grad_norm": 1.2378367435262168, "learning_rate": 7.50798152086797e-06, "loss": 0.1458, "step": 7482 }, { "epoch": 0.5928302634184988, "grad_norm": 1.325387131806812, "learning_rate": 7.505496525482066e-06, "loss": 0.183, "step": 7483 }, { "epoch": 0.5929094870271341, "grad_norm": 2.895468924061322, "learning_rate": 7.503011694347212e-06, "loss": 0.2108, "step": 7484 }, { "epoch": 0.5929887106357694, "grad_norm": 1.9100198884730346, "learning_rate": 7.500527027627025e-06, "loss": 0.2398, "step": 7485 }, { "epoch": 0.5930679342444048, "grad_norm": 1.6497273421773464, "learning_rate": 7.4980425254851034e-06, "loss": 0.2276, "step": 7486 }, { "epoch": 0.5931471578530402, "grad_norm": 1.573713696957266, "learning_rate": 7.495558188085044e-06, "loss": 0.1782, "step": 7487 }, { "epoch": 0.5932263814616756, "grad_norm": 1.5540595415324925, "learning_rate": 7.493074015590429e-06, "loss": 0.2056, "step": 7488 }, { "epoch": 0.5933056050703109, "grad_norm": 1.3437221880306618, "learning_rate": 7.490590008164824e-06, "loss": 0.1472, "step": 7489 }, { "epoch": 0.5933848286789464, "grad_norm": 1.437666800499826, "learning_rate": 7.488106165971795e-06, "loss": 0.2073, "step": 7490 }, { "epoch": 0.5934640522875817, "grad_norm": 1.902829932381785, "learning_rate": 7.485622489174888e-06, "loss": 0.1442, "step": 7491 }, { "epoch": 0.593543275896217, "grad_norm": 1.4263930247751762, "learning_rate": 7.483138977937643e-06, "loss": 0.1706, "step": 7492 }, { "epoch": 0.5936224995048525, "grad_norm": 1.2750902489373617, "learning_rate": 7.480655632423586e-06, "loss": 0.1172, "step": 7493 }, { "epoch": 0.5937017231134878, "grad_norm": 1.9653500329408096, "learning_rate": 7.478172452796231e-06, "loss": 0.2398, "step": 7494 }, { "epoch": 0.5937809467221232, "grad_norm": 1.3090970040179197, "learning_rate": 7.475689439219085e-06, "loss": 0.1638, "step": 7495 }, { "epoch": 0.5938601703307586, "grad_norm": 1.4515458669484513, "learning_rate": 7.473206591855646e-06, "loss": 0.1983, "step": 7496 }, { "epoch": 0.593939393939394, "grad_norm": 2.4360764396838754, "learning_rate": 7.470723910869393e-06, "loss": 0.2866, "step": 7497 }, { "epoch": 0.5940186175480293, "grad_norm": 1.5758426551525762, "learning_rate": 7.468241396423801e-06, "loss": 0.1942, "step": 7498 }, { "epoch": 0.5940978411566646, "grad_norm": 1.284295747047346, "learning_rate": 7.465759048682333e-06, "loss": 0.1275, "step": 7499 }, { "epoch": 0.5941770647653001, "grad_norm": 1.6761975640343274, "learning_rate": 7.463276867808435e-06, "loss": 0.1733, "step": 7500 }, { "epoch": 0.5942562883739354, "grad_norm": 1.574147240134103, "learning_rate": 7.46079485396555e-06, "loss": 0.2497, "step": 7501 }, { "epoch": 0.5943355119825708, "grad_norm": 1.64665031059084, "learning_rate": 7.458313007317106e-06, "loss": 0.2369, "step": 7502 }, { "epoch": 0.5944147355912062, "grad_norm": 1.4885554041713425, "learning_rate": 7.45583132802652e-06, "loss": 0.2112, "step": 7503 }, { "epoch": 0.5944939591998416, "grad_norm": 1.56036903462225, "learning_rate": 7.4533498162572004e-06, "loss": 0.2529, "step": 7504 }, { "epoch": 0.5945731828084769, "grad_norm": 1.4564983783149468, "learning_rate": 7.450868472172541e-06, "loss": 0.211, "step": 7505 }, { "epoch": 0.5946524064171123, "grad_norm": 1.8085709021335465, "learning_rate": 7.448387295935926e-06, "loss": 0.2086, "step": 7506 }, { "epoch": 0.5947316300257477, "grad_norm": 1.3844658607981795, "learning_rate": 7.445906287710733e-06, "loss": 0.2122, "step": 7507 }, { "epoch": 0.594810853634383, "grad_norm": 1.466532617133725, "learning_rate": 7.443425447660319e-06, "loss": 0.1985, "step": 7508 }, { "epoch": 0.5948900772430185, "grad_norm": 1.591733919342183, "learning_rate": 7.4409447759480404e-06, "loss": 0.2274, "step": 7509 }, { "epoch": 0.5949693008516538, "grad_norm": 1.390284436839561, "learning_rate": 7.438464272737232e-06, "loss": 0.1894, "step": 7510 }, { "epoch": 0.5950485244602892, "grad_norm": 1.3942454172751464, "learning_rate": 7.435983938191227e-06, "loss": 0.1829, "step": 7511 }, { "epoch": 0.5951277480689245, "grad_norm": 1.716729276264812, "learning_rate": 7.433503772473343e-06, "loss": 0.2497, "step": 7512 }, { "epoch": 0.5952069716775599, "grad_norm": 1.8072010336025572, "learning_rate": 7.431023775746886e-06, "loss": 0.2936, "step": 7513 }, { "epoch": 0.5952861952861953, "grad_norm": 1.4508899835233136, "learning_rate": 7.428543948175151e-06, "loss": 0.1835, "step": 7514 }, { "epoch": 0.5953654188948306, "grad_norm": 1.7513932844304312, "learning_rate": 7.426064289921429e-06, "loss": 0.1893, "step": 7515 }, { "epoch": 0.5954446425034661, "grad_norm": 1.8882346323032806, "learning_rate": 7.423584801148985e-06, "loss": 0.2767, "step": 7516 }, { "epoch": 0.5955238661121014, "grad_norm": 1.1630629996727673, "learning_rate": 7.421105482021084e-06, "loss": 0.1906, "step": 7517 }, { "epoch": 0.5956030897207368, "grad_norm": 1.3363220896736516, "learning_rate": 7.41862633270098e-06, "loss": 0.1521, "step": 7518 }, { "epoch": 0.5956823133293722, "grad_norm": 1.3489488656597928, "learning_rate": 7.416147353351909e-06, "loss": 0.2176, "step": 7519 }, { "epoch": 0.5957615369380075, "grad_norm": 1.7409845482961002, "learning_rate": 7.4136685441371025e-06, "loss": 0.2367, "step": 7520 }, { "epoch": 0.5958407605466429, "grad_norm": 1.5579907491199265, "learning_rate": 7.41118990521978e-06, "loss": 0.197, "step": 7521 }, { "epoch": 0.5959199841552782, "grad_norm": 1.66325078638249, "learning_rate": 7.408711436763143e-06, "loss": 0.2323, "step": 7522 }, { "epoch": 0.5959992077639137, "grad_norm": 1.514226489610716, "learning_rate": 7.406233138930389e-06, "loss": 0.224, "step": 7523 }, { "epoch": 0.596078431372549, "grad_norm": 1.301425961876574, "learning_rate": 7.4037550118847044e-06, "loss": 0.1894, "step": 7524 }, { "epoch": 0.5961576549811844, "grad_norm": 2.302043459119219, "learning_rate": 7.401277055789259e-06, "loss": 0.234, "step": 7525 }, { "epoch": 0.5962368785898198, "grad_norm": 1.6691946135400209, "learning_rate": 7.398799270807217e-06, "loss": 0.2055, "step": 7526 }, { "epoch": 0.5963161021984551, "grad_norm": 1.4852209304539714, "learning_rate": 7.3963216571017235e-06, "loss": 0.222, "step": 7527 }, { "epoch": 0.5963953258070905, "grad_norm": 1.3847414162680347, "learning_rate": 7.3938442148359215e-06, "loss": 0.1793, "step": 7528 }, { "epoch": 0.5964745494157259, "grad_norm": 1.8818345843921624, "learning_rate": 7.391366944172941e-06, "loss": 0.2033, "step": 7529 }, { "epoch": 0.5965537730243613, "grad_norm": 1.949855559570462, "learning_rate": 7.388889845275893e-06, "loss": 0.2678, "step": 7530 }, { "epoch": 0.5966329966329966, "grad_norm": 2.0744829222270305, "learning_rate": 7.3864129183078835e-06, "loss": 0.2881, "step": 7531 }, { "epoch": 0.596712220241632, "grad_norm": 1.8056034247734498, "learning_rate": 7.38393616343201e-06, "loss": 0.1992, "step": 7532 }, { "epoch": 0.5967914438502674, "grad_norm": 1.4540548937764923, "learning_rate": 7.381459580811352e-06, "loss": 0.1902, "step": 7533 }, { "epoch": 0.5968706674589027, "grad_norm": 1.2133129477610578, "learning_rate": 7.378983170608982e-06, "loss": 0.166, "step": 7534 }, { "epoch": 0.5969498910675382, "grad_norm": 1.4185713740831576, "learning_rate": 7.376506932987956e-06, "loss": 0.2411, "step": 7535 }, { "epoch": 0.5970291146761735, "grad_norm": 1.2997916437712833, "learning_rate": 7.374030868111326e-06, "loss": 0.1401, "step": 7536 }, { "epoch": 0.5971083382848089, "grad_norm": 1.717198331922666, "learning_rate": 7.371554976142128e-06, "loss": 0.184, "step": 7537 }, { "epoch": 0.5971875618934442, "grad_norm": 1.533675257115316, "learning_rate": 7.369079257243388e-06, "loss": 0.1964, "step": 7538 }, { "epoch": 0.5972667855020796, "grad_norm": 1.4133944231430584, "learning_rate": 7.366603711578119e-06, "loss": 0.138, "step": 7539 }, { "epoch": 0.597346009110715, "grad_norm": 1.0047709943714092, "learning_rate": 7.364128339309326e-06, "loss": 0.1318, "step": 7540 }, { "epoch": 0.5974252327193503, "grad_norm": 1.6690406345432887, "learning_rate": 7.361653140599997e-06, "loss": 0.227, "step": 7541 }, { "epoch": 0.5975044563279858, "grad_norm": 1.473319163574499, "learning_rate": 7.359178115613116e-06, "loss": 0.1581, "step": 7542 }, { "epoch": 0.5975836799366211, "grad_norm": 1.7992236191033433, "learning_rate": 7.356703264511646e-06, "loss": 0.259, "step": 7543 }, { "epoch": 0.5976629035452565, "grad_norm": 1.5309951035045426, "learning_rate": 7.354228587458549e-06, "loss": 0.1908, "step": 7544 }, { "epoch": 0.5977421271538919, "grad_norm": 1.5413873328517331, "learning_rate": 7.351754084616771e-06, "loss": 0.2023, "step": 7545 }, { "epoch": 0.5978213507625272, "grad_norm": 1.8613854969727364, "learning_rate": 7.349279756149241e-06, "loss": 0.2618, "step": 7546 }, { "epoch": 0.5979005743711626, "grad_norm": 1.3782039045483327, "learning_rate": 7.346805602218885e-06, "loss": 0.1429, "step": 7547 }, { "epoch": 0.597979797979798, "grad_norm": 1.0950435839528003, "learning_rate": 7.344331622988616e-06, "loss": 0.1412, "step": 7548 }, { "epoch": 0.5980590215884334, "grad_norm": 1.2331082884778737, "learning_rate": 7.341857818621328e-06, "loss": 0.1536, "step": 7549 }, { "epoch": 0.5981382451970687, "grad_norm": 1.3944433181556044, "learning_rate": 7.339384189279917e-06, "loss": 0.133, "step": 7550 }, { "epoch": 0.5982174688057041, "grad_norm": 1.4105343209411911, "learning_rate": 7.33691073512725e-06, "loss": 0.1746, "step": 7551 }, { "epoch": 0.5982966924143395, "grad_norm": 1.4134313314750464, "learning_rate": 7.3344374563262e-06, "loss": 0.168, "step": 7552 }, { "epoch": 0.5983759160229748, "grad_norm": 1.4990726175232938, "learning_rate": 7.3319643530396175e-06, "loss": 0.2168, "step": 7553 }, { "epoch": 0.5984551396316102, "grad_norm": 1.4439393721882599, "learning_rate": 7.329491425430344e-06, "loss": 0.156, "step": 7554 }, { "epoch": 0.5985343632402456, "grad_norm": 1.8575783690839462, "learning_rate": 7.327018673661209e-06, "loss": 0.2932, "step": 7555 }, { "epoch": 0.598613586848881, "grad_norm": 1.4803929070883979, "learning_rate": 7.324546097895036e-06, "loss": 0.2327, "step": 7556 }, { "epoch": 0.5986928104575163, "grad_norm": 1.2508065486283875, "learning_rate": 7.3220736982946275e-06, "loss": 0.1574, "step": 7557 }, { "epoch": 0.5987720340661518, "grad_norm": 1.7400344217433672, "learning_rate": 7.3196014750227815e-06, "loss": 0.2446, "step": 7558 }, { "epoch": 0.5988512576747871, "grad_norm": 1.099608502479037, "learning_rate": 7.317129428242279e-06, "loss": 0.1695, "step": 7559 }, { "epoch": 0.5989304812834224, "grad_norm": 1.54710041193007, "learning_rate": 7.3146575581158945e-06, "loss": 0.1717, "step": 7560 }, { "epoch": 0.5990097048920578, "grad_norm": 1.470844078119281, "learning_rate": 7.312185864806391e-06, "loss": 0.2169, "step": 7561 }, { "epoch": 0.5990889285006932, "grad_norm": 1.8732210895999644, "learning_rate": 7.309714348476513e-06, "loss": 0.2031, "step": 7562 }, { "epoch": 0.5991681521093286, "grad_norm": 1.4888413490990122, "learning_rate": 7.307243009289005e-06, "loss": 0.1519, "step": 7563 }, { "epoch": 0.5992473757179639, "grad_norm": 1.575642740569434, "learning_rate": 7.304771847406582e-06, "loss": 0.2201, "step": 7564 }, { "epoch": 0.5993265993265994, "grad_norm": 1.3800534248022087, "learning_rate": 7.3023008629919665e-06, "loss": 0.1727, "step": 7565 }, { "epoch": 0.5994058229352347, "grad_norm": 1.306370755599113, "learning_rate": 7.299830056207861e-06, "loss": 0.1615, "step": 7566 }, { "epoch": 0.59948504654387, "grad_norm": 1.3689165387000066, "learning_rate": 7.29735942721695e-06, "loss": 0.1735, "step": 7567 }, { "epoch": 0.5995642701525055, "grad_norm": 1.810228618296614, "learning_rate": 7.294888976181919e-06, "loss": 0.299, "step": 7568 }, { "epoch": 0.5996434937611408, "grad_norm": 1.7211881323383862, "learning_rate": 7.2924187032654335e-06, "loss": 0.222, "step": 7569 }, { "epoch": 0.5997227173697762, "grad_norm": 1.228565037460542, "learning_rate": 7.289948608630146e-06, "loss": 0.1868, "step": 7570 }, { "epoch": 0.5998019409784116, "grad_norm": 1.7974019125352265, "learning_rate": 7.287478692438705e-06, "loss": 0.2586, "step": 7571 }, { "epoch": 0.599881164587047, "grad_norm": 1.6644761638656067, "learning_rate": 7.285008954853739e-06, "loss": 0.2356, "step": 7572 }, { "epoch": 0.5999603881956823, "grad_norm": 1.871366287911412, "learning_rate": 7.282539396037868e-06, "loss": 0.2008, "step": 7573 }, { "epoch": 0.6000396118043176, "grad_norm": 1.4257322776430963, "learning_rate": 7.280070016153706e-06, "loss": 0.1895, "step": 7574 }, { "epoch": 0.6001188354129531, "grad_norm": 1.3397558590154806, "learning_rate": 7.277600815363842e-06, "loss": 0.1387, "step": 7575 }, { "epoch": 0.6001980590215884, "grad_norm": 1.6561727555965249, "learning_rate": 7.275131793830865e-06, "loss": 0.1888, "step": 7576 }, { "epoch": 0.6002772826302238, "grad_norm": 1.679878499688621, "learning_rate": 7.272662951717352e-06, "loss": 0.24, "step": 7577 }, { "epoch": 0.6003565062388592, "grad_norm": 1.3243416613694476, "learning_rate": 7.270194289185858e-06, "loss": 0.1599, "step": 7578 }, { "epoch": 0.6004357298474946, "grad_norm": 1.7022961454073975, "learning_rate": 7.267725806398936e-06, "loss": 0.2084, "step": 7579 }, { "epoch": 0.6005149534561299, "grad_norm": 1.7721215784350584, "learning_rate": 7.265257503519122e-06, "loss": 0.1778, "step": 7580 }, { "epoch": 0.6005941770647653, "grad_norm": 1.5639756605187733, "learning_rate": 7.262789380708942e-06, "loss": 0.2644, "step": 7581 }, { "epoch": 0.6006734006734007, "grad_norm": 1.3104953891714552, "learning_rate": 7.260321438130913e-06, "loss": 0.2004, "step": 7582 }, { "epoch": 0.600752624282036, "grad_norm": 1.3226126988818692, "learning_rate": 7.257853675947533e-06, "loss": 0.1721, "step": 7583 }, { "epoch": 0.6008318478906715, "grad_norm": 1.7804861638423175, "learning_rate": 7.255386094321293e-06, "loss": 0.2284, "step": 7584 }, { "epoch": 0.6009110714993068, "grad_norm": 1.2520062156070564, "learning_rate": 7.2529186934146756e-06, "loss": 0.1329, "step": 7585 }, { "epoch": 0.6009902951079422, "grad_norm": 1.6255730569270301, "learning_rate": 7.250451473390141e-06, "loss": 0.2111, "step": 7586 }, { "epoch": 0.6010695187165775, "grad_norm": 1.28845236684381, "learning_rate": 7.24798443441015e-06, "loss": 0.1875, "step": 7587 }, { "epoch": 0.6011487423252129, "grad_norm": 1.7687523318478398, "learning_rate": 7.24551757663714e-06, "loss": 0.175, "step": 7588 }, { "epoch": 0.6012279659338483, "grad_norm": 1.3541737313406095, "learning_rate": 7.2430509002335434e-06, "loss": 0.2053, "step": 7589 }, { "epoch": 0.6013071895424836, "grad_norm": 1.5810467398208936, "learning_rate": 7.240584405361781e-06, "loss": 0.2237, "step": 7590 }, { "epoch": 0.6013864131511191, "grad_norm": 1.477947454842436, "learning_rate": 7.238118092184256e-06, "loss": 0.1613, "step": 7591 }, { "epoch": 0.6014656367597544, "grad_norm": 1.7007612933797356, "learning_rate": 7.2356519608633665e-06, "loss": 0.2579, "step": 7592 }, { "epoch": 0.6015448603683898, "grad_norm": 1.702217802654443, "learning_rate": 7.233186011561498e-06, "loss": 0.3759, "step": 7593 }, { "epoch": 0.6016240839770252, "grad_norm": 1.5962176798448573, "learning_rate": 7.230720244441016e-06, "loss": 0.1757, "step": 7594 }, { "epoch": 0.6017033075856605, "grad_norm": 1.6546982883316201, "learning_rate": 7.228254659664278e-06, "loss": 0.2649, "step": 7595 }, { "epoch": 0.6017825311942959, "grad_norm": 1.3941126631045617, "learning_rate": 7.225789257393636e-06, "loss": 0.1673, "step": 7596 }, { "epoch": 0.6018617548029312, "grad_norm": 1.6387317230827676, "learning_rate": 7.223324037791421e-06, "loss": 0.1928, "step": 7597 }, { "epoch": 0.6019409784115667, "grad_norm": 1.9360923284859015, "learning_rate": 7.220859001019957e-06, "loss": 0.2389, "step": 7598 }, { "epoch": 0.602020202020202, "grad_norm": 1.6318450898092798, "learning_rate": 7.218394147241559e-06, "loss": 0.2211, "step": 7599 }, { "epoch": 0.6020994256288374, "grad_norm": 1.7303257492357589, "learning_rate": 7.2159294766185174e-06, "loss": 0.1931, "step": 7600 }, { "epoch": 0.6021786492374728, "grad_norm": 1.2552983658595818, "learning_rate": 7.213464989313126e-06, "loss": 0.2034, "step": 7601 }, { "epoch": 0.6022578728461081, "grad_norm": 1.4279397659427802, "learning_rate": 7.211000685487658e-06, "loss": 0.1985, "step": 7602 }, { "epoch": 0.6023370964547435, "grad_norm": 1.6865625890477975, "learning_rate": 7.208536565304374e-06, "loss": 0.2601, "step": 7603 }, { "epoch": 0.6024163200633789, "grad_norm": 1.4249315049439206, "learning_rate": 7.206072628925526e-06, "loss": 0.1831, "step": 7604 }, { "epoch": 0.6024955436720143, "grad_norm": 1.6655400910839726, "learning_rate": 7.203608876513351e-06, "loss": 0.1877, "step": 7605 }, { "epoch": 0.6025747672806496, "grad_norm": 1.6819901841355187, "learning_rate": 7.201145308230075e-06, "loss": 0.1418, "step": 7606 }, { "epoch": 0.602653990889285, "grad_norm": 1.3693835928356586, "learning_rate": 7.198681924237918e-06, "loss": 0.1452, "step": 7607 }, { "epoch": 0.6027332144979204, "grad_norm": 1.6954150141154036, "learning_rate": 7.196218724699072e-06, "loss": 0.2241, "step": 7608 }, { "epoch": 0.6028124381065557, "grad_norm": 1.4867464992020691, "learning_rate": 7.193755709775734e-06, "loss": 0.231, "step": 7609 }, { "epoch": 0.6028916617151912, "grad_norm": 1.6601739580110069, "learning_rate": 7.191292879630081e-06, "loss": 0.2537, "step": 7610 }, { "epoch": 0.6029708853238265, "grad_norm": 1.1866121143397077, "learning_rate": 7.188830234424275e-06, "loss": 0.1146, "step": 7611 }, { "epoch": 0.6030501089324619, "grad_norm": 1.743733347516833, "learning_rate": 7.186367774320474e-06, "loss": 0.2669, "step": 7612 }, { "epoch": 0.6031293325410972, "grad_norm": 1.8904863862344754, "learning_rate": 7.1839054994808145e-06, "loss": 0.3211, "step": 7613 }, { "epoch": 0.6032085561497326, "grad_norm": 1.4128432653830687, "learning_rate": 7.181443410067428e-06, "loss": 0.1389, "step": 7614 }, { "epoch": 0.603287779758368, "grad_norm": 1.3912848230152774, "learning_rate": 7.1789815062424325e-06, "loss": 0.1715, "step": 7615 }, { "epoch": 0.6033670033670033, "grad_norm": 1.6256105545046848, "learning_rate": 7.176519788167929e-06, "loss": 0.2261, "step": 7616 }, { "epoch": 0.6034462269756388, "grad_norm": 1.88972356241294, "learning_rate": 7.174058256006012e-06, "loss": 0.2462, "step": 7617 }, { "epoch": 0.6035254505842741, "grad_norm": 1.8384833083689796, "learning_rate": 7.171596909918763e-06, "loss": 0.2116, "step": 7618 }, { "epoch": 0.6036046741929095, "grad_norm": 1.707874816952542, "learning_rate": 7.169135750068247e-06, "loss": 0.2439, "step": 7619 }, { "epoch": 0.6036838978015449, "grad_norm": 1.5957493492893893, "learning_rate": 7.1666747766165226e-06, "loss": 0.2514, "step": 7620 }, { "epoch": 0.6037631214101802, "grad_norm": 1.4690219957079473, "learning_rate": 7.164213989725628e-06, "loss": 0.1721, "step": 7621 }, { "epoch": 0.6038423450188156, "grad_norm": 1.7962754932292309, "learning_rate": 7.1617533895575975e-06, "loss": 0.2104, "step": 7622 }, { "epoch": 0.6039215686274509, "grad_norm": 1.5232146411566614, "learning_rate": 7.1592929762744515e-06, "loss": 0.2063, "step": 7623 }, { "epoch": 0.6040007922360864, "grad_norm": 1.7222871363603482, "learning_rate": 7.156832750038192e-06, "loss": 0.2553, "step": 7624 }, { "epoch": 0.6040800158447217, "grad_norm": 1.525523311669195, "learning_rate": 7.154372711010815e-06, "loss": 0.234, "step": 7625 }, { "epoch": 0.6041592394533571, "grad_norm": 1.265709238587938, "learning_rate": 7.1519128593543065e-06, "loss": 0.1742, "step": 7626 }, { "epoch": 0.6042384630619925, "grad_norm": 1.3485829711943362, "learning_rate": 7.149453195230629e-06, "loss": 0.1773, "step": 7627 }, { "epoch": 0.6043176866706278, "grad_norm": 1.7329119017807024, "learning_rate": 7.1469937188017444e-06, "loss": 0.2286, "step": 7628 }, { "epoch": 0.6043969102792632, "grad_norm": 1.6193505478847148, "learning_rate": 7.144534430229595e-06, "loss": 0.2037, "step": 7629 }, { "epoch": 0.6044761338878986, "grad_norm": 1.5670374638429332, "learning_rate": 7.142075329676112e-06, "loss": 0.2326, "step": 7630 }, { "epoch": 0.604555357496534, "grad_norm": 1.564422532751707, "learning_rate": 7.139616417303221e-06, "loss": 0.2628, "step": 7631 }, { "epoch": 0.6046345811051693, "grad_norm": 1.8680598130350998, "learning_rate": 7.137157693272822e-06, "loss": 0.2541, "step": 7632 }, { "epoch": 0.6047138047138048, "grad_norm": 1.3916907195722301, "learning_rate": 7.1346991577468136e-06, "loss": 0.1987, "step": 7633 }, { "epoch": 0.6047930283224401, "grad_norm": 1.7075846033850193, "learning_rate": 7.132240810887083e-06, "loss": 0.2446, "step": 7634 }, { "epoch": 0.6048722519310754, "grad_norm": 1.6268395678137915, "learning_rate": 7.129782652855492e-06, "loss": 0.1631, "step": 7635 }, { "epoch": 0.6049514755397108, "grad_norm": 1.535001715235308, "learning_rate": 7.127324683813906e-06, "loss": 0.1956, "step": 7636 }, { "epoch": 0.6050306991483462, "grad_norm": 1.6055154731305206, "learning_rate": 7.124866903924164e-06, "loss": 0.2002, "step": 7637 }, { "epoch": 0.6051099227569816, "grad_norm": 1.5428118648356541, "learning_rate": 7.122409313348102e-06, "loss": 0.1383, "step": 7638 }, { "epoch": 0.6051891463656169, "grad_norm": 1.9405258789655566, "learning_rate": 7.119951912247545e-06, "loss": 0.2723, "step": 7639 }, { "epoch": 0.6052683699742524, "grad_norm": 1.5421867881043776, "learning_rate": 7.117494700784292e-06, "loss": 0.2086, "step": 7640 }, { "epoch": 0.6053475935828877, "grad_norm": 1.2954947964818078, "learning_rate": 7.115037679120147e-06, "loss": 0.1651, "step": 7641 }, { "epoch": 0.605426817191523, "grad_norm": 1.6585366235179484, "learning_rate": 7.112580847416886e-06, "loss": 0.2105, "step": 7642 }, { "epoch": 0.6055060408001585, "grad_norm": 1.652038128325132, "learning_rate": 7.110124205836283e-06, "loss": 0.2311, "step": 7643 }, { "epoch": 0.6055852644087938, "grad_norm": 1.2432018411473804, "learning_rate": 7.107667754540097e-06, "loss": 0.146, "step": 7644 }, { "epoch": 0.6056644880174292, "grad_norm": 1.4015713876787912, "learning_rate": 7.105211493690073e-06, "loss": 0.1666, "step": 7645 }, { "epoch": 0.6057437116260646, "grad_norm": 1.4908357237233425, "learning_rate": 7.102755423447941e-06, "loss": 0.2707, "step": 7646 }, { "epoch": 0.6058229352347, "grad_norm": 1.7435092907695804, "learning_rate": 7.100299543975426e-06, "loss": 0.234, "step": 7647 }, { "epoch": 0.6059021588433353, "grad_norm": 1.6902016222644076, "learning_rate": 7.097843855434232e-06, "loss": 0.1954, "step": 7648 }, { "epoch": 0.6059813824519706, "grad_norm": 1.2524984582562644, "learning_rate": 7.09538835798606e-06, "loss": 0.1754, "step": 7649 }, { "epoch": 0.6060606060606061, "grad_norm": 1.829532888756569, "learning_rate": 7.092933051792583e-06, "loss": 0.2642, "step": 7650 }, { "epoch": 0.6061398296692414, "grad_norm": 1.8607923212682247, "learning_rate": 7.090477937015479e-06, "loss": 0.2222, "step": 7651 }, { "epoch": 0.6062190532778768, "grad_norm": 1.6300736121866581, "learning_rate": 7.088023013816403e-06, "loss": 0.1911, "step": 7652 }, { "epoch": 0.6062982768865122, "grad_norm": 1.5452479706372202, "learning_rate": 7.085568282357e-06, "loss": 0.1262, "step": 7653 }, { "epoch": 0.6063775004951476, "grad_norm": 1.2677158022075257, "learning_rate": 7.083113742798901e-06, "loss": 0.1722, "step": 7654 }, { "epoch": 0.6064567241037829, "grad_norm": 1.5793223069701792, "learning_rate": 7.080659395303729e-06, "loss": 0.1809, "step": 7655 }, { "epoch": 0.6065359477124183, "grad_norm": 1.7591775682165742, "learning_rate": 7.078205240033087e-06, "loss": 0.289, "step": 7656 }, { "epoch": 0.6066151713210537, "grad_norm": 1.693679217202281, "learning_rate": 7.075751277148574e-06, "loss": 0.2241, "step": 7657 }, { "epoch": 0.606694394929689, "grad_norm": 1.5230415465470217, "learning_rate": 7.073297506811766e-06, "loss": 0.1873, "step": 7658 }, { "epoch": 0.6067736185383245, "grad_norm": 1.8421205282833288, "learning_rate": 7.0708439291842345e-06, "loss": 0.2423, "step": 7659 }, { "epoch": 0.6068528421469598, "grad_norm": 1.9853180657405853, "learning_rate": 7.068390544427539e-06, "loss": 0.2465, "step": 7660 }, { "epoch": 0.6069320657555952, "grad_norm": 1.2262155409784774, "learning_rate": 7.065937352703218e-06, "loss": 0.1266, "step": 7661 }, { "epoch": 0.6070112893642305, "grad_norm": 1.7988475703684603, "learning_rate": 7.063484354172804e-06, "loss": 0.2253, "step": 7662 }, { "epoch": 0.6070905129728659, "grad_norm": 1.5244720804925753, "learning_rate": 7.061031548997818e-06, "loss": 0.1989, "step": 7663 }, { "epoch": 0.6071697365815013, "grad_norm": 1.859338204933103, "learning_rate": 7.058578937339759e-06, "loss": 0.2297, "step": 7664 }, { "epoch": 0.6072489601901366, "grad_norm": 1.1953807654098614, "learning_rate": 7.056126519360129e-06, "loss": 0.1569, "step": 7665 }, { "epoch": 0.6073281837987721, "grad_norm": 1.4569953666997117, "learning_rate": 7.053674295220399e-06, "loss": 0.2211, "step": 7666 }, { "epoch": 0.6074074074074074, "grad_norm": 1.2131943333860487, "learning_rate": 7.05122226508204e-06, "loss": 0.1547, "step": 7667 }, { "epoch": 0.6074866310160428, "grad_norm": 1.8784771341243491, "learning_rate": 7.048770429106509e-06, "loss": 0.1855, "step": 7668 }, { "epoch": 0.6075658546246782, "grad_norm": 1.5410335799529344, "learning_rate": 7.0463187874552415e-06, "loss": 0.1515, "step": 7669 }, { "epoch": 0.6076450782333135, "grad_norm": 2.0534669847913, "learning_rate": 7.043867340289672e-06, "loss": 0.2554, "step": 7670 }, { "epoch": 0.6077243018419489, "grad_norm": 1.6593503718491958, "learning_rate": 7.0414160877712155e-06, "loss": 0.2313, "step": 7671 }, { "epoch": 0.6078035254505842, "grad_norm": 1.648280044604803, "learning_rate": 7.038965030061273e-06, "loss": 0.2631, "step": 7672 }, { "epoch": 0.6078827490592197, "grad_norm": 1.5558673075659173, "learning_rate": 7.0365141673212336e-06, "loss": 0.2418, "step": 7673 }, { "epoch": 0.607961972667855, "grad_norm": 1.4441030087286562, "learning_rate": 7.034063499712479e-06, "loss": 0.1983, "step": 7674 }, { "epoch": 0.6080411962764904, "grad_norm": 1.4971161597896698, "learning_rate": 7.031613027396369e-06, "loss": 0.219, "step": 7675 }, { "epoch": 0.6081204198851258, "grad_norm": 1.375924989171503, "learning_rate": 7.029162750534259e-06, "loss": 0.1398, "step": 7676 }, { "epoch": 0.6081996434937611, "grad_norm": 1.5114631984579001, "learning_rate": 7.02671266928749e-06, "loss": 0.2326, "step": 7677 }, { "epoch": 0.6082788671023965, "grad_norm": 1.663758060656713, "learning_rate": 7.024262783817382e-06, "loss": 0.2109, "step": 7678 }, { "epoch": 0.6083580907110319, "grad_norm": 1.1813630021656605, "learning_rate": 7.02181309428525e-06, "loss": 0.1507, "step": 7679 }, { "epoch": 0.6084373143196673, "grad_norm": 1.7339369630515733, "learning_rate": 7.0193636008524e-06, "loss": 0.1457, "step": 7680 }, { "epoch": 0.6085165379283026, "grad_norm": 1.6476003094931166, "learning_rate": 7.016914303680111e-06, "loss": 0.2583, "step": 7681 }, { "epoch": 0.6085957615369381, "grad_norm": 1.5549549286706958, "learning_rate": 7.014465202929665e-06, "loss": 0.2171, "step": 7682 }, { "epoch": 0.6086749851455734, "grad_norm": 1.762090939941784, "learning_rate": 7.012016298762317e-06, "loss": 0.1781, "step": 7683 }, { "epoch": 0.6087542087542087, "grad_norm": 2.113377464183182, "learning_rate": 7.009567591339319e-06, "loss": 0.1996, "step": 7684 }, { "epoch": 0.6088334323628442, "grad_norm": 1.4403072082298425, "learning_rate": 7.007119080821908e-06, "loss": 0.1582, "step": 7685 }, { "epoch": 0.6089126559714795, "grad_norm": 1.3993503290713227, "learning_rate": 7.004670767371302e-06, "loss": 0.1964, "step": 7686 }, { "epoch": 0.6089918795801149, "grad_norm": 1.794888262809066, "learning_rate": 7.002222651148714e-06, "loss": 0.2128, "step": 7687 }, { "epoch": 0.6090711031887502, "grad_norm": 1.961321137280887, "learning_rate": 6.999774732315343e-06, "loss": 0.1983, "step": 7688 }, { "epoch": 0.6091503267973856, "grad_norm": 1.522627209550531, "learning_rate": 6.9973270110323666e-06, "loss": 0.2549, "step": 7689 }, { "epoch": 0.609229550406021, "grad_norm": 1.7873904756391688, "learning_rate": 6.994879487460961e-06, "loss": 0.1871, "step": 7690 }, { "epoch": 0.6093087740146563, "grad_norm": 1.4441905795424799, "learning_rate": 6.992432161762278e-06, "loss": 0.1475, "step": 7691 }, { "epoch": 0.6093879976232918, "grad_norm": 1.748925683680036, "learning_rate": 6.989985034097466e-06, "loss": 0.1901, "step": 7692 }, { "epoch": 0.6094672212319271, "grad_norm": 1.34826325200304, "learning_rate": 6.9875381046276605e-06, "loss": 0.1818, "step": 7693 }, { "epoch": 0.6095464448405625, "grad_norm": 1.5428512100267902, "learning_rate": 6.985091373513972e-06, "loss": 0.2029, "step": 7694 }, { "epoch": 0.6096256684491979, "grad_norm": 1.8332183640525483, "learning_rate": 6.982644840917509e-06, "loss": 0.2164, "step": 7695 }, { "epoch": 0.6097048920578332, "grad_norm": 2.0335466422565385, "learning_rate": 6.980198506999368e-06, "loss": 0.222, "step": 7696 }, { "epoch": 0.6097841156664686, "grad_norm": 1.5290000515964255, "learning_rate": 6.977752371920623e-06, "loss": 0.218, "step": 7697 }, { "epoch": 0.6098633392751039, "grad_norm": 1.4460977793360785, "learning_rate": 6.975306435842344e-06, "loss": 0.1989, "step": 7698 }, { "epoch": 0.6099425628837394, "grad_norm": 1.6477341896030866, "learning_rate": 6.97286069892558e-06, "loss": 0.265, "step": 7699 }, { "epoch": 0.6100217864923747, "grad_norm": 1.8500834955056498, "learning_rate": 6.970415161331373e-06, "loss": 0.2353, "step": 7700 }, { "epoch": 0.6101010101010101, "grad_norm": 1.7667719491407283, "learning_rate": 6.967969823220752e-06, "loss": 0.2656, "step": 7701 }, { "epoch": 0.6101802337096455, "grad_norm": 1.4895866644490012, "learning_rate": 6.965524684754729e-06, "loss": 0.1598, "step": 7702 }, { "epoch": 0.6102594573182808, "grad_norm": 1.544962785482416, "learning_rate": 6.963079746094302e-06, "loss": 0.2379, "step": 7703 }, { "epoch": 0.6103386809269162, "grad_norm": 1.2851613208771364, "learning_rate": 6.960635007400465e-06, "loss": 0.131, "step": 7704 }, { "epoch": 0.6104179045355516, "grad_norm": 1.108919454077273, "learning_rate": 6.9581904688341854e-06, "loss": 0.1359, "step": 7705 }, { "epoch": 0.610497128144187, "grad_norm": 1.5599839118979857, "learning_rate": 6.955746130556429e-06, "loss": 0.239, "step": 7706 }, { "epoch": 0.6105763517528223, "grad_norm": 1.579605500268842, "learning_rate": 6.95330199272814e-06, "loss": 0.2092, "step": 7707 }, { "epoch": 0.6106555753614578, "grad_norm": 1.6134924206653056, "learning_rate": 6.950858055510254e-06, "loss": 0.2149, "step": 7708 }, { "epoch": 0.6107347989700931, "grad_norm": 1.688202024288464, "learning_rate": 6.948414319063696e-06, "loss": 0.1677, "step": 7709 }, { "epoch": 0.6108140225787284, "grad_norm": 1.1859036459295644, "learning_rate": 6.945970783549372e-06, "loss": 0.1573, "step": 7710 }, { "epoch": 0.6108932461873638, "grad_norm": 1.6004186265449694, "learning_rate": 6.943527449128174e-06, "loss": 0.1525, "step": 7711 }, { "epoch": 0.6109724697959992, "grad_norm": 1.5961064999682426, "learning_rate": 6.9410843159609905e-06, "loss": 0.2269, "step": 7712 }, { "epoch": 0.6110516934046346, "grad_norm": 1.7675686310963605, "learning_rate": 6.9386413842086845e-06, "loss": 0.2737, "step": 7713 }, { "epoch": 0.6111309170132699, "grad_norm": 1.4223694711734998, "learning_rate": 6.936198654032114e-06, "loss": 0.2081, "step": 7714 }, { "epoch": 0.6112101406219054, "grad_norm": 1.2302534938709742, "learning_rate": 6.933756125592117e-06, "loss": 0.1603, "step": 7715 }, { "epoch": 0.6112893642305407, "grad_norm": 1.401804184482667, "learning_rate": 6.931313799049526e-06, "loss": 0.1821, "step": 7716 }, { "epoch": 0.611368587839176, "grad_norm": 1.436751158303, "learning_rate": 6.928871674565158e-06, "loss": 0.167, "step": 7717 }, { "epoch": 0.6114478114478115, "grad_norm": 1.5394257648501508, "learning_rate": 6.926429752299812e-06, "loss": 0.2342, "step": 7718 }, { "epoch": 0.6115270350564468, "grad_norm": 1.845597553088207, "learning_rate": 6.923988032414277e-06, "loss": 0.2925, "step": 7719 }, { "epoch": 0.6116062586650822, "grad_norm": 1.334730553246682, "learning_rate": 6.9215465150693305e-06, "loss": 0.1611, "step": 7720 }, { "epoch": 0.6116854822737176, "grad_norm": 1.7648865297550416, "learning_rate": 6.919105200425733e-06, "loss": 0.2268, "step": 7721 }, { "epoch": 0.611764705882353, "grad_norm": 1.7981697561396313, "learning_rate": 6.916664088644234e-06, "loss": 0.2298, "step": 7722 }, { "epoch": 0.6118439294909883, "grad_norm": 1.6566984573709413, "learning_rate": 6.914223179885567e-06, "loss": 0.1867, "step": 7723 }, { "epoch": 0.6119231530996236, "grad_norm": 1.5154254423614493, "learning_rate": 6.911782474310456e-06, "loss": 0.1598, "step": 7724 }, { "epoch": 0.6120023767082591, "grad_norm": 1.708572628079177, "learning_rate": 6.909341972079613e-06, "loss": 0.259, "step": 7725 }, { "epoch": 0.6120816003168944, "grad_norm": 1.3275729146856206, "learning_rate": 6.9069016733537255e-06, "loss": 0.1838, "step": 7726 }, { "epoch": 0.6121608239255298, "grad_norm": 1.6814734664527193, "learning_rate": 6.904461578293483e-06, "loss": 0.1953, "step": 7727 }, { "epoch": 0.6122400475341652, "grad_norm": 1.7965669989457957, "learning_rate": 6.902021687059549e-06, "loss": 0.2683, "step": 7728 }, { "epoch": 0.6123192711428006, "grad_norm": 1.8788348195847642, "learning_rate": 6.89958199981258e-06, "loss": 0.287, "step": 7729 }, { "epoch": 0.6123984947514359, "grad_norm": 2.0586221880761424, "learning_rate": 6.89714251671322e-06, "loss": 0.2441, "step": 7730 }, { "epoch": 0.6124777183600713, "grad_norm": 1.339751772977914, "learning_rate": 6.894703237922094e-06, "loss": 0.1822, "step": 7731 }, { "epoch": 0.6125569419687067, "grad_norm": 2.0740174138075433, "learning_rate": 6.892264163599817e-06, "loss": 0.2834, "step": 7732 }, { "epoch": 0.612636165577342, "grad_norm": 1.5504725615822852, "learning_rate": 6.889825293906993e-06, "loss": 0.1994, "step": 7733 }, { "epoch": 0.6127153891859775, "grad_norm": 1.5590286484174887, "learning_rate": 6.887386629004207e-06, "loss": 0.2114, "step": 7734 }, { "epoch": 0.6127946127946128, "grad_norm": 1.5428772940580473, "learning_rate": 6.884948169052037e-06, "loss": 0.1975, "step": 7735 }, { "epoch": 0.6128738364032482, "grad_norm": 2.4669080910655863, "learning_rate": 6.88250991421104e-06, "loss": 0.2354, "step": 7736 }, { "epoch": 0.6129530600118835, "grad_norm": 1.545272880772027, "learning_rate": 6.880071864641762e-06, "loss": 0.1999, "step": 7737 }, { "epoch": 0.6130322836205189, "grad_norm": 1.298425861912459, "learning_rate": 6.8776340205047446e-06, "loss": 0.2083, "step": 7738 }, { "epoch": 0.6131115072291543, "grad_norm": 1.4435926098110319, "learning_rate": 6.875196381960498e-06, "loss": 0.1994, "step": 7739 }, { "epoch": 0.6131907308377896, "grad_norm": 1.1964840107975034, "learning_rate": 6.872758949169536e-06, "loss": 0.1677, "step": 7740 }, { "epoch": 0.6132699544464251, "grad_norm": 1.366476003600934, "learning_rate": 6.8703217222923525e-06, "loss": 0.2017, "step": 7741 }, { "epoch": 0.6133491780550604, "grad_norm": 1.1843385763406453, "learning_rate": 6.867884701489421e-06, "loss": 0.1219, "step": 7742 }, { "epoch": 0.6134284016636958, "grad_norm": 1.2974364316455382, "learning_rate": 6.865447886921215e-06, "loss": 0.0953, "step": 7743 }, { "epoch": 0.6135076252723312, "grad_norm": 1.7274479229808215, "learning_rate": 6.86301127874818e-06, "loss": 0.2562, "step": 7744 }, { "epoch": 0.6135868488809665, "grad_norm": 1.5496842102806074, "learning_rate": 6.860574877130757e-06, "loss": 0.2233, "step": 7745 }, { "epoch": 0.6136660724896019, "grad_norm": 1.467108760413429, "learning_rate": 6.8581386822293765e-06, "loss": 0.1905, "step": 7746 }, { "epoch": 0.6137452960982372, "grad_norm": 2.0026071055842762, "learning_rate": 6.8557026942044425e-06, "loss": 0.3157, "step": 7747 }, { "epoch": 0.6138245197068727, "grad_norm": 1.1907568652405425, "learning_rate": 6.853266913216357e-06, "loss": 0.1012, "step": 7748 }, { "epoch": 0.613903743315508, "grad_norm": 1.4096046627866865, "learning_rate": 6.850831339425508e-06, "loss": 0.1969, "step": 7749 }, { "epoch": 0.6139829669241434, "grad_norm": 1.4932945929157266, "learning_rate": 6.848395972992261e-06, "loss": 0.2058, "step": 7750 }, { "epoch": 0.6140621905327788, "grad_norm": 1.5450903853733862, "learning_rate": 6.845960814076973e-06, "loss": 0.2726, "step": 7751 }, { "epoch": 0.6141414141414141, "grad_norm": 1.582389236581247, "learning_rate": 6.8435258628399905e-06, "loss": 0.1955, "step": 7752 }, { "epoch": 0.6142206377500495, "grad_norm": 1.4360353473784515, "learning_rate": 6.841091119441639e-06, "loss": 0.2029, "step": 7753 }, { "epoch": 0.6142998613586849, "grad_norm": 1.4553466143703913, "learning_rate": 6.8386565840422385e-06, "loss": 0.2256, "step": 7754 }, { "epoch": 0.6143790849673203, "grad_norm": 1.4443511229085004, "learning_rate": 6.836222256802093e-06, "loss": 0.174, "step": 7755 }, { "epoch": 0.6144583085759556, "grad_norm": 1.5692939212542871, "learning_rate": 6.833788137881486e-06, "loss": 0.2543, "step": 7756 }, { "epoch": 0.6145375321845911, "grad_norm": 1.470408084499561, "learning_rate": 6.8313542274406964e-06, "loss": 0.1969, "step": 7757 }, { "epoch": 0.6146167557932264, "grad_norm": 1.049736007709788, "learning_rate": 6.828920525639985e-06, "loss": 0.1592, "step": 7758 }, { "epoch": 0.6146959794018617, "grad_norm": 1.2899274194319184, "learning_rate": 6.826487032639597e-06, "loss": 0.1583, "step": 7759 }, { "epoch": 0.6147752030104972, "grad_norm": 1.6403175528594522, "learning_rate": 6.8240537485997704e-06, "loss": 0.2151, "step": 7760 }, { "epoch": 0.6148544266191325, "grad_norm": 1.712396712258188, "learning_rate": 6.821620673680721e-06, "loss": 0.2516, "step": 7761 }, { "epoch": 0.6149336502277679, "grad_norm": 1.6236426175346228, "learning_rate": 6.819187808042656e-06, "loss": 0.2148, "step": 7762 }, { "epoch": 0.6150128738364032, "grad_norm": 1.6843082427058818, "learning_rate": 6.816755151845771e-06, "loss": 0.2313, "step": 7763 }, { "epoch": 0.6150920974450387, "grad_norm": 1.7917335483741674, "learning_rate": 6.814322705250241e-06, "loss": 0.2481, "step": 7764 }, { "epoch": 0.615171321053674, "grad_norm": 1.947116269951721, "learning_rate": 6.8118904684162325e-06, "loss": 0.3276, "step": 7765 }, { "epoch": 0.6152505446623093, "grad_norm": 1.695559285051137, "learning_rate": 6.8094584415038975e-06, "loss": 0.2957, "step": 7766 }, { "epoch": 0.6153297682709448, "grad_norm": 1.525461372475352, "learning_rate": 6.807026624673372e-06, "loss": 0.1682, "step": 7767 }, { "epoch": 0.6154089918795801, "grad_norm": 1.2115235720647932, "learning_rate": 6.80459501808478e-06, "loss": 0.1593, "step": 7768 }, { "epoch": 0.6154882154882155, "grad_norm": 1.4850956226928616, "learning_rate": 6.8021636218982275e-06, "loss": 0.1378, "step": 7769 }, { "epoch": 0.6155674390968509, "grad_norm": 1.7707901458498578, "learning_rate": 6.799732436273816e-06, "loss": 0.1713, "step": 7770 }, { "epoch": 0.6156466627054862, "grad_norm": 1.3081373022727951, "learning_rate": 6.797301461371626e-06, "loss": 0.1538, "step": 7771 }, { "epoch": 0.6157258863141216, "grad_norm": 1.519540473896888, "learning_rate": 6.7948706973517235e-06, "loss": 0.1993, "step": 7772 }, { "epoch": 0.6158051099227569, "grad_norm": 1.5413612759578315, "learning_rate": 6.792440144374162e-06, "loss": 0.2383, "step": 7773 }, { "epoch": 0.6158843335313924, "grad_norm": 1.2274614108806103, "learning_rate": 6.790009802598984e-06, "loss": 0.1618, "step": 7774 }, { "epoch": 0.6159635571400277, "grad_norm": 1.7465765715264476, "learning_rate": 6.787579672186215e-06, "loss": 0.244, "step": 7775 }, { "epoch": 0.6160427807486631, "grad_norm": 1.1561774305577743, "learning_rate": 6.78514975329587e-06, "loss": 0.1273, "step": 7776 }, { "epoch": 0.6161220043572985, "grad_norm": 1.4510180561565333, "learning_rate": 6.78272004608794e-06, "loss": 0.1678, "step": 7777 }, { "epoch": 0.6162012279659338, "grad_norm": 1.77449949108753, "learning_rate": 6.780290550722417e-06, "loss": 0.3752, "step": 7778 }, { "epoch": 0.6162804515745692, "grad_norm": 1.2822063468207703, "learning_rate": 6.777861267359272e-06, "loss": 0.1562, "step": 7779 }, { "epoch": 0.6163596751832046, "grad_norm": 2.024009239336556, "learning_rate": 6.7754321961584535e-06, "loss": 0.1841, "step": 7780 }, { "epoch": 0.61643889879184, "grad_norm": 1.3148775566139552, "learning_rate": 6.773003337279911e-06, "loss": 0.1429, "step": 7781 }, { "epoch": 0.6165181224004753, "grad_norm": 1.5292328856004207, "learning_rate": 6.7705746908835734e-06, "loss": 0.2279, "step": 7782 }, { "epoch": 0.6165973460091108, "grad_norm": 1.3347754511220324, "learning_rate": 6.768146257129351e-06, "loss": 0.1628, "step": 7783 }, { "epoch": 0.6166765696177461, "grad_norm": 1.8389521658442611, "learning_rate": 6.765718036177148e-06, "loss": 0.2983, "step": 7784 }, { "epoch": 0.6167557932263814, "grad_norm": 1.5770412851955178, "learning_rate": 6.763290028186849e-06, "loss": 0.2232, "step": 7785 }, { "epoch": 0.6168350168350168, "grad_norm": 1.7306297190951418, "learning_rate": 6.760862233318327e-06, "loss": 0.2333, "step": 7786 }, { "epoch": 0.6169142404436522, "grad_norm": 1.9176375084991417, "learning_rate": 6.758434651731445e-06, "loss": 0.2297, "step": 7787 }, { "epoch": 0.6169934640522876, "grad_norm": 1.815241484907113, "learning_rate": 6.756007283586039e-06, "loss": 0.1927, "step": 7788 }, { "epoch": 0.6170726876609229, "grad_norm": 1.4777259085232395, "learning_rate": 6.753580129041945e-06, "loss": 0.2125, "step": 7789 }, { "epoch": 0.6171519112695584, "grad_norm": 1.492731248945997, "learning_rate": 6.751153188258983e-06, "loss": 0.2192, "step": 7790 }, { "epoch": 0.6172311348781937, "grad_norm": 2.0077460775507228, "learning_rate": 6.748726461396946e-06, "loss": 0.1549, "step": 7791 }, { "epoch": 0.617310358486829, "grad_norm": 1.261488324419248, "learning_rate": 6.7462999486156315e-06, "loss": 0.1587, "step": 7792 }, { "epoch": 0.6173895820954645, "grad_norm": 1.2649155249767312, "learning_rate": 6.743873650074807e-06, "loss": 0.1577, "step": 7793 }, { "epoch": 0.6174688057040998, "grad_norm": 1.5651750934289832, "learning_rate": 6.741447565934236e-06, "loss": 0.208, "step": 7794 }, { "epoch": 0.6175480293127352, "grad_norm": 1.2790467364672031, "learning_rate": 6.739021696353665e-06, "loss": 0.1304, "step": 7795 }, { "epoch": 0.6176272529213706, "grad_norm": 1.9962154254589741, "learning_rate": 6.736596041492821e-06, "loss": 0.2469, "step": 7796 }, { "epoch": 0.617706476530006, "grad_norm": 1.8078689585872731, "learning_rate": 6.734170601511427e-06, "loss": 0.2865, "step": 7797 }, { "epoch": 0.6177857001386413, "grad_norm": 1.4626140932035738, "learning_rate": 6.7317453765691855e-06, "loss": 0.1615, "step": 7798 }, { "epoch": 0.6178649237472766, "grad_norm": 1.160603390317872, "learning_rate": 6.729320366825785e-06, "loss": 0.1276, "step": 7799 }, { "epoch": 0.6179441473559121, "grad_norm": 1.4890843096656736, "learning_rate": 6.726895572440901e-06, "loss": 0.2062, "step": 7800 }, { "epoch": 0.6180233709645474, "grad_norm": 2.058948624716214, "learning_rate": 6.7244709935741925e-06, "loss": 0.243, "step": 7801 }, { "epoch": 0.6181025945731828, "grad_norm": 1.8117926833993583, "learning_rate": 6.722046630385309e-06, "loss": 0.2471, "step": 7802 }, { "epoch": 0.6181818181818182, "grad_norm": 1.7202056452665793, "learning_rate": 6.719622483033883e-06, "loss": 0.2233, "step": 7803 }, { "epoch": 0.6182610417904536, "grad_norm": 1.894869509695651, "learning_rate": 6.7171985516795315e-06, "loss": 0.1641, "step": 7804 }, { "epoch": 0.6183402653990889, "grad_norm": 1.6469666587090124, "learning_rate": 6.714774836481862e-06, "loss": 0.2267, "step": 7805 }, { "epoch": 0.6184194890077243, "grad_norm": 1.554052023337749, "learning_rate": 6.71235133760046e-06, "loss": 0.1635, "step": 7806 }, { "epoch": 0.6184987126163597, "grad_norm": 1.4420637540528445, "learning_rate": 6.709928055194902e-06, "loss": 0.1475, "step": 7807 }, { "epoch": 0.618577936224995, "grad_norm": 1.467254789872434, "learning_rate": 6.707504989424753e-06, "loss": 0.2286, "step": 7808 }, { "epoch": 0.6186571598336305, "grad_norm": 1.393884241931426, "learning_rate": 6.705082140449557e-06, "loss": 0.1594, "step": 7809 }, { "epoch": 0.6187363834422658, "grad_norm": 1.7762687766445593, "learning_rate": 6.702659508428847e-06, "loss": 0.214, "step": 7810 }, { "epoch": 0.6188156070509012, "grad_norm": 1.552419636603836, "learning_rate": 6.7002370935221454e-06, "loss": 0.2276, "step": 7811 }, { "epoch": 0.6188948306595365, "grad_norm": 1.377759892930884, "learning_rate": 6.697814895888951e-06, "loss": 0.1766, "step": 7812 }, { "epoch": 0.6189740542681719, "grad_norm": 1.6026921271772006, "learning_rate": 6.695392915688759e-06, "loss": 0.1879, "step": 7813 }, { "epoch": 0.6190532778768073, "grad_norm": 1.7550905475913694, "learning_rate": 6.692971153081041e-06, "loss": 0.2527, "step": 7814 }, { "epoch": 0.6191325014854426, "grad_norm": 1.436363118299248, "learning_rate": 6.690549608225258e-06, "loss": 0.166, "step": 7815 }, { "epoch": 0.6192117250940781, "grad_norm": 1.4071843002807982, "learning_rate": 6.688128281280863e-06, "loss": 0.1752, "step": 7816 }, { "epoch": 0.6192909487027134, "grad_norm": 1.3391895742895448, "learning_rate": 6.685707172407284e-06, "loss": 0.1536, "step": 7817 }, { "epoch": 0.6193701723113488, "grad_norm": 2.1293464971881866, "learning_rate": 6.683286281763939e-06, "loss": 0.1827, "step": 7818 }, { "epoch": 0.6194493959199842, "grad_norm": 1.396718845030991, "learning_rate": 6.6808656095102365e-06, "loss": 0.1559, "step": 7819 }, { "epoch": 0.6195286195286195, "grad_norm": 1.2961154980735496, "learning_rate": 6.6784451558055596e-06, "loss": 0.1639, "step": 7820 }, { "epoch": 0.6196078431372549, "grad_norm": 1.7605337476373997, "learning_rate": 6.67602492080929e-06, "loss": 0.1778, "step": 7821 }, { "epoch": 0.6196870667458902, "grad_norm": 1.7126802352449526, "learning_rate": 6.6736049046807815e-06, "loss": 0.1984, "step": 7822 }, { "epoch": 0.6197662903545257, "grad_norm": 1.3718199537462228, "learning_rate": 6.671185107579387e-06, "loss": 0.1321, "step": 7823 }, { "epoch": 0.619845513963161, "grad_norm": 1.3652329684378686, "learning_rate": 6.668765529664436e-06, "loss": 0.0989, "step": 7824 }, { "epoch": 0.6199247375717964, "grad_norm": 1.4527634699668837, "learning_rate": 6.6663461710952445e-06, "loss": 0.1805, "step": 7825 }, { "epoch": 0.6200039611804318, "grad_norm": 1.573924393543757, "learning_rate": 6.663927032031118e-06, "loss": 0.2202, "step": 7826 }, { "epoch": 0.6200831847890671, "grad_norm": 1.9794009597699758, "learning_rate": 6.661508112631347e-06, "loss": 0.191, "step": 7827 }, { "epoch": 0.6201624083977025, "grad_norm": 2.02077421575022, "learning_rate": 6.659089413055202e-06, "loss": 0.2438, "step": 7828 }, { "epoch": 0.6202416320063379, "grad_norm": 1.6340115751289448, "learning_rate": 6.656670933461942e-06, "loss": 0.2239, "step": 7829 }, { "epoch": 0.6203208556149733, "grad_norm": 1.303156684618766, "learning_rate": 6.654252674010815e-06, "loss": 0.1333, "step": 7830 }, { "epoch": 0.6204000792236086, "grad_norm": 1.395190634999042, "learning_rate": 6.6518346348610484e-06, "loss": 0.1337, "step": 7831 }, { "epoch": 0.6204793028322441, "grad_norm": 1.3471323328538636, "learning_rate": 6.649416816171861e-06, "loss": 0.193, "step": 7832 }, { "epoch": 0.6205585264408794, "grad_norm": 1.7693470753972365, "learning_rate": 6.646999218102457e-06, "loss": 0.1921, "step": 7833 }, { "epoch": 0.6206377500495147, "grad_norm": 2.006796199403565, "learning_rate": 6.644581840812019e-06, "loss": 0.2262, "step": 7834 }, { "epoch": 0.6207169736581502, "grad_norm": 1.753958188899018, "learning_rate": 6.64216468445972e-06, "loss": 0.2657, "step": 7835 }, { "epoch": 0.6207961972667855, "grad_norm": 1.633385184679226, "learning_rate": 6.639747749204723e-06, "loss": 0.2329, "step": 7836 }, { "epoch": 0.6208754208754209, "grad_norm": 1.4922652045363833, "learning_rate": 6.637331035206166e-06, "loss": 0.1316, "step": 7837 }, { "epoch": 0.6209546444840562, "grad_norm": 1.783399984258043, "learning_rate": 6.634914542623182e-06, "loss": 0.2565, "step": 7838 }, { "epoch": 0.6210338680926917, "grad_norm": 1.484108623254551, "learning_rate": 6.632498271614882e-06, "loss": 0.2606, "step": 7839 }, { "epoch": 0.621113091701327, "grad_norm": 1.5065649642025296, "learning_rate": 6.630082222340366e-06, "loss": 0.2385, "step": 7840 }, { "epoch": 0.6211923153099623, "grad_norm": 1.7025695628454245, "learning_rate": 6.627666394958725e-06, "loss": 0.2565, "step": 7841 }, { "epoch": 0.6212715389185978, "grad_norm": 2.40305701513319, "learning_rate": 6.625250789629021e-06, "loss": 0.2346, "step": 7842 }, { "epoch": 0.6213507625272331, "grad_norm": 1.3311065730431535, "learning_rate": 6.622835406510315e-06, "loss": 0.1648, "step": 7843 }, { "epoch": 0.6214299861358685, "grad_norm": 1.5544699034926304, "learning_rate": 6.620420245761651e-06, "loss": 0.1878, "step": 7844 }, { "epoch": 0.6215092097445039, "grad_norm": 1.4568278149859846, "learning_rate": 6.6180053075420484e-06, "loss": 0.1697, "step": 7845 }, { "epoch": 0.6215884333531392, "grad_norm": 1.7742298442586866, "learning_rate": 6.615590592010526e-06, "loss": 0.2611, "step": 7846 }, { "epoch": 0.6216676569617746, "grad_norm": 2.6871618122073873, "learning_rate": 6.613176099326077e-06, "loss": 0.2244, "step": 7847 }, { "epoch": 0.6217468805704099, "grad_norm": 1.2969903310902295, "learning_rate": 6.610761829647685e-06, "loss": 0.1597, "step": 7848 }, { "epoch": 0.6218261041790454, "grad_norm": 1.4145144500394258, "learning_rate": 6.608347783134319e-06, "loss": 0.2058, "step": 7849 }, { "epoch": 0.6219053277876807, "grad_norm": 1.4969661273895694, "learning_rate": 6.605933959944933e-06, "loss": 0.2164, "step": 7850 }, { "epoch": 0.6219845513963161, "grad_norm": 1.3614776938651494, "learning_rate": 6.603520360238462e-06, "loss": 0.1955, "step": 7851 }, { "epoch": 0.6220637750049515, "grad_norm": 1.8176205987293823, "learning_rate": 6.601106984173835e-06, "loss": 0.2122, "step": 7852 }, { "epoch": 0.6221429986135868, "grad_norm": 1.8053032324702936, "learning_rate": 6.598693831909957e-06, "loss": 0.2071, "step": 7853 }, { "epoch": 0.6222222222222222, "grad_norm": 1.6599932776281034, "learning_rate": 6.596280903605725e-06, "loss": 0.2571, "step": 7854 }, { "epoch": 0.6223014458308576, "grad_norm": 1.4472467412106185, "learning_rate": 6.593868199420017e-06, "loss": 0.212, "step": 7855 }, { "epoch": 0.622380669439493, "grad_norm": 1.5127955763863512, "learning_rate": 6.591455719511699e-06, "loss": 0.2028, "step": 7856 }, { "epoch": 0.6224598930481283, "grad_norm": 1.6375932304639091, "learning_rate": 6.589043464039624e-06, "loss": 0.3083, "step": 7857 }, { "epoch": 0.6225391166567638, "grad_norm": 1.9057316741438, "learning_rate": 6.58663143316262e-06, "loss": 0.285, "step": 7858 }, { "epoch": 0.6226183402653991, "grad_norm": 1.6177226168771455, "learning_rate": 6.584219627039513e-06, "loss": 0.2264, "step": 7859 }, { "epoch": 0.6226975638740344, "grad_norm": 1.4088879237062937, "learning_rate": 6.58180804582911e-06, "loss": 0.2026, "step": 7860 }, { "epoch": 0.6227767874826698, "grad_norm": 1.34170129169816, "learning_rate": 6.579396689690198e-06, "loss": 0.1917, "step": 7861 }, { "epoch": 0.6228560110913052, "grad_norm": 1.4353674413909872, "learning_rate": 6.576985558781557e-06, "loss": 0.1923, "step": 7862 }, { "epoch": 0.6229352346999406, "grad_norm": 1.6550588694569888, "learning_rate": 6.574574653261945e-06, "loss": 0.2382, "step": 7863 }, { "epoch": 0.6230144583085759, "grad_norm": 1.520229713270655, "learning_rate": 6.572163973290109e-06, "loss": 0.2127, "step": 7864 }, { "epoch": 0.6230936819172114, "grad_norm": 1.8793905473605552, "learning_rate": 6.569753519024784e-06, "loss": 0.2602, "step": 7865 }, { "epoch": 0.6231729055258467, "grad_norm": 1.7599093716223355, "learning_rate": 6.567343290624683e-06, "loss": 0.2268, "step": 7866 }, { "epoch": 0.623252129134482, "grad_norm": 1.1536658109816318, "learning_rate": 6.564933288248509e-06, "loss": 0.1206, "step": 7867 }, { "epoch": 0.6233313527431175, "grad_norm": 1.698620187974912, "learning_rate": 6.562523512054951e-06, "loss": 0.1711, "step": 7868 }, { "epoch": 0.6234105763517528, "grad_norm": 1.2768363169274073, "learning_rate": 6.560113962202679e-06, "loss": 0.1192, "step": 7869 }, { "epoch": 0.6234897999603882, "grad_norm": 1.6253243281763008, "learning_rate": 6.557704638850352e-06, "loss": 0.1982, "step": 7870 }, { "epoch": 0.6235690235690236, "grad_norm": 1.5837026098292162, "learning_rate": 6.555295542156609e-06, "loss": 0.2504, "step": 7871 }, { "epoch": 0.623648247177659, "grad_norm": 1.6127401725297927, "learning_rate": 6.55288667228008e-06, "loss": 0.1785, "step": 7872 }, { "epoch": 0.6237274707862943, "grad_norm": 2.2175356392876604, "learning_rate": 6.550478029379379e-06, "loss": 0.2498, "step": 7873 }, { "epoch": 0.6238066943949296, "grad_norm": 1.7599761311327187, "learning_rate": 6.548069613613099e-06, "loss": 0.2263, "step": 7874 }, { "epoch": 0.6238859180035651, "grad_norm": 1.9372065568810095, "learning_rate": 6.545661425139827e-06, "loss": 0.2443, "step": 7875 }, { "epoch": 0.6239651416122004, "grad_norm": 1.4477781353879597, "learning_rate": 6.543253464118131e-06, "loss": 0.1502, "step": 7876 }, { "epoch": 0.6240443652208358, "grad_norm": 1.6544573759020447, "learning_rate": 6.540845730706557e-06, "loss": 0.1924, "step": 7877 }, { "epoch": 0.6241235888294712, "grad_norm": 1.5159022206176498, "learning_rate": 6.538438225063653e-06, "loss": 0.1717, "step": 7878 }, { "epoch": 0.6242028124381066, "grad_norm": 1.4576665570617475, "learning_rate": 6.536030947347931e-06, "loss": 0.1343, "step": 7879 }, { "epoch": 0.6242820360467419, "grad_norm": 2.0244353800357553, "learning_rate": 6.533623897717905e-06, "loss": 0.2323, "step": 7880 }, { "epoch": 0.6243612596553773, "grad_norm": 1.9392669886849387, "learning_rate": 6.531217076332068e-06, "loss": 0.2666, "step": 7881 }, { "epoch": 0.6244404832640127, "grad_norm": 1.281287698891154, "learning_rate": 6.528810483348893e-06, "loss": 0.1574, "step": 7882 }, { "epoch": 0.624519706872648, "grad_norm": 1.6101515704384937, "learning_rate": 6.526404118926848e-06, "loss": 0.1787, "step": 7883 }, { "epoch": 0.6245989304812835, "grad_norm": 1.25469824957441, "learning_rate": 6.523997983224375e-06, "loss": 0.1561, "step": 7884 }, { "epoch": 0.6246781540899188, "grad_norm": 1.7500032693895498, "learning_rate": 6.52159207639991e-06, "loss": 0.2571, "step": 7885 }, { "epoch": 0.6247573776985542, "grad_norm": 1.6246852739696336, "learning_rate": 6.519186398611872e-06, "loss": 0.17, "step": 7886 }, { "epoch": 0.6248366013071895, "grad_norm": 1.7273253689408932, "learning_rate": 6.51678095001866e-06, "loss": 0.1682, "step": 7887 }, { "epoch": 0.6249158249158249, "grad_norm": 1.317922407366493, "learning_rate": 6.51437573077866e-06, "loss": 0.1542, "step": 7888 }, { "epoch": 0.6249950485244603, "grad_norm": 1.2180161169104873, "learning_rate": 6.5119707410502495e-06, "loss": 0.1292, "step": 7889 }, { "epoch": 0.6250742721330956, "grad_norm": 1.3836435381393863, "learning_rate": 6.509565980991781e-06, "loss": 0.1934, "step": 7890 }, { "epoch": 0.6251534957417311, "grad_norm": 1.3183575081037062, "learning_rate": 6.5071614507615985e-06, "loss": 0.1424, "step": 7891 }, { "epoch": 0.6252327193503664, "grad_norm": 1.3125494246746818, "learning_rate": 6.5047571505180265e-06, "loss": 0.1587, "step": 7892 }, { "epoch": 0.6253119429590018, "grad_norm": 2.050193006069645, "learning_rate": 6.502353080419379e-06, "loss": 0.3957, "step": 7893 }, { "epoch": 0.6253911665676372, "grad_norm": 1.7876277907671507, "learning_rate": 6.4999492406239525e-06, "loss": 0.1996, "step": 7894 }, { "epoch": 0.6254703901762725, "grad_norm": 1.490852829465011, "learning_rate": 6.497545631290025e-06, "loss": 0.1917, "step": 7895 }, { "epoch": 0.6255496137849079, "grad_norm": 1.3170881655199036, "learning_rate": 6.495142252575866e-06, "loss": 0.1758, "step": 7896 }, { "epoch": 0.6256288373935432, "grad_norm": 1.5944141139829973, "learning_rate": 6.492739104639727e-06, "loss": 0.1948, "step": 7897 }, { "epoch": 0.6257080610021787, "grad_norm": 1.5936983327495715, "learning_rate": 6.490336187639841e-06, "loss": 0.1439, "step": 7898 }, { "epoch": 0.625787284610814, "grad_norm": 1.5801527451779789, "learning_rate": 6.487933501734429e-06, "loss": 0.2152, "step": 7899 }, { "epoch": 0.6258665082194494, "grad_norm": 1.6507551259810622, "learning_rate": 6.485531047081697e-06, "loss": 0.1595, "step": 7900 }, { "epoch": 0.6259457318280848, "grad_norm": 2.27734118783868, "learning_rate": 6.483128823839835e-06, "loss": 0.276, "step": 7901 }, { "epoch": 0.6260249554367201, "grad_norm": 1.565796823546716, "learning_rate": 6.480726832167019e-06, "loss": 0.1672, "step": 7902 }, { "epoch": 0.6261041790453555, "grad_norm": 1.5137469420682084, "learning_rate": 6.4783250722214066e-06, "loss": 0.1605, "step": 7903 }, { "epoch": 0.6261834026539909, "grad_norm": 1.1787520873598003, "learning_rate": 6.475923544161142e-06, "loss": 0.1633, "step": 7904 }, { "epoch": 0.6262626262626263, "grad_norm": 1.159742081282386, "learning_rate": 6.473522248144359e-06, "loss": 0.1469, "step": 7905 }, { "epoch": 0.6263418498712616, "grad_norm": 1.2783348596837976, "learning_rate": 6.471121184329167e-06, "loss": 0.1124, "step": 7906 }, { "epoch": 0.6264210734798971, "grad_norm": 1.7741075046613175, "learning_rate": 6.468720352873662e-06, "loss": 0.2386, "step": 7907 }, { "epoch": 0.6265002970885324, "grad_norm": 1.4752064146101964, "learning_rate": 6.466319753935933e-06, "loss": 0.1936, "step": 7908 }, { "epoch": 0.6265795206971677, "grad_norm": 1.284769464375227, "learning_rate": 6.463919387674043e-06, "loss": 0.1575, "step": 7909 }, { "epoch": 0.6266587443058032, "grad_norm": 2.099376180836153, "learning_rate": 6.461519254246046e-06, "loss": 0.3644, "step": 7910 }, { "epoch": 0.6267379679144385, "grad_norm": 1.491872246312762, "learning_rate": 6.459119353809982e-06, "loss": 0.1701, "step": 7911 }, { "epoch": 0.6268171915230739, "grad_norm": 1.8710698682188274, "learning_rate": 6.45671968652387e-06, "loss": 0.2716, "step": 7912 }, { "epoch": 0.6268964151317092, "grad_norm": 1.7134978834300063, "learning_rate": 6.4543202525457175e-06, "loss": 0.2674, "step": 7913 }, { "epoch": 0.6269756387403447, "grad_norm": 1.4898753288129385, "learning_rate": 6.451921052033516e-06, "loss": 0.2058, "step": 7914 }, { "epoch": 0.62705486234898, "grad_norm": 1.615596523491488, "learning_rate": 6.449522085145241e-06, "loss": 0.2131, "step": 7915 }, { "epoch": 0.6271340859576153, "grad_norm": 1.3833741430795214, "learning_rate": 6.447123352038853e-06, "loss": 0.1284, "step": 7916 }, { "epoch": 0.6272133095662508, "grad_norm": 1.4985700035634617, "learning_rate": 6.444724852872297e-06, "loss": 0.1886, "step": 7917 }, { "epoch": 0.6272925331748861, "grad_norm": 1.4725338963061434, "learning_rate": 6.4423265878035015e-06, "loss": 0.1576, "step": 7918 }, { "epoch": 0.6273717567835215, "grad_norm": 1.4798489321096, "learning_rate": 6.439928556990382e-06, "loss": 0.1729, "step": 7919 }, { "epoch": 0.6274509803921569, "grad_norm": 1.5319312001039236, "learning_rate": 6.437530760590838e-06, "loss": 0.2167, "step": 7920 }, { "epoch": 0.6275302040007923, "grad_norm": 2.5943755672081648, "learning_rate": 6.435133198762751e-06, "loss": 0.2177, "step": 7921 }, { "epoch": 0.6276094276094276, "grad_norm": 1.5021069287035707, "learning_rate": 6.432735871663991e-06, "loss": 0.1498, "step": 7922 }, { "epoch": 0.6276886512180629, "grad_norm": 1.3621013039014824, "learning_rate": 6.430338779452407e-06, "loss": 0.158, "step": 7923 }, { "epoch": 0.6277678748266984, "grad_norm": 1.4806787941253534, "learning_rate": 6.4279419222858416e-06, "loss": 0.2161, "step": 7924 }, { "epoch": 0.6278470984353337, "grad_norm": 1.419841761954037, "learning_rate": 6.4255453003221115e-06, "loss": 0.2166, "step": 7925 }, { "epoch": 0.6279263220439691, "grad_norm": 1.3091978430645574, "learning_rate": 6.423148913719022e-06, "loss": 0.1468, "step": 7926 }, { "epoch": 0.6280055456526045, "grad_norm": 1.673773160867503, "learning_rate": 6.420752762634369e-06, "loss": 0.2671, "step": 7927 }, { "epoch": 0.6280847692612398, "grad_norm": 2.004494127782503, "learning_rate": 6.4183568472259216e-06, "loss": 0.2465, "step": 7928 }, { "epoch": 0.6281639928698752, "grad_norm": 1.5316460797627829, "learning_rate": 6.415961167651443e-06, "loss": 0.1813, "step": 7929 }, { "epoch": 0.6282432164785106, "grad_norm": 1.4609092935370414, "learning_rate": 6.413565724068678e-06, "loss": 0.1987, "step": 7930 }, { "epoch": 0.628322440087146, "grad_norm": 1.6128836549760186, "learning_rate": 6.4111705166353525e-06, "loss": 0.2445, "step": 7931 }, { "epoch": 0.6284016636957813, "grad_norm": 1.5546820546173823, "learning_rate": 6.40877554550918e-06, "loss": 0.232, "step": 7932 }, { "epoch": 0.6284808873044168, "grad_norm": 1.4104342061217787, "learning_rate": 6.406380810847856e-06, "loss": 0.1533, "step": 7933 }, { "epoch": 0.6285601109130521, "grad_norm": 1.7638370266461518, "learning_rate": 6.403986312809065e-06, "loss": 0.2476, "step": 7934 }, { "epoch": 0.6286393345216874, "grad_norm": 1.3556109123773417, "learning_rate": 6.401592051550475e-06, "loss": 0.1523, "step": 7935 }, { "epoch": 0.6287185581303228, "grad_norm": 1.659517186936443, "learning_rate": 6.399198027229732e-06, "loss": 0.2067, "step": 7936 }, { "epoch": 0.6287977817389582, "grad_norm": 1.5913665494212388, "learning_rate": 6.39680424000447e-06, "loss": 0.2012, "step": 7937 }, { "epoch": 0.6288770053475936, "grad_norm": 1.3731528814564413, "learning_rate": 6.3944106900323174e-06, "loss": 0.2018, "step": 7938 }, { "epoch": 0.6289562289562289, "grad_norm": 1.6091954723861581, "learning_rate": 6.392017377470867e-06, "loss": 0.2359, "step": 7939 }, { "epoch": 0.6290354525648644, "grad_norm": 1.4311807733127115, "learning_rate": 6.389624302477715e-06, "loss": 0.2047, "step": 7940 }, { "epoch": 0.6291146761734997, "grad_norm": 1.4836520355515233, "learning_rate": 6.387231465210428e-06, "loss": 0.1784, "step": 7941 }, { "epoch": 0.629193899782135, "grad_norm": 1.9051714198751553, "learning_rate": 6.384838865826567e-06, "loss": 0.2349, "step": 7942 }, { "epoch": 0.6292731233907705, "grad_norm": 1.453331753354582, "learning_rate": 6.382446504483672e-06, "loss": 0.18, "step": 7943 }, { "epoch": 0.6293523469994058, "grad_norm": 1.6644564917854334, "learning_rate": 6.380054381339267e-06, "loss": 0.2612, "step": 7944 }, { "epoch": 0.6294315706080412, "grad_norm": 1.4451494250246275, "learning_rate": 6.377662496550863e-06, "loss": 0.1703, "step": 7945 }, { "epoch": 0.6295107942166766, "grad_norm": 1.8435397218283522, "learning_rate": 6.375270850275956e-06, "loss": 0.2207, "step": 7946 }, { "epoch": 0.629590017825312, "grad_norm": 1.400221202007302, "learning_rate": 6.37287944267202e-06, "loss": 0.1446, "step": 7947 }, { "epoch": 0.6296692414339473, "grad_norm": 1.723450942102276, "learning_rate": 6.370488273896522e-06, "loss": 0.1875, "step": 7948 }, { "epoch": 0.6297484650425826, "grad_norm": 1.7713840152730844, "learning_rate": 6.368097344106905e-06, "loss": 0.2051, "step": 7949 }, { "epoch": 0.6298276886512181, "grad_norm": 1.3937403365606027, "learning_rate": 6.365706653460602e-06, "loss": 0.1856, "step": 7950 }, { "epoch": 0.6299069122598534, "grad_norm": 1.5131896324848333, "learning_rate": 6.363316202115033e-06, "loss": 0.1781, "step": 7951 }, { "epoch": 0.6299861358684888, "grad_norm": 1.3320947120240656, "learning_rate": 6.3609259902275884e-06, "loss": 0.1332, "step": 7952 }, { "epoch": 0.6300653594771242, "grad_norm": 1.495553962659539, "learning_rate": 6.358536017955659e-06, "loss": 0.2295, "step": 7953 }, { "epoch": 0.6301445830857596, "grad_norm": 1.7521066539337526, "learning_rate": 6.3561462854566135e-06, "loss": 0.1702, "step": 7954 }, { "epoch": 0.6302238066943949, "grad_norm": 1.749518352108375, "learning_rate": 6.3537567928878e-06, "loss": 0.2705, "step": 7955 }, { "epoch": 0.6303030303030303, "grad_norm": 1.4152723006226362, "learning_rate": 6.3513675404065575e-06, "loss": 0.118, "step": 7956 }, { "epoch": 0.6303822539116657, "grad_norm": 1.543881319263055, "learning_rate": 6.348978528170205e-06, "loss": 0.2047, "step": 7957 }, { "epoch": 0.630461477520301, "grad_norm": 1.2754375667232551, "learning_rate": 6.34658975633605e-06, "loss": 0.1566, "step": 7958 }, { "epoch": 0.6305407011289365, "grad_norm": 1.278788340479489, "learning_rate": 6.344201225061382e-06, "loss": 0.1586, "step": 7959 }, { "epoch": 0.6306199247375718, "grad_norm": 1.7023676523676752, "learning_rate": 6.341812934503469e-06, "loss": 0.1867, "step": 7960 }, { "epoch": 0.6306991483462072, "grad_norm": 1.261038584144999, "learning_rate": 6.339424884819574e-06, "loss": 0.1401, "step": 7961 }, { "epoch": 0.6307783719548425, "grad_norm": 1.5919417404903258, "learning_rate": 6.337037076166939e-06, "loss": 0.1946, "step": 7962 }, { "epoch": 0.6308575955634779, "grad_norm": 1.529102757026119, "learning_rate": 6.334649508702784e-06, "loss": 0.1759, "step": 7963 }, { "epoch": 0.6309368191721133, "grad_norm": 1.2855492439484082, "learning_rate": 6.332262182584325e-06, "loss": 0.1555, "step": 7964 }, { "epoch": 0.6310160427807486, "grad_norm": 1.7316650697955416, "learning_rate": 6.3298750979687515e-06, "loss": 0.1791, "step": 7965 }, { "epoch": 0.6310952663893841, "grad_norm": 1.3883250428557548, "learning_rate": 6.327488255013244e-06, "loss": 0.1399, "step": 7966 }, { "epoch": 0.6311744899980194, "grad_norm": 1.8687207046112349, "learning_rate": 6.325101653874965e-06, "loss": 0.2542, "step": 7967 }, { "epoch": 0.6312537136066548, "grad_norm": 1.6830502920266992, "learning_rate": 6.322715294711057e-06, "loss": 0.2258, "step": 7968 }, { "epoch": 0.6313329372152902, "grad_norm": 1.3694793381171697, "learning_rate": 6.320329177678656e-06, "loss": 0.1634, "step": 7969 }, { "epoch": 0.6314121608239255, "grad_norm": 1.7936779428889684, "learning_rate": 6.31794330293487e-06, "loss": 0.2409, "step": 7970 }, { "epoch": 0.6314913844325609, "grad_norm": 1.466690001573824, "learning_rate": 6.315557670636803e-06, "loss": 0.2139, "step": 7971 }, { "epoch": 0.6315706080411962, "grad_norm": 1.960243302503434, "learning_rate": 6.313172280941534e-06, "loss": 0.2877, "step": 7972 }, { "epoch": 0.6316498316498317, "grad_norm": 1.3559473092017202, "learning_rate": 6.31078713400613e-06, "loss": 0.18, "step": 7973 }, { "epoch": 0.631729055258467, "grad_norm": 1.8740920555756857, "learning_rate": 6.308402229987641e-06, "loss": 0.1647, "step": 7974 }, { "epoch": 0.6318082788671024, "grad_norm": 1.9188085070924887, "learning_rate": 6.3060175690431055e-06, "loss": 0.2129, "step": 7975 }, { "epoch": 0.6318875024757378, "grad_norm": 1.206371226723691, "learning_rate": 6.303633151329535e-06, "loss": 0.1141, "step": 7976 }, { "epoch": 0.6319667260843731, "grad_norm": 1.358942659246891, "learning_rate": 6.3012489770039396e-06, "loss": 0.1555, "step": 7977 }, { "epoch": 0.6320459496930085, "grad_norm": 1.966331961576497, "learning_rate": 6.2988650462232995e-06, "loss": 0.2249, "step": 7978 }, { "epoch": 0.6321251733016439, "grad_norm": 1.328642638467315, "learning_rate": 6.296481359144587e-06, "loss": 0.1313, "step": 7979 }, { "epoch": 0.6322043969102793, "grad_norm": 1.7156812657943237, "learning_rate": 6.29409791592476e-06, "loss": 0.2576, "step": 7980 }, { "epoch": 0.6322836205189146, "grad_norm": 1.4620505214140445, "learning_rate": 6.2917147167207495e-06, "loss": 0.1296, "step": 7981 }, { "epoch": 0.6323628441275501, "grad_norm": 1.2551153104109596, "learning_rate": 6.289331761689482e-06, "loss": 0.1693, "step": 7982 }, { "epoch": 0.6324420677361854, "grad_norm": 1.776775111513521, "learning_rate": 6.286949050987868e-06, "loss": 0.2249, "step": 7983 }, { "epoch": 0.6325212913448207, "grad_norm": 1.6480028185670865, "learning_rate": 6.284566584772791e-06, "loss": 0.1856, "step": 7984 }, { "epoch": 0.6326005149534562, "grad_norm": 1.229197666278799, "learning_rate": 6.2821843632011245e-06, "loss": 0.1475, "step": 7985 }, { "epoch": 0.6326797385620915, "grad_norm": 2.0871086808008945, "learning_rate": 6.2798023864297315e-06, "loss": 0.2383, "step": 7986 }, { "epoch": 0.6327589621707269, "grad_norm": 1.384433276064647, "learning_rate": 6.277420654615449e-06, "loss": 0.1619, "step": 7987 }, { "epoch": 0.6328381857793622, "grad_norm": 1.2986283054057641, "learning_rate": 6.275039167915103e-06, "loss": 0.1619, "step": 7988 }, { "epoch": 0.6329174093879977, "grad_norm": 1.1253483275005327, "learning_rate": 6.2726579264855084e-06, "loss": 0.1078, "step": 7989 }, { "epoch": 0.632996632996633, "grad_norm": 1.647147818670608, "learning_rate": 6.270276930483451e-06, "loss": 0.1988, "step": 7990 }, { "epoch": 0.6330758566052683, "grad_norm": 1.370382546463394, "learning_rate": 6.267896180065711e-06, "loss": 0.1607, "step": 7991 }, { "epoch": 0.6331550802139038, "grad_norm": 1.3730958137614806, "learning_rate": 6.265515675389053e-06, "loss": 0.1264, "step": 7992 }, { "epoch": 0.6332343038225391, "grad_norm": 1.714674195744542, "learning_rate": 6.263135416610217e-06, "loss": 0.2455, "step": 7993 }, { "epoch": 0.6333135274311745, "grad_norm": 1.561534783085332, "learning_rate": 6.260755403885934e-06, "loss": 0.2773, "step": 7994 }, { "epoch": 0.6333927510398099, "grad_norm": 1.6434314106560817, "learning_rate": 6.258375637372914e-06, "loss": 0.1878, "step": 7995 }, { "epoch": 0.6334719746484453, "grad_norm": 1.5689953445907228, "learning_rate": 6.2559961172278545e-06, "loss": 0.1968, "step": 7996 }, { "epoch": 0.6335511982570806, "grad_norm": 1.731654609938004, "learning_rate": 6.253616843607439e-06, "loss": 0.2052, "step": 7997 }, { "epoch": 0.6336304218657159, "grad_norm": 1.4905339583145942, "learning_rate": 6.251237816668324e-06, "loss": 0.2378, "step": 7998 }, { "epoch": 0.6337096454743514, "grad_norm": 1.439622356072016, "learning_rate": 6.248859036567162e-06, "loss": 0.2048, "step": 7999 }, { "epoch": 0.6337888690829867, "grad_norm": 1.468302441482144, "learning_rate": 6.246480503460585e-06, "loss": 0.2264, "step": 8000 }, { "epoch": 0.6338680926916221, "grad_norm": 1.6697032661549593, "learning_rate": 6.2441022175052034e-06, "loss": 0.2144, "step": 8001 }, { "epoch": 0.6339473163002575, "grad_norm": 1.5476388956752491, "learning_rate": 6.241724178857621e-06, "loss": 0.1465, "step": 8002 }, { "epoch": 0.6340265399088929, "grad_norm": 1.5791840926184573, "learning_rate": 6.2393463876744165e-06, "loss": 0.1702, "step": 8003 }, { "epoch": 0.6341057635175282, "grad_norm": 1.4380500215576228, "learning_rate": 6.236968844112157e-06, "loss": 0.1442, "step": 8004 }, { "epoch": 0.6341849871261636, "grad_norm": 1.763594671092584, "learning_rate": 6.234591548327393e-06, "loss": 0.2856, "step": 8005 }, { "epoch": 0.634264210734799, "grad_norm": 1.8216817820410947, "learning_rate": 6.232214500476657e-06, "loss": 0.1784, "step": 8006 }, { "epoch": 0.6343434343434343, "grad_norm": 1.5048471111398678, "learning_rate": 6.229837700716465e-06, "loss": 0.2304, "step": 8007 }, { "epoch": 0.6344226579520698, "grad_norm": 1.2514229945530448, "learning_rate": 6.227461149203324e-06, "loss": 0.1691, "step": 8008 }, { "epoch": 0.6345018815607051, "grad_norm": 1.4342496892213084, "learning_rate": 6.225084846093711e-06, "loss": 0.1324, "step": 8009 }, { "epoch": 0.6345811051693404, "grad_norm": 1.5452281036492033, "learning_rate": 6.222708791544098e-06, "loss": 0.1848, "step": 8010 }, { "epoch": 0.6346603287779758, "grad_norm": 1.6125328562744934, "learning_rate": 6.220332985710936e-06, "loss": 0.2322, "step": 8011 }, { "epoch": 0.6347395523866112, "grad_norm": 1.6867538648031914, "learning_rate": 6.21795742875066e-06, "loss": 0.2695, "step": 8012 }, { "epoch": 0.6348187759952466, "grad_norm": 1.542098298416949, "learning_rate": 6.21558212081969e-06, "loss": 0.2412, "step": 8013 }, { "epoch": 0.6348979996038819, "grad_norm": 1.5604046403215577, "learning_rate": 6.213207062074427e-06, "loss": 0.1929, "step": 8014 }, { "epoch": 0.6349772232125174, "grad_norm": 1.2822064583776942, "learning_rate": 6.210832252671257e-06, "loss": 0.1631, "step": 8015 }, { "epoch": 0.6350564468211527, "grad_norm": 1.3393994963057834, "learning_rate": 6.208457692766554e-06, "loss": 0.1456, "step": 8016 }, { "epoch": 0.635135670429788, "grad_norm": 1.4663882948028086, "learning_rate": 6.206083382516665e-06, "loss": 0.168, "step": 8017 }, { "epoch": 0.6352148940384235, "grad_norm": 1.4450798951628312, "learning_rate": 6.203709322077933e-06, "loss": 0.1876, "step": 8018 }, { "epoch": 0.6352941176470588, "grad_norm": 1.2051542041998966, "learning_rate": 6.201335511606673e-06, "loss": 0.1521, "step": 8019 }, { "epoch": 0.6353733412556942, "grad_norm": 2.32793696185281, "learning_rate": 6.198961951259193e-06, "loss": 0.2423, "step": 8020 }, { "epoch": 0.6354525648643295, "grad_norm": 1.4219563770923758, "learning_rate": 6.196588641191778e-06, "loss": 0.191, "step": 8021 }, { "epoch": 0.635531788472965, "grad_norm": 1.3256285093862534, "learning_rate": 6.194215581560701e-06, "loss": 0.1817, "step": 8022 }, { "epoch": 0.6356110120816003, "grad_norm": 1.598663212139321, "learning_rate": 6.191842772522214e-06, "loss": 0.1942, "step": 8023 }, { "epoch": 0.6356902356902356, "grad_norm": 1.7049793762495458, "learning_rate": 6.18947021423256e-06, "loss": 0.2757, "step": 8024 }, { "epoch": 0.6357694592988711, "grad_norm": 1.2680387316636024, "learning_rate": 6.187097906847954e-06, "loss": 0.1202, "step": 8025 }, { "epoch": 0.6358486829075064, "grad_norm": 1.5761126784924702, "learning_rate": 6.184725850524608e-06, "loss": 0.1928, "step": 8026 }, { "epoch": 0.6359279065161418, "grad_norm": 1.7719856018142073, "learning_rate": 6.182354045418704e-06, "loss": 0.2083, "step": 8027 }, { "epoch": 0.6360071301247772, "grad_norm": 1.610537671700797, "learning_rate": 6.179982491686416e-06, "loss": 0.1607, "step": 8028 }, { "epoch": 0.6360863537334126, "grad_norm": 1.3328018725701702, "learning_rate": 6.177611189483903e-06, "loss": 0.1896, "step": 8029 }, { "epoch": 0.6361655773420479, "grad_norm": 1.5406390887106478, "learning_rate": 6.175240138967299e-06, "loss": 0.1942, "step": 8030 }, { "epoch": 0.6362448009506833, "grad_norm": 1.4811725336319033, "learning_rate": 6.172869340292729e-06, "loss": 0.1503, "step": 8031 }, { "epoch": 0.6363240245593187, "grad_norm": 1.183938542177892, "learning_rate": 6.170498793616298e-06, "loss": 0.1209, "step": 8032 }, { "epoch": 0.636403248167954, "grad_norm": 1.3349842459706298, "learning_rate": 6.168128499094095e-06, "loss": 0.134, "step": 8033 }, { "epoch": 0.6364824717765895, "grad_norm": 1.4687541232142616, "learning_rate": 6.165758456882193e-06, "loss": 0.2101, "step": 8034 }, { "epoch": 0.6365616953852248, "grad_norm": 1.5803293593594072, "learning_rate": 6.163388667136646e-06, "loss": 0.2098, "step": 8035 }, { "epoch": 0.6366409189938602, "grad_norm": 1.4815992085433343, "learning_rate": 6.161019130013495e-06, "loss": 0.1764, "step": 8036 }, { "epoch": 0.6367201426024955, "grad_norm": 1.836571651703349, "learning_rate": 6.158649845668764e-06, "loss": 0.2195, "step": 8037 }, { "epoch": 0.6367993662111309, "grad_norm": 1.0736019083955324, "learning_rate": 6.156280814258455e-06, "loss": 0.1194, "step": 8038 }, { "epoch": 0.6368785898197663, "grad_norm": 1.3978079984978895, "learning_rate": 6.153912035938559e-06, "loss": 0.15, "step": 8039 }, { "epoch": 0.6369578134284016, "grad_norm": 1.466119670831299, "learning_rate": 6.151543510865053e-06, "loss": 0.1445, "step": 8040 }, { "epoch": 0.6370370370370371, "grad_norm": 1.4546352972210452, "learning_rate": 6.149175239193887e-06, "loss": 0.226, "step": 8041 }, { "epoch": 0.6371162606456724, "grad_norm": 1.007920686287903, "learning_rate": 6.1468072210810035e-06, "loss": 0.0947, "step": 8042 }, { "epoch": 0.6371954842543078, "grad_norm": 1.8470985737799304, "learning_rate": 6.144439456682323e-06, "loss": 0.2584, "step": 8043 }, { "epoch": 0.6372747078629432, "grad_norm": 1.6944984526212388, "learning_rate": 6.142071946153751e-06, "loss": 0.2167, "step": 8044 }, { "epoch": 0.6373539314715785, "grad_norm": 1.4051801216471371, "learning_rate": 6.139704689651181e-06, "loss": 0.1365, "step": 8045 }, { "epoch": 0.6374331550802139, "grad_norm": 1.7739276664167622, "learning_rate": 6.1373376873304814e-06, "loss": 0.1506, "step": 8046 }, { "epoch": 0.6375123786888492, "grad_norm": 1.2714198648717325, "learning_rate": 6.134970939347511e-06, "loss": 0.0867, "step": 8047 }, { "epoch": 0.6375916022974847, "grad_norm": 1.0336402065408437, "learning_rate": 6.132604445858104e-06, "loss": 0.0899, "step": 8048 }, { "epoch": 0.63767082590612, "grad_norm": 1.3218458711585934, "learning_rate": 6.130238207018085e-06, "loss": 0.1308, "step": 8049 }, { "epoch": 0.6377500495147554, "grad_norm": 1.4970597557743586, "learning_rate": 6.127872222983264e-06, "loss": 0.2102, "step": 8050 }, { "epoch": 0.6378292731233908, "grad_norm": 1.2775757732140645, "learning_rate": 6.125506493909422e-06, "loss": 0.143, "step": 8051 }, { "epoch": 0.6379084967320261, "grad_norm": 1.7512105988245497, "learning_rate": 6.123141019952334e-06, "loss": 0.2026, "step": 8052 }, { "epoch": 0.6379877203406615, "grad_norm": 1.5907695307831884, "learning_rate": 6.1207758012677595e-06, "loss": 0.1636, "step": 8053 }, { "epoch": 0.6380669439492969, "grad_norm": 1.328670161964659, "learning_rate": 6.11841083801143e-06, "loss": 0.1514, "step": 8054 }, { "epoch": 0.6381461675579323, "grad_norm": 1.8310762988425435, "learning_rate": 6.116046130339073e-06, "loss": 0.2295, "step": 8055 }, { "epoch": 0.6382253911665676, "grad_norm": 1.9124203676510925, "learning_rate": 6.1136816784063855e-06, "loss": 0.2446, "step": 8056 }, { "epoch": 0.6383046147752031, "grad_norm": 1.668881435707919, "learning_rate": 6.1113174823690615e-06, "loss": 0.1616, "step": 8057 }, { "epoch": 0.6383838383838384, "grad_norm": 1.4796825705987922, "learning_rate": 6.108953542382771e-06, "loss": 0.1854, "step": 8058 }, { "epoch": 0.6384630619924737, "grad_norm": 1.2890164689913686, "learning_rate": 6.106589858603167e-06, "loss": 0.1013, "step": 8059 }, { "epoch": 0.6385422856011091, "grad_norm": 1.5697355718352175, "learning_rate": 6.1042264311858845e-06, "loss": 0.2417, "step": 8060 }, { "epoch": 0.6386215092097445, "grad_norm": 2.177511507154298, "learning_rate": 6.101863260286551e-06, "loss": 0.2829, "step": 8061 }, { "epoch": 0.6387007328183799, "grad_norm": 1.1668171544310744, "learning_rate": 6.099500346060765e-06, "loss": 0.0986, "step": 8062 }, { "epoch": 0.6387799564270152, "grad_norm": 1.4728280492770907, "learning_rate": 6.09713768866411e-06, "loss": 0.168, "step": 8063 }, { "epoch": 0.6388591800356507, "grad_norm": 1.3612029718774463, "learning_rate": 6.094775288252157e-06, "loss": 0.1492, "step": 8064 }, { "epoch": 0.638938403644286, "grad_norm": 1.8434027969930993, "learning_rate": 6.092413144980465e-06, "loss": 0.2012, "step": 8065 }, { "epoch": 0.6390176272529213, "grad_norm": 1.9581884779647414, "learning_rate": 6.090051259004563e-06, "loss": 0.1881, "step": 8066 }, { "epoch": 0.6390968508615568, "grad_norm": 1.4612143064453014, "learning_rate": 6.087689630479974e-06, "loss": 0.2167, "step": 8067 }, { "epoch": 0.6391760744701921, "grad_norm": 2.0995615313927676, "learning_rate": 6.085328259562195e-06, "loss": 0.2361, "step": 8068 }, { "epoch": 0.6392552980788275, "grad_norm": 1.2713930323094393, "learning_rate": 6.082967146406714e-06, "loss": 0.1411, "step": 8069 }, { "epoch": 0.6393345216874629, "grad_norm": 1.6035218399675333, "learning_rate": 6.0806062911690025e-06, "loss": 0.2151, "step": 8070 }, { "epoch": 0.6394137452960983, "grad_norm": 1.5684345086926923, "learning_rate": 6.078245694004503e-06, "loss": 0.1732, "step": 8071 }, { "epoch": 0.6394929689047336, "grad_norm": 1.5788360301344264, "learning_rate": 6.075885355068658e-06, "loss": 0.2223, "step": 8072 }, { "epoch": 0.6395721925133689, "grad_norm": 1.896384841125591, "learning_rate": 6.073525274516879e-06, "loss": 0.2049, "step": 8073 }, { "epoch": 0.6396514161220044, "grad_norm": 1.3144364732799731, "learning_rate": 6.071165452504568e-06, "loss": 0.1972, "step": 8074 }, { "epoch": 0.6397306397306397, "grad_norm": 1.4977411743415656, "learning_rate": 6.068805889187109e-06, "loss": 0.1864, "step": 8075 }, { "epoch": 0.6398098633392751, "grad_norm": 2.09234114149141, "learning_rate": 6.066446584719864e-06, "loss": 0.1632, "step": 8076 }, { "epoch": 0.6398890869479105, "grad_norm": 1.4428675353555533, "learning_rate": 6.064087539258186e-06, "loss": 0.2058, "step": 8077 }, { "epoch": 0.6399683105565459, "grad_norm": 1.7317110855492674, "learning_rate": 6.061728752957406e-06, "loss": 0.1766, "step": 8078 }, { "epoch": 0.6400475341651812, "grad_norm": 1.715467553779848, "learning_rate": 6.059370225972834e-06, "loss": 0.2017, "step": 8079 }, { "epoch": 0.6401267577738166, "grad_norm": 1.3981638790005932, "learning_rate": 6.057011958459776e-06, "loss": 0.1643, "step": 8080 }, { "epoch": 0.640205981382452, "grad_norm": 1.9805349399470895, "learning_rate": 6.0546539505735055e-06, "loss": 0.1824, "step": 8081 }, { "epoch": 0.6402852049910873, "grad_norm": 2.07334063393122, "learning_rate": 6.052296202469288e-06, "loss": 0.3186, "step": 8082 }, { "epoch": 0.6403644285997228, "grad_norm": 1.8820150157041597, "learning_rate": 6.049938714302372e-06, "loss": 0.3467, "step": 8083 }, { "epoch": 0.6404436522083581, "grad_norm": 1.494153780691814, "learning_rate": 6.047581486227984e-06, "loss": 0.1572, "step": 8084 }, { "epoch": 0.6405228758169934, "grad_norm": 1.1695806773679274, "learning_rate": 6.045224518401338e-06, "loss": 0.1727, "step": 8085 }, { "epoch": 0.6406020994256288, "grad_norm": 3.175382121935935, "learning_rate": 6.04286781097763e-06, "loss": 0.3162, "step": 8086 }, { "epoch": 0.6406813230342642, "grad_norm": 1.5960691033404628, "learning_rate": 6.040511364112034e-06, "loss": 0.2031, "step": 8087 }, { "epoch": 0.6407605466428996, "grad_norm": 1.8020629298657165, "learning_rate": 6.038155177959715e-06, "loss": 0.1975, "step": 8088 }, { "epoch": 0.6408397702515349, "grad_norm": 1.3449893604145793, "learning_rate": 6.035799252675811e-06, "loss": 0.1279, "step": 8089 }, { "epoch": 0.6409189938601704, "grad_norm": 1.388994974244062, "learning_rate": 6.0334435884154526e-06, "loss": 0.142, "step": 8090 }, { "epoch": 0.6409982174688057, "grad_norm": 1.9092794094730143, "learning_rate": 6.031088185333751e-06, "loss": 0.2786, "step": 8091 }, { "epoch": 0.641077441077441, "grad_norm": 1.6587613465913738, "learning_rate": 6.028733043585793e-06, "loss": 0.178, "step": 8092 }, { "epoch": 0.6411566646860765, "grad_norm": 1.6876684821454455, "learning_rate": 6.026378163326654e-06, "loss": 0.2329, "step": 8093 }, { "epoch": 0.6412358882947118, "grad_norm": 1.3562833297180532, "learning_rate": 6.024023544711396e-06, "loss": 0.1408, "step": 8094 }, { "epoch": 0.6413151119033472, "grad_norm": 1.563157825770762, "learning_rate": 6.021669187895054e-06, "loss": 0.1734, "step": 8095 }, { "epoch": 0.6413943355119825, "grad_norm": 1.7509772789149751, "learning_rate": 6.019315093032656e-06, "loss": 0.2235, "step": 8096 }, { "epoch": 0.641473559120618, "grad_norm": 1.8134298364919954, "learning_rate": 6.016961260279204e-06, "loss": 0.2151, "step": 8097 }, { "epoch": 0.6415527827292533, "grad_norm": 1.2975379844934873, "learning_rate": 6.0146076897896865e-06, "loss": 0.1656, "step": 8098 }, { "epoch": 0.6416320063378886, "grad_norm": 2.1523475691966545, "learning_rate": 6.012254381719078e-06, "loss": 0.2399, "step": 8099 }, { "epoch": 0.6417112299465241, "grad_norm": 1.3792555952901389, "learning_rate": 6.0099013362223305e-06, "loss": 0.1523, "step": 8100 }, { "epoch": 0.6417904535551594, "grad_norm": 1.8247424909559966, "learning_rate": 6.007548553454379e-06, "loss": 0.2029, "step": 8101 }, { "epoch": 0.6418696771637948, "grad_norm": 1.7282070409500825, "learning_rate": 6.005196033570147e-06, "loss": 0.1895, "step": 8102 }, { "epoch": 0.6419489007724302, "grad_norm": 1.703459325217148, "learning_rate": 6.002843776724534e-06, "loss": 0.2743, "step": 8103 }, { "epoch": 0.6420281243810656, "grad_norm": 1.4141519713853017, "learning_rate": 6.000491783072426e-06, "loss": 0.1719, "step": 8104 }, { "epoch": 0.6421073479897009, "grad_norm": 1.5040055911682912, "learning_rate": 5.998140052768687e-06, "loss": 0.1692, "step": 8105 }, { "epoch": 0.6421865715983363, "grad_norm": 1.396176221166053, "learning_rate": 5.995788585968171e-06, "loss": 0.1717, "step": 8106 }, { "epoch": 0.6422657952069717, "grad_norm": 1.3728313497012354, "learning_rate": 5.993437382825711e-06, "loss": 0.1443, "step": 8107 }, { "epoch": 0.642345018815607, "grad_norm": 1.6668970586745686, "learning_rate": 5.991086443496119e-06, "loss": 0.1624, "step": 8108 }, { "epoch": 0.6424242424242425, "grad_norm": 1.6886237236606727, "learning_rate": 5.9887357681341955e-06, "loss": 0.1811, "step": 8109 }, { "epoch": 0.6425034660328778, "grad_norm": 1.8403077008745683, "learning_rate": 5.9863853568947215e-06, "loss": 0.2747, "step": 8110 }, { "epoch": 0.6425826896415132, "grad_norm": 1.3107299689935732, "learning_rate": 5.9840352099324595e-06, "loss": 0.161, "step": 8111 }, { "epoch": 0.6426619132501485, "grad_norm": 1.3216407299508082, "learning_rate": 5.981685327402156e-06, "loss": 0.1133, "step": 8112 }, { "epoch": 0.6427411368587839, "grad_norm": 1.4087810415080189, "learning_rate": 5.9793357094585365e-06, "loss": 0.1648, "step": 8113 }, { "epoch": 0.6428203604674193, "grad_norm": 1.4012284831722943, "learning_rate": 5.976986356256316e-06, "loss": 0.1856, "step": 8114 }, { "epoch": 0.6428995840760546, "grad_norm": 1.3091420768442874, "learning_rate": 5.974637267950187e-06, "loss": 0.1695, "step": 8115 }, { "epoch": 0.6429788076846901, "grad_norm": 1.332204940460913, "learning_rate": 5.972288444694822e-06, "loss": 0.1816, "step": 8116 }, { "epoch": 0.6430580312933254, "grad_norm": 1.719432836983353, "learning_rate": 5.9699398866448846e-06, "loss": 0.2237, "step": 8117 }, { "epoch": 0.6431372549019608, "grad_norm": 1.6451888621811877, "learning_rate": 5.967591593955016e-06, "loss": 0.2473, "step": 8118 }, { "epoch": 0.6432164785105962, "grad_norm": 1.2462144379853615, "learning_rate": 5.965243566779837e-06, "loss": 0.1873, "step": 8119 }, { "epoch": 0.6432957021192315, "grad_norm": 1.6682313182268105, "learning_rate": 5.962895805273956e-06, "loss": 0.2454, "step": 8120 }, { "epoch": 0.6433749257278669, "grad_norm": 1.522359786660533, "learning_rate": 5.960548309591958e-06, "loss": 0.1661, "step": 8121 }, { "epoch": 0.6434541493365022, "grad_norm": 1.8856366859578277, "learning_rate": 5.958201079888419e-06, "loss": 0.2052, "step": 8122 }, { "epoch": 0.6435333729451377, "grad_norm": 1.6425198675212893, "learning_rate": 5.9558541163178915e-06, "loss": 0.1978, "step": 8123 }, { "epoch": 0.643612596553773, "grad_norm": 1.8107131526831806, "learning_rate": 5.953507419034911e-06, "loss": 0.2065, "step": 8124 }, { "epoch": 0.6436918201624084, "grad_norm": 1.7537881617961941, "learning_rate": 5.951160988193998e-06, "loss": 0.296, "step": 8125 }, { "epoch": 0.6437710437710438, "grad_norm": 1.4564610174228458, "learning_rate": 5.948814823949649e-06, "loss": 0.1765, "step": 8126 }, { "epoch": 0.6438502673796791, "grad_norm": 1.5708870506658843, "learning_rate": 5.946468926456352e-06, "loss": 0.1873, "step": 8127 }, { "epoch": 0.6439294909883145, "grad_norm": 0.9833438986469489, "learning_rate": 5.944123295868574e-06, "loss": 0.0986, "step": 8128 }, { "epoch": 0.6440087145969499, "grad_norm": 1.55627241612504, "learning_rate": 5.9417779323407576e-06, "loss": 0.1905, "step": 8129 }, { "epoch": 0.6440879382055853, "grad_norm": 1.65050525282498, "learning_rate": 5.939432836027339e-06, "loss": 0.2036, "step": 8130 }, { "epoch": 0.6441671618142206, "grad_norm": 1.527701089722438, "learning_rate": 5.937088007082731e-06, "loss": 0.1863, "step": 8131 }, { "epoch": 0.6442463854228561, "grad_norm": 1.619678841610621, "learning_rate": 5.934743445661326e-06, "loss": 0.1869, "step": 8132 }, { "epoch": 0.6443256090314914, "grad_norm": 1.5934641414297468, "learning_rate": 5.932399151917507e-06, "loss": 0.2128, "step": 8133 }, { "epoch": 0.6444048326401267, "grad_norm": 1.4272613283253828, "learning_rate": 5.93005512600563e-06, "loss": 0.2056, "step": 8134 }, { "epoch": 0.6444840562487621, "grad_norm": 1.4895738772604328, "learning_rate": 5.92771136808004e-06, "loss": 0.1895, "step": 8135 }, { "epoch": 0.6445632798573975, "grad_norm": 2.090757004547409, "learning_rate": 5.925367878295063e-06, "loss": 0.1864, "step": 8136 }, { "epoch": 0.6446425034660329, "grad_norm": 1.5269085503875042, "learning_rate": 5.9230246568050035e-06, "loss": 0.1647, "step": 8137 }, { "epoch": 0.6447217270746682, "grad_norm": 1.2712634497428263, "learning_rate": 5.920681703764153e-06, "loss": 0.1466, "step": 8138 }, { "epoch": 0.6448009506833037, "grad_norm": 1.7711467342966756, "learning_rate": 5.918339019326789e-06, "loss": 0.2039, "step": 8139 }, { "epoch": 0.644880174291939, "grad_norm": 1.6106787105529103, "learning_rate": 5.915996603647157e-06, "loss": 0.2296, "step": 8140 }, { "epoch": 0.6449593979005743, "grad_norm": 1.9063362943262643, "learning_rate": 5.913654456879496e-06, "loss": 0.2585, "step": 8141 }, { "epoch": 0.6450386215092098, "grad_norm": 1.7339440527619234, "learning_rate": 5.911312579178028e-06, "loss": 0.2452, "step": 8142 }, { "epoch": 0.6451178451178451, "grad_norm": 1.5062218026206124, "learning_rate": 5.908970970696955e-06, "loss": 0.2279, "step": 8143 }, { "epoch": 0.6451970687264805, "grad_norm": 1.3909630211376605, "learning_rate": 5.906629631590457e-06, "loss": 0.1188, "step": 8144 }, { "epoch": 0.6452762923351159, "grad_norm": 1.3228113989492796, "learning_rate": 5.904288562012703e-06, "loss": 0.1277, "step": 8145 }, { "epoch": 0.6453555159437513, "grad_norm": 1.3890436089938607, "learning_rate": 5.901947762117838e-06, "loss": 0.1785, "step": 8146 }, { "epoch": 0.6454347395523866, "grad_norm": 1.4704428395392795, "learning_rate": 5.899607232059994e-06, "loss": 0.174, "step": 8147 }, { "epoch": 0.6455139631610219, "grad_norm": 1.3975944557489626, "learning_rate": 5.897266971993286e-06, "loss": 0.2414, "step": 8148 }, { "epoch": 0.6455931867696574, "grad_norm": 1.5378497945727905, "learning_rate": 5.894926982071805e-06, "loss": 0.1695, "step": 8149 }, { "epoch": 0.6456724103782927, "grad_norm": 1.7136073115435613, "learning_rate": 5.892587262449631e-06, "loss": 0.2202, "step": 8150 }, { "epoch": 0.6457516339869281, "grad_norm": 1.9443667063843741, "learning_rate": 5.890247813280822e-06, "loss": 0.2088, "step": 8151 }, { "epoch": 0.6458308575955635, "grad_norm": 1.9345781076500688, "learning_rate": 5.8879086347194196e-06, "loss": 0.193, "step": 8152 }, { "epoch": 0.6459100812041989, "grad_norm": 1.5134019496722813, "learning_rate": 5.885569726919449e-06, "loss": 0.1522, "step": 8153 }, { "epoch": 0.6459893048128342, "grad_norm": 1.3538760965517709, "learning_rate": 5.883231090034911e-06, "loss": 0.1808, "step": 8154 }, { "epoch": 0.6460685284214696, "grad_norm": 1.685558222770563, "learning_rate": 5.8808927242197984e-06, "loss": 0.2095, "step": 8155 }, { "epoch": 0.646147752030105, "grad_norm": 1.4062584319448033, "learning_rate": 5.878554629628081e-06, "loss": 0.1904, "step": 8156 }, { "epoch": 0.6462269756387403, "grad_norm": 1.4476301404899559, "learning_rate": 5.87621680641371e-06, "loss": 0.1975, "step": 8157 }, { "epoch": 0.6463061992473758, "grad_norm": 1.1391212925893133, "learning_rate": 5.873879254730621e-06, "loss": 0.1301, "step": 8158 }, { "epoch": 0.6463854228560111, "grad_norm": 1.7083001896441035, "learning_rate": 5.871541974732727e-06, "loss": 0.1384, "step": 8159 }, { "epoch": 0.6464646464646465, "grad_norm": 1.7312239132605354, "learning_rate": 5.869204966573929e-06, "loss": 0.1969, "step": 8160 }, { "epoch": 0.6465438700732818, "grad_norm": 1.4122443107824323, "learning_rate": 5.866868230408111e-06, "loss": 0.192, "step": 8161 }, { "epoch": 0.6466230936819172, "grad_norm": 1.7282599416588857, "learning_rate": 5.86453176638913e-06, "loss": 0.1697, "step": 8162 }, { "epoch": 0.6467023172905526, "grad_norm": 1.528172939160641, "learning_rate": 5.862195574670834e-06, "loss": 0.1729, "step": 8163 }, { "epoch": 0.6467815408991879, "grad_norm": 1.8040622991011739, "learning_rate": 5.85985965540705e-06, "loss": 0.2788, "step": 8164 }, { "epoch": 0.6468607645078234, "grad_norm": 1.5492634542794503, "learning_rate": 5.857524008751586e-06, "loss": 0.131, "step": 8165 }, { "epoch": 0.6469399881164587, "grad_norm": 1.647262123542445, "learning_rate": 5.855188634858235e-06, "loss": 0.2611, "step": 8166 }, { "epoch": 0.647019211725094, "grad_norm": 1.7564404844206631, "learning_rate": 5.852853533880768e-06, "loss": 0.1636, "step": 8167 }, { "epoch": 0.6470984353337295, "grad_norm": 1.6331654529510657, "learning_rate": 5.850518705972941e-06, "loss": 0.2051, "step": 8168 }, { "epoch": 0.6471776589423648, "grad_norm": 1.539374640159121, "learning_rate": 5.848184151288492e-06, "loss": 0.209, "step": 8169 }, { "epoch": 0.6472568825510002, "grad_norm": 1.5721915772426165, "learning_rate": 5.845849869981137e-06, "loss": 0.1562, "step": 8170 }, { "epoch": 0.6473361061596355, "grad_norm": 1.9839548267302927, "learning_rate": 5.843515862204581e-06, "loss": 0.2349, "step": 8171 }, { "epoch": 0.647415329768271, "grad_norm": 1.4924276726996406, "learning_rate": 5.841182128112506e-06, "loss": 0.1834, "step": 8172 }, { "epoch": 0.6474945533769063, "grad_norm": 1.4482647137077493, "learning_rate": 5.838848667858577e-06, "loss": 0.1912, "step": 8173 }, { "epoch": 0.6475737769855416, "grad_norm": 1.2887753077490391, "learning_rate": 5.83651548159644e-06, "loss": 0.1492, "step": 8174 }, { "epoch": 0.6476530005941771, "grad_norm": 1.8080667023935875, "learning_rate": 5.834182569479727e-06, "loss": 0.1839, "step": 8175 }, { "epoch": 0.6477322242028124, "grad_norm": 1.0766910119065871, "learning_rate": 5.831849931662047e-06, "loss": 0.0994, "step": 8176 }, { "epoch": 0.6478114478114478, "grad_norm": 1.359796154319617, "learning_rate": 5.829517568296989e-06, "loss": 0.1411, "step": 8177 }, { "epoch": 0.6478906714200832, "grad_norm": 1.895470911194082, "learning_rate": 5.827185479538138e-06, "loss": 0.2375, "step": 8178 }, { "epoch": 0.6479698950287186, "grad_norm": 1.950384073594469, "learning_rate": 5.824853665539043e-06, "loss": 0.1913, "step": 8179 }, { "epoch": 0.6480491186373539, "grad_norm": 1.5935832675858823, "learning_rate": 5.82252212645324e-06, "loss": 0.1389, "step": 8180 }, { "epoch": 0.6481283422459893, "grad_norm": 1.48907075446388, "learning_rate": 5.820190862434259e-06, "loss": 0.2118, "step": 8181 }, { "epoch": 0.6482075658546247, "grad_norm": 1.6304056787421364, "learning_rate": 5.8178598736355985e-06, "loss": 0.1525, "step": 8182 }, { "epoch": 0.64828678946326, "grad_norm": 1.7683509037124783, "learning_rate": 5.815529160210738e-06, "loss": 0.1849, "step": 8183 }, { "epoch": 0.6483660130718955, "grad_norm": 1.597123957088113, "learning_rate": 5.813198722313151e-06, "loss": 0.2211, "step": 8184 }, { "epoch": 0.6484452366805308, "grad_norm": 1.536023959144539, "learning_rate": 5.810868560096283e-06, "loss": 0.1865, "step": 8185 }, { "epoch": 0.6485244602891662, "grad_norm": 1.7736708834620623, "learning_rate": 5.808538673713564e-06, "loss": 0.1155, "step": 8186 }, { "epoch": 0.6486036838978015, "grad_norm": 1.8009006907956115, "learning_rate": 5.8062090633184e-06, "loss": 0.1892, "step": 8187 }, { "epoch": 0.6486829075064369, "grad_norm": 1.6267354181361393, "learning_rate": 5.803879729064195e-06, "loss": 0.1243, "step": 8188 }, { "epoch": 0.6487621311150723, "grad_norm": 1.92601166141222, "learning_rate": 5.801550671104319e-06, "loss": 0.2215, "step": 8189 }, { "epoch": 0.6488413547237076, "grad_norm": 0.9616011594442138, "learning_rate": 5.7992218895921256e-06, "loss": 0.0962, "step": 8190 }, { "epoch": 0.6489205783323431, "grad_norm": 1.2962036900258442, "learning_rate": 5.796893384680964e-06, "loss": 0.1686, "step": 8191 }, { "epoch": 0.6489998019409784, "grad_norm": 1.3376939103189183, "learning_rate": 5.7945651565241455e-06, "loss": 0.1626, "step": 8192 }, { "epoch": 0.6490790255496138, "grad_norm": 1.7175677058121748, "learning_rate": 5.792237205274974e-06, "loss": 0.1887, "step": 8193 }, { "epoch": 0.6491582491582492, "grad_norm": 1.7276408709061848, "learning_rate": 5.789909531086741e-06, "loss": 0.262, "step": 8194 }, { "epoch": 0.6492374727668845, "grad_norm": 1.5565591296273464, "learning_rate": 5.787582134112706e-06, "loss": 0.1948, "step": 8195 }, { "epoch": 0.6493166963755199, "grad_norm": 1.4046768935070544, "learning_rate": 5.785255014506115e-06, "loss": 0.1559, "step": 8196 }, { "epoch": 0.6493959199841552, "grad_norm": 1.884874702634769, "learning_rate": 5.782928172420206e-06, "loss": 0.1955, "step": 8197 }, { "epoch": 0.6494751435927907, "grad_norm": 1.7392460271953147, "learning_rate": 5.780601608008185e-06, "loss": 0.2122, "step": 8198 }, { "epoch": 0.649554367201426, "grad_norm": 1.547346264372615, "learning_rate": 5.778275321423241e-06, "loss": 0.2563, "step": 8199 }, { "epoch": 0.6496335908100614, "grad_norm": 1.71903547514637, "learning_rate": 5.7759493128185584e-06, "loss": 0.2214, "step": 8200 }, { "epoch": 0.6497128144186968, "grad_norm": 2.024562339691979, "learning_rate": 5.773623582347289e-06, "loss": 0.2489, "step": 8201 }, { "epoch": 0.6497920380273321, "grad_norm": 1.7269643699186403, "learning_rate": 5.77129813016257e-06, "loss": 0.2683, "step": 8202 }, { "epoch": 0.6498712616359675, "grad_norm": 1.4449158929677628, "learning_rate": 5.768972956417518e-06, "loss": 0.2212, "step": 8203 }, { "epoch": 0.6499504852446029, "grad_norm": 1.4062302772332789, "learning_rate": 5.766648061265242e-06, "loss": 0.1284, "step": 8204 }, { "epoch": 0.6500297088532383, "grad_norm": 1.5568032544909487, "learning_rate": 5.764323444858823e-06, "loss": 0.1682, "step": 8205 }, { "epoch": 0.6501089324618736, "grad_norm": 2.0939044448197035, "learning_rate": 5.761999107351319e-06, "loss": 0.2166, "step": 8206 }, { "epoch": 0.6501881560705091, "grad_norm": 1.535057755453949, "learning_rate": 5.759675048895785e-06, "loss": 0.2084, "step": 8207 }, { "epoch": 0.6502673796791444, "grad_norm": 2.0202311168534464, "learning_rate": 5.757351269645248e-06, "loss": 0.3489, "step": 8208 }, { "epoch": 0.6503466032877797, "grad_norm": 1.3078476275634898, "learning_rate": 5.75502776975271e-06, "loss": 0.1575, "step": 8209 }, { "epoch": 0.6504258268964151, "grad_norm": 1.5249387928311076, "learning_rate": 5.752704549371173e-06, "loss": 0.1745, "step": 8210 }, { "epoch": 0.6505050505050505, "grad_norm": 1.6182043081195674, "learning_rate": 5.750381608653605e-06, "loss": 0.2483, "step": 8211 }, { "epoch": 0.6505842741136859, "grad_norm": 1.3265089391079636, "learning_rate": 5.748058947752955e-06, "loss": 0.1242, "step": 8212 }, { "epoch": 0.6506634977223212, "grad_norm": 1.1294755049317864, "learning_rate": 5.745736566822169e-06, "loss": 0.1405, "step": 8213 }, { "epoch": 0.6507427213309567, "grad_norm": 1.5363640615073666, "learning_rate": 5.743414466014159e-06, "loss": 0.2634, "step": 8214 }, { "epoch": 0.650821944939592, "grad_norm": 1.7286602224365233, "learning_rate": 5.7410926454818265e-06, "loss": 0.2692, "step": 8215 }, { "epoch": 0.6509011685482273, "grad_norm": 1.4254908976240517, "learning_rate": 5.738771105378046e-06, "loss": 0.1405, "step": 8216 }, { "epoch": 0.6509803921568628, "grad_norm": 1.7378379782088345, "learning_rate": 5.7364498458556914e-06, "loss": 0.2609, "step": 8217 }, { "epoch": 0.6510596157654981, "grad_norm": 1.7205630807578267, "learning_rate": 5.734128867067593e-06, "loss": 0.2305, "step": 8218 }, { "epoch": 0.6511388393741335, "grad_norm": 1.4647885330041155, "learning_rate": 5.731808169166586e-06, "loss": 0.1838, "step": 8219 }, { "epoch": 0.6512180629827689, "grad_norm": 1.6481536694466383, "learning_rate": 5.7294877523054735e-06, "loss": 0.16, "step": 8220 }, { "epoch": 0.6512972865914043, "grad_norm": 1.589465223200507, "learning_rate": 5.727167616637042e-06, "loss": 0.2151, "step": 8221 }, { "epoch": 0.6513765102000396, "grad_norm": 1.8164892706488056, "learning_rate": 5.7248477623140655e-06, "loss": 0.2106, "step": 8222 }, { "epoch": 0.6514557338086749, "grad_norm": 1.326792198538502, "learning_rate": 5.722528189489294e-06, "loss": 0.144, "step": 8223 }, { "epoch": 0.6515349574173104, "grad_norm": 1.505013969938144, "learning_rate": 5.720208898315454e-06, "loss": 0.2383, "step": 8224 }, { "epoch": 0.6516141810259457, "grad_norm": 2.0294958874903655, "learning_rate": 5.717889888945271e-06, "loss": 0.2747, "step": 8225 }, { "epoch": 0.6516934046345811, "grad_norm": 1.2296416167317286, "learning_rate": 5.715571161531433e-06, "loss": 0.1342, "step": 8226 }, { "epoch": 0.6517726282432165, "grad_norm": 1.6248808580840808, "learning_rate": 5.7132527162266194e-06, "loss": 0.2235, "step": 8227 }, { "epoch": 0.6518518518518519, "grad_norm": 1.3883865823436097, "learning_rate": 5.710934553183484e-06, "loss": 0.2057, "step": 8228 }, { "epoch": 0.6519310754604872, "grad_norm": 1.5659566720195393, "learning_rate": 5.708616672554675e-06, "loss": 0.2167, "step": 8229 }, { "epoch": 0.6520102990691226, "grad_norm": 1.5946119236074876, "learning_rate": 5.7062990744928086e-06, "loss": 0.2286, "step": 8230 }, { "epoch": 0.652089522677758, "grad_norm": 1.6035955072047159, "learning_rate": 5.703981759150483e-06, "loss": 0.1704, "step": 8231 }, { "epoch": 0.6521687462863933, "grad_norm": 1.2107460366565461, "learning_rate": 5.701664726680294e-06, "loss": 0.1186, "step": 8232 }, { "epoch": 0.6522479698950288, "grad_norm": 1.5346477418657334, "learning_rate": 5.699347977234799e-06, "loss": 0.1731, "step": 8233 }, { "epoch": 0.6523271935036641, "grad_norm": 1.2114196624198978, "learning_rate": 5.697031510966542e-06, "loss": 0.1235, "step": 8234 }, { "epoch": 0.6524064171122995, "grad_norm": 1.5176144109473357, "learning_rate": 5.69471532802806e-06, "loss": 0.1862, "step": 8235 }, { "epoch": 0.6524856407209348, "grad_norm": 2.258782924325125, "learning_rate": 5.692399428571857e-06, "loss": 0.2012, "step": 8236 }, { "epoch": 0.6525648643295702, "grad_norm": 2.051295655870663, "learning_rate": 5.690083812750422e-06, "loss": 0.158, "step": 8237 }, { "epoch": 0.6526440879382056, "grad_norm": 1.4422505800086367, "learning_rate": 5.687768480716233e-06, "loss": 0.2302, "step": 8238 }, { "epoch": 0.6527233115468409, "grad_norm": 1.4627107526879832, "learning_rate": 5.685453432621741e-06, "loss": 0.1629, "step": 8239 }, { "epoch": 0.6528025351554764, "grad_norm": 1.8691078174129367, "learning_rate": 5.683138668619381e-06, "loss": 0.1945, "step": 8240 }, { "epoch": 0.6528817587641117, "grad_norm": 1.5677957642832823, "learning_rate": 5.680824188861564e-06, "loss": 0.1172, "step": 8241 }, { "epoch": 0.6529609823727471, "grad_norm": 1.7956218253747118, "learning_rate": 5.678509993500695e-06, "loss": 0.1411, "step": 8242 }, { "epoch": 0.6530402059813825, "grad_norm": 1.8814500390160878, "learning_rate": 5.676196082689149e-06, "loss": 0.2051, "step": 8243 }, { "epoch": 0.6531194295900178, "grad_norm": 1.6182176307545273, "learning_rate": 5.673882456579282e-06, "loss": 0.1673, "step": 8244 }, { "epoch": 0.6531986531986532, "grad_norm": 1.4898042752183114, "learning_rate": 5.6715691153234445e-06, "loss": 0.2146, "step": 8245 }, { "epoch": 0.6532778768072885, "grad_norm": 1.2612371101175517, "learning_rate": 5.669256059073953e-06, "loss": 0.113, "step": 8246 }, { "epoch": 0.653357100415924, "grad_norm": 1.501050117884269, "learning_rate": 5.666943287983106e-06, "loss": 0.219, "step": 8247 }, { "epoch": 0.6534363240245593, "grad_norm": 2.567253359423979, "learning_rate": 5.664630802203201e-06, "loss": 0.2652, "step": 8248 }, { "epoch": 0.6535155476331946, "grad_norm": 1.7313811344018724, "learning_rate": 5.662318601886496e-06, "loss": 0.2632, "step": 8249 }, { "epoch": 0.6535947712418301, "grad_norm": 1.8437192287823179, "learning_rate": 5.660006687185235e-06, "loss": 0.2027, "step": 8250 }, { "epoch": 0.6536739948504654, "grad_norm": 1.7258103204043658, "learning_rate": 5.657695058251656e-06, "loss": 0.2712, "step": 8251 }, { "epoch": 0.6537532184591008, "grad_norm": 1.7009843071513742, "learning_rate": 5.655383715237963e-06, "loss": 0.2087, "step": 8252 }, { "epoch": 0.6538324420677362, "grad_norm": 1.3926597375631904, "learning_rate": 5.653072658296344e-06, "loss": 0.1345, "step": 8253 }, { "epoch": 0.6539116656763716, "grad_norm": 1.680716453565513, "learning_rate": 5.650761887578977e-06, "loss": 0.2294, "step": 8254 }, { "epoch": 0.6539908892850069, "grad_norm": 1.6853990228446765, "learning_rate": 5.648451403238013e-06, "loss": 0.2854, "step": 8255 }, { "epoch": 0.6540701128936423, "grad_norm": 1.5846803940244052, "learning_rate": 5.646141205425586e-06, "loss": 0.1508, "step": 8256 }, { "epoch": 0.6541493365022777, "grad_norm": 1.426951628329609, "learning_rate": 5.643831294293808e-06, "loss": 0.1719, "step": 8257 }, { "epoch": 0.654228560110913, "grad_norm": 1.5752766927968165, "learning_rate": 5.641521669994782e-06, "loss": 0.185, "step": 8258 }, { "epoch": 0.6543077837195485, "grad_norm": 1.7943956435528206, "learning_rate": 5.639212332680581e-06, "loss": 0.1895, "step": 8259 }, { "epoch": 0.6543870073281838, "grad_norm": 1.443419387285084, "learning_rate": 5.636903282503263e-06, "loss": 0.1647, "step": 8260 }, { "epoch": 0.6544662309368192, "grad_norm": 1.6627029591077733, "learning_rate": 5.6345945196148734e-06, "loss": 0.2123, "step": 8261 }, { "epoch": 0.6545454545454545, "grad_norm": 1.2083913394737136, "learning_rate": 5.63228604416743e-06, "loss": 0.0895, "step": 8262 }, { "epoch": 0.6546246781540899, "grad_norm": 1.7234141931210247, "learning_rate": 5.62997785631293e-06, "loss": 0.1781, "step": 8263 }, { "epoch": 0.6547039017627253, "grad_norm": 1.3367523194438058, "learning_rate": 5.627669956203365e-06, "loss": 0.1369, "step": 8264 }, { "epoch": 0.6547831253713606, "grad_norm": 1.7990404996814022, "learning_rate": 5.6253623439906955e-06, "loss": 0.3385, "step": 8265 }, { "epoch": 0.6548623489799961, "grad_norm": 1.5872115950775798, "learning_rate": 5.623055019826862e-06, "loss": 0.2337, "step": 8266 }, { "epoch": 0.6549415725886314, "grad_norm": 2.0680215129891235, "learning_rate": 5.6207479838637995e-06, "loss": 0.155, "step": 8267 }, { "epoch": 0.6550207961972668, "grad_norm": 1.4252105373700081, "learning_rate": 5.618441236253411e-06, "loss": 0.1609, "step": 8268 }, { "epoch": 0.6551000198059022, "grad_norm": 1.6338568759994834, "learning_rate": 5.616134777147578e-06, "loss": 0.2116, "step": 8269 }, { "epoch": 0.6551792434145375, "grad_norm": 1.8941688066899198, "learning_rate": 5.6138286066981815e-06, "loss": 0.2034, "step": 8270 }, { "epoch": 0.6552584670231729, "grad_norm": 1.718581445993617, "learning_rate": 5.611522725057067e-06, "loss": 0.2025, "step": 8271 }, { "epoch": 0.6553376906318082, "grad_norm": 1.4140951727805453, "learning_rate": 5.6092171323760635e-06, "loss": 0.2221, "step": 8272 }, { "epoch": 0.6554169142404437, "grad_norm": 1.4434099805442253, "learning_rate": 5.6069118288069824e-06, "loss": 0.1456, "step": 8273 }, { "epoch": 0.655496137849079, "grad_norm": 1.554167909055817, "learning_rate": 5.604606814501623e-06, "loss": 0.1654, "step": 8274 }, { "epoch": 0.6555753614577144, "grad_norm": 1.308334470915961, "learning_rate": 5.602302089611755e-06, "loss": 0.108, "step": 8275 }, { "epoch": 0.6556545850663498, "grad_norm": 1.360120676151067, "learning_rate": 5.599997654289129e-06, "loss": 0.1583, "step": 8276 }, { "epoch": 0.6557338086749851, "grad_norm": 1.8632749560870947, "learning_rate": 5.5976935086854914e-06, "loss": 0.262, "step": 8277 }, { "epoch": 0.6558130322836205, "grad_norm": 1.3594113781132353, "learning_rate": 5.595389652952555e-06, "loss": 0.1823, "step": 8278 }, { "epoch": 0.6558922558922559, "grad_norm": 1.262093241615721, "learning_rate": 5.59308608724201e-06, "loss": 0.1383, "step": 8279 }, { "epoch": 0.6559714795008913, "grad_norm": 1.6542865620802023, "learning_rate": 5.590782811705547e-06, "loss": 0.1484, "step": 8280 }, { "epoch": 0.6560507031095266, "grad_norm": 1.7750792881244633, "learning_rate": 5.588479826494817e-06, "loss": 0.1872, "step": 8281 }, { "epoch": 0.6561299267181621, "grad_norm": 1.381334562936964, "learning_rate": 5.5861771317614624e-06, "loss": 0.1633, "step": 8282 }, { "epoch": 0.6562091503267974, "grad_norm": 1.5196390700062017, "learning_rate": 5.583874727657109e-06, "loss": 0.1739, "step": 8283 }, { "epoch": 0.6562883739354327, "grad_norm": 1.226429081274975, "learning_rate": 5.581572614333356e-06, "loss": 0.1241, "step": 8284 }, { "epoch": 0.6563675975440681, "grad_norm": 1.538723332684617, "learning_rate": 5.579270791941787e-06, "loss": 0.1281, "step": 8285 }, { "epoch": 0.6564468211527035, "grad_norm": 1.862194889943743, "learning_rate": 5.5769692606339584e-06, "loss": 0.2415, "step": 8286 }, { "epoch": 0.6565260447613389, "grad_norm": 1.6825072547604767, "learning_rate": 5.574668020561428e-06, "loss": 0.1878, "step": 8287 }, { "epoch": 0.6566052683699742, "grad_norm": 1.5372704747108141, "learning_rate": 5.572367071875715e-06, "loss": 0.1954, "step": 8288 }, { "epoch": 0.6566844919786097, "grad_norm": 2.1053733020299146, "learning_rate": 5.570066414728321e-06, "loss": 0.2638, "step": 8289 }, { "epoch": 0.656763715587245, "grad_norm": 1.1772582933802798, "learning_rate": 5.567766049270742e-06, "loss": 0.1342, "step": 8290 }, { "epoch": 0.6568429391958803, "grad_norm": 1.2301156329721834, "learning_rate": 5.5654659756544425e-06, "loss": 0.1527, "step": 8291 }, { "epoch": 0.6569221628045158, "grad_norm": 1.6200880575345542, "learning_rate": 5.563166194030868e-06, "loss": 0.1969, "step": 8292 }, { "epoch": 0.6570013864131511, "grad_norm": 2.293687925661075, "learning_rate": 5.560866704551454e-06, "loss": 0.3003, "step": 8293 }, { "epoch": 0.6570806100217865, "grad_norm": 1.6941611386529323, "learning_rate": 5.5585675073676085e-06, "loss": 0.127, "step": 8294 }, { "epoch": 0.6571598336304219, "grad_norm": 1.2594594997771908, "learning_rate": 5.556268602630721e-06, "loss": 0.1857, "step": 8295 }, { "epoch": 0.6572390572390573, "grad_norm": 1.3723528282572734, "learning_rate": 5.553969990492164e-06, "loss": 0.1511, "step": 8296 }, { "epoch": 0.6573182808476926, "grad_norm": 1.3414673081936097, "learning_rate": 5.5516716711032906e-06, "loss": 0.1842, "step": 8297 }, { "epoch": 0.6573975044563279, "grad_norm": 1.7230731338225382, "learning_rate": 5.54937364461543e-06, "loss": 0.2817, "step": 8298 }, { "epoch": 0.6574767280649634, "grad_norm": 1.7529165224556071, "learning_rate": 5.547075911179902e-06, "loss": 0.2023, "step": 8299 }, { "epoch": 0.6575559516735987, "grad_norm": 1.4044143302093148, "learning_rate": 5.544778470948001e-06, "loss": 0.1699, "step": 8300 }, { "epoch": 0.6576351752822341, "grad_norm": 1.529663986852082, "learning_rate": 5.542481324070996e-06, "loss": 0.2502, "step": 8301 }, { "epoch": 0.6577143988908695, "grad_norm": 1.281111555356486, "learning_rate": 5.540184470700152e-06, "loss": 0.1496, "step": 8302 }, { "epoch": 0.6577936224995049, "grad_norm": 1.4695873417340555, "learning_rate": 5.537887910986701e-06, "loss": 0.2307, "step": 8303 }, { "epoch": 0.6578728461081402, "grad_norm": 2.2157231188358044, "learning_rate": 5.535591645081857e-06, "loss": 0.2534, "step": 8304 }, { "epoch": 0.6579520697167756, "grad_norm": 1.5163328286495172, "learning_rate": 5.5332956731368245e-06, "loss": 0.1811, "step": 8305 }, { "epoch": 0.658031293325411, "grad_norm": 1.2490733694337601, "learning_rate": 5.530999995302781e-06, "loss": 0.1601, "step": 8306 }, { "epoch": 0.6581105169340463, "grad_norm": 1.2071918252900604, "learning_rate": 5.528704611730879e-06, "loss": 0.1427, "step": 8307 }, { "epoch": 0.6581897405426818, "grad_norm": 1.4515794317796984, "learning_rate": 5.5264095225722705e-06, "loss": 0.2078, "step": 8308 }, { "epoch": 0.6582689641513171, "grad_norm": 1.3364392852096108, "learning_rate": 5.524114727978067e-06, "loss": 0.155, "step": 8309 }, { "epoch": 0.6583481877599525, "grad_norm": 1.0463251475146411, "learning_rate": 5.5218202280993725e-06, "loss": 0.095, "step": 8310 }, { "epoch": 0.6584274113685878, "grad_norm": 1.2098217240018057, "learning_rate": 5.519526023087265e-06, "loss": 0.1147, "step": 8311 }, { "epoch": 0.6585066349772232, "grad_norm": 1.5521991050920934, "learning_rate": 5.517232113092814e-06, "loss": 0.1669, "step": 8312 }, { "epoch": 0.6585858585858586, "grad_norm": 1.7277931955754604, "learning_rate": 5.5149384982670585e-06, "loss": 0.2507, "step": 8313 }, { "epoch": 0.6586650821944939, "grad_norm": 1.398993685418058, "learning_rate": 5.512645178761018e-06, "loss": 0.1573, "step": 8314 }, { "epoch": 0.6587443058031294, "grad_norm": 2.4732740187214763, "learning_rate": 5.5103521547257045e-06, "loss": 0.1874, "step": 8315 }, { "epoch": 0.6588235294117647, "grad_norm": 1.3552063616391397, "learning_rate": 5.508059426312099e-06, "loss": 0.1867, "step": 8316 }, { "epoch": 0.6589027530204001, "grad_norm": 1.2871837519881852, "learning_rate": 5.5057669936711625e-06, "loss": 0.1181, "step": 8317 }, { "epoch": 0.6589819766290355, "grad_norm": 1.26733278893884, "learning_rate": 5.503474856953849e-06, "loss": 0.1507, "step": 8318 }, { "epoch": 0.6590612002376708, "grad_norm": 1.8799595411547216, "learning_rate": 5.50118301631108e-06, "loss": 0.2174, "step": 8319 }, { "epoch": 0.6591404238463062, "grad_norm": 1.4596648033533681, "learning_rate": 5.498891471893758e-06, "loss": 0.2096, "step": 8320 }, { "epoch": 0.6592196474549415, "grad_norm": 1.5792234032728056, "learning_rate": 5.49660022385278e-06, "loss": 0.2104, "step": 8321 }, { "epoch": 0.659298871063577, "grad_norm": 1.5745039753729293, "learning_rate": 5.494309272339007e-06, "loss": 0.1684, "step": 8322 }, { "epoch": 0.6593780946722123, "grad_norm": 1.066572703120052, "learning_rate": 5.492018617503284e-06, "loss": 0.1083, "step": 8323 }, { "epoch": 0.6594573182808476, "grad_norm": 1.3584827431389204, "learning_rate": 5.48972825949645e-06, "loss": 0.1688, "step": 8324 }, { "epoch": 0.6595365418894831, "grad_norm": 1.6090902305800052, "learning_rate": 5.487438198469306e-06, "loss": 0.2315, "step": 8325 }, { "epoch": 0.6596157654981184, "grad_norm": 1.9324663753762454, "learning_rate": 5.485148434572645e-06, "loss": 0.2677, "step": 8326 }, { "epoch": 0.6596949891067538, "grad_norm": 1.7636580760046552, "learning_rate": 5.48285896795723e-06, "loss": 0.3376, "step": 8327 }, { "epoch": 0.6597742127153892, "grad_norm": 1.7047806139347148, "learning_rate": 5.480569798773822e-06, "loss": 0.2314, "step": 8328 }, { "epoch": 0.6598534363240246, "grad_norm": 1.4507597322000743, "learning_rate": 5.478280927173145e-06, "loss": 0.1693, "step": 8329 }, { "epoch": 0.6599326599326599, "grad_norm": 1.4116253261484475, "learning_rate": 5.4759923533059105e-06, "loss": 0.2657, "step": 8330 }, { "epoch": 0.6600118835412953, "grad_norm": 1.3597514182091908, "learning_rate": 5.473704077322814e-06, "loss": 0.1797, "step": 8331 }, { "epoch": 0.6600911071499307, "grad_norm": 1.5648248443606272, "learning_rate": 5.471416099374525e-06, "loss": 0.1506, "step": 8332 }, { "epoch": 0.660170330758566, "grad_norm": 1.534842050700167, "learning_rate": 5.469128419611691e-06, "loss": 0.1571, "step": 8333 }, { "epoch": 0.6602495543672015, "grad_norm": 1.298555509051071, "learning_rate": 5.466841038184954e-06, "loss": 0.126, "step": 8334 }, { "epoch": 0.6603287779758368, "grad_norm": 1.5335731138737194, "learning_rate": 5.464553955244922e-06, "loss": 0.1817, "step": 8335 }, { "epoch": 0.6604080015844722, "grad_norm": 2.0217865644026936, "learning_rate": 5.4622671709421856e-06, "loss": 0.2089, "step": 8336 }, { "epoch": 0.6604872251931075, "grad_norm": 1.5634586385554383, "learning_rate": 5.459980685427326e-06, "loss": 0.2003, "step": 8337 }, { "epoch": 0.6605664488017429, "grad_norm": 1.4022026133342103, "learning_rate": 5.457694498850892e-06, "loss": 0.1585, "step": 8338 }, { "epoch": 0.6606456724103783, "grad_norm": 1.4575779322529085, "learning_rate": 5.455408611363416e-06, "loss": 0.1778, "step": 8339 }, { "epoch": 0.6607248960190136, "grad_norm": 1.5424267678030532, "learning_rate": 5.45312302311542e-06, "loss": 0.1827, "step": 8340 }, { "epoch": 0.6608041196276491, "grad_norm": 1.5607616711418228, "learning_rate": 5.450837734257395e-06, "loss": 0.177, "step": 8341 }, { "epoch": 0.6608833432362844, "grad_norm": 1.3096820485429626, "learning_rate": 5.448552744939815e-06, "loss": 0.1646, "step": 8342 }, { "epoch": 0.6609625668449198, "grad_norm": 1.364758389756013, "learning_rate": 5.446268055313132e-06, "loss": 0.1554, "step": 8343 }, { "epoch": 0.6610417904535552, "grad_norm": 1.26670137609939, "learning_rate": 5.443983665527792e-06, "loss": 0.2004, "step": 8344 }, { "epoch": 0.6611210140621905, "grad_norm": 1.5742824408510516, "learning_rate": 5.441699575734204e-06, "loss": 0.2143, "step": 8345 }, { "epoch": 0.6612002376708259, "grad_norm": 1.4884088168282303, "learning_rate": 5.439415786082762e-06, "loss": 0.1784, "step": 8346 }, { "epoch": 0.6612794612794612, "grad_norm": 1.5024291816741382, "learning_rate": 5.437132296723852e-06, "loss": 0.1437, "step": 8347 }, { "epoch": 0.6613586848880967, "grad_norm": 1.3772197022212198, "learning_rate": 5.434849107807823e-06, "loss": 0.1261, "step": 8348 }, { "epoch": 0.661437908496732, "grad_norm": 1.5384801302512507, "learning_rate": 5.432566219485012e-06, "loss": 0.2087, "step": 8349 }, { "epoch": 0.6615171321053674, "grad_norm": 1.4540796280258301, "learning_rate": 5.430283631905742e-06, "loss": 0.2003, "step": 8350 }, { "epoch": 0.6615963557140028, "grad_norm": 1.4486425732561794, "learning_rate": 5.428001345220306e-06, "loss": 0.1287, "step": 8351 }, { "epoch": 0.6616755793226381, "grad_norm": 1.4328318261930029, "learning_rate": 5.425719359578978e-06, "loss": 0.1559, "step": 8352 }, { "epoch": 0.6617548029312735, "grad_norm": 1.8826867500867754, "learning_rate": 5.423437675132025e-06, "loss": 0.2616, "step": 8353 }, { "epoch": 0.6618340265399089, "grad_norm": 1.5010281861733883, "learning_rate": 5.42115629202968e-06, "loss": 0.211, "step": 8354 }, { "epoch": 0.6619132501485443, "grad_norm": 1.467573700362327, "learning_rate": 5.4188752104221565e-06, "loss": 0.21, "step": 8355 }, { "epoch": 0.6619924737571796, "grad_norm": 1.4197586019676525, "learning_rate": 5.416594430459663e-06, "loss": 0.2292, "step": 8356 }, { "epoch": 0.6620716973658151, "grad_norm": 2.009264206716873, "learning_rate": 5.41431395229237e-06, "loss": 0.2649, "step": 8357 }, { "epoch": 0.6621509209744504, "grad_norm": 1.331529473981928, "learning_rate": 5.41203377607044e-06, "loss": 0.1584, "step": 8358 }, { "epoch": 0.6622301445830857, "grad_norm": 1.8404023642541216, "learning_rate": 5.409753901944006e-06, "loss": 0.2253, "step": 8359 }, { "epoch": 0.6623093681917211, "grad_norm": 1.5647360236328731, "learning_rate": 5.407474330063194e-06, "loss": 0.1901, "step": 8360 }, { "epoch": 0.6623885918003565, "grad_norm": 1.6595562334548706, "learning_rate": 5.4051950605781e-06, "loss": 0.1998, "step": 8361 }, { "epoch": 0.6624678154089919, "grad_norm": 1.5990882896869136, "learning_rate": 5.402916093638798e-06, "loss": 0.2028, "step": 8362 }, { "epoch": 0.6625470390176272, "grad_norm": 1.6378309146709, "learning_rate": 5.400637429395357e-06, "loss": 0.1957, "step": 8363 }, { "epoch": 0.6626262626262627, "grad_norm": 1.6828487593144763, "learning_rate": 5.398359067997808e-06, "loss": 0.2299, "step": 8364 }, { "epoch": 0.662705486234898, "grad_norm": 1.4290561703713485, "learning_rate": 5.3960810095961705e-06, "loss": 0.1934, "step": 8365 }, { "epoch": 0.6627847098435333, "grad_norm": 1.6431002619883153, "learning_rate": 5.39380325434045e-06, "loss": 0.1656, "step": 8366 }, { "epoch": 0.6628639334521688, "grad_norm": 1.5931842727217678, "learning_rate": 5.3915258023806195e-06, "loss": 0.2323, "step": 8367 }, { "epoch": 0.6629431570608041, "grad_norm": 1.4720821005836915, "learning_rate": 5.3892486538666386e-06, "loss": 0.1248, "step": 8368 }, { "epoch": 0.6630223806694395, "grad_norm": 1.4884125986257433, "learning_rate": 5.386971808948451e-06, "loss": 0.183, "step": 8369 }, { "epoch": 0.6631016042780749, "grad_norm": 6.621177502333374, "learning_rate": 5.384695267775975e-06, "loss": 0.2898, "step": 8370 }, { "epoch": 0.6631808278867103, "grad_norm": 1.8678937727996634, "learning_rate": 5.382419030499107e-06, "loss": 0.2177, "step": 8371 }, { "epoch": 0.6632600514953456, "grad_norm": 1.4177163884251203, "learning_rate": 5.380143097267723e-06, "loss": 0.162, "step": 8372 }, { "epoch": 0.6633392751039809, "grad_norm": 1.6491514939417578, "learning_rate": 5.377867468231695e-06, "loss": 0.1517, "step": 8373 }, { "epoch": 0.6634184987126164, "grad_norm": 1.3102944757942518, "learning_rate": 5.3755921435408464e-06, "loss": 0.1488, "step": 8374 }, { "epoch": 0.6634977223212517, "grad_norm": 1.3297498932773755, "learning_rate": 5.373317123345008e-06, "loss": 0.1735, "step": 8375 }, { "epoch": 0.6635769459298871, "grad_norm": 1.3075060533436977, "learning_rate": 5.371042407793974e-06, "loss": 0.1232, "step": 8376 }, { "epoch": 0.6636561695385225, "grad_norm": 1.6246313316613459, "learning_rate": 5.368767997037521e-06, "loss": 0.2386, "step": 8377 }, { "epoch": 0.6637353931471579, "grad_norm": 1.2445499720680815, "learning_rate": 5.366493891225415e-06, "loss": 0.1381, "step": 8378 }, { "epoch": 0.6638146167557932, "grad_norm": 1.2485231242478856, "learning_rate": 5.3642200905073914e-06, "loss": 0.1775, "step": 8379 }, { "epoch": 0.6638938403644286, "grad_norm": 1.429042663051072, "learning_rate": 5.361946595033165e-06, "loss": 0.1709, "step": 8380 }, { "epoch": 0.663973063973064, "grad_norm": 1.4968637694075029, "learning_rate": 5.359673404952442e-06, "loss": 0.183, "step": 8381 }, { "epoch": 0.6640522875816993, "grad_norm": 1.3586940217272707, "learning_rate": 5.357400520414898e-06, "loss": 0.221, "step": 8382 }, { "epoch": 0.6641315111903348, "grad_norm": 1.44246050573437, "learning_rate": 5.355127941570191e-06, "loss": 0.1813, "step": 8383 }, { "epoch": 0.6642107347989701, "grad_norm": 1.1793195336546596, "learning_rate": 5.352855668567956e-06, "loss": 0.0759, "step": 8384 }, { "epoch": 0.6642899584076055, "grad_norm": 1.8065011852859472, "learning_rate": 5.350583701557816e-06, "loss": 0.2097, "step": 8385 }, { "epoch": 0.6643691820162408, "grad_norm": 1.4303164112575943, "learning_rate": 5.348312040689369e-06, "loss": 0.1464, "step": 8386 }, { "epoch": 0.6644484056248762, "grad_norm": 1.3858884267602292, "learning_rate": 5.346040686112189e-06, "loss": 0.1678, "step": 8387 }, { "epoch": 0.6645276292335116, "grad_norm": 1.7496808836607711, "learning_rate": 5.34376963797584e-06, "loss": 0.1958, "step": 8388 }, { "epoch": 0.6646068528421469, "grad_norm": 1.632181402126805, "learning_rate": 5.3414988964298555e-06, "loss": 0.2187, "step": 8389 }, { "epoch": 0.6646860764507824, "grad_norm": 1.6491838932869427, "learning_rate": 5.3392284616237486e-06, "loss": 0.1495, "step": 8390 }, { "epoch": 0.6647653000594177, "grad_norm": 1.366183215607451, "learning_rate": 5.336958333707026e-06, "loss": 0.1416, "step": 8391 }, { "epoch": 0.6648445236680531, "grad_norm": 1.8062614768417506, "learning_rate": 5.33468851282916e-06, "loss": 0.194, "step": 8392 }, { "epoch": 0.6649237472766885, "grad_norm": 1.7222648244174699, "learning_rate": 5.332418999139604e-06, "loss": 0.196, "step": 8393 }, { "epoch": 0.6650029708853238, "grad_norm": 1.3896866463691977, "learning_rate": 5.330149792787801e-06, "loss": 0.1467, "step": 8394 }, { "epoch": 0.6650821944939592, "grad_norm": 2.078525204593878, "learning_rate": 5.3278808939231654e-06, "loss": 0.2262, "step": 8395 }, { "epoch": 0.6651614181025945, "grad_norm": 1.8430189050471089, "learning_rate": 5.32561230269509e-06, "loss": 0.1865, "step": 8396 }, { "epoch": 0.66524064171123, "grad_norm": 1.6271318672443138, "learning_rate": 5.32334401925295e-06, "loss": 0.1958, "step": 8397 }, { "epoch": 0.6653198653198653, "grad_norm": 1.5949078852329102, "learning_rate": 5.321076043746108e-06, "loss": 0.2202, "step": 8398 }, { "epoch": 0.6653990889285007, "grad_norm": 1.7590541902497947, "learning_rate": 5.318808376323895e-06, "loss": 0.2365, "step": 8399 }, { "epoch": 0.6654783125371361, "grad_norm": 1.8846674449433654, "learning_rate": 5.316541017135622e-06, "loss": 0.1719, "step": 8400 }, { "epoch": 0.6655575361457714, "grad_norm": 1.5738869492393905, "learning_rate": 5.314273966330591e-06, "loss": 0.1782, "step": 8401 }, { "epoch": 0.6656367597544068, "grad_norm": 1.808684709339619, "learning_rate": 5.3120072240580735e-06, "loss": 0.2737, "step": 8402 }, { "epoch": 0.6657159833630422, "grad_norm": 1.429516849546963, "learning_rate": 5.309740790467319e-06, "loss": 0.1813, "step": 8403 }, { "epoch": 0.6657952069716776, "grad_norm": 1.6111987538637318, "learning_rate": 5.307474665707569e-06, "loss": 0.1756, "step": 8404 }, { "epoch": 0.6658744305803129, "grad_norm": 1.237369458151828, "learning_rate": 5.305208849928034e-06, "loss": 0.1032, "step": 8405 }, { "epoch": 0.6659536541889483, "grad_norm": 1.9310984511875209, "learning_rate": 5.302943343277902e-06, "loss": 0.2288, "step": 8406 }, { "epoch": 0.6660328777975837, "grad_norm": 1.8574180080224556, "learning_rate": 5.300678145906354e-06, "loss": 0.2493, "step": 8407 }, { "epoch": 0.666112101406219, "grad_norm": 1.5272791552720995, "learning_rate": 5.298413257962538e-06, "loss": 0.1952, "step": 8408 }, { "epoch": 0.6661913250148545, "grad_norm": 1.4365339418974972, "learning_rate": 5.296148679595583e-06, "loss": 0.1676, "step": 8409 }, { "epoch": 0.6662705486234898, "grad_norm": 1.373422826464152, "learning_rate": 5.293884410954608e-06, "loss": 0.1653, "step": 8410 }, { "epoch": 0.6663497722321252, "grad_norm": 1.2286778427976257, "learning_rate": 5.291620452188699e-06, "loss": 0.1301, "step": 8411 }, { "epoch": 0.6664289958407605, "grad_norm": 1.7126344463740855, "learning_rate": 5.28935680344693e-06, "loss": 0.1142, "step": 8412 }, { "epoch": 0.6665082194493959, "grad_norm": 1.4778085301208934, "learning_rate": 5.287093464878343e-06, "loss": 0.1761, "step": 8413 }, { "epoch": 0.6665874430580313, "grad_norm": 1.6638324173461492, "learning_rate": 5.28483043663198e-06, "loss": 0.221, "step": 8414 }, { "epoch": 0.6666666666666666, "grad_norm": 1.4632197780577147, "learning_rate": 5.282567718856845e-06, "loss": 0.1946, "step": 8415 }, { "epoch": 0.6667458902753021, "grad_norm": 2.0202293446375528, "learning_rate": 5.280305311701921e-06, "loss": 0.2782, "step": 8416 }, { "epoch": 0.6668251138839374, "grad_norm": 1.6224498219175085, "learning_rate": 5.278043215316189e-06, "loss": 0.1863, "step": 8417 }, { "epoch": 0.6669043374925728, "grad_norm": 1.758034308953991, "learning_rate": 5.275781429848589e-06, "loss": 0.2892, "step": 8418 }, { "epoch": 0.6669835611012082, "grad_norm": 1.5396529438966589, "learning_rate": 5.273519955448047e-06, "loss": 0.1406, "step": 8419 }, { "epoch": 0.6670627847098435, "grad_norm": 1.3633056986651022, "learning_rate": 5.271258792263476e-06, "loss": 0.1103, "step": 8420 }, { "epoch": 0.6671420083184789, "grad_norm": 1.3332009559296645, "learning_rate": 5.268997940443762e-06, "loss": 0.1674, "step": 8421 }, { "epoch": 0.6672212319271142, "grad_norm": 1.1836574719363169, "learning_rate": 5.266737400137765e-06, "loss": 0.1451, "step": 8422 }, { "epoch": 0.6673004555357497, "grad_norm": 1.7315160507975944, "learning_rate": 5.26447717149434e-06, "loss": 0.2652, "step": 8423 }, { "epoch": 0.667379679144385, "grad_norm": 1.5924275843617288, "learning_rate": 5.2622172546623055e-06, "loss": 0.2263, "step": 8424 }, { "epoch": 0.6674589027530204, "grad_norm": 1.294274316531856, "learning_rate": 5.259957649790466e-06, "loss": 0.1423, "step": 8425 }, { "epoch": 0.6675381263616558, "grad_norm": 1.6395766360027566, "learning_rate": 5.257698357027609e-06, "loss": 0.1958, "step": 8426 }, { "epoch": 0.6676173499702911, "grad_norm": 1.576687506269694, "learning_rate": 5.2554393765225e-06, "loss": 0.1833, "step": 8427 }, { "epoch": 0.6676965735789265, "grad_norm": 1.3230966278515903, "learning_rate": 5.253180708423877e-06, "loss": 0.1677, "step": 8428 }, { "epoch": 0.6677757971875619, "grad_norm": 1.9005386966393554, "learning_rate": 5.25092235288046e-06, "loss": 0.2465, "step": 8429 }, { "epoch": 0.6678550207961973, "grad_norm": 1.9547205849324198, "learning_rate": 5.248664310040958e-06, "loss": 0.3169, "step": 8430 }, { "epoch": 0.6679342444048326, "grad_norm": 1.7870250985226812, "learning_rate": 5.246406580054051e-06, "loss": 0.2031, "step": 8431 }, { "epoch": 0.6680134680134681, "grad_norm": 1.4253189701582614, "learning_rate": 5.244149163068394e-06, "loss": 0.2081, "step": 8432 }, { "epoch": 0.6680926916221034, "grad_norm": 1.803340127607196, "learning_rate": 5.241892059232634e-06, "loss": 0.2729, "step": 8433 }, { "epoch": 0.6681719152307387, "grad_norm": 1.4727999636960147, "learning_rate": 5.239635268695386e-06, "loss": 0.2445, "step": 8434 }, { "epoch": 0.6682511388393741, "grad_norm": 1.326679382717561, "learning_rate": 5.237378791605249e-06, "loss": 0.1238, "step": 8435 }, { "epoch": 0.6683303624480095, "grad_norm": 1.6343449249181874, "learning_rate": 5.235122628110805e-06, "loss": 0.1741, "step": 8436 }, { "epoch": 0.6684095860566449, "grad_norm": 1.6066480354377857, "learning_rate": 5.232866778360608e-06, "loss": 0.1658, "step": 8437 }, { "epoch": 0.6684888096652802, "grad_norm": 1.248186060559615, "learning_rate": 5.230611242503193e-06, "loss": 0.2099, "step": 8438 }, { "epoch": 0.6685680332739157, "grad_norm": 1.4626188130594877, "learning_rate": 5.228356020687082e-06, "loss": 0.1338, "step": 8439 }, { "epoch": 0.668647256882551, "grad_norm": 1.9456100702607746, "learning_rate": 5.226101113060769e-06, "loss": 0.3776, "step": 8440 }, { "epoch": 0.6687264804911863, "grad_norm": 1.358269659326838, "learning_rate": 5.223846519772722e-06, "loss": 0.1433, "step": 8441 }, { "epoch": 0.6688057040998218, "grad_norm": 1.1021769097165868, "learning_rate": 5.221592240971403e-06, "loss": 0.0847, "step": 8442 }, { "epoch": 0.6688849277084571, "grad_norm": 1.482518461292099, "learning_rate": 5.219338276805243e-06, "loss": 0.2636, "step": 8443 }, { "epoch": 0.6689641513170925, "grad_norm": 1.474678263545345, "learning_rate": 5.217084627422656e-06, "loss": 0.1746, "step": 8444 }, { "epoch": 0.6690433749257279, "grad_norm": 1.7854770861797706, "learning_rate": 5.214831292972027e-06, "loss": 0.2393, "step": 8445 }, { "epoch": 0.6691225985343633, "grad_norm": 1.571451543787154, "learning_rate": 5.212578273601738e-06, "loss": 0.2033, "step": 8446 }, { "epoch": 0.6692018221429986, "grad_norm": 1.4525476942226887, "learning_rate": 5.210325569460133e-06, "loss": 0.1512, "step": 8447 }, { "epoch": 0.6692810457516339, "grad_norm": 1.6151916703297589, "learning_rate": 5.208073180695538e-06, "loss": 0.2012, "step": 8448 }, { "epoch": 0.6693602693602694, "grad_norm": 1.5837921310521381, "learning_rate": 5.205821107456273e-06, "loss": 0.2181, "step": 8449 }, { "epoch": 0.6694394929689047, "grad_norm": 1.4985629769247983, "learning_rate": 5.203569349890618e-06, "loss": 0.1749, "step": 8450 }, { "epoch": 0.6695187165775401, "grad_norm": 1.4477125860789766, "learning_rate": 5.201317908146843e-06, "loss": 0.14, "step": 8451 }, { "epoch": 0.6695979401861755, "grad_norm": 1.2128356644954386, "learning_rate": 5.199066782373194e-06, "loss": 0.1041, "step": 8452 }, { "epoch": 0.6696771637948109, "grad_norm": 1.7380161149990438, "learning_rate": 5.196815972717897e-06, "loss": 0.2273, "step": 8453 }, { "epoch": 0.6697563874034462, "grad_norm": 1.320456483444575, "learning_rate": 5.194565479329154e-06, "loss": 0.1702, "step": 8454 }, { "epoch": 0.6698356110120816, "grad_norm": 1.2760780946377717, "learning_rate": 5.192315302355153e-06, "loss": 0.121, "step": 8455 }, { "epoch": 0.669914834620717, "grad_norm": 1.9602400991161062, "learning_rate": 5.190065441944059e-06, "loss": 0.2023, "step": 8456 }, { "epoch": 0.6699940582293523, "grad_norm": 1.6601163865410746, "learning_rate": 5.187815898244006e-06, "loss": 0.1687, "step": 8457 }, { "epoch": 0.6700732818379878, "grad_norm": 1.7714548939190813, "learning_rate": 5.185566671403126e-06, "loss": 0.2346, "step": 8458 }, { "epoch": 0.6701525054466231, "grad_norm": 1.963645970252486, "learning_rate": 5.183317761569515e-06, "loss": 0.2416, "step": 8459 }, { "epoch": 0.6702317290552585, "grad_norm": 1.6347264964106687, "learning_rate": 5.181069168891248e-06, "loss": 0.1938, "step": 8460 }, { "epoch": 0.6703109526638938, "grad_norm": 1.316388528324005, "learning_rate": 5.178820893516394e-06, "loss": 0.1166, "step": 8461 }, { "epoch": 0.6703901762725292, "grad_norm": 1.7660386645387534, "learning_rate": 5.176572935592986e-06, "loss": 0.179, "step": 8462 }, { "epoch": 0.6704693998811646, "grad_norm": 1.692958187418363, "learning_rate": 5.1743252952690385e-06, "loss": 0.2476, "step": 8463 }, { "epoch": 0.6705486234897999, "grad_norm": 1.455119160419921, "learning_rate": 5.172077972692553e-06, "loss": 0.2149, "step": 8464 }, { "epoch": 0.6706278470984354, "grad_norm": 1.5222541645852785, "learning_rate": 5.1698309680115024e-06, "loss": 0.231, "step": 8465 }, { "epoch": 0.6707070707070707, "grad_norm": 1.8655684011704334, "learning_rate": 5.167584281373838e-06, "loss": 0.2481, "step": 8466 }, { "epoch": 0.6707862943157061, "grad_norm": 0.9371901567203602, "learning_rate": 5.165337912927502e-06, "loss": 0.0795, "step": 8467 }, { "epoch": 0.6708655179243415, "grad_norm": 1.9303799189343307, "learning_rate": 5.1630918628204e-06, "loss": 0.1398, "step": 8468 }, { "epoch": 0.6709447415329768, "grad_norm": 1.237869139683717, "learning_rate": 5.1608461312004245e-06, "loss": 0.1686, "step": 8469 }, { "epoch": 0.6710239651416122, "grad_norm": 1.330205690766006, "learning_rate": 5.158600718215443e-06, "loss": 0.1524, "step": 8470 }, { "epoch": 0.6711031887502475, "grad_norm": 1.4282553567661662, "learning_rate": 5.156355624013314e-06, "loss": 0.2584, "step": 8471 }, { "epoch": 0.671182412358883, "grad_norm": 1.5205653402761046, "learning_rate": 5.15411084874186e-06, "loss": 0.2449, "step": 8472 }, { "epoch": 0.6712616359675183, "grad_norm": 1.7682987176796745, "learning_rate": 5.151866392548886e-06, "loss": 0.1976, "step": 8473 }, { "epoch": 0.6713408595761537, "grad_norm": 1.2274999670131204, "learning_rate": 5.149622255582185e-06, "loss": 0.1577, "step": 8474 }, { "epoch": 0.6714200831847891, "grad_norm": 1.761366219411446, "learning_rate": 5.147378437989522e-06, "loss": 0.1403, "step": 8475 }, { "epoch": 0.6714993067934244, "grad_norm": 1.6078488067989742, "learning_rate": 5.145134939918634e-06, "loss": 0.1449, "step": 8476 }, { "epoch": 0.6715785304020598, "grad_norm": 1.522582411569965, "learning_rate": 5.1428917615172555e-06, "loss": 0.1931, "step": 8477 }, { "epoch": 0.6716577540106952, "grad_norm": 1.3261246424459898, "learning_rate": 5.140648902933083e-06, "loss": 0.1422, "step": 8478 }, { "epoch": 0.6717369776193306, "grad_norm": 1.2471759697094842, "learning_rate": 5.138406364313795e-06, "loss": 0.1292, "step": 8479 }, { "epoch": 0.6718162012279659, "grad_norm": 1.4296797541151514, "learning_rate": 5.136164145807059e-06, "loss": 0.1702, "step": 8480 }, { "epoch": 0.6718954248366014, "grad_norm": 2.0731282130675117, "learning_rate": 5.13392224756051e-06, "loss": 0.2812, "step": 8481 }, { "epoch": 0.6719746484452367, "grad_norm": 1.8748077553442462, "learning_rate": 5.131680669721768e-06, "loss": 0.1908, "step": 8482 }, { "epoch": 0.672053872053872, "grad_norm": 1.4970895713835946, "learning_rate": 5.129439412438424e-06, "loss": 0.2165, "step": 8483 }, { "epoch": 0.6721330956625075, "grad_norm": 2.3464362368497955, "learning_rate": 5.127198475858064e-06, "loss": 0.338, "step": 8484 }, { "epoch": 0.6722123192711428, "grad_norm": 1.442024160308719, "learning_rate": 5.124957860128237e-06, "loss": 0.1624, "step": 8485 }, { "epoch": 0.6722915428797782, "grad_norm": 1.445072924192, "learning_rate": 5.122717565396474e-06, "loss": 0.2108, "step": 8486 }, { "epoch": 0.6723707664884135, "grad_norm": 1.2247059397253373, "learning_rate": 5.1204775918102955e-06, "loss": 0.1416, "step": 8487 }, { "epoch": 0.6724499900970489, "grad_norm": 1.536056124793062, "learning_rate": 5.11823793951719e-06, "loss": 0.133, "step": 8488 }, { "epoch": 0.6725292137056843, "grad_norm": 1.95175779781337, "learning_rate": 5.115998608664621e-06, "loss": 0.2876, "step": 8489 }, { "epoch": 0.6726084373143196, "grad_norm": 1.095147510776463, "learning_rate": 5.1137595994000475e-06, "loss": 0.124, "step": 8490 }, { "epoch": 0.6726876609229551, "grad_norm": 1.3561832574624626, "learning_rate": 5.111520911870894e-06, "loss": 0.1466, "step": 8491 }, { "epoch": 0.6727668845315904, "grad_norm": 1.6023852398879184, "learning_rate": 5.109282546224563e-06, "loss": 0.197, "step": 8492 }, { "epoch": 0.6728461081402258, "grad_norm": 1.7465342089412041, "learning_rate": 5.107044502608447e-06, "loss": 0.2545, "step": 8493 }, { "epoch": 0.6729253317488612, "grad_norm": 1.6874387232615442, "learning_rate": 5.104806781169906e-06, "loss": 0.1764, "step": 8494 }, { "epoch": 0.6730045553574965, "grad_norm": 2.1778429413109133, "learning_rate": 5.102569382056281e-06, "loss": 0.2201, "step": 8495 }, { "epoch": 0.6730837789661319, "grad_norm": 1.3040181587514807, "learning_rate": 5.100332305414902e-06, "loss": 0.1437, "step": 8496 }, { "epoch": 0.6731630025747672, "grad_norm": 1.2939534298064392, "learning_rate": 5.098095551393066e-06, "loss": 0.1794, "step": 8497 }, { "epoch": 0.6732422261834027, "grad_norm": 1.73128537738855, "learning_rate": 5.095859120138049e-06, "loss": 0.1837, "step": 8498 }, { "epoch": 0.673321449792038, "grad_norm": 1.4551562330444414, "learning_rate": 5.093623011797108e-06, "loss": 0.1329, "step": 8499 }, { "epoch": 0.6734006734006734, "grad_norm": 1.7354653375431057, "learning_rate": 5.091387226517489e-06, "loss": 0.2266, "step": 8500 }, { "epoch": 0.6734798970093088, "grad_norm": 1.287523177847614, "learning_rate": 5.089151764446403e-06, "loss": 0.1332, "step": 8501 }, { "epoch": 0.6735591206179441, "grad_norm": 1.1666422224273876, "learning_rate": 5.086916625731038e-06, "loss": 0.0948, "step": 8502 }, { "epoch": 0.6736383442265795, "grad_norm": 1.9109298025577046, "learning_rate": 5.084681810518577e-06, "loss": 0.155, "step": 8503 }, { "epoch": 0.6737175678352149, "grad_norm": 1.6270810382742522, "learning_rate": 5.0824473189561695e-06, "loss": 0.2308, "step": 8504 }, { "epoch": 0.6737967914438503, "grad_norm": 1.4288671467595624, "learning_rate": 5.080213151190938e-06, "loss": 0.1473, "step": 8505 }, { "epoch": 0.6738760150524856, "grad_norm": 1.870443493909983, "learning_rate": 5.077979307370004e-06, "loss": 0.2177, "step": 8506 }, { "epoch": 0.6739552386611211, "grad_norm": 2.0361578428045224, "learning_rate": 5.075745787640448e-06, "loss": 0.3307, "step": 8507 }, { "epoch": 0.6740344622697564, "grad_norm": 1.3653574228417908, "learning_rate": 5.073512592149334e-06, "loss": 0.1854, "step": 8508 }, { "epoch": 0.6741136858783917, "grad_norm": 1.5414280084285639, "learning_rate": 5.071279721043716e-06, "loss": 0.1735, "step": 8509 }, { "epoch": 0.6741929094870271, "grad_norm": 2.0740311219640004, "learning_rate": 5.069047174470613e-06, "loss": 0.2222, "step": 8510 }, { "epoch": 0.6742721330956625, "grad_norm": 1.6227142469765012, "learning_rate": 5.066814952577021e-06, "loss": 0.2294, "step": 8511 }, { "epoch": 0.6743513567042979, "grad_norm": 1.6920823613896925, "learning_rate": 5.064583055509935e-06, "loss": 0.2339, "step": 8512 }, { "epoch": 0.6744305803129332, "grad_norm": 1.4006838480031687, "learning_rate": 5.062351483416304e-06, "loss": 0.196, "step": 8513 }, { "epoch": 0.6745098039215687, "grad_norm": 1.4031934165158546, "learning_rate": 5.060120236443071e-06, "loss": 0.1596, "step": 8514 }, { "epoch": 0.674589027530204, "grad_norm": 1.5377744173701284, "learning_rate": 5.057889314737148e-06, "loss": 0.1583, "step": 8515 }, { "epoch": 0.6746682511388393, "grad_norm": 1.668182620156662, "learning_rate": 5.055658718445435e-06, "loss": 0.1983, "step": 8516 }, { "epoch": 0.6747474747474748, "grad_norm": 2.1344458873589143, "learning_rate": 5.053428447714806e-06, "loss": 0.2957, "step": 8517 }, { "epoch": 0.6748266983561101, "grad_norm": 1.413463487704143, "learning_rate": 5.05119850269211e-06, "loss": 0.1639, "step": 8518 }, { "epoch": 0.6749059219647455, "grad_norm": 1.55141391647744, "learning_rate": 5.048968883524182e-06, "loss": 0.1378, "step": 8519 }, { "epoch": 0.6749851455733809, "grad_norm": 1.3625622993554278, "learning_rate": 5.046739590357832e-06, "loss": 0.1699, "step": 8520 }, { "epoch": 0.6750643691820163, "grad_norm": 1.3445522321860088, "learning_rate": 5.044510623339842e-06, "loss": 0.1691, "step": 8521 }, { "epoch": 0.6751435927906516, "grad_norm": 1.8681283864517408, "learning_rate": 5.042281982616986e-06, "loss": 0.3032, "step": 8522 }, { "epoch": 0.6752228163992869, "grad_norm": 1.3255191787263767, "learning_rate": 5.0400536683360064e-06, "loss": 0.1072, "step": 8523 }, { "epoch": 0.6753020400079224, "grad_norm": 1.8354589583669219, "learning_rate": 5.037825680643624e-06, "loss": 0.14, "step": 8524 }, { "epoch": 0.6753812636165577, "grad_norm": 1.589199015619873, "learning_rate": 5.035598019686549e-06, "loss": 0.1979, "step": 8525 }, { "epoch": 0.6754604872251931, "grad_norm": 1.3600072006946753, "learning_rate": 5.033370685611456e-06, "loss": 0.1658, "step": 8526 }, { "epoch": 0.6755397108338285, "grad_norm": 1.7568432837746937, "learning_rate": 5.031143678565005e-06, "loss": 0.2276, "step": 8527 }, { "epoch": 0.6756189344424639, "grad_norm": 1.5257621077406234, "learning_rate": 5.028916998693831e-06, "loss": 0.1879, "step": 8528 }, { "epoch": 0.6756981580510992, "grad_norm": 1.4386596301967385, "learning_rate": 5.02669064614456e-06, "loss": 0.1382, "step": 8529 }, { "epoch": 0.6757773816597346, "grad_norm": 1.8160107495596027, "learning_rate": 5.024464621063773e-06, "loss": 0.2048, "step": 8530 }, { "epoch": 0.67585660526837, "grad_norm": 1.5467975569183061, "learning_rate": 5.022238923598055e-06, "loss": 0.1968, "step": 8531 }, { "epoch": 0.6759358288770053, "grad_norm": 1.3747405622906388, "learning_rate": 5.020013553893952e-06, "loss": 0.1485, "step": 8532 }, { "epoch": 0.6760150524856408, "grad_norm": 1.6200214007104616, "learning_rate": 5.017788512097989e-06, "loss": 0.2157, "step": 8533 }, { "epoch": 0.6760942760942761, "grad_norm": 1.6361064107047978, "learning_rate": 5.015563798356684e-06, "loss": 0.2593, "step": 8534 }, { "epoch": 0.6761734997029115, "grad_norm": 1.347862985898063, "learning_rate": 5.0133394128165204e-06, "loss": 0.1423, "step": 8535 }, { "epoch": 0.6762527233115468, "grad_norm": 1.3549275562574086, "learning_rate": 5.011115355623957e-06, "loss": 0.1891, "step": 8536 }, { "epoch": 0.6763319469201822, "grad_norm": 1.5234051982373102, "learning_rate": 5.008891626925447e-06, "loss": 0.1835, "step": 8537 }, { "epoch": 0.6764111705288176, "grad_norm": 1.3754411236015436, "learning_rate": 5.006668226867407e-06, "loss": 0.1301, "step": 8538 }, { "epoch": 0.6764903941374529, "grad_norm": 1.6860678976739774, "learning_rate": 5.004445155596238e-06, "loss": 0.1941, "step": 8539 }, { "epoch": 0.6765696177460884, "grad_norm": 1.1545212715442095, "learning_rate": 5.0022224132583154e-06, "loss": 0.1302, "step": 8540 }, { "epoch": 0.6766488413547237, "grad_norm": 1.2298888040285771, "learning_rate": 5.000000000000003e-06, "loss": 0.1129, "step": 8541 }, { "epoch": 0.6767280649633591, "grad_norm": 1.324481207116287, "learning_rate": 4.997777915967631e-06, "loss": 0.1719, "step": 8542 }, { "epoch": 0.6768072885719945, "grad_norm": 1.5778495961484131, "learning_rate": 4.995556161307511e-06, "loss": 0.2469, "step": 8543 }, { "epoch": 0.6768865121806298, "grad_norm": 1.3587851286887267, "learning_rate": 4.993334736165941e-06, "loss": 0.1485, "step": 8544 }, { "epoch": 0.6769657357892652, "grad_norm": 1.63471823791878, "learning_rate": 4.991113640689189e-06, "loss": 0.1794, "step": 8545 }, { "epoch": 0.6770449593979005, "grad_norm": 1.6904132118066113, "learning_rate": 4.988892875023499e-06, "loss": 0.2388, "step": 8546 }, { "epoch": 0.677124183006536, "grad_norm": 1.7335100884559311, "learning_rate": 4.9866724393151044e-06, "loss": 0.2165, "step": 8547 }, { "epoch": 0.6772034066151713, "grad_norm": 1.2058727937874647, "learning_rate": 4.984452333710207e-06, "loss": 0.0942, "step": 8548 }, { "epoch": 0.6772826302238067, "grad_norm": 1.5124110442235879, "learning_rate": 4.982232558354986e-06, "loss": 0.1408, "step": 8549 }, { "epoch": 0.6773618538324421, "grad_norm": 1.282638828124292, "learning_rate": 4.980013113395612e-06, "loss": 0.133, "step": 8550 }, { "epoch": 0.6774410774410774, "grad_norm": 1.5428026279784988, "learning_rate": 4.9777939989782185e-06, "loss": 0.2498, "step": 8551 }, { "epoch": 0.6775203010497128, "grad_norm": 1.5973135123251054, "learning_rate": 4.975575215248926e-06, "loss": 0.2104, "step": 8552 }, { "epoch": 0.6775995246583482, "grad_norm": 2.1667810637505824, "learning_rate": 4.9733567623538245e-06, "loss": 0.3219, "step": 8553 }, { "epoch": 0.6776787482669836, "grad_norm": 1.40473252150783, "learning_rate": 4.9711386404389995e-06, "loss": 0.1391, "step": 8554 }, { "epoch": 0.6777579718756189, "grad_norm": 2.131269419652134, "learning_rate": 4.968920849650496e-06, "loss": 0.2287, "step": 8555 }, { "epoch": 0.6778371954842544, "grad_norm": 1.7496739998635797, "learning_rate": 4.966703390134343e-06, "loss": 0.1975, "step": 8556 }, { "epoch": 0.6779164190928897, "grad_norm": 1.3402601225762008, "learning_rate": 4.964486262036557e-06, "loss": 0.1646, "step": 8557 }, { "epoch": 0.677995642701525, "grad_norm": 1.5155336456082882, "learning_rate": 4.962269465503121e-06, "loss": 0.2325, "step": 8558 }, { "epoch": 0.6780748663101605, "grad_norm": 1.596258719887241, "learning_rate": 4.960053000679997e-06, "loss": 0.2032, "step": 8559 }, { "epoch": 0.6781540899187958, "grad_norm": 1.1478528809983357, "learning_rate": 4.957836867713138e-06, "loss": 0.1526, "step": 8560 }, { "epoch": 0.6782333135274312, "grad_norm": 1.258742314957348, "learning_rate": 4.955621066748457e-06, "loss": 0.1128, "step": 8561 }, { "epoch": 0.6783125371360665, "grad_norm": 1.2923005314200162, "learning_rate": 4.953405597931854e-06, "loss": 0.2014, "step": 8562 }, { "epoch": 0.6783917607447019, "grad_norm": 1.540768290421965, "learning_rate": 4.951190461409214e-06, "loss": 0.2474, "step": 8563 }, { "epoch": 0.6784709843533373, "grad_norm": 1.0842009487271456, "learning_rate": 4.948975657326388e-06, "loss": 0.1095, "step": 8564 }, { "epoch": 0.6785502079619726, "grad_norm": 1.4855075907221102, "learning_rate": 4.946761185829208e-06, "loss": 0.197, "step": 8565 }, { "epoch": 0.6786294315706081, "grad_norm": 1.4319057122684191, "learning_rate": 4.944547047063493e-06, "loss": 0.152, "step": 8566 }, { "epoch": 0.6787086551792434, "grad_norm": 1.3974249544087434, "learning_rate": 4.942333241175029e-06, "loss": 0.2162, "step": 8567 }, { "epoch": 0.6787878787878788, "grad_norm": 1.4998666889143208, "learning_rate": 4.940119768309585e-06, "loss": 0.2442, "step": 8568 }, { "epoch": 0.6788671023965142, "grad_norm": 1.8821341372241676, "learning_rate": 4.937906628612905e-06, "loss": 0.2019, "step": 8569 }, { "epoch": 0.6789463260051495, "grad_norm": 1.2868391946904214, "learning_rate": 4.93569382223072e-06, "loss": 0.1351, "step": 8570 }, { "epoch": 0.6790255496137849, "grad_norm": 1.367902006728048, "learning_rate": 4.933481349308728e-06, "loss": 0.115, "step": 8571 }, { "epoch": 0.6791047732224202, "grad_norm": 1.385837211257772, "learning_rate": 4.931269209992607e-06, "loss": 0.1499, "step": 8572 }, { "epoch": 0.6791839968310557, "grad_norm": 1.7607382267167446, "learning_rate": 4.929057404428023e-06, "loss": 0.1897, "step": 8573 }, { "epoch": 0.679263220439691, "grad_norm": 1.4065539419935775, "learning_rate": 4.926845932760609e-06, "loss": 0.1896, "step": 8574 }, { "epoch": 0.6793424440483264, "grad_norm": 1.7514918908737478, "learning_rate": 4.924634795135976e-06, "loss": 0.2262, "step": 8575 }, { "epoch": 0.6794216676569618, "grad_norm": 1.2586453017349515, "learning_rate": 4.922423991699725e-06, "loss": 0.1622, "step": 8576 }, { "epoch": 0.6795008912655971, "grad_norm": 1.3165998443886482, "learning_rate": 4.920213522597422e-06, "loss": 0.1308, "step": 8577 }, { "epoch": 0.6795801148742325, "grad_norm": 1.7436076082684013, "learning_rate": 4.918003387974614e-06, "loss": 0.1847, "step": 8578 }, { "epoch": 0.6796593384828679, "grad_norm": 1.5991089520432948, "learning_rate": 4.915793587976832e-06, "loss": 0.1879, "step": 8579 }, { "epoch": 0.6797385620915033, "grad_norm": 1.2925535099600607, "learning_rate": 4.913584122749578e-06, "loss": 0.1473, "step": 8580 }, { "epoch": 0.6798177857001386, "grad_norm": 1.4197447927759301, "learning_rate": 4.911374992438334e-06, "loss": 0.1463, "step": 8581 }, { "epoch": 0.6798970093087741, "grad_norm": 1.6233574438733154, "learning_rate": 4.909166197188563e-06, "loss": 0.1858, "step": 8582 }, { "epoch": 0.6799762329174094, "grad_norm": 1.845523476573834, "learning_rate": 4.906957737145703e-06, "loss": 0.182, "step": 8583 }, { "epoch": 0.6800554565260447, "grad_norm": 1.6731709411467286, "learning_rate": 4.904749612455171e-06, "loss": 0.2157, "step": 8584 }, { "epoch": 0.6801346801346801, "grad_norm": 1.641858619300692, "learning_rate": 4.902541823262356e-06, "loss": 0.161, "step": 8585 }, { "epoch": 0.6802139037433155, "grad_norm": 1.8308752657918839, "learning_rate": 4.900334369712637e-06, "loss": 0.2199, "step": 8586 }, { "epoch": 0.6802931273519509, "grad_norm": 1.5417994791697767, "learning_rate": 4.898127251951363e-06, "loss": 0.2204, "step": 8587 }, { "epoch": 0.6803723509605862, "grad_norm": 1.447278852663485, "learning_rate": 4.895920470123857e-06, "loss": 0.1726, "step": 8588 }, { "epoch": 0.6804515745692217, "grad_norm": 1.6376487142919485, "learning_rate": 4.893714024375432e-06, "loss": 0.1854, "step": 8589 }, { "epoch": 0.680530798177857, "grad_norm": 1.3599365330770072, "learning_rate": 4.89150791485137e-06, "loss": 0.1419, "step": 8590 }, { "epoch": 0.6806100217864923, "grad_norm": 1.5969374573522095, "learning_rate": 4.889302141696925e-06, "loss": 0.1779, "step": 8591 }, { "epoch": 0.6806892453951278, "grad_norm": 1.3955223042255005, "learning_rate": 4.88709670505735e-06, "loss": 0.1384, "step": 8592 }, { "epoch": 0.6807684690037631, "grad_norm": 1.6557109219727237, "learning_rate": 4.884891605077853e-06, "loss": 0.1707, "step": 8593 }, { "epoch": 0.6808476926123985, "grad_norm": 1.6475801155131908, "learning_rate": 4.882686841903627e-06, "loss": 0.212, "step": 8594 }, { "epoch": 0.6809269162210339, "grad_norm": 1.503630840263843, "learning_rate": 4.8804824156798544e-06, "loss": 0.1634, "step": 8595 }, { "epoch": 0.6810061398296693, "grad_norm": 1.5612340649170864, "learning_rate": 4.878278326551682e-06, "loss": 0.1869, "step": 8596 }, { "epoch": 0.6810853634383046, "grad_norm": 1.2960560024780041, "learning_rate": 4.876074574664232e-06, "loss": 0.1259, "step": 8597 }, { "epoch": 0.6811645870469399, "grad_norm": 1.393538759623158, "learning_rate": 4.873871160162622e-06, "loss": 0.158, "step": 8598 }, { "epoch": 0.6812438106555754, "grad_norm": 1.6361564763009502, "learning_rate": 4.871668083191931e-06, "loss": 0.1977, "step": 8599 }, { "epoch": 0.6813230342642107, "grad_norm": 1.9741867703390905, "learning_rate": 4.8694653438972195e-06, "loss": 0.1673, "step": 8600 }, { "epoch": 0.6814022578728461, "grad_norm": 1.4387389722285093, "learning_rate": 4.867262942423525e-06, "loss": 0.161, "step": 8601 }, { "epoch": 0.6814814814814815, "grad_norm": 1.701930408154907, "learning_rate": 4.865060878915873e-06, "loss": 0.1642, "step": 8602 }, { "epoch": 0.6815607050901169, "grad_norm": 2.0021225121916526, "learning_rate": 4.862859153519252e-06, "loss": 0.2728, "step": 8603 }, { "epoch": 0.6816399286987522, "grad_norm": 1.4913843276190428, "learning_rate": 4.860657766378637e-06, "loss": 0.1853, "step": 8604 }, { "epoch": 0.6817191523073876, "grad_norm": 1.1807206607478606, "learning_rate": 4.858456717638981e-06, "loss": 0.1164, "step": 8605 }, { "epoch": 0.681798375916023, "grad_norm": 1.3498192464577399, "learning_rate": 4.856256007445211e-06, "loss": 0.1281, "step": 8606 }, { "epoch": 0.6818775995246583, "grad_norm": 1.2528592144148911, "learning_rate": 4.8540556359422335e-06, "loss": 0.1629, "step": 8607 }, { "epoch": 0.6819568231332938, "grad_norm": 1.1578562032052755, "learning_rate": 4.85185560327493e-06, "loss": 0.143, "step": 8608 }, { "epoch": 0.6820360467419291, "grad_norm": 1.5195694080293936, "learning_rate": 4.849655909588165e-06, "loss": 0.1908, "step": 8609 }, { "epoch": 0.6821152703505645, "grad_norm": 1.838130817530145, "learning_rate": 4.847456555026773e-06, "loss": 0.2922, "step": 8610 }, { "epoch": 0.6821944939591998, "grad_norm": 1.3583243790507356, "learning_rate": 4.845257539735577e-06, "loss": 0.1323, "step": 8611 }, { "epoch": 0.6822737175678352, "grad_norm": 1.9146706970827245, "learning_rate": 4.843058863859369e-06, "loss": 0.2281, "step": 8612 }, { "epoch": 0.6823529411764706, "grad_norm": 1.2631825782674775, "learning_rate": 4.840860527542919e-06, "loss": 0.1454, "step": 8613 }, { "epoch": 0.6824321647851059, "grad_norm": 1.5780932205699636, "learning_rate": 4.838662530930981e-06, "loss": 0.1387, "step": 8614 }, { "epoch": 0.6825113883937414, "grad_norm": 1.3333092449744257, "learning_rate": 4.836464874168282e-06, "loss": 0.1836, "step": 8615 }, { "epoch": 0.6825906120023767, "grad_norm": 1.769202557702352, "learning_rate": 4.834267557399521e-06, "loss": 0.1831, "step": 8616 }, { "epoch": 0.6826698356110121, "grad_norm": 1.346387300277102, "learning_rate": 4.832070580769389e-06, "loss": 0.1849, "step": 8617 }, { "epoch": 0.6827490592196475, "grad_norm": 1.4092969389998287, "learning_rate": 4.829873944422544e-06, "loss": 0.1446, "step": 8618 }, { "epoch": 0.6828282828282828, "grad_norm": 1.7339774517482605, "learning_rate": 4.8276776485036185e-06, "loss": 0.2278, "step": 8619 }, { "epoch": 0.6829075064369182, "grad_norm": 1.6955699006646225, "learning_rate": 4.825481693157235e-06, "loss": 0.1879, "step": 8620 }, { "epoch": 0.6829867300455535, "grad_norm": 1.3629165159761114, "learning_rate": 4.823286078527984e-06, "loss": 0.1189, "step": 8621 }, { "epoch": 0.683065953654189, "grad_norm": 2.206024746466242, "learning_rate": 4.8210908047604336e-06, "loss": 0.2435, "step": 8622 }, { "epoch": 0.6831451772628243, "grad_norm": 1.650706399093415, "learning_rate": 4.818895871999136e-06, "loss": 0.1769, "step": 8623 }, { "epoch": 0.6832244008714597, "grad_norm": 1.4927354432691475, "learning_rate": 4.816701280388617e-06, "loss": 0.1381, "step": 8624 }, { "epoch": 0.6833036244800951, "grad_norm": 1.342977216804004, "learning_rate": 4.814507030073377e-06, "loss": 0.1258, "step": 8625 }, { "epoch": 0.6833828480887304, "grad_norm": 1.8638109438553099, "learning_rate": 4.812313121197896e-06, "loss": 0.2157, "step": 8626 }, { "epoch": 0.6834620716973658, "grad_norm": 1.983207606433488, "learning_rate": 4.810119553906637e-06, "loss": 0.2239, "step": 8627 }, { "epoch": 0.6835412953060012, "grad_norm": 1.5859256273035767, "learning_rate": 4.807926328344033e-06, "loss": 0.1384, "step": 8628 }, { "epoch": 0.6836205189146366, "grad_norm": 1.5810360359678104, "learning_rate": 4.805733444654496e-06, "loss": 0.1854, "step": 8629 }, { "epoch": 0.6836997425232719, "grad_norm": 1.4085303112832175, "learning_rate": 4.8035409029824195e-06, "loss": 0.168, "step": 8630 }, { "epoch": 0.6837789661319074, "grad_norm": 1.4163183407444602, "learning_rate": 4.801348703472173e-06, "loss": 0.1802, "step": 8631 }, { "epoch": 0.6838581897405427, "grad_norm": 2.0585092565426057, "learning_rate": 4.7991568462680945e-06, "loss": 0.1672, "step": 8632 }, { "epoch": 0.683937413349178, "grad_norm": 1.2640581576220085, "learning_rate": 4.796965331514517e-06, "loss": 0.1103, "step": 8633 }, { "epoch": 0.6840166369578135, "grad_norm": 1.333843195416572, "learning_rate": 4.794774159355737e-06, "loss": 0.1165, "step": 8634 }, { "epoch": 0.6840958605664488, "grad_norm": 1.4288040296992421, "learning_rate": 4.79258332993603e-06, "loss": 0.2208, "step": 8635 }, { "epoch": 0.6841750841750842, "grad_norm": 1.3548945775697205, "learning_rate": 4.7903928433996576e-06, "loss": 0.1256, "step": 8636 }, { "epoch": 0.6842543077837195, "grad_norm": 1.3128577428498032, "learning_rate": 4.788202699890848e-06, "loss": 0.1582, "step": 8637 }, { "epoch": 0.684333531392355, "grad_norm": 2.064313624811447, "learning_rate": 4.786012899553815e-06, "loss": 0.2567, "step": 8638 }, { "epoch": 0.6844127550009903, "grad_norm": 1.3536405598336756, "learning_rate": 4.783823442532739e-06, "loss": 0.2065, "step": 8639 }, { "epoch": 0.6844919786096256, "grad_norm": 1.4525240588117532, "learning_rate": 4.781634328971796e-06, "loss": 0.1703, "step": 8640 }, { "epoch": 0.6845712022182611, "grad_norm": 1.5287689114225305, "learning_rate": 4.779445559015122e-06, "loss": 0.1837, "step": 8641 }, { "epoch": 0.6846504258268964, "grad_norm": 1.9253531908599657, "learning_rate": 4.777257132806835e-06, "loss": 0.1983, "step": 8642 }, { "epoch": 0.6847296494355318, "grad_norm": 1.3591657661896646, "learning_rate": 4.775069050491039e-06, "loss": 0.1243, "step": 8643 }, { "epoch": 0.6848088730441672, "grad_norm": 1.519596700431314, "learning_rate": 4.772881312211805e-06, "loss": 0.2302, "step": 8644 }, { "epoch": 0.6848880966528025, "grad_norm": 1.5135885297213851, "learning_rate": 4.770693918113183e-06, "loss": 0.1793, "step": 8645 }, { "epoch": 0.6849673202614379, "grad_norm": 1.1440790841583879, "learning_rate": 4.768506868339206e-06, "loss": 0.1275, "step": 8646 }, { "epoch": 0.6850465438700732, "grad_norm": 1.7197533491395491, "learning_rate": 4.766320163033882e-06, "loss": 0.2059, "step": 8647 }, { "epoch": 0.6851257674787087, "grad_norm": 1.2826680838873479, "learning_rate": 4.764133802341188e-06, "loss": 0.1473, "step": 8648 }, { "epoch": 0.685204991087344, "grad_norm": 1.5301814973492749, "learning_rate": 4.761947786405092e-06, "loss": 0.248, "step": 8649 }, { "epoch": 0.6852842146959794, "grad_norm": 1.4016813442635385, "learning_rate": 4.759762115369531e-06, "loss": 0.1664, "step": 8650 }, { "epoch": 0.6853634383046148, "grad_norm": 1.671388275849299, "learning_rate": 4.7575767893784174e-06, "loss": 0.1807, "step": 8651 }, { "epoch": 0.6854426619132501, "grad_norm": 1.3489819520227455, "learning_rate": 4.755391808575651e-06, "loss": 0.143, "step": 8652 }, { "epoch": 0.6855218855218855, "grad_norm": 1.9152517469065016, "learning_rate": 4.7532071731050975e-06, "loss": 0.2727, "step": 8653 }, { "epoch": 0.6856011091305209, "grad_norm": 1.24174475158412, "learning_rate": 4.7510228831106064e-06, "loss": 0.134, "step": 8654 }, { "epoch": 0.6856803327391563, "grad_norm": 1.6081747422509234, "learning_rate": 4.748838938735999e-06, "loss": 0.2238, "step": 8655 }, { "epoch": 0.6857595563477916, "grad_norm": 1.2428472170982199, "learning_rate": 4.746655340125082e-06, "loss": 0.1564, "step": 8656 }, { "epoch": 0.6858387799564271, "grad_norm": 1.6532301274027756, "learning_rate": 4.744472087421635e-06, "loss": 0.1467, "step": 8657 }, { "epoch": 0.6859180035650624, "grad_norm": 1.2173960770815595, "learning_rate": 4.74228918076941e-06, "loss": 0.1144, "step": 8658 }, { "epoch": 0.6859972271736977, "grad_norm": 1.4581783599010332, "learning_rate": 4.740106620312147e-06, "loss": 0.1811, "step": 8659 }, { "epoch": 0.6860764507823331, "grad_norm": 1.4110417666553414, "learning_rate": 4.737924406193554e-06, "loss": 0.1634, "step": 8660 }, { "epoch": 0.6861556743909685, "grad_norm": 1.3954298236239187, "learning_rate": 4.735742538557316e-06, "loss": 0.1551, "step": 8661 }, { "epoch": 0.6862348979996039, "grad_norm": 1.7224920943508635, "learning_rate": 4.733561017547104e-06, "loss": 0.2154, "step": 8662 }, { "epoch": 0.6863141216082392, "grad_norm": 1.4352759584573156, "learning_rate": 4.73137984330656e-06, "loss": 0.1982, "step": 8663 }, { "epoch": 0.6863933452168747, "grad_norm": 1.1239504535986455, "learning_rate": 4.729199015979298e-06, "loss": 0.1372, "step": 8664 }, { "epoch": 0.68647256882551, "grad_norm": 1.2280103386989374, "learning_rate": 4.727018535708922e-06, "loss": 0.1188, "step": 8665 }, { "epoch": 0.6865517924341453, "grad_norm": 1.8473294865599927, "learning_rate": 4.724838402639006e-06, "loss": 0.2499, "step": 8666 }, { "epoch": 0.6866310160427808, "grad_norm": 1.1661714225758504, "learning_rate": 4.7226586169130925e-06, "loss": 0.1323, "step": 8667 }, { "epoch": 0.6867102396514161, "grad_norm": 1.5509361208713552, "learning_rate": 4.7204791786747215e-06, "loss": 0.1886, "step": 8668 }, { "epoch": 0.6867894632600515, "grad_norm": 1.8026075803806696, "learning_rate": 4.718300088067392e-06, "loss": 0.1883, "step": 8669 }, { "epoch": 0.6868686868686869, "grad_norm": 1.532212530360673, "learning_rate": 4.716121345234589e-06, "loss": 0.1784, "step": 8670 }, { "epoch": 0.6869479104773223, "grad_norm": 1.7311533499141019, "learning_rate": 4.713942950319767e-06, "loss": 0.177, "step": 8671 }, { "epoch": 0.6870271340859576, "grad_norm": 1.6446493445822377, "learning_rate": 4.71176490346637e-06, "loss": 0.1756, "step": 8672 }, { "epoch": 0.6871063576945929, "grad_norm": 1.1700753689368821, "learning_rate": 4.709587204817809e-06, "loss": 0.0979, "step": 8673 }, { "epoch": 0.6871855813032284, "grad_norm": 1.371687825816755, "learning_rate": 4.707409854517471e-06, "loss": 0.181, "step": 8674 }, { "epoch": 0.6872648049118637, "grad_norm": 1.788528039065579, "learning_rate": 4.705232852708732e-06, "loss": 0.2303, "step": 8675 }, { "epoch": 0.6873440285204991, "grad_norm": 1.6632465599959885, "learning_rate": 4.703056199534933e-06, "loss": 0.1777, "step": 8676 }, { "epoch": 0.6874232521291345, "grad_norm": 1.1073877635271137, "learning_rate": 4.700879895139391e-06, "loss": 0.1267, "step": 8677 }, { "epoch": 0.6875024757377699, "grad_norm": 1.5096000596143704, "learning_rate": 4.698703939665414e-06, "loss": 0.1709, "step": 8678 }, { "epoch": 0.6875816993464052, "grad_norm": 1.5486701488331398, "learning_rate": 4.696528333256275e-06, "loss": 0.1769, "step": 8679 }, { "epoch": 0.6876609229550406, "grad_norm": 1.5936474109007752, "learning_rate": 4.694353076055222e-06, "loss": 0.1784, "step": 8680 }, { "epoch": 0.687740146563676, "grad_norm": 1.1665492738772993, "learning_rate": 4.6921781682054954e-06, "loss": 0.1231, "step": 8681 }, { "epoch": 0.6878193701723113, "grad_norm": 1.2100819032032752, "learning_rate": 4.6900036098502956e-06, "loss": 0.1366, "step": 8682 }, { "epoch": 0.6878985937809468, "grad_norm": 1.422333271504052, "learning_rate": 4.687829401132804e-06, "loss": 0.1838, "step": 8683 }, { "epoch": 0.6879778173895821, "grad_norm": 0.9749413880131753, "learning_rate": 4.685655542196194e-06, "loss": 0.1383, "step": 8684 }, { "epoch": 0.6880570409982175, "grad_norm": 1.3402066322109363, "learning_rate": 4.6834820331835915e-06, "loss": 0.1824, "step": 8685 }, { "epoch": 0.6881362646068528, "grad_norm": 0.9367352461671669, "learning_rate": 4.681308874238112e-06, "loss": 0.0655, "step": 8686 }, { "epoch": 0.6882154882154882, "grad_norm": 1.6035176560395976, "learning_rate": 4.679136065502855e-06, "loss": 0.2201, "step": 8687 }, { "epoch": 0.6882947118241236, "grad_norm": 1.6486245767304224, "learning_rate": 4.676963607120886e-06, "loss": 0.2086, "step": 8688 }, { "epoch": 0.6883739354327589, "grad_norm": 1.6928345935177183, "learning_rate": 4.674791499235246e-06, "loss": 0.2289, "step": 8689 }, { "epoch": 0.6884531590413944, "grad_norm": 1.5480049299610399, "learning_rate": 4.672619741988966e-06, "loss": 0.1614, "step": 8690 }, { "epoch": 0.6885323826500297, "grad_norm": 1.257892792112472, "learning_rate": 4.670448335525043e-06, "loss": 0.1631, "step": 8691 }, { "epoch": 0.6886116062586651, "grad_norm": 1.1952894179582885, "learning_rate": 4.66827727998645e-06, "loss": 0.1308, "step": 8692 }, { "epoch": 0.6886908298673005, "grad_norm": 1.971716570449774, "learning_rate": 4.666106575516146e-06, "loss": 0.3032, "step": 8693 }, { "epoch": 0.6887700534759358, "grad_norm": 1.4230007737961512, "learning_rate": 4.663936222257059e-06, "loss": 0.1155, "step": 8694 }, { "epoch": 0.6888492770845712, "grad_norm": 1.874785931223803, "learning_rate": 4.661766220352098e-06, "loss": 0.2348, "step": 8695 }, { "epoch": 0.6889285006932065, "grad_norm": 1.4325628356186775, "learning_rate": 4.659596569944139e-06, "loss": 0.1523, "step": 8696 }, { "epoch": 0.689007724301842, "grad_norm": 1.325834981926651, "learning_rate": 4.657427271176055e-06, "loss": 0.1668, "step": 8697 }, { "epoch": 0.6890869479104773, "grad_norm": 2.0312528835403527, "learning_rate": 4.655258324190678e-06, "loss": 0.353, "step": 8698 }, { "epoch": 0.6891661715191127, "grad_norm": 1.6178334823322276, "learning_rate": 4.65308972913082e-06, "loss": 0.2728, "step": 8699 }, { "epoch": 0.6892453951277481, "grad_norm": 1.3541334824730633, "learning_rate": 4.6509214861392785e-06, "loss": 0.205, "step": 8700 }, { "epoch": 0.6893246187363834, "grad_norm": 1.4349157628828468, "learning_rate": 4.648753595358818e-06, "loss": 0.2138, "step": 8701 }, { "epoch": 0.6894038423450188, "grad_norm": 1.432057521051746, "learning_rate": 4.646586056932183e-06, "loss": 0.1773, "step": 8702 }, { "epoch": 0.6894830659536542, "grad_norm": 1.4339160875258519, "learning_rate": 4.6444188710021e-06, "loss": 0.2104, "step": 8703 }, { "epoch": 0.6895622895622896, "grad_norm": 1.7248934291141744, "learning_rate": 4.6422520377112646e-06, "loss": 0.1958, "step": 8704 }, { "epoch": 0.6896415131709249, "grad_norm": 1.3388525654957464, "learning_rate": 4.640085557202349e-06, "loss": 0.1354, "step": 8705 }, { "epoch": 0.6897207367795604, "grad_norm": 1.581499144453623, "learning_rate": 4.637919429618014e-06, "loss": 0.1607, "step": 8706 }, { "epoch": 0.6897999603881957, "grad_norm": 0.8982538474255346, "learning_rate": 4.635753655100883e-06, "loss": 0.1012, "step": 8707 }, { "epoch": 0.689879183996831, "grad_norm": 1.4749880481329436, "learning_rate": 4.633588233793559e-06, "loss": 0.1473, "step": 8708 }, { "epoch": 0.6899584076054665, "grad_norm": 1.1470822206467024, "learning_rate": 4.631423165838632e-06, "loss": 0.0726, "step": 8709 }, { "epoch": 0.6900376312141018, "grad_norm": 1.6982771812494444, "learning_rate": 4.629258451378658e-06, "loss": 0.2228, "step": 8710 }, { "epoch": 0.6901168548227372, "grad_norm": 1.9688465047513715, "learning_rate": 4.6270940905561725e-06, "loss": 0.214, "step": 8711 }, { "epoch": 0.6901960784313725, "grad_norm": 1.3314539680265636, "learning_rate": 4.624930083513684e-06, "loss": 0.109, "step": 8712 }, { "epoch": 0.690275302040008, "grad_norm": 1.3986515785934175, "learning_rate": 4.62276643039369e-06, "loss": 0.2048, "step": 8713 }, { "epoch": 0.6903545256486433, "grad_norm": 1.5742571587693621, "learning_rate": 4.620603131338655e-06, "loss": 0.1813, "step": 8714 }, { "epoch": 0.6904337492572786, "grad_norm": 1.7884500502894105, "learning_rate": 4.6184401864910136e-06, "loss": 0.1735, "step": 8715 }, { "epoch": 0.6905129728659141, "grad_norm": 1.660343164917738, "learning_rate": 4.616277595993196e-06, "loss": 0.1885, "step": 8716 }, { "epoch": 0.6905921964745494, "grad_norm": 1.4109575409332926, "learning_rate": 4.614115359987595e-06, "loss": 0.1306, "step": 8717 }, { "epoch": 0.6906714200831848, "grad_norm": 1.7199965159552877, "learning_rate": 4.6119534786165765e-06, "loss": 0.1405, "step": 8718 }, { "epoch": 0.6907506436918202, "grad_norm": 1.5895875125245018, "learning_rate": 4.609791952022501e-06, "loss": 0.1988, "step": 8719 }, { "epoch": 0.6908298673004556, "grad_norm": 1.8749744340682035, "learning_rate": 4.607630780347689e-06, "loss": 0.2327, "step": 8720 }, { "epoch": 0.6909090909090909, "grad_norm": 1.7836794920850219, "learning_rate": 4.60546996373444e-06, "loss": 0.1699, "step": 8721 }, { "epoch": 0.6909883145177262, "grad_norm": 1.7214650559801976, "learning_rate": 4.603309502325041e-06, "loss": 0.2047, "step": 8722 }, { "epoch": 0.6910675381263617, "grad_norm": 1.8908823606983562, "learning_rate": 4.601149396261744e-06, "loss": 0.2338, "step": 8723 }, { "epoch": 0.691146761734997, "grad_norm": 1.4957853564365362, "learning_rate": 4.598989645686782e-06, "loss": 0.098, "step": 8724 }, { "epoch": 0.6912259853436324, "grad_norm": 1.7823556390883566, "learning_rate": 4.596830250742359e-06, "loss": 0.2294, "step": 8725 }, { "epoch": 0.6913052089522678, "grad_norm": 1.2385164046684465, "learning_rate": 4.594671211570671e-06, "loss": 0.1321, "step": 8726 }, { "epoch": 0.6913844325609031, "grad_norm": 1.828158162834079, "learning_rate": 4.592512528313874e-06, "loss": 0.1689, "step": 8727 }, { "epoch": 0.6914636561695385, "grad_norm": 1.7926999264677321, "learning_rate": 4.590354201114103e-06, "loss": 0.2378, "step": 8728 }, { "epoch": 0.6915428797781739, "grad_norm": 1.610186300693577, "learning_rate": 4.588196230113483e-06, "loss": 0.2109, "step": 8729 }, { "epoch": 0.6916221033868093, "grad_norm": 1.5223618774951608, "learning_rate": 4.586038615454102e-06, "loss": 0.1677, "step": 8730 }, { "epoch": 0.6917013269954446, "grad_norm": 1.3886398562031925, "learning_rate": 4.583881357278023e-06, "loss": 0.1972, "step": 8731 }, { "epoch": 0.6917805506040801, "grad_norm": 1.7596511414833698, "learning_rate": 4.5817244557273e-06, "loss": 0.1948, "step": 8732 }, { "epoch": 0.6918597742127154, "grad_norm": 1.1345967722354966, "learning_rate": 4.5795679109439505e-06, "loss": 0.1432, "step": 8733 }, { "epoch": 0.6919389978213507, "grad_norm": 1.8069484754960743, "learning_rate": 4.57741172306997e-06, "loss": 0.2403, "step": 8734 }, { "epoch": 0.6920182214299861, "grad_norm": 1.7172458869331075, "learning_rate": 4.5752558922473376e-06, "loss": 0.2964, "step": 8735 }, { "epoch": 0.6920974450386215, "grad_norm": 1.631047794247156, "learning_rate": 4.573100418618004e-06, "loss": 0.1682, "step": 8736 }, { "epoch": 0.6921766686472569, "grad_norm": 1.8455462208712523, "learning_rate": 4.57094530232389e-06, "loss": 0.2753, "step": 8737 }, { "epoch": 0.6922558922558922, "grad_norm": 1.1787348654968008, "learning_rate": 4.5687905435069106e-06, "loss": 0.1247, "step": 8738 }, { "epoch": 0.6923351158645277, "grad_norm": 2.195912253818307, "learning_rate": 4.566636142308939e-06, "loss": 0.1579, "step": 8739 }, { "epoch": 0.692414339473163, "grad_norm": 3.091964749142207, "learning_rate": 4.564482098871834e-06, "loss": 0.2239, "step": 8740 }, { "epoch": 0.6924935630817983, "grad_norm": 1.3567211670479813, "learning_rate": 4.562328413337426e-06, "loss": 0.1428, "step": 8741 }, { "epoch": 0.6925727866904338, "grad_norm": 1.080283629361243, "learning_rate": 4.56017508584753e-06, "loss": 0.1262, "step": 8742 }, { "epoch": 0.6926520102990691, "grad_norm": 1.606299752985764, "learning_rate": 4.558022116543931e-06, "loss": 0.1625, "step": 8743 }, { "epoch": 0.6927312339077045, "grad_norm": 1.2212029780701115, "learning_rate": 4.555869505568386e-06, "loss": 0.1477, "step": 8744 }, { "epoch": 0.6928104575163399, "grad_norm": 1.5689616085354774, "learning_rate": 4.553717253062643e-06, "loss": 0.2156, "step": 8745 }, { "epoch": 0.6928896811249753, "grad_norm": 1.3995143891766861, "learning_rate": 4.551565359168411e-06, "loss": 0.1878, "step": 8746 }, { "epoch": 0.6929689047336106, "grad_norm": 1.3027589775296664, "learning_rate": 4.549413824027382e-06, "loss": 0.1739, "step": 8747 }, { "epoch": 0.6930481283422459, "grad_norm": 1.5205078437727644, "learning_rate": 4.54726264778123e-06, "loss": 0.2141, "step": 8748 }, { "epoch": 0.6931273519508814, "grad_norm": 1.7471575973709945, "learning_rate": 4.5451118305715954e-06, "loss": 0.201, "step": 8749 }, { "epoch": 0.6932065755595167, "grad_norm": 1.6023460534782106, "learning_rate": 4.542961372540096e-06, "loss": 0.1829, "step": 8750 }, { "epoch": 0.6932857991681521, "grad_norm": 1.6900977030189235, "learning_rate": 4.540811273828336e-06, "loss": 0.1763, "step": 8751 }, { "epoch": 0.6933650227767875, "grad_norm": 1.2231750768791714, "learning_rate": 4.538661534577886e-06, "loss": 0.1514, "step": 8752 }, { "epoch": 0.6934442463854229, "grad_norm": 1.455701378263639, "learning_rate": 4.5365121549302916e-06, "loss": 0.1377, "step": 8753 }, { "epoch": 0.6935234699940582, "grad_norm": 1.5172526079868556, "learning_rate": 4.534363135027086e-06, "loss": 0.1059, "step": 8754 }, { "epoch": 0.6936026936026936, "grad_norm": 1.4068100376808945, "learning_rate": 4.532214475009771e-06, "loss": 0.1586, "step": 8755 }, { "epoch": 0.693681917211329, "grad_norm": 1.510338022782643, "learning_rate": 4.530066175019823e-06, "loss": 0.1976, "step": 8756 }, { "epoch": 0.6937611408199643, "grad_norm": 1.3681933467887286, "learning_rate": 4.527918235198692e-06, "loss": 0.1736, "step": 8757 }, { "epoch": 0.6938403644285998, "grad_norm": 1.5002105724287578, "learning_rate": 4.525770655687821e-06, "loss": 0.1559, "step": 8758 }, { "epoch": 0.6939195880372351, "grad_norm": 1.6395539335526241, "learning_rate": 4.523623436628611e-06, "loss": 0.1948, "step": 8759 }, { "epoch": 0.6939988116458705, "grad_norm": 1.4844249947243022, "learning_rate": 4.521476578162445e-06, "loss": 0.1707, "step": 8760 }, { "epoch": 0.6940780352545058, "grad_norm": 1.660084412834462, "learning_rate": 4.519330080430687e-06, "loss": 0.1619, "step": 8761 }, { "epoch": 0.6941572588631412, "grad_norm": 1.5930209903055617, "learning_rate": 4.517183943574673e-06, "loss": 0.1126, "step": 8762 }, { "epoch": 0.6942364824717766, "grad_norm": 1.53277951821348, "learning_rate": 4.515038167735715e-06, "loss": 0.2223, "step": 8763 }, { "epoch": 0.6943157060804119, "grad_norm": 1.5668923502121344, "learning_rate": 4.5128927530551e-06, "loss": 0.1554, "step": 8764 }, { "epoch": 0.6943949296890474, "grad_norm": 1.0271379523963158, "learning_rate": 4.510747699674096e-06, "loss": 0.0723, "step": 8765 }, { "epoch": 0.6944741532976827, "grad_norm": 1.659729203208413, "learning_rate": 4.50860300773394e-06, "loss": 0.1685, "step": 8766 }, { "epoch": 0.6945533769063181, "grad_norm": 1.52005897124796, "learning_rate": 4.506458677375856e-06, "loss": 0.2384, "step": 8767 }, { "epoch": 0.6946326005149535, "grad_norm": 1.5540184120262797, "learning_rate": 4.504314708741037e-06, "loss": 0.2161, "step": 8768 }, { "epoch": 0.6947118241235888, "grad_norm": 1.5411222086441576, "learning_rate": 4.502171101970645e-06, "loss": 0.1397, "step": 8769 }, { "epoch": 0.6947910477322242, "grad_norm": 1.5807101510254558, "learning_rate": 4.5000278572058365e-06, "loss": 0.155, "step": 8770 }, { "epoch": 0.6948702713408595, "grad_norm": 1.327370532286676, "learning_rate": 4.497884974587729e-06, "loss": 0.1607, "step": 8771 }, { "epoch": 0.694949494949495, "grad_norm": 1.6274011071040737, "learning_rate": 4.495742454257418e-06, "loss": 0.1787, "step": 8772 }, { "epoch": 0.6950287185581303, "grad_norm": 1.7330720916192606, "learning_rate": 4.493600296355986e-06, "loss": 0.2272, "step": 8773 }, { "epoch": 0.6951079421667657, "grad_norm": 1.6835440092590113, "learning_rate": 4.491458501024479e-06, "loss": 0.2284, "step": 8774 }, { "epoch": 0.6951871657754011, "grad_norm": 1.4897429817901569, "learning_rate": 4.489317068403919e-06, "loss": 0.134, "step": 8775 }, { "epoch": 0.6952663893840364, "grad_norm": 1.4167617005001414, "learning_rate": 4.487175998635319e-06, "loss": 0.1939, "step": 8776 }, { "epoch": 0.6953456129926718, "grad_norm": 1.7576690089550147, "learning_rate": 4.485035291859654e-06, "loss": 0.1967, "step": 8777 }, { "epoch": 0.6954248366013072, "grad_norm": 1.5485858735484967, "learning_rate": 4.482894948217875e-06, "loss": 0.1281, "step": 8778 }, { "epoch": 0.6955040602099426, "grad_norm": 1.1978771136500905, "learning_rate": 4.48075496785092e-06, "loss": 0.1168, "step": 8779 }, { "epoch": 0.6955832838185779, "grad_norm": 1.6552400910180027, "learning_rate": 4.4786153508996944e-06, "loss": 0.1665, "step": 8780 }, { "epoch": 0.6956625074272134, "grad_norm": 1.4449352306810848, "learning_rate": 4.47647609750508e-06, "loss": 0.1752, "step": 8781 }, { "epoch": 0.6957417310358487, "grad_norm": 1.4604329747333165, "learning_rate": 4.4743372078079335e-06, "loss": 0.2001, "step": 8782 }, { "epoch": 0.695820954644484, "grad_norm": 1.6249618198063909, "learning_rate": 4.472198681949098e-06, "loss": 0.1974, "step": 8783 }, { "epoch": 0.6959001782531195, "grad_norm": 1.6229216171237462, "learning_rate": 4.470060520069381e-06, "loss": 0.2059, "step": 8784 }, { "epoch": 0.6959794018617548, "grad_norm": 1.5001605629559505, "learning_rate": 4.467922722309567e-06, "loss": 0.2107, "step": 8785 }, { "epoch": 0.6960586254703902, "grad_norm": 1.5417537946201236, "learning_rate": 4.465785288810427e-06, "loss": 0.1873, "step": 8786 }, { "epoch": 0.6961378490790255, "grad_norm": 1.5249782011005326, "learning_rate": 4.4636482197126965e-06, "loss": 0.1825, "step": 8787 }, { "epoch": 0.696217072687661, "grad_norm": 1.27441986603953, "learning_rate": 4.461511515157087e-06, "loss": 0.1317, "step": 8788 }, { "epoch": 0.6962962962962963, "grad_norm": 1.5797578812923285, "learning_rate": 4.459375175284299e-06, "loss": 0.1731, "step": 8789 }, { "epoch": 0.6963755199049316, "grad_norm": 1.6589594867140478, "learning_rate": 4.457239200234996e-06, "loss": 0.2071, "step": 8790 }, { "epoch": 0.6964547435135671, "grad_norm": 1.2242560138144052, "learning_rate": 4.4551035901498186e-06, "loss": 0.1612, "step": 8791 }, { "epoch": 0.6965339671222024, "grad_norm": 1.2752944417907712, "learning_rate": 4.4529683451693916e-06, "loss": 0.1648, "step": 8792 }, { "epoch": 0.6966131907308378, "grad_norm": 1.559373174148234, "learning_rate": 4.45083346543431e-06, "loss": 0.1804, "step": 8793 }, { "epoch": 0.6966924143394732, "grad_norm": 1.514190432817593, "learning_rate": 4.448698951085143e-06, "loss": 0.1849, "step": 8794 }, { "epoch": 0.6967716379481086, "grad_norm": 1.2216284628743566, "learning_rate": 4.446564802262435e-06, "loss": 0.0752, "step": 8795 }, { "epoch": 0.6968508615567439, "grad_norm": 1.6983014512321104, "learning_rate": 4.444431019106718e-06, "loss": 0.1408, "step": 8796 }, { "epoch": 0.6969300851653792, "grad_norm": 1.7940706976307725, "learning_rate": 4.4422976017584866e-06, "loss": 0.2423, "step": 8797 }, { "epoch": 0.6970093087740147, "grad_norm": 1.4713389715624672, "learning_rate": 4.440164550358212e-06, "loss": 0.1736, "step": 8798 }, { "epoch": 0.69708853238265, "grad_norm": 1.4864484111845657, "learning_rate": 4.438031865046353e-06, "loss": 0.1406, "step": 8799 }, { "epoch": 0.6971677559912854, "grad_norm": 1.4426463147987603, "learning_rate": 4.435899545963333e-06, "loss": 0.1687, "step": 8800 }, { "epoch": 0.6972469795999208, "grad_norm": 1.2427763308582487, "learning_rate": 4.4337675932495515e-06, "loss": 0.1252, "step": 8801 }, { "epoch": 0.6973262032085561, "grad_norm": 1.4530691095101038, "learning_rate": 4.431636007045396e-06, "loss": 0.1211, "step": 8802 }, { "epoch": 0.6974054268171915, "grad_norm": 1.5475078520014272, "learning_rate": 4.429504787491214e-06, "loss": 0.1457, "step": 8803 }, { "epoch": 0.6974846504258269, "grad_norm": 1.662190831622774, "learning_rate": 4.427373934727337e-06, "loss": 0.3278, "step": 8804 }, { "epoch": 0.6975638740344623, "grad_norm": 1.306433587450784, "learning_rate": 4.425243448894074e-06, "loss": 0.1169, "step": 8805 }, { "epoch": 0.6976430976430976, "grad_norm": 1.4739453168690189, "learning_rate": 4.423113330131708e-06, "loss": 0.1825, "step": 8806 }, { "epoch": 0.6977223212517331, "grad_norm": 2.0608129315474155, "learning_rate": 4.42098357858049e-06, "loss": 0.2356, "step": 8807 }, { "epoch": 0.6978015448603684, "grad_norm": 1.3136278113741746, "learning_rate": 4.418854194380663e-06, "loss": 0.1831, "step": 8808 }, { "epoch": 0.6978807684690037, "grad_norm": 1.1972763349322344, "learning_rate": 4.416725177672432e-06, "loss": 0.119, "step": 8809 }, { "epoch": 0.6979599920776391, "grad_norm": 1.2721603945797595, "learning_rate": 4.4145965285959836e-06, "loss": 0.1668, "step": 8810 }, { "epoch": 0.6980392156862745, "grad_norm": 1.4178797548286315, "learning_rate": 4.412468247291474e-06, "loss": 0.1585, "step": 8811 }, { "epoch": 0.6981184392949099, "grad_norm": 1.4284786010511097, "learning_rate": 4.410340333899049e-06, "loss": 0.1732, "step": 8812 }, { "epoch": 0.6981976629035452, "grad_norm": 1.7700032727310824, "learning_rate": 4.408212788558818e-06, "loss": 0.2066, "step": 8813 }, { "epoch": 0.6982768865121807, "grad_norm": 1.9077384061794467, "learning_rate": 4.406085611410864e-06, "loss": 0.2425, "step": 8814 }, { "epoch": 0.698356110120816, "grad_norm": 1.7435868726619532, "learning_rate": 4.403958802595261e-06, "loss": 0.2269, "step": 8815 }, { "epoch": 0.6984353337294513, "grad_norm": 1.59591571426409, "learning_rate": 4.401832362252044e-06, "loss": 0.1892, "step": 8816 }, { "epoch": 0.6985145573380868, "grad_norm": 1.5245462242490293, "learning_rate": 4.399706290521225e-06, "loss": 0.1577, "step": 8817 }, { "epoch": 0.6985937809467221, "grad_norm": 1.6348777969074537, "learning_rate": 4.397580587542805e-06, "loss": 0.18, "step": 8818 }, { "epoch": 0.6986730045553575, "grad_norm": 1.3656603707151018, "learning_rate": 4.3954552534567455e-06, "loss": 0.1871, "step": 8819 }, { "epoch": 0.6987522281639929, "grad_norm": 1.5013686411109204, "learning_rate": 4.393330288402986e-06, "loss": 0.2102, "step": 8820 }, { "epoch": 0.6988314517726283, "grad_norm": 1.7795540555044727, "learning_rate": 4.391205692521453e-06, "loss": 0.2148, "step": 8821 }, { "epoch": 0.6989106753812636, "grad_norm": 1.544563262211698, "learning_rate": 4.389081465952039e-06, "loss": 0.2224, "step": 8822 }, { "epoch": 0.6989898989898989, "grad_norm": 1.185014011067305, "learning_rate": 4.386957608834607e-06, "loss": 0.1303, "step": 8823 }, { "epoch": 0.6990691225985344, "grad_norm": 1.7588568938329712, "learning_rate": 4.384834121309013e-06, "loss": 0.2658, "step": 8824 }, { "epoch": 0.6991483462071697, "grad_norm": 1.7644065661209543, "learning_rate": 4.382711003515072e-06, "loss": 0.2251, "step": 8825 }, { "epoch": 0.6992275698158051, "grad_norm": 1.4340255292466626, "learning_rate": 4.3805882555925846e-06, "loss": 0.1406, "step": 8826 }, { "epoch": 0.6993067934244405, "grad_norm": 1.6475867037658725, "learning_rate": 4.378465877681317e-06, "loss": 0.179, "step": 8827 }, { "epoch": 0.6993860170330759, "grad_norm": 1.5567330383733182, "learning_rate": 4.376343869921027e-06, "loss": 0.1759, "step": 8828 }, { "epoch": 0.6994652406417112, "grad_norm": 1.6587662626714532, "learning_rate": 4.374222232451433e-06, "loss": 0.2468, "step": 8829 }, { "epoch": 0.6995444642503466, "grad_norm": 1.3883333374420803, "learning_rate": 4.3721009654122315e-06, "loss": 0.1909, "step": 8830 }, { "epoch": 0.699623687858982, "grad_norm": 1.046845583936344, "learning_rate": 4.369980068943106e-06, "loss": 0.1085, "step": 8831 }, { "epoch": 0.6997029114676173, "grad_norm": 1.3935156296815707, "learning_rate": 4.367859543183702e-06, "loss": 0.1833, "step": 8832 }, { "epoch": 0.6997821350762528, "grad_norm": 1.3237658160684287, "learning_rate": 4.3657393882736456e-06, "loss": 0.1298, "step": 8833 }, { "epoch": 0.6998613586848881, "grad_norm": 1.5712109424456409, "learning_rate": 4.3636196043525415e-06, "loss": 0.2106, "step": 8834 }, { "epoch": 0.6999405822935235, "grad_norm": 1.3725337887334645, "learning_rate": 4.361500191559967e-06, "loss": 0.1747, "step": 8835 }, { "epoch": 0.7000198059021588, "grad_norm": 1.5623309104647334, "learning_rate": 4.35938115003547e-06, "loss": 0.216, "step": 8836 }, { "epoch": 0.7000990295107942, "grad_norm": 1.2808268527345514, "learning_rate": 4.357262479918587e-06, "loss": 0.1374, "step": 8837 }, { "epoch": 0.7001782531194296, "grad_norm": 1.4413433223481251, "learning_rate": 4.355144181348819e-06, "loss": 0.1567, "step": 8838 }, { "epoch": 0.7002574767280649, "grad_norm": 1.3885586702137263, "learning_rate": 4.353026254465642e-06, "loss": 0.1537, "step": 8839 }, { "epoch": 0.7003367003367004, "grad_norm": 1.3189938295933838, "learning_rate": 4.350908699408521e-06, "loss": 0.1915, "step": 8840 }, { "epoch": 0.7004159239453357, "grad_norm": 1.5574676213052678, "learning_rate": 4.348791516316878e-06, "loss": 0.1929, "step": 8841 }, { "epoch": 0.7004951475539711, "grad_norm": 1.519744105590134, "learning_rate": 4.346674705330117e-06, "loss": 0.211, "step": 8842 }, { "epoch": 0.7005743711626065, "grad_norm": 1.4923351191068697, "learning_rate": 4.344558266587628e-06, "loss": 0.1728, "step": 8843 }, { "epoch": 0.7006535947712418, "grad_norm": 1.2777758524075027, "learning_rate": 4.342442200228766e-06, "loss": 0.1165, "step": 8844 }, { "epoch": 0.7007328183798772, "grad_norm": 1.4407727984525156, "learning_rate": 4.340326506392859e-06, "loss": 0.1399, "step": 8845 }, { "epoch": 0.7008120419885125, "grad_norm": 1.408086677255607, "learning_rate": 4.338211185219222e-06, "loss": 0.1806, "step": 8846 }, { "epoch": 0.700891265597148, "grad_norm": 1.8640505525064341, "learning_rate": 4.336096236847136e-06, "loss": 0.2228, "step": 8847 }, { "epoch": 0.7009704892057833, "grad_norm": 1.4004136210739297, "learning_rate": 4.333981661415856e-06, "loss": 0.0971, "step": 8848 }, { "epoch": 0.7010497128144187, "grad_norm": 1.9819534699666377, "learning_rate": 4.331867459064623e-06, "loss": 0.164, "step": 8849 }, { "epoch": 0.7011289364230541, "grad_norm": 1.4553427709485196, "learning_rate": 4.329753629932646e-06, "loss": 0.2207, "step": 8850 }, { "epoch": 0.7012081600316894, "grad_norm": 1.3298618348286386, "learning_rate": 4.327640174159109e-06, "loss": 0.1999, "step": 8851 }, { "epoch": 0.7012873836403248, "grad_norm": 2.6685826773151295, "learning_rate": 4.325527091883168e-06, "loss": 0.1119, "step": 8852 }, { "epoch": 0.7013666072489602, "grad_norm": 1.644816105366692, "learning_rate": 4.323414383243969e-06, "loss": 0.1971, "step": 8853 }, { "epoch": 0.7014458308575956, "grad_norm": 1.311762122734592, "learning_rate": 4.321302048380619e-06, "loss": 0.1716, "step": 8854 }, { "epoch": 0.7015250544662309, "grad_norm": 1.9886884479696556, "learning_rate": 4.319190087432201e-06, "loss": 0.2093, "step": 8855 }, { "epoch": 0.7016042780748664, "grad_norm": 1.531879407864641, "learning_rate": 4.317078500537785e-06, "loss": 0.242, "step": 8856 }, { "epoch": 0.7016835016835017, "grad_norm": 1.6220269552376552, "learning_rate": 4.314967287836405e-06, "loss": 0.1758, "step": 8857 }, { "epoch": 0.701762725292137, "grad_norm": 1.8019874746318434, "learning_rate": 4.3128564494670715e-06, "loss": 0.1991, "step": 8858 }, { "epoch": 0.7018419489007724, "grad_norm": 1.5588080158695856, "learning_rate": 4.310745985568779e-06, "loss": 0.192, "step": 8859 }, { "epoch": 0.7019211725094078, "grad_norm": 1.76137450276114, "learning_rate": 4.3086358962804885e-06, "loss": 0.1973, "step": 8860 }, { "epoch": 0.7020003961180432, "grad_norm": 1.6124635917507928, "learning_rate": 4.306526181741135e-06, "loss": 0.1804, "step": 8861 }, { "epoch": 0.7020796197266785, "grad_norm": 1.635892995978041, "learning_rate": 4.304416842089641e-06, "loss": 0.1845, "step": 8862 }, { "epoch": 0.702158843335314, "grad_norm": 1.9615192963171089, "learning_rate": 4.302307877464893e-06, "loss": 0.1946, "step": 8863 }, { "epoch": 0.7022380669439493, "grad_norm": 1.4449076665617835, "learning_rate": 4.300199288005753e-06, "loss": 0.1803, "step": 8864 }, { "epoch": 0.7023172905525846, "grad_norm": 1.3918651887048008, "learning_rate": 4.298091073851066e-06, "loss": 0.1124, "step": 8865 }, { "epoch": 0.7023965141612201, "grad_norm": 1.095485170651712, "learning_rate": 4.295983235139647e-06, "loss": 0.109, "step": 8866 }, { "epoch": 0.7024757377698554, "grad_norm": 2.058633734473713, "learning_rate": 4.293875772010287e-06, "loss": 0.1866, "step": 8867 }, { "epoch": 0.7025549613784908, "grad_norm": 1.9308386793385959, "learning_rate": 4.291768684601746e-06, "loss": 0.2198, "step": 8868 }, { "epoch": 0.7026341849871262, "grad_norm": 1.5543563999701846, "learning_rate": 4.289661973052774e-06, "loss": 0.1633, "step": 8869 }, { "epoch": 0.7027134085957616, "grad_norm": 1.5453341596645978, "learning_rate": 4.287555637502086e-06, "loss": 0.1829, "step": 8870 }, { "epoch": 0.7027926322043969, "grad_norm": 1.693391313464157, "learning_rate": 4.285449678088369e-06, "loss": 0.2424, "step": 8871 }, { "epoch": 0.7028718558130322, "grad_norm": 1.4568989201158453, "learning_rate": 4.283344094950297e-06, "loss": 0.1886, "step": 8872 }, { "epoch": 0.7029510794216677, "grad_norm": 1.6322315242858605, "learning_rate": 4.2812388882265095e-06, "loss": 0.1764, "step": 8873 }, { "epoch": 0.703030303030303, "grad_norm": 1.3878895168664522, "learning_rate": 4.279134058055622e-06, "loss": 0.1587, "step": 8874 }, { "epoch": 0.7031095266389384, "grad_norm": 2.111072501513597, "learning_rate": 4.2770296045762315e-06, "loss": 0.1631, "step": 8875 }, { "epoch": 0.7031887502475738, "grad_norm": 1.299754877623927, "learning_rate": 4.274925527926907e-06, "loss": 0.1962, "step": 8876 }, { "epoch": 0.7032679738562092, "grad_norm": 1.373546269350586, "learning_rate": 4.272821828246183e-06, "loss": 0.1201, "step": 8877 }, { "epoch": 0.7033471974648445, "grad_norm": 1.5419916617151102, "learning_rate": 4.270718505672588e-06, "loss": 0.1939, "step": 8878 }, { "epoch": 0.7034264210734799, "grad_norm": 1.5567728449586737, "learning_rate": 4.2686155603446134e-06, "loss": 0.1811, "step": 8879 }, { "epoch": 0.7035056446821153, "grad_norm": 1.702245443561457, "learning_rate": 4.266512992400726e-06, "loss": 0.2542, "step": 8880 }, { "epoch": 0.7035848682907506, "grad_norm": 1.2994612534873151, "learning_rate": 4.2644108019793665e-06, "loss": 0.182, "step": 8881 }, { "epoch": 0.7036640918993861, "grad_norm": 1.3977597901480356, "learning_rate": 4.262308989218961e-06, "loss": 0.1705, "step": 8882 }, { "epoch": 0.7037433155080214, "grad_norm": 1.3800724286233983, "learning_rate": 4.2602075542579e-06, "loss": 0.1781, "step": 8883 }, { "epoch": 0.7038225391166567, "grad_norm": 1.563107963642451, "learning_rate": 4.258106497234551e-06, "loss": 0.1775, "step": 8884 }, { "epoch": 0.7039017627252921, "grad_norm": 1.7822286172705617, "learning_rate": 4.256005818287265e-06, "loss": 0.2098, "step": 8885 }, { "epoch": 0.7039809863339275, "grad_norm": 1.3553466213219136, "learning_rate": 4.253905517554356e-06, "loss": 0.1552, "step": 8886 }, { "epoch": 0.7040602099425629, "grad_norm": 1.2839616722318572, "learning_rate": 4.251805595174117e-06, "loss": 0.1365, "step": 8887 }, { "epoch": 0.7041394335511982, "grad_norm": 1.493554645693374, "learning_rate": 4.249706051284824e-06, "loss": 0.1992, "step": 8888 }, { "epoch": 0.7042186571598337, "grad_norm": 1.6374959145264998, "learning_rate": 4.24760688602472e-06, "loss": 0.2374, "step": 8889 }, { "epoch": 0.704297880768469, "grad_norm": 1.6289974334495363, "learning_rate": 4.245508099532021e-06, "loss": 0.1874, "step": 8890 }, { "epoch": 0.7043771043771043, "grad_norm": 1.68010282707641, "learning_rate": 4.243409691944927e-06, "loss": 0.2111, "step": 8891 }, { "epoch": 0.7044563279857398, "grad_norm": 1.4476513419764068, "learning_rate": 4.241311663401606e-06, "loss": 0.1931, "step": 8892 }, { "epoch": 0.7045355515943751, "grad_norm": 1.7292278035688797, "learning_rate": 4.2392140140401996e-06, "loss": 0.1543, "step": 8893 }, { "epoch": 0.7046147752030105, "grad_norm": 1.6431321335300442, "learning_rate": 4.237116743998835e-06, "loss": 0.2225, "step": 8894 }, { "epoch": 0.7046939988116458, "grad_norm": 1.3570553370735827, "learning_rate": 4.235019853415603e-06, "loss": 0.1912, "step": 8895 }, { "epoch": 0.7047732224202813, "grad_norm": 1.227240285546194, "learning_rate": 4.232923342428574e-06, "loss": 0.1541, "step": 8896 }, { "epoch": 0.7048524460289166, "grad_norm": 1.6151021982027407, "learning_rate": 4.230827211175791e-06, "loss": 0.2092, "step": 8897 }, { "epoch": 0.7049316696375519, "grad_norm": 1.458975248192357, "learning_rate": 4.22873145979528e-06, "loss": 0.1252, "step": 8898 }, { "epoch": 0.7050108932461874, "grad_norm": 1.5557373638847387, "learning_rate": 4.226636088425033e-06, "loss": 0.1744, "step": 8899 }, { "epoch": 0.7050901168548227, "grad_norm": 1.6646056763561519, "learning_rate": 4.2245410972030154e-06, "loss": 0.1758, "step": 8900 }, { "epoch": 0.7051693404634581, "grad_norm": 1.4150966729128942, "learning_rate": 4.222446486267181e-06, "loss": 0.167, "step": 8901 }, { "epoch": 0.7052485640720935, "grad_norm": 1.6674804939628438, "learning_rate": 4.220352255755445e-06, "loss": 0.2148, "step": 8902 }, { "epoch": 0.7053277876807289, "grad_norm": 1.3256800193370304, "learning_rate": 4.218258405805701e-06, "loss": 0.1189, "step": 8903 }, { "epoch": 0.7054070112893642, "grad_norm": 1.673897002571839, "learning_rate": 4.216164936555823e-06, "loss": 0.2511, "step": 8904 }, { "epoch": 0.7054862348979996, "grad_norm": 2.2486388415797007, "learning_rate": 4.214071848143655e-06, "loss": 0.3101, "step": 8905 }, { "epoch": 0.705565458506635, "grad_norm": 1.34213107718111, "learning_rate": 4.211979140707012e-06, "loss": 0.1628, "step": 8906 }, { "epoch": 0.7056446821152703, "grad_norm": 1.6337656733968111, "learning_rate": 4.209886814383696e-06, "loss": 0.2478, "step": 8907 }, { "epoch": 0.7057239057239058, "grad_norm": 1.5786716513057881, "learning_rate": 4.207794869311472e-06, "loss": 0.2083, "step": 8908 }, { "epoch": 0.7058031293325411, "grad_norm": 1.7102498400449593, "learning_rate": 4.205703305628082e-06, "loss": 0.191, "step": 8909 }, { "epoch": 0.7058823529411765, "grad_norm": 1.49348634855011, "learning_rate": 4.203612123471254e-06, "loss": 0.214, "step": 8910 }, { "epoch": 0.7059615765498118, "grad_norm": 1.1954013435293973, "learning_rate": 4.201521322978677e-06, "loss": 0.1191, "step": 8911 }, { "epoch": 0.7060408001584472, "grad_norm": 1.6047416950397644, "learning_rate": 4.19943090428802e-06, "loss": 0.239, "step": 8912 }, { "epoch": 0.7061200237670826, "grad_norm": 1.283928520375124, "learning_rate": 4.197340867536923e-06, "loss": 0.097, "step": 8913 }, { "epoch": 0.7061992473757179, "grad_norm": 1.2820688306081138, "learning_rate": 4.195251212863014e-06, "loss": 0.1543, "step": 8914 }, { "epoch": 0.7062784709843534, "grad_norm": 2.3078728324176594, "learning_rate": 4.193161940403882e-06, "loss": 0.3387, "step": 8915 }, { "epoch": 0.7063576945929887, "grad_norm": 1.087293674546052, "learning_rate": 4.191073050297091e-06, "loss": 0.0712, "step": 8916 }, { "epoch": 0.7064369182016241, "grad_norm": 1.8274393871135801, "learning_rate": 4.188984542680192e-06, "loss": 0.2214, "step": 8917 }, { "epoch": 0.7065161418102595, "grad_norm": 1.599643811720071, "learning_rate": 4.186896417690701e-06, "loss": 0.1643, "step": 8918 }, { "epoch": 0.7065953654188948, "grad_norm": 1.5811607679026036, "learning_rate": 4.18480867546611e-06, "loss": 0.1555, "step": 8919 }, { "epoch": 0.7066745890275302, "grad_norm": 1.7593280724556661, "learning_rate": 4.182721316143888e-06, "loss": 0.2135, "step": 8920 }, { "epoch": 0.7067538126361655, "grad_norm": 1.2460739511293573, "learning_rate": 4.180634339861474e-06, "loss": 0.1188, "step": 8921 }, { "epoch": 0.706833036244801, "grad_norm": 1.5206373564706603, "learning_rate": 4.178547746756285e-06, "loss": 0.1931, "step": 8922 }, { "epoch": 0.7069122598534363, "grad_norm": 1.5331165661126867, "learning_rate": 4.17646153696572e-06, "loss": 0.1353, "step": 8923 }, { "epoch": 0.7069914834620717, "grad_norm": 1.4713285224804602, "learning_rate": 4.174375710627141e-06, "loss": 0.1645, "step": 8924 }, { "epoch": 0.7070707070707071, "grad_norm": 1.3565004481785639, "learning_rate": 4.172290267877887e-06, "loss": 0.1651, "step": 8925 }, { "epoch": 0.7071499306793424, "grad_norm": 1.1857166425406487, "learning_rate": 4.170205208855281e-06, "loss": 0.1084, "step": 8926 }, { "epoch": 0.7072291542879778, "grad_norm": 1.4229593454891545, "learning_rate": 4.1681205336966115e-06, "loss": 0.1765, "step": 8927 }, { "epoch": 0.7073083778966132, "grad_norm": 1.3232025580629834, "learning_rate": 4.16603624253914e-06, "loss": 0.1206, "step": 8928 }, { "epoch": 0.7073876015052486, "grad_norm": 1.9225876309425236, "learning_rate": 4.163952335520114e-06, "loss": 0.3107, "step": 8929 }, { "epoch": 0.7074668251138839, "grad_norm": 1.0757666515378737, "learning_rate": 4.161868812776746e-06, "loss": 0.1145, "step": 8930 }, { "epoch": 0.7075460487225194, "grad_norm": 1.5022710811190123, "learning_rate": 4.15978567444622e-06, "loss": 0.1465, "step": 8931 }, { "epoch": 0.7076252723311547, "grad_norm": 1.272838566728464, "learning_rate": 4.157702920665712e-06, "loss": 0.1375, "step": 8932 }, { "epoch": 0.70770449593979, "grad_norm": 1.4737454732909687, "learning_rate": 4.155620551572354e-06, "loss": 0.2105, "step": 8933 }, { "epoch": 0.7077837195484254, "grad_norm": 1.369309603297265, "learning_rate": 4.153538567303258e-06, "loss": 0.0912, "step": 8934 }, { "epoch": 0.7078629431570608, "grad_norm": 1.8278682791174095, "learning_rate": 4.151456967995519e-06, "loss": 0.2138, "step": 8935 }, { "epoch": 0.7079421667656962, "grad_norm": 1.2170316453851604, "learning_rate": 4.149375753786198e-06, "loss": 0.0982, "step": 8936 }, { "epoch": 0.7080213903743315, "grad_norm": 1.6068213562410223, "learning_rate": 4.147294924812332e-06, "loss": 0.1574, "step": 8937 }, { "epoch": 0.708100613982967, "grad_norm": 1.5793928878700478, "learning_rate": 4.14521448121093e-06, "loss": 0.1629, "step": 8938 }, { "epoch": 0.7081798375916023, "grad_norm": 1.5107443079064404, "learning_rate": 4.143134423118986e-06, "loss": 0.1481, "step": 8939 }, { "epoch": 0.7082590612002376, "grad_norm": 1.567755304830041, "learning_rate": 4.14105475067346e-06, "loss": 0.2081, "step": 8940 }, { "epoch": 0.7083382848088731, "grad_norm": 1.6685796617507627, "learning_rate": 4.138975464011284e-06, "loss": 0.2137, "step": 8941 }, { "epoch": 0.7084175084175084, "grad_norm": 1.6514773236188476, "learning_rate": 4.136896563269375e-06, "loss": 0.1853, "step": 8942 }, { "epoch": 0.7084967320261438, "grad_norm": 1.9418999289604644, "learning_rate": 4.1348180485846145e-06, "loss": 0.1971, "step": 8943 }, { "epoch": 0.7085759556347792, "grad_norm": 1.9723355117215167, "learning_rate": 4.1327399200938625e-06, "loss": 0.3027, "step": 8944 }, { "epoch": 0.7086551792434146, "grad_norm": 1.2330073929045136, "learning_rate": 4.1306621779339585e-06, "loss": 0.121, "step": 8945 }, { "epoch": 0.7087344028520499, "grad_norm": 1.8777243427088852, "learning_rate": 4.128584822241708e-06, "loss": 0.2223, "step": 8946 }, { "epoch": 0.7088136264606852, "grad_norm": 1.35663460768155, "learning_rate": 4.126507853153891e-06, "loss": 0.1125, "step": 8947 }, { "epoch": 0.7088928500693207, "grad_norm": 1.1195765404518176, "learning_rate": 4.124431270807277e-06, "loss": 0.121, "step": 8948 }, { "epoch": 0.708972073677956, "grad_norm": 1.6917336592800893, "learning_rate": 4.12235507533859e-06, "loss": 0.2218, "step": 8949 }, { "epoch": 0.7090512972865914, "grad_norm": 1.7499013182403091, "learning_rate": 4.120279266884537e-06, "loss": 0.2176, "step": 8950 }, { "epoch": 0.7091305208952268, "grad_norm": 1.5489162800662826, "learning_rate": 4.118203845581807e-06, "loss": 0.2054, "step": 8951 }, { "epoch": 0.7092097445038622, "grad_norm": 1.844033527755078, "learning_rate": 4.11612881156705e-06, "loss": 0.2106, "step": 8952 }, { "epoch": 0.7092889681124975, "grad_norm": 1.6095966575988605, "learning_rate": 4.114054164976902e-06, "loss": 0.1587, "step": 8953 }, { "epoch": 0.7093681917211329, "grad_norm": 1.4201294765393386, "learning_rate": 4.111979905947961e-06, "loss": 0.1545, "step": 8954 }, { "epoch": 0.7094474153297683, "grad_norm": 1.1165702107681494, "learning_rate": 4.109906034616816e-06, "loss": 0.1274, "step": 8955 }, { "epoch": 0.7095266389384036, "grad_norm": 1.4700168991468963, "learning_rate": 4.107832551120017e-06, "loss": 0.193, "step": 8956 }, { "epoch": 0.7096058625470391, "grad_norm": 1.1709800529163041, "learning_rate": 4.105759455594091e-06, "loss": 0.1316, "step": 8957 }, { "epoch": 0.7096850861556744, "grad_norm": 1.456579609825864, "learning_rate": 4.103686748175545e-06, "loss": 0.1847, "step": 8958 }, { "epoch": 0.7097643097643098, "grad_norm": 1.325141186202832, "learning_rate": 4.101614429000857e-06, "loss": 0.1402, "step": 8959 }, { "epoch": 0.7098435333729451, "grad_norm": 1.5067284432450918, "learning_rate": 4.099542498206473e-06, "loss": 0.1789, "step": 8960 }, { "epoch": 0.7099227569815805, "grad_norm": 1.705231640912638, "learning_rate": 4.0974709559288275e-06, "loss": 0.2167, "step": 8961 }, { "epoch": 0.7100019805902159, "grad_norm": 1.4578949177567861, "learning_rate": 4.095399802304319e-06, "loss": 0.1504, "step": 8962 }, { "epoch": 0.7100812041988512, "grad_norm": 2.1730986850409555, "learning_rate": 4.093329037469319e-06, "loss": 0.2615, "step": 8963 }, { "epoch": 0.7101604278074867, "grad_norm": 1.7145477781340637, "learning_rate": 4.091258661560184e-06, "loss": 0.1577, "step": 8964 }, { "epoch": 0.710239651416122, "grad_norm": 2.156651765800912, "learning_rate": 4.0891886747132356e-06, "loss": 0.2114, "step": 8965 }, { "epoch": 0.7103188750247573, "grad_norm": 1.6653329261705643, "learning_rate": 4.087119077064772e-06, "loss": 0.118, "step": 8966 }, { "epoch": 0.7103980986333928, "grad_norm": 1.3683795423548373, "learning_rate": 4.085049868751062e-06, "loss": 0.1931, "step": 8967 }, { "epoch": 0.7104773222420281, "grad_norm": 1.454576484937789, "learning_rate": 4.082981049908362e-06, "loss": 0.0995, "step": 8968 }, { "epoch": 0.7105565458506635, "grad_norm": 1.579544254935876, "learning_rate": 4.080912620672888e-06, "loss": 0.2261, "step": 8969 }, { "epoch": 0.7106357694592988, "grad_norm": 1.4682726161701445, "learning_rate": 4.078844581180833e-06, "loss": 0.1895, "step": 8970 }, { "epoch": 0.7107149930679343, "grad_norm": 1.8744033965405815, "learning_rate": 4.076776931568376e-06, "loss": 0.2606, "step": 8971 }, { "epoch": 0.7107942166765696, "grad_norm": 1.371485826727412, "learning_rate": 4.074709671971657e-06, "loss": 0.1256, "step": 8972 }, { "epoch": 0.7108734402852049, "grad_norm": 1.4662934552176865, "learning_rate": 4.0726428025267925e-06, "loss": 0.1427, "step": 8973 }, { "epoch": 0.7109526638938404, "grad_norm": 1.8949072818025252, "learning_rate": 4.070576323369882e-06, "loss": 0.2326, "step": 8974 }, { "epoch": 0.7110318875024757, "grad_norm": 1.8991883085910681, "learning_rate": 4.06851023463699e-06, "loss": 0.2902, "step": 8975 }, { "epoch": 0.7111111111111111, "grad_norm": 1.6392899595465211, "learning_rate": 4.066444536464155e-06, "loss": 0.2274, "step": 8976 }, { "epoch": 0.7111903347197465, "grad_norm": 1.7578667092894662, "learning_rate": 4.0643792289874e-06, "loss": 0.2305, "step": 8977 }, { "epoch": 0.7112695583283819, "grad_norm": 1.9626291415757584, "learning_rate": 4.062314312342712e-06, "loss": 0.1657, "step": 8978 }, { "epoch": 0.7113487819370172, "grad_norm": 1.7389273444282596, "learning_rate": 4.060249786666054e-06, "loss": 0.2038, "step": 8979 }, { "epoch": 0.7114280055456526, "grad_norm": 1.3703605387337299, "learning_rate": 4.0581856520933706e-06, "loss": 0.2161, "step": 8980 }, { "epoch": 0.711507229154288, "grad_norm": 1.6247431069039082, "learning_rate": 4.056121908760571e-06, "loss": 0.2027, "step": 8981 }, { "epoch": 0.7115864527629233, "grad_norm": 1.544129850790123, "learning_rate": 4.054058556803544e-06, "loss": 0.1786, "step": 8982 }, { "epoch": 0.7116656763715588, "grad_norm": 1.6311335668482034, "learning_rate": 4.051995596358147e-06, "loss": 0.2208, "step": 8983 }, { "epoch": 0.7117448999801941, "grad_norm": 3.124595982662079, "learning_rate": 4.049933027560225e-06, "loss": 0.3187, "step": 8984 }, { "epoch": 0.7118241235888295, "grad_norm": 1.7271318448930384, "learning_rate": 4.047870850545581e-06, "loss": 0.1982, "step": 8985 }, { "epoch": 0.7119033471974648, "grad_norm": 1.3808739422391276, "learning_rate": 4.045809065449999e-06, "loss": 0.1368, "step": 8986 }, { "epoch": 0.7119825708061002, "grad_norm": 1.0924354926833104, "learning_rate": 4.043747672409245e-06, "loss": 0.1299, "step": 8987 }, { "epoch": 0.7120617944147356, "grad_norm": 1.47416609643582, "learning_rate": 4.041686671559046e-06, "loss": 0.2158, "step": 8988 }, { "epoch": 0.7121410180233709, "grad_norm": 1.7090478794970305, "learning_rate": 4.039626063035107e-06, "loss": 0.2056, "step": 8989 }, { "epoch": 0.7122202416320064, "grad_norm": 1.3963615134577552, "learning_rate": 4.0375658469731164e-06, "loss": 0.1883, "step": 8990 }, { "epoch": 0.7122994652406417, "grad_norm": 1.3498996238213241, "learning_rate": 4.035506023508724e-06, "loss": 0.1626, "step": 8991 }, { "epoch": 0.7123786888492771, "grad_norm": 1.230653252995674, "learning_rate": 4.033446592777558e-06, "loss": 0.1322, "step": 8992 }, { "epoch": 0.7124579124579125, "grad_norm": 1.1739276112170387, "learning_rate": 4.031387554915228e-06, "loss": 0.1593, "step": 8993 }, { "epoch": 0.7125371360665478, "grad_norm": 1.4832182470118986, "learning_rate": 4.029328910057308e-06, "loss": 0.1964, "step": 8994 }, { "epoch": 0.7126163596751832, "grad_norm": 1.5219491163295604, "learning_rate": 4.027270658339347e-06, "loss": 0.1321, "step": 8995 }, { "epoch": 0.7126955832838185, "grad_norm": 1.6179589454559815, "learning_rate": 4.025212799896881e-06, "loss": 0.1946, "step": 8996 }, { "epoch": 0.712774806892454, "grad_norm": 1.613501230362664, "learning_rate": 4.023155334865401e-06, "loss": 0.1631, "step": 8997 }, { "epoch": 0.7128540305010893, "grad_norm": 1.7882736694309616, "learning_rate": 4.0210982633803784e-06, "loss": 0.1753, "step": 8998 }, { "epoch": 0.7129332541097247, "grad_norm": 1.4327642269184218, "learning_rate": 4.01904158557727e-06, "loss": 0.1615, "step": 8999 }, { "epoch": 0.7130124777183601, "grad_norm": 1.4956853444450924, "learning_rate": 4.016985301591496e-06, "loss": 0.1948, "step": 9000 }, { "epoch": 0.7130917013269954, "grad_norm": 1.4077558249004838, "learning_rate": 4.014929411558447e-06, "loss": 0.1405, "step": 9001 }, { "epoch": 0.7131709249356308, "grad_norm": 1.678598755123887, "learning_rate": 4.012873915613501e-06, "loss": 0.1211, "step": 9002 }, { "epoch": 0.7132501485442662, "grad_norm": 3.0052966281151092, "learning_rate": 4.010818813892e-06, "loss": 0.2329, "step": 9003 }, { "epoch": 0.7133293721529016, "grad_norm": 1.6560195408304361, "learning_rate": 4.008764106529259e-06, "loss": 0.2034, "step": 9004 }, { "epoch": 0.7134085957615369, "grad_norm": 1.612467944984783, "learning_rate": 4.006709793660577e-06, "loss": 0.1936, "step": 9005 }, { "epoch": 0.7134878193701724, "grad_norm": 1.5170749401348624, "learning_rate": 4.004655875421217e-06, "loss": 0.1835, "step": 9006 }, { "epoch": 0.7135670429788077, "grad_norm": 1.7396927164660396, "learning_rate": 4.00260235194642e-06, "loss": 0.2247, "step": 9007 }, { "epoch": 0.713646266587443, "grad_norm": 1.528678636120324, "learning_rate": 4.0005492233713964e-06, "loss": 0.2651, "step": 9008 }, { "epoch": 0.7137254901960784, "grad_norm": 1.9833407046910196, "learning_rate": 3.998496489831343e-06, "loss": 0.2176, "step": 9009 }, { "epoch": 0.7138047138047138, "grad_norm": 1.4040275471421508, "learning_rate": 3.996444151461417e-06, "loss": 0.1524, "step": 9010 }, { "epoch": 0.7138839374133492, "grad_norm": 1.6851042233139983, "learning_rate": 3.994392208396754e-06, "loss": 0.2111, "step": 9011 }, { "epoch": 0.7139631610219845, "grad_norm": 1.3036237866237972, "learning_rate": 3.992340660772472e-06, "loss": 0.1322, "step": 9012 }, { "epoch": 0.71404238463062, "grad_norm": 1.5896881335335635, "learning_rate": 3.990289508723648e-06, "loss": 0.2005, "step": 9013 }, { "epoch": 0.7141216082392553, "grad_norm": 1.1160630027882283, "learning_rate": 3.988238752385341e-06, "loss": 0.0934, "step": 9014 }, { "epoch": 0.7142008318478906, "grad_norm": 1.9948499790509049, "learning_rate": 3.986188391892587e-06, "loss": 0.3515, "step": 9015 }, { "epoch": 0.7142800554565261, "grad_norm": 1.7234042832334302, "learning_rate": 3.984138427380393e-06, "loss": 0.2016, "step": 9016 }, { "epoch": 0.7143592790651614, "grad_norm": 1.640750103653538, "learning_rate": 3.982088858983733e-06, "loss": 0.2004, "step": 9017 }, { "epoch": 0.7144385026737968, "grad_norm": 1.6074449983964227, "learning_rate": 3.9800396868375675e-06, "loss": 0.2131, "step": 9018 }, { "epoch": 0.7145177262824322, "grad_norm": 1.186496968633592, "learning_rate": 3.977990911076823e-06, "loss": 0.1657, "step": 9019 }, { "epoch": 0.7145969498910676, "grad_norm": 1.5971498052558097, "learning_rate": 3.975942531836397e-06, "loss": 0.2391, "step": 9020 }, { "epoch": 0.7146761734997029, "grad_norm": 2.2254404987212038, "learning_rate": 3.973894549251175e-06, "loss": 0.2264, "step": 9021 }, { "epoch": 0.7147553971083382, "grad_norm": 1.3796774919227595, "learning_rate": 3.971846963455999e-06, "loss": 0.167, "step": 9022 }, { "epoch": 0.7148346207169737, "grad_norm": 1.5307251298664517, "learning_rate": 3.969799774585696e-06, "loss": 0.1065, "step": 9023 }, { "epoch": 0.714913844325609, "grad_norm": 1.6004309959764664, "learning_rate": 3.967752982775058e-06, "loss": 0.1918, "step": 9024 }, { "epoch": 0.7149930679342444, "grad_norm": 1.865688504205946, "learning_rate": 3.965706588158865e-06, "loss": 0.2491, "step": 9025 }, { "epoch": 0.7150722915428798, "grad_norm": 1.478656642029139, "learning_rate": 3.963660590871858e-06, "loss": 0.1681, "step": 9026 }, { "epoch": 0.7151515151515152, "grad_norm": 1.4210059308888805, "learning_rate": 3.961614991048752e-06, "loss": 0.1682, "step": 9027 }, { "epoch": 0.7152307387601505, "grad_norm": 1.6888684210855989, "learning_rate": 3.959569788824248e-06, "loss": 0.1686, "step": 9028 }, { "epoch": 0.7153099623687859, "grad_norm": 1.477158490709553, "learning_rate": 3.957524984333009e-06, "loss": 0.125, "step": 9029 }, { "epoch": 0.7153891859774213, "grad_norm": 1.71168499741163, "learning_rate": 3.955480577709672e-06, "loss": 0.1873, "step": 9030 }, { "epoch": 0.7154684095860566, "grad_norm": 1.779959349211946, "learning_rate": 3.953436569088856e-06, "loss": 0.1739, "step": 9031 }, { "epoch": 0.7155476331946921, "grad_norm": 1.3560254000767384, "learning_rate": 3.951392958605149e-06, "loss": 0.1474, "step": 9032 }, { "epoch": 0.7156268568033274, "grad_norm": 1.7455533481947527, "learning_rate": 3.949349746393108e-06, "loss": 0.1647, "step": 9033 }, { "epoch": 0.7157060804119628, "grad_norm": 1.4528074698538187, "learning_rate": 3.947306932587277e-06, "loss": 0.184, "step": 9034 }, { "epoch": 0.7157853040205981, "grad_norm": 1.2692687219878889, "learning_rate": 3.945264517322159e-06, "loss": 0.1542, "step": 9035 }, { "epoch": 0.7158645276292335, "grad_norm": 1.4551494766354298, "learning_rate": 3.943222500732241e-06, "loss": 0.1578, "step": 9036 }, { "epoch": 0.7159437512378689, "grad_norm": 1.353822794206381, "learning_rate": 3.941180882951972e-06, "loss": 0.2059, "step": 9037 }, { "epoch": 0.7160229748465042, "grad_norm": 1.5168834929808608, "learning_rate": 3.9391396641157945e-06, "loss": 0.1856, "step": 9038 }, { "epoch": 0.7161021984551397, "grad_norm": 1.2626094497243283, "learning_rate": 3.937098844358106e-06, "loss": 0.092, "step": 9039 }, { "epoch": 0.716181422063775, "grad_norm": 1.50048188467296, "learning_rate": 3.935058423813282e-06, "loss": 0.2321, "step": 9040 }, { "epoch": 0.7162606456724103, "grad_norm": 1.4221612612287615, "learning_rate": 3.933018402615683e-06, "loss": 0.16, "step": 9041 }, { "epoch": 0.7163398692810458, "grad_norm": 1.7036620893662642, "learning_rate": 3.9309787808996284e-06, "loss": 0.1595, "step": 9042 }, { "epoch": 0.7164190928896811, "grad_norm": 1.2712702350305998, "learning_rate": 3.928939558799415e-06, "loss": 0.0866, "step": 9043 }, { "epoch": 0.7164983164983165, "grad_norm": 1.4241542221006147, "learning_rate": 3.926900736449324e-06, "loss": 0.1625, "step": 9044 }, { "epoch": 0.7165775401069518, "grad_norm": 1.2039491565505709, "learning_rate": 3.924862313983597e-06, "loss": 0.1172, "step": 9045 }, { "epoch": 0.7166567637155873, "grad_norm": 1.483794922138058, "learning_rate": 3.922824291536452e-06, "loss": 0.1898, "step": 9046 }, { "epoch": 0.7167359873242226, "grad_norm": 1.1218696089849776, "learning_rate": 3.920786669242089e-06, "loss": 0.0843, "step": 9047 }, { "epoch": 0.7168152109328579, "grad_norm": 1.8784289017637512, "learning_rate": 3.918749447234674e-06, "loss": 0.2198, "step": 9048 }, { "epoch": 0.7168944345414934, "grad_norm": 1.5897861681388985, "learning_rate": 3.9167126256483415e-06, "loss": 0.1635, "step": 9049 }, { "epoch": 0.7169736581501287, "grad_norm": 1.8535840508931065, "learning_rate": 3.914676204617216e-06, "loss": 0.1936, "step": 9050 }, { "epoch": 0.7170528817587641, "grad_norm": 1.458693675235977, "learning_rate": 3.912640184275381e-06, "loss": 0.1803, "step": 9051 }, { "epoch": 0.7171321053673995, "grad_norm": 1.482156299897613, "learning_rate": 3.9106045647569005e-06, "loss": 0.203, "step": 9052 }, { "epoch": 0.7172113289760349, "grad_norm": 1.5779143610523423, "learning_rate": 3.908569346195804e-06, "loss": 0.1602, "step": 9053 }, { "epoch": 0.7172905525846702, "grad_norm": 1.6529259345291873, "learning_rate": 3.90653452872611e-06, "loss": 0.1605, "step": 9054 }, { "epoch": 0.7173697761933056, "grad_norm": 2.181993139608414, "learning_rate": 3.904500112481798e-06, "loss": 0.1725, "step": 9055 }, { "epoch": 0.717448999801941, "grad_norm": 1.8115248726302062, "learning_rate": 3.902466097596821e-06, "loss": 0.1752, "step": 9056 }, { "epoch": 0.7175282234105763, "grad_norm": 1.6566757803948426, "learning_rate": 3.900432484205115e-06, "loss": 0.1612, "step": 9057 }, { "epoch": 0.7176074470192118, "grad_norm": 1.6978465046881248, "learning_rate": 3.89839927244058e-06, "loss": 0.208, "step": 9058 }, { "epoch": 0.7176866706278471, "grad_norm": 1.2188902030872955, "learning_rate": 3.89636646243709e-06, "loss": 0.1135, "step": 9059 }, { "epoch": 0.7177658942364825, "grad_norm": 1.3496657440401796, "learning_rate": 3.894334054328505e-06, "loss": 0.1914, "step": 9060 }, { "epoch": 0.7178451178451178, "grad_norm": 1.62484859302476, "learning_rate": 3.892302048248642e-06, "loss": 0.2044, "step": 9061 }, { "epoch": 0.7179243414537532, "grad_norm": 1.598928420791679, "learning_rate": 3.890270444331298e-06, "loss": 0.2004, "step": 9062 }, { "epoch": 0.7180035650623886, "grad_norm": 1.1838584888680506, "learning_rate": 3.888239242710251e-06, "loss": 0.1285, "step": 9063 }, { "epoch": 0.7180827886710239, "grad_norm": 1.2338198525155133, "learning_rate": 3.886208443519242e-06, "loss": 0.1618, "step": 9064 }, { "epoch": 0.7181620122796594, "grad_norm": 1.5850887432876848, "learning_rate": 3.884178046891984e-06, "loss": 0.1891, "step": 9065 }, { "epoch": 0.7182412358882947, "grad_norm": 1.92804658804293, "learning_rate": 3.88214805296218e-06, "loss": 0.2041, "step": 9066 }, { "epoch": 0.7183204594969301, "grad_norm": 1.8915883200648644, "learning_rate": 3.880118461863488e-06, "loss": 0.2235, "step": 9067 }, { "epoch": 0.7183996831055655, "grad_norm": 1.3440238910894948, "learning_rate": 3.878089273729549e-06, "loss": 0.1272, "step": 9068 }, { "epoch": 0.7184789067142008, "grad_norm": 1.6136231808937114, "learning_rate": 3.876060488693971e-06, "loss": 0.2128, "step": 9069 }, { "epoch": 0.7185581303228362, "grad_norm": 1.5417514075177148, "learning_rate": 3.874032106890347e-06, "loss": 0.2321, "step": 9070 }, { "epoch": 0.7186373539314715, "grad_norm": 1.4619880366560432, "learning_rate": 3.872004128452231e-06, "loss": 0.1119, "step": 9071 }, { "epoch": 0.718716577540107, "grad_norm": 1.5810958087342992, "learning_rate": 3.8699765535131565e-06, "loss": 0.1859, "step": 9072 }, { "epoch": 0.7187958011487423, "grad_norm": 1.4974380723545042, "learning_rate": 3.867949382206632e-06, "loss": 0.1673, "step": 9073 }, { "epoch": 0.7188750247573777, "grad_norm": 1.3916266660052012, "learning_rate": 3.8659226146661344e-06, "loss": 0.119, "step": 9074 }, { "epoch": 0.7189542483660131, "grad_norm": 1.6227143749682817, "learning_rate": 3.8638962510251175e-06, "loss": 0.2311, "step": 9075 }, { "epoch": 0.7190334719746484, "grad_norm": 1.7051339049259857, "learning_rate": 3.861870291417008e-06, "loss": 0.2257, "step": 9076 }, { "epoch": 0.7191126955832838, "grad_norm": 1.5799577133004064, "learning_rate": 3.859844735975205e-06, "loss": 0.15, "step": 9077 }, { "epoch": 0.7191919191919192, "grad_norm": 1.3884567522513096, "learning_rate": 3.857819584833078e-06, "loss": 0.1425, "step": 9078 }, { "epoch": 0.7192711428005546, "grad_norm": 1.858541351918183, "learning_rate": 3.855794838123981e-06, "loss": 0.1674, "step": 9079 }, { "epoch": 0.7193503664091899, "grad_norm": 1.4407869231448682, "learning_rate": 3.85377049598123e-06, "loss": 0.1161, "step": 9080 }, { "epoch": 0.7194295900178254, "grad_norm": 1.2113437592070886, "learning_rate": 3.851746558538113e-06, "loss": 0.1173, "step": 9081 }, { "epoch": 0.7195088136264607, "grad_norm": 1.6734177952858873, "learning_rate": 3.849723025927907e-06, "loss": 0.1468, "step": 9082 }, { "epoch": 0.719588037235096, "grad_norm": 1.6853728356199706, "learning_rate": 3.847699898283846e-06, "loss": 0.2097, "step": 9083 }, { "epoch": 0.7196672608437314, "grad_norm": 2.087847716941042, "learning_rate": 3.84567717573914e-06, "loss": 0.2189, "step": 9084 }, { "epoch": 0.7197464844523668, "grad_norm": 1.6563505204227469, "learning_rate": 3.843654858426981e-06, "loss": 0.1887, "step": 9085 }, { "epoch": 0.7198257080610022, "grad_norm": 1.550143621536969, "learning_rate": 3.84163294648053e-06, "loss": 0.1989, "step": 9086 }, { "epoch": 0.7199049316696375, "grad_norm": 1.9169435307826304, "learning_rate": 3.839611440032912e-06, "loss": 0.2223, "step": 9087 }, { "epoch": 0.719984155278273, "grad_norm": 1.9047305832146382, "learning_rate": 3.837590339217243e-06, "loss": 0.2023, "step": 9088 }, { "epoch": 0.7200633788869083, "grad_norm": 2.5056144609398894, "learning_rate": 3.835569644166599e-06, "loss": 0.1646, "step": 9089 }, { "epoch": 0.7201426024955436, "grad_norm": 1.5446784623354364, "learning_rate": 3.833549355014028e-06, "loss": 0.1638, "step": 9090 }, { "epoch": 0.7202218261041791, "grad_norm": 1.7494204197502379, "learning_rate": 3.8315294718925656e-06, "loss": 0.1592, "step": 9091 }, { "epoch": 0.7203010497128144, "grad_norm": 1.725774636577849, "learning_rate": 3.829509994935206e-06, "loss": 0.1985, "step": 9092 }, { "epoch": 0.7203802733214498, "grad_norm": 1.3667122392620439, "learning_rate": 3.827490924274922e-06, "loss": 0.1727, "step": 9093 }, { "epoch": 0.7204594969300852, "grad_norm": 1.3341205872074957, "learning_rate": 3.825472260044658e-06, "loss": 0.1124, "step": 9094 }, { "epoch": 0.7205387205387206, "grad_norm": 1.5994718599360567, "learning_rate": 3.8234540023773385e-06, "loss": 0.1785, "step": 9095 }, { "epoch": 0.7206179441473559, "grad_norm": 2.1104002211602193, "learning_rate": 3.821436151405854e-06, "loss": 0.1475, "step": 9096 }, { "epoch": 0.7206971677559912, "grad_norm": 2.044061381966268, "learning_rate": 3.819418707263065e-06, "loss": 0.2787, "step": 9097 }, { "epoch": 0.7207763913646267, "grad_norm": 1.3567126741090718, "learning_rate": 3.8174016700818196e-06, "loss": 0.1546, "step": 9098 }, { "epoch": 0.720855614973262, "grad_norm": 1.623895295465704, "learning_rate": 3.815385039994925e-06, "loss": 0.1826, "step": 9099 }, { "epoch": 0.7209348385818974, "grad_norm": 1.4418063123348093, "learning_rate": 3.8133688171351645e-06, "loss": 0.1761, "step": 9100 }, { "epoch": 0.7210140621905328, "grad_norm": 1.3342184119627303, "learning_rate": 3.811353001635302e-06, "loss": 0.1474, "step": 9101 }, { "epoch": 0.7210932857991682, "grad_norm": 1.2133827757075395, "learning_rate": 3.8093375936280665e-06, "loss": 0.1307, "step": 9102 }, { "epoch": 0.7211725094078035, "grad_norm": 1.8670532793034826, "learning_rate": 3.807322593246159e-06, "loss": 0.2793, "step": 9103 }, { "epoch": 0.7212517330164389, "grad_norm": 1.664944759625284, "learning_rate": 3.805308000622265e-06, "loss": 0.1602, "step": 9104 }, { "epoch": 0.7213309566250743, "grad_norm": 1.6328438505490472, "learning_rate": 3.8032938158890333e-06, "loss": 0.2175, "step": 9105 }, { "epoch": 0.7214101802337096, "grad_norm": 1.3106383845715255, "learning_rate": 3.8012800391790814e-06, "loss": 0.1164, "step": 9106 }, { "epoch": 0.7214894038423451, "grad_norm": 1.4425097714308814, "learning_rate": 3.799266670625018e-06, "loss": 0.134, "step": 9107 }, { "epoch": 0.7215686274509804, "grad_norm": 1.3673002516142871, "learning_rate": 3.797253710359409e-06, "loss": 0.1737, "step": 9108 }, { "epoch": 0.7216478510596158, "grad_norm": 2.264027446084552, "learning_rate": 3.7952411585147954e-06, "loss": 0.2603, "step": 9109 }, { "epoch": 0.7217270746682511, "grad_norm": 1.7800965293009932, "learning_rate": 3.793229015223694e-06, "loss": 0.2259, "step": 9110 }, { "epoch": 0.7218062982768865, "grad_norm": 1.2537544880547102, "learning_rate": 3.7912172806186e-06, "loss": 0.1499, "step": 9111 }, { "epoch": 0.7218855218855219, "grad_norm": 1.0522742003007164, "learning_rate": 3.7892059548319726e-06, "loss": 0.1023, "step": 9112 }, { "epoch": 0.7219647454941572, "grad_norm": 1.752092008053303, "learning_rate": 3.7871950379962463e-06, "loss": 0.1653, "step": 9113 }, { "epoch": 0.7220439691027927, "grad_norm": 1.7260211959752805, "learning_rate": 3.785184530243835e-06, "loss": 0.2495, "step": 9114 }, { "epoch": 0.722123192711428, "grad_norm": 1.8364188992883017, "learning_rate": 3.7831744317071194e-06, "loss": 0.236, "step": 9115 }, { "epoch": 0.7222024163200634, "grad_norm": 1.5612846767106707, "learning_rate": 3.7811647425184508e-06, "loss": 0.2291, "step": 9116 }, { "epoch": 0.7222816399286988, "grad_norm": 1.3421249378434938, "learning_rate": 3.7791554628101635e-06, "loss": 0.1849, "step": 9117 }, { "epoch": 0.7223608635373341, "grad_norm": 1.511705990419425, "learning_rate": 3.777146592714557e-06, "loss": 0.1425, "step": 9118 }, { "epoch": 0.7224400871459695, "grad_norm": 1.5589922664458769, "learning_rate": 3.7751381323639e-06, "loss": 0.1957, "step": 9119 }, { "epoch": 0.7225193107546048, "grad_norm": 1.4560120098600908, "learning_rate": 3.7731300818904494e-06, "loss": 0.1872, "step": 9120 }, { "epoch": 0.7225985343632403, "grad_norm": 1.2815084024791408, "learning_rate": 3.7711224414264216e-06, "loss": 0.1342, "step": 9121 }, { "epoch": 0.7226777579718756, "grad_norm": 1.4272790731694156, "learning_rate": 3.7691152111040087e-06, "loss": 0.232, "step": 9122 }, { "epoch": 0.7227569815805109, "grad_norm": 1.4058234830114376, "learning_rate": 3.767108391055374e-06, "loss": 0.1705, "step": 9123 }, { "epoch": 0.7228362051891464, "grad_norm": 1.3415809643670769, "learning_rate": 3.7651019814126656e-06, "loss": 0.1456, "step": 9124 }, { "epoch": 0.7229154287977817, "grad_norm": 1.5248380112282414, "learning_rate": 3.7630959823079914e-06, "loss": 0.1814, "step": 9125 }, { "epoch": 0.7229946524064171, "grad_norm": 1.0422113591476505, "learning_rate": 3.761090393873432e-06, "loss": 0.0957, "step": 9126 }, { "epoch": 0.7230738760150525, "grad_norm": 1.3098492919112241, "learning_rate": 3.7590852162410553e-06, "loss": 0.1041, "step": 9127 }, { "epoch": 0.7231530996236879, "grad_norm": 1.56969249461988, "learning_rate": 3.757080449542887e-06, "loss": 0.2144, "step": 9128 }, { "epoch": 0.7232323232323232, "grad_norm": 2.1130355348476346, "learning_rate": 3.7550760939109287e-06, "loss": 0.2178, "step": 9129 }, { "epoch": 0.7233115468409586, "grad_norm": 1.4914250784479086, "learning_rate": 3.7530721494771648e-06, "loss": 0.1873, "step": 9130 }, { "epoch": 0.723390770449594, "grad_norm": 1.5065213659543368, "learning_rate": 3.751068616373541e-06, "loss": 0.1754, "step": 9131 }, { "epoch": 0.7234699940582293, "grad_norm": 1.4524169435189498, "learning_rate": 3.749065494731978e-06, "loss": 0.1318, "step": 9132 }, { "epoch": 0.7235492176668648, "grad_norm": 1.5319927189304092, "learning_rate": 3.747062784684378e-06, "loss": 0.1856, "step": 9133 }, { "epoch": 0.7236284412755001, "grad_norm": 1.5811166745348826, "learning_rate": 3.7450604863626063e-06, "loss": 0.1493, "step": 9134 }, { "epoch": 0.7237076648841355, "grad_norm": 1.954204786441343, "learning_rate": 3.7430585998985004e-06, "loss": 0.2334, "step": 9135 }, { "epoch": 0.7237868884927708, "grad_norm": 1.3651315244576763, "learning_rate": 3.7410571254238835e-06, "loss": 0.1827, "step": 9136 }, { "epoch": 0.7238661121014062, "grad_norm": 1.498571782129823, "learning_rate": 3.7390560630705387e-06, "loss": 0.1772, "step": 9137 }, { "epoch": 0.7239453357100416, "grad_norm": 1.1379557011995205, "learning_rate": 3.7370554129702265e-06, "loss": 0.1121, "step": 9138 }, { "epoch": 0.7240245593186769, "grad_norm": 1.6552899317659178, "learning_rate": 3.735055175254676e-06, "loss": 0.1971, "step": 9139 }, { "epoch": 0.7241037829273124, "grad_norm": 1.3128254971816442, "learning_rate": 3.733055350055601e-06, "loss": 0.144, "step": 9140 }, { "epoch": 0.7241830065359477, "grad_norm": 1.3069125179203875, "learning_rate": 3.7310559375046774e-06, "loss": 0.1707, "step": 9141 }, { "epoch": 0.7242622301445831, "grad_norm": 1.1198542872534016, "learning_rate": 3.7290569377335517e-06, "loss": 0.1011, "step": 9142 }, { "epoch": 0.7243414537532185, "grad_norm": 1.4731175458061112, "learning_rate": 3.7270583508738565e-06, "loss": 0.1449, "step": 9143 }, { "epoch": 0.7244206773618538, "grad_norm": 1.672754668615413, "learning_rate": 3.725060177057185e-06, "loss": 0.2707, "step": 9144 }, { "epoch": 0.7244999009704892, "grad_norm": 1.6761777278488457, "learning_rate": 3.723062416415105e-06, "loss": 0.1937, "step": 9145 }, { "epoch": 0.7245791245791245, "grad_norm": 1.300352270630626, "learning_rate": 3.721065069079165e-06, "loss": 0.1403, "step": 9146 }, { "epoch": 0.72465834818776, "grad_norm": 1.537838132912087, "learning_rate": 3.7190681351808778e-06, "loss": 0.166, "step": 9147 }, { "epoch": 0.7247375717963953, "grad_norm": 1.9220091644531176, "learning_rate": 3.7170716148517294e-06, "loss": 0.1937, "step": 9148 }, { "epoch": 0.7248167954050307, "grad_norm": 1.2515736621489992, "learning_rate": 3.715075508223187e-06, "loss": 0.1435, "step": 9149 }, { "epoch": 0.7248960190136661, "grad_norm": 1.3327992702954297, "learning_rate": 3.71307981542668e-06, "loss": 0.1823, "step": 9150 }, { "epoch": 0.7249752426223014, "grad_norm": 1.4698977263415958, "learning_rate": 3.7110845365936144e-06, "loss": 0.1367, "step": 9151 }, { "epoch": 0.7250544662309368, "grad_norm": 1.528357635526201, "learning_rate": 3.709089671855378e-06, "loss": 0.1785, "step": 9152 }, { "epoch": 0.7251336898395722, "grad_norm": 1.3134327684528684, "learning_rate": 3.707095221343313e-06, "loss": 0.1364, "step": 9153 }, { "epoch": 0.7252129134482076, "grad_norm": 1.378788395914772, "learning_rate": 3.7051011851887455e-06, "loss": 0.1766, "step": 9154 }, { "epoch": 0.7252921370568429, "grad_norm": 1.6670125378340088, "learning_rate": 3.7031075635229787e-06, "loss": 0.2311, "step": 9155 }, { "epoch": 0.7253713606654784, "grad_norm": 1.488651109597415, "learning_rate": 3.70111435647728e-06, "loss": 0.1319, "step": 9156 }, { "epoch": 0.7254505842741137, "grad_norm": 1.3467921873259778, "learning_rate": 3.6991215641828903e-06, "loss": 0.1528, "step": 9157 }, { "epoch": 0.725529807882749, "grad_norm": 2.3256477098154433, "learning_rate": 3.6971291867710303e-06, "loss": 0.3004, "step": 9158 }, { "epoch": 0.7256090314913844, "grad_norm": 1.1369956053814774, "learning_rate": 3.6951372243728854e-06, "loss": 0.1017, "step": 9159 }, { "epoch": 0.7256882551000198, "grad_norm": 1.562159969793376, "learning_rate": 3.693145677119615e-06, "loss": 0.2144, "step": 9160 }, { "epoch": 0.7257674787086552, "grad_norm": 1.9340070299399497, "learning_rate": 3.691154545142357e-06, "loss": 0.2413, "step": 9161 }, { "epoch": 0.7258467023172905, "grad_norm": 1.9384254252035054, "learning_rate": 3.6891638285722176e-06, "loss": 0.1882, "step": 9162 }, { "epoch": 0.725925925925926, "grad_norm": 1.8001180384584912, "learning_rate": 3.687173527540273e-06, "loss": 0.2041, "step": 9163 }, { "epoch": 0.7260051495345613, "grad_norm": 1.9409758761693925, "learning_rate": 3.6851836421775733e-06, "loss": 0.2015, "step": 9164 }, { "epoch": 0.7260843731431966, "grad_norm": 1.4502710972461779, "learning_rate": 3.683194172615149e-06, "loss": 0.1716, "step": 9165 }, { "epoch": 0.7261635967518321, "grad_norm": 1.4684737044771066, "learning_rate": 3.681205118983995e-06, "loss": 0.1832, "step": 9166 }, { "epoch": 0.7262428203604674, "grad_norm": 1.3543165687679024, "learning_rate": 3.6792164814150756e-06, "loss": 0.1187, "step": 9167 }, { "epoch": 0.7263220439691028, "grad_norm": 1.3247265132719996, "learning_rate": 3.6772282600393393e-06, "loss": 0.1292, "step": 9168 }, { "epoch": 0.7264012675777382, "grad_norm": 1.8899551304933995, "learning_rate": 3.675240454987701e-06, "loss": 0.1392, "step": 9169 }, { "epoch": 0.7264804911863736, "grad_norm": 1.8459340470535968, "learning_rate": 3.6732530663910415e-06, "loss": 0.1454, "step": 9170 }, { "epoch": 0.7265597147950089, "grad_norm": 1.5808212763076916, "learning_rate": 3.6712660943802292e-06, "loss": 0.1177, "step": 9171 }, { "epoch": 0.7266389384036442, "grad_norm": 1.8172895966787481, "learning_rate": 3.6692795390860913e-06, "loss": 0.1691, "step": 9172 }, { "epoch": 0.7267181620122797, "grad_norm": 1.6153578932395543, "learning_rate": 3.667293400639432e-06, "loss": 0.1876, "step": 9173 }, { "epoch": 0.726797385620915, "grad_norm": 1.3174571376265898, "learning_rate": 3.665307679171034e-06, "loss": 0.1448, "step": 9174 }, { "epoch": 0.7268766092295504, "grad_norm": 1.6078091448188845, "learning_rate": 3.6633223748116454e-06, "loss": 0.1641, "step": 9175 }, { "epoch": 0.7269558328381858, "grad_norm": 1.781059429319135, "learning_rate": 3.661337487691985e-06, "loss": 0.2085, "step": 9176 }, { "epoch": 0.7270350564468212, "grad_norm": 1.5872870806572175, "learning_rate": 3.659353017942754e-06, "loss": 0.26, "step": 9177 }, { "epoch": 0.7271142800554565, "grad_norm": 1.4096828592033954, "learning_rate": 3.6573689656946177e-06, "loss": 0.1607, "step": 9178 }, { "epoch": 0.7271935036640919, "grad_norm": 1.3417721067761144, "learning_rate": 3.655385331078217e-06, "loss": 0.1115, "step": 9179 }, { "epoch": 0.7272727272727273, "grad_norm": 1.871430729197127, "learning_rate": 3.6534021142241595e-06, "loss": 0.1815, "step": 9180 }, { "epoch": 0.7273519508813626, "grad_norm": 2.07880282717452, "learning_rate": 3.6514193152630382e-06, "loss": 0.2215, "step": 9181 }, { "epoch": 0.7274311744899981, "grad_norm": 1.4841112804861578, "learning_rate": 3.649436934325409e-06, "loss": 0.1639, "step": 9182 }, { "epoch": 0.7275103980986334, "grad_norm": 1.417370074331635, "learning_rate": 3.647454971541796e-06, "loss": 0.1585, "step": 9183 }, { "epoch": 0.7275896217072688, "grad_norm": 1.0574246149159716, "learning_rate": 3.6454734270427107e-06, "loss": 0.0789, "step": 9184 }, { "epoch": 0.7276688453159041, "grad_norm": 1.806201252304038, "learning_rate": 3.6434923009586244e-06, "loss": 0.2075, "step": 9185 }, { "epoch": 0.7277480689245395, "grad_norm": 1.3665577693885003, "learning_rate": 3.6415115934199795e-06, "loss": 0.143, "step": 9186 }, { "epoch": 0.7278272925331749, "grad_norm": 1.658120082405557, "learning_rate": 3.6395313045572055e-06, "loss": 0.2047, "step": 9187 }, { "epoch": 0.7279065161418102, "grad_norm": 1.3744935377552778, "learning_rate": 3.6375514345006913e-06, "loss": 0.1451, "step": 9188 }, { "epoch": 0.7279857397504457, "grad_norm": 1.425797368850368, "learning_rate": 3.635571983380797e-06, "loss": 0.1265, "step": 9189 }, { "epoch": 0.728064963359081, "grad_norm": 1.8360229975625335, "learning_rate": 3.6335929513278667e-06, "loss": 0.2016, "step": 9190 }, { "epoch": 0.7281441869677164, "grad_norm": 1.2554453497590992, "learning_rate": 3.631614338472208e-06, "loss": 0.1475, "step": 9191 }, { "epoch": 0.7282234105763518, "grad_norm": 1.2209398196114771, "learning_rate": 3.6296361449440985e-06, "loss": 0.1611, "step": 9192 }, { "epoch": 0.7283026341849871, "grad_norm": 1.380453108321186, "learning_rate": 3.6276583708738013e-06, "loss": 0.1574, "step": 9193 }, { "epoch": 0.7283818577936225, "grad_norm": 1.8224506098765345, "learning_rate": 3.6256810163915368e-06, "loss": 0.1603, "step": 9194 }, { "epoch": 0.7284610814022578, "grad_norm": 1.5291065648693936, "learning_rate": 3.623704081627507e-06, "loss": 0.2159, "step": 9195 }, { "epoch": 0.7285403050108933, "grad_norm": 1.2218064221955427, "learning_rate": 3.62172756671188e-06, "loss": 0.1109, "step": 9196 }, { "epoch": 0.7286195286195286, "grad_norm": 1.531883415160384, "learning_rate": 3.619751471774805e-06, "loss": 0.1907, "step": 9197 }, { "epoch": 0.728698752228164, "grad_norm": 1.350046992375423, "learning_rate": 3.6177757969463956e-06, "loss": 0.1451, "step": 9198 }, { "epoch": 0.7287779758367994, "grad_norm": 1.5308613939871516, "learning_rate": 3.615800542356738e-06, "loss": 0.1539, "step": 9199 }, { "epoch": 0.7288571994454347, "grad_norm": 1.2910480544851555, "learning_rate": 3.6138257081358985e-06, "loss": 0.1513, "step": 9200 }, { "epoch": 0.7289364230540701, "grad_norm": 1.5349775397024419, "learning_rate": 3.6118512944139084e-06, "loss": 0.2038, "step": 9201 }, { "epoch": 0.7290156466627055, "grad_norm": 1.7447375925960995, "learning_rate": 3.609877301320769e-06, "loss": 0.2418, "step": 9202 }, { "epoch": 0.7290948702713409, "grad_norm": 1.7899060858289482, "learning_rate": 3.607903728986465e-06, "loss": 0.2047, "step": 9203 }, { "epoch": 0.7291740938799762, "grad_norm": 1.6093271768899993, "learning_rate": 3.6059305775409435e-06, "loss": 0.2542, "step": 9204 }, { "epoch": 0.7292533174886116, "grad_norm": 2.6406290776511367, "learning_rate": 3.6039578471141244e-06, "loss": 0.1979, "step": 9205 }, { "epoch": 0.729332541097247, "grad_norm": 1.639071862333085, "learning_rate": 3.6019855378359092e-06, "loss": 0.1922, "step": 9206 }, { "epoch": 0.7294117647058823, "grad_norm": 1.1598429744597092, "learning_rate": 3.6000136498361605e-06, "loss": 0.0941, "step": 9207 }, { "epoch": 0.7294909883145178, "grad_norm": 0.7114323589978904, "learning_rate": 3.5980421832447188e-06, "loss": 0.0859, "step": 9208 }, { "epoch": 0.7295702119231531, "grad_norm": 1.4064092688666743, "learning_rate": 3.5960711381913904e-06, "loss": 0.1573, "step": 9209 }, { "epoch": 0.7296494355317885, "grad_norm": 1.3669230885100645, "learning_rate": 3.5941005148059684e-06, "loss": 0.2027, "step": 9210 }, { "epoch": 0.7297286591404238, "grad_norm": 1.4249676741684758, "learning_rate": 3.5921303132182038e-06, "loss": 0.1921, "step": 9211 }, { "epoch": 0.7298078827490592, "grad_norm": 1.5814114857052903, "learning_rate": 3.5901605335578214e-06, "loss": 0.2168, "step": 9212 }, { "epoch": 0.7298871063576946, "grad_norm": 1.6356478831002252, "learning_rate": 3.5881911759545296e-06, "loss": 0.2195, "step": 9213 }, { "epoch": 0.7299663299663299, "grad_norm": 1.3394345448527973, "learning_rate": 3.5862222405379975e-06, "loss": 0.1075, "step": 9214 }, { "epoch": 0.7300455535749654, "grad_norm": 1.739069549305948, "learning_rate": 3.584253727437866e-06, "loss": 0.1567, "step": 9215 }, { "epoch": 0.7301247771836007, "grad_norm": 1.5177721771550932, "learning_rate": 3.5822856367837587e-06, "loss": 0.1675, "step": 9216 }, { "epoch": 0.7302040007922361, "grad_norm": 1.4747053893799875, "learning_rate": 3.5803179687052636e-06, "loss": 0.1502, "step": 9217 }, { "epoch": 0.7302832244008715, "grad_norm": 1.4593506848127025, "learning_rate": 3.578350723331937e-06, "loss": 0.1312, "step": 9218 }, { "epoch": 0.7303624480095068, "grad_norm": 1.7632007157436047, "learning_rate": 3.5763839007933186e-06, "loss": 0.2735, "step": 9219 }, { "epoch": 0.7304416716181422, "grad_norm": 1.502594467754403, "learning_rate": 3.574417501218913e-06, "loss": 0.1413, "step": 9220 }, { "epoch": 0.7305208952267775, "grad_norm": 1.5803799208773885, "learning_rate": 3.572451524738193e-06, "loss": 0.2593, "step": 9221 }, { "epoch": 0.730600118835413, "grad_norm": 1.3277431484787081, "learning_rate": 3.5704859714806162e-06, "loss": 0.1601, "step": 9222 }, { "epoch": 0.7306793424440483, "grad_norm": 1.3712244420537074, "learning_rate": 3.568520841575601e-06, "loss": 0.147, "step": 9223 }, { "epoch": 0.7307585660526837, "grad_norm": 1.682802466160093, "learning_rate": 3.5665561351525423e-06, "loss": 0.1993, "step": 9224 }, { "epoch": 0.7308377896613191, "grad_norm": 1.5173816752061855, "learning_rate": 3.564591852340803e-06, "loss": 0.1526, "step": 9225 }, { "epoch": 0.7309170132699544, "grad_norm": 1.503958862699897, "learning_rate": 3.562627993269728e-06, "loss": 0.1438, "step": 9226 }, { "epoch": 0.7309962368785898, "grad_norm": 1.929073476943581, "learning_rate": 3.5606645580686262e-06, "loss": 0.2218, "step": 9227 }, { "epoch": 0.7310754604872252, "grad_norm": 1.434326311432558, "learning_rate": 3.558701546866775e-06, "loss": 0.1942, "step": 9228 }, { "epoch": 0.7311546840958606, "grad_norm": 1.5581366239017558, "learning_rate": 3.5567389597934367e-06, "loss": 0.1801, "step": 9229 }, { "epoch": 0.7312339077044959, "grad_norm": 1.5939197873046507, "learning_rate": 3.5547767969778355e-06, "loss": 0.1234, "step": 9230 }, { "epoch": 0.7313131313131314, "grad_norm": 1.8396690125298694, "learning_rate": 3.5528150585491695e-06, "loss": 0.2584, "step": 9231 }, { "epoch": 0.7313923549217667, "grad_norm": 1.6265254160412324, "learning_rate": 3.5508537446366097e-06, "loss": 0.197, "step": 9232 }, { "epoch": 0.731471578530402, "grad_norm": 1.6252653754846322, "learning_rate": 3.548892855369299e-06, "loss": 0.1918, "step": 9233 }, { "epoch": 0.7315508021390374, "grad_norm": 1.3207003598644642, "learning_rate": 3.5469323908763507e-06, "loss": 0.1296, "step": 9234 }, { "epoch": 0.7316300257476728, "grad_norm": 1.5902352053182993, "learning_rate": 3.544972351286857e-06, "loss": 0.1903, "step": 9235 }, { "epoch": 0.7317092493563082, "grad_norm": 1.4170522780616377, "learning_rate": 3.543012736729875e-06, "loss": 0.1426, "step": 9236 }, { "epoch": 0.7317884729649435, "grad_norm": 1.6738582315907702, "learning_rate": 3.541053547334431e-06, "loss": 0.1555, "step": 9237 }, { "epoch": 0.731867696573579, "grad_norm": 2.017358471201567, "learning_rate": 3.5390947832295366e-06, "loss": 0.1397, "step": 9238 }, { "epoch": 0.7319469201822143, "grad_norm": 1.8306103500607795, "learning_rate": 3.5371364445441624e-06, "loss": 0.209, "step": 9239 }, { "epoch": 0.7320261437908496, "grad_norm": 1.2752453648619118, "learning_rate": 3.535178531407253e-06, "loss": 0.1937, "step": 9240 }, { "epoch": 0.7321053673994851, "grad_norm": 1.6076145454420305, "learning_rate": 3.5332210439477334e-06, "loss": 0.1656, "step": 9241 }, { "epoch": 0.7321845910081204, "grad_norm": 1.494379230965058, "learning_rate": 3.5312639822944917e-06, "loss": 0.1089, "step": 9242 }, { "epoch": 0.7322638146167558, "grad_norm": 1.478836369687022, "learning_rate": 3.529307346576388e-06, "loss": 0.2161, "step": 9243 }, { "epoch": 0.7323430382253912, "grad_norm": 1.6050499741314956, "learning_rate": 3.527351136922265e-06, "loss": 0.1685, "step": 9244 }, { "epoch": 0.7324222618340266, "grad_norm": 1.722522583946821, "learning_rate": 3.525395353460924e-06, "loss": 0.1525, "step": 9245 }, { "epoch": 0.7325014854426619, "grad_norm": 1.3641376699368235, "learning_rate": 3.5234399963211418e-06, "loss": 0.1404, "step": 9246 }, { "epoch": 0.7325807090512972, "grad_norm": 1.4039749195769666, "learning_rate": 3.521485065631677e-06, "loss": 0.1449, "step": 9247 }, { "epoch": 0.7326599326599327, "grad_norm": 1.6744201148765419, "learning_rate": 3.5195305615212473e-06, "loss": 0.1677, "step": 9248 }, { "epoch": 0.732739156268568, "grad_norm": 1.584820045873605, "learning_rate": 3.517576484118549e-06, "loss": 0.1324, "step": 9249 }, { "epoch": 0.7328183798772034, "grad_norm": 1.5973881571667095, "learning_rate": 3.5156228335522434e-06, "loss": 0.1633, "step": 9250 }, { "epoch": 0.7328976034858388, "grad_norm": 1.628524444441464, "learning_rate": 3.513669609950977e-06, "loss": 0.1978, "step": 9251 }, { "epoch": 0.7329768270944742, "grad_norm": 1.2756077508679642, "learning_rate": 3.5117168134433566e-06, "loss": 0.157, "step": 9252 }, { "epoch": 0.7330560507031095, "grad_norm": 1.8602242017760275, "learning_rate": 3.5097644441579602e-06, "loss": 0.2217, "step": 9253 }, { "epoch": 0.7331352743117449, "grad_norm": 1.8319572116481992, "learning_rate": 3.507812502223351e-06, "loss": 0.198, "step": 9254 }, { "epoch": 0.7332144979203803, "grad_norm": 1.4028066469281366, "learning_rate": 3.5058609877680495e-06, "loss": 0.1739, "step": 9255 }, { "epoch": 0.7332937215290156, "grad_norm": 1.3785013343154175, "learning_rate": 3.5039099009205503e-06, "loss": 0.1412, "step": 9256 }, { "epoch": 0.7333729451376511, "grad_norm": 1.1510380172008035, "learning_rate": 3.5019592418093306e-06, "loss": 0.1381, "step": 9257 }, { "epoch": 0.7334521687462864, "grad_norm": 1.7562008977207655, "learning_rate": 3.5000090105628282e-06, "loss": 0.1684, "step": 9258 }, { "epoch": 0.7335313923549218, "grad_norm": 1.8937344156720113, "learning_rate": 3.4980592073094533e-06, "loss": 0.2403, "step": 9259 }, { "epoch": 0.7336106159635571, "grad_norm": 1.4054696221329706, "learning_rate": 3.4961098321775978e-06, "loss": 0.1423, "step": 9260 }, { "epoch": 0.7336898395721925, "grad_norm": 1.3761612147329114, "learning_rate": 3.4941608852956143e-06, "loss": 0.1328, "step": 9261 }, { "epoch": 0.7337690631808279, "grad_norm": 1.701598253677154, "learning_rate": 3.4922123667918305e-06, "loss": 0.1821, "step": 9262 }, { "epoch": 0.7338482867894632, "grad_norm": 1.8066939691741852, "learning_rate": 3.4902642767945506e-06, "loss": 0.2017, "step": 9263 }, { "epoch": 0.7339275103980987, "grad_norm": 1.3923068824241427, "learning_rate": 3.488316615432047e-06, "loss": 0.151, "step": 9264 }, { "epoch": 0.734006734006734, "grad_norm": 1.731344961583298, "learning_rate": 3.486369382832561e-06, "loss": 0.188, "step": 9265 }, { "epoch": 0.7340859576153694, "grad_norm": 1.9471917214352137, "learning_rate": 3.484422579124306e-06, "loss": 0.2709, "step": 9266 }, { "epoch": 0.7341651812240048, "grad_norm": 1.4567886137717176, "learning_rate": 3.4824762044354763e-06, "loss": 0.1739, "step": 9267 }, { "epoch": 0.7342444048326401, "grad_norm": 1.9122218009324379, "learning_rate": 3.480530258894229e-06, "loss": 0.2177, "step": 9268 }, { "epoch": 0.7343236284412755, "grad_norm": 1.308680544729355, "learning_rate": 3.478584742628691e-06, "loss": 0.1649, "step": 9269 }, { "epoch": 0.7344028520499108, "grad_norm": 1.5787516637827443, "learning_rate": 3.4766396557669712e-06, "loss": 0.2063, "step": 9270 }, { "epoch": 0.7344820756585463, "grad_norm": 1.6720076858463355, "learning_rate": 3.4746949984371425e-06, "loss": 0.2134, "step": 9271 }, { "epoch": 0.7345612992671816, "grad_norm": 1.6553901487085008, "learning_rate": 3.472750770767247e-06, "loss": 0.2069, "step": 9272 }, { "epoch": 0.734640522875817, "grad_norm": 1.7169479553977398, "learning_rate": 3.470806972885309e-06, "loss": 0.1681, "step": 9273 }, { "epoch": 0.7347197464844524, "grad_norm": 1.2962242597923626, "learning_rate": 3.468863604919316e-06, "loss": 0.121, "step": 9274 }, { "epoch": 0.7347989700930877, "grad_norm": 1.6369086372682258, "learning_rate": 3.4669206669972254e-06, "loss": 0.1773, "step": 9275 }, { "epoch": 0.7348781937017231, "grad_norm": 1.3073963991436224, "learning_rate": 3.4649781592469765e-06, "loss": 0.1174, "step": 9276 }, { "epoch": 0.7349574173103585, "grad_norm": 1.5673592417489588, "learning_rate": 3.4630360817964715e-06, "loss": 0.2609, "step": 9277 }, { "epoch": 0.7350366409189939, "grad_norm": 1.8549002411668731, "learning_rate": 3.4610944347735864e-06, "loss": 0.2089, "step": 9278 }, { "epoch": 0.7351158645276292, "grad_norm": 1.5531267832692022, "learning_rate": 3.459153218306167e-06, "loss": 0.1936, "step": 9279 }, { "epoch": 0.7351950881362646, "grad_norm": 1.3987359798899248, "learning_rate": 3.457212432522038e-06, "loss": 0.1778, "step": 9280 }, { "epoch": 0.7352743117449, "grad_norm": 1.6450946295576279, "learning_rate": 3.455272077548989e-06, "loss": 0.2221, "step": 9281 }, { "epoch": 0.7353535353535353, "grad_norm": 1.6690793689600167, "learning_rate": 3.453332153514779e-06, "loss": 0.2068, "step": 9282 }, { "epoch": 0.7354327589621708, "grad_norm": 1.5494664313052775, "learning_rate": 3.4513926605471504e-06, "loss": 0.1461, "step": 9283 }, { "epoch": 0.7355119825708061, "grad_norm": 1.3877877055880727, "learning_rate": 3.449453598773804e-06, "loss": 0.1266, "step": 9284 }, { "epoch": 0.7355912061794415, "grad_norm": 1.425633582182246, "learning_rate": 3.4475149683224164e-06, "loss": 0.2326, "step": 9285 }, { "epoch": 0.7356704297880768, "grad_norm": 1.438177584247859, "learning_rate": 3.445576769320642e-06, "loss": 0.1388, "step": 9286 }, { "epoch": 0.7357496533967122, "grad_norm": 1.4824285657855427, "learning_rate": 3.4436390018960997e-06, "loss": 0.202, "step": 9287 }, { "epoch": 0.7358288770053476, "grad_norm": 1.430415837162107, "learning_rate": 3.4417016661763793e-06, "loss": 0.1713, "step": 9288 }, { "epoch": 0.7359081006139829, "grad_norm": 1.5975277218062287, "learning_rate": 3.439764762289051e-06, "loss": 0.1513, "step": 9289 }, { "epoch": 0.7359873242226184, "grad_norm": 1.3793798901900824, "learning_rate": 3.4378282903616457e-06, "loss": 0.1301, "step": 9290 }, { "epoch": 0.7360665478312537, "grad_norm": 1.6763429774800287, "learning_rate": 3.4358922505216707e-06, "loss": 0.2338, "step": 9291 }, { "epoch": 0.7361457714398891, "grad_norm": 1.199526460764395, "learning_rate": 3.4339566428966086e-06, "loss": 0.1005, "step": 9292 }, { "epoch": 0.7362249950485245, "grad_norm": 1.4249566403907705, "learning_rate": 3.4320214676139087e-06, "loss": 0.1774, "step": 9293 }, { "epoch": 0.7363042186571598, "grad_norm": 2.2759126802080303, "learning_rate": 3.4300867248009917e-06, "loss": 0.2253, "step": 9294 }, { "epoch": 0.7363834422657952, "grad_norm": 1.8030010609026996, "learning_rate": 3.4281524145852485e-06, "loss": 0.1618, "step": 9295 }, { "epoch": 0.7364626658744305, "grad_norm": 1.5076780114117971, "learning_rate": 3.4262185370940504e-06, "loss": 0.1124, "step": 9296 }, { "epoch": 0.736541889483066, "grad_norm": 1.2693258024399388, "learning_rate": 3.4242850924547297e-06, "loss": 0.0947, "step": 9297 }, { "epoch": 0.7366211130917013, "grad_norm": 1.6414827986512128, "learning_rate": 3.422352080794593e-06, "loss": 0.1355, "step": 9298 }, { "epoch": 0.7367003367003367, "grad_norm": 1.777672336959917, "learning_rate": 3.4204195022409247e-06, "loss": 0.1664, "step": 9299 }, { "epoch": 0.7367795603089721, "grad_norm": 2.083096831392534, "learning_rate": 3.418487356920974e-06, "loss": 0.2306, "step": 9300 }, { "epoch": 0.7368587839176074, "grad_norm": 1.4858841545435992, "learning_rate": 3.4165556449619584e-06, "loss": 0.1507, "step": 9301 }, { "epoch": 0.7369380075262428, "grad_norm": 2.4406334926710573, "learning_rate": 3.4146243664910804e-06, "loss": 0.1751, "step": 9302 }, { "epoch": 0.7370172311348782, "grad_norm": 1.0159323870260486, "learning_rate": 3.4126935216355005e-06, "loss": 0.0891, "step": 9303 }, { "epoch": 0.7370964547435136, "grad_norm": 1.5574304939224075, "learning_rate": 3.4107631105223528e-06, "loss": 0.199, "step": 9304 }, { "epoch": 0.7371756783521489, "grad_norm": 1.8613206664797182, "learning_rate": 3.4088331332787527e-06, "loss": 0.1968, "step": 9305 }, { "epoch": 0.7372549019607844, "grad_norm": 1.6016464423022756, "learning_rate": 3.406903590031776e-06, "loss": 0.2066, "step": 9306 }, { "epoch": 0.7373341255694197, "grad_norm": 1.2872302419669481, "learning_rate": 3.4049744809084697e-06, "loss": 0.1503, "step": 9307 }, { "epoch": 0.737413349178055, "grad_norm": 1.2911361225106146, "learning_rate": 3.4030458060358682e-06, "loss": 0.1504, "step": 9308 }, { "epoch": 0.7374925727866904, "grad_norm": 1.8083152712854311, "learning_rate": 3.4011175655409546e-06, "loss": 0.195, "step": 9309 }, { "epoch": 0.7375717963953258, "grad_norm": 1.3395017364910222, "learning_rate": 3.399189759550694e-06, "loss": 0.1429, "step": 9310 }, { "epoch": 0.7376510200039612, "grad_norm": 1.4825468625516056, "learning_rate": 3.3972623881920296e-06, "loss": 0.1936, "step": 9311 }, { "epoch": 0.7377302436125965, "grad_norm": 1.385687659095172, "learning_rate": 3.3953354515918667e-06, "loss": 0.1776, "step": 9312 }, { "epoch": 0.737809467221232, "grad_norm": 1.3280931032098882, "learning_rate": 3.3934089498770816e-06, "loss": 0.1491, "step": 9313 }, { "epoch": 0.7378886908298673, "grad_norm": 1.6269056777615707, "learning_rate": 3.3914828831745306e-06, "loss": 0.1568, "step": 9314 }, { "epoch": 0.7379679144385026, "grad_norm": 1.584624723752255, "learning_rate": 3.3895572516110353e-06, "loss": 0.2033, "step": 9315 }, { "epoch": 0.7380471380471381, "grad_norm": 1.6475655587896085, "learning_rate": 3.3876320553133834e-06, "loss": 0.154, "step": 9316 }, { "epoch": 0.7381263616557734, "grad_norm": 1.4345396080179786, "learning_rate": 3.385707294408347e-06, "loss": 0.1832, "step": 9317 }, { "epoch": 0.7382055852644088, "grad_norm": 1.5228599321570073, "learning_rate": 3.38378296902266e-06, "loss": 0.1833, "step": 9318 }, { "epoch": 0.7382848088730442, "grad_norm": 1.158066749423035, "learning_rate": 3.3818590792830285e-06, "loss": 0.1293, "step": 9319 }, { "epoch": 0.7383640324816796, "grad_norm": 1.8639376219796586, "learning_rate": 3.3799356253161288e-06, "loss": 0.2306, "step": 9320 }, { "epoch": 0.7384432560903149, "grad_norm": 1.4582046048947686, "learning_rate": 3.3780126072486188e-06, "loss": 0.1537, "step": 9321 }, { "epoch": 0.7385224796989502, "grad_norm": 1.6614339670549445, "learning_rate": 3.376090025207115e-06, "loss": 0.2216, "step": 9322 }, { "epoch": 0.7386017033075857, "grad_norm": 1.522492898509982, "learning_rate": 3.3741678793182077e-06, "loss": 0.1495, "step": 9323 }, { "epoch": 0.738680926916221, "grad_norm": 1.5782764997232395, "learning_rate": 3.372246169708466e-06, "loss": 0.1377, "step": 9324 }, { "epoch": 0.7387601505248564, "grad_norm": 1.2941478117245584, "learning_rate": 3.3703248965044253e-06, "loss": 0.1381, "step": 9325 }, { "epoch": 0.7388393741334918, "grad_norm": 1.991255140355642, "learning_rate": 3.368404059832586e-06, "loss": 0.2822, "step": 9326 }, { "epoch": 0.7389185977421272, "grad_norm": 1.3842367449387394, "learning_rate": 3.366483659819434e-06, "loss": 0.144, "step": 9327 }, { "epoch": 0.7389978213507625, "grad_norm": 1.5745940123767712, "learning_rate": 3.364563696591414e-06, "loss": 0.1691, "step": 9328 }, { "epoch": 0.7390770449593979, "grad_norm": 1.1482600007162196, "learning_rate": 3.3626441702749436e-06, "loss": 0.1174, "step": 9329 }, { "epoch": 0.7391562685680333, "grad_norm": 1.4965772429888502, "learning_rate": 3.360725080996421e-06, "loss": 0.1475, "step": 9330 }, { "epoch": 0.7392354921766686, "grad_norm": 1.8151353352812318, "learning_rate": 3.3588064288822055e-06, "loss": 0.2591, "step": 9331 }, { "epoch": 0.739314715785304, "grad_norm": 1.9651925188489936, "learning_rate": 3.356888214058629e-06, "loss": 0.1459, "step": 9332 }, { "epoch": 0.7393939393939394, "grad_norm": 2.1504601771332825, "learning_rate": 3.354970436652001e-06, "loss": 0.2426, "step": 9333 }, { "epoch": 0.7394731630025748, "grad_norm": 1.4223477195328218, "learning_rate": 3.3530530967885964e-06, "loss": 0.1395, "step": 9334 }, { "epoch": 0.7395523866112101, "grad_norm": 1.1037716480956214, "learning_rate": 3.351136194594662e-06, "loss": 0.0865, "step": 9335 }, { "epoch": 0.7396316102198455, "grad_norm": 1.4491905539308725, "learning_rate": 3.3492197301964145e-06, "loss": 0.1498, "step": 9336 }, { "epoch": 0.7397108338284809, "grad_norm": 1.9894708986344223, "learning_rate": 3.3473037037200484e-06, "loss": 0.2727, "step": 9337 }, { "epoch": 0.7397900574371162, "grad_norm": 1.6990974711784488, "learning_rate": 3.345388115291723e-06, "loss": 0.2105, "step": 9338 }, { "epoch": 0.7398692810457517, "grad_norm": 1.443708267902268, "learning_rate": 3.3434729650375675e-06, "loss": 0.1643, "step": 9339 }, { "epoch": 0.739948504654387, "grad_norm": 1.6362432477609234, "learning_rate": 3.341558253083692e-06, "loss": 0.1577, "step": 9340 }, { "epoch": 0.7400277282630224, "grad_norm": 1.8078991881716104, "learning_rate": 3.3396439795561662e-06, "loss": 0.2295, "step": 9341 }, { "epoch": 0.7401069518716578, "grad_norm": 1.7496771299142653, "learning_rate": 3.3377301445810327e-06, "loss": 0.1531, "step": 9342 }, { "epoch": 0.7401861754802931, "grad_norm": 1.594778358872988, "learning_rate": 3.3358167482843173e-06, "loss": 0.1776, "step": 9343 }, { "epoch": 0.7402653990889285, "grad_norm": 1.6781356099973521, "learning_rate": 3.3339037907920024e-06, "loss": 0.1384, "step": 9344 }, { "epoch": 0.7403446226975638, "grad_norm": 1.5182639092873484, "learning_rate": 3.331991272230044e-06, "loss": 0.1541, "step": 9345 }, { "epoch": 0.7404238463061993, "grad_norm": 1.4540555762844187, "learning_rate": 3.330079192724379e-06, "loss": 0.173, "step": 9346 }, { "epoch": 0.7405030699148346, "grad_norm": 1.6712152429125322, "learning_rate": 3.328167552400906e-06, "loss": 0.2466, "step": 9347 }, { "epoch": 0.74058229352347, "grad_norm": 1.6033877694117369, "learning_rate": 3.326256351385494e-06, "loss": 0.1412, "step": 9348 }, { "epoch": 0.7406615171321054, "grad_norm": 1.1948111751591572, "learning_rate": 3.324345589803991e-06, "loss": 0.1195, "step": 9349 }, { "epoch": 0.7407407407407407, "grad_norm": 1.2426886441315634, "learning_rate": 3.3224352677822115e-06, "loss": 0.1144, "step": 9350 }, { "epoch": 0.7408199643493761, "grad_norm": 1.3915999862572395, "learning_rate": 3.3205253854459386e-06, "loss": 0.1528, "step": 9351 }, { "epoch": 0.7408991879580115, "grad_norm": 2.1206972558200867, "learning_rate": 3.3186159429209263e-06, "loss": 0.1917, "step": 9352 }, { "epoch": 0.7409784115666469, "grad_norm": 1.5048324376194773, "learning_rate": 3.316706940332908e-06, "loss": 0.1529, "step": 9353 }, { "epoch": 0.7410576351752822, "grad_norm": 1.5914554689880618, "learning_rate": 3.314798377807581e-06, "loss": 0.1827, "step": 9354 }, { "epoch": 0.7411368587839177, "grad_norm": 1.7302336924830695, "learning_rate": 3.312890255470609e-06, "loss": 0.2293, "step": 9355 }, { "epoch": 0.741216082392553, "grad_norm": 1.3001394612183932, "learning_rate": 3.3109825734476407e-06, "loss": 0.1528, "step": 9356 }, { "epoch": 0.7412953060011883, "grad_norm": 1.3730314342935777, "learning_rate": 3.3090753318642855e-06, "loss": 0.1331, "step": 9357 }, { "epoch": 0.7413745296098238, "grad_norm": 1.4589420005796052, "learning_rate": 3.307168530846121e-06, "loss": 0.1331, "step": 9358 }, { "epoch": 0.7414537532184591, "grad_norm": 1.5200790879887773, "learning_rate": 3.3052621705187083e-06, "loss": 0.2065, "step": 9359 }, { "epoch": 0.7415329768270945, "grad_norm": 1.6862318268938457, "learning_rate": 3.303356251007569e-06, "loss": 0.1894, "step": 9360 }, { "epoch": 0.7416122004357298, "grad_norm": 1.1620113580523268, "learning_rate": 3.301450772438195e-06, "loss": 0.1255, "step": 9361 }, { "epoch": 0.7416914240443652, "grad_norm": 1.3621520942691776, "learning_rate": 3.2995457349360595e-06, "loss": 0.1105, "step": 9362 }, { "epoch": 0.7417706476530006, "grad_norm": 1.577920579072157, "learning_rate": 3.297641138626597e-06, "loss": 0.279, "step": 9363 }, { "epoch": 0.7418498712616359, "grad_norm": 1.6949766842956953, "learning_rate": 3.295736983635215e-06, "loss": 0.2005, "step": 9364 }, { "epoch": 0.7419290948702714, "grad_norm": 1.4354012168338248, "learning_rate": 3.293833270087291e-06, "loss": 0.1744, "step": 9365 }, { "epoch": 0.7420083184789067, "grad_norm": 1.3364671176844904, "learning_rate": 3.291929998108182e-06, "loss": 0.1422, "step": 9366 }, { "epoch": 0.7420875420875421, "grad_norm": 1.5726889955569778, "learning_rate": 3.2900271678232045e-06, "loss": 0.1848, "step": 9367 }, { "epoch": 0.7421667656961775, "grad_norm": 1.4682398322336907, "learning_rate": 3.2881247793576488e-06, "loss": 0.1602, "step": 9368 }, { "epoch": 0.7422459893048128, "grad_norm": 1.7601241974422648, "learning_rate": 3.286222832836784e-06, "loss": 0.1995, "step": 9369 }, { "epoch": 0.7423252129134482, "grad_norm": 1.13206513269023, "learning_rate": 3.284321328385842e-06, "loss": 0.1277, "step": 9370 }, { "epoch": 0.7424044365220835, "grad_norm": 1.0826381798264555, "learning_rate": 3.282420266130022e-06, "loss": 0.1018, "step": 9371 }, { "epoch": 0.742483660130719, "grad_norm": 1.46516471297859, "learning_rate": 3.280519646194509e-06, "loss": 0.2235, "step": 9372 }, { "epoch": 0.7425628837393543, "grad_norm": 1.2397203554498002, "learning_rate": 3.278619468704445e-06, "loss": 0.1266, "step": 9373 }, { "epoch": 0.7426421073479897, "grad_norm": 1.4545359333169543, "learning_rate": 3.276719733784943e-06, "loss": 0.1848, "step": 9374 }, { "epoch": 0.7427213309566251, "grad_norm": 1.6500272433078569, "learning_rate": 3.2748204415611016e-06, "loss": 0.1366, "step": 9375 }, { "epoch": 0.7428005545652604, "grad_norm": 1.6325770696106963, "learning_rate": 3.2729215921579738e-06, "loss": 0.2028, "step": 9376 }, { "epoch": 0.7428797781738958, "grad_norm": 1.15602012670514, "learning_rate": 3.271023185700587e-06, "loss": 0.1228, "step": 9377 }, { "epoch": 0.7429590017825312, "grad_norm": 1.843362747977543, "learning_rate": 3.269125222313949e-06, "loss": 0.203, "step": 9378 }, { "epoch": 0.7430382253911666, "grad_norm": 1.307618746222223, "learning_rate": 3.2672277021230283e-06, "loss": 0.1149, "step": 9379 }, { "epoch": 0.7431174489998019, "grad_norm": 1.4540246733583013, "learning_rate": 3.2653306252527673e-06, "loss": 0.1659, "step": 9380 }, { "epoch": 0.7431966726084374, "grad_norm": 1.557183174892844, "learning_rate": 3.2634339918280765e-06, "loss": 0.2084, "step": 9381 }, { "epoch": 0.7432758962170727, "grad_norm": 1.3097215630282952, "learning_rate": 3.2615378019738455e-06, "loss": 0.1541, "step": 9382 }, { "epoch": 0.743355119825708, "grad_norm": 1.406429453010852, "learning_rate": 3.2596420558149277e-06, "loss": 0.2183, "step": 9383 }, { "epoch": 0.7434343434343434, "grad_norm": 1.453859466775615, "learning_rate": 3.257746753476144e-06, "loss": 0.1952, "step": 9384 }, { "epoch": 0.7435135670429788, "grad_norm": 1.262752854981948, "learning_rate": 3.255851895082299e-06, "loss": 0.1466, "step": 9385 }, { "epoch": 0.7435927906516142, "grad_norm": 1.396992348994309, "learning_rate": 3.2539574807581555e-06, "loss": 0.16, "step": 9386 }, { "epoch": 0.7436720142602495, "grad_norm": 2.014407761236723, "learning_rate": 3.2520635106284516e-06, "loss": 0.1563, "step": 9387 }, { "epoch": 0.743751237868885, "grad_norm": 1.744449288493045, "learning_rate": 3.250169984817897e-06, "loss": 0.2748, "step": 9388 }, { "epoch": 0.7438304614775203, "grad_norm": 1.725947055342265, "learning_rate": 3.248276903451171e-06, "loss": 0.1688, "step": 9389 }, { "epoch": 0.7439096850861556, "grad_norm": 1.78963784822978, "learning_rate": 3.24638426665292e-06, "loss": 0.2706, "step": 9390 }, { "epoch": 0.7439889086947911, "grad_norm": 2.019731931146409, "learning_rate": 3.2444920745477727e-06, "loss": 0.1885, "step": 9391 }, { "epoch": 0.7440681323034264, "grad_norm": 1.512758703037509, "learning_rate": 3.2426003272603158e-06, "loss": 0.2144, "step": 9392 }, { "epoch": 0.7441473559120618, "grad_norm": 1.5672812969176664, "learning_rate": 3.2407090249151105e-06, "loss": 0.1679, "step": 9393 }, { "epoch": 0.7442265795206972, "grad_norm": 1.4761115109775866, "learning_rate": 3.238818167636695e-06, "loss": 0.1309, "step": 9394 }, { "epoch": 0.7443058031293326, "grad_norm": 1.3793122019726884, "learning_rate": 3.2369277555495705e-06, "loss": 0.1615, "step": 9395 }, { "epoch": 0.7443850267379679, "grad_norm": 1.8198411149293479, "learning_rate": 3.235037788778208e-06, "loss": 0.1588, "step": 9396 }, { "epoch": 0.7444642503466032, "grad_norm": 1.5415560755805162, "learning_rate": 3.2331482674470605e-06, "loss": 0.2437, "step": 9397 }, { "epoch": 0.7445434739552387, "grad_norm": 1.4677539744941233, "learning_rate": 3.2312591916805382e-06, "loss": 0.1498, "step": 9398 }, { "epoch": 0.744622697563874, "grad_norm": 1.353828997542222, "learning_rate": 3.2293705616030267e-06, "loss": 0.1107, "step": 9399 }, { "epoch": 0.7447019211725094, "grad_norm": 1.1243917441623805, "learning_rate": 3.2274823773388885e-06, "loss": 0.082, "step": 9400 }, { "epoch": 0.7447811447811448, "grad_norm": 1.2415656257365184, "learning_rate": 3.2255946390124482e-06, "loss": 0.1102, "step": 9401 }, { "epoch": 0.7448603683897802, "grad_norm": 1.5009386574586785, "learning_rate": 3.223707346748002e-06, "loss": 0.1442, "step": 9402 }, { "epoch": 0.7449395919984155, "grad_norm": 1.2630398148054123, "learning_rate": 3.221820500669823e-06, "loss": 0.1026, "step": 9403 }, { "epoch": 0.7450188156070509, "grad_norm": 1.4765042416967769, "learning_rate": 3.2199341009021514e-06, "loss": 0.196, "step": 9404 }, { "epoch": 0.7450980392156863, "grad_norm": 1.9950592831851934, "learning_rate": 3.218048147569195e-06, "loss": 0.2206, "step": 9405 }, { "epoch": 0.7451772628243216, "grad_norm": 3.0799890651613917, "learning_rate": 3.216162640795133e-06, "loss": 0.2244, "step": 9406 }, { "epoch": 0.745256486432957, "grad_norm": 1.782768468281133, "learning_rate": 3.2142775807041214e-06, "loss": 0.1787, "step": 9407 }, { "epoch": 0.7453357100415924, "grad_norm": 1.7789121044026834, "learning_rate": 3.2123929674202816e-06, "loss": 0.214, "step": 9408 }, { "epoch": 0.7454149336502278, "grad_norm": 1.6260684541933699, "learning_rate": 3.2105088010677e-06, "loss": 0.1881, "step": 9409 }, { "epoch": 0.7454941572588631, "grad_norm": 1.4632101493607146, "learning_rate": 3.2086250817704488e-06, "loss": 0.1556, "step": 9410 }, { "epoch": 0.7455733808674985, "grad_norm": 1.5008100394690242, "learning_rate": 3.2067418096525593e-06, "loss": 0.163, "step": 9411 }, { "epoch": 0.7456526044761339, "grad_norm": 1.9513070603270986, "learning_rate": 3.2048589848380297e-06, "loss": 0.1347, "step": 9412 }, { "epoch": 0.7457318280847692, "grad_norm": 1.3226089248980673, "learning_rate": 3.202976607450844e-06, "loss": 0.089, "step": 9413 }, { "epoch": 0.7458110516934047, "grad_norm": 1.569659335239659, "learning_rate": 3.201094677614943e-06, "loss": 0.1689, "step": 9414 }, { "epoch": 0.74589027530204, "grad_norm": 1.792883878128053, "learning_rate": 3.1992131954542404e-06, "loss": 0.2112, "step": 9415 }, { "epoch": 0.7459694989106754, "grad_norm": 1.4410446105303847, "learning_rate": 3.1973321610926277e-06, "loss": 0.1596, "step": 9416 }, { "epoch": 0.7460487225193108, "grad_norm": 1.6162279520850313, "learning_rate": 3.1954515746539616e-06, "loss": 0.1967, "step": 9417 }, { "epoch": 0.7461279461279461, "grad_norm": 1.4949360680688506, "learning_rate": 3.193571436262064e-06, "loss": 0.1659, "step": 9418 }, { "epoch": 0.7462071697365815, "grad_norm": 1.6659961745475955, "learning_rate": 3.191691746040739e-06, "loss": 0.1283, "step": 9419 }, { "epoch": 0.7462863933452168, "grad_norm": 1.3619291046913964, "learning_rate": 3.189812504113754e-06, "loss": 0.1177, "step": 9420 }, { "epoch": 0.7463656169538523, "grad_norm": 1.7671101665733155, "learning_rate": 3.187933710604847e-06, "loss": 0.1902, "step": 9421 }, { "epoch": 0.7464448405624876, "grad_norm": 1.2793405139697156, "learning_rate": 3.186055365637725e-06, "loss": 0.1365, "step": 9422 }, { "epoch": 0.746524064171123, "grad_norm": 2.045577367070868, "learning_rate": 3.184177469336073e-06, "loss": 0.2738, "step": 9423 }, { "epoch": 0.7466032877797584, "grad_norm": 1.7388327769947831, "learning_rate": 3.1823000218235388e-06, "loss": 0.1509, "step": 9424 }, { "epoch": 0.7466825113883937, "grad_norm": 1.5926056012829106, "learning_rate": 3.180423023223741e-06, "loss": 0.1725, "step": 9425 }, { "epoch": 0.7467617349970291, "grad_norm": 2.3044459028553423, "learning_rate": 3.1785464736602754e-06, "loss": 0.2752, "step": 9426 }, { "epoch": 0.7468409586056645, "grad_norm": 1.3631241901676512, "learning_rate": 3.1766703732567027e-06, "loss": 0.1517, "step": 9427 }, { "epoch": 0.7469201822142999, "grad_norm": 1.8131295120335895, "learning_rate": 3.1747947221365517e-06, "loss": 0.1823, "step": 9428 }, { "epoch": 0.7469994058229352, "grad_norm": 1.4104341139662362, "learning_rate": 3.17291952042333e-06, "loss": 0.129, "step": 9429 }, { "epoch": 0.7470786294315707, "grad_norm": 1.5564989955430768, "learning_rate": 3.171044768240508e-06, "loss": 0.1936, "step": 9430 }, { "epoch": 0.747157853040206, "grad_norm": 1.9275451811158177, "learning_rate": 3.169170465711525e-06, "loss": 0.2043, "step": 9431 }, { "epoch": 0.7472370766488413, "grad_norm": 1.6472144822267165, "learning_rate": 3.167296612959803e-06, "loss": 0.1162, "step": 9432 }, { "epoch": 0.7473163002574768, "grad_norm": 1.738899501926095, "learning_rate": 3.1654232101087225e-06, "loss": 0.2611, "step": 9433 }, { "epoch": 0.7473955238661121, "grad_norm": 1.4511805916709952, "learning_rate": 3.1635502572816333e-06, "loss": 0.155, "step": 9434 }, { "epoch": 0.7474747474747475, "grad_norm": 1.2958664386911982, "learning_rate": 3.1616777546018696e-06, "loss": 0.172, "step": 9435 }, { "epoch": 0.7475539710833828, "grad_norm": 1.5714686865174037, "learning_rate": 3.1598057021927207e-06, "loss": 0.1415, "step": 9436 }, { "epoch": 0.7476331946920183, "grad_norm": 1.5972021640799656, "learning_rate": 3.1579341001774546e-06, "loss": 0.1656, "step": 9437 }, { "epoch": 0.7477124183006536, "grad_norm": 1.2608370954512118, "learning_rate": 3.1560629486793014e-06, "loss": 0.1317, "step": 9438 }, { "epoch": 0.7477916419092889, "grad_norm": 1.8143260551602671, "learning_rate": 3.154192247821476e-06, "loss": 0.1862, "step": 9439 }, { "epoch": 0.7478708655179244, "grad_norm": 1.447571095309219, "learning_rate": 3.1523219977271515e-06, "loss": 0.1465, "step": 9440 }, { "epoch": 0.7479500891265597, "grad_norm": 1.6452166264730648, "learning_rate": 3.1504521985194715e-06, "loss": 0.2042, "step": 9441 }, { "epoch": 0.7480293127351951, "grad_norm": 1.8594082552500155, "learning_rate": 3.1485828503215588e-06, "loss": 0.1828, "step": 9442 }, { "epoch": 0.7481085363438305, "grad_norm": 1.4077381822167334, "learning_rate": 3.1467139532564985e-06, "loss": 0.1069, "step": 9443 }, { "epoch": 0.7481877599524658, "grad_norm": 1.5310639014817258, "learning_rate": 3.144845507447345e-06, "loss": 0.145, "step": 9444 }, { "epoch": 0.7482669835611012, "grad_norm": 1.6000990576281384, "learning_rate": 3.1429775130171337e-06, "loss": 0.1691, "step": 9445 }, { "epoch": 0.7483462071697365, "grad_norm": 1.6749874607193753, "learning_rate": 3.141109970088859e-06, "loss": 0.1878, "step": 9446 }, { "epoch": 0.748425430778372, "grad_norm": 1.4345275074123902, "learning_rate": 3.1392428787854865e-06, "loss": 0.1059, "step": 9447 }, { "epoch": 0.7485046543870073, "grad_norm": 1.7564017617656271, "learning_rate": 3.1373762392299632e-06, "loss": 0.1985, "step": 9448 }, { "epoch": 0.7485838779956427, "grad_norm": 1.639461351728904, "learning_rate": 3.135510051545192e-06, "loss": 0.1623, "step": 9449 }, { "epoch": 0.7486631016042781, "grad_norm": 1.3363716301873942, "learning_rate": 3.133644315854055e-06, "loss": 0.1843, "step": 9450 }, { "epoch": 0.7487423252129134, "grad_norm": 1.2541017702994535, "learning_rate": 3.131779032279397e-06, "loss": 0.1526, "step": 9451 }, { "epoch": 0.7488215488215488, "grad_norm": 1.3861024149164576, "learning_rate": 3.1299142009440463e-06, "loss": 0.1614, "step": 9452 }, { "epoch": 0.7489007724301842, "grad_norm": 1.6010788956064284, "learning_rate": 3.1280498219707876e-06, "loss": 0.155, "step": 9453 }, { "epoch": 0.7489799960388196, "grad_norm": 1.7283847995739627, "learning_rate": 3.1261858954823798e-06, "loss": 0.1936, "step": 9454 }, { "epoch": 0.7490592196474549, "grad_norm": 1.6908437941785552, "learning_rate": 3.12432242160156e-06, "loss": 0.1828, "step": 9455 }, { "epoch": 0.7491384432560904, "grad_norm": 1.31474546484202, "learning_rate": 3.1224594004510246e-06, "loss": 0.1578, "step": 9456 }, { "epoch": 0.7492176668647257, "grad_norm": 1.4845040695694474, "learning_rate": 3.1205968321534406e-06, "loss": 0.1868, "step": 9457 }, { "epoch": 0.749296890473361, "grad_norm": 1.6149009504559704, "learning_rate": 3.1187347168314586e-06, "loss": 0.2453, "step": 9458 }, { "epoch": 0.7493761140819964, "grad_norm": 1.3771976956138505, "learning_rate": 3.1168730546076844e-06, "loss": 0.1281, "step": 9459 }, { "epoch": 0.7494553376906318, "grad_norm": 1.9976614011919243, "learning_rate": 3.1150118456046963e-06, "loss": 0.2294, "step": 9460 }, { "epoch": 0.7495345612992672, "grad_norm": 1.3303440164698377, "learning_rate": 3.1131510899450533e-06, "loss": 0.1462, "step": 9461 }, { "epoch": 0.7496137849079025, "grad_norm": 1.7248594974234859, "learning_rate": 3.1112907877512732e-06, "loss": 0.1885, "step": 9462 }, { "epoch": 0.749693008516538, "grad_norm": 1.8288897650668423, "learning_rate": 3.1094309391458455e-06, "loss": 0.298, "step": 9463 }, { "epoch": 0.7497722321251733, "grad_norm": 1.8095255636537548, "learning_rate": 3.107571544251241e-06, "loss": 0.2326, "step": 9464 }, { "epoch": 0.7498514557338086, "grad_norm": 1.779288428091656, "learning_rate": 3.1057126031898843e-06, "loss": 0.1839, "step": 9465 }, { "epoch": 0.7499306793424441, "grad_norm": 1.4074582777607487, "learning_rate": 3.1038541160841752e-06, "loss": 0.1174, "step": 9466 }, { "epoch": 0.7500099029510794, "grad_norm": 1.5025661199259404, "learning_rate": 3.1019960830564945e-06, "loss": 0.1629, "step": 9467 }, { "epoch": 0.7500891265597148, "grad_norm": 1.7274635264554326, "learning_rate": 3.1001385042291797e-06, "loss": 0.2614, "step": 9468 }, { "epoch": 0.7501683501683502, "grad_norm": 1.9423456864226756, "learning_rate": 3.0982813797245413e-06, "loss": 0.2614, "step": 9469 }, { "epoch": 0.7502475737769856, "grad_norm": 1.5473805181905458, "learning_rate": 3.096424709664868e-06, "loss": 0.1711, "step": 9470 }, { "epoch": 0.7503267973856209, "grad_norm": 1.2942218161109502, "learning_rate": 3.094568494172411e-06, "loss": 0.1358, "step": 9471 }, { "epoch": 0.7504060209942562, "grad_norm": 1.4726105053901786, "learning_rate": 3.0927127333693872e-06, "loss": 0.1426, "step": 9472 }, { "epoch": 0.7504852446028917, "grad_norm": 1.9081955835206885, "learning_rate": 3.090857427377998e-06, "loss": 0.2108, "step": 9473 }, { "epoch": 0.750564468211527, "grad_norm": 2.3596802848866076, "learning_rate": 3.0890025763204025e-06, "loss": 0.2615, "step": 9474 }, { "epoch": 0.7506436918201624, "grad_norm": 1.4060438382280747, "learning_rate": 3.087148180318734e-06, "loss": 0.1506, "step": 9475 }, { "epoch": 0.7507229154287978, "grad_norm": 1.8389606171204427, "learning_rate": 3.0852942394950915e-06, "loss": 0.2007, "step": 9476 }, { "epoch": 0.7508021390374332, "grad_norm": 1.7784206539099288, "learning_rate": 3.083440753971556e-06, "loss": 0.1598, "step": 9477 }, { "epoch": 0.7508813626460685, "grad_norm": 1.3504421080075035, "learning_rate": 3.0815877238701653e-06, "loss": 0.1224, "step": 9478 }, { "epoch": 0.7509605862547039, "grad_norm": 1.5474801975431938, "learning_rate": 3.079735149312931e-06, "loss": 0.2434, "step": 9479 }, { "epoch": 0.7510398098633393, "grad_norm": 1.515364306173949, "learning_rate": 3.077883030421843e-06, "loss": 0.1341, "step": 9480 }, { "epoch": 0.7511190334719746, "grad_norm": 1.299646714737203, "learning_rate": 3.0760313673188493e-06, "loss": 0.119, "step": 9481 }, { "epoch": 0.75119825708061, "grad_norm": 1.466181747769846, "learning_rate": 3.0741801601258714e-06, "loss": 0.1838, "step": 9482 }, { "epoch": 0.7512774806892454, "grad_norm": 1.6578385617417195, "learning_rate": 3.072329408964808e-06, "loss": 0.1662, "step": 9483 }, { "epoch": 0.7513567042978808, "grad_norm": 1.1262558780260976, "learning_rate": 3.0704791139575195e-06, "loss": 0.1023, "step": 9484 }, { "epoch": 0.7514359279065161, "grad_norm": 2.00740069927691, "learning_rate": 3.0686292752258352e-06, "loss": 0.2138, "step": 9485 }, { "epoch": 0.7515151515151515, "grad_norm": 1.4809050641015151, "learning_rate": 3.066779892891564e-06, "loss": 0.1597, "step": 9486 }, { "epoch": 0.7515943751237869, "grad_norm": 1.375738002186897, "learning_rate": 3.064930967076477e-06, "loss": 0.139, "step": 9487 }, { "epoch": 0.7516735987324222, "grad_norm": 1.579032260047389, "learning_rate": 3.063082497902313e-06, "loss": 0.1606, "step": 9488 }, { "epoch": 0.7517528223410577, "grad_norm": 1.691196858421653, "learning_rate": 3.0612344854907917e-06, "loss": 0.1787, "step": 9489 }, { "epoch": 0.751832045949693, "grad_norm": 1.3844071548100543, "learning_rate": 3.0593869299635925e-06, "loss": 0.1259, "step": 9490 }, { "epoch": 0.7519112695583284, "grad_norm": 1.6548482180821298, "learning_rate": 3.0575398314423677e-06, "loss": 0.1792, "step": 9491 }, { "epoch": 0.7519904931669638, "grad_norm": 1.150447114616083, "learning_rate": 3.0556931900487365e-06, "loss": 0.0924, "step": 9492 }, { "epoch": 0.7520697167755991, "grad_norm": 1.378737897018675, "learning_rate": 3.053847005904298e-06, "loss": 0.1825, "step": 9493 }, { "epoch": 0.7521489403842345, "grad_norm": 1.676059084084236, "learning_rate": 3.052001279130612e-06, "loss": 0.1834, "step": 9494 }, { "epoch": 0.7522281639928698, "grad_norm": 1.6508074892109708, "learning_rate": 3.0501560098492056e-06, "loss": 0.18, "step": 9495 }, { "epoch": 0.7523073876015053, "grad_norm": 2.7021218603265207, "learning_rate": 3.0483111981815906e-06, "loss": 0.2028, "step": 9496 }, { "epoch": 0.7523866112101406, "grad_norm": 1.2323339967492575, "learning_rate": 3.046466844249232e-06, "loss": 0.1736, "step": 9497 }, { "epoch": 0.752465834818776, "grad_norm": 1.8515017942315066, "learning_rate": 3.0446229481735713e-06, "loss": 0.2052, "step": 9498 }, { "epoch": 0.7525450584274114, "grad_norm": 1.5596046746561878, "learning_rate": 3.042779510076025e-06, "loss": 0.1753, "step": 9499 }, { "epoch": 0.7526242820360467, "grad_norm": 1.5645444721132473, "learning_rate": 3.0409365300779725e-06, "loss": 0.1821, "step": 9500 }, { "epoch": 0.7527035056446821, "grad_norm": 1.4026216267594054, "learning_rate": 3.039094008300761e-06, "loss": 0.1543, "step": 9501 }, { "epoch": 0.7527827292533175, "grad_norm": 1.7575829887681125, "learning_rate": 3.0372519448657188e-06, "loss": 0.2061, "step": 9502 }, { "epoch": 0.7528619528619529, "grad_norm": 1.3372458080587726, "learning_rate": 3.0354103398941327e-06, "loss": 0.1356, "step": 9503 }, { "epoch": 0.7529411764705882, "grad_norm": 1.8576834048331887, "learning_rate": 3.0335691935072618e-06, "loss": 0.198, "step": 9504 }, { "epoch": 0.7530204000792237, "grad_norm": 1.5522534610656729, "learning_rate": 3.0317285058263426e-06, "loss": 0.1891, "step": 9505 }, { "epoch": 0.753099623687859, "grad_norm": 1.4958394215974595, "learning_rate": 3.029888276972571e-06, "loss": 0.1674, "step": 9506 }, { "epoch": 0.7531788472964943, "grad_norm": 1.7096614413036455, "learning_rate": 3.0280485070671197e-06, "loss": 0.2305, "step": 9507 }, { "epoch": 0.7532580709051298, "grad_norm": 1.424818762948807, "learning_rate": 3.0262091962311234e-06, "loss": 0.0966, "step": 9508 }, { "epoch": 0.7533372945137651, "grad_norm": 1.5254639625246011, "learning_rate": 3.0243703445856985e-06, "loss": 0.1632, "step": 9509 }, { "epoch": 0.7534165181224005, "grad_norm": 1.1802771609778897, "learning_rate": 3.0225319522519226e-06, "loss": 0.1021, "step": 9510 }, { "epoch": 0.7534957417310358, "grad_norm": 2.073340745982076, "learning_rate": 3.0206940193508404e-06, "loss": 0.188, "step": 9511 }, { "epoch": 0.7535749653396713, "grad_norm": 1.5967959405805734, "learning_rate": 3.018856546003479e-06, "loss": 0.1517, "step": 9512 }, { "epoch": 0.7536541889483066, "grad_norm": 1.6089020478761187, "learning_rate": 3.0170195323308216e-06, "loss": 0.1674, "step": 9513 }, { "epoch": 0.7537334125569419, "grad_norm": 1.5956859380251154, "learning_rate": 3.0151829784538257e-06, "loss": 0.2139, "step": 9514 }, { "epoch": 0.7538126361655774, "grad_norm": 1.5600369580201474, "learning_rate": 3.0133468844934245e-06, "loss": 0.1983, "step": 9515 }, { "epoch": 0.7538918597742127, "grad_norm": 1.8680555971439101, "learning_rate": 3.0115112505705134e-06, "loss": 0.1684, "step": 9516 }, { "epoch": 0.7539710833828481, "grad_norm": 1.432040150509459, "learning_rate": 3.0096760768059576e-06, "loss": 0.132, "step": 9517 }, { "epoch": 0.7540503069914835, "grad_norm": 1.1888462611985904, "learning_rate": 3.0078413633205995e-06, "loss": 0.1427, "step": 9518 }, { "epoch": 0.7541295306001188, "grad_norm": 1.3462789053793733, "learning_rate": 3.0060071102352438e-06, "loss": 0.1891, "step": 9519 }, { "epoch": 0.7542087542087542, "grad_norm": 1.741238356940325, "learning_rate": 3.0041733176706668e-06, "loss": 0.211, "step": 9520 }, { "epoch": 0.7542879778173895, "grad_norm": 1.3700825011663031, "learning_rate": 3.002339985747611e-06, "loss": 0.1374, "step": 9521 }, { "epoch": 0.754367201426025, "grad_norm": 1.432665966668152, "learning_rate": 3.0005071145868004e-06, "loss": 0.159, "step": 9522 }, { "epoch": 0.7544464250346603, "grad_norm": 1.2967558619901436, "learning_rate": 2.998674704308917e-06, "loss": 0.152, "step": 9523 }, { "epoch": 0.7545256486432957, "grad_norm": 1.1205437366595565, "learning_rate": 2.9968427550346136e-06, "loss": 0.1215, "step": 9524 }, { "epoch": 0.7546048722519311, "grad_norm": 1.2090976664941415, "learning_rate": 2.9950112668845198e-06, "loss": 0.1311, "step": 9525 }, { "epoch": 0.7546840958605664, "grad_norm": 1.5006182028397699, "learning_rate": 2.9931802399792285e-06, "loss": 0.1715, "step": 9526 }, { "epoch": 0.7547633194692018, "grad_norm": 1.3992895327314552, "learning_rate": 2.9913496744393e-06, "loss": 0.1628, "step": 9527 }, { "epoch": 0.7548425430778372, "grad_norm": 1.6366432547576821, "learning_rate": 2.9895195703852763e-06, "loss": 0.1797, "step": 9528 }, { "epoch": 0.7549217666864726, "grad_norm": 2.1379468857383874, "learning_rate": 2.987689927937656e-06, "loss": 0.184, "step": 9529 }, { "epoch": 0.7550009902951079, "grad_norm": 1.5281675528367953, "learning_rate": 2.98586074721691e-06, "loss": 0.1569, "step": 9530 }, { "epoch": 0.7550802139037434, "grad_norm": 1.6989503178686451, "learning_rate": 2.9840320283434865e-06, "loss": 0.1899, "step": 9531 }, { "epoch": 0.7551594375123787, "grad_norm": 1.6776677936587132, "learning_rate": 2.982203771437796e-06, "loss": 0.2745, "step": 9532 }, { "epoch": 0.755238661121014, "grad_norm": 1.5011554946039176, "learning_rate": 2.9803759766202157e-06, "loss": 0.2085, "step": 9533 }, { "epoch": 0.7553178847296494, "grad_norm": 1.3430163789805196, "learning_rate": 2.9785486440111044e-06, "loss": 0.1675, "step": 9534 }, { "epoch": 0.7553971083382848, "grad_norm": 1.0929451417473897, "learning_rate": 2.9767217737307805e-06, "loss": 0.1189, "step": 9535 }, { "epoch": 0.7554763319469202, "grad_norm": 1.4401374108741671, "learning_rate": 2.974895365899534e-06, "loss": 0.1539, "step": 9536 }, { "epoch": 0.7555555555555555, "grad_norm": 1.1513708039036818, "learning_rate": 2.973069420637621e-06, "loss": 0.1487, "step": 9537 }, { "epoch": 0.755634779164191, "grad_norm": 1.5639579634328595, "learning_rate": 2.971243938065279e-06, "loss": 0.1358, "step": 9538 }, { "epoch": 0.7557140027728263, "grad_norm": 1.382123959219964, "learning_rate": 2.9694189183027034e-06, "loss": 0.1664, "step": 9539 }, { "epoch": 0.7557932263814616, "grad_norm": 1.406794563778815, "learning_rate": 2.9675943614700588e-06, "loss": 0.1452, "step": 9540 }, { "epoch": 0.7558724499900971, "grad_norm": 1.514924470356589, "learning_rate": 2.965770267687492e-06, "loss": 0.1462, "step": 9541 }, { "epoch": 0.7559516735987324, "grad_norm": 1.8229121116583087, "learning_rate": 2.963946637075107e-06, "loss": 0.2201, "step": 9542 }, { "epoch": 0.7560308972073678, "grad_norm": 1.738524902932573, "learning_rate": 2.9621234697529787e-06, "loss": 0.174, "step": 9543 }, { "epoch": 0.7561101208160032, "grad_norm": 1.317188089571558, "learning_rate": 2.9603007658411575e-06, "loss": 0.1282, "step": 9544 }, { "epoch": 0.7561893444246386, "grad_norm": 1.6382582803074983, "learning_rate": 2.958478525459657e-06, "loss": 0.1706, "step": 9545 }, { "epoch": 0.7562685680332739, "grad_norm": 1.6255078569023884, "learning_rate": 2.9566567487284613e-06, "loss": 0.2551, "step": 9546 }, { "epoch": 0.7563477916419092, "grad_norm": 1.5063095558374369, "learning_rate": 2.9548354357675325e-06, "loss": 0.1402, "step": 9547 }, { "epoch": 0.7564270152505447, "grad_norm": 1.599615198473144, "learning_rate": 2.9530145866967897e-06, "loss": 0.0952, "step": 9548 }, { "epoch": 0.75650623885918, "grad_norm": 1.4484769409306653, "learning_rate": 2.951194201636125e-06, "loss": 0.1212, "step": 9549 }, { "epoch": 0.7565854624678154, "grad_norm": 1.4000576094962034, "learning_rate": 2.9493742807054094e-06, "loss": 0.1396, "step": 9550 }, { "epoch": 0.7566646860764508, "grad_norm": 2.0835471251443067, "learning_rate": 2.947554824024472e-06, "loss": 0.2206, "step": 9551 }, { "epoch": 0.7567439096850862, "grad_norm": 1.4233756794852277, "learning_rate": 2.9457358317131125e-06, "loss": 0.157, "step": 9552 }, { "epoch": 0.7568231332937215, "grad_norm": 1.4410107839892405, "learning_rate": 2.943917303891107e-06, "loss": 0.1582, "step": 9553 }, { "epoch": 0.7569023569023569, "grad_norm": 1.4712287477447172, "learning_rate": 2.942099240678197e-06, "loss": 0.1995, "step": 9554 }, { "epoch": 0.7569815805109923, "grad_norm": 1.2676143088681964, "learning_rate": 2.940281642194087e-06, "loss": 0.1041, "step": 9555 }, { "epoch": 0.7570608041196276, "grad_norm": 1.2500532005652834, "learning_rate": 2.938464508558466e-06, "loss": 0.1353, "step": 9556 }, { "epoch": 0.757140027728263, "grad_norm": 1.6491881488554911, "learning_rate": 2.936647839890979e-06, "loss": 0.2026, "step": 9557 }, { "epoch": 0.7572192513368984, "grad_norm": 1.4519368879855936, "learning_rate": 2.9348316363112417e-06, "loss": 0.1163, "step": 9558 }, { "epoch": 0.7572984749455338, "grad_norm": 1.4684016565934788, "learning_rate": 2.933015897938849e-06, "loss": 0.172, "step": 9559 }, { "epoch": 0.7573776985541691, "grad_norm": 1.8279741950236903, "learning_rate": 2.9312006248933543e-06, "loss": 0.1942, "step": 9560 }, { "epoch": 0.7574569221628045, "grad_norm": 1.2593997777981085, "learning_rate": 2.9293858172942867e-06, "loss": 0.0952, "step": 9561 }, { "epoch": 0.7575361457714399, "grad_norm": 1.685169523373592, "learning_rate": 2.9275714752611383e-06, "loss": 0.1777, "step": 9562 }, { "epoch": 0.7576153693800752, "grad_norm": 1.6293331300171128, "learning_rate": 2.9257575989133803e-06, "loss": 0.188, "step": 9563 }, { "epoch": 0.7576945929887107, "grad_norm": 1.7277992381634562, "learning_rate": 2.9239441883704455e-06, "loss": 0.1797, "step": 9564 }, { "epoch": 0.757773816597346, "grad_norm": 2.0718207155312234, "learning_rate": 2.9221312437517357e-06, "loss": 0.2353, "step": 9565 }, { "epoch": 0.7578530402059814, "grad_norm": 1.8272780663551813, "learning_rate": 2.9203187651766297e-06, "loss": 0.1789, "step": 9566 }, { "epoch": 0.7579322638146168, "grad_norm": 1.3008179692988004, "learning_rate": 2.918506752764467e-06, "loss": 0.1225, "step": 9567 }, { "epoch": 0.7580114874232521, "grad_norm": 1.5091268751971438, "learning_rate": 2.916695206634558e-06, "loss": 0.1632, "step": 9568 }, { "epoch": 0.7580907110318875, "grad_norm": 1.3912688793467687, "learning_rate": 2.91488412690619e-06, "loss": 0.1382, "step": 9569 }, { "epoch": 0.7581699346405228, "grad_norm": 1.5009418911861847, "learning_rate": 2.913073513698611e-06, "loss": 0.1864, "step": 9570 }, { "epoch": 0.7582491582491583, "grad_norm": 1.9032669807975269, "learning_rate": 2.9112633671310387e-06, "loss": 0.1325, "step": 9571 }, { "epoch": 0.7583283818577936, "grad_norm": 1.305921119378279, "learning_rate": 2.9094536873226663e-06, "loss": 0.1072, "step": 9572 }, { "epoch": 0.758407605466429, "grad_norm": 1.7762254367444177, "learning_rate": 2.9076444743926524e-06, "loss": 0.2143, "step": 9573 }, { "epoch": 0.7584868290750644, "grad_norm": 1.7018412652287815, "learning_rate": 2.9058357284601204e-06, "loss": 0.1191, "step": 9574 }, { "epoch": 0.7585660526836997, "grad_norm": 1.503477976295376, "learning_rate": 2.9040274496441732e-06, "loss": 0.2234, "step": 9575 }, { "epoch": 0.7586452762923351, "grad_norm": 1.7476893090817263, "learning_rate": 2.902219638063876e-06, "loss": 0.2661, "step": 9576 }, { "epoch": 0.7587244999009705, "grad_norm": 1.29339856905736, "learning_rate": 2.9004122938382617e-06, "loss": 0.1679, "step": 9577 }, { "epoch": 0.7588037235096059, "grad_norm": 1.4837709946084463, "learning_rate": 2.8986054170863344e-06, "loss": 0.1563, "step": 9578 }, { "epoch": 0.7588829471182412, "grad_norm": 1.5938066144230532, "learning_rate": 2.8967990079270736e-06, "loss": 0.1486, "step": 9579 }, { "epoch": 0.7589621707268767, "grad_norm": 2.4694283380933273, "learning_rate": 2.89499306647942e-06, "loss": 0.2052, "step": 9580 }, { "epoch": 0.759041394335512, "grad_norm": 1.437866552677991, "learning_rate": 2.8931875928622833e-06, "loss": 0.1401, "step": 9581 }, { "epoch": 0.7591206179441473, "grad_norm": 1.4191523455019126, "learning_rate": 2.89138258719455e-06, "loss": 0.177, "step": 9582 }, { "epoch": 0.7591998415527828, "grad_norm": 2.07873487029662, "learning_rate": 2.8895780495950687e-06, "loss": 0.1973, "step": 9583 }, { "epoch": 0.7592790651614181, "grad_norm": 1.6452833501135546, "learning_rate": 2.8877739801826577e-06, "loss": 0.1552, "step": 9584 }, { "epoch": 0.7593582887700535, "grad_norm": 1.369192058040035, "learning_rate": 2.8859703790761095e-06, "loss": 0.152, "step": 9585 }, { "epoch": 0.7594375123786888, "grad_norm": 1.9321763671502792, "learning_rate": 2.8841672463941827e-06, "loss": 0.1726, "step": 9586 }, { "epoch": 0.7595167359873243, "grad_norm": 1.921832610963072, "learning_rate": 2.8823645822556e-06, "loss": 0.1714, "step": 9587 }, { "epoch": 0.7595959595959596, "grad_norm": 1.8154885116918609, "learning_rate": 2.8805623867790655e-06, "loss": 0.2201, "step": 9588 }, { "epoch": 0.7596751832045949, "grad_norm": 1.9145352741394648, "learning_rate": 2.8787606600832408e-06, "loss": 0.2156, "step": 9589 }, { "epoch": 0.7597544068132304, "grad_norm": 1.271771240554462, "learning_rate": 2.876959402286759e-06, "loss": 0.138, "step": 9590 }, { "epoch": 0.7598336304218657, "grad_norm": 1.8684982347885792, "learning_rate": 2.8751586135082275e-06, "loss": 0.2539, "step": 9591 }, { "epoch": 0.7599128540305011, "grad_norm": 1.3755525710439678, "learning_rate": 2.873358293866221e-06, "loss": 0.13, "step": 9592 }, { "epoch": 0.7599920776391365, "grad_norm": 1.640054636391881, "learning_rate": 2.8715584434792786e-06, "loss": 0.1681, "step": 9593 }, { "epoch": 0.7600713012477719, "grad_norm": 1.6853438442092112, "learning_rate": 2.86975906246591e-06, "loss": 0.1613, "step": 9594 }, { "epoch": 0.7601505248564072, "grad_norm": 1.3471935320060047, "learning_rate": 2.867960150944602e-06, "loss": 0.1387, "step": 9595 }, { "epoch": 0.7602297484650425, "grad_norm": 1.256638804919288, "learning_rate": 2.8661617090338e-06, "loss": 0.0964, "step": 9596 }, { "epoch": 0.760308972073678, "grad_norm": 1.7778993680672321, "learning_rate": 2.864363736851922e-06, "loss": 0.2379, "step": 9597 }, { "epoch": 0.7603881956823133, "grad_norm": 1.2997168805276131, "learning_rate": 2.86256623451736e-06, "loss": 0.1272, "step": 9598 }, { "epoch": 0.7604674192909487, "grad_norm": 1.7084142028835376, "learning_rate": 2.860769202148468e-06, "loss": 0.1282, "step": 9599 }, { "epoch": 0.7605466428995841, "grad_norm": 1.5557942875260857, "learning_rate": 2.8589726398635688e-06, "loss": 0.1508, "step": 9600 }, { "epoch": 0.7606258665082194, "grad_norm": 1.3879859302814046, "learning_rate": 2.8571765477809645e-06, "loss": 0.167, "step": 9601 }, { "epoch": 0.7607050901168548, "grad_norm": 1.7750074451368518, "learning_rate": 2.8553809260189145e-06, "loss": 0.1393, "step": 9602 }, { "epoch": 0.7607843137254902, "grad_norm": 1.179348197774705, "learning_rate": 2.8535857746956507e-06, "loss": 0.094, "step": 9603 }, { "epoch": 0.7608635373341256, "grad_norm": 1.4127493906560673, "learning_rate": 2.8517910939293804e-06, "loss": 0.139, "step": 9604 }, { "epoch": 0.7609427609427609, "grad_norm": 1.2459826528965805, "learning_rate": 2.849996883838271e-06, "loss": 0.1002, "step": 9605 }, { "epoch": 0.7610219845513964, "grad_norm": 1.6451252235548848, "learning_rate": 2.8482031445404634e-06, "loss": 0.1791, "step": 9606 }, { "epoch": 0.7611012081600317, "grad_norm": 1.337216408052708, "learning_rate": 2.8464098761540637e-06, "loss": 0.1271, "step": 9607 }, { "epoch": 0.761180431768667, "grad_norm": 2.023526144744933, "learning_rate": 2.844617078797155e-06, "loss": 0.2025, "step": 9608 }, { "epoch": 0.7612596553773024, "grad_norm": 1.313264867396571, "learning_rate": 2.842824752587783e-06, "loss": 0.1312, "step": 9609 }, { "epoch": 0.7613388789859378, "grad_norm": 1.6196252825965052, "learning_rate": 2.8410328976439595e-06, "loss": 0.1617, "step": 9610 }, { "epoch": 0.7614181025945732, "grad_norm": 1.979528941108533, "learning_rate": 2.839241514083676e-06, "loss": 0.2677, "step": 9611 }, { "epoch": 0.7614973262032085, "grad_norm": 1.4385663910436548, "learning_rate": 2.837450602024884e-06, "loss": 0.18, "step": 9612 }, { "epoch": 0.761576549811844, "grad_norm": 1.0780845427989896, "learning_rate": 2.8356601615855027e-06, "loss": 0.126, "step": 9613 }, { "epoch": 0.7616557734204793, "grad_norm": 1.231273500924223, "learning_rate": 2.83387019288343e-06, "loss": 0.1441, "step": 9614 }, { "epoch": 0.7617349970291146, "grad_norm": 1.5717574795388507, "learning_rate": 2.8320806960365234e-06, "loss": 0.1613, "step": 9615 }, { "epoch": 0.7618142206377501, "grad_norm": 0.9814814106489694, "learning_rate": 2.8302916711626106e-06, "loss": 0.0814, "step": 9616 }, { "epoch": 0.7618934442463854, "grad_norm": 1.5192828646830723, "learning_rate": 2.8285031183794955e-06, "loss": 0.2242, "step": 9617 }, { "epoch": 0.7619726678550208, "grad_norm": 2.018901698230236, "learning_rate": 2.8267150378049437e-06, "loss": 0.2327, "step": 9618 }, { "epoch": 0.7620518914636562, "grad_norm": 1.2212080627070687, "learning_rate": 2.8249274295566863e-06, "loss": 0.1378, "step": 9619 }, { "epoch": 0.7621311150722916, "grad_norm": 1.5463847352459796, "learning_rate": 2.823140293752441e-06, "loss": 0.1649, "step": 9620 }, { "epoch": 0.7622103386809269, "grad_norm": 1.44265243264647, "learning_rate": 2.821353630509871e-06, "loss": 0.1677, "step": 9621 }, { "epoch": 0.7622895622895622, "grad_norm": 1.306445105055589, "learning_rate": 2.819567439946621e-06, "loss": 0.1226, "step": 9622 }, { "epoch": 0.7623687858981977, "grad_norm": 1.160189404636993, "learning_rate": 2.8177817221803074e-06, "loss": 0.0915, "step": 9623 }, { "epoch": 0.762448009506833, "grad_norm": 1.5325320464922008, "learning_rate": 2.8159964773285074e-06, "loss": 0.1163, "step": 9624 }, { "epoch": 0.7625272331154684, "grad_norm": 1.22957711943427, "learning_rate": 2.8142117055087704e-06, "loss": 0.106, "step": 9625 }, { "epoch": 0.7626064567241038, "grad_norm": 2.23182605516539, "learning_rate": 2.8124274068386203e-06, "loss": 0.2043, "step": 9626 }, { "epoch": 0.7626856803327392, "grad_norm": 1.422011298494945, "learning_rate": 2.8106435814355404e-06, "loss": 0.1611, "step": 9627 }, { "epoch": 0.7627649039413745, "grad_norm": 1.7184536338110958, "learning_rate": 2.808860229416984e-06, "loss": 0.2071, "step": 9628 }, { "epoch": 0.7628441275500099, "grad_norm": 1.694648239779594, "learning_rate": 2.8070773509003846e-06, "loss": 0.1873, "step": 9629 }, { "epoch": 0.7629233511586453, "grad_norm": 1.4848059830422866, "learning_rate": 2.80529494600313e-06, "loss": 0.2009, "step": 9630 }, { "epoch": 0.7630025747672806, "grad_norm": 1.324079533465585, "learning_rate": 2.8035130148425847e-06, "loss": 0.1172, "step": 9631 }, { "epoch": 0.763081798375916, "grad_norm": 1.3923843040518629, "learning_rate": 2.801731557536078e-06, "loss": 0.1434, "step": 9632 }, { "epoch": 0.7631610219845514, "grad_norm": 1.5433305862693556, "learning_rate": 2.799950574200915e-06, "loss": 0.1658, "step": 9633 }, { "epoch": 0.7632402455931868, "grad_norm": 1.3450891535687375, "learning_rate": 2.7981700649543618e-06, "loss": 0.1416, "step": 9634 }, { "epoch": 0.7633194692018221, "grad_norm": 1.5095267673182209, "learning_rate": 2.796390029913655e-06, "loss": 0.162, "step": 9635 }, { "epoch": 0.7633986928104575, "grad_norm": 1.8925942350968739, "learning_rate": 2.794610469196004e-06, "loss": 0.1841, "step": 9636 }, { "epoch": 0.7634779164190929, "grad_norm": 1.4807924470603768, "learning_rate": 2.792831382918585e-06, "loss": 0.2163, "step": 9637 }, { "epoch": 0.7635571400277282, "grad_norm": 1.4452815585896193, "learning_rate": 2.791052771198538e-06, "loss": 0.1112, "step": 9638 }, { "epoch": 0.7636363636363637, "grad_norm": 1.5232367235770772, "learning_rate": 2.7892746341529807e-06, "loss": 0.192, "step": 9639 }, { "epoch": 0.763715587244999, "grad_norm": 1.3599665957013363, "learning_rate": 2.7874969718989943e-06, "loss": 0.1521, "step": 9640 }, { "epoch": 0.7637948108536344, "grad_norm": 1.4447732718014117, "learning_rate": 2.785719784553624e-06, "loss": 0.1947, "step": 9641 }, { "epoch": 0.7638740344622698, "grad_norm": 1.5621212162892377, "learning_rate": 2.7839430722338956e-06, "loss": 0.13, "step": 9642 }, { "epoch": 0.7639532580709051, "grad_norm": 1.2877905146296533, "learning_rate": 2.7821668350567956e-06, "loss": 0.1647, "step": 9643 }, { "epoch": 0.7640324816795405, "grad_norm": 1.9262421814794235, "learning_rate": 2.7803910731392757e-06, "loss": 0.2392, "step": 9644 }, { "epoch": 0.7641117052881758, "grad_norm": 1.2307735896125025, "learning_rate": 2.778615786598269e-06, "loss": 0.1711, "step": 9645 }, { "epoch": 0.7641909288968113, "grad_norm": 1.2177517200257129, "learning_rate": 2.776840975550664e-06, "loss": 0.1363, "step": 9646 }, { "epoch": 0.7642701525054466, "grad_norm": 1.5107929969953622, "learning_rate": 2.7750666401133263e-06, "loss": 0.1369, "step": 9647 }, { "epoch": 0.764349376114082, "grad_norm": 1.2828901367921461, "learning_rate": 2.773292780403083e-06, "loss": 0.1163, "step": 9648 }, { "epoch": 0.7644285997227174, "grad_norm": 1.3996960526650157, "learning_rate": 2.7715193965367403e-06, "loss": 0.1517, "step": 9649 }, { "epoch": 0.7645078233313527, "grad_norm": 1.5365459237715637, "learning_rate": 2.769746488631064e-06, "loss": 0.1869, "step": 9650 }, { "epoch": 0.7645870469399881, "grad_norm": 1.5929381783597985, "learning_rate": 2.767974056802789e-06, "loss": 0.1539, "step": 9651 }, { "epoch": 0.7646662705486235, "grad_norm": 1.5861360416566839, "learning_rate": 2.766202101168628e-06, "loss": 0.1722, "step": 9652 }, { "epoch": 0.7647454941572589, "grad_norm": 1.196894639185933, "learning_rate": 2.76443062184525e-06, "loss": 0.1536, "step": 9653 }, { "epoch": 0.7648247177658942, "grad_norm": 1.7041088402699311, "learning_rate": 2.7626596189492983e-06, "loss": 0.1845, "step": 9654 }, { "epoch": 0.7649039413745297, "grad_norm": 1.577181474857777, "learning_rate": 2.76088909259739e-06, "loss": 0.1542, "step": 9655 }, { "epoch": 0.764983164983165, "grad_norm": 1.4651262723903933, "learning_rate": 2.7591190429061023e-06, "loss": 0.1305, "step": 9656 }, { "epoch": 0.7650623885918003, "grad_norm": 1.7243378428260292, "learning_rate": 2.757349469991981e-06, "loss": 0.1935, "step": 9657 }, { "epoch": 0.7651416122004358, "grad_norm": 1.1589142122615406, "learning_rate": 2.7555803739715512e-06, "loss": 0.141, "step": 9658 }, { "epoch": 0.7652208358090711, "grad_norm": 1.4854231604298274, "learning_rate": 2.7538117549612963e-06, "loss": 0.1379, "step": 9659 }, { "epoch": 0.7653000594177065, "grad_norm": 1.7407534740746045, "learning_rate": 2.752043613077667e-06, "loss": 0.1274, "step": 9660 }, { "epoch": 0.7653792830263418, "grad_norm": 1.5712677410420897, "learning_rate": 2.7502759484370946e-06, "loss": 0.1264, "step": 9661 }, { "epoch": 0.7654585066349773, "grad_norm": 1.7000941047417002, "learning_rate": 2.748508761155967e-06, "loss": 0.1915, "step": 9662 }, { "epoch": 0.7655377302436126, "grad_norm": 1.9445900370370082, "learning_rate": 2.746742051350646e-06, "loss": 0.1806, "step": 9663 }, { "epoch": 0.7656169538522479, "grad_norm": 1.7703423683995427, "learning_rate": 2.7449758191374574e-06, "loss": 0.2514, "step": 9664 }, { "epoch": 0.7656961774608834, "grad_norm": 1.3939962870276839, "learning_rate": 2.7432100646327043e-06, "loss": 0.1645, "step": 9665 }, { "epoch": 0.7657754010695187, "grad_norm": 1.7172410780905545, "learning_rate": 2.7414447879526517e-06, "loss": 0.1765, "step": 9666 }, { "epoch": 0.7658546246781541, "grad_norm": 1.6818708075775044, "learning_rate": 2.739679989213532e-06, "loss": 0.2081, "step": 9667 }, { "epoch": 0.7659338482867895, "grad_norm": 1.6190776937720763, "learning_rate": 2.7379156685315523e-06, "loss": 0.1101, "step": 9668 }, { "epoch": 0.7660130718954249, "grad_norm": 1.2116929712336613, "learning_rate": 2.7361518260228827e-06, "loss": 0.1321, "step": 9669 }, { "epoch": 0.7660922955040602, "grad_norm": 1.2525606397892568, "learning_rate": 2.734388461803661e-06, "loss": 0.1486, "step": 9670 }, { "epoch": 0.7661715191126955, "grad_norm": 1.1670401297199537, "learning_rate": 2.7326255759900024e-06, "loss": 0.1175, "step": 9671 }, { "epoch": 0.766250742721331, "grad_norm": 1.2527933234253887, "learning_rate": 2.7308631686979816e-06, "loss": 0.1383, "step": 9672 }, { "epoch": 0.7663299663299663, "grad_norm": 1.3854187710121246, "learning_rate": 2.7291012400436414e-06, "loss": 0.1162, "step": 9673 }, { "epoch": 0.7664091899386017, "grad_norm": 1.4693335401184864, "learning_rate": 2.7273397901430023e-06, "loss": 0.1125, "step": 9674 }, { "epoch": 0.7664884135472371, "grad_norm": 1.4071235110547582, "learning_rate": 2.7255788191120435e-06, "loss": 0.1585, "step": 9675 }, { "epoch": 0.7665676371558724, "grad_norm": 1.4597427004785417, "learning_rate": 2.723818327066717e-06, "loss": 0.1484, "step": 9676 }, { "epoch": 0.7666468607645078, "grad_norm": 1.9686593105675911, "learning_rate": 2.722058314122941e-06, "loss": 0.1512, "step": 9677 }, { "epoch": 0.7667260843731432, "grad_norm": 1.5239753346238663, "learning_rate": 2.7202987803966073e-06, "loss": 0.1411, "step": 9678 }, { "epoch": 0.7668053079817786, "grad_norm": 1.3758904014543183, "learning_rate": 2.718539726003573e-06, "loss": 0.1966, "step": 9679 }, { "epoch": 0.7668845315904139, "grad_norm": 1.4502387354498574, "learning_rate": 2.7167811510596577e-06, "loss": 0.1458, "step": 9680 }, { "epoch": 0.7669637551990494, "grad_norm": 1.7872649606072382, "learning_rate": 2.715023055680661e-06, "loss": 0.1841, "step": 9681 }, { "epoch": 0.7670429788076847, "grad_norm": 1.4927818796313443, "learning_rate": 2.7132654399823444e-06, "loss": 0.1733, "step": 9682 }, { "epoch": 0.76712220241632, "grad_norm": 1.503873075843439, "learning_rate": 2.7115083040804337e-06, "loss": 0.1893, "step": 9683 }, { "epoch": 0.7672014260249554, "grad_norm": 1.769570104840266, "learning_rate": 2.709751648090634e-06, "loss": 0.1411, "step": 9684 }, { "epoch": 0.7672806496335908, "grad_norm": 1.4620511932663662, "learning_rate": 2.7079954721286108e-06, "loss": 0.1342, "step": 9685 }, { "epoch": 0.7673598732422262, "grad_norm": 1.4318126117244618, "learning_rate": 2.7062397763099945e-06, "loss": 0.187, "step": 9686 }, { "epoch": 0.7674390968508615, "grad_norm": 1.338005773122725, "learning_rate": 2.7044845607503967e-06, "loss": 0.1443, "step": 9687 }, { "epoch": 0.767518320459497, "grad_norm": 1.8021723454150782, "learning_rate": 2.7027298255653878e-06, "loss": 0.2071, "step": 9688 }, { "epoch": 0.7675975440681323, "grad_norm": 1.6713797971425046, "learning_rate": 2.700975570870503e-06, "loss": 0.158, "step": 9689 }, { "epoch": 0.7676767676767676, "grad_norm": 1.4369677431522412, "learning_rate": 2.6992217967812606e-06, "loss": 0.1794, "step": 9690 }, { "epoch": 0.7677559912854031, "grad_norm": 1.9197999100199266, "learning_rate": 2.697468503413134e-06, "loss": 0.2019, "step": 9691 }, { "epoch": 0.7678352148940384, "grad_norm": 1.9034338866402514, "learning_rate": 2.6957156908815684e-06, "loss": 0.1579, "step": 9692 }, { "epoch": 0.7679144385026738, "grad_norm": 1.5096202116532995, "learning_rate": 2.6939633593019754e-06, "loss": 0.1723, "step": 9693 }, { "epoch": 0.7679936621113091, "grad_norm": 1.7017743281171718, "learning_rate": 2.692211508789744e-06, "loss": 0.1803, "step": 9694 }, { "epoch": 0.7680728857199446, "grad_norm": 1.8780963774433932, "learning_rate": 2.6904601394602216e-06, "loss": 0.2148, "step": 9695 }, { "epoch": 0.7681521093285799, "grad_norm": 1.9397399745656363, "learning_rate": 2.688709251428725e-06, "loss": 0.188, "step": 9696 }, { "epoch": 0.7682313329372152, "grad_norm": 1.69676072172985, "learning_rate": 2.6869588448105475e-06, "loss": 0.169, "step": 9697 }, { "epoch": 0.7683105565458507, "grad_norm": 1.1825123664861208, "learning_rate": 2.685208919720942e-06, "loss": 0.1625, "step": 9698 }, { "epoch": 0.768389780154486, "grad_norm": 1.436483432632273, "learning_rate": 2.683459476275133e-06, "loss": 0.2066, "step": 9699 }, { "epoch": 0.7684690037631214, "grad_norm": 1.5564603859537167, "learning_rate": 2.6817105145883117e-06, "loss": 0.1939, "step": 9700 }, { "epoch": 0.7685482273717568, "grad_norm": 2.280417242409304, "learning_rate": 2.6799620347756407e-06, "loss": 0.1575, "step": 9701 }, { "epoch": 0.7686274509803922, "grad_norm": 1.931068521887391, "learning_rate": 2.6782140369522435e-06, "loss": 0.2388, "step": 9702 }, { "epoch": 0.7687066745890275, "grad_norm": 1.4331824951222825, "learning_rate": 2.676466521233225e-06, "loss": 0.1809, "step": 9703 }, { "epoch": 0.7687858981976629, "grad_norm": 1.5840426542635977, "learning_rate": 2.674719487733649e-06, "loss": 0.163, "step": 9704 }, { "epoch": 0.7688651218062983, "grad_norm": 1.3173347634449153, "learning_rate": 2.672972936568543e-06, "loss": 0.1664, "step": 9705 }, { "epoch": 0.7689443454149336, "grad_norm": 1.6319341472274438, "learning_rate": 2.6712268678529187e-06, "loss": 0.1847, "step": 9706 }, { "epoch": 0.769023569023569, "grad_norm": 1.471449514737636, "learning_rate": 2.669481281701739e-06, "loss": 0.144, "step": 9707 }, { "epoch": 0.7691027926322044, "grad_norm": 1.3948400133422167, "learning_rate": 2.6677361782299437e-06, "loss": 0.1327, "step": 9708 }, { "epoch": 0.7691820162408398, "grad_norm": 1.3903835026277642, "learning_rate": 2.665991557552442e-06, "loss": 0.1632, "step": 9709 }, { "epoch": 0.7692612398494751, "grad_norm": 1.6207797290057946, "learning_rate": 2.6642474197841086e-06, "loss": 0.2303, "step": 9710 }, { "epoch": 0.7693404634581105, "grad_norm": 1.8834141426008586, "learning_rate": 2.6625037650397812e-06, "loss": 0.1247, "step": 9711 }, { "epoch": 0.7694196870667459, "grad_norm": 1.3870334523208032, "learning_rate": 2.6607605934342785e-06, "loss": 0.1609, "step": 9712 }, { "epoch": 0.7694989106753812, "grad_norm": 1.7396748490946308, "learning_rate": 2.659017905082376e-06, "loss": 0.221, "step": 9713 }, { "epoch": 0.7695781342840167, "grad_norm": 1.240843454545499, "learning_rate": 2.657275700098819e-06, "loss": 0.1665, "step": 9714 }, { "epoch": 0.769657357892652, "grad_norm": 1.4015503651814274, "learning_rate": 2.65553397859833e-06, "loss": 0.1737, "step": 9715 }, { "epoch": 0.7697365815012874, "grad_norm": 1.5220039389197275, "learning_rate": 2.6537927406955888e-06, "loss": 0.1305, "step": 9716 }, { "epoch": 0.7698158051099228, "grad_norm": 1.2478083610677968, "learning_rate": 2.6520519865052476e-06, "loss": 0.0952, "step": 9717 }, { "epoch": 0.7698950287185581, "grad_norm": 1.9702254997979598, "learning_rate": 2.6503117161419246e-06, "loss": 0.1986, "step": 9718 }, { "epoch": 0.7699742523271935, "grad_norm": 1.0945501678400622, "learning_rate": 2.6485719297202127e-06, "loss": 0.0925, "step": 9719 }, { "epoch": 0.7700534759358288, "grad_norm": 1.5997961895467323, "learning_rate": 2.646832627354667e-06, "loss": 0.1908, "step": 9720 }, { "epoch": 0.7701326995444643, "grad_norm": 1.088008676941749, "learning_rate": 2.645093809159809e-06, "loss": 0.151, "step": 9721 }, { "epoch": 0.7702119231530996, "grad_norm": 1.48723249216091, "learning_rate": 2.643355475250137e-06, "loss": 0.2438, "step": 9722 }, { "epoch": 0.770291146761735, "grad_norm": 1.4516588297601196, "learning_rate": 2.6416176257401083e-06, "loss": 0.1821, "step": 9723 }, { "epoch": 0.7703703703703704, "grad_norm": 1.6012799335730787, "learning_rate": 2.639880260744151e-06, "loss": 0.1535, "step": 9724 }, { "epoch": 0.7704495939790057, "grad_norm": 1.891786305396332, "learning_rate": 2.6381433803766654e-06, "loss": 0.2046, "step": 9725 }, { "epoch": 0.7705288175876411, "grad_norm": 1.5622293649243357, "learning_rate": 2.6364069847520155e-06, "loss": 0.1396, "step": 9726 }, { "epoch": 0.7706080411962765, "grad_norm": 1.0209495961864432, "learning_rate": 2.6346710739845317e-06, "loss": 0.0924, "step": 9727 }, { "epoch": 0.7706872648049119, "grad_norm": 1.3425405787146187, "learning_rate": 2.6329356481885215e-06, "loss": 0.1175, "step": 9728 }, { "epoch": 0.7707664884135472, "grad_norm": 1.34725029498791, "learning_rate": 2.6312007074782497e-06, "loss": 0.1341, "step": 9729 }, { "epoch": 0.7708457120221827, "grad_norm": 1.9182872177858583, "learning_rate": 2.6294662519679525e-06, "loss": 0.1947, "step": 9730 }, { "epoch": 0.770924935630818, "grad_norm": 1.595612126280618, "learning_rate": 2.627732281771841e-06, "loss": 0.0958, "step": 9731 }, { "epoch": 0.7710041592394533, "grad_norm": 1.2510076883521055, "learning_rate": 2.6259987970040858e-06, "loss": 0.1062, "step": 9732 }, { "epoch": 0.7710833828480887, "grad_norm": 1.414299427576783, "learning_rate": 2.6242657977788277e-06, "loss": 0.1438, "step": 9733 }, { "epoch": 0.7711626064567241, "grad_norm": 2.071766309134072, "learning_rate": 2.6225332842101746e-06, "loss": 0.1573, "step": 9734 }, { "epoch": 0.7712418300653595, "grad_norm": 1.52970541262464, "learning_rate": 2.6208012564122097e-06, "loss": 0.1198, "step": 9735 }, { "epoch": 0.7713210536739948, "grad_norm": 2.2384678881550117, "learning_rate": 2.6190697144989753e-06, "loss": 0.1716, "step": 9736 }, { "epoch": 0.7714002772826303, "grad_norm": 1.4394732609086163, "learning_rate": 2.617338658584483e-06, "loss": 0.1751, "step": 9737 }, { "epoch": 0.7714795008912656, "grad_norm": 1.3999492509883333, "learning_rate": 2.6156080887827183e-06, "loss": 0.1335, "step": 9738 }, { "epoch": 0.7715587244999009, "grad_norm": 1.481491709436734, "learning_rate": 2.613878005207631e-06, "loss": 0.2121, "step": 9739 }, { "epoch": 0.7716379481085364, "grad_norm": 1.2792671007799559, "learning_rate": 2.612148407973134e-06, "loss": 0.0851, "step": 9740 }, { "epoch": 0.7717171717171717, "grad_norm": 1.6363438372669241, "learning_rate": 2.6104192971931197e-06, "loss": 0.1794, "step": 9741 }, { "epoch": 0.7717963953258071, "grad_norm": 1.6994675334890936, "learning_rate": 2.6086906729814378e-06, "loss": 0.1927, "step": 9742 }, { "epoch": 0.7718756189344425, "grad_norm": 1.6619972490158255, "learning_rate": 2.606962535451907e-06, "loss": 0.1306, "step": 9743 }, { "epoch": 0.7719548425430779, "grad_norm": 1.4037253512727226, "learning_rate": 2.605234884718324e-06, "loss": 0.119, "step": 9744 }, { "epoch": 0.7720340661517132, "grad_norm": 1.5833786631067743, "learning_rate": 2.6035077208944416e-06, "loss": 0.1898, "step": 9745 }, { "epoch": 0.7721132897603485, "grad_norm": 1.230300162259317, "learning_rate": 2.601781044093984e-06, "loss": 0.0982, "step": 9746 }, { "epoch": 0.772192513368984, "grad_norm": 1.650317289533404, "learning_rate": 2.600054854430649e-06, "loss": 0.1565, "step": 9747 }, { "epoch": 0.7722717369776193, "grad_norm": 1.851354845951338, "learning_rate": 2.5983291520180965e-06, "loss": 0.2274, "step": 9748 }, { "epoch": 0.7723509605862547, "grad_norm": 1.9176741520932536, "learning_rate": 2.5966039369699537e-06, "loss": 0.2083, "step": 9749 }, { "epoch": 0.7724301841948901, "grad_norm": 1.6893577304842577, "learning_rate": 2.5948792093998167e-06, "loss": 0.2014, "step": 9750 }, { "epoch": 0.7725094078035255, "grad_norm": 1.6994842460804007, "learning_rate": 2.5931549694212545e-06, "loss": 0.1654, "step": 9751 }, { "epoch": 0.7725886314121608, "grad_norm": 1.5169542108974605, "learning_rate": 2.5914312171477983e-06, "loss": 0.1598, "step": 9752 }, { "epoch": 0.7726678550207962, "grad_norm": 1.3492658627299086, "learning_rate": 2.589707952692947e-06, "loss": 0.1058, "step": 9753 }, { "epoch": 0.7727470786294316, "grad_norm": 1.4680193746625914, "learning_rate": 2.5879851761701724e-06, "loss": 0.1338, "step": 9754 }, { "epoch": 0.7728263022380669, "grad_norm": 1.0322066829724559, "learning_rate": 2.586262887692911e-06, "loss": 0.0845, "step": 9755 }, { "epoch": 0.7729055258467024, "grad_norm": 1.2500200533604264, "learning_rate": 2.5845410873745614e-06, "loss": 0.158, "step": 9756 }, { "epoch": 0.7729847494553377, "grad_norm": 1.9357968267628918, "learning_rate": 2.5828197753285043e-06, "loss": 0.218, "step": 9757 }, { "epoch": 0.773063973063973, "grad_norm": 1.4909328794529666, "learning_rate": 2.581098951668075e-06, "loss": 0.1838, "step": 9758 }, { "epoch": 0.7731431966726084, "grad_norm": 1.8063545906846683, "learning_rate": 2.5793786165065805e-06, "loss": 0.1952, "step": 9759 }, { "epoch": 0.7732224202812438, "grad_norm": 1.4321678771562907, "learning_rate": 2.5776587699573007e-06, "loss": 0.132, "step": 9760 }, { "epoch": 0.7733016438898792, "grad_norm": 1.542022634786877, "learning_rate": 2.5759394121334767e-06, "loss": 0.1715, "step": 9761 }, { "epoch": 0.7733808674985145, "grad_norm": 1.2097978664145081, "learning_rate": 2.57422054314832e-06, "loss": 0.1103, "step": 9762 }, { "epoch": 0.77346009110715, "grad_norm": 1.4296828557359313, "learning_rate": 2.572502163115007e-06, "loss": 0.1637, "step": 9763 }, { "epoch": 0.7735393147157853, "grad_norm": 1.887411540859743, "learning_rate": 2.5707842721466914e-06, "loss": 0.2534, "step": 9764 }, { "epoch": 0.7736185383244206, "grad_norm": 1.5129329834481435, "learning_rate": 2.5690668703564835e-06, "loss": 0.1738, "step": 9765 }, { "epoch": 0.7736977619330561, "grad_norm": 2.367945613928465, "learning_rate": 2.5673499578574644e-06, "loss": 0.1966, "step": 9766 }, { "epoch": 0.7737769855416914, "grad_norm": 1.7368737694408045, "learning_rate": 2.565633534762689e-06, "loss": 0.1786, "step": 9767 }, { "epoch": 0.7738562091503268, "grad_norm": 1.8590987513742387, "learning_rate": 2.5639176011851753e-06, "loss": 0.2168, "step": 9768 }, { "epoch": 0.7739354327589621, "grad_norm": 1.5926198048873477, "learning_rate": 2.562202157237903e-06, "loss": 0.182, "step": 9769 }, { "epoch": 0.7740146563675976, "grad_norm": 1.9708474688096471, "learning_rate": 2.5604872030338336e-06, "loss": 0.1548, "step": 9770 }, { "epoch": 0.7740938799762329, "grad_norm": 1.2991272912314111, "learning_rate": 2.5587727386858853e-06, "loss": 0.1491, "step": 9771 }, { "epoch": 0.7741731035848682, "grad_norm": 1.5712179169661151, "learning_rate": 2.5570587643069435e-06, "loss": 0.172, "step": 9772 }, { "epoch": 0.7742523271935037, "grad_norm": 1.879258315975605, "learning_rate": 2.555345280009872e-06, "loss": 0.1936, "step": 9773 }, { "epoch": 0.774331550802139, "grad_norm": 1.454695856540906, "learning_rate": 2.5536322859074934e-06, "loss": 0.1547, "step": 9774 }, { "epoch": 0.7744107744107744, "grad_norm": 1.7214091516200096, "learning_rate": 2.551919782112596e-06, "loss": 0.2404, "step": 9775 }, { "epoch": 0.7744899980194098, "grad_norm": 1.55822482979566, "learning_rate": 2.550207768737949e-06, "loss": 0.236, "step": 9776 }, { "epoch": 0.7745692216280452, "grad_norm": 1.7239517012712033, "learning_rate": 2.54849624589627e-06, "loss": 0.208, "step": 9777 }, { "epoch": 0.7746484452366805, "grad_norm": 1.4791335434611836, "learning_rate": 2.546785213700258e-06, "loss": 0.1344, "step": 9778 }, { "epoch": 0.7747276688453159, "grad_norm": 1.2374391205197226, "learning_rate": 2.5450746722625785e-06, "loss": 0.0977, "step": 9779 }, { "epoch": 0.7748068924539513, "grad_norm": 1.346750574605836, "learning_rate": 2.5433646216958617e-06, "loss": 0.1221, "step": 9780 }, { "epoch": 0.7748861160625866, "grad_norm": 1.4056220772094195, "learning_rate": 2.5416550621127024e-06, "loss": 0.126, "step": 9781 }, { "epoch": 0.774965339671222, "grad_norm": 1.2451002382816594, "learning_rate": 2.539945993625673e-06, "loss": 0.0978, "step": 9782 }, { "epoch": 0.7750445632798574, "grad_norm": 1.8334259757084481, "learning_rate": 2.5382374163473046e-06, "loss": 0.1752, "step": 9783 }, { "epoch": 0.7751237868884928, "grad_norm": 1.20400435301815, "learning_rate": 2.536529330390095e-06, "loss": 0.1277, "step": 9784 }, { "epoch": 0.7752030104971281, "grad_norm": 1.7495749516651284, "learning_rate": 2.5348217358665207e-06, "loss": 0.1773, "step": 9785 }, { "epoch": 0.7752822341057635, "grad_norm": 1.7867115916421914, "learning_rate": 2.5331146328890145e-06, "loss": 0.1887, "step": 9786 }, { "epoch": 0.7753614577143989, "grad_norm": 1.437863090871042, "learning_rate": 2.5314080215699822e-06, "loss": 0.1681, "step": 9787 }, { "epoch": 0.7754406813230342, "grad_norm": 1.7774223714942095, "learning_rate": 2.5297019020217904e-06, "loss": 0.1984, "step": 9788 }, { "epoch": 0.7755199049316697, "grad_norm": 1.3875515800358615, "learning_rate": 2.5279962743567877e-06, "loss": 0.146, "step": 9789 }, { "epoch": 0.775599128540305, "grad_norm": 1.6204829785531747, "learning_rate": 2.526291138687278e-06, "loss": 0.1234, "step": 9790 }, { "epoch": 0.7756783521489404, "grad_norm": 1.9933384651096921, "learning_rate": 2.5245864951255317e-06, "loss": 0.261, "step": 9791 }, { "epoch": 0.7757575757575758, "grad_norm": 1.9217208451848666, "learning_rate": 2.522882343783799e-06, "loss": 0.1659, "step": 9792 }, { "epoch": 0.7758367993662111, "grad_norm": 1.4731265490539756, "learning_rate": 2.521178684774286e-06, "loss": 0.1284, "step": 9793 }, { "epoch": 0.7759160229748465, "grad_norm": 1.998911956802136, "learning_rate": 2.519475518209167e-06, "loss": 0.2368, "step": 9794 }, { "epoch": 0.7759952465834818, "grad_norm": 1.5012895065101473, "learning_rate": 2.5177728442005956e-06, "loss": 0.1253, "step": 9795 }, { "epoch": 0.7760744701921173, "grad_norm": 1.8593080920882146, "learning_rate": 2.516070662860679e-06, "loss": 0.2678, "step": 9796 }, { "epoch": 0.7761536938007526, "grad_norm": 1.4973455566323732, "learning_rate": 2.5143689743014966e-06, "loss": 0.1667, "step": 9797 }, { "epoch": 0.776232917409388, "grad_norm": 1.369211965989727, "learning_rate": 2.5126677786351005e-06, "loss": 0.1414, "step": 9798 }, { "epoch": 0.7763121410180234, "grad_norm": 1.778001705352899, "learning_rate": 2.5109670759735063e-06, "loss": 0.1435, "step": 9799 }, { "epoch": 0.7763913646266587, "grad_norm": 1.312856658962155, "learning_rate": 2.509266866428691e-06, "loss": 0.1456, "step": 9800 }, { "epoch": 0.7764705882352941, "grad_norm": 1.477858546155191, "learning_rate": 2.507567150112613e-06, "loss": 0.152, "step": 9801 }, { "epoch": 0.7765498118439295, "grad_norm": 1.8026840185953654, "learning_rate": 2.5058679271371865e-06, "loss": 0.17, "step": 9802 }, { "epoch": 0.7766290354525649, "grad_norm": 1.3915835459925203, "learning_rate": 2.504169197614298e-06, "loss": 0.1409, "step": 9803 }, { "epoch": 0.7767082590612002, "grad_norm": 1.1212030389828769, "learning_rate": 2.5024709616557964e-06, "loss": 0.0943, "step": 9804 }, { "epoch": 0.7767874826698357, "grad_norm": 1.4732687007129783, "learning_rate": 2.500773219373509e-06, "loss": 0.1908, "step": 9805 }, { "epoch": 0.776866706278471, "grad_norm": 1.2377783005686418, "learning_rate": 2.499075970879222e-06, "loss": 0.1277, "step": 9806 }, { "epoch": 0.7769459298871063, "grad_norm": 1.41121346586343, "learning_rate": 2.4973792162846878e-06, "loss": 0.1617, "step": 9807 }, { "epoch": 0.7770251534957417, "grad_norm": 1.6896059339122411, "learning_rate": 2.4956829557016336e-06, "loss": 0.2196, "step": 9808 }, { "epoch": 0.7771043771043771, "grad_norm": 1.3312844030840447, "learning_rate": 2.493987189241749e-06, "loss": 0.1119, "step": 9809 }, { "epoch": 0.7771836007130125, "grad_norm": 1.5314673577289795, "learning_rate": 2.4922919170166883e-06, "loss": 0.202, "step": 9810 }, { "epoch": 0.7772628243216478, "grad_norm": 1.5521891195803676, "learning_rate": 2.4905971391380823e-06, "loss": 0.1698, "step": 9811 }, { "epoch": 0.7773420479302833, "grad_norm": 1.4298165974024264, "learning_rate": 2.488902855717522e-06, "loss": 0.1531, "step": 9812 }, { "epoch": 0.7774212715389186, "grad_norm": 1.6079443450505917, "learning_rate": 2.487209066866565e-06, "loss": 0.1626, "step": 9813 }, { "epoch": 0.7775004951475539, "grad_norm": 1.479766326797685, "learning_rate": 2.485515772696745e-06, "loss": 0.1802, "step": 9814 }, { "epoch": 0.7775797187561894, "grad_norm": 1.9511307720203546, "learning_rate": 2.483822973319553e-06, "loss": 0.2114, "step": 9815 }, { "epoch": 0.7776589423648247, "grad_norm": 1.6470902883172156, "learning_rate": 2.482130668846451e-06, "loss": 0.1782, "step": 9816 }, { "epoch": 0.7777381659734601, "grad_norm": 1.4050107087219839, "learning_rate": 2.480438859388873e-06, "loss": 0.1364, "step": 9817 }, { "epoch": 0.7778173895820955, "grad_norm": 1.5688834229333537, "learning_rate": 2.4787475450582133e-06, "loss": 0.1442, "step": 9818 }, { "epoch": 0.7778966131907309, "grad_norm": 1.3212826997733387, "learning_rate": 2.4770567259658386e-06, "loss": 0.1381, "step": 9819 }, { "epoch": 0.7779758367993662, "grad_norm": 1.4963818967637699, "learning_rate": 2.4753664022230783e-06, "loss": 0.1521, "step": 9820 }, { "epoch": 0.7780550604080015, "grad_norm": 1.2425808022257758, "learning_rate": 2.473676573941236e-06, "loss": 0.1223, "step": 9821 }, { "epoch": 0.778134284016637, "grad_norm": 1.3869245581232976, "learning_rate": 2.471987241231577e-06, "loss": 0.1673, "step": 9822 }, { "epoch": 0.7782135076252723, "grad_norm": 1.4553644321840447, "learning_rate": 2.4702984042053335e-06, "loss": 0.1657, "step": 9823 }, { "epoch": 0.7782927312339077, "grad_norm": 1.4555058988367506, "learning_rate": 2.468610062973712e-06, "loss": 0.1056, "step": 9824 }, { "epoch": 0.7783719548425431, "grad_norm": 1.418367629692807, "learning_rate": 2.466922217647879e-06, "loss": 0.1343, "step": 9825 }, { "epoch": 0.7784511784511785, "grad_norm": 1.563017698175236, "learning_rate": 2.465234868338968e-06, "loss": 0.155, "step": 9826 }, { "epoch": 0.7785304020598138, "grad_norm": 1.564978379751095, "learning_rate": 2.4635480151580902e-06, "loss": 0.1738, "step": 9827 }, { "epoch": 0.7786096256684492, "grad_norm": 1.5218207589513273, "learning_rate": 2.461861658216311e-06, "loss": 0.1435, "step": 9828 }, { "epoch": 0.7786888492770846, "grad_norm": 1.6136781479505269, "learning_rate": 2.4601757976246685e-06, "loss": 0.1624, "step": 9829 }, { "epoch": 0.7787680728857199, "grad_norm": 1.3249496932637106, "learning_rate": 2.4584904334941728e-06, "loss": 0.094, "step": 9830 }, { "epoch": 0.7788472964943554, "grad_norm": 1.5017492963559536, "learning_rate": 2.456805565935795e-06, "loss": 0.1683, "step": 9831 }, { "epoch": 0.7789265201029907, "grad_norm": 1.4531046128085603, "learning_rate": 2.4551211950604713e-06, "loss": 0.1905, "step": 9832 }, { "epoch": 0.7790057437116261, "grad_norm": 1.706283272163017, "learning_rate": 2.4534373209791162e-06, "loss": 0.1624, "step": 9833 }, { "epoch": 0.7790849673202614, "grad_norm": 1.6380514707700125, "learning_rate": 2.451753943802603e-06, "loss": 0.1902, "step": 9834 }, { "epoch": 0.7791641909288968, "grad_norm": 1.6499999222540944, "learning_rate": 2.4500710636417725e-06, "loss": 0.1739, "step": 9835 }, { "epoch": 0.7792434145375322, "grad_norm": 1.4879660445719807, "learning_rate": 2.4483886806074308e-06, "loss": 0.1181, "step": 9836 }, { "epoch": 0.7793226381461675, "grad_norm": 1.7650918291893654, "learning_rate": 2.4467067948103616e-06, "loss": 0.2373, "step": 9837 }, { "epoch": 0.779401861754803, "grad_norm": 1.9501317030648648, "learning_rate": 2.4450254063613056e-06, "loss": 0.1755, "step": 9838 }, { "epoch": 0.7794810853634383, "grad_norm": 1.365525629253928, "learning_rate": 2.4433445153709722e-06, "loss": 0.0967, "step": 9839 }, { "epoch": 0.7795603089720736, "grad_norm": 2.284696402821749, "learning_rate": 2.441664121950045e-06, "loss": 0.1942, "step": 9840 }, { "epoch": 0.7796395325807091, "grad_norm": 1.5042012443899009, "learning_rate": 2.439984226209167e-06, "loss": 0.1322, "step": 9841 }, { "epoch": 0.7797187561893444, "grad_norm": 1.3875481099429432, "learning_rate": 2.438304828258947e-06, "loss": 0.1581, "step": 9842 }, { "epoch": 0.7797979797979798, "grad_norm": 1.9634923702246954, "learning_rate": 2.4366259282099737e-06, "loss": 0.1832, "step": 9843 }, { "epoch": 0.7798772034066151, "grad_norm": 1.3744684943728875, "learning_rate": 2.4349475261727905e-06, "loss": 0.155, "step": 9844 }, { "epoch": 0.7799564270152506, "grad_norm": 1.6895205019382182, "learning_rate": 2.4332696222579078e-06, "loss": 0.1691, "step": 9845 }, { "epoch": 0.7800356506238859, "grad_norm": 1.5850544854776158, "learning_rate": 2.4315922165758154e-06, "loss": 0.1976, "step": 9846 }, { "epoch": 0.7801148742325212, "grad_norm": 1.581470728898586, "learning_rate": 2.4299153092369598e-06, "loss": 0.1509, "step": 9847 }, { "epoch": 0.7801940978411567, "grad_norm": 1.4437674560460607, "learning_rate": 2.428238900351755e-06, "loss": 0.139, "step": 9848 }, { "epoch": 0.780273321449792, "grad_norm": 1.31502528899535, "learning_rate": 2.426562990030582e-06, "loss": 0.1298, "step": 9849 }, { "epoch": 0.7803525450584274, "grad_norm": 1.3514536223662552, "learning_rate": 2.424887578383799e-06, "loss": 0.1861, "step": 9850 }, { "epoch": 0.7804317686670628, "grad_norm": 1.7806678450907218, "learning_rate": 2.4232126655217202e-06, "loss": 0.1692, "step": 9851 }, { "epoch": 0.7805109922756982, "grad_norm": 1.2194089916371416, "learning_rate": 2.421538251554627e-06, "loss": 0.1284, "step": 9852 }, { "epoch": 0.7805902158843335, "grad_norm": 1.4233446865831838, "learning_rate": 2.4198643365927767e-06, "loss": 0.132, "step": 9853 }, { "epoch": 0.7806694394929689, "grad_norm": 1.3894587162835084, "learning_rate": 2.4181909207463873e-06, "loss": 0.1219, "step": 9854 }, { "epoch": 0.7807486631016043, "grad_norm": 1.4438565454138383, "learning_rate": 2.4165180041256444e-06, "loss": 0.1997, "step": 9855 }, { "epoch": 0.7808278867102396, "grad_norm": 1.4825858616325516, "learning_rate": 2.4148455868407015e-06, "loss": 0.1403, "step": 9856 }, { "epoch": 0.780907110318875, "grad_norm": 1.9852472755035369, "learning_rate": 2.413173669001676e-06, "loss": 0.221, "step": 9857 }, { "epoch": 0.7809863339275104, "grad_norm": 1.4129847202664567, "learning_rate": 2.4115022507186626e-06, "loss": 0.1268, "step": 9858 }, { "epoch": 0.7810655575361458, "grad_norm": 1.6444194614633125, "learning_rate": 2.409831332101712e-06, "loss": 0.1286, "step": 9859 }, { "epoch": 0.7811447811447811, "grad_norm": 1.6743803655198743, "learning_rate": 2.4081609132608464e-06, "loss": 0.1662, "step": 9860 }, { "epoch": 0.7812240047534165, "grad_norm": 1.5071777355332132, "learning_rate": 2.406490994306052e-06, "loss": 0.1959, "step": 9861 }, { "epoch": 0.7813032283620519, "grad_norm": 1.3217166468552706, "learning_rate": 2.4048215753472914e-06, "loss": 0.136, "step": 9862 }, { "epoch": 0.7813824519706872, "grad_norm": 1.8603288390768549, "learning_rate": 2.403152656494485e-06, "loss": 0.1748, "step": 9863 }, { "epoch": 0.7814616755793227, "grad_norm": 1.5524933169021706, "learning_rate": 2.401484237857519e-06, "loss": 0.1727, "step": 9864 }, { "epoch": 0.781540899187958, "grad_norm": 1.4513100745484329, "learning_rate": 2.3998163195462565e-06, "loss": 0.1192, "step": 9865 }, { "epoch": 0.7816201227965934, "grad_norm": 1.3655271254718757, "learning_rate": 2.398148901670521e-06, "loss": 0.1928, "step": 9866 }, { "epoch": 0.7816993464052288, "grad_norm": 1.1358946494058615, "learning_rate": 2.396481984340098e-06, "loss": 0.0843, "step": 9867 }, { "epoch": 0.7817785700138641, "grad_norm": 1.493678617900152, "learning_rate": 2.3948155676647546e-06, "loss": 0.0905, "step": 9868 }, { "epoch": 0.7818577936224995, "grad_norm": 2.2380240605705377, "learning_rate": 2.393149651754212e-06, "loss": 0.1764, "step": 9869 }, { "epoch": 0.7819370172311348, "grad_norm": 1.615659718099454, "learning_rate": 2.391484236718159e-06, "loss": 0.1237, "step": 9870 }, { "epoch": 0.7820162408397703, "grad_norm": 1.4542170550840967, "learning_rate": 2.389819322666264e-06, "loss": 0.1386, "step": 9871 }, { "epoch": 0.7820954644484056, "grad_norm": 1.6898234553595362, "learning_rate": 2.3881549097081467e-06, "loss": 0.155, "step": 9872 }, { "epoch": 0.782174688057041, "grad_norm": 2.139501797222874, "learning_rate": 2.3864909979534044e-06, "loss": 0.1915, "step": 9873 }, { "epoch": 0.7822539116656764, "grad_norm": 1.4481664144895345, "learning_rate": 2.3848275875115925e-06, "loss": 0.1551, "step": 9874 }, { "epoch": 0.7823331352743117, "grad_norm": 1.9653217027377339, "learning_rate": 2.3831646784922446e-06, "loss": 0.1831, "step": 9875 }, { "epoch": 0.7824123588829471, "grad_norm": 1.8148982093916635, "learning_rate": 2.381502271004853e-06, "loss": 0.2308, "step": 9876 }, { "epoch": 0.7824915824915825, "grad_norm": 1.4528137559606764, "learning_rate": 2.3798403651588765e-06, "loss": 0.187, "step": 9877 }, { "epoch": 0.7825708061002179, "grad_norm": 1.5887962836977498, "learning_rate": 2.3781789610637483e-06, "loss": 0.2413, "step": 9878 }, { "epoch": 0.7826500297088532, "grad_norm": 1.5512615286566966, "learning_rate": 2.376518058828863e-06, "loss": 0.1784, "step": 9879 }, { "epoch": 0.7827292533174887, "grad_norm": 1.1740892807392678, "learning_rate": 2.3748576585635774e-06, "loss": 0.1327, "step": 9880 }, { "epoch": 0.782808476926124, "grad_norm": 1.2857031530328165, "learning_rate": 2.373197760377228e-06, "loss": 0.1446, "step": 9881 }, { "epoch": 0.7828877005347593, "grad_norm": 1.5223889311563306, "learning_rate": 2.371538364379109e-06, "loss": 0.1616, "step": 9882 }, { "epoch": 0.7829669241433947, "grad_norm": 1.5839227481392508, "learning_rate": 2.36987947067848e-06, "loss": 0.2003, "step": 9883 }, { "epoch": 0.7830461477520301, "grad_norm": 1.4081995392008955, "learning_rate": 2.368221079384577e-06, "loss": 0.1549, "step": 9884 }, { "epoch": 0.7831253713606655, "grad_norm": 1.6971172908327063, "learning_rate": 2.3665631906065933e-06, "loss": 0.18, "step": 9885 }, { "epoch": 0.7832045949693008, "grad_norm": 1.1954142628303228, "learning_rate": 2.364905804453692e-06, "loss": 0.1489, "step": 9886 }, { "epoch": 0.7832838185779363, "grad_norm": 1.6572554709624563, "learning_rate": 2.3632489210350074e-06, "loss": 0.1338, "step": 9887 }, { "epoch": 0.7833630421865716, "grad_norm": 1.1781206400864117, "learning_rate": 2.361592540459636e-06, "loss": 0.0844, "step": 9888 }, { "epoch": 0.7834422657952069, "grad_norm": 1.5340634649743108, "learning_rate": 2.3599366628366427e-06, "loss": 0.1771, "step": 9889 }, { "epoch": 0.7835214894038424, "grad_norm": 1.6778345987190693, "learning_rate": 2.358281288275055e-06, "loss": 0.2209, "step": 9890 }, { "epoch": 0.7836007130124777, "grad_norm": 1.3264485644963284, "learning_rate": 2.356626416883878e-06, "loss": 0.1545, "step": 9891 }, { "epoch": 0.7836799366211131, "grad_norm": 1.9691156240211038, "learning_rate": 2.354972048772074e-06, "loss": 0.2074, "step": 9892 }, { "epoch": 0.7837591602297485, "grad_norm": 1.2191977698700902, "learning_rate": 2.353318184048573e-06, "loss": 0.0844, "step": 9893 }, { "epoch": 0.7838383838383839, "grad_norm": 1.137265551923628, "learning_rate": 2.351664822822277e-06, "loss": 0.08, "step": 9894 }, { "epoch": 0.7839176074470192, "grad_norm": 1.421115980040482, "learning_rate": 2.3500119652020526e-06, "loss": 0.1554, "step": 9895 }, { "epoch": 0.7839968310556545, "grad_norm": 1.5200703481219549, "learning_rate": 2.348359611296728e-06, "loss": 0.1846, "step": 9896 }, { "epoch": 0.78407605466429, "grad_norm": 1.6095400050334874, "learning_rate": 2.346707761215108e-06, "loss": 0.1502, "step": 9897 }, { "epoch": 0.7841552782729253, "grad_norm": 1.4076656297089143, "learning_rate": 2.345056415065956e-06, "loss": 0.1503, "step": 9898 }, { "epoch": 0.7842345018815607, "grad_norm": 1.541479098845182, "learning_rate": 2.343405572958004e-06, "loss": 0.172, "step": 9899 }, { "epoch": 0.7843137254901961, "grad_norm": 1.5297463587582756, "learning_rate": 2.341755234999956e-06, "loss": 0.245, "step": 9900 }, { "epoch": 0.7843929490988315, "grad_norm": 1.5833443207634692, "learning_rate": 2.3401054013004776e-06, "loss": 0.1508, "step": 9901 }, { "epoch": 0.7844721727074668, "grad_norm": 1.5892295059920318, "learning_rate": 2.338456071968198e-06, "loss": 0.1578, "step": 9902 }, { "epoch": 0.7845513963161022, "grad_norm": 1.4429492724249036, "learning_rate": 2.336807247111723e-06, "loss": 0.1612, "step": 9903 }, { "epoch": 0.7846306199247376, "grad_norm": 1.3529003984642856, "learning_rate": 2.3351589268396193e-06, "loss": 0.1335, "step": 9904 }, { "epoch": 0.7847098435333729, "grad_norm": 1.7004838622497822, "learning_rate": 2.3335111112604194e-06, "loss": 0.1933, "step": 9905 }, { "epoch": 0.7847890671420084, "grad_norm": 1.5184346920036407, "learning_rate": 2.33186380048262e-06, "loss": 0.2354, "step": 9906 }, { "epoch": 0.7848682907506437, "grad_norm": 1.7302373847009187, "learning_rate": 2.330216994614696e-06, "loss": 0.2152, "step": 9907 }, { "epoch": 0.7849475143592791, "grad_norm": 1.7780155831602766, "learning_rate": 2.3285706937650786e-06, "loss": 0.1689, "step": 9908 }, { "epoch": 0.7850267379679144, "grad_norm": 1.6726453576435996, "learning_rate": 2.3269248980421653e-06, "loss": 0.17, "step": 9909 }, { "epoch": 0.7851059615765498, "grad_norm": 1.2388215252220365, "learning_rate": 2.3252796075543295e-06, "loss": 0.1081, "step": 9910 }, { "epoch": 0.7851851851851852, "grad_norm": 1.0664201285624373, "learning_rate": 2.3236348224099038e-06, "loss": 0.1176, "step": 9911 }, { "epoch": 0.7852644087938205, "grad_norm": 2.0678534576917524, "learning_rate": 2.3219905427171864e-06, "loss": 0.2299, "step": 9912 }, { "epoch": 0.785343632402456, "grad_norm": 1.64307162014019, "learning_rate": 2.320346768584449e-06, "loss": 0.1576, "step": 9913 }, { "epoch": 0.7854228560110913, "grad_norm": 1.0788347048400195, "learning_rate": 2.3187035001199254e-06, "loss": 0.0896, "step": 9914 }, { "epoch": 0.7855020796197266, "grad_norm": 1.2997438253846387, "learning_rate": 2.317060737431813e-06, "loss": 0.1666, "step": 9915 }, { "epoch": 0.7855813032283621, "grad_norm": 1.3365023128119964, "learning_rate": 2.3154184806282863e-06, "loss": 0.1059, "step": 9916 }, { "epoch": 0.7856605268369974, "grad_norm": 1.5025039908520286, "learning_rate": 2.3137767298174774e-06, "loss": 0.124, "step": 9917 }, { "epoch": 0.7857397504456328, "grad_norm": 1.6868164976194144, "learning_rate": 2.312135485107486e-06, "loss": 0.1362, "step": 9918 }, { "epoch": 0.7858189740542681, "grad_norm": 1.2729634173030109, "learning_rate": 2.3104947466063785e-06, "loss": 0.1447, "step": 9919 }, { "epoch": 0.7858981976629036, "grad_norm": 0.9913998222470702, "learning_rate": 2.3088545144221964e-06, "loss": 0.067, "step": 9920 }, { "epoch": 0.7859774212715389, "grad_norm": 1.8229278522045034, "learning_rate": 2.307214788662936e-06, "loss": 0.2254, "step": 9921 }, { "epoch": 0.7860566448801742, "grad_norm": 1.5690718750912345, "learning_rate": 2.3055755694365644e-06, "loss": 0.1407, "step": 9922 }, { "epoch": 0.7861358684888097, "grad_norm": 1.5155520335935997, "learning_rate": 2.303936856851021e-06, "loss": 0.0957, "step": 9923 }, { "epoch": 0.786215092097445, "grad_norm": 1.2659496091344593, "learning_rate": 2.302298651014204e-06, "loss": 0.0895, "step": 9924 }, { "epoch": 0.7862943157060804, "grad_norm": 1.4116650662247483, "learning_rate": 2.3006609520339796e-06, "loss": 0.1089, "step": 9925 }, { "epoch": 0.7863735393147158, "grad_norm": 1.946327090543923, "learning_rate": 2.2990237600181864e-06, "loss": 0.2623, "step": 9926 }, { "epoch": 0.7864527629233512, "grad_norm": 1.4824259779344384, "learning_rate": 2.2973870750746253e-06, "loss": 0.1439, "step": 9927 }, { "epoch": 0.7865319865319865, "grad_norm": 2.0297942406853244, "learning_rate": 2.2957508973110586e-06, "loss": 0.1525, "step": 9928 }, { "epoch": 0.7866112101406219, "grad_norm": 1.7268831389340178, "learning_rate": 2.2941152268352284e-06, "loss": 0.1578, "step": 9929 }, { "epoch": 0.7866904337492573, "grad_norm": 1.7233516262432869, "learning_rate": 2.292480063754833e-06, "loss": 0.1908, "step": 9930 }, { "epoch": 0.7867696573578926, "grad_norm": 1.6122513701460157, "learning_rate": 2.2908454081775344e-06, "loss": 0.1761, "step": 9931 }, { "epoch": 0.786848880966528, "grad_norm": 1.2913194872695093, "learning_rate": 2.2892112602109783e-06, "loss": 0.0796, "step": 9932 }, { "epoch": 0.7869281045751634, "grad_norm": 1.191435906005958, "learning_rate": 2.2875776199627564e-06, "loss": 0.1467, "step": 9933 }, { "epoch": 0.7870073281837988, "grad_norm": 1.662942997145696, "learning_rate": 2.2859444875404347e-06, "loss": 0.1556, "step": 9934 }, { "epoch": 0.7870865517924341, "grad_norm": 1.9013726168634986, "learning_rate": 2.2843118630515536e-06, "loss": 0.2085, "step": 9935 }, { "epoch": 0.7871657754010695, "grad_norm": 1.4286296586426461, "learning_rate": 2.282679746603611e-06, "loss": 0.0949, "step": 9936 }, { "epoch": 0.7872449990097049, "grad_norm": 1.6938781461089853, "learning_rate": 2.281048138304072e-06, "loss": 0.204, "step": 9937 }, { "epoch": 0.7873242226183402, "grad_norm": 1.900504851065913, "learning_rate": 2.279417038260373e-06, "loss": 0.2873, "step": 9938 }, { "epoch": 0.7874034462269757, "grad_norm": 1.404110202195443, "learning_rate": 2.2777864465799137e-06, "loss": 0.163, "step": 9939 }, { "epoch": 0.787482669835611, "grad_norm": 1.5573164992739201, "learning_rate": 2.276156363370058e-06, "loss": 0.1483, "step": 9940 }, { "epoch": 0.7875618934442464, "grad_norm": 1.3045762965554017, "learning_rate": 2.274526788738143e-06, "loss": 0.147, "step": 9941 }, { "epoch": 0.7876411170528818, "grad_norm": 1.7295399244403333, "learning_rate": 2.272897722791466e-06, "loss": 0.2139, "step": 9942 }, { "epoch": 0.7877203406615171, "grad_norm": 1.6195616020356043, "learning_rate": 2.271269165637294e-06, "loss": 0.1934, "step": 9943 }, { "epoch": 0.7877995642701525, "grad_norm": 1.5485879008510388, "learning_rate": 2.2696411173828557e-06, "loss": 0.1413, "step": 9944 }, { "epoch": 0.7878787878787878, "grad_norm": 1.9380151936770873, "learning_rate": 2.268013578135357e-06, "loss": 0.2173, "step": 9945 }, { "epoch": 0.7879580114874233, "grad_norm": 1.4341914940444063, "learning_rate": 2.266386548001961e-06, "loss": 0.1822, "step": 9946 }, { "epoch": 0.7880372350960586, "grad_norm": 1.5959739300320508, "learning_rate": 2.264760027089795e-06, "loss": 0.2274, "step": 9947 }, { "epoch": 0.788116458704694, "grad_norm": 1.1242432050579019, "learning_rate": 2.2631340155059656e-06, "loss": 0.1102, "step": 9948 }, { "epoch": 0.7881956823133294, "grad_norm": 1.9244895012940224, "learning_rate": 2.261508513357532e-06, "loss": 0.1813, "step": 9949 }, { "epoch": 0.7882749059219647, "grad_norm": 1.4237682201949433, "learning_rate": 2.2598835207515267e-06, "loss": 0.1016, "step": 9950 }, { "epoch": 0.7883541295306001, "grad_norm": 1.7744325348652044, "learning_rate": 2.2582590377949497e-06, "loss": 0.216, "step": 9951 }, { "epoch": 0.7884333531392355, "grad_norm": 1.5749910436672572, "learning_rate": 2.2566350645947656e-06, "loss": 0.1746, "step": 9952 }, { "epoch": 0.7885125767478709, "grad_norm": 1.355511694526603, "learning_rate": 2.2550116012579004e-06, "loss": 0.0897, "step": 9953 }, { "epoch": 0.7885918003565062, "grad_norm": 1.6295008299820368, "learning_rate": 2.253388647891258e-06, "loss": 0.1643, "step": 9954 }, { "epoch": 0.7886710239651417, "grad_norm": 1.3467391373535633, "learning_rate": 2.2517662046016975e-06, "loss": 0.1004, "step": 9955 }, { "epoch": 0.788750247573777, "grad_norm": 4.682270455395327, "learning_rate": 2.250144271496049e-06, "loss": 0.1969, "step": 9956 }, { "epoch": 0.7888294711824123, "grad_norm": 1.5699310942789322, "learning_rate": 2.2485228486811128e-06, "loss": 0.1608, "step": 9957 }, { "epoch": 0.7889086947910477, "grad_norm": 1.486879949519601, "learning_rate": 2.2469019362636478e-06, "loss": 0.1361, "step": 9958 }, { "epoch": 0.7889879183996831, "grad_norm": 1.595931092624869, "learning_rate": 2.2452815343503862e-06, "loss": 0.1698, "step": 9959 }, { "epoch": 0.7890671420083185, "grad_norm": 1.784241410979693, "learning_rate": 2.2436616430480197e-06, "loss": 0.2391, "step": 9960 }, { "epoch": 0.7891463656169538, "grad_norm": 1.463888108877651, "learning_rate": 2.2420422624632153e-06, "loss": 0.1881, "step": 9961 }, { "epoch": 0.7892255892255893, "grad_norm": 1.3974477761097146, "learning_rate": 2.2404233927025985e-06, "loss": 0.1318, "step": 9962 }, { "epoch": 0.7893048128342246, "grad_norm": 1.5570581920763162, "learning_rate": 2.238805033872762e-06, "loss": 0.1454, "step": 9963 }, { "epoch": 0.7893840364428599, "grad_norm": 1.642197661575361, "learning_rate": 2.237187186080273e-06, "loss": 0.137, "step": 9964 }, { "epoch": 0.7894632600514954, "grad_norm": 1.6150614542953492, "learning_rate": 2.235569849431655e-06, "loss": 0.1696, "step": 9965 }, { "epoch": 0.7895424836601307, "grad_norm": 1.2972054456968034, "learning_rate": 2.2339530240333993e-06, "loss": 0.158, "step": 9966 }, { "epoch": 0.7896217072687661, "grad_norm": 1.2969935030274593, "learning_rate": 2.2323367099919724e-06, "loss": 0.1674, "step": 9967 }, { "epoch": 0.7897009308774015, "grad_norm": 1.519984704530541, "learning_rate": 2.230720907413797e-06, "loss": 0.1644, "step": 9968 }, { "epoch": 0.7897801544860369, "grad_norm": 1.327596584034342, "learning_rate": 2.2291056164052638e-06, "loss": 0.1164, "step": 9969 }, { "epoch": 0.7898593780946722, "grad_norm": 1.1748700946165422, "learning_rate": 2.2274908370727376e-06, "loss": 0.1397, "step": 9970 }, { "epoch": 0.7899386017033075, "grad_norm": 1.7387930116965715, "learning_rate": 2.2258765695225416e-06, "loss": 0.1185, "step": 9971 }, { "epoch": 0.790017825311943, "grad_norm": 1.2234761605340527, "learning_rate": 2.224262813860962e-06, "loss": 0.1084, "step": 9972 }, { "epoch": 0.7900970489205783, "grad_norm": 1.9514398896101708, "learning_rate": 2.2226495701942663e-06, "loss": 0.1962, "step": 9973 }, { "epoch": 0.7901762725292137, "grad_norm": 1.490312835581842, "learning_rate": 2.2210368386286742e-06, "loss": 0.2002, "step": 9974 }, { "epoch": 0.7902554961378491, "grad_norm": 1.4741412854177365, "learning_rate": 2.219424619270375e-06, "loss": 0.1099, "step": 9975 }, { "epoch": 0.7903347197464845, "grad_norm": 1.551959043376881, "learning_rate": 2.2178129122255255e-06, "loss": 0.139, "step": 9976 }, { "epoch": 0.7904139433551198, "grad_norm": 1.1980804146224264, "learning_rate": 2.2162017176002514e-06, "loss": 0.1337, "step": 9977 }, { "epoch": 0.7904931669637552, "grad_norm": 1.4266653732552625, "learning_rate": 2.2145910355006415e-06, "loss": 0.1677, "step": 9978 }, { "epoch": 0.7905723905723906, "grad_norm": 2.3174832590171084, "learning_rate": 2.212980866032749e-06, "loss": 0.1632, "step": 9979 }, { "epoch": 0.7906516141810259, "grad_norm": 1.754321042318844, "learning_rate": 2.2113712093025997e-06, "loss": 0.2074, "step": 9980 }, { "epoch": 0.7907308377896614, "grad_norm": 2.066931491831207, "learning_rate": 2.20976206541618e-06, "loss": 0.2058, "step": 9981 }, { "epoch": 0.7908100613982967, "grad_norm": 1.4004544365633376, "learning_rate": 2.208153434479442e-06, "loss": 0.1318, "step": 9982 }, { "epoch": 0.7908892850069321, "grad_norm": 1.3430351447290874, "learning_rate": 2.20654531659831e-06, "loss": 0.1399, "step": 9983 }, { "epoch": 0.7909685086155674, "grad_norm": 1.7441387425468462, "learning_rate": 2.2049377118786696e-06, "loss": 0.1891, "step": 9984 }, { "epoch": 0.7910477322242028, "grad_norm": 1.3147742498340642, "learning_rate": 2.2033306204263704e-06, "loss": 0.098, "step": 9985 }, { "epoch": 0.7911269558328382, "grad_norm": 1.4776189216839015, "learning_rate": 2.2017240423472384e-06, "loss": 0.125, "step": 9986 }, { "epoch": 0.7912061794414735, "grad_norm": 1.5624806083130125, "learning_rate": 2.200117977747055e-06, "loss": 0.1743, "step": 9987 }, { "epoch": 0.791285403050109, "grad_norm": 1.6305221424473388, "learning_rate": 2.198512426731568e-06, "loss": 0.147, "step": 9988 }, { "epoch": 0.7913646266587443, "grad_norm": 1.646593475485873, "learning_rate": 2.196907389406504e-06, "loss": 0.1817, "step": 9989 }, { "epoch": 0.7914438502673797, "grad_norm": 2.0803310071104955, "learning_rate": 2.195302865877541e-06, "loss": 0.2863, "step": 9990 }, { "epoch": 0.7915230738760151, "grad_norm": 1.7736444271091318, "learning_rate": 2.193698856250331e-06, "loss": 0.1744, "step": 9991 }, { "epoch": 0.7916022974846504, "grad_norm": 1.621184216725666, "learning_rate": 2.1920953606304875e-06, "loss": 0.2197, "step": 9992 }, { "epoch": 0.7916815210932858, "grad_norm": 1.6404741812759889, "learning_rate": 2.1904923791235965e-06, "loss": 0.1703, "step": 9993 }, { "epoch": 0.7917607447019211, "grad_norm": 1.4897659344779688, "learning_rate": 2.188889911835207e-06, "loss": 0.1374, "step": 9994 }, { "epoch": 0.7918399683105566, "grad_norm": 1.8663482662202842, "learning_rate": 2.1872879588708286e-06, "loss": 0.205, "step": 9995 }, { "epoch": 0.7919191919191919, "grad_norm": 1.6094847483146242, "learning_rate": 2.185686520335948e-06, "loss": 0.1729, "step": 9996 }, { "epoch": 0.7919984155278272, "grad_norm": 1.579945800895183, "learning_rate": 2.184085596336011e-06, "loss": 0.1748, "step": 9997 }, { "epoch": 0.7920776391364627, "grad_norm": 1.2182952293360618, "learning_rate": 2.1824851869764262e-06, "loss": 0.1085, "step": 9998 }, { "epoch": 0.792156862745098, "grad_norm": 1.9813333334345649, "learning_rate": 2.1808852923625802e-06, "loss": 0.2213, "step": 9999 }, { "epoch": 0.7922360863537334, "grad_norm": 2.3798008584447876, "learning_rate": 2.1792859125998134e-06, "loss": 0.2385, "step": 10000 }, { "epoch": 0.7923153099623688, "grad_norm": 1.2946411560688356, "learning_rate": 2.1776870477934353e-06, "loss": 0.1164, "step": 10001 }, { "epoch": 0.7923945335710042, "grad_norm": 1.5012788819907488, "learning_rate": 2.1760886980487307e-06, "loss": 0.1385, "step": 10002 }, { "epoch": 0.7924737571796395, "grad_norm": 1.8064772182168056, "learning_rate": 2.174490863470938e-06, "loss": 0.2323, "step": 10003 }, { "epoch": 0.7925529807882749, "grad_norm": 1.3205231349479927, "learning_rate": 2.1728935441652687e-06, "loss": 0.0755, "step": 10004 }, { "epoch": 0.7926322043969103, "grad_norm": 1.8299458070746077, "learning_rate": 2.1712967402368947e-06, "loss": 0.095, "step": 10005 }, { "epoch": 0.7927114280055456, "grad_norm": 2.0463888396517165, "learning_rate": 2.169700451790964e-06, "loss": 0.1594, "step": 10006 }, { "epoch": 0.792790651614181, "grad_norm": 1.6230727902814366, "learning_rate": 2.168104678932581e-06, "loss": 0.1748, "step": 10007 }, { "epoch": 0.7928698752228164, "grad_norm": 1.902277582263067, "learning_rate": 2.166509421766818e-06, "loss": 0.155, "step": 10008 }, { "epoch": 0.7929490988314518, "grad_norm": 1.3007070544993693, "learning_rate": 2.1649146803987197e-06, "loss": 0.1294, "step": 10009 }, { "epoch": 0.7930283224400871, "grad_norm": 1.6989870907121092, "learning_rate": 2.1633204549332897e-06, "loss": 0.1547, "step": 10010 }, { "epoch": 0.7931075460487225, "grad_norm": 1.2099287702109043, "learning_rate": 2.1617267454754996e-06, "loss": 0.0925, "step": 10011 }, { "epoch": 0.7931867696573579, "grad_norm": 1.4496158928678884, "learning_rate": 2.160133552130289e-06, "loss": 0.2026, "step": 10012 }, { "epoch": 0.7932659932659932, "grad_norm": 1.74231832692503, "learning_rate": 2.1585408750025584e-06, "loss": 0.1371, "step": 10013 }, { "epoch": 0.7933452168746287, "grad_norm": 1.522157793095154, "learning_rate": 2.1569487141971824e-06, "loss": 0.1764, "step": 10014 }, { "epoch": 0.793424440483264, "grad_norm": 1.629797164066686, "learning_rate": 2.155357069818995e-06, "loss": 0.232, "step": 10015 }, { "epoch": 0.7935036640918994, "grad_norm": 1.3883747090570508, "learning_rate": 2.1537659419727987e-06, "loss": 0.2118, "step": 10016 }, { "epoch": 0.7935828877005348, "grad_norm": 1.8661431270692048, "learning_rate": 2.152175330763359e-06, "loss": 0.108, "step": 10017 }, { "epoch": 0.7936621113091701, "grad_norm": 1.3944278233127205, "learning_rate": 2.150585236295415e-06, "loss": 0.1349, "step": 10018 }, { "epoch": 0.7937413349178055, "grad_norm": 1.392398724398443, "learning_rate": 2.148995658673665e-06, "loss": 0.1081, "step": 10019 }, { "epoch": 0.7938205585264408, "grad_norm": 1.6525013010408893, "learning_rate": 2.14740659800277e-06, "loss": 0.1809, "step": 10020 }, { "epoch": 0.7938997821350763, "grad_norm": 1.600757574755079, "learning_rate": 2.1458180543873697e-06, "loss": 0.1235, "step": 10021 }, { "epoch": 0.7939790057437116, "grad_norm": 1.6959189801343713, "learning_rate": 2.1442300279320593e-06, "loss": 0.1578, "step": 10022 }, { "epoch": 0.794058229352347, "grad_norm": 1.775667930781625, "learning_rate": 2.142642518741399e-06, "loss": 0.2742, "step": 10023 }, { "epoch": 0.7941374529609824, "grad_norm": 1.5579501042143638, "learning_rate": 2.141055526919924e-06, "loss": 0.114, "step": 10024 }, { "epoch": 0.7942166765696177, "grad_norm": 1.270325046075699, "learning_rate": 2.1394690525721275e-06, "loss": 0.1352, "step": 10025 }, { "epoch": 0.7942959001782531, "grad_norm": 1.6561846729585206, "learning_rate": 2.137883095802469e-06, "loss": 0.2266, "step": 10026 }, { "epoch": 0.7943751237868885, "grad_norm": 1.6738282412633614, "learning_rate": 2.1362976567153813e-06, "loss": 0.1764, "step": 10027 }, { "epoch": 0.7944543473955239, "grad_norm": 1.6976550261578531, "learning_rate": 2.134712735415255e-06, "loss": 0.2019, "step": 10028 }, { "epoch": 0.7945335710041592, "grad_norm": 1.4696840344431166, "learning_rate": 2.13312833200645e-06, "loss": 0.1435, "step": 10029 }, { "epoch": 0.7946127946127947, "grad_norm": 1.4003875061720121, "learning_rate": 2.131544446593289e-06, "loss": 0.1247, "step": 10030 }, { "epoch": 0.79469201822143, "grad_norm": 1.3843549351146476, "learning_rate": 2.1299610792800675e-06, "loss": 0.1037, "step": 10031 }, { "epoch": 0.7947712418300653, "grad_norm": 1.2525833791601648, "learning_rate": 2.1283782301710408e-06, "loss": 0.1203, "step": 10032 }, { "epoch": 0.7948504654387007, "grad_norm": 1.4484258631400002, "learning_rate": 2.1267958993704297e-06, "loss": 0.1765, "step": 10033 }, { "epoch": 0.7949296890473361, "grad_norm": 1.4152771342621562, "learning_rate": 2.1252140869824266e-06, "loss": 0.1485, "step": 10034 }, { "epoch": 0.7950089126559715, "grad_norm": 1.8643331778965033, "learning_rate": 2.1236327931111868e-06, "loss": 0.1861, "step": 10035 }, { "epoch": 0.7950881362646068, "grad_norm": 1.498891506088113, "learning_rate": 2.122052017860825e-06, "loss": 0.1426, "step": 10036 }, { "epoch": 0.7951673598732423, "grad_norm": 1.3808495616863234, "learning_rate": 2.120471761335434e-06, "loss": 0.129, "step": 10037 }, { "epoch": 0.7952465834818776, "grad_norm": 1.5324500141901025, "learning_rate": 2.118892023639064e-06, "loss": 0.0995, "step": 10038 }, { "epoch": 0.7953258070905129, "grad_norm": 1.6761058699444447, "learning_rate": 2.1173128048757307e-06, "loss": 0.1826, "step": 10039 }, { "epoch": 0.7954050306991484, "grad_norm": 1.585363782878541, "learning_rate": 2.115734105149422e-06, "loss": 0.2085, "step": 10040 }, { "epoch": 0.7954842543077837, "grad_norm": 1.5461038712788653, "learning_rate": 2.1141559245640865e-06, "loss": 0.1, "step": 10041 }, { "epoch": 0.7955634779164191, "grad_norm": 1.6139730034851818, "learning_rate": 2.1125782632236357e-06, "loss": 0.1482, "step": 10042 }, { "epoch": 0.7956427015250545, "grad_norm": 1.672022973226804, "learning_rate": 2.111001121231957e-06, "loss": 0.1368, "step": 10043 }, { "epoch": 0.7957219251336899, "grad_norm": 1.939269575701926, "learning_rate": 2.1094244986928956e-06, "loss": 0.199, "step": 10044 }, { "epoch": 0.7958011487423252, "grad_norm": 1.5465284925046832, "learning_rate": 2.1078483957102637e-06, "loss": 0.157, "step": 10045 }, { "epoch": 0.7958803723509605, "grad_norm": 1.2372117591846676, "learning_rate": 2.1062728123878383e-06, "loss": 0.1193, "step": 10046 }, { "epoch": 0.795959595959596, "grad_norm": 1.0899371777790527, "learning_rate": 2.1046977488293675e-06, "loss": 0.1078, "step": 10047 }, { "epoch": 0.7960388195682313, "grad_norm": 1.8196955824363443, "learning_rate": 2.1031232051385606e-06, "loss": 0.1874, "step": 10048 }, { "epoch": 0.7961180431768667, "grad_norm": 1.6934029344638837, "learning_rate": 2.1015491814190913e-06, "loss": 0.1905, "step": 10049 }, { "epoch": 0.7961972667855021, "grad_norm": 1.2761708467391744, "learning_rate": 2.099975677774606e-06, "loss": 0.1281, "step": 10050 }, { "epoch": 0.7962764903941375, "grad_norm": 1.3001720610321532, "learning_rate": 2.0984026943087087e-06, "loss": 0.1459, "step": 10051 }, { "epoch": 0.7963557140027728, "grad_norm": 1.7436757517148482, "learning_rate": 2.096830231124972e-06, "loss": 0.2174, "step": 10052 }, { "epoch": 0.7964349376114082, "grad_norm": 2.0254871002333967, "learning_rate": 2.0952582883269403e-06, "loss": 0.278, "step": 10053 }, { "epoch": 0.7965141612200436, "grad_norm": 2.0624690806201174, "learning_rate": 2.093686866018114e-06, "loss": 0.1953, "step": 10054 }, { "epoch": 0.7965933848286789, "grad_norm": 1.1574003552965064, "learning_rate": 2.0921159643019627e-06, "loss": 0.1018, "step": 10055 }, { "epoch": 0.7966726084373144, "grad_norm": 1.2272561335047898, "learning_rate": 2.0905455832819277e-06, "loss": 0.1385, "step": 10056 }, { "epoch": 0.7967518320459497, "grad_norm": 1.3680676860652303, "learning_rate": 2.088975723061408e-06, "loss": 0.1641, "step": 10057 }, { "epoch": 0.7968310556545851, "grad_norm": 1.2946262379219424, "learning_rate": 2.0874063837437687e-06, "loss": 0.1796, "step": 10058 }, { "epoch": 0.7969102792632204, "grad_norm": 1.6170030796631873, "learning_rate": 2.085837565432349e-06, "loss": 0.1764, "step": 10059 }, { "epoch": 0.7969895028718558, "grad_norm": 1.589702139256348, "learning_rate": 2.0842692682304442e-06, "loss": 0.1721, "step": 10060 }, { "epoch": 0.7970687264804912, "grad_norm": 1.4861482116536642, "learning_rate": 2.0827014922413213e-06, "loss": 0.1929, "step": 10061 }, { "epoch": 0.7971479500891265, "grad_norm": 2.116904576828805, "learning_rate": 2.0811342375682065e-06, "loss": 0.2659, "step": 10062 }, { "epoch": 0.797227173697762, "grad_norm": 1.394589371776751, "learning_rate": 2.0795675043143016e-06, "loss": 0.1196, "step": 10063 }, { "epoch": 0.7973063973063973, "grad_norm": 1.260542638084776, "learning_rate": 2.0780012925827653e-06, "loss": 0.0991, "step": 10064 }, { "epoch": 0.7973856209150327, "grad_norm": 1.399681603253, "learning_rate": 2.0764356024767228e-06, "loss": 0.2049, "step": 10065 }, { "epoch": 0.7974648445236681, "grad_norm": 1.7207505402694168, "learning_rate": 2.0748704340992743e-06, "loss": 0.1899, "step": 10066 }, { "epoch": 0.7975440681323034, "grad_norm": 1.3770166180821815, "learning_rate": 2.0733057875534734e-06, "loss": 0.1627, "step": 10067 }, { "epoch": 0.7976232917409388, "grad_norm": 1.309604247854031, "learning_rate": 2.0717416629423425e-06, "loss": 0.1297, "step": 10068 }, { "epoch": 0.7977025153495741, "grad_norm": 1.59963359782775, "learning_rate": 2.0701780603688783e-06, "loss": 0.0945, "step": 10069 }, { "epoch": 0.7977817389582096, "grad_norm": 1.3378879486392, "learning_rate": 2.068614979936032e-06, "loss": 0.1271, "step": 10070 }, { "epoch": 0.7978609625668449, "grad_norm": 1.720023720341635, "learning_rate": 2.0670524217467237e-06, "loss": 0.1558, "step": 10071 }, { "epoch": 0.7979401861754803, "grad_norm": 1.6811447599097589, "learning_rate": 2.0654903859038457e-06, "loss": 0.1417, "step": 10072 }, { "epoch": 0.7980194097841157, "grad_norm": 1.5077634819512435, "learning_rate": 2.0639288725102467e-06, "loss": 0.1623, "step": 10073 }, { "epoch": 0.798098633392751, "grad_norm": 1.2716758822265468, "learning_rate": 2.0623678816687433e-06, "loss": 0.095, "step": 10074 }, { "epoch": 0.7981778570013864, "grad_norm": 1.2788909379660118, "learning_rate": 2.0608074134821243e-06, "loss": 0.1128, "step": 10075 }, { "epoch": 0.7982570806100218, "grad_norm": 1.6234988712115461, "learning_rate": 2.0592474680531347e-06, "loss": 0.1477, "step": 10076 }, { "epoch": 0.7983363042186572, "grad_norm": 1.4192793330504276, "learning_rate": 2.0576880454844926e-06, "loss": 0.1663, "step": 10077 }, { "epoch": 0.7984155278272925, "grad_norm": 1.5796686463744987, "learning_rate": 2.0561291458788736e-06, "loss": 0.1446, "step": 10078 }, { "epoch": 0.7984947514359279, "grad_norm": 1.0989884124112725, "learning_rate": 2.0545707693389296e-06, "loss": 0.092, "step": 10079 }, { "epoch": 0.7985739750445633, "grad_norm": 1.5998706133694671, "learning_rate": 2.0530129159672685e-06, "loss": 0.1817, "step": 10080 }, { "epoch": 0.7986531986531986, "grad_norm": 1.1636296558117674, "learning_rate": 2.0514555858664663e-06, "loss": 0.0899, "step": 10081 }, { "epoch": 0.798732422261834, "grad_norm": 2.364029207493055, "learning_rate": 2.0498987791390713e-06, "loss": 0.164, "step": 10082 }, { "epoch": 0.7988116458704694, "grad_norm": 1.5230177713705173, "learning_rate": 2.0483424958875876e-06, "loss": 0.1112, "step": 10083 }, { "epoch": 0.7988908694791048, "grad_norm": 1.271856275702475, "learning_rate": 2.0467867362144867e-06, "loss": 0.1193, "step": 10084 }, { "epoch": 0.7989700930877401, "grad_norm": 1.8417327971505366, "learning_rate": 2.0452315002222134e-06, "loss": 0.1794, "step": 10085 }, { "epoch": 0.7990493166963755, "grad_norm": 1.901927848330103, "learning_rate": 2.04367678801317e-06, "loss": 0.1663, "step": 10086 }, { "epoch": 0.7991285403050109, "grad_norm": 1.2062639002162754, "learning_rate": 2.0421225996897243e-06, "loss": 0.1073, "step": 10087 }, { "epoch": 0.7992077639136462, "grad_norm": 1.551817034589452, "learning_rate": 2.0405689353542204e-06, "loss": 0.1513, "step": 10088 }, { "epoch": 0.7992869875222817, "grad_norm": 1.8257863477116614, "learning_rate": 2.0390157951089506e-06, "loss": 0.2563, "step": 10089 }, { "epoch": 0.799366211130917, "grad_norm": 1.3803848172969628, "learning_rate": 2.0374631790561815e-06, "loss": 0.1351, "step": 10090 }, { "epoch": 0.7994454347395524, "grad_norm": 1.4254438243035916, "learning_rate": 2.0359110872981526e-06, "loss": 0.1775, "step": 10091 }, { "epoch": 0.7995246583481878, "grad_norm": 1.4489089324482682, "learning_rate": 2.034359519937057e-06, "loss": 0.2376, "step": 10092 }, { "epoch": 0.7996038819568231, "grad_norm": 1.2478757956891127, "learning_rate": 2.032808477075057e-06, "loss": 0.103, "step": 10093 }, { "epoch": 0.7996831055654585, "grad_norm": 1.4629855566074748, "learning_rate": 2.0312579588142846e-06, "loss": 0.1324, "step": 10094 }, { "epoch": 0.7997623291740938, "grad_norm": 1.8229933961937135, "learning_rate": 2.029707965256833e-06, "loss": 0.3169, "step": 10095 }, { "epoch": 0.7998415527827293, "grad_norm": 1.5385007592991855, "learning_rate": 2.0281584965047585e-06, "loss": 0.1434, "step": 10096 }, { "epoch": 0.7999207763913646, "grad_norm": 1.5602318935580481, "learning_rate": 2.0266095526600925e-06, "loss": 0.1478, "step": 10097 }, { "epoch": 0.8, "grad_norm": 1.8613677904147585, "learning_rate": 2.0250611338248215e-06, "loss": 0.1783, "step": 10098 }, { "epoch": 0.8000792236086354, "grad_norm": 1.1816379135958381, "learning_rate": 2.0235132401008985e-06, "loss": 0.1016, "step": 10099 }, { "epoch": 0.8001584472172707, "grad_norm": 1.508357582334823, "learning_rate": 2.0219658715902514e-06, "loss": 0.1201, "step": 10100 }, { "epoch": 0.8002376708259061, "grad_norm": 1.700854720807967, "learning_rate": 2.0204190283947645e-06, "loss": 0.1715, "step": 10101 }, { "epoch": 0.8003168944345415, "grad_norm": 1.3664397670804829, "learning_rate": 2.0188727106162874e-06, "loss": 0.0966, "step": 10102 }, { "epoch": 0.8003961180431769, "grad_norm": 2.385838496637402, "learning_rate": 2.017326918356639e-06, "loss": 0.2359, "step": 10103 }, { "epoch": 0.8004753416518122, "grad_norm": 1.4365508647778298, "learning_rate": 2.0157816517176045e-06, "loss": 0.1421, "step": 10104 }, { "epoch": 0.8005545652604477, "grad_norm": 1.943683144532721, "learning_rate": 2.0142369108009306e-06, "loss": 0.2097, "step": 10105 }, { "epoch": 0.800633788869083, "grad_norm": 1.3436318033767165, "learning_rate": 2.012692695708328e-06, "loss": 0.1057, "step": 10106 }, { "epoch": 0.8007130124777183, "grad_norm": 1.5499590469250224, "learning_rate": 2.011149006541483e-06, "loss": 0.1768, "step": 10107 }, { "epoch": 0.8007922360863537, "grad_norm": 1.6404213910855365, "learning_rate": 2.0096058434020348e-06, "loss": 0.1633, "step": 10108 }, { "epoch": 0.8008714596949891, "grad_norm": 1.6223471620885814, "learning_rate": 2.0080632063915927e-06, "loss": 0.1328, "step": 10109 }, { "epoch": 0.8009506833036245, "grad_norm": 1.3071802592812904, "learning_rate": 2.0065210956117354e-06, "loss": 0.1393, "step": 10110 }, { "epoch": 0.8010299069122598, "grad_norm": 1.5464149552672812, "learning_rate": 2.0049795111640023e-06, "loss": 0.1868, "step": 10111 }, { "epoch": 0.8011091305208953, "grad_norm": 1.198399349005077, "learning_rate": 2.0034384531498962e-06, "loss": 0.1185, "step": 10112 }, { "epoch": 0.8011883541295306, "grad_norm": 1.7867543862279636, "learning_rate": 2.0018979216708935e-06, "loss": 0.1933, "step": 10113 }, { "epoch": 0.8012675777381659, "grad_norm": 1.8636313720727733, "learning_rate": 2.000357916828428e-06, "loss": 0.241, "step": 10114 }, { "epoch": 0.8013468013468014, "grad_norm": 1.128102643210593, "learning_rate": 1.9988184387239027e-06, "loss": 0.0846, "step": 10115 }, { "epoch": 0.8014260249554367, "grad_norm": 1.5799733785609547, "learning_rate": 1.9972794874586808e-06, "loss": 0.2104, "step": 10116 }, { "epoch": 0.8015052485640721, "grad_norm": 1.4905271844760142, "learning_rate": 1.9957410631341e-06, "loss": 0.1756, "step": 10117 }, { "epoch": 0.8015844721727075, "grad_norm": 1.3595749645203963, "learning_rate": 1.9942031658514573e-06, "loss": 0.1586, "step": 10118 }, { "epoch": 0.8016636957813429, "grad_norm": 1.2394181239867947, "learning_rate": 1.992665795712011e-06, "loss": 0.1094, "step": 10119 }, { "epoch": 0.8017429193899782, "grad_norm": 2.351212147907515, "learning_rate": 1.991128952816996e-06, "loss": 0.1992, "step": 10120 }, { "epoch": 0.8018221429986135, "grad_norm": 1.4138074303762753, "learning_rate": 1.9895926372676042e-06, "loss": 0.1358, "step": 10121 }, { "epoch": 0.801901366607249, "grad_norm": 2.550023113461321, "learning_rate": 1.988056849164991e-06, "loss": 0.2071, "step": 10122 }, { "epoch": 0.8019805902158843, "grad_norm": 1.407072783639433, "learning_rate": 1.986521588610285e-06, "loss": 0.1028, "step": 10123 }, { "epoch": 0.8020598138245197, "grad_norm": 1.5554430663669578, "learning_rate": 1.9849868557045738e-06, "loss": 0.1459, "step": 10124 }, { "epoch": 0.8021390374331551, "grad_norm": 1.3596984063936324, "learning_rate": 1.9834526505489105e-06, "loss": 0.1335, "step": 10125 }, { "epoch": 0.8022182610417905, "grad_norm": 1.6867999110951097, "learning_rate": 1.9819189732443187e-06, "loss": 0.1894, "step": 10126 }, { "epoch": 0.8022974846504258, "grad_norm": 1.2631506900831124, "learning_rate": 1.9803858238917826e-06, "loss": 0.1283, "step": 10127 }, { "epoch": 0.8023767082590612, "grad_norm": 1.590177265379649, "learning_rate": 1.97885320259225e-06, "loss": 0.1529, "step": 10128 }, { "epoch": 0.8024559318676966, "grad_norm": 1.7504912380558968, "learning_rate": 1.9773211094466404e-06, "loss": 0.1385, "step": 10129 }, { "epoch": 0.8025351554763319, "grad_norm": 1.383627748702993, "learning_rate": 1.975789544555834e-06, "loss": 0.1581, "step": 10130 }, { "epoch": 0.8026143790849674, "grad_norm": 0.9140639757121933, "learning_rate": 1.9742585080206754e-06, "loss": 0.0771, "step": 10131 }, { "epoch": 0.8026936026936027, "grad_norm": 1.2681141780402247, "learning_rate": 1.9727279999419745e-06, "loss": 0.0937, "step": 10132 }, { "epoch": 0.8027728263022381, "grad_norm": 1.4437582560043931, "learning_rate": 1.9711980204205115e-06, "loss": 0.1411, "step": 10133 }, { "epoch": 0.8028520499108734, "grad_norm": 2.0257689897551496, "learning_rate": 1.9696685695570285e-06, "loss": 0.1582, "step": 10134 }, { "epoch": 0.8029312735195088, "grad_norm": 1.380091833340432, "learning_rate": 1.9681396474522264e-06, "loss": 0.1294, "step": 10135 }, { "epoch": 0.8030104971281442, "grad_norm": 1.429248547889866, "learning_rate": 1.966611254206785e-06, "loss": 0.1574, "step": 10136 }, { "epoch": 0.8030897207367795, "grad_norm": 1.7395309804758408, "learning_rate": 1.9650833899213383e-06, "loss": 0.2106, "step": 10137 }, { "epoch": 0.803168944345415, "grad_norm": 1.543049780955099, "learning_rate": 1.963556054696487e-06, "loss": 0.1215, "step": 10138 }, { "epoch": 0.8032481679540503, "grad_norm": 1.4159356945475823, "learning_rate": 1.962029248632802e-06, "loss": 0.1223, "step": 10139 }, { "epoch": 0.8033273915626857, "grad_norm": 1.697947199007826, "learning_rate": 1.9605029718308156e-06, "loss": 0.147, "step": 10140 }, { "epoch": 0.8034066151713211, "grad_norm": 1.501095133673133, "learning_rate": 1.958977224391021e-06, "loss": 0.1293, "step": 10141 }, { "epoch": 0.8034858387799564, "grad_norm": 1.4484840347357082, "learning_rate": 1.957452006413889e-06, "loss": 0.1859, "step": 10142 }, { "epoch": 0.8035650623885918, "grad_norm": 1.7972089667484514, "learning_rate": 1.955927317999844e-06, "loss": 0.1473, "step": 10143 }, { "epoch": 0.8036442859972271, "grad_norm": 1.5542272362875562, "learning_rate": 1.9544031592492763e-06, "loss": 0.2153, "step": 10144 }, { "epoch": 0.8037235096058626, "grad_norm": 1.5303449893007977, "learning_rate": 1.9528795302625515e-06, "loss": 0.155, "step": 10145 }, { "epoch": 0.8038027332144979, "grad_norm": 1.963975460964055, "learning_rate": 1.951356431139988e-06, "loss": 0.194, "step": 10146 }, { "epoch": 0.8038819568231333, "grad_norm": 1.6653093872282159, "learning_rate": 1.949833861981877e-06, "loss": 0.1638, "step": 10147 }, { "epoch": 0.8039611804317687, "grad_norm": 2.0885010103761137, "learning_rate": 1.948311822888468e-06, "loss": 0.283, "step": 10148 }, { "epoch": 0.804040404040404, "grad_norm": 1.2158365030210319, "learning_rate": 1.9467903139599853e-06, "loss": 0.1182, "step": 10149 }, { "epoch": 0.8041196276490394, "grad_norm": 1.29934060553683, "learning_rate": 1.945269335296611e-06, "loss": 0.1091, "step": 10150 }, { "epoch": 0.8041988512576748, "grad_norm": 1.422297877206242, "learning_rate": 1.943748886998492e-06, "loss": 0.1589, "step": 10151 }, { "epoch": 0.8042780748663102, "grad_norm": 1.423596780024782, "learning_rate": 1.942228969165748e-06, "loss": 0.1929, "step": 10152 }, { "epoch": 0.8043572984749455, "grad_norm": 1.515650404112296, "learning_rate": 1.940709581898453e-06, "loss": 0.1561, "step": 10153 }, { "epoch": 0.8044365220835809, "grad_norm": 1.4124082812732461, "learning_rate": 1.9391907252966522e-06, "loss": 0.1675, "step": 10154 }, { "epoch": 0.8045157456922163, "grad_norm": 1.8424677881916665, "learning_rate": 1.9376723994603574e-06, "loss": 0.2195, "step": 10155 }, { "epoch": 0.8045949693008516, "grad_norm": 1.4488302868411622, "learning_rate": 1.936154604489543e-06, "loss": 0.141, "step": 10156 }, { "epoch": 0.804674192909487, "grad_norm": 1.3601410458661092, "learning_rate": 1.9346373404841433e-06, "loss": 0.1442, "step": 10157 }, { "epoch": 0.8047534165181224, "grad_norm": 1.5949133377001212, "learning_rate": 1.93312060754407e-06, "loss": 0.1283, "step": 10158 }, { "epoch": 0.8048326401267578, "grad_norm": 1.5321869344100048, "learning_rate": 1.9316044057691886e-06, "loss": 0.1166, "step": 10159 }, { "epoch": 0.8049118637353931, "grad_norm": 1.4355298856536187, "learning_rate": 1.9300887352593355e-06, "loss": 0.1761, "step": 10160 }, { "epoch": 0.8049910873440285, "grad_norm": 1.581620378126073, "learning_rate": 1.928573596114306e-06, "loss": 0.1828, "step": 10161 }, { "epoch": 0.8050703109526639, "grad_norm": 1.1867670072055803, "learning_rate": 1.9270589884338706e-06, "loss": 0.1132, "step": 10162 }, { "epoch": 0.8051495345612992, "grad_norm": 1.483492389032306, "learning_rate": 1.9255449123177563e-06, "loss": 0.1239, "step": 10163 }, { "epoch": 0.8052287581699347, "grad_norm": 1.937197663060176, "learning_rate": 1.924031367865655e-06, "loss": 0.2173, "step": 10164 }, { "epoch": 0.80530798177857, "grad_norm": 1.3902151136750005, "learning_rate": 1.922518355177232e-06, "loss": 0.1544, "step": 10165 }, { "epoch": 0.8053872053872054, "grad_norm": 1.5249798481509889, "learning_rate": 1.921005874352109e-06, "loss": 0.1901, "step": 10166 }, { "epoch": 0.8054664289958408, "grad_norm": 1.5704410352847817, "learning_rate": 1.9194939254898746e-06, "loss": 0.2062, "step": 10167 }, { "epoch": 0.8055456526044761, "grad_norm": 1.6748794058153496, "learning_rate": 1.917982508690085e-06, "loss": 0.1676, "step": 10168 }, { "epoch": 0.8056248762131115, "grad_norm": 1.6403306464579648, "learning_rate": 1.916471624052256e-06, "loss": 0.1739, "step": 10169 }, { "epoch": 0.8057040998217468, "grad_norm": 1.9178260160582072, "learning_rate": 1.914961271675879e-06, "loss": 0.1947, "step": 10170 }, { "epoch": 0.8057833234303823, "grad_norm": 1.5429835706375996, "learning_rate": 1.9134514516603987e-06, "loss": 0.1252, "step": 10171 }, { "epoch": 0.8058625470390176, "grad_norm": 1.3060493762375314, "learning_rate": 1.9119421641052294e-06, "loss": 0.1245, "step": 10172 }, { "epoch": 0.805941770647653, "grad_norm": 1.3233080574823295, "learning_rate": 1.91043340910975e-06, "loss": 0.1055, "step": 10173 }, { "epoch": 0.8060209942562884, "grad_norm": 1.4000093105511395, "learning_rate": 1.908925186773308e-06, "loss": 0.0821, "step": 10174 }, { "epoch": 0.8061002178649237, "grad_norm": 1.339805727400341, "learning_rate": 1.907417497195211e-06, "loss": 0.0893, "step": 10175 }, { "epoch": 0.8061794414735591, "grad_norm": 1.3999300862862118, "learning_rate": 1.9059103404747303e-06, "loss": 0.2248, "step": 10176 }, { "epoch": 0.8062586650821945, "grad_norm": 1.3871411333071113, "learning_rate": 1.9044037167111096e-06, "loss": 0.1595, "step": 10177 }, { "epoch": 0.8063378886908299, "grad_norm": 1.3162439612056145, "learning_rate": 1.9028976260035515e-06, "loss": 0.1314, "step": 10178 }, { "epoch": 0.8064171122994652, "grad_norm": 1.3438839106795213, "learning_rate": 1.901392068451221e-06, "loss": 0.1342, "step": 10179 }, { "epoch": 0.8064963359081007, "grad_norm": 1.6214944652941157, "learning_rate": 1.8998870441532569e-06, "loss": 0.1763, "step": 10180 }, { "epoch": 0.806575559516736, "grad_norm": 1.4374563650281171, "learning_rate": 1.8983825532087551e-06, "loss": 0.1284, "step": 10181 }, { "epoch": 0.8066547831253713, "grad_norm": 1.3766630589202025, "learning_rate": 1.8968785957167779e-06, "loss": 0.1376, "step": 10182 }, { "epoch": 0.8067340067340067, "grad_norm": 1.4271095294128944, "learning_rate": 1.8953751717763592e-06, "loss": 0.1351, "step": 10183 }, { "epoch": 0.8068132303426421, "grad_norm": 1.6732051555301723, "learning_rate": 1.8938722814864863e-06, "loss": 0.142, "step": 10184 }, { "epoch": 0.8068924539512775, "grad_norm": 1.9145342038410325, "learning_rate": 1.8923699249461214e-06, "loss": 0.2429, "step": 10185 }, { "epoch": 0.8069716775599128, "grad_norm": 1.6207421022625255, "learning_rate": 1.890868102254182e-06, "loss": 0.1901, "step": 10186 }, { "epoch": 0.8070509011685483, "grad_norm": 3.2524947785552434, "learning_rate": 1.8893668135095611e-06, "loss": 0.1623, "step": 10187 }, { "epoch": 0.8071301247771836, "grad_norm": 1.9078402332663684, "learning_rate": 1.8878660588111108e-06, "loss": 0.2134, "step": 10188 }, { "epoch": 0.8072093483858189, "grad_norm": 1.689520950573459, "learning_rate": 1.8863658382576444e-06, "loss": 0.117, "step": 10189 }, { "epoch": 0.8072885719944544, "grad_norm": 1.3460928841631432, "learning_rate": 1.8848661519479504e-06, "loss": 0.1252, "step": 10190 }, { "epoch": 0.8073677956030897, "grad_norm": 1.2925654138633549, "learning_rate": 1.8833669999807723e-06, "loss": 0.1263, "step": 10191 }, { "epoch": 0.8074470192117251, "grad_norm": 1.547395445615637, "learning_rate": 1.88186838245482e-06, "loss": 0.1644, "step": 10192 }, { "epoch": 0.8075262428203605, "grad_norm": 1.4744145856896094, "learning_rate": 1.8803702994687755e-06, "loss": 0.1493, "step": 10193 }, { "epoch": 0.8076054664289959, "grad_norm": 2.0520884858072597, "learning_rate": 1.8788727511212768e-06, "loss": 0.2142, "step": 10194 }, { "epoch": 0.8076846900376312, "grad_norm": 1.4537298009652566, "learning_rate": 1.8773757375109292e-06, "loss": 0.1166, "step": 10195 }, { "epoch": 0.8077639136462665, "grad_norm": 1.7031439386205676, "learning_rate": 1.8758792587363084e-06, "loss": 0.1472, "step": 10196 }, { "epoch": 0.807843137254902, "grad_norm": 1.4041291261632518, "learning_rate": 1.8743833148959479e-06, "loss": 0.1381, "step": 10197 }, { "epoch": 0.8079223608635373, "grad_norm": 1.4175470188767687, "learning_rate": 1.8728879060883443e-06, "loss": 0.1386, "step": 10198 }, { "epoch": 0.8080015844721727, "grad_norm": 1.5324494717929245, "learning_rate": 1.8713930324119711e-06, "loss": 0.1304, "step": 10199 }, { "epoch": 0.8080808080808081, "grad_norm": 1.6193221912454494, "learning_rate": 1.869898693965253e-06, "loss": 0.1355, "step": 10200 }, { "epoch": 0.8081600316894435, "grad_norm": 2.1245017621506683, "learning_rate": 1.868404890846587e-06, "loss": 0.1498, "step": 10201 }, { "epoch": 0.8082392552980788, "grad_norm": 1.7341259367735058, "learning_rate": 1.8669116231543294e-06, "loss": 0.0997, "step": 10202 }, { "epoch": 0.8083184789067142, "grad_norm": 1.5921317302274582, "learning_rate": 1.865418890986811e-06, "loss": 0.1963, "step": 10203 }, { "epoch": 0.8083977025153496, "grad_norm": 1.5524424906902254, "learning_rate": 1.8639266944423163e-06, "loss": 0.0806, "step": 10204 }, { "epoch": 0.8084769261239849, "grad_norm": 1.503889931488173, "learning_rate": 1.8624350336190977e-06, "loss": 0.1831, "step": 10205 }, { "epoch": 0.8085561497326204, "grad_norm": 2.4269029017068333, "learning_rate": 1.8609439086153803e-06, "loss": 0.1867, "step": 10206 }, { "epoch": 0.8086353733412557, "grad_norm": 1.628669563698834, "learning_rate": 1.859453319529343e-06, "loss": 0.2021, "step": 10207 }, { "epoch": 0.8087145969498911, "grad_norm": 1.4857230049323982, "learning_rate": 1.857963266459133e-06, "loss": 0.1583, "step": 10208 }, { "epoch": 0.8087938205585264, "grad_norm": 1.489637359184151, "learning_rate": 1.8564737495028673e-06, "loss": 0.1191, "step": 10209 }, { "epoch": 0.8088730441671618, "grad_norm": 1.9414988822093187, "learning_rate": 1.854984768758621e-06, "loss": 0.2305, "step": 10210 }, { "epoch": 0.8089522677757972, "grad_norm": 1.5178442810974178, "learning_rate": 1.853496324324434e-06, "loss": 0.1273, "step": 10211 }, { "epoch": 0.8090314913844325, "grad_norm": 1.4632503103421797, "learning_rate": 1.8520084162983176e-06, "loss": 0.1632, "step": 10212 }, { "epoch": 0.809110714993068, "grad_norm": 1.361152823939929, "learning_rate": 1.8505210447782418e-06, "loss": 0.1173, "step": 10213 }, { "epoch": 0.8091899386017033, "grad_norm": 1.4494503293152026, "learning_rate": 1.8490342098621395e-06, "loss": 0.1612, "step": 10214 }, { "epoch": 0.8092691622103387, "grad_norm": 1.4320130612131827, "learning_rate": 1.8475479116479166e-06, "loss": 0.1323, "step": 10215 }, { "epoch": 0.8093483858189741, "grad_norm": 1.9164216849273914, "learning_rate": 1.8460621502334375e-06, "loss": 0.2167, "step": 10216 }, { "epoch": 0.8094276094276094, "grad_norm": 2.1218331232522107, "learning_rate": 1.8445769257165314e-06, "loss": 0.279, "step": 10217 }, { "epoch": 0.8095068330362448, "grad_norm": 1.3690367984547889, "learning_rate": 1.8430922381949912e-06, "loss": 0.1227, "step": 10218 }, { "epoch": 0.8095860566448801, "grad_norm": 1.2720416047960106, "learning_rate": 1.84160808776658e-06, "loss": 0.1168, "step": 10219 }, { "epoch": 0.8096652802535156, "grad_norm": 1.2754975272351605, "learning_rate": 1.8401244745290214e-06, "loss": 0.1489, "step": 10220 }, { "epoch": 0.8097445038621509, "grad_norm": 1.684514865833414, "learning_rate": 1.838641398580001e-06, "loss": 0.1547, "step": 10221 }, { "epoch": 0.8098237274707863, "grad_norm": 1.2835216458748944, "learning_rate": 1.8371588600171764e-06, "loss": 0.135, "step": 10222 }, { "epoch": 0.8099029510794217, "grad_norm": 1.1807354882395644, "learning_rate": 1.8356768589381646e-06, "loss": 0.1333, "step": 10223 }, { "epoch": 0.809982174688057, "grad_norm": 1.4277479278290404, "learning_rate": 1.8341953954405434e-06, "loss": 0.1034, "step": 10224 }, { "epoch": 0.8100613982966924, "grad_norm": 1.3890390353434057, "learning_rate": 1.832714469621868e-06, "loss": 0.1501, "step": 10225 }, { "epoch": 0.8101406219053278, "grad_norm": 1.32985190738275, "learning_rate": 1.8312340815796458e-06, "loss": 0.1423, "step": 10226 }, { "epoch": 0.8102198455139632, "grad_norm": 1.6146013798383707, "learning_rate": 1.8297542314113515e-06, "loss": 0.1788, "step": 10227 }, { "epoch": 0.8102990691225985, "grad_norm": 1.555922643750267, "learning_rate": 1.82827491921443e-06, "loss": 0.1985, "step": 10228 }, { "epoch": 0.810378292731234, "grad_norm": 1.4560983811400832, "learning_rate": 1.8267961450862859e-06, "loss": 0.1647, "step": 10229 }, { "epoch": 0.8104575163398693, "grad_norm": 1.3916763100502931, "learning_rate": 1.8253179091242868e-06, "loss": 0.1692, "step": 10230 }, { "epoch": 0.8105367399485046, "grad_norm": 1.3225720232642015, "learning_rate": 1.8238402114257714e-06, "loss": 0.1291, "step": 10231 }, { "epoch": 0.81061596355714, "grad_norm": 1.6022248695588641, "learning_rate": 1.8223630520880365e-06, "loss": 0.15, "step": 10232 }, { "epoch": 0.8106951871657754, "grad_norm": 1.499400266212322, "learning_rate": 1.8208864312083462e-06, "loss": 0.1798, "step": 10233 }, { "epoch": 0.8107744107744108, "grad_norm": 1.0841858671403477, "learning_rate": 1.8194103488839265e-06, "loss": 0.0903, "step": 10234 }, { "epoch": 0.8108536343830461, "grad_norm": 1.9480837370727113, "learning_rate": 1.817934805211976e-06, "loss": 0.2158, "step": 10235 }, { "epoch": 0.8109328579916815, "grad_norm": 1.0799161844457754, "learning_rate": 1.8164598002896484e-06, "loss": 0.0699, "step": 10236 }, { "epoch": 0.8110120816003169, "grad_norm": 1.650267265375426, "learning_rate": 1.8149853342140644e-06, "loss": 0.1791, "step": 10237 }, { "epoch": 0.8110913052089522, "grad_norm": 1.5440341658163712, "learning_rate": 1.8135114070823145e-06, "loss": 0.122, "step": 10238 }, { "epoch": 0.8111705288175877, "grad_norm": 1.3154770436476249, "learning_rate": 1.8120380189914476e-06, "loss": 0.1393, "step": 10239 }, { "epoch": 0.811249752426223, "grad_norm": 1.3547774727000639, "learning_rate": 1.8105651700384764e-06, "loss": 0.13, "step": 10240 }, { "epoch": 0.8113289760348584, "grad_norm": 1.5862613403045687, "learning_rate": 1.8090928603203871e-06, "loss": 0.1583, "step": 10241 }, { "epoch": 0.8114081996434938, "grad_norm": 1.0949917882524616, "learning_rate": 1.8076210899341196e-06, "loss": 0.0723, "step": 10242 }, { "epoch": 0.8114874232521291, "grad_norm": 1.3598693622546882, "learning_rate": 1.8061498589765824e-06, "loss": 0.1375, "step": 10243 }, { "epoch": 0.8115666468607645, "grad_norm": 1.3442668370230686, "learning_rate": 1.804679167544655e-06, "loss": 0.1103, "step": 10244 }, { "epoch": 0.8116458704693998, "grad_norm": 1.8530545587670106, "learning_rate": 1.8032090157351701e-06, "loss": 0.2337, "step": 10245 }, { "epoch": 0.8117250940780353, "grad_norm": 1.6407431942640982, "learning_rate": 1.8017394036449276e-06, "loss": 0.1247, "step": 10246 }, { "epoch": 0.8118043176866706, "grad_norm": 1.7582120303689612, "learning_rate": 1.8002703313706993e-06, "loss": 0.1684, "step": 10247 }, { "epoch": 0.811883541295306, "grad_norm": 1.4737780048789422, "learning_rate": 1.7988017990092167e-06, "loss": 0.1663, "step": 10248 }, { "epoch": 0.8119627649039414, "grad_norm": 1.4752616047110867, "learning_rate": 1.797333806657171e-06, "loss": 0.1853, "step": 10249 }, { "epoch": 0.8120419885125767, "grad_norm": 1.5995545597588632, "learning_rate": 1.7958663544112277e-06, "loss": 0.1781, "step": 10250 }, { "epoch": 0.8121212121212121, "grad_norm": 1.5764097328095472, "learning_rate": 1.794399442368009e-06, "loss": 0.2008, "step": 10251 }, { "epoch": 0.8122004357298475, "grad_norm": 1.7650027714958156, "learning_rate": 1.7929330706241023e-06, "loss": 0.1848, "step": 10252 }, { "epoch": 0.8122796593384829, "grad_norm": 1.640980019165187, "learning_rate": 1.7914672392760645e-06, "loss": 0.1987, "step": 10253 }, { "epoch": 0.8123588829471182, "grad_norm": 1.3983029711889268, "learning_rate": 1.7900019484204135e-06, "loss": 0.1617, "step": 10254 }, { "epoch": 0.8124381065557537, "grad_norm": 1.6621175032104232, "learning_rate": 1.788537198153627e-06, "loss": 0.2053, "step": 10255 }, { "epoch": 0.812517330164389, "grad_norm": 1.3284279640787715, "learning_rate": 1.787072988572157e-06, "loss": 0.1618, "step": 10256 }, { "epoch": 0.8125965537730243, "grad_norm": 1.319564027415114, "learning_rate": 1.7856093197724133e-06, "loss": 0.2032, "step": 10257 }, { "epoch": 0.8126757773816597, "grad_norm": 1.2499998373100147, "learning_rate": 1.7841461918507708e-06, "loss": 0.1222, "step": 10258 }, { "epoch": 0.8127550009902951, "grad_norm": 2.3355292251028925, "learning_rate": 1.7826836049035655e-06, "loss": 0.2188, "step": 10259 }, { "epoch": 0.8128342245989305, "grad_norm": 1.5548397470585844, "learning_rate": 1.7812215590271099e-06, "loss": 0.1365, "step": 10260 }, { "epoch": 0.8129134482075658, "grad_norm": 1.3376105877286772, "learning_rate": 1.7797600543176675e-06, "loss": 0.1327, "step": 10261 }, { "epoch": 0.8129926718162013, "grad_norm": 1.6440723227014375, "learning_rate": 1.7782990908714703e-06, "loss": 0.1846, "step": 10262 }, { "epoch": 0.8130718954248366, "grad_norm": 1.3063096188681953, "learning_rate": 1.7768386687847194e-06, "loss": 0.1859, "step": 10263 }, { "epoch": 0.8131511190334719, "grad_norm": 1.4561815971044818, "learning_rate": 1.7753787881535757e-06, "loss": 0.1237, "step": 10264 }, { "epoch": 0.8132303426421074, "grad_norm": 1.487580613271827, "learning_rate": 1.7739194490741607e-06, "loss": 0.1847, "step": 10265 }, { "epoch": 0.8133095662507427, "grad_norm": 1.3401571802784014, "learning_rate": 1.7724606516425724e-06, "loss": 0.0887, "step": 10266 }, { "epoch": 0.8133887898593781, "grad_norm": 1.7160559866542362, "learning_rate": 1.7710023959548617e-06, "loss": 0.1992, "step": 10267 }, { "epoch": 0.8134680134680135, "grad_norm": 1.9485808893810683, "learning_rate": 1.7695446821070438e-06, "loss": 0.1365, "step": 10268 }, { "epoch": 0.8135472370766489, "grad_norm": 1.2232951640120895, "learning_rate": 1.76808751019511e-06, "loss": 0.1136, "step": 10269 }, { "epoch": 0.8136264606852842, "grad_norm": 1.6849270389519992, "learning_rate": 1.7666308803150045e-06, "loss": 0.164, "step": 10270 }, { "epoch": 0.8137056842939195, "grad_norm": 1.6571948338488582, "learning_rate": 1.7651747925626383e-06, "loss": 0.2078, "step": 10271 }, { "epoch": 0.813784907902555, "grad_norm": 1.2751020666041997, "learning_rate": 1.763719247033886e-06, "loss": 0.133, "step": 10272 }, { "epoch": 0.8138641315111903, "grad_norm": 1.7490706940946588, "learning_rate": 1.762264243824594e-06, "loss": 0.1533, "step": 10273 }, { "epoch": 0.8139433551198257, "grad_norm": 2.1033476044461703, "learning_rate": 1.7608097830305637e-06, "loss": 0.1296, "step": 10274 }, { "epoch": 0.8140225787284611, "grad_norm": 1.757098556024279, "learning_rate": 1.7593558647475627e-06, "loss": 0.2016, "step": 10275 }, { "epoch": 0.8141018023370965, "grad_norm": 1.4005303019412985, "learning_rate": 1.7579024890713282e-06, "loss": 0.1355, "step": 10276 }, { "epoch": 0.8141810259457318, "grad_norm": 1.2944314907712298, "learning_rate": 1.7564496560975574e-06, "loss": 0.1137, "step": 10277 }, { "epoch": 0.8142602495543672, "grad_norm": 1.2065437881129497, "learning_rate": 1.7549973659219077e-06, "loss": 0.1206, "step": 10278 }, { "epoch": 0.8143394731630026, "grad_norm": 1.563223827830552, "learning_rate": 1.7535456186400123e-06, "loss": 0.1349, "step": 10279 }, { "epoch": 0.8144186967716379, "grad_norm": 1.293468899058931, "learning_rate": 1.7520944143474584e-06, "loss": 0.1199, "step": 10280 }, { "epoch": 0.8144979203802734, "grad_norm": 1.9545872410534244, "learning_rate": 1.750643753139798e-06, "loss": 0.2305, "step": 10281 }, { "epoch": 0.8145771439889087, "grad_norm": 1.45167296938253, "learning_rate": 1.749193635112556e-06, "loss": 0.1846, "step": 10282 }, { "epoch": 0.8146563675975441, "grad_norm": 1.4773059217776972, "learning_rate": 1.7477440603612127e-06, "loss": 0.12, "step": 10283 }, { "epoch": 0.8147355912061794, "grad_norm": 1.6397864836033171, "learning_rate": 1.746295028981213e-06, "loss": 0.1707, "step": 10284 }, { "epoch": 0.8148148148148148, "grad_norm": 1.5442778327129143, "learning_rate": 1.7448465410679737e-06, "loss": 0.0818, "step": 10285 }, { "epoch": 0.8148940384234502, "grad_norm": 1.6107266736935286, "learning_rate": 1.7433985967168686e-06, "loss": 0.1548, "step": 10286 }, { "epoch": 0.8149732620320855, "grad_norm": 1.556060702484606, "learning_rate": 1.7419511960232384e-06, "loss": 0.143, "step": 10287 }, { "epoch": 0.815052485640721, "grad_norm": 1.3043812718686045, "learning_rate": 1.7405043390823827e-06, "loss": 0.1298, "step": 10288 }, { "epoch": 0.8151317092493563, "grad_norm": 1.5310681664110861, "learning_rate": 1.7390580259895783e-06, "loss": 0.1223, "step": 10289 }, { "epoch": 0.8152109328579917, "grad_norm": 1.813732762518309, "learning_rate": 1.7376122568400533e-06, "loss": 0.1887, "step": 10290 }, { "epoch": 0.8152901564666271, "grad_norm": 1.8248206105619527, "learning_rate": 1.7361670317290014e-06, "loss": 0.2044, "step": 10291 }, { "epoch": 0.8153693800752624, "grad_norm": 1.455164810741571, "learning_rate": 1.7347223507515908e-06, "loss": 0.1562, "step": 10292 }, { "epoch": 0.8154486036838978, "grad_norm": 1.2684545191068992, "learning_rate": 1.7332782140029436e-06, "loss": 0.1588, "step": 10293 }, { "epoch": 0.8155278272925331, "grad_norm": 1.3060185125176813, "learning_rate": 1.7318346215781468e-06, "loss": 0.1134, "step": 10294 }, { "epoch": 0.8156070509011686, "grad_norm": 1.2471042013693805, "learning_rate": 1.7303915735722586e-06, "loss": 0.1313, "step": 10295 }, { "epoch": 0.8156862745098039, "grad_norm": 1.4156723512949727, "learning_rate": 1.7289490700802947e-06, "loss": 0.1812, "step": 10296 }, { "epoch": 0.8157654981184393, "grad_norm": 1.4905878848810883, "learning_rate": 1.727507111197233e-06, "loss": 0.2018, "step": 10297 }, { "epoch": 0.8158447217270747, "grad_norm": 1.469917636532856, "learning_rate": 1.7260656970180268e-06, "loss": 0.1375, "step": 10298 }, { "epoch": 0.81592394533571, "grad_norm": 1.7008029424453908, "learning_rate": 1.7246248276375832e-06, "loss": 0.1848, "step": 10299 }, { "epoch": 0.8160031689443454, "grad_norm": 1.4404807252316005, "learning_rate": 1.7231845031507732e-06, "loss": 0.153, "step": 10300 }, { "epoch": 0.8160823925529808, "grad_norm": 1.5259418737319537, "learning_rate": 1.72174472365244e-06, "loss": 0.1729, "step": 10301 }, { "epoch": 0.8161616161616162, "grad_norm": 2.2294044179077632, "learning_rate": 1.720305489237385e-06, "loss": 0.1845, "step": 10302 }, { "epoch": 0.8162408397702515, "grad_norm": 1.51770991657262, "learning_rate": 1.718866800000375e-06, "loss": 0.1429, "step": 10303 }, { "epoch": 0.816320063378887, "grad_norm": 1.5252263534915778, "learning_rate": 1.7174286560361364e-06, "loss": 0.246, "step": 10304 }, { "epoch": 0.8163992869875223, "grad_norm": 1.2684446449199165, "learning_rate": 1.7159910574393702e-06, "loss": 0.1178, "step": 10305 }, { "epoch": 0.8164785105961576, "grad_norm": 1.4574327547842165, "learning_rate": 1.7145540043047327e-06, "loss": 0.1593, "step": 10306 }, { "epoch": 0.816557734204793, "grad_norm": 1.2894628056887252, "learning_rate": 1.713117496726845e-06, "loss": 0.1245, "step": 10307 }, { "epoch": 0.8166369578134284, "grad_norm": 1.9009598357048905, "learning_rate": 1.711681534800298e-06, "loss": 0.1963, "step": 10308 }, { "epoch": 0.8167161814220638, "grad_norm": 1.6017773650496285, "learning_rate": 1.7102461186196418e-06, "loss": 0.193, "step": 10309 }, { "epoch": 0.8167954050306991, "grad_norm": 1.7704882477449502, "learning_rate": 1.7088112482793872e-06, "loss": 0.1956, "step": 10310 }, { "epoch": 0.8168746286393346, "grad_norm": 1.827845277119032, "learning_rate": 1.7073769238740213e-06, "loss": 0.178, "step": 10311 }, { "epoch": 0.8169538522479699, "grad_norm": 1.338857371424728, "learning_rate": 1.7059431454979825e-06, "loss": 0.1071, "step": 10312 }, { "epoch": 0.8170330758566052, "grad_norm": 1.5104697165527032, "learning_rate": 1.7045099132456766e-06, "loss": 0.1268, "step": 10313 }, { "epoch": 0.8171122994652407, "grad_norm": 2.3705614942161084, "learning_rate": 1.7030772272114803e-06, "loss": 0.2403, "step": 10314 }, { "epoch": 0.817191523073876, "grad_norm": 1.551941842256551, "learning_rate": 1.7016450874897273e-06, "loss": 0.1075, "step": 10315 }, { "epoch": 0.8172707466825114, "grad_norm": 1.6601032928315478, "learning_rate": 1.7002134941747116e-06, "loss": 0.1404, "step": 10316 }, { "epoch": 0.8173499702911468, "grad_norm": 1.446943438006338, "learning_rate": 1.698782447360705e-06, "loss": 0.1211, "step": 10317 }, { "epoch": 0.8174291938997821, "grad_norm": 1.1791993478590417, "learning_rate": 1.697351947141932e-06, "loss": 0.077, "step": 10318 }, { "epoch": 0.8175084175084175, "grad_norm": 1.8786981615817122, "learning_rate": 1.6959219936125827e-06, "loss": 0.1996, "step": 10319 }, { "epoch": 0.8175876411170528, "grad_norm": 1.4084962785176613, "learning_rate": 1.6944925868668106e-06, "loss": 0.1438, "step": 10320 }, { "epoch": 0.8176668647256883, "grad_norm": 1.5637447247709173, "learning_rate": 1.6930637269987415e-06, "loss": 0.1424, "step": 10321 }, { "epoch": 0.8177460883343236, "grad_norm": 1.6968352641284936, "learning_rate": 1.691635414102455e-06, "loss": 0.1664, "step": 10322 }, { "epoch": 0.817825311942959, "grad_norm": 1.6194239769975236, "learning_rate": 1.6902076482719987e-06, "loss": 0.1361, "step": 10323 }, { "epoch": 0.8179045355515944, "grad_norm": 1.706610780483751, "learning_rate": 1.6887804296013854e-06, "loss": 0.2111, "step": 10324 }, { "epoch": 0.8179837591602297, "grad_norm": 1.589586373119487, "learning_rate": 1.6873537581845866e-06, "loss": 0.182, "step": 10325 }, { "epoch": 0.8180629827688651, "grad_norm": 2.3782431269676243, "learning_rate": 1.6859276341155483e-06, "loss": 0.2385, "step": 10326 }, { "epoch": 0.8181422063775005, "grad_norm": 1.6419110174945446, "learning_rate": 1.68450205748817e-06, "loss": 0.1624, "step": 10327 }, { "epoch": 0.8182214299861359, "grad_norm": 1.8000388462286745, "learning_rate": 1.6830770283963194e-06, "loss": 0.1585, "step": 10328 }, { "epoch": 0.8183006535947712, "grad_norm": 1.5882387791068995, "learning_rate": 1.6816525469338252e-06, "loss": 0.1745, "step": 10329 }, { "epoch": 0.8183798772034067, "grad_norm": 1.5503896355813822, "learning_rate": 1.6802286131944889e-06, "loss": 0.1778, "step": 10330 }, { "epoch": 0.818459100812042, "grad_norm": 1.8213112782804408, "learning_rate": 1.6788052272720656e-06, "loss": 0.1644, "step": 10331 }, { "epoch": 0.8185383244206773, "grad_norm": 1.4065908752001892, "learning_rate": 1.677382389260277e-06, "loss": 0.1372, "step": 10332 }, { "epoch": 0.8186175480293127, "grad_norm": 1.5053742806512196, "learning_rate": 1.6759600992528147e-06, "loss": 0.136, "step": 10333 }, { "epoch": 0.8186967716379481, "grad_norm": 1.5910039711765025, "learning_rate": 1.674538357343326e-06, "loss": 0.1711, "step": 10334 }, { "epoch": 0.8187759952465835, "grad_norm": 1.6852041934350679, "learning_rate": 1.6731171636254263e-06, "loss": 0.1427, "step": 10335 }, { "epoch": 0.8188552188552188, "grad_norm": 1.2593735125698782, "learning_rate": 1.6716965181926959e-06, "loss": 0.1498, "step": 10336 }, { "epoch": 0.8189344424638543, "grad_norm": 1.6431241496499942, "learning_rate": 1.670276421138677e-06, "loss": 0.1702, "step": 10337 }, { "epoch": 0.8190136660724896, "grad_norm": 1.59497049054573, "learning_rate": 1.6688568725568732e-06, "loss": 0.1609, "step": 10338 }, { "epoch": 0.8190928896811249, "grad_norm": 1.2004729142077848, "learning_rate": 1.6674378725407603e-06, "loss": 0.1301, "step": 10339 }, { "epoch": 0.8191721132897604, "grad_norm": 1.4718200700932034, "learning_rate": 1.6660194211837687e-06, "loss": 0.1326, "step": 10340 }, { "epoch": 0.8192513368983957, "grad_norm": 1.5987359360750721, "learning_rate": 1.6646015185792963e-06, "loss": 0.1958, "step": 10341 }, { "epoch": 0.8193305605070311, "grad_norm": 1.6960538878199312, "learning_rate": 1.6631841648207092e-06, "loss": 0.1693, "step": 10342 }, { "epoch": 0.8194097841156665, "grad_norm": 1.7701877100046661, "learning_rate": 1.6617673600013295e-06, "loss": 0.1686, "step": 10343 }, { "epoch": 0.8194890077243019, "grad_norm": 1.740906619506329, "learning_rate": 1.6603511042144494e-06, "loss": 0.1485, "step": 10344 }, { "epoch": 0.8195682313329372, "grad_norm": 1.5507463508608135, "learning_rate": 1.6589353975533174e-06, "loss": 0.1508, "step": 10345 }, { "epoch": 0.8196474549415725, "grad_norm": 1.204686433958875, "learning_rate": 1.6575202401111578e-06, "loss": 0.1497, "step": 10346 }, { "epoch": 0.819726678550208, "grad_norm": 1.4986351884970766, "learning_rate": 1.6561056319811497e-06, "loss": 0.1733, "step": 10347 }, { "epoch": 0.8198059021588433, "grad_norm": 1.5125905018831163, "learning_rate": 1.654691573256434e-06, "loss": 0.1467, "step": 10348 }, { "epoch": 0.8198851257674787, "grad_norm": 1.7310964853386275, "learning_rate": 1.653278064030126e-06, "loss": 0.2099, "step": 10349 }, { "epoch": 0.8199643493761141, "grad_norm": 2.1121796978152174, "learning_rate": 1.651865104395296e-06, "loss": 0.2821, "step": 10350 }, { "epoch": 0.8200435729847495, "grad_norm": 1.78586811416678, "learning_rate": 1.6504526944449772e-06, "loss": 0.1883, "step": 10351 }, { "epoch": 0.8201227965933848, "grad_norm": 1.0888208275566291, "learning_rate": 1.6490408342721764e-06, "loss": 0.1118, "step": 10352 }, { "epoch": 0.8202020202020202, "grad_norm": 1.5086020860669018, "learning_rate": 1.6476295239698537e-06, "loss": 0.1468, "step": 10353 }, { "epoch": 0.8202812438106556, "grad_norm": 1.7931152307010985, "learning_rate": 1.6462187636309345e-06, "loss": 0.2269, "step": 10354 }, { "epoch": 0.8203604674192909, "grad_norm": 1.7335744727508822, "learning_rate": 1.6448085533483172e-06, "loss": 0.2141, "step": 10355 }, { "epoch": 0.8204396910279264, "grad_norm": 1.2915757357350675, "learning_rate": 1.6433988932148547e-06, "loss": 0.1282, "step": 10356 }, { "epoch": 0.8205189146365617, "grad_norm": 1.5612157986408384, "learning_rate": 1.6419897833233644e-06, "loss": 0.2239, "step": 10357 }, { "epoch": 0.8205981382451971, "grad_norm": 1.6084479981473068, "learning_rate": 1.6405812237666296e-06, "loss": 0.1541, "step": 10358 }, { "epoch": 0.8206773618538324, "grad_norm": 1.359443615910525, "learning_rate": 1.6391732146373994e-06, "loss": 0.1475, "step": 10359 }, { "epoch": 0.8207565854624678, "grad_norm": 1.2763808489309336, "learning_rate": 1.6377657560283844e-06, "loss": 0.0933, "step": 10360 }, { "epoch": 0.8208358090711032, "grad_norm": 1.414698658074898, "learning_rate": 1.6363588480322545e-06, "loss": 0.1383, "step": 10361 }, { "epoch": 0.8209150326797385, "grad_norm": 1.4651512981191723, "learning_rate": 1.6349524907416536e-06, "loss": 0.1686, "step": 10362 }, { "epoch": 0.820994256288374, "grad_norm": 1.4025084777632981, "learning_rate": 1.6335466842491821e-06, "loss": 0.1921, "step": 10363 }, { "epoch": 0.8210734798970093, "grad_norm": 1.4501960291485576, "learning_rate": 1.6321414286474014e-06, "loss": 0.1474, "step": 10364 }, { "epoch": 0.8211527035056447, "grad_norm": 1.4328133623683392, "learning_rate": 1.6307367240288463e-06, "loss": 0.1489, "step": 10365 }, { "epoch": 0.8212319271142801, "grad_norm": 1.632903840416775, "learning_rate": 1.6293325704860087e-06, "loss": 0.2278, "step": 10366 }, { "epoch": 0.8213111507229154, "grad_norm": 1.7143170271539867, "learning_rate": 1.6279289681113407e-06, "loss": 0.1557, "step": 10367 }, { "epoch": 0.8213903743315508, "grad_norm": 1.3358997278288598, "learning_rate": 1.626525916997269e-06, "loss": 0.1158, "step": 10368 }, { "epoch": 0.8214695979401861, "grad_norm": 1.242847283899903, "learning_rate": 1.6251234172361763e-06, "loss": 0.1139, "step": 10369 }, { "epoch": 0.8215488215488216, "grad_norm": 1.4258896855857521, "learning_rate": 1.623721468920405e-06, "loss": 0.1187, "step": 10370 }, { "epoch": 0.8216280451574569, "grad_norm": 1.5389124303075783, "learning_rate": 1.6223200721422739e-06, "loss": 0.1477, "step": 10371 }, { "epoch": 0.8217072687660923, "grad_norm": 2.3975215197648096, "learning_rate": 1.6209192269940555e-06, "loss": 0.1429, "step": 10372 }, { "epoch": 0.8217864923747277, "grad_norm": 1.2650057683133027, "learning_rate": 1.6195189335679884e-06, "loss": 0.1505, "step": 10373 }, { "epoch": 0.821865715983363, "grad_norm": 1.8335978879588404, "learning_rate": 1.6181191919562734e-06, "loss": 0.2252, "step": 10374 }, { "epoch": 0.8219449395919984, "grad_norm": 1.4650376424076854, "learning_rate": 1.6167200022510799e-06, "loss": 0.1748, "step": 10375 }, { "epoch": 0.8220241632006338, "grad_norm": 1.955988790984777, "learning_rate": 1.6153213645445376e-06, "loss": 0.2561, "step": 10376 }, { "epoch": 0.8221033868092692, "grad_norm": 1.5508357430436572, "learning_rate": 1.613923278928735e-06, "loss": 0.1829, "step": 10377 }, { "epoch": 0.8221826104179045, "grad_norm": 1.8059499301520012, "learning_rate": 1.6125257454957365e-06, "loss": 0.2098, "step": 10378 }, { "epoch": 0.82226183402654, "grad_norm": 2.1598524364571494, "learning_rate": 1.6111287643375607e-06, "loss": 0.167, "step": 10379 }, { "epoch": 0.8223410576351753, "grad_norm": 2.035988592801404, "learning_rate": 1.6097323355461869e-06, "loss": 0.1384, "step": 10380 }, { "epoch": 0.8224202812438106, "grad_norm": 1.6738637565697734, "learning_rate": 1.6083364592135708e-06, "loss": 0.1385, "step": 10381 }, { "epoch": 0.822499504852446, "grad_norm": 1.244503578165528, "learning_rate": 1.6069411354316212e-06, "loss": 0.1112, "step": 10382 }, { "epoch": 0.8225787284610814, "grad_norm": 1.656622176169255, "learning_rate": 1.6055463642922098e-06, "loss": 0.1239, "step": 10383 }, { "epoch": 0.8226579520697168, "grad_norm": 1.3428949977064997, "learning_rate": 1.6041521458871812e-06, "loss": 0.1622, "step": 10384 }, { "epoch": 0.8227371756783521, "grad_norm": 1.5641554576348324, "learning_rate": 1.6027584803083351e-06, "loss": 0.1575, "step": 10385 }, { "epoch": 0.8228163992869876, "grad_norm": 1.411993080404408, "learning_rate": 1.6013653676474371e-06, "loss": 0.1806, "step": 10386 }, { "epoch": 0.8228956228956229, "grad_norm": 1.5339240738577442, "learning_rate": 1.5999728079962197e-06, "loss": 0.1315, "step": 10387 }, { "epoch": 0.8229748465042582, "grad_norm": 1.4727229216951876, "learning_rate": 1.5985808014463745e-06, "loss": 0.1464, "step": 10388 }, { "epoch": 0.8230540701128937, "grad_norm": 2.2163658001990325, "learning_rate": 1.5971893480895583e-06, "loss": 0.1399, "step": 10389 }, { "epoch": 0.823133293721529, "grad_norm": 1.4393577776760622, "learning_rate": 1.5957984480173893e-06, "loss": 0.15, "step": 10390 }, { "epoch": 0.8232125173301644, "grad_norm": 1.5996092927925485, "learning_rate": 1.5944081013214575e-06, "loss": 0.1286, "step": 10391 }, { "epoch": 0.8232917409387998, "grad_norm": 1.8738218925004884, "learning_rate": 1.593018308093306e-06, "loss": 0.2643, "step": 10392 }, { "epoch": 0.8233709645474351, "grad_norm": 1.950226506939809, "learning_rate": 1.5916290684244452e-06, "loss": 0.1764, "step": 10393 }, { "epoch": 0.8234501881560705, "grad_norm": 1.9310168181998901, "learning_rate": 1.5902403824063539e-06, "loss": 0.1719, "step": 10394 }, { "epoch": 0.8235294117647058, "grad_norm": 1.842916780135873, "learning_rate": 1.5888522501304682e-06, "loss": 0.1874, "step": 10395 }, { "epoch": 0.8236086353733413, "grad_norm": 1.5154251082834898, "learning_rate": 1.587464671688187e-06, "loss": 0.1503, "step": 10396 }, { "epoch": 0.8236878589819766, "grad_norm": 1.58940819301186, "learning_rate": 1.5860776471708816e-06, "loss": 0.1947, "step": 10397 }, { "epoch": 0.823767082590612, "grad_norm": 1.4734192225503464, "learning_rate": 1.5846911766698781e-06, "loss": 0.1702, "step": 10398 }, { "epoch": 0.8238463061992474, "grad_norm": 1.8562995864213159, "learning_rate": 1.5833052602764664e-06, "loss": 0.2415, "step": 10399 }, { "epoch": 0.8239255298078827, "grad_norm": 1.3954524152324503, "learning_rate": 1.5819198980819096e-06, "loss": 0.1429, "step": 10400 }, { "epoch": 0.8240047534165181, "grad_norm": 1.4347241991578803, "learning_rate": 1.5805350901774197e-06, "loss": 0.1455, "step": 10401 }, { "epoch": 0.8240839770251535, "grad_norm": 1.210332876336636, "learning_rate": 1.5791508366541797e-06, "loss": 0.109, "step": 10402 }, { "epoch": 0.8241632006337889, "grad_norm": 1.7049231532527356, "learning_rate": 1.577767137603341e-06, "loss": 0.1265, "step": 10403 }, { "epoch": 0.8242424242424242, "grad_norm": 1.0506811936293623, "learning_rate": 1.5763839931160108e-06, "loss": 0.0913, "step": 10404 }, { "epoch": 0.8243216478510597, "grad_norm": 1.5996917032968943, "learning_rate": 1.5750014032832617e-06, "loss": 0.18, "step": 10405 }, { "epoch": 0.824400871459695, "grad_norm": 1.4212408655501363, "learning_rate": 1.5736193681961332e-06, "loss": 0.1006, "step": 10406 }, { "epoch": 0.8244800950683303, "grad_norm": 1.2106947710392142, "learning_rate": 1.5722378879456234e-06, "loss": 0.1311, "step": 10407 }, { "epoch": 0.8245593186769657, "grad_norm": 1.2166450878347332, "learning_rate": 1.5708569626226954e-06, "loss": 0.1128, "step": 10408 }, { "epoch": 0.8246385422856011, "grad_norm": 1.219630824386909, "learning_rate": 1.5694765923182798e-06, "loss": 0.1022, "step": 10409 }, { "epoch": 0.8247177658942365, "grad_norm": 1.3392468145625938, "learning_rate": 1.5680967771232659e-06, "loss": 0.1456, "step": 10410 }, { "epoch": 0.8247969895028718, "grad_norm": 1.5876683201497666, "learning_rate": 1.5667175171285054e-06, "loss": 0.1384, "step": 10411 }, { "epoch": 0.8248762131115073, "grad_norm": 1.1821167185607704, "learning_rate": 1.5653388124248203e-06, "loss": 0.146, "step": 10412 }, { "epoch": 0.8249554367201426, "grad_norm": 1.4446984510941085, "learning_rate": 1.5639606631029892e-06, "loss": 0.1493, "step": 10413 }, { "epoch": 0.8250346603287779, "grad_norm": 1.8737353338642322, "learning_rate": 1.5625830692537569e-06, "loss": 0.249, "step": 10414 }, { "epoch": 0.8251138839374134, "grad_norm": 1.45948961535352, "learning_rate": 1.561206030967828e-06, "loss": 0.1351, "step": 10415 }, { "epoch": 0.8251931075460487, "grad_norm": 1.290693019786443, "learning_rate": 1.5598295483358804e-06, "loss": 0.1198, "step": 10416 }, { "epoch": 0.8252723311546841, "grad_norm": 1.3390180259225275, "learning_rate": 1.5584536214485457e-06, "loss": 0.1261, "step": 10417 }, { "epoch": 0.8253515547633195, "grad_norm": 2.063602378896092, "learning_rate": 1.5570782503964188e-06, "loss": 0.1914, "step": 10418 }, { "epoch": 0.8254307783719549, "grad_norm": 1.4035741308956817, "learning_rate": 1.5557034352700672e-06, "loss": 0.12, "step": 10419 }, { "epoch": 0.8255100019805902, "grad_norm": 1.5891147409405608, "learning_rate": 1.5543291761600133e-06, "loss": 0.1979, "step": 10420 }, { "epoch": 0.8255892255892255, "grad_norm": 1.3329053501075112, "learning_rate": 1.552955473156742e-06, "loss": 0.1633, "step": 10421 }, { "epoch": 0.825668449197861, "grad_norm": 1.5358401531839272, "learning_rate": 1.5515823263507112e-06, "loss": 0.1576, "step": 10422 }, { "epoch": 0.8257476728064963, "grad_norm": 1.0787482908661634, "learning_rate": 1.5502097358323321e-06, "loss": 0.09, "step": 10423 }, { "epoch": 0.8258268964151317, "grad_norm": 1.2604716499021176, "learning_rate": 1.548837701691983e-06, "loss": 0.1116, "step": 10424 }, { "epoch": 0.8259061200237671, "grad_norm": 1.4572633089092795, "learning_rate": 1.547466224020009e-06, "loss": 0.1247, "step": 10425 }, { "epoch": 0.8259853436324025, "grad_norm": 1.9520664048337526, "learning_rate": 1.5460953029067128e-06, "loss": 0.1351, "step": 10426 }, { "epoch": 0.8260645672410378, "grad_norm": 1.4268932591572854, "learning_rate": 1.5447249384423624e-06, "loss": 0.123, "step": 10427 }, { "epoch": 0.8261437908496732, "grad_norm": 1.6448199221259747, "learning_rate": 1.543355130717189e-06, "loss": 0.1561, "step": 10428 }, { "epoch": 0.8262230144583086, "grad_norm": 1.3967811918732347, "learning_rate": 1.5419858798213928e-06, "loss": 0.1254, "step": 10429 }, { "epoch": 0.8263022380669439, "grad_norm": 1.2648540259126664, "learning_rate": 1.540617185845128e-06, "loss": 0.1489, "step": 10430 }, { "epoch": 0.8263814616755794, "grad_norm": 1.4296467965865831, "learning_rate": 1.5392490488785151e-06, "loss": 0.1243, "step": 10431 }, { "epoch": 0.8264606852842147, "grad_norm": 1.8390185485198758, "learning_rate": 1.537881469011645e-06, "loss": 0.2429, "step": 10432 }, { "epoch": 0.8265399088928501, "grad_norm": 1.8535120444680417, "learning_rate": 1.5365144463345627e-06, "loss": 0.1497, "step": 10433 }, { "epoch": 0.8266191325014854, "grad_norm": 1.728725861797571, "learning_rate": 1.5351479809372772e-06, "loss": 0.2105, "step": 10434 }, { "epoch": 0.8266983561101208, "grad_norm": 1.3596185137767802, "learning_rate": 1.5337820729097697e-06, "loss": 0.1135, "step": 10435 }, { "epoch": 0.8267775797187562, "grad_norm": 1.6339166712688329, "learning_rate": 1.5324167223419762e-06, "loss": 0.1845, "step": 10436 }, { "epoch": 0.8268568033273915, "grad_norm": 1.7855396420387195, "learning_rate": 1.5310519293237958e-06, "loss": 0.2137, "step": 10437 }, { "epoch": 0.826936026936027, "grad_norm": 1.4004420618199764, "learning_rate": 1.5296876939450978e-06, "loss": 0.1064, "step": 10438 }, { "epoch": 0.8270152505446623, "grad_norm": 1.5502497092379819, "learning_rate": 1.528324016295709e-06, "loss": 0.1408, "step": 10439 }, { "epoch": 0.8270944741532977, "grad_norm": 1.7836419753485788, "learning_rate": 1.5269608964654181e-06, "loss": 0.1171, "step": 10440 }, { "epoch": 0.8271736977619331, "grad_norm": 1.6575619634308936, "learning_rate": 1.525598334543985e-06, "loss": 0.1442, "step": 10441 }, { "epoch": 0.8272529213705684, "grad_norm": 1.7407613428327606, "learning_rate": 1.524236330621125e-06, "loss": 0.1845, "step": 10442 }, { "epoch": 0.8273321449792038, "grad_norm": 1.4347332428198172, "learning_rate": 1.5228748847865205e-06, "loss": 0.1426, "step": 10443 }, { "epoch": 0.8274113685878391, "grad_norm": 1.3698020828193718, "learning_rate": 1.5215139971298131e-06, "loss": 0.1448, "step": 10444 }, { "epoch": 0.8274905921964746, "grad_norm": 1.6739333265735419, "learning_rate": 1.5201536677406147e-06, "loss": 0.1556, "step": 10445 }, { "epoch": 0.8275698158051099, "grad_norm": 1.4746853690416448, "learning_rate": 1.518793896708496e-06, "loss": 0.1185, "step": 10446 }, { "epoch": 0.8276490394137453, "grad_norm": 1.8048450054750969, "learning_rate": 1.517434684122987e-06, "loss": 0.1231, "step": 10447 }, { "epoch": 0.8277282630223807, "grad_norm": 1.8907254679962828, "learning_rate": 1.5160760300735911e-06, "loss": 0.2279, "step": 10448 }, { "epoch": 0.827807486631016, "grad_norm": 1.8222072880649218, "learning_rate": 1.5147179346497665e-06, "loss": 0.2103, "step": 10449 }, { "epoch": 0.8278867102396514, "grad_norm": 2.122667636450811, "learning_rate": 1.513360397940935e-06, "loss": 0.295, "step": 10450 }, { "epoch": 0.8279659338482868, "grad_norm": 1.7200946348052384, "learning_rate": 1.5120034200364885e-06, "loss": 0.2088, "step": 10451 }, { "epoch": 0.8280451574569222, "grad_norm": 1.3738326824158502, "learning_rate": 1.5106470010257758e-06, "loss": 0.1341, "step": 10452 }, { "epoch": 0.8281243810655575, "grad_norm": 1.5027063601514015, "learning_rate": 1.509291140998107e-06, "loss": 0.1461, "step": 10453 }, { "epoch": 0.828203604674193, "grad_norm": 1.6721475262122434, "learning_rate": 1.5079358400427635e-06, "loss": 0.1538, "step": 10454 }, { "epoch": 0.8282828282828283, "grad_norm": 1.6774558342530719, "learning_rate": 1.5065810982489849e-06, "loss": 0.1601, "step": 10455 }, { "epoch": 0.8283620518914636, "grad_norm": 1.6818453145166503, "learning_rate": 1.5052269157059707e-06, "loss": 0.1544, "step": 10456 }, { "epoch": 0.828441275500099, "grad_norm": 1.3627276408551925, "learning_rate": 1.503873292502892e-06, "loss": 0.1322, "step": 10457 }, { "epoch": 0.8285204991087344, "grad_norm": 1.411075376802412, "learning_rate": 1.5025202287288764e-06, "loss": 0.114, "step": 10458 }, { "epoch": 0.8285997227173698, "grad_norm": 1.458350494482667, "learning_rate": 1.501167724473016e-06, "loss": 0.1927, "step": 10459 }, { "epoch": 0.8286789463260051, "grad_norm": 1.4190803687970546, "learning_rate": 1.499815779824365e-06, "loss": 0.1194, "step": 10460 }, { "epoch": 0.8287581699346406, "grad_norm": 1.325797862383344, "learning_rate": 1.4984643948719469e-06, "loss": 0.1331, "step": 10461 }, { "epoch": 0.8288373935432759, "grad_norm": 1.38424853695347, "learning_rate": 1.4971135697047422e-06, "loss": 0.1583, "step": 10462 }, { "epoch": 0.8289166171519112, "grad_norm": 1.4697523749720716, "learning_rate": 1.4957633044116925e-06, "loss": 0.1271, "step": 10463 }, { "epoch": 0.8289958407605467, "grad_norm": 1.3988389706591748, "learning_rate": 1.4944135990817121e-06, "loss": 0.1512, "step": 10464 }, { "epoch": 0.829075064369182, "grad_norm": 1.111534357178801, "learning_rate": 1.4930644538036709e-06, "loss": 0.0868, "step": 10465 }, { "epoch": 0.8291542879778174, "grad_norm": 1.6657537168219245, "learning_rate": 1.4917158686663992e-06, "loss": 0.1753, "step": 10466 }, { "epoch": 0.8292335115864528, "grad_norm": 1.2148211390739012, "learning_rate": 1.490367843758701e-06, "loss": 0.133, "step": 10467 }, { "epoch": 0.8293127351950882, "grad_norm": 1.5329173406558545, "learning_rate": 1.4890203791693337e-06, "loss": 0.1753, "step": 10468 }, { "epoch": 0.8293919588037235, "grad_norm": 1.382283485781237, "learning_rate": 1.4876734749870213e-06, "loss": 0.1399, "step": 10469 }, { "epoch": 0.8294711824123588, "grad_norm": 1.4432399496278205, "learning_rate": 1.4863271313004535e-06, "loss": 0.1854, "step": 10470 }, { "epoch": 0.8295504060209943, "grad_norm": 1.434362292061377, "learning_rate": 1.4849813481982788e-06, "loss": 0.1524, "step": 10471 }, { "epoch": 0.8296296296296296, "grad_norm": 1.293517868604829, "learning_rate": 1.483636125769108e-06, "loss": 0.1226, "step": 10472 }, { "epoch": 0.829708853238265, "grad_norm": 1.645473271617257, "learning_rate": 1.482291464101523e-06, "loss": 0.1745, "step": 10473 }, { "epoch": 0.8297880768469004, "grad_norm": 1.6640206062544738, "learning_rate": 1.480947363284061e-06, "loss": 0.1559, "step": 10474 }, { "epoch": 0.8298673004555357, "grad_norm": 1.827833822706193, "learning_rate": 1.4796038234052235e-06, "loss": 0.152, "step": 10475 }, { "epoch": 0.8299465240641711, "grad_norm": 1.5847326784336713, "learning_rate": 1.4782608445534741e-06, "loss": 0.1833, "step": 10476 }, { "epoch": 0.8300257476728065, "grad_norm": 1.3526824269563864, "learning_rate": 1.4769184268172465e-06, "loss": 0.1343, "step": 10477 }, { "epoch": 0.8301049712814419, "grad_norm": 1.6452907709358355, "learning_rate": 1.4755765702849311e-06, "loss": 0.1767, "step": 10478 }, { "epoch": 0.8301841948900772, "grad_norm": 1.7567117556498941, "learning_rate": 1.4742352750448806e-06, "loss": 0.1671, "step": 10479 }, { "epoch": 0.8302634184987127, "grad_norm": 1.7550545175522747, "learning_rate": 1.4728945411854135e-06, "loss": 0.1986, "step": 10480 }, { "epoch": 0.830342642107348, "grad_norm": 1.6306571338844298, "learning_rate": 1.4715543687948096e-06, "loss": 0.168, "step": 10481 }, { "epoch": 0.8304218657159833, "grad_norm": 1.255040224432049, "learning_rate": 1.470214757961317e-06, "loss": 0.1334, "step": 10482 }, { "epoch": 0.8305010893246187, "grad_norm": 1.2978999779415066, "learning_rate": 1.4688757087731386e-06, "loss": 0.1459, "step": 10483 }, { "epoch": 0.8305803129332541, "grad_norm": 1.7039019014916967, "learning_rate": 1.4675372213184458e-06, "loss": 0.166, "step": 10484 }, { "epoch": 0.8306595365418895, "grad_norm": 2.6034323665882386, "learning_rate": 1.4661992956853699e-06, "loss": 0.1951, "step": 10485 }, { "epoch": 0.8307387601505248, "grad_norm": 1.847362845942156, "learning_rate": 1.4648619319620105e-06, "loss": 0.2026, "step": 10486 }, { "epoch": 0.8308179837591603, "grad_norm": 1.264175584467807, "learning_rate": 1.463525130236424e-06, "loss": 0.1149, "step": 10487 }, { "epoch": 0.8308972073677956, "grad_norm": 1.3686448353848946, "learning_rate": 1.4621888905966308e-06, "loss": 0.1109, "step": 10488 }, { "epoch": 0.8309764309764309, "grad_norm": 1.6418515933543478, "learning_rate": 1.4608532131306198e-06, "loss": 0.2549, "step": 10489 }, { "epoch": 0.8310556545850664, "grad_norm": 2.0124970527044983, "learning_rate": 1.459518097926337e-06, "loss": 0.1939, "step": 10490 }, { "epoch": 0.8311348781937017, "grad_norm": 1.0405927005080426, "learning_rate": 1.4581835450716907e-06, "loss": 0.0819, "step": 10491 }, { "epoch": 0.8312141018023371, "grad_norm": 1.3312072031435174, "learning_rate": 1.4568495546545603e-06, "loss": 0.1374, "step": 10492 }, { "epoch": 0.8312933254109725, "grad_norm": 1.9728663054895477, "learning_rate": 1.4555161267627793e-06, "loss": 0.2894, "step": 10493 }, { "epoch": 0.8313725490196079, "grad_norm": 1.4866272666577227, "learning_rate": 1.4541832614841455e-06, "loss": 0.1363, "step": 10494 }, { "epoch": 0.8314517726282432, "grad_norm": 1.529754605298281, "learning_rate": 1.4528509589064276e-06, "loss": 0.0868, "step": 10495 }, { "epoch": 0.8315309962368785, "grad_norm": 1.650501268851585, "learning_rate": 1.4515192191173466e-06, "loss": 0.1642, "step": 10496 }, { "epoch": 0.831610219845514, "grad_norm": 1.3909628219421502, "learning_rate": 1.45018804220459e-06, "loss": 0.1372, "step": 10497 }, { "epoch": 0.8316894434541493, "grad_norm": 1.2581369409862537, "learning_rate": 1.4488574282558143e-06, "loss": 0.1133, "step": 10498 }, { "epoch": 0.8317686670627847, "grad_norm": 1.433149621609104, "learning_rate": 1.4475273773586319e-06, "loss": 0.1487, "step": 10499 }, { "epoch": 0.8318478906714201, "grad_norm": 1.30609432038533, "learning_rate": 1.446197889600619e-06, "loss": 0.1665, "step": 10500 }, { "epoch": 0.8319271142800555, "grad_norm": 1.6653218909747391, "learning_rate": 1.444868965069315e-06, "loss": 0.1864, "step": 10501 }, { "epoch": 0.8320063378886908, "grad_norm": 1.4163967956303973, "learning_rate": 1.443540603852227e-06, "loss": 0.1362, "step": 10502 }, { "epoch": 0.8320855614973262, "grad_norm": 1.2849366947885508, "learning_rate": 1.4422128060368201e-06, "loss": 0.1072, "step": 10503 }, { "epoch": 0.8321647851059616, "grad_norm": 1.9601634183640058, "learning_rate": 1.4408855717105197e-06, "loss": 0.216, "step": 10504 }, { "epoch": 0.8322440087145969, "grad_norm": 1.4598953868221773, "learning_rate": 1.4395589009607225e-06, "loss": 0.1575, "step": 10505 }, { "epoch": 0.8323232323232324, "grad_norm": 1.7371467309628827, "learning_rate": 1.4382327938747808e-06, "loss": 0.1609, "step": 10506 }, { "epoch": 0.8324024559318677, "grad_norm": 1.5589338859986424, "learning_rate": 1.4369072505400117e-06, "loss": 0.1262, "step": 10507 }, { "epoch": 0.8324816795405031, "grad_norm": 1.5804549002657287, "learning_rate": 1.4355822710436995e-06, "loss": 0.135, "step": 10508 }, { "epoch": 0.8325609031491384, "grad_norm": 1.3066817922881286, "learning_rate": 1.4342578554730858e-06, "loss": 0.1051, "step": 10509 }, { "epoch": 0.8326401267577738, "grad_norm": 1.2173387730932184, "learning_rate": 1.4329340039153738e-06, "loss": 0.124, "step": 10510 }, { "epoch": 0.8327193503664092, "grad_norm": 1.4560198374978264, "learning_rate": 1.4316107164577376e-06, "loss": 0.1505, "step": 10511 }, { "epoch": 0.8327985739750445, "grad_norm": 1.264701314761224, "learning_rate": 1.430287993187307e-06, "loss": 0.1358, "step": 10512 }, { "epoch": 0.83287779758368, "grad_norm": 1.5112765519019675, "learning_rate": 1.4289658341911782e-06, "loss": 0.1336, "step": 10513 }, { "epoch": 0.8329570211923153, "grad_norm": 1.7954708873097363, "learning_rate": 1.4276442395564049e-06, "loss": 0.1939, "step": 10514 }, { "epoch": 0.8330362448009507, "grad_norm": 1.3780983896068477, "learning_rate": 1.426323209370014e-06, "loss": 0.1216, "step": 10515 }, { "epoch": 0.8331154684095861, "grad_norm": 2.0236118774975687, "learning_rate": 1.425002743718985e-06, "loss": 0.1644, "step": 10516 }, { "epoch": 0.8331946920182214, "grad_norm": 1.7012607371020356, "learning_rate": 1.4236828426902626e-06, "loss": 0.1808, "step": 10517 }, { "epoch": 0.8332739156268568, "grad_norm": 1.1425512918678684, "learning_rate": 1.4223635063707619e-06, "loss": 0.1277, "step": 10518 }, { "epoch": 0.8333531392354921, "grad_norm": 1.2659950838878717, "learning_rate": 1.421044734847351e-06, "loss": 0.1488, "step": 10519 }, { "epoch": 0.8334323628441276, "grad_norm": 1.8681973614222098, "learning_rate": 1.4197265282068618e-06, "loss": 0.2096, "step": 10520 }, { "epoch": 0.8335115864527629, "grad_norm": 2.0668473181305025, "learning_rate": 1.4184088865360978e-06, "loss": 0.2198, "step": 10521 }, { "epoch": 0.8335908100613983, "grad_norm": 1.8467580530748484, "learning_rate": 1.4170918099218166e-06, "loss": 0.1625, "step": 10522 }, { "epoch": 0.8336700336700337, "grad_norm": 1.7755054552795317, "learning_rate": 1.41577529845074e-06, "loss": 0.2042, "step": 10523 }, { "epoch": 0.833749257278669, "grad_norm": 1.5061588117386606, "learning_rate": 1.4144593522095563e-06, "loss": 0.1464, "step": 10524 }, { "epoch": 0.8338284808873044, "grad_norm": 1.590326443681092, "learning_rate": 1.4131439712849148e-06, "loss": 0.1475, "step": 10525 }, { "epoch": 0.8339077044959398, "grad_norm": 1.7514985934182123, "learning_rate": 1.4118291557634223e-06, "loss": 0.2652, "step": 10526 }, { "epoch": 0.8339869281045752, "grad_norm": 2.411398825069949, "learning_rate": 1.410514905731658e-06, "loss": 0.2085, "step": 10527 }, { "epoch": 0.8340661517132105, "grad_norm": 1.2223871485219073, "learning_rate": 1.4092012212761574e-06, "loss": 0.133, "step": 10528 }, { "epoch": 0.834145375321846, "grad_norm": 1.0762834006052167, "learning_rate": 1.4078881024834213e-06, "loss": 0.0802, "step": 10529 }, { "epoch": 0.8342245989304813, "grad_norm": 1.3533504551646354, "learning_rate": 1.406575549439907e-06, "loss": 0.1095, "step": 10530 }, { "epoch": 0.8343038225391166, "grad_norm": 1.5615696122225395, "learning_rate": 1.4052635622320477e-06, "loss": 0.1515, "step": 10531 }, { "epoch": 0.834383046147752, "grad_norm": 1.651797849652785, "learning_rate": 1.4039521409462265e-06, "loss": 0.2142, "step": 10532 }, { "epoch": 0.8344622697563874, "grad_norm": 1.5203158419970122, "learning_rate": 1.4026412856687931e-06, "loss": 0.1758, "step": 10533 }, { "epoch": 0.8345414933650228, "grad_norm": 1.4259578363282222, "learning_rate": 1.4013309964860667e-06, "loss": 0.1282, "step": 10534 }, { "epoch": 0.8346207169736581, "grad_norm": 1.3212456431686352, "learning_rate": 1.4000212734843187e-06, "loss": 0.119, "step": 10535 }, { "epoch": 0.8346999405822936, "grad_norm": 1.9600433079024036, "learning_rate": 1.3987121167497874e-06, "loss": 0.1707, "step": 10536 }, { "epoch": 0.8347791641909289, "grad_norm": 1.2621085532116656, "learning_rate": 1.3974035263686792e-06, "loss": 0.1421, "step": 10537 }, { "epoch": 0.8348583877995642, "grad_norm": 1.3129518464181507, "learning_rate": 1.396095502427155e-06, "loss": 0.0768, "step": 10538 }, { "epoch": 0.8349376114081997, "grad_norm": 1.558708814936815, "learning_rate": 1.3947880450113404e-06, "loss": 0.1544, "step": 10539 }, { "epoch": 0.835016835016835, "grad_norm": 1.462600127578735, "learning_rate": 1.39348115420733e-06, "loss": 0.1276, "step": 10540 }, { "epoch": 0.8350960586254704, "grad_norm": 1.598430391294099, "learning_rate": 1.392174830101174e-06, "loss": 0.1136, "step": 10541 }, { "epoch": 0.8351752822341058, "grad_norm": 1.2325142503958595, "learning_rate": 1.3908690727788842e-06, "loss": 0.0815, "step": 10542 }, { "epoch": 0.8352545058427412, "grad_norm": 1.4081913359628437, "learning_rate": 1.3895638823264447e-06, "loss": 0.1615, "step": 10543 }, { "epoch": 0.8353337294513765, "grad_norm": 1.978628812507835, "learning_rate": 1.3882592588297917e-06, "loss": 0.1672, "step": 10544 }, { "epoch": 0.8354129530600118, "grad_norm": 1.522327406025572, "learning_rate": 1.38695520237483e-06, "loss": 0.1482, "step": 10545 }, { "epoch": 0.8354921766686473, "grad_norm": 1.4364688419079543, "learning_rate": 1.3856517130474235e-06, "loss": 0.1688, "step": 10546 }, { "epoch": 0.8355714002772826, "grad_norm": 1.5535409529760795, "learning_rate": 1.384348790933403e-06, "loss": 0.2004, "step": 10547 }, { "epoch": 0.835650623885918, "grad_norm": 1.3479364472814601, "learning_rate": 1.3830464361185592e-06, "loss": 0.1021, "step": 10548 }, { "epoch": 0.8357298474945534, "grad_norm": 1.474193020992869, "learning_rate": 1.3817446486886433e-06, "loss": 0.119, "step": 10549 }, { "epoch": 0.8358090711031888, "grad_norm": 1.288308881807844, "learning_rate": 1.3804434287293756e-06, "loss": 0.1054, "step": 10550 }, { "epoch": 0.8358882947118241, "grad_norm": 1.8507964553654104, "learning_rate": 1.3791427763264342e-06, "loss": 0.1497, "step": 10551 }, { "epoch": 0.8359675183204595, "grad_norm": 1.9102776357586515, "learning_rate": 1.3778426915654575e-06, "loss": 0.2033, "step": 10552 }, { "epoch": 0.8360467419290949, "grad_norm": 1.8215271961125292, "learning_rate": 1.3765431745320546e-06, "loss": 0.1428, "step": 10553 }, { "epoch": 0.8361259655377302, "grad_norm": 1.6318011769839191, "learning_rate": 1.3752442253117903e-06, "loss": 0.2014, "step": 10554 }, { "epoch": 0.8362051891463657, "grad_norm": 1.3051396480445032, "learning_rate": 1.373945843990192e-06, "loss": 0.1414, "step": 10555 }, { "epoch": 0.836284412755001, "grad_norm": 1.5482829512802534, "learning_rate": 1.3726480306527578e-06, "loss": 0.1416, "step": 10556 }, { "epoch": 0.8363636363636363, "grad_norm": 1.5493978244151865, "learning_rate": 1.3713507853849373e-06, "loss": 0.1638, "step": 10557 }, { "epoch": 0.8364428599722717, "grad_norm": 1.2104152350452513, "learning_rate": 1.3700541082721464e-06, "loss": 0.1032, "step": 10558 }, { "epoch": 0.8365220835809071, "grad_norm": 1.6917040484946821, "learning_rate": 1.3687579993997703e-06, "loss": 0.1815, "step": 10559 }, { "epoch": 0.8366013071895425, "grad_norm": 1.4653591812220614, "learning_rate": 1.3674624588531481e-06, "loss": 0.1365, "step": 10560 }, { "epoch": 0.8366805307981778, "grad_norm": 1.5928506207818265, "learning_rate": 1.3661674867175844e-06, "loss": 0.1413, "step": 10561 }, { "epoch": 0.8367597544068133, "grad_norm": 1.3836968799451774, "learning_rate": 1.3648730830783507e-06, "loss": 0.1442, "step": 10562 }, { "epoch": 0.8368389780154486, "grad_norm": 1.336818670425734, "learning_rate": 1.3635792480206744e-06, "loss": 0.1299, "step": 10563 }, { "epoch": 0.8369182016240839, "grad_norm": 1.6592427137990626, "learning_rate": 1.3622859816297473e-06, "loss": 0.1707, "step": 10564 }, { "epoch": 0.8369974252327194, "grad_norm": 1.317400189646492, "learning_rate": 1.3609932839907281e-06, "loss": 0.1407, "step": 10565 }, { "epoch": 0.8370766488413547, "grad_norm": 1.5954592676631707, "learning_rate": 1.3597011551887329e-06, "loss": 0.1634, "step": 10566 }, { "epoch": 0.8371558724499901, "grad_norm": 1.4345800345204196, "learning_rate": 1.3584095953088405e-06, "loss": 0.139, "step": 10567 }, { "epoch": 0.8372350960586254, "grad_norm": 1.0091877112973309, "learning_rate": 1.3571186044360973e-06, "loss": 0.0891, "step": 10568 }, { "epoch": 0.8373143196672609, "grad_norm": 1.449045984177576, "learning_rate": 1.3558281826555065e-06, "loss": 0.1878, "step": 10569 }, { "epoch": 0.8373935432758962, "grad_norm": 2.6708432801095783, "learning_rate": 1.3545383300520375e-06, "loss": 0.2549, "step": 10570 }, { "epoch": 0.8374727668845315, "grad_norm": 1.6230606633414757, "learning_rate": 1.3532490467106186e-06, "loss": 0.1997, "step": 10571 }, { "epoch": 0.837551990493167, "grad_norm": 1.7002180598197696, "learning_rate": 1.3519603327161456e-06, "loss": 0.2332, "step": 10572 }, { "epoch": 0.8376312141018023, "grad_norm": 1.5496895141462625, "learning_rate": 1.3506721881534734e-06, "loss": 0.1076, "step": 10573 }, { "epoch": 0.8377104377104377, "grad_norm": 1.6682904891176977, "learning_rate": 1.3493846131074173e-06, "loss": 0.0915, "step": 10574 }, { "epoch": 0.8377896613190731, "grad_norm": 1.7975817576352944, "learning_rate": 1.3480976076627617e-06, "loss": 0.2087, "step": 10575 }, { "epoch": 0.8378688849277085, "grad_norm": 1.6679796344416764, "learning_rate": 1.3468111719042497e-06, "loss": 0.2132, "step": 10576 }, { "epoch": 0.8379481085363438, "grad_norm": 1.2110061635641807, "learning_rate": 1.345525305916583e-06, "loss": 0.0658, "step": 10577 }, { "epoch": 0.8380273321449792, "grad_norm": 1.4521348615640655, "learning_rate": 1.3442400097844344e-06, "loss": 0.1209, "step": 10578 }, { "epoch": 0.8381065557536146, "grad_norm": 1.4251480823392397, "learning_rate": 1.342955283592432e-06, "loss": 0.1274, "step": 10579 }, { "epoch": 0.8381857793622499, "grad_norm": 1.563206602406747, "learning_rate": 1.3416711274251671e-06, "loss": 0.19, "step": 10580 }, { "epoch": 0.8382650029708854, "grad_norm": 1.7772029117015589, "learning_rate": 1.3403875413671997e-06, "loss": 0.1569, "step": 10581 }, { "epoch": 0.8383442265795207, "grad_norm": 2.0137756255469603, "learning_rate": 1.3391045255030444e-06, "loss": 0.1786, "step": 10582 }, { "epoch": 0.8384234501881561, "grad_norm": 1.2991353238836698, "learning_rate": 1.3378220799171815e-06, "loss": 0.1282, "step": 10583 }, { "epoch": 0.8385026737967914, "grad_norm": 1.6763781060671676, "learning_rate": 1.3365402046940569e-06, "loss": 0.2153, "step": 10584 }, { "epoch": 0.8385818974054268, "grad_norm": 1.5906710242051936, "learning_rate": 1.3352588999180726e-06, "loss": 0.1684, "step": 10585 }, { "epoch": 0.8386611210140622, "grad_norm": 1.5146384858409359, "learning_rate": 1.3339781656735995e-06, "loss": 0.1582, "step": 10586 }, { "epoch": 0.8387403446226975, "grad_norm": 1.5384001888776324, "learning_rate": 1.3326980020449621e-06, "loss": 0.1332, "step": 10587 }, { "epoch": 0.838819568231333, "grad_norm": 1.8285184477364784, "learning_rate": 1.3314184091164605e-06, "loss": 0.2016, "step": 10588 }, { "epoch": 0.8388987918399683, "grad_norm": 1.9822735091743064, "learning_rate": 1.3301393869723457e-06, "loss": 0.1729, "step": 10589 }, { "epoch": 0.8389780154486037, "grad_norm": 1.9670849238208123, "learning_rate": 1.328860935696833e-06, "loss": 0.1524, "step": 10590 }, { "epoch": 0.8390572390572391, "grad_norm": 1.3845555195916062, "learning_rate": 1.3275830553741066e-06, "loss": 0.1004, "step": 10591 }, { "epoch": 0.8391364626658744, "grad_norm": 1.5513397752911098, "learning_rate": 1.3263057460883078e-06, "loss": 0.0931, "step": 10592 }, { "epoch": 0.8392156862745098, "grad_norm": 1.364158766351369, "learning_rate": 1.3250290079235383e-06, "loss": 0.1636, "step": 10593 }, { "epoch": 0.8392949098831451, "grad_norm": 1.5542462651824454, "learning_rate": 1.3237528409638688e-06, "loss": 0.1314, "step": 10594 }, { "epoch": 0.8393741334917806, "grad_norm": 1.8074584377236935, "learning_rate": 1.3224772452933277e-06, "loss": 0.2128, "step": 10595 }, { "epoch": 0.8394533571004159, "grad_norm": 1.3108518698049427, "learning_rate": 1.321202220995904e-06, "loss": 0.1643, "step": 10596 }, { "epoch": 0.8395325807090513, "grad_norm": 1.4929573229698696, "learning_rate": 1.3199277681555578e-06, "loss": 0.1231, "step": 10597 }, { "epoch": 0.8396118043176867, "grad_norm": 1.7200546763013393, "learning_rate": 1.3186538868562004e-06, "loss": 0.1619, "step": 10598 }, { "epoch": 0.839691027926322, "grad_norm": 1.434429725089167, "learning_rate": 1.3173805771817138e-06, "loss": 0.0887, "step": 10599 }, { "epoch": 0.8397702515349574, "grad_norm": 1.700529937298641, "learning_rate": 1.3161078392159355e-06, "loss": 0.1974, "step": 10600 }, { "epoch": 0.8398494751435928, "grad_norm": 1.322446418537356, "learning_rate": 1.3148356730426737e-06, "loss": 0.0995, "step": 10601 }, { "epoch": 0.8399286987522282, "grad_norm": 1.4898986168307924, "learning_rate": 1.3135640787456926e-06, "loss": 0.172, "step": 10602 }, { "epoch": 0.8400079223608635, "grad_norm": 1.659367245436722, "learning_rate": 1.312293056408719e-06, "loss": 0.1718, "step": 10603 }, { "epoch": 0.840087145969499, "grad_norm": 1.6175794090539586, "learning_rate": 1.3110226061154462e-06, "loss": 0.1763, "step": 10604 }, { "epoch": 0.8401663695781343, "grad_norm": 1.1190365769410344, "learning_rate": 1.309752727949527e-06, "loss": 0.0736, "step": 10605 }, { "epoch": 0.8402455931867696, "grad_norm": 1.2906239326450222, "learning_rate": 1.3084834219945731e-06, "loss": 0.1134, "step": 10606 }, { "epoch": 0.840324816795405, "grad_norm": 1.4438588750192283, "learning_rate": 1.3072146883341675e-06, "loss": 0.152, "step": 10607 }, { "epoch": 0.8404040404040404, "grad_norm": 1.5742421031576537, "learning_rate": 1.3059465270518469e-06, "loss": 0.1367, "step": 10608 }, { "epoch": 0.8404832640126758, "grad_norm": 1.6221856259207226, "learning_rate": 1.3046789382311132e-06, "loss": 0.1193, "step": 10609 }, { "epoch": 0.8405624876213111, "grad_norm": 1.6041009773109047, "learning_rate": 1.3034119219554341e-06, "loss": 0.1351, "step": 10610 }, { "epoch": 0.8406417112299466, "grad_norm": 1.092911996819517, "learning_rate": 1.3021454783082344e-06, "loss": 0.0845, "step": 10611 }, { "epoch": 0.8407209348385819, "grad_norm": 1.4935547009489953, "learning_rate": 1.3008796073729013e-06, "loss": 0.2057, "step": 10612 }, { "epoch": 0.8408001584472172, "grad_norm": 1.3958593359229354, "learning_rate": 1.2996143092327906e-06, "loss": 0.1326, "step": 10613 }, { "epoch": 0.8408793820558527, "grad_norm": 2.121863250466487, "learning_rate": 1.2983495839712146e-06, "loss": 0.2957, "step": 10614 }, { "epoch": 0.840958605664488, "grad_norm": 1.853954069617716, "learning_rate": 1.2970854316714477e-06, "loss": 0.1857, "step": 10615 }, { "epoch": 0.8410378292731234, "grad_norm": 1.5661612646522565, "learning_rate": 1.2958218524167288e-06, "loss": 0.1331, "step": 10616 }, { "epoch": 0.8411170528817588, "grad_norm": 1.6990134783980666, "learning_rate": 1.2945588462902603e-06, "loss": 0.1615, "step": 10617 }, { "epoch": 0.8411962764903942, "grad_norm": 1.44080534395435, "learning_rate": 1.2932964133752036e-06, "loss": 0.1542, "step": 10618 }, { "epoch": 0.8412755000990295, "grad_norm": 1.4644739533638742, "learning_rate": 1.292034553754683e-06, "loss": 0.1158, "step": 10619 }, { "epoch": 0.8413547237076648, "grad_norm": 1.7982692887055565, "learning_rate": 1.2907732675117878e-06, "loss": 0.1912, "step": 10620 }, { "epoch": 0.8414339473163003, "grad_norm": 1.5005404675239695, "learning_rate": 1.2895125547295672e-06, "loss": 0.1224, "step": 10621 }, { "epoch": 0.8415131709249356, "grad_norm": 1.4940693568453378, "learning_rate": 1.2882524154910314e-06, "loss": 0.1688, "step": 10622 }, { "epoch": 0.841592394533571, "grad_norm": 1.3976813593887059, "learning_rate": 1.2869928498791572e-06, "loss": 0.1359, "step": 10623 }, { "epoch": 0.8416716181422064, "grad_norm": 1.22190997864856, "learning_rate": 1.2857338579768796e-06, "loss": 0.1067, "step": 10624 }, { "epoch": 0.8417508417508418, "grad_norm": 1.720875838845788, "learning_rate": 1.2844754398670954e-06, "loss": 0.1808, "step": 10625 }, { "epoch": 0.8418300653594771, "grad_norm": 1.7900478640331172, "learning_rate": 1.2832175956326686e-06, "loss": 0.1225, "step": 10626 }, { "epoch": 0.8419092889681125, "grad_norm": 1.6646253019478188, "learning_rate": 1.2819603253564206e-06, "loss": 0.177, "step": 10627 }, { "epoch": 0.8419885125767479, "grad_norm": 1.6835433134538447, "learning_rate": 1.280703629121135e-06, "loss": 0.172, "step": 10628 }, { "epoch": 0.8420677361853832, "grad_norm": 1.74062051178521, "learning_rate": 1.2794475070095624e-06, "loss": 0.199, "step": 10629 }, { "epoch": 0.8421469597940187, "grad_norm": 2.315707132980495, "learning_rate": 1.2781919591044113e-06, "loss": 0.2665, "step": 10630 }, { "epoch": 0.842226183402654, "grad_norm": 1.580855381918244, "learning_rate": 1.2769369854883528e-06, "loss": 0.1344, "step": 10631 }, { "epoch": 0.8423054070112893, "grad_norm": 1.5580590840778559, "learning_rate": 1.2756825862440192e-06, "loss": 0.146, "step": 10632 }, { "epoch": 0.8423846306199247, "grad_norm": 1.3833071910128458, "learning_rate": 1.2744287614540108e-06, "loss": 0.1439, "step": 10633 }, { "epoch": 0.8424638542285601, "grad_norm": 1.467236894245345, "learning_rate": 1.2731755112008838e-06, "loss": 0.1439, "step": 10634 }, { "epoch": 0.8425430778371955, "grad_norm": 1.2835347226032316, "learning_rate": 1.2719228355671576e-06, "loss": 0.1028, "step": 10635 }, { "epoch": 0.8426223014458308, "grad_norm": 1.2814085411095317, "learning_rate": 1.2706707346353165e-06, "loss": 0.0918, "step": 10636 }, { "epoch": 0.8427015250544663, "grad_norm": 1.213247735202157, "learning_rate": 1.2694192084878032e-06, "loss": 0.1229, "step": 10637 }, { "epoch": 0.8427807486631016, "grad_norm": 1.6758196082021466, "learning_rate": 1.2681682572070275e-06, "loss": 0.1772, "step": 10638 }, { "epoch": 0.8428599722717369, "grad_norm": 1.4036752028238526, "learning_rate": 1.2669178808753568e-06, "loss": 0.1423, "step": 10639 }, { "epoch": 0.8429391958803724, "grad_norm": 1.803465808092052, "learning_rate": 1.265668079575124e-06, "loss": 0.1929, "step": 10640 }, { "epoch": 0.8430184194890077, "grad_norm": 1.3030318664980867, "learning_rate": 1.264418853388618e-06, "loss": 0.1173, "step": 10641 }, { "epoch": 0.8430976430976431, "grad_norm": 1.3591640343531863, "learning_rate": 1.2631702023980997e-06, "loss": 0.0885, "step": 10642 }, { "epoch": 0.8431768667062784, "grad_norm": 1.2844208331143927, "learning_rate": 1.2619221266857851e-06, "loss": 0.0997, "step": 10643 }, { "epoch": 0.8432560903149139, "grad_norm": 1.473859767773709, "learning_rate": 1.260674626333851e-06, "loss": 0.1358, "step": 10644 }, { "epoch": 0.8433353139235492, "grad_norm": 1.5754037872076125, "learning_rate": 1.259427701424445e-06, "loss": 0.2007, "step": 10645 }, { "epoch": 0.8434145375321845, "grad_norm": 1.330050597190308, "learning_rate": 1.2581813520396668e-06, "loss": 0.1124, "step": 10646 }, { "epoch": 0.84349376114082, "grad_norm": 1.4263162770836706, "learning_rate": 1.256935578261581e-06, "loss": 0.1177, "step": 10647 }, { "epoch": 0.8435729847494553, "grad_norm": 3.0772168883343913, "learning_rate": 1.255690380172222e-06, "loss": 0.1519, "step": 10648 }, { "epoch": 0.8436522083580907, "grad_norm": 1.831858704793479, "learning_rate": 1.2544457578535764e-06, "loss": 0.2082, "step": 10649 }, { "epoch": 0.8437314319667261, "grad_norm": 1.3041486853598445, "learning_rate": 1.253201711387594e-06, "loss": 0.104, "step": 10650 }, { "epoch": 0.8438106555753615, "grad_norm": 1.408366720003487, "learning_rate": 1.2519582408561936e-06, "loss": 0.1484, "step": 10651 }, { "epoch": 0.8438898791839968, "grad_norm": 1.3085834716889044, "learning_rate": 1.2507153463412513e-06, "loss": 0.099, "step": 10652 }, { "epoch": 0.8439691027926322, "grad_norm": 1.3676222409141765, "learning_rate": 1.2494730279246014e-06, "loss": 0.1312, "step": 10653 }, { "epoch": 0.8440483264012676, "grad_norm": 1.4013296168784446, "learning_rate": 1.2482312856880506e-06, "loss": 0.1051, "step": 10654 }, { "epoch": 0.8441275500099029, "grad_norm": 1.4668953876345308, "learning_rate": 1.2469901197133582e-06, "loss": 0.2093, "step": 10655 }, { "epoch": 0.8442067736185384, "grad_norm": 1.5331215415510298, "learning_rate": 1.2457495300822497e-06, "loss": 0.1576, "step": 10656 }, { "epoch": 0.8442859972271737, "grad_norm": 1.348251884135471, "learning_rate": 1.244509516876411e-06, "loss": 0.0886, "step": 10657 }, { "epoch": 0.8443652208358091, "grad_norm": 1.658490860092014, "learning_rate": 1.2432700801774923e-06, "loss": 0.2101, "step": 10658 }, { "epoch": 0.8444444444444444, "grad_norm": 1.897794198619597, "learning_rate": 1.2420312200671048e-06, "loss": 0.1657, "step": 10659 }, { "epoch": 0.8445236680530798, "grad_norm": 1.855774330869629, "learning_rate": 1.240792936626819e-06, "loss": 0.199, "step": 10660 }, { "epoch": 0.8446028916617152, "grad_norm": 1.4691100723579162, "learning_rate": 1.2395552299381742e-06, "loss": 0.1506, "step": 10661 }, { "epoch": 0.8446821152703505, "grad_norm": 1.0930413770737535, "learning_rate": 1.238318100082664e-06, "loss": 0.0826, "step": 10662 }, { "epoch": 0.844761338878986, "grad_norm": 1.7966270129389204, "learning_rate": 1.2370815471417464e-06, "loss": 0.2342, "step": 10663 }, { "epoch": 0.8448405624876213, "grad_norm": 1.2941394872342418, "learning_rate": 1.2358455711968463e-06, "loss": 0.1273, "step": 10664 }, { "epoch": 0.8449197860962567, "grad_norm": 1.6333518181697004, "learning_rate": 1.2346101723293457e-06, "loss": 0.1538, "step": 10665 }, { "epoch": 0.8449990097048921, "grad_norm": 1.317741960859693, "learning_rate": 1.233375350620587e-06, "loss": 0.1282, "step": 10666 }, { "epoch": 0.8450782333135274, "grad_norm": 1.7954736746770787, "learning_rate": 1.2321411061518807e-06, "loss": 0.1446, "step": 10667 }, { "epoch": 0.8451574569221628, "grad_norm": 2.059515867163736, "learning_rate": 1.2309074390044939e-06, "loss": 0.1657, "step": 10668 }, { "epoch": 0.8452366805307981, "grad_norm": 1.2631296994543926, "learning_rate": 1.2296743492596587e-06, "loss": 0.1166, "step": 10669 }, { "epoch": 0.8453159041394336, "grad_norm": 1.626095521050016, "learning_rate": 1.2284418369985651e-06, "loss": 0.182, "step": 10670 }, { "epoch": 0.8453951277480689, "grad_norm": 1.3158982902668888, "learning_rate": 1.227209902302372e-06, "loss": 0.1154, "step": 10671 }, { "epoch": 0.8454743513567043, "grad_norm": 1.3754095669626523, "learning_rate": 1.2259785452521956e-06, "loss": 0.1439, "step": 10672 }, { "epoch": 0.8455535749653397, "grad_norm": 1.4965944818283856, "learning_rate": 1.2247477659291118e-06, "loss": 0.1655, "step": 10673 }, { "epoch": 0.845632798573975, "grad_norm": 1.3557962784722621, "learning_rate": 1.223517564414166e-06, "loss": 0.0949, "step": 10674 }, { "epoch": 0.8457120221826104, "grad_norm": 1.543222769771887, "learning_rate": 1.2222879407883592e-06, "loss": 0.1262, "step": 10675 }, { "epoch": 0.8457912457912458, "grad_norm": 1.5546244580120567, "learning_rate": 1.2210588951326542e-06, "loss": 0.1899, "step": 10676 }, { "epoch": 0.8458704693998812, "grad_norm": 1.885352706403217, "learning_rate": 1.2198304275279805e-06, "loss": 0.1914, "step": 10677 }, { "epoch": 0.8459496930085165, "grad_norm": 2.0336636813408924, "learning_rate": 1.2186025380552259e-06, "loss": 0.2013, "step": 10678 }, { "epoch": 0.846028916617152, "grad_norm": 1.861662581942874, "learning_rate": 1.2173752267952376e-06, "loss": 0.2344, "step": 10679 }, { "epoch": 0.8461081402257873, "grad_norm": 1.5795910758171352, "learning_rate": 1.2161484938288348e-06, "loss": 0.1673, "step": 10680 }, { "epoch": 0.8461873638344226, "grad_norm": 1.5839058102081605, "learning_rate": 1.214922339236788e-06, "loss": 0.1903, "step": 10681 }, { "epoch": 0.846266587443058, "grad_norm": 1.399027769939382, "learning_rate": 1.213696763099832e-06, "loss": 0.1301, "step": 10682 }, { "epoch": 0.8463458110516934, "grad_norm": 1.348424042624097, "learning_rate": 1.2124717654986695e-06, "loss": 0.1279, "step": 10683 }, { "epoch": 0.8464250346603288, "grad_norm": 1.9523961313473757, "learning_rate": 1.2112473465139586e-06, "loss": 0.2408, "step": 10684 }, { "epoch": 0.8465042582689641, "grad_norm": 1.9523735953415242, "learning_rate": 1.210023506226321e-06, "loss": 0.2004, "step": 10685 }, { "epoch": 0.8465834818775996, "grad_norm": 1.5478258924179662, "learning_rate": 1.2088002447163383e-06, "loss": 0.1649, "step": 10686 }, { "epoch": 0.8466627054862349, "grad_norm": 1.520408475955764, "learning_rate": 1.2075775620645613e-06, "loss": 0.1288, "step": 10687 }, { "epoch": 0.8467419290948702, "grad_norm": 1.6425088673968629, "learning_rate": 1.2063554583514947e-06, "loss": 0.1825, "step": 10688 }, { "epoch": 0.8468211527035057, "grad_norm": 1.3383072044081166, "learning_rate": 1.2051339336576074e-06, "loss": 0.1113, "step": 10689 }, { "epoch": 0.846900376312141, "grad_norm": 1.409507248217889, "learning_rate": 1.203912988063335e-06, "loss": 0.1407, "step": 10690 }, { "epoch": 0.8469795999207764, "grad_norm": 1.6913524402417588, "learning_rate": 1.2026926216490675e-06, "loss": 0.2201, "step": 10691 }, { "epoch": 0.8470588235294118, "grad_norm": 1.7662384992939641, "learning_rate": 1.2014728344951587e-06, "loss": 0.1677, "step": 10692 }, { "epoch": 0.8471380471380472, "grad_norm": 1.3816241585909814, "learning_rate": 1.2002536266819309e-06, "loss": 0.1349, "step": 10693 }, { "epoch": 0.8472172707466825, "grad_norm": 2.0017394082175546, "learning_rate": 1.1990349982896598e-06, "loss": 0.2601, "step": 10694 }, { "epoch": 0.8472964943553178, "grad_norm": 1.4062860135652977, "learning_rate": 1.1978169493985836e-06, "loss": 0.1666, "step": 10695 }, { "epoch": 0.8473757179639533, "grad_norm": 1.466595131423128, "learning_rate": 1.1965994800889113e-06, "loss": 0.1551, "step": 10696 }, { "epoch": 0.8474549415725886, "grad_norm": 1.5917681099215155, "learning_rate": 1.1953825904408033e-06, "loss": 0.157, "step": 10697 }, { "epoch": 0.847534165181224, "grad_norm": 1.6968631714051232, "learning_rate": 1.1941662805343846e-06, "loss": 0.1566, "step": 10698 }, { "epoch": 0.8476133887898594, "grad_norm": 1.5197051484697242, "learning_rate": 1.1929505504497464e-06, "loss": 0.1716, "step": 10699 }, { "epoch": 0.8476926123984948, "grad_norm": 1.860364215240847, "learning_rate": 1.191735400266939e-06, "loss": 0.1929, "step": 10700 }, { "epoch": 0.8477718360071301, "grad_norm": 1.6662502298455804, "learning_rate": 1.190520830065972e-06, "loss": 0.1519, "step": 10701 }, { "epoch": 0.8478510596157655, "grad_norm": 1.029289426591469, "learning_rate": 1.189306839926818e-06, "loss": 0.0955, "step": 10702 }, { "epoch": 0.8479302832244009, "grad_norm": 1.3631240466217436, "learning_rate": 1.1880934299294167e-06, "loss": 0.1308, "step": 10703 }, { "epoch": 0.8480095068330362, "grad_norm": 1.6788271571037947, "learning_rate": 1.1868806001536625e-06, "loss": 0.1381, "step": 10704 }, { "epoch": 0.8480887304416717, "grad_norm": 1.746864661650298, "learning_rate": 1.185668350679413e-06, "loss": 0.1065, "step": 10705 }, { "epoch": 0.848167954050307, "grad_norm": 1.5660316279931281, "learning_rate": 1.1844566815864921e-06, "loss": 0.1403, "step": 10706 }, { "epoch": 0.8482471776589424, "grad_norm": 1.302487619894792, "learning_rate": 1.1832455929546827e-06, "loss": 0.0911, "step": 10707 }, { "epoch": 0.8483264012675777, "grad_norm": 1.662286876416712, "learning_rate": 1.182035084863724e-06, "loss": 0.145, "step": 10708 }, { "epoch": 0.8484056248762131, "grad_norm": 1.5309196113711163, "learning_rate": 1.1808251573933272e-06, "loss": 0.1571, "step": 10709 }, { "epoch": 0.8484848484848485, "grad_norm": 1.3450744293873835, "learning_rate": 1.1796158106231603e-06, "loss": 0.1353, "step": 10710 }, { "epoch": 0.8485640720934838, "grad_norm": 1.380369853613987, "learning_rate": 1.1784070446328477e-06, "loss": 0.0908, "step": 10711 }, { "epoch": 0.8486432957021193, "grad_norm": 1.6753488480979988, "learning_rate": 1.177198859501989e-06, "loss": 0.1583, "step": 10712 }, { "epoch": 0.8487225193107546, "grad_norm": 1.5865528090853542, "learning_rate": 1.1759912553101316e-06, "loss": 0.2, "step": 10713 }, { "epoch": 0.8488017429193899, "grad_norm": 1.1312338996565505, "learning_rate": 1.1747842321367886e-06, "loss": 0.1251, "step": 10714 }, { "epoch": 0.8488809665280254, "grad_norm": 1.3844549861810165, "learning_rate": 1.173577790061442e-06, "loss": 0.1447, "step": 10715 }, { "epoch": 0.8489601901366607, "grad_norm": 1.6629610526972813, "learning_rate": 1.1723719291635272e-06, "loss": 0.1592, "step": 10716 }, { "epoch": 0.8490394137452961, "grad_norm": 1.899786902491622, "learning_rate": 1.171166649522444e-06, "loss": 0.2097, "step": 10717 }, { "epoch": 0.8491186373539314, "grad_norm": 1.4779685483111693, "learning_rate": 1.1699619512175563e-06, "loss": 0.155, "step": 10718 }, { "epoch": 0.8491978609625669, "grad_norm": 1.5631792328393774, "learning_rate": 1.168757834328188e-06, "loss": 0.1201, "step": 10719 }, { "epoch": 0.8492770845712022, "grad_norm": 2.621245318854018, "learning_rate": 1.1675542989336208e-06, "loss": 0.1776, "step": 10720 }, { "epoch": 0.8493563081798375, "grad_norm": 1.6246803734351898, "learning_rate": 1.1663513451131047e-06, "loss": 0.1479, "step": 10721 }, { "epoch": 0.849435531788473, "grad_norm": 1.3830803311759983, "learning_rate": 1.1651489729458487e-06, "loss": 0.0968, "step": 10722 }, { "epoch": 0.8495147553971083, "grad_norm": 1.4377400573625967, "learning_rate": 1.1639471825110205e-06, "loss": 0.123, "step": 10723 }, { "epoch": 0.8495939790057437, "grad_norm": 1.5542109076718642, "learning_rate": 1.1627459738877557e-06, "loss": 0.1659, "step": 10724 }, { "epoch": 0.8496732026143791, "grad_norm": 1.6332324553844815, "learning_rate": 1.1615453471551462e-06, "loss": 0.1268, "step": 10725 }, { "epoch": 0.8497524262230145, "grad_norm": 1.2870130611851327, "learning_rate": 1.1603453023922473e-06, "loss": 0.0965, "step": 10726 }, { "epoch": 0.8498316498316498, "grad_norm": 1.9095991502783456, "learning_rate": 1.1591458396780753e-06, "loss": 0.251, "step": 10727 }, { "epoch": 0.8499108734402852, "grad_norm": 1.396403365650525, "learning_rate": 1.1579469590916125e-06, "loss": 0.17, "step": 10728 }, { "epoch": 0.8499900970489206, "grad_norm": 1.782881100550267, "learning_rate": 1.156748660711796e-06, "loss": 0.1983, "step": 10729 }, { "epoch": 0.8500693206575559, "grad_norm": 2.0620842280042146, "learning_rate": 1.1555509446175284e-06, "loss": 0.2231, "step": 10730 }, { "epoch": 0.8501485442661914, "grad_norm": 2.0084242249029116, "learning_rate": 1.1543538108876751e-06, "loss": 0.2524, "step": 10731 }, { "epoch": 0.8502277678748267, "grad_norm": 1.6925261180509055, "learning_rate": 1.153157259601062e-06, "loss": 0.1565, "step": 10732 }, { "epoch": 0.8503069914834621, "grad_norm": 1.5909196224828253, "learning_rate": 1.1519612908364718e-06, "loss": 0.1663, "step": 10733 }, { "epoch": 0.8503862150920974, "grad_norm": 1.2773661543164585, "learning_rate": 1.1507659046726605e-06, "loss": 0.1069, "step": 10734 }, { "epoch": 0.8504654387007328, "grad_norm": 1.7757580336150474, "learning_rate": 1.1495711011883325e-06, "loss": 0.1587, "step": 10735 }, { "epoch": 0.8505446623093682, "grad_norm": 1.4649717153794204, "learning_rate": 1.148376880462161e-06, "loss": 0.1371, "step": 10736 }, { "epoch": 0.8506238859180035, "grad_norm": 1.4027923989333666, "learning_rate": 1.1471832425727825e-06, "loss": 0.1273, "step": 10737 }, { "epoch": 0.850703109526639, "grad_norm": 2.2080231813661464, "learning_rate": 1.14599018759879e-06, "loss": 0.237, "step": 10738 }, { "epoch": 0.8507823331352743, "grad_norm": 1.1946025165554786, "learning_rate": 1.1447977156187395e-06, "loss": 0.098, "step": 10739 }, { "epoch": 0.8508615567439097, "grad_norm": 1.5738243768106932, "learning_rate": 1.1436058267111527e-06, "loss": 0.1262, "step": 10740 }, { "epoch": 0.8509407803525451, "grad_norm": 1.451179334028792, "learning_rate": 1.1424145209545079e-06, "loss": 0.1459, "step": 10741 }, { "epoch": 0.8510200039611804, "grad_norm": 1.3704756152187496, "learning_rate": 1.1412237984272467e-06, "loss": 0.0951, "step": 10742 }, { "epoch": 0.8510992275698158, "grad_norm": 1.4400951994687616, "learning_rate": 1.140033659207771e-06, "loss": 0.1094, "step": 10743 }, { "epoch": 0.8511784511784511, "grad_norm": 1.841590275935626, "learning_rate": 1.1388441033744502e-06, "loss": 0.1607, "step": 10744 }, { "epoch": 0.8512576747870866, "grad_norm": 1.588652919642301, "learning_rate": 1.1376551310056073e-06, "loss": 0.1519, "step": 10745 }, { "epoch": 0.8513368983957219, "grad_norm": 1.4371990395365755, "learning_rate": 1.1364667421795283e-06, "loss": 0.1583, "step": 10746 }, { "epoch": 0.8514161220043573, "grad_norm": 1.4173112946760453, "learning_rate": 1.1352789369744688e-06, "loss": 0.1133, "step": 10747 }, { "epoch": 0.8514953456129927, "grad_norm": 1.477009545268548, "learning_rate": 1.134091715468636e-06, "loss": 0.1484, "step": 10748 }, { "epoch": 0.851574569221628, "grad_norm": 2.156278001422682, "learning_rate": 1.132905077740203e-06, "loss": 0.2004, "step": 10749 }, { "epoch": 0.8516537928302634, "grad_norm": 2.384420320272095, "learning_rate": 1.131719023867306e-06, "loss": 0.2268, "step": 10750 }, { "epoch": 0.8517330164388988, "grad_norm": 1.6216524252611697, "learning_rate": 1.1305335539280392e-06, "loss": 0.1669, "step": 10751 }, { "epoch": 0.8518122400475342, "grad_norm": 1.402160304943576, "learning_rate": 1.1293486680004607e-06, "loss": 0.1135, "step": 10752 }, { "epoch": 0.8518914636561695, "grad_norm": 1.7332955130269427, "learning_rate": 1.1281643661625896e-06, "loss": 0.1824, "step": 10753 }, { "epoch": 0.851970687264805, "grad_norm": 1.6749409114378597, "learning_rate": 1.1269806484924072e-06, "loss": 0.1665, "step": 10754 }, { "epoch": 0.8520499108734403, "grad_norm": 1.6215578409924865, "learning_rate": 1.1257975150678557e-06, "loss": 0.1649, "step": 10755 }, { "epoch": 0.8521291344820756, "grad_norm": 2.0761668365764474, "learning_rate": 1.124614965966835e-06, "loss": 0.1978, "step": 10756 }, { "epoch": 0.852208358090711, "grad_norm": 1.3136853998749878, "learning_rate": 1.1234330012672146e-06, "loss": 0.0878, "step": 10757 }, { "epoch": 0.8522875816993464, "grad_norm": 1.592019295297516, "learning_rate": 1.1222516210468204e-06, "loss": 0.1363, "step": 10758 }, { "epoch": 0.8523668053079818, "grad_norm": 1.1758758040708581, "learning_rate": 1.121070825383438e-06, "loss": 0.096, "step": 10759 }, { "epoch": 0.8524460289166171, "grad_norm": 1.631126614257944, "learning_rate": 1.1198906143548216e-06, "loss": 0.1193, "step": 10760 }, { "epoch": 0.8525252525252526, "grad_norm": 1.4731571300171946, "learning_rate": 1.1187109880386794e-06, "loss": 0.1313, "step": 10761 }, { "epoch": 0.8526044761338879, "grad_norm": 1.5096728444822427, "learning_rate": 1.117531946512682e-06, "loss": 0.1284, "step": 10762 }, { "epoch": 0.8526836997425232, "grad_norm": 1.449893354010232, "learning_rate": 1.1163534898544692e-06, "loss": 0.1503, "step": 10763 }, { "epoch": 0.8527629233511587, "grad_norm": 1.3353347513699871, "learning_rate": 1.1151756181416328e-06, "loss": 0.1392, "step": 10764 }, { "epoch": 0.852842146959794, "grad_norm": 1.2333673607803275, "learning_rate": 1.1139983314517288e-06, "loss": 0.0815, "step": 10765 }, { "epoch": 0.8529213705684294, "grad_norm": 1.131887101651616, "learning_rate": 1.1128216298622808e-06, "loss": 0.0838, "step": 10766 }, { "epoch": 0.8530005941770648, "grad_norm": 1.4841197255398926, "learning_rate": 1.1116455134507665e-06, "loss": 0.1256, "step": 10767 }, { "epoch": 0.8530798177857002, "grad_norm": 2.0210254513643395, "learning_rate": 1.110469982294624e-06, "loss": 0.1639, "step": 10768 }, { "epoch": 0.8531590413943355, "grad_norm": 1.3508216702846854, "learning_rate": 1.1092950364712617e-06, "loss": 0.1139, "step": 10769 }, { "epoch": 0.8532382650029708, "grad_norm": 1.9734629410394948, "learning_rate": 1.1081206760580422e-06, "loss": 0.185, "step": 10770 }, { "epoch": 0.8533174886116063, "grad_norm": 1.4024693287905854, "learning_rate": 1.1069469011322908e-06, "loss": 0.1416, "step": 10771 }, { "epoch": 0.8533967122202416, "grad_norm": 1.6091626929897471, "learning_rate": 1.1057737117712941e-06, "loss": 0.158, "step": 10772 }, { "epoch": 0.853475935828877, "grad_norm": 1.3352103199377978, "learning_rate": 1.1046011080523034e-06, "loss": 0.1424, "step": 10773 }, { "epoch": 0.8535551594375124, "grad_norm": 1.4384358195654565, "learning_rate": 1.1034290900525279e-06, "loss": 0.176, "step": 10774 }, { "epoch": 0.8536343830461478, "grad_norm": 1.2364957322053722, "learning_rate": 1.1022576578491372e-06, "loss": 0.1267, "step": 10775 }, { "epoch": 0.8537136066547831, "grad_norm": 1.5226341616040144, "learning_rate": 1.1010868115192696e-06, "loss": 0.1675, "step": 10776 }, { "epoch": 0.8537928302634185, "grad_norm": 1.4993668987531505, "learning_rate": 1.0999165511400157e-06, "loss": 0.1191, "step": 10777 }, { "epoch": 0.8538720538720539, "grad_norm": 1.616445639301569, "learning_rate": 1.09874687678843e-06, "loss": 0.1332, "step": 10778 }, { "epoch": 0.8539512774806892, "grad_norm": 1.7291532180765008, "learning_rate": 1.097577788541535e-06, "loss": 0.1963, "step": 10779 }, { "epoch": 0.8540305010893247, "grad_norm": 1.9633627835641465, "learning_rate": 1.0964092864763065e-06, "loss": 0.1586, "step": 10780 }, { "epoch": 0.85410972469796, "grad_norm": 1.8994142774003182, "learning_rate": 1.095241370669684e-06, "loss": 0.1987, "step": 10781 }, { "epoch": 0.8541889483065954, "grad_norm": 1.213025168452882, "learning_rate": 1.0940740411985718e-06, "loss": 0.1057, "step": 10782 }, { "epoch": 0.8542681719152307, "grad_norm": 1.3279690628374319, "learning_rate": 1.0929072981398313e-06, "loss": 0.1029, "step": 10783 }, { "epoch": 0.8543473955238661, "grad_norm": 1.7370033958933218, "learning_rate": 1.091741141570285e-06, "loss": 0.1757, "step": 10784 }, { "epoch": 0.8544266191325015, "grad_norm": 1.5499041717521618, "learning_rate": 1.0905755715667222e-06, "loss": 0.1648, "step": 10785 }, { "epoch": 0.8545058427411368, "grad_norm": 1.6940885929497413, "learning_rate": 1.0894105882058891e-06, "loss": 0.1804, "step": 10786 }, { "epoch": 0.8545850663497723, "grad_norm": 2.006176652490394, "learning_rate": 1.0882461915644936e-06, "loss": 0.1417, "step": 10787 }, { "epoch": 0.8546642899584076, "grad_norm": 2.2155747054148796, "learning_rate": 1.0870823817192045e-06, "loss": 0.2456, "step": 10788 }, { "epoch": 0.854743513567043, "grad_norm": 1.3053202447701555, "learning_rate": 1.0859191587466556e-06, "loss": 0.0999, "step": 10789 }, { "epoch": 0.8548227371756784, "grad_norm": 1.5984221308445026, "learning_rate": 1.0847565227234392e-06, "loss": 0.1523, "step": 10790 }, { "epoch": 0.8549019607843137, "grad_norm": 1.5777703577484765, "learning_rate": 1.0835944737261072e-06, "loss": 0.1643, "step": 10791 }, { "epoch": 0.8549811843929491, "grad_norm": 1.4860269615427253, "learning_rate": 1.0824330118311765e-06, "loss": 0.1364, "step": 10792 }, { "epoch": 0.8550604080015844, "grad_norm": 1.9153429872493002, "learning_rate": 1.0812721371151213e-06, "loss": 0.1675, "step": 10793 }, { "epoch": 0.8551396316102199, "grad_norm": 1.2205180080888183, "learning_rate": 1.080111849654384e-06, "loss": 0.0926, "step": 10794 }, { "epoch": 0.8552188552188552, "grad_norm": 1.7285872323238727, "learning_rate": 1.078952149525362e-06, "loss": 0.1365, "step": 10795 }, { "epoch": 0.8552980788274905, "grad_norm": 1.3793799193755691, "learning_rate": 1.0777930368044143e-06, "loss": 0.1207, "step": 10796 }, { "epoch": 0.855377302436126, "grad_norm": 1.8215710165403955, "learning_rate": 1.0766345115678633e-06, "loss": 0.1572, "step": 10797 }, { "epoch": 0.8554565260447613, "grad_norm": 1.5703175832446667, "learning_rate": 1.0754765738919947e-06, "loss": 0.1833, "step": 10798 }, { "epoch": 0.8555357496533967, "grad_norm": 1.4602348802495926, "learning_rate": 1.074319223853052e-06, "loss": 0.1968, "step": 10799 }, { "epoch": 0.8556149732620321, "grad_norm": 1.1861156951345262, "learning_rate": 1.0731624615272385e-06, "loss": 0.0817, "step": 10800 }, { "epoch": 0.8556941968706675, "grad_norm": 1.2166464024514778, "learning_rate": 1.0720062869907255e-06, "loss": 0.1146, "step": 10801 }, { "epoch": 0.8557734204793028, "grad_norm": 1.4642846957734983, "learning_rate": 1.07085070031964e-06, "loss": 0.1175, "step": 10802 }, { "epoch": 0.8558526440879382, "grad_norm": 1.5136991003757623, "learning_rate": 1.06969570159007e-06, "loss": 0.229, "step": 10803 }, { "epoch": 0.8559318676965736, "grad_norm": 1.7684758075250464, "learning_rate": 1.0685412908780702e-06, "loss": 0.2779, "step": 10804 }, { "epoch": 0.8560110913052089, "grad_norm": 1.6070700715321955, "learning_rate": 1.0673874682596497e-06, "loss": 0.2305, "step": 10805 }, { "epoch": 0.8560903149138444, "grad_norm": 1.16904737079037, "learning_rate": 1.0662342338107823e-06, "loss": 0.0889, "step": 10806 }, { "epoch": 0.8561695385224797, "grad_norm": 1.9044992815805222, "learning_rate": 1.065081587607406e-06, "loss": 0.2386, "step": 10807 }, { "epoch": 0.8562487621311151, "grad_norm": 1.6172652451385783, "learning_rate": 1.0639295297254149e-06, "loss": 0.1262, "step": 10808 }, { "epoch": 0.8563279857397504, "grad_norm": 1.4735378412564195, "learning_rate": 1.0627780602406656e-06, "loss": 0.1347, "step": 10809 }, { "epoch": 0.8564072093483858, "grad_norm": 1.3239435164978808, "learning_rate": 1.061627179228979e-06, "loss": 0.1508, "step": 10810 }, { "epoch": 0.8564864329570212, "grad_norm": 1.9948249869847379, "learning_rate": 1.0604768867661342e-06, "loss": 0.1812, "step": 10811 }, { "epoch": 0.8565656565656565, "grad_norm": 1.9830625571650182, "learning_rate": 1.0593271829278718e-06, "loss": 0.2253, "step": 10812 }, { "epoch": 0.856644880174292, "grad_norm": 1.4365093643391995, "learning_rate": 1.0581780677898924e-06, "loss": 0.1588, "step": 10813 }, { "epoch": 0.8567241037829273, "grad_norm": 1.8006801448401626, "learning_rate": 1.0570295414278642e-06, "loss": 0.1818, "step": 10814 }, { "epoch": 0.8568033273915627, "grad_norm": 1.3889140714159647, "learning_rate": 1.0558816039174102e-06, "loss": 0.14, "step": 10815 }, { "epoch": 0.8568825510001981, "grad_norm": 2.2535281048472844, "learning_rate": 1.0547342553341144e-06, "loss": 0.1794, "step": 10816 }, { "epoch": 0.8569617746088334, "grad_norm": 1.3078616228040925, "learning_rate": 1.0535874957535275e-06, "loss": 0.1385, "step": 10817 }, { "epoch": 0.8570409982174688, "grad_norm": 1.3735011857183317, "learning_rate": 1.0524413252511567e-06, "loss": 0.1045, "step": 10818 }, { "epoch": 0.8571202218261041, "grad_norm": 1.5673797396505862, "learning_rate": 1.0512957439024697e-06, "loss": 0.141, "step": 10819 }, { "epoch": 0.8571994454347396, "grad_norm": 1.9024765870579567, "learning_rate": 1.0501507517829012e-06, "loss": 0.1314, "step": 10820 }, { "epoch": 0.8572786690433749, "grad_norm": 1.5778784050280112, "learning_rate": 1.0490063489678427e-06, "loss": 0.1269, "step": 10821 }, { "epoch": 0.8573578926520103, "grad_norm": 1.4779781148025237, "learning_rate": 1.0478625355326445e-06, "loss": 0.1351, "step": 10822 }, { "epoch": 0.8574371162606457, "grad_norm": 1.320203920000287, "learning_rate": 1.0467193115526254e-06, "loss": 0.1101, "step": 10823 }, { "epoch": 0.857516339869281, "grad_norm": 1.5433091785593682, "learning_rate": 1.0455766771030585e-06, "loss": 0.16, "step": 10824 }, { "epoch": 0.8575955634779164, "grad_norm": 1.8562092527200387, "learning_rate": 1.0444346322591804e-06, "loss": 0.2233, "step": 10825 }, { "epoch": 0.8576747870865518, "grad_norm": 1.6462935814332227, "learning_rate": 1.0432931770961907e-06, "loss": 0.1618, "step": 10826 }, { "epoch": 0.8577540106951872, "grad_norm": 1.5957889532741771, "learning_rate": 1.0421523116892496e-06, "loss": 0.1251, "step": 10827 }, { "epoch": 0.8578332343038225, "grad_norm": 1.304027012229241, "learning_rate": 1.0410120361134767e-06, "loss": 0.126, "step": 10828 }, { "epoch": 0.857912457912458, "grad_norm": 1.9042148690092953, "learning_rate": 1.0398723504439512e-06, "loss": 0.1829, "step": 10829 }, { "epoch": 0.8579916815210933, "grad_norm": 2.342898273114089, "learning_rate": 1.0387332547557194e-06, "loss": 0.1685, "step": 10830 }, { "epoch": 0.8580709051297286, "grad_norm": 1.7327325448050124, "learning_rate": 1.0375947491237836e-06, "loss": 0.1534, "step": 10831 }, { "epoch": 0.858150128738364, "grad_norm": 1.5784442859851633, "learning_rate": 1.0364568336231085e-06, "loss": 0.1815, "step": 10832 }, { "epoch": 0.8582293523469994, "grad_norm": 1.195464172406603, "learning_rate": 1.0353195083286226e-06, "loss": 0.0939, "step": 10833 }, { "epoch": 0.8583085759556348, "grad_norm": 1.5450215768030042, "learning_rate": 1.034182773315211e-06, "loss": 0.147, "step": 10834 }, { "epoch": 0.8583877995642701, "grad_norm": 1.8235731676005325, "learning_rate": 1.0330466286577224e-06, "loss": 0.1759, "step": 10835 }, { "epoch": 0.8584670231729056, "grad_norm": 1.8986666570621906, "learning_rate": 1.031911074430968e-06, "loss": 0.1597, "step": 10836 }, { "epoch": 0.8585462467815409, "grad_norm": 1.4176442126967628, "learning_rate": 1.030776110709718e-06, "loss": 0.1732, "step": 10837 }, { "epoch": 0.8586254703901762, "grad_norm": 1.7690483511991986, "learning_rate": 1.0296417375687017e-06, "loss": 0.1517, "step": 10838 }, { "epoch": 0.8587046939988117, "grad_norm": 1.3487759139396924, "learning_rate": 1.0285079550826172e-06, "loss": 0.1507, "step": 10839 }, { "epoch": 0.858783917607447, "grad_norm": 1.398186009240261, "learning_rate": 1.0273747633261144e-06, "loss": 0.1148, "step": 10840 }, { "epoch": 0.8588631412160824, "grad_norm": 1.4749994500332302, "learning_rate": 1.0262421623738105e-06, "loss": 0.1728, "step": 10841 }, { "epoch": 0.8589423648247178, "grad_norm": 1.5673828105074443, "learning_rate": 1.0251101523002805e-06, "loss": 0.1124, "step": 10842 }, { "epoch": 0.8590215884333532, "grad_norm": 1.2554095222069959, "learning_rate": 1.0239787331800632e-06, "loss": 0.0925, "step": 10843 }, { "epoch": 0.8591008120419885, "grad_norm": 1.2759442326899917, "learning_rate": 1.022847905087656e-06, "loss": 0.1163, "step": 10844 }, { "epoch": 0.8591800356506238, "grad_norm": 2.366365731786112, "learning_rate": 1.0217176680975183e-06, "loss": 0.1802, "step": 10845 }, { "epoch": 0.8592592592592593, "grad_norm": 1.4738024555333302, "learning_rate": 1.0205880222840726e-06, "loss": 0.1724, "step": 10846 }, { "epoch": 0.8593384828678946, "grad_norm": 1.4613123930405818, "learning_rate": 1.0194589677216992e-06, "loss": 0.1366, "step": 10847 }, { "epoch": 0.85941770647653, "grad_norm": 1.7077496912084549, "learning_rate": 1.0183305044847402e-06, "loss": 0.1232, "step": 10848 }, { "epoch": 0.8594969300851654, "grad_norm": 1.4517121254553849, "learning_rate": 1.0172026326475016e-06, "loss": 0.152, "step": 10849 }, { "epoch": 0.8595761536938008, "grad_norm": 1.4148203809648374, "learning_rate": 1.0160753522842482e-06, "loss": 0.1736, "step": 10850 }, { "epoch": 0.8596553773024361, "grad_norm": 2.086715696888453, "learning_rate": 1.0149486634692019e-06, "loss": 0.255, "step": 10851 }, { "epoch": 0.8597346009110715, "grad_norm": 1.9728700402459152, "learning_rate": 1.0138225662765555e-06, "loss": 0.1897, "step": 10852 }, { "epoch": 0.8598138245197069, "grad_norm": 1.7380816456086252, "learning_rate": 1.0126970607804532e-06, "loss": 0.1037, "step": 10853 }, { "epoch": 0.8598930481283422, "grad_norm": 1.0552491580208538, "learning_rate": 1.0115721470550045e-06, "loss": 0.1065, "step": 10854 }, { "epoch": 0.8599722717369777, "grad_norm": 1.7852307757394277, "learning_rate": 1.0104478251742822e-06, "loss": 0.2004, "step": 10855 }, { "epoch": 0.860051495345613, "grad_norm": 1.643207040178242, "learning_rate": 1.009324095212315e-06, "loss": 0.1728, "step": 10856 }, { "epoch": 0.8601307189542484, "grad_norm": 1.648967191646453, "learning_rate": 1.0082009572430963e-06, "loss": 0.1906, "step": 10857 }, { "epoch": 0.8602099425628837, "grad_norm": 1.654034385543417, "learning_rate": 1.0070784113405763e-06, "loss": 0.1747, "step": 10858 }, { "epoch": 0.8602891661715191, "grad_norm": 1.5233594064053226, "learning_rate": 1.005956457578675e-06, "loss": 0.1309, "step": 10859 }, { "epoch": 0.8603683897801545, "grad_norm": 1.3149657327820967, "learning_rate": 1.0048350960312637e-06, "loss": 0.1086, "step": 10860 }, { "epoch": 0.8604476133887898, "grad_norm": 1.2705457344890998, "learning_rate": 1.003714326772176e-06, "loss": 0.1007, "step": 10861 }, { "epoch": 0.8605268369974253, "grad_norm": 1.6777246883902823, "learning_rate": 1.0025941498752167e-06, "loss": 0.1468, "step": 10862 }, { "epoch": 0.8606060606060606, "grad_norm": 1.6829933122991079, "learning_rate": 1.001474565414139e-06, "loss": 0.2187, "step": 10863 }, { "epoch": 0.860685284214696, "grad_norm": 1.4023549832523587, "learning_rate": 1.0003555734626603e-06, "loss": 0.1048, "step": 10864 }, { "epoch": 0.8607645078233314, "grad_norm": 1.447306692629291, "learning_rate": 9.992371740944663e-07, "loss": 0.1227, "step": 10865 }, { "epoch": 0.8608437314319667, "grad_norm": 1.1714256896915287, "learning_rate": 9.981193673831946e-07, "loss": 0.1149, "step": 10866 }, { "epoch": 0.8609229550406021, "grad_norm": 2.1047922878558203, "learning_rate": 9.970021534024476e-07, "loss": 0.1971, "step": 10867 }, { "epoch": 0.8610021786492374, "grad_norm": 1.4249657713444897, "learning_rate": 9.958855322257922e-07, "loss": 0.1609, "step": 10868 }, { "epoch": 0.8610814022578729, "grad_norm": 1.599016886892758, "learning_rate": 9.94769503926748e-07, "loss": 0.1421, "step": 10869 }, { "epoch": 0.8611606258665082, "grad_norm": 1.237440408740556, "learning_rate": 9.936540685787998e-07, "loss": 0.1082, "step": 10870 }, { "epoch": 0.8612398494751435, "grad_norm": 1.6487295026807114, "learning_rate": 9.925392262553968e-07, "loss": 0.1617, "step": 10871 }, { "epoch": 0.861319073083779, "grad_norm": 1.741309506424304, "learning_rate": 9.914249770299445e-07, "loss": 0.1306, "step": 10872 }, { "epoch": 0.8613982966924143, "grad_norm": 1.279019951543846, "learning_rate": 9.903113209758098e-07, "loss": 0.1404, "step": 10873 }, { "epoch": 0.8614775203010497, "grad_norm": 1.3963363639152515, "learning_rate": 9.89198258166324e-07, "loss": 0.1525, "step": 10874 }, { "epoch": 0.8615567439096851, "grad_norm": 1.8041897769278583, "learning_rate": 9.880857886747753e-07, "loss": 0.1598, "step": 10875 }, { "epoch": 0.8616359675183205, "grad_norm": 1.7045084859332988, "learning_rate": 9.869739125744138e-07, "loss": 0.1602, "step": 10876 }, { "epoch": 0.8617151911269558, "grad_norm": 2.042644580505003, "learning_rate": 9.858626299384532e-07, "loss": 0.1366, "step": 10877 }, { "epoch": 0.8617944147355912, "grad_norm": 1.5019622331554636, "learning_rate": 9.847519408400663e-07, "loss": 0.1378, "step": 10878 }, { "epoch": 0.8618736383442266, "grad_norm": 1.3394970830297506, "learning_rate": 9.836418453523833e-07, "loss": 0.1108, "step": 10879 }, { "epoch": 0.8619528619528619, "grad_norm": 1.1974992808467884, "learning_rate": 9.825323435485024e-07, "loss": 0.1237, "step": 10880 }, { "epoch": 0.8620320855614974, "grad_norm": 1.366608349419337, "learning_rate": 9.814234355014774e-07, "loss": 0.1505, "step": 10881 }, { "epoch": 0.8621113091701327, "grad_norm": 1.8277586847796512, "learning_rate": 9.803151212843253e-07, "loss": 0.1458, "step": 10882 }, { "epoch": 0.8621905327787681, "grad_norm": 1.3973806595432972, "learning_rate": 9.792074009700192e-07, "loss": 0.1222, "step": 10883 }, { "epoch": 0.8622697563874034, "grad_norm": 1.330393299093837, "learning_rate": 9.781002746315039e-07, "loss": 0.1544, "step": 10884 }, { "epoch": 0.8623489799960388, "grad_norm": 1.3206180880977487, "learning_rate": 9.769937423416741e-07, "loss": 0.1195, "step": 10885 }, { "epoch": 0.8624282036046742, "grad_norm": 1.7941559545091346, "learning_rate": 9.758878041733877e-07, "loss": 0.1668, "step": 10886 }, { "epoch": 0.8625074272133095, "grad_norm": 1.219754197377734, "learning_rate": 9.747824601994715e-07, "loss": 0.1284, "step": 10887 }, { "epoch": 0.862586650821945, "grad_norm": 1.6937680598776363, "learning_rate": 9.73677710492703e-07, "loss": 0.1795, "step": 10888 }, { "epoch": 0.8626658744305803, "grad_norm": 1.1020138151894168, "learning_rate": 9.725735551258241e-07, "loss": 0.0877, "step": 10889 }, { "epoch": 0.8627450980392157, "grad_norm": 1.7430455854755134, "learning_rate": 9.7146999417154e-07, "loss": 0.1866, "step": 10890 }, { "epoch": 0.8628243216478511, "grad_norm": 1.6091114606406487, "learning_rate": 9.703670277025158e-07, "loss": 0.1385, "step": 10891 }, { "epoch": 0.8629035452564864, "grad_norm": 1.6923879144965714, "learning_rate": 9.69264655791372e-07, "loss": 0.1383, "step": 10892 }, { "epoch": 0.8629827688651218, "grad_norm": 1.3407640947865838, "learning_rate": 9.681628785107e-07, "loss": 0.1172, "step": 10893 }, { "epoch": 0.8630619924737571, "grad_norm": 2.0665182299967375, "learning_rate": 9.670616959330437e-07, "loss": 0.1897, "step": 10894 }, { "epoch": 0.8631412160823926, "grad_norm": 1.6023769922756916, "learning_rate": 9.659611081309095e-07, "loss": 0.1356, "step": 10895 }, { "epoch": 0.8632204396910279, "grad_norm": 1.533479378923773, "learning_rate": 9.648611151767683e-07, "loss": 0.1318, "step": 10896 }, { "epoch": 0.8632996632996633, "grad_norm": 1.684501421004511, "learning_rate": 9.637617171430492e-07, "loss": 0.1501, "step": 10897 }, { "epoch": 0.8633788869082987, "grad_norm": 1.3926314796341706, "learning_rate": 9.626629141021414e-07, "loss": 0.14, "step": 10898 }, { "epoch": 0.863458110516934, "grad_norm": 1.9107749568752463, "learning_rate": 9.615647061263933e-07, "loss": 0.1826, "step": 10899 }, { "epoch": 0.8635373341255694, "grad_norm": 1.5473046932735017, "learning_rate": 9.604670932881211e-07, "loss": 0.1558, "step": 10900 }, { "epoch": 0.8636165577342048, "grad_norm": 1.5360773646610772, "learning_rate": 9.593700756595958e-07, "loss": 0.1263, "step": 10901 }, { "epoch": 0.8636957813428402, "grad_norm": 1.2362728298280266, "learning_rate": 9.582736533130488e-07, "loss": 0.1155, "step": 10902 }, { "epoch": 0.8637750049514755, "grad_norm": 1.630227533659458, "learning_rate": 9.571778263206767e-07, "loss": 0.1688, "step": 10903 }, { "epoch": 0.863854228560111, "grad_norm": 1.2581368232886827, "learning_rate": 9.560825947546337e-07, "loss": 0.0715, "step": 10904 }, { "epoch": 0.8639334521687463, "grad_norm": 1.4397242223122362, "learning_rate": 9.549879586870336e-07, "loss": 0.1084, "step": 10905 }, { "epoch": 0.8640126757773816, "grad_norm": 1.935895679477906, "learning_rate": 9.538939181899565e-07, "loss": 0.1921, "step": 10906 }, { "epoch": 0.864091899386017, "grad_norm": 1.8147061921474954, "learning_rate": 9.528004733354379e-07, "loss": 0.224, "step": 10907 }, { "epoch": 0.8641711229946524, "grad_norm": 1.637831248536567, "learning_rate": 9.517076241954737e-07, "loss": 0.1346, "step": 10908 }, { "epoch": 0.8642503466032878, "grad_norm": 1.3868415411157224, "learning_rate": 9.506153708420263e-07, "loss": 0.1589, "step": 10909 }, { "epoch": 0.8643295702119231, "grad_norm": 1.8401082024487943, "learning_rate": 9.495237133470148e-07, "loss": 0.1527, "step": 10910 }, { "epoch": 0.8644087938205586, "grad_norm": 1.7338112602095568, "learning_rate": 9.484326517823173e-07, "loss": 0.1157, "step": 10911 }, { "epoch": 0.8644880174291939, "grad_norm": 1.3311708290987254, "learning_rate": 9.473421862197751e-07, "loss": 0.0921, "step": 10912 }, { "epoch": 0.8645672410378292, "grad_norm": 1.38500024275264, "learning_rate": 9.462523167311943e-07, "loss": 0.1236, "step": 10913 }, { "epoch": 0.8646464646464647, "grad_norm": 1.3587894687926279, "learning_rate": 9.45163043388333e-07, "loss": 0.1406, "step": 10914 }, { "epoch": 0.8647256882551, "grad_norm": 1.4853222950267773, "learning_rate": 9.440743662629149e-07, "loss": 0.119, "step": 10915 }, { "epoch": 0.8648049118637354, "grad_norm": 1.3161393839214954, "learning_rate": 9.429862854266281e-07, "loss": 0.1298, "step": 10916 }, { "epoch": 0.8648841354723708, "grad_norm": 1.30155870209671, "learning_rate": 9.418988009511143e-07, "loss": 0.1877, "step": 10917 }, { "epoch": 0.8649633590810062, "grad_norm": 1.3772659044981126, "learning_rate": 9.408119129079774e-07, "loss": 0.1074, "step": 10918 }, { "epoch": 0.8650425826896415, "grad_norm": 1.8152303268814725, "learning_rate": 9.397256213687877e-07, "loss": 0.1727, "step": 10919 }, { "epoch": 0.8651218062982768, "grad_norm": 1.630963268026159, "learning_rate": 9.386399264050705e-07, "loss": 0.1516, "step": 10920 }, { "epoch": 0.8652010299069123, "grad_norm": 1.7559384345626894, "learning_rate": 9.375548280883129e-07, "loss": 0.1752, "step": 10921 }, { "epoch": 0.8652802535155476, "grad_norm": 1.0052979564283187, "learning_rate": 9.364703264899655e-07, "loss": 0.0694, "step": 10922 }, { "epoch": 0.865359477124183, "grad_norm": 1.4307741989888934, "learning_rate": 9.353864216814356e-07, "loss": 0.1152, "step": 10923 }, { "epoch": 0.8654387007328184, "grad_norm": 1.5714513861751531, "learning_rate": 9.34303113734093e-07, "loss": 0.1613, "step": 10924 }, { "epoch": 0.8655179243414538, "grad_norm": 1.6158667691647737, "learning_rate": 9.332204027192693e-07, "loss": 0.162, "step": 10925 }, { "epoch": 0.8655971479500891, "grad_norm": 1.9475152060317027, "learning_rate": 9.321382887082564e-07, "loss": 0.1849, "step": 10926 }, { "epoch": 0.8656763715587245, "grad_norm": 1.4501968352367012, "learning_rate": 9.310567717723063e-07, "loss": 0.1395, "step": 10927 }, { "epoch": 0.8657555951673599, "grad_norm": 2.023412276534783, "learning_rate": 9.299758519826274e-07, "loss": 0.1785, "step": 10928 }, { "epoch": 0.8658348187759952, "grad_norm": 1.8156694064351233, "learning_rate": 9.288955294103996e-07, "loss": 0.1752, "step": 10929 }, { "epoch": 0.8659140423846307, "grad_norm": 1.419405780459077, "learning_rate": 9.278158041267526e-07, "loss": 0.1097, "step": 10930 }, { "epoch": 0.865993265993266, "grad_norm": 1.3306470307918916, "learning_rate": 9.267366762027818e-07, "loss": 0.1122, "step": 10931 }, { "epoch": 0.8660724896019014, "grad_norm": 1.4799954587417474, "learning_rate": 9.256581457095437e-07, "loss": 0.09, "step": 10932 }, { "epoch": 0.8661517132105367, "grad_norm": 1.618289662372727, "learning_rate": 9.245802127180547e-07, "loss": 0.1318, "step": 10933 }, { "epoch": 0.8662309368191721, "grad_norm": 1.8771465956869073, "learning_rate": 9.235028772992883e-07, "loss": 0.2053, "step": 10934 }, { "epoch": 0.8663101604278075, "grad_norm": 1.4435619988728658, "learning_rate": 9.224261395241862e-07, "loss": 0.1198, "step": 10935 }, { "epoch": 0.8663893840364428, "grad_norm": 1.4124381688410097, "learning_rate": 9.213499994636443e-07, "loss": 0.1343, "step": 10936 }, { "epoch": 0.8664686076450783, "grad_norm": 1.4718364248482843, "learning_rate": 9.202744571885191e-07, "loss": 0.1486, "step": 10937 }, { "epoch": 0.8665478312537136, "grad_norm": 1.3611804032830201, "learning_rate": 9.19199512769634e-07, "loss": 0.1545, "step": 10938 }, { "epoch": 0.866627054862349, "grad_norm": 1.5509965022058763, "learning_rate": 9.181251662777668e-07, "loss": 0.1542, "step": 10939 }, { "epoch": 0.8667062784709844, "grad_norm": 1.4241131746379252, "learning_rate": 9.170514177836565e-07, "loss": 0.1179, "step": 10940 }, { "epoch": 0.8667855020796197, "grad_norm": 1.4927536056643087, "learning_rate": 9.159782673580075e-07, "loss": 0.0852, "step": 10941 }, { "epoch": 0.8668647256882551, "grad_norm": 1.7781950234357393, "learning_rate": 9.149057150714802e-07, "loss": 0.1503, "step": 10942 }, { "epoch": 0.8669439492968904, "grad_norm": 1.3215882219520056, "learning_rate": 9.138337609946979e-07, "loss": 0.1348, "step": 10943 }, { "epoch": 0.8670231729055259, "grad_norm": 1.615757534741732, "learning_rate": 9.127624051982398e-07, "loss": 0.1485, "step": 10944 }, { "epoch": 0.8671023965141612, "grad_norm": 1.411099802195532, "learning_rate": 9.116916477526539e-07, "loss": 0.1247, "step": 10945 }, { "epoch": 0.8671816201227966, "grad_norm": 1.609359406492289, "learning_rate": 9.106214887284437e-07, "loss": 0.1871, "step": 10946 }, { "epoch": 0.867260843731432, "grad_norm": 1.5128977160429287, "learning_rate": 9.095519281960729e-07, "loss": 0.1332, "step": 10947 }, { "epoch": 0.8673400673400673, "grad_norm": 1.4739064857942477, "learning_rate": 9.084829662259665e-07, "loss": 0.1428, "step": 10948 }, { "epoch": 0.8674192909487027, "grad_norm": 2.0304058916317365, "learning_rate": 9.0741460288851e-07, "loss": 0.2366, "step": 10949 }, { "epoch": 0.8674985145573381, "grad_norm": 1.4281256566306535, "learning_rate": 9.06346838254053e-07, "loss": 0.1284, "step": 10950 }, { "epoch": 0.8675777381659735, "grad_norm": 1.4461723891510856, "learning_rate": 9.052796723929002e-07, "loss": 0.1362, "step": 10951 }, { "epoch": 0.8676569617746088, "grad_norm": 1.71993145427732, "learning_rate": 9.042131053753211e-07, "loss": 0.1475, "step": 10952 }, { "epoch": 0.8677361853832442, "grad_norm": 1.8596572122954313, "learning_rate": 9.031471372715405e-07, "loss": 0.1775, "step": 10953 }, { "epoch": 0.8678154089918796, "grad_norm": 1.6769984517695262, "learning_rate": 9.020817681517513e-07, "loss": 0.1508, "step": 10954 }, { "epoch": 0.8678946326005149, "grad_norm": 1.4244549219414049, "learning_rate": 9.010169980861005e-07, "loss": 0.1495, "step": 10955 }, { "epoch": 0.8679738562091504, "grad_norm": 1.505802156717825, "learning_rate": 8.999528271446989e-07, "loss": 0.1039, "step": 10956 }, { "epoch": 0.8680530798177857, "grad_norm": 1.4340402783786517, "learning_rate": 8.988892553976169e-07, "loss": 0.099, "step": 10957 }, { "epoch": 0.8681323034264211, "grad_norm": 2.115247406856523, "learning_rate": 8.978262829148876e-07, "loss": 0.1915, "step": 10958 }, { "epoch": 0.8682115270350564, "grad_norm": 1.8304690097727192, "learning_rate": 8.96763909766497e-07, "loss": 0.1898, "step": 10959 }, { "epoch": 0.8682907506436918, "grad_norm": 1.5608265902746414, "learning_rate": 8.957021360224039e-07, "loss": 0.1723, "step": 10960 }, { "epoch": 0.8683699742523272, "grad_norm": 1.8236701291473523, "learning_rate": 8.946409617525175e-07, "loss": 0.1876, "step": 10961 }, { "epoch": 0.8684491978609625, "grad_norm": 1.8918107502369663, "learning_rate": 8.935803870267101e-07, "loss": 0.1825, "step": 10962 }, { "epoch": 0.868528421469598, "grad_norm": 1.3430174678914524, "learning_rate": 8.925204119148189e-07, "loss": 0.1137, "step": 10963 }, { "epoch": 0.8686076450782333, "grad_norm": 1.5633821790368312, "learning_rate": 8.914610364866361e-07, "loss": 0.2083, "step": 10964 }, { "epoch": 0.8686868686868687, "grad_norm": 1.6862680967035257, "learning_rate": 8.904022608119145e-07, "loss": 0.1839, "step": 10965 }, { "epoch": 0.868766092295504, "grad_norm": 1.3817487535325526, "learning_rate": 8.89344084960374e-07, "loss": 0.161, "step": 10966 }, { "epoch": 0.8688453159041394, "grad_norm": 1.3764737775294633, "learning_rate": 8.882865090016868e-07, "loss": 0.1423, "step": 10967 }, { "epoch": 0.8689245395127748, "grad_norm": 2.0387122250176666, "learning_rate": 8.872295330054915e-07, "loss": 0.2511, "step": 10968 }, { "epoch": 0.8690037631214101, "grad_norm": 1.7417491095967168, "learning_rate": 8.861731570413801e-07, "loss": 0.1435, "step": 10969 }, { "epoch": 0.8690829867300456, "grad_norm": 1.8245586735407713, "learning_rate": 8.85117381178916e-07, "loss": 0.1719, "step": 10970 }, { "epoch": 0.8691622103386809, "grad_norm": 1.4348621479945678, "learning_rate": 8.840622054876147e-07, "loss": 0.1018, "step": 10971 }, { "epoch": 0.8692414339473163, "grad_norm": 1.4071228506766478, "learning_rate": 8.830076300369517e-07, "loss": 0.1069, "step": 10972 }, { "epoch": 0.8693206575559517, "grad_norm": 1.4981608182394808, "learning_rate": 8.819536548963703e-07, "loss": 0.1099, "step": 10973 }, { "epoch": 0.869399881164587, "grad_norm": 1.5620643659361901, "learning_rate": 8.809002801352673e-07, "loss": 0.127, "step": 10974 }, { "epoch": 0.8694791047732224, "grad_norm": 1.197118461905713, "learning_rate": 8.798475058230005e-07, "loss": 0.0705, "step": 10975 }, { "epoch": 0.8695583283818578, "grad_norm": 2.010391264621694, "learning_rate": 8.787953320288945e-07, "loss": 0.1983, "step": 10976 }, { "epoch": 0.8696375519904932, "grad_norm": 1.3910331028974718, "learning_rate": 8.777437588222271e-07, "loss": 0.1286, "step": 10977 }, { "epoch": 0.8697167755991285, "grad_norm": 1.985523289622962, "learning_rate": 8.766927862722374e-07, "loss": 0.1563, "step": 10978 }, { "epoch": 0.869795999207764, "grad_norm": 1.6063611268815923, "learning_rate": 8.756424144481313e-07, "loss": 0.1513, "step": 10979 }, { "epoch": 0.8698752228163993, "grad_norm": 1.5963887677771678, "learning_rate": 8.745926434190688e-07, "loss": 0.1044, "step": 10980 }, { "epoch": 0.8699544464250346, "grad_norm": 1.9144467977065878, "learning_rate": 8.735434732541704e-07, "loss": 0.1634, "step": 10981 }, { "epoch": 0.87003367003367, "grad_norm": 1.6848805101341802, "learning_rate": 8.724949040225217e-07, "loss": 0.1141, "step": 10982 }, { "epoch": 0.8701128936423054, "grad_norm": 1.3423442401763854, "learning_rate": 8.714469357931654e-07, "loss": 0.0825, "step": 10983 }, { "epoch": 0.8701921172509408, "grad_norm": 1.5686682865637216, "learning_rate": 8.703995686351041e-07, "loss": 0.1458, "step": 10984 }, { "epoch": 0.8702713408595761, "grad_norm": 1.437883277949371, "learning_rate": 8.693528026173015e-07, "loss": 0.1175, "step": 10985 }, { "epoch": 0.8703505644682116, "grad_norm": 1.6538878190964008, "learning_rate": 8.683066378086846e-07, "loss": 0.1655, "step": 10986 }, { "epoch": 0.8704297880768469, "grad_norm": 1.8758151922182287, "learning_rate": 8.672610742781363e-07, "loss": 0.1794, "step": 10987 }, { "epoch": 0.8705090116854822, "grad_norm": 1.5345979598187736, "learning_rate": 8.662161120945e-07, "loss": 0.1065, "step": 10988 }, { "epoch": 0.8705882352941177, "grad_norm": 1.8064827340296348, "learning_rate": 8.651717513265867e-07, "loss": 0.187, "step": 10989 }, { "epoch": 0.870667458902753, "grad_norm": 1.5954980427273124, "learning_rate": 8.641279920431589e-07, "loss": 0.1521, "step": 10990 }, { "epoch": 0.8707466825113884, "grad_norm": 1.306287513997135, "learning_rate": 8.630848343129417e-07, "loss": 0.1018, "step": 10991 }, { "epoch": 0.8708259061200238, "grad_norm": 1.7290935975526414, "learning_rate": 8.620422782046268e-07, "loss": 0.1897, "step": 10992 }, { "epoch": 0.8709051297286592, "grad_norm": 1.943586483832113, "learning_rate": 8.61000323786858e-07, "loss": 0.1669, "step": 10993 }, { "epoch": 0.8709843533372945, "grad_norm": 1.783851774803501, "learning_rate": 8.599589711282419e-07, "loss": 0.1933, "step": 10994 }, { "epoch": 0.8710635769459298, "grad_norm": 1.5047946982353315, "learning_rate": 8.589182202973512e-07, "loss": 0.1061, "step": 10995 }, { "epoch": 0.8711428005545653, "grad_norm": 1.851562489959027, "learning_rate": 8.578780713627111e-07, "loss": 0.1466, "step": 10996 }, { "epoch": 0.8712220241632006, "grad_norm": 1.5071103075973546, "learning_rate": 8.568385243928112e-07, "loss": 0.1472, "step": 10997 }, { "epoch": 0.871301247771836, "grad_norm": 1.8438473502919566, "learning_rate": 8.55799579456098e-07, "loss": 0.1521, "step": 10998 }, { "epoch": 0.8713804713804714, "grad_norm": 1.5013749162864427, "learning_rate": 8.547612366209856e-07, "loss": 0.117, "step": 10999 }, { "epoch": 0.8714596949891068, "grad_norm": 1.6431376866387268, "learning_rate": 8.537234959558416e-07, "loss": 0.1588, "step": 11000 }, { "epoch": 0.8715389185977421, "grad_norm": 1.4852220193625887, "learning_rate": 8.526863575289945e-07, "loss": 0.0998, "step": 11001 }, { "epoch": 0.8716181422063775, "grad_norm": 1.5706793268387085, "learning_rate": 8.516498214087387e-07, "loss": 0.1736, "step": 11002 }, { "epoch": 0.8716973658150129, "grad_norm": 1.598329115451917, "learning_rate": 8.50613887663323e-07, "loss": 0.1409, "step": 11003 }, { "epoch": 0.8717765894236482, "grad_norm": 1.7190647850687741, "learning_rate": 8.495785563609571e-07, "loss": 0.1634, "step": 11004 }, { "epoch": 0.8718558130322837, "grad_norm": 1.1667699969143297, "learning_rate": 8.485438275698154e-07, "loss": 0.0918, "step": 11005 }, { "epoch": 0.871935036640919, "grad_norm": 1.7757176440618103, "learning_rate": 8.475097013580292e-07, "loss": 0.2365, "step": 11006 }, { "epoch": 0.8720142602495544, "grad_norm": 1.976556341946614, "learning_rate": 8.46476177793688e-07, "loss": 0.2056, "step": 11007 }, { "epoch": 0.8720934838581897, "grad_norm": 1.5208400080275073, "learning_rate": 8.454432569448489e-07, "loss": 0.1599, "step": 11008 }, { "epoch": 0.8721727074668251, "grad_norm": 1.4873676647060463, "learning_rate": 8.444109388795218e-07, "loss": 0.1527, "step": 11009 }, { "epoch": 0.8722519310754605, "grad_norm": 1.9450527855229645, "learning_rate": 8.43379223665679e-07, "loss": 0.1575, "step": 11010 }, { "epoch": 0.8723311546840958, "grad_norm": 1.7083899811408323, "learning_rate": 8.423481113712573e-07, "loss": 0.1901, "step": 11011 }, { "epoch": 0.8724103782927313, "grad_norm": 1.7986818100625264, "learning_rate": 8.413176020641489e-07, "loss": 0.1876, "step": 11012 }, { "epoch": 0.8724896019013666, "grad_norm": 1.9088068931451552, "learning_rate": 8.402876958122075e-07, "loss": 0.1677, "step": 11013 }, { "epoch": 0.872568825510002, "grad_norm": 1.2644182697903634, "learning_rate": 8.392583926832454e-07, "loss": 0.1064, "step": 11014 }, { "epoch": 0.8726480491186374, "grad_norm": 1.3243914060493218, "learning_rate": 8.382296927450417e-07, "loss": 0.1341, "step": 11015 }, { "epoch": 0.8727272727272727, "grad_norm": 1.405091237630225, "learning_rate": 8.37201596065329e-07, "loss": 0.1585, "step": 11016 }, { "epoch": 0.8728064963359081, "grad_norm": 1.6326457541248793, "learning_rate": 8.361741027118009e-07, "loss": 0.1368, "step": 11017 }, { "epoch": 0.8728857199445434, "grad_norm": 1.4428408014739877, "learning_rate": 8.351472127521166e-07, "loss": 0.1571, "step": 11018 }, { "epoch": 0.8729649435531789, "grad_norm": 1.2730069115241511, "learning_rate": 8.341209262538896e-07, "loss": 0.129, "step": 11019 }, { "epoch": 0.8730441671618142, "grad_norm": 1.421899450309346, "learning_rate": 8.330952432846939e-07, "loss": 0.1286, "step": 11020 }, { "epoch": 0.8731233907704496, "grad_norm": 2.3210584919098376, "learning_rate": 8.320701639120709e-07, "loss": 0.1806, "step": 11021 }, { "epoch": 0.873202614379085, "grad_norm": 1.4065445920443453, "learning_rate": 8.310456882035145e-07, "loss": 0.1561, "step": 11022 }, { "epoch": 0.8732818379877203, "grad_norm": 1.9764765631166432, "learning_rate": 8.300218162264783e-07, "loss": 0.1898, "step": 11023 }, { "epoch": 0.8733610615963557, "grad_norm": 1.9887064023906842, "learning_rate": 8.289985480483864e-07, "loss": 0.1657, "step": 11024 }, { "epoch": 0.8734402852049911, "grad_norm": 1.9699979560114873, "learning_rate": 8.279758837366103e-07, "loss": 0.1978, "step": 11025 }, { "epoch": 0.8735195088136265, "grad_norm": 1.2423101171407702, "learning_rate": 8.269538233584884e-07, "loss": 0.0916, "step": 11026 }, { "epoch": 0.8735987324222618, "grad_norm": 1.6238746212862867, "learning_rate": 8.259323669813202e-07, "loss": 0.1076, "step": 11027 }, { "epoch": 0.8736779560308973, "grad_norm": 1.232171295620495, "learning_rate": 8.24911514672363e-07, "loss": 0.1082, "step": 11028 }, { "epoch": 0.8737571796395326, "grad_norm": 1.6016744884010354, "learning_rate": 8.23891266498833e-07, "loss": 0.1668, "step": 11029 }, { "epoch": 0.8738364032481679, "grad_norm": 1.502720769662286, "learning_rate": 8.228716225279121e-07, "loss": 0.1299, "step": 11030 }, { "epoch": 0.8739156268568034, "grad_norm": 1.4484411193736395, "learning_rate": 8.218525828267377e-07, "loss": 0.1459, "step": 11031 }, { "epoch": 0.8739948504654387, "grad_norm": 1.748556094345839, "learning_rate": 8.208341474624071e-07, "loss": 0.1867, "step": 11032 }, { "epoch": 0.8740740740740741, "grad_norm": 2.0911540876283157, "learning_rate": 8.198163165019812e-07, "loss": 0.2293, "step": 11033 }, { "epoch": 0.8741532976827094, "grad_norm": 1.3533978724512903, "learning_rate": 8.187990900124787e-07, "loss": 0.1198, "step": 11034 }, { "epoch": 0.8742325212913448, "grad_norm": 1.200006291736161, "learning_rate": 8.177824680608781e-07, "loss": 0.1085, "step": 11035 }, { "epoch": 0.8743117448999802, "grad_norm": 1.617583492298631, "learning_rate": 8.167664507141215e-07, "loss": 0.1895, "step": 11036 }, { "epoch": 0.8743909685086155, "grad_norm": 1.548224389199469, "learning_rate": 8.157510380391065e-07, "loss": 0.1409, "step": 11037 }, { "epoch": 0.874470192117251, "grad_norm": 1.9670762647277398, "learning_rate": 8.14736230102694e-07, "loss": 0.1696, "step": 11038 }, { "epoch": 0.8745494157258863, "grad_norm": 1.4834681256023374, "learning_rate": 8.137220269717028e-07, "loss": 0.1302, "step": 11039 }, { "epoch": 0.8746286393345217, "grad_norm": 1.893756388731371, "learning_rate": 8.127084287129161e-07, "loss": 0.1842, "step": 11040 }, { "epoch": 0.874707862943157, "grad_norm": 2.016710640036495, "learning_rate": 8.116954353930728e-07, "loss": 0.2187, "step": 11041 }, { "epoch": 0.8747870865517924, "grad_norm": 1.4940586794469284, "learning_rate": 8.106830470788729e-07, "loss": 0.1309, "step": 11042 }, { "epoch": 0.8748663101604278, "grad_norm": 1.6039098671434404, "learning_rate": 8.096712638369797e-07, "loss": 0.1453, "step": 11043 }, { "epoch": 0.8749455337690631, "grad_norm": 1.1653177222697704, "learning_rate": 8.086600857340121e-07, "loss": 0.1157, "step": 11044 }, { "epoch": 0.8750247573776986, "grad_norm": 1.3985778556104098, "learning_rate": 8.076495128365502e-07, "loss": 0.1258, "step": 11045 }, { "epoch": 0.8751039809863339, "grad_norm": 1.1897375410971311, "learning_rate": 8.066395452111387e-07, "loss": 0.1324, "step": 11046 }, { "epoch": 0.8751832045949693, "grad_norm": 1.329847737991157, "learning_rate": 8.056301829242785e-07, "loss": 0.1371, "step": 11047 }, { "epoch": 0.8752624282036047, "grad_norm": 1.553353020837809, "learning_rate": 8.046214260424279e-07, "loss": 0.1613, "step": 11048 }, { "epoch": 0.87534165181224, "grad_norm": 1.3751073872323294, "learning_rate": 8.036132746320125e-07, "loss": 0.1331, "step": 11049 }, { "epoch": 0.8754208754208754, "grad_norm": 1.5327896495551423, "learning_rate": 8.026057287594136e-07, "loss": 0.158, "step": 11050 }, { "epoch": 0.8755000990295108, "grad_norm": 1.5121267916789705, "learning_rate": 8.015987884909692e-07, "loss": 0.1677, "step": 11051 }, { "epoch": 0.8755793226381462, "grad_norm": 1.4042188522204424, "learning_rate": 8.005924538929877e-07, "loss": 0.1324, "step": 11052 }, { "epoch": 0.8756585462467815, "grad_norm": 1.328277647115414, "learning_rate": 7.99586725031728e-07, "loss": 0.1106, "step": 11053 }, { "epoch": 0.875737769855417, "grad_norm": 1.8239494505271974, "learning_rate": 7.985816019734127e-07, "loss": 0.1903, "step": 11054 }, { "epoch": 0.8758169934640523, "grad_norm": 1.3782876359957164, "learning_rate": 7.975770847842234e-07, "loss": 0.1229, "step": 11055 }, { "epoch": 0.8758962170726876, "grad_norm": 1.8534888944301684, "learning_rate": 7.965731735303051e-07, "loss": 0.17, "step": 11056 }, { "epoch": 0.875975440681323, "grad_norm": 1.2458916719109903, "learning_rate": 7.955698682777601e-07, "loss": 0.1014, "step": 11057 }, { "epoch": 0.8760546642899584, "grad_norm": 1.1449972517787468, "learning_rate": 7.945671690926471e-07, "loss": 0.0885, "step": 11058 }, { "epoch": 0.8761338878985938, "grad_norm": 1.6426687320259323, "learning_rate": 7.935650760409952e-07, "loss": 0.2175, "step": 11059 }, { "epoch": 0.8762131115072291, "grad_norm": 1.5807434425690674, "learning_rate": 7.925635891887839e-07, "loss": 0.1533, "step": 11060 }, { "epoch": 0.8762923351158646, "grad_norm": 1.5697158031136984, "learning_rate": 7.915627086019561e-07, "loss": 0.1276, "step": 11061 }, { "epoch": 0.8763715587244999, "grad_norm": 1.8997840016244778, "learning_rate": 7.905624343464169e-07, "loss": 0.1517, "step": 11062 }, { "epoch": 0.8764507823331352, "grad_norm": 1.3513186481944488, "learning_rate": 7.895627664880278e-07, "loss": 0.1231, "step": 11063 }, { "epoch": 0.8765300059417707, "grad_norm": 1.1353838374362546, "learning_rate": 7.88563705092612e-07, "loss": 0.0876, "step": 11064 }, { "epoch": 0.876609229550406, "grad_norm": 1.3803019825379057, "learning_rate": 7.875652502259545e-07, "loss": 0.1442, "step": 11065 }, { "epoch": 0.8766884531590414, "grad_norm": 1.123428824245722, "learning_rate": 7.865674019537983e-07, "loss": 0.0914, "step": 11066 }, { "epoch": 0.8767676767676768, "grad_norm": 1.5637562392873046, "learning_rate": 7.855701603418442e-07, "loss": 0.1787, "step": 11067 }, { "epoch": 0.8768469003763122, "grad_norm": 1.3514842481495226, "learning_rate": 7.845735254557608e-07, "loss": 0.1138, "step": 11068 }, { "epoch": 0.8769261239849475, "grad_norm": 1.4883758503748221, "learning_rate": 7.835774973611687e-07, "loss": 0.1294, "step": 11069 }, { "epoch": 0.8770053475935828, "grad_norm": 1.8425176573630073, "learning_rate": 7.825820761236514e-07, "loss": 0.2123, "step": 11070 }, { "epoch": 0.8770845712022183, "grad_norm": 1.5354551841523785, "learning_rate": 7.815872618087506e-07, "loss": 0.138, "step": 11071 }, { "epoch": 0.8771637948108536, "grad_norm": 1.2943596722900061, "learning_rate": 7.805930544819751e-07, "loss": 0.1532, "step": 11072 }, { "epoch": 0.877243018419489, "grad_norm": 1.5506195840527082, "learning_rate": 7.795994542087859e-07, "loss": 0.1807, "step": 11073 }, { "epoch": 0.8773222420281244, "grad_norm": 1.456548003620456, "learning_rate": 7.786064610546051e-07, "loss": 0.1704, "step": 11074 }, { "epoch": 0.8774014656367598, "grad_norm": 1.8466227504958324, "learning_rate": 7.776140750848205e-07, "loss": 0.1915, "step": 11075 }, { "epoch": 0.8774806892453951, "grad_norm": 1.5899723841286948, "learning_rate": 7.766222963647729e-07, "loss": 0.1586, "step": 11076 }, { "epoch": 0.8775599128540305, "grad_norm": 1.5812415549782357, "learning_rate": 7.756311249597659e-07, "loss": 0.1289, "step": 11077 }, { "epoch": 0.8776391364626659, "grad_norm": 1.1045692898606165, "learning_rate": 7.746405609350661e-07, "loss": 0.0926, "step": 11078 }, { "epoch": 0.8777183600713012, "grad_norm": 1.7299878193532292, "learning_rate": 7.736506043558956e-07, "loss": 0.2077, "step": 11079 }, { "epoch": 0.8777975836799367, "grad_norm": 1.675653079315312, "learning_rate": 7.726612552874368e-07, "loss": 0.1581, "step": 11080 }, { "epoch": 0.877876807288572, "grad_norm": 1.3068524458677393, "learning_rate": 7.716725137948366e-07, "loss": 0.1147, "step": 11081 }, { "epoch": 0.8779560308972074, "grad_norm": 1.4682485026277445, "learning_rate": 7.706843799431985e-07, "loss": 0.1436, "step": 11082 }, { "epoch": 0.8780352545058427, "grad_norm": 1.2990961209575327, "learning_rate": 7.696968537975847e-07, "loss": 0.1382, "step": 11083 }, { "epoch": 0.8781144781144781, "grad_norm": 1.7159384905615305, "learning_rate": 7.687099354230177e-07, "loss": 0.1943, "step": 11084 }, { "epoch": 0.8781937017231135, "grad_norm": 1.5114893507133043, "learning_rate": 7.677236248844855e-07, "loss": 0.1439, "step": 11085 }, { "epoch": 0.8782729253317488, "grad_norm": 1.440527015874435, "learning_rate": 7.667379222469295e-07, "loss": 0.1762, "step": 11086 }, { "epoch": 0.8783521489403843, "grad_norm": 1.3369850074725866, "learning_rate": 7.657528275752524e-07, "loss": 0.0848, "step": 11087 }, { "epoch": 0.8784313725490196, "grad_norm": 1.7124503510272913, "learning_rate": 7.647683409343198e-07, "loss": 0.1582, "step": 11088 }, { "epoch": 0.878510596157655, "grad_norm": 1.4861977833770101, "learning_rate": 7.637844623889557e-07, "loss": 0.1229, "step": 11089 }, { "epoch": 0.8785898197662904, "grad_norm": 1.3899642530828875, "learning_rate": 7.628011920039414e-07, "loss": 0.0837, "step": 11090 }, { "epoch": 0.8786690433749257, "grad_norm": 1.4446096629750547, "learning_rate": 7.618185298440239e-07, "loss": 0.1438, "step": 11091 }, { "epoch": 0.8787482669835611, "grad_norm": 1.1484021972326586, "learning_rate": 7.608364759739039e-07, "loss": 0.1127, "step": 11092 }, { "epoch": 0.8788274905921964, "grad_norm": 1.2636044929676284, "learning_rate": 7.598550304582453e-07, "loss": 0.0945, "step": 11093 }, { "epoch": 0.8789067142008319, "grad_norm": 1.6738270481531605, "learning_rate": 7.588741933616728e-07, "loss": 0.1621, "step": 11094 }, { "epoch": 0.8789859378094672, "grad_norm": 1.4038465432917206, "learning_rate": 7.578939647487705e-07, "loss": 0.1474, "step": 11095 }, { "epoch": 0.8790651614181026, "grad_norm": 2.3264592524034136, "learning_rate": 7.569143446840776e-07, "loss": 0.131, "step": 11096 }, { "epoch": 0.879144385026738, "grad_norm": 1.342160553010363, "learning_rate": 7.559353332321029e-07, "loss": 0.0931, "step": 11097 }, { "epoch": 0.8792236086353733, "grad_norm": 1.4823900789567563, "learning_rate": 7.549569304573057e-07, "loss": 0.222, "step": 11098 }, { "epoch": 0.8793028322440087, "grad_norm": 2.0199311663698385, "learning_rate": 7.539791364241111e-07, "loss": 0.1746, "step": 11099 }, { "epoch": 0.8793820558526441, "grad_norm": 1.604095758236551, "learning_rate": 7.530019511969e-07, "loss": 0.1492, "step": 11100 }, { "epoch": 0.8794612794612795, "grad_norm": 1.3453499361938412, "learning_rate": 7.520253748400175e-07, "loss": 0.1199, "step": 11101 }, { "epoch": 0.8795405030699148, "grad_norm": 1.5124738624809815, "learning_rate": 7.510494074177666e-07, "loss": 0.1048, "step": 11102 }, { "epoch": 0.8796197266785503, "grad_norm": 1.6276717268046157, "learning_rate": 7.500740489944092e-07, "loss": 0.1262, "step": 11103 }, { "epoch": 0.8796989502871856, "grad_norm": 1.2660817562754592, "learning_rate": 7.490992996341662e-07, "loss": 0.1038, "step": 11104 }, { "epoch": 0.8797781738958209, "grad_norm": 1.4808194266546642, "learning_rate": 7.481251594012218e-07, "loss": 0.1525, "step": 11105 }, { "epoch": 0.8798573975044564, "grad_norm": 1.1035835785590915, "learning_rate": 7.471516283597191e-07, "loss": 0.0901, "step": 11106 }, { "epoch": 0.8799366211130917, "grad_norm": 1.25726160744421, "learning_rate": 7.461787065737602e-07, "loss": 0.1165, "step": 11107 }, { "epoch": 0.8800158447217271, "grad_norm": 1.3486256848014806, "learning_rate": 7.452063941074073e-07, "loss": 0.1158, "step": 11108 }, { "epoch": 0.8800950683303624, "grad_norm": 1.2517075362925056, "learning_rate": 7.442346910246801e-07, "loss": 0.136, "step": 11109 }, { "epoch": 0.8801742919389978, "grad_norm": 1.2111080557371396, "learning_rate": 7.432635973895652e-07, "loss": 0.0863, "step": 11110 }, { "epoch": 0.8802535155476332, "grad_norm": 1.7483078900025897, "learning_rate": 7.422931132660005e-07, "loss": 0.1899, "step": 11111 }, { "epoch": 0.8803327391562685, "grad_norm": 1.5807034312357622, "learning_rate": 7.413232387178882e-07, "loss": 0.118, "step": 11112 }, { "epoch": 0.880411962764904, "grad_norm": 1.9485706575073423, "learning_rate": 7.403539738090914e-07, "loss": 0.1106, "step": 11113 }, { "epoch": 0.8804911863735393, "grad_norm": 1.6873901742328379, "learning_rate": 7.393853186034316e-07, "loss": 0.1899, "step": 11114 }, { "epoch": 0.8805704099821747, "grad_norm": 1.9597737852281722, "learning_rate": 7.384172731646877e-07, "loss": 0.158, "step": 11115 }, { "epoch": 0.88064963359081, "grad_norm": 1.8619822635570302, "learning_rate": 7.374498375566042e-07, "loss": 0.1961, "step": 11116 }, { "epoch": 0.8807288571994454, "grad_norm": 1.5695146854586293, "learning_rate": 7.364830118428801e-07, "loss": 0.2133, "step": 11117 }, { "epoch": 0.8808080808080808, "grad_norm": 1.478800109186462, "learning_rate": 7.355167960871745e-07, "loss": 0.1624, "step": 11118 }, { "epoch": 0.8808873044167161, "grad_norm": 1.6214009776786245, "learning_rate": 7.345511903531122e-07, "loss": 0.1399, "step": 11119 }, { "epoch": 0.8809665280253516, "grad_norm": 1.528845052740751, "learning_rate": 7.335861947042711e-07, "loss": 0.1222, "step": 11120 }, { "epoch": 0.8810457516339869, "grad_norm": 1.4993479716755802, "learning_rate": 7.326218092041903e-07, "loss": 0.133, "step": 11121 }, { "epoch": 0.8811249752426223, "grad_norm": 1.6820809362683793, "learning_rate": 7.316580339163736e-07, "loss": 0.1335, "step": 11122 }, { "epoch": 0.8812041988512577, "grad_norm": 1.2079859122053844, "learning_rate": 7.306948689042792e-07, "loss": 0.0947, "step": 11123 }, { "epoch": 0.881283422459893, "grad_norm": 1.5348029362662943, "learning_rate": 7.297323142313262e-07, "loss": 0.1198, "step": 11124 }, { "epoch": 0.8813626460685284, "grad_norm": 1.49480804702477, "learning_rate": 7.287703699608928e-07, "loss": 0.1517, "step": 11125 }, { "epoch": 0.8814418696771638, "grad_norm": 1.5221241334792392, "learning_rate": 7.278090361563228e-07, "loss": 0.1443, "step": 11126 }, { "epoch": 0.8815210932857992, "grad_norm": 1.940163117579796, "learning_rate": 7.268483128809122e-07, "loss": 0.205, "step": 11127 }, { "epoch": 0.8816003168944345, "grad_norm": 1.5362766393192315, "learning_rate": 7.258882001979184e-07, "loss": 0.1637, "step": 11128 }, { "epoch": 0.88167954050307, "grad_norm": 1.5644007245495675, "learning_rate": 7.24928698170565e-07, "loss": 0.1374, "step": 11129 }, { "epoch": 0.8817587641117053, "grad_norm": 2.004489483898855, "learning_rate": 7.239698068620272e-07, "loss": 0.2701, "step": 11130 }, { "epoch": 0.8818379877203406, "grad_norm": 1.3244070864876418, "learning_rate": 7.230115263354431e-07, "loss": 0.1344, "step": 11131 }, { "epoch": 0.881917211328976, "grad_norm": 1.679983697823597, "learning_rate": 7.220538566539137e-07, "loss": 0.1734, "step": 11132 }, { "epoch": 0.8819964349376114, "grad_norm": 1.786821865799786, "learning_rate": 7.21096797880495e-07, "loss": 0.1768, "step": 11133 }, { "epoch": 0.8820756585462468, "grad_norm": 1.8551648073826383, "learning_rate": 7.201403500782034e-07, "loss": 0.2055, "step": 11134 }, { "epoch": 0.8821548821548821, "grad_norm": 1.7889618735972586, "learning_rate": 7.191845133100195e-07, "loss": 0.1697, "step": 11135 }, { "epoch": 0.8822341057635176, "grad_norm": 1.397606079260697, "learning_rate": 7.182292876388785e-07, "loss": 0.1418, "step": 11136 }, { "epoch": 0.8823133293721529, "grad_norm": 1.6054860990465039, "learning_rate": 7.17274673127677e-07, "loss": 0.135, "step": 11137 }, { "epoch": 0.8823925529807882, "grad_norm": 1.7195520923106866, "learning_rate": 7.163206698392744e-07, "loss": 0.1376, "step": 11138 }, { "epoch": 0.8824717765894237, "grad_norm": 1.2884564375815863, "learning_rate": 7.153672778364851e-07, "loss": 0.1284, "step": 11139 }, { "epoch": 0.882551000198059, "grad_norm": 1.5600589826964066, "learning_rate": 7.144144971820855e-07, "loss": 0.1781, "step": 11140 }, { "epoch": 0.8826302238066944, "grad_norm": 1.7889125989457015, "learning_rate": 7.134623279388098e-07, "loss": 0.1577, "step": 11141 }, { "epoch": 0.8827094474153298, "grad_norm": 1.7736935452385398, "learning_rate": 7.12510770169359e-07, "loss": 0.1369, "step": 11142 }, { "epoch": 0.8827886710239652, "grad_norm": 1.6159978180694412, "learning_rate": 7.115598239363842e-07, "loss": 0.1781, "step": 11143 }, { "epoch": 0.8828678946326005, "grad_norm": 1.8947987201075354, "learning_rate": 7.106094893025006e-07, "loss": 0.2934, "step": 11144 }, { "epoch": 0.8829471182412358, "grad_norm": 1.6120085550912424, "learning_rate": 7.096597663302862e-07, "loss": 0.1452, "step": 11145 }, { "epoch": 0.8830263418498713, "grad_norm": 1.5986575421728753, "learning_rate": 7.087106550822731e-07, "loss": 0.1601, "step": 11146 }, { "epoch": 0.8831055654585066, "grad_norm": 1.0178386973477769, "learning_rate": 7.077621556209557e-07, "loss": 0.0728, "step": 11147 }, { "epoch": 0.883184789067142, "grad_norm": 1.4218344936942204, "learning_rate": 7.068142680087909e-07, "loss": 0.1493, "step": 11148 }, { "epoch": 0.8832640126757774, "grad_norm": 1.4753371306919423, "learning_rate": 7.058669923081896e-07, "loss": 0.1556, "step": 11149 }, { "epoch": 0.8833432362844128, "grad_norm": 1.5490919797379965, "learning_rate": 7.049203285815253e-07, "loss": 0.1173, "step": 11150 }, { "epoch": 0.8834224598930481, "grad_norm": 1.5942410600236587, "learning_rate": 7.03974276891134e-07, "loss": 0.1265, "step": 11151 }, { "epoch": 0.8835016835016835, "grad_norm": 1.4656320898371784, "learning_rate": 7.030288372993066e-07, "loss": 0.1468, "step": 11152 }, { "epoch": 0.8835809071103189, "grad_norm": 1.4680545887161833, "learning_rate": 7.020840098682968e-07, "loss": 0.1234, "step": 11153 }, { "epoch": 0.8836601307189542, "grad_norm": 1.389056483511112, "learning_rate": 7.011397946603138e-07, "loss": 0.1237, "step": 11154 }, { "epoch": 0.8837393543275897, "grad_norm": 1.7126887232272259, "learning_rate": 7.001961917375344e-07, "loss": 0.198, "step": 11155 }, { "epoch": 0.883818577936225, "grad_norm": 1.1415614785280985, "learning_rate": 6.992532011620878e-07, "loss": 0.1159, "step": 11156 }, { "epoch": 0.8838978015448604, "grad_norm": 1.4461974354079816, "learning_rate": 6.983108229960633e-07, "loss": 0.1034, "step": 11157 }, { "epoch": 0.8839770251534957, "grad_norm": 2.1506525876897937, "learning_rate": 6.973690573015168e-07, "loss": 0.2679, "step": 11158 }, { "epoch": 0.8840562487621311, "grad_norm": 1.6475841358886705, "learning_rate": 6.964279041404553e-07, "loss": 0.162, "step": 11159 }, { "epoch": 0.8841354723707665, "grad_norm": 1.4850362469694653, "learning_rate": 6.954873635748493e-07, "loss": 0.1441, "step": 11160 }, { "epoch": 0.8842146959794018, "grad_norm": 1.606768367852246, "learning_rate": 6.945474356666326e-07, "loss": 0.195, "step": 11161 }, { "epoch": 0.8842939195880373, "grad_norm": 1.8304760132245828, "learning_rate": 6.936081204776913e-07, "loss": 0.1573, "step": 11162 }, { "epoch": 0.8843731431966726, "grad_norm": 1.4679915803362746, "learning_rate": 6.926694180698734e-07, "loss": 0.1745, "step": 11163 }, { "epoch": 0.884452366805308, "grad_norm": 1.717960598890273, "learning_rate": 6.917313285049931e-07, "loss": 0.2052, "step": 11164 }, { "epoch": 0.8845315904139434, "grad_norm": 1.2154139342917198, "learning_rate": 6.907938518448154e-07, "loss": 0.0808, "step": 11165 }, { "epoch": 0.8846108140225787, "grad_norm": 1.7571815308144139, "learning_rate": 6.898569881510686e-07, "loss": 0.1876, "step": 11166 }, { "epoch": 0.8846900376312141, "grad_norm": 1.3074702335973698, "learning_rate": 6.889207374854434e-07, "loss": 0.0875, "step": 11167 }, { "epoch": 0.8847692612398494, "grad_norm": 1.4915280470452914, "learning_rate": 6.879850999095849e-07, "loss": 0.1169, "step": 11168 }, { "epoch": 0.8848484848484849, "grad_norm": 1.5858128281599722, "learning_rate": 6.870500754851017e-07, "loss": 0.1644, "step": 11169 }, { "epoch": 0.8849277084571202, "grad_norm": 1.6199546298355267, "learning_rate": 6.861156642735578e-07, "loss": 0.1831, "step": 11170 }, { "epoch": 0.8850069320657556, "grad_norm": 1.815086789074067, "learning_rate": 6.851818663364839e-07, "loss": 0.1278, "step": 11171 }, { "epoch": 0.885086155674391, "grad_norm": 1.489717338249987, "learning_rate": 6.842486817353633e-07, "loss": 0.1287, "step": 11172 }, { "epoch": 0.8851653792830263, "grad_norm": 1.3484807114867106, "learning_rate": 6.833161105316421e-07, "loss": 0.099, "step": 11173 }, { "epoch": 0.8852446028916617, "grad_norm": 1.380817401369569, "learning_rate": 6.823841527867259e-07, "loss": 0.0877, "step": 11174 }, { "epoch": 0.8853238265002971, "grad_norm": 1.3627900815025338, "learning_rate": 6.814528085619809e-07, "loss": 0.1395, "step": 11175 }, { "epoch": 0.8854030501089325, "grad_norm": 1.7115600357121634, "learning_rate": 6.805220779187293e-07, "loss": 0.2051, "step": 11176 }, { "epoch": 0.8854822737175678, "grad_norm": 1.908196926567162, "learning_rate": 6.795919609182566e-07, "loss": 0.1698, "step": 11177 }, { "epoch": 0.8855614973262033, "grad_norm": 1.326031720268804, "learning_rate": 6.78662457621807e-07, "loss": 0.112, "step": 11178 }, { "epoch": 0.8856407209348386, "grad_norm": 1.7568740812652275, "learning_rate": 6.777335680905817e-07, "loss": 0.1754, "step": 11179 }, { "epoch": 0.8857199445434739, "grad_norm": 1.7513806225288808, "learning_rate": 6.768052923857482e-07, "loss": 0.2034, "step": 11180 }, { "epoch": 0.8857991681521094, "grad_norm": 1.3711975828429002, "learning_rate": 6.758776305684245e-07, "loss": 0.1335, "step": 11181 }, { "epoch": 0.8858783917607447, "grad_norm": 1.4007591843055125, "learning_rate": 6.749505826996927e-07, "loss": 0.117, "step": 11182 }, { "epoch": 0.8859576153693801, "grad_norm": 1.5552915496574455, "learning_rate": 6.740241488405963e-07, "loss": 0.1246, "step": 11183 }, { "epoch": 0.8860368389780154, "grad_norm": 1.7304594683069021, "learning_rate": 6.730983290521365e-07, "loss": 0.1918, "step": 11184 }, { "epoch": 0.8861160625866509, "grad_norm": 1.4540912158424129, "learning_rate": 6.721731233952722e-07, "loss": 0.1399, "step": 11185 }, { "epoch": 0.8861952861952862, "grad_norm": 1.5808172287444398, "learning_rate": 6.712485319309258e-07, "loss": 0.1468, "step": 11186 }, { "epoch": 0.8862745098039215, "grad_norm": 1.7907370149774915, "learning_rate": 6.703245547199777e-07, "loss": 0.1875, "step": 11187 }, { "epoch": 0.886353733412557, "grad_norm": 1.46345190390323, "learning_rate": 6.694011918232635e-07, "loss": 0.1342, "step": 11188 }, { "epoch": 0.8864329570211923, "grad_norm": 2.262595414517965, "learning_rate": 6.684784433015867e-07, "loss": 0.2245, "step": 11189 }, { "epoch": 0.8865121806298277, "grad_norm": 1.932419712110693, "learning_rate": 6.675563092157044e-07, "loss": 0.2003, "step": 11190 }, { "epoch": 0.886591404238463, "grad_norm": 1.6139038501004332, "learning_rate": 6.666347896263326e-07, "loss": 0.1703, "step": 11191 }, { "epoch": 0.8866706278470984, "grad_norm": 1.6083030141776613, "learning_rate": 6.657138845941524e-07, "loss": 0.1518, "step": 11192 }, { "epoch": 0.8867498514557338, "grad_norm": 1.5460532694260236, "learning_rate": 6.64793594179799e-07, "loss": 0.13, "step": 11193 }, { "epoch": 0.8868290750643691, "grad_norm": 1.4099721470608344, "learning_rate": 6.638739184438681e-07, "loss": 0.1645, "step": 11194 }, { "epoch": 0.8869082986730046, "grad_norm": 1.7513948458341688, "learning_rate": 6.629548574469169e-07, "loss": 0.1453, "step": 11195 }, { "epoch": 0.8869875222816399, "grad_norm": 1.561618827569234, "learning_rate": 6.620364112494627e-07, "loss": 0.2136, "step": 11196 }, { "epoch": 0.8870667458902753, "grad_norm": 1.5661793984465628, "learning_rate": 6.611185799119791e-07, "loss": 0.1937, "step": 11197 }, { "epoch": 0.8871459694989107, "grad_norm": 1.4580213914665436, "learning_rate": 6.602013634949001e-07, "loss": 0.1156, "step": 11198 }, { "epoch": 0.887225193107546, "grad_norm": 1.329351429338928, "learning_rate": 6.592847620586217e-07, "loss": 0.1309, "step": 11199 }, { "epoch": 0.8873044167161814, "grad_norm": 1.6875267310544861, "learning_rate": 6.583687756634982e-07, "loss": 0.1955, "step": 11200 }, { "epoch": 0.8873836403248168, "grad_norm": 1.5599368515170422, "learning_rate": 6.574534043698399e-07, "loss": 0.122, "step": 11201 }, { "epoch": 0.8874628639334522, "grad_norm": 1.3859195779507871, "learning_rate": 6.565386482379221e-07, "loss": 0.1327, "step": 11202 }, { "epoch": 0.8875420875420875, "grad_norm": 1.5138946910295334, "learning_rate": 6.556245073279777e-07, "loss": 0.163, "step": 11203 }, { "epoch": 0.887621311150723, "grad_norm": 1.3326850825856658, "learning_rate": 6.547109817001951e-07, "loss": 0.1668, "step": 11204 }, { "epoch": 0.8877005347593583, "grad_norm": 1.6015704736011787, "learning_rate": 6.537980714147285e-07, "loss": 0.1388, "step": 11205 }, { "epoch": 0.8877797583679936, "grad_norm": 1.8286157952647148, "learning_rate": 6.528857765316887e-07, "loss": 0.1454, "step": 11206 }, { "epoch": 0.887858981976629, "grad_norm": 1.522324509793647, "learning_rate": 6.519740971111432e-07, "loss": 0.1599, "step": 11207 }, { "epoch": 0.8879382055852644, "grad_norm": 1.525973904566212, "learning_rate": 6.510630332131262e-07, "loss": 0.1706, "step": 11208 }, { "epoch": 0.8880174291938998, "grad_norm": 1.4522070152451843, "learning_rate": 6.501525848976231e-07, "loss": 0.1331, "step": 11209 }, { "epoch": 0.8880966528025351, "grad_norm": 1.495099835222104, "learning_rate": 6.492427522245836e-07, "loss": 0.1555, "step": 11210 }, { "epoch": 0.8881758764111706, "grad_norm": 1.2192352004704508, "learning_rate": 6.483335352539144e-07, "loss": 0.0837, "step": 11211 }, { "epoch": 0.8882551000198059, "grad_norm": 1.7804550544788196, "learning_rate": 6.474249340454874e-07, "loss": 0.1429, "step": 11212 }, { "epoch": 0.8883343236284412, "grad_norm": 1.5833327282776872, "learning_rate": 6.46516948659125e-07, "loss": 0.1629, "step": 11213 }, { "epoch": 0.8884135472370767, "grad_norm": 1.5123457353031424, "learning_rate": 6.456095791546147e-07, "loss": 0.1292, "step": 11214 }, { "epoch": 0.888492770845712, "grad_norm": 1.4086814871923838, "learning_rate": 6.447028255917054e-07, "loss": 0.1151, "step": 11215 }, { "epoch": 0.8885719944543474, "grad_norm": 1.5087089422295907, "learning_rate": 6.437966880300995e-07, "loss": 0.1115, "step": 11216 }, { "epoch": 0.8886512180629828, "grad_norm": 1.2840662545304073, "learning_rate": 6.428911665294601e-07, "loss": 0.0922, "step": 11217 }, { "epoch": 0.8887304416716182, "grad_norm": 1.3695665174865892, "learning_rate": 6.419862611494165e-07, "loss": 0.1217, "step": 11218 }, { "epoch": 0.8888096652802535, "grad_norm": 1.6259332497392347, "learning_rate": 6.410819719495498e-07, "loss": 0.1554, "step": 11219 }, { "epoch": 0.8888888888888888, "grad_norm": 1.6804022880656255, "learning_rate": 6.401782989894012e-07, "loss": 0.188, "step": 11220 }, { "epoch": 0.8889681124975243, "grad_norm": 1.7283123861924126, "learning_rate": 6.392752423284765e-07, "loss": 0.1592, "step": 11221 }, { "epoch": 0.8890473361061596, "grad_norm": 1.7761924947340235, "learning_rate": 6.383728020262359e-07, "loss": 0.1403, "step": 11222 }, { "epoch": 0.889126559714795, "grad_norm": 1.3749939235057573, "learning_rate": 6.374709781420995e-07, "loss": 0.1337, "step": 11223 }, { "epoch": 0.8892057833234304, "grad_norm": 1.5242141328141736, "learning_rate": 6.365697707354512e-07, "loss": 0.1444, "step": 11224 }, { "epoch": 0.8892850069320658, "grad_norm": 2.4773199167645084, "learning_rate": 6.3566917986563e-07, "loss": 0.1877, "step": 11225 }, { "epoch": 0.8893642305407011, "grad_norm": 1.1579857686049366, "learning_rate": 6.347692055919353e-07, "loss": 0.0776, "step": 11226 }, { "epoch": 0.8894434541493365, "grad_norm": 1.636475076877616, "learning_rate": 6.338698479736227e-07, "loss": 0.1753, "step": 11227 }, { "epoch": 0.8895226777579719, "grad_norm": 1.2199580781257486, "learning_rate": 6.329711070699162e-07, "loss": 0.1069, "step": 11228 }, { "epoch": 0.8896019013666072, "grad_norm": 1.6847169976544392, "learning_rate": 6.320729829399918e-07, "loss": 0.2067, "step": 11229 }, { "epoch": 0.8896811249752427, "grad_norm": 2.10032103768669, "learning_rate": 6.311754756429833e-07, "loss": 0.1863, "step": 11230 }, { "epoch": 0.889760348583878, "grad_norm": 1.8610953453560823, "learning_rate": 6.302785852379911e-07, "loss": 0.254, "step": 11231 }, { "epoch": 0.8898395721925134, "grad_norm": 1.8218724451059767, "learning_rate": 6.293823117840703e-07, "loss": 0.1762, "step": 11232 }, { "epoch": 0.8899187958011487, "grad_norm": 1.6441741701038273, "learning_rate": 6.284866553402347e-07, "loss": 0.2197, "step": 11233 }, { "epoch": 0.8899980194097841, "grad_norm": 1.5854646926362082, "learning_rate": 6.275916159654616e-07, "loss": 0.121, "step": 11234 }, { "epoch": 0.8900772430184195, "grad_norm": 1.7377751986544356, "learning_rate": 6.266971937186827e-07, "loss": 0.1548, "step": 11235 }, { "epoch": 0.8901564666270548, "grad_norm": 1.4061315713175284, "learning_rate": 6.258033886587911e-07, "loss": 0.1495, "step": 11236 }, { "epoch": 0.8902356902356903, "grad_norm": 1.4998357311709074, "learning_rate": 6.249102008446418e-07, "loss": 0.1431, "step": 11237 }, { "epoch": 0.8903149138443256, "grad_norm": 1.6175872568826988, "learning_rate": 6.240176303350453e-07, "loss": 0.1837, "step": 11238 }, { "epoch": 0.890394137452961, "grad_norm": 1.246091915795264, "learning_rate": 6.231256771887739e-07, "loss": 0.0959, "step": 11239 }, { "epoch": 0.8904733610615964, "grad_norm": 1.6529702667844364, "learning_rate": 6.222343414645571e-07, "loss": 0.1791, "step": 11240 }, { "epoch": 0.8905525846702317, "grad_norm": 1.5090258437887851, "learning_rate": 6.213436232210868e-07, "loss": 0.1907, "step": 11241 }, { "epoch": 0.8906318082788671, "grad_norm": 2.095791854337218, "learning_rate": 6.204535225170116e-07, "loss": 0.1741, "step": 11242 }, { "epoch": 0.8907110318875024, "grad_norm": 1.876696982929106, "learning_rate": 6.195640394109393e-07, "loss": 0.1649, "step": 11243 }, { "epoch": 0.8907902554961379, "grad_norm": 1.4199290772900632, "learning_rate": 6.186751739614405e-07, "loss": 0.1433, "step": 11244 }, { "epoch": 0.8908694791047732, "grad_norm": 1.7431171687864346, "learning_rate": 6.177869262270419e-07, "loss": 0.1284, "step": 11245 }, { "epoch": 0.8909487027134086, "grad_norm": 2.195327519966475, "learning_rate": 6.168992962662279e-07, "loss": 0.1708, "step": 11246 }, { "epoch": 0.891027926322044, "grad_norm": 1.7763622960297851, "learning_rate": 6.160122841374482e-07, "loss": 0.2129, "step": 11247 }, { "epoch": 0.8911071499306793, "grad_norm": 1.9501774640286642, "learning_rate": 6.151258898991064e-07, "loss": 0.2469, "step": 11248 }, { "epoch": 0.8911863735393147, "grad_norm": 1.851749359678267, "learning_rate": 6.142401136095666e-07, "loss": 0.1727, "step": 11249 }, { "epoch": 0.8912655971479501, "grad_norm": 1.0887285254803596, "learning_rate": 6.133549553271556e-07, "loss": 0.1168, "step": 11250 }, { "epoch": 0.8913448207565855, "grad_norm": 1.4013356163225112, "learning_rate": 6.124704151101546e-07, "loss": 0.0999, "step": 11251 }, { "epoch": 0.8914240443652208, "grad_norm": 1.694376069402616, "learning_rate": 6.115864930168058e-07, "loss": 0.2033, "step": 11252 }, { "epoch": 0.8915032679738563, "grad_norm": 1.8458441488883788, "learning_rate": 6.107031891053139e-07, "loss": 0.1726, "step": 11253 }, { "epoch": 0.8915824915824916, "grad_norm": 1.159657815093472, "learning_rate": 6.098205034338378e-07, "loss": 0.0735, "step": 11254 }, { "epoch": 0.8916617151911269, "grad_norm": 1.4033220964352697, "learning_rate": 6.089384360605e-07, "loss": 0.1828, "step": 11255 }, { "epoch": 0.8917409387997624, "grad_norm": 1.7516159313909134, "learning_rate": 6.080569870433773e-07, "loss": 0.1345, "step": 11256 }, { "epoch": 0.8918201624083977, "grad_norm": 1.6714637132205032, "learning_rate": 6.071761564405121e-07, "loss": 0.1865, "step": 11257 }, { "epoch": 0.8918993860170331, "grad_norm": 1.7742440519366174, "learning_rate": 6.062959443099014e-07, "loss": 0.205, "step": 11258 }, { "epoch": 0.8919786096256684, "grad_norm": 2.0297418055836185, "learning_rate": 6.054163507095035e-07, "loss": 0.2431, "step": 11259 }, { "epoch": 0.8920578332343039, "grad_norm": 1.3322366360320181, "learning_rate": 6.04537375697235e-07, "loss": 0.1114, "step": 11260 }, { "epoch": 0.8921370568429392, "grad_norm": 1.577470855731298, "learning_rate": 6.036590193309711e-07, "loss": 0.1358, "step": 11261 }, { "epoch": 0.8922162804515745, "grad_norm": 1.4110589792298522, "learning_rate": 6.027812816685497e-07, "loss": 0.1431, "step": 11262 }, { "epoch": 0.89229550406021, "grad_norm": 1.5870613425665359, "learning_rate": 6.019041627677635e-07, "loss": 0.152, "step": 11263 }, { "epoch": 0.8923747276688453, "grad_norm": 2.017644087815337, "learning_rate": 6.010276626863687e-07, "loss": 0.2127, "step": 11264 }, { "epoch": 0.8924539512774807, "grad_norm": 1.9355662399539144, "learning_rate": 6.001517814820757e-07, "loss": 0.1656, "step": 11265 }, { "epoch": 0.892533174886116, "grad_norm": 1.7887324927696548, "learning_rate": 5.992765192125594e-07, "loss": 0.1672, "step": 11266 }, { "epoch": 0.8926123984947515, "grad_norm": 1.8308073852761952, "learning_rate": 5.984018759354515e-07, "loss": 0.1837, "step": 11267 }, { "epoch": 0.8926916221033868, "grad_norm": 1.5167061556868657, "learning_rate": 5.975278517083405e-07, "loss": 0.1456, "step": 11268 }, { "epoch": 0.8927708457120221, "grad_norm": 1.3132974669812354, "learning_rate": 5.966544465887803e-07, "loss": 0.1294, "step": 11269 }, { "epoch": 0.8928500693206576, "grad_norm": 1.2186063300854928, "learning_rate": 5.957816606342792e-07, "loss": 0.1101, "step": 11270 }, { "epoch": 0.8929292929292929, "grad_norm": 1.846243225219194, "learning_rate": 5.949094939023037e-07, "loss": 0.1587, "step": 11271 }, { "epoch": 0.8930085165379283, "grad_norm": 1.5181098948173455, "learning_rate": 5.940379464502854e-07, "loss": 0.0929, "step": 11272 }, { "epoch": 0.8930877401465637, "grad_norm": 1.8059894076668794, "learning_rate": 5.931670183356097e-07, "loss": 0.2006, "step": 11273 }, { "epoch": 0.893166963755199, "grad_norm": 1.30408373614937, "learning_rate": 5.922967096156218e-07, "loss": 0.1283, "step": 11274 }, { "epoch": 0.8932461873638344, "grad_norm": 1.6571374410903401, "learning_rate": 5.914270203476291e-07, "loss": 0.1757, "step": 11275 }, { "epoch": 0.8933254109724698, "grad_norm": 1.4526104683390844, "learning_rate": 5.90557950588897e-07, "loss": 0.1023, "step": 11276 }, { "epoch": 0.8934046345811052, "grad_norm": 1.7298944643987848, "learning_rate": 5.896895003966463e-07, "loss": 0.143, "step": 11277 }, { "epoch": 0.8934838581897405, "grad_norm": 1.2827752791043883, "learning_rate": 5.888216698280646e-07, "loss": 0.109, "step": 11278 }, { "epoch": 0.893563081798376, "grad_norm": 1.3578336180229134, "learning_rate": 5.879544589402919e-07, "loss": 0.1232, "step": 11279 }, { "epoch": 0.8936423054070113, "grad_norm": 1.849784040165139, "learning_rate": 5.870878677904302e-07, "loss": 0.2735, "step": 11280 }, { "epoch": 0.8937215290156466, "grad_norm": 1.8077700092476061, "learning_rate": 5.862218964355382e-07, "loss": 0.1994, "step": 11281 }, { "epoch": 0.893800752624282, "grad_norm": 1.9009916428864504, "learning_rate": 5.853565449326404e-07, "loss": 0.1723, "step": 11282 }, { "epoch": 0.8938799762329174, "grad_norm": 1.5682009846048015, "learning_rate": 5.844918133387134e-07, "loss": 0.1355, "step": 11283 }, { "epoch": 0.8939591998415528, "grad_norm": 1.2714334361564996, "learning_rate": 5.836277017106951e-07, "loss": 0.1071, "step": 11284 }, { "epoch": 0.8940384234501881, "grad_norm": 1.591084559490241, "learning_rate": 5.827642101054854e-07, "loss": 0.1345, "step": 11285 }, { "epoch": 0.8941176470588236, "grad_norm": 1.9704366449407094, "learning_rate": 5.819013385799388e-07, "loss": 0.1989, "step": 11286 }, { "epoch": 0.8941968706674589, "grad_norm": 1.9061566520705489, "learning_rate": 5.810390871908711e-07, "loss": 0.1741, "step": 11287 }, { "epoch": 0.8942760942760942, "grad_norm": 1.6121238486483267, "learning_rate": 5.801774559950591e-07, "loss": 0.1263, "step": 11288 }, { "epoch": 0.8943553178847297, "grad_norm": 1.32607166595525, "learning_rate": 5.793164450492372e-07, "loss": 0.087, "step": 11289 }, { "epoch": 0.894434541493365, "grad_norm": 2.392354843710851, "learning_rate": 5.784560544100959e-07, "loss": 0.2312, "step": 11290 }, { "epoch": 0.8945137651020004, "grad_norm": 1.547948366456263, "learning_rate": 5.775962841342919e-07, "loss": 0.1689, "step": 11291 }, { "epoch": 0.8945929887106358, "grad_norm": 2.27664957216012, "learning_rate": 5.767371342784345e-07, "loss": 0.1949, "step": 11292 }, { "epoch": 0.8946722123192712, "grad_norm": 1.838662399074391, "learning_rate": 5.758786048990939e-07, "loss": 0.1854, "step": 11293 }, { "epoch": 0.8947514359279065, "grad_norm": 1.2599419099330276, "learning_rate": 5.750206960528027e-07, "loss": 0.1108, "step": 11294 }, { "epoch": 0.8948306595365418, "grad_norm": 1.7598522187783212, "learning_rate": 5.741634077960479e-07, "loss": 0.1982, "step": 11295 }, { "epoch": 0.8949098831451773, "grad_norm": 1.517262601487664, "learning_rate": 5.733067401852788e-07, "loss": 0.2039, "step": 11296 }, { "epoch": 0.8949891067538126, "grad_norm": 1.6145889972294232, "learning_rate": 5.724506932769014e-07, "loss": 0.1695, "step": 11297 }, { "epoch": 0.895068330362448, "grad_norm": 1.3094398597015489, "learning_rate": 5.71595267127284e-07, "loss": 0.0688, "step": 11298 }, { "epoch": 0.8951475539710834, "grad_norm": 1.6896357084447566, "learning_rate": 5.707404617927526e-07, "loss": 0.1168, "step": 11299 }, { "epoch": 0.8952267775797188, "grad_norm": 1.6449875147626893, "learning_rate": 5.698862773295888e-07, "loss": 0.1499, "step": 11300 }, { "epoch": 0.8953060011883541, "grad_norm": 1.6897512766602445, "learning_rate": 5.69032713794041e-07, "loss": 0.1442, "step": 11301 }, { "epoch": 0.8953852247969895, "grad_norm": 1.3085370897928394, "learning_rate": 5.681797712423099e-07, "loss": 0.1084, "step": 11302 }, { "epoch": 0.8954644484056249, "grad_norm": 1.3669161558332743, "learning_rate": 5.673274497305559e-07, "loss": 0.1333, "step": 11303 }, { "epoch": 0.8955436720142602, "grad_norm": 2.0376865016263155, "learning_rate": 5.664757493149042e-07, "loss": 0.2081, "step": 11304 }, { "epoch": 0.8956228956228957, "grad_norm": 1.571977400576846, "learning_rate": 5.656246700514323e-07, "loss": 0.1898, "step": 11305 }, { "epoch": 0.895702119231531, "grad_norm": 1.9198876385521253, "learning_rate": 5.647742119961797e-07, "loss": 0.1674, "step": 11306 }, { "epoch": 0.8957813428401664, "grad_norm": 1.3299311659699893, "learning_rate": 5.639243752051482e-07, "loss": 0.1105, "step": 11307 }, { "epoch": 0.8958605664488017, "grad_norm": 1.592886373269954, "learning_rate": 5.630751597342921e-07, "loss": 0.1071, "step": 11308 }, { "epoch": 0.8959397900574371, "grad_norm": 1.546762085242033, "learning_rate": 5.622265656395276e-07, "loss": 0.1426, "step": 11309 }, { "epoch": 0.8960190136660725, "grad_norm": 1.6661356466787718, "learning_rate": 5.613785929767335e-07, "loss": 0.1744, "step": 11310 }, { "epoch": 0.8960982372747078, "grad_norm": 1.9130021091242595, "learning_rate": 5.605312418017439e-07, "loss": 0.2088, "step": 11311 }, { "epoch": 0.8961774608833433, "grad_norm": 1.3767088898138513, "learning_rate": 5.59684512170352e-07, "loss": 0.1461, "step": 11312 }, { "epoch": 0.8962566844919786, "grad_norm": 1.473560398664012, "learning_rate": 5.588384041383089e-07, "loss": 0.1184, "step": 11313 }, { "epoch": 0.896335908100614, "grad_norm": 1.5160233684633657, "learning_rate": 5.579929177613308e-07, "loss": 0.1623, "step": 11314 }, { "epoch": 0.8964151317092494, "grad_norm": 1.3391100552727728, "learning_rate": 5.571480530950879e-07, "loss": 0.1407, "step": 11315 }, { "epoch": 0.8964943553178847, "grad_norm": 1.652836812915285, "learning_rate": 5.563038101952067e-07, "loss": 0.2171, "step": 11316 }, { "epoch": 0.8965735789265201, "grad_norm": 1.3700753634234208, "learning_rate": 5.554601891172817e-07, "loss": 0.1119, "step": 11317 }, { "epoch": 0.8966528025351554, "grad_norm": 1.4839320395216817, "learning_rate": 5.546171899168595e-07, "loss": 0.1419, "step": 11318 }, { "epoch": 0.8967320261437909, "grad_norm": 1.8921895587968263, "learning_rate": 5.537748126494446e-07, "loss": 0.2277, "step": 11319 }, { "epoch": 0.8968112497524262, "grad_norm": 1.4042344796227255, "learning_rate": 5.529330573705083e-07, "loss": 0.1294, "step": 11320 }, { "epoch": 0.8968904733610616, "grad_norm": 1.7041968714851021, "learning_rate": 5.520919241354728e-07, "loss": 0.1673, "step": 11321 }, { "epoch": 0.896969696969697, "grad_norm": 1.5651800598353514, "learning_rate": 5.512514129997227e-07, "loss": 0.1575, "step": 11322 }, { "epoch": 0.8970489205783323, "grad_norm": 1.4110693909785013, "learning_rate": 5.504115240186048e-07, "loss": 0.092, "step": 11323 }, { "epoch": 0.8971281441869677, "grad_norm": 1.5541585819536432, "learning_rate": 5.495722572474183e-07, "loss": 0.1799, "step": 11324 }, { "epoch": 0.8972073677956031, "grad_norm": 1.594218021174744, "learning_rate": 5.487336127414267e-07, "loss": 0.1556, "step": 11325 }, { "epoch": 0.8972865914042385, "grad_norm": 1.57926160174065, "learning_rate": 5.478955905558491e-07, "loss": 0.1688, "step": 11326 }, { "epoch": 0.8973658150128738, "grad_norm": 1.2120721334678117, "learning_rate": 5.470581907458672e-07, "loss": 0.0978, "step": 11327 }, { "epoch": 0.8974450386215093, "grad_norm": 1.4914908757731382, "learning_rate": 5.462214133666189e-07, "loss": 0.11, "step": 11328 }, { "epoch": 0.8975242622301446, "grad_norm": 1.7671302458626819, "learning_rate": 5.453852584732e-07, "loss": 0.1428, "step": 11329 }, { "epoch": 0.8976034858387799, "grad_norm": 1.7867272378550434, "learning_rate": 5.4454972612067e-07, "loss": 0.1282, "step": 11330 }, { "epoch": 0.8976827094474154, "grad_norm": 1.4743379797773373, "learning_rate": 5.437148163640449e-07, "loss": 0.1755, "step": 11331 }, { "epoch": 0.8977619330560507, "grad_norm": 2.016628735552289, "learning_rate": 5.428805292582973e-07, "loss": 0.2047, "step": 11332 }, { "epoch": 0.8978411566646861, "grad_norm": 1.2649879451604544, "learning_rate": 5.420468648583621e-07, "loss": 0.0848, "step": 11333 }, { "epoch": 0.8979203802733214, "grad_norm": 1.5785850638119205, "learning_rate": 5.412138232191333e-07, "loss": 0.0966, "step": 11334 }, { "epoch": 0.8979996038819569, "grad_norm": 1.9203816553077167, "learning_rate": 5.403814043954592e-07, "loss": 0.1588, "step": 11335 }, { "epoch": 0.8980788274905922, "grad_norm": 1.3511975089074213, "learning_rate": 5.39549608442157e-07, "loss": 0.0872, "step": 11336 }, { "epoch": 0.8981580510992275, "grad_norm": 1.6294520420028766, "learning_rate": 5.387184354139896e-07, "loss": 0.1017, "step": 11337 }, { "epoch": 0.898237274707863, "grad_norm": 1.4732938690481243, "learning_rate": 5.378878853656877e-07, "loss": 0.1177, "step": 11338 }, { "epoch": 0.8983164983164983, "grad_norm": 1.9978839265823294, "learning_rate": 5.370579583519409e-07, "loss": 0.1499, "step": 11339 }, { "epoch": 0.8983957219251337, "grad_norm": 1.604379496127464, "learning_rate": 5.362286544273942e-07, "loss": 0.1529, "step": 11340 }, { "epoch": 0.898474945533769, "grad_norm": 1.483344368761494, "learning_rate": 5.353999736466531e-07, "loss": 0.1232, "step": 11341 }, { "epoch": 0.8985541691424045, "grad_norm": 1.2542500492553612, "learning_rate": 5.345719160642848e-07, "loss": 0.1157, "step": 11342 }, { "epoch": 0.8986333927510398, "grad_norm": 1.4386845890827769, "learning_rate": 5.337444817348103e-07, "loss": 0.0905, "step": 11343 }, { "epoch": 0.8987126163596751, "grad_norm": 1.5307143742529814, "learning_rate": 5.329176707127115e-07, "loss": 0.1478, "step": 11344 }, { "epoch": 0.8987918399683106, "grad_norm": 1.321029166404186, "learning_rate": 5.320914830524337e-07, "loss": 0.1347, "step": 11345 }, { "epoch": 0.8988710635769459, "grad_norm": 1.5876254737255515, "learning_rate": 5.312659188083746e-07, "loss": 0.11, "step": 11346 }, { "epoch": 0.8989502871855813, "grad_norm": 1.6432029337053642, "learning_rate": 5.304409780348919e-07, "loss": 0.1774, "step": 11347 }, { "epoch": 0.8990295107942167, "grad_norm": 1.5641320811554813, "learning_rate": 5.296166607863085e-07, "loss": 0.197, "step": 11348 }, { "epoch": 0.899108734402852, "grad_norm": 1.5244923754489452, "learning_rate": 5.287929671168989e-07, "loss": 0.0989, "step": 11349 }, { "epoch": 0.8991879580114874, "grad_norm": 1.7158970641907811, "learning_rate": 5.279698970809011e-07, "loss": 0.1801, "step": 11350 }, { "epoch": 0.8992671816201228, "grad_norm": 1.4183447696963687, "learning_rate": 5.271474507325058e-07, "loss": 0.1377, "step": 11351 }, { "epoch": 0.8993464052287582, "grad_norm": 1.4773141886665055, "learning_rate": 5.263256281258733e-07, "loss": 0.1103, "step": 11352 }, { "epoch": 0.8994256288373935, "grad_norm": 1.6218057438215137, "learning_rate": 5.255044293151135e-07, "loss": 0.1844, "step": 11353 }, { "epoch": 0.899504852446029, "grad_norm": 1.4933641227010577, "learning_rate": 5.246838543542964e-07, "loss": 0.1397, "step": 11354 }, { "epoch": 0.8995840760546643, "grad_norm": 1.5396011767264581, "learning_rate": 5.23863903297458e-07, "loss": 0.1236, "step": 11355 }, { "epoch": 0.8996632996632996, "grad_norm": 1.8919777521382155, "learning_rate": 5.230445761985836e-07, "loss": 0.1955, "step": 11356 }, { "epoch": 0.899742523271935, "grad_norm": 1.8568798319959627, "learning_rate": 5.222258731116237e-07, "loss": 0.1766, "step": 11357 }, { "epoch": 0.8998217468805704, "grad_norm": 1.9133468366897972, "learning_rate": 5.214077940904872e-07, "loss": 0.1658, "step": 11358 }, { "epoch": 0.8999009704892058, "grad_norm": 1.6425193973081886, "learning_rate": 5.205903391890387e-07, "loss": 0.1523, "step": 11359 }, { "epoch": 0.8999801940978411, "grad_norm": 1.2474483394012947, "learning_rate": 5.197735084611033e-07, "loss": 0.1387, "step": 11360 }, { "epoch": 0.9000594177064766, "grad_norm": 1.3462790343248248, "learning_rate": 5.189573019604676e-07, "loss": 0.1126, "step": 11361 }, { "epoch": 0.9001386413151119, "grad_norm": 1.545323003050307, "learning_rate": 5.181417197408733e-07, "loss": 0.1697, "step": 11362 }, { "epoch": 0.9002178649237472, "grad_norm": 1.5548820157271666, "learning_rate": 5.173267618560229e-07, "loss": 0.17, "step": 11363 }, { "epoch": 0.9002970885323827, "grad_norm": 1.1205615384995606, "learning_rate": 5.165124283595779e-07, "loss": 0.0736, "step": 11364 }, { "epoch": 0.900376312141018, "grad_norm": 1.98818053162733, "learning_rate": 5.156987193051577e-07, "loss": 0.2034, "step": 11365 }, { "epoch": 0.9004555357496534, "grad_norm": 1.6542828239285352, "learning_rate": 5.148856347463416e-07, "loss": 0.1141, "step": 11366 }, { "epoch": 0.9005347593582887, "grad_norm": 1.5942589914927283, "learning_rate": 5.140731747366656e-07, "loss": 0.1243, "step": 11367 }, { "epoch": 0.9006139829669242, "grad_norm": 1.4300086113167827, "learning_rate": 5.132613393296293e-07, "loss": 0.1411, "step": 11368 }, { "epoch": 0.9006932065755595, "grad_norm": 1.6178021745768005, "learning_rate": 5.124501285786865e-07, "loss": 0.1196, "step": 11369 }, { "epoch": 0.9007724301841948, "grad_norm": 1.9545184399558575, "learning_rate": 5.1163954253725e-07, "loss": 0.1976, "step": 11370 }, { "epoch": 0.9008516537928303, "grad_norm": 1.3406387089035658, "learning_rate": 5.108295812586961e-07, "loss": 0.1307, "step": 11371 }, { "epoch": 0.9009308774014656, "grad_norm": 2.082724295705734, "learning_rate": 5.100202447963553e-07, "loss": 0.1879, "step": 11372 }, { "epoch": 0.901010101010101, "grad_norm": 1.661762069352776, "learning_rate": 5.092115332035163e-07, "loss": 0.1544, "step": 11373 }, { "epoch": 0.9010893246187364, "grad_norm": 1.5043254295374404, "learning_rate": 5.084034465334342e-07, "loss": 0.112, "step": 11374 }, { "epoch": 0.9011685482273718, "grad_norm": 1.8103321437180646, "learning_rate": 5.07595984839313e-07, "loss": 0.1764, "step": 11375 }, { "epoch": 0.9012477718360071, "grad_norm": 1.6908820620018818, "learning_rate": 5.067891481743203e-07, "loss": 0.1258, "step": 11376 }, { "epoch": 0.9013269954446425, "grad_norm": 1.8555870826772165, "learning_rate": 5.059829365915859e-07, "loss": 0.2098, "step": 11377 }, { "epoch": 0.9014062190532779, "grad_norm": 1.7446096382305152, "learning_rate": 5.051773501441926e-07, "loss": 0.1563, "step": 11378 }, { "epoch": 0.9014854426619132, "grad_norm": 1.7967718562152633, "learning_rate": 5.043723888851837e-07, "loss": 0.1808, "step": 11379 }, { "epoch": 0.9015646662705487, "grad_norm": 1.8487202047221145, "learning_rate": 5.035680528675635e-07, "loss": 0.1806, "step": 11380 }, { "epoch": 0.901643889879184, "grad_norm": 2.020903434076147, "learning_rate": 5.027643421442929e-07, "loss": 0.169, "step": 11381 }, { "epoch": 0.9017231134878194, "grad_norm": 1.5976327872226603, "learning_rate": 5.01961256768293e-07, "loss": 0.12, "step": 11382 }, { "epoch": 0.9018023370964547, "grad_norm": 1.202659850687613, "learning_rate": 5.011587967924414e-07, "loss": 0.0739, "step": 11383 }, { "epoch": 0.9018815607050901, "grad_norm": 1.2199042086708365, "learning_rate": 5.003569622695792e-07, "loss": 0.0919, "step": 11384 }, { "epoch": 0.9019607843137255, "grad_norm": 1.5701709718738008, "learning_rate": 4.99555753252502e-07, "loss": 0.1662, "step": 11385 }, { "epoch": 0.9020400079223608, "grad_norm": 1.6548432815623688, "learning_rate": 4.987551697939629e-07, "loss": 0.1702, "step": 11386 }, { "epoch": 0.9021192315309963, "grad_norm": 1.0829761303403709, "learning_rate": 4.979552119466802e-07, "loss": 0.0932, "step": 11387 }, { "epoch": 0.9021984551396316, "grad_norm": 1.6308302635424, "learning_rate": 4.971558797633258e-07, "loss": 0.1834, "step": 11388 }, { "epoch": 0.902277678748267, "grad_norm": 1.4169782185345832, "learning_rate": 4.963571732965311e-07, "loss": 0.1214, "step": 11389 }, { "epoch": 0.9023569023569024, "grad_norm": 2.114433740553322, "learning_rate": 4.955590925988896e-07, "loss": 0.2454, "step": 11390 }, { "epoch": 0.9024361259655377, "grad_norm": 1.8675460581321455, "learning_rate": 4.947616377229492e-07, "loss": 0.1358, "step": 11391 }, { "epoch": 0.9025153495741731, "grad_norm": 1.7578970333362844, "learning_rate": 4.939648087212168e-07, "loss": 0.1745, "step": 11392 }, { "epoch": 0.9025945731828084, "grad_norm": 1.7722405255081999, "learning_rate": 4.931686056461626e-07, "loss": 0.1781, "step": 11393 }, { "epoch": 0.9026737967914439, "grad_norm": 2.0153409981016512, "learning_rate": 4.923730285502126e-07, "loss": 0.1848, "step": 11394 }, { "epoch": 0.9027530204000792, "grad_norm": 1.6568347891034174, "learning_rate": 4.915780774857504e-07, "loss": 0.1856, "step": 11395 }, { "epoch": 0.9028322440087146, "grad_norm": 1.4383764201952762, "learning_rate": 4.907837525051196e-07, "loss": 0.1231, "step": 11396 }, { "epoch": 0.90291146761735, "grad_norm": 2.0260323706516106, "learning_rate": 4.89990053660624e-07, "loss": 0.2478, "step": 11397 }, { "epoch": 0.9029906912259853, "grad_norm": 1.6566336805540895, "learning_rate": 4.891969810045239e-07, "loss": 0.1579, "step": 11398 }, { "epoch": 0.9030699148346207, "grad_norm": 1.4074813418733512, "learning_rate": 4.884045345890387e-07, "loss": 0.1366, "step": 11399 }, { "epoch": 0.9031491384432561, "grad_norm": 1.964438753026122, "learning_rate": 4.87612714466349e-07, "loss": 0.2293, "step": 11400 }, { "epoch": 0.9032283620518915, "grad_norm": 2.0421436854789223, "learning_rate": 4.868215206885918e-07, "loss": 0.1583, "step": 11401 }, { "epoch": 0.9033075856605268, "grad_norm": 1.2624632556187516, "learning_rate": 4.860309533078611e-07, "loss": 0.0778, "step": 11402 }, { "epoch": 0.9033868092691623, "grad_norm": 2.155488995237254, "learning_rate": 4.852410123762164e-07, "loss": 0.2024, "step": 11403 }, { "epoch": 0.9034660328777976, "grad_norm": 1.4810079746012292, "learning_rate": 4.844516979456671e-07, "loss": 0.128, "step": 11404 }, { "epoch": 0.9035452564864329, "grad_norm": 1.4881442493700057, "learning_rate": 4.836630100681872e-07, "loss": 0.1067, "step": 11405 }, { "epoch": 0.9036244800950683, "grad_norm": 1.9283246715174054, "learning_rate": 4.828749487957097e-07, "loss": 0.2044, "step": 11406 }, { "epoch": 0.9037037037037037, "grad_norm": 1.363813160298884, "learning_rate": 4.82087514180124e-07, "loss": 0.1322, "step": 11407 }, { "epoch": 0.9037829273123391, "grad_norm": 1.7902310414222058, "learning_rate": 4.813007062732756e-07, "loss": 0.1482, "step": 11408 }, { "epoch": 0.9038621509209744, "grad_norm": 1.2489883691533643, "learning_rate": 4.805145251269772e-07, "loss": 0.1039, "step": 11409 }, { "epoch": 0.9039413745296099, "grad_norm": 1.3935651931034179, "learning_rate": 4.797289707929919e-07, "loss": 0.1136, "step": 11410 }, { "epoch": 0.9040205981382452, "grad_norm": 1.406768123138463, "learning_rate": 4.789440433230452e-07, "loss": 0.1278, "step": 11411 }, { "epoch": 0.9040998217468805, "grad_norm": 1.619004642502583, "learning_rate": 4.781597427688189e-07, "loss": 0.1398, "step": 11412 }, { "epoch": 0.904179045355516, "grad_norm": 1.6282128895076493, "learning_rate": 4.773760691819596e-07, "loss": 0.1499, "step": 11413 }, { "epoch": 0.9042582689641513, "grad_norm": 2.2494812738014933, "learning_rate": 4.765930226140658e-07, "loss": 0.1706, "step": 11414 }, { "epoch": 0.9043374925727867, "grad_norm": 1.526250178082704, "learning_rate": 4.7581060311669757e-07, "loss": 0.1186, "step": 11415 }, { "epoch": 0.904416716181422, "grad_norm": 1.3764706450447917, "learning_rate": 4.7502881074137476e-07, "loss": 0.1432, "step": 11416 }, { "epoch": 0.9044959397900575, "grad_norm": 1.4298051863304106, "learning_rate": 4.742476455395706e-07, "loss": 0.1373, "step": 11417 }, { "epoch": 0.9045751633986928, "grad_norm": 1.4785058799251485, "learning_rate": 4.734671075627262e-07, "loss": 0.1298, "step": 11418 }, { "epoch": 0.9046543870073281, "grad_norm": 1.555622667199011, "learning_rate": 4.726871968622337e-07, "loss": 0.1513, "step": 11419 }, { "epoch": 0.9047336106159636, "grad_norm": 1.6606247073598899, "learning_rate": 4.7190791348944777e-07, "loss": 0.1493, "step": 11420 }, { "epoch": 0.9048128342245989, "grad_norm": 1.2205852258418255, "learning_rate": 4.711292574956772e-07, "loss": 0.1191, "step": 11421 }, { "epoch": 0.9048920578332343, "grad_norm": 1.9490785495485299, "learning_rate": 4.7035122893219653e-07, "loss": 0.1741, "step": 11422 }, { "epoch": 0.9049712814418697, "grad_norm": 3.575777555965547, "learning_rate": 4.695738278502338e-07, "loss": 0.1204, "step": 11423 }, { "epoch": 0.9050505050505051, "grad_norm": 1.8982510466699347, "learning_rate": 4.6879705430097566e-07, "loss": 0.1712, "step": 11424 }, { "epoch": 0.9051297286591404, "grad_norm": 1.5860373350741164, "learning_rate": 4.6802090833557136e-07, "loss": 0.1518, "step": 11425 }, { "epoch": 0.9052089522677758, "grad_norm": 1.0487312466854741, "learning_rate": 4.6724539000512546e-07, "loss": 0.0756, "step": 11426 }, { "epoch": 0.9052881758764112, "grad_norm": 1.3378899060647163, "learning_rate": 4.6647049936070054e-07, "loss": 0.1318, "step": 11427 }, { "epoch": 0.9053673994850465, "grad_norm": 1.4264166642309284, "learning_rate": 4.656962364533224e-07, "loss": 0.0955, "step": 11428 }, { "epoch": 0.905446623093682, "grad_norm": 1.2982669978194084, "learning_rate": 4.649226013339703e-07, "loss": 0.1542, "step": 11429 }, { "epoch": 0.9055258467023173, "grad_norm": 1.356803060594413, "learning_rate": 4.641495940535845e-07, "loss": 0.1386, "step": 11430 }, { "epoch": 0.9056050703109526, "grad_norm": 1.5557342496371263, "learning_rate": 4.633772146630655e-07, "loss": 0.1591, "step": 11431 }, { "epoch": 0.905684293919588, "grad_norm": 1.8995539995481001, "learning_rate": 4.626054632132693e-07, "loss": 0.1821, "step": 11432 }, { "epoch": 0.9057635175282234, "grad_norm": 1.700038313275163, "learning_rate": 4.6183433975501067e-07, "loss": 0.1451, "step": 11433 }, { "epoch": 0.9058427411368588, "grad_norm": 1.832993371380585, "learning_rate": 4.61063844339068e-07, "loss": 0.1174, "step": 11434 }, { "epoch": 0.9059219647454941, "grad_norm": 1.65751923386094, "learning_rate": 4.6029397701617296e-07, "loss": 0.1291, "step": 11435 }, { "epoch": 0.9060011883541296, "grad_norm": 1.9533427523696023, "learning_rate": 4.595247378370171e-07, "loss": 0.2368, "step": 11436 }, { "epoch": 0.9060804119627649, "grad_norm": 1.519366410392257, "learning_rate": 4.5875612685225e-07, "loss": 0.1234, "step": 11437 }, { "epoch": 0.9061596355714002, "grad_norm": 1.731676213932306, "learning_rate": 4.5798814411248336e-07, "loss": 0.1264, "step": 11438 }, { "epoch": 0.9062388591800357, "grad_norm": 1.6178598376552586, "learning_rate": 4.5722078966828455e-07, "loss": 0.1556, "step": 11439 }, { "epoch": 0.906318082788671, "grad_norm": 1.6351938104257702, "learning_rate": 4.5645406357017865e-07, "loss": 0.1929, "step": 11440 }, { "epoch": 0.9063973063973064, "grad_norm": 1.4601227302114204, "learning_rate": 4.5568796586865304e-07, "loss": 0.1198, "step": 11441 }, { "epoch": 0.9064765300059417, "grad_norm": 2.4113144449324, "learning_rate": 4.5492249661415077e-07, "loss": 0.1908, "step": 11442 }, { "epoch": 0.9065557536145772, "grad_norm": 1.7621537032939296, "learning_rate": 4.541576558570726e-07, "loss": 0.1689, "step": 11443 }, { "epoch": 0.9066349772232125, "grad_norm": 1.329967633141681, "learning_rate": 4.533934436477827e-07, "loss": 0.0894, "step": 11444 }, { "epoch": 0.9067142008318478, "grad_norm": 1.3935119403098621, "learning_rate": 4.526298600365997e-07, "loss": 0.1822, "step": 11445 }, { "epoch": 0.9067934244404833, "grad_norm": 1.358613009555309, "learning_rate": 4.5186690507379894e-07, "loss": 0.1313, "step": 11446 }, { "epoch": 0.9068726480491186, "grad_norm": 1.2676754987561336, "learning_rate": 4.5110457880962246e-07, "loss": 0.0775, "step": 11447 }, { "epoch": 0.906951871657754, "grad_norm": 1.663215876255719, "learning_rate": 4.503428812942623e-07, "loss": 0.143, "step": 11448 }, { "epoch": 0.9070310952663894, "grad_norm": 1.7666869367452651, "learning_rate": 4.495818125778717e-07, "loss": 0.1901, "step": 11449 }, { "epoch": 0.9071103188750248, "grad_norm": 1.2659551483511309, "learning_rate": 4.488213727105672e-07, "loss": 0.086, "step": 11450 }, { "epoch": 0.9071895424836601, "grad_norm": 1.8523283017604075, "learning_rate": 4.4806156174241776e-07, "loss": 0.2006, "step": 11451 }, { "epoch": 0.9072687660922955, "grad_norm": 1.2893120423432343, "learning_rate": 4.4730237972345326e-07, "loss": 0.1062, "step": 11452 }, { "epoch": 0.9073479897009309, "grad_norm": 1.5218562161802534, "learning_rate": 4.465438267036604e-07, "loss": 0.1368, "step": 11453 }, { "epoch": 0.9074272133095662, "grad_norm": 1.5222837280333117, "learning_rate": 4.4578590273299027e-07, "loss": 0.129, "step": 11454 }, { "epoch": 0.9075064369182017, "grad_norm": 2.0373974163545068, "learning_rate": 4.4502860786134747e-07, "loss": 0.1235, "step": 11455 }, { "epoch": 0.907585660526837, "grad_norm": 1.7761946526984598, "learning_rate": 4.4427194213859216e-07, "loss": 0.1409, "step": 11456 }, { "epoch": 0.9076648841354724, "grad_norm": 1.6236772582510532, "learning_rate": 4.435159056145533e-07, "loss": 0.1334, "step": 11457 }, { "epoch": 0.9077441077441077, "grad_norm": 1.4811407774522563, "learning_rate": 4.427604983390077e-07, "loss": 0.1735, "step": 11458 }, { "epoch": 0.9078233313527431, "grad_norm": 2.018968731941394, "learning_rate": 4.420057203616956e-07, "loss": 0.1982, "step": 11459 }, { "epoch": 0.9079025549613785, "grad_norm": 1.4590780705199156, "learning_rate": 4.4125157173231847e-07, "loss": 0.0875, "step": 11460 }, { "epoch": 0.9079817785700138, "grad_norm": 1.6296435205133828, "learning_rate": 4.40498052500532e-07, "loss": 0.2, "step": 11461 }, { "epoch": 0.9080610021786493, "grad_norm": 1.7082663538612024, "learning_rate": 4.397451627159499e-07, "loss": 0.1283, "step": 11462 }, { "epoch": 0.9081402257872846, "grad_norm": 1.954016097680302, "learning_rate": 4.389929024281492e-07, "loss": 0.2037, "step": 11463 }, { "epoch": 0.90821944939592, "grad_norm": 1.4045344721135806, "learning_rate": 4.382412716866602e-07, "loss": 0.1595, "step": 11464 }, { "epoch": 0.9082986730045554, "grad_norm": 1.3395634336756332, "learning_rate": 4.374902705409745e-07, "loss": 0.1343, "step": 11465 }, { "epoch": 0.9083778966131907, "grad_norm": 1.280373094394481, "learning_rate": 4.367398990405447e-07, "loss": 0.0947, "step": 11466 }, { "epoch": 0.9084571202218261, "grad_norm": 1.4916047142841655, "learning_rate": 4.359901572347758e-07, "loss": 0.1688, "step": 11467 }, { "epoch": 0.9085363438304614, "grad_norm": 1.3628814348833307, "learning_rate": 4.3524104517303714e-07, "loss": 0.127, "step": 11468 }, { "epoch": 0.9086155674390969, "grad_norm": 2.6665253956652473, "learning_rate": 4.3449256290465035e-07, "loss": 0.1734, "step": 11469 }, { "epoch": 0.9086947910477322, "grad_norm": 1.488086262716902, "learning_rate": 4.3374471047890497e-07, "loss": 0.1562, "step": 11470 }, { "epoch": 0.9087740146563676, "grad_norm": 1.53872033396023, "learning_rate": 4.329974879450394e-07, "loss": 0.1611, "step": 11471 }, { "epoch": 0.908853238265003, "grad_norm": 1.552253372938291, "learning_rate": 4.3225089535225415e-07, "loss": 0.1553, "step": 11472 }, { "epoch": 0.9089324618736383, "grad_norm": 1.516760891347147, "learning_rate": 4.3150493274971227e-07, "loss": 0.1644, "step": 11473 }, { "epoch": 0.9090116854822737, "grad_norm": 1.494441336588106, "learning_rate": 4.3075960018652995e-07, "loss": 0.1122, "step": 11474 }, { "epoch": 0.9090909090909091, "grad_norm": 1.6144792076667926, "learning_rate": 4.300148977117824e-07, "loss": 0.1662, "step": 11475 }, { "epoch": 0.9091701326995445, "grad_norm": 1.2885401572931912, "learning_rate": 4.2927082537450705e-07, "loss": 0.1025, "step": 11476 }, { "epoch": 0.9092493563081798, "grad_norm": 1.3474983101967246, "learning_rate": 4.285273832236969e-07, "loss": 0.1176, "step": 11477 }, { "epoch": 0.9093285799168153, "grad_norm": 1.875344140418163, "learning_rate": 4.277845713083018e-07, "loss": 0.2049, "step": 11478 }, { "epoch": 0.9094078035254506, "grad_norm": 1.615629943363044, "learning_rate": 4.2704238967723574e-07, "loss": 0.1634, "step": 11479 }, { "epoch": 0.9094870271340859, "grad_norm": 1.4123259716174827, "learning_rate": 4.2630083837936654e-07, "loss": 0.115, "step": 11480 }, { "epoch": 0.9095662507427213, "grad_norm": 1.5196796232562162, "learning_rate": 4.2555991746352054e-07, "loss": 0.1865, "step": 11481 }, { "epoch": 0.9096454743513567, "grad_norm": 2.110429034207658, "learning_rate": 4.2481962697848323e-07, "loss": 0.1659, "step": 11482 }, { "epoch": 0.9097246979599921, "grad_norm": 1.789308279589873, "learning_rate": 4.240799669730034e-07, "loss": 0.2024, "step": 11483 }, { "epoch": 0.9098039215686274, "grad_norm": 1.7360816472168967, "learning_rate": 4.2334093749577975e-07, "loss": 0.1565, "step": 11484 }, { "epoch": 0.9098831451772629, "grad_norm": 1.5890408322521405, "learning_rate": 4.226025385954746e-07, "loss": 0.1324, "step": 11485 }, { "epoch": 0.9099623687858982, "grad_norm": 1.260621694056252, "learning_rate": 4.218647703207113e-07, "loss": 0.0799, "step": 11486 }, { "epoch": 0.9100415923945335, "grad_norm": 1.4641042877468629, "learning_rate": 4.211276327200642e-07, "loss": 0.1397, "step": 11487 }, { "epoch": 0.910120816003169, "grad_norm": 1.7766762268382965, "learning_rate": 4.203911258420712e-07, "loss": 0.162, "step": 11488 }, { "epoch": 0.9102000396118043, "grad_norm": 1.9630476454092027, "learning_rate": 4.196552497352302e-07, "loss": 0.1117, "step": 11489 }, { "epoch": 0.9102792632204397, "grad_norm": 1.3637213618537847, "learning_rate": 4.189200044479924e-07, "loss": 0.1432, "step": 11490 }, { "epoch": 0.910358486829075, "grad_norm": 2.0088292049886842, "learning_rate": 4.1818539002877024e-07, "loss": 0.2268, "step": 11491 }, { "epoch": 0.9104377104377105, "grad_norm": 1.4150811023124612, "learning_rate": 4.174514065259383e-07, "loss": 0.1392, "step": 11492 }, { "epoch": 0.9105169340463458, "grad_norm": 1.4594828016024535, "learning_rate": 4.167180539878213e-07, "loss": 0.1034, "step": 11493 }, { "epoch": 0.9105961576549811, "grad_norm": 2.011774466012604, "learning_rate": 4.1598533246270833e-07, "loss": 0.1638, "step": 11494 }, { "epoch": 0.9106753812636166, "grad_norm": 1.875467935125843, "learning_rate": 4.152532419988453e-07, "loss": 0.248, "step": 11495 }, { "epoch": 0.9107546048722519, "grad_norm": 1.5698093927419363, "learning_rate": 4.145217826444392e-07, "loss": 0.1729, "step": 11496 }, { "epoch": 0.9108338284808873, "grad_norm": 1.7009017025483715, "learning_rate": 4.1379095444764926e-07, "loss": 0.1228, "step": 11497 }, { "epoch": 0.9109130520895227, "grad_norm": 1.412021408610375, "learning_rate": 4.130607574566003e-07, "loss": 0.1408, "step": 11498 }, { "epoch": 0.9109922756981581, "grad_norm": 1.5105646720320853, "learning_rate": 4.1233119171937065e-07, "loss": 0.161, "step": 11499 }, { "epoch": 0.9110714993067934, "grad_norm": 1.4556037366622985, "learning_rate": 4.116022572839984e-07, "loss": 0.1228, "step": 11500 }, { "epoch": 0.9111507229154288, "grad_norm": 1.5244095482547964, "learning_rate": 4.1087395419848186e-07, "loss": 0.1308, "step": 11501 }, { "epoch": 0.9112299465240642, "grad_norm": 1.348523017951419, "learning_rate": 4.10146282510776e-07, "loss": 0.1154, "step": 11502 }, { "epoch": 0.9113091701326995, "grad_norm": 1.3128465629445263, "learning_rate": 4.094192422687926e-07, "loss": 0.0797, "step": 11503 }, { "epoch": 0.911388393741335, "grad_norm": 1.550695791337716, "learning_rate": 4.0869283352040656e-07, "loss": 0.1498, "step": 11504 }, { "epoch": 0.9114676173499703, "grad_norm": 1.452680901527569, "learning_rate": 4.079670563134475e-07, "loss": 0.1888, "step": 11505 }, { "epoch": 0.9115468409586057, "grad_norm": 1.600964369445469, "learning_rate": 4.072419106957026e-07, "loss": 0.1169, "step": 11506 }, { "epoch": 0.911626064567241, "grad_norm": 1.7918237432120638, "learning_rate": 4.065173967149205e-07, "loss": 0.1469, "step": 11507 }, { "epoch": 0.9117052881758764, "grad_norm": 1.4150082529440795, "learning_rate": 4.057935144188074e-07, "loss": 0.0823, "step": 11508 }, { "epoch": 0.9117845117845118, "grad_norm": 1.3364288318451276, "learning_rate": 4.0507026385502747e-07, "loss": 0.1333, "step": 11509 }, { "epoch": 0.9118637353931471, "grad_norm": 1.1833849746772958, "learning_rate": 4.043476450712014e-07, "loss": 0.1506, "step": 11510 }, { "epoch": 0.9119429590017826, "grad_norm": 1.652049602662587, "learning_rate": 4.036256581149123e-07, "loss": 0.1657, "step": 11511 }, { "epoch": 0.9120221826104179, "grad_norm": 1.7263545888738163, "learning_rate": 4.0290430303369876e-07, "loss": 0.1779, "step": 11512 }, { "epoch": 0.9121014062190532, "grad_norm": 1.7207571872753231, "learning_rate": 4.021835798750584e-07, "loss": 0.1996, "step": 11513 }, { "epoch": 0.9121806298276887, "grad_norm": 1.5695124587485665, "learning_rate": 4.0146348868644767e-07, "loss": 0.1797, "step": 11514 }, { "epoch": 0.912259853436324, "grad_norm": 1.5382496131762027, "learning_rate": 4.0074402951528204e-07, "loss": 0.1, "step": 11515 }, { "epoch": 0.9123390770449594, "grad_norm": 1.5840713797519137, "learning_rate": 4.000252024089313e-07, "loss": 0.1195, "step": 11516 }, { "epoch": 0.9124183006535947, "grad_norm": 1.7705422429782756, "learning_rate": 3.9930700741473093e-07, "loss": 0.2405, "step": 11517 }, { "epoch": 0.9124975242622302, "grad_norm": 1.5622492562438988, "learning_rate": 3.985894445799676e-07, "loss": 0.1498, "step": 11518 }, { "epoch": 0.9125767478708655, "grad_norm": 1.4064816930871453, "learning_rate": 3.978725139518891e-07, "loss": 0.1305, "step": 11519 }, { "epoch": 0.9126559714795008, "grad_norm": 1.8413556462738536, "learning_rate": 3.9715621557770535e-07, "loss": 0.1805, "step": 11520 }, { "epoch": 0.9127351950881363, "grad_norm": 1.9057942315284813, "learning_rate": 3.9644054950457753e-07, "loss": 0.1519, "step": 11521 }, { "epoch": 0.9128144186967716, "grad_norm": 1.9093191561874774, "learning_rate": 3.9572551577963135e-07, "loss": 0.1727, "step": 11522 }, { "epoch": 0.912893642305407, "grad_norm": 1.4370936444534879, "learning_rate": 3.9501111444994576e-07, "loss": 0.1483, "step": 11523 }, { "epoch": 0.9129728659140424, "grad_norm": 2.1219661549947526, "learning_rate": 3.9429734556256205e-07, "loss": 0.2495, "step": 11524 }, { "epoch": 0.9130520895226778, "grad_norm": 1.805258930501115, "learning_rate": 3.9358420916447927e-07, "loss": 0.1946, "step": 11525 }, { "epoch": 0.9131313131313131, "grad_norm": 1.7335832960592692, "learning_rate": 3.9287170530265206e-07, "loss": 0.1278, "step": 11526 }, { "epoch": 0.9132105367399485, "grad_norm": 1.2158623326202613, "learning_rate": 3.9215983402399736e-07, "loss": 0.1075, "step": 11527 }, { "epoch": 0.9132897603485839, "grad_norm": 1.2351151263867965, "learning_rate": 3.914485953753888e-07, "loss": 0.0794, "step": 11528 }, { "epoch": 0.9133689839572192, "grad_norm": 1.4047343315874632, "learning_rate": 3.907379894036545e-07, "loss": 0.1047, "step": 11529 }, { "epoch": 0.9134482075658547, "grad_norm": 2.2716094433711085, "learning_rate": 3.9002801615558805e-07, "loss": 0.2498, "step": 11530 }, { "epoch": 0.91352743117449, "grad_norm": 1.7866964598227082, "learning_rate": 3.893186756779366e-07, "loss": 0.1751, "step": 11531 }, { "epoch": 0.9136066547831254, "grad_norm": 1.3599024389724523, "learning_rate": 3.886099680174049e-07, "loss": 0.1578, "step": 11532 }, { "epoch": 0.9136858783917607, "grad_norm": 1.4278646897137819, "learning_rate": 3.879018932206624e-07, "loss": 0.1609, "step": 11533 }, { "epoch": 0.9137651020003961, "grad_norm": 1.7372751891319695, "learning_rate": 3.871944513343284e-07, "loss": 0.1903, "step": 11534 }, { "epoch": 0.9138443256090315, "grad_norm": 1.4377724234209375, "learning_rate": 3.864876424049857e-07, "loss": 0.1413, "step": 11535 }, { "epoch": 0.9139235492176668, "grad_norm": 1.223255049661162, "learning_rate": 3.857814664791748e-07, "loss": 0.0839, "step": 11536 }, { "epoch": 0.9140027728263023, "grad_norm": 1.4495305857515581, "learning_rate": 3.8507592360339407e-07, "loss": 0.1394, "step": 11537 }, { "epoch": 0.9140819964349376, "grad_norm": 1.6285867213681162, "learning_rate": 3.843710138240997e-07, "loss": 0.1442, "step": 11538 }, { "epoch": 0.914161220043573, "grad_norm": 1.3411802527815546, "learning_rate": 3.8366673718770564e-07, "loss": 0.102, "step": 11539 }, { "epoch": 0.9142404436522084, "grad_norm": 1.5876884342169022, "learning_rate": 3.8296309374058704e-07, "loss": 0.1504, "step": 11540 }, { "epoch": 0.9143196672608437, "grad_norm": 1.7265560493276777, "learning_rate": 3.8226008352907464e-07, "loss": 0.1295, "step": 11541 }, { "epoch": 0.9143988908694791, "grad_norm": 1.118984677423103, "learning_rate": 3.815577065994569e-07, "loss": 0.0891, "step": 11542 }, { "epoch": 0.9144781144781144, "grad_norm": 2.5964336678687228, "learning_rate": 3.8085596299798465e-07, "loss": 0.1915, "step": 11543 }, { "epoch": 0.9145573380867499, "grad_norm": 1.8021713531838894, "learning_rate": 3.801548527708621e-07, "loss": 0.1555, "step": 11544 }, { "epoch": 0.9146365616953852, "grad_norm": 1.5575912487444539, "learning_rate": 3.794543759642544e-07, "loss": 0.1348, "step": 11545 }, { "epoch": 0.9147157853040206, "grad_norm": 1.5610257957820701, "learning_rate": 3.7875453262428584e-07, "loss": 0.1445, "step": 11546 }, { "epoch": 0.914795008912656, "grad_norm": 1.4484869564359644, "learning_rate": 3.7805532279703625e-07, "loss": 0.1232, "step": 11547 }, { "epoch": 0.9148742325212913, "grad_norm": 1.4893814936190304, "learning_rate": 3.773567465285455e-07, "loss": 0.1386, "step": 11548 }, { "epoch": 0.9149534561299267, "grad_norm": 1.9547831899284507, "learning_rate": 3.7665880386481226e-07, "loss": 0.1908, "step": 11549 }, { "epoch": 0.9150326797385621, "grad_norm": 1.272824327408018, "learning_rate": 3.759614948517931e-07, "loss": 0.0918, "step": 11550 }, { "epoch": 0.9151119033471975, "grad_norm": 1.6318645870591142, "learning_rate": 3.7526481953539915e-07, "loss": 0.1976, "step": 11551 }, { "epoch": 0.9151911269558328, "grad_norm": 2.058839492962686, "learning_rate": 3.74568777961507e-07, "loss": 0.1649, "step": 11552 }, { "epoch": 0.9152703505644683, "grad_norm": 1.505000230961725, "learning_rate": 3.7387337017594674e-07, "loss": 0.1312, "step": 11553 }, { "epoch": 0.9153495741731036, "grad_norm": 1.3895035978217933, "learning_rate": 3.7317859622450714e-07, "loss": 0.1194, "step": 11554 }, { "epoch": 0.9154287977817389, "grad_norm": 1.8297069473841547, "learning_rate": 3.7248445615293506e-07, "loss": 0.1553, "step": 11555 }, { "epoch": 0.9155080213903743, "grad_norm": 1.7772629678778158, "learning_rate": 3.7179095000693723e-07, "loss": 0.1813, "step": 11556 }, { "epoch": 0.9155872449990097, "grad_norm": 1.670179032618852, "learning_rate": 3.710980778321771e-07, "loss": 0.1478, "step": 11557 }, { "epoch": 0.9156664686076451, "grad_norm": 1.6544428166526033, "learning_rate": 3.70405839674276e-07, "loss": 0.1106, "step": 11558 }, { "epoch": 0.9157456922162804, "grad_norm": 2.075296757575573, "learning_rate": 3.697142355788175e-07, "loss": 0.1677, "step": 11559 }, { "epoch": 0.9158249158249159, "grad_norm": 1.899923451301728, "learning_rate": 3.6902326559133836e-07, "loss": 0.2148, "step": 11560 }, { "epoch": 0.9159041394335512, "grad_norm": 1.7522757224615029, "learning_rate": 3.683329297573346e-07, "loss": 0.1674, "step": 11561 }, { "epoch": 0.9159833630421865, "grad_norm": 1.7573486374578033, "learning_rate": 3.6764322812226416e-07, "loss": 0.1267, "step": 11562 }, { "epoch": 0.916062586650822, "grad_norm": 1.6285239229569703, "learning_rate": 3.669541607315397e-07, "loss": 0.1264, "step": 11563 }, { "epoch": 0.9161418102594573, "grad_norm": 1.4735365135225411, "learning_rate": 3.6626572763053034e-07, "loss": 0.1486, "step": 11564 }, { "epoch": 0.9162210338680927, "grad_norm": 1.9874340762709108, "learning_rate": 3.6557792886457e-07, "loss": 0.1427, "step": 11565 }, { "epoch": 0.916300257476728, "grad_norm": 1.8464329164877902, "learning_rate": 3.6489076447894456e-07, "loss": 0.1761, "step": 11566 }, { "epoch": 0.9163794810853635, "grad_norm": 1.4968989741471816, "learning_rate": 3.642042345189023e-07, "loss": 0.1648, "step": 11567 }, { "epoch": 0.9164587046939988, "grad_norm": 1.6823491570843476, "learning_rate": 3.6351833902964485e-07, "loss": 0.1351, "step": 11568 }, { "epoch": 0.9165379283026341, "grad_norm": 1.5933423637451072, "learning_rate": 3.6283307805633714e-07, "loss": 0.1931, "step": 11569 }, { "epoch": 0.9166171519112696, "grad_norm": 1.383141472402163, "learning_rate": 3.6214845164410205e-07, "loss": 0.1039, "step": 11570 }, { "epoch": 0.9166963755199049, "grad_norm": 1.766979450990567, "learning_rate": 3.614644598380157e-07, "loss": 0.187, "step": 11571 }, { "epoch": 0.9167755991285403, "grad_norm": 1.8081345949348906, "learning_rate": 3.607811026831176e-07, "loss": 0.2761, "step": 11572 }, { "epoch": 0.9168548227371757, "grad_norm": 1.617022495769536, "learning_rate": 3.600983802244007e-07, "loss": 0.1534, "step": 11573 }, { "epoch": 0.9169340463458111, "grad_norm": 1.5527745764611558, "learning_rate": 3.594162925068234e-07, "loss": 0.1194, "step": 11574 }, { "epoch": 0.9170132699544464, "grad_norm": 1.8497763853159939, "learning_rate": 3.587348395752954e-07, "loss": 0.172, "step": 11575 }, { "epoch": 0.9170924935630818, "grad_norm": 1.7865374395055618, "learning_rate": 3.5805402147468746e-07, "loss": 0.2308, "step": 11576 }, { "epoch": 0.9171717171717172, "grad_norm": 1.812559172587517, "learning_rate": 3.573738382498271e-07, "loss": 0.1521, "step": 11577 }, { "epoch": 0.9172509407803525, "grad_norm": 1.5404171224819538, "learning_rate": 3.566942899455039e-07, "loss": 0.1433, "step": 11578 }, { "epoch": 0.917330164388988, "grad_norm": 1.3737733339516849, "learning_rate": 3.5601537660646e-07, "loss": 0.0984, "step": 11579 }, { "epoch": 0.9174093879976233, "grad_norm": 1.4178263612374546, "learning_rate": 3.553370982773985e-07, "loss": 0.1234, "step": 11580 }, { "epoch": 0.9174886116062587, "grad_norm": 1.3987086617889926, "learning_rate": 3.546594550029836e-07, "loss": 0.1506, "step": 11581 }, { "epoch": 0.917567835214894, "grad_norm": 1.3053407349738233, "learning_rate": 3.53982446827833e-07, "loss": 0.1261, "step": 11582 }, { "epoch": 0.9176470588235294, "grad_norm": 1.2981639049950129, "learning_rate": 3.533060737965244e-07, "loss": 0.129, "step": 11583 }, { "epoch": 0.9177262824321648, "grad_norm": 1.7152670182804346, "learning_rate": 3.526303359535932e-07, "loss": 0.1296, "step": 11584 }, { "epoch": 0.9178055060408001, "grad_norm": 1.4413341806356037, "learning_rate": 3.519552333435361e-07, "loss": 0.1483, "step": 11585 }, { "epoch": 0.9178847296494356, "grad_norm": 1.5133653871837465, "learning_rate": 3.5128076601080087e-07, "loss": 0.1111, "step": 11586 }, { "epoch": 0.9179639532580709, "grad_norm": 1.6782389871616465, "learning_rate": 3.5060693399980194e-07, "loss": 0.1927, "step": 11587 }, { "epoch": 0.9180431768667062, "grad_norm": 1.4103115490126616, "learning_rate": 3.499337373549072e-07, "loss": 0.1214, "step": 11588 }, { "epoch": 0.9181224004753417, "grad_norm": 2.0548603485890364, "learning_rate": 3.4926117612044117e-07, "loss": 0.1626, "step": 11589 }, { "epoch": 0.918201624083977, "grad_norm": 1.772403923619835, "learning_rate": 3.485892503406907e-07, "loss": 0.1552, "step": 11590 }, { "epoch": 0.9182808476926124, "grad_norm": 1.5935080185895591, "learning_rate": 3.4791796005989917e-07, "loss": 0.1519, "step": 11591 }, { "epoch": 0.9183600713012477, "grad_norm": 1.5556321116925378, "learning_rate": 3.4724730532226693e-07, "loss": 0.1573, "step": 11592 }, { "epoch": 0.9184392949098832, "grad_norm": 1.6362103712809155, "learning_rate": 3.4657728617195295e-07, "loss": 0.1648, "step": 11593 }, { "epoch": 0.9185185185185185, "grad_norm": 1.2292794872086064, "learning_rate": 3.459079026530754e-07, "loss": 0.1077, "step": 11594 }, { "epoch": 0.9185977421271538, "grad_norm": 1.62138625214792, "learning_rate": 3.4523915480971113e-07, "loss": 0.1229, "step": 11595 }, { "epoch": 0.9186769657357893, "grad_norm": 1.7245256202720183, "learning_rate": 3.445710426858906e-07, "loss": 0.1765, "step": 11596 }, { "epoch": 0.9187561893444246, "grad_norm": 1.3099747045657828, "learning_rate": 3.439035663256096e-07, "loss": 0.1182, "step": 11597 }, { "epoch": 0.91883541295306, "grad_norm": 1.7343117225883748, "learning_rate": 3.4323672577281754e-07, "loss": 0.1789, "step": 11598 }, { "epoch": 0.9189146365616954, "grad_norm": 2.14604571805114, "learning_rate": 3.425705210714192e-07, "loss": 0.1285, "step": 11599 }, { "epoch": 0.9189938601703308, "grad_norm": 2.048265676265873, "learning_rate": 3.419049522652851e-07, "loss": 0.1955, "step": 11600 }, { "epoch": 0.9190730837789661, "grad_norm": 1.3961606142733436, "learning_rate": 3.412400193982379e-07, "loss": 0.1103, "step": 11601 }, { "epoch": 0.9191523073876015, "grad_norm": 1.5634537347847361, "learning_rate": 3.4057572251405936e-07, "loss": 0.1258, "step": 11602 }, { "epoch": 0.9192315309962369, "grad_norm": 1.9822298179705902, "learning_rate": 3.3991206165649213e-07, "loss": 0.1528, "step": 11603 }, { "epoch": 0.9193107546048722, "grad_norm": 1.4833246757830865, "learning_rate": 3.392490368692347e-07, "loss": 0.1108, "step": 11604 }, { "epoch": 0.9193899782135077, "grad_norm": 2.0044503593001153, "learning_rate": 3.385866481959432e-07, "loss": 0.1768, "step": 11605 }, { "epoch": 0.919469201822143, "grad_norm": 1.2465074971948669, "learning_rate": 3.379248956802328e-07, "loss": 0.1063, "step": 11606 }, { "epoch": 0.9195484254307784, "grad_norm": 1.7156708888361036, "learning_rate": 3.3726377936567856e-07, "loss": 0.1476, "step": 11607 }, { "epoch": 0.9196276490394137, "grad_norm": 2.896158622026451, "learning_rate": 3.3660329929580904e-07, "loss": 0.1596, "step": 11608 }, { "epoch": 0.9197068726480491, "grad_norm": 1.3472800425011235, "learning_rate": 3.3594345551411503e-07, "loss": 0.1053, "step": 11609 }, { "epoch": 0.9197860962566845, "grad_norm": 1.1842206137896167, "learning_rate": 3.352842480640439e-07, "loss": 0.1042, "step": 11610 }, { "epoch": 0.9198653198653198, "grad_norm": 1.1964695718924736, "learning_rate": 3.346256769890022e-07, "loss": 0.1037, "step": 11611 }, { "epoch": 0.9199445434739553, "grad_norm": 1.3335666224752123, "learning_rate": 3.3396774233235173e-07, "loss": 0.0832, "step": 11612 }, { "epoch": 0.9200237670825906, "grad_norm": 1.5331923588499068, "learning_rate": 3.333104441374158e-07, "loss": 0.2056, "step": 11613 }, { "epoch": 0.920102990691226, "grad_norm": 1.7182370851103397, "learning_rate": 3.32653782447474e-07, "loss": 0.167, "step": 11614 }, { "epoch": 0.9201822142998614, "grad_norm": 1.2212365012362176, "learning_rate": 3.319977573057642e-07, "loss": 0.129, "step": 11615 }, { "epoch": 0.9202614379084967, "grad_norm": 1.8317428040916024, "learning_rate": 3.313423687554829e-07, "loss": 0.2041, "step": 11616 }, { "epoch": 0.9203406615171321, "grad_norm": 1.7281923692129009, "learning_rate": 3.3068761683978434e-07, "loss": 0.0947, "step": 11617 }, { "epoch": 0.9204198851257674, "grad_norm": 1.5338419037822246, "learning_rate": 3.3003350160177974e-07, "loss": 0.1464, "step": 11618 }, { "epoch": 0.9204991087344029, "grad_norm": 1.3389794976151554, "learning_rate": 3.293800230845412e-07, "loss": 0.1056, "step": 11619 }, { "epoch": 0.9205783323430382, "grad_norm": 1.3275207160487668, "learning_rate": 3.287271813310955e-07, "loss": 0.1365, "step": 11620 }, { "epoch": 0.9206575559516736, "grad_norm": 1.3843883194498903, "learning_rate": 3.280749763844293e-07, "loss": 0.0964, "step": 11621 }, { "epoch": 0.920736779560309, "grad_norm": 1.5719911462765894, "learning_rate": 3.274234082874872e-07, "loss": 0.152, "step": 11622 }, { "epoch": 0.9208160031689443, "grad_norm": 1.874855980187166, "learning_rate": 3.267724770831737e-07, "loss": 0.1273, "step": 11623 }, { "epoch": 0.9208952267775797, "grad_norm": 1.6198544459298125, "learning_rate": 3.2612218281434794e-07, "loss": 0.1575, "step": 11624 }, { "epoch": 0.9209744503862151, "grad_norm": 1.763212027577717, "learning_rate": 3.254725255238267e-07, "loss": 0.2235, "step": 11625 }, { "epoch": 0.9210536739948505, "grad_norm": 1.6911522138304946, "learning_rate": 3.2482350525439023e-07, "loss": 0.1513, "step": 11626 }, { "epoch": 0.9211328976034858, "grad_norm": 1.456452226797364, "learning_rate": 3.241751220487721e-07, "loss": 0.1372, "step": 11627 }, { "epoch": 0.9212121212121213, "grad_norm": 1.4405901795807932, "learning_rate": 3.235273759496638e-07, "loss": 0.1307, "step": 11628 }, { "epoch": 0.9212913448207566, "grad_norm": 1.0882347880236594, "learning_rate": 3.2288026699971884e-07, "loss": 0.0769, "step": 11629 }, { "epoch": 0.9213705684293919, "grad_norm": 1.3426771319618553, "learning_rate": 3.222337952415455e-07, "loss": 0.1046, "step": 11630 }, { "epoch": 0.9214497920380273, "grad_norm": 1.5799093221154634, "learning_rate": 3.215879607177086e-07, "loss": 0.1304, "step": 11631 }, { "epoch": 0.9215290156466627, "grad_norm": 1.968446649826686, "learning_rate": 3.2094276347073626e-07, "loss": 0.2047, "step": 11632 }, { "epoch": 0.9216082392552981, "grad_norm": 2.280575118186498, "learning_rate": 3.2029820354311014e-07, "loss": 0.1311, "step": 11633 }, { "epoch": 0.9216874628639334, "grad_norm": 1.4690123540354785, "learning_rate": 3.196542809772707e-07, "loss": 0.1572, "step": 11634 }, { "epoch": 0.9217666864725689, "grad_norm": 1.5379390361554928, "learning_rate": 3.1901099581561846e-07, "loss": 0.1375, "step": 11635 }, { "epoch": 0.9218459100812042, "grad_norm": 1.2376937629579075, "learning_rate": 3.183683481005106e-07, "loss": 0.1077, "step": 11636 }, { "epoch": 0.9219251336898395, "grad_norm": 1.4936200104044857, "learning_rate": 3.1772633787426233e-07, "loss": 0.1323, "step": 11637 }, { "epoch": 0.922004357298475, "grad_norm": 1.7174130093389426, "learning_rate": 3.1708496517914523e-07, "loss": 0.2056, "step": 11638 }, { "epoch": 0.9220835809071103, "grad_norm": 1.4418865168568653, "learning_rate": 3.1644423005739335e-07, "loss": 0.1508, "step": 11639 }, { "epoch": 0.9221628045157457, "grad_norm": 1.316743610570187, "learning_rate": 3.15804132551194e-07, "loss": 0.1196, "step": 11640 }, { "epoch": 0.922242028124381, "grad_norm": 1.7073826396258642, "learning_rate": 3.151646727026947e-07, "loss": 0.1647, "step": 11641 }, { "epoch": 0.9223212517330165, "grad_norm": 1.6376988051497907, "learning_rate": 3.1452585055400167e-07, "loss": 0.1989, "step": 11642 }, { "epoch": 0.9224004753416518, "grad_norm": 1.4923469373310065, "learning_rate": 3.138876661471779e-07, "loss": 0.1281, "step": 11643 }, { "epoch": 0.9224796989502871, "grad_norm": 1.2899564588739745, "learning_rate": 3.1325011952424435e-07, "loss": 0.1175, "step": 11644 }, { "epoch": 0.9225589225589226, "grad_norm": 1.7718355652328004, "learning_rate": 3.1261321072718063e-07, "loss": 0.1712, "step": 11645 }, { "epoch": 0.9226381461675579, "grad_norm": 1.6351016246175194, "learning_rate": 3.1197693979792556e-07, "loss": 0.1519, "step": 11646 }, { "epoch": 0.9227173697761933, "grad_norm": 1.628244352165676, "learning_rate": 3.1134130677837103e-07, "loss": 0.127, "step": 11647 }, { "epoch": 0.9227965933848287, "grad_norm": 1.5151095923441855, "learning_rate": 3.107063117103759e-07, "loss": 0.0792, "step": 11648 }, { "epoch": 0.9228758169934641, "grad_norm": 2.4022307320517458, "learning_rate": 3.100719546357467e-07, "loss": 0.1349, "step": 11649 }, { "epoch": 0.9229550406020994, "grad_norm": 1.6479057999461215, "learning_rate": 3.0943823559625217e-07, "loss": 0.1886, "step": 11650 }, { "epoch": 0.9230342642107348, "grad_norm": 1.8424348287345138, "learning_rate": 3.088051546336246e-07, "loss": 0.2588, "step": 11651 }, { "epoch": 0.9231134878193702, "grad_norm": 1.6874009228266151, "learning_rate": 3.08172711789545e-07, "loss": 0.1619, "step": 11652 }, { "epoch": 0.9231927114280055, "grad_norm": 1.5086799190636198, "learning_rate": 3.0754090710565785e-07, "loss": 0.2206, "step": 11653 }, { "epoch": 0.923271935036641, "grad_norm": 1.7584008865145941, "learning_rate": 3.069097406235666e-07, "loss": 0.1813, "step": 11654 }, { "epoch": 0.9233511586452763, "grad_norm": 1.307879113416923, "learning_rate": 3.0627921238482794e-07, "loss": 0.148, "step": 11655 }, { "epoch": 0.9234303822539117, "grad_norm": 1.791320729241961, "learning_rate": 3.056493224309587e-07, "loss": 0.1778, "step": 11656 }, { "epoch": 0.923509605862547, "grad_norm": 1.9374329520498341, "learning_rate": 3.0502007080343675e-07, "loss": 0.167, "step": 11657 }, { "epoch": 0.9235888294711824, "grad_norm": 1.3895261273343018, "learning_rate": 3.043914575436946e-07, "loss": 0.1162, "step": 11658 }, { "epoch": 0.9236680530798178, "grad_norm": 1.3992705024143886, "learning_rate": 3.0376348269312017e-07, "loss": 0.1089, "step": 11659 }, { "epoch": 0.9237472766884531, "grad_norm": 1.3016853448378236, "learning_rate": 3.031361462930671e-07, "loss": 0.1353, "step": 11660 }, { "epoch": 0.9238265002970886, "grad_norm": 2.241201197216433, "learning_rate": 3.025094483848401e-07, "loss": 0.164, "step": 11661 }, { "epoch": 0.9239057239057239, "grad_norm": 1.8320411760526856, "learning_rate": 3.0188338900970505e-07, "loss": 0.2267, "step": 11662 }, { "epoch": 0.9239849475143593, "grad_norm": 1.4397073178874091, "learning_rate": 3.0125796820888343e-07, "loss": 0.1176, "step": 11663 }, { "epoch": 0.9240641711229947, "grad_norm": 1.4271924456601242, "learning_rate": 3.0063318602355787e-07, "loss": 0.1182, "step": 11664 }, { "epoch": 0.92414339473163, "grad_norm": 1.8845527000929638, "learning_rate": 3.000090424948665e-07, "loss": 0.1804, "step": 11665 }, { "epoch": 0.9242226183402654, "grad_norm": 2.2425951312324215, "learning_rate": 2.993855376639054e-07, "loss": 0.2159, "step": 11666 }, { "epoch": 0.9243018419489007, "grad_norm": 1.778330137799533, "learning_rate": 2.987626715717318e-07, "loss": 0.2485, "step": 11667 }, { "epoch": 0.9243810655575362, "grad_norm": 1.5228936539295022, "learning_rate": 2.9814044425935605e-07, "loss": 0.1393, "step": 11668 }, { "epoch": 0.9244602891661715, "grad_norm": 1.3641193094591961, "learning_rate": 2.9751885576774887e-07, "loss": 0.1682, "step": 11669 }, { "epoch": 0.9245395127748068, "grad_norm": 1.4390503180778005, "learning_rate": 2.9689790613784073e-07, "loss": 0.1204, "step": 11670 }, { "epoch": 0.9246187363834423, "grad_norm": 1.675419583148962, "learning_rate": 2.962775954105179e-07, "loss": 0.1268, "step": 11671 }, { "epoch": 0.9246979599920776, "grad_norm": 1.2661279475292775, "learning_rate": 2.9565792362662213e-07, "loss": 0.0962, "step": 11672 }, { "epoch": 0.924777183600713, "grad_norm": 1.8175015118198954, "learning_rate": 2.9503889082695967e-07, "loss": 0.164, "step": 11673 }, { "epoch": 0.9248564072093484, "grad_norm": 1.6461564442567023, "learning_rate": 2.9442049705228794e-07, "loss": 0.0976, "step": 11674 }, { "epoch": 0.9249356308179838, "grad_norm": 1.505026992419722, "learning_rate": 2.938027423433254e-07, "loss": 0.1249, "step": 11675 }, { "epoch": 0.9250148544266191, "grad_norm": 1.5764070871938913, "learning_rate": 2.931856267407507e-07, "loss": 0.1244, "step": 11676 }, { "epoch": 0.9250940780352545, "grad_norm": 1.708192777427909, "learning_rate": 2.9256915028519575e-07, "loss": 0.1479, "step": 11677 }, { "epoch": 0.9251733016438899, "grad_norm": 1.904073161409538, "learning_rate": 2.919533130172536e-07, "loss": 0.1764, "step": 11678 }, { "epoch": 0.9252525252525252, "grad_norm": 1.5731454496797521, "learning_rate": 2.913381149774719e-07, "loss": 0.1007, "step": 11679 }, { "epoch": 0.9253317488611607, "grad_norm": 1.232005169787576, "learning_rate": 2.907235562063615e-07, "loss": 0.1047, "step": 11680 }, { "epoch": 0.925410972469796, "grad_norm": 2.1321905265613723, "learning_rate": 2.9010963674438674e-07, "loss": 0.3028, "step": 11681 }, { "epoch": 0.9254901960784314, "grad_norm": 1.5450402958356704, "learning_rate": 2.8949635663197087e-07, "loss": 0.1288, "step": 11682 }, { "epoch": 0.9255694196870667, "grad_norm": 1.3152807694139295, "learning_rate": 2.8888371590949703e-07, "loss": 0.114, "step": 11683 }, { "epoch": 0.9256486432957021, "grad_norm": 1.5908223031267135, "learning_rate": 2.882717146173031e-07, "loss": 0.1605, "step": 11684 }, { "epoch": 0.9257278669043375, "grad_norm": 1.744822215313669, "learning_rate": 2.8766035279568563e-07, "loss": 0.1654, "step": 11685 }, { "epoch": 0.9258070905129728, "grad_norm": 1.4112028815113598, "learning_rate": 2.8704963048490243e-07, "loss": 0.1136, "step": 11686 }, { "epoch": 0.9258863141216083, "grad_norm": 1.3275109723200844, "learning_rate": 2.864395477251658e-07, "loss": 0.1271, "step": 11687 }, { "epoch": 0.9259655377302436, "grad_norm": 1.9629416281676029, "learning_rate": 2.858301045566447e-07, "loss": 0.1316, "step": 11688 }, { "epoch": 0.926044761338879, "grad_norm": 1.5980592306847683, "learning_rate": 2.8522130101947045e-07, "loss": 0.1265, "step": 11689 }, { "epoch": 0.9261239849475144, "grad_norm": 1.4230943112183498, "learning_rate": 2.8461313715372976e-07, "loss": 0.0955, "step": 11690 }, { "epoch": 0.9262032085561497, "grad_norm": 1.8304223296406066, "learning_rate": 2.8400561299946503e-07, "loss": 0.1766, "step": 11691 }, { "epoch": 0.9262824321647851, "grad_norm": 1.7353593044395943, "learning_rate": 2.8339872859668103e-07, "loss": 0.1552, "step": 11692 }, { "epoch": 0.9263616557734204, "grad_norm": 1.9076415849588544, "learning_rate": 2.82792483985338e-07, "loss": 0.1213, "step": 11693 }, { "epoch": 0.9264408793820559, "grad_norm": 1.8558378166517273, "learning_rate": 2.8218687920535395e-07, "loss": 0.1345, "step": 11694 }, { "epoch": 0.9265201029906912, "grad_norm": 1.4577450763751478, "learning_rate": 2.8158191429660364e-07, "loss": 0.1095, "step": 11695 }, { "epoch": 0.9265993265993266, "grad_norm": 1.4249962356324197, "learning_rate": 2.8097758929892196e-07, "loss": 0.1354, "step": 11696 }, { "epoch": 0.926678550207962, "grad_norm": 1.5769509722267274, "learning_rate": 2.803739042521025e-07, "loss": 0.1524, "step": 11697 }, { "epoch": 0.9267577738165973, "grad_norm": 1.644574895200532, "learning_rate": 2.7977085919589253e-07, "loss": 0.1624, "step": 11698 }, { "epoch": 0.9268369974252327, "grad_norm": 1.8638902429061044, "learning_rate": 2.791684541700013e-07, "loss": 0.1791, "step": 11699 }, { "epoch": 0.9269162210338681, "grad_norm": 1.5134004418321778, "learning_rate": 2.785666892140937e-07, "loss": 0.1345, "step": 11700 }, { "epoch": 0.9269954446425035, "grad_norm": 1.3544409921124534, "learning_rate": 2.7796556436779144e-07, "loss": 0.1125, "step": 11701 }, { "epoch": 0.9270746682511388, "grad_norm": 1.334768128109655, "learning_rate": 2.773650796706795e-07, "loss": 0.1241, "step": 11702 }, { "epoch": 0.9271538918597743, "grad_norm": 2.256049474205574, "learning_rate": 2.7676523516229404e-07, "loss": 0.1636, "step": 11703 }, { "epoch": 0.9272331154684096, "grad_norm": 1.2334360930280261, "learning_rate": 2.7616603088213126e-07, "loss": 0.093, "step": 11704 }, { "epoch": 0.9273123390770449, "grad_norm": 1.8538842423884008, "learning_rate": 2.755674668696495e-07, "loss": 0.1198, "step": 11705 }, { "epoch": 0.9273915626856803, "grad_norm": 1.7571718732385617, "learning_rate": 2.749695431642574e-07, "loss": 0.1522, "step": 11706 }, { "epoch": 0.9274707862943157, "grad_norm": 1.6408522026123453, "learning_rate": 2.743722598053278e-07, "loss": 0.1488, "step": 11707 }, { "epoch": 0.9275500099029511, "grad_norm": 1.5240448412180032, "learning_rate": 2.737756168321881e-07, "loss": 0.1568, "step": 11708 }, { "epoch": 0.9276292335115864, "grad_norm": 1.6592881811409121, "learning_rate": 2.7317961428412475e-07, "loss": 0.117, "step": 11709 }, { "epoch": 0.9277084571202219, "grad_norm": 2.0577890108814145, "learning_rate": 2.7258425220038077e-07, "loss": 0.2154, "step": 11710 }, { "epoch": 0.9277876807288572, "grad_norm": 1.522763535952322, "learning_rate": 2.719895306201581e-07, "loss": 0.158, "step": 11711 }, { "epoch": 0.9278669043374925, "grad_norm": 2.1385756090334054, "learning_rate": 2.7139544958261765e-07, "loss": 0.1965, "step": 11712 }, { "epoch": 0.927946127946128, "grad_norm": 1.5491272044064366, "learning_rate": 2.7080200912687484e-07, "loss": 0.1211, "step": 11713 }, { "epoch": 0.9280253515547633, "grad_norm": 1.7967937233245501, "learning_rate": 2.702092092920061e-07, "loss": 0.1519, "step": 11714 }, { "epoch": 0.9281045751633987, "grad_norm": 2.0472651051884556, "learning_rate": 2.6961705011704475e-07, "loss": 0.2119, "step": 11715 }, { "epoch": 0.928183798772034, "grad_norm": 1.8568698084885338, "learning_rate": 2.6902553164098065e-07, "loss": 0.1477, "step": 11716 }, { "epoch": 0.9282630223806695, "grad_norm": 1.8856808832454568, "learning_rate": 2.684346539027616e-07, "loss": 0.1985, "step": 11717 }, { "epoch": 0.9283422459893048, "grad_norm": 1.3754590570852714, "learning_rate": 2.6784441694129747e-07, "loss": 0.1434, "step": 11718 }, { "epoch": 0.9284214695979401, "grad_norm": 1.5196496708283196, "learning_rate": 2.672548207954495e-07, "loss": 0.1058, "step": 11719 }, { "epoch": 0.9285006932065756, "grad_norm": 1.904537390083598, "learning_rate": 2.6666586550403884e-07, "loss": 0.2214, "step": 11720 }, { "epoch": 0.9285799168152109, "grad_norm": 1.547897253552797, "learning_rate": 2.6607755110584886e-07, "loss": 0.2243, "step": 11721 }, { "epoch": 0.9286591404238463, "grad_norm": 1.9861157922471282, "learning_rate": 2.654898776396164e-07, "loss": 0.1466, "step": 11722 }, { "epoch": 0.9287383640324817, "grad_norm": 1.3037673283245397, "learning_rate": 2.64902845144035e-07, "loss": 0.1046, "step": 11723 }, { "epoch": 0.9288175876411171, "grad_norm": 1.5569984170578262, "learning_rate": 2.6431645365775806e-07, "loss": 0.1834, "step": 11724 }, { "epoch": 0.9288968112497524, "grad_norm": 1.6671195524416909, "learning_rate": 2.637307032193992e-07, "loss": 0.1805, "step": 11725 }, { "epoch": 0.9289760348583878, "grad_norm": 1.8116149059344988, "learning_rate": 2.6314559386752423e-07, "loss": 0.1773, "step": 11726 }, { "epoch": 0.9290552584670232, "grad_norm": 1.6679532697430413, "learning_rate": 2.6256112564066236e-07, "loss": 0.1572, "step": 11727 }, { "epoch": 0.9291344820756585, "grad_norm": 1.2627075249199327, "learning_rate": 2.6197729857729617e-07, "loss": 0.1052, "step": 11728 }, { "epoch": 0.929213705684294, "grad_norm": 1.873068512818658, "learning_rate": 2.613941127158681e-07, "loss": 0.1858, "step": 11729 }, { "epoch": 0.9292929292929293, "grad_norm": 1.7520818616751512, "learning_rate": 2.608115680947787e-07, "loss": 0.0973, "step": 11730 }, { "epoch": 0.9293721529015647, "grad_norm": 1.372344191359215, "learning_rate": 2.602296647523861e-07, "loss": 0.1562, "step": 11731 }, { "epoch": 0.9294513765102, "grad_norm": 1.5803983238554975, "learning_rate": 2.596484027270041e-07, "loss": 0.1211, "step": 11732 }, { "epoch": 0.9295306001188354, "grad_norm": 1.6961601693365884, "learning_rate": 2.5906778205690876e-07, "loss": 0.1615, "step": 11733 }, { "epoch": 0.9296098237274708, "grad_norm": 1.6323890919766126, "learning_rate": 2.5848780278032836e-07, "loss": 0.1163, "step": 11734 }, { "epoch": 0.9296890473361061, "grad_norm": 1.93391318545221, "learning_rate": 2.579084649354546e-07, "loss": 0.1493, "step": 11735 }, { "epoch": 0.9297682709447416, "grad_norm": 1.23711990802364, "learning_rate": 2.5732976856043034e-07, "loss": 0.1228, "step": 11736 }, { "epoch": 0.9298474945533769, "grad_norm": 1.402640321770987, "learning_rate": 2.5675171369336284e-07, "loss": 0.0972, "step": 11737 }, { "epoch": 0.9299267181620123, "grad_norm": 1.874691499538644, "learning_rate": 2.5617430037231495e-07, "loss": 0.2169, "step": 11738 }, { "epoch": 0.9300059417706477, "grad_norm": 1.503266271582883, "learning_rate": 2.5559752863530295e-07, "loss": 0.134, "step": 11739 }, { "epoch": 0.930085165379283, "grad_norm": 1.30202069378424, "learning_rate": 2.550213985203076e-07, "loss": 0.1195, "step": 11740 }, { "epoch": 0.9301643889879184, "grad_norm": 2.0467902772257935, "learning_rate": 2.54445910065263e-07, "loss": 0.2347, "step": 11741 }, { "epoch": 0.9302436125965537, "grad_norm": 1.556236904715564, "learning_rate": 2.538710633080621e-07, "loss": 0.1348, "step": 11742 }, { "epoch": 0.9303228362051892, "grad_norm": 1.5966395550494747, "learning_rate": 2.5329685828655803e-07, "loss": 0.136, "step": 11743 }, { "epoch": 0.9304020598138245, "grad_norm": 1.5473971958105641, "learning_rate": 2.527232950385572e-07, "loss": 0.127, "step": 11744 }, { "epoch": 0.93048128342246, "grad_norm": 1.5703237930725045, "learning_rate": 2.521503736018249e-07, "loss": 0.1049, "step": 11745 }, { "epoch": 0.9305605070310953, "grad_norm": 1.448853025755151, "learning_rate": 2.5157809401408775e-07, "loss": 0.0826, "step": 11746 }, { "epoch": 0.9306397306397306, "grad_norm": 2.0281940952923128, "learning_rate": 2.510064563130277e-07, "loss": 0.2286, "step": 11747 }, { "epoch": 0.930718954248366, "grad_norm": 1.551753253328473, "learning_rate": 2.5043546053628245e-07, "loss": 0.1233, "step": 11748 }, { "epoch": 0.9307981778570014, "grad_norm": 1.441535896794083, "learning_rate": 2.498651067214497e-07, "loss": 0.1469, "step": 11749 }, { "epoch": 0.9308774014656368, "grad_norm": 1.6904437906659537, "learning_rate": 2.4929539490608614e-07, "loss": 0.142, "step": 11750 }, { "epoch": 0.9309566250742721, "grad_norm": 1.6465222213549568, "learning_rate": 2.487263251277028e-07, "loss": 0.1839, "step": 11751 }, { "epoch": 0.9310358486829075, "grad_norm": 1.2254054174644144, "learning_rate": 2.481578974237697e-07, "loss": 0.1133, "step": 11752 }, { "epoch": 0.9311150722915429, "grad_norm": 1.437412364689459, "learning_rate": 2.475901118317181e-07, "loss": 0.1271, "step": 11753 }, { "epoch": 0.9311942959001782, "grad_norm": 2.473454675057739, "learning_rate": 2.4702296838893134e-07, "loss": 0.1931, "step": 11754 }, { "epoch": 0.9312735195088137, "grad_norm": 1.5278442789807862, "learning_rate": 2.464564671327529e-07, "loss": 0.1404, "step": 11755 }, { "epoch": 0.931352743117449, "grad_norm": 2.442156424732724, "learning_rate": 2.4589060810048635e-07, "loss": 0.207, "step": 11756 }, { "epoch": 0.9314319667260844, "grad_norm": 1.7555572369017296, "learning_rate": 2.453253913293896e-07, "loss": 0.195, "step": 11757 }, { "epoch": 0.9315111903347197, "grad_norm": 1.5702849739689868, "learning_rate": 2.447608168566784e-07, "loss": 0.1222, "step": 11758 }, { "epoch": 0.9315904139433551, "grad_norm": 1.791227537014577, "learning_rate": 2.441968847195286e-07, "loss": 0.155, "step": 11759 }, { "epoch": 0.9316696375519905, "grad_norm": 1.5921267912218913, "learning_rate": 2.4363359495507166e-07, "loss": 0.1542, "step": 11760 }, { "epoch": 0.9317488611606258, "grad_norm": 1.5137004254318036, "learning_rate": 2.430709476003978e-07, "loss": 0.1184, "step": 11761 }, { "epoch": 0.9318280847692613, "grad_norm": 1.537585032324018, "learning_rate": 2.425089426925553e-07, "loss": 0.1103, "step": 11762 }, { "epoch": 0.9319073083778966, "grad_norm": 1.166210295983881, "learning_rate": 2.419475802685489e-07, "loss": 0.1257, "step": 11763 }, { "epoch": 0.931986531986532, "grad_norm": 1.5056680088985932, "learning_rate": 2.413868603653413e-07, "loss": 0.1432, "step": 11764 }, { "epoch": 0.9320657555951674, "grad_norm": 1.7107389531715969, "learning_rate": 2.4082678301985297e-07, "loss": 0.1635, "step": 11765 }, { "epoch": 0.9321449792038027, "grad_norm": 1.7032793824361265, "learning_rate": 2.402673482689633e-07, "loss": 0.1455, "step": 11766 }, { "epoch": 0.9322242028124381, "grad_norm": 1.7789611557520117, "learning_rate": 2.3970855614950827e-07, "loss": 0.1684, "step": 11767 }, { "epoch": 0.9323034264210734, "grad_norm": 1.2485721638856666, "learning_rate": 2.3915040669828084e-07, "loss": 0.116, "step": 11768 }, { "epoch": 0.9323826500297089, "grad_norm": 1.5400855527799107, "learning_rate": 2.385928999520326e-07, "loss": 0.1674, "step": 11769 }, { "epoch": 0.9324618736383442, "grad_norm": 1.8110532243877115, "learning_rate": 2.3803603594747427e-07, "loss": 0.1637, "step": 11770 }, { "epoch": 0.9325410972469796, "grad_norm": 1.3892586061718182, "learning_rate": 2.374798147212698e-07, "loss": 0.0876, "step": 11771 }, { "epoch": 0.932620320855615, "grad_norm": 1.6998567675729621, "learning_rate": 2.3692423631004658e-07, "loss": 0.1526, "step": 11772 }, { "epoch": 0.9326995444642503, "grad_norm": 1.5728413231250775, "learning_rate": 2.3636930075038534e-07, "loss": 0.1644, "step": 11773 }, { "epoch": 0.9327787680728857, "grad_norm": 1.306152458781243, "learning_rate": 2.3581500807882462e-07, "loss": 0.1013, "step": 11774 }, { "epoch": 0.9328579916815211, "grad_norm": 1.2367809775139602, "learning_rate": 2.3526135833186527e-07, "loss": 0.0999, "step": 11775 }, { "epoch": 0.9329372152901565, "grad_norm": 1.518973710176375, "learning_rate": 2.3470835154595918e-07, "loss": 0.1551, "step": 11776 }, { "epoch": 0.9330164388987918, "grad_norm": 1.916302663346596, "learning_rate": 2.3415598775752057e-07, "loss": 0.154, "step": 11777 }, { "epoch": 0.9330956625074273, "grad_norm": 1.5825780600232948, "learning_rate": 2.3360426700292038e-07, "loss": 0.1525, "step": 11778 }, { "epoch": 0.9331748861160626, "grad_norm": 1.7366911644653875, "learning_rate": 2.330531893184873e-07, "loss": 0.1327, "step": 11779 }, { "epoch": 0.9332541097246979, "grad_norm": 1.5056015014530373, "learning_rate": 2.3250275474050565e-07, "loss": 0.1138, "step": 11780 }, { "epoch": 0.9333333333333333, "grad_norm": 1.8118610587436466, "learning_rate": 2.3195296330521756e-07, "loss": 0.1623, "step": 11781 }, { "epoch": 0.9334125569419687, "grad_norm": 1.101253846813473, "learning_rate": 2.3140381504882736e-07, "loss": 0.0734, "step": 11782 }, { "epoch": 0.9334917805506041, "grad_norm": 1.2918483464448234, "learning_rate": 2.3085531000749285e-07, "loss": 0.1189, "step": 11783 }, { "epoch": 0.9335710041592394, "grad_norm": 1.399526896938626, "learning_rate": 2.3030744821732953e-07, "loss": 0.1037, "step": 11784 }, { "epoch": 0.9336502277678749, "grad_norm": 2.192032769487839, "learning_rate": 2.297602297144119e-07, "loss": 0.2054, "step": 11785 }, { "epoch": 0.9337294513765102, "grad_norm": 1.487565667790463, "learning_rate": 2.2921365453477229e-07, "loss": 0.1547, "step": 11786 }, { "epoch": 0.9338086749851455, "grad_norm": 1.9532299725016544, "learning_rate": 2.286677227143985e-07, "loss": 0.1657, "step": 11787 }, { "epoch": 0.933887898593781, "grad_norm": 1.8591841442476353, "learning_rate": 2.2812243428923964e-07, "loss": 0.1968, "step": 11788 }, { "epoch": 0.9339671222024163, "grad_norm": 1.3346485701087105, "learning_rate": 2.2757778929519914e-07, "loss": 0.1162, "step": 11789 }, { "epoch": 0.9340463458110517, "grad_norm": 1.494221886850817, "learning_rate": 2.2703378776813833e-07, "loss": 0.1565, "step": 11790 }, { "epoch": 0.934125569419687, "grad_norm": 1.690149925920761, "learning_rate": 2.2649042974387858e-07, "loss": 0.1263, "step": 11791 }, { "epoch": 0.9342047930283225, "grad_norm": 1.5318479002724577, "learning_rate": 2.259477152581979e-07, "loss": 0.1528, "step": 11792 }, { "epoch": 0.9342840166369578, "grad_norm": 1.6617996207191215, "learning_rate": 2.2540564434682998e-07, "loss": 0.1469, "step": 11793 }, { "epoch": 0.9343632402455931, "grad_norm": 1.573186267108376, "learning_rate": 2.2486421704546623e-07, "loss": 0.1398, "step": 11794 }, { "epoch": 0.9344424638542286, "grad_norm": 2.01657978511238, "learning_rate": 2.2432343338976038e-07, "loss": 0.1498, "step": 11795 }, { "epoch": 0.9345216874628639, "grad_norm": 1.5210543427056888, "learning_rate": 2.2378329341531946e-07, "loss": 0.1344, "step": 11796 }, { "epoch": 0.9346009110714993, "grad_norm": 1.9042795631154794, "learning_rate": 2.2324379715770728e-07, "loss": 0.1647, "step": 11797 }, { "epoch": 0.9346801346801347, "grad_norm": 1.9030321722899153, "learning_rate": 2.2270494465244874e-07, "loss": 0.1827, "step": 11798 }, { "epoch": 0.9347593582887701, "grad_norm": 1.4187241485207902, "learning_rate": 2.2216673593502437e-07, "loss": 0.1131, "step": 11799 }, { "epoch": 0.9348385818974054, "grad_norm": 1.3954590933482958, "learning_rate": 2.2162917104087245e-07, "loss": 0.1304, "step": 11800 }, { "epoch": 0.9349178055060408, "grad_norm": 1.8524393466504487, "learning_rate": 2.2109225000538915e-07, "loss": 0.1664, "step": 11801 }, { "epoch": 0.9349970291146762, "grad_norm": 1.301756748047014, "learning_rate": 2.2055597286392838e-07, "loss": 0.1274, "step": 11802 }, { "epoch": 0.9350762527233115, "grad_norm": 1.5516407754826604, "learning_rate": 2.200203396517997e-07, "loss": 0.0869, "step": 11803 }, { "epoch": 0.935155476331947, "grad_norm": 1.4493743024754189, "learning_rate": 2.19485350404276e-07, "loss": 0.1283, "step": 11804 }, { "epoch": 0.9352346999405823, "grad_norm": 1.583324656995076, "learning_rate": 2.1895100515658019e-07, "loss": 0.1607, "step": 11805 }, { "epoch": 0.9353139235492177, "grad_norm": 1.7837059448578352, "learning_rate": 2.1841730394389527e-07, "loss": 0.1668, "step": 11806 }, { "epoch": 0.935393147157853, "grad_norm": 1.5015022099402, "learning_rate": 2.1788424680136756e-07, "loss": 0.1383, "step": 11807 }, { "epoch": 0.9354723707664884, "grad_norm": 1.4783481188874439, "learning_rate": 2.173518337640923e-07, "loss": 0.1345, "step": 11808 }, { "epoch": 0.9355515943751238, "grad_norm": 1.6246832148375558, "learning_rate": 2.1682006486712703e-07, "loss": 0.1494, "step": 11809 }, { "epoch": 0.9356308179837591, "grad_norm": 1.719021390524658, "learning_rate": 2.1628894014548819e-07, "loss": 0.1951, "step": 11810 }, { "epoch": 0.9357100415923946, "grad_norm": 1.4800054525416697, "learning_rate": 2.1575845963414555e-07, "loss": 0.1459, "step": 11811 }, { "epoch": 0.9357892652010299, "grad_norm": 1.8837283606291502, "learning_rate": 2.1522862336803008e-07, "loss": 0.2133, "step": 11812 }, { "epoch": 0.9358684888096653, "grad_norm": 1.4382904518897168, "learning_rate": 2.146994313820283e-07, "loss": 0.1649, "step": 11813 }, { "epoch": 0.9359477124183007, "grad_norm": 1.4455128948114109, "learning_rate": 2.141708837109846e-07, "loss": 0.1294, "step": 11814 }, { "epoch": 0.936026936026936, "grad_norm": 1.5174409312633135, "learning_rate": 2.136429803897022e-07, "loss": 0.1672, "step": 11815 }, { "epoch": 0.9361061596355714, "grad_norm": 1.8262995788463858, "learning_rate": 2.1311572145294114e-07, "loss": 0.1997, "step": 11816 }, { "epoch": 0.9361853832442067, "grad_norm": 1.1188181527878025, "learning_rate": 2.1258910693541802e-07, "loss": 0.1053, "step": 11817 }, { "epoch": 0.9362646068528422, "grad_norm": 1.50152094855707, "learning_rate": 2.1206313687180845e-07, "loss": 0.1481, "step": 11818 }, { "epoch": 0.9363438304614775, "grad_norm": 1.3124352964054526, "learning_rate": 2.1153781129674367e-07, "loss": 0.1167, "step": 11819 }, { "epoch": 0.936423054070113, "grad_norm": 2.4761160578646013, "learning_rate": 2.1101313024481595e-07, "loss": 0.1853, "step": 11820 }, { "epoch": 0.9365022776787483, "grad_norm": 1.6695380534388198, "learning_rate": 2.1048909375057103e-07, "loss": 0.1309, "step": 11821 }, { "epoch": 0.9365815012873836, "grad_norm": 1.2270510797044871, "learning_rate": 2.0996570184851572e-07, "loss": 0.1013, "step": 11822 }, { "epoch": 0.936660724896019, "grad_norm": 1.6055190771815957, "learning_rate": 2.0944295457311247e-07, "loss": 0.231, "step": 11823 }, { "epoch": 0.9367399485046544, "grad_norm": 1.5052949547604648, "learning_rate": 2.0892085195878154e-07, "loss": 0.2038, "step": 11824 }, { "epoch": 0.9368191721132898, "grad_norm": 1.1386516468727237, "learning_rate": 2.0839939403989984e-07, "loss": 0.1102, "step": 11825 }, { "epoch": 0.9368983957219251, "grad_norm": 1.519251922528252, "learning_rate": 2.078785808508055e-07, "loss": 0.1124, "step": 11826 }, { "epoch": 0.9369776193305605, "grad_norm": 1.6783001312255867, "learning_rate": 2.0735841242578992e-07, "loss": 0.1729, "step": 11827 }, { "epoch": 0.9370568429391959, "grad_norm": 1.6920556802597297, "learning_rate": 2.068388887991013e-07, "loss": 0.176, "step": 11828 }, { "epoch": 0.9371360665478312, "grad_norm": 2.1153530231105413, "learning_rate": 2.0632001000495228e-07, "loss": 0.1886, "step": 11829 }, { "epoch": 0.9372152901564667, "grad_norm": 1.309756203243174, "learning_rate": 2.0580177607750663e-07, "loss": 0.1186, "step": 11830 }, { "epoch": 0.937294513765102, "grad_norm": 1.5113294616684176, "learning_rate": 2.0528418705088592e-07, "loss": 0.1788, "step": 11831 }, { "epoch": 0.9373737373737374, "grad_norm": 1.8656262585228682, "learning_rate": 2.0476724295917294e-07, "loss": 0.1708, "step": 11832 }, { "epoch": 0.9374529609823727, "grad_norm": 1.4272889626751728, "learning_rate": 2.04250943836406e-07, "loss": 0.1225, "step": 11833 }, { "epoch": 0.9375321845910081, "grad_norm": 1.6975446229970488, "learning_rate": 2.0373528971658009e-07, "loss": 0.177, "step": 11834 }, { "epoch": 0.9376114081996435, "grad_norm": 1.502992718576467, "learning_rate": 2.0322028063364806e-07, "loss": 0.1683, "step": 11835 }, { "epoch": 0.9376906318082788, "grad_norm": 1.724092480014759, "learning_rate": 2.0270591662152173e-07, "loss": 0.1898, "step": 11836 }, { "epoch": 0.9377698554169143, "grad_norm": 2.1788719188978125, "learning_rate": 2.0219219771406952e-07, "loss": 0.2069, "step": 11837 }, { "epoch": 0.9378490790255496, "grad_norm": 1.595599678095054, "learning_rate": 2.0167912394511657e-07, "loss": 0.1423, "step": 11838 }, { "epoch": 0.937928302634185, "grad_norm": 1.422738462376261, "learning_rate": 2.01166695348447e-07, "loss": 0.1514, "step": 11839 }, { "epoch": 0.9380075262428204, "grad_norm": 1.5142781979094804, "learning_rate": 2.0065491195780163e-07, "loss": 0.1418, "step": 11840 }, { "epoch": 0.9380867498514557, "grad_norm": 1.502200509695498, "learning_rate": 2.00143773806879e-07, "loss": 0.1476, "step": 11841 }, { "epoch": 0.9381659734600911, "grad_norm": 1.697517799507699, "learning_rate": 1.9963328092933444e-07, "loss": 0.1726, "step": 11842 }, { "epoch": 0.9382451970687264, "grad_norm": 2.2267506741690566, "learning_rate": 1.9912343335878326e-07, "loss": 0.2278, "step": 11843 }, { "epoch": 0.9383244206773619, "grad_norm": 1.4106274960545038, "learning_rate": 1.9861423112879308e-07, "loss": 0.1273, "step": 11844 }, { "epoch": 0.9384036442859972, "grad_norm": 1.9308164959449465, "learning_rate": 1.9810567427289596e-07, "loss": 0.1615, "step": 11845 }, { "epoch": 0.9384828678946326, "grad_norm": 1.4111916275200567, "learning_rate": 1.9759776282457731e-07, "loss": 0.0799, "step": 11846 }, { "epoch": 0.938562091503268, "grad_norm": 1.3648174089371212, "learning_rate": 1.970904968172771e-07, "loss": 0.1071, "step": 11847 }, { "epoch": 0.9386413151119033, "grad_norm": 1.5094485254495393, "learning_rate": 1.965838762844019e-07, "loss": 0.1778, "step": 11848 }, { "epoch": 0.9387205387205387, "grad_norm": 1.6990181391425632, "learning_rate": 1.9607790125930614e-07, "loss": 0.1431, "step": 11849 }, { "epoch": 0.9387997623291741, "grad_norm": 2.148605854081362, "learning_rate": 1.9557257177530763e-07, "loss": 0.2264, "step": 11850 }, { "epoch": 0.9388789859378095, "grad_norm": 1.2558382377717134, "learning_rate": 1.9506788786567865e-07, "loss": 0.0883, "step": 11851 }, { "epoch": 0.9389582095464448, "grad_norm": 1.8049545793541213, "learning_rate": 1.9456384956365149e-07, "loss": 0.1885, "step": 11852 }, { "epoch": 0.9390374331550803, "grad_norm": 1.5641479904797764, "learning_rate": 1.9406045690241404e-07, "loss": 0.1504, "step": 11853 }, { "epoch": 0.9391166567637156, "grad_norm": 1.262705490604946, "learning_rate": 1.935577099151109e-07, "loss": 0.1484, "step": 11854 }, { "epoch": 0.9391958803723509, "grad_norm": 1.6160988203637365, "learning_rate": 1.9305560863484896e-07, "loss": 0.1419, "step": 11855 }, { "epoch": 0.9392751039809863, "grad_norm": 1.8135757520694857, "learning_rate": 1.9255415309468618e-07, "loss": 0.1141, "step": 11856 }, { "epoch": 0.9393543275896217, "grad_norm": 1.8564696353131633, "learning_rate": 1.920533433276417e-07, "loss": 0.2042, "step": 11857 }, { "epoch": 0.9394335511982571, "grad_norm": 1.6158178817323292, "learning_rate": 1.9155317936669248e-07, "loss": 0.1262, "step": 11858 }, { "epoch": 0.9395127748068924, "grad_norm": 1.7371704020105014, "learning_rate": 1.910536612447711e-07, "loss": 0.1897, "step": 11859 }, { "epoch": 0.9395919984155279, "grad_norm": 1.4258654479404713, "learning_rate": 1.9055478899476788e-07, "loss": 0.0854, "step": 11860 }, { "epoch": 0.9396712220241632, "grad_norm": 1.801823519288332, "learning_rate": 1.900565626495332e-07, "loss": 0.193, "step": 11861 }, { "epoch": 0.9397504456327985, "grad_norm": 1.7534619658607473, "learning_rate": 1.8955898224187086e-07, "loss": 0.1689, "step": 11862 }, { "epoch": 0.939829669241434, "grad_norm": 1.8847822580272615, "learning_rate": 1.890620478045435e-07, "loss": 0.2257, "step": 11863 }, { "epoch": 0.9399088928500693, "grad_norm": 1.2565745974518263, "learning_rate": 1.8856575937027388e-07, "loss": 0.1232, "step": 11864 }, { "epoch": 0.9399881164587047, "grad_norm": 1.6528358842259696, "learning_rate": 1.8807011697174027e-07, "loss": 0.1856, "step": 11865 }, { "epoch": 0.94006734006734, "grad_norm": 1.7884068169795333, "learning_rate": 1.8757512064157658e-07, "loss": 0.2185, "step": 11866 }, { "epoch": 0.9401465636759755, "grad_norm": 1.958864352190528, "learning_rate": 1.870807704123756e-07, "loss": 0.1852, "step": 11867 }, { "epoch": 0.9402257872846108, "grad_norm": 1.819078019215524, "learning_rate": 1.8658706631669133e-07, "loss": 0.1337, "step": 11868 }, { "epoch": 0.9403050108932461, "grad_norm": 2.8134074669061926, "learning_rate": 1.8609400838702884e-07, "loss": 0.1194, "step": 11869 }, { "epoch": 0.9403842345018816, "grad_norm": 1.0202566566551032, "learning_rate": 1.856015966558533e-07, "loss": 0.0732, "step": 11870 }, { "epoch": 0.9404634581105169, "grad_norm": 1.1491139965322026, "learning_rate": 1.8510983115558988e-07, "loss": 0.08, "step": 11871 }, { "epoch": 0.9405426817191523, "grad_norm": 1.8716048443873254, "learning_rate": 1.8461871191861825e-07, "loss": 0.1165, "step": 11872 }, { "epoch": 0.9406219053277877, "grad_norm": 1.6399236291466963, "learning_rate": 1.8412823897727473e-07, "loss": 0.1834, "step": 11873 }, { "epoch": 0.9407011289364231, "grad_norm": 1.3377943631831999, "learning_rate": 1.8363841236385571e-07, "loss": 0.1089, "step": 11874 }, { "epoch": 0.9407803525450584, "grad_norm": 1.4589650557439726, "learning_rate": 1.8314923211061542e-07, "loss": 0.1115, "step": 11875 }, { "epoch": 0.9408595761536938, "grad_norm": 1.7220854065481161, "learning_rate": 1.826606982497603e-07, "loss": 0.1429, "step": 11876 }, { "epoch": 0.9409387997623292, "grad_norm": 1.744555537826347, "learning_rate": 1.8217281081346238e-07, "loss": 0.1952, "step": 11877 }, { "epoch": 0.9410180233709645, "grad_norm": 1.4567505532885319, "learning_rate": 1.8168556983384377e-07, "loss": 0.1167, "step": 11878 }, { "epoch": 0.9410972469796, "grad_norm": 1.417973923946339, "learning_rate": 1.811989753429877e-07, "loss": 0.1393, "step": 11879 }, { "epoch": 0.9411764705882353, "grad_norm": 1.8771413000173662, "learning_rate": 1.8071302737293294e-07, "loss": 0.1733, "step": 11880 }, { "epoch": 0.9412556941968707, "grad_norm": 1.1588549996085826, "learning_rate": 1.802277259556784e-07, "loss": 0.0958, "step": 11881 }, { "epoch": 0.941334917805506, "grad_norm": 1.3770262535771376, "learning_rate": 1.7974307112317957e-07, "loss": 0.1264, "step": 11882 }, { "epoch": 0.9414141414141414, "grad_norm": 1.3065675512850603, "learning_rate": 1.7925906290734653e-07, "loss": 0.1041, "step": 11883 }, { "epoch": 0.9414933650227768, "grad_norm": 1.2029892945540868, "learning_rate": 1.787757013400504e-07, "loss": 0.0954, "step": 11884 }, { "epoch": 0.9415725886314121, "grad_norm": 1.6001102408512853, "learning_rate": 1.7829298645311688e-07, "loss": 0.1583, "step": 11885 }, { "epoch": 0.9416518122400476, "grad_norm": 1.6902794289418774, "learning_rate": 1.7781091827833164e-07, "loss": 0.1414, "step": 11886 }, { "epoch": 0.9417310358486829, "grad_norm": 1.8141361973656667, "learning_rate": 1.7732949684743593e-07, "loss": 0.1798, "step": 11887 }, { "epoch": 0.9418102594573183, "grad_norm": 1.2303315489034052, "learning_rate": 1.768487221921278e-07, "loss": 0.0871, "step": 11888 }, { "epoch": 0.9418894830659537, "grad_norm": 2.4208613987094796, "learning_rate": 1.763685943440674e-07, "loss": 0.2057, "step": 11889 }, { "epoch": 0.941968706674589, "grad_norm": 1.5992007215536663, "learning_rate": 1.7588911333486614e-07, "loss": 0.1568, "step": 11890 }, { "epoch": 0.9420479302832244, "grad_norm": 1.9025729802431248, "learning_rate": 1.7541027919609545e-07, "loss": 0.1484, "step": 11891 }, { "epoch": 0.9421271538918597, "grad_norm": 1.544020209843473, "learning_rate": 1.7493209195928562e-07, "loss": 0.1179, "step": 11892 }, { "epoch": 0.9422063775004952, "grad_norm": 1.2881880574726279, "learning_rate": 1.7445455165592262e-07, "loss": 0.0872, "step": 11893 }, { "epoch": 0.9422856011091305, "grad_norm": 2.14072433958395, "learning_rate": 1.7397765831744905e-07, "loss": 0.2236, "step": 11894 }, { "epoch": 0.942364824717766, "grad_norm": 1.973501900863922, "learning_rate": 1.7350141197526648e-07, "loss": 0.2124, "step": 11895 }, { "epoch": 0.9424440483264013, "grad_norm": 1.4984592138655644, "learning_rate": 1.7302581266073537e-07, "loss": 0.1107, "step": 11896 }, { "epoch": 0.9425232719350366, "grad_norm": 1.084127345416034, "learning_rate": 1.7255086040516954e-07, "loss": 0.0624, "step": 11897 }, { "epoch": 0.942602495543672, "grad_norm": 1.712665399028556, "learning_rate": 1.7207655523984179e-07, "loss": 0.1397, "step": 11898 }, { "epoch": 0.9426817191523074, "grad_norm": 1.9042278255961511, "learning_rate": 1.71602897195986e-07, "loss": 0.162, "step": 11899 }, { "epoch": 0.9427609427609428, "grad_norm": 1.8445177330445153, "learning_rate": 1.711298863047872e-07, "loss": 0.1124, "step": 11900 }, { "epoch": 0.9428401663695781, "grad_norm": 2.2523130067303696, "learning_rate": 1.7065752259739056e-07, "loss": 0.2643, "step": 11901 }, { "epoch": 0.9429193899782136, "grad_norm": 1.7144277609924206, "learning_rate": 1.701858061049022e-07, "loss": 0.1761, "step": 11902 }, { "epoch": 0.9429986135868489, "grad_norm": 2.149629817809522, "learning_rate": 1.697147368583796e-07, "loss": 0.2157, "step": 11903 }, { "epoch": 0.9430778371954842, "grad_norm": 1.424607724576598, "learning_rate": 1.692443148888412e-07, "loss": 0.1797, "step": 11904 }, { "epoch": 0.9431570608041197, "grad_norm": 1.8353657077533234, "learning_rate": 1.6877454022726225e-07, "loss": 0.1166, "step": 11905 }, { "epoch": 0.943236284412755, "grad_norm": 1.3460872683562073, "learning_rate": 1.6830541290457468e-07, "loss": 0.1264, "step": 11906 }, { "epoch": 0.9433155080213904, "grad_norm": 1.5012394850707114, "learning_rate": 1.6783693295166935e-07, "loss": 0.1287, "step": 11907 }, { "epoch": 0.9433947316300257, "grad_norm": 1.706287936584026, "learning_rate": 1.6736910039939159e-07, "loss": 0.1817, "step": 11908 }, { "epoch": 0.9434739552386611, "grad_norm": 1.6575383833199284, "learning_rate": 1.6690191527854782e-07, "loss": 0.1342, "step": 11909 }, { "epoch": 0.9435531788472965, "grad_norm": 1.5724106277205052, "learning_rate": 1.6643537761989904e-07, "loss": 0.1605, "step": 11910 }, { "epoch": 0.9436324024559318, "grad_norm": 1.4620817229547778, "learning_rate": 1.6596948745416397e-07, "loss": 0.1435, "step": 11911 }, { "epoch": 0.9437116260645673, "grad_norm": 1.4580899994611065, "learning_rate": 1.6550424481202032e-07, "loss": 0.1249, "step": 11912 }, { "epoch": 0.9437908496732026, "grad_norm": 2.2336838851586958, "learning_rate": 1.6503964972410136e-07, "loss": 0.2336, "step": 11913 }, { "epoch": 0.943870073281838, "grad_norm": 1.4682091262727397, "learning_rate": 1.6457570222099816e-07, "loss": 0.1109, "step": 11914 }, { "epoch": 0.9439492968904734, "grad_norm": 1.4460866124241982, "learning_rate": 1.6411240233326076e-07, "loss": 0.1327, "step": 11915 }, { "epoch": 0.9440285204991087, "grad_norm": 2.1642810716238055, "learning_rate": 1.6364975009139473e-07, "loss": 0.1285, "step": 11916 }, { "epoch": 0.9441077441077441, "grad_norm": 1.4216987919559858, "learning_rate": 1.6318774552586237e-07, "loss": 0.097, "step": 11917 }, { "epoch": 0.9441869677163794, "grad_norm": 1.481563450957902, "learning_rate": 1.627263886670849e-07, "loss": 0.2165, "step": 11918 }, { "epoch": 0.9442661913250149, "grad_norm": 1.6766837177581069, "learning_rate": 1.6226567954544248e-07, "loss": 0.1179, "step": 11919 }, { "epoch": 0.9443454149336502, "grad_norm": 1.5105550202839497, "learning_rate": 1.618056181912675e-07, "loss": 0.1751, "step": 11920 }, { "epoch": 0.9444246385422856, "grad_norm": 1.0786000641288729, "learning_rate": 1.6134620463485352e-07, "loss": 0.0666, "step": 11921 }, { "epoch": 0.944503862150921, "grad_norm": 1.6202304222779838, "learning_rate": 1.6088743890645297e-07, "loss": 0.1503, "step": 11922 }, { "epoch": 0.9445830857595563, "grad_norm": 1.9078972540576251, "learning_rate": 1.6042932103627174e-07, "loss": 0.15, "step": 11923 }, { "epoch": 0.9446623093681917, "grad_norm": 1.3460728220788978, "learning_rate": 1.5997185105447344e-07, "loss": 0.1405, "step": 11924 }, { "epoch": 0.9447415329768271, "grad_norm": 1.7681649985736054, "learning_rate": 1.5951502899118176e-07, "loss": 0.1324, "step": 11925 }, { "epoch": 0.9448207565854625, "grad_norm": 1.3148303965560717, "learning_rate": 1.590588548764771e-07, "loss": 0.0946, "step": 11926 }, { "epoch": 0.9448999801940978, "grad_norm": 1.4187422289031857, "learning_rate": 1.586033287403943e-07, "loss": 0.1462, "step": 11927 }, { "epoch": 0.9449792038027333, "grad_norm": 1.1150388978288979, "learning_rate": 1.5814845061292938e-07, "loss": 0.1129, "step": 11928 }, { "epoch": 0.9450584274113686, "grad_norm": 1.3459520433959042, "learning_rate": 1.5769422052403172e-07, "loss": 0.1088, "step": 11929 }, { "epoch": 0.9451376510200039, "grad_norm": 1.641433006390426, "learning_rate": 1.572406385036118e-07, "loss": 0.1466, "step": 11930 }, { "epoch": 0.9452168746286393, "grad_norm": 2.0384269998773203, "learning_rate": 1.5678770458153693e-07, "loss": 0.1916, "step": 11931 }, { "epoch": 0.9452960982372747, "grad_norm": 1.4361041621176975, "learning_rate": 1.563354187876287e-07, "loss": 0.1125, "step": 11932 }, { "epoch": 0.9453753218459101, "grad_norm": 1.5793929846571342, "learning_rate": 1.558837811516667e-07, "loss": 0.1291, "step": 11933 }, { "epoch": 0.9454545454545454, "grad_norm": 1.4849138907097514, "learning_rate": 1.5543279170339265e-07, "loss": 0.0997, "step": 11934 }, { "epoch": 0.9455337690631809, "grad_norm": 1.6494464893231329, "learning_rate": 1.5498245047249948e-07, "loss": 0.1687, "step": 11935 }, { "epoch": 0.9456129926718162, "grad_norm": 1.3112012404275095, "learning_rate": 1.5453275748864128e-07, "loss": 0.1378, "step": 11936 }, { "epoch": 0.9456922162804515, "grad_norm": 1.8406951283931834, "learning_rate": 1.5408371278142652e-07, "loss": 0.1609, "step": 11937 }, { "epoch": 0.945771439889087, "grad_norm": 1.2447757624739588, "learning_rate": 1.5363531638042494e-07, "loss": 0.0939, "step": 11938 }, { "epoch": 0.9458506634977223, "grad_norm": 0.9840578335210307, "learning_rate": 1.5318756831516069e-07, "loss": 0.0767, "step": 11939 }, { "epoch": 0.9459298871063577, "grad_norm": 1.6467282259061267, "learning_rate": 1.5274046861511348e-07, "loss": 0.0972, "step": 11940 }, { "epoch": 0.946009110714993, "grad_norm": 2.363151800716459, "learning_rate": 1.5229401730972536e-07, "loss": 0.1558, "step": 11941 }, { "epoch": 0.9460883343236285, "grad_norm": 1.9614867022253675, "learning_rate": 1.518482144283917e-07, "loss": 0.2179, "step": 11942 }, { "epoch": 0.9461675579322638, "grad_norm": 1.700805055586085, "learning_rate": 1.514030600004668e-07, "loss": 0.1772, "step": 11943 }, { "epoch": 0.9462467815408991, "grad_norm": 1.7050464671088847, "learning_rate": 1.5095855405526272e-07, "loss": 0.2091, "step": 11944 }, { "epoch": 0.9463260051495346, "grad_norm": 1.4897690622708966, "learning_rate": 1.505146966220461e-07, "loss": 0.1309, "step": 11945 }, { "epoch": 0.9464052287581699, "grad_norm": 1.5862026258795623, "learning_rate": 1.5007148773004466e-07, "loss": 0.1306, "step": 11946 }, { "epoch": 0.9464844523668053, "grad_norm": 1.3636104079234208, "learning_rate": 1.496289274084417e-07, "loss": 0.1303, "step": 11947 }, { "epoch": 0.9465636759754407, "grad_norm": 1.9887388251704603, "learning_rate": 1.4918701568637618e-07, "loss": 0.2155, "step": 11948 }, { "epoch": 0.9466428995840761, "grad_norm": 1.6575947433379106, "learning_rate": 1.4874575259294588e-07, "loss": 0.146, "step": 11949 }, { "epoch": 0.9467221231927114, "grad_norm": 1.687923479965547, "learning_rate": 1.483051381572076e-07, "loss": 0.1392, "step": 11950 }, { "epoch": 0.9468013468013468, "grad_norm": 1.2161660130109961, "learning_rate": 1.4786517240817255e-07, "loss": 0.1162, "step": 11951 }, { "epoch": 0.9468805704099822, "grad_norm": 1.5624870403432545, "learning_rate": 1.474258553748098e-07, "loss": 0.148, "step": 11952 }, { "epoch": 0.9469597940186175, "grad_norm": 1.8689356110888757, "learning_rate": 1.469871870860473e-07, "loss": 0.155, "step": 11953 }, { "epoch": 0.947039017627253, "grad_norm": 1.5350277594702901, "learning_rate": 1.4654916757076865e-07, "loss": 0.1671, "step": 11954 }, { "epoch": 0.9471182412358883, "grad_norm": 1.7356327114580496, "learning_rate": 1.461117968578163e-07, "loss": 0.137, "step": 11955 }, { "epoch": 0.9471974648445237, "grad_norm": 1.6171128745584276, "learning_rate": 1.4567507497598722e-07, "loss": 0.1287, "step": 11956 }, { "epoch": 0.947276688453159, "grad_norm": 1.9759607064110165, "learning_rate": 1.452390019540384e-07, "loss": 0.1774, "step": 11957 }, { "epoch": 0.9473559120617944, "grad_norm": 1.6152995081730877, "learning_rate": 1.4480357782068467e-07, "loss": 0.1109, "step": 11958 }, { "epoch": 0.9474351356704298, "grad_norm": 1.5319687616532214, "learning_rate": 1.4436880260459307e-07, "loss": 0.1574, "step": 11959 }, { "epoch": 0.9475143592790651, "grad_norm": 1.8299763193149086, "learning_rate": 1.4393467633439629e-07, "loss": 0.1667, "step": 11960 }, { "epoch": 0.9475935828877006, "grad_norm": 1.449959841952018, "learning_rate": 1.4350119903867477e-07, "loss": 0.1461, "step": 11961 }, { "epoch": 0.9476728064963359, "grad_norm": 2.0000677480423144, "learning_rate": 1.4306837074597235e-07, "loss": 0.1272, "step": 11962 }, { "epoch": 0.9477520301049713, "grad_norm": 1.693370238789433, "learning_rate": 1.426361914847907e-07, "loss": 0.1638, "step": 11963 }, { "epoch": 0.9478312537136067, "grad_norm": 1.6362350847715208, "learning_rate": 1.422046612835848e-07, "loss": 0.1694, "step": 11964 }, { "epoch": 0.947910477322242, "grad_norm": 1.8513043844379247, "learning_rate": 1.417737801707686e-07, "loss": 0.2078, "step": 11965 }, { "epoch": 0.9479897009308774, "grad_norm": 1.5496872318447052, "learning_rate": 1.4134354817471497e-07, "loss": 0.1095, "step": 11966 }, { "epoch": 0.9480689245395127, "grad_norm": 1.9026858120044008, "learning_rate": 1.4091396532375123e-07, "loss": 0.1985, "step": 11967 }, { "epoch": 0.9481481481481482, "grad_norm": 1.5429302334196537, "learning_rate": 1.4048503164616367e-07, "loss": 0.1184, "step": 11968 }, { "epoch": 0.9482273717567835, "grad_norm": 2.0347941339117073, "learning_rate": 1.4005674717019746e-07, "loss": 0.2167, "step": 11969 }, { "epoch": 0.948306595365419, "grad_norm": 1.7773007394502391, "learning_rate": 1.3962911192405004e-07, "loss": 0.179, "step": 11970 }, { "epoch": 0.9483858189740543, "grad_norm": 1.7088106095254136, "learning_rate": 1.3920212593588113e-07, "loss": 0.1642, "step": 11971 }, { "epoch": 0.9484650425826896, "grad_norm": 1.5023799075502868, "learning_rate": 1.3877578923380486e-07, "loss": 0.1183, "step": 11972 }, { "epoch": 0.948544266191325, "grad_norm": 2.3937551198981746, "learning_rate": 1.3835010184589325e-07, "loss": 0.1605, "step": 11973 }, { "epoch": 0.9486234897999604, "grad_norm": 1.766024357180978, "learning_rate": 1.3792506380017612e-07, "loss": 0.1768, "step": 11974 }, { "epoch": 0.9487027134085958, "grad_norm": 1.719660452276552, "learning_rate": 1.3750067512464105e-07, "loss": 0.1482, "step": 11975 }, { "epoch": 0.9487819370172311, "grad_norm": 1.5262864672716732, "learning_rate": 1.3707693584723124e-07, "loss": 0.1562, "step": 11976 }, { "epoch": 0.9488611606258666, "grad_norm": 1.7261102797847192, "learning_rate": 1.3665384599584774e-07, "loss": 0.1718, "step": 11977 }, { "epoch": 0.9489403842345019, "grad_norm": 1.5864637180113903, "learning_rate": 1.3623140559834824e-07, "loss": 0.193, "step": 11978 }, { "epoch": 0.9490196078431372, "grad_norm": 1.4792759024368576, "learning_rate": 1.358096146825505e-07, "loss": 0.1667, "step": 11979 }, { "epoch": 0.9490988314517727, "grad_norm": 1.362874396659318, "learning_rate": 1.353884732762256e-07, "loss": 0.0475, "step": 11980 }, { "epoch": 0.949178055060408, "grad_norm": 1.728558506787126, "learning_rate": 1.3496798140710365e-07, "loss": 0.0831, "step": 11981 }, { "epoch": 0.9492572786690434, "grad_norm": 1.7417228751369813, "learning_rate": 1.3454813910287358e-07, "loss": 0.1827, "step": 11982 }, { "epoch": 0.9493365022776787, "grad_norm": 2.389794919147696, "learning_rate": 1.341289463911788e-07, "loss": 0.1876, "step": 11983 }, { "epoch": 0.9494157258863142, "grad_norm": 1.2601374217472465, "learning_rate": 1.337104032996206e-07, "loss": 0.0726, "step": 11984 }, { "epoch": 0.9494949494949495, "grad_norm": 1.9089770087538627, "learning_rate": 1.3329250985575915e-07, "loss": 0.1202, "step": 11985 }, { "epoch": 0.9495741731035848, "grad_norm": 1.303908150559167, "learning_rate": 1.3287526608711132e-07, "loss": 0.1052, "step": 11986 }, { "epoch": 0.9496533967122203, "grad_norm": 2.0089282180301415, "learning_rate": 1.324586720211485e-07, "loss": 0.1987, "step": 11987 }, { "epoch": 0.9497326203208556, "grad_norm": 1.5389078576242174, "learning_rate": 1.3204272768530313e-07, "loss": 0.1591, "step": 11988 }, { "epoch": 0.949811843929491, "grad_norm": 1.7282731944243857, "learning_rate": 1.3162743310696224e-07, "loss": 0.1554, "step": 11989 }, { "epoch": 0.9498910675381264, "grad_norm": 1.5207433211225512, "learning_rate": 1.3121278831347172e-07, "loss": 0.1618, "step": 11990 }, { "epoch": 0.9499702911467617, "grad_norm": 1.4786580595344196, "learning_rate": 1.3079879333213308e-07, "loss": 0.1308, "step": 11991 }, { "epoch": 0.9500495147553971, "grad_norm": 1.4060282658150705, "learning_rate": 1.303854481902067e-07, "loss": 0.1405, "step": 11992 }, { "epoch": 0.9501287383640324, "grad_norm": 1.4353414308806975, "learning_rate": 1.2997275291490863e-07, "loss": 0.1549, "step": 11993 }, { "epoch": 0.9502079619726679, "grad_norm": 1.3168202052568103, "learning_rate": 1.2956070753341265e-07, "loss": 0.1424, "step": 11994 }, { "epoch": 0.9502871855813032, "grad_norm": 1.180036403468606, "learning_rate": 1.2914931207285154e-07, "loss": 0.1101, "step": 11995 }, { "epoch": 0.9503664091899386, "grad_norm": 1.8683831369353276, "learning_rate": 1.2873856656031358e-07, "loss": 0.1697, "step": 11996 }, { "epoch": 0.950445632798574, "grad_norm": 1.4114591680532926, "learning_rate": 1.2832847102284162e-07, "loss": 0.1585, "step": 11997 }, { "epoch": 0.9505248564072093, "grad_norm": 1.8468183058619627, "learning_rate": 1.2791902548744185e-07, "loss": 0.1951, "step": 11998 }, { "epoch": 0.9506040800158447, "grad_norm": 1.9933481557191437, "learning_rate": 1.2751022998107154e-07, "loss": 0.2607, "step": 11999 }, { "epoch": 0.9506833036244801, "grad_norm": 1.9963279242713887, "learning_rate": 1.271020845306492e-07, "loss": 0.1575, "step": 12000 }, { "epoch": 0.9507625272331155, "grad_norm": 1.7104243492040376, "learning_rate": 1.2669458916305112e-07, "loss": 0.1479, "step": 12001 }, { "epoch": 0.9508417508417508, "grad_norm": 1.23456583492303, "learning_rate": 1.2628774390510578e-07, "loss": 0.0921, "step": 12002 }, { "epoch": 0.9509209744503863, "grad_norm": 1.1440957922195196, "learning_rate": 1.2588154878360293e-07, "loss": 0.0585, "step": 12003 }, { "epoch": 0.9510001980590216, "grad_norm": 1.4943588956271991, "learning_rate": 1.254760038252889e-07, "loss": 0.1309, "step": 12004 }, { "epoch": 0.9510794216676569, "grad_norm": 2.005179153398323, "learning_rate": 1.2507110905686793e-07, "loss": 0.1799, "step": 12005 }, { "epoch": 0.9511586452762923, "grad_norm": 1.5338207207701673, "learning_rate": 1.2466686450499866e-07, "loss": 0.1416, "step": 12006 }, { "epoch": 0.9512378688849277, "grad_norm": 1.506157165613995, "learning_rate": 1.242632701962987e-07, "loss": 0.1239, "step": 12007 }, { "epoch": 0.9513170924935631, "grad_norm": 1.792253698059906, "learning_rate": 1.2386032615734345e-07, "loss": 0.1802, "step": 12008 }, { "epoch": 0.9513963161021984, "grad_norm": 1.3138492666234793, "learning_rate": 1.2345803241466504e-07, "loss": 0.1184, "step": 12009 }, { "epoch": 0.9514755397108339, "grad_norm": 1.6784904215248027, "learning_rate": 1.2305638899475226e-07, "loss": 0.1614, "step": 12010 }, { "epoch": 0.9515547633194692, "grad_norm": 1.378568569924922, "learning_rate": 1.2265539592405173e-07, "loss": 0.1513, "step": 12011 }, { "epoch": 0.9516339869281045, "grad_norm": 1.7673684678605055, "learning_rate": 1.222550532289668e-07, "loss": 0.1788, "step": 12012 }, { "epoch": 0.95171321053674, "grad_norm": 1.7261021485108556, "learning_rate": 1.218553609358575e-07, "loss": 0.1458, "step": 12013 }, { "epoch": 0.9517924341453753, "grad_norm": 2.0395886870501414, "learning_rate": 1.214563190710416e-07, "loss": 0.2136, "step": 12014 }, { "epoch": 0.9518716577540107, "grad_norm": 1.7761442070779414, "learning_rate": 1.2105792766079594e-07, "loss": 0.2062, "step": 12015 }, { "epoch": 0.951950881362646, "grad_norm": 1.5915709930985433, "learning_rate": 1.2066018673134948e-07, "loss": 0.0977, "step": 12016 }, { "epoch": 0.9520301049712815, "grad_norm": 1.5366812070122335, "learning_rate": 1.2026309630889465e-07, "loss": 0.1472, "step": 12017 }, { "epoch": 0.9521093285799168, "grad_norm": 1.695453467314009, "learning_rate": 1.1986665641957718e-07, "loss": 0.1684, "step": 12018 }, { "epoch": 0.9521885521885521, "grad_norm": 1.956582060748221, "learning_rate": 1.194708670894984e-07, "loss": 0.2115, "step": 12019 }, { "epoch": 0.9522677757971876, "grad_norm": 1.8331217653148717, "learning_rate": 1.1907572834472303e-07, "loss": 0.156, "step": 12020 }, { "epoch": 0.9523469994058229, "grad_norm": 1.4616866307873881, "learning_rate": 1.1868124021126582e-07, "loss": 0.1698, "step": 12021 }, { "epoch": 0.9524262230144583, "grad_norm": 1.785338298014463, "learning_rate": 1.1828740271510375e-07, "loss": 0.1986, "step": 12022 }, { "epoch": 0.9525054466230937, "grad_norm": 1.4376578658409527, "learning_rate": 1.1789421588216721e-07, "loss": 0.117, "step": 12023 }, { "epoch": 0.9525846702317291, "grad_norm": 1.4557152152029316, "learning_rate": 1.1750167973834769e-07, "loss": 0.1286, "step": 12024 }, { "epoch": 0.9526638938403644, "grad_norm": 1.3652174097322807, "learning_rate": 1.171097943094912e-07, "loss": 0.1066, "step": 12025 }, { "epoch": 0.9527431174489998, "grad_norm": 1.8076583144588017, "learning_rate": 1.1671855962140045e-07, "loss": 0.1343, "step": 12026 }, { "epoch": 0.9528223410576352, "grad_norm": 1.723962110125695, "learning_rate": 1.1632797569983811e-07, "loss": 0.1292, "step": 12027 }, { "epoch": 0.9529015646662705, "grad_norm": 1.8335463439964539, "learning_rate": 1.1593804257052143e-07, "loss": 0.1604, "step": 12028 }, { "epoch": 0.952980788274906, "grad_norm": 1.5248365587438832, "learning_rate": 1.1554876025912432e-07, "loss": 0.118, "step": 12029 }, { "epoch": 0.9530600118835413, "grad_norm": 1.5335885639969342, "learning_rate": 1.151601287912818e-07, "loss": 0.1052, "step": 12030 }, { "epoch": 0.9531392354921767, "grad_norm": 1.670575917637804, "learning_rate": 1.147721481925812e-07, "loss": 0.1895, "step": 12031 }, { "epoch": 0.953218459100812, "grad_norm": 2.2687123214352867, "learning_rate": 1.1438481848856986e-07, "loss": 0.2066, "step": 12032 }, { "epoch": 0.9532976827094474, "grad_norm": 2.1372727810964953, "learning_rate": 1.1399813970475293e-07, "loss": 0.1923, "step": 12033 }, { "epoch": 0.9533769063180828, "grad_norm": 1.4975834870785496, "learning_rate": 1.1361211186658893e-07, "loss": 0.127, "step": 12034 }, { "epoch": 0.9534561299267181, "grad_norm": 1.8447286433253713, "learning_rate": 1.1322673499949754e-07, "loss": 0.1323, "step": 12035 }, { "epoch": 0.9535353535353536, "grad_norm": 1.683692159063323, "learning_rate": 1.1284200912885291e-07, "loss": 0.1554, "step": 12036 }, { "epoch": 0.9536145771439889, "grad_norm": 1.6286660098898258, "learning_rate": 1.1245793427998919e-07, "loss": 0.1341, "step": 12037 }, { "epoch": 0.9536938007526243, "grad_norm": 1.457421158329244, "learning_rate": 1.1207451047819396e-07, "loss": 0.1556, "step": 12038 }, { "epoch": 0.9537730243612597, "grad_norm": 1.27266027303811, "learning_rate": 1.1169173774871478e-07, "loss": 0.0897, "step": 12039 }, { "epoch": 0.953852247969895, "grad_norm": 1.8659504097124968, "learning_rate": 1.1130961611675484e-07, "loss": 0.1935, "step": 12040 }, { "epoch": 0.9539314715785304, "grad_norm": 1.667601274749327, "learning_rate": 1.1092814560747511e-07, "loss": 0.1641, "step": 12041 }, { "epoch": 0.9540106951871657, "grad_norm": 1.5366170062860218, "learning_rate": 1.105473262459944e-07, "loss": 0.1564, "step": 12042 }, { "epoch": 0.9540899187958012, "grad_norm": 1.9129907140412452, "learning_rate": 1.1016715805738709e-07, "loss": 0.1761, "step": 12043 }, { "epoch": 0.9541691424044365, "grad_norm": 1.762676037732583, "learning_rate": 1.0978764106668538e-07, "loss": 0.205, "step": 12044 }, { "epoch": 0.954248366013072, "grad_norm": 2.0368552232851513, "learning_rate": 1.0940877529887928e-07, "loss": 0.2391, "step": 12045 }, { "epoch": 0.9543275896217073, "grad_norm": 2.086444359412718, "learning_rate": 1.0903056077891438e-07, "loss": 0.1787, "step": 12046 }, { "epoch": 0.9544068132303426, "grad_norm": 1.7700579531488503, "learning_rate": 1.0865299753169522e-07, "loss": 0.2692, "step": 12047 }, { "epoch": 0.954486036838978, "grad_norm": 1.3133828256695508, "learning_rate": 1.0827608558208192e-07, "loss": 0.1096, "step": 12048 }, { "epoch": 0.9545652604476134, "grad_norm": 2.208530437178883, "learning_rate": 1.0789982495489238e-07, "loss": 0.1527, "step": 12049 }, { "epoch": 0.9546444840562488, "grad_norm": 1.4546182918366382, "learning_rate": 1.0752421567490123e-07, "loss": 0.1054, "step": 12050 }, { "epoch": 0.9547237076648841, "grad_norm": 1.5806288253935212, "learning_rate": 1.0714925776684093e-07, "loss": 0.1379, "step": 12051 }, { "epoch": 0.9548029312735196, "grad_norm": 2.2037909835431764, "learning_rate": 1.067749512554006e-07, "loss": 0.1519, "step": 12052 }, { "epoch": 0.9548821548821549, "grad_norm": 1.4432888169332159, "learning_rate": 1.0640129616522721e-07, "loss": 0.0798, "step": 12053 }, { "epoch": 0.9549613784907902, "grad_norm": 1.475526789033575, "learning_rate": 1.0602829252092328e-07, "loss": 0.1209, "step": 12054 }, { "epoch": 0.9550406020994257, "grad_norm": 1.8891381390407387, "learning_rate": 1.0565594034704918e-07, "loss": 0.23, "step": 12055 }, { "epoch": 0.955119825708061, "grad_norm": 1.5051609161624158, "learning_rate": 1.0528423966812307e-07, "loss": 0.1321, "step": 12056 }, { "epoch": 0.9551990493166964, "grad_norm": 1.698008438317734, "learning_rate": 1.0491319050861981e-07, "loss": 0.2419, "step": 12057 }, { "epoch": 0.9552782729253317, "grad_norm": 1.2470871141957662, "learning_rate": 1.0454279289296987e-07, "loss": 0.0995, "step": 12058 }, { "epoch": 0.9553574965339672, "grad_norm": 1.7326133783796482, "learning_rate": 1.0417304684556373e-07, "loss": 0.1336, "step": 12059 }, { "epoch": 0.9554367201426025, "grad_norm": 2.1848232285938676, "learning_rate": 1.0380395239074747e-07, "loss": 0.2114, "step": 12060 }, { "epoch": 0.9555159437512378, "grad_norm": 2.047669789129029, "learning_rate": 1.0343550955282278e-07, "loss": 0.1694, "step": 12061 }, { "epoch": 0.9555951673598733, "grad_norm": 1.8249573262195395, "learning_rate": 1.0306771835605022e-07, "loss": 0.2031, "step": 12062 }, { "epoch": 0.9556743909685086, "grad_norm": 1.5757765949747649, "learning_rate": 1.0270057882464823e-07, "loss": 0.1412, "step": 12063 }, { "epoch": 0.955753614577144, "grad_norm": 1.5730205319358732, "learning_rate": 1.0233409098278967e-07, "loss": 0.1858, "step": 12064 }, { "epoch": 0.9558328381857794, "grad_norm": 1.2878382038072824, "learning_rate": 1.0196825485460637e-07, "loss": 0.112, "step": 12065 }, { "epoch": 0.9559120617944147, "grad_norm": 1.4095404333114911, "learning_rate": 1.0160307046418794e-07, "loss": 0.0951, "step": 12066 }, { "epoch": 0.9559912854030501, "grad_norm": 1.0658902420587906, "learning_rate": 1.0123853783557847e-07, "loss": 0.0778, "step": 12067 }, { "epoch": 0.9560705090116854, "grad_norm": 1.8862117022948965, "learning_rate": 1.0087465699278321e-07, "loss": 0.1907, "step": 12068 }, { "epoch": 0.9561497326203209, "grad_norm": 1.5368700250623735, "learning_rate": 1.0051142795975855e-07, "loss": 0.1589, "step": 12069 }, { "epoch": 0.9562289562289562, "grad_norm": 1.5215946392950483, "learning_rate": 1.0014885076042313e-07, "loss": 0.1036, "step": 12070 }, { "epoch": 0.9563081798375916, "grad_norm": 1.7367286662883004, "learning_rate": 9.978692541865121e-08, "loss": 0.1522, "step": 12071 }, { "epoch": 0.956387403446227, "grad_norm": 1.426243673494032, "learning_rate": 9.94256519582748e-08, "loss": 0.1265, "step": 12072 }, { "epoch": 0.9564666270548623, "grad_norm": 1.6340634294843608, "learning_rate": 9.906503040307824e-08, "loss": 0.1731, "step": 12073 }, { "epoch": 0.9565458506634977, "grad_norm": 1.9078597306467755, "learning_rate": 9.87050607768103e-08, "loss": 0.1778, "step": 12074 }, { "epoch": 0.9566250742721331, "grad_norm": 2.120718233222186, "learning_rate": 9.834574310317313e-08, "loss": 0.1349, "step": 12075 }, { "epoch": 0.9567042978807685, "grad_norm": 1.233042675438917, "learning_rate": 9.798707740582447e-08, "loss": 0.1029, "step": 12076 }, { "epoch": 0.9567835214894038, "grad_norm": 1.7527488840278964, "learning_rate": 9.762906370837988e-08, "loss": 0.123, "step": 12077 }, { "epoch": 0.9568627450980393, "grad_norm": 1.2302812798749247, "learning_rate": 9.727170203441605e-08, "loss": 0.1082, "step": 12078 }, { "epoch": 0.9569419687066746, "grad_norm": 1.4618757138846394, "learning_rate": 9.691499240746083e-08, "loss": 0.1166, "step": 12079 }, { "epoch": 0.9570211923153099, "grad_norm": 1.4825068340556253, "learning_rate": 9.65589348510032e-08, "loss": 0.1397, "step": 12080 }, { "epoch": 0.9571004159239453, "grad_norm": 2.2389272262292237, "learning_rate": 9.620352938848665e-08, "loss": 0.1726, "step": 12081 }, { "epoch": 0.9571796395325807, "grad_norm": 1.2667102678562707, "learning_rate": 9.584877604331467e-08, "loss": 0.0798, "step": 12082 }, { "epoch": 0.9572588631412161, "grad_norm": 1.7395745703920986, "learning_rate": 9.549467483884412e-08, "loss": 0.1517, "step": 12083 }, { "epoch": 0.9573380867498514, "grad_norm": 1.6505099377510957, "learning_rate": 9.514122579839302e-08, "loss": 0.1038, "step": 12084 }, { "epoch": 0.9574173103584869, "grad_norm": 1.1480141201529224, "learning_rate": 9.478842894523165e-08, "loss": 0.1307, "step": 12085 }, { "epoch": 0.9574965339671222, "grad_norm": 2.1904626018609905, "learning_rate": 9.443628430259144e-08, "loss": 0.2708, "step": 12086 }, { "epoch": 0.9575757575757575, "grad_norm": 1.914669211959344, "learning_rate": 9.408479189366049e-08, "loss": 0.1658, "step": 12087 }, { "epoch": 0.957654981184393, "grad_norm": 1.4187114823357372, "learning_rate": 9.37339517415814e-08, "loss": 0.1128, "step": 12088 }, { "epoch": 0.9577342047930283, "grad_norm": 1.5353651587950317, "learning_rate": 9.33837638694557e-08, "loss": 0.096, "step": 12089 }, { "epoch": 0.9578134284016637, "grad_norm": 1.0547737643939115, "learning_rate": 9.30342283003416e-08, "loss": 0.1184, "step": 12090 }, { "epoch": 0.957892652010299, "grad_norm": 1.7886008155212836, "learning_rate": 9.268534505725402e-08, "loss": 0.1751, "step": 12091 }, { "epoch": 0.9579718756189345, "grad_norm": 1.2160077694727411, "learning_rate": 9.233711416316571e-08, "loss": 0.0697, "step": 12092 }, { "epoch": 0.9580510992275698, "grad_norm": 1.586486962071358, "learning_rate": 9.1989535641005e-08, "loss": 0.1733, "step": 12093 }, { "epoch": 0.9581303228362051, "grad_norm": 2.117994009454468, "learning_rate": 9.164260951366021e-08, "loss": 0.22, "step": 12094 }, { "epoch": 0.9582095464448406, "grad_norm": 1.611869808407201, "learning_rate": 9.129633580397312e-08, "loss": 0.1255, "step": 12095 }, { "epoch": 0.9582887700534759, "grad_norm": 1.8391116847771045, "learning_rate": 9.095071453474435e-08, "loss": 0.1725, "step": 12096 }, { "epoch": 0.9583679936621113, "grad_norm": 1.6769304174578155, "learning_rate": 9.060574572873238e-08, "loss": 0.1585, "step": 12097 }, { "epoch": 0.9584472172707467, "grad_norm": 2.0762247540312755, "learning_rate": 9.026142940865013e-08, "loss": 0.1069, "step": 12098 }, { "epoch": 0.9585264408793821, "grad_norm": 1.6375306988207565, "learning_rate": 8.991776559717058e-08, "loss": 0.1567, "step": 12099 }, { "epoch": 0.9586056644880174, "grad_norm": 1.8206376668955544, "learning_rate": 8.95747543169223e-08, "loss": 0.2215, "step": 12100 }, { "epoch": 0.9586848880966528, "grad_norm": 1.1741291418552378, "learning_rate": 8.923239559049057e-08, "loss": 0.1113, "step": 12101 }, { "epoch": 0.9587641117052882, "grad_norm": 1.4176499894958317, "learning_rate": 8.889068944041734e-08, "loss": 0.1724, "step": 12102 }, { "epoch": 0.9588433353139235, "grad_norm": 2.547430860660644, "learning_rate": 8.854963588920351e-08, "loss": 0.202, "step": 12103 }, { "epoch": 0.958922558922559, "grad_norm": 1.609332245853523, "learning_rate": 8.820923495930556e-08, "loss": 0.1812, "step": 12104 }, { "epoch": 0.9590017825311943, "grad_norm": 1.6627823016899432, "learning_rate": 8.786948667313667e-08, "loss": 0.1692, "step": 12105 }, { "epoch": 0.9590810061398297, "grad_norm": 1.6909104047198948, "learning_rate": 8.753039105306782e-08, "loss": 0.1281, "step": 12106 }, { "epoch": 0.959160229748465, "grad_norm": 1.4483914647110465, "learning_rate": 8.719194812142673e-08, "loss": 0.1302, "step": 12107 }, { "epoch": 0.9592394533571004, "grad_norm": 1.2991725781184642, "learning_rate": 8.685415790049889e-08, "loss": 0.1101, "step": 12108 }, { "epoch": 0.9593186769657358, "grad_norm": 1.6322710688229296, "learning_rate": 8.651702041252541e-08, "loss": 0.1706, "step": 12109 }, { "epoch": 0.9593979005743711, "grad_norm": 1.6691305386991055, "learning_rate": 8.61805356797063e-08, "loss": 0.1761, "step": 12110 }, { "epoch": 0.9594771241830066, "grad_norm": 1.494303436196463, "learning_rate": 8.584470372419606e-08, "loss": 0.1723, "step": 12111 }, { "epoch": 0.9595563477916419, "grad_norm": 1.5875946677400037, "learning_rate": 8.550952456810813e-08, "loss": 0.1462, "step": 12112 }, { "epoch": 0.9596355714002773, "grad_norm": 1.5350487547338978, "learning_rate": 8.517499823351261e-08, "loss": 0.1541, "step": 12113 }, { "epoch": 0.9597147950089127, "grad_norm": 1.5421713670763921, "learning_rate": 8.484112474243633e-08, "loss": 0.153, "step": 12114 }, { "epoch": 0.959794018617548, "grad_norm": 1.9755182692330675, "learning_rate": 8.450790411686282e-08, "loss": 0.1709, "step": 12115 }, { "epoch": 0.9598732422261834, "grad_norm": 1.4154566799834094, "learning_rate": 8.417533637873454e-08, "loss": 0.126, "step": 12116 }, { "epoch": 0.9599524658348187, "grad_norm": 1.8474593560933468, "learning_rate": 8.384342154994841e-08, "loss": 0.1724, "step": 12117 }, { "epoch": 0.9600316894434542, "grad_norm": 1.7195199759996236, "learning_rate": 8.351215965235915e-08, "loss": 0.1401, "step": 12118 }, { "epoch": 0.9601109130520895, "grad_norm": 1.7528145359642984, "learning_rate": 8.318155070777822e-08, "loss": 0.153, "step": 12119 }, { "epoch": 0.960190136660725, "grad_norm": 1.616263862220609, "learning_rate": 8.28515947379771e-08, "loss": 0.1292, "step": 12120 }, { "epoch": 0.9602693602693603, "grad_norm": 1.9343860887939304, "learning_rate": 8.252229176467841e-08, "loss": 0.1594, "step": 12121 }, { "epoch": 0.9603485838779956, "grad_norm": 1.5607249082531798, "learning_rate": 8.219364180956812e-08, "loss": 0.1105, "step": 12122 }, { "epoch": 0.960427807486631, "grad_norm": 1.8509707889624776, "learning_rate": 8.186564489428561e-08, "loss": 0.1749, "step": 12123 }, { "epoch": 0.9605070310952664, "grad_norm": 1.5640763931704413, "learning_rate": 8.153830104042582e-08, "loss": 0.133, "step": 12124 }, { "epoch": 0.9605862547039018, "grad_norm": 1.3064404636144864, "learning_rate": 8.121161026954482e-08, "loss": 0.1123, "step": 12125 }, { "epoch": 0.9606654783125371, "grad_norm": 1.4646992759527617, "learning_rate": 8.088557260315322e-08, "loss": 0.1334, "step": 12126 }, { "epoch": 0.9607447019211726, "grad_norm": 1.8387419792084765, "learning_rate": 8.056018806271937e-08, "loss": 0.1965, "step": 12127 }, { "epoch": 0.9608239255298079, "grad_norm": 1.392154643132027, "learning_rate": 8.023545666966726e-08, "loss": 0.1189, "step": 12128 }, { "epoch": 0.9609031491384432, "grad_norm": 1.8625017470286498, "learning_rate": 7.991137844537977e-08, "loss": 0.1744, "step": 12129 }, { "epoch": 0.9609823727470787, "grad_norm": 1.4608593105457481, "learning_rate": 7.958795341119541e-08, "loss": 0.0691, "step": 12130 }, { "epoch": 0.961061596355714, "grad_norm": 1.6072790194870434, "learning_rate": 7.926518158841045e-08, "loss": 0.1343, "step": 12131 }, { "epoch": 0.9611408199643494, "grad_norm": 1.7507664759807247, "learning_rate": 7.894306299827791e-08, "loss": 0.1621, "step": 12132 }, { "epoch": 0.9612200435729847, "grad_norm": 1.8596896128581366, "learning_rate": 7.86215976620075e-08, "loss": 0.132, "step": 12133 }, { "epoch": 0.9612992671816202, "grad_norm": 1.7348715749835064, "learning_rate": 7.83007856007667e-08, "loss": 0.2028, "step": 12134 }, { "epoch": 0.9613784907902555, "grad_norm": 1.774924733682001, "learning_rate": 7.798062683567864e-08, "loss": 0.1264, "step": 12135 }, { "epoch": 0.9614577143988908, "grad_norm": 1.7624686262664964, "learning_rate": 7.766112138782422e-08, "loss": 0.1119, "step": 12136 }, { "epoch": 0.9615369380075263, "grad_norm": 1.8687604332498013, "learning_rate": 7.734226927824106e-08, "loss": 0.1226, "step": 12137 }, { "epoch": 0.9616161616161616, "grad_norm": 1.5491955344841446, "learning_rate": 7.70240705279257e-08, "loss": 0.1341, "step": 12138 }, { "epoch": 0.961695385224797, "grad_norm": 1.9313415441824746, "learning_rate": 7.670652515782917e-08, "loss": 0.1488, "step": 12139 }, { "epoch": 0.9617746088334324, "grad_norm": 1.1323492233667762, "learning_rate": 7.638963318886028e-08, "loss": 0.0936, "step": 12140 }, { "epoch": 0.9618538324420678, "grad_norm": 1.6864150245433278, "learning_rate": 7.607339464188346e-08, "loss": 0.1498, "step": 12141 }, { "epoch": 0.9619330560507031, "grad_norm": 2.1217611676178927, "learning_rate": 7.575780953772427e-08, "loss": 0.2073, "step": 12142 }, { "epoch": 0.9620122796593384, "grad_norm": 1.621069799261994, "learning_rate": 7.544287789715943e-08, "loss": 0.1631, "step": 12143 }, { "epoch": 0.9620915032679739, "grad_norm": 1.761778432246736, "learning_rate": 7.51285997409279e-08, "loss": 0.1343, "step": 12144 }, { "epoch": 0.9621707268766092, "grad_norm": 2.0516542711916643, "learning_rate": 7.481497508972313e-08, "loss": 0.176, "step": 12145 }, { "epoch": 0.9622499504852446, "grad_norm": 1.7309353700905166, "learning_rate": 7.450200396419416e-08, "loss": 0.1813, "step": 12146 }, { "epoch": 0.96232917409388, "grad_norm": 2.4858564225928594, "learning_rate": 7.418968638495006e-08, "loss": 0.2158, "step": 12147 }, { "epoch": 0.9624083977025153, "grad_norm": 1.090230145992707, "learning_rate": 7.387802237255658e-08, "loss": 0.0991, "step": 12148 }, { "epoch": 0.9624876213111507, "grad_norm": 1.8472206510931541, "learning_rate": 7.35670119475329e-08, "loss": 0.2088, "step": 12149 }, { "epoch": 0.9625668449197861, "grad_norm": 1.425841344524634, "learning_rate": 7.325665513035707e-08, "loss": 0.1284, "step": 12150 }, { "epoch": 0.9626460685284215, "grad_norm": 1.6105599148447856, "learning_rate": 7.294695194146829e-08, "loss": 0.1495, "step": 12151 }, { "epoch": 0.9627252921370568, "grad_norm": 1.75081021673955, "learning_rate": 7.263790240125579e-08, "loss": 0.1864, "step": 12152 }, { "epoch": 0.9628045157456923, "grad_norm": 2.2093693410274056, "learning_rate": 7.232950653006998e-08, "loss": 0.2369, "step": 12153 }, { "epoch": 0.9628837393543276, "grad_norm": 1.8158126265179304, "learning_rate": 7.202176434821683e-08, "loss": 0.1718, "step": 12154 }, { "epoch": 0.9629629629629629, "grad_norm": 1.6057954680335182, "learning_rate": 7.171467587596126e-08, "loss": 0.1475, "step": 12155 }, { "epoch": 0.9630421865715983, "grad_norm": 1.8303159105234104, "learning_rate": 7.140824113352151e-08, "loss": 0.1737, "step": 12156 }, { "epoch": 0.9631214101802337, "grad_norm": 1.5569510451554864, "learning_rate": 7.110246014107592e-08, "loss": 0.204, "step": 12157 }, { "epoch": 0.9632006337888691, "grad_norm": 1.8077520298408258, "learning_rate": 7.079733291875945e-08, "loss": 0.1592, "step": 12158 }, { "epoch": 0.9632798573975044, "grad_norm": 1.5956800822433268, "learning_rate": 7.049285948666052e-08, "loss": 0.1561, "step": 12159 }, { "epoch": 0.9633590810061399, "grad_norm": 1.9266238742232196, "learning_rate": 7.018903986483083e-08, "loss": 0.1822, "step": 12160 }, { "epoch": 0.9634383046147752, "grad_norm": 1.5554891946202893, "learning_rate": 6.988587407327219e-08, "loss": 0.1846, "step": 12161 }, { "epoch": 0.9635175282234105, "grad_norm": 1.3742648690595887, "learning_rate": 6.958336213194972e-08, "loss": 0.1232, "step": 12162 }, { "epoch": 0.963596751832046, "grad_norm": 1.4240397140485792, "learning_rate": 6.928150406077861e-08, "loss": 0.1356, "step": 12163 }, { "epoch": 0.9636759754406813, "grad_norm": 2.009020871528144, "learning_rate": 6.89802998796385e-08, "loss": 0.1603, "step": 12164 }, { "epoch": 0.9637551990493167, "grad_norm": 2.343740539239381, "learning_rate": 6.867974960836022e-08, "loss": 0.1993, "step": 12165 }, { "epoch": 0.963834422657952, "grad_norm": 1.372622452307348, "learning_rate": 6.837985326673457e-08, "loss": 0.1606, "step": 12166 }, { "epoch": 0.9639136462665875, "grad_norm": 2.2409000862998547, "learning_rate": 6.80806108745069e-08, "loss": 0.2391, "step": 12167 }, { "epoch": 0.9639928698752228, "grad_norm": 1.4134459308277265, "learning_rate": 6.778202245138144e-08, "loss": 0.1267, "step": 12168 }, { "epoch": 0.9640720934838581, "grad_norm": 1.1118892698380483, "learning_rate": 6.748408801701911e-08, "loss": 0.073, "step": 12169 }, { "epoch": 0.9641513170924936, "grad_norm": 1.759762943514985, "learning_rate": 6.718680759103757e-08, "loss": 0.1845, "step": 12170 }, { "epoch": 0.9642305407011289, "grad_norm": 1.934575108634639, "learning_rate": 6.689018119301227e-08, "loss": 0.1702, "step": 12171 }, { "epoch": 0.9643097643097643, "grad_norm": 1.5966213941066827, "learning_rate": 6.659420884247203e-08, "loss": 0.1662, "step": 12172 }, { "epoch": 0.9643889879183997, "grad_norm": 1.4215974566947838, "learning_rate": 6.629889055890682e-08, "loss": 0.0979, "step": 12173 }, { "epoch": 0.9644682115270351, "grad_norm": 1.7061554365021858, "learning_rate": 6.600422636176219e-08, "loss": 0.1798, "step": 12174 }, { "epoch": 0.9645474351356704, "grad_norm": 1.5023981290093678, "learning_rate": 6.571021627043928e-08, "loss": 0.1095, "step": 12175 }, { "epoch": 0.9646266587443058, "grad_norm": 1.9367645147158803, "learning_rate": 6.541686030429817e-08, "loss": 0.1778, "step": 12176 }, { "epoch": 0.9647058823529412, "grad_norm": 1.3596421973220458, "learning_rate": 6.512415848265453e-08, "loss": 0.1268, "step": 12177 }, { "epoch": 0.9647851059615765, "grad_norm": 1.3793876622542929, "learning_rate": 6.48321108247818e-08, "loss": 0.1279, "step": 12178 }, { "epoch": 0.964864329570212, "grad_norm": 1.638438377080994, "learning_rate": 6.454071734990907e-08, "loss": 0.195, "step": 12179 }, { "epoch": 0.9649435531788473, "grad_norm": 1.7042653753711203, "learning_rate": 6.424997807722433e-08, "loss": 0.232, "step": 12180 }, { "epoch": 0.9650227767874827, "grad_norm": 1.7291737471909923, "learning_rate": 6.395989302587113e-08, "loss": 0.1246, "step": 12181 }, { "epoch": 0.965102000396118, "grad_norm": 2.2099748067056986, "learning_rate": 6.367046221494866e-08, "loss": 0.2676, "step": 12182 }, { "epoch": 0.9651812240047534, "grad_norm": 1.3611406874045902, "learning_rate": 6.33816856635161e-08, "loss": 0.1151, "step": 12183 }, { "epoch": 0.9652604476133888, "grad_norm": 1.401636083475635, "learning_rate": 6.309356339058825e-08, "loss": 0.1121, "step": 12184 }, { "epoch": 0.9653396712220241, "grad_norm": 1.056085526994235, "learning_rate": 6.28060954151355e-08, "loss": 0.095, "step": 12185 }, { "epoch": 0.9654188948306596, "grad_norm": 1.5174559327140367, "learning_rate": 6.251928175608602e-08, "loss": 0.1461, "step": 12186 }, { "epoch": 0.9654981184392949, "grad_norm": 1.9305071683652986, "learning_rate": 6.223312243232693e-08, "loss": 0.2208, "step": 12187 }, { "epoch": 0.9655773420479303, "grad_norm": 1.9536493315406718, "learning_rate": 6.194761746269762e-08, "loss": 0.2035, "step": 12188 }, { "epoch": 0.9656565656565657, "grad_norm": 1.8463156244153012, "learning_rate": 6.16627668659997e-08, "loss": 0.1494, "step": 12189 }, { "epoch": 0.965735789265201, "grad_norm": 1.4768600429782186, "learning_rate": 6.137857066098929e-08, "loss": 0.1305, "step": 12190 }, { "epoch": 0.9658150128738364, "grad_norm": 1.5299778278325868, "learning_rate": 6.109502886637697e-08, "loss": 0.1461, "step": 12191 }, { "epoch": 0.9658942364824717, "grad_norm": 1.1428062978459437, "learning_rate": 6.081214150083447e-08, "loss": 0.0699, "step": 12192 }, { "epoch": 0.9659734600911072, "grad_norm": 2.508774274921111, "learning_rate": 6.052990858298801e-08, "loss": 0.1779, "step": 12193 }, { "epoch": 0.9660526836997425, "grad_norm": 1.6106357260987798, "learning_rate": 6.024833013142272e-08, "loss": 0.1547, "step": 12194 }, { "epoch": 0.966131907308378, "grad_norm": 1.5543591071303986, "learning_rate": 5.9967406164676e-08, "loss": 0.1606, "step": 12195 }, { "epoch": 0.9662111309170133, "grad_norm": 1.319013454416618, "learning_rate": 5.96871367012486e-08, "loss": 0.1042, "step": 12196 }, { "epoch": 0.9662903545256486, "grad_norm": 1.4304249431689604, "learning_rate": 5.9407521759592414e-08, "loss": 0.1433, "step": 12197 }, { "epoch": 0.966369578134284, "grad_norm": 1.346567722446685, "learning_rate": 5.912856135812051e-08, "loss": 0.1191, "step": 12198 }, { "epoch": 0.9664488017429194, "grad_norm": 1.8214917433769506, "learning_rate": 5.8850255515200405e-08, "loss": 0.1925, "step": 12199 }, { "epoch": 0.9665280253515548, "grad_norm": 1.6687924048278213, "learning_rate": 5.857260424915634e-08, "loss": 0.1715, "step": 12200 }, { "epoch": 0.9666072489601901, "grad_norm": 1.9601689205495383, "learning_rate": 5.8295607578272575e-08, "loss": 0.1881, "step": 12201 }, { "epoch": 0.9666864725688256, "grad_norm": 1.7156119412555264, "learning_rate": 5.801926552078563e-08, "loss": 0.1296, "step": 12202 }, { "epoch": 0.9667656961774609, "grad_norm": 1.492349812715188, "learning_rate": 5.774357809489317e-08, "loss": 0.1194, "step": 12203 }, { "epoch": 0.9668449197860962, "grad_norm": 1.5796337554619697, "learning_rate": 5.746854531874624e-08, "loss": 0.1766, "step": 12204 }, { "epoch": 0.9669241433947316, "grad_norm": 1.3449913608313366, "learning_rate": 5.7194167210454785e-08, "loss": 0.1081, "step": 12205 }, { "epoch": 0.967003367003367, "grad_norm": 1.7063251682300757, "learning_rate": 5.692044378808659e-08, "loss": 0.1278, "step": 12206 }, { "epoch": 0.9670825906120024, "grad_norm": 1.435370766426812, "learning_rate": 5.664737506966389e-08, "loss": 0.1511, "step": 12207 }, { "epoch": 0.9671618142206377, "grad_norm": 1.742852558468855, "learning_rate": 5.6374961073166757e-08, "loss": 0.1953, "step": 12208 }, { "epoch": 0.9672410378292732, "grad_norm": 1.4578707634976182, "learning_rate": 5.610320181653306e-08, "loss": 0.153, "step": 12209 }, { "epoch": 0.9673202614379085, "grad_norm": 1.5743535480838153, "learning_rate": 5.583209731765626e-08, "loss": 0.1118, "step": 12210 }, { "epoch": 0.9673994850465438, "grad_norm": 1.6567126193519128, "learning_rate": 5.5561647594388756e-08, "loss": 0.1464, "step": 12211 }, { "epoch": 0.9674787086551793, "grad_norm": 1.6756614832940175, "learning_rate": 5.529185266453629e-08, "loss": 0.1592, "step": 12212 }, { "epoch": 0.9675579322638146, "grad_norm": 1.4557702155772512, "learning_rate": 5.502271254586356e-08, "loss": 0.101, "step": 12213 }, { "epoch": 0.96763715587245, "grad_norm": 1.5122756097834296, "learning_rate": 5.4754227256094136e-08, "loss": 0.129, "step": 12214 }, { "epoch": 0.9677163794810854, "grad_norm": 1.1905356724898708, "learning_rate": 5.4486396812906125e-08, "loss": 0.0793, "step": 12215 }, { "epoch": 0.9677956030897208, "grad_norm": 1.5804237242200136, "learning_rate": 5.421922123393208e-08, "loss": 0.1969, "step": 12216 }, { "epoch": 0.9678748266983561, "grad_norm": 1.5749341215106953, "learning_rate": 5.395270053676793e-08, "loss": 0.1146, "step": 12217 }, { "epoch": 0.9679540503069914, "grad_norm": 1.744983224373391, "learning_rate": 5.3686834738960744e-08, "loss": 0.1874, "step": 12218 }, { "epoch": 0.9680332739156269, "grad_norm": 1.4598429596910647, "learning_rate": 5.3421623858016525e-08, "loss": 0.1597, "step": 12219 }, { "epoch": 0.9681124975242622, "grad_norm": 1.3191276517370558, "learning_rate": 5.3157067911399076e-08, "loss": 0.1136, "step": 12220 }, { "epoch": 0.9681917211328976, "grad_norm": 1.6665480109621238, "learning_rate": 5.289316691652668e-08, "loss": 0.16, "step": 12221 }, { "epoch": 0.968270944741533, "grad_norm": 1.6830878361407333, "learning_rate": 5.2629920890777676e-08, "loss": 0.1563, "step": 12222 }, { "epoch": 0.9683501683501684, "grad_norm": 1.4552761614082275, "learning_rate": 5.236732985148374e-08, "loss": 0.1392, "step": 12223 }, { "epoch": 0.9684293919588037, "grad_norm": 1.6979146159028426, "learning_rate": 5.21053938159366e-08, "loss": 0.1195, "step": 12224 }, { "epoch": 0.9685086155674391, "grad_norm": 1.6490534324196082, "learning_rate": 5.1844112801383576e-08, "loss": 0.1387, "step": 12225 }, { "epoch": 0.9685878391760745, "grad_norm": 1.6727387044155833, "learning_rate": 5.158348682502756e-08, "loss": 0.1632, "step": 12226 }, { "epoch": 0.9686670627847098, "grad_norm": 1.9233115750074272, "learning_rate": 5.1323515904031506e-08, "loss": 0.218, "step": 12227 }, { "epoch": 0.9687462863933453, "grad_norm": 2.0701705368404735, "learning_rate": 5.1064200055510606e-08, "loss": 0.1902, "step": 12228 }, { "epoch": 0.9688255100019806, "grad_norm": 1.7573631797742955, "learning_rate": 5.080553929654119e-08, "loss": 0.1573, "step": 12229 }, { "epoch": 0.9689047336106159, "grad_norm": 1.4248217123443074, "learning_rate": 5.05475336441541e-08, "loss": 0.0989, "step": 12230 }, { "epoch": 0.9689839572192513, "grad_norm": 1.7188010926404635, "learning_rate": 5.0290183115339065e-08, "loss": 0.1603, "step": 12231 }, { "epoch": 0.9690631808278867, "grad_norm": 2.0484794131566724, "learning_rate": 5.003348772704031e-08, "loss": 0.128, "step": 12232 }, { "epoch": 0.9691424044365221, "grad_norm": 1.8423067705510663, "learning_rate": 4.977744749615987e-08, "loss": 0.1826, "step": 12233 }, { "epoch": 0.9692216280451574, "grad_norm": 1.3726862045873143, "learning_rate": 4.9522062439557595e-08, "loss": 0.1162, "step": 12234 }, { "epoch": 0.9693008516537929, "grad_norm": 1.2156007068105075, "learning_rate": 4.926733257404892e-08, "loss": 0.0933, "step": 12235 }, { "epoch": 0.9693800752624282, "grad_norm": 2.118271358406553, "learning_rate": 4.901325791640599e-08, "loss": 0.2133, "step": 12236 }, { "epoch": 0.9694592988710635, "grad_norm": 1.4316443331628415, "learning_rate": 4.8759838483358745e-08, "loss": 0.1363, "step": 12237 }, { "epoch": 0.969538522479699, "grad_norm": 2.231133858446585, "learning_rate": 4.850707429159496e-08, "loss": 0.1442, "step": 12238 }, { "epoch": 0.9696177460883343, "grad_norm": 1.858483934633542, "learning_rate": 4.825496535775576e-08, "loss": 0.2662, "step": 12239 }, { "epoch": 0.9696969696969697, "grad_norm": 1.6228771538490545, "learning_rate": 4.800351169844231e-08, "loss": 0.276, "step": 12240 }, { "epoch": 0.969776193305605, "grad_norm": 1.9197277747048371, "learning_rate": 4.7752713330212475e-08, "loss": 0.1544, "step": 12241 }, { "epoch": 0.9698554169142405, "grad_norm": 1.5942272946424842, "learning_rate": 4.7502570269578605e-08, "loss": 0.179, "step": 12242 }, { "epoch": 0.9699346405228758, "grad_norm": 1.176154300957126, "learning_rate": 4.725308253301197e-08, "loss": 0.1173, "step": 12243 }, { "epoch": 0.9700138641315111, "grad_norm": 1.473245821437402, "learning_rate": 4.7004250136940547e-08, "loss": 0.1358, "step": 12244 }, { "epoch": 0.9700930877401466, "grad_norm": 1.6579333250109767, "learning_rate": 4.675607309774899e-08, "loss": 0.1719, "step": 12245 }, { "epoch": 0.9701723113487819, "grad_norm": 1.3872893085772509, "learning_rate": 4.650855143177757e-08, "loss": 0.1332, "step": 12246 }, { "epoch": 0.9702515349574173, "grad_norm": 1.6763678585168633, "learning_rate": 4.626168515532548e-08, "loss": 0.164, "step": 12247 }, { "epoch": 0.9703307585660527, "grad_norm": 1.3758037686984572, "learning_rate": 4.6015474284646366e-08, "loss": 0.1185, "step": 12248 }, { "epoch": 0.9704099821746881, "grad_norm": 1.4678518549071355, "learning_rate": 4.576991883595283e-08, "loss": 0.1258, "step": 12249 }, { "epoch": 0.9704892057833234, "grad_norm": 1.6874343154693545, "learning_rate": 4.5525018825414157e-08, "loss": 0.1613, "step": 12250 }, { "epoch": 0.9705684293919588, "grad_norm": 1.923420199133331, "learning_rate": 4.528077426915412e-08, "loss": 0.1577, "step": 12251 }, { "epoch": 0.9706476530005942, "grad_norm": 1.9357821043678036, "learning_rate": 4.50371851832565e-08, "loss": 0.1686, "step": 12252 }, { "epoch": 0.9707268766092295, "grad_norm": 1.6241608989308536, "learning_rate": 4.4794251583759604e-08, "loss": 0.1544, "step": 12253 }, { "epoch": 0.970806100217865, "grad_norm": 1.8138886908027003, "learning_rate": 4.4551973486660625e-08, "loss": 0.1341, "step": 12254 }, { "epoch": 0.9708853238265003, "grad_norm": 1.6765462657123031, "learning_rate": 4.431035090791125e-08, "loss": 0.2141, "step": 12255 }, { "epoch": 0.9709645474351357, "grad_norm": 1.2930391348615318, "learning_rate": 4.4069383863420966e-08, "loss": 0.1158, "step": 12256 }, { "epoch": 0.971043771043771, "grad_norm": 2.317494027905004, "learning_rate": 4.38290723690582e-08, "loss": 0.2502, "step": 12257 }, { "epoch": 0.9711229946524064, "grad_norm": 1.2881238163666477, "learning_rate": 4.3589416440643626e-08, "loss": 0.0993, "step": 12258 }, { "epoch": 0.9712022182610418, "grad_norm": 1.2386644075052187, "learning_rate": 4.335041609396018e-08, "loss": 0.0794, "step": 12259 }, { "epoch": 0.9712814418696771, "grad_norm": 1.4977607827020032, "learning_rate": 4.3112071344741935e-08, "loss": 0.122, "step": 12260 }, { "epoch": 0.9713606654783126, "grad_norm": 1.7435984247342418, "learning_rate": 4.287438220868523e-08, "loss": 0.1473, "step": 12261 }, { "epoch": 0.9714398890869479, "grad_norm": 1.672742217943731, "learning_rate": 4.263734870143976e-08, "loss": 0.1694, "step": 12262 }, { "epoch": 0.9715191126955833, "grad_norm": 1.5515555579461853, "learning_rate": 4.2400970838613057e-08, "loss": 0.1667, "step": 12263 }, { "epoch": 0.9715983363042187, "grad_norm": 1.4113169346013799, "learning_rate": 4.216524863576932e-08, "loss": 0.123, "step": 12264 }, { "epoch": 0.971677559912854, "grad_norm": 1.4795753906771039, "learning_rate": 4.1930182108430584e-08, "loss": 0.1157, "step": 12265 }, { "epoch": 0.9717567835214894, "grad_norm": 1.4842462040286233, "learning_rate": 4.1695771272073357e-08, "loss": 0.0996, "step": 12266 }, { "epoch": 0.9718360071301247, "grad_norm": 1.5847326353624451, "learning_rate": 4.146201614213419e-08, "loss": 0.1538, "step": 12267 }, { "epoch": 0.9719152307387602, "grad_norm": 1.7115599138195556, "learning_rate": 4.1228916734002976e-08, "loss": 0.1423, "step": 12268 }, { "epoch": 0.9719944543473955, "grad_norm": 1.922178548584178, "learning_rate": 4.099647306302856e-08, "loss": 0.255, "step": 12269 }, { "epoch": 0.972073677956031, "grad_norm": 1.664771845151745, "learning_rate": 4.076468514451759e-08, "loss": 0.1712, "step": 12270 }, { "epoch": 0.9721529015646663, "grad_norm": 1.4457877086651802, "learning_rate": 4.0533552993731186e-08, "loss": 0.1139, "step": 12271 }, { "epoch": 0.9722321251733016, "grad_norm": 1.5502760742015664, "learning_rate": 4.030307662588939e-08, "loss": 0.1346, "step": 12272 }, { "epoch": 0.972311348781937, "grad_norm": 1.2962776593766618, "learning_rate": 4.007325605616563e-08, "loss": 0.1058, "step": 12273 }, { "epoch": 0.9723905723905724, "grad_norm": 1.9403016994656344, "learning_rate": 3.9844091299694466e-08, "loss": 0.1974, "step": 12274 }, { "epoch": 0.9724697959992078, "grad_norm": 1.9776788389513635, "learning_rate": 3.961558237156493e-08, "loss": 0.1763, "step": 12275 }, { "epoch": 0.9725490196078431, "grad_norm": 1.4671565254664445, "learning_rate": 3.9387729286821666e-08, "loss": 0.1046, "step": 12276 }, { "epoch": 0.9726282432164786, "grad_norm": 1.485582103943266, "learning_rate": 3.9160532060470435e-08, "loss": 0.1522, "step": 12277 }, { "epoch": 0.9727074668251139, "grad_norm": 1.4942999489691178, "learning_rate": 3.893399070746928e-08, "loss": 0.145, "step": 12278 }, { "epoch": 0.9727866904337492, "grad_norm": 1.80356809190626, "learning_rate": 3.870810524273516e-08, "loss": 0.1752, "step": 12279 }, { "epoch": 0.9728659140423846, "grad_norm": 2.113860172918961, "learning_rate": 3.8482875681140616e-08, "loss": 0.1395, "step": 12280 }, { "epoch": 0.97294513765102, "grad_norm": 1.8215794032153978, "learning_rate": 3.8258302037518234e-08, "loss": 0.1625, "step": 12281 }, { "epoch": 0.9730243612596554, "grad_norm": 1.4758744946163298, "learning_rate": 3.803438432665396e-08, "loss": 0.146, "step": 12282 }, { "epoch": 0.9731035848682907, "grad_norm": 1.2525440344160492, "learning_rate": 3.781112256329045e-08, "loss": 0.1088, "step": 12283 }, { "epoch": 0.9731828084769262, "grad_norm": 2.0848807064143045, "learning_rate": 3.758851676213038e-08, "loss": 0.1802, "step": 12284 }, { "epoch": 0.9732620320855615, "grad_norm": 1.8031336795280852, "learning_rate": 3.7366566937829804e-08, "loss": 0.1865, "step": 12285 }, { "epoch": 0.9733412556941968, "grad_norm": 1.558698712066729, "learning_rate": 3.714527310500371e-08, "loss": 0.1664, "step": 12286 }, { "epoch": 0.9734204793028323, "grad_norm": 1.6940523142881616, "learning_rate": 3.692463527822376e-08, "loss": 0.1948, "step": 12287 }, { "epoch": 0.9734997029114676, "grad_norm": 1.4824452484540096, "learning_rate": 3.670465347201724e-08, "loss": 0.2134, "step": 12288 }, { "epoch": 0.973578926520103, "grad_norm": 1.8051065737107204, "learning_rate": 3.6485327700869214e-08, "loss": 0.1553, "step": 12289 }, { "epoch": 0.9736581501287384, "grad_norm": 1.5249407529640326, "learning_rate": 3.6266657979220356e-08, "loss": 0.167, "step": 12290 }, { "epoch": 0.9737373737373738, "grad_norm": 1.652902783417573, "learning_rate": 3.604864432147026e-08, "loss": 0.1402, "step": 12291 }, { "epoch": 0.9738165973460091, "grad_norm": 1.4538661599255542, "learning_rate": 3.5831286741973006e-08, "loss": 0.1032, "step": 12292 }, { "epoch": 0.9738958209546444, "grad_norm": 1.5529578926696304, "learning_rate": 3.561458525504047e-08, "loss": 0.143, "step": 12293 }, { "epoch": 0.9739750445632799, "grad_norm": 1.6498829464772267, "learning_rate": 3.539853987494235e-08, "loss": 0.1721, "step": 12294 }, { "epoch": 0.9740542681719152, "grad_norm": 1.0127699652748523, "learning_rate": 3.518315061590394e-08, "loss": 0.0764, "step": 12295 }, { "epoch": 0.9741334917805506, "grad_norm": 1.467772557606543, "learning_rate": 3.496841749210722e-08, "loss": 0.0901, "step": 12296 }, { "epoch": 0.974212715389186, "grad_norm": 1.38345115040309, "learning_rate": 3.4754340517691996e-08, "loss": 0.1245, "step": 12297 }, { "epoch": 0.9742919389978214, "grad_norm": 1.6873371470936867, "learning_rate": 3.454091970675366e-08, "loss": 0.1666, "step": 12298 }, { "epoch": 0.9743711626064567, "grad_norm": 1.7562563910055584, "learning_rate": 3.4328155073344306e-08, "loss": 0.1563, "step": 12299 }, { "epoch": 0.9744503862150921, "grad_norm": 1.7035885288201764, "learning_rate": 3.411604663147494e-08, "loss": 0.1796, "step": 12300 }, { "epoch": 0.9745296098237275, "grad_norm": 1.6751344057612154, "learning_rate": 3.3904594395111066e-08, "loss": 0.1512, "step": 12301 }, { "epoch": 0.9746088334323628, "grad_norm": 1.729286171717593, "learning_rate": 3.369379837817599e-08, "loss": 0.1698, "step": 12302 }, { "epoch": 0.9746880570409983, "grad_norm": 1.8535654306516638, "learning_rate": 3.3483658594548606e-08, "loss": 0.1227, "step": 12303 }, { "epoch": 0.9747672806496336, "grad_norm": 1.5419905694767184, "learning_rate": 3.327417505806785e-08, "loss": 0.1009, "step": 12304 }, { "epoch": 0.9748465042582689, "grad_norm": 1.3946092062800934, "learning_rate": 3.30653477825249e-08, "loss": 0.1402, "step": 12305 }, { "epoch": 0.9749257278669043, "grad_norm": 1.7567383203217113, "learning_rate": 3.2857176781671e-08, "loss": 0.156, "step": 12306 }, { "epoch": 0.9750049514755397, "grad_norm": 1.3206105291682744, "learning_rate": 3.264966206921294e-08, "loss": 0.1254, "step": 12307 }, { "epoch": 0.9750841750841751, "grad_norm": 1.1594276730411217, "learning_rate": 3.244280365881536e-08, "loss": 0.1018, "step": 12308 }, { "epoch": 0.9751633986928104, "grad_norm": 2.0649529144558434, "learning_rate": 3.223660156409847e-08, "loss": 0.2217, "step": 12309 }, { "epoch": 0.9752426223014459, "grad_norm": 1.5251856628053613, "learning_rate": 3.203105579863919e-08, "loss": 0.133, "step": 12310 }, { "epoch": 0.9753218459100812, "grad_norm": 1.7406985643029933, "learning_rate": 3.1826166375972246e-08, "loss": 0.1759, "step": 12311 }, { "epoch": 0.9754010695187165, "grad_norm": 2.054767277732287, "learning_rate": 3.162193330958796e-08, "loss": 0.165, "step": 12312 }, { "epoch": 0.975480293127352, "grad_norm": 1.5064493767195608, "learning_rate": 3.141835661293557e-08, "loss": 0.1337, "step": 12313 }, { "epoch": 0.9755595167359873, "grad_norm": 1.1727129082290657, "learning_rate": 3.12154362994177e-08, "loss": 0.0985, "step": 12314 }, { "epoch": 0.9756387403446227, "grad_norm": 1.3346813257715266, "learning_rate": 3.1013172382396984e-08, "loss": 0.079, "step": 12315 }, { "epoch": 0.975717963953258, "grad_norm": 1.537474562921926, "learning_rate": 3.0811564875190544e-08, "loss": 0.1364, "step": 12316 }, { "epoch": 0.9757971875618935, "grad_norm": 1.5185982233065618, "learning_rate": 3.061061379107555e-08, "loss": 0.1302, "step": 12317 }, { "epoch": 0.9758764111705288, "grad_norm": 1.649497239936244, "learning_rate": 3.04103191432803e-08, "loss": 0.1393, "step": 12318 }, { "epoch": 0.9759556347791641, "grad_norm": 1.7964027018384028, "learning_rate": 3.0210680944995354e-08, "loss": 0.1821, "step": 12319 }, { "epoch": 0.9760348583877996, "grad_norm": 1.3749182469481005, "learning_rate": 3.001169920936575e-08, "loss": 0.117, "step": 12320 }, { "epoch": 0.9761140819964349, "grad_norm": 1.7813499193635711, "learning_rate": 2.981337394949324e-08, "loss": 0.1781, "step": 12321 }, { "epoch": 0.9761933056050703, "grad_norm": 1.8254876787685672, "learning_rate": 2.961570517843626e-08, "loss": 0.1719, "step": 12322 }, { "epoch": 0.9762725292137057, "grad_norm": 1.5373118917235846, "learning_rate": 2.9418692909211066e-08, "loss": 0.0946, "step": 12323 }, { "epoch": 0.9763517528223411, "grad_norm": 2.294324286088914, "learning_rate": 2.9222337154789504e-08, "loss": 0.1841, "step": 12324 }, { "epoch": 0.9764309764309764, "grad_norm": 1.7009510254794615, "learning_rate": 2.902663792810012e-08, "loss": 0.1744, "step": 12325 }, { "epoch": 0.9765102000396118, "grad_norm": 1.3767262510544032, "learning_rate": 2.8831595242030387e-08, "loss": 0.1186, "step": 12326 }, { "epoch": 0.9765894236482472, "grad_norm": 1.7998842911251063, "learning_rate": 2.863720910942114e-08, "loss": 0.1188, "step": 12327 }, { "epoch": 0.9766686472568825, "grad_norm": 1.2628103947452436, "learning_rate": 2.8443479543073248e-08, "loss": 0.1157, "step": 12328 }, { "epoch": 0.976747870865518, "grad_norm": 1.6706260728083613, "learning_rate": 2.825040655574207e-08, "loss": 0.1096, "step": 12329 }, { "epoch": 0.9768270944741533, "grad_norm": 1.9990557068158898, "learning_rate": 2.8057990160139658e-08, "loss": 0.2288, "step": 12330 }, { "epoch": 0.9769063180827887, "grad_norm": 1.7579280682421616, "learning_rate": 2.7866230368936986e-08, "loss": 0.1146, "step": 12331 }, { "epoch": 0.976985541691424, "grad_norm": 1.7279991057868305, "learning_rate": 2.767512719476062e-08, "loss": 0.1875, "step": 12332 }, { "epoch": 0.9770647653000594, "grad_norm": 1.3687024420762037, "learning_rate": 2.7484680650193827e-08, "loss": 0.1241, "step": 12333 }, { "epoch": 0.9771439889086948, "grad_norm": 1.4081058281509866, "learning_rate": 2.729489074777547e-08, "loss": 0.1209, "step": 12334 }, { "epoch": 0.9772232125173301, "grad_norm": 1.7023883912598565, "learning_rate": 2.7105757500002215e-08, "loss": 0.1282, "step": 12335 }, { "epoch": 0.9773024361259656, "grad_norm": 1.7665546038957183, "learning_rate": 2.6917280919329656e-08, "loss": 0.2054, "step": 12336 }, { "epoch": 0.9773816597346009, "grad_norm": 1.386720657581816, "learning_rate": 2.6729461018166758e-08, "loss": 0.096, "step": 12337 }, { "epoch": 0.9774608833432363, "grad_norm": 1.2272890528464564, "learning_rate": 2.654229780887918e-08, "loss": 0.0873, "step": 12338 }, { "epoch": 0.9775401069518717, "grad_norm": 2.0200800657909475, "learning_rate": 2.6355791303792622e-08, "loss": 0.1253, "step": 12339 }, { "epoch": 0.977619330560507, "grad_norm": 2.11011497374221, "learning_rate": 2.6169941515188368e-08, "loss": 0.2692, "step": 12340 }, { "epoch": 0.9776985541691424, "grad_norm": 1.686480819313864, "learning_rate": 2.5984748455301077e-08, "loss": 0.1388, "step": 12341 }, { "epoch": 0.9777777777777777, "grad_norm": 1.7632877333272383, "learning_rate": 2.5800212136326552e-08, "loss": 0.1498, "step": 12342 }, { "epoch": 0.9778570013864132, "grad_norm": 1.4215797813800939, "learning_rate": 2.561633257041507e-08, "loss": 0.1692, "step": 12343 }, { "epoch": 0.9779362249950485, "grad_norm": 1.8101549045097034, "learning_rate": 2.5433109769674724e-08, "loss": 0.224, "step": 12344 }, { "epoch": 0.978015448603684, "grad_norm": 1.5023008720450823, "learning_rate": 2.52505437461692e-08, "loss": 0.1343, "step": 12345 }, { "epoch": 0.9780946722123193, "grad_norm": 1.465131783218658, "learning_rate": 2.5068634511919986e-08, "loss": 0.1606, "step": 12346 }, { "epoch": 0.9781738958209546, "grad_norm": 1.5005017965152394, "learning_rate": 2.4887382078905287e-08, "loss": 0.1081, "step": 12347 }, { "epoch": 0.97825311942959, "grad_norm": 2.3065767553961813, "learning_rate": 2.4706786459058885e-08, "loss": 0.228, "step": 12348 }, { "epoch": 0.9783323430382254, "grad_norm": 1.5593496311131996, "learning_rate": 2.4526847664273488e-08, "loss": 0.1612, "step": 12349 }, { "epoch": 0.9784115666468608, "grad_norm": 1.4836981297066107, "learning_rate": 2.434756570639518e-08, "loss": 0.109, "step": 12350 }, { "epoch": 0.9784907902554961, "grad_norm": 1.7853589878744967, "learning_rate": 2.4168940597230074e-08, "loss": 0.2265, "step": 12351 }, { "epoch": 0.9785700138641316, "grad_norm": 1.335425438605114, "learning_rate": 2.3990972348539864e-08, "loss": 0.1426, "step": 12352 }, { "epoch": 0.9786492374727669, "grad_norm": 1.6431303660377838, "learning_rate": 2.381366097204296e-08, "loss": 0.1716, "step": 12353 }, { "epoch": 0.9787284610814022, "grad_norm": 1.7708768598609295, "learning_rate": 2.363700647941336e-08, "loss": 0.1647, "step": 12354 }, { "epoch": 0.9788076846900376, "grad_norm": 1.2547347260398576, "learning_rate": 2.3461008882283977e-08, "loss": 0.095, "step": 12355 }, { "epoch": 0.978886908298673, "grad_norm": 1.517382104842785, "learning_rate": 2.3285668192243317e-08, "loss": 0.1025, "step": 12356 }, { "epoch": 0.9789661319073084, "grad_norm": 1.506392849719763, "learning_rate": 2.311098442083659e-08, "loss": 0.1622, "step": 12357 }, { "epoch": 0.9790453555159437, "grad_norm": 1.5891836980254153, "learning_rate": 2.293695757956571e-08, "loss": 0.116, "step": 12358 }, { "epoch": 0.9791245791245792, "grad_norm": 1.3760585934136587, "learning_rate": 2.2763587679889288e-08, "loss": 0.1604, "step": 12359 }, { "epoch": 0.9792038027332145, "grad_norm": 1.713441982280558, "learning_rate": 2.2590874733223744e-08, "loss": 0.1372, "step": 12360 }, { "epoch": 0.9792830263418498, "grad_norm": 1.407217576682397, "learning_rate": 2.2418818750939986e-08, "loss": 0.1398, "step": 12361 }, { "epoch": 0.9793622499504853, "grad_norm": 2.05142013732917, "learning_rate": 2.2247419744368946e-08, "loss": 0.2117, "step": 12362 }, { "epoch": 0.9794414735591206, "grad_norm": 1.7065494669617909, "learning_rate": 2.207667772479494e-08, "loss": 0.1743, "step": 12363 }, { "epoch": 0.979520697167756, "grad_norm": 1.5762757522980348, "learning_rate": 2.190659270346118e-08, "loss": 0.1265, "step": 12364 }, { "epoch": 0.9795999207763914, "grad_norm": 1.7896956344573591, "learning_rate": 2.1737164691566502e-08, "loss": 0.1998, "step": 12365 }, { "epoch": 0.9796791443850268, "grad_norm": 1.5755686454952322, "learning_rate": 2.156839370026753e-08, "loss": 0.1253, "step": 12366 }, { "epoch": 0.9797583679936621, "grad_norm": 1.7397258737336976, "learning_rate": 2.140027974067649e-08, "loss": 0.182, "step": 12367 }, { "epoch": 0.9798375916022974, "grad_norm": 1.5180263662006157, "learning_rate": 2.1232822823862297e-08, "loss": 0.1158, "step": 12368 }, { "epoch": 0.9799168152109329, "grad_norm": 1.8696905032132345, "learning_rate": 2.1066022960852806e-08, "loss": 0.1805, "step": 12369 }, { "epoch": 0.9799960388195682, "grad_norm": 1.5758674838547122, "learning_rate": 2.0899880162630336e-08, "loss": 0.1713, "step": 12370 }, { "epoch": 0.9800752624282036, "grad_norm": 1.6070949633931175, "learning_rate": 2.073439444013392e-08, "loss": 0.1055, "step": 12371 }, { "epoch": 0.980154486036839, "grad_norm": 1.2616297100127591, "learning_rate": 2.0569565804260393e-08, "loss": 0.1423, "step": 12372 }, { "epoch": 0.9802337096454744, "grad_norm": 1.7696098281385033, "learning_rate": 2.04053942658633e-08, "loss": 0.1422, "step": 12373 }, { "epoch": 0.9803129332541097, "grad_norm": 2.3166446060251564, "learning_rate": 2.0241879835752875e-08, "loss": 0.2003, "step": 12374 }, { "epoch": 0.9803921568627451, "grad_norm": 0.973956074444193, "learning_rate": 2.0079022524694957e-08, "loss": 0.0906, "step": 12375 }, { "epoch": 0.9804713804713805, "grad_norm": 1.833521156966805, "learning_rate": 1.991682234341208e-08, "loss": 0.1436, "step": 12376 }, { "epoch": 0.9805506040800158, "grad_norm": 1.7486733911926768, "learning_rate": 1.9755279302585696e-08, "loss": 0.1899, "step": 12377 }, { "epoch": 0.9806298276886513, "grad_norm": 1.3368269980677352, "learning_rate": 1.959439341285285e-08, "loss": 0.1195, "step": 12378 }, { "epoch": 0.9807090512972866, "grad_norm": 2.0364620521665926, "learning_rate": 1.943416468480619e-08, "loss": 0.2122, "step": 12379 }, { "epoch": 0.980788274905922, "grad_norm": 1.7251592677506418, "learning_rate": 1.9274593128996155e-08, "loss": 0.1831, "step": 12380 }, { "epoch": 0.9808674985145573, "grad_norm": 1.063597819311851, "learning_rate": 1.9115678755929902e-08, "loss": 0.103, "step": 12381 }, { "epoch": 0.9809467221231927, "grad_norm": 1.4831017040020886, "learning_rate": 1.8957421576071277e-08, "loss": 0.1577, "step": 12382 }, { "epoch": 0.9810259457318281, "grad_norm": 1.85113426373805, "learning_rate": 1.879982159984084e-08, "loss": 0.1607, "step": 12383 }, { "epoch": 0.9811051693404634, "grad_norm": 1.6443047632513856, "learning_rate": 1.864287883761695e-08, "loss": 0.1806, "step": 12384 }, { "epoch": 0.9811843929490989, "grad_norm": 1.1664488402340851, "learning_rate": 1.8486593299730236e-08, "loss": 0.0816, "step": 12385 }, { "epoch": 0.9812636165577342, "grad_norm": 1.595847226597165, "learning_rate": 1.8330964996474688e-08, "loss": 0.1469, "step": 12386 }, { "epoch": 0.9813428401663695, "grad_norm": 1.4409879254381188, "learning_rate": 1.817599393809544e-08, "loss": 0.1208, "step": 12387 }, { "epoch": 0.981422063775005, "grad_norm": 1.387103115696771, "learning_rate": 1.802168013479877e-08, "loss": 0.122, "step": 12388 }, { "epoch": 0.9815012873836403, "grad_norm": 1.647408554712536, "learning_rate": 1.7868023596743224e-08, "loss": 0.1859, "step": 12389 }, { "epoch": 0.9815805109922757, "grad_norm": 1.8336037062497803, "learning_rate": 1.771502433404737e-08, "loss": 0.1536, "step": 12390 }, { "epoch": 0.981659734600911, "grad_norm": 1.3492332601414139, "learning_rate": 1.7562682356786488e-08, "loss": 0.1375, "step": 12391 }, { "epoch": 0.9817389582095465, "grad_norm": 1.3102297016720257, "learning_rate": 1.7410997674989215e-08, "loss": 0.0954, "step": 12392 }, { "epoch": 0.9818181818181818, "grad_norm": 1.5943298437790678, "learning_rate": 1.7259970298645345e-08, "loss": 0.1417, "step": 12393 }, { "epoch": 0.9818974054268171, "grad_norm": 1.4361320944307887, "learning_rate": 1.7109600237698032e-08, "loss": 0.1344, "step": 12394 }, { "epoch": 0.9819766290354526, "grad_norm": 1.538976429551498, "learning_rate": 1.6959887502049356e-08, "loss": 0.1213, "step": 12395 }, { "epoch": 0.9820558526440879, "grad_norm": 1.7975829501764093, "learning_rate": 1.6810832101556984e-08, "loss": 0.1758, "step": 12396 }, { "epoch": 0.9821350762527233, "grad_norm": 1.5534587008111254, "learning_rate": 1.666243404603529e-08, "loss": 0.1174, "step": 12397 }, { "epoch": 0.9822142998613587, "grad_norm": 1.3660759641984646, "learning_rate": 1.651469334525424e-08, "loss": 0.1243, "step": 12398 }, { "epoch": 0.9822935234699941, "grad_norm": 1.268354649410221, "learning_rate": 1.6367610008944935e-08, "loss": 0.1036, "step": 12399 }, { "epoch": 0.9823727470786294, "grad_norm": 1.6621867918107378, "learning_rate": 1.622118404678963e-08, "loss": 0.1546, "step": 12400 }, { "epoch": 0.9824519706872648, "grad_norm": 1.7822762481260703, "learning_rate": 1.607541546843061e-08, "loss": 0.1702, "step": 12401 }, { "epoch": 0.9825311942959002, "grad_norm": 1.5019228339204276, "learning_rate": 1.593030428346576e-08, "loss": 0.0975, "step": 12402 }, { "epoch": 0.9826104179045355, "grad_norm": 1.5644568229964495, "learning_rate": 1.578585050144965e-08, "loss": 0.1381, "step": 12403 }, { "epoch": 0.982689641513171, "grad_norm": 1.5760466205170158, "learning_rate": 1.564205413189468e-08, "loss": 0.1388, "step": 12404 }, { "epoch": 0.9827688651218063, "grad_norm": 1.291388015400666, "learning_rate": 1.5498915184268826e-08, "loss": 0.1539, "step": 12405 }, { "epoch": 0.9828480887304417, "grad_norm": 1.8307683536200092, "learning_rate": 1.5356433667996772e-08, "loss": 0.133, "step": 12406 }, { "epoch": 0.982927312339077, "grad_norm": 1.5639722882261975, "learning_rate": 1.5214609592461015e-08, "loss": 0.1261, "step": 12407 }, { "epoch": 0.9830065359477124, "grad_norm": 1.6015810161272572, "learning_rate": 1.507344296699964e-08, "loss": 0.1531, "step": 12408 }, { "epoch": 0.9830857595563478, "grad_norm": 1.7458563521434858, "learning_rate": 1.4932933800907435e-08, "loss": 0.1959, "step": 12409 }, { "epoch": 0.9831649831649831, "grad_norm": 1.8379263732882256, "learning_rate": 1.4793082103435885e-08, "loss": 0.2069, "step": 12410 }, { "epoch": 0.9832442067736186, "grad_norm": 1.1615904573796025, "learning_rate": 1.4653887883794293e-08, "loss": 0.1017, "step": 12411 }, { "epoch": 0.9833234303822539, "grad_norm": 1.5293867056843282, "learning_rate": 1.451535115114866e-08, "loss": 0.1311, "step": 12412 }, { "epoch": 0.9834026539908893, "grad_norm": 1.7724390307381672, "learning_rate": 1.4377471914619468e-08, "loss": 0.163, "step": 12413 }, { "epoch": 0.9834818775995247, "grad_norm": 1.5914676445095526, "learning_rate": 1.424025018328612e-08, "loss": 0.1193, "step": 12414 }, { "epoch": 0.98356110120816, "grad_norm": 2.0343428409157074, "learning_rate": 1.4103685966183612e-08, "loss": 0.193, "step": 12415 }, { "epoch": 0.9836403248167954, "grad_norm": 1.3555722208666934, "learning_rate": 1.396777927230475e-08, "loss": 0.1288, "step": 12416 }, { "epoch": 0.9837195484254307, "grad_norm": 1.570240237481506, "learning_rate": 1.383253011059682e-08, "loss": 0.1382, "step": 12417 }, { "epoch": 0.9837987720340662, "grad_norm": 1.4183520822735458, "learning_rate": 1.3697938489967144e-08, "loss": 0.1064, "step": 12418 }, { "epoch": 0.9838779956427015, "grad_norm": 1.5676707529226253, "learning_rate": 1.3564004419277522e-08, "loss": 0.1482, "step": 12419 }, { "epoch": 0.983957219251337, "grad_norm": 1.627178275901783, "learning_rate": 1.3430727907346453e-08, "loss": 0.2793, "step": 12420 }, { "epoch": 0.9840364428599723, "grad_norm": 2.039031538972755, "learning_rate": 1.329810896294914e-08, "loss": 0.1992, "step": 12421 }, { "epoch": 0.9841156664686076, "grad_norm": 1.7038378864158576, "learning_rate": 1.3166147594818601e-08, "loss": 0.1794, "step": 12422 }, { "epoch": 0.984194890077243, "grad_norm": 1.7026495195518299, "learning_rate": 1.3034843811644548e-08, "loss": 0.1396, "step": 12423 }, { "epoch": 0.9842741136858784, "grad_norm": 1.9302224215795944, "learning_rate": 1.290419762207007e-08, "loss": 0.1891, "step": 12424 }, { "epoch": 0.9843533372945138, "grad_norm": 1.3202966029866599, "learning_rate": 1.2774209034700503e-08, "loss": 0.129, "step": 12425 }, { "epoch": 0.9844325609031491, "grad_norm": 1.455624966054213, "learning_rate": 1.2644878058093446e-08, "loss": 0.1179, "step": 12426 }, { "epoch": 0.9845117845117846, "grad_norm": 1.7698781678304694, "learning_rate": 1.2516204700765422e-08, "loss": 0.1345, "step": 12427 }, { "epoch": 0.9845910081204199, "grad_norm": 1.7969947142856464, "learning_rate": 1.2388188971188542e-08, "loss": 0.1764, "step": 12428 }, { "epoch": 0.9846702317290552, "grad_norm": 2.1238329988796507, "learning_rate": 1.2260830877792729e-08, "loss": 0.1694, "step": 12429 }, { "epoch": 0.9847494553376906, "grad_norm": 1.7992510263293502, "learning_rate": 1.2134130428962387e-08, "loss": 0.1812, "step": 12430 }, { "epoch": 0.984828678946326, "grad_norm": 1.548033712911175, "learning_rate": 1.2008087633040843e-08, "loss": 0.1212, "step": 12431 }, { "epoch": 0.9849079025549614, "grad_norm": 1.9677071585439214, "learning_rate": 1.1882702498328125e-08, "loss": 0.2039, "step": 12432 }, { "epoch": 0.9849871261635967, "grad_norm": 1.4092562399212292, "learning_rate": 1.175797503307874e-08, "loss": 0.0864, "step": 12433 }, { "epoch": 0.9850663497722322, "grad_norm": 2.134091849388255, "learning_rate": 1.1633905245507227e-08, "loss": 0.1488, "step": 12434 }, { "epoch": 0.9851455733808675, "grad_norm": 1.894407133560039, "learning_rate": 1.1510493143782609e-08, "loss": 0.1336, "step": 12435 }, { "epoch": 0.9852247969895028, "grad_norm": 2.08084771972134, "learning_rate": 1.1387738736029496e-08, "loss": 0.1832, "step": 12436 }, { "epoch": 0.9853040205981383, "grad_norm": 1.7229281735764992, "learning_rate": 1.1265642030331426e-08, "loss": 0.1279, "step": 12437 }, { "epoch": 0.9853832442067736, "grad_norm": 1.5428103897919618, "learning_rate": 1.114420303472974e-08, "loss": 0.1416, "step": 12438 }, { "epoch": 0.985462467815409, "grad_norm": 1.041621391030963, "learning_rate": 1.1023421757216934e-08, "loss": 0.0667, "step": 12439 }, { "epoch": 0.9855416914240444, "grad_norm": 1.1926698127427373, "learning_rate": 1.090329820574887e-08, "loss": 0.0907, "step": 12440 }, { "epoch": 0.9856209150326798, "grad_norm": 1.7650891754571278, "learning_rate": 1.0783832388234772e-08, "loss": 0.2197, "step": 12441 }, { "epoch": 0.9857001386413151, "grad_norm": 1.8156022016626312, "learning_rate": 1.0665024312539462e-08, "loss": 0.2318, "step": 12442 }, { "epoch": 0.9857793622499504, "grad_norm": 1.4554797638441235, "learning_rate": 1.0546873986486682e-08, "loss": 0.1042, "step": 12443 }, { "epoch": 0.9858585858585859, "grad_norm": 1.495279435464479, "learning_rate": 1.0429381417856877e-08, "loss": 0.164, "step": 12444 }, { "epoch": 0.9859378094672212, "grad_norm": 1.5965361214574643, "learning_rate": 1.0312546614384966e-08, "loss": 0.1046, "step": 12445 }, { "epoch": 0.9860170330758566, "grad_norm": 1.8227203231951694, "learning_rate": 1.0196369583763688e-08, "loss": 0.1967, "step": 12446 }, { "epoch": 0.986096256684492, "grad_norm": 1.5944055995617108, "learning_rate": 1.0080850333644698e-08, "loss": 0.1369, "step": 12447 }, { "epoch": 0.9861754802931274, "grad_norm": 1.5407824429045571, "learning_rate": 9.965988871633025e-09, "loss": 0.1143, "step": 12448 }, { "epoch": 0.9862547039017627, "grad_norm": 1.5726821932994055, "learning_rate": 9.851785205291508e-09, "loss": 0.1395, "step": 12449 }, { "epoch": 0.9863339275103981, "grad_norm": 1.5981491445323333, "learning_rate": 9.738239342141909e-09, "loss": 0.1665, "step": 12450 }, { "epoch": 0.9864131511190335, "grad_norm": 1.8835536194709517, "learning_rate": 9.625351289658247e-09, "loss": 0.135, "step": 12451 }, { "epoch": 0.9864923747276688, "grad_norm": 2.0981991982530874, "learning_rate": 9.513121055273467e-09, "loss": 0.1847, "step": 12452 }, { "epoch": 0.9865715983363043, "grad_norm": 1.2161602171531127, "learning_rate": 9.401548646380543e-09, "loss": 0.0756, "step": 12453 }, { "epoch": 0.9866508219449396, "grad_norm": 1.4432815565664276, "learning_rate": 9.290634070322491e-09, "loss": 0.1613, "step": 12454 }, { "epoch": 0.986730045553575, "grad_norm": 1.4339986456789633, "learning_rate": 9.180377334404577e-09, "loss": 0.0987, "step": 12455 }, { "epoch": 0.9868092691622103, "grad_norm": 1.5112757708050688, "learning_rate": 9.070778445885442e-09, "loss": 0.1678, "step": 12456 }, { "epoch": 0.9868884927708457, "grad_norm": 1.5930466202038724, "learning_rate": 8.961837411982643e-09, "loss": 0.1492, "step": 12457 }, { "epoch": 0.9869677163794811, "grad_norm": 1.5440770106530788, "learning_rate": 8.853554239869333e-09, "loss": 0.155, "step": 12458 }, { "epoch": 0.9870469399881164, "grad_norm": 2.109187059235162, "learning_rate": 8.745928936675363e-09, "loss": 0.1759, "step": 12459 }, { "epoch": 0.9871261635967519, "grad_norm": 1.5907348927131344, "learning_rate": 8.638961509486177e-09, "loss": 0.126, "step": 12460 }, { "epoch": 0.9872053872053872, "grad_norm": 1.2728650579515424, "learning_rate": 8.53265196534725e-09, "loss": 0.1232, "step": 12461 }, { "epoch": 0.9872846108140226, "grad_norm": 1.669839858049219, "learning_rate": 8.427000311256317e-09, "loss": 0.1334, "step": 12462 }, { "epoch": 0.987363834422658, "grad_norm": 1.5214898269807555, "learning_rate": 8.322006554171147e-09, "loss": 0.1106, "step": 12463 }, { "epoch": 0.9874430580312933, "grad_norm": 1.72843950399946, "learning_rate": 8.217670701005098e-09, "loss": 0.1556, "step": 12464 }, { "epoch": 0.9875222816399287, "grad_norm": 1.873778461971483, "learning_rate": 8.113992758628231e-09, "loss": 0.1603, "step": 12465 }, { "epoch": 0.987601505248564, "grad_norm": 1.8446210558315352, "learning_rate": 8.010972733867306e-09, "loss": 0.1642, "step": 12466 }, { "epoch": 0.9876807288571995, "grad_norm": 1.745284742975174, "learning_rate": 7.908610633504676e-09, "loss": 0.1584, "step": 12467 }, { "epoch": 0.9877599524658348, "grad_norm": 1.4580676008636861, "learning_rate": 7.806906464281617e-09, "loss": 0.1217, "step": 12468 }, { "epoch": 0.9878391760744701, "grad_norm": 1.4265942620387675, "learning_rate": 7.70586023289388e-09, "loss": 0.0978, "step": 12469 }, { "epoch": 0.9879183996831056, "grad_norm": 1.9317009627320343, "learning_rate": 7.605471945996146e-09, "loss": 0.1696, "step": 12470 }, { "epoch": 0.9879976232917409, "grad_norm": 1.845378819060387, "learning_rate": 7.50574161019757e-09, "loss": 0.1224, "step": 12471 }, { "epoch": 0.9880768469003763, "grad_norm": 1.3799040928303554, "learning_rate": 7.406669232065122e-09, "loss": 0.1563, "step": 12472 }, { "epoch": 0.9881560705090117, "grad_norm": 1.7504875401876603, "learning_rate": 7.3082548181213635e-09, "loss": 0.1694, "step": 12473 }, { "epoch": 0.9882352941176471, "grad_norm": 1.4143016346263686, "learning_rate": 7.210498374848884e-09, "loss": 0.1938, "step": 12474 }, { "epoch": 0.9883145177262824, "grad_norm": 1.3466417506863884, "learning_rate": 7.113399908681429e-09, "loss": 0.1083, "step": 12475 }, { "epoch": 0.9883937413349178, "grad_norm": 1.253325418021243, "learning_rate": 7.016959426013881e-09, "loss": 0.0888, "step": 12476 }, { "epoch": 0.9884729649435532, "grad_norm": 1.5729106238270223, "learning_rate": 6.9211769331978265e-09, "loss": 0.1459, "step": 12477 }, { "epoch": 0.9885521885521885, "grad_norm": 1.6735713565307373, "learning_rate": 6.8260524365371115e-09, "loss": 0.1461, "step": 12478 }, { "epoch": 0.988631412160824, "grad_norm": 2.2715362557723915, "learning_rate": 6.731585942297836e-09, "loss": 0.216, "step": 12479 }, { "epoch": 0.9887106357694593, "grad_norm": 1.5830046317645625, "learning_rate": 6.637777456698358e-09, "loss": 0.1153, "step": 12480 }, { "epoch": 0.9887898593780947, "grad_norm": 1.908092672299486, "learning_rate": 6.544626985915958e-09, "loss": 0.1592, "step": 12481 }, { "epoch": 0.98886908298673, "grad_norm": 1.569610161505841, "learning_rate": 6.45213453608573e-09, "loss": 0.1117, "step": 12482 }, { "epoch": 0.9889483065953654, "grad_norm": 1.5408769875627841, "learning_rate": 6.360300113295026e-09, "loss": 0.1383, "step": 12483 }, { "epoch": 0.9890275302040008, "grad_norm": 1.5454651306386455, "learning_rate": 6.269123723593451e-09, "loss": 0.132, "step": 12484 }, { "epoch": 0.9891067538126361, "grad_norm": 1.5499991004134308, "learning_rate": 6.178605372982871e-09, "loss": 0.1151, "step": 12485 }, { "epoch": 0.9891859774212716, "grad_norm": 2.1185039543375632, "learning_rate": 6.088745067424073e-09, "loss": 0.2297, "step": 12486 }, { "epoch": 0.9892652010299069, "grad_norm": 1.695955702178954, "learning_rate": 5.9995428128334365e-09, "loss": 0.1199, "step": 12487 }, { "epoch": 0.9893444246385423, "grad_norm": 1.5292472163103024, "learning_rate": 5.910998615085151e-09, "loss": 0.1172, "step": 12488 }, { "epoch": 0.9894236482471777, "grad_norm": 1.697023058224961, "learning_rate": 5.8231124800089965e-09, "loss": 0.1455, "step": 12489 }, { "epoch": 0.989502871855813, "grad_norm": 1.734252971230233, "learning_rate": 5.735884413391457e-09, "loss": 0.1408, "step": 12490 }, { "epoch": 0.9895820954644484, "grad_norm": 1.6950596643042386, "learning_rate": 5.6493144209768255e-09, "loss": 0.1405, "step": 12491 }, { "epoch": 0.9896613190730837, "grad_norm": 1.679907328225051, "learning_rate": 5.5634025084660985e-09, "loss": 0.1427, "step": 12492 }, { "epoch": 0.9897405426817192, "grad_norm": 1.0661739850630583, "learning_rate": 5.47814868151364e-09, "loss": 0.0819, "step": 12493 }, { "epoch": 0.9898197662903545, "grad_norm": 1.2144825197657225, "learning_rate": 5.393552945736069e-09, "loss": 0.0875, "step": 12494 }, { "epoch": 0.98989898989899, "grad_norm": 1.6319387686024347, "learning_rate": 5.309615306701155e-09, "loss": 0.1605, "step": 12495 }, { "epoch": 0.9899782135076253, "grad_norm": 1.7583045477991277, "learning_rate": 5.226335769936697e-09, "loss": 0.1448, "step": 12496 }, { "epoch": 0.9900574371162606, "grad_norm": 2.2495382702872284, "learning_rate": 5.143714340926087e-09, "loss": 0.2549, "step": 12497 }, { "epoch": 0.990136660724896, "grad_norm": 1.5806241613422494, "learning_rate": 5.0617510251105284e-09, "loss": 0.2028, "step": 12498 }, { "epoch": 0.9902158843335314, "grad_norm": 1.5622916836858032, "learning_rate": 4.980445827885705e-09, "loss": 0.1557, "step": 12499 }, { "epoch": 0.9902951079421668, "grad_norm": 1.4487181852219657, "learning_rate": 4.899798754605112e-09, "loss": 0.1301, "step": 12500 }, { "epoch": 0.9903743315508021, "grad_norm": 1.5954246758334834, "learning_rate": 4.819809810578946e-09, "loss": 0.1808, "step": 12501 }, { "epoch": 0.9904535551594376, "grad_norm": 1.8207106551207466, "learning_rate": 4.740479001076326e-09, "loss": 0.1737, "step": 12502 }, { "epoch": 0.9905327787680729, "grad_norm": 1.6710375648548415, "learning_rate": 4.66180633131752e-09, "loss": 0.1613, "step": 12503 }, { "epoch": 0.9906120023767082, "grad_norm": 1.955536958794469, "learning_rate": 4.583791806485049e-09, "loss": 0.1925, "step": 12504 }, { "epoch": 0.9906912259853436, "grad_norm": 1.9307270194253165, "learning_rate": 4.506435431714806e-09, "loss": 0.1935, "step": 12505 }, { "epoch": 0.990770449593979, "grad_norm": 2.031935580157722, "learning_rate": 4.429737212100493e-09, "loss": 0.186, "step": 12506 }, { "epoch": 0.9908496732026144, "grad_norm": 1.4441024988029327, "learning_rate": 4.353697152692515e-09, "loss": 0.1164, "step": 12507 }, { "epoch": 0.9909288968112497, "grad_norm": 1.4596490498243184, "learning_rate": 4.278315258496868e-09, "loss": 0.1011, "step": 12508 }, { "epoch": 0.9910081204198852, "grad_norm": 1.9955289863048005, "learning_rate": 4.203591534478468e-09, "loss": 0.1808, "step": 12509 }, { "epoch": 0.9910873440285205, "grad_norm": 1.879734776827009, "learning_rate": 4.129525985556715e-09, "loss": 0.1169, "step": 12510 }, { "epoch": 0.9911665676371558, "grad_norm": 1.7022893611376544, "learning_rate": 4.056118616608817e-09, "loss": 0.1102, "step": 12511 }, { "epoch": 0.9912457912457913, "grad_norm": 1.8708370037859146, "learning_rate": 3.9833694324686864e-09, "loss": 0.1633, "step": 12512 }, { "epoch": 0.9913250148544266, "grad_norm": 1.6312338238760893, "learning_rate": 3.9112784379247145e-09, "loss": 0.2489, "step": 12513 }, { "epoch": 0.991404238463062, "grad_norm": 1.1383080596143327, "learning_rate": 3.839845637725326e-09, "loss": 0.0918, "step": 12514 }, { "epoch": 0.9914834620716974, "grad_norm": 1.1507404576028977, "learning_rate": 3.769071036573424e-09, "loss": 0.0815, "step": 12515 }, { "epoch": 0.9915626856803328, "grad_norm": 1.43518976677598, "learning_rate": 3.698954639129726e-09, "loss": 0.1713, "step": 12516 }, { "epoch": 0.9916419092889681, "grad_norm": 1.7404432219927013, "learning_rate": 3.6294964500116492e-09, "loss": 0.1717, "step": 12517 }, { "epoch": 0.9917211328976034, "grad_norm": 1.4518134615651712, "learning_rate": 3.560696473789982e-09, "loss": 0.1663, "step": 12518 }, { "epoch": 0.9918003565062389, "grad_norm": 1.6742078275419632, "learning_rate": 3.4925547149977645e-09, "loss": 0.1444, "step": 12519 }, { "epoch": 0.9918795801148742, "grad_norm": 1.7398603438386264, "learning_rate": 3.425071178120298e-09, "loss": 0.1877, "step": 12520 }, { "epoch": 0.9919588037235096, "grad_norm": 1.1020223644771245, "learning_rate": 3.3582458676018058e-09, "loss": 0.1069, "step": 12521 }, { "epoch": 0.992038027332145, "grad_norm": 1.6777717194486836, "learning_rate": 3.292078787842101e-09, "loss": 0.1614, "step": 12522 }, { "epoch": 0.9921172509407804, "grad_norm": 1.51816524195961, "learning_rate": 3.226569943197699e-09, "loss": 0.1083, "step": 12523 }, { "epoch": 0.9921964745494157, "grad_norm": 1.421678240607462, "learning_rate": 3.1617193379818167e-09, "loss": 0.1169, "step": 12524 }, { "epoch": 0.9922756981580511, "grad_norm": 1.557199992140855, "learning_rate": 3.0975269764654816e-09, "loss": 0.1036, "step": 12525 }, { "epoch": 0.9923549217666865, "grad_norm": 1.983853466937719, "learning_rate": 3.033992862875312e-09, "loss": 0.1805, "step": 12526 }, { "epoch": 0.9924341453753218, "grad_norm": 1.5473320986508203, "learning_rate": 2.9711170013935196e-09, "loss": 0.1455, "step": 12527 }, { "epoch": 0.9925133689839573, "grad_norm": 1.853921675187228, "learning_rate": 2.9088993961612355e-09, "loss": 0.1819, "step": 12528 }, { "epoch": 0.9925925925925926, "grad_norm": 1.2826764830920114, "learning_rate": 2.8473400512762928e-09, "loss": 0.0827, "step": 12529 }, { "epoch": 0.992671816201228, "grad_norm": 1.5361954606160375, "learning_rate": 2.7864389707887853e-09, "loss": 0.1199, "step": 12530 }, { "epoch": 0.9927510398098633, "grad_norm": 1.1710480172134994, "learning_rate": 2.726196158712169e-09, "loss": 0.0775, "step": 12531 }, { "epoch": 0.9928302634184987, "grad_norm": 1.4398788573484873, "learning_rate": 2.66661161901105e-09, "loss": 0.1347, "step": 12532 }, { "epoch": 0.9929094870271341, "grad_norm": 1.5783453334716187, "learning_rate": 2.607685355610068e-09, "loss": 0.208, "step": 12533 }, { "epoch": 0.9929887106357694, "grad_norm": 1.6650130877913982, "learning_rate": 2.549417372388341e-09, "loss": 0.1571, "step": 12534 }, { "epoch": 0.9930679342444049, "grad_norm": 1.7842176243273924, "learning_rate": 2.4918076731828e-09, "loss": 0.1509, "step": 12535 }, { "epoch": 0.9931471578530402, "grad_norm": 1.2611477176380135, "learning_rate": 2.434856261785967e-09, "loss": 0.101, "step": 12536 }, { "epoch": 0.9932263814616756, "grad_norm": 1.5007191209089834, "learning_rate": 2.378563141949286e-09, "loss": 0.1141, "step": 12537 }, { "epoch": 0.993305605070311, "grad_norm": 1.4105135631721666, "learning_rate": 2.322928317378681e-09, "loss": 0.1244, "step": 12538 }, { "epoch": 0.9933848286789463, "grad_norm": 1.680041564849387, "learning_rate": 2.267951791737888e-09, "loss": 0.1411, "step": 12539 }, { "epoch": 0.9934640522875817, "grad_norm": 2.1454745653516962, "learning_rate": 2.213633568646234e-09, "loss": 0.2092, "step": 12540 }, { "epoch": 0.993543275896217, "grad_norm": 1.3865966803231098, "learning_rate": 2.1599736516808577e-09, "loss": 0.1322, "step": 12541 }, { "epoch": 0.9936224995048525, "grad_norm": 1.0853765039685164, "learning_rate": 2.106972044373379e-09, "loss": 0.084, "step": 12542 }, { "epoch": 0.9937017231134878, "grad_norm": 2.5082567214771276, "learning_rate": 2.0546287502165583e-09, "loss": 0.1972, "step": 12543 }, { "epoch": 0.9937809467221231, "grad_norm": 1.515542335321475, "learning_rate": 2.002943772654309e-09, "loss": 0.1388, "step": 12544 }, { "epoch": 0.9938601703307586, "grad_norm": 1.642570156987105, "learning_rate": 1.951917115091684e-09, "loss": 0.1968, "step": 12545 }, { "epoch": 0.9939393939393939, "grad_norm": 1.886974594798946, "learning_rate": 1.901548780887108e-09, "loss": 0.1578, "step": 12546 }, { "epoch": 0.9940186175480293, "grad_norm": 1.537882308490948, "learning_rate": 1.851838773357928e-09, "loss": 0.1622, "step": 12547 }, { "epoch": 0.9940978411566647, "grad_norm": 1.4636521504651094, "learning_rate": 1.8027870957781912e-09, "loss": 0.1073, "step": 12548 }, { "epoch": 0.9941770647653001, "grad_norm": 1.7547111066154795, "learning_rate": 1.7543937513753161e-09, "loss": 0.1672, "step": 12549 }, { "epoch": 0.9942562883739354, "grad_norm": 1.5913788771883455, "learning_rate": 1.7066587433378634e-09, "loss": 0.1306, "step": 12550 }, { "epoch": 0.9943355119825708, "grad_norm": 2.220406713309917, "learning_rate": 1.659582074807764e-09, "loss": 0.1297, "step": 12551 }, { "epoch": 0.9944147355912062, "grad_norm": 1.7678326596030107, "learning_rate": 1.6131637488858708e-09, "loss": 0.1244, "step": 12552 }, { "epoch": 0.9944939591998415, "grad_norm": 1.707166138068885, "learning_rate": 1.5674037686275178e-09, "loss": 0.1464, "step": 12553 }, { "epoch": 0.994573182808477, "grad_norm": 1.4235220696247661, "learning_rate": 1.5223021370458502e-09, "loss": 0.1388, "step": 12554 }, { "epoch": 0.9946524064171123, "grad_norm": 1.2816741157993223, "learning_rate": 1.4778588571107144e-09, "loss": 0.0997, "step": 12555 }, { "epoch": 0.9947316300257477, "grad_norm": 1.8855796186399163, "learning_rate": 1.4340739317497688e-09, "loss": 0.2045, "step": 12556 }, { "epoch": 0.994810853634383, "grad_norm": 2.1874284339007777, "learning_rate": 1.390947363845152e-09, "loss": 0.2436, "step": 12557 }, { "epoch": 0.9948900772430184, "grad_norm": 1.2451765098993242, "learning_rate": 1.3484791562357048e-09, "loss": 0.0917, "step": 12558 }, { "epoch": 0.9949693008516538, "grad_norm": 2.0896669793671085, "learning_rate": 1.3066693117191886e-09, "loss": 0.2073, "step": 12559 }, { "epoch": 0.9950485244602891, "grad_norm": 1.3917184927427793, "learning_rate": 1.2655178330467366e-09, "loss": 0.1704, "step": 12560 }, { "epoch": 0.9951277480689246, "grad_norm": 1.8642197015980146, "learning_rate": 1.2250247229295132e-09, "loss": 0.25, "step": 12561 }, { "epoch": 0.9952069716775599, "grad_norm": 1.1924418422126835, "learning_rate": 1.185189984034274e-09, "loss": 0.1052, "step": 12562 }, { "epoch": 0.9952861952861953, "grad_norm": 1.4368852330984734, "learning_rate": 1.1460136189822556e-09, "loss": 0.0934, "step": 12563 }, { "epoch": 0.9953654188948307, "grad_norm": 1.5812181761570436, "learning_rate": 1.1074956303536165e-09, "loss": 0.156, "step": 12564 }, { "epoch": 0.995444642503466, "grad_norm": 1.2348527043417064, "learning_rate": 1.0696360206852162e-09, "loss": 0.141, "step": 12565 }, { "epoch": 0.9955238661121014, "grad_norm": 1.7238606719674299, "learning_rate": 1.0324347924695055e-09, "loss": 0.1963, "step": 12566 }, { "epoch": 0.9956030897207367, "grad_norm": 1.4707733794748916, "learning_rate": 9.958919481556362e-10, "loss": 0.1342, "step": 12567 }, { "epoch": 0.9956823133293722, "grad_norm": 1.980208072614021, "learning_rate": 9.600074901505718e-10, "loss": 0.1752, "step": 12568 }, { "epoch": 0.9957615369380075, "grad_norm": 2.0886227072375516, "learning_rate": 9.24781420816867e-10, "loss": 0.2134, "step": 12569 }, { "epoch": 0.995840760546643, "grad_norm": 2.049776224692578, "learning_rate": 8.902137424726675e-10, "loss": 0.1645, "step": 12570 }, { "epoch": 0.9959199841552783, "grad_norm": 1.9966983008614256, "learning_rate": 8.56304457396151e-10, "loss": 0.1786, "step": 12571 }, { "epoch": 0.9959992077639136, "grad_norm": 1.9810016644480402, "learning_rate": 8.230535678188656e-10, "loss": 0.1591, "step": 12572 }, { "epoch": 0.996078431372549, "grad_norm": 1.3003271327558548, "learning_rate": 7.904610759312814e-10, "loss": 0.0927, "step": 12573 }, { "epoch": 0.9961576549811844, "grad_norm": 1.7514914233199208, "learning_rate": 7.585269838783494e-10, "loss": 0.2282, "step": 12574 }, { "epoch": 0.9962368785898198, "grad_norm": 1.2762052213663577, "learning_rate": 7.272512937628318e-10, "loss": 0.1158, "step": 12575 }, { "epoch": 0.9963161021984551, "grad_norm": 1.3055602544838694, "learning_rate": 6.966340076441924e-10, "loss": 0.1292, "step": 12576 }, { "epoch": 0.9963953258070906, "grad_norm": 1.486082995247119, "learning_rate": 6.666751275385963e-10, "loss": 0.1282, "step": 12577 }, { "epoch": 0.9964745494157259, "grad_norm": 1.9000829152584406, "learning_rate": 6.3737465542002e-10, "loss": 0.1893, "step": 12578 }, { "epoch": 0.9965537730243612, "grad_norm": 1.6425556885186559, "learning_rate": 6.087325932147003e-10, "loss": 0.1668, "step": 12579 }, { "epoch": 0.9966329966329966, "grad_norm": 1.7921914899086733, "learning_rate": 5.807489428111268e-10, "loss": 0.197, "step": 12580 }, { "epoch": 0.996712220241632, "grad_norm": 1.4539885655148488, "learning_rate": 5.534237060511594e-10, "loss": 0.0958, "step": 12581 }, { "epoch": 0.9967914438502674, "grad_norm": 2.395950895005591, "learning_rate": 5.267568847344695e-10, "loss": 0.1577, "step": 12582 }, { "epoch": 0.9968706674589027, "grad_norm": 1.7147772142600528, "learning_rate": 5.007484806152097e-10, "loss": 0.2076, "step": 12583 }, { "epoch": 0.9969498910675382, "grad_norm": 1.5324147197995575, "learning_rate": 4.753984954086743e-10, "loss": 0.1557, "step": 12584 }, { "epoch": 0.9970291146761735, "grad_norm": 1.5273650241913026, "learning_rate": 4.5070693078130834e-10, "loss": 0.0963, "step": 12585 }, { "epoch": 0.9971083382848088, "grad_norm": 1.5436915232982433, "learning_rate": 4.266737883606986e-10, "loss": 0.1747, "step": 12586 }, { "epoch": 0.9971875618934443, "grad_norm": 1.383404640238586, "learning_rate": 4.0329906972780276e-10, "loss": 0.1302, "step": 12587 }, { "epoch": 0.9972667855020796, "grad_norm": 1.5651438717352404, "learning_rate": 3.805827764236103e-10, "loss": 0.1389, "step": 12588 }, { "epoch": 0.997346009110715, "grad_norm": 1.400690444792526, "learning_rate": 3.585249099435917e-10, "loss": 0.1944, "step": 12589 }, { "epoch": 0.9974252327193504, "grad_norm": 1.6045688483513236, "learning_rate": 3.3712547173769816e-10, "loss": 0.1523, "step": 12590 }, { "epoch": 0.9975044563279858, "grad_norm": 2.0571902335363594, "learning_rate": 3.163844632181334e-10, "loss": 0.1549, "step": 12591 }, { "epoch": 0.9975836799366211, "grad_norm": 1.648080725395552, "learning_rate": 2.963018857493616e-10, "loss": 0.1357, "step": 12592 }, { "epoch": 0.9976629035452564, "grad_norm": 1.3643007072673048, "learning_rate": 2.7687774065254804e-10, "loss": 0.1056, "step": 12593 }, { "epoch": 0.9977421271538919, "grad_norm": 1.4160793206480224, "learning_rate": 2.581120292077799e-10, "loss": 0.1221, "step": 12594 }, { "epoch": 0.9978213507625272, "grad_norm": 1.6120568264091362, "learning_rate": 2.400047526518456e-10, "loss": 0.1564, "step": 12595 }, { "epoch": 0.9979005743711626, "grad_norm": 1.8865861598706208, "learning_rate": 2.2255591217490437e-10, "loss": 0.1651, "step": 12596 }, { "epoch": 0.997979797979798, "grad_norm": 1.5441932077135865, "learning_rate": 2.057655089271471e-10, "loss": 0.0956, "step": 12597 }, { "epoch": 0.9980590215884334, "grad_norm": 2.3684282200725626, "learning_rate": 1.8963354401324575e-10, "loss": 0.2751, "step": 12598 }, { "epoch": 0.9981382451970687, "grad_norm": 2.0902260431451407, "learning_rate": 1.74160018496794e-10, "loss": 0.1698, "step": 12599 }, { "epoch": 0.9982174688057041, "grad_norm": 1.6540591798795525, "learning_rate": 1.593449333947561e-10, "loss": 0.1686, "step": 12600 }, { "epoch": 0.9982966924143395, "grad_norm": 1.7276707569039078, "learning_rate": 1.4518828968523857e-10, "loss": 0.1949, "step": 12601 }, { "epoch": 0.9983759160229748, "grad_norm": 1.4032736567744148, "learning_rate": 1.3169008829749808e-10, "loss": 0.1346, "step": 12602 }, { "epoch": 0.9984551396316103, "grad_norm": 1.5707102377424764, "learning_rate": 1.1885033012193348e-10, "loss": 0.1179, "step": 12603 }, { "epoch": 0.9985343632402456, "grad_norm": 1.5065612911181991, "learning_rate": 1.0666901600453473e-10, "loss": 0.1335, "step": 12604 }, { "epoch": 0.998613586848881, "grad_norm": 1.5593091127511316, "learning_rate": 9.51461467457726e-11, "loss": 0.145, "step": 12605 }, { "epoch": 0.9986928104575163, "grad_norm": 1.467564480822186, "learning_rate": 8.428172310503968e-11, "loss": 0.1495, "step": 12606 }, { "epoch": 0.9987720340661517, "grad_norm": 1.410824828686225, "learning_rate": 7.40757457984298e-11, "loss": 0.116, "step": 12607 }, { "epoch": 0.9988512576747871, "grad_norm": 2.2064869273444403, "learning_rate": 6.452821549651766e-11, "loss": 0.2888, "step": 12608 }, { "epoch": 0.9989304812834224, "grad_norm": 2.261509178509875, "learning_rate": 5.563913282990996e-11, "loss": 0.2347, "step": 12609 }, { "epoch": 0.9990097048920579, "grad_norm": 2.028624806504608, "learning_rate": 4.7408498381473765e-11, "loss": 0.2317, "step": 12610 }, { "epoch": 0.9990889285006932, "grad_norm": 1.8873035241611806, "learning_rate": 3.983631269521837e-11, "loss": 0.1491, "step": 12611 }, { "epoch": 0.9991681521093286, "grad_norm": 1.5129482390371531, "learning_rate": 3.292257626963391e-11, "loss": 0.1418, "step": 12612 }, { "epoch": 0.999247375717964, "grad_norm": 1.3692280142308895, "learning_rate": 2.6667289557691378e-11, "loss": 0.1366, "step": 12613 }, { "epoch": 0.9993265993265993, "grad_norm": 1.5306499111713083, "learning_rate": 2.1070452974614187e-11, "loss": 0.1265, "step": 12614 }, { "epoch": 0.9994058229352347, "grad_norm": 1.9484315086369337, "learning_rate": 1.6132066886775932e-11, "loss": 0.1964, "step": 12615 }, { "epoch": 0.99948504654387, "grad_norm": 1.104251280544726, "learning_rate": 1.1852131619471963e-11, "loss": 0.0761, "step": 12616 }, { "epoch": 0.9995642701525055, "grad_norm": 1.8596185762158755, "learning_rate": 8.230647454698926e-12, "loss": 0.1732, "step": 12617 }, { "epoch": 0.9996434937611408, "grad_norm": 1.907338930572874, "learning_rate": 5.267614631154772e-12, "loss": 0.2302, "step": 12618 }, { "epoch": 0.9997227173697762, "grad_norm": 1.369808211921963, "learning_rate": 2.9630333442387525e-12, "loss": 0.104, "step": 12619 }, { "epoch": 0.9998019409784116, "grad_norm": 1.8195352866791545, "learning_rate": 1.3169037449412004e-12, "loss": 0.1904, "step": 12620 }, { "epoch": 0.9998811645870469, "grad_norm": 1.8033316327087292, "learning_rate": 3.29225942063971e-13, "loss": 0.1548, "step": 12621 }, { "epoch": 0.9999603881956823, "grad_norm": 1.6205011021565534, "learning_rate": 0.0, "loss": 0.181, "step": 12622 }, { "epoch": 0.9999603881956823, "step": 12622, "total_flos": 1046427608678400.0, "train_loss": 0.28040552918104605, "train_runtime": 53940.9723, "train_samples_per_second": 29.952, "train_steps_per_second": 0.234 } ], "logging_steps": 1.0, "max_steps": 12622, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1046427608678400.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }