{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9996863729026186, "eval_steps": 500, "global_step": 3188, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006272541947624275, "grad_norm": 14.32480525970459, "learning_rate": 4.1666666666666667e-07, "loss": 1.1353, "step": 1 }, { "epoch": 0.001254508389524855, "grad_norm": 16.002328872680664, "learning_rate": 8.333333333333333e-07, "loss": 1.1649, "step": 2 }, { "epoch": 0.0018817625842872823, "grad_norm": 15.3951416015625, "learning_rate": 1.25e-06, "loss": 1.1479, "step": 3 }, { "epoch": 0.00250901677904971, "grad_norm": 17.10584259033203, "learning_rate": 1.6666666666666667e-06, "loss": 1.0933, "step": 4 }, { "epoch": 0.0031362709738121373, "grad_norm": 12.109578132629395, "learning_rate": 2.0833333333333334e-06, "loss": 1.0699, "step": 5 }, { "epoch": 0.0037635251685745647, "grad_norm": 8.512700080871582, "learning_rate": 2.5e-06, "loss": 0.9948, "step": 6 }, { "epoch": 0.0043907793633369925, "grad_norm": 8.072388648986816, "learning_rate": 2.916666666666667e-06, "loss": 1.0097, "step": 7 }, { "epoch": 0.00501803355809942, "grad_norm": 4.751190662384033, "learning_rate": 3.3333333333333333e-06, "loss": 0.9145, "step": 8 }, { "epoch": 0.005645287752861847, "grad_norm": 5.013059139251709, "learning_rate": 3.7500000000000005e-06, "loss": 0.9029, "step": 9 }, { "epoch": 0.006272541947624275, "grad_norm": 5.35429048538208, "learning_rate": 4.166666666666667e-06, "loss": 0.8575, "step": 10 }, { "epoch": 0.006899796142386702, "grad_norm": 3.0817456245422363, "learning_rate": 4.583333333333333e-06, "loss": 0.807, "step": 11 }, { "epoch": 0.007527050337149129, "grad_norm": 2.9373440742492676, "learning_rate": 5e-06, "loss": 0.7738, "step": 12 }, { "epoch": 0.008154304531911558, "grad_norm": 3.1255123615264893, "learning_rate": 5.416666666666667e-06, "loss": 0.717, "step": 13 }, { "epoch": 0.008781558726673985, "grad_norm": 2.4374232292175293, "learning_rate": 5.833333333333334e-06, "loss": 0.6959, "step": 14 }, { "epoch": 0.009408812921436412, "grad_norm": 3.297788619995117, "learning_rate": 6.25e-06, "loss": 0.7199, "step": 15 }, { "epoch": 0.01003606711619884, "grad_norm": 3.0335464477539062, "learning_rate": 6.666666666666667e-06, "loss": 0.6926, "step": 16 }, { "epoch": 0.010663321310961267, "grad_norm": 2.9025216102600098, "learning_rate": 7.083333333333335e-06, "loss": 0.6634, "step": 17 }, { "epoch": 0.011290575505723694, "grad_norm": 2.294412136077881, "learning_rate": 7.500000000000001e-06, "loss": 0.6045, "step": 18 }, { "epoch": 0.011917829700486122, "grad_norm": 2.4451427459716797, "learning_rate": 7.916666666666667e-06, "loss": 0.6442, "step": 19 }, { "epoch": 0.01254508389524855, "grad_norm": 2.1892669200897217, "learning_rate": 8.333333333333334e-06, "loss": 0.6858, "step": 20 }, { "epoch": 0.013172338090010977, "grad_norm": 2.2129485607147217, "learning_rate": 8.750000000000001e-06, "loss": 0.6401, "step": 21 }, { "epoch": 0.013799592284773404, "grad_norm": 2.105389356613159, "learning_rate": 9.166666666666666e-06, "loss": 0.6603, "step": 22 }, { "epoch": 0.014426846479535831, "grad_norm": 2.1527175903320312, "learning_rate": 9.583333333333335e-06, "loss": 0.5991, "step": 23 }, { "epoch": 0.015054100674298259, "grad_norm": 2.1053991317749023, "learning_rate": 1e-05, "loss": 0.6137, "step": 24 }, { "epoch": 0.015681354869060686, "grad_norm": 2.17954683303833, "learning_rate": 1.0416666666666668e-05, "loss": 0.6025, "step": 25 }, { "epoch": 0.016308609063823115, "grad_norm": 1.9039045572280884, "learning_rate": 1.0833333333333334e-05, "loss": 0.5373, "step": 26 }, { "epoch": 0.01693586325858554, "grad_norm": 3.1504151821136475, "learning_rate": 1.125e-05, "loss": 0.5966, "step": 27 }, { "epoch": 0.01756311745334797, "grad_norm": 2.2774252891540527, "learning_rate": 1.1666666666666668e-05, "loss": 0.6107, "step": 28 }, { "epoch": 0.018190371648110396, "grad_norm": 1.9291549921035767, "learning_rate": 1.2083333333333333e-05, "loss": 0.5637, "step": 29 }, { "epoch": 0.018817625842872825, "grad_norm": 2.0674891471862793, "learning_rate": 1.25e-05, "loss": 0.5801, "step": 30 }, { "epoch": 0.01944488003763525, "grad_norm": 2.1760141849517822, "learning_rate": 1.2916666666666668e-05, "loss": 0.5665, "step": 31 }, { "epoch": 0.02007213423239768, "grad_norm": 2.295644998550415, "learning_rate": 1.3333333333333333e-05, "loss": 0.6056, "step": 32 }, { "epoch": 0.020699388427160105, "grad_norm": 2.258507013320923, "learning_rate": 1.375e-05, "loss": 0.5676, "step": 33 }, { "epoch": 0.021326642621922534, "grad_norm": 2.356708526611328, "learning_rate": 1.416666666666667e-05, "loss": 0.5353, "step": 34 }, { "epoch": 0.021953896816684963, "grad_norm": 2.042067766189575, "learning_rate": 1.4583333333333333e-05, "loss": 0.5116, "step": 35 }, { "epoch": 0.02258115101144739, "grad_norm": 2.107545852661133, "learning_rate": 1.5000000000000002e-05, "loss": 0.5721, "step": 36 }, { "epoch": 0.023208405206209818, "grad_norm": 2.1624722480773926, "learning_rate": 1.5416666666666668e-05, "loss": 0.5174, "step": 37 }, { "epoch": 0.023835659400972244, "grad_norm": 2.340648889541626, "learning_rate": 1.5833333333333333e-05, "loss": 0.5976, "step": 38 }, { "epoch": 0.024462913595734673, "grad_norm": 2.218324661254883, "learning_rate": 1.6250000000000002e-05, "loss": 0.5678, "step": 39 }, { "epoch": 0.0250901677904971, "grad_norm": 2.8050646781921387, "learning_rate": 1.6666666666666667e-05, "loss": 0.594, "step": 40 }, { "epoch": 0.025717421985259527, "grad_norm": 2.405874013900757, "learning_rate": 1.7083333333333333e-05, "loss": 0.5582, "step": 41 }, { "epoch": 0.026344676180021953, "grad_norm": 2.1661484241485596, "learning_rate": 1.7500000000000002e-05, "loss": 0.5674, "step": 42 }, { "epoch": 0.026971930374784382, "grad_norm": 2.0582382678985596, "learning_rate": 1.7916666666666667e-05, "loss": 0.5784, "step": 43 }, { "epoch": 0.027599184569546808, "grad_norm": 2.0946953296661377, "learning_rate": 1.8333333333333333e-05, "loss": 0.5104, "step": 44 }, { "epoch": 0.028226438764309237, "grad_norm": 2.11618971824646, "learning_rate": 1.8750000000000002e-05, "loss": 0.5508, "step": 45 }, { "epoch": 0.028853692959071663, "grad_norm": 2.330487012863159, "learning_rate": 1.916666666666667e-05, "loss": 0.5452, "step": 46 }, { "epoch": 0.02948094715383409, "grad_norm": 2.367173194885254, "learning_rate": 1.9583333333333333e-05, "loss": 0.5368, "step": 47 }, { "epoch": 0.030108201348596517, "grad_norm": 2.389894723892212, "learning_rate": 2e-05, "loss": 0.5093, "step": 48 }, { "epoch": 0.030735455543358946, "grad_norm": 2.0740127563476562, "learning_rate": 2.0416666666666667e-05, "loss": 0.5411, "step": 49 }, { "epoch": 0.03136270973812137, "grad_norm": 2.1907505989074707, "learning_rate": 2.0833333333333336e-05, "loss": 0.5201, "step": 50 }, { "epoch": 0.0319899639328838, "grad_norm": 2.1322927474975586, "learning_rate": 2.125e-05, "loss": 0.5064, "step": 51 }, { "epoch": 0.03261721812764623, "grad_norm": 2.274514675140381, "learning_rate": 2.1666666666666667e-05, "loss": 0.5398, "step": 52 }, { "epoch": 0.03324447232240866, "grad_norm": 2.8248438835144043, "learning_rate": 2.2083333333333336e-05, "loss": 0.5083, "step": 53 }, { "epoch": 0.03387172651717108, "grad_norm": 2.0930676460266113, "learning_rate": 2.25e-05, "loss": 0.5392, "step": 54 }, { "epoch": 0.03449898071193351, "grad_norm": 2.262333869934082, "learning_rate": 2.2916666666666667e-05, "loss": 0.5642, "step": 55 }, { "epoch": 0.03512623490669594, "grad_norm": 2.224430799484253, "learning_rate": 2.3333333333333336e-05, "loss": 0.5438, "step": 56 }, { "epoch": 0.03575348910145837, "grad_norm": 2.2733497619628906, "learning_rate": 2.375e-05, "loss": 0.5178, "step": 57 }, { "epoch": 0.03638074329622079, "grad_norm": 2.2176079750061035, "learning_rate": 2.4166666666666667e-05, "loss": 0.5658, "step": 58 }, { "epoch": 0.03700799749098322, "grad_norm": 2.0688469409942627, "learning_rate": 2.4583333333333336e-05, "loss": 0.5296, "step": 59 }, { "epoch": 0.03763525168574565, "grad_norm": 2.167177677154541, "learning_rate": 2.5e-05, "loss": 0.5691, "step": 60 }, { "epoch": 0.03826250588050808, "grad_norm": 2.094510078430176, "learning_rate": 2.5416666666666667e-05, "loss": 0.5326, "step": 61 }, { "epoch": 0.0388897600752705, "grad_norm": 2.416938066482544, "learning_rate": 2.5833333333333336e-05, "loss": 0.547, "step": 62 }, { "epoch": 0.03951701427003293, "grad_norm": 2.5658774375915527, "learning_rate": 2.625e-05, "loss": 0.5641, "step": 63 }, { "epoch": 0.04014426846479536, "grad_norm": 3.0288596153259277, "learning_rate": 2.6666666666666667e-05, "loss": 0.532, "step": 64 }, { "epoch": 0.04077152265955779, "grad_norm": 2.6260979175567627, "learning_rate": 2.7083333333333335e-05, "loss": 0.52, "step": 65 }, { "epoch": 0.04139877685432021, "grad_norm": 2.6695799827575684, "learning_rate": 2.75e-05, "loss": 0.5547, "step": 66 }, { "epoch": 0.04202603104908264, "grad_norm": 2.1383681297302246, "learning_rate": 2.7916666666666666e-05, "loss": 0.5475, "step": 67 }, { "epoch": 0.04265328524384507, "grad_norm": 2.1226611137390137, "learning_rate": 2.833333333333334e-05, "loss": 0.5785, "step": 68 }, { "epoch": 0.0432805394386075, "grad_norm": 2.173794746398926, "learning_rate": 2.875e-05, "loss": 0.5368, "step": 69 }, { "epoch": 0.043907793633369926, "grad_norm": 1.7689262628555298, "learning_rate": 2.9166666666666666e-05, "loss": 0.5106, "step": 70 }, { "epoch": 0.04453504782813235, "grad_norm": 2.331559419631958, "learning_rate": 2.958333333333334e-05, "loss": 0.5657, "step": 71 }, { "epoch": 0.04516230202289478, "grad_norm": 2.389425754547119, "learning_rate": 3.0000000000000004e-05, "loss": 0.5645, "step": 72 }, { "epoch": 0.04578955621765721, "grad_norm": 2.035191297531128, "learning_rate": 3.0416666666666666e-05, "loss": 0.5482, "step": 73 }, { "epoch": 0.046416810412419636, "grad_norm": 2.112544059753418, "learning_rate": 3.0833333333333335e-05, "loss": 0.577, "step": 74 }, { "epoch": 0.04704406460718206, "grad_norm": 1.9131213426589966, "learning_rate": 3.125e-05, "loss": 0.5463, "step": 75 }, { "epoch": 0.04767131880194449, "grad_norm": 1.978047490119934, "learning_rate": 3.1666666666666666e-05, "loss": 0.5304, "step": 76 }, { "epoch": 0.048298572996706916, "grad_norm": 2.5002808570861816, "learning_rate": 3.208333333333334e-05, "loss": 0.5329, "step": 77 }, { "epoch": 0.048925827191469345, "grad_norm": 2.056502103805542, "learning_rate": 3.2500000000000004e-05, "loss": 0.5628, "step": 78 }, { "epoch": 0.04955308138623177, "grad_norm": 2.020198106765747, "learning_rate": 3.291666666666667e-05, "loss": 0.569, "step": 79 }, { "epoch": 0.0501803355809942, "grad_norm": 2.3168647289276123, "learning_rate": 3.3333333333333335e-05, "loss": 0.5478, "step": 80 }, { "epoch": 0.050807589775756626, "grad_norm": 2.414154529571533, "learning_rate": 3.375e-05, "loss": 0.5546, "step": 81 }, { "epoch": 0.051434843970519055, "grad_norm": 1.8864158391952515, "learning_rate": 3.4166666666666666e-05, "loss": 0.5245, "step": 82 }, { "epoch": 0.05206209816528148, "grad_norm": 2.200151205062866, "learning_rate": 3.458333333333334e-05, "loss": 0.4861, "step": 83 }, { "epoch": 0.052689352360043906, "grad_norm": 2.844407558441162, "learning_rate": 3.5000000000000004e-05, "loss": 0.6039, "step": 84 }, { "epoch": 0.053316606554806335, "grad_norm": 2.2177329063415527, "learning_rate": 3.541666666666667e-05, "loss": 0.5096, "step": 85 }, { "epoch": 0.053943860749568764, "grad_norm": 1.8765463829040527, "learning_rate": 3.5833333333333335e-05, "loss": 0.5152, "step": 86 }, { "epoch": 0.054571114944331194, "grad_norm": 2.0037007331848145, "learning_rate": 3.625e-05, "loss": 0.5499, "step": 87 }, { "epoch": 0.055198369139093616, "grad_norm": 2.07822322845459, "learning_rate": 3.6666666666666666e-05, "loss": 0.5642, "step": 88 }, { "epoch": 0.055825623333856045, "grad_norm": 1.2361040115356445, "learning_rate": 3.708333333333334e-05, "loss": 0.4945, "step": 89 }, { "epoch": 0.056452877528618474, "grad_norm": 2.390566349029541, "learning_rate": 3.7500000000000003e-05, "loss": 0.5, "step": 90 }, { "epoch": 0.0570801317233809, "grad_norm": 2.0272109508514404, "learning_rate": 3.791666666666667e-05, "loss": 0.5207, "step": 91 }, { "epoch": 0.057707385918143325, "grad_norm": 2.093296527862549, "learning_rate": 3.833333333333334e-05, "loss": 0.5015, "step": 92 }, { "epoch": 0.058334640112905754, "grad_norm": 1.8016709089279175, "learning_rate": 3.875e-05, "loss": 0.5522, "step": 93 }, { "epoch": 0.05896189430766818, "grad_norm": 1.7712875604629517, "learning_rate": 3.9166666666666665e-05, "loss": 0.5239, "step": 94 }, { "epoch": 0.05958914850243061, "grad_norm": 2.0883615016937256, "learning_rate": 3.958333333333334e-05, "loss": 0.5488, "step": 95 }, { "epoch": 0.060216402697193035, "grad_norm": 1.9757493734359741, "learning_rate": 4e-05, "loss": 0.542, "step": 96 }, { "epoch": 0.060843656891955464, "grad_norm": 2.7352821826934814, "learning_rate": 3.999998967664797e-05, "loss": 0.5771, "step": 97 }, { "epoch": 0.06147091108671789, "grad_norm": 1.8274654150009155, "learning_rate": 3.999995870660251e-05, "loss": 0.5859, "step": 98 }, { "epoch": 0.06209816528148032, "grad_norm": 1.8539522886276245, "learning_rate": 3.99999070898956e-05, "loss": 0.5153, "step": 99 }, { "epoch": 0.06272541947624274, "grad_norm": 2.2877085208892822, "learning_rate": 3.999983482658054e-05, "loss": 0.5611, "step": 100 }, { "epoch": 0.06335267367100518, "grad_norm": 2.228724718093872, "learning_rate": 3.99997419167319e-05, "loss": 0.5685, "step": 101 }, { "epoch": 0.0639799278657676, "grad_norm": 2.013684034347534, "learning_rate": 3.999962836044563e-05, "loss": 0.4979, "step": 102 }, { "epoch": 0.06460718206053002, "grad_norm": 2.16192889213562, "learning_rate": 3.999949415783893e-05, "loss": 0.5113, "step": 103 }, { "epoch": 0.06523443625529246, "grad_norm": 1.7808058261871338, "learning_rate": 3.9999339309050354e-05, "loss": 0.5392, "step": 104 }, { "epoch": 0.06586169045005488, "grad_norm": 1.9338231086730957, "learning_rate": 3.999916381423976e-05, "loss": 0.5294, "step": 105 }, { "epoch": 0.06648894464481732, "grad_norm": 1.8520095348358154, "learning_rate": 3.999896767358832e-05, "loss": 0.5084, "step": 106 }, { "epoch": 0.06711619883957974, "grad_norm": 2.6990466117858887, "learning_rate": 3.99987508872985e-05, "loss": 0.5662, "step": 107 }, { "epoch": 0.06774345303434216, "grad_norm": 2.1028778553009033, "learning_rate": 3.99985134555941e-05, "loss": 0.5066, "step": 108 }, { "epoch": 0.0683707072291046, "grad_norm": 2.1288583278656006, "learning_rate": 3.999825537872025e-05, "loss": 0.5326, "step": 109 }, { "epoch": 0.06899796142386702, "grad_norm": 2.033177375793457, "learning_rate": 3.999797665694335e-05, "loss": 0.5255, "step": 110 }, { "epoch": 0.06962521561862944, "grad_norm": 1.5184273719787598, "learning_rate": 3.999767729055115e-05, "loss": 0.5342, "step": 111 }, { "epoch": 0.07025246981339188, "grad_norm": 2.1982336044311523, "learning_rate": 3.999735727985268e-05, "loss": 0.5264, "step": 112 }, { "epoch": 0.0708797240081543, "grad_norm": 2.133439302444458, "learning_rate": 3.999701662517831e-05, "loss": 0.5293, "step": 113 }, { "epoch": 0.07150697820291674, "grad_norm": 2.059997320175171, "learning_rate": 3.99966553268797e-05, "loss": 0.5311, "step": 114 }, { "epoch": 0.07213423239767916, "grad_norm": 2.0948469638824463, "learning_rate": 3.9996273385329845e-05, "loss": 0.5141, "step": 115 }, { "epoch": 0.07276148659244158, "grad_norm": 1.8727495670318604, "learning_rate": 3.999587080092303e-05, "loss": 0.4775, "step": 116 }, { "epoch": 0.07338874078720402, "grad_norm": 1.7768906354904175, "learning_rate": 3.999544757407485e-05, "loss": 0.4938, "step": 117 }, { "epoch": 0.07401599498196644, "grad_norm": 1.797930121421814, "learning_rate": 3.9995003705222224e-05, "loss": 0.4991, "step": 118 }, { "epoch": 0.07464324917672886, "grad_norm": 1.623299479484558, "learning_rate": 3.9994539194823376e-05, "loss": 0.4888, "step": 119 }, { "epoch": 0.0752705033714913, "grad_norm": 1.712273359298706, "learning_rate": 3.999405404335783e-05, "loss": 0.4958, "step": 120 }, { "epoch": 0.07589775756625372, "grad_norm": 1.6322563886642456, "learning_rate": 3.999354825132644e-05, "loss": 0.5035, "step": 121 }, { "epoch": 0.07652501176101616, "grad_norm": 1.5734409093856812, "learning_rate": 3.999302181925133e-05, "loss": 0.5249, "step": 122 }, { "epoch": 0.07715226595577858, "grad_norm": 2.058497428894043, "learning_rate": 3.9992474747675965e-05, "loss": 0.5335, "step": 123 }, { "epoch": 0.077779520150541, "grad_norm": 1.8265260457992554, "learning_rate": 3.999190703716511e-05, "loss": 0.5389, "step": 124 }, { "epoch": 0.07840677434530344, "grad_norm": 1.323286533355713, "learning_rate": 3.999131868830482e-05, "loss": 0.4437, "step": 125 }, { "epoch": 0.07903402854006586, "grad_norm": 1.5716819763183594, "learning_rate": 3.999070970170249e-05, "loss": 0.5257, "step": 126 }, { "epoch": 0.0796612827348283, "grad_norm": 1.7783918380737305, "learning_rate": 3.999008007798678e-05, "loss": 0.5242, "step": 127 }, { "epoch": 0.08028853692959072, "grad_norm": 1.7182682752609253, "learning_rate": 3.9989429817807674e-05, "loss": 0.5095, "step": 128 }, { "epoch": 0.08091579112435314, "grad_norm": 1.639614462852478, "learning_rate": 3.998875892183647e-05, "loss": 0.5386, "step": 129 }, { "epoch": 0.08154304531911558, "grad_norm": 1.953802466392517, "learning_rate": 3.998806739076575e-05, "loss": 0.5062, "step": 130 }, { "epoch": 0.082170299513878, "grad_norm": 1.7502291202545166, "learning_rate": 3.998735522530941e-05, "loss": 0.5126, "step": 131 }, { "epoch": 0.08279755370864042, "grad_norm": 1.5992299318313599, "learning_rate": 3.9986622426202626e-05, "loss": 0.4931, "step": 132 }, { "epoch": 0.08342480790340286, "grad_norm": 1.9821819067001343, "learning_rate": 3.998586899420192e-05, "loss": 0.5361, "step": 133 }, { "epoch": 0.08405206209816528, "grad_norm": 1.9804344177246094, "learning_rate": 3.998509493008506e-05, "loss": 0.5656, "step": 134 }, { "epoch": 0.08467931629292771, "grad_norm": 2.0142171382904053, "learning_rate": 3.998430023465116e-05, "loss": 0.5339, "step": 135 }, { "epoch": 0.08530657048769014, "grad_norm": 1.8212288618087769, "learning_rate": 3.99834849087206e-05, "loss": 0.5231, "step": 136 }, { "epoch": 0.08593382468245256, "grad_norm": 2.456505298614502, "learning_rate": 3.9982648953135076e-05, "loss": 0.5488, "step": 137 }, { "epoch": 0.086561078877215, "grad_norm": 2.4705069065093994, "learning_rate": 3.998179236875757e-05, "loss": 0.5157, "step": 138 }, { "epoch": 0.08718833307197742, "grad_norm": 2.2800309658050537, "learning_rate": 3.9980915156472366e-05, "loss": 0.4889, "step": 139 }, { "epoch": 0.08781558726673985, "grad_norm": 1.8949161767959595, "learning_rate": 3.998001731718505e-05, "loss": 0.5009, "step": 140 }, { "epoch": 0.08844284146150228, "grad_norm": 1.7371482849121094, "learning_rate": 3.997909885182248e-05, "loss": 0.5241, "step": 141 }, { "epoch": 0.0890700956562647, "grad_norm": 2.2259998321533203, "learning_rate": 3.997815976133282e-05, "loss": 0.4674, "step": 142 }, { "epoch": 0.08969734985102713, "grad_norm": 2.1841187477111816, "learning_rate": 3.9977200046685536e-05, "loss": 0.4897, "step": 143 }, { "epoch": 0.09032460404578956, "grad_norm": 1.913406491279602, "learning_rate": 3.997621970887137e-05, "loss": 0.504, "step": 144 }, { "epoch": 0.09095185824055198, "grad_norm": 1.6738669872283936, "learning_rate": 3.9975218748902356e-05, "loss": 0.5081, "step": 145 }, { "epoch": 0.09157911243531441, "grad_norm": 2.280686378479004, "learning_rate": 3.997419716781183e-05, "loss": 0.5569, "step": 146 }, { "epoch": 0.09220636663007684, "grad_norm": 2.0637691020965576, "learning_rate": 3.9973154966654396e-05, "loss": 0.5153, "step": 147 }, { "epoch": 0.09283362082483927, "grad_norm": 1.6154152154922485, "learning_rate": 3.997209214650595e-05, "loss": 0.5022, "step": 148 }, { "epoch": 0.0934608750196017, "grad_norm": 1.67239511013031, "learning_rate": 3.99710087084637e-05, "loss": 0.4846, "step": 149 }, { "epoch": 0.09408812921436412, "grad_norm": 1.6062970161437988, "learning_rate": 3.99699046536461e-05, "loss": 0.5185, "step": 150 }, { "epoch": 0.09471538340912655, "grad_norm": 1.733093500137329, "learning_rate": 3.99687799831929e-05, "loss": 0.5387, "step": 151 }, { "epoch": 0.09534263760388897, "grad_norm": 1.7517509460449219, "learning_rate": 3.996763469826516e-05, "loss": 0.5322, "step": 152 }, { "epoch": 0.0959698917986514, "grad_norm": 1.6479005813598633, "learning_rate": 3.996646880004518e-05, "loss": 0.5165, "step": 153 }, { "epoch": 0.09659714599341383, "grad_norm": 1.7011340856552124, "learning_rate": 3.9965282289736557e-05, "loss": 0.5157, "step": 154 }, { "epoch": 0.09722440018817625, "grad_norm": 1.596718668937683, "learning_rate": 3.996407516856418e-05, "loss": 0.517, "step": 155 }, { "epoch": 0.09785165438293869, "grad_norm": 2.0577502250671387, "learning_rate": 3.996284743777419e-05, "loss": 0.5421, "step": 156 }, { "epoch": 0.09847890857770111, "grad_norm": 1.754554033279419, "learning_rate": 3.996159909863402e-05, "loss": 0.493, "step": 157 }, { "epoch": 0.09910616277246354, "grad_norm": 1.9648163318634033, "learning_rate": 3.996033015243238e-05, "loss": 0.5173, "step": 158 }, { "epoch": 0.09973341696722597, "grad_norm": 2.0128462314605713, "learning_rate": 3.995904060047924e-05, "loss": 0.4702, "step": 159 }, { "epoch": 0.1003606711619884, "grad_norm": 1.6790200471878052, "learning_rate": 3.995773044410585e-05, "loss": 0.4902, "step": 160 }, { "epoch": 0.10098792535675083, "grad_norm": 1.7891554832458496, "learning_rate": 3.995639968466475e-05, "loss": 0.4998, "step": 161 }, { "epoch": 0.10161517955151325, "grad_norm": 1.5091252326965332, "learning_rate": 3.99550483235297e-05, "loss": 0.4433, "step": 162 }, { "epoch": 0.10224243374627567, "grad_norm": 1.6836857795715332, "learning_rate": 3.9953676362095774e-05, "loss": 0.5127, "step": 163 }, { "epoch": 0.10286968794103811, "grad_norm": 1.5248311758041382, "learning_rate": 3.99522838017793e-05, "loss": 0.4925, "step": 164 }, { "epoch": 0.10349694213580053, "grad_norm": 1.7744476795196533, "learning_rate": 3.9950870644017854e-05, "loss": 0.4807, "step": 165 }, { "epoch": 0.10412419633056295, "grad_norm": 2.1345975399017334, "learning_rate": 3.99494368902703e-05, "loss": 0.5226, "step": 166 }, { "epoch": 0.10475145052532539, "grad_norm": 1.655029535293579, "learning_rate": 3.9947982542016744e-05, "loss": 0.5134, "step": 167 }, { "epoch": 0.10537870472008781, "grad_norm": 1.9914706945419312, "learning_rate": 3.9946507600758565e-05, "loss": 0.5149, "step": 168 }, { "epoch": 0.10600595891485025, "grad_norm": 1.765305757522583, "learning_rate": 3.9945012068018396e-05, "loss": 0.4751, "step": 169 }, { "epoch": 0.10663321310961267, "grad_norm": 1.4296326637268066, "learning_rate": 3.9943495945340134e-05, "loss": 0.4881, "step": 170 }, { "epoch": 0.10726046730437509, "grad_norm": 1.626119613647461, "learning_rate": 3.994195923428891e-05, "loss": 0.4823, "step": 171 }, { "epoch": 0.10788772149913753, "grad_norm": 1.6655110120773315, "learning_rate": 3.994040193645114e-05, "loss": 0.4852, "step": 172 }, { "epoch": 0.10851497569389995, "grad_norm": 1.44282066822052, "learning_rate": 3.993882405343448e-05, "loss": 0.4823, "step": 173 }, { "epoch": 0.10914222988866239, "grad_norm": 1.7007629871368408, "learning_rate": 3.9937225586867824e-05, "loss": 0.4787, "step": 174 }, { "epoch": 0.10976948408342481, "grad_norm": 1.6929346323013306, "learning_rate": 3.993560653840133e-05, "loss": 0.4838, "step": 175 }, { "epoch": 0.11039673827818723, "grad_norm": 1.8040822744369507, "learning_rate": 3.993396690970639e-05, "loss": 0.5002, "step": 176 }, { "epoch": 0.11102399247294967, "grad_norm": 1.395953893661499, "learning_rate": 3.9932306702475655e-05, "loss": 0.4543, "step": 177 }, { "epoch": 0.11165124666771209, "grad_norm": 2.022174119949341, "learning_rate": 3.993062591842303e-05, "loss": 0.4643, "step": 178 }, { "epoch": 0.11227850086247451, "grad_norm": 1.7045681476593018, "learning_rate": 3.9928924559283626e-05, "loss": 0.5578, "step": 179 }, { "epoch": 0.11290575505723695, "grad_norm": 1.59603750705719, "learning_rate": 3.992720262681383e-05, "loss": 0.4858, "step": 180 }, { "epoch": 0.11353300925199937, "grad_norm": 1.837712287902832, "learning_rate": 3.992546012279125e-05, "loss": 0.5204, "step": 181 }, { "epoch": 0.1141602634467618, "grad_norm": 1.828675389289856, "learning_rate": 3.9923697049014724e-05, "loss": 0.4654, "step": 182 }, { "epoch": 0.11478751764152423, "grad_norm": 2.048175811767578, "learning_rate": 3.992191340730435e-05, "loss": 0.4729, "step": 183 }, { "epoch": 0.11541477183628665, "grad_norm": 1.7749079465866089, "learning_rate": 3.9920109199501435e-05, "loss": 0.4678, "step": 184 }, { "epoch": 0.11604202603104909, "grad_norm": 2.1206958293914795, "learning_rate": 3.9918284427468536e-05, "loss": 0.508, "step": 185 }, { "epoch": 0.11666928022581151, "grad_norm": 1.678009271621704, "learning_rate": 3.991643909308942e-05, "loss": 0.5016, "step": 186 }, { "epoch": 0.11729653442057393, "grad_norm": 1.4095009565353394, "learning_rate": 3.9914573198269086e-05, "loss": 0.4784, "step": 187 }, { "epoch": 0.11792378861533637, "grad_norm": 1.7792853116989136, "learning_rate": 3.991268674493378e-05, "loss": 0.4687, "step": 188 }, { "epoch": 0.11855104281009879, "grad_norm": 1.4806967973709106, "learning_rate": 3.991077973503094e-05, "loss": 0.4812, "step": 189 }, { "epoch": 0.11917829700486123, "grad_norm": 1.5337979793548584, "learning_rate": 3.990885217052925e-05, "loss": 0.4738, "step": 190 }, { "epoch": 0.11980555119962365, "grad_norm": 1.493370771408081, "learning_rate": 3.990690405341859e-05, "loss": 0.4698, "step": 191 }, { "epoch": 0.12043280539438607, "grad_norm": 1.6235061883926392, "learning_rate": 3.990493538571009e-05, "loss": 0.4647, "step": 192 }, { "epoch": 0.1210600595891485, "grad_norm": 1.65871000289917, "learning_rate": 3.9902946169436045e-05, "loss": 0.4461, "step": 193 }, { "epoch": 0.12168731378391093, "grad_norm": 1.5810375213623047, "learning_rate": 3.9900936406650016e-05, "loss": 0.4626, "step": 194 }, { "epoch": 0.12231456797867336, "grad_norm": 1.635001301765442, "learning_rate": 3.989890609942674e-05, "loss": 0.4851, "step": 195 }, { "epoch": 0.12294182217343579, "grad_norm": 1.480832576751709, "learning_rate": 3.989685524986219e-05, "loss": 0.465, "step": 196 }, { "epoch": 0.12356907636819821, "grad_norm": 1.6289597749710083, "learning_rate": 3.989478386007351e-05, "loss": 0.4996, "step": 197 }, { "epoch": 0.12419633056296064, "grad_norm": 1.557519793510437, "learning_rate": 3.989269193219909e-05, "loss": 0.425, "step": 198 }, { "epoch": 0.12482358475772307, "grad_norm": 1.946897029876709, "learning_rate": 3.989057946839848e-05, "loss": 0.4981, "step": 199 }, { "epoch": 0.1254508389524855, "grad_norm": 1.511824369430542, "learning_rate": 3.9888446470852464e-05, "loss": 0.4981, "step": 200 }, { "epoch": 0.1260780931472479, "grad_norm": 1.5527335405349731, "learning_rate": 3.988629294176301e-05, "loss": 0.479, "step": 201 }, { "epoch": 0.12670534734201036, "grad_norm": 1.5926644802093506, "learning_rate": 3.9884118883353275e-05, "loss": 0.438, "step": 202 }, { "epoch": 0.12733260153677278, "grad_norm": 1.6053894758224487, "learning_rate": 3.988192429786762e-05, "loss": 0.4984, "step": 203 }, { "epoch": 0.1279598557315352, "grad_norm": 1.9284647703170776, "learning_rate": 3.987970918757159e-05, "loss": 0.4753, "step": 204 }, { "epoch": 0.12858710992629763, "grad_norm": 1.6620078086853027, "learning_rate": 3.9877473554751935e-05, "loss": 0.4458, "step": 205 }, { "epoch": 0.12921436412106005, "grad_norm": 1.523679256439209, "learning_rate": 3.9875217401716557e-05, "loss": 0.477, "step": 206 }, { "epoch": 0.1298416183158225, "grad_norm": 1.6295181512832642, "learning_rate": 3.987294073079457e-05, "loss": 0.5062, "step": 207 }, { "epoch": 0.13046887251058492, "grad_norm": 1.52949059009552, "learning_rate": 3.987064354433627e-05, "loss": 0.4642, "step": 208 }, { "epoch": 0.13109612670534734, "grad_norm": 1.5415035486221313, "learning_rate": 3.9868325844713115e-05, "loss": 0.4537, "step": 209 }, { "epoch": 0.13172338090010977, "grad_norm": 1.7037349939346313, "learning_rate": 3.986598763431775e-05, "loss": 0.4638, "step": 210 }, { "epoch": 0.1323506350948722, "grad_norm": 1.8606200218200684, "learning_rate": 3.986362891556398e-05, "loss": 0.485, "step": 211 }, { "epoch": 0.13297788928963464, "grad_norm": 1.6160262823104858, "learning_rate": 3.986124969088682e-05, "loss": 0.4789, "step": 212 }, { "epoch": 0.13360514348439706, "grad_norm": 1.791393518447876, "learning_rate": 3.9858849962742404e-05, "loss": 0.4621, "step": 213 }, { "epoch": 0.13423239767915948, "grad_norm": 1.5315974950790405, "learning_rate": 3.9856429733608066e-05, "loss": 0.4739, "step": 214 }, { "epoch": 0.1348596518739219, "grad_norm": 1.6416422128677368, "learning_rate": 3.98539890059823e-05, "loss": 0.4703, "step": 215 }, { "epoch": 0.13548690606868433, "grad_norm": 1.4342625141143799, "learning_rate": 3.985152778238474e-05, "loss": 0.4601, "step": 216 }, { "epoch": 0.13611416026344675, "grad_norm": 1.5981355905532837, "learning_rate": 3.98490460653562e-05, "loss": 0.4549, "step": 217 }, { "epoch": 0.1367414144582092, "grad_norm": 1.8291752338409424, "learning_rate": 3.9846543857458645e-05, "loss": 0.4482, "step": 218 }, { "epoch": 0.13736866865297162, "grad_norm": 1.4503984451293945, "learning_rate": 3.9844021161275186e-05, "loss": 0.4647, "step": 219 }, { "epoch": 0.13799592284773404, "grad_norm": 1.7972581386566162, "learning_rate": 3.984147797941011e-05, "loss": 0.4419, "step": 220 }, { "epoch": 0.13862317704249646, "grad_norm": 1.6375170946121216, "learning_rate": 3.983891431448881e-05, "loss": 0.461, "step": 221 }, { "epoch": 0.1392504312372589, "grad_norm": 1.7466061115264893, "learning_rate": 3.983633016915786e-05, "loss": 0.4601, "step": 222 }, { "epoch": 0.13987768543202134, "grad_norm": 1.5807160139083862, "learning_rate": 3.9833725546084966e-05, "loss": 0.4961, "step": 223 }, { "epoch": 0.14050493962678376, "grad_norm": 1.974726676940918, "learning_rate": 3.9831100447958963e-05, "loss": 0.4462, "step": 224 }, { "epoch": 0.14113219382154618, "grad_norm": 1.9808381795883179, "learning_rate": 3.982845487748984e-05, "loss": 0.4914, "step": 225 }, { "epoch": 0.1417594480163086, "grad_norm": 1.7497327327728271, "learning_rate": 3.982578883740871e-05, "loss": 0.4705, "step": 226 }, { "epoch": 0.14238670221107103, "grad_norm": 2.0800588130950928, "learning_rate": 3.982310233046781e-05, "loss": 0.4661, "step": 227 }, { "epoch": 0.14301395640583348, "grad_norm": 1.6125692129135132, "learning_rate": 3.982039535944054e-05, "loss": 0.4418, "step": 228 }, { "epoch": 0.1436412106005959, "grad_norm": 1.6863831281661987, "learning_rate": 3.9817667927121376e-05, "loss": 0.4683, "step": 229 }, { "epoch": 0.14426846479535832, "grad_norm": 1.620818018913269, "learning_rate": 3.981492003632596e-05, "loss": 0.4375, "step": 230 }, { "epoch": 0.14489571899012074, "grad_norm": 1.3409901857376099, "learning_rate": 3.9812151689891026e-05, "loss": 0.4468, "step": 231 }, { "epoch": 0.14552297318488316, "grad_norm": 1.4613945484161377, "learning_rate": 3.980936289067444e-05, "loss": 0.4528, "step": 232 }, { "epoch": 0.14615022737964561, "grad_norm": 1.4099844694137573, "learning_rate": 3.980655364155518e-05, "loss": 0.454, "step": 233 }, { "epoch": 0.14677748157440804, "grad_norm": 1.5455256700515747, "learning_rate": 3.980372394543332e-05, "loss": 0.4494, "step": 234 }, { "epoch": 0.14740473576917046, "grad_norm": 1.351446509361267, "learning_rate": 3.9800873805230075e-05, "loss": 0.4479, "step": 235 }, { "epoch": 0.14803198996393288, "grad_norm": 1.8010048866271973, "learning_rate": 3.979800322388773e-05, "loss": 0.5094, "step": 236 }, { "epoch": 0.1486592441586953, "grad_norm": 1.400158166885376, "learning_rate": 3.979511220436969e-05, "loss": 0.4157, "step": 237 }, { "epoch": 0.14928649835345773, "grad_norm": 1.4748667478561401, "learning_rate": 3.979220074966046e-05, "loss": 0.4776, "step": 238 }, { "epoch": 0.14991375254822017, "grad_norm": 1.5479722023010254, "learning_rate": 3.978926886276564e-05, "loss": 0.4863, "step": 239 }, { "epoch": 0.1505410067429826, "grad_norm": 1.7338252067565918, "learning_rate": 3.978631654671191e-05, "loss": 0.4525, "step": 240 }, { "epoch": 0.15116826093774502, "grad_norm": 1.8159891366958618, "learning_rate": 3.9783343804547055e-05, "loss": 0.5029, "step": 241 }, { "epoch": 0.15179551513250744, "grad_norm": 1.8051979541778564, "learning_rate": 3.978035063933994e-05, "loss": 0.406, "step": 242 }, { "epoch": 0.15242276932726986, "grad_norm": 1.5267044305801392, "learning_rate": 3.977733705418052e-05, "loss": 0.4829, "step": 243 }, { "epoch": 0.1530500235220323, "grad_norm": 1.4523890018463135, "learning_rate": 3.9774303052179816e-05, "loss": 0.4392, "step": 244 }, { "epoch": 0.15367727771679474, "grad_norm": 1.6153901815414429, "learning_rate": 3.977124863646994e-05, "loss": 0.4849, "step": 245 }, { "epoch": 0.15430453191155716, "grad_norm": 1.575958013534546, "learning_rate": 3.976817381020408e-05, "loss": 0.4659, "step": 246 }, { "epoch": 0.15493178610631958, "grad_norm": 1.4412821531295776, "learning_rate": 3.976507857655648e-05, "loss": 0.4048, "step": 247 }, { "epoch": 0.155559040301082, "grad_norm": 1.5614181756973267, "learning_rate": 3.976196293872246e-05, "loss": 0.439, "step": 248 }, { "epoch": 0.15618629449584445, "grad_norm": 1.375407099723816, "learning_rate": 3.975882689991839e-05, "loss": 0.4249, "step": 249 }, { "epoch": 0.15681354869060687, "grad_norm": 1.4382860660552979, "learning_rate": 3.9755670463381735e-05, "loss": 0.4281, "step": 250 }, { "epoch": 0.1574408028853693, "grad_norm": 1.3078750371932983, "learning_rate": 3.9752493632370983e-05, "loss": 0.4415, "step": 251 }, { "epoch": 0.15806805708013172, "grad_norm": 1.4344347715377808, "learning_rate": 3.9749296410165694e-05, "loss": 0.4135, "step": 252 }, { "epoch": 0.15869531127489414, "grad_norm": 1.4680067300796509, "learning_rate": 3.974607880006647e-05, "loss": 0.4178, "step": 253 }, { "epoch": 0.1593225654696566, "grad_norm": 1.4680876731872559, "learning_rate": 3.974284080539496e-05, "loss": 0.4249, "step": 254 }, { "epoch": 0.159949819664419, "grad_norm": 1.4555891752243042, "learning_rate": 3.973958242949387e-05, "loss": 0.4534, "step": 255 }, { "epoch": 0.16057707385918144, "grad_norm": 1.4939229488372803, "learning_rate": 3.973630367572692e-05, "loss": 0.5196, "step": 256 }, { "epoch": 0.16120432805394386, "grad_norm": 1.591544270515442, "learning_rate": 3.9733004547478897e-05, "loss": 0.4704, "step": 257 }, { "epoch": 0.16183158224870628, "grad_norm": 1.500394582748413, "learning_rate": 3.9729685048155604e-05, "loss": 0.4432, "step": 258 }, { "epoch": 0.16245883644346873, "grad_norm": 1.663857102394104, "learning_rate": 3.972634518118387e-05, "loss": 0.4546, "step": 259 }, { "epoch": 0.16308609063823115, "grad_norm": 1.3415054082870483, "learning_rate": 3.972298495001157e-05, "loss": 0.4328, "step": 260 }, { "epoch": 0.16371334483299357, "grad_norm": 1.4470168352127075, "learning_rate": 3.9719604358107576e-05, "loss": 0.3941, "step": 261 }, { "epoch": 0.164340599027756, "grad_norm": 1.5569263696670532, "learning_rate": 3.97162034089618e-05, "loss": 0.4389, "step": 262 }, { "epoch": 0.16496785322251842, "grad_norm": 1.9183214902877808, "learning_rate": 3.971278210608516e-05, "loss": 0.4015, "step": 263 }, { "epoch": 0.16559510741728084, "grad_norm": 1.5554015636444092, "learning_rate": 3.9709340453009584e-05, "loss": 0.4753, "step": 264 }, { "epoch": 0.1662223616120433, "grad_norm": 1.5944366455078125, "learning_rate": 3.970587845328802e-05, "loss": 0.4813, "step": 265 }, { "epoch": 0.1668496158068057, "grad_norm": 1.6086550951004028, "learning_rate": 3.97023961104944e-05, "loss": 0.5004, "step": 266 }, { "epoch": 0.16747687000156813, "grad_norm": 1.234297752380371, "learning_rate": 3.9698893428223674e-05, "loss": 0.4305, "step": 267 }, { "epoch": 0.16810412419633056, "grad_norm": 1.2687492370605469, "learning_rate": 3.969537041009179e-05, "loss": 0.4292, "step": 268 }, { "epoch": 0.16873137839109298, "grad_norm": 1.5316917896270752, "learning_rate": 3.9691827059735676e-05, "loss": 0.4486, "step": 269 }, { "epoch": 0.16935863258585543, "grad_norm": 1.5229873657226562, "learning_rate": 3.968826338081327e-05, "loss": 0.4321, "step": 270 }, { "epoch": 0.16998588678061785, "grad_norm": 1.6713693141937256, "learning_rate": 3.968467937700347e-05, "loss": 0.4628, "step": 271 }, { "epoch": 0.17061314097538027, "grad_norm": 1.7388298511505127, "learning_rate": 3.968107505200617e-05, "loss": 0.4182, "step": 272 }, { "epoch": 0.1712403951701427, "grad_norm": 1.4563961029052734, "learning_rate": 3.9677450409542243e-05, "loss": 0.405, "step": 273 }, { "epoch": 0.17186764936490512, "grad_norm": 1.4333897829055786, "learning_rate": 3.967380545335354e-05, "loss": 0.42, "step": 274 }, { "epoch": 0.17249490355966757, "grad_norm": 1.6393811702728271, "learning_rate": 3.9670140187202875e-05, "loss": 0.4636, "step": 275 }, { "epoch": 0.17312215775443, "grad_norm": 1.5931137800216675, "learning_rate": 3.9666454614874035e-05, "loss": 0.4459, "step": 276 }, { "epoch": 0.1737494119491924, "grad_norm": 1.5716625452041626, "learning_rate": 3.966274874017176e-05, "loss": 0.4184, "step": 277 }, { "epoch": 0.17437666614395483, "grad_norm": 1.7171566486358643, "learning_rate": 3.9659022566921755e-05, "loss": 0.4256, "step": 278 }, { "epoch": 0.17500392033871726, "grad_norm": 1.461174488067627, "learning_rate": 3.965527609897068e-05, "loss": 0.3932, "step": 279 }, { "epoch": 0.1756311745334797, "grad_norm": 1.4326555728912354, "learning_rate": 3.965150934018614e-05, "loss": 0.3798, "step": 280 }, { "epoch": 0.17625842872824213, "grad_norm": 1.4033962488174438, "learning_rate": 3.964772229445671e-05, "loss": 0.3764, "step": 281 }, { "epoch": 0.17688568292300455, "grad_norm": 1.3416073322296143, "learning_rate": 3.964391496569188e-05, "loss": 0.4138, "step": 282 }, { "epoch": 0.17751293711776697, "grad_norm": 1.5007586479187012, "learning_rate": 3.964008735782209e-05, "loss": 0.4117, "step": 283 }, { "epoch": 0.1781401913125294, "grad_norm": 1.332431674003601, "learning_rate": 3.963623947479871e-05, "loss": 0.4387, "step": 284 }, { "epoch": 0.17876744550729182, "grad_norm": 1.4107450246810913, "learning_rate": 3.963237132059405e-05, "loss": 0.4079, "step": 285 }, { "epoch": 0.17939469970205427, "grad_norm": 1.5216684341430664, "learning_rate": 3.9628482899201347e-05, "loss": 0.4097, "step": 286 }, { "epoch": 0.1800219538968167, "grad_norm": 1.3541133403778076, "learning_rate": 3.962457421463475e-05, "loss": 0.4188, "step": 287 }, { "epoch": 0.1806492080915791, "grad_norm": 1.3088887929916382, "learning_rate": 3.9620645270929324e-05, "loss": 0.4739, "step": 288 }, { "epoch": 0.18127646228634153, "grad_norm": 1.384234070777893, "learning_rate": 3.961669607214106e-05, "loss": 0.3998, "step": 289 }, { "epoch": 0.18190371648110396, "grad_norm": 1.684230089187622, "learning_rate": 3.961272662234687e-05, "loss": 0.4014, "step": 290 }, { "epoch": 0.1825309706758664, "grad_norm": 1.3789516687393188, "learning_rate": 3.960873692564454e-05, "loss": 0.4188, "step": 291 }, { "epoch": 0.18315822487062883, "grad_norm": 1.4697010517120361, "learning_rate": 3.9604726986152784e-05, "loss": 0.4248, "step": 292 }, { "epoch": 0.18378547906539125, "grad_norm": 1.6977431774139404, "learning_rate": 3.9600696808011194e-05, "loss": 0.429, "step": 293 }, { "epoch": 0.18441273326015367, "grad_norm": 1.623642921447754, "learning_rate": 3.959664639538027e-05, "loss": 0.3813, "step": 294 }, { "epoch": 0.1850399874549161, "grad_norm": 1.4522424936294556, "learning_rate": 3.959257575244139e-05, "loss": 0.4441, "step": 295 }, { "epoch": 0.18566724164967854, "grad_norm": 1.3811010122299194, "learning_rate": 3.9588484883396836e-05, "loss": 0.4286, "step": 296 }, { "epoch": 0.18629449584444097, "grad_norm": 1.416077733039856, "learning_rate": 3.958437379246975e-05, "loss": 0.4021, "step": 297 }, { "epoch": 0.1869217500392034, "grad_norm": 1.5948553085327148, "learning_rate": 3.958024248390414e-05, "loss": 0.4049, "step": 298 }, { "epoch": 0.1875490042339658, "grad_norm": 1.7502264976501465, "learning_rate": 3.9576090961964925e-05, "loss": 0.458, "step": 299 }, { "epoch": 0.18817625842872823, "grad_norm": 1.3904632329940796, "learning_rate": 3.957191923093785e-05, "loss": 0.4346, "step": 300 }, { "epoch": 0.18880351262349068, "grad_norm": 1.64029061794281, "learning_rate": 3.956772729512955e-05, "loss": 0.4512, "step": 301 }, { "epoch": 0.1894307668182531, "grad_norm": 1.4691493511199951, "learning_rate": 3.956351515886751e-05, "loss": 0.4115, "step": 302 }, { "epoch": 0.19005802101301553, "grad_norm": 1.3712893724441528, "learning_rate": 3.955928282650005e-05, "loss": 0.3901, "step": 303 }, { "epoch": 0.19068527520777795, "grad_norm": 1.2790814638137817, "learning_rate": 3.955503030239638e-05, "loss": 0.4318, "step": 304 }, { "epoch": 0.19131252940254037, "grad_norm": 1.3729325532913208, "learning_rate": 3.9550757590946505e-05, "loss": 0.4505, "step": 305 }, { "epoch": 0.1919397835973028, "grad_norm": 1.6766351461410522, "learning_rate": 3.954646469656132e-05, "loss": 0.4333, "step": 306 }, { "epoch": 0.19256703779206524, "grad_norm": 1.444437861442566, "learning_rate": 3.95421516236725e-05, "loss": 0.4192, "step": 307 }, { "epoch": 0.19319429198682767, "grad_norm": 1.205487847328186, "learning_rate": 3.953781837673262e-05, "loss": 0.3647, "step": 308 }, { "epoch": 0.1938215461815901, "grad_norm": 1.4272749423980713, "learning_rate": 3.953346496021501e-05, "loss": 0.41, "step": 309 }, { "epoch": 0.1944488003763525, "grad_norm": 1.4003968238830566, "learning_rate": 3.952909137861388e-05, "loss": 0.3954, "step": 310 }, { "epoch": 0.19507605457111493, "grad_norm": 1.4121627807617188, "learning_rate": 3.952469763644421e-05, "loss": 0.3789, "step": 311 }, { "epoch": 0.19570330876587738, "grad_norm": 1.2762525081634521, "learning_rate": 3.952028373824183e-05, "loss": 0.4221, "step": 312 }, { "epoch": 0.1963305629606398, "grad_norm": 1.363707184791565, "learning_rate": 3.951584968856336e-05, "loss": 0.3754, "step": 313 }, { "epoch": 0.19695781715540223, "grad_norm": 1.5307010412216187, "learning_rate": 3.951139549198623e-05, "loss": 0.4095, "step": 314 }, { "epoch": 0.19758507135016465, "grad_norm": 1.5087469816207886, "learning_rate": 3.9506921153108656e-05, "loss": 0.4083, "step": 315 }, { "epoch": 0.19821232554492707, "grad_norm": 1.495043396949768, "learning_rate": 3.950242667654965e-05, "loss": 0.4412, "step": 316 }, { "epoch": 0.19883957973968952, "grad_norm": 1.4186872243881226, "learning_rate": 3.949791206694903e-05, "loss": 0.4067, "step": 317 }, { "epoch": 0.19946683393445194, "grad_norm": 1.3536276817321777, "learning_rate": 3.9493377328967384e-05, "loss": 0.4339, "step": 318 }, { "epoch": 0.20009408812921436, "grad_norm": 1.5129317045211792, "learning_rate": 3.948882246728608e-05, "loss": 0.5023, "step": 319 }, { "epoch": 0.2007213423239768, "grad_norm": 1.2687675952911377, "learning_rate": 3.948424748660726e-05, "loss": 0.4013, "step": 320 }, { "epoch": 0.2013485965187392, "grad_norm": 1.4301279783248901, "learning_rate": 3.9479652391653835e-05, "loss": 0.4344, "step": 321 }, { "epoch": 0.20197585071350166, "grad_norm": 1.4821714162826538, "learning_rate": 3.94750371871695e-05, "loss": 0.4155, "step": 322 }, { "epoch": 0.20260310490826408, "grad_norm": 1.252396821975708, "learning_rate": 3.947040187791867e-05, "loss": 0.3807, "step": 323 }, { "epoch": 0.2032303591030265, "grad_norm": 1.598359227180481, "learning_rate": 3.946574646868655e-05, "loss": 0.4269, "step": 324 }, { "epoch": 0.20385761329778893, "grad_norm": 1.3481076955795288, "learning_rate": 3.9461070964279084e-05, "loss": 0.4113, "step": 325 }, { "epoch": 0.20448486749255135, "grad_norm": 1.299596905708313, "learning_rate": 3.945637536952296e-05, "loss": 0.4432, "step": 326 }, { "epoch": 0.2051121216873138, "grad_norm": 1.4996416568756104, "learning_rate": 3.94516596892656e-05, "loss": 0.4161, "step": 327 }, { "epoch": 0.20573937588207622, "grad_norm": 1.520275592803955, "learning_rate": 3.9446923928375174e-05, "loss": 0.4482, "step": 328 }, { "epoch": 0.20636663007683864, "grad_norm": 1.2884626388549805, "learning_rate": 3.944216809174057e-05, "loss": 0.4304, "step": 329 }, { "epoch": 0.20699388427160106, "grad_norm": 1.6496778726577759, "learning_rate": 3.943739218427141e-05, "loss": 0.4208, "step": 330 }, { "epoch": 0.2076211384663635, "grad_norm": 1.434212565422058, "learning_rate": 3.943259621089801e-05, "loss": 0.4171, "step": 331 }, { "epoch": 0.2082483926611259, "grad_norm": 1.2932742834091187, "learning_rate": 3.942778017657146e-05, "loss": 0.377, "step": 332 }, { "epoch": 0.20887564685588836, "grad_norm": 1.3967646360397339, "learning_rate": 3.9422944086263494e-05, "loss": 0.4526, "step": 333 }, { "epoch": 0.20950290105065078, "grad_norm": 1.1630432605743408, "learning_rate": 3.941808794496659e-05, "loss": 0.3943, "step": 334 }, { "epoch": 0.2101301552454132, "grad_norm": 1.549743413925171, "learning_rate": 3.94132117576939e-05, "loss": 0.3819, "step": 335 }, { "epoch": 0.21075740944017562, "grad_norm": 1.455093264579773, "learning_rate": 3.940831552947931e-05, "loss": 0.3965, "step": 336 }, { "epoch": 0.21138466363493805, "grad_norm": 1.2519598007202148, "learning_rate": 3.940339926537734e-05, "loss": 0.4236, "step": 337 }, { "epoch": 0.2120119178297005, "grad_norm": 1.519441843032837, "learning_rate": 3.9398462970463234e-05, "loss": 0.4191, "step": 338 }, { "epoch": 0.21263917202446292, "grad_norm": 1.5170977115631104, "learning_rate": 3.93935066498329e-05, "loss": 0.4308, "step": 339 }, { "epoch": 0.21326642621922534, "grad_norm": 1.3977679014205933, "learning_rate": 3.9388530308602935e-05, "loss": 0.4476, "step": 340 }, { "epoch": 0.21389368041398776, "grad_norm": 1.6217604875564575, "learning_rate": 3.938353395191058e-05, "loss": 0.4158, "step": 341 }, { "epoch": 0.21452093460875019, "grad_norm": 1.3827109336853027, "learning_rate": 3.937851758491375e-05, "loss": 0.3565, "step": 342 }, { "epoch": 0.21514818880351264, "grad_norm": 1.4254240989685059, "learning_rate": 3.937348121279102e-05, "loss": 0.4092, "step": 343 }, { "epoch": 0.21577544299827506, "grad_norm": 1.8280800580978394, "learning_rate": 3.936842484074161e-05, "loss": 0.4093, "step": 344 }, { "epoch": 0.21640269719303748, "grad_norm": 1.4849143028259277, "learning_rate": 3.93633484739854e-05, "loss": 0.4213, "step": 345 }, { "epoch": 0.2170299513877999, "grad_norm": 1.3602157831192017, "learning_rate": 3.93582521177629e-05, "loss": 0.4088, "step": 346 }, { "epoch": 0.21765720558256232, "grad_norm": 1.8441216945648193, "learning_rate": 3.9353135777335244e-05, "loss": 0.4077, "step": 347 }, { "epoch": 0.21828445977732477, "grad_norm": 1.284321904182434, "learning_rate": 3.934799945798423e-05, "loss": 0.4113, "step": 348 }, { "epoch": 0.2189117139720872, "grad_norm": 1.2958706617355347, "learning_rate": 3.934284316501225e-05, "loss": 0.3932, "step": 349 }, { "epoch": 0.21953896816684962, "grad_norm": 1.341288447380066, "learning_rate": 3.933766690374232e-05, "loss": 0.41, "step": 350 }, { "epoch": 0.22016622236161204, "grad_norm": 1.4277088642120361, "learning_rate": 3.9332470679518095e-05, "loss": 0.4162, "step": 351 }, { "epoch": 0.22079347655637446, "grad_norm": 1.3362282514572144, "learning_rate": 3.9327254497703806e-05, "loss": 0.3986, "step": 352 }, { "epoch": 0.22142073075113688, "grad_norm": 1.4247506856918335, "learning_rate": 3.9322018363684306e-05, "loss": 0.3634, "step": 353 }, { "epoch": 0.22204798494589933, "grad_norm": 1.8625775575637817, "learning_rate": 3.9316762282865046e-05, "loss": 0.4288, "step": 354 }, { "epoch": 0.22267523914066176, "grad_norm": 1.6340423822402954, "learning_rate": 3.931148626067205e-05, "loss": 0.415, "step": 355 }, { "epoch": 0.22330249333542418, "grad_norm": 1.3341423273086548, "learning_rate": 3.930619030255196e-05, "loss": 0.3979, "step": 356 }, { "epoch": 0.2239297475301866, "grad_norm": 1.362868070602417, "learning_rate": 3.930087441397196e-05, "loss": 0.3724, "step": 357 }, { "epoch": 0.22455700172494902, "grad_norm": 1.2193236351013184, "learning_rate": 3.929553860041984e-05, "loss": 0.4193, "step": 358 }, { "epoch": 0.22518425591971147, "grad_norm": 1.515285849571228, "learning_rate": 3.929018286740394e-05, "loss": 0.4735, "step": 359 }, { "epoch": 0.2258115101144739, "grad_norm": 1.4257380962371826, "learning_rate": 3.928480722045319e-05, "loss": 0.4074, "step": 360 }, { "epoch": 0.22643876430923632, "grad_norm": 1.409763216972351, "learning_rate": 3.927941166511704e-05, "loss": 0.4456, "step": 361 }, { "epoch": 0.22706601850399874, "grad_norm": 1.384979009628296, "learning_rate": 3.927399620696553e-05, "loss": 0.3816, "step": 362 }, { "epoch": 0.22769327269876116, "grad_norm": 1.2846951484680176, "learning_rate": 3.92685608515892e-05, "loss": 0.4072, "step": 363 }, { "epoch": 0.2283205268935236, "grad_norm": 1.151328444480896, "learning_rate": 3.926310560459919e-05, "loss": 0.4332, "step": 364 }, { "epoch": 0.22894778108828603, "grad_norm": 1.4825246334075928, "learning_rate": 3.925763047162712e-05, "loss": 0.3971, "step": 365 }, { "epoch": 0.22957503528304846, "grad_norm": 1.3169598579406738, "learning_rate": 3.9252135458325176e-05, "loss": 0.3898, "step": 366 }, { "epoch": 0.23020228947781088, "grad_norm": 1.3232101202011108, "learning_rate": 3.9246620570366046e-05, "loss": 0.378, "step": 367 }, { "epoch": 0.2308295436725733, "grad_norm": 1.3413264751434326, "learning_rate": 3.924108581344295e-05, "loss": 0.4331, "step": 368 }, { "epoch": 0.23145679786733575, "grad_norm": 1.4002594947814941, "learning_rate": 3.9235531193269614e-05, "loss": 0.4, "step": 369 }, { "epoch": 0.23208405206209817, "grad_norm": 1.4046937227249146, "learning_rate": 3.922995671558026e-05, "loss": 0.3886, "step": 370 }, { "epoch": 0.2327113062568606, "grad_norm": 1.4203039407730103, "learning_rate": 3.922436238612962e-05, "loss": 0.402, "step": 371 }, { "epoch": 0.23333856045162302, "grad_norm": 1.3028756380081177, "learning_rate": 3.9218748210692916e-05, "loss": 0.3552, "step": 372 }, { "epoch": 0.23396581464638544, "grad_norm": 1.2306022644042969, "learning_rate": 3.921311419506586e-05, "loss": 0.411, "step": 373 }, { "epoch": 0.23459306884114786, "grad_norm": 1.2577680349349976, "learning_rate": 3.920746034506465e-05, "loss": 0.418, "step": 374 }, { "epoch": 0.2352203230359103, "grad_norm": 1.234844446182251, "learning_rate": 3.920178666652595e-05, "loss": 0.3706, "step": 375 }, { "epoch": 0.23584757723067273, "grad_norm": 1.4486712217330933, "learning_rate": 3.919609316530689e-05, "loss": 0.3818, "step": 376 }, { "epoch": 0.23647483142543516, "grad_norm": 1.2798314094543457, "learning_rate": 3.9190379847285085e-05, "loss": 0.4522, "step": 377 }, { "epoch": 0.23710208562019758, "grad_norm": 1.3837852478027344, "learning_rate": 3.918464671835858e-05, "loss": 0.3565, "step": 378 }, { "epoch": 0.23772933981496, "grad_norm": 1.4889065027236938, "learning_rate": 3.917889378444591e-05, "loss": 0.428, "step": 379 }, { "epoch": 0.23835659400972245, "grad_norm": 1.42939293384552, "learning_rate": 3.9173121051486014e-05, "loss": 0.416, "step": 380 }, { "epoch": 0.23898384820448487, "grad_norm": 1.412003755569458, "learning_rate": 3.9167328525438285e-05, "loss": 0.4457, "step": 381 }, { "epoch": 0.2396111023992473, "grad_norm": 1.4608865976333618, "learning_rate": 3.9161516212282555e-05, "loss": 0.4138, "step": 382 }, { "epoch": 0.24023835659400972, "grad_norm": 1.4364871978759766, "learning_rate": 3.915568411801908e-05, "loss": 0.371, "step": 383 }, { "epoch": 0.24086561078877214, "grad_norm": 1.456733226776123, "learning_rate": 3.914983224866854e-05, "loss": 0.4101, "step": 384 }, { "epoch": 0.2414928649835346, "grad_norm": 1.2100237607955933, "learning_rate": 3.914396061027202e-05, "loss": 0.3938, "step": 385 }, { "epoch": 0.242120119178297, "grad_norm": 1.3283522129058838, "learning_rate": 3.913806920889102e-05, "loss": 0.3727, "step": 386 }, { "epoch": 0.24274737337305943, "grad_norm": 1.5742121934890747, "learning_rate": 3.913215805060745e-05, "loss": 0.3825, "step": 387 }, { "epoch": 0.24337462756782186, "grad_norm": 1.551124095916748, "learning_rate": 3.91262271415236e-05, "loss": 0.4228, "step": 388 }, { "epoch": 0.24400188176258428, "grad_norm": 1.488900065422058, "learning_rate": 3.912027648776215e-05, "loss": 0.403, "step": 389 }, { "epoch": 0.24462913595734673, "grad_norm": 1.4520589113235474, "learning_rate": 3.9114306095466165e-05, "loss": 0.3894, "step": 390 }, { "epoch": 0.24525639015210915, "grad_norm": 1.4398682117462158, "learning_rate": 3.910831597079912e-05, "loss": 0.3537, "step": 391 }, { "epoch": 0.24588364434687157, "grad_norm": 1.324266791343689, "learning_rate": 3.9102306119944795e-05, "loss": 0.3257, "step": 392 }, { "epoch": 0.246510898541634, "grad_norm": 1.2131379842758179, "learning_rate": 3.90962765491074e-05, "loss": 0.4242, "step": 393 }, { "epoch": 0.24713815273639642, "grad_norm": 1.186140775680542, "learning_rate": 3.909022726451146e-05, "loss": 0.3969, "step": 394 }, { "epoch": 0.24776540693115887, "grad_norm": 1.2435379028320312, "learning_rate": 3.9084158272401865e-05, "loss": 0.4061, "step": 395 }, { "epoch": 0.2483926611259213, "grad_norm": 1.3766072988510132, "learning_rate": 3.907806957904385e-05, "loss": 0.3967, "step": 396 }, { "epoch": 0.2490199153206837, "grad_norm": 1.316792368888855, "learning_rate": 3.907196119072299e-05, "loss": 0.389, "step": 397 }, { "epoch": 0.24964716951544613, "grad_norm": 1.2080376148223877, "learning_rate": 3.906583311374518e-05, "loss": 0.3774, "step": 398 }, { "epoch": 0.2502744237102086, "grad_norm": 1.3271780014038086, "learning_rate": 3.905968535443666e-05, "loss": 0.4158, "step": 399 }, { "epoch": 0.250901677904971, "grad_norm": 1.442582130432129, "learning_rate": 3.905351791914398e-05, "loss": 0.3879, "step": 400 }, { "epoch": 0.2515289320997334, "grad_norm": 1.319524884223938, "learning_rate": 3.904733081423399e-05, "loss": 0.3404, "step": 401 }, { "epoch": 0.2521561862944958, "grad_norm": 1.310402750968933, "learning_rate": 3.9041124046093866e-05, "loss": 0.3603, "step": 402 }, { "epoch": 0.25278344048925827, "grad_norm": 1.2856608629226685, "learning_rate": 3.903489762113106e-05, "loss": 0.3566, "step": 403 }, { "epoch": 0.2534106946840207, "grad_norm": 1.3391993045806885, "learning_rate": 3.902865154577335e-05, "loss": 0.337, "step": 404 }, { "epoch": 0.2540379488787831, "grad_norm": 1.288784146308899, "learning_rate": 3.902238582646876e-05, "loss": 0.3885, "step": 405 }, { "epoch": 0.25466520307354557, "grad_norm": 1.1475400924682617, "learning_rate": 3.9016100469685625e-05, "loss": 0.3283, "step": 406 }, { "epoch": 0.25529245726830796, "grad_norm": 1.5950651168823242, "learning_rate": 3.900979548191254e-05, "loss": 0.4125, "step": 407 }, { "epoch": 0.2559197114630704, "grad_norm": 1.4678776264190674, "learning_rate": 3.900347086965835e-05, "loss": 0.3735, "step": 408 }, { "epoch": 0.25654696565783286, "grad_norm": 1.5201358795166016, "learning_rate": 3.899712663945219e-05, "loss": 0.4163, "step": 409 }, { "epoch": 0.25717421985259525, "grad_norm": 1.0794888734817505, "learning_rate": 3.8990762797843436e-05, "loss": 0.3985, "step": 410 }, { "epoch": 0.2578014740473577, "grad_norm": 1.3003852367401123, "learning_rate": 3.8984379351401696e-05, "loss": 0.3735, "step": 411 }, { "epoch": 0.2584287282421201, "grad_norm": 1.7244070768356323, "learning_rate": 3.897797630671682e-05, "loss": 0.4131, "step": 412 }, { "epoch": 0.25905598243688255, "grad_norm": 1.3861531019210815, "learning_rate": 3.897155367039892e-05, "loss": 0.3649, "step": 413 }, { "epoch": 0.259683236631645, "grad_norm": 1.6174381971359253, "learning_rate": 3.896511144907829e-05, "loss": 0.363, "step": 414 }, { "epoch": 0.2603104908264074, "grad_norm": 1.4827419519424438, "learning_rate": 3.895864964940546e-05, "loss": 0.3722, "step": 415 }, { "epoch": 0.26093774502116984, "grad_norm": 1.174606442451477, "learning_rate": 3.8952168278051184e-05, "loss": 0.3527, "step": 416 }, { "epoch": 0.26156499921593224, "grad_norm": 1.4897773265838623, "learning_rate": 3.8945667341706405e-05, "loss": 0.3602, "step": 417 }, { "epoch": 0.2621922534106947, "grad_norm": 1.1790823936462402, "learning_rate": 3.893914684708227e-05, "loss": 0.3857, "step": 418 }, { "epoch": 0.26281950760545714, "grad_norm": 1.4660543203353882, "learning_rate": 3.893260680091011e-05, "loss": 0.3808, "step": 419 }, { "epoch": 0.26344676180021953, "grad_norm": 1.4048151969909668, "learning_rate": 3.892604720994146e-05, "loss": 0.3589, "step": 420 }, { "epoch": 0.264074015994982, "grad_norm": 1.247186541557312, "learning_rate": 3.8919468080947996e-05, "loss": 0.3675, "step": 421 }, { "epoch": 0.2647012701897444, "grad_norm": 1.426533818244934, "learning_rate": 3.8912869420721604e-05, "loss": 0.3759, "step": 422 }, { "epoch": 0.2653285243845068, "grad_norm": 1.4955856800079346, "learning_rate": 3.890625123607429e-05, "loss": 0.3558, "step": 423 }, { "epoch": 0.2659557785792693, "grad_norm": 1.2410643100738525, "learning_rate": 3.8899613533838265e-05, "loss": 0.3567, "step": 424 }, { "epoch": 0.26658303277403167, "grad_norm": 1.9216816425323486, "learning_rate": 3.8892956320865846e-05, "loss": 0.4118, "step": 425 }, { "epoch": 0.2672102869687941, "grad_norm": 1.4261921644210815, "learning_rate": 3.888627960402952e-05, "loss": 0.3689, "step": 426 }, { "epoch": 0.2678375411635565, "grad_norm": 1.1982080936431885, "learning_rate": 3.887958339022188e-05, "loss": 0.3706, "step": 427 }, { "epoch": 0.26846479535831896, "grad_norm": 1.3578213453292847, "learning_rate": 3.887286768635568e-05, "loss": 0.37, "step": 428 }, { "epoch": 0.2690920495530814, "grad_norm": 1.3112714290618896, "learning_rate": 3.886613249936376e-05, "loss": 0.3861, "step": 429 }, { "epoch": 0.2697193037478438, "grad_norm": 1.2745850086212158, "learning_rate": 3.885937783619911e-05, "loss": 0.4104, "step": 430 }, { "epoch": 0.27034655794260626, "grad_norm": 1.239473819732666, "learning_rate": 3.885260370383479e-05, "loss": 0.3458, "step": 431 }, { "epoch": 0.27097381213736865, "grad_norm": 1.2031855583190918, "learning_rate": 3.884581010926399e-05, "loss": 0.3464, "step": 432 }, { "epoch": 0.2716010663321311, "grad_norm": 1.2598925828933716, "learning_rate": 3.883899705949997e-05, "loss": 0.3721, "step": 433 }, { "epoch": 0.2722283205268935, "grad_norm": 1.157799482345581, "learning_rate": 3.883216456157608e-05, "loss": 0.3513, "step": 434 }, { "epoch": 0.27285557472165595, "grad_norm": 1.3330152034759521, "learning_rate": 3.8825312622545745e-05, "loss": 0.4207, "step": 435 }, { "epoch": 0.2734828289164184, "grad_norm": 1.162471890449524, "learning_rate": 3.881844124948247e-05, "loss": 0.3748, "step": 436 }, { "epoch": 0.2741100831111808, "grad_norm": 1.442447543144226, "learning_rate": 3.881155044947981e-05, "loss": 0.372, "step": 437 }, { "epoch": 0.27473733730594324, "grad_norm": 1.4956692457199097, "learning_rate": 3.880464022965138e-05, "loss": 0.3877, "step": 438 }, { "epoch": 0.27536459150070564, "grad_norm": 1.2240872383117676, "learning_rate": 3.8797710597130855e-05, "loss": 0.3811, "step": 439 }, { "epoch": 0.2759918456954681, "grad_norm": 1.4835654497146606, "learning_rate": 3.879076155907193e-05, "loss": 0.3826, "step": 440 }, { "epoch": 0.27661909989023054, "grad_norm": 1.3184068202972412, "learning_rate": 3.878379312264833e-05, "loss": 0.3868, "step": 441 }, { "epoch": 0.27724635408499293, "grad_norm": 1.30062735080719, "learning_rate": 3.8776805295053835e-05, "loss": 0.3799, "step": 442 }, { "epoch": 0.2778736082797554, "grad_norm": 1.2152583599090576, "learning_rate": 3.876979808350222e-05, "loss": 0.3565, "step": 443 }, { "epoch": 0.2785008624745178, "grad_norm": 1.1691176891326904, "learning_rate": 3.876277149522727e-05, "loss": 0.343, "step": 444 }, { "epoch": 0.2791281166692802, "grad_norm": 1.3291245698928833, "learning_rate": 3.8755725537482785e-05, "loss": 0.336, "step": 445 }, { "epoch": 0.2797553708640427, "grad_norm": 1.3104655742645264, "learning_rate": 3.874866021754256e-05, "loss": 0.3728, "step": 446 }, { "epoch": 0.28038262505880507, "grad_norm": 1.2047230005264282, "learning_rate": 3.8741575542700366e-05, "loss": 0.3284, "step": 447 }, { "epoch": 0.2810098792535675, "grad_norm": 1.5223572254180908, "learning_rate": 3.873447152026996e-05, "loss": 0.4426, "step": 448 }, { "epoch": 0.2816371334483299, "grad_norm": 1.2537058591842651, "learning_rate": 3.872734815758508e-05, "loss": 0.3687, "step": 449 }, { "epoch": 0.28226438764309236, "grad_norm": 1.32411527633667, "learning_rate": 3.8720205461999426e-05, "loss": 0.3903, "step": 450 }, { "epoch": 0.2828916418378548, "grad_norm": 1.271552324295044, "learning_rate": 3.871304344088665e-05, "loss": 0.3738, "step": 451 }, { "epoch": 0.2835188960326172, "grad_norm": 1.0983104705810547, "learning_rate": 3.870586210164035e-05, "loss": 0.3529, "step": 452 }, { "epoch": 0.28414615022737966, "grad_norm": 1.3040975332260132, "learning_rate": 3.86986614516741e-05, "loss": 0.3524, "step": 453 }, { "epoch": 0.28477340442214205, "grad_norm": 1.2826478481292725, "learning_rate": 3.869144149842136e-05, "loss": 0.3693, "step": 454 }, { "epoch": 0.2854006586169045, "grad_norm": 1.199196457862854, "learning_rate": 3.868420224933555e-05, "loss": 0.3779, "step": 455 }, { "epoch": 0.28602791281166695, "grad_norm": 1.2873188257217407, "learning_rate": 3.8676943711890006e-05, "loss": 0.3436, "step": 456 }, { "epoch": 0.28665516700642935, "grad_norm": 1.2819852828979492, "learning_rate": 3.8669665893577975e-05, "loss": 0.3779, "step": 457 }, { "epoch": 0.2872824212011918, "grad_norm": 1.288419246673584, "learning_rate": 3.866236880191259e-05, "loss": 0.3918, "step": 458 }, { "epoch": 0.2879096753959542, "grad_norm": 1.165993332862854, "learning_rate": 3.865505244442691e-05, "loss": 0.3715, "step": 459 }, { "epoch": 0.28853692959071664, "grad_norm": 1.201463222503662, "learning_rate": 3.864771682867386e-05, "loss": 0.3701, "step": 460 }, { "epoch": 0.2891641837854791, "grad_norm": 1.2459325790405273, "learning_rate": 3.8640361962226254e-05, "loss": 0.3279, "step": 461 }, { "epoch": 0.2897914379802415, "grad_norm": 1.205305814743042, "learning_rate": 3.863298785267678e-05, "loss": 0.3772, "step": 462 }, { "epoch": 0.29041869217500393, "grad_norm": 1.2977550029754639, "learning_rate": 3.8625594507638e-05, "loss": 0.3818, "step": 463 }, { "epoch": 0.29104594636976633, "grad_norm": 1.1289427280426025, "learning_rate": 3.861818193474231e-05, "loss": 0.3551, "step": 464 }, { "epoch": 0.2916732005645288, "grad_norm": 1.2582975625991821, "learning_rate": 3.8610750141641984e-05, "loss": 0.3849, "step": 465 }, { "epoch": 0.29230045475929123, "grad_norm": 1.1075705289840698, "learning_rate": 3.860329913600912e-05, "loss": 0.3662, "step": 466 }, { "epoch": 0.2929277089540536, "grad_norm": 1.2868229150772095, "learning_rate": 3.8595828925535646e-05, "loss": 0.3765, "step": 467 }, { "epoch": 0.2935549631488161, "grad_norm": 1.1359583139419556, "learning_rate": 3.858833951793333e-05, "loss": 0.3182, "step": 468 }, { "epoch": 0.29418221734357847, "grad_norm": 1.207962155342102, "learning_rate": 3.858083092093375e-05, "loss": 0.3768, "step": 469 }, { "epoch": 0.2948094715383409, "grad_norm": 1.1784318685531616, "learning_rate": 3.8573303142288295e-05, "loss": 0.3576, "step": 470 }, { "epoch": 0.29543672573310337, "grad_norm": 1.2792519330978394, "learning_rate": 3.8565756189768146e-05, "loss": 0.3437, "step": 471 }, { "epoch": 0.29606397992786576, "grad_norm": 1.5046852827072144, "learning_rate": 3.8558190071164305e-05, "loss": 0.3556, "step": 472 }, { "epoch": 0.2966912341226282, "grad_norm": 1.2456021308898926, "learning_rate": 3.8550604794287536e-05, "loss": 0.3052, "step": 473 }, { "epoch": 0.2973184883173906, "grad_norm": 1.3958989381790161, "learning_rate": 3.8543000366968385e-05, "loss": 0.3994, "step": 474 }, { "epoch": 0.29794574251215306, "grad_norm": 1.4529577493667603, "learning_rate": 3.853537679705716e-05, "loss": 0.3377, "step": 475 }, { "epoch": 0.29857299670691545, "grad_norm": 1.3536865711212158, "learning_rate": 3.8527734092423966e-05, "loss": 0.3789, "step": 476 }, { "epoch": 0.2992002509016779, "grad_norm": 1.2169958353042603, "learning_rate": 3.852007226095861e-05, "loss": 0.3557, "step": 477 }, { "epoch": 0.29982750509644035, "grad_norm": 1.295535683631897, "learning_rate": 3.851239131057069e-05, "loss": 0.3687, "step": 478 }, { "epoch": 0.30045475929120274, "grad_norm": 1.296625018119812, "learning_rate": 3.850469124918951e-05, "loss": 0.3856, "step": 479 }, { "epoch": 0.3010820134859652, "grad_norm": 1.459418773651123, "learning_rate": 3.8496972084764116e-05, "loss": 0.3622, "step": 480 }, { "epoch": 0.3017092676807276, "grad_norm": 1.2736573219299316, "learning_rate": 3.848923382526327e-05, "loss": 0.3921, "step": 481 }, { "epoch": 0.30233652187549004, "grad_norm": 1.26237952709198, "learning_rate": 3.8481476478675464e-05, "loss": 0.3326, "step": 482 }, { "epoch": 0.3029637760702525, "grad_norm": 1.3334345817565918, "learning_rate": 3.847370005300887e-05, "loss": 0.4125, "step": 483 }, { "epoch": 0.3035910302650149, "grad_norm": 1.1309443712234497, "learning_rate": 3.8465904556291366e-05, "loss": 0.3206, "step": 484 }, { "epoch": 0.30421828445977733, "grad_norm": 1.3421224355697632, "learning_rate": 3.8458089996570516e-05, "loss": 0.3545, "step": 485 }, { "epoch": 0.3048455386545397, "grad_norm": 1.3264871835708618, "learning_rate": 3.845025638191357e-05, "loss": 0.3736, "step": 486 }, { "epoch": 0.3054727928493022, "grad_norm": 1.199151873588562, "learning_rate": 3.844240372040744e-05, "loss": 0.3658, "step": 487 }, { "epoch": 0.3061000470440646, "grad_norm": 1.4013934135437012, "learning_rate": 3.843453202015871e-05, "loss": 0.3631, "step": 488 }, { "epoch": 0.306727301238827, "grad_norm": 1.254109263420105, "learning_rate": 3.84266412892936e-05, "loss": 0.3534, "step": 489 }, { "epoch": 0.30735455543358947, "grad_norm": 1.218373417854309, "learning_rate": 3.841873153595801e-05, "loss": 0.3358, "step": 490 }, { "epoch": 0.30798180962835187, "grad_norm": 1.1666988134384155, "learning_rate": 3.841080276831744e-05, "loss": 0.3714, "step": 491 }, { "epoch": 0.3086090638231143, "grad_norm": 1.1505777835845947, "learning_rate": 3.8402854994557036e-05, "loss": 0.3689, "step": 492 }, { "epoch": 0.30923631801787677, "grad_norm": 1.1682237386703491, "learning_rate": 3.8394888222881576e-05, "loss": 0.365, "step": 493 }, { "epoch": 0.30986357221263916, "grad_norm": 1.2740191221237183, "learning_rate": 3.838690246151544e-05, "loss": 0.3514, "step": 494 }, { "epoch": 0.3104908264074016, "grad_norm": 1.1906490325927734, "learning_rate": 3.8378897718702595e-05, "loss": 0.3505, "step": 495 }, { "epoch": 0.311118080602164, "grad_norm": 1.2643780708312988, "learning_rate": 3.837087400270663e-05, "loss": 0.3209, "step": 496 }, { "epoch": 0.31174533479692645, "grad_norm": 1.479504942893982, "learning_rate": 3.83628313218107e-05, "loss": 0.4005, "step": 497 }, { "epoch": 0.3123725889916889, "grad_norm": 1.326695442199707, "learning_rate": 3.835476968431756e-05, "loss": 0.3244, "step": 498 }, { "epoch": 0.3129998431864513, "grad_norm": 1.3005268573760986, "learning_rate": 3.8346689098549525e-05, "loss": 0.3717, "step": 499 }, { "epoch": 0.31362709738121375, "grad_norm": 1.1932462453842163, "learning_rate": 3.833858957284845e-05, "loss": 0.3364, "step": 500 }, { "epoch": 0.31425435157597614, "grad_norm": 1.4240779876708984, "learning_rate": 3.833047111557578e-05, "loss": 0.371, "step": 501 }, { "epoch": 0.3148816057707386, "grad_norm": 1.4582301378250122, "learning_rate": 3.8322333735112466e-05, "loss": 0.389, "step": 502 }, { "epoch": 0.31550885996550104, "grad_norm": 1.1639416217803955, "learning_rate": 3.831417743985903e-05, "loss": 0.3803, "step": 503 }, { "epoch": 0.31613611416026344, "grad_norm": 1.6114230155944824, "learning_rate": 3.830600223823548e-05, "loss": 0.37, "step": 504 }, { "epoch": 0.3167633683550259, "grad_norm": 1.3899036645889282, "learning_rate": 3.829780813868139e-05, "loss": 0.4002, "step": 505 }, { "epoch": 0.3173906225497883, "grad_norm": 1.1674432754516602, "learning_rate": 3.82895951496558e-05, "loss": 0.3284, "step": 506 }, { "epoch": 0.31801787674455073, "grad_norm": 1.1973252296447754, "learning_rate": 3.8281363279637276e-05, "loss": 0.4293, "step": 507 }, { "epoch": 0.3186451309393132, "grad_norm": 1.3239046335220337, "learning_rate": 3.8273112537123864e-05, "loss": 0.3344, "step": 508 }, { "epoch": 0.3192723851340756, "grad_norm": 1.2565101385116577, "learning_rate": 3.8264842930633095e-05, "loss": 0.3603, "step": 509 }, { "epoch": 0.319899639328838, "grad_norm": 1.2703858613967896, "learning_rate": 3.8256554468701985e-05, "loss": 0.3771, "step": 510 }, { "epoch": 0.3205268935236004, "grad_norm": 1.3956307172775269, "learning_rate": 3.824824715988698e-05, "loss": 0.3476, "step": 511 }, { "epoch": 0.32115414771836287, "grad_norm": 1.2926727533340454, "learning_rate": 3.823992101276404e-05, "loss": 0.3706, "step": 512 }, { "epoch": 0.3217814019131253, "grad_norm": 1.1364754438400269, "learning_rate": 3.823157603592852e-05, "loss": 0.3034, "step": 513 }, { "epoch": 0.3224086561078877, "grad_norm": 1.1433966159820557, "learning_rate": 3.8223212237995235e-05, "loss": 0.3442, "step": 514 }, { "epoch": 0.32303591030265016, "grad_norm": 1.21794855594635, "learning_rate": 3.821482962759843e-05, "loss": 0.3795, "step": 515 }, { "epoch": 0.32366316449741256, "grad_norm": 1.1466041803359985, "learning_rate": 3.820642821339177e-05, "loss": 0.3156, "step": 516 }, { "epoch": 0.324290418692175, "grad_norm": 1.362508773803711, "learning_rate": 3.819800800404832e-05, "loss": 0.355, "step": 517 }, { "epoch": 0.32491767288693746, "grad_norm": 1.2425683736801147, "learning_rate": 3.818956900826058e-05, "loss": 0.3461, "step": 518 }, { "epoch": 0.32554492708169985, "grad_norm": 0.8988218307495117, "learning_rate": 3.818111123474041e-05, "loss": 0.3696, "step": 519 }, { "epoch": 0.3261721812764623, "grad_norm": 1.2538793087005615, "learning_rate": 3.817263469221907e-05, "loss": 0.358, "step": 520 }, { "epoch": 0.3267994354712247, "grad_norm": 1.2860736846923828, "learning_rate": 3.816413938944718e-05, "loss": 0.3074, "step": 521 }, { "epoch": 0.32742668966598715, "grad_norm": 1.2698616981506348, "learning_rate": 3.815562533519476e-05, "loss": 0.3474, "step": 522 }, { "epoch": 0.32805394386074954, "grad_norm": 1.1828845739364624, "learning_rate": 3.814709253825115e-05, "loss": 0.3791, "step": 523 }, { "epoch": 0.328681198055512, "grad_norm": 1.1985173225402832, "learning_rate": 3.813854100742507e-05, "loss": 0.3597, "step": 524 }, { "epoch": 0.32930845225027444, "grad_norm": 1.3471806049346924, "learning_rate": 3.812997075154457e-05, "loss": 0.3697, "step": 525 }, { "epoch": 0.32993570644503684, "grad_norm": 1.2784637212753296, "learning_rate": 3.812138177945701e-05, "loss": 0.3718, "step": 526 }, { "epoch": 0.3305629606397993, "grad_norm": 1.35215163230896, "learning_rate": 3.81127741000291e-05, "loss": 0.3958, "step": 527 }, { "epoch": 0.3311902148345617, "grad_norm": 1.283639907836914, "learning_rate": 3.810414772214685e-05, "loss": 0.3842, "step": 528 }, { "epoch": 0.33181746902932413, "grad_norm": 1.1455283164978027, "learning_rate": 3.809550265471557e-05, "loss": 0.3605, "step": 529 }, { "epoch": 0.3324447232240866, "grad_norm": 1.1047929525375366, "learning_rate": 3.8086838906659865e-05, "loss": 0.3275, "step": 530 }, { "epoch": 0.333071977418849, "grad_norm": 1.631423830986023, "learning_rate": 3.8078156486923636e-05, "loss": 0.3591, "step": 531 }, { "epoch": 0.3336992316136114, "grad_norm": 1.3591557741165161, "learning_rate": 3.8069455404470046e-05, "loss": 0.378, "step": 532 }, { "epoch": 0.3343264858083738, "grad_norm": 1.312411904335022, "learning_rate": 3.8060735668281527e-05, "loss": 0.3579, "step": 533 }, { "epoch": 0.33495374000313627, "grad_norm": 1.4502252340316772, "learning_rate": 3.805199728735977e-05, "loss": 0.3674, "step": 534 }, { "epoch": 0.3355809941978987, "grad_norm": 1.1842923164367676, "learning_rate": 3.8043240270725714e-05, "loss": 0.3137, "step": 535 }, { "epoch": 0.3362082483926611, "grad_norm": 1.2416174411773682, "learning_rate": 3.8034464627419535e-05, "loss": 0.3362, "step": 536 }, { "epoch": 0.33683550258742356, "grad_norm": 1.1419811248779297, "learning_rate": 3.8025670366500635e-05, "loss": 0.3369, "step": 537 }, { "epoch": 0.33746275678218596, "grad_norm": 1.2286869287490845, "learning_rate": 3.8016857497047655e-05, "loss": 0.317, "step": 538 }, { "epoch": 0.3380900109769484, "grad_norm": 1.616650938987732, "learning_rate": 3.80080260281584e-05, "loss": 0.3304, "step": 539 }, { "epoch": 0.33871726517171086, "grad_norm": 1.1498764753341675, "learning_rate": 3.799917596894994e-05, "loss": 0.3206, "step": 540 }, { "epoch": 0.33934451936647325, "grad_norm": 1.123672604560852, "learning_rate": 3.799030732855848e-05, "loss": 0.3231, "step": 541 }, { "epoch": 0.3399717735612357, "grad_norm": 1.2703033685684204, "learning_rate": 3.798142011613944e-05, "loss": 0.3143, "step": 542 }, { "epoch": 0.3405990277559981, "grad_norm": 1.1162161827087402, "learning_rate": 3.7972514340867404e-05, "loss": 0.3324, "step": 543 }, { "epoch": 0.34122628195076055, "grad_norm": 1.2812544107437134, "learning_rate": 3.796359001193611e-05, "loss": 0.3229, "step": 544 }, { "epoch": 0.341853536145523, "grad_norm": 1.1549571752548218, "learning_rate": 3.7954647138558455e-05, "loss": 0.3268, "step": 545 }, { "epoch": 0.3424807903402854, "grad_norm": 1.4987828731536865, "learning_rate": 3.794568572996649e-05, "loss": 0.364, "step": 546 }, { "epoch": 0.34310804453504784, "grad_norm": 1.5693942308425903, "learning_rate": 3.79367057954114e-05, "loss": 0.348, "step": 547 }, { "epoch": 0.34373529872981023, "grad_norm": 1.3781763315200806, "learning_rate": 3.7927707344163464e-05, "loss": 0.2911, "step": 548 }, { "epoch": 0.3443625529245727, "grad_norm": 1.1173834800720215, "learning_rate": 3.791869038551212e-05, "loss": 0.322, "step": 549 }, { "epoch": 0.34498980711933513, "grad_norm": 1.2679322957992554, "learning_rate": 3.7909654928765875e-05, "loss": 0.3907, "step": 550 }, { "epoch": 0.34561706131409753, "grad_norm": 1.3186674118041992, "learning_rate": 3.790060098325237e-05, "loss": 0.3744, "step": 551 }, { "epoch": 0.34624431550886, "grad_norm": 1.333404779434204, "learning_rate": 3.789152855831829e-05, "loss": 0.3881, "step": 552 }, { "epoch": 0.3468715697036224, "grad_norm": 1.2173197269439697, "learning_rate": 3.7882437663329434e-05, "loss": 0.3365, "step": 553 }, { "epoch": 0.3474988238983848, "grad_norm": 1.1697617769241333, "learning_rate": 3.787332830767064e-05, "loss": 0.3129, "step": 554 }, { "epoch": 0.3481260780931473, "grad_norm": 1.214918851852417, "learning_rate": 3.786420050074584e-05, "loss": 0.3574, "step": 555 }, { "epoch": 0.34875333228790967, "grad_norm": 1.1760424375534058, "learning_rate": 3.785505425197797e-05, "loss": 0.3307, "step": 556 }, { "epoch": 0.3493805864826721, "grad_norm": 1.1405760049819946, "learning_rate": 3.7845889570809025e-05, "loss": 0.359, "step": 557 }, { "epoch": 0.3500078406774345, "grad_norm": 1.1279023885726929, "learning_rate": 3.7836706466700036e-05, "loss": 0.3233, "step": 558 }, { "epoch": 0.35063509487219696, "grad_norm": 1.1536526679992676, "learning_rate": 3.782750494913104e-05, "loss": 0.3556, "step": 559 }, { "epoch": 0.3512623490669594, "grad_norm": 1.2649977207183838, "learning_rate": 3.7818285027601086e-05, "loss": 0.3147, "step": 560 }, { "epoch": 0.3518896032617218, "grad_norm": 1.190099835395813, "learning_rate": 3.780904671162823e-05, "loss": 0.2968, "step": 561 }, { "epoch": 0.35251685745648426, "grad_norm": 1.1292306184768677, "learning_rate": 3.779979001074951e-05, "loss": 0.3268, "step": 562 }, { "epoch": 0.35314411165124665, "grad_norm": 1.0524557828903198, "learning_rate": 3.7790514934520936e-05, "loss": 0.3412, "step": 563 }, { "epoch": 0.3537713658460091, "grad_norm": 1.3033021688461304, "learning_rate": 3.77812214925175e-05, "loss": 0.3541, "step": 564 }, { "epoch": 0.35439862004077155, "grad_norm": 1.1734102964401245, "learning_rate": 3.7771909694333156e-05, "loss": 0.3378, "step": 565 }, { "epoch": 0.35502587423553394, "grad_norm": 1.104713797569275, "learning_rate": 3.776257954958079e-05, "loss": 0.3196, "step": 566 }, { "epoch": 0.3556531284302964, "grad_norm": 1.1589550971984863, "learning_rate": 3.775323106789225e-05, "loss": 0.3302, "step": 567 }, { "epoch": 0.3562803826250588, "grad_norm": 1.1092143058776855, "learning_rate": 3.774386425891829e-05, "loss": 0.3228, "step": 568 }, { "epoch": 0.35690763681982124, "grad_norm": 1.1946264505386353, "learning_rate": 3.7734479132328615e-05, "loss": 0.3391, "step": 569 }, { "epoch": 0.35753489101458363, "grad_norm": 1.216552734375, "learning_rate": 3.77250756978118e-05, "loss": 0.3491, "step": 570 }, { "epoch": 0.3581621452093461, "grad_norm": 1.0963689088821411, "learning_rate": 3.771565396507535e-05, "loss": 0.3377, "step": 571 }, { "epoch": 0.35878939940410853, "grad_norm": 1.323681116104126, "learning_rate": 3.7706213943845666e-05, "loss": 0.3252, "step": 572 }, { "epoch": 0.3594166535988709, "grad_norm": 1.1954137086868286, "learning_rate": 3.7696755643867993e-05, "loss": 0.3651, "step": 573 }, { "epoch": 0.3600439077936334, "grad_norm": 1.2133947610855103, "learning_rate": 3.7687279074906474e-05, "loss": 0.3439, "step": 574 }, { "epoch": 0.36067116198839577, "grad_norm": 1.4137613773345947, "learning_rate": 3.76777842467441e-05, "loss": 0.3366, "step": 575 }, { "epoch": 0.3612984161831582, "grad_norm": 1.1659473180770874, "learning_rate": 3.7668271169182736e-05, "loss": 0.3335, "step": 576 }, { "epoch": 0.36192567037792067, "grad_norm": 1.2614892721176147, "learning_rate": 3.765873985204305e-05, "loss": 0.4005, "step": 577 }, { "epoch": 0.36255292457268307, "grad_norm": 1.0579726696014404, "learning_rate": 3.764919030516456e-05, "loss": 0.3114, "step": 578 }, { "epoch": 0.3631801787674455, "grad_norm": 1.2478663921356201, "learning_rate": 3.7639622538405595e-05, "loss": 0.3205, "step": 579 }, { "epoch": 0.3638074329622079, "grad_norm": 1.1137259006500244, "learning_rate": 3.7630036561643304e-05, "loss": 0.3056, "step": 580 }, { "epoch": 0.36443468715697036, "grad_norm": 1.081593632698059, "learning_rate": 3.762043238477363e-05, "loss": 0.327, "step": 581 }, { "epoch": 0.3650619413517328, "grad_norm": 1.2735402584075928, "learning_rate": 3.761081001771129e-05, "loss": 0.3226, "step": 582 }, { "epoch": 0.3656891955464952, "grad_norm": 1.0872917175292969, "learning_rate": 3.760116947038981e-05, "loss": 0.3428, "step": 583 }, { "epoch": 0.36631644974125765, "grad_norm": 1.2489984035491943, "learning_rate": 3.759151075276145e-05, "loss": 0.3488, "step": 584 }, { "epoch": 0.36694370393602005, "grad_norm": 1.4249989986419678, "learning_rate": 3.7581833874797254e-05, "loss": 0.3354, "step": 585 }, { "epoch": 0.3675709581307825, "grad_norm": 1.3429278135299683, "learning_rate": 3.7572138846487e-05, "loss": 0.3471, "step": 586 }, { "epoch": 0.36819821232554495, "grad_norm": 1.1182597875595093, "learning_rate": 3.756242567783921e-05, "loss": 0.3138, "step": 587 }, { "epoch": 0.36882546652030734, "grad_norm": 1.22892427444458, "learning_rate": 3.755269437888113e-05, "loss": 0.3459, "step": 588 }, { "epoch": 0.3694527207150698, "grad_norm": 1.56260347366333, "learning_rate": 3.754294495965872e-05, "loss": 0.348, "step": 589 }, { "epoch": 0.3700799749098322, "grad_norm": 1.3148378133773804, "learning_rate": 3.753317743023665e-05, "loss": 0.3643, "step": 590 }, { "epoch": 0.37070722910459464, "grad_norm": 1.1588764190673828, "learning_rate": 3.7523391800698276e-05, "loss": 0.3409, "step": 591 }, { "epoch": 0.3713344832993571, "grad_norm": 1.202447772026062, "learning_rate": 3.751358808114567e-05, "loss": 0.3276, "step": 592 }, { "epoch": 0.3719617374941195, "grad_norm": 1.1717889308929443, "learning_rate": 3.7503766281699535e-05, "loss": 0.3171, "step": 593 }, { "epoch": 0.37258899168888193, "grad_norm": 1.106449842453003, "learning_rate": 3.7493926412499267e-05, "loss": 0.311, "step": 594 }, { "epoch": 0.3732162458836443, "grad_norm": 1.0862950086593628, "learning_rate": 3.748406848370291e-05, "loss": 0.319, "step": 595 }, { "epoch": 0.3738435000784068, "grad_norm": 1.1050410270690918, "learning_rate": 3.747419250548715e-05, "loss": 0.3431, "step": 596 }, { "epoch": 0.3744707542731692, "grad_norm": 1.0442489385604858, "learning_rate": 3.7464298488047314e-05, "loss": 0.3031, "step": 597 }, { "epoch": 0.3750980084679316, "grad_norm": 1.0916913747787476, "learning_rate": 3.745438644159734e-05, "loss": 0.3401, "step": 598 }, { "epoch": 0.37572526266269407, "grad_norm": 1.247828483581543, "learning_rate": 3.744445637636978e-05, "loss": 0.3915, "step": 599 }, { "epoch": 0.37635251685745646, "grad_norm": 1.3874841928482056, "learning_rate": 3.743450830261578e-05, "loss": 0.3032, "step": 600 }, { "epoch": 0.3769797710522189, "grad_norm": 1.1481797695159912, "learning_rate": 3.7424542230605115e-05, "loss": 0.3731, "step": 601 }, { "epoch": 0.37760702524698136, "grad_norm": 1.1188069581985474, "learning_rate": 3.7414558170626084e-05, "loss": 0.3415, "step": 602 }, { "epoch": 0.37823427944174376, "grad_norm": 1.2879520654678345, "learning_rate": 3.74045561329856e-05, "loss": 0.3372, "step": 603 }, { "epoch": 0.3788615336365062, "grad_norm": 1.4175255298614502, "learning_rate": 3.739453612800911e-05, "loss": 0.34, "step": 604 }, { "epoch": 0.3794887878312686, "grad_norm": 1.1900228261947632, "learning_rate": 3.738449816604062e-05, "loss": 0.3712, "step": 605 }, { "epoch": 0.38011604202603105, "grad_norm": 1.2238755226135254, "learning_rate": 3.737444225744267e-05, "loss": 0.3031, "step": 606 }, { "epoch": 0.3807432962207935, "grad_norm": 1.266327977180481, "learning_rate": 3.736436841259634e-05, "loss": 0.352, "step": 607 }, { "epoch": 0.3813705504155559, "grad_norm": 1.0682594776153564, "learning_rate": 3.7354276641901195e-05, "loss": 0.3583, "step": 608 }, { "epoch": 0.38199780461031835, "grad_norm": 1.384995698928833, "learning_rate": 3.734416695577534e-05, "loss": 0.3207, "step": 609 }, { "epoch": 0.38262505880508074, "grad_norm": 1.1252288818359375, "learning_rate": 3.733403936465536e-05, "loss": 0.2987, "step": 610 }, { "epoch": 0.3832523129998432, "grad_norm": 1.1938388347625732, "learning_rate": 3.7323893878996316e-05, "loss": 0.3524, "step": 611 }, { "epoch": 0.3838795671946056, "grad_norm": 1.3328951597213745, "learning_rate": 3.731373050927175e-05, "loss": 0.3044, "step": 612 }, { "epoch": 0.38450682138936804, "grad_norm": 1.1329494714736938, "learning_rate": 3.730354926597368e-05, "loss": 0.323, "step": 613 }, { "epoch": 0.3851340755841305, "grad_norm": 1.1388083696365356, "learning_rate": 3.7293350159612545e-05, "loss": 0.3148, "step": 614 }, { "epoch": 0.3857613297788929, "grad_norm": 1.149870753288269, "learning_rate": 3.728313320071725e-05, "loss": 0.2885, "step": 615 }, { "epoch": 0.38638858397365533, "grad_norm": 1.1834301948547363, "learning_rate": 3.7272898399835114e-05, "loss": 0.3581, "step": 616 }, { "epoch": 0.3870158381684177, "grad_norm": 1.1635024547576904, "learning_rate": 3.726264576753189e-05, "loss": 0.3265, "step": 617 }, { "epoch": 0.3876430923631802, "grad_norm": 1.914668321609497, "learning_rate": 3.7252375314391736e-05, "loss": 0.3274, "step": 618 }, { "epoch": 0.3882703465579426, "grad_norm": 1.167209506034851, "learning_rate": 3.724208705101719e-05, "loss": 0.2844, "step": 619 }, { "epoch": 0.388897600752705, "grad_norm": 1.2498699426651, "learning_rate": 3.72317809880292e-05, "loss": 0.2896, "step": 620 }, { "epoch": 0.38952485494746747, "grad_norm": 1.3018776178359985, "learning_rate": 3.722145713606706e-05, "loss": 0.3243, "step": 621 }, { "epoch": 0.39015210914222986, "grad_norm": 1.1689425706863403, "learning_rate": 3.7211115505788467e-05, "loss": 0.3126, "step": 622 }, { "epoch": 0.3907793633369923, "grad_norm": 1.0691388845443726, "learning_rate": 3.720075610786943e-05, "loss": 0.2924, "step": 623 }, { "epoch": 0.39140661753175476, "grad_norm": 1.1361101865768433, "learning_rate": 3.719037895300434e-05, "loss": 0.3153, "step": 624 }, { "epoch": 0.39203387172651716, "grad_norm": 1.1004093885421753, "learning_rate": 3.717998405190589e-05, "loss": 0.3138, "step": 625 }, { "epoch": 0.3926611259212796, "grad_norm": 0.9791253209114075, "learning_rate": 3.7169571415305104e-05, "loss": 0.2875, "step": 626 }, { "epoch": 0.393288380116042, "grad_norm": 1.083272099494934, "learning_rate": 3.71591410539513e-05, "loss": 0.3178, "step": 627 }, { "epoch": 0.39391563431080445, "grad_norm": 1.0247498750686646, "learning_rate": 3.714869297861213e-05, "loss": 0.357, "step": 628 }, { "epoch": 0.3945428885055669, "grad_norm": 1.1475611925125122, "learning_rate": 3.7138227200073495e-05, "loss": 0.3464, "step": 629 }, { "epoch": 0.3951701427003293, "grad_norm": 1.127882480621338, "learning_rate": 3.712774372913959e-05, "loss": 0.3384, "step": 630 }, { "epoch": 0.39579739689509175, "grad_norm": 1.4782390594482422, "learning_rate": 3.711724257663287e-05, "loss": 0.3523, "step": 631 }, { "epoch": 0.39642465108985414, "grad_norm": 1.1467382907867432, "learning_rate": 3.710672375339405e-05, "loss": 0.3009, "step": 632 }, { "epoch": 0.3970519052846166, "grad_norm": 1.283112645149231, "learning_rate": 3.709618727028208e-05, "loss": 0.3304, "step": 633 }, { "epoch": 0.39767915947937904, "grad_norm": 1.074562668800354, "learning_rate": 3.708563313817413e-05, "loss": 0.3218, "step": 634 }, { "epoch": 0.39830641367414144, "grad_norm": 1.056430697441101, "learning_rate": 3.7075061367965614e-05, "loss": 0.3028, "step": 635 }, { "epoch": 0.3989336678689039, "grad_norm": 0.8145391345024109, "learning_rate": 3.7064471970570146e-05, "loss": 0.2704, "step": 636 }, { "epoch": 0.3995609220636663, "grad_norm": 1.1371313333511353, "learning_rate": 3.7053864956919523e-05, "loss": 0.329, "step": 637 }, { "epoch": 0.40018817625842873, "grad_norm": 1.0839051008224487, "learning_rate": 3.704324033796375e-05, "loss": 0.3107, "step": 638 }, { "epoch": 0.4008154304531912, "grad_norm": 1.1648067235946655, "learning_rate": 3.703259812467098e-05, "loss": 0.3164, "step": 639 }, { "epoch": 0.4014426846479536, "grad_norm": 1.1891517639160156, "learning_rate": 3.7021938328027566e-05, "loss": 0.3461, "step": 640 }, { "epoch": 0.402069938842716, "grad_norm": 1.3886860609054565, "learning_rate": 3.7011260959037964e-05, "loss": 0.3754, "step": 641 }, { "epoch": 0.4026971930374784, "grad_norm": 1.1208279132843018, "learning_rate": 3.7000566028724825e-05, "loss": 0.2906, "step": 642 }, { "epoch": 0.40332444723224087, "grad_norm": 1.2986044883728027, "learning_rate": 3.6989853548128886e-05, "loss": 0.2868, "step": 643 }, { "epoch": 0.4039517014270033, "grad_norm": 1.0989553928375244, "learning_rate": 3.697912352830902e-05, "loss": 0.3277, "step": 644 }, { "epoch": 0.4045789556217657, "grad_norm": 1.2516005039215088, "learning_rate": 3.6968375980342206e-05, "loss": 0.3303, "step": 645 }, { "epoch": 0.40520620981652816, "grad_norm": 1.2921992540359497, "learning_rate": 3.6957610915323516e-05, "loss": 0.3041, "step": 646 }, { "epoch": 0.40583346401129056, "grad_norm": 1.204262375831604, "learning_rate": 3.69468283443661e-05, "loss": 0.3059, "step": 647 }, { "epoch": 0.406460718206053, "grad_norm": 1.205397605895996, "learning_rate": 3.69360282786012e-05, "loss": 0.3184, "step": 648 }, { "epoch": 0.40708797240081546, "grad_norm": 1.1826282739639282, "learning_rate": 3.692521072917809e-05, "loss": 0.3203, "step": 649 }, { "epoch": 0.40771522659557785, "grad_norm": 1.1708602905273438, "learning_rate": 3.691437570726412e-05, "loss": 0.3165, "step": 650 }, { "epoch": 0.4083424807903403, "grad_norm": 1.2916769981384277, "learning_rate": 3.690352322404465e-05, "loss": 0.3326, "step": 651 }, { "epoch": 0.4089697349851027, "grad_norm": 1.1950420141220093, "learning_rate": 3.6892653290723085e-05, "loss": 0.305, "step": 652 }, { "epoch": 0.40959698917986515, "grad_norm": 1.3934024572372437, "learning_rate": 3.688176591852084e-05, "loss": 0.3076, "step": 653 }, { "epoch": 0.4102242433746276, "grad_norm": 1.1073112487792969, "learning_rate": 3.6870861118677345e-05, "loss": 0.3165, "step": 654 }, { "epoch": 0.41085149756939, "grad_norm": 1.1293233633041382, "learning_rate": 3.6859938902449996e-05, "loss": 0.3136, "step": 655 }, { "epoch": 0.41147875176415244, "grad_norm": 1.1260145902633667, "learning_rate": 3.684899928111418e-05, "loss": 0.352, "step": 656 }, { "epoch": 0.41210600595891483, "grad_norm": 1.3531520366668701, "learning_rate": 3.683804226596326e-05, "loss": 0.3678, "step": 657 }, { "epoch": 0.4127332601536773, "grad_norm": 1.3921016454696655, "learning_rate": 3.682706786830854e-05, "loss": 0.3163, "step": 658 }, { "epoch": 0.4133605143484397, "grad_norm": 1.1174161434173584, "learning_rate": 3.6816076099479286e-05, "loss": 0.3197, "step": 659 }, { "epoch": 0.41398776854320213, "grad_norm": 1.5480095148086548, "learning_rate": 3.680506697082269e-05, "loss": 0.3097, "step": 660 }, { "epoch": 0.4146150227379646, "grad_norm": 1.157529354095459, "learning_rate": 3.6794040493703856e-05, "loss": 0.3148, "step": 661 }, { "epoch": 0.415242276932727, "grad_norm": 1.011178970336914, "learning_rate": 3.6782996679505794e-05, "loss": 0.3122, "step": 662 }, { "epoch": 0.4158695311274894, "grad_norm": 1.20054292678833, "learning_rate": 3.677193553962944e-05, "loss": 0.3488, "step": 663 }, { "epoch": 0.4164967853222518, "grad_norm": 1.2420406341552734, "learning_rate": 3.6760857085493595e-05, "loss": 0.3387, "step": 664 }, { "epoch": 0.41712403951701427, "grad_norm": 1.2079081535339355, "learning_rate": 3.674976132853494e-05, "loss": 0.3474, "step": 665 }, { "epoch": 0.4177512937117767, "grad_norm": 1.1342418193817139, "learning_rate": 3.6738648280208e-05, "loss": 0.3176, "step": 666 }, { "epoch": 0.4183785479065391, "grad_norm": 1.09598970413208, "learning_rate": 3.6727517951985185e-05, "loss": 0.3147, "step": 667 }, { "epoch": 0.41900580210130156, "grad_norm": 1.0939102172851562, "learning_rate": 3.671637035535671e-05, "loss": 0.2975, "step": 668 }, { "epoch": 0.41963305629606396, "grad_norm": 1.0264432430267334, "learning_rate": 3.6705205501830635e-05, "loss": 0.3037, "step": 669 }, { "epoch": 0.4202603104908264, "grad_norm": 1.2257050275802612, "learning_rate": 3.6694023402932835e-05, "loss": 0.2768, "step": 670 }, { "epoch": 0.42088756468558886, "grad_norm": 1.299730896949768, "learning_rate": 3.668282407020699e-05, "loss": 0.3634, "step": 671 }, { "epoch": 0.42151481888035125, "grad_norm": 1.1695563793182373, "learning_rate": 3.667160751521455e-05, "loss": 0.3129, "step": 672 }, { "epoch": 0.4221420730751137, "grad_norm": 1.1521062850952148, "learning_rate": 3.666037374953477e-05, "loss": 0.3308, "step": 673 }, { "epoch": 0.4227693272698761, "grad_norm": 1.1733102798461914, "learning_rate": 3.664912278476466e-05, "loss": 0.2691, "step": 674 }, { "epoch": 0.42339658146463854, "grad_norm": 1.1485964059829712, "learning_rate": 3.6637854632518985e-05, "loss": 0.2979, "step": 675 }, { "epoch": 0.424023835659401, "grad_norm": 1.2730668783187866, "learning_rate": 3.662656930443026e-05, "loss": 0.3181, "step": 676 }, { "epoch": 0.4246510898541634, "grad_norm": 1.181434988975525, "learning_rate": 3.6615266812148724e-05, "loss": 0.3085, "step": 677 }, { "epoch": 0.42527834404892584, "grad_norm": 1.1413264274597168, "learning_rate": 3.6603947167342335e-05, "loss": 0.3222, "step": 678 }, { "epoch": 0.42590559824368823, "grad_norm": 1.2547311782836914, "learning_rate": 3.659261038169676e-05, "loss": 0.3595, "step": 679 }, { "epoch": 0.4265328524384507, "grad_norm": 1.1639899015426636, "learning_rate": 3.658125646691537e-05, "loss": 0.3257, "step": 680 }, { "epoch": 0.42716010663321313, "grad_norm": 1.4667763710021973, "learning_rate": 3.6569885434719205e-05, "loss": 0.3502, "step": 681 }, { "epoch": 0.4277873608279755, "grad_norm": 1.1822808980941772, "learning_rate": 3.6558497296846976e-05, "loss": 0.3213, "step": 682 }, { "epoch": 0.428414615022738, "grad_norm": 1.0532478094100952, "learning_rate": 3.654709206505507e-05, "loss": 0.3176, "step": 683 }, { "epoch": 0.42904186921750037, "grad_norm": 1.0396804809570312, "learning_rate": 3.65356697511175e-05, "loss": 0.2881, "step": 684 }, { "epoch": 0.4296691234122628, "grad_norm": 1.1286225318908691, "learning_rate": 3.6524230366825935e-05, "loss": 0.3311, "step": 685 }, { "epoch": 0.43029637760702527, "grad_norm": 1.092599630355835, "learning_rate": 3.6512773923989645e-05, "loss": 0.2964, "step": 686 }, { "epoch": 0.43092363180178767, "grad_norm": 1.198910117149353, "learning_rate": 3.650130043443552e-05, "loss": 0.3192, "step": 687 }, { "epoch": 0.4315508859965501, "grad_norm": 1.1863718032836914, "learning_rate": 3.648980991000804e-05, "loss": 0.2836, "step": 688 }, { "epoch": 0.4321781401913125, "grad_norm": 1.3961267471313477, "learning_rate": 3.6478302362569293e-05, "loss": 0.3608, "step": 689 }, { "epoch": 0.43280539438607496, "grad_norm": 1.1938984394073486, "learning_rate": 3.6466777803998915e-05, "loss": 0.3076, "step": 690 }, { "epoch": 0.4334326485808374, "grad_norm": 1.128846287727356, "learning_rate": 3.645523624619412e-05, "loss": 0.3223, "step": 691 }, { "epoch": 0.4340599027755998, "grad_norm": 1.1837422847747803, "learning_rate": 3.644367770106966e-05, "loss": 0.3188, "step": 692 }, { "epoch": 0.43468715697036225, "grad_norm": 1.122409462928772, "learning_rate": 3.643210218055783e-05, "loss": 0.2997, "step": 693 }, { "epoch": 0.43531441116512465, "grad_norm": 1.1411001682281494, "learning_rate": 3.642050969660844e-05, "loss": 0.337, "step": 694 }, { "epoch": 0.4359416653598871, "grad_norm": 1.231703758239746, "learning_rate": 3.640890026118883e-05, "loss": 0.3357, "step": 695 }, { "epoch": 0.43656891955464955, "grad_norm": 0.9981296062469482, "learning_rate": 3.639727388628383e-05, "loss": 0.2708, "step": 696 }, { "epoch": 0.43719617374941194, "grad_norm": 1.0697611570358276, "learning_rate": 3.638563058389574e-05, "loss": 0.3111, "step": 697 }, { "epoch": 0.4378234279441744, "grad_norm": 1.3875848054885864, "learning_rate": 3.6373970366044366e-05, "loss": 0.3417, "step": 698 }, { "epoch": 0.4384506821389368, "grad_norm": 1.3041980266571045, "learning_rate": 3.636229324476696e-05, "loss": 0.3391, "step": 699 }, { "epoch": 0.43907793633369924, "grad_norm": 1.058411955833435, "learning_rate": 3.635059923211822e-05, "loss": 0.3055, "step": 700 }, { "epoch": 0.4397051905284617, "grad_norm": 1.2041007280349731, "learning_rate": 3.633888834017029e-05, "loss": 0.3058, "step": 701 }, { "epoch": 0.4403324447232241, "grad_norm": 1.2628463506698608, "learning_rate": 3.632716058101273e-05, "loss": 0.3144, "step": 702 }, { "epoch": 0.44095969891798653, "grad_norm": 1.3504395484924316, "learning_rate": 3.631541596675253e-05, "loss": 0.3256, "step": 703 }, { "epoch": 0.4415869531127489, "grad_norm": 1.0061591863632202, "learning_rate": 3.630365450951406e-05, "loss": 0.2783, "step": 704 }, { "epoch": 0.4422142073075114, "grad_norm": 1.075989007949829, "learning_rate": 3.629187622143909e-05, "loss": 0.2889, "step": 705 }, { "epoch": 0.44284146150227377, "grad_norm": 1.089695930480957, "learning_rate": 3.6280081114686757e-05, "loss": 0.3028, "step": 706 }, { "epoch": 0.4434687156970362, "grad_norm": 1.1697717905044556, "learning_rate": 3.626826920143357e-05, "loss": 0.3198, "step": 707 }, { "epoch": 0.44409596989179867, "grad_norm": 1.14765465259552, "learning_rate": 3.6256440493873385e-05, "loss": 0.2959, "step": 708 }, { "epoch": 0.44472322408656106, "grad_norm": 1.1275452375411987, "learning_rate": 3.6244595004217385e-05, "loss": 0.3092, "step": 709 }, { "epoch": 0.4453504782813235, "grad_norm": 0.986223578453064, "learning_rate": 3.623273274469409e-05, "loss": 0.2817, "step": 710 }, { "epoch": 0.4459777324760859, "grad_norm": 1.11913001537323, "learning_rate": 3.622085372754933e-05, "loss": 0.2931, "step": 711 }, { "epoch": 0.44660498667084836, "grad_norm": 1.1649326086044312, "learning_rate": 3.6208957965046234e-05, "loss": 0.2926, "step": 712 }, { "epoch": 0.4472322408656108, "grad_norm": 1.0906825065612793, "learning_rate": 3.619704546946521e-05, "loss": 0.2339, "step": 713 }, { "epoch": 0.4478594950603732, "grad_norm": 1.333734154701233, "learning_rate": 3.618511625310395e-05, "loss": 0.3512, "step": 714 }, { "epoch": 0.44848674925513565, "grad_norm": 1.1435856819152832, "learning_rate": 3.617317032827741e-05, "loss": 0.2765, "step": 715 }, { "epoch": 0.44911400344989805, "grad_norm": 1.174895167350769, "learning_rate": 3.616120770731778e-05, "loss": 0.3099, "step": 716 }, { "epoch": 0.4497412576446605, "grad_norm": 1.352873682975769, "learning_rate": 3.6149228402574485e-05, "loss": 0.3625, "step": 717 }, { "epoch": 0.45036851183942295, "grad_norm": 1.1254205703735352, "learning_rate": 3.613723242641421e-05, "loss": 0.3131, "step": 718 }, { "epoch": 0.45099576603418534, "grad_norm": 1.3090929985046387, "learning_rate": 3.6125219791220804e-05, "loss": 0.3151, "step": 719 }, { "epoch": 0.4516230202289478, "grad_norm": 1.2363439798355103, "learning_rate": 3.611319050939534e-05, "loss": 0.293, "step": 720 }, { "epoch": 0.4522502744237102, "grad_norm": 1.2641264200210571, "learning_rate": 3.6101144593356066e-05, "loss": 0.3381, "step": 721 }, { "epoch": 0.45287752861847264, "grad_norm": 1.0665712356567383, "learning_rate": 3.608908205553841e-05, "loss": 0.3114, "step": 722 }, { "epoch": 0.4535047828132351, "grad_norm": 1.1418439149856567, "learning_rate": 3.607700290839495e-05, "loss": 0.2869, "step": 723 }, { "epoch": 0.4541320370079975, "grad_norm": 1.339607834815979, "learning_rate": 3.606490716439541e-05, "loss": 0.313, "step": 724 }, { "epoch": 0.45475929120275993, "grad_norm": 1.1564210653305054, "learning_rate": 3.605279483602666e-05, "loss": 0.3399, "step": 725 }, { "epoch": 0.4553865453975223, "grad_norm": 1.1793553829193115, "learning_rate": 3.604066593579269e-05, "loss": 0.2752, "step": 726 }, { "epoch": 0.4560137995922848, "grad_norm": 1.0112354755401611, "learning_rate": 3.602852047621457e-05, "loss": 0.3143, "step": 727 }, { "epoch": 0.4566410537870472, "grad_norm": 1.1206257343292236, "learning_rate": 3.60163584698305e-05, "loss": 0.3092, "step": 728 }, { "epoch": 0.4572683079818096, "grad_norm": 1.2401798963546753, "learning_rate": 3.600417992919575e-05, "loss": 0.3128, "step": 729 }, { "epoch": 0.45789556217657207, "grad_norm": 1.2118809223175049, "learning_rate": 3.599198486688264e-05, "loss": 0.3121, "step": 730 }, { "epoch": 0.45852281637133446, "grad_norm": 1.0759859085083008, "learning_rate": 3.597977329548059e-05, "loss": 0.3004, "step": 731 }, { "epoch": 0.4591500705660969, "grad_norm": 1.1380683183670044, "learning_rate": 3.596754522759601e-05, "loss": 0.3007, "step": 732 }, { "epoch": 0.45977732476085936, "grad_norm": 1.0690897703170776, "learning_rate": 3.595530067585237e-05, "loss": 0.2819, "step": 733 }, { "epoch": 0.46040457895562176, "grad_norm": 1.059333086013794, "learning_rate": 3.5943039652890154e-05, "loss": 0.3236, "step": 734 }, { "epoch": 0.4610318331503842, "grad_norm": 1.1993666887283325, "learning_rate": 3.5930762171366855e-05, "loss": 0.2923, "step": 735 }, { "epoch": 0.4616590873451466, "grad_norm": 1.2433502674102783, "learning_rate": 3.591846824395694e-05, "loss": 0.32, "step": 736 }, { "epoch": 0.46228634153990905, "grad_norm": 1.0564230680465698, "learning_rate": 3.5906157883351864e-05, "loss": 0.3052, "step": 737 }, { "epoch": 0.4629135957346715, "grad_norm": 1.0913139581680298, "learning_rate": 3.589383110226004e-05, "loss": 0.3229, "step": 738 }, { "epoch": 0.4635408499294339, "grad_norm": 1.049662709236145, "learning_rate": 3.588148791340686e-05, "loss": 0.2653, "step": 739 }, { "epoch": 0.46416810412419635, "grad_norm": 1.0421611070632935, "learning_rate": 3.586912832953461e-05, "loss": 0.3305, "step": 740 }, { "epoch": 0.46479535831895874, "grad_norm": 1.1456973552703857, "learning_rate": 3.585675236340254e-05, "loss": 0.2904, "step": 741 }, { "epoch": 0.4654226125137212, "grad_norm": 1.1543675661087036, "learning_rate": 3.5844360027786776e-05, "loss": 0.299, "step": 742 }, { "epoch": 0.46604986670848364, "grad_norm": 1.1671220064163208, "learning_rate": 3.583195133548038e-05, "loss": 0.3057, "step": 743 }, { "epoch": 0.46667712090324603, "grad_norm": 1.256575107574463, "learning_rate": 3.581952629929327e-05, "loss": 0.3138, "step": 744 }, { "epoch": 0.4673043750980085, "grad_norm": 1.1006120443344116, "learning_rate": 3.580708493205226e-05, "loss": 0.3048, "step": 745 }, { "epoch": 0.4679316292927709, "grad_norm": 1.2163777351379395, "learning_rate": 3.5794627246600996e-05, "loss": 0.3516, "step": 746 }, { "epoch": 0.46855888348753333, "grad_norm": 1.1042163372039795, "learning_rate": 3.57821532558e-05, "loss": 0.2687, "step": 747 }, { "epoch": 0.4691861376822957, "grad_norm": 1.1301376819610596, "learning_rate": 3.5769662972526604e-05, "loss": 0.3447, "step": 748 }, { "epoch": 0.4698133918770582, "grad_norm": 1.1545332670211792, "learning_rate": 3.5757156409674976e-05, "loss": 0.3084, "step": 749 }, { "epoch": 0.4704406460718206, "grad_norm": 1.0360727310180664, "learning_rate": 3.574463358015607e-05, "loss": 0.2986, "step": 750 }, { "epoch": 0.471067900266583, "grad_norm": 1.0242793560028076, "learning_rate": 3.573209449689765e-05, "loss": 0.3232, "step": 751 }, { "epoch": 0.47169515446134547, "grad_norm": 1.1782797574996948, "learning_rate": 3.5719539172844245e-05, "loss": 0.3084, "step": 752 }, { "epoch": 0.47232240865610786, "grad_norm": 1.0539721250534058, "learning_rate": 3.570696762095716e-05, "loss": 0.3313, "step": 753 }, { "epoch": 0.4729496628508703, "grad_norm": 1.0124305486679077, "learning_rate": 3.569437985421447e-05, "loss": 0.2994, "step": 754 }, { "epoch": 0.47357691704563276, "grad_norm": 1.290825366973877, "learning_rate": 3.568177588561095e-05, "loss": 0.3425, "step": 755 }, { "epoch": 0.47420417124039516, "grad_norm": 1.0773361921310425, "learning_rate": 3.566915572815812e-05, "loss": 0.2549, "step": 756 }, { "epoch": 0.4748314254351576, "grad_norm": 1.2297642230987549, "learning_rate": 3.565651939488422e-05, "loss": 0.2747, "step": 757 }, { "epoch": 0.47545867962992, "grad_norm": 1.120935082435608, "learning_rate": 3.5643866898834175e-05, "loss": 0.296, "step": 758 }, { "epoch": 0.47608593382468245, "grad_norm": 1.1990840435028076, "learning_rate": 3.563119825306961e-05, "loss": 0.2879, "step": 759 }, { "epoch": 0.4767131880194449, "grad_norm": 1.2168325185775757, "learning_rate": 3.561851347066881e-05, "loss": 0.3373, "step": 760 }, { "epoch": 0.4773404422142073, "grad_norm": 1.1412773132324219, "learning_rate": 3.560581256472672e-05, "loss": 0.3045, "step": 761 }, { "epoch": 0.47796769640896974, "grad_norm": 1.0286948680877686, "learning_rate": 3.559309554835494e-05, "loss": 0.3177, "step": 762 }, { "epoch": 0.47859495060373214, "grad_norm": 1.2216492891311646, "learning_rate": 3.558036243468169e-05, "loss": 0.2898, "step": 763 }, { "epoch": 0.4792222047984946, "grad_norm": 1.2539747953414917, "learning_rate": 3.556761323685181e-05, "loss": 0.3046, "step": 764 }, { "epoch": 0.47984945899325704, "grad_norm": 1.0702846050262451, "learning_rate": 3.555484796802674e-05, "loss": 0.3107, "step": 765 }, { "epoch": 0.48047671318801943, "grad_norm": 0.9739949107170105, "learning_rate": 3.554206664138453e-05, "loss": 0.3133, "step": 766 }, { "epoch": 0.4811039673827819, "grad_norm": 1.2004176378250122, "learning_rate": 3.5529269270119777e-05, "loss": 0.3141, "step": 767 }, { "epoch": 0.4817312215775443, "grad_norm": 0.9493788480758667, "learning_rate": 3.551645586744368e-05, "loss": 0.2525, "step": 768 }, { "epoch": 0.4823584757723067, "grad_norm": 1.0642143487930298, "learning_rate": 3.550362644658394e-05, "loss": 0.3068, "step": 769 }, { "epoch": 0.4829857299670692, "grad_norm": 1.0598788261413574, "learning_rate": 3.549078102078484e-05, "loss": 0.3209, "step": 770 }, { "epoch": 0.48361298416183157, "grad_norm": 1.1478248834609985, "learning_rate": 3.547791960330716e-05, "loss": 0.3304, "step": 771 }, { "epoch": 0.484240238356594, "grad_norm": 1.1824041604995728, "learning_rate": 3.546504220742818e-05, "loss": 0.2554, "step": 772 }, { "epoch": 0.4848674925513564, "grad_norm": 0.9910576939582825, "learning_rate": 3.5452148846441715e-05, "loss": 0.2553, "step": 773 }, { "epoch": 0.48549474674611887, "grad_norm": 1.1317683458328247, "learning_rate": 3.543923953365801e-05, "loss": 0.2876, "step": 774 }, { "epoch": 0.4861220009408813, "grad_norm": 1.0811703205108643, "learning_rate": 3.5426314282403815e-05, "loss": 0.2653, "step": 775 }, { "epoch": 0.4867492551356437, "grad_norm": 1.1303654909133911, "learning_rate": 3.541337310602233e-05, "loss": 0.2883, "step": 776 }, { "epoch": 0.48737650933040616, "grad_norm": 1.4702017307281494, "learning_rate": 3.5400416017873165e-05, "loss": 0.3021, "step": 777 }, { "epoch": 0.48800376352516855, "grad_norm": 0.9657360911369324, "learning_rate": 3.53874430313324e-05, "loss": 0.2672, "step": 778 }, { "epoch": 0.488631017719931, "grad_norm": 0.9891339540481567, "learning_rate": 3.5374454159792493e-05, "loss": 0.2653, "step": 779 }, { "epoch": 0.48925827191469345, "grad_norm": 0.9610922932624817, "learning_rate": 3.536144941666233e-05, "loss": 0.2724, "step": 780 }, { "epoch": 0.48988552610945585, "grad_norm": 0.9398078918457031, "learning_rate": 3.5348428815367134e-05, "loss": 0.2575, "step": 781 }, { "epoch": 0.4905127803042183, "grad_norm": 0.9750205278396606, "learning_rate": 3.533539236934856e-05, "loss": 0.2673, "step": 782 }, { "epoch": 0.4911400344989807, "grad_norm": 1.1842604875564575, "learning_rate": 3.5322340092064575e-05, "loss": 0.2975, "step": 783 }, { "epoch": 0.49176728869374314, "grad_norm": 1.1964186429977417, "learning_rate": 3.530927199698951e-05, "loss": 0.2884, "step": 784 }, { "epoch": 0.4923945428885056, "grad_norm": 1.0833983421325684, "learning_rate": 3.529618809761402e-05, "loss": 0.2802, "step": 785 }, { "epoch": 0.493021797083268, "grad_norm": 1.0984426736831665, "learning_rate": 3.528308840744506e-05, "loss": 0.2913, "step": 786 }, { "epoch": 0.49364905127803044, "grad_norm": 1.1128063201904297, "learning_rate": 3.526997294000593e-05, "loss": 0.311, "step": 787 }, { "epoch": 0.49427630547279283, "grad_norm": 1.005001187324524, "learning_rate": 3.5256841708836164e-05, "loss": 0.276, "step": 788 }, { "epoch": 0.4949035596675553, "grad_norm": 1.0180959701538086, "learning_rate": 3.524369472749161e-05, "loss": 0.3069, "step": 789 }, { "epoch": 0.49553081386231773, "grad_norm": 0.9974920153617859, "learning_rate": 3.523053200954435e-05, "loss": 0.2875, "step": 790 }, { "epoch": 0.4961580680570801, "grad_norm": 1.0186494588851929, "learning_rate": 3.5217353568582715e-05, "loss": 0.2798, "step": 791 }, { "epoch": 0.4967853222518426, "grad_norm": 1.0674657821655273, "learning_rate": 3.520415941821129e-05, "loss": 0.3156, "step": 792 }, { "epoch": 0.49741257644660497, "grad_norm": 1.0009441375732422, "learning_rate": 3.519094957205085e-05, "loss": 0.2827, "step": 793 }, { "epoch": 0.4980398306413674, "grad_norm": 0.9863251447677612, "learning_rate": 3.517772404373839e-05, "loss": 0.2943, "step": 794 }, { "epoch": 0.4986670848361298, "grad_norm": 1.0082939863204956, "learning_rate": 3.5164482846927084e-05, "loss": 0.2979, "step": 795 }, { "epoch": 0.49929433903089226, "grad_norm": 1.0588964223861694, "learning_rate": 3.515122599528629e-05, "loss": 0.2701, "step": 796 }, { "epoch": 0.4999215932256547, "grad_norm": 1.0246416330337524, "learning_rate": 3.5137953502501516e-05, "loss": 0.3135, "step": 797 }, { "epoch": 0.5005488474204172, "grad_norm": 0.9969562888145447, "learning_rate": 3.512466538227443e-05, "loss": 0.315, "step": 798 }, { "epoch": 0.5011761016151796, "grad_norm": 0.9564127326011658, "learning_rate": 3.511136164832282e-05, "loss": 0.2659, "step": 799 }, { "epoch": 0.501803355809942, "grad_norm": 0.97264564037323, "learning_rate": 3.509804231438061e-05, "loss": 0.2534, "step": 800 }, { "epoch": 0.5024306100047045, "grad_norm": 1.067979335784912, "learning_rate": 3.508470739419781e-05, "loss": 0.3077, "step": 801 }, { "epoch": 0.5030578641994669, "grad_norm": 1.0767468214035034, "learning_rate": 3.5071356901540524e-05, "loss": 0.2903, "step": 802 }, { "epoch": 0.5036851183942292, "grad_norm": 1.055936336517334, "learning_rate": 3.505799085019094e-05, "loss": 0.2412, "step": 803 }, { "epoch": 0.5043123725889916, "grad_norm": 1.1777431964874268, "learning_rate": 3.50446092539473e-05, "loss": 0.3098, "step": 804 }, { "epoch": 0.5049396267837541, "grad_norm": 1.3164564371109009, "learning_rate": 3.50312121266239e-05, "loss": 0.3483, "step": 805 }, { "epoch": 0.5055668809785165, "grad_norm": 1.227178692817688, "learning_rate": 3.5017799482051064e-05, "loss": 0.274, "step": 806 }, { "epoch": 0.5061941351732789, "grad_norm": 1.074154019355774, "learning_rate": 3.500437133407514e-05, "loss": 0.2481, "step": 807 }, { "epoch": 0.5068213893680414, "grad_norm": 0.9530861973762512, "learning_rate": 3.499092769655849e-05, "loss": 0.2857, "step": 808 }, { "epoch": 0.5074486435628038, "grad_norm": 1.2343348264694214, "learning_rate": 3.497746858337943e-05, "loss": 0.2962, "step": 809 }, { "epoch": 0.5080758977575662, "grad_norm": 1.5502104759216309, "learning_rate": 3.4963994008432284e-05, "loss": 0.3189, "step": 810 }, { "epoch": 0.5087031519523287, "grad_norm": 1.1012052297592163, "learning_rate": 3.4950503985627336e-05, "loss": 0.3039, "step": 811 }, { "epoch": 0.5093304061470911, "grad_norm": 0.9534533023834229, "learning_rate": 3.493699852889081e-05, "loss": 0.2334, "step": 812 }, { "epoch": 0.5099576603418535, "grad_norm": 0.9859058260917664, "learning_rate": 3.4923477652164855e-05, "loss": 0.2561, "step": 813 }, { "epoch": 0.5105849145366159, "grad_norm": 1.1005891561508179, "learning_rate": 3.490994136940757e-05, "loss": 0.2924, "step": 814 }, { "epoch": 0.5112121687313784, "grad_norm": 1.0567169189453125, "learning_rate": 3.489638969459292e-05, "loss": 0.3075, "step": 815 }, { "epoch": 0.5118394229261408, "grad_norm": 0.9554606676101685, "learning_rate": 3.488282264171077e-05, "loss": 0.2678, "step": 816 }, { "epoch": 0.5124666771209032, "grad_norm": 1.0339974164962769, "learning_rate": 3.486924022476688e-05, "loss": 0.2809, "step": 817 }, { "epoch": 0.5130939313156657, "grad_norm": 1.0772579908370972, "learning_rate": 3.485564245778285e-05, "loss": 0.2799, "step": 818 }, { "epoch": 0.5137211855104281, "grad_norm": 0.9478000402450562, "learning_rate": 3.484202935479614e-05, "loss": 0.2386, "step": 819 }, { "epoch": 0.5143484397051905, "grad_norm": 1.0336586236953735, "learning_rate": 3.4828400929860023e-05, "loss": 0.3335, "step": 820 }, { "epoch": 0.5149756938999529, "grad_norm": 1.0443904399871826, "learning_rate": 3.481475719704362e-05, "loss": 0.304, "step": 821 }, { "epoch": 0.5156029480947154, "grad_norm": 1.0557197332382202, "learning_rate": 3.480109817043182e-05, "loss": 0.2804, "step": 822 }, { "epoch": 0.5162302022894778, "grad_norm": 1.3195269107818604, "learning_rate": 3.478742386412533e-05, "loss": 0.3085, "step": 823 }, { "epoch": 0.5168574564842402, "grad_norm": 1.0132356882095337, "learning_rate": 3.47737342922406e-05, "loss": 0.2515, "step": 824 }, { "epoch": 0.5174847106790027, "grad_norm": 0.9900258183479309, "learning_rate": 3.4760029468909876e-05, "loss": 0.2801, "step": 825 }, { "epoch": 0.5181119648737651, "grad_norm": 1.2215389013290405, "learning_rate": 3.4746309408281123e-05, "loss": 0.295, "step": 826 }, { "epoch": 0.5187392190685275, "grad_norm": 1.0417739152908325, "learning_rate": 3.473257412451803e-05, "loss": 0.2856, "step": 827 }, { "epoch": 0.51936647326329, "grad_norm": 1.002961277961731, "learning_rate": 3.471882363180004e-05, "loss": 0.2985, "step": 828 }, { "epoch": 0.5199937274580524, "grad_norm": 1.025743007659912, "learning_rate": 3.4705057944322254e-05, "loss": 0.3011, "step": 829 }, { "epoch": 0.5206209816528148, "grad_norm": 0.9813662767410278, "learning_rate": 3.4691277076295464e-05, "loss": 0.2533, "step": 830 }, { "epoch": 0.5212482358475772, "grad_norm": 0.9720662236213684, "learning_rate": 3.467748104194617e-05, "loss": 0.2474, "step": 831 }, { "epoch": 0.5218754900423397, "grad_norm": 0.9965443015098572, "learning_rate": 3.46636698555165e-05, "loss": 0.3097, "step": 832 }, { "epoch": 0.5225027442371021, "grad_norm": 0.9662636518478394, "learning_rate": 3.4649843531264214e-05, "loss": 0.297, "step": 833 }, { "epoch": 0.5231299984318645, "grad_norm": 1.0954393148422241, "learning_rate": 3.4636002083462714e-05, "loss": 0.272, "step": 834 }, { "epoch": 0.523757252626627, "grad_norm": 1.066875696182251, "learning_rate": 3.462214552640103e-05, "loss": 0.3458, "step": 835 }, { "epoch": 0.5243845068213894, "grad_norm": 1.1262314319610596, "learning_rate": 3.460827387438376e-05, "loss": 0.2879, "step": 836 }, { "epoch": 0.5250117610161518, "grad_norm": 1.11569344997406, "learning_rate": 3.4594387141731095e-05, "loss": 0.2554, "step": 837 }, { "epoch": 0.5256390152109143, "grad_norm": 1.0312837362289429, "learning_rate": 3.4580485342778815e-05, "loss": 0.2899, "step": 838 }, { "epoch": 0.5262662694056767, "grad_norm": 1.1218152046203613, "learning_rate": 3.456656849187822e-05, "loss": 0.2901, "step": 839 }, { "epoch": 0.5268935236004391, "grad_norm": 1.0698727369308472, "learning_rate": 3.455263660339618e-05, "loss": 0.3009, "step": 840 }, { "epoch": 0.5275207777952015, "grad_norm": 1.0733284950256348, "learning_rate": 3.453868969171507e-05, "loss": 0.2664, "step": 841 }, { "epoch": 0.528148031989964, "grad_norm": 1.1404551267623901, "learning_rate": 3.452472777123277e-05, "loss": 0.2939, "step": 842 }, { "epoch": 0.5287752861847264, "grad_norm": 1.1484930515289307, "learning_rate": 3.451075085636265e-05, "loss": 0.294, "step": 843 }, { "epoch": 0.5294025403794888, "grad_norm": 1.1404722929000854, "learning_rate": 3.44967589615336e-05, "loss": 0.2949, "step": 844 }, { "epoch": 0.5300297945742513, "grad_norm": 1.1326576471328735, "learning_rate": 3.4482752101189934e-05, "loss": 0.2869, "step": 845 }, { "epoch": 0.5306570487690137, "grad_norm": 1.152813196182251, "learning_rate": 3.4468730289791424e-05, "loss": 0.2791, "step": 846 }, { "epoch": 0.531284302963776, "grad_norm": 1.0741047859191895, "learning_rate": 3.445469354181328e-05, "loss": 0.2799, "step": 847 }, { "epoch": 0.5319115571585386, "grad_norm": 1.0504900217056274, "learning_rate": 3.444064187174613e-05, "loss": 0.3542, "step": 848 }, { "epoch": 0.532538811353301, "grad_norm": 0.9990710020065308, "learning_rate": 3.442657529409601e-05, "loss": 0.2624, "step": 849 }, { "epoch": 0.5331660655480633, "grad_norm": 1.1457931995391846, "learning_rate": 3.4412493823384354e-05, "loss": 0.2787, "step": 850 }, { "epoch": 0.5337933197428257, "grad_norm": 1.0615814924240112, "learning_rate": 3.439839747414795e-05, "loss": 0.2402, "step": 851 }, { "epoch": 0.5344205739375882, "grad_norm": 1.1380528211593628, "learning_rate": 3.438428626093893e-05, "loss": 0.2785, "step": 852 }, { "epoch": 0.5350478281323506, "grad_norm": 1.1444776058197021, "learning_rate": 3.437016019832485e-05, "loss": 0.3303, "step": 853 }, { "epoch": 0.535675082327113, "grad_norm": 1.1833291053771973, "learning_rate": 3.43560193008885e-05, "loss": 0.3053, "step": 854 }, { "epoch": 0.5363023365218755, "grad_norm": 1.057030200958252, "learning_rate": 3.4341863583228035e-05, "loss": 0.2852, "step": 855 }, { "epoch": 0.5369295907166379, "grad_norm": 1.0322626829147339, "learning_rate": 3.432769305995691e-05, "loss": 0.2795, "step": 856 }, { "epoch": 0.5375568449114003, "grad_norm": 1.1600500345230103, "learning_rate": 3.4313507745703855e-05, "loss": 0.3004, "step": 857 }, { "epoch": 0.5381840991061628, "grad_norm": 1.047410488128662, "learning_rate": 3.429930765511286e-05, "loss": 0.2798, "step": 858 }, { "epoch": 0.5388113533009252, "grad_norm": 1.1107338666915894, "learning_rate": 3.428509280284318e-05, "loss": 0.2828, "step": 859 }, { "epoch": 0.5394386074956876, "grad_norm": 1.0454515218734741, "learning_rate": 3.427086320356931e-05, "loss": 0.2958, "step": 860 }, { "epoch": 0.54006586169045, "grad_norm": 1.0651618242263794, "learning_rate": 3.4256618871980964e-05, "loss": 0.2995, "step": 861 }, { "epoch": 0.5406931158852125, "grad_norm": 1.0665225982666016, "learning_rate": 3.4242359822783075e-05, "loss": 0.2952, "step": 862 }, { "epoch": 0.5413203700799749, "grad_norm": 0.9790911078453064, "learning_rate": 3.422808607069575e-05, "loss": 0.299, "step": 863 }, { "epoch": 0.5419476242747373, "grad_norm": 0.9757672548294067, "learning_rate": 3.4213797630454286e-05, "loss": 0.2996, "step": 864 }, { "epoch": 0.5425748784694998, "grad_norm": 1.08747398853302, "learning_rate": 3.419949451680916e-05, "loss": 0.3108, "step": 865 }, { "epoch": 0.5432021326642622, "grad_norm": 1.0883815288543701, "learning_rate": 3.418517674452597e-05, "loss": 0.2838, "step": 866 }, { "epoch": 0.5438293868590246, "grad_norm": 0.935845136642456, "learning_rate": 3.4170844328385446e-05, "loss": 0.282, "step": 867 }, { "epoch": 0.544456641053787, "grad_norm": 1.0941154956817627, "learning_rate": 3.415649728318345e-05, "loss": 0.295, "step": 868 }, { "epoch": 0.5450838952485495, "grad_norm": 1.1535578966140747, "learning_rate": 3.4142135623730954e-05, "loss": 0.2653, "step": 869 }, { "epoch": 0.5457111494433119, "grad_norm": 0.768397867679596, "learning_rate": 3.4127759364854e-05, "loss": 0.2877, "step": 870 }, { "epoch": 0.5463384036380743, "grad_norm": 0.944739580154419, "learning_rate": 3.411336852139369e-05, "loss": 0.2566, "step": 871 }, { "epoch": 0.5469656578328368, "grad_norm": 1.490142822265625, "learning_rate": 3.4098963108206215e-05, "loss": 0.262, "step": 872 }, { "epoch": 0.5475929120275992, "grad_norm": 1.1872611045837402, "learning_rate": 3.408454314016279e-05, "loss": 0.3117, "step": 873 }, { "epoch": 0.5482201662223616, "grad_norm": 1.2164311408996582, "learning_rate": 3.407010863214965e-05, "loss": 0.3071, "step": 874 }, { "epoch": 0.5488474204171241, "grad_norm": 0.8649755716323853, "learning_rate": 3.4055659599068044e-05, "loss": 0.256, "step": 875 }, { "epoch": 0.5494746746118865, "grad_norm": 1.3158848285675049, "learning_rate": 3.4041196055834234e-05, "loss": 0.2494, "step": 876 }, { "epoch": 0.5501019288066489, "grad_norm": 1.0252341032028198, "learning_rate": 3.402671801737942e-05, "loss": 0.3016, "step": 877 }, { "epoch": 0.5507291830014113, "grad_norm": 1.2007358074188232, "learning_rate": 3.40122254986498e-05, "loss": 0.3121, "step": 878 }, { "epoch": 0.5513564371961738, "grad_norm": 0.943353533744812, "learning_rate": 3.399771851460653e-05, "loss": 0.2685, "step": 879 }, { "epoch": 0.5519836913909362, "grad_norm": 1.036067008972168, "learning_rate": 3.398319708022565e-05, "loss": 0.3, "step": 880 }, { "epoch": 0.5526109455856986, "grad_norm": 1.1122220754623413, "learning_rate": 3.396866121049816e-05, "loss": 0.277, "step": 881 }, { "epoch": 0.5532381997804611, "grad_norm": 0.9816188812255859, "learning_rate": 3.395411092042996e-05, "loss": 0.2745, "step": 882 }, { "epoch": 0.5538654539752235, "grad_norm": 1.0659526586532593, "learning_rate": 3.3939546225041816e-05, "loss": 0.3018, "step": 883 }, { "epoch": 0.5544927081699859, "grad_norm": 1.1326079368591309, "learning_rate": 3.392496713936938e-05, "loss": 0.312, "step": 884 }, { "epoch": 0.5551199623647484, "grad_norm": 1.0870763063430786, "learning_rate": 3.391037367846315e-05, "loss": 0.2741, "step": 885 }, { "epoch": 0.5557472165595108, "grad_norm": 1.2274421453475952, "learning_rate": 3.389576585738848e-05, "loss": 0.2911, "step": 886 }, { "epoch": 0.5563744707542732, "grad_norm": 1.053361177444458, "learning_rate": 3.3881143691225534e-05, "loss": 0.2496, "step": 887 }, { "epoch": 0.5570017249490355, "grad_norm": 0.9817186594009399, "learning_rate": 3.386650719506927e-05, "loss": 0.2351, "step": 888 }, { "epoch": 0.557628979143798, "grad_norm": 1.211648941040039, "learning_rate": 3.385185638402949e-05, "loss": 0.284, "step": 889 }, { "epoch": 0.5582562333385604, "grad_norm": 1.075768232345581, "learning_rate": 3.383719127323072e-05, "loss": 0.2909, "step": 890 }, { "epoch": 0.5588834875333228, "grad_norm": 1.1833033561706543, "learning_rate": 3.382251187781228e-05, "loss": 0.2985, "step": 891 }, { "epoch": 0.5595107417280853, "grad_norm": 1.2154804468154907, "learning_rate": 3.380781821292822e-05, "loss": 0.2503, "step": 892 }, { "epoch": 0.5601379959228477, "grad_norm": 0.6897361278533936, "learning_rate": 3.3793110293747334e-05, "loss": 0.2373, "step": 893 }, { "epoch": 0.5607652501176101, "grad_norm": 1.0964821577072144, "learning_rate": 3.377838813545313e-05, "loss": 0.2711, "step": 894 }, { "epoch": 0.5613925043123726, "grad_norm": 1.0501883029937744, "learning_rate": 3.3763651753243783e-05, "loss": 0.3124, "step": 895 }, { "epoch": 0.562019758507135, "grad_norm": 0.9332281947135925, "learning_rate": 3.3748901162332205e-05, "loss": 0.291, "step": 896 }, { "epoch": 0.5626470127018974, "grad_norm": 1.2695751190185547, "learning_rate": 3.373413637794595e-05, "loss": 0.295, "step": 897 }, { "epoch": 0.5632742668966598, "grad_norm": 1.011654257774353, "learning_rate": 3.3719357415327214e-05, "loss": 0.3049, "step": 898 }, { "epoch": 0.5639015210914223, "grad_norm": 1.0462952852249146, "learning_rate": 3.370456428973285e-05, "loss": 0.2726, "step": 899 }, { "epoch": 0.5645287752861847, "grad_norm": 1.046999454498291, "learning_rate": 3.368975701643431e-05, "loss": 0.2477, "step": 900 }, { "epoch": 0.5651560294809471, "grad_norm": 1.1205613613128662, "learning_rate": 3.367493561071767e-05, "loss": 0.2988, "step": 901 }, { "epoch": 0.5657832836757096, "grad_norm": 1.0621720552444458, "learning_rate": 3.366010008788359e-05, "loss": 0.3005, "step": 902 }, { "epoch": 0.566410537870472, "grad_norm": 1.545395016670227, "learning_rate": 3.36452504632473e-05, "loss": 0.2953, "step": 903 }, { "epoch": 0.5670377920652344, "grad_norm": 0.95975661277771, "learning_rate": 3.363038675213859e-05, "loss": 0.2769, "step": 904 }, { "epoch": 0.5676650462599968, "grad_norm": 1.022896647453308, "learning_rate": 3.36155089699018e-05, "loss": 0.2779, "step": 905 }, { "epoch": 0.5682923004547593, "grad_norm": 1.1125088930130005, "learning_rate": 3.360061713189578e-05, "loss": 0.2888, "step": 906 }, { "epoch": 0.5689195546495217, "grad_norm": 1.1785695552825928, "learning_rate": 3.3585711253493904e-05, "loss": 0.2538, "step": 907 }, { "epoch": 0.5695468088442841, "grad_norm": 1.1789590120315552, "learning_rate": 3.3570791350084026e-05, "loss": 0.2806, "step": 908 }, { "epoch": 0.5701740630390466, "grad_norm": 1.133196234703064, "learning_rate": 3.35558574370685e-05, "loss": 0.2784, "step": 909 }, { "epoch": 0.570801317233809, "grad_norm": 0.9845195412635803, "learning_rate": 3.354090952986412e-05, "loss": 0.2394, "step": 910 }, { "epoch": 0.5714285714285714, "grad_norm": 1.0305912494659424, "learning_rate": 3.352594764390214e-05, "loss": 0.2733, "step": 911 }, { "epoch": 0.5720558256233339, "grad_norm": 1.159439206123352, "learning_rate": 3.3510971794628244e-05, "loss": 0.2828, "step": 912 }, { "epoch": 0.5726830798180963, "grad_norm": 1.128682255744934, "learning_rate": 3.349598199750252e-05, "loss": 0.247, "step": 913 }, { "epoch": 0.5733103340128587, "grad_norm": 0.911435067653656, "learning_rate": 3.348097826799948e-05, "loss": 0.2775, "step": 914 }, { "epoch": 0.5739375882076211, "grad_norm": 1.0393047332763672, "learning_rate": 3.3465960621607995e-05, "loss": 0.2769, "step": 915 }, { "epoch": 0.5745648424023836, "grad_norm": 0.9499742984771729, "learning_rate": 3.3450929073831296e-05, "loss": 0.2639, "step": 916 }, { "epoch": 0.575192096597146, "grad_norm": 1.0762925148010254, "learning_rate": 3.3435883640186996e-05, "loss": 0.2847, "step": 917 }, { "epoch": 0.5758193507919084, "grad_norm": 1.2723565101623535, "learning_rate": 3.342082433620702e-05, "loss": 0.3105, "step": 918 }, { "epoch": 0.5764466049866709, "grad_norm": 1.0838451385498047, "learning_rate": 3.340575117743761e-05, "loss": 0.2527, "step": 919 }, { "epoch": 0.5770738591814333, "grad_norm": 0.9201546311378479, "learning_rate": 3.3390664179439336e-05, "loss": 0.2539, "step": 920 }, { "epoch": 0.5777011133761957, "grad_norm": 0.951526939868927, "learning_rate": 3.337556335778702e-05, "loss": 0.2623, "step": 921 }, { "epoch": 0.5783283675709582, "grad_norm": 0.9981276988983154, "learning_rate": 3.336044872806978e-05, "loss": 0.2454, "step": 922 }, { "epoch": 0.5789556217657206, "grad_norm": 0.9986087083816528, "learning_rate": 3.334532030589098e-05, "loss": 0.2502, "step": 923 }, { "epoch": 0.579582875960483, "grad_norm": 1.0848922729492188, "learning_rate": 3.333017810686823e-05, "loss": 0.2748, "step": 924 }, { "epoch": 0.5802101301552454, "grad_norm": 0.9143623113632202, "learning_rate": 3.331502214663333e-05, "loss": 0.2559, "step": 925 }, { "epoch": 0.5808373843500079, "grad_norm": 1.2553857564926147, "learning_rate": 3.329985244083234e-05, "loss": 0.2646, "step": 926 }, { "epoch": 0.5814646385447703, "grad_norm": 1.1474798917770386, "learning_rate": 3.328466900512547e-05, "loss": 0.2959, "step": 927 }, { "epoch": 0.5820918927395327, "grad_norm": 0.9720746278762817, "learning_rate": 3.3269471855187114e-05, "loss": 0.3125, "step": 928 }, { "epoch": 0.5827191469342952, "grad_norm": 0.8761600852012634, "learning_rate": 3.325426100670583e-05, "loss": 0.2655, "step": 929 }, { "epoch": 0.5833464011290576, "grad_norm": 1.0103437900543213, "learning_rate": 3.3239036475384295e-05, "loss": 0.2791, "step": 930 }, { "epoch": 0.58397365532382, "grad_norm": 1.0584696531295776, "learning_rate": 3.322379827693935e-05, "loss": 0.2663, "step": 931 }, { "epoch": 0.5846009095185825, "grad_norm": 1.164749264717102, "learning_rate": 3.3208546427101915e-05, "loss": 0.2835, "step": 932 }, { "epoch": 0.5852281637133449, "grad_norm": 1.2362803220748901, "learning_rate": 3.3193280941617015e-05, "loss": 0.2661, "step": 933 }, { "epoch": 0.5858554179081072, "grad_norm": 1.0678917169570923, "learning_rate": 3.317800183624374e-05, "loss": 0.2567, "step": 934 }, { "epoch": 0.5864826721028696, "grad_norm": 1.1115533113479614, "learning_rate": 3.316270912675526e-05, "loss": 0.2901, "step": 935 }, { "epoch": 0.5871099262976321, "grad_norm": 0.9827170372009277, "learning_rate": 3.3147402828938764e-05, "loss": 0.2706, "step": 936 }, { "epoch": 0.5877371804923945, "grad_norm": 1.029221534729004, "learning_rate": 3.313208295859549e-05, "loss": 0.2538, "step": 937 }, { "epoch": 0.5883644346871569, "grad_norm": 1.0566000938415527, "learning_rate": 3.311674953154068e-05, "loss": 0.2468, "step": 938 }, { "epoch": 0.5889916888819194, "grad_norm": 1.0722020864486694, "learning_rate": 3.310140256360357e-05, "loss": 0.2823, "step": 939 }, { "epoch": 0.5896189430766818, "grad_norm": 1.1388366222381592, "learning_rate": 3.308604207062738e-05, "loss": 0.2806, "step": 940 }, { "epoch": 0.5902461972714442, "grad_norm": 0.9928994178771973, "learning_rate": 3.307066806846927e-05, "loss": 0.2534, "step": 941 }, { "epoch": 0.5908734514662067, "grad_norm": 1.0917091369628906, "learning_rate": 3.305528057300039e-05, "loss": 0.3106, "step": 942 }, { "epoch": 0.5915007056609691, "grad_norm": 1.0011969804763794, "learning_rate": 3.3039879600105765e-05, "loss": 0.2558, "step": 943 }, { "epoch": 0.5921279598557315, "grad_norm": 1.084661602973938, "learning_rate": 3.302446516568438e-05, "loss": 0.2477, "step": 944 }, { "epoch": 0.5927552140504939, "grad_norm": 1.0037258863449097, "learning_rate": 3.300903728564909e-05, "loss": 0.2581, "step": 945 }, { "epoch": 0.5933824682452564, "grad_norm": 0.993988573551178, "learning_rate": 3.2993595975926644e-05, "loss": 0.2829, "step": 946 }, { "epoch": 0.5940097224400188, "grad_norm": 1.0701007843017578, "learning_rate": 3.297814125245765e-05, "loss": 0.258, "step": 947 }, { "epoch": 0.5946369766347812, "grad_norm": 1.08833909034729, "learning_rate": 3.296267313119656e-05, "loss": 0.3067, "step": 948 }, { "epoch": 0.5952642308295437, "grad_norm": 1.0432220697402954, "learning_rate": 3.294719162811166e-05, "loss": 0.2657, "step": 949 }, { "epoch": 0.5958914850243061, "grad_norm": 1.281551480293274, "learning_rate": 3.293169675918506e-05, "loss": 0.3095, "step": 950 }, { "epoch": 0.5965187392190685, "grad_norm": 0.9822801351547241, "learning_rate": 3.291618854041265e-05, "loss": 0.2619, "step": 951 }, { "epoch": 0.5971459934138309, "grad_norm": 1.0419542789459229, "learning_rate": 3.290066698780409e-05, "loss": 0.253, "step": 952 }, { "epoch": 0.5977732476085934, "grad_norm": 0.9738790392875671, "learning_rate": 3.2885132117382865e-05, "loss": 0.2536, "step": 953 }, { "epoch": 0.5984005018033558, "grad_norm": 0.9893375039100647, "learning_rate": 3.286958394518615e-05, "loss": 0.2432, "step": 954 }, { "epoch": 0.5990277559981182, "grad_norm": 1.023341178894043, "learning_rate": 3.2854022487264865e-05, "loss": 0.2955, "step": 955 }, { "epoch": 0.5996550101928807, "grad_norm": 1.0082974433898926, "learning_rate": 3.2838447759683646e-05, "loss": 0.2479, "step": 956 }, { "epoch": 0.6002822643876431, "grad_norm": 0.9385503530502319, "learning_rate": 3.282285977852086e-05, "loss": 0.235, "step": 957 }, { "epoch": 0.6009095185824055, "grad_norm": 0.960944652557373, "learning_rate": 3.28072585598685e-05, "loss": 0.2548, "step": 958 }, { "epoch": 0.601536772777168, "grad_norm": 1.061529278755188, "learning_rate": 3.2791644119832265e-05, "loss": 0.2594, "step": 959 }, { "epoch": 0.6021640269719304, "grad_norm": 0.9216713309288025, "learning_rate": 3.277601647453149e-05, "loss": 0.2867, "step": 960 }, { "epoch": 0.6027912811666928, "grad_norm": 1.1350970268249512, "learning_rate": 3.276037564009915e-05, "loss": 0.2707, "step": 961 }, { "epoch": 0.6034185353614552, "grad_norm": 1.0315324068069458, "learning_rate": 3.2744721632681816e-05, "loss": 0.2968, "step": 962 }, { "epoch": 0.6040457895562177, "grad_norm": 1.111251950263977, "learning_rate": 3.272905446843968e-05, "loss": 0.2876, "step": 963 }, { "epoch": 0.6046730437509801, "grad_norm": 0.9278132915496826, "learning_rate": 3.2713374163546504e-05, "loss": 0.2883, "step": 964 }, { "epoch": 0.6053002979457425, "grad_norm": 1.0058590173721313, "learning_rate": 3.269768073418962e-05, "loss": 0.2652, "step": 965 }, { "epoch": 0.605927552140505, "grad_norm": 1.0049445629119873, "learning_rate": 3.268197419656991e-05, "loss": 0.2581, "step": 966 }, { "epoch": 0.6065548063352674, "grad_norm": 0.8966968059539795, "learning_rate": 3.266625456690178e-05, "loss": 0.2511, "step": 967 }, { "epoch": 0.6071820605300298, "grad_norm": 0.9507226347923279, "learning_rate": 3.265052186141317e-05, "loss": 0.257, "step": 968 }, { "epoch": 0.6078093147247923, "grad_norm": 0.849480152130127, "learning_rate": 3.263477609634549e-05, "loss": 0.2645, "step": 969 }, { "epoch": 0.6084365689195547, "grad_norm": 1.1491882801055908, "learning_rate": 3.261901728795364e-05, "loss": 0.2859, "step": 970 }, { "epoch": 0.6090638231143171, "grad_norm": 1.0749951601028442, "learning_rate": 3.260324545250602e-05, "loss": 0.2804, "step": 971 }, { "epoch": 0.6096910773090795, "grad_norm": 0.9273468255996704, "learning_rate": 3.2587460606284435e-05, "loss": 0.2458, "step": 972 }, { "epoch": 0.610318331503842, "grad_norm": 1.2743276357650757, "learning_rate": 3.257166276558414e-05, "loss": 0.2651, "step": 973 }, { "epoch": 0.6109455856986044, "grad_norm": 0.9892858862876892, "learning_rate": 3.2555851946713794e-05, "loss": 0.2674, "step": 974 }, { "epoch": 0.6115728398933667, "grad_norm": 0.8826314210891724, "learning_rate": 3.254002816599548e-05, "loss": 0.2755, "step": 975 }, { "epoch": 0.6122000940881293, "grad_norm": 1.0605132579803467, "learning_rate": 3.2524191439764624e-05, "loss": 0.2944, "step": 976 }, { "epoch": 0.6128273482828916, "grad_norm": 0.9647715091705322, "learning_rate": 3.250834178437005e-05, "loss": 0.259, "step": 977 }, { "epoch": 0.613454602477654, "grad_norm": 0.9203553199768066, "learning_rate": 3.249247921617391e-05, "loss": 0.2446, "step": 978 }, { "epoch": 0.6140818566724165, "grad_norm": 0.9496483206748962, "learning_rate": 3.2476603751551695e-05, "loss": 0.2573, "step": 979 }, { "epoch": 0.6147091108671789, "grad_norm": 0.9901736974716187, "learning_rate": 3.24607154068922e-05, "loss": 0.2896, "step": 980 }, { "epoch": 0.6153363650619413, "grad_norm": 1.043483018875122, "learning_rate": 3.244481419859752e-05, "loss": 0.2976, "step": 981 }, { "epoch": 0.6159636192567037, "grad_norm": 0.9606313109397888, "learning_rate": 3.242890014308305e-05, "loss": 0.2781, "step": 982 }, { "epoch": 0.6165908734514662, "grad_norm": 1.1747736930847168, "learning_rate": 3.241297325677742e-05, "loss": 0.295, "step": 983 }, { "epoch": 0.6172181276462286, "grad_norm": 1.166608214378357, "learning_rate": 3.2397033556122496e-05, "loss": 0.2732, "step": 984 }, { "epoch": 0.617845381840991, "grad_norm": 0.9529145359992981, "learning_rate": 3.238108105757342e-05, "loss": 0.2793, "step": 985 }, { "epoch": 0.6184726360357535, "grad_norm": 0.9561068415641785, "learning_rate": 3.23651157775985e-05, "loss": 0.24, "step": 986 }, { "epoch": 0.6190998902305159, "grad_norm": 1.0726927518844604, "learning_rate": 3.234913773267927e-05, "loss": 0.2791, "step": 987 }, { "epoch": 0.6197271444252783, "grad_norm": 1.0708917379379272, "learning_rate": 3.233314693931042e-05, "loss": 0.2682, "step": 988 }, { "epoch": 0.6203543986200408, "grad_norm": 1.0202605724334717, "learning_rate": 3.2317143413999815e-05, "loss": 0.2736, "step": 989 }, { "epoch": 0.6209816528148032, "grad_norm": 1.00557541847229, "learning_rate": 3.2301127173268445e-05, "loss": 0.2661, "step": 990 }, { "epoch": 0.6216089070095656, "grad_norm": 1.2042478322982788, "learning_rate": 3.2285098233650455e-05, "loss": 0.3188, "step": 991 }, { "epoch": 0.622236161204328, "grad_norm": 1.0422487258911133, "learning_rate": 3.2269056611693075e-05, "loss": 0.2754, "step": 992 }, { "epoch": 0.6228634153990905, "grad_norm": 0.9788997173309326, "learning_rate": 3.225300232395664e-05, "loss": 0.2528, "step": 993 }, { "epoch": 0.6234906695938529, "grad_norm": 1.1364682912826538, "learning_rate": 3.223693538701454e-05, "loss": 0.2894, "step": 994 }, { "epoch": 0.6241179237886153, "grad_norm": 1.0093938112258911, "learning_rate": 3.222085581745326e-05, "loss": 0.2838, "step": 995 }, { "epoch": 0.6247451779833778, "grad_norm": 1.216389775276184, "learning_rate": 3.220476363187231e-05, "loss": 0.272, "step": 996 }, { "epoch": 0.6253724321781402, "grad_norm": 1.0009663105010986, "learning_rate": 3.218865884688419e-05, "loss": 0.3109, "step": 997 }, { "epoch": 0.6259996863729026, "grad_norm": 0.9530799984931946, "learning_rate": 3.2172541479114466e-05, "loss": 0.2968, "step": 998 }, { "epoch": 0.626626940567665, "grad_norm": 0.8794124722480774, "learning_rate": 3.215641154520165e-05, "loss": 0.2284, "step": 999 }, { "epoch": 0.6272541947624275, "grad_norm": 1.0209484100341797, "learning_rate": 3.214026906179725e-05, "loss": 0.2554, "step": 1000 }, { "epoch": 0.6278814489571899, "grad_norm": 0.8757167458534241, "learning_rate": 3.21241140455657e-05, "loss": 0.2195, "step": 1001 }, { "epoch": 0.6285087031519523, "grad_norm": 0.9012476801872253, "learning_rate": 3.210794651318442e-05, "loss": 0.243, "step": 1002 }, { "epoch": 0.6291359573467148, "grad_norm": 0.9644747972488403, "learning_rate": 3.20917664813437e-05, "loss": 0.2588, "step": 1003 }, { "epoch": 0.6297632115414772, "grad_norm": 1.0049502849578857, "learning_rate": 3.207557396674677e-05, "loss": 0.2262, "step": 1004 }, { "epoch": 0.6303904657362396, "grad_norm": 0.924791693687439, "learning_rate": 3.205936898610973e-05, "loss": 0.2547, "step": 1005 }, { "epoch": 0.6310177199310021, "grad_norm": 1.0177044868469238, "learning_rate": 3.204315155616154e-05, "loss": 0.2557, "step": 1006 }, { "epoch": 0.6316449741257645, "grad_norm": 0.9623924493789673, "learning_rate": 3.202692169364404e-05, "loss": 0.2381, "step": 1007 }, { "epoch": 0.6322722283205269, "grad_norm": 1.1295835971832275, "learning_rate": 3.2010679415311886e-05, "loss": 0.2704, "step": 1008 }, { "epoch": 0.6328994825152893, "grad_norm": 1.0597741603851318, "learning_rate": 3.199442473793254e-05, "loss": 0.2674, "step": 1009 }, { "epoch": 0.6335267367100518, "grad_norm": 1.0375666618347168, "learning_rate": 3.197815767828629e-05, "loss": 0.2909, "step": 1010 }, { "epoch": 0.6341539909048142, "grad_norm": 0.9671528339385986, "learning_rate": 3.19618782531662e-05, "loss": 0.2621, "step": 1011 }, { "epoch": 0.6347812450995766, "grad_norm": 1.0041483640670776, "learning_rate": 3.1945586479378076e-05, "loss": 0.2358, "step": 1012 }, { "epoch": 0.6354084992943391, "grad_norm": 1.0127429962158203, "learning_rate": 3.1929282373740505e-05, "loss": 0.2986, "step": 1013 }, { "epoch": 0.6360357534891015, "grad_norm": 1.0777404308319092, "learning_rate": 3.191296595308478e-05, "loss": 0.2539, "step": 1014 }, { "epoch": 0.6366630076838639, "grad_norm": 0.8917059898376465, "learning_rate": 3.189663723425492e-05, "loss": 0.2578, "step": 1015 }, { "epoch": 0.6372902618786264, "grad_norm": 0.9886910915374756, "learning_rate": 3.188029623410764e-05, "loss": 0.2777, "step": 1016 }, { "epoch": 0.6379175160733888, "grad_norm": 0.9957547187805176, "learning_rate": 3.186394296951232e-05, "loss": 0.2935, "step": 1017 }, { "epoch": 0.6385447702681512, "grad_norm": 1.0073680877685547, "learning_rate": 3.184757745735102e-05, "loss": 0.2772, "step": 1018 }, { "epoch": 0.6391720244629135, "grad_norm": 1.0120540857315063, "learning_rate": 3.1831199714518436e-05, "loss": 0.2751, "step": 1019 }, { "epoch": 0.639799278657676, "grad_norm": 1.0629538297653198, "learning_rate": 3.181480975792187e-05, "loss": 0.2723, "step": 1020 }, { "epoch": 0.6404265328524384, "grad_norm": 1.1036429405212402, "learning_rate": 3.179840760448127e-05, "loss": 0.2914, "step": 1021 }, { "epoch": 0.6410537870472008, "grad_norm": 0.9888352751731873, "learning_rate": 3.178199327112916e-05, "loss": 0.237, "step": 1022 }, { "epoch": 0.6416810412419633, "grad_norm": 1.2374451160430908, "learning_rate": 3.1765566774810615e-05, "loss": 0.2701, "step": 1023 }, { "epoch": 0.6423082954367257, "grad_norm": 1.161687970161438, "learning_rate": 3.17491281324833e-05, "loss": 0.2812, "step": 1024 }, { "epoch": 0.6429355496314881, "grad_norm": 1.0261205434799194, "learning_rate": 3.17326773611174e-05, "loss": 0.2707, "step": 1025 }, { "epoch": 0.6435628038262506, "grad_norm": 1.058673620223999, "learning_rate": 3.171621447769562e-05, "loss": 0.2663, "step": 1026 }, { "epoch": 0.644190058021013, "grad_norm": 0.968376100063324, "learning_rate": 3.169973949921318e-05, "loss": 0.2366, "step": 1027 }, { "epoch": 0.6448173122157754, "grad_norm": 0.8703855872154236, "learning_rate": 3.168325244267778e-05, "loss": 0.2519, "step": 1028 }, { "epoch": 0.6454445664105378, "grad_norm": 1.0061933994293213, "learning_rate": 3.166675332510959e-05, "loss": 0.2435, "step": 1029 }, { "epoch": 0.6460718206053003, "grad_norm": 0.9254626631736755, "learning_rate": 3.165024216354123e-05, "loss": 0.2422, "step": 1030 }, { "epoch": 0.6466990748000627, "grad_norm": 1.0174202919006348, "learning_rate": 3.163371897501775e-05, "loss": 0.2597, "step": 1031 }, { "epoch": 0.6473263289948251, "grad_norm": 1.0172829627990723, "learning_rate": 3.161718377659663e-05, "loss": 0.2645, "step": 1032 }, { "epoch": 0.6479535831895876, "grad_norm": 1.0958163738250732, "learning_rate": 3.160063658534771e-05, "loss": 0.2797, "step": 1033 }, { "epoch": 0.64858083738435, "grad_norm": 0.9608843922615051, "learning_rate": 3.1584077418353264e-05, "loss": 0.253, "step": 1034 }, { "epoch": 0.6492080915791124, "grad_norm": 1.0445313453674316, "learning_rate": 3.1567506292707904e-05, "loss": 0.3066, "step": 1035 }, { "epoch": 0.6498353457738749, "grad_norm": 0.8894146680831909, "learning_rate": 3.1550923225518564e-05, "loss": 0.2694, "step": 1036 }, { "epoch": 0.6504625999686373, "grad_norm": 1.1175638437271118, "learning_rate": 3.1534328233904554e-05, "loss": 0.2864, "step": 1037 }, { "epoch": 0.6510898541633997, "grad_norm": 0.9953340291976929, "learning_rate": 3.151772133499744e-05, "loss": 0.2807, "step": 1038 }, { "epoch": 0.6517171083581621, "grad_norm": 0.9881662130355835, "learning_rate": 3.1501102545941135e-05, "loss": 0.2885, "step": 1039 }, { "epoch": 0.6523443625529246, "grad_norm": 1.1190524101257324, "learning_rate": 3.148447188389179e-05, "loss": 0.2456, "step": 1040 }, { "epoch": 0.652971616747687, "grad_norm": 1.0610312223434448, "learning_rate": 3.146782936601782e-05, "loss": 0.2853, "step": 1041 }, { "epoch": 0.6535988709424494, "grad_norm": 1.0105746984481812, "learning_rate": 3.145117500949988e-05, "loss": 0.2751, "step": 1042 }, { "epoch": 0.6542261251372119, "grad_norm": 1.307153344154358, "learning_rate": 3.143450883153086e-05, "loss": 0.2934, "step": 1043 }, { "epoch": 0.6548533793319743, "grad_norm": 1.1009116172790527, "learning_rate": 3.1417830849315835e-05, "loss": 0.2827, "step": 1044 }, { "epoch": 0.6554806335267367, "grad_norm": 0.9631924629211426, "learning_rate": 3.140114108007206e-05, "loss": 0.284, "step": 1045 }, { "epoch": 0.6561078877214991, "grad_norm": 1.0630031824111938, "learning_rate": 3.1384439541029004e-05, "loss": 0.2785, "step": 1046 }, { "epoch": 0.6567351419162616, "grad_norm": 0.908596396446228, "learning_rate": 3.1367726249428224e-05, "loss": 0.2668, "step": 1047 }, { "epoch": 0.657362396111024, "grad_norm": 0.9869924783706665, "learning_rate": 3.135100122252345e-05, "loss": 0.2626, "step": 1048 }, { "epoch": 0.6579896503057864, "grad_norm": 0.807806670665741, "learning_rate": 3.133426447758052e-05, "loss": 0.2776, "step": 1049 }, { "epoch": 0.6586169045005489, "grad_norm": 0.930079996585846, "learning_rate": 3.131751603187734e-05, "loss": 0.2356, "step": 1050 }, { "epoch": 0.6592441586953113, "grad_norm": 0.9175042510032654, "learning_rate": 3.130075590270396e-05, "loss": 0.2663, "step": 1051 }, { "epoch": 0.6598714128900737, "grad_norm": 0.9869060516357422, "learning_rate": 3.1283984107362426e-05, "loss": 0.254, "step": 1052 }, { "epoch": 0.6604986670848362, "grad_norm": 1.0021368265151978, "learning_rate": 3.1267200663166866e-05, "loss": 0.2679, "step": 1053 }, { "epoch": 0.6611259212795986, "grad_norm": 0.9547368884086609, "learning_rate": 3.12504055874434e-05, "loss": 0.2427, "step": 1054 }, { "epoch": 0.661753175474361, "grad_norm": 0.8755058646202087, "learning_rate": 3.12335988975302e-05, "loss": 0.2045, "step": 1055 }, { "epoch": 0.6623804296691234, "grad_norm": 1.010550618171692, "learning_rate": 3.121678061077739e-05, "loss": 0.2848, "step": 1056 }, { "epoch": 0.6630076838638859, "grad_norm": 0.9710196256637573, "learning_rate": 3.119995074454708e-05, "loss": 0.2818, "step": 1057 }, { "epoch": 0.6636349380586483, "grad_norm": 0.9580225944519043, "learning_rate": 3.118310931621333e-05, "loss": 0.2438, "step": 1058 }, { "epoch": 0.6642621922534107, "grad_norm": 0.9465218186378479, "learning_rate": 3.116625634316216e-05, "loss": 0.2275, "step": 1059 }, { "epoch": 0.6648894464481732, "grad_norm": 1.0197880268096924, "learning_rate": 3.114939184279146e-05, "loss": 0.2715, "step": 1060 }, { "epoch": 0.6655167006429356, "grad_norm": 1.1116091012954712, "learning_rate": 3.1132515832511064e-05, "loss": 0.2522, "step": 1061 }, { "epoch": 0.666143954837698, "grad_norm": 0.9236192107200623, "learning_rate": 3.111562832974266e-05, "loss": 0.266, "step": 1062 }, { "epoch": 0.6667712090324605, "grad_norm": 0.8119783401489258, "learning_rate": 3.109872935191983e-05, "loss": 0.2518, "step": 1063 }, { "epoch": 0.6673984632272228, "grad_norm": 1.0820034742355347, "learning_rate": 3.108181891648796e-05, "loss": 0.2845, "step": 1064 }, { "epoch": 0.6680257174219852, "grad_norm": 1.0736851692199707, "learning_rate": 3.106489704090431e-05, "loss": 0.2814, "step": 1065 }, { "epoch": 0.6686529716167476, "grad_norm": 1.084129810333252, "learning_rate": 3.104796374263791e-05, "loss": 0.2776, "step": 1066 }, { "epoch": 0.6692802258115101, "grad_norm": 1.1576014757156372, "learning_rate": 3.1031019039169616e-05, "loss": 0.3009, "step": 1067 }, { "epoch": 0.6699074800062725, "grad_norm": 0.9448037147521973, "learning_rate": 3.101406294799203e-05, "loss": 0.2486, "step": 1068 }, { "epoch": 0.6705347342010349, "grad_norm": 0.9121865034103394, "learning_rate": 3.0997095486609527e-05, "loss": 0.2472, "step": 1069 }, { "epoch": 0.6711619883957974, "grad_norm": 0.9719688892364502, "learning_rate": 3.098011667253821e-05, "loss": 0.2833, "step": 1070 }, { "epoch": 0.6717892425905598, "grad_norm": 1.2267473936080933, "learning_rate": 3.096312652330592e-05, "loss": 0.2813, "step": 1071 }, { "epoch": 0.6724164967853222, "grad_norm": 1.098172903060913, "learning_rate": 3.0946125056452164e-05, "loss": 0.2527, "step": 1072 }, { "epoch": 0.6730437509800847, "grad_norm": 1.091026782989502, "learning_rate": 3.092911228952817e-05, "loss": 0.2897, "step": 1073 }, { "epoch": 0.6736710051748471, "grad_norm": 1.0833760499954224, "learning_rate": 3.0912088240096814e-05, "loss": 0.2932, "step": 1074 }, { "epoch": 0.6742982593696095, "grad_norm": 0.9569733142852783, "learning_rate": 3.0895052925732625e-05, "loss": 0.238, "step": 1075 }, { "epoch": 0.6749255135643719, "grad_norm": 0.8709613084793091, "learning_rate": 3.087800636402174e-05, "loss": 0.2215, "step": 1076 }, { "epoch": 0.6755527677591344, "grad_norm": 1.1714290380477905, "learning_rate": 3.0860948572561956e-05, "loss": 0.265, "step": 1077 }, { "epoch": 0.6761800219538968, "grad_norm": 1.476657509803772, "learning_rate": 3.08438795689626e-05, "loss": 0.2577, "step": 1078 }, { "epoch": 0.6768072761486592, "grad_norm": 1.0154451131820679, "learning_rate": 3.082679937084462e-05, "loss": 0.2584, "step": 1079 }, { "epoch": 0.6774345303434217, "grad_norm": 1.0655109882354736, "learning_rate": 3.080970799584051e-05, "loss": 0.2903, "step": 1080 }, { "epoch": 0.6780617845381841, "grad_norm": 0.8489439487457275, "learning_rate": 3.07926054615943e-05, "loss": 0.2663, "step": 1081 }, { "epoch": 0.6786890387329465, "grad_norm": 1.0146068334579468, "learning_rate": 3.0775491785761524e-05, "loss": 0.2731, "step": 1082 }, { "epoch": 0.679316292927709, "grad_norm": 1.1330286264419556, "learning_rate": 3.075836698600924e-05, "loss": 0.2706, "step": 1083 }, { "epoch": 0.6799435471224714, "grad_norm": 0.9808182716369629, "learning_rate": 3.074123108001597e-05, "loss": 0.2444, "step": 1084 }, { "epoch": 0.6805708013172338, "grad_norm": 0.9529983401298523, "learning_rate": 3.072408408547173e-05, "loss": 0.2646, "step": 1085 }, { "epoch": 0.6811980555119962, "grad_norm": 1.1290792226791382, "learning_rate": 3.070692602007797e-05, "loss": 0.2799, "step": 1086 }, { "epoch": 0.6818253097067587, "grad_norm": 0.9760487675666809, "learning_rate": 3.0689756901547546e-05, "loss": 0.2323, "step": 1087 }, { "epoch": 0.6824525639015211, "grad_norm": 1.0980072021484375, "learning_rate": 3.067257674760475e-05, "loss": 0.2705, "step": 1088 }, { "epoch": 0.6830798180962835, "grad_norm": 0.9709072709083557, "learning_rate": 3.065538557598527e-05, "loss": 0.2588, "step": 1089 }, { "epoch": 0.683707072291046, "grad_norm": 0.9089717864990234, "learning_rate": 3.063818340443614e-05, "loss": 0.199, "step": 1090 }, { "epoch": 0.6843343264858084, "grad_norm": 0.8791358470916748, "learning_rate": 3.0620970250715784e-05, "loss": 0.2476, "step": 1091 }, { "epoch": 0.6849615806805708, "grad_norm": 1.0623133182525635, "learning_rate": 3.060374613259394e-05, "loss": 0.2524, "step": 1092 }, { "epoch": 0.6855888348753332, "grad_norm": 1.2946134805679321, "learning_rate": 3.058651106785167e-05, "loss": 0.3077, "step": 1093 }, { "epoch": 0.6862160890700957, "grad_norm": 1.0471159219741821, "learning_rate": 3.056926507428134e-05, "loss": 0.2481, "step": 1094 }, { "epoch": 0.6868433432648581, "grad_norm": 0.9360001683235168, "learning_rate": 3.0552008169686595e-05, "loss": 0.2384, "step": 1095 }, { "epoch": 0.6874705974596205, "grad_norm": 1.0947415828704834, "learning_rate": 3.0534740371882335e-05, "loss": 0.2579, "step": 1096 }, { "epoch": 0.688097851654383, "grad_norm": 1.0543622970581055, "learning_rate": 3.0517461698694737e-05, "loss": 0.2634, "step": 1097 }, { "epoch": 0.6887251058491454, "grad_norm": 0.9454314708709717, "learning_rate": 3.0500172167961168e-05, "loss": 0.2363, "step": 1098 }, { "epoch": 0.6893523600439078, "grad_norm": 0.9035005569458008, "learning_rate": 3.048287179753023e-05, "loss": 0.2573, "step": 1099 }, { "epoch": 0.6899796142386703, "grad_norm": 0.9288584589958191, "learning_rate": 3.046556060526169e-05, "loss": 0.2871, "step": 1100 }, { "epoch": 0.6906068684334327, "grad_norm": 0.9902932047843933, "learning_rate": 3.0448238609026516e-05, "loss": 0.2828, "step": 1101 }, { "epoch": 0.6912341226281951, "grad_norm": 0.7389153838157654, "learning_rate": 3.0430905826706807e-05, "loss": 0.222, "step": 1102 }, { "epoch": 0.6918613768229575, "grad_norm": 0.9360471963882446, "learning_rate": 3.0413562276195803e-05, "loss": 0.2641, "step": 1103 }, { "epoch": 0.69248863101772, "grad_norm": 0.9741581082344055, "learning_rate": 3.0396207975397868e-05, "loss": 0.2155, "step": 1104 }, { "epoch": 0.6931158852124824, "grad_norm": 0.8907262086868286, "learning_rate": 3.037884294222845e-05, "loss": 0.2204, "step": 1105 }, { "epoch": 0.6937431394072447, "grad_norm": 0.9536752700805664, "learning_rate": 3.0361467194614084e-05, "loss": 0.2402, "step": 1106 }, { "epoch": 0.6943703936020073, "grad_norm": 0.9764878153800964, "learning_rate": 3.034408075049238e-05, "loss": 0.2383, "step": 1107 }, { "epoch": 0.6949976477967696, "grad_norm": 1.0302314758300781, "learning_rate": 3.0326683627811966e-05, "loss": 0.2781, "step": 1108 }, { "epoch": 0.695624901991532, "grad_norm": 0.9398666620254517, "learning_rate": 3.03092758445325e-05, "loss": 0.2886, "step": 1109 }, { "epoch": 0.6962521561862945, "grad_norm": 1.0185917615890503, "learning_rate": 3.0291857418624648e-05, "loss": 0.3049, "step": 1110 }, { "epoch": 0.6968794103810569, "grad_norm": 1.0808707475662231, "learning_rate": 3.0274428368070076e-05, "loss": 0.2757, "step": 1111 }, { "epoch": 0.6975066645758193, "grad_norm": 0.9350001811981201, "learning_rate": 3.0256988710861395e-05, "loss": 0.2507, "step": 1112 }, { "epoch": 0.6981339187705817, "grad_norm": 0.9316309690475464, "learning_rate": 3.023953846500219e-05, "loss": 0.2604, "step": 1113 }, { "epoch": 0.6987611729653442, "grad_norm": 1.0132993459701538, "learning_rate": 3.0222077648506958e-05, "loss": 0.2496, "step": 1114 }, { "epoch": 0.6993884271601066, "grad_norm": 1.0161550045013428, "learning_rate": 3.0204606279401107e-05, "loss": 0.2851, "step": 1115 }, { "epoch": 0.700015681354869, "grad_norm": 1.0576071739196777, "learning_rate": 3.0187124375720952e-05, "loss": 0.2286, "step": 1116 }, { "epoch": 0.7006429355496315, "grad_norm": 1.0544531345367432, "learning_rate": 3.016963195551368e-05, "loss": 0.3107, "step": 1117 }, { "epoch": 0.7012701897443939, "grad_norm": 0.9492296576499939, "learning_rate": 3.0152129036837336e-05, "loss": 0.2332, "step": 1118 }, { "epoch": 0.7018974439391563, "grad_norm": 0.9895444512367249, "learning_rate": 3.0134615637760785e-05, "loss": 0.2095, "step": 1119 }, { "epoch": 0.7025246981339188, "grad_norm": 0.8719656467437744, "learning_rate": 3.011709177636374e-05, "loss": 0.2219, "step": 1120 }, { "epoch": 0.7031519523286812, "grad_norm": 0.9544978141784668, "learning_rate": 3.0099557470736695e-05, "loss": 0.2206, "step": 1121 }, { "epoch": 0.7037792065234436, "grad_norm": 1.007846474647522, "learning_rate": 3.0082012738980926e-05, "loss": 0.2551, "step": 1122 }, { "epoch": 0.704406460718206, "grad_norm": 0.8393685817718506, "learning_rate": 3.0064457599208487e-05, "loss": 0.2535, "step": 1123 }, { "epoch": 0.7050337149129685, "grad_norm": 0.8983904719352722, "learning_rate": 3.0046892069542154e-05, "loss": 0.2584, "step": 1124 }, { "epoch": 0.7056609691077309, "grad_norm": 0.8997087478637695, "learning_rate": 3.0029316168115445e-05, "loss": 0.2537, "step": 1125 }, { "epoch": 0.7062882233024933, "grad_norm": 1.1286370754241943, "learning_rate": 3.00117299130726e-05, "loss": 0.2693, "step": 1126 }, { "epoch": 0.7069154774972558, "grad_norm": 0.8487894535064697, "learning_rate": 2.9994133322568505e-05, "loss": 0.2597, "step": 1127 }, { "epoch": 0.7075427316920182, "grad_norm": 0.999539315700531, "learning_rate": 2.9976526414768745e-05, "loss": 0.2686, "step": 1128 }, { "epoch": 0.7081699858867806, "grad_norm": 0.8149359226226807, "learning_rate": 2.9958909207849563e-05, "loss": 0.2248, "step": 1129 }, { "epoch": 0.7087972400815431, "grad_norm": 1.0735280513763428, "learning_rate": 2.994128171999781e-05, "loss": 0.2786, "step": 1130 }, { "epoch": 0.7094244942763055, "grad_norm": 0.9906747341156006, "learning_rate": 2.9923643969410973e-05, "loss": 0.2675, "step": 1131 }, { "epoch": 0.7100517484710679, "grad_norm": 0.9118907451629639, "learning_rate": 2.9905995974297107e-05, "loss": 0.2361, "step": 1132 }, { "epoch": 0.7106790026658303, "grad_norm": 1.1021521091461182, "learning_rate": 2.9888337752874875e-05, "loss": 0.2695, "step": 1133 }, { "epoch": 0.7113062568605928, "grad_norm": 0.9408007264137268, "learning_rate": 2.9870669323373473e-05, "loss": 0.2801, "step": 1134 }, { "epoch": 0.7119335110553552, "grad_norm": 1.0125051736831665, "learning_rate": 2.9852990704032643e-05, "loss": 0.2851, "step": 1135 }, { "epoch": 0.7125607652501176, "grad_norm": 0.8995490074157715, "learning_rate": 2.9835301913102644e-05, "loss": 0.2445, "step": 1136 }, { "epoch": 0.7131880194448801, "grad_norm": 1.1181654930114746, "learning_rate": 2.9817602968844236e-05, "loss": 0.2898, "step": 1137 }, { "epoch": 0.7138152736396425, "grad_norm": 0.8702493906021118, "learning_rate": 2.979989388952867e-05, "loss": 0.2305, "step": 1138 }, { "epoch": 0.7144425278344049, "grad_norm": 0.9106255769729614, "learning_rate": 2.9782174693437654e-05, "loss": 0.2199, "step": 1139 }, { "epoch": 0.7150697820291673, "grad_norm": 1.2732893228530884, "learning_rate": 2.9764445398863328e-05, "loss": 0.3188, "step": 1140 }, { "epoch": 0.7156970362239298, "grad_norm": 0.9374165534973145, "learning_rate": 2.974670602410827e-05, "loss": 0.2242, "step": 1141 }, { "epoch": 0.7163242904186922, "grad_norm": 0.8125789761543274, "learning_rate": 2.972895658748546e-05, "loss": 0.2217, "step": 1142 }, { "epoch": 0.7169515446134546, "grad_norm": 0.9294368624687195, "learning_rate": 2.971119710731827e-05, "loss": 0.2253, "step": 1143 }, { "epoch": 0.7175787988082171, "grad_norm": 1.0166178941726685, "learning_rate": 2.9693427601940433e-05, "loss": 0.2191, "step": 1144 }, { "epoch": 0.7182060530029795, "grad_norm": 1.0041168928146362, "learning_rate": 2.9675648089696038e-05, "loss": 0.2146, "step": 1145 }, { "epoch": 0.7188333071977419, "grad_norm": 1.0499262809753418, "learning_rate": 2.9657858588939502e-05, "loss": 0.2383, "step": 1146 }, { "epoch": 0.7194605613925044, "grad_norm": 1.1398004293441772, "learning_rate": 2.9640059118035555e-05, "loss": 0.2968, "step": 1147 }, { "epoch": 0.7200878155872668, "grad_norm": 0.99320387840271, "learning_rate": 2.9622249695359206e-05, "loss": 0.2769, "step": 1148 }, { "epoch": 0.7207150697820291, "grad_norm": 1.0669426918029785, "learning_rate": 2.9604430339295763e-05, "loss": 0.2642, "step": 1149 }, { "epoch": 0.7213423239767915, "grad_norm": 1.0032626390457153, "learning_rate": 2.958660106824076e-05, "loss": 0.2761, "step": 1150 }, { "epoch": 0.721969578171554, "grad_norm": 0.8565042018890381, "learning_rate": 2.9568761900599992e-05, "loss": 0.2337, "step": 1151 }, { "epoch": 0.7225968323663164, "grad_norm": 0.980816125869751, "learning_rate": 2.9550912854789463e-05, "loss": 0.2272, "step": 1152 }, { "epoch": 0.7232240865610788, "grad_norm": 0.967366635799408, "learning_rate": 2.953305394923536e-05, "loss": 0.2384, "step": 1153 }, { "epoch": 0.7238513407558413, "grad_norm": 0.904085099697113, "learning_rate": 2.9515185202374068e-05, "loss": 0.2469, "step": 1154 }, { "epoch": 0.7244785949506037, "grad_norm": 0.9692016243934631, "learning_rate": 2.949730663265212e-05, "loss": 0.2576, "step": 1155 }, { "epoch": 0.7251058491453661, "grad_norm": 0.9735581278800964, "learning_rate": 2.9479418258526196e-05, "loss": 0.2501, "step": 1156 }, { "epoch": 0.7257331033401286, "grad_norm": 0.8929852247238159, "learning_rate": 2.9461520098463085e-05, "loss": 0.2195, "step": 1157 }, { "epoch": 0.726360357534891, "grad_norm": 1.1679868698120117, "learning_rate": 2.9443612170939705e-05, "loss": 0.2593, "step": 1158 }, { "epoch": 0.7269876117296534, "grad_norm": 0.9278621077537537, "learning_rate": 2.9425694494443024e-05, "loss": 0.2605, "step": 1159 }, { "epoch": 0.7276148659244158, "grad_norm": 0.9248387813568115, "learning_rate": 2.9407767087470094e-05, "loss": 0.2295, "step": 1160 }, { "epoch": 0.7282421201191783, "grad_norm": 0.9732686281204224, "learning_rate": 2.9389829968528013e-05, "loss": 0.233, "step": 1161 }, { "epoch": 0.7288693743139407, "grad_norm": 0.8317962288856506, "learning_rate": 2.9371883156133894e-05, "loss": 0.2278, "step": 1162 }, { "epoch": 0.7294966285087031, "grad_norm": 0.8876512050628662, "learning_rate": 2.935392666881487e-05, "loss": 0.2412, "step": 1163 }, { "epoch": 0.7301238827034656, "grad_norm": 0.89940345287323, "learning_rate": 2.9335960525108052e-05, "loss": 0.2202, "step": 1164 }, { "epoch": 0.730751136898228, "grad_norm": 0.8470626473426819, "learning_rate": 2.9317984743560523e-05, "loss": 0.22, "step": 1165 }, { "epoch": 0.7313783910929904, "grad_norm": 1.0272356271743774, "learning_rate": 2.9299999342729308e-05, "loss": 0.3156, "step": 1166 }, { "epoch": 0.7320056452877529, "grad_norm": 0.9586045742034912, "learning_rate": 2.9282004341181384e-05, "loss": 0.2302, "step": 1167 }, { "epoch": 0.7326328994825153, "grad_norm": 1.1000994443893433, "learning_rate": 2.9263999757493616e-05, "loss": 0.2551, "step": 1168 }, { "epoch": 0.7332601536772777, "grad_norm": 1.0782554149627686, "learning_rate": 2.9245985610252767e-05, "loss": 0.2966, "step": 1169 }, { "epoch": 0.7338874078720401, "grad_norm": 1.1621781587600708, "learning_rate": 2.922796191805548e-05, "loss": 0.2416, "step": 1170 }, { "epoch": 0.7345146620668026, "grad_norm": 0.9000127911567688, "learning_rate": 2.9209928699508244e-05, "loss": 0.2071, "step": 1171 }, { "epoch": 0.735141916261565, "grad_norm": 1.0584723949432373, "learning_rate": 2.9191885973227392e-05, "loss": 0.247, "step": 1172 }, { "epoch": 0.7357691704563274, "grad_norm": 1.134102702140808, "learning_rate": 2.9173833757839055e-05, "loss": 0.264, "step": 1173 }, { "epoch": 0.7363964246510899, "grad_norm": 0.8952019214630127, "learning_rate": 2.9155772071979176e-05, "loss": 0.2123, "step": 1174 }, { "epoch": 0.7370236788458523, "grad_norm": 0.9572092294692993, "learning_rate": 2.9137700934293472e-05, "loss": 0.2525, "step": 1175 }, { "epoch": 0.7376509330406147, "grad_norm": 1.1004743576049805, "learning_rate": 2.9119620363437412e-05, "loss": 0.2701, "step": 1176 }, { "epoch": 0.7382781872353771, "grad_norm": 0.9321638345718384, "learning_rate": 2.9101530378076203e-05, "loss": 0.2122, "step": 1177 }, { "epoch": 0.7389054414301396, "grad_norm": 0.930284321308136, "learning_rate": 2.9083430996884777e-05, "loss": 0.2247, "step": 1178 }, { "epoch": 0.739532695624902, "grad_norm": 0.9212769865989685, "learning_rate": 2.906532223854776e-05, "loss": 0.2319, "step": 1179 }, { "epoch": 0.7401599498196644, "grad_norm": 0.9486585259437561, "learning_rate": 2.9047204121759458e-05, "loss": 0.2576, "step": 1180 }, { "epoch": 0.7407872040144269, "grad_norm": 0.960632860660553, "learning_rate": 2.9029076665223855e-05, "loss": 0.2629, "step": 1181 }, { "epoch": 0.7414144582091893, "grad_norm": 0.950925350189209, "learning_rate": 2.9010939887654547e-05, "loss": 0.2804, "step": 1182 }, { "epoch": 0.7420417124039517, "grad_norm": 0.9938424825668335, "learning_rate": 2.8992793807774768e-05, "loss": 0.2598, "step": 1183 }, { "epoch": 0.7426689665987142, "grad_norm": 0.8266270160675049, "learning_rate": 2.8974638444317373e-05, "loss": 0.2562, "step": 1184 }, { "epoch": 0.7432962207934766, "grad_norm": 0.9874655604362488, "learning_rate": 2.8956473816024764e-05, "loss": 0.2668, "step": 1185 }, { "epoch": 0.743923474988239, "grad_norm": 1.0183908939361572, "learning_rate": 2.893829994164893e-05, "loss": 0.2724, "step": 1186 }, { "epoch": 0.7445507291830014, "grad_norm": 0.9691697955131531, "learning_rate": 2.8920116839951408e-05, "loss": 0.2325, "step": 1187 }, { "epoch": 0.7451779833777639, "grad_norm": 1.0251258611679077, "learning_rate": 2.890192452970325e-05, "loss": 0.2714, "step": 1188 }, { "epoch": 0.7458052375725263, "grad_norm": 1.0905992984771729, "learning_rate": 2.8883723029685017e-05, "loss": 0.2714, "step": 1189 }, { "epoch": 0.7464324917672887, "grad_norm": 1.0011322498321533, "learning_rate": 2.8865512358686757e-05, "loss": 0.2719, "step": 1190 }, { "epoch": 0.7470597459620512, "grad_norm": 1.1232945919036865, "learning_rate": 2.8847292535507994e-05, "loss": 0.2959, "step": 1191 }, { "epoch": 0.7476870001568136, "grad_norm": 1.031429648399353, "learning_rate": 2.8829063578957682e-05, "loss": 0.2335, "step": 1192 }, { "epoch": 0.748314254351576, "grad_norm": 0.9283905625343323, "learning_rate": 2.8810825507854227e-05, "loss": 0.2574, "step": 1193 }, { "epoch": 0.7489415085463385, "grad_norm": 0.9369612336158752, "learning_rate": 2.8792578341025418e-05, "loss": 0.2666, "step": 1194 }, { "epoch": 0.7495687627411008, "grad_norm": 1.0052746534347534, "learning_rate": 2.877432209730846e-05, "loss": 0.251, "step": 1195 }, { "epoch": 0.7501960169358632, "grad_norm": 1.0359718799591064, "learning_rate": 2.8756056795549912e-05, "loss": 0.2499, "step": 1196 }, { "epoch": 0.7508232711306256, "grad_norm": 0.9635895490646362, "learning_rate": 2.873778245460569e-05, "loss": 0.2453, "step": 1197 }, { "epoch": 0.7514505253253881, "grad_norm": 0.9521752595901489, "learning_rate": 2.871949909334103e-05, "loss": 0.2607, "step": 1198 }, { "epoch": 0.7520777795201505, "grad_norm": 1.0811841487884521, "learning_rate": 2.8701206730630507e-05, "loss": 0.3175, "step": 1199 }, { "epoch": 0.7527050337149129, "grad_norm": 0.9252716302871704, "learning_rate": 2.8682905385357954e-05, "loss": 0.2498, "step": 1200 }, { "epoch": 0.7533322879096754, "grad_norm": 0.9174233078956604, "learning_rate": 2.8664595076416497e-05, "loss": 0.2515, "step": 1201 }, { "epoch": 0.7539595421044378, "grad_norm": 0.865044355392456, "learning_rate": 2.8646275822708518e-05, "loss": 0.2067, "step": 1202 }, { "epoch": 0.7545867962992002, "grad_norm": 0.9602584838867188, "learning_rate": 2.8627947643145625e-05, "loss": 0.2564, "step": 1203 }, { "epoch": 0.7552140504939627, "grad_norm": 0.9477353692054749, "learning_rate": 2.8609610556648642e-05, "loss": 0.2554, "step": 1204 }, { "epoch": 0.7558413046887251, "grad_norm": 0.9231157898902893, "learning_rate": 2.8591264582147594e-05, "loss": 0.2355, "step": 1205 }, { "epoch": 0.7564685588834875, "grad_norm": 0.924030065536499, "learning_rate": 2.857290973858167e-05, "loss": 0.2244, "step": 1206 }, { "epoch": 0.7570958130782499, "grad_norm": 0.9852567911148071, "learning_rate": 2.8554546044899223e-05, "loss": 0.2543, "step": 1207 }, { "epoch": 0.7577230672730124, "grad_norm": 1.033445954322815, "learning_rate": 2.8536173520057744e-05, "loss": 0.3014, "step": 1208 }, { "epoch": 0.7583503214677748, "grad_norm": 0.8468286991119385, "learning_rate": 2.851779218302383e-05, "loss": 0.2266, "step": 1209 }, { "epoch": 0.7589775756625372, "grad_norm": 1.0191102027893066, "learning_rate": 2.849940205277319e-05, "loss": 0.2342, "step": 1210 }, { "epoch": 0.7596048298572997, "grad_norm": 0.8493043780326843, "learning_rate": 2.8481003148290606e-05, "loss": 0.2553, "step": 1211 }, { "epoch": 0.7602320840520621, "grad_norm": 0.855071485042572, "learning_rate": 2.84625954885699e-05, "loss": 0.2524, "step": 1212 }, { "epoch": 0.7608593382468245, "grad_norm": 1.0303425788879395, "learning_rate": 2.8444179092613953e-05, "loss": 0.2379, "step": 1213 }, { "epoch": 0.761486592441587, "grad_norm": 0.9178991913795471, "learning_rate": 2.842575397943467e-05, "loss": 0.2295, "step": 1214 }, { "epoch": 0.7621138466363494, "grad_norm": 0.869573712348938, "learning_rate": 2.8407320168052925e-05, "loss": 0.2125, "step": 1215 }, { "epoch": 0.7627411008311118, "grad_norm": 0.8731804490089417, "learning_rate": 2.838887767749861e-05, "loss": 0.2302, "step": 1216 }, { "epoch": 0.7633683550258742, "grad_norm": 0.7152383923530579, "learning_rate": 2.8370426526810545e-05, "loss": 0.1908, "step": 1217 }, { "epoch": 0.7639956092206367, "grad_norm": 0.9450547099113464, "learning_rate": 2.8351966735036502e-05, "loss": 0.2948, "step": 1218 }, { "epoch": 0.7646228634153991, "grad_norm": 0.875137209892273, "learning_rate": 2.8333498321233183e-05, "loss": 0.2154, "step": 1219 }, { "epoch": 0.7652501176101615, "grad_norm": 0.8878747820854187, "learning_rate": 2.831502130446617e-05, "loss": 0.236, "step": 1220 }, { "epoch": 0.765877371804924, "grad_norm": 0.898963987827301, "learning_rate": 2.8296535703809947e-05, "loss": 0.271, "step": 1221 }, { "epoch": 0.7665046259996864, "grad_norm": 0.9544633626937866, "learning_rate": 2.8278041538347847e-05, "loss": 0.2678, "step": 1222 }, { "epoch": 0.7671318801944488, "grad_norm": 0.8638365268707275, "learning_rate": 2.8259538827172047e-05, "loss": 0.2071, "step": 1223 }, { "epoch": 0.7677591343892112, "grad_norm": 0.8723821640014648, "learning_rate": 2.8241027589383553e-05, "loss": 0.233, "step": 1224 }, { "epoch": 0.7683863885839737, "grad_norm": 0.843165397644043, "learning_rate": 2.8222507844092163e-05, "loss": 0.2341, "step": 1225 }, { "epoch": 0.7690136427787361, "grad_norm": 0.994552731513977, "learning_rate": 2.8203979610416457e-05, "loss": 0.2601, "step": 1226 }, { "epoch": 0.7696408969734985, "grad_norm": 0.9148455858230591, "learning_rate": 2.8185442907483786e-05, "loss": 0.2302, "step": 1227 }, { "epoch": 0.770268151168261, "grad_norm": 0.8745561242103577, "learning_rate": 2.816689775443025e-05, "loss": 0.2478, "step": 1228 }, { "epoch": 0.7708954053630234, "grad_norm": 0.9031297564506531, "learning_rate": 2.8148344170400654e-05, "loss": 0.2429, "step": 1229 }, { "epoch": 0.7715226595577858, "grad_norm": 1.0213173627853394, "learning_rate": 2.812978217454851e-05, "loss": 0.2483, "step": 1230 }, { "epoch": 0.7721499137525483, "grad_norm": 0.9234693646430969, "learning_rate": 2.811121178603604e-05, "loss": 0.2613, "step": 1231 }, { "epoch": 0.7727771679473107, "grad_norm": 0.8913220167160034, "learning_rate": 2.809263302403409e-05, "loss": 0.2203, "step": 1232 }, { "epoch": 0.7734044221420731, "grad_norm": 0.9702144861221313, "learning_rate": 2.8074045907722176e-05, "loss": 0.2684, "step": 1233 }, { "epoch": 0.7740316763368355, "grad_norm": 0.8771833777427673, "learning_rate": 2.805545045628844e-05, "loss": 0.2265, "step": 1234 }, { "epoch": 0.774658930531598, "grad_norm": 0.8456547260284424, "learning_rate": 2.8036846688929614e-05, "loss": 0.2088, "step": 1235 }, { "epoch": 0.7752861847263604, "grad_norm": 0.9730178713798523, "learning_rate": 2.8018234624851016e-05, "loss": 0.2324, "step": 1236 }, { "epoch": 0.7759134389211227, "grad_norm": 1.0039517879486084, "learning_rate": 2.7999614283266558e-05, "loss": 0.2594, "step": 1237 }, { "epoch": 0.7765406931158852, "grad_norm": 0.9516322612762451, "learning_rate": 2.7980985683398648e-05, "loss": 0.21, "step": 1238 }, { "epoch": 0.7771679473106476, "grad_norm": 0.9338817596435547, "learning_rate": 2.7962348844478255e-05, "loss": 0.229, "step": 1239 }, { "epoch": 0.77779520150541, "grad_norm": 0.8492531776428223, "learning_rate": 2.794370378574485e-05, "loss": 0.2305, "step": 1240 }, { "epoch": 0.7784224557001725, "grad_norm": 1.0080124139785767, "learning_rate": 2.7925050526446377e-05, "loss": 0.2237, "step": 1241 }, { "epoch": 0.7790497098949349, "grad_norm": 1.0470832586288452, "learning_rate": 2.7906389085839248e-05, "loss": 0.2628, "step": 1242 }, { "epoch": 0.7796769640896973, "grad_norm": 1.0095301866531372, "learning_rate": 2.7887719483188335e-05, "loss": 0.2941, "step": 1243 }, { "epoch": 0.7803042182844597, "grad_norm": 0.9931348562240601, "learning_rate": 2.7869041737766916e-05, "loss": 0.2444, "step": 1244 }, { "epoch": 0.7809314724792222, "grad_norm": 1.0151265859603882, "learning_rate": 2.7850355868856697e-05, "loss": 0.2643, "step": 1245 }, { "epoch": 0.7815587266739846, "grad_norm": 0.9596518278121948, "learning_rate": 2.7831661895747753e-05, "loss": 0.2752, "step": 1246 }, { "epoch": 0.782185980868747, "grad_norm": 0.9194288849830627, "learning_rate": 2.781295983773853e-05, "loss": 0.234, "step": 1247 }, { "epoch": 0.7828132350635095, "grad_norm": 0.9633300304412842, "learning_rate": 2.779424971413582e-05, "loss": 0.2282, "step": 1248 }, { "epoch": 0.7834404892582719, "grad_norm": 0.8626919388771057, "learning_rate": 2.7775531544254735e-05, "loss": 0.2708, "step": 1249 }, { "epoch": 0.7840677434530343, "grad_norm": 0.9322224855422974, "learning_rate": 2.7756805347418717e-05, "loss": 0.2398, "step": 1250 }, { "epoch": 0.7846949976477968, "grad_norm": 0.949925422668457, "learning_rate": 2.7738071142959468e-05, "loss": 0.2425, "step": 1251 }, { "epoch": 0.7853222518425592, "grad_norm": 0.9091950058937073, "learning_rate": 2.7719328950216967e-05, "loss": 0.2028, "step": 1252 }, { "epoch": 0.7859495060373216, "grad_norm": 0.9050923585891724, "learning_rate": 2.770057878853944e-05, "loss": 0.2661, "step": 1253 }, { "epoch": 0.786576760232084, "grad_norm": 0.8346881866455078, "learning_rate": 2.7681820677283338e-05, "loss": 0.1991, "step": 1254 }, { "epoch": 0.7872040144268465, "grad_norm": 0.9741007685661316, "learning_rate": 2.7663054635813325e-05, "loss": 0.262, "step": 1255 }, { "epoch": 0.7878312686216089, "grad_norm": 0.8977965116500854, "learning_rate": 2.7644280683502243e-05, "loss": 0.2438, "step": 1256 }, { "epoch": 0.7884585228163713, "grad_norm": 1.0643701553344727, "learning_rate": 2.762549883973111e-05, "loss": 0.2365, "step": 1257 }, { "epoch": 0.7890857770111338, "grad_norm": 1.0908955335617065, "learning_rate": 2.7606709123889067e-05, "loss": 0.2488, "step": 1258 }, { "epoch": 0.7897130312058962, "grad_norm": 0.9831537008285522, "learning_rate": 2.758791155537341e-05, "loss": 0.2613, "step": 1259 }, { "epoch": 0.7903402854006586, "grad_norm": 1.0671886205673218, "learning_rate": 2.7569106153589536e-05, "loss": 0.2367, "step": 1260 }, { "epoch": 0.7909675395954211, "grad_norm": 1.1107702255249023, "learning_rate": 2.755029293795092e-05, "loss": 0.2667, "step": 1261 }, { "epoch": 0.7915947937901835, "grad_norm": 1.015174150466919, "learning_rate": 2.7531471927879104e-05, "loss": 0.2239, "step": 1262 }, { "epoch": 0.7922220479849459, "grad_norm": 0.8444801568984985, "learning_rate": 2.7512643142803676e-05, "loss": 0.2036, "step": 1263 }, { "epoch": 0.7928493021797083, "grad_norm": 0.9486952424049377, "learning_rate": 2.7493806602162262e-05, "loss": 0.2306, "step": 1264 }, { "epoch": 0.7934765563744708, "grad_norm": 0.963703989982605, "learning_rate": 2.747496232540048e-05, "loss": 0.2228, "step": 1265 }, { "epoch": 0.7941038105692332, "grad_norm": 1.0590280294418335, "learning_rate": 2.7456110331971945e-05, "loss": 0.2403, "step": 1266 }, { "epoch": 0.7947310647639956, "grad_norm": 1.037078619003296, "learning_rate": 2.7437250641338225e-05, "loss": 0.2553, "step": 1267 }, { "epoch": 0.7953583189587581, "grad_norm": 0.9628890752792358, "learning_rate": 2.7418383272968854e-05, "loss": 0.2267, "step": 1268 }, { "epoch": 0.7959855731535205, "grad_norm": 1.0467262268066406, "learning_rate": 2.7399508246341273e-05, "loss": 0.2447, "step": 1269 }, { "epoch": 0.7966128273482829, "grad_norm": 0.9027785658836365, "learning_rate": 2.7380625580940835e-05, "loss": 0.2427, "step": 1270 }, { "epoch": 0.7972400815430453, "grad_norm": 1.0007820129394531, "learning_rate": 2.7361735296260786e-05, "loss": 0.2593, "step": 1271 }, { "epoch": 0.7978673357378078, "grad_norm": 1.091827392578125, "learning_rate": 2.734283741180223e-05, "loss": 0.2685, "step": 1272 }, { "epoch": 0.7984945899325702, "grad_norm": 1.0432686805725098, "learning_rate": 2.7323931947074117e-05, "loss": 0.2617, "step": 1273 }, { "epoch": 0.7991218441273326, "grad_norm": 0.9992380738258362, "learning_rate": 2.7305018921593225e-05, "loss": 0.2396, "step": 1274 }, { "epoch": 0.7997490983220951, "grad_norm": 0.9518929123878479, "learning_rate": 2.7286098354884142e-05, "loss": 0.2642, "step": 1275 }, { "epoch": 0.8003763525168575, "grad_norm": 1.0347591638565063, "learning_rate": 2.726717026647922e-05, "loss": 0.2499, "step": 1276 }, { "epoch": 0.8010036067116199, "grad_norm": 0.9323564171791077, "learning_rate": 2.7248234675918605e-05, "loss": 0.265, "step": 1277 }, { "epoch": 0.8016308609063824, "grad_norm": 0.8762786388397217, "learning_rate": 2.722929160275017e-05, "loss": 0.2168, "step": 1278 }, { "epoch": 0.8022581151011448, "grad_norm": 0.8518358469009399, "learning_rate": 2.721034106652951e-05, "loss": 0.243, "step": 1279 }, { "epoch": 0.8028853692959071, "grad_norm": 0.8623441457748413, "learning_rate": 2.719138308681993e-05, "loss": 0.2518, "step": 1280 }, { "epoch": 0.8035126234906695, "grad_norm": 0.8846951723098755, "learning_rate": 2.7172417683192436e-05, "loss": 0.1975, "step": 1281 }, { "epoch": 0.804139877685432, "grad_norm": 0.8244737386703491, "learning_rate": 2.7153444875225667e-05, "loss": 0.2026, "step": 1282 }, { "epoch": 0.8047671318801944, "grad_norm": 1.1555349826812744, "learning_rate": 2.7134464682505926e-05, "loss": 0.2711, "step": 1283 }, { "epoch": 0.8053943860749568, "grad_norm": 0.9521437883377075, "learning_rate": 2.7115477124627134e-05, "loss": 0.2383, "step": 1284 }, { "epoch": 0.8060216402697193, "grad_norm": 0.9691430926322937, "learning_rate": 2.7096482221190807e-05, "loss": 0.2627, "step": 1285 }, { "epoch": 0.8066488944644817, "grad_norm": 0.9191070795059204, "learning_rate": 2.7077479991806062e-05, "loss": 0.1817, "step": 1286 }, { "epoch": 0.8072761486592441, "grad_norm": 1.0996882915496826, "learning_rate": 2.7058470456089576e-05, "loss": 0.2402, "step": 1287 }, { "epoch": 0.8079034028540066, "grad_norm": 0.908535361289978, "learning_rate": 2.703945363366555e-05, "loss": 0.2901, "step": 1288 }, { "epoch": 0.808530657048769, "grad_norm": 0.8575511574745178, "learning_rate": 2.7020429544165718e-05, "loss": 0.2502, "step": 1289 }, { "epoch": 0.8091579112435314, "grad_norm": 0.9514098167419434, "learning_rate": 2.7001398207229324e-05, "loss": 0.2376, "step": 1290 }, { "epoch": 0.8097851654382938, "grad_norm": 0.956186056137085, "learning_rate": 2.6982359642503082e-05, "loss": 0.2388, "step": 1291 }, { "epoch": 0.8104124196330563, "grad_norm": 1.0436739921569824, "learning_rate": 2.6963313869641177e-05, "loss": 0.2925, "step": 1292 }, { "epoch": 0.8110396738278187, "grad_norm": 0.7947832345962524, "learning_rate": 2.6944260908305233e-05, "loss": 0.2262, "step": 1293 }, { "epoch": 0.8116669280225811, "grad_norm": 0.7879627346992493, "learning_rate": 2.692520077816428e-05, "loss": 0.2211, "step": 1294 }, { "epoch": 0.8122941822173436, "grad_norm": 0.8735244870185852, "learning_rate": 2.690613349889478e-05, "loss": 0.2588, "step": 1295 }, { "epoch": 0.812921436412106, "grad_norm": 0.880125105381012, "learning_rate": 2.6887059090180542e-05, "loss": 0.2136, "step": 1296 }, { "epoch": 0.8135486906068684, "grad_norm": 0.7848929166793823, "learning_rate": 2.686797757171275e-05, "loss": 0.2106, "step": 1297 }, { "epoch": 0.8141759448016309, "grad_norm": 0.8269034624099731, "learning_rate": 2.6848888963189943e-05, "loss": 0.2001, "step": 1298 }, { "epoch": 0.8148031989963933, "grad_norm": 0.900639533996582, "learning_rate": 2.6829793284317945e-05, "loss": 0.2377, "step": 1299 }, { "epoch": 0.8154304531911557, "grad_norm": 0.8968613743782043, "learning_rate": 2.68106905548099e-05, "loss": 0.2387, "step": 1300 }, { "epoch": 0.8160577073859181, "grad_norm": 1.0977859497070312, "learning_rate": 2.679158079438624e-05, "loss": 0.2524, "step": 1301 }, { "epoch": 0.8166849615806806, "grad_norm": 1.0224528312683105, "learning_rate": 2.677246402277464e-05, "loss": 0.252, "step": 1302 }, { "epoch": 0.817312215775443, "grad_norm": 0.9429143667221069, "learning_rate": 2.6753340259710005e-05, "loss": 0.2295, "step": 1303 }, { "epoch": 0.8179394699702054, "grad_norm": 0.9691178798675537, "learning_rate": 2.673420952493448e-05, "loss": 0.2387, "step": 1304 }, { "epoch": 0.8185667241649679, "grad_norm": 0.8709486722946167, "learning_rate": 2.6715071838197393e-05, "loss": 0.1901, "step": 1305 }, { "epoch": 0.8191939783597303, "grad_norm": 0.9485816955566406, "learning_rate": 2.6695927219255248e-05, "loss": 0.26, "step": 1306 }, { "epoch": 0.8198212325544927, "grad_norm": 0.95992112159729, "learning_rate": 2.6676775687871716e-05, "loss": 0.2627, "step": 1307 }, { "epoch": 0.8204484867492552, "grad_norm": 1.0174671411514282, "learning_rate": 2.6657617263817587e-05, "loss": 0.2474, "step": 1308 }, { "epoch": 0.8210757409440176, "grad_norm": 1.0580779314041138, "learning_rate": 2.6638451966870788e-05, "loss": 0.2257, "step": 1309 }, { "epoch": 0.82170299513878, "grad_norm": 1.1434040069580078, "learning_rate": 2.6619279816816324e-05, "loss": 0.2668, "step": 1310 }, { "epoch": 0.8223302493335424, "grad_norm": 0.9524603486061096, "learning_rate": 2.660010083344628e-05, "loss": 0.222, "step": 1311 }, { "epoch": 0.8229575035283049, "grad_norm": 1.0641026496887207, "learning_rate": 2.65809150365598e-05, "loss": 0.2243, "step": 1312 }, { "epoch": 0.8235847577230673, "grad_norm": 0.9155853390693665, "learning_rate": 2.6561722445963054e-05, "loss": 0.2534, "step": 1313 }, { "epoch": 0.8242120119178297, "grad_norm": 0.8461087942123413, "learning_rate": 2.654252308146923e-05, "loss": 0.2423, "step": 1314 }, { "epoch": 0.8248392661125922, "grad_norm": 1.0586851835250854, "learning_rate": 2.6523316962898507e-05, "loss": 0.2554, "step": 1315 }, { "epoch": 0.8254665203073546, "grad_norm": 0.9544086456298828, "learning_rate": 2.650410411007804e-05, "loss": 0.2614, "step": 1316 }, { "epoch": 0.826093774502117, "grad_norm": 1.1493490934371948, "learning_rate": 2.6484884542841922e-05, "loss": 0.2487, "step": 1317 }, { "epoch": 0.8267210286968794, "grad_norm": 1.180282473564148, "learning_rate": 2.6465658281031205e-05, "loss": 0.2465, "step": 1318 }, { "epoch": 0.8273482828916419, "grad_norm": 0.8955004811286926, "learning_rate": 2.6446425344493825e-05, "loss": 0.2502, "step": 1319 }, { "epoch": 0.8279755370864043, "grad_norm": 0.9922782778739929, "learning_rate": 2.6427185753084622e-05, "loss": 0.1815, "step": 1320 }, { "epoch": 0.8286027912811667, "grad_norm": 1.016087293624878, "learning_rate": 2.640793952666531e-05, "loss": 0.2208, "step": 1321 }, { "epoch": 0.8292300454759292, "grad_norm": 1.1681716442108154, "learning_rate": 2.6388686685104438e-05, "loss": 0.2762, "step": 1322 }, { "epoch": 0.8298572996706916, "grad_norm": 1.0408824682235718, "learning_rate": 2.6369427248277387e-05, "loss": 0.251, "step": 1323 }, { "epoch": 0.830484553865454, "grad_norm": 0.9965060353279114, "learning_rate": 2.635016123606637e-05, "loss": 0.249, "step": 1324 }, { "epoch": 0.8311118080602165, "grad_norm": 0.824463427066803, "learning_rate": 2.633088866836036e-05, "loss": 0.1927, "step": 1325 }, { "epoch": 0.8317390622549788, "grad_norm": 1.0201879739761353, "learning_rate": 2.63116095650551e-05, "loss": 0.2386, "step": 1326 }, { "epoch": 0.8323663164497412, "grad_norm": 0.9999285340309143, "learning_rate": 2.6292323946053096e-05, "loss": 0.2616, "step": 1327 }, { "epoch": 0.8329935706445036, "grad_norm": 1.0046961307525635, "learning_rate": 2.6273031831263574e-05, "loss": 0.249, "step": 1328 }, { "epoch": 0.8336208248392661, "grad_norm": 0.978674054145813, "learning_rate": 2.625373324060245e-05, "loss": 0.2559, "step": 1329 }, { "epoch": 0.8342480790340285, "grad_norm": 0.9145632386207581, "learning_rate": 2.623442819399234e-05, "loss": 0.2227, "step": 1330 }, { "epoch": 0.8348753332287909, "grad_norm": 0.8744785189628601, "learning_rate": 2.621511671136254e-05, "loss": 0.2334, "step": 1331 }, { "epoch": 0.8355025874235534, "grad_norm": 1.0927536487579346, "learning_rate": 2.619579881264896e-05, "loss": 0.2629, "step": 1332 }, { "epoch": 0.8361298416183158, "grad_norm": 0.8395379781723022, "learning_rate": 2.6176474517794152e-05, "loss": 0.2319, "step": 1333 }, { "epoch": 0.8367570958130782, "grad_norm": 1.0170912742614746, "learning_rate": 2.6157143846747265e-05, "loss": 0.2574, "step": 1334 }, { "epoch": 0.8373843500078407, "grad_norm": 0.8509354591369629, "learning_rate": 2.613780681946403e-05, "loss": 0.1948, "step": 1335 }, { "epoch": 0.8380116042026031, "grad_norm": 0.8112233281135559, "learning_rate": 2.611846345590674e-05, "loss": 0.1838, "step": 1336 }, { "epoch": 0.8386388583973655, "grad_norm": 1.1284143924713135, "learning_rate": 2.609911377604423e-05, "loss": 0.2349, "step": 1337 }, { "epoch": 0.8392661125921279, "grad_norm": 0.8029350638389587, "learning_rate": 2.6079757799851853e-05, "loss": 0.249, "step": 1338 }, { "epoch": 0.8398933667868904, "grad_norm": 1.106877326965332, "learning_rate": 2.6060395547311476e-05, "loss": 0.2542, "step": 1339 }, { "epoch": 0.8405206209816528, "grad_norm": 1.000625729560852, "learning_rate": 2.6041027038411427e-05, "loss": 0.2461, "step": 1340 }, { "epoch": 0.8411478751764152, "grad_norm": 1.044447660446167, "learning_rate": 2.6021652293146493e-05, "loss": 0.2177, "step": 1341 }, { "epoch": 0.8417751293711777, "grad_norm": 1.2332682609558105, "learning_rate": 2.600227133151791e-05, "loss": 0.2558, "step": 1342 }, { "epoch": 0.8424023835659401, "grad_norm": 1.0727463960647583, "learning_rate": 2.5982884173533335e-05, "loss": 0.2329, "step": 1343 }, { "epoch": 0.8430296377607025, "grad_norm": 0.9513474106788635, "learning_rate": 2.5963490839206806e-05, "loss": 0.2508, "step": 1344 }, { "epoch": 0.843656891955465, "grad_norm": 0.9870790839195251, "learning_rate": 2.5944091348558743e-05, "loss": 0.2565, "step": 1345 }, { "epoch": 0.8442841461502274, "grad_norm": 0.8659030795097351, "learning_rate": 2.5924685721615928e-05, "loss": 0.2309, "step": 1346 }, { "epoch": 0.8449114003449898, "grad_norm": 1.0346086025238037, "learning_rate": 2.5905273978411468e-05, "loss": 0.2429, "step": 1347 }, { "epoch": 0.8455386545397522, "grad_norm": 1.0064115524291992, "learning_rate": 2.58858561389848e-05, "loss": 0.2251, "step": 1348 }, { "epoch": 0.8461659087345147, "grad_norm": 0.9384128451347351, "learning_rate": 2.5866432223381627e-05, "loss": 0.2579, "step": 1349 }, { "epoch": 0.8467931629292771, "grad_norm": 0.9889576435089111, "learning_rate": 2.5847002251653953e-05, "loss": 0.1964, "step": 1350 }, { "epoch": 0.8474204171240395, "grad_norm": 0.9557651877403259, "learning_rate": 2.5827566243860017e-05, "loss": 0.2596, "step": 1351 }, { "epoch": 0.848047671318802, "grad_norm": 0.9052563309669495, "learning_rate": 2.580812422006429e-05, "loss": 0.2287, "step": 1352 }, { "epoch": 0.8486749255135644, "grad_norm": 0.8695682287216187, "learning_rate": 2.578867620033747e-05, "loss": 0.215, "step": 1353 }, { "epoch": 0.8493021797083268, "grad_norm": 0.9117851853370667, "learning_rate": 2.5769222204756426e-05, "loss": 0.2254, "step": 1354 }, { "epoch": 0.8499294339030893, "grad_norm": 0.98007732629776, "learning_rate": 2.57497622534042e-05, "loss": 0.2862, "step": 1355 }, { "epoch": 0.8505566880978517, "grad_norm": 1.0553463697433472, "learning_rate": 2.573029636636998e-05, "loss": 0.2336, "step": 1356 }, { "epoch": 0.8511839422926141, "grad_norm": 0.931348443031311, "learning_rate": 2.5710824563749097e-05, "loss": 0.2471, "step": 1357 }, { "epoch": 0.8518111964873765, "grad_norm": 0.9068348407745361, "learning_rate": 2.569134686564297e-05, "loss": 0.2348, "step": 1358 }, { "epoch": 0.852438450682139, "grad_norm": 0.9847040176391602, "learning_rate": 2.5671863292159122e-05, "loss": 0.2385, "step": 1359 }, { "epoch": 0.8530657048769014, "grad_norm": 0.7689850926399231, "learning_rate": 2.5652373863411124e-05, "loss": 0.2093, "step": 1360 }, { "epoch": 0.8536929590716638, "grad_norm": 0.9209250211715698, "learning_rate": 2.56328785995186e-05, "loss": 0.2745, "step": 1361 }, { "epoch": 0.8543202132664263, "grad_norm": 1.0016462802886963, "learning_rate": 2.56133775206072e-05, "loss": 0.217, "step": 1362 }, { "epoch": 0.8549474674611887, "grad_norm": 0.8809705972671509, "learning_rate": 2.5593870646808575e-05, "loss": 0.226, "step": 1363 }, { "epoch": 0.855574721655951, "grad_norm": 0.8996021151542664, "learning_rate": 2.5574357998260352e-05, "loss": 0.2319, "step": 1364 }, { "epoch": 0.8562019758507134, "grad_norm": 0.8426946401596069, "learning_rate": 2.5554839595106136e-05, "loss": 0.2236, "step": 1365 }, { "epoch": 0.856829230045476, "grad_norm": 0.8030539751052856, "learning_rate": 2.5535315457495455e-05, "loss": 0.221, "step": 1366 }, { "epoch": 0.8574564842402383, "grad_norm": 0.9328519701957703, "learning_rate": 2.5515785605583752e-05, "loss": 0.2569, "step": 1367 }, { "epoch": 0.8580837384350007, "grad_norm": 1.004563570022583, "learning_rate": 2.5496250059532398e-05, "loss": 0.2587, "step": 1368 }, { "epoch": 0.8587109926297632, "grad_norm": 0.8493061661720276, "learning_rate": 2.5476708839508616e-05, "loss": 0.2118, "step": 1369 }, { "epoch": 0.8593382468245256, "grad_norm": 0.9640793204307556, "learning_rate": 2.5457161965685503e-05, "loss": 0.2513, "step": 1370 }, { "epoch": 0.859965501019288, "grad_norm": 0.9193623065948486, "learning_rate": 2.543760945824197e-05, "loss": 0.2017, "step": 1371 }, { "epoch": 0.8605927552140505, "grad_norm": 0.9694982171058655, "learning_rate": 2.541805133736278e-05, "loss": 0.2349, "step": 1372 }, { "epoch": 0.8612200094088129, "grad_norm": 0.9842789769172668, "learning_rate": 2.5398487623238452e-05, "loss": 0.2396, "step": 1373 }, { "epoch": 0.8618472636035753, "grad_norm": 1.1239285469055176, "learning_rate": 2.5378918336065307e-05, "loss": 0.2265, "step": 1374 }, { "epoch": 0.8624745177983377, "grad_norm": 0.8942157626152039, "learning_rate": 2.53593434960454e-05, "loss": 0.2217, "step": 1375 }, { "epoch": 0.8631017719931002, "grad_norm": 0.8285519480705261, "learning_rate": 2.5339763123386537e-05, "loss": 0.2228, "step": 1376 }, { "epoch": 0.8637290261878626, "grad_norm": 0.9027263522148132, "learning_rate": 2.5320177238302222e-05, "loss": 0.2288, "step": 1377 }, { "epoch": 0.864356280382625, "grad_norm": 0.8536057472229004, "learning_rate": 2.5300585861011656e-05, "loss": 0.2015, "step": 1378 }, { "epoch": 0.8649835345773875, "grad_norm": 1.072025179862976, "learning_rate": 2.5280989011739698e-05, "loss": 0.2264, "step": 1379 }, { "epoch": 0.8656107887721499, "grad_norm": 0.979217529296875, "learning_rate": 2.5261386710716872e-05, "loss": 0.2375, "step": 1380 }, { "epoch": 0.8662380429669123, "grad_norm": 0.9342636466026306, "learning_rate": 2.524177897817933e-05, "loss": 0.2489, "step": 1381 }, { "epoch": 0.8668652971616748, "grad_norm": 0.9185689091682434, "learning_rate": 2.522216583436881e-05, "loss": 0.2125, "step": 1382 }, { "epoch": 0.8674925513564372, "grad_norm": 0.8563762903213501, "learning_rate": 2.520254729953267e-05, "loss": 0.2628, "step": 1383 }, { "epoch": 0.8681198055511996, "grad_norm": 0.897680938243866, "learning_rate": 2.518292339392379e-05, "loss": 0.1953, "step": 1384 }, { "epoch": 0.868747059745962, "grad_norm": 0.8742828965187073, "learning_rate": 2.516329413780064e-05, "loss": 0.2075, "step": 1385 }, { "epoch": 0.8693743139407245, "grad_norm": 0.8942375779151917, "learning_rate": 2.5143659551427186e-05, "loss": 0.2214, "step": 1386 }, { "epoch": 0.8700015681354869, "grad_norm": 1.0169836282730103, "learning_rate": 2.5124019655072897e-05, "loss": 0.2487, "step": 1387 }, { "epoch": 0.8706288223302493, "grad_norm": 0.8696025013923645, "learning_rate": 2.5104374469012726e-05, "loss": 0.2398, "step": 1388 }, { "epoch": 0.8712560765250118, "grad_norm": 0.8479281067848206, "learning_rate": 2.5084724013527106e-05, "loss": 0.2114, "step": 1389 }, { "epoch": 0.8718833307197742, "grad_norm": 0.8906635046005249, "learning_rate": 2.506506830890188e-05, "loss": 0.1894, "step": 1390 }, { "epoch": 0.8725105849145366, "grad_norm": 0.8798310160636902, "learning_rate": 2.5045407375428338e-05, "loss": 0.237, "step": 1391 }, { "epoch": 0.8731378391092991, "grad_norm": 1.0019210577011108, "learning_rate": 2.5025741233403143e-05, "loss": 0.2334, "step": 1392 }, { "epoch": 0.8737650933040615, "grad_norm": 0.9461895227432251, "learning_rate": 2.500606990312834e-05, "loss": 0.2196, "step": 1393 }, { "epoch": 0.8743923474988239, "grad_norm": 0.9007999300956726, "learning_rate": 2.498639340491135e-05, "loss": 0.216, "step": 1394 }, { "epoch": 0.8750196016935863, "grad_norm": 0.9154345393180847, "learning_rate": 2.4966711759064906e-05, "loss": 0.2569, "step": 1395 }, { "epoch": 0.8756468558883488, "grad_norm": 0.8156167268753052, "learning_rate": 2.494702498590706e-05, "loss": 0.2341, "step": 1396 }, { "epoch": 0.8762741100831112, "grad_norm": 0.8945419192314148, "learning_rate": 2.4927333105761172e-05, "loss": 0.2173, "step": 1397 }, { "epoch": 0.8769013642778736, "grad_norm": 0.8813133835792542, "learning_rate": 2.490763613895586e-05, "loss": 0.2243, "step": 1398 }, { "epoch": 0.8775286184726361, "grad_norm": 0.9496026635169983, "learning_rate": 2.4887934105824992e-05, "loss": 0.2178, "step": 1399 }, { "epoch": 0.8781558726673985, "grad_norm": 1.0122556686401367, "learning_rate": 2.4868227026707668e-05, "loss": 0.2393, "step": 1400 }, { "epoch": 0.8787831268621609, "grad_norm": 0.9015643000602722, "learning_rate": 2.484851492194821e-05, "loss": 0.2274, "step": 1401 }, { "epoch": 0.8794103810569234, "grad_norm": 0.8066772222518921, "learning_rate": 2.482879781189611e-05, "loss": 0.1984, "step": 1402 }, { "epoch": 0.8800376352516858, "grad_norm": 1.0243488550186157, "learning_rate": 2.4809075716906038e-05, "loss": 0.2265, "step": 1403 }, { "epoch": 0.8806648894464482, "grad_norm": 0.9530049562454224, "learning_rate": 2.478934865733781e-05, "loss": 0.2383, "step": 1404 }, { "epoch": 0.8812921436412106, "grad_norm": 0.8689175844192505, "learning_rate": 2.476961665355635e-05, "loss": 0.2527, "step": 1405 }, { "epoch": 0.8819193978359731, "grad_norm": 0.8891976475715637, "learning_rate": 2.4749879725931716e-05, "loss": 0.2017, "step": 1406 }, { "epoch": 0.8825466520307355, "grad_norm": 0.9147886633872986, "learning_rate": 2.4730137894839023e-05, "loss": 0.2672, "step": 1407 }, { "epoch": 0.8831739062254979, "grad_norm": 1.0061230659484863, "learning_rate": 2.4710391180658465e-05, "loss": 0.2183, "step": 1408 }, { "epoch": 0.8838011604202604, "grad_norm": 0.9156420230865479, "learning_rate": 2.4690639603775263e-05, "loss": 0.2306, "step": 1409 }, { "epoch": 0.8844284146150228, "grad_norm": 0.9554071426391602, "learning_rate": 2.467088318457968e-05, "loss": 0.221, "step": 1410 }, { "epoch": 0.8850556688097851, "grad_norm": 1.0293577909469604, "learning_rate": 2.4651121943466946e-05, "loss": 0.2522, "step": 1411 }, { "epoch": 0.8856829230045475, "grad_norm": 0.8481536507606506, "learning_rate": 2.463135590083729e-05, "loss": 0.1971, "step": 1412 }, { "epoch": 0.88631017719931, "grad_norm": 1.4141706228256226, "learning_rate": 2.4611585077095898e-05, "loss": 0.2494, "step": 1413 }, { "epoch": 0.8869374313940724, "grad_norm": 0.9040833711624146, "learning_rate": 2.4591809492652887e-05, "loss": 0.2355, "step": 1414 }, { "epoch": 0.8875646855888348, "grad_norm": 0.8359858989715576, "learning_rate": 2.457202916792329e-05, "loss": 0.2351, "step": 1415 }, { "epoch": 0.8881919397835973, "grad_norm": 0.9249632358551025, "learning_rate": 2.4552244123327024e-05, "loss": 0.2177, "step": 1416 }, { "epoch": 0.8888191939783597, "grad_norm": 0.8339369297027588, "learning_rate": 2.4532454379288896e-05, "loss": 0.2255, "step": 1417 }, { "epoch": 0.8894464481731221, "grad_norm": 0.9239262938499451, "learning_rate": 2.4512659956238555e-05, "loss": 0.2406, "step": 1418 }, { "epoch": 0.8900737023678846, "grad_norm": 0.8235772848129272, "learning_rate": 2.4492860874610478e-05, "loss": 0.2202, "step": 1419 }, { "epoch": 0.890700956562647, "grad_norm": 0.7412683367729187, "learning_rate": 2.4473057154843952e-05, "loss": 0.2467, "step": 1420 }, { "epoch": 0.8913282107574094, "grad_norm": 0.8566434383392334, "learning_rate": 2.445324881738305e-05, "loss": 0.2481, "step": 1421 }, { "epoch": 0.8919554649521718, "grad_norm": 0.9057849645614624, "learning_rate": 2.4433435882676634e-05, "loss": 0.2426, "step": 1422 }, { "epoch": 0.8925827191469343, "grad_norm": 0.9396740794181824, "learning_rate": 2.4413618371178273e-05, "loss": 0.2753, "step": 1423 }, { "epoch": 0.8932099733416967, "grad_norm": 0.8927632570266724, "learning_rate": 2.4393796303346303e-05, "loss": 0.2286, "step": 1424 }, { "epoch": 0.8938372275364591, "grad_norm": 1.01885986328125, "learning_rate": 2.4373969699643718e-05, "loss": 0.2325, "step": 1425 }, { "epoch": 0.8944644817312216, "grad_norm": 0.86164790391922, "learning_rate": 2.4354138580538237e-05, "loss": 0.2242, "step": 1426 }, { "epoch": 0.895091735925984, "grad_norm": 0.8852266669273376, "learning_rate": 2.433430296650222e-05, "loss": 0.2094, "step": 1427 }, { "epoch": 0.8957189901207464, "grad_norm": 0.9729646444320679, "learning_rate": 2.4314462878012664e-05, "loss": 0.2435, "step": 1428 }, { "epoch": 0.8963462443155089, "grad_norm": 0.9526697993278503, "learning_rate": 2.429461833555119e-05, "loss": 0.2385, "step": 1429 }, { "epoch": 0.8969734985102713, "grad_norm": 0.9755507707595825, "learning_rate": 2.4274769359604028e-05, "loss": 0.2457, "step": 1430 }, { "epoch": 0.8976007527050337, "grad_norm": 0.9439208507537842, "learning_rate": 2.4254915970661968e-05, "loss": 0.2438, "step": 1431 }, { "epoch": 0.8982280068997961, "grad_norm": 0.9157652854919434, "learning_rate": 2.4235058189220353e-05, "loss": 0.2378, "step": 1432 }, { "epoch": 0.8988552610945586, "grad_norm": 0.8972637057304382, "learning_rate": 2.4215196035779086e-05, "loss": 0.1998, "step": 1433 }, { "epoch": 0.899482515289321, "grad_norm": 0.7806968092918396, "learning_rate": 2.419532953084256e-05, "loss": 0.2411, "step": 1434 }, { "epoch": 0.9001097694840834, "grad_norm": 0.9046010971069336, "learning_rate": 2.417545869491966e-05, "loss": 0.2189, "step": 1435 }, { "epoch": 0.9007370236788459, "grad_norm": 0.9269365072250366, "learning_rate": 2.4155583548523768e-05, "loss": 0.2323, "step": 1436 }, { "epoch": 0.9013642778736083, "grad_norm": 0.9623416662216187, "learning_rate": 2.4135704112172683e-05, "loss": 0.2403, "step": 1437 }, { "epoch": 0.9019915320683707, "grad_norm": 0.9461166858673096, "learning_rate": 2.4115820406388646e-05, "loss": 0.2322, "step": 1438 }, { "epoch": 0.9026187862631332, "grad_norm": 0.8908488750457764, "learning_rate": 2.409593245169832e-05, "loss": 0.2394, "step": 1439 }, { "epoch": 0.9032460404578956, "grad_norm": 1.0029852390289307, "learning_rate": 2.4076040268632722e-05, "loss": 0.2839, "step": 1440 }, { "epoch": 0.903873294652658, "grad_norm": 0.8697084188461304, "learning_rate": 2.4056143877727266e-05, "loss": 0.2166, "step": 1441 }, { "epoch": 0.9045005488474204, "grad_norm": 0.8713654279708862, "learning_rate": 2.4036243299521695e-05, "loss": 0.1948, "step": 1442 }, { "epoch": 0.9051278030421829, "grad_norm": 0.8880418539047241, "learning_rate": 2.401633855456008e-05, "loss": 0.2412, "step": 1443 }, { "epoch": 0.9057550572369453, "grad_norm": 0.810232400894165, "learning_rate": 2.3996429663390775e-05, "loss": 0.2468, "step": 1444 }, { "epoch": 0.9063823114317077, "grad_norm": 0.9070370197296143, "learning_rate": 2.3976516646566443e-05, "loss": 0.215, "step": 1445 }, { "epoch": 0.9070095656264702, "grad_norm": 1.0040373802185059, "learning_rate": 2.3956599524643985e-05, "loss": 0.2207, "step": 1446 }, { "epoch": 0.9076368198212326, "grad_norm": 0.7925657033920288, "learning_rate": 2.393667831818455e-05, "loss": 0.2389, "step": 1447 }, { "epoch": 0.908264074015995, "grad_norm": 0.9810481667518616, "learning_rate": 2.3916753047753508e-05, "loss": 0.2502, "step": 1448 }, { "epoch": 0.9088913282107574, "grad_norm": 0.8342772722244263, "learning_rate": 2.3896823733920406e-05, "loss": 0.2314, "step": 1449 }, { "epoch": 0.9095185824055199, "grad_norm": 0.9573843479156494, "learning_rate": 2.387689039725898e-05, "loss": 0.2215, "step": 1450 }, { "epoch": 0.9101458366002823, "grad_norm": 1.0381935834884644, "learning_rate": 2.3856953058347116e-05, "loss": 0.2319, "step": 1451 }, { "epoch": 0.9107730907950446, "grad_norm": 0.8801733255386353, "learning_rate": 2.3837011737766828e-05, "loss": 0.18, "step": 1452 }, { "epoch": 0.9114003449898072, "grad_norm": 0.8234224915504456, "learning_rate": 2.3817066456104253e-05, "loss": 0.2063, "step": 1453 }, { "epoch": 0.9120275991845695, "grad_norm": 0.828215479850769, "learning_rate": 2.379711723394959e-05, "loss": 0.1751, "step": 1454 }, { "epoch": 0.9126548533793319, "grad_norm": 1.0685749053955078, "learning_rate": 2.3777164091897137e-05, "loss": 0.2159, "step": 1455 }, { "epoch": 0.9132821075740944, "grad_norm": 0.9756271243095398, "learning_rate": 2.3757207050545227e-05, "loss": 0.2366, "step": 1456 }, { "epoch": 0.9139093617688568, "grad_norm": 0.844516396522522, "learning_rate": 2.3737246130496206e-05, "loss": 0.2433, "step": 1457 }, { "epoch": 0.9145366159636192, "grad_norm": 0.8922362327575684, "learning_rate": 2.3717281352356438e-05, "loss": 0.2433, "step": 1458 }, { "epoch": 0.9151638701583816, "grad_norm": 1.1780900955200195, "learning_rate": 2.3697312736736268e-05, "loss": 0.2332, "step": 1459 }, { "epoch": 0.9157911243531441, "grad_norm": 0.9307076334953308, "learning_rate": 2.3677340304250003e-05, "loss": 0.2548, "step": 1460 }, { "epoch": 0.9164183785479065, "grad_norm": 1.1400376558303833, "learning_rate": 2.3657364075515884e-05, "loss": 0.2158, "step": 1461 }, { "epoch": 0.9170456327426689, "grad_norm": 1.0676137208938599, "learning_rate": 2.3637384071156078e-05, "loss": 0.2487, "step": 1462 }, { "epoch": 0.9176728869374314, "grad_norm": 0.9103914499282837, "learning_rate": 2.361740031179664e-05, "loss": 0.2311, "step": 1463 }, { "epoch": 0.9183001411321938, "grad_norm": 0.9785495400428772, "learning_rate": 2.359741281806752e-05, "loss": 0.2337, "step": 1464 }, { "epoch": 0.9189273953269562, "grad_norm": 0.9982824921607971, "learning_rate": 2.3577421610602507e-05, "loss": 0.2136, "step": 1465 }, { "epoch": 0.9195546495217187, "grad_norm": 0.9072036743164062, "learning_rate": 2.355742671003922e-05, "loss": 0.2117, "step": 1466 }, { "epoch": 0.9201819037164811, "grad_norm": 0.8125027418136597, "learning_rate": 2.3537428137019105e-05, "loss": 0.2219, "step": 1467 }, { "epoch": 0.9208091579112435, "grad_norm": 0.9591137170791626, "learning_rate": 2.3517425912187397e-05, "loss": 0.2372, "step": 1468 }, { "epoch": 0.9214364121060059, "grad_norm": 0.9125718474388123, "learning_rate": 2.3497420056193095e-05, "loss": 0.2641, "step": 1469 }, { "epoch": 0.9220636663007684, "grad_norm": 0.8828108906745911, "learning_rate": 2.347741058968894e-05, "loss": 0.2189, "step": 1470 }, { "epoch": 0.9226909204955308, "grad_norm": 0.8246212601661682, "learning_rate": 2.3457397533331424e-05, "loss": 0.2316, "step": 1471 }, { "epoch": 0.9233181746902932, "grad_norm": 0.8619717955589294, "learning_rate": 2.3437380907780713e-05, "loss": 0.2064, "step": 1472 }, { "epoch": 0.9239454288850557, "grad_norm": 0.9750192165374756, "learning_rate": 2.3417360733700685e-05, "loss": 0.2424, "step": 1473 }, { "epoch": 0.9245726830798181, "grad_norm": 0.9998435974121094, "learning_rate": 2.339733703175887e-05, "loss": 0.2377, "step": 1474 }, { "epoch": 0.9251999372745805, "grad_norm": 0.8929082751274109, "learning_rate": 2.337730982262643e-05, "loss": 0.2397, "step": 1475 }, { "epoch": 0.925827191469343, "grad_norm": 0.7877259254455566, "learning_rate": 2.335727912697817e-05, "loss": 0.2208, "step": 1476 }, { "epoch": 0.9264544456641054, "grad_norm": 0.8633270263671875, "learning_rate": 2.333724496549248e-05, "loss": 0.1959, "step": 1477 }, { "epoch": 0.9270816998588678, "grad_norm": 0.9239407777786255, "learning_rate": 2.3317207358851322e-05, "loss": 0.2076, "step": 1478 }, { "epoch": 0.9277089540536302, "grad_norm": 0.8067747950553894, "learning_rate": 2.3297166327740234e-05, "loss": 0.195, "step": 1479 }, { "epoch": 0.9283362082483927, "grad_norm": 0.9614524841308594, "learning_rate": 2.327712189284828e-05, "loss": 0.267, "step": 1480 }, { "epoch": 0.9289634624431551, "grad_norm": 0.6122102737426758, "learning_rate": 2.3257074074868023e-05, "loss": 0.2062, "step": 1481 }, { "epoch": 0.9295907166379175, "grad_norm": 0.8468174338340759, "learning_rate": 2.3237022894495532e-05, "loss": 0.2433, "step": 1482 }, { "epoch": 0.93021797083268, "grad_norm": 0.9263379573822021, "learning_rate": 2.3216968372430362e-05, "loss": 0.192, "step": 1483 }, { "epoch": 0.9308452250274424, "grad_norm": 0.8937593102455139, "learning_rate": 2.319691052937548e-05, "loss": 0.2115, "step": 1484 }, { "epoch": 0.9314724792222048, "grad_norm": 0.8346992135047913, "learning_rate": 2.317684938603732e-05, "loss": 0.1919, "step": 1485 }, { "epoch": 0.9320997334169673, "grad_norm": 1.161743402481079, "learning_rate": 2.3156784963125705e-05, "loss": 0.2597, "step": 1486 }, { "epoch": 0.9327269876117297, "grad_norm": 0.8451173305511475, "learning_rate": 2.3136717281353836e-05, "loss": 0.2224, "step": 1487 }, { "epoch": 0.9333542418064921, "grad_norm": 0.9135767221450806, "learning_rate": 2.3116646361438294e-05, "loss": 0.2344, "step": 1488 }, { "epoch": 0.9339814960012545, "grad_norm": 0.9136664271354675, "learning_rate": 2.3096572224099e-05, "loss": 0.2214, "step": 1489 }, { "epoch": 0.934608750196017, "grad_norm": 0.8622660636901855, "learning_rate": 2.3076494890059184e-05, "loss": 0.2412, "step": 1490 }, { "epoch": 0.9352360043907794, "grad_norm": 1.0650670528411865, "learning_rate": 2.3056414380045388e-05, "loss": 0.2563, "step": 1491 }, { "epoch": 0.9358632585855418, "grad_norm": 1.0359560251235962, "learning_rate": 2.3036330714787432e-05, "loss": 0.2574, "step": 1492 }, { "epoch": 0.9364905127803043, "grad_norm": 0.8969426155090332, "learning_rate": 2.3016243915018386e-05, "loss": 0.2285, "step": 1493 }, { "epoch": 0.9371177669750667, "grad_norm": 0.7989252805709839, "learning_rate": 2.299615400147457e-05, "loss": 0.245, "step": 1494 }, { "epoch": 0.937745021169829, "grad_norm": 1.1010055541992188, "learning_rate": 2.2976060994895497e-05, "loss": 0.2686, "step": 1495 }, { "epoch": 0.9383722753645914, "grad_norm": 0.8471121788024902, "learning_rate": 2.295596491602389e-05, "loss": 0.2069, "step": 1496 }, { "epoch": 0.938999529559354, "grad_norm": 0.9189316034317017, "learning_rate": 2.2935865785605636e-05, "loss": 0.235, "step": 1497 }, { "epoch": 0.9396267837541163, "grad_norm": 0.96001797914505, "learning_rate": 2.2915763624389785e-05, "loss": 0.2087, "step": 1498 }, { "epoch": 0.9402540379488787, "grad_norm": 0.7247317433357239, "learning_rate": 2.289565845312849e-05, "loss": 0.1773, "step": 1499 }, { "epoch": 0.9408812921436412, "grad_norm": 0.7679764628410339, "learning_rate": 2.2875550292577043e-05, "loss": 0.2105, "step": 1500 }, { "epoch": 0.9415085463384036, "grad_norm": 0.8632218241691589, "learning_rate": 2.2855439163493796e-05, "loss": 0.1966, "step": 1501 }, { "epoch": 0.942135800533166, "grad_norm": 0.8614953756332397, "learning_rate": 2.2835325086640178e-05, "loss": 0.2215, "step": 1502 }, { "epoch": 0.9427630547279285, "grad_norm": 1.0454775094985962, "learning_rate": 2.2815208082780665e-05, "loss": 0.232, "step": 1503 }, { "epoch": 0.9433903089226909, "grad_norm": 0.8676419258117676, "learning_rate": 2.2795088172682732e-05, "loss": 0.2233, "step": 1504 }, { "epoch": 0.9440175631174533, "grad_norm": 1.0755561590194702, "learning_rate": 2.2774965377116885e-05, "loss": 0.2582, "step": 1505 }, { "epoch": 0.9446448173122157, "grad_norm": 0.8055601716041565, "learning_rate": 2.275483971685659e-05, "loss": 0.1712, "step": 1506 }, { "epoch": 0.9452720715069782, "grad_norm": 0.9284416437149048, "learning_rate": 2.273471121267828e-05, "loss": 0.2087, "step": 1507 }, { "epoch": 0.9458993257017406, "grad_norm": 1.0512845516204834, "learning_rate": 2.2714579885361304e-05, "loss": 0.1975, "step": 1508 }, { "epoch": 0.946526579896503, "grad_norm": 0.83321613073349, "learning_rate": 2.2694445755687956e-05, "loss": 0.2174, "step": 1509 }, { "epoch": 0.9471538340912655, "grad_norm": 0.9688567519187927, "learning_rate": 2.2674308844443394e-05, "loss": 0.2499, "step": 1510 }, { "epoch": 0.9477810882860279, "grad_norm": 0.7848716974258423, "learning_rate": 2.265416917241566e-05, "loss": 0.1968, "step": 1511 }, { "epoch": 0.9484083424807903, "grad_norm": 0.9451969861984253, "learning_rate": 2.2634026760395664e-05, "loss": 0.2262, "step": 1512 }, { "epoch": 0.9490355966755528, "grad_norm": 1.0070687532424927, "learning_rate": 2.2613881629177112e-05, "loss": 0.2543, "step": 1513 }, { "epoch": 0.9496628508703152, "grad_norm": 1.049132227897644, "learning_rate": 2.259373379955653e-05, "loss": 0.2521, "step": 1514 }, { "epoch": 0.9502901050650776, "grad_norm": 0.9367115497589111, "learning_rate": 2.257358329233325e-05, "loss": 0.1957, "step": 1515 }, { "epoch": 0.95091735925984, "grad_norm": 1.0050894021987915, "learning_rate": 2.2553430128309325e-05, "loss": 0.2179, "step": 1516 }, { "epoch": 0.9515446134546025, "grad_norm": 0.890467643737793, "learning_rate": 2.2533274328289593e-05, "loss": 0.2328, "step": 1517 }, { "epoch": 0.9521718676493649, "grad_norm": 0.8494214415550232, "learning_rate": 2.251311591308159e-05, "loss": 0.2164, "step": 1518 }, { "epoch": 0.9527991218441273, "grad_norm": 0.9842292666435242, "learning_rate": 2.2492954903495558e-05, "loss": 0.2441, "step": 1519 }, { "epoch": 0.9534263760388898, "grad_norm": 0.9736828804016113, "learning_rate": 2.247279132034442e-05, "loss": 0.2571, "step": 1520 }, { "epoch": 0.9540536302336522, "grad_norm": 0.9789125919342041, "learning_rate": 2.2452625184443756e-05, "loss": 0.2305, "step": 1521 }, { "epoch": 0.9546808844284146, "grad_norm": 0.8945363163948059, "learning_rate": 2.2432456516611763e-05, "loss": 0.2327, "step": 1522 }, { "epoch": 0.9553081386231771, "grad_norm": 1.0674930810928345, "learning_rate": 2.2412285337669272e-05, "loss": 0.2552, "step": 1523 }, { "epoch": 0.9559353928179395, "grad_norm": 0.9890167117118835, "learning_rate": 2.239211166843972e-05, "loss": 0.2399, "step": 1524 }, { "epoch": 0.9565626470127019, "grad_norm": 0.8804594278335571, "learning_rate": 2.237193552974907e-05, "loss": 0.2687, "step": 1525 }, { "epoch": 0.9571899012074643, "grad_norm": 0.939110279083252, "learning_rate": 2.235175694242587e-05, "loss": 0.1726, "step": 1526 }, { "epoch": 0.9578171554022268, "grad_norm": 0.8611268401145935, "learning_rate": 2.2331575927301198e-05, "loss": 0.2008, "step": 1527 }, { "epoch": 0.9584444095969892, "grad_norm": 0.9728996753692627, "learning_rate": 2.231139250520861e-05, "loss": 0.2279, "step": 1528 }, { "epoch": 0.9590716637917516, "grad_norm": 0.9554197788238525, "learning_rate": 2.2291206696984165e-05, "loss": 0.2452, "step": 1529 }, { "epoch": 0.9596989179865141, "grad_norm": 0.8852481245994568, "learning_rate": 2.2271018523466395e-05, "loss": 0.221, "step": 1530 }, { "epoch": 0.9603261721812765, "grad_norm": 0.8912515044212341, "learning_rate": 2.2250828005496245e-05, "loss": 0.244, "step": 1531 }, { "epoch": 0.9609534263760389, "grad_norm": 0.7839436531066895, "learning_rate": 2.223063516391712e-05, "loss": 0.2125, "step": 1532 }, { "epoch": 0.9615806805708014, "grad_norm": 0.8890122175216675, "learning_rate": 2.2210440019574785e-05, "loss": 0.2429, "step": 1533 }, { "epoch": 0.9622079347655638, "grad_norm": 0.9256410002708435, "learning_rate": 2.2190242593317396e-05, "loss": 0.2341, "step": 1534 }, { "epoch": 0.9628351889603262, "grad_norm": 0.9652786254882812, "learning_rate": 2.2170042905995474e-05, "loss": 0.2379, "step": 1535 }, { "epoch": 0.9634624431550886, "grad_norm": 1.1375311613082886, "learning_rate": 2.2149840978461868e-05, "loss": 0.2318, "step": 1536 }, { "epoch": 0.9640896973498511, "grad_norm": 0.9609313607215881, "learning_rate": 2.212963683157174e-05, "loss": 0.2056, "step": 1537 }, { "epoch": 0.9647169515446135, "grad_norm": 0.7776309847831726, "learning_rate": 2.210943048618253e-05, "loss": 0.1841, "step": 1538 }, { "epoch": 0.9653442057393758, "grad_norm": 0.8857976198196411, "learning_rate": 2.208922196315398e-05, "loss": 0.1961, "step": 1539 }, { "epoch": 0.9659714599341384, "grad_norm": 0.9588800668716431, "learning_rate": 2.2069011283348044e-05, "loss": 0.2297, "step": 1540 }, { "epoch": 0.9665987141289007, "grad_norm": 0.9649481177330017, "learning_rate": 2.2048798467628926e-05, "loss": 0.2109, "step": 1541 }, { "epoch": 0.9672259683236631, "grad_norm": 1.0013532638549805, "learning_rate": 2.2028583536863022e-05, "loss": 0.2119, "step": 1542 }, { "epoch": 0.9678532225184255, "grad_norm": 1.0082463026046753, "learning_rate": 2.2008366511918915e-05, "loss": 0.2574, "step": 1543 }, { "epoch": 0.968480476713188, "grad_norm": 0.9393874406814575, "learning_rate": 2.1988147413667363e-05, "loss": 0.2669, "step": 1544 }, { "epoch": 0.9691077309079504, "grad_norm": 0.8781605362892151, "learning_rate": 2.196792626298124e-05, "loss": 0.2055, "step": 1545 }, { "epoch": 0.9697349851027128, "grad_norm": 0.9629719853401184, "learning_rate": 2.1947703080735564e-05, "loss": 0.19, "step": 1546 }, { "epoch": 0.9703622392974753, "grad_norm": 0.9251545667648315, "learning_rate": 2.1927477887807426e-05, "loss": 0.2356, "step": 1547 }, { "epoch": 0.9709894934922377, "grad_norm": 0.9196063876152039, "learning_rate": 2.190725070507601e-05, "loss": 0.2106, "step": 1548 }, { "epoch": 0.9716167476870001, "grad_norm": 0.9102229475975037, "learning_rate": 2.188702155342255e-05, "loss": 0.2273, "step": 1549 }, { "epoch": 0.9722440018817626, "grad_norm": 0.8885928392410278, "learning_rate": 2.186679045373031e-05, "loss": 0.2346, "step": 1550 }, { "epoch": 0.972871256076525, "grad_norm": 0.8653280138969421, "learning_rate": 2.1846557426884566e-05, "loss": 0.199, "step": 1551 }, { "epoch": 0.9734985102712874, "grad_norm": 0.9529988169670105, "learning_rate": 2.182632249377258e-05, "loss": 0.2339, "step": 1552 }, { "epoch": 0.9741257644660498, "grad_norm": 0.9183457493782043, "learning_rate": 2.18060856752836e-05, "loss": 0.2289, "step": 1553 }, { "epoch": 0.9747530186608123, "grad_norm": 0.7829636335372925, "learning_rate": 2.1785846992308784e-05, "loss": 0.2589, "step": 1554 }, { "epoch": 0.9753802728555747, "grad_norm": 0.893418550491333, "learning_rate": 2.1765606465741253e-05, "loss": 0.2363, "step": 1555 }, { "epoch": 0.9760075270503371, "grad_norm": 0.7944826483726501, "learning_rate": 2.1745364116476017e-05, "loss": 0.1994, "step": 1556 }, { "epoch": 0.9766347812450996, "grad_norm": 0.9511778354644775, "learning_rate": 2.1725119965409957e-05, "loss": 0.2518, "step": 1557 }, { "epoch": 0.977262035439862, "grad_norm": 0.6436446905136108, "learning_rate": 2.170487403344182e-05, "loss": 0.243, "step": 1558 }, { "epoch": 0.9778892896346244, "grad_norm": 0.8381584882736206, "learning_rate": 2.168462634147222e-05, "loss": 0.2275, "step": 1559 }, { "epoch": 0.9785165438293869, "grad_norm": 0.852128803730011, "learning_rate": 2.1664376910403524e-05, "loss": 0.2099, "step": 1560 }, { "epoch": 0.9791437980241493, "grad_norm": 0.9113161563873291, "learning_rate": 2.1644125761139954e-05, "loss": 0.2269, "step": 1561 }, { "epoch": 0.9797710522189117, "grad_norm": 0.8905954957008362, "learning_rate": 2.1623872914587486e-05, "loss": 0.2655, "step": 1562 }, { "epoch": 0.9803983064136741, "grad_norm": 0.9145949482917786, "learning_rate": 2.1603618391653835e-05, "loss": 0.1956, "step": 1563 }, { "epoch": 0.9810255606084366, "grad_norm": 0.813274621963501, "learning_rate": 2.158336221324847e-05, "loss": 0.2186, "step": 1564 }, { "epoch": 0.981652814803199, "grad_norm": 1.031652569770813, "learning_rate": 2.1563104400282554e-05, "loss": 0.2218, "step": 1565 }, { "epoch": 0.9822800689979614, "grad_norm": 0.8783631324768066, "learning_rate": 2.1542844973668927e-05, "loss": 0.2237, "step": 1566 }, { "epoch": 0.9829073231927239, "grad_norm": 0.9718444347381592, "learning_rate": 2.1522583954322128e-05, "loss": 0.2356, "step": 1567 }, { "epoch": 0.9835345773874863, "grad_norm": 0.7708685398101807, "learning_rate": 2.1502321363158315e-05, "loss": 0.1831, "step": 1568 }, { "epoch": 0.9841618315822487, "grad_norm": 0.8163751363754272, "learning_rate": 2.1482057221095265e-05, "loss": 0.1993, "step": 1569 }, { "epoch": 0.9847890857770112, "grad_norm": 0.8872279524803162, "learning_rate": 2.1461791549052374e-05, "loss": 0.239, "step": 1570 }, { "epoch": 0.9854163399717736, "grad_norm": 0.8377917408943176, "learning_rate": 2.1441524367950608e-05, "loss": 0.2046, "step": 1571 }, { "epoch": 0.986043594166536, "grad_norm": 0.8231236338615417, "learning_rate": 2.1421255698712492e-05, "loss": 0.2061, "step": 1572 }, { "epoch": 0.9866708483612984, "grad_norm": 0.7960736751556396, "learning_rate": 2.140098556226208e-05, "loss": 0.1915, "step": 1573 }, { "epoch": 0.9872981025560609, "grad_norm": 0.932736337184906, "learning_rate": 2.1380713979524956e-05, "loss": 0.2073, "step": 1574 }, { "epoch": 0.9879253567508233, "grad_norm": 0.8400558233261108, "learning_rate": 2.136044097142818e-05, "loss": 0.1567, "step": 1575 }, { "epoch": 0.9885526109455857, "grad_norm": 0.912680983543396, "learning_rate": 2.1340166558900294e-05, "loss": 0.2281, "step": 1576 }, { "epoch": 0.9891798651403482, "grad_norm": 0.9152987003326416, "learning_rate": 2.1319890762871294e-05, "loss": 0.1829, "step": 1577 }, { "epoch": 0.9898071193351106, "grad_norm": 1.1444475650787354, "learning_rate": 2.1299613604272594e-05, "loss": 0.2924, "step": 1578 }, { "epoch": 0.990434373529873, "grad_norm": 0.9289518594741821, "learning_rate": 2.1279335104037025e-05, "loss": 0.2289, "step": 1579 }, { "epoch": 0.9910616277246355, "grad_norm": 0.9576685428619385, "learning_rate": 2.1259055283098782e-05, "loss": 0.2195, "step": 1580 }, { "epoch": 0.9916888819193979, "grad_norm": 0.893746554851532, "learning_rate": 2.123877416239345e-05, "loss": 0.2407, "step": 1581 }, { "epoch": 0.9923161361141603, "grad_norm": 0.8622097969055176, "learning_rate": 2.1218491762857943e-05, "loss": 0.2014, "step": 1582 }, { "epoch": 0.9929433903089226, "grad_norm": 0.8856543898582458, "learning_rate": 2.119820810543049e-05, "loss": 0.1957, "step": 1583 }, { "epoch": 0.9935706445036852, "grad_norm": 1.0095499753952026, "learning_rate": 2.1177923211050628e-05, "loss": 0.2179, "step": 1584 }, { "epoch": 0.9941978986984475, "grad_norm": 0.9774998426437378, "learning_rate": 2.1157637100659173e-05, "loss": 0.194, "step": 1585 }, { "epoch": 0.9948251528932099, "grad_norm": 0.815413773059845, "learning_rate": 2.1137349795198182e-05, "loss": 0.202, "step": 1586 }, { "epoch": 0.9954524070879724, "grad_norm": 0.8930078148841858, "learning_rate": 2.111706131561096e-05, "loss": 0.2171, "step": 1587 }, { "epoch": 0.9960796612827348, "grad_norm": 1.0153467655181885, "learning_rate": 2.1096771682842016e-05, "loss": 0.2223, "step": 1588 }, { "epoch": 0.9967069154774972, "grad_norm": 0.9196330904960632, "learning_rate": 2.107648091783705e-05, "loss": 0.2069, "step": 1589 }, { "epoch": 0.9973341696722596, "grad_norm": 0.8963049650192261, "learning_rate": 2.1056189041542938e-05, "loss": 0.224, "step": 1590 }, { "epoch": 0.9979614238670221, "grad_norm": 0.8232291340827942, "learning_rate": 2.10358960749077e-05, "loss": 0.2092, "step": 1591 }, { "epoch": 0.9985886780617845, "grad_norm": 1.0254426002502441, "learning_rate": 2.101560203888047e-05, "loss": 0.2107, "step": 1592 }, { "epoch": 0.9992159322565469, "grad_norm": 0.9355930685997009, "learning_rate": 2.099530695441151e-05, "loss": 0.2155, "step": 1593 }, { "epoch": 0.9998431864513094, "grad_norm": 0.9351606965065002, "learning_rate": 2.0975010842452138e-05, "loss": 0.2061, "step": 1594 }, { "epoch": 1.0004704406460718, "grad_norm": 0.7437397837638855, "learning_rate": 2.095471372395475e-05, "loss": 0.1526, "step": 1595 }, { "epoch": 1.0010976948408343, "grad_norm": 0.9794983863830566, "learning_rate": 2.0934415619872773e-05, "loss": 0.1749, "step": 1596 }, { "epoch": 1.0017249490355966, "grad_norm": 0.8396321535110474, "learning_rate": 2.0914116551160663e-05, "loss": 0.1517, "step": 1597 }, { "epoch": 1.0023522032303591, "grad_norm": 0.7245031595230103, "learning_rate": 2.0893816538773853e-05, "loss": 0.1462, "step": 1598 }, { "epoch": 1.0029794574251216, "grad_norm": 0.8666381239891052, "learning_rate": 2.087351560366876e-05, "loss": 0.1772, "step": 1599 }, { "epoch": 1.003606711619884, "grad_norm": 0.8373695015907288, "learning_rate": 2.0853213766802763e-05, "loss": 0.1766, "step": 1600 }, { "epoch": 1.0042339658146464, "grad_norm": 0.811313271522522, "learning_rate": 2.0832911049134154e-05, "loss": 0.1562, "step": 1601 }, { "epoch": 1.004861220009409, "grad_norm": 0.6714876890182495, "learning_rate": 2.0812607471622152e-05, "loss": 0.1355, "step": 1602 }, { "epoch": 1.0054884742041712, "grad_norm": 0.7351215481758118, "learning_rate": 2.079230305522685e-05, "loss": 0.1478, "step": 1603 }, { "epoch": 1.0061157283989337, "grad_norm": 0.5395284295082092, "learning_rate": 2.077199782090921e-05, "loss": 0.1277, "step": 1604 }, { "epoch": 1.006742982593696, "grad_norm": 1.017524242401123, "learning_rate": 2.0751691789631037e-05, "loss": 0.176, "step": 1605 }, { "epoch": 1.0073702367884585, "grad_norm": 0.8757364749908447, "learning_rate": 2.0731384982354977e-05, "loss": 0.1736, "step": 1606 }, { "epoch": 1.007997490983221, "grad_norm": 1.1049363613128662, "learning_rate": 2.0711077420044448e-05, "loss": 0.2043, "step": 1607 }, { "epoch": 1.0086247451779833, "grad_norm": 0.8929004669189453, "learning_rate": 2.069076912366366e-05, "loss": 0.1678, "step": 1608 }, { "epoch": 1.0092519993727458, "grad_norm": 0.9069939255714417, "learning_rate": 2.0670460114177597e-05, "loss": 0.1718, "step": 1609 }, { "epoch": 1.0098792535675083, "grad_norm": 0.9183425903320312, "learning_rate": 2.0650150412551958e-05, "loss": 0.14, "step": 1610 }, { "epoch": 1.0105065077622706, "grad_norm": 0.84815913438797, "learning_rate": 2.062984003975315e-05, "loss": 0.1478, "step": 1611 }, { "epoch": 1.011133761957033, "grad_norm": 1.0737385749816895, "learning_rate": 2.0609529016748304e-05, "loss": 0.218, "step": 1612 }, { "epoch": 1.0117610161517956, "grad_norm": 0.8041346073150635, "learning_rate": 2.058921736450519e-05, "loss": 0.1647, "step": 1613 }, { "epoch": 1.0123882703465579, "grad_norm": 0.8681744933128357, "learning_rate": 2.056890510399225e-05, "loss": 0.1537, "step": 1614 }, { "epoch": 1.0130155245413204, "grad_norm": 0.7892470359802246, "learning_rate": 2.054859225617855e-05, "loss": 0.1578, "step": 1615 }, { "epoch": 1.0136427787360829, "grad_norm": 0.8767917156219482, "learning_rate": 2.052827884203374e-05, "loss": 0.1697, "step": 1616 }, { "epoch": 1.0142700329308452, "grad_norm": 1.0207929611206055, "learning_rate": 2.0507964882528092e-05, "loss": 0.1829, "step": 1617 }, { "epoch": 1.0148972871256077, "grad_norm": 0.777074933052063, "learning_rate": 2.0487650398632408e-05, "loss": 0.1492, "step": 1618 }, { "epoch": 1.0155245413203702, "grad_norm": 0.8585703372955322, "learning_rate": 2.0467335411318056e-05, "loss": 0.1783, "step": 1619 }, { "epoch": 1.0161517955151325, "grad_norm": 0.7692691683769226, "learning_rate": 2.0447019941556902e-05, "loss": 0.1432, "step": 1620 }, { "epoch": 1.016779049709895, "grad_norm": 0.8173703551292419, "learning_rate": 2.0426704010321327e-05, "loss": 0.1565, "step": 1621 }, { "epoch": 1.0174063039046572, "grad_norm": 0.8283787965774536, "learning_rate": 2.040638763858418e-05, "loss": 0.1531, "step": 1622 }, { "epoch": 1.0180335580994198, "grad_norm": 0.9298859238624573, "learning_rate": 2.038607084731877e-05, "loss": 0.1648, "step": 1623 }, { "epoch": 1.0186608122941823, "grad_norm": 0.8722273707389832, "learning_rate": 2.0365753657498838e-05, "loss": 0.1807, "step": 1624 }, { "epoch": 1.0192880664889445, "grad_norm": 0.7768132090568542, "learning_rate": 2.0345436090098517e-05, "loss": 0.1519, "step": 1625 }, { "epoch": 1.019915320683707, "grad_norm": 0.8653201460838318, "learning_rate": 2.032511816609237e-05, "loss": 0.1723, "step": 1626 }, { "epoch": 1.0205425748784696, "grad_norm": 0.8122580051422119, "learning_rate": 2.030479990645529e-05, "loss": 0.1677, "step": 1627 }, { "epoch": 1.0211698290732318, "grad_norm": 0.8771058917045593, "learning_rate": 2.0284481332162534e-05, "loss": 0.1628, "step": 1628 }, { "epoch": 1.0217970832679943, "grad_norm": 0.7717471122741699, "learning_rate": 2.0264162464189692e-05, "loss": 0.154, "step": 1629 }, { "epoch": 1.0224243374627568, "grad_norm": 0.8961104154586792, "learning_rate": 2.0243843323512632e-05, "loss": 0.1597, "step": 1630 }, { "epoch": 1.0230515916575191, "grad_norm": 0.8729629516601562, "learning_rate": 2.0223523931107523e-05, "loss": 0.1504, "step": 1631 }, { "epoch": 1.0236788458522816, "grad_norm": 0.9216261506080627, "learning_rate": 2.0203204307950796e-05, "loss": 0.1669, "step": 1632 }, { "epoch": 1.0243061000470441, "grad_norm": 0.965466320514679, "learning_rate": 2.0182884475019108e-05, "loss": 0.1608, "step": 1633 }, { "epoch": 1.0249333542418064, "grad_norm": 0.8710114359855652, "learning_rate": 2.0162564453289333e-05, "loss": 0.1709, "step": 1634 }, { "epoch": 1.025560608436569, "grad_norm": 0.8559769988059998, "learning_rate": 2.014224426373856e-05, "loss": 0.161, "step": 1635 }, { "epoch": 1.0261878626313314, "grad_norm": 0.8614868521690369, "learning_rate": 2.0121923927344018e-05, "loss": 0.1535, "step": 1636 }, { "epoch": 1.0268151168260937, "grad_norm": 0.7848617434501648, "learning_rate": 2.0101603465083114e-05, "loss": 0.1281, "step": 1637 }, { "epoch": 1.0274423710208562, "grad_norm": 0.8579975366592407, "learning_rate": 2.008128289793338e-05, "loss": 0.151, "step": 1638 }, { "epoch": 1.0280696252156187, "grad_norm": 1.0866503715515137, "learning_rate": 2.0060962246872448e-05, "loss": 0.177, "step": 1639 }, { "epoch": 1.028696879410381, "grad_norm": 0.9099308252334595, "learning_rate": 2.004064153287804e-05, "loss": 0.1956, "step": 1640 }, { "epoch": 1.0293241336051435, "grad_norm": 0.8447299003601074, "learning_rate": 2.0020320776927947e-05, "loss": 0.1667, "step": 1641 }, { "epoch": 1.0299513877999058, "grad_norm": 0.9115908741950989, "learning_rate": 2e-05, "loss": 0.1908, "step": 1642 }, { "epoch": 1.0305786419946683, "grad_norm": 0.9186663627624512, "learning_rate": 1.9979679223072056e-05, "loss": 0.1649, "step": 1643 }, { "epoch": 1.0312058961894308, "grad_norm": 0.9531528353691101, "learning_rate": 1.9959358467121966e-05, "loss": 0.1584, "step": 1644 }, { "epoch": 1.031833150384193, "grad_norm": 0.8582137227058411, "learning_rate": 1.9939037753127556e-05, "loss": 0.1472, "step": 1645 }, { "epoch": 1.0324604045789556, "grad_norm": 0.8075041770935059, "learning_rate": 1.9918717102066622e-05, "loss": 0.162, "step": 1646 }, { "epoch": 1.033087658773718, "grad_norm": 0.9009033441543579, "learning_rate": 1.989839653491689e-05, "loss": 0.1703, "step": 1647 }, { "epoch": 1.0337149129684804, "grad_norm": 0.826914370059967, "learning_rate": 1.9878076072655985e-05, "loss": 0.1507, "step": 1648 }, { "epoch": 1.034342167163243, "grad_norm": 0.8428979516029358, "learning_rate": 1.985775573626145e-05, "loss": 0.1562, "step": 1649 }, { "epoch": 1.0349694213580054, "grad_norm": 0.7274463772773743, "learning_rate": 1.983743554671067e-05, "loss": 0.1406, "step": 1650 }, { "epoch": 1.0355966755527677, "grad_norm": 0.8123782277107239, "learning_rate": 1.9817115524980895e-05, "loss": 0.1669, "step": 1651 }, { "epoch": 1.0362239297475302, "grad_norm": 0.8698951601982117, "learning_rate": 1.9796795692049207e-05, "loss": 0.1598, "step": 1652 }, { "epoch": 1.0368511839422927, "grad_norm": 0.7227709293365479, "learning_rate": 1.977647606889248e-05, "loss": 0.1469, "step": 1653 }, { "epoch": 1.037478438137055, "grad_norm": 0.7994180917739868, "learning_rate": 1.975615667648737e-05, "loss": 0.1385, "step": 1654 }, { "epoch": 1.0381056923318175, "grad_norm": 0.7999747395515442, "learning_rate": 1.9735837535810318e-05, "loss": 0.1601, "step": 1655 }, { "epoch": 1.03873294652658, "grad_norm": 0.8406004905700684, "learning_rate": 1.9715518667837473e-05, "loss": 0.1591, "step": 1656 }, { "epoch": 1.0393602007213423, "grad_norm": 0.8649516105651855, "learning_rate": 1.969520009354472e-05, "loss": 0.156, "step": 1657 }, { "epoch": 1.0399874549161048, "grad_norm": 0.8329012393951416, "learning_rate": 1.9674881833907638e-05, "loss": 0.1434, "step": 1658 }, { "epoch": 1.0406147091108673, "grad_norm": 0.8117157816886902, "learning_rate": 1.965456390990149e-05, "loss": 0.157, "step": 1659 }, { "epoch": 1.0412419633056296, "grad_norm": 0.8369453549385071, "learning_rate": 1.9634246342501172e-05, "loss": 0.1673, "step": 1660 }, { "epoch": 1.041869217500392, "grad_norm": 0.9822908043861389, "learning_rate": 1.9613929152681234e-05, "loss": 0.18, "step": 1661 }, { "epoch": 1.0424964716951544, "grad_norm": 1.0761197805404663, "learning_rate": 1.9593612361415824e-05, "loss": 0.1992, "step": 1662 }, { "epoch": 1.0431237258899169, "grad_norm": 0.8977817893028259, "learning_rate": 1.9573295989678677e-05, "loss": 0.1493, "step": 1663 }, { "epoch": 1.0437509800846794, "grad_norm": 0.7583467364311218, "learning_rate": 1.95529800584431e-05, "loss": 0.1324, "step": 1664 }, { "epoch": 1.0443782342794417, "grad_norm": 0.8257169127464294, "learning_rate": 1.953266458868195e-05, "loss": 0.1681, "step": 1665 }, { "epoch": 1.0450054884742042, "grad_norm": 0.8764519691467285, "learning_rate": 1.9512349601367596e-05, "loss": 0.1469, "step": 1666 }, { "epoch": 1.0456327426689667, "grad_norm": 0.8687977194786072, "learning_rate": 1.9492035117471918e-05, "loss": 0.1456, "step": 1667 }, { "epoch": 1.046259996863729, "grad_norm": 0.84560227394104, "learning_rate": 1.9471721157966267e-05, "loss": 0.1521, "step": 1668 }, { "epoch": 1.0468872510584915, "grad_norm": 0.81574946641922, "learning_rate": 1.9451407743821458e-05, "loss": 0.1692, "step": 1669 }, { "epoch": 1.047514505253254, "grad_norm": 0.7992091178894043, "learning_rate": 1.9431094896007757e-05, "loss": 0.1605, "step": 1670 }, { "epoch": 1.0481417594480162, "grad_norm": 0.9301627278327942, "learning_rate": 1.9410782635494815e-05, "loss": 0.1776, "step": 1671 }, { "epoch": 1.0487690136427787, "grad_norm": 0.8068720698356628, "learning_rate": 1.93904709832517e-05, "loss": 0.1646, "step": 1672 }, { "epoch": 1.0493962678375413, "grad_norm": 0.775219202041626, "learning_rate": 1.9370159960246853e-05, "loss": 0.1407, "step": 1673 }, { "epoch": 1.0500235220323035, "grad_norm": 0.88523268699646, "learning_rate": 1.934984958744805e-05, "loss": 0.1696, "step": 1674 }, { "epoch": 1.050650776227066, "grad_norm": 0.8547267913818359, "learning_rate": 1.9329539885822403e-05, "loss": 0.1464, "step": 1675 }, { "epoch": 1.0512780304218285, "grad_norm": 0.8653497695922852, "learning_rate": 1.9309230876336337e-05, "loss": 0.1675, "step": 1676 }, { "epoch": 1.0519052846165908, "grad_norm": 1.0238136053085327, "learning_rate": 1.9288922579955552e-05, "loss": 0.1653, "step": 1677 }, { "epoch": 1.0525325388113533, "grad_norm": 0.7988049387931824, "learning_rate": 1.9268615017645023e-05, "loss": 0.1587, "step": 1678 }, { "epoch": 1.0531597930061158, "grad_norm": 0.8365591764450073, "learning_rate": 1.9248308210368962e-05, "loss": 0.1705, "step": 1679 }, { "epoch": 1.0537870472008781, "grad_norm": 0.8894667029380798, "learning_rate": 1.9228002179090793e-05, "loss": 0.1811, "step": 1680 }, { "epoch": 1.0544143013956406, "grad_norm": 0.7917067408561707, "learning_rate": 1.920769694477316e-05, "loss": 0.1506, "step": 1681 }, { "epoch": 1.055041555590403, "grad_norm": 0.8458444476127625, "learning_rate": 1.9187392528377855e-05, "loss": 0.1511, "step": 1682 }, { "epoch": 1.0556688097851654, "grad_norm": 0.870349109172821, "learning_rate": 1.9167088950865853e-05, "loss": 0.1583, "step": 1683 }, { "epoch": 1.056296063979928, "grad_norm": 0.7882643342018127, "learning_rate": 1.9146786233197247e-05, "loss": 0.1429, "step": 1684 }, { "epoch": 1.0569233181746902, "grad_norm": 0.7835047245025635, "learning_rate": 1.912648439633125e-05, "loss": 0.1432, "step": 1685 }, { "epoch": 1.0575505723694527, "grad_norm": 0.8960422873497009, "learning_rate": 1.910618346122616e-05, "loss": 0.179, "step": 1686 }, { "epoch": 1.0581778265642152, "grad_norm": 0.8102217316627502, "learning_rate": 1.9085883448839347e-05, "loss": 0.157, "step": 1687 }, { "epoch": 1.0588050807589775, "grad_norm": 0.8699423670768738, "learning_rate": 1.9065584380127237e-05, "loss": 0.1655, "step": 1688 }, { "epoch": 1.05943233495374, "grad_norm": 0.8518077731132507, "learning_rate": 1.904528627604526e-05, "loss": 0.1653, "step": 1689 }, { "epoch": 1.0600595891485025, "grad_norm": 0.7880319952964783, "learning_rate": 1.902498915754787e-05, "loss": 0.1574, "step": 1690 }, { "epoch": 1.0606868433432648, "grad_norm": 0.8024574518203735, "learning_rate": 1.90046930455885e-05, "loss": 0.1671, "step": 1691 }, { "epoch": 1.0613140975380273, "grad_norm": 0.7825784683227539, "learning_rate": 1.8984397961119533e-05, "loss": 0.1459, "step": 1692 }, { "epoch": 1.0619413517327898, "grad_norm": 0.7848218679428101, "learning_rate": 1.8964103925092304e-05, "loss": 0.1638, "step": 1693 }, { "epoch": 1.062568605927552, "grad_norm": 0.7753551602363586, "learning_rate": 1.8943810958457066e-05, "loss": 0.1304, "step": 1694 }, { "epoch": 1.0631958601223146, "grad_norm": 0.7678571343421936, "learning_rate": 1.8923519082162956e-05, "loss": 0.1498, "step": 1695 }, { "epoch": 1.0638231143170769, "grad_norm": 0.8414244055747986, "learning_rate": 1.890322831715799e-05, "loss": 0.1661, "step": 1696 }, { "epoch": 1.0644503685118394, "grad_norm": 0.8053593635559082, "learning_rate": 1.888293868438905e-05, "loss": 0.1605, "step": 1697 }, { "epoch": 1.065077622706602, "grad_norm": 0.5466974377632141, "learning_rate": 1.886265020480182e-05, "loss": 0.1214, "step": 1698 }, { "epoch": 1.0657048769013642, "grad_norm": 0.8131811618804932, "learning_rate": 1.8842362899340834e-05, "loss": 0.1454, "step": 1699 }, { "epoch": 1.0663321310961267, "grad_norm": 1.0224452018737793, "learning_rate": 1.8822076788949376e-05, "loss": 0.1797, "step": 1700 }, { "epoch": 1.0669593852908892, "grad_norm": 0.7746246457099915, "learning_rate": 1.8801791894569513e-05, "loss": 0.1576, "step": 1701 }, { "epoch": 1.0675866394856515, "grad_norm": 0.8536249995231628, "learning_rate": 1.8781508237142064e-05, "loss": 0.1587, "step": 1702 }, { "epoch": 1.068213893680414, "grad_norm": 0.9272940158843994, "learning_rate": 1.8761225837606552e-05, "loss": 0.1656, "step": 1703 }, { "epoch": 1.0688411478751765, "grad_norm": 0.9104244709014893, "learning_rate": 1.8740944716901218e-05, "loss": 0.1744, "step": 1704 }, { "epoch": 1.0694684020699388, "grad_norm": 0.8467017412185669, "learning_rate": 1.872066489596298e-05, "loss": 0.1486, "step": 1705 }, { "epoch": 1.0700956562647013, "grad_norm": 0.768990695476532, "learning_rate": 1.8700386395727403e-05, "loss": 0.1367, "step": 1706 }, { "epoch": 1.0707229104594638, "grad_norm": 0.9271062016487122, "learning_rate": 1.8680109237128712e-05, "loss": 0.1706, "step": 1707 }, { "epoch": 1.071350164654226, "grad_norm": 0.791062593460083, "learning_rate": 1.865983344109971e-05, "loss": 0.1624, "step": 1708 }, { "epoch": 1.0719774188489886, "grad_norm": 0.8225182294845581, "learning_rate": 1.8639559028571832e-05, "loss": 0.1357, "step": 1709 }, { "epoch": 1.072604673043751, "grad_norm": 0.8716280460357666, "learning_rate": 1.8619286020475054e-05, "loss": 0.1525, "step": 1710 }, { "epoch": 1.0732319272385134, "grad_norm": 1.0095592737197876, "learning_rate": 1.8599014437737924e-05, "loss": 0.1718, "step": 1711 }, { "epoch": 1.0738591814332759, "grad_norm": 0.8246644139289856, "learning_rate": 1.8578744301287518e-05, "loss": 0.156, "step": 1712 }, { "epoch": 1.0744864356280384, "grad_norm": 0.939094603061676, "learning_rate": 1.85584756320494e-05, "loss": 0.1817, "step": 1713 }, { "epoch": 1.0751136898228006, "grad_norm": 0.8982072472572327, "learning_rate": 1.8538208450947632e-05, "loss": 0.1633, "step": 1714 }, { "epoch": 1.0757409440175631, "grad_norm": 1.0006636381149292, "learning_rate": 1.851794277890474e-05, "loss": 0.157, "step": 1715 }, { "epoch": 1.0763681982123257, "grad_norm": 0.7728843688964844, "learning_rate": 1.849767863684169e-05, "loss": 0.1486, "step": 1716 }, { "epoch": 1.076995452407088, "grad_norm": 0.7604233622550964, "learning_rate": 1.8477416045677875e-05, "loss": 0.1469, "step": 1717 }, { "epoch": 1.0776227066018504, "grad_norm": 0.8282923698425293, "learning_rate": 1.8457155026331077e-05, "loss": 0.1576, "step": 1718 }, { "epoch": 1.0782499607966127, "grad_norm": 0.7721243500709534, "learning_rate": 1.8436895599717456e-05, "loss": 0.1365, "step": 1719 }, { "epoch": 1.0788772149913752, "grad_norm": 1.1395034790039062, "learning_rate": 1.8416637786751537e-05, "loss": 0.1945, "step": 1720 }, { "epoch": 1.0795044691861377, "grad_norm": 0.7299290895462036, "learning_rate": 1.8396381608346168e-05, "loss": 0.1142, "step": 1721 }, { "epoch": 1.0801317233809, "grad_norm": 0.8539410829544067, "learning_rate": 1.837612708541252e-05, "loss": 0.1388, "step": 1722 }, { "epoch": 1.0807589775756625, "grad_norm": 0.9020755887031555, "learning_rate": 1.8355874238860053e-05, "loss": 0.1621, "step": 1723 }, { "epoch": 1.081386231770425, "grad_norm": 0.7987878322601318, "learning_rate": 1.8335623089596483e-05, "loss": 0.1441, "step": 1724 }, { "epoch": 1.0820134859651873, "grad_norm": 0.778041660785675, "learning_rate": 1.8315373658527788e-05, "loss": 0.1513, "step": 1725 }, { "epoch": 1.0826407401599498, "grad_norm": 0.9035307168960571, "learning_rate": 1.8295125966558175e-05, "loss": 0.1855, "step": 1726 }, { "epoch": 1.0832679943547123, "grad_norm": 0.9106894135475159, "learning_rate": 1.8274880034590043e-05, "loss": 0.1713, "step": 1727 }, { "epoch": 1.0838952485494746, "grad_norm": 0.8942307829856873, "learning_rate": 1.8254635883523986e-05, "loss": 0.1654, "step": 1728 }, { "epoch": 1.0845225027442371, "grad_norm": 0.868350625038147, "learning_rate": 1.8234393534258744e-05, "loss": 0.1444, "step": 1729 }, { "epoch": 1.0851497569389996, "grad_norm": 0.7952873110771179, "learning_rate": 1.8214153007691216e-05, "loss": 0.1429, "step": 1730 }, { "epoch": 1.085777011133762, "grad_norm": 0.9205557107925415, "learning_rate": 1.819391432471641e-05, "loss": 0.1652, "step": 1731 }, { "epoch": 1.0864042653285244, "grad_norm": 0.8573682904243469, "learning_rate": 1.817367750622742e-05, "loss": 0.1892, "step": 1732 }, { "epoch": 1.087031519523287, "grad_norm": 0.8403246402740479, "learning_rate": 1.8153442573115445e-05, "loss": 0.1425, "step": 1733 }, { "epoch": 1.0876587737180492, "grad_norm": 0.9676584601402283, "learning_rate": 1.81332095462697e-05, "loss": 0.1943, "step": 1734 }, { "epoch": 1.0882860279128117, "grad_norm": 0.7942099571228027, "learning_rate": 1.811297844657746e-05, "loss": 0.153, "step": 1735 }, { "epoch": 1.088913282107574, "grad_norm": 0.8771933913230896, "learning_rate": 1.8092749294924e-05, "loss": 0.1371, "step": 1736 }, { "epoch": 1.0895405363023365, "grad_norm": 0.86127769947052, "learning_rate": 1.807252211219258e-05, "loss": 0.1902, "step": 1737 }, { "epoch": 1.090167790497099, "grad_norm": 0.957034170627594, "learning_rate": 1.8052296919264447e-05, "loss": 0.1623, "step": 1738 }, { "epoch": 1.0907950446918613, "grad_norm": 0.8910097479820251, "learning_rate": 1.8032073737018766e-05, "loss": 0.1546, "step": 1739 }, { "epoch": 1.0914222988866238, "grad_norm": 0.9799900650978088, "learning_rate": 1.8011852586332643e-05, "loss": 0.1501, "step": 1740 }, { "epoch": 1.0920495530813863, "grad_norm": 0.8283793330192566, "learning_rate": 1.7991633488081092e-05, "loss": 0.1539, "step": 1741 }, { "epoch": 1.0926768072761486, "grad_norm": 0.7672228813171387, "learning_rate": 1.7971416463136984e-05, "loss": 0.1345, "step": 1742 }, { "epoch": 1.093304061470911, "grad_norm": 0.8319811224937439, "learning_rate": 1.7951201532371078e-05, "loss": 0.1458, "step": 1743 }, { "epoch": 1.0939313156656736, "grad_norm": 0.7316347360610962, "learning_rate": 1.7930988716651963e-05, "loss": 0.1445, "step": 1744 }, { "epoch": 1.0945585698604359, "grad_norm": 0.9188635349273682, "learning_rate": 1.7910778036846026e-05, "loss": 0.1725, "step": 1745 }, { "epoch": 1.0951858240551984, "grad_norm": 0.8345009684562683, "learning_rate": 1.7890569513817473e-05, "loss": 0.144, "step": 1746 }, { "epoch": 1.0958130782499609, "grad_norm": 0.8347561955451965, "learning_rate": 1.787036316842827e-05, "loss": 0.1568, "step": 1747 }, { "epoch": 1.0964403324447232, "grad_norm": 0.7968394160270691, "learning_rate": 1.7850159021538135e-05, "loss": 0.1358, "step": 1748 }, { "epoch": 1.0970675866394857, "grad_norm": 0.7191606163978577, "learning_rate": 1.7829957094004532e-05, "loss": 0.1455, "step": 1749 }, { "epoch": 1.0976948408342482, "grad_norm": 0.7700591087341309, "learning_rate": 1.780975740668261e-05, "loss": 0.1414, "step": 1750 }, { "epoch": 1.0983220950290105, "grad_norm": 0.8214758038520813, "learning_rate": 1.778955998042522e-05, "loss": 0.1571, "step": 1751 }, { "epoch": 1.098949349223773, "grad_norm": 0.8999004364013672, "learning_rate": 1.7769364836082886e-05, "loss": 0.1753, "step": 1752 }, { "epoch": 1.0995766034185355, "grad_norm": 0.8949674367904663, "learning_rate": 1.7749171994503752e-05, "loss": 0.1605, "step": 1753 }, { "epoch": 1.1002038576132978, "grad_norm": 0.8810386061668396, "learning_rate": 1.7728981476533608e-05, "loss": 0.1645, "step": 1754 }, { "epoch": 1.1008311118080603, "grad_norm": 0.8616423606872559, "learning_rate": 1.7708793303015835e-05, "loss": 0.1594, "step": 1755 }, { "epoch": 1.1014583660028225, "grad_norm": 0.8571405410766602, "learning_rate": 1.7688607494791394e-05, "loss": 0.1804, "step": 1756 }, { "epoch": 1.102085620197585, "grad_norm": 0.7818455696105957, "learning_rate": 1.7668424072698806e-05, "loss": 0.1613, "step": 1757 }, { "epoch": 1.1027128743923476, "grad_norm": 0.8706740140914917, "learning_rate": 1.7648243057574136e-05, "loss": 0.1821, "step": 1758 }, { "epoch": 1.1033401285871098, "grad_norm": 0.7925953269004822, "learning_rate": 1.762806447025094e-05, "loss": 0.1467, "step": 1759 }, { "epoch": 1.1039673827818723, "grad_norm": 0.9717243909835815, "learning_rate": 1.760788833156029e-05, "loss": 0.1661, "step": 1760 }, { "epoch": 1.1045946369766348, "grad_norm": 1.0377686023712158, "learning_rate": 1.758771466233073e-05, "loss": 0.1895, "step": 1761 }, { "epoch": 1.1052218911713971, "grad_norm": 0.876140832901001, "learning_rate": 1.756754348338825e-05, "loss": 0.1473, "step": 1762 }, { "epoch": 1.1058491453661596, "grad_norm": 0.6880743503570557, "learning_rate": 1.7547374815556257e-05, "loss": 0.119, "step": 1763 }, { "epoch": 1.1064763995609221, "grad_norm": 0.9143416881561279, "learning_rate": 1.7527208679655587e-05, "loss": 0.1698, "step": 1764 }, { "epoch": 1.1071036537556844, "grad_norm": 0.8823429346084595, "learning_rate": 1.7507045096504446e-05, "loss": 0.1396, "step": 1765 }, { "epoch": 1.107730907950447, "grad_norm": 0.9152858853340149, "learning_rate": 1.7486884086918414e-05, "loss": 0.1541, "step": 1766 }, { "epoch": 1.1083581621452094, "grad_norm": 0.7654640078544617, "learning_rate": 1.7466725671710417e-05, "loss": 0.1334, "step": 1767 }, { "epoch": 1.1089854163399717, "grad_norm": 0.7074160575866699, "learning_rate": 1.744656987169068e-05, "loss": 0.1412, "step": 1768 }, { "epoch": 1.1096126705347342, "grad_norm": 0.8951393961906433, "learning_rate": 1.742641670766676e-05, "loss": 0.1636, "step": 1769 }, { "epoch": 1.1102399247294967, "grad_norm": 0.8777140378952026, "learning_rate": 1.7406266200443472e-05, "loss": 0.1382, "step": 1770 }, { "epoch": 1.110867178924259, "grad_norm": 0.8367090821266174, "learning_rate": 1.7386118370822895e-05, "loss": 0.182, "step": 1771 }, { "epoch": 1.1114944331190215, "grad_norm": 0.751369833946228, "learning_rate": 1.7365973239604342e-05, "loss": 0.146, "step": 1772 }, { "epoch": 1.1121216873137838, "grad_norm": 0.9562219977378845, "learning_rate": 1.7345830827584344e-05, "loss": 0.1548, "step": 1773 }, { "epoch": 1.1127489415085463, "grad_norm": 0.804206907749176, "learning_rate": 1.7325691155556613e-05, "loss": 0.1645, "step": 1774 }, { "epoch": 1.1133761957033088, "grad_norm": 0.8179658055305481, "learning_rate": 1.7305554244312054e-05, "loss": 0.1617, "step": 1775 }, { "epoch": 1.114003449898071, "grad_norm": 0.8444033265113831, "learning_rate": 1.7285420114638703e-05, "loss": 0.158, "step": 1776 }, { "epoch": 1.1146307040928336, "grad_norm": 0.8494364023208618, "learning_rate": 1.7265288787321728e-05, "loss": 0.1606, "step": 1777 }, { "epoch": 1.115257958287596, "grad_norm": 0.8242295980453491, "learning_rate": 1.7245160283143415e-05, "loss": 0.1507, "step": 1778 }, { "epoch": 1.1158852124823584, "grad_norm": 0.860016942024231, "learning_rate": 1.7225034622883118e-05, "loss": 0.1604, "step": 1779 }, { "epoch": 1.116512466677121, "grad_norm": 0.7402696013450623, "learning_rate": 1.7204911827317268e-05, "loss": 0.1454, "step": 1780 }, { "epoch": 1.1171397208718834, "grad_norm": 0.804934024810791, "learning_rate": 1.7184791917219345e-05, "loss": 0.1468, "step": 1781 }, { "epoch": 1.1177669750666457, "grad_norm": 0.8034117221832275, "learning_rate": 1.7164674913359822e-05, "loss": 0.1401, "step": 1782 }, { "epoch": 1.1183942292614082, "grad_norm": 0.7933990955352783, "learning_rate": 1.7144560836506204e-05, "loss": 0.1571, "step": 1783 }, { "epoch": 1.1190214834561707, "grad_norm": 0.8252025246620178, "learning_rate": 1.7124449707422963e-05, "loss": 0.1603, "step": 1784 }, { "epoch": 1.119648737650933, "grad_norm": 0.8010521531105042, "learning_rate": 1.7104341546871515e-05, "loss": 0.1708, "step": 1785 }, { "epoch": 1.1202759918456955, "grad_norm": 0.7285284996032715, "learning_rate": 1.7084236375610225e-05, "loss": 0.1218, "step": 1786 }, { "epoch": 1.120903246040458, "grad_norm": 0.7609200477600098, "learning_rate": 1.7064134214394367e-05, "loss": 0.1528, "step": 1787 }, { "epoch": 1.1215305002352203, "grad_norm": 0.9738415479660034, "learning_rate": 1.704403508397612e-05, "loss": 0.1575, "step": 1788 }, { "epoch": 1.1221577544299828, "grad_norm": 0.7736012935638428, "learning_rate": 1.7023939005104513e-05, "loss": 0.1312, "step": 1789 }, { "epoch": 1.1227850086247453, "grad_norm": 0.8661496043205261, "learning_rate": 1.7003845998525438e-05, "loss": 0.145, "step": 1790 }, { "epoch": 1.1234122628195076, "grad_norm": 0.9104440808296204, "learning_rate": 1.698375608498162e-05, "loss": 0.1496, "step": 1791 }, { "epoch": 1.12403951701427, "grad_norm": 0.9806966781616211, "learning_rate": 1.6963669285212575e-05, "loss": 0.1438, "step": 1792 }, { "epoch": 1.1246667712090324, "grad_norm": 0.8026207089424133, "learning_rate": 1.6943585619954623e-05, "loss": 0.1556, "step": 1793 }, { "epoch": 1.1252940254037949, "grad_norm": 0.7851147055625916, "learning_rate": 1.6923505109940823e-05, "loss": 0.1505, "step": 1794 }, { "epoch": 1.1259212795985574, "grad_norm": 0.7137161493301392, "learning_rate": 1.6903427775901006e-05, "loss": 0.126, "step": 1795 }, { "epoch": 1.1265485337933197, "grad_norm": 0.8256273865699768, "learning_rate": 1.6883353638561713e-05, "loss": 0.1476, "step": 1796 }, { "epoch": 1.1271757879880822, "grad_norm": 1.027342438697815, "learning_rate": 1.686328271864617e-05, "loss": 0.1624, "step": 1797 }, { "epoch": 1.1278030421828447, "grad_norm": 0.957568883895874, "learning_rate": 1.68432150368743e-05, "loss": 0.1767, "step": 1798 }, { "epoch": 1.128430296377607, "grad_norm": 0.9146484732627869, "learning_rate": 1.6823150613962686e-05, "loss": 0.1623, "step": 1799 }, { "epoch": 1.1290575505723695, "grad_norm": 0.9292408227920532, "learning_rate": 1.6803089470624524e-05, "loss": 0.1562, "step": 1800 }, { "epoch": 1.129684804767132, "grad_norm": 0.8380172848701477, "learning_rate": 1.6783031627569645e-05, "loss": 0.163, "step": 1801 }, { "epoch": 1.1303120589618942, "grad_norm": 0.798717737197876, "learning_rate": 1.676297710550447e-05, "loss": 0.1305, "step": 1802 }, { "epoch": 1.1309393131566567, "grad_norm": 0.8696675896644592, "learning_rate": 1.6742925925131984e-05, "loss": 0.1803, "step": 1803 }, { "epoch": 1.1315665673514193, "grad_norm": 0.8462779521942139, "learning_rate": 1.6722878107151723e-05, "loss": 0.18, "step": 1804 }, { "epoch": 1.1321938215461815, "grad_norm": 0.8923316597938538, "learning_rate": 1.6702833672259763e-05, "loss": 0.1615, "step": 1805 }, { "epoch": 1.132821075740944, "grad_norm": 0.9322481751441956, "learning_rate": 1.6682792641148674e-05, "loss": 0.1612, "step": 1806 }, { "epoch": 1.1334483299357063, "grad_norm": 0.8081589937210083, "learning_rate": 1.6662755034507525e-05, "loss": 0.1488, "step": 1807 }, { "epoch": 1.1340755841304688, "grad_norm": 0.902973473072052, "learning_rate": 1.6642720873021833e-05, "loss": 0.1581, "step": 1808 }, { "epoch": 1.1347028383252313, "grad_norm": 0.791506826877594, "learning_rate": 1.662269017737357e-05, "loss": 0.1386, "step": 1809 }, { "epoch": 1.1353300925199936, "grad_norm": 0.6856691241264343, "learning_rate": 1.6602662968241145e-05, "loss": 0.1253, "step": 1810 }, { "epoch": 1.1359573467147561, "grad_norm": 0.7945860624313354, "learning_rate": 1.6582639266299328e-05, "loss": 0.1403, "step": 1811 }, { "epoch": 1.1365846009095186, "grad_norm": 0.8668363690376282, "learning_rate": 1.6562619092219298e-05, "loss": 0.1491, "step": 1812 }, { "epoch": 1.137211855104281, "grad_norm": 0.8494446277618408, "learning_rate": 1.6542602466668586e-05, "loss": 0.1414, "step": 1813 }, { "epoch": 1.1378391092990434, "grad_norm": 0.9679626822471619, "learning_rate": 1.6522589410311067e-05, "loss": 0.1808, "step": 1814 }, { "epoch": 1.138466363493806, "grad_norm": 0.8872665762901306, "learning_rate": 1.6502579943806915e-05, "loss": 0.1499, "step": 1815 }, { "epoch": 1.1390936176885682, "grad_norm": 0.7795957326889038, "learning_rate": 1.648257408781261e-05, "loss": 0.1473, "step": 1816 }, { "epoch": 1.1397208718833307, "grad_norm": 0.8384416103363037, "learning_rate": 1.6462571862980902e-05, "loss": 0.1642, "step": 1817 }, { "epoch": 1.1403481260780932, "grad_norm": 0.8306025862693787, "learning_rate": 1.6442573289960788e-05, "loss": 0.1547, "step": 1818 }, { "epoch": 1.1409753802728555, "grad_norm": 0.8154910206794739, "learning_rate": 1.64225783893975e-05, "loss": 0.1569, "step": 1819 }, { "epoch": 1.141602634467618, "grad_norm": 0.9459575414657593, "learning_rate": 1.6402587181932488e-05, "loss": 0.1817, "step": 1820 }, { "epoch": 1.1422298886623805, "grad_norm": 0.7928250432014465, "learning_rate": 1.6382599688203362e-05, "loss": 0.1214, "step": 1821 }, { "epoch": 1.1428571428571428, "grad_norm": 0.8686726689338684, "learning_rate": 1.636261592884393e-05, "loss": 0.1406, "step": 1822 }, { "epoch": 1.1434843970519053, "grad_norm": 0.849258303642273, "learning_rate": 1.6342635924484123e-05, "loss": 0.1286, "step": 1823 }, { "epoch": 1.1441116512466678, "grad_norm": 0.8623886704444885, "learning_rate": 1.6322659695750003e-05, "loss": 0.1682, "step": 1824 }, { "epoch": 1.14473890544143, "grad_norm": 0.8463473916053772, "learning_rate": 1.6302687263263735e-05, "loss": 0.1682, "step": 1825 }, { "epoch": 1.1453661596361926, "grad_norm": 0.779043436050415, "learning_rate": 1.628271864764357e-05, "loss": 0.1671, "step": 1826 }, { "epoch": 1.145993413830955, "grad_norm": 1.2017778158187866, "learning_rate": 1.6262753869503797e-05, "loss": 0.1809, "step": 1827 }, { "epoch": 1.1466206680257174, "grad_norm": 0.8701357245445251, "learning_rate": 1.624279294945478e-05, "loss": 0.1583, "step": 1828 }, { "epoch": 1.1472479222204799, "grad_norm": 0.8049497008323669, "learning_rate": 1.6222835908102866e-05, "loss": 0.1343, "step": 1829 }, { "epoch": 1.1478751764152424, "grad_norm": 0.8608588576316833, "learning_rate": 1.6202882766050412e-05, "loss": 0.1714, "step": 1830 }, { "epoch": 1.1485024306100047, "grad_norm": 0.7564876079559326, "learning_rate": 1.6182933543895757e-05, "loss": 0.1312, "step": 1831 }, { "epoch": 1.1491296848047672, "grad_norm": 0.8293046951293945, "learning_rate": 1.6162988262233176e-05, "loss": 0.153, "step": 1832 }, { "epoch": 1.1497569389995295, "grad_norm": 0.7152065634727478, "learning_rate": 1.6143046941652887e-05, "loss": 0.1418, "step": 1833 }, { "epoch": 1.150384193194292, "grad_norm": 0.7662420868873596, "learning_rate": 1.6123109602741025e-05, "loss": 0.1358, "step": 1834 }, { "epoch": 1.1510114473890545, "grad_norm": 0.6615224480628967, "learning_rate": 1.6103176266079598e-05, "loss": 0.1275, "step": 1835 }, { "epoch": 1.1516387015838168, "grad_norm": 0.9032341241836548, "learning_rate": 1.6083246952246502e-05, "loss": 0.1534, "step": 1836 }, { "epoch": 1.1522659557785793, "grad_norm": 0.8961499929428101, "learning_rate": 1.6063321681815454e-05, "loss": 0.1574, "step": 1837 }, { "epoch": 1.1528932099733418, "grad_norm": 0.8611046671867371, "learning_rate": 1.6043400475356025e-05, "loss": 0.1766, "step": 1838 }, { "epoch": 1.153520464168104, "grad_norm": 0.8358910083770752, "learning_rate": 1.6023483353433567e-05, "loss": 0.1366, "step": 1839 }, { "epoch": 1.1541477183628666, "grad_norm": 0.8706210255622864, "learning_rate": 1.6003570336609235e-05, "loss": 0.1796, "step": 1840 }, { "epoch": 1.154774972557629, "grad_norm": 2.1682252883911133, "learning_rate": 1.5983661445439933e-05, "loss": 0.186, "step": 1841 }, { "epoch": 1.1554022267523913, "grad_norm": 0.9323776364326477, "learning_rate": 1.596375670047831e-05, "loss": 0.1578, "step": 1842 }, { "epoch": 1.1560294809471539, "grad_norm": 0.8486548066139221, "learning_rate": 1.594385612227274e-05, "loss": 0.1508, "step": 1843 }, { "epoch": 1.1566567351419161, "grad_norm": 0.8433166146278381, "learning_rate": 1.5923959731367285e-05, "loss": 0.1526, "step": 1844 }, { "epoch": 1.1572839893366786, "grad_norm": 0.8981841802597046, "learning_rate": 1.5904067548301688e-05, "loss": 0.1853, "step": 1845 }, { "epoch": 1.1579112435314411, "grad_norm": 0.8817017078399658, "learning_rate": 1.5884179593611357e-05, "loss": 0.1443, "step": 1846 }, { "epoch": 1.1585384977262034, "grad_norm": 0.9960587024688721, "learning_rate": 1.5864295887827324e-05, "loss": 0.1676, "step": 1847 }, { "epoch": 1.159165751920966, "grad_norm": 0.8518686890602112, "learning_rate": 1.5844416451476236e-05, "loss": 0.167, "step": 1848 }, { "epoch": 1.1597930061157284, "grad_norm": 0.8934053182601929, "learning_rate": 1.5824541305080342e-05, "loss": 0.1212, "step": 1849 }, { "epoch": 1.1604202603104907, "grad_norm": 0.8080291152000427, "learning_rate": 1.5804670469157447e-05, "loss": 0.1349, "step": 1850 }, { "epoch": 1.1610475145052532, "grad_norm": 0.7122689485549927, "learning_rate": 1.5784803964220918e-05, "loss": 0.1521, "step": 1851 }, { "epoch": 1.1616747687000157, "grad_norm": 0.9208701252937317, "learning_rate": 1.5764941810779653e-05, "loss": 0.1893, "step": 1852 }, { "epoch": 1.162302022894778, "grad_norm": 0.9723432064056396, "learning_rate": 1.574508402933804e-05, "loss": 0.1564, "step": 1853 }, { "epoch": 1.1629292770895405, "grad_norm": 1.0611326694488525, "learning_rate": 1.5725230640395982e-05, "loss": 0.1711, "step": 1854 }, { "epoch": 1.163556531284303, "grad_norm": 1.152522087097168, "learning_rate": 1.570538166444881e-05, "loss": 0.1553, "step": 1855 }, { "epoch": 1.1641837854790653, "grad_norm": 0.9438923597335815, "learning_rate": 1.568553712198734e-05, "loss": 0.1629, "step": 1856 }, { "epoch": 1.1648110396738278, "grad_norm": 0.7039708495140076, "learning_rate": 1.5665697033497788e-05, "loss": 0.1373, "step": 1857 }, { "epoch": 1.1654382938685903, "grad_norm": 0.9011950492858887, "learning_rate": 1.5645861419461766e-05, "loss": 0.1603, "step": 1858 }, { "epoch": 1.1660655480633526, "grad_norm": 0.9298847913742065, "learning_rate": 1.5626030300356282e-05, "loss": 0.1461, "step": 1859 }, { "epoch": 1.1666928022581151, "grad_norm": 1.023101568222046, "learning_rate": 1.5606203696653704e-05, "loss": 0.1704, "step": 1860 }, { "epoch": 1.1673200564528776, "grad_norm": 0.8772397041320801, "learning_rate": 1.5586381628821724e-05, "loss": 0.1733, "step": 1861 }, { "epoch": 1.16794731064764, "grad_norm": 0.8202458620071411, "learning_rate": 1.5566564117323376e-05, "loss": 0.141, "step": 1862 }, { "epoch": 1.1685745648424024, "grad_norm": 0.7463909983634949, "learning_rate": 1.5546751182616953e-05, "loss": 0.14, "step": 1863 }, { "epoch": 1.169201819037165, "grad_norm": 0.9149695634841919, "learning_rate": 1.552694284515606e-05, "loss": 0.1575, "step": 1864 }, { "epoch": 1.1698290732319272, "grad_norm": 0.7667670845985413, "learning_rate": 1.5507139125389532e-05, "loss": 0.1377, "step": 1865 }, { "epoch": 1.1704563274266897, "grad_norm": 0.9685397148132324, "learning_rate": 1.5487340043761452e-05, "loss": 0.1708, "step": 1866 }, { "epoch": 1.1710835816214522, "grad_norm": 0.7025471329689026, "learning_rate": 1.546754562071111e-05, "loss": 0.1304, "step": 1867 }, { "epoch": 1.1717108358162145, "grad_norm": 0.7441644668579102, "learning_rate": 1.544775587667298e-05, "loss": 0.1225, "step": 1868 }, { "epoch": 1.172338090010977, "grad_norm": 0.7965696454048157, "learning_rate": 1.5427970832076717e-05, "loss": 0.1664, "step": 1869 }, { "epoch": 1.1729653442057393, "grad_norm": 0.9268738031387329, "learning_rate": 1.540819050734712e-05, "loss": 0.1625, "step": 1870 }, { "epoch": 1.1735925984005018, "grad_norm": 0.9017819166183472, "learning_rate": 1.5388414922904106e-05, "loss": 0.1594, "step": 1871 }, { "epoch": 1.1742198525952643, "grad_norm": 0.7799805998802185, "learning_rate": 1.5368644099162717e-05, "loss": 0.1351, "step": 1872 }, { "epoch": 1.1748471067900266, "grad_norm": 0.880843997001648, "learning_rate": 1.5348878056533064e-05, "loss": 0.1697, "step": 1873 }, { "epoch": 1.175474360984789, "grad_norm": 0.9333112835884094, "learning_rate": 1.5329116815420325e-05, "loss": 0.1457, "step": 1874 }, { "epoch": 1.1761016151795516, "grad_norm": 0.8713697195053101, "learning_rate": 1.530936039622474e-05, "loss": 0.1601, "step": 1875 }, { "epoch": 1.1767288693743139, "grad_norm": 0.8385375142097473, "learning_rate": 1.5289608819341538e-05, "loss": 0.1611, "step": 1876 }, { "epoch": 1.1773561235690764, "grad_norm": 0.8101088404655457, "learning_rate": 1.526986210516098e-05, "loss": 0.1552, "step": 1877 }, { "epoch": 1.1779833777638389, "grad_norm": 0.7988011240959167, "learning_rate": 1.5250120274068292e-05, "loss": 0.1547, "step": 1878 }, { "epoch": 1.1786106319586012, "grad_norm": 0.8540886640548706, "learning_rate": 1.5230383346443654e-05, "loss": 0.1571, "step": 1879 }, { "epoch": 1.1792378861533637, "grad_norm": 0.7687944173812866, "learning_rate": 1.5210651342662197e-05, "loss": 0.127, "step": 1880 }, { "epoch": 1.1798651403481262, "grad_norm": 0.6662045121192932, "learning_rate": 1.5190924283093967e-05, "loss": 0.1338, "step": 1881 }, { "epoch": 1.1804923945428885, "grad_norm": 0.8382959961891174, "learning_rate": 1.517120218810389e-05, "loss": 0.1502, "step": 1882 }, { "epoch": 1.181119648737651, "grad_norm": 0.7887606024742126, "learning_rate": 1.515148507805179e-05, "loss": 0.1623, "step": 1883 }, { "epoch": 1.1817469029324132, "grad_norm": 1.0467164516448975, "learning_rate": 1.5131772973292334e-05, "loss": 0.1724, "step": 1884 }, { "epoch": 1.1823741571271758, "grad_norm": 0.972026526927948, "learning_rate": 1.5112065894175011e-05, "loss": 0.1625, "step": 1885 }, { "epoch": 1.1830014113219383, "grad_norm": 0.702070415019989, "learning_rate": 1.5092363861044146e-05, "loss": 0.1297, "step": 1886 }, { "epoch": 1.1836286655167005, "grad_norm": 0.9498781561851501, "learning_rate": 1.5072666894238835e-05, "loss": 0.1802, "step": 1887 }, { "epoch": 1.184255919711463, "grad_norm": 0.8403610587120056, "learning_rate": 1.5052975014092947e-05, "loss": 0.1514, "step": 1888 }, { "epoch": 1.1848831739062256, "grad_norm": 0.9342296719551086, "learning_rate": 1.5033288240935106e-05, "loss": 0.1659, "step": 1889 }, { "epoch": 1.1855104281009878, "grad_norm": 0.738856315612793, "learning_rate": 1.5013606595088663e-05, "loss": 0.1297, "step": 1890 }, { "epoch": 1.1861376822957503, "grad_norm": 0.7916891574859619, "learning_rate": 1.4993930096871668e-05, "loss": 0.1625, "step": 1891 }, { "epoch": 1.1867649364905128, "grad_norm": 0.7939260005950928, "learning_rate": 1.4974258766596866e-05, "loss": 0.1635, "step": 1892 }, { "epoch": 1.1873921906852751, "grad_norm": 0.8619648218154907, "learning_rate": 1.495459262457167e-05, "loss": 0.168, "step": 1893 }, { "epoch": 1.1880194448800376, "grad_norm": 0.9105474352836609, "learning_rate": 1.493493169109812e-05, "loss": 0.1868, "step": 1894 }, { "epoch": 1.1886466990748001, "grad_norm": 0.9018944501876831, "learning_rate": 1.4915275986472896e-05, "loss": 0.1671, "step": 1895 }, { "epoch": 1.1892739532695624, "grad_norm": 0.906952440738678, "learning_rate": 1.4895625530987277e-05, "loss": 0.1768, "step": 1896 }, { "epoch": 1.189901207464325, "grad_norm": 0.7171775102615356, "learning_rate": 1.4875980344927112e-05, "loss": 0.1674, "step": 1897 }, { "epoch": 1.1905284616590874, "grad_norm": 0.7971838116645813, "learning_rate": 1.4856340448572821e-05, "loss": 0.1412, "step": 1898 }, { "epoch": 1.1911557158538497, "grad_norm": 0.9042351841926575, "learning_rate": 1.4836705862199367e-05, "loss": 0.1784, "step": 1899 }, { "epoch": 1.1917829700486122, "grad_norm": 0.7793372869491577, "learning_rate": 1.4817076606076213e-05, "loss": 0.1529, "step": 1900 }, { "epoch": 1.1924102242433747, "grad_norm": 1.0335478782653809, "learning_rate": 1.4797452700467339e-05, "loss": 0.1947, "step": 1901 }, { "epoch": 1.193037478438137, "grad_norm": 0.8207297921180725, "learning_rate": 1.4777834165631193e-05, "loss": 0.1745, "step": 1902 }, { "epoch": 1.1936647326328995, "grad_norm": 0.8895414471626282, "learning_rate": 1.4758221021820678e-05, "loss": 0.1516, "step": 1903 }, { "epoch": 1.194291986827662, "grad_norm": 0.7220791578292847, "learning_rate": 1.4738613289283134e-05, "loss": 0.1381, "step": 1904 }, { "epoch": 1.1949192410224243, "grad_norm": 0.8622303009033203, "learning_rate": 1.4719010988260309e-05, "loss": 0.1603, "step": 1905 }, { "epoch": 1.1955464952171868, "grad_norm": 0.875914454460144, "learning_rate": 1.4699414138988352e-05, "loss": 0.1518, "step": 1906 }, { "epoch": 1.1961737494119493, "grad_norm": 0.7252442240715027, "learning_rate": 1.4679822761697784e-05, "loss": 0.1415, "step": 1907 }, { "epoch": 1.1968010036067116, "grad_norm": 0.7394492626190186, "learning_rate": 1.4660236876613465e-05, "loss": 0.1274, "step": 1908 }, { "epoch": 1.197428257801474, "grad_norm": 0.9310888648033142, "learning_rate": 1.46406565039546e-05, "loss": 0.145, "step": 1909 }, { "epoch": 1.1980555119962364, "grad_norm": 0.8693878054618835, "learning_rate": 1.46210816639347e-05, "loss": 0.1623, "step": 1910 }, { "epoch": 1.198682766190999, "grad_norm": 0.6786130666732788, "learning_rate": 1.4601512376761551e-05, "loss": 0.1269, "step": 1911 }, { "epoch": 1.1993100203857614, "grad_norm": 0.8285700678825378, "learning_rate": 1.458194866263722e-05, "loss": 0.1727, "step": 1912 }, { "epoch": 1.1999372745805237, "grad_norm": 0.841225266456604, "learning_rate": 1.4562390541758034e-05, "loss": 0.1618, "step": 1913 }, { "epoch": 1.2005645287752862, "grad_norm": 0.721674382686615, "learning_rate": 1.454283803431451e-05, "loss": 0.1402, "step": 1914 }, { "epoch": 1.2011917829700487, "grad_norm": 0.8150315284729004, "learning_rate": 1.452329116049139e-05, "loss": 0.1542, "step": 1915 }, { "epoch": 1.201819037164811, "grad_norm": 0.9047179222106934, "learning_rate": 1.450374994046761e-05, "loss": 0.1654, "step": 1916 }, { "epoch": 1.2024462913595735, "grad_norm": 0.8281776905059814, "learning_rate": 1.4484214394416256e-05, "loss": 0.1549, "step": 1917 }, { "epoch": 1.203073545554336, "grad_norm": 0.9416970610618591, "learning_rate": 1.4464684542504559e-05, "loss": 0.1579, "step": 1918 }, { "epoch": 1.2037007997490983, "grad_norm": 0.93312007188797, "learning_rate": 1.444516040489387e-05, "loss": 0.1842, "step": 1919 }, { "epoch": 1.2043280539438608, "grad_norm": 0.8868372440338135, "learning_rate": 1.4425642001739651e-05, "loss": 0.1647, "step": 1920 }, { "epoch": 1.204955308138623, "grad_norm": 0.8125755786895752, "learning_rate": 1.440612935319143e-05, "loss": 0.1755, "step": 1921 }, { "epoch": 1.2055825623333856, "grad_norm": 0.823745608329773, "learning_rate": 1.4386622479392807e-05, "loss": 0.1257, "step": 1922 }, { "epoch": 1.206209816528148, "grad_norm": 0.7787184119224548, "learning_rate": 1.4367121400481405e-05, "loss": 0.138, "step": 1923 }, { "epoch": 1.2068370707229104, "grad_norm": 0.810903012752533, "learning_rate": 1.4347626136588881e-05, "loss": 0.1487, "step": 1924 }, { "epoch": 1.2074643249176729, "grad_norm": 0.7609171867370605, "learning_rate": 1.4328136707840885e-05, "loss": 0.1444, "step": 1925 }, { "epoch": 1.2080915791124354, "grad_norm": 0.7199875712394714, "learning_rate": 1.4308653134357034e-05, "loss": 0.1343, "step": 1926 }, { "epoch": 1.2087188333071976, "grad_norm": 0.7784453630447388, "learning_rate": 1.4289175436250908e-05, "loss": 0.1401, "step": 1927 }, { "epoch": 1.2093460875019602, "grad_norm": 0.8262721300125122, "learning_rate": 1.4269703633630028e-05, "loss": 0.1307, "step": 1928 }, { "epoch": 1.2099733416967227, "grad_norm": 0.7545008063316345, "learning_rate": 1.425023774659581e-05, "loss": 0.1401, "step": 1929 }, { "epoch": 1.210600595891485, "grad_norm": 0.7401162385940552, "learning_rate": 1.4230777795243577e-05, "loss": 0.1198, "step": 1930 }, { "epoch": 1.2112278500862474, "grad_norm": 0.8155125975608826, "learning_rate": 1.4211323799662533e-05, "loss": 0.1568, "step": 1931 }, { "epoch": 1.21185510428101, "grad_norm": 0.8564190864562988, "learning_rate": 1.4191875779935709e-05, "loss": 0.1618, "step": 1932 }, { "epoch": 1.2124823584757722, "grad_norm": 0.9466654062271118, "learning_rate": 1.417243375613999e-05, "loss": 0.163, "step": 1933 }, { "epoch": 1.2131096126705347, "grad_norm": 0.7641735076904297, "learning_rate": 1.4152997748346052e-05, "loss": 0.1456, "step": 1934 }, { "epoch": 1.2137368668652972, "grad_norm": 0.8033687472343445, "learning_rate": 1.4133567776618376e-05, "loss": 0.1373, "step": 1935 }, { "epoch": 1.2143641210600595, "grad_norm": 0.812255322933197, "learning_rate": 1.4114143861015208e-05, "loss": 0.1571, "step": 1936 }, { "epoch": 1.214991375254822, "grad_norm": 0.7928274869918823, "learning_rate": 1.4094726021588532e-05, "loss": 0.1458, "step": 1937 }, { "epoch": 1.2156186294495845, "grad_norm": 0.851686418056488, "learning_rate": 1.4075314278384074e-05, "loss": 0.1336, "step": 1938 }, { "epoch": 1.2162458836443468, "grad_norm": 0.8840798139572144, "learning_rate": 1.4055908651441263e-05, "loss": 0.1512, "step": 1939 }, { "epoch": 1.2168731378391093, "grad_norm": 0.7764620780944824, "learning_rate": 1.4036509160793206e-05, "loss": 0.1341, "step": 1940 }, { "epoch": 1.2175003920338718, "grad_norm": 0.8411002159118652, "learning_rate": 1.4017115826466673e-05, "loss": 0.1472, "step": 1941 }, { "epoch": 1.2181276462286341, "grad_norm": 0.673572838306427, "learning_rate": 1.3997728668482093e-05, "loss": 0.1179, "step": 1942 }, { "epoch": 1.2187549004233966, "grad_norm": 0.838390052318573, "learning_rate": 1.3978347706853518e-05, "loss": 0.1295, "step": 1943 }, { "epoch": 1.2193821546181591, "grad_norm": 0.8127596974372864, "learning_rate": 1.3958972961588583e-05, "loss": 0.1454, "step": 1944 }, { "epoch": 1.2200094088129214, "grad_norm": 0.7803665399551392, "learning_rate": 1.393960445268853e-05, "loss": 0.1448, "step": 1945 }, { "epoch": 1.220636663007684, "grad_norm": 0.8282830119132996, "learning_rate": 1.392024220014815e-05, "loss": 0.1598, "step": 1946 }, { "epoch": 1.2212639172024462, "grad_norm": 0.7870911359786987, "learning_rate": 1.3900886223955778e-05, "loss": 0.1562, "step": 1947 }, { "epoch": 1.2218911713972087, "grad_norm": 0.8417776823043823, "learning_rate": 1.3881536544093266e-05, "loss": 0.16, "step": 1948 }, { "epoch": 1.2225184255919712, "grad_norm": 0.9041074514389038, "learning_rate": 1.3862193180535979e-05, "loss": 0.1648, "step": 1949 }, { "epoch": 1.2231456797867335, "grad_norm": 1.0169646739959717, "learning_rate": 1.3842856153252737e-05, "loss": 0.1943, "step": 1950 }, { "epoch": 1.223772933981496, "grad_norm": 0.9628655910491943, "learning_rate": 1.382352548220585e-05, "loss": 0.2029, "step": 1951 }, { "epoch": 1.2244001881762585, "grad_norm": 0.7748029828071594, "learning_rate": 1.380420118735104e-05, "loss": 0.128, "step": 1952 }, { "epoch": 1.2250274423710208, "grad_norm": 0.7870191335678101, "learning_rate": 1.378488328863746e-05, "loss": 0.1264, "step": 1953 }, { "epoch": 1.2256546965657833, "grad_norm": 0.9672967195510864, "learning_rate": 1.376557180600766e-05, "loss": 0.1577, "step": 1954 }, { "epoch": 1.2262819507605458, "grad_norm": 0.8693897128105164, "learning_rate": 1.374626675939756e-05, "loss": 0.1349, "step": 1955 }, { "epoch": 1.226909204955308, "grad_norm": 0.8322724103927612, "learning_rate": 1.3726968168736434e-05, "loss": 0.141, "step": 1956 }, { "epoch": 1.2275364591500706, "grad_norm": 0.8421970009803772, "learning_rate": 1.3707676053946909e-05, "loss": 0.1399, "step": 1957 }, { "epoch": 1.2281637133448329, "grad_norm": 0.8611185550689697, "learning_rate": 1.3688390434944903e-05, "loss": 0.1438, "step": 1958 }, { "epoch": 1.2287909675395954, "grad_norm": 0.8574026823043823, "learning_rate": 1.3669111331639644e-05, "loss": 0.1369, "step": 1959 }, { "epoch": 1.2294182217343579, "grad_norm": 0.9769577383995056, "learning_rate": 1.3649838763933633e-05, "loss": 0.1591, "step": 1960 }, { "epoch": 1.2300454759291202, "grad_norm": 0.8494389057159424, "learning_rate": 1.3630572751722611e-05, "loss": 0.1361, "step": 1961 }, { "epoch": 1.2306727301238827, "grad_norm": 0.8490456938743591, "learning_rate": 1.3611313314895567e-05, "loss": 0.1638, "step": 1962 }, { "epoch": 1.2312999843186452, "grad_norm": 0.9514238834381104, "learning_rate": 1.3592060473334697e-05, "loss": 0.1557, "step": 1963 }, { "epoch": 1.2319272385134075, "grad_norm": 0.8845386505126953, "learning_rate": 1.357281424691538e-05, "loss": 0.1457, "step": 1964 }, { "epoch": 1.23255449270817, "grad_norm": 0.8918384313583374, "learning_rate": 1.3553574655506185e-05, "loss": 0.1574, "step": 1965 }, { "epoch": 1.2331817469029325, "grad_norm": 0.7711067795753479, "learning_rate": 1.3534341718968805e-05, "loss": 0.1289, "step": 1966 }, { "epoch": 1.2338090010976948, "grad_norm": 0.7397522330284119, "learning_rate": 1.3515115457158088e-05, "loss": 0.1431, "step": 1967 }, { "epoch": 1.2344362552924573, "grad_norm": 0.7456501722335815, "learning_rate": 1.3495895889921973e-05, "loss": 0.1564, "step": 1968 }, { "epoch": 1.2350635094872198, "grad_norm": 0.7631916999816895, "learning_rate": 1.3476683037101506e-05, "loss": 0.1354, "step": 1969 }, { "epoch": 1.235690763681982, "grad_norm": 0.9010176658630371, "learning_rate": 1.3457476918530779e-05, "loss": 0.1641, "step": 1970 }, { "epoch": 1.2363180178767446, "grad_norm": 0.7279947400093079, "learning_rate": 1.343827755403695e-05, "loss": 0.1375, "step": 1971 }, { "epoch": 1.236945272071507, "grad_norm": 0.832119882106781, "learning_rate": 1.3419084963440207e-05, "loss": 0.1682, "step": 1972 }, { "epoch": 1.2375725262662693, "grad_norm": 0.7645889520645142, "learning_rate": 1.3399899166553722e-05, "loss": 0.1405, "step": 1973 }, { "epoch": 1.2381997804610319, "grad_norm": 0.7561132907867432, "learning_rate": 1.3380720183183677e-05, "loss": 0.1493, "step": 1974 }, { "epoch": 1.2388270346557944, "grad_norm": 0.7001355886459351, "learning_rate": 1.3361548033129217e-05, "loss": 0.1265, "step": 1975 }, { "epoch": 1.2394542888505566, "grad_norm": 0.7829381823539734, "learning_rate": 1.3342382736182415e-05, "loss": 0.1576, "step": 1976 }, { "epoch": 1.2400815430453191, "grad_norm": 0.9344700574874878, "learning_rate": 1.332322431212829e-05, "loss": 0.159, "step": 1977 }, { "epoch": 1.2407087972400817, "grad_norm": 0.8084545135498047, "learning_rate": 1.3304072780744759e-05, "loss": 0.1359, "step": 1978 }, { "epoch": 1.241336051434844, "grad_norm": 0.9364140629768372, "learning_rate": 1.3284928161802614e-05, "loss": 0.1706, "step": 1979 }, { "epoch": 1.2419633056296064, "grad_norm": 0.7873445153236389, "learning_rate": 1.3265790475065523e-05, "loss": 0.121, "step": 1980 }, { "epoch": 1.242590559824369, "grad_norm": 0.6898486614227295, "learning_rate": 1.3246659740290002e-05, "loss": 0.1254, "step": 1981 }, { "epoch": 1.2432178140191312, "grad_norm": 0.944259762763977, "learning_rate": 1.3227535977225364e-05, "loss": 0.1484, "step": 1982 }, { "epoch": 1.2438450682138937, "grad_norm": 0.8232392072677612, "learning_rate": 1.3208419205613762e-05, "loss": 0.1266, "step": 1983 }, { "epoch": 1.244472322408656, "grad_norm": 0.8024344444274902, "learning_rate": 1.31893094451901e-05, "loss": 0.146, "step": 1984 }, { "epoch": 1.2450995766034185, "grad_norm": 0.7504177093505859, "learning_rate": 1.3170206715682058e-05, "loss": 0.1398, "step": 1985 }, { "epoch": 1.245726830798181, "grad_norm": 0.9191885590553284, "learning_rate": 1.3151111036810065e-05, "loss": 0.1498, "step": 1986 }, { "epoch": 1.2463540849929433, "grad_norm": 0.9054408073425293, "learning_rate": 1.313202242828725e-05, "loss": 0.1392, "step": 1987 }, { "epoch": 1.2469813391877058, "grad_norm": 0.8019949197769165, "learning_rate": 1.311294090981946e-05, "loss": 0.1446, "step": 1988 }, { "epoch": 1.2476085933824683, "grad_norm": 0.8508473634719849, "learning_rate": 1.3093866501105226e-05, "loss": 0.1444, "step": 1989 }, { "epoch": 1.2482358475772306, "grad_norm": 0.9246096611022949, "learning_rate": 1.3074799221835728e-05, "loss": 0.1505, "step": 1990 }, { "epoch": 1.2488631017719931, "grad_norm": 0.8947358727455139, "learning_rate": 1.3055739091694779e-05, "loss": 0.1621, "step": 1991 }, { "epoch": 1.2494903559667556, "grad_norm": 0.7604474425315857, "learning_rate": 1.3036686130358828e-05, "loss": 0.1221, "step": 1992 }, { "epoch": 1.250117610161518, "grad_norm": 0.8415438532829285, "learning_rate": 1.3017640357496925e-05, "loss": 0.1724, "step": 1993 }, { "epoch": 1.2507448643562804, "grad_norm": 0.8152422308921814, "learning_rate": 1.2998601792770684e-05, "loss": 0.1332, "step": 1994 }, { "epoch": 1.2513721185510427, "grad_norm": 0.870513916015625, "learning_rate": 1.2979570455834289e-05, "loss": 0.1415, "step": 1995 }, { "epoch": 1.2519993727458052, "grad_norm": 0.8987338542938232, "learning_rate": 1.2960546366334458e-05, "loss": 0.1698, "step": 1996 }, { "epoch": 1.2526266269405677, "grad_norm": 0.8302509784698486, "learning_rate": 1.2941529543910429e-05, "loss": 0.1508, "step": 1997 }, { "epoch": 1.25325388113533, "grad_norm": 0.9404724836349487, "learning_rate": 1.292252000819394e-05, "loss": 0.1458, "step": 1998 }, { "epoch": 1.2538811353300925, "grad_norm": 0.6932850480079651, "learning_rate": 1.2903517778809196e-05, "loss": 0.12, "step": 1999 }, { "epoch": 1.254508389524855, "grad_norm": 0.6985989212989807, "learning_rate": 1.2884522875372874e-05, "loss": 0.127, "step": 2000 }, { "epoch": 1.2551356437196173, "grad_norm": 0.7476592659950256, "learning_rate": 1.2865535317494083e-05, "loss": 0.1517, "step": 2001 }, { "epoch": 1.2557628979143798, "grad_norm": 0.7512590885162354, "learning_rate": 1.2846555124774338e-05, "loss": 0.1344, "step": 2002 }, { "epoch": 1.2563901521091423, "grad_norm": 0.7579845190048218, "learning_rate": 1.2827582316807568e-05, "loss": 0.1335, "step": 2003 }, { "epoch": 1.2570174063039046, "grad_norm": 0.8196955323219299, "learning_rate": 1.2808616913180073e-05, "loss": 0.1488, "step": 2004 }, { "epoch": 1.257644660498667, "grad_norm": 0.8363924026489258, "learning_rate": 1.2789658933470497e-05, "loss": 0.1378, "step": 2005 }, { "epoch": 1.2582719146934296, "grad_norm": 0.8742525577545166, "learning_rate": 1.2770708397249836e-05, "loss": 0.1406, "step": 2006 }, { "epoch": 1.2588991688881919, "grad_norm": 0.9705551862716675, "learning_rate": 1.27517653240814e-05, "loss": 0.1775, "step": 2007 }, { "epoch": 1.2595264230829544, "grad_norm": 0.8819530606269836, "learning_rate": 1.2732829733520782e-05, "loss": 0.1589, "step": 2008 }, { "epoch": 1.2601536772777169, "grad_norm": 0.7328044772148132, "learning_rate": 1.2713901645115861e-05, "loss": 0.1168, "step": 2009 }, { "epoch": 1.2607809314724792, "grad_norm": 0.8703687787055969, "learning_rate": 1.2694981078406775e-05, "loss": 0.1561, "step": 2010 }, { "epoch": 1.2614081856672417, "grad_norm": 0.7671546339988708, "learning_rate": 1.2676068052925883e-05, "loss": 0.1458, "step": 2011 }, { "epoch": 1.2620354398620042, "grad_norm": 0.9110318422317505, "learning_rate": 1.2657162588197774e-05, "loss": 0.1748, "step": 2012 }, { "epoch": 1.2626626940567665, "grad_norm": 0.919498085975647, "learning_rate": 1.2638264703739215e-05, "loss": 0.1471, "step": 2013 }, { "epoch": 1.263289948251529, "grad_norm": 0.9211678504943848, "learning_rate": 1.2619374419059165e-05, "loss": 0.18, "step": 2014 }, { "epoch": 1.2639172024462915, "grad_norm": 0.9267973303794861, "learning_rate": 1.2600491753658735e-05, "loss": 0.1491, "step": 2015 }, { "epoch": 1.2645444566410537, "grad_norm": 0.8233495354652405, "learning_rate": 1.2581616727031159e-05, "loss": 0.1558, "step": 2016 }, { "epoch": 1.2651717108358163, "grad_norm": 0.8945194482803345, "learning_rate": 1.2562749358661783e-05, "loss": 0.143, "step": 2017 }, { "epoch": 1.2657989650305788, "grad_norm": 0.7861148118972778, "learning_rate": 1.2543889668028064e-05, "loss": 0.1596, "step": 2018 }, { "epoch": 1.266426219225341, "grad_norm": 0.8920468688011169, "learning_rate": 1.2525037674599529e-05, "loss": 0.1391, "step": 2019 }, { "epoch": 1.2670534734201035, "grad_norm": 1.0049667358398438, "learning_rate": 1.2506193397837746e-05, "loss": 0.1426, "step": 2020 }, { "epoch": 1.267680727614866, "grad_norm": 0.9976416230201721, "learning_rate": 1.2487356857196331e-05, "loss": 0.1685, "step": 2021 }, { "epoch": 1.2683079818096283, "grad_norm": 0.7630930542945862, "learning_rate": 1.2468528072120908e-05, "loss": 0.1473, "step": 2022 }, { "epoch": 1.2689352360043908, "grad_norm": 0.8132131695747375, "learning_rate": 1.2449707062049088e-05, "loss": 0.1658, "step": 2023 }, { "epoch": 1.2695624901991531, "grad_norm": 0.8889040946960449, "learning_rate": 1.2430893846410465e-05, "loss": 0.1531, "step": 2024 }, { "epoch": 1.2701897443939156, "grad_norm": 0.7996199727058411, "learning_rate": 1.2412088444626593e-05, "loss": 0.1516, "step": 2025 }, { "epoch": 1.2708169985886781, "grad_norm": 0.8301317095756531, "learning_rate": 1.239329087611094e-05, "loss": 0.1449, "step": 2026 }, { "epoch": 1.2714442527834404, "grad_norm": 0.8246314525604248, "learning_rate": 1.2374501160268899e-05, "loss": 0.1217, "step": 2027 }, { "epoch": 1.272071506978203, "grad_norm": 0.7794330716133118, "learning_rate": 1.235571931649776e-05, "loss": 0.1351, "step": 2028 }, { "epoch": 1.2726987611729652, "grad_norm": 0.9376643300056458, "learning_rate": 1.2336945364186676e-05, "loss": 0.1797, "step": 2029 }, { "epoch": 1.2733260153677277, "grad_norm": 0.8853619694709778, "learning_rate": 1.2318179322716665e-05, "loss": 0.1446, "step": 2030 }, { "epoch": 1.2739532695624902, "grad_norm": 0.8093166947364807, "learning_rate": 1.2299421211460564e-05, "loss": 0.1457, "step": 2031 }, { "epoch": 1.2745805237572525, "grad_norm": 0.852875828742981, "learning_rate": 1.2280671049783037e-05, "loss": 0.1688, "step": 2032 }, { "epoch": 1.275207777952015, "grad_norm": 0.9001780152320862, "learning_rate": 1.226192885704054e-05, "loss": 0.1567, "step": 2033 }, { "epoch": 1.2758350321467775, "grad_norm": 0.8690427541732788, "learning_rate": 1.2243194652581288e-05, "loss": 0.1473, "step": 2034 }, { "epoch": 1.2764622863415398, "grad_norm": 0.8091393709182739, "learning_rate": 1.2224468455745266e-05, "loss": 0.1603, "step": 2035 }, { "epoch": 1.2770895405363023, "grad_norm": 0.8217348456382751, "learning_rate": 1.2205750285864188e-05, "loss": 0.1657, "step": 2036 }, { "epoch": 1.2777167947310648, "grad_norm": 0.8783489465713501, "learning_rate": 1.2187040162261475e-05, "loss": 0.1197, "step": 2037 }, { "epoch": 1.278344048925827, "grad_norm": 0.7721499800682068, "learning_rate": 1.2168338104252247e-05, "loss": 0.1313, "step": 2038 }, { "epoch": 1.2789713031205896, "grad_norm": 0.8439178466796875, "learning_rate": 1.2149644131143303e-05, "loss": 0.1257, "step": 2039 }, { "epoch": 1.279598557315352, "grad_norm": 0.8752883672714233, "learning_rate": 1.213095826223308e-05, "loss": 0.1364, "step": 2040 }, { "epoch": 1.2802258115101144, "grad_norm": 0.8068944811820984, "learning_rate": 1.2112280516811667e-05, "loss": 0.1578, "step": 2041 }, { "epoch": 1.280853065704877, "grad_norm": 0.779299795627594, "learning_rate": 1.2093610914160759e-05, "loss": 0.1328, "step": 2042 }, { "epoch": 1.2814803198996394, "grad_norm": 0.808593213558197, "learning_rate": 1.2074949473553636e-05, "loss": 0.1445, "step": 2043 }, { "epoch": 1.2821075740944017, "grad_norm": 0.7457171082496643, "learning_rate": 1.205629621425516e-05, "loss": 0.1281, "step": 2044 }, { "epoch": 1.2827348282891642, "grad_norm": 0.7942360043525696, "learning_rate": 1.203765115552175e-05, "loss": 0.1379, "step": 2045 }, { "epoch": 1.2833620824839267, "grad_norm": 0.8352336883544922, "learning_rate": 1.201901431660136e-05, "loss": 0.1461, "step": 2046 }, { "epoch": 1.283989336678689, "grad_norm": 0.7953838109970093, "learning_rate": 1.2000385716733449e-05, "loss": 0.1471, "step": 2047 }, { "epoch": 1.2846165908734515, "grad_norm": 0.8717171549797058, "learning_rate": 1.1981765375148985e-05, "loss": 0.1448, "step": 2048 }, { "epoch": 1.285243845068214, "grad_norm": 0.7027206420898438, "learning_rate": 1.1963153311070392e-05, "loss": 0.1176, "step": 2049 }, { "epoch": 1.2858710992629763, "grad_norm": 0.7868241667747498, "learning_rate": 1.1944549543711563e-05, "loss": 0.146, "step": 2050 }, { "epoch": 1.2864983534577388, "grad_norm": 0.9464139938354492, "learning_rate": 1.1925954092277827e-05, "loss": 0.1678, "step": 2051 }, { "epoch": 1.2871256076525013, "grad_norm": 0.7622083425521851, "learning_rate": 1.1907366975965916e-05, "loss": 0.1458, "step": 2052 }, { "epoch": 1.2877528618472636, "grad_norm": 0.7768915891647339, "learning_rate": 1.1888788213963966e-05, "loss": 0.133, "step": 2053 }, { "epoch": 1.288380116042026, "grad_norm": 0.8117609024047852, "learning_rate": 1.1870217825451493e-05, "loss": 0.1599, "step": 2054 }, { "epoch": 1.2890073702367886, "grad_norm": 0.8793721199035645, "learning_rate": 1.1851655829599355e-05, "loss": 0.1462, "step": 2055 }, { "epoch": 1.2896346244315509, "grad_norm": 0.8287612795829773, "learning_rate": 1.1833102245569755e-05, "loss": 0.1656, "step": 2056 }, { "epoch": 1.2902618786263134, "grad_norm": 0.736072301864624, "learning_rate": 1.1814557092516218e-05, "loss": 0.1303, "step": 2057 }, { "epoch": 1.2908891328210759, "grad_norm": 0.802575409412384, "learning_rate": 1.179602038958355e-05, "loss": 0.1532, "step": 2058 }, { "epoch": 1.2915163870158382, "grad_norm": 0.8144501447677612, "learning_rate": 1.1777492155907844e-05, "loss": 0.1316, "step": 2059 }, { "epoch": 1.2921436412106007, "grad_norm": 0.8047012090682983, "learning_rate": 1.1758972410616452e-05, "loss": 0.1355, "step": 2060 }, { "epoch": 1.292770895405363, "grad_norm": 0.8402323722839355, "learning_rate": 1.1740461172827953e-05, "loss": 0.1408, "step": 2061 }, { "epoch": 1.2933981496001254, "grad_norm": 0.781400203704834, "learning_rate": 1.1721958461652156e-05, "loss": 0.1382, "step": 2062 }, { "epoch": 1.294025403794888, "grad_norm": 0.9326108694076538, "learning_rate": 1.1703464296190056e-05, "loss": 0.1801, "step": 2063 }, { "epoch": 1.2946526579896502, "grad_norm": 0.7394958138465881, "learning_rate": 1.1684978695533831e-05, "loss": 0.1244, "step": 2064 }, { "epoch": 1.2952799121844127, "grad_norm": 0.8357262015342712, "learning_rate": 1.166650167876682e-05, "loss": 0.1501, "step": 2065 }, { "epoch": 1.295907166379175, "grad_norm": 0.7975276112556458, "learning_rate": 1.1648033264963503e-05, "loss": 0.1338, "step": 2066 }, { "epoch": 1.2965344205739375, "grad_norm": 0.8282522559165955, "learning_rate": 1.1629573473189461e-05, "loss": 0.1567, "step": 2067 }, { "epoch": 1.2971616747687, "grad_norm": 0.8194451332092285, "learning_rate": 1.1611122322501403e-05, "loss": 0.1302, "step": 2068 }, { "epoch": 1.2977889289634623, "grad_norm": 0.9897724390029907, "learning_rate": 1.1592679831947083e-05, "loss": 0.1664, "step": 2069 }, { "epoch": 1.2984161831582248, "grad_norm": 0.8145666122436523, "learning_rate": 1.1574246020565341e-05, "loss": 0.1356, "step": 2070 }, { "epoch": 1.2990434373529873, "grad_norm": 0.7804147601127625, "learning_rate": 1.155582090738605e-05, "loss": 0.1444, "step": 2071 }, { "epoch": 1.2996706915477496, "grad_norm": 0.7265585064888, "learning_rate": 1.1537404511430106e-05, "loss": 0.1287, "step": 2072 }, { "epoch": 1.3002979457425121, "grad_norm": 0.8573222756385803, "learning_rate": 1.1518996851709408e-05, "loss": 0.1432, "step": 2073 }, { "epoch": 1.3009251999372746, "grad_norm": 0.893895149230957, "learning_rate": 1.1500597947226818e-05, "loss": 0.1472, "step": 2074 }, { "epoch": 1.301552454132037, "grad_norm": 0.8899487853050232, "learning_rate": 1.1482207816976174e-05, "loss": 0.1469, "step": 2075 }, { "epoch": 1.3021797083267994, "grad_norm": 0.8849236369132996, "learning_rate": 1.1463826479942261e-05, "loss": 0.1775, "step": 2076 }, { "epoch": 1.302806962521562, "grad_norm": 0.974793553352356, "learning_rate": 1.144545395510078e-05, "loss": 0.145, "step": 2077 }, { "epoch": 1.3034342167163242, "grad_norm": 0.8298199772834778, "learning_rate": 1.1427090261418332e-05, "loss": 0.1311, "step": 2078 }, { "epoch": 1.3040614709110867, "grad_norm": 0.9482800364494324, "learning_rate": 1.1408735417852414e-05, "loss": 0.1453, "step": 2079 }, { "epoch": 1.3046887251058492, "grad_norm": 0.831490695476532, "learning_rate": 1.1390389443351361e-05, "loss": 0.1385, "step": 2080 }, { "epoch": 1.3053159793006115, "grad_norm": 0.9137451648712158, "learning_rate": 1.1372052356854382e-05, "loss": 0.1524, "step": 2081 }, { "epoch": 1.305943233495374, "grad_norm": 0.7532322406768799, "learning_rate": 1.1353724177291485e-05, "loss": 0.1246, "step": 2082 }, { "epoch": 1.3065704876901365, "grad_norm": 0.8048055768013, "learning_rate": 1.1335404923583507e-05, "loss": 0.1384, "step": 2083 }, { "epoch": 1.3071977418848988, "grad_norm": 0.8215882778167725, "learning_rate": 1.131709461464206e-05, "loss": 0.165, "step": 2084 }, { "epoch": 1.3078249960796613, "grad_norm": 0.7621756196022034, "learning_rate": 1.1298793269369503e-05, "loss": 0.1272, "step": 2085 }, { "epoch": 1.3084522502744238, "grad_norm": 0.7866765260696411, "learning_rate": 1.128050090665897e-05, "loss": 0.1617, "step": 2086 }, { "epoch": 1.309079504469186, "grad_norm": 0.8137136697769165, "learning_rate": 1.1262217545394312e-05, "loss": 0.1435, "step": 2087 }, { "epoch": 1.3097067586639486, "grad_norm": 0.891494870185852, "learning_rate": 1.1243943204450086e-05, "loss": 0.1516, "step": 2088 }, { "epoch": 1.310334012858711, "grad_norm": 0.863868772983551, "learning_rate": 1.1225677902691537e-05, "loss": 0.1487, "step": 2089 }, { "epoch": 1.3109612670534734, "grad_norm": 0.8466315269470215, "learning_rate": 1.1207421658974585e-05, "loss": 0.1538, "step": 2090 }, { "epoch": 1.3115885212482359, "grad_norm": 0.9184805750846863, "learning_rate": 1.1189174492145778e-05, "loss": 0.1691, "step": 2091 }, { "epoch": 1.3122157754429984, "grad_norm": 0.8500133156776428, "learning_rate": 1.117093642104232e-05, "loss": 0.1371, "step": 2092 }, { "epoch": 1.3128430296377607, "grad_norm": 0.8608570694923401, "learning_rate": 1.115270746449202e-05, "loss": 0.1809, "step": 2093 }, { "epoch": 1.3134702838325232, "grad_norm": 0.8924458622932434, "learning_rate": 1.113448764131325e-05, "loss": 0.1551, "step": 2094 }, { "epoch": 1.3140975380272857, "grad_norm": 0.7744616866111755, "learning_rate": 1.1116276970314993e-05, "loss": 0.1326, "step": 2095 }, { "epoch": 1.314724792222048, "grad_norm": 0.8079293370246887, "learning_rate": 1.1098075470296756e-05, "loss": 0.1497, "step": 2096 }, { "epoch": 1.3153520464168105, "grad_norm": 0.9292060136795044, "learning_rate": 1.1079883160048604e-05, "loss": 0.1667, "step": 2097 }, { "epoch": 1.315979300611573, "grad_norm": 0.7225872874259949, "learning_rate": 1.1061700058351078e-05, "loss": 0.1397, "step": 2098 }, { "epoch": 1.3166065548063353, "grad_norm": 0.7413331866264343, "learning_rate": 1.1043526183975244e-05, "loss": 0.1194, "step": 2099 }, { "epoch": 1.3172338090010978, "grad_norm": 0.7993394732475281, "learning_rate": 1.1025361555682634e-05, "loss": 0.1374, "step": 2100 }, { "epoch": 1.31786106319586, "grad_norm": 0.7846838235855103, "learning_rate": 1.100720619222523e-05, "loss": 0.1577, "step": 2101 }, { "epoch": 1.3184883173906226, "grad_norm": 0.8102317452430725, "learning_rate": 1.0989060112345466e-05, "loss": 0.1337, "step": 2102 }, { "epoch": 1.319115571585385, "grad_norm": 0.9342648983001709, "learning_rate": 1.0970923334776155e-05, "loss": 0.1631, "step": 2103 }, { "epoch": 1.3197428257801473, "grad_norm": 0.8400986194610596, "learning_rate": 1.0952795878240546e-05, "loss": 0.1371, "step": 2104 }, { "epoch": 1.3203700799749098, "grad_norm": 0.7187572717666626, "learning_rate": 1.0934677761452248e-05, "loss": 0.1234, "step": 2105 }, { "epoch": 1.3209973341696721, "grad_norm": 0.7044083476066589, "learning_rate": 1.091656900311523e-05, "loss": 0.1224, "step": 2106 }, { "epoch": 1.3216245883644346, "grad_norm": 0.9172436594963074, "learning_rate": 1.0898469621923802e-05, "loss": 0.1749, "step": 2107 }, { "epoch": 1.3222518425591971, "grad_norm": 0.8661630153656006, "learning_rate": 1.08803796365626e-05, "loss": 0.1642, "step": 2108 }, { "epoch": 1.3228790967539594, "grad_norm": 1.0116245746612549, "learning_rate": 1.0862299065706536e-05, "loss": 0.169, "step": 2109 }, { "epoch": 1.323506350948722, "grad_norm": 0.7918195128440857, "learning_rate": 1.0844227928020828e-05, "loss": 0.1259, "step": 2110 }, { "epoch": 1.3241336051434844, "grad_norm": 0.9659956693649292, "learning_rate": 1.082616624216095e-05, "loss": 0.141, "step": 2111 }, { "epoch": 1.3247608593382467, "grad_norm": 0.8624740242958069, "learning_rate": 1.0808114026772612e-05, "loss": 0.1337, "step": 2112 }, { "epoch": 1.3253881135330092, "grad_norm": 1.061497449874878, "learning_rate": 1.0790071300491754e-05, "loss": 0.1813, "step": 2113 }, { "epoch": 1.3260153677277717, "grad_norm": 0.8342097401618958, "learning_rate": 1.0772038081944527e-05, "loss": 0.153, "step": 2114 }, { "epoch": 1.326642621922534, "grad_norm": 0.9330176115036011, "learning_rate": 1.0754014389747236e-05, "loss": 0.1259, "step": 2115 }, { "epoch": 1.3272698761172965, "grad_norm": 0.7098188996315002, "learning_rate": 1.0736000242506389e-05, "loss": 0.1224, "step": 2116 }, { "epoch": 1.327897130312059, "grad_norm": 0.7948756814002991, "learning_rate": 1.0717995658818616e-05, "loss": 0.1603, "step": 2117 }, { "epoch": 1.3285243845068213, "grad_norm": 0.8998217582702637, "learning_rate": 1.0700000657270688e-05, "loss": 0.1668, "step": 2118 }, { "epoch": 1.3291516387015838, "grad_norm": 0.9300773739814758, "learning_rate": 1.0682015256439487e-05, "loss": 0.1639, "step": 2119 }, { "epoch": 1.3297788928963463, "grad_norm": 0.9498723745346069, "learning_rate": 1.0664039474891961e-05, "loss": 0.1553, "step": 2120 }, { "epoch": 1.3304061470911086, "grad_norm": 0.8543471693992615, "learning_rate": 1.064607333118514e-05, "loss": 0.1323, "step": 2121 }, { "epoch": 1.331033401285871, "grad_norm": 0.7313621640205383, "learning_rate": 1.0628116843866114e-05, "loss": 0.1268, "step": 2122 }, { "epoch": 1.3316606554806336, "grad_norm": 0.8691371083259583, "learning_rate": 1.0610170031471995e-05, "loss": 0.1634, "step": 2123 }, { "epoch": 1.332287909675396, "grad_norm": 0.8829277753829956, "learning_rate": 1.0592232912529913e-05, "loss": 0.1572, "step": 2124 }, { "epoch": 1.3329151638701584, "grad_norm": 0.9373019337654114, "learning_rate": 1.0574305505556983e-05, "loss": 0.1677, "step": 2125 }, { "epoch": 1.333542418064921, "grad_norm": 0.6969695091247559, "learning_rate": 1.0556387829060308e-05, "loss": 0.1268, "step": 2126 }, { "epoch": 1.3341696722596832, "grad_norm": 0.804305374622345, "learning_rate": 1.0538479901536922e-05, "loss": 0.1354, "step": 2127 }, { "epoch": 1.3347969264544457, "grad_norm": 0.8717243075370789, "learning_rate": 1.0520581741473813e-05, "loss": 0.1543, "step": 2128 }, { "epoch": 1.3354241806492082, "grad_norm": 0.9425841569900513, "learning_rate": 1.0502693367347882e-05, "loss": 0.1507, "step": 2129 }, { "epoch": 1.3360514348439705, "grad_norm": 0.7681815028190613, "learning_rate": 1.0484814797625936e-05, "loss": 0.1347, "step": 2130 }, { "epoch": 1.336678689038733, "grad_norm": 0.8692004680633545, "learning_rate": 1.046694605076464e-05, "loss": 0.1577, "step": 2131 }, { "epoch": 1.3373059432334955, "grad_norm": 0.780180037021637, "learning_rate": 1.0449087145210543e-05, "loss": 0.1404, "step": 2132 }, { "epoch": 1.3379331974282578, "grad_norm": 0.7524372339248657, "learning_rate": 1.043123809940001e-05, "loss": 0.1369, "step": 2133 }, { "epoch": 1.3385604516230203, "grad_norm": 0.8653045296669006, "learning_rate": 1.0413398931759243e-05, "loss": 0.1605, "step": 2134 }, { "epoch": 1.3391877058177828, "grad_norm": 0.8803753852844238, "learning_rate": 1.0395569660704243e-05, "loss": 0.1603, "step": 2135 }, { "epoch": 1.339814960012545, "grad_norm": 0.8828588724136353, "learning_rate": 1.0377750304640794e-05, "loss": 0.1433, "step": 2136 }, { "epoch": 1.3404422142073076, "grad_norm": 0.9002618193626404, "learning_rate": 1.0359940881964454e-05, "loss": 0.1584, "step": 2137 }, { "epoch": 1.3410694684020699, "grad_norm": 0.7786149978637695, "learning_rate": 1.03421414110605e-05, "loss": 0.1446, "step": 2138 }, { "epoch": 1.3416967225968324, "grad_norm": 0.7834008932113647, "learning_rate": 1.0324351910303965e-05, "loss": 0.1397, "step": 2139 }, { "epoch": 1.3423239767915949, "grad_norm": 0.8710561990737915, "learning_rate": 1.0306572398059566e-05, "loss": 0.1281, "step": 2140 }, { "epoch": 1.3429512309863572, "grad_norm": 0.8711461424827576, "learning_rate": 1.028880289268173e-05, "loss": 0.1391, "step": 2141 }, { "epoch": 1.3435784851811197, "grad_norm": 0.854386031627655, "learning_rate": 1.0271043412514539e-05, "loss": 0.146, "step": 2142 }, { "epoch": 1.344205739375882, "grad_norm": 0.7929691672325134, "learning_rate": 1.0253293975891739e-05, "loss": 0.1254, "step": 2143 }, { "epoch": 1.3448329935706445, "grad_norm": 0.8346929550170898, "learning_rate": 1.0235554601136677e-05, "loss": 0.1246, "step": 2144 }, { "epoch": 1.345460247765407, "grad_norm": 0.7438914179801941, "learning_rate": 1.021782530656236e-05, "loss": 0.1282, "step": 2145 }, { "epoch": 1.3460875019601692, "grad_norm": 0.691235363483429, "learning_rate": 1.0200106110471335e-05, "loss": 0.1277, "step": 2146 }, { "epoch": 1.3467147561549317, "grad_norm": 0.7530035376548767, "learning_rate": 1.0182397031155769e-05, "loss": 0.1366, "step": 2147 }, { "epoch": 1.3473420103496943, "grad_norm": 0.9042640328407288, "learning_rate": 1.0164698086897365e-05, "loss": 0.1582, "step": 2148 }, { "epoch": 1.3479692645444565, "grad_norm": 0.8310264348983765, "learning_rate": 1.014700929596737e-05, "loss": 0.1189, "step": 2149 }, { "epoch": 1.348596518739219, "grad_norm": 0.8199609518051147, "learning_rate": 1.0129330676626537e-05, "loss": 0.1344, "step": 2150 }, { "epoch": 1.3492237729339815, "grad_norm": 0.7133363485336304, "learning_rate": 1.0111662247125131e-05, "loss": 0.1178, "step": 2151 }, { "epoch": 1.3498510271287438, "grad_norm": 0.8384912014007568, "learning_rate": 1.0094004025702894e-05, "loss": 0.1475, "step": 2152 }, { "epoch": 1.3504782813235063, "grad_norm": 0.8178553581237793, "learning_rate": 1.0076356030589033e-05, "loss": 0.1367, "step": 2153 }, { "epoch": 1.3511055355182688, "grad_norm": 0.7055070400238037, "learning_rate": 1.005871828000219e-05, "loss": 0.1074, "step": 2154 }, { "epoch": 1.3517327897130311, "grad_norm": 0.7852804660797119, "learning_rate": 1.0041090792150446e-05, "loss": 0.1443, "step": 2155 }, { "epoch": 1.3523600439077936, "grad_norm": 0.882070004940033, "learning_rate": 1.0023473585231258e-05, "loss": 0.1518, "step": 2156 }, { "epoch": 1.3529872981025561, "grad_norm": 0.8362320065498352, "learning_rate": 1.0005866677431503e-05, "loss": 0.1446, "step": 2157 }, { "epoch": 1.3536145522973184, "grad_norm": 0.976771891117096, "learning_rate": 9.988270086927407e-06, "loss": 0.1376, "step": 2158 }, { "epoch": 1.354241806492081, "grad_norm": 0.8807855248451233, "learning_rate": 9.970683831884554e-06, "loss": 0.1428, "step": 2159 }, { "epoch": 1.3548690606868434, "grad_norm": 0.8212952017784119, "learning_rate": 9.953107930457848e-06, "loss": 0.1441, "step": 2160 }, { "epoch": 1.3554963148816057, "grad_norm": 0.8011760115623474, "learning_rate": 9.935542400791525e-06, "loss": 0.1242, "step": 2161 }, { "epoch": 1.3561235690763682, "grad_norm": 0.7925354242324829, "learning_rate": 9.91798726101908e-06, "loss": 0.1198, "step": 2162 }, { "epoch": 1.3567508232711307, "grad_norm": 0.843718409538269, "learning_rate": 9.90044252926331e-06, "loss": 0.1294, "step": 2163 }, { "epoch": 1.357378077465893, "grad_norm": 0.90981525182724, "learning_rate": 9.882908223636261e-06, "loss": 0.1601, "step": 2164 }, { "epoch": 1.3580053316606555, "grad_norm": 0.7558876276016235, "learning_rate": 9.865384362239212e-06, "loss": 0.1311, "step": 2165 }, { "epoch": 1.358632585855418, "grad_norm": 0.8670477271080017, "learning_rate": 9.847870963162671e-06, "loss": 0.1396, "step": 2166 }, { "epoch": 1.3592598400501803, "grad_norm": 0.9721325039863586, "learning_rate": 9.830368044486322e-06, "loss": 0.1452, "step": 2167 }, { "epoch": 1.3598870942449428, "grad_norm": 0.9157195687294006, "learning_rate": 9.812875624279051e-06, "loss": 0.172, "step": 2168 }, { "epoch": 1.3605143484397053, "grad_norm": 0.7177037000656128, "learning_rate": 9.795393720598896e-06, "loss": 0.1239, "step": 2169 }, { "epoch": 1.3611416026344676, "grad_norm": 0.8860791921615601, "learning_rate": 9.777922351493045e-06, "loss": 0.1261, "step": 2170 }, { "epoch": 1.36176885682923, "grad_norm": 0.8846379518508911, "learning_rate": 9.760461534997816e-06, "loss": 0.1367, "step": 2171 }, { "epoch": 1.3623961110239926, "grad_norm": 0.7757281064987183, "learning_rate": 9.743011289138606e-06, "loss": 0.1177, "step": 2172 }, { "epoch": 1.363023365218755, "grad_norm": 0.8190012574195862, "learning_rate": 9.725571631929938e-06, "loss": 0.1431, "step": 2173 }, { "epoch": 1.3636506194135174, "grad_norm": 0.7819278240203857, "learning_rate": 9.708142581375362e-06, "loss": 0.1329, "step": 2174 }, { "epoch": 1.3642778736082797, "grad_norm": 0.8488762974739075, "learning_rate": 9.690724155467515e-06, "loss": 0.1272, "step": 2175 }, { "epoch": 1.3649051278030422, "grad_norm": 0.7727980613708496, "learning_rate": 9.673316372188047e-06, "loss": 0.1347, "step": 2176 }, { "epoch": 1.3655323819978047, "grad_norm": 0.7058163285255432, "learning_rate": 9.655919249507626e-06, "loss": 0.1192, "step": 2177 }, { "epoch": 1.366159636192567, "grad_norm": 0.9108099341392517, "learning_rate": 9.638532805385915e-06, "loss": 0.1378, "step": 2178 }, { "epoch": 1.3667868903873295, "grad_norm": 0.8259838819503784, "learning_rate": 9.62115705777156e-06, "loss": 0.1641, "step": 2179 }, { "epoch": 1.3674141445820918, "grad_norm": 0.7587583661079407, "learning_rate": 9.60379202460214e-06, "loss": 0.1308, "step": 2180 }, { "epoch": 1.3680413987768543, "grad_norm": 0.7948460578918457, "learning_rate": 9.586437723804204e-06, "loss": 0.1543, "step": 2181 }, { "epoch": 1.3686686529716168, "grad_norm": 0.7781640887260437, "learning_rate": 9.569094173293198e-06, "loss": 0.1281, "step": 2182 }, { "epoch": 1.369295907166379, "grad_norm": 0.8927295207977295, "learning_rate": 9.551761390973487e-06, "loss": 0.145, "step": 2183 }, { "epoch": 1.3699231613611416, "grad_norm": 1.0925737619400024, "learning_rate": 9.534439394738316e-06, "loss": 0.1768, "step": 2184 }, { "epoch": 1.370550415555904, "grad_norm": 0.886824369430542, "learning_rate": 9.51712820246978e-06, "loss": 0.1516, "step": 2185 }, { "epoch": 1.3711776697506664, "grad_norm": 0.8679983615875244, "learning_rate": 9.499827832038835e-06, "loss": 0.1275, "step": 2186 }, { "epoch": 1.3718049239454289, "grad_norm": 0.786601185798645, "learning_rate": 9.482538301305266e-06, "loss": 0.1204, "step": 2187 }, { "epoch": 1.3724321781401914, "grad_norm": 0.871163547039032, "learning_rate": 9.465259628117665e-06, "loss": 0.1466, "step": 2188 }, { "epoch": 1.3730594323349536, "grad_norm": 0.8171364665031433, "learning_rate": 9.447991830313412e-06, "loss": 0.1484, "step": 2189 }, { "epoch": 1.3736866865297161, "grad_norm": 0.709475576877594, "learning_rate": 9.43073492571867e-06, "loss": 0.1163, "step": 2190 }, { "epoch": 1.3743139407244787, "grad_norm": 0.8342685699462891, "learning_rate": 9.413488932148336e-06, "loss": 0.1522, "step": 2191 }, { "epoch": 1.374941194919241, "grad_norm": 0.8246030807495117, "learning_rate": 9.396253867406061e-06, "loss": 0.1484, "step": 2192 }, { "epoch": 1.3755684491140034, "grad_norm": 0.9686318635940552, "learning_rate": 9.379029749284216e-06, "loss": 0.2097, "step": 2193 }, { "epoch": 1.376195703308766, "grad_norm": 0.9869811534881592, "learning_rate": 9.361816595563855e-06, "loss": 0.1497, "step": 2194 }, { "epoch": 1.3768229575035282, "grad_norm": 0.7771713733673096, "learning_rate": 9.344614424014738e-06, "loss": 0.1242, "step": 2195 }, { "epoch": 1.3774502116982907, "grad_norm": 0.7462970614433289, "learning_rate": 9.327423252395252e-06, "loss": 0.1203, "step": 2196 }, { "epoch": 1.3780774658930532, "grad_norm": 0.9352504014968872, "learning_rate": 9.310243098452464e-06, "loss": 0.1713, "step": 2197 }, { "epoch": 1.3787047200878155, "grad_norm": 0.8267216682434082, "learning_rate": 9.293073979922041e-06, "loss": 0.1593, "step": 2198 }, { "epoch": 1.379331974282578, "grad_norm": 0.8669594526290894, "learning_rate": 9.275915914528271e-06, "loss": 0.1142, "step": 2199 }, { "epoch": 1.3799592284773405, "grad_norm": 1.0197635889053345, "learning_rate": 9.258768919984032e-06, "loss": 0.1358, "step": 2200 }, { "epoch": 1.3805864826721028, "grad_norm": 0.746362030506134, "learning_rate": 9.24163301399077e-06, "loss": 0.1249, "step": 2201 }, { "epoch": 1.3812137368668653, "grad_norm": 0.7596211433410645, "learning_rate": 9.22450821423849e-06, "loss": 0.1354, "step": 2202 }, { "epoch": 1.3818409910616278, "grad_norm": 0.7407150268554688, "learning_rate": 9.20739453840571e-06, "loss": 0.1444, "step": 2203 }, { "epoch": 1.3824682452563901, "grad_norm": 0.7925580143928528, "learning_rate": 9.190292004159491e-06, "loss": 0.1371, "step": 2204 }, { "epoch": 1.3830954994511526, "grad_norm": 0.764828085899353, "learning_rate": 9.173200629155379e-06, "loss": 0.1216, "step": 2205 }, { "epoch": 1.3837227536459151, "grad_norm": 0.8288745284080505, "learning_rate": 9.156120431037401e-06, "loss": 0.1504, "step": 2206 }, { "epoch": 1.3843500078406774, "grad_norm": 0.8584113717079163, "learning_rate": 9.139051427438048e-06, "loss": 0.15, "step": 2207 }, { "epoch": 1.38497726203544, "grad_norm": 0.8606441020965576, "learning_rate": 9.121993635978262e-06, "loss": 0.1503, "step": 2208 }, { "epoch": 1.3856045162302024, "grad_norm": 0.8186155557632446, "learning_rate": 9.104947074267385e-06, "loss": 0.1226, "step": 2209 }, { "epoch": 1.3862317704249647, "grad_norm": 0.7461327314376831, "learning_rate": 9.08791175990319e-06, "loss": 0.1476, "step": 2210 }, { "epoch": 1.3868590246197272, "grad_norm": 0.9064809083938599, "learning_rate": 9.070887710471832e-06, "loss": 0.1608, "step": 2211 }, { "epoch": 1.3874862788144895, "grad_norm": 0.9225057363510132, "learning_rate": 9.053874943547837e-06, "loss": 0.1507, "step": 2212 }, { "epoch": 1.388113533009252, "grad_norm": 0.7884825468063354, "learning_rate": 9.036873476694092e-06, "loss": 0.1394, "step": 2213 }, { "epoch": 1.3887407872040145, "grad_norm": 0.7843522429466248, "learning_rate": 9.019883327461794e-06, "loss": 0.1303, "step": 2214 }, { "epoch": 1.3893680413987768, "grad_norm": 0.7671374082565308, "learning_rate": 9.002904513390478e-06, "loss": 0.1265, "step": 2215 }, { "epoch": 1.3899952955935393, "grad_norm": 0.7901222109794617, "learning_rate": 8.985937052007973e-06, "loss": 0.1425, "step": 2216 }, { "epoch": 1.3906225497883016, "grad_norm": 0.8651731610298157, "learning_rate": 8.968980960830384e-06, "loss": 0.14, "step": 2217 }, { "epoch": 1.391249803983064, "grad_norm": 1.0074193477630615, "learning_rate": 8.952036257362086e-06, "loss": 0.1685, "step": 2218 }, { "epoch": 1.3918770581778266, "grad_norm": 0.803781270980835, "learning_rate": 8.935102959095697e-06, "loss": 0.1368, "step": 2219 }, { "epoch": 1.3925043123725889, "grad_norm": 0.8673598766326904, "learning_rate": 8.918181083512041e-06, "loss": 0.1201, "step": 2220 }, { "epoch": 1.3931315665673514, "grad_norm": 0.7912781238555908, "learning_rate": 8.901270648080175e-06, "loss": 0.1543, "step": 2221 }, { "epoch": 1.3937588207621139, "grad_norm": 0.8338043093681335, "learning_rate": 8.884371670257345e-06, "loss": 0.143, "step": 2222 }, { "epoch": 1.3943860749568762, "grad_norm": 0.922680675983429, "learning_rate": 8.867484167488946e-06, "loss": 0.1425, "step": 2223 }, { "epoch": 1.3950133291516387, "grad_norm": 0.7326045632362366, "learning_rate": 8.850608157208547e-06, "loss": 0.1283, "step": 2224 }, { "epoch": 1.3956405833464012, "grad_norm": 0.8759211897850037, "learning_rate": 8.833743656837847e-06, "loss": 0.1519, "step": 2225 }, { "epoch": 1.3962678375411635, "grad_norm": 0.8462759852409363, "learning_rate": 8.816890683786674e-06, "loss": 0.1214, "step": 2226 }, { "epoch": 1.396895091735926, "grad_norm": 0.9513422846794128, "learning_rate": 8.800049255452929e-06, "loss": 0.1515, "step": 2227 }, { "epoch": 1.3975223459306885, "grad_norm": 0.7908011674880981, "learning_rate": 8.783219389222616e-06, "loss": 0.1326, "step": 2228 }, { "epoch": 1.3981496001254508, "grad_norm": 0.7702666521072388, "learning_rate": 8.766401102469803e-06, "loss": 0.1398, "step": 2229 }, { "epoch": 1.3987768543202133, "grad_norm": 0.9492427110671997, "learning_rate": 8.7495944125566e-06, "loss": 0.16, "step": 2230 }, { "epoch": 1.3994041085149758, "grad_norm": 0.8880034685134888, "learning_rate": 8.732799336833147e-06, "loss": 0.1613, "step": 2231 }, { "epoch": 1.400031362709738, "grad_norm": 0.7824158072471619, "learning_rate": 8.71601589263758e-06, "loss": 0.1329, "step": 2232 }, { "epoch": 1.4006586169045006, "grad_norm": 0.8795452117919922, "learning_rate": 8.699244097296044e-06, "loss": 0.1497, "step": 2233 }, { "epoch": 1.401285871099263, "grad_norm": 0.8127107620239258, "learning_rate": 8.682483968122659e-06, "loss": 0.1474, "step": 2234 }, { "epoch": 1.4019131252940253, "grad_norm": 0.9448059797286987, "learning_rate": 8.665735522419491e-06, "loss": 0.1628, "step": 2235 }, { "epoch": 1.4025403794887878, "grad_norm": 0.84309321641922, "learning_rate": 8.648998777476556e-06, "loss": 0.1658, "step": 2236 }, { "epoch": 1.4031676336835504, "grad_norm": 0.814670979976654, "learning_rate": 8.632273750571786e-06, "loss": 0.1416, "step": 2237 }, { "epoch": 1.4037948878783126, "grad_norm": 0.7217344641685486, "learning_rate": 8.615560458971004e-06, "loss": 0.1204, "step": 2238 }, { "epoch": 1.4044221420730751, "grad_norm": 0.8013978600502014, "learning_rate": 8.598858919927937e-06, "loss": 0.1431, "step": 2239 }, { "epoch": 1.4050493962678376, "grad_norm": 0.7642830014228821, "learning_rate": 8.58216915068417e-06, "loss": 0.1377, "step": 2240 }, { "epoch": 1.4056766504626, "grad_norm": 0.9084851741790771, "learning_rate": 8.56549116846914e-06, "loss": 0.1417, "step": 2241 }, { "epoch": 1.4063039046573624, "grad_norm": 0.7790862917900085, "learning_rate": 8.548824990500123e-06, "loss": 0.146, "step": 2242 }, { "epoch": 1.406931158852125, "grad_norm": 0.9662855267524719, "learning_rate": 8.532170633982185e-06, "loss": 0.1202, "step": 2243 }, { "epoch": 1.4075584130468872, "grad_norm": 0.8067671656608582, "learning_rate": 8.515528116108214e-06, "loss": 0.1252, "step": 2244 }, { "epoch": 1.4081856672416497, "grad_norm": 0.8642739653587341, "learning_rate": 8.498897454058863e-06, "loss": 0.1304, "step": 2245 }, { "epoch": 1.4088129214364122, "grad_norm": 0.8284696340560913, "learning_rate": 8.482278665002556e-06, "loss": 0.1579, "step": 2246 }, { "epoch": 1.4094401756311745, "grad_norm": 0.8237106204032898, "learning_rate": 8.46567176609545e-06, "loss": 0.1441, "step": 2247 }, { "epoch": 1.410067429825937, "grad_norm": 0.8551800847053528, "learning_rate": 8.449076774481439e-06, "loss": 0.1277, "step": 2248 }, { "epoch": 1.4106946840206993, "grad_norm": 0.8437981605529785, "learning_rate": 8.432493707292111e-06, "loss": 0.1382, "step": 2249 }, { "epoch": 1.4113219382154618, "grad_norm": 0.7792211771011353, "learning_rate": 8.415922581646743e-06, "loss": 0.1492, "step": 2250 }, { "epoch": 1.4119491924102243, "grad_norm": 0.8435136079788208, "learning_rate": 8.399363414652298e-06, "loss": 0.1437, "step": 2251 }, { "epoch": 1.4125764466049866, "grad_norm": 0.8436769843101501, "learning_rate": 8.382816223403386e-06, "loss": 0.1361, "step": 2252 }, { "epoch": 1.413203700799749, "grad_norm": 0.8829277753829956, "learning_rate": 8.366281024982252e-06, "loss": 0.1494, "step": 2253 }, { "epoch": 1.4138309549945114, "grad_norm": 0.9682825803756714, "learning_rate": 8.349757836458771e-06, "loss": 0.1632, "step": 2254 }, { "epoch": 1.414458209189274, "grad_norm": 1.1217553615570068, "learning_rate": 8.333246674890417e-06, "loss": 0.1383, "step": 2255 }, { "epoch": 1.4150854633840364, "grad_norm": 0.7459167838096619, "learning_rate": 8.316747557322224e-06, "loss": 0.1099, "step": 2256 }, { "epoch": 1.4157127175787987, "grad_norm": 0.880784809589386, "learning_rate": 8.300260500786825e-06, "loss": 0.14, "step": 2257 }, { "epoch": 1.4163399717735612, "grad_norm": 0.9267962574958801, "learning_rate": 8.283785522304386e-06, "loss": 0.1327, "step": 2258 }, { "epoch": 1.4169672259683237, "grad_norm": 1.0290577411651611, "learning_rate": 8.267322638882606e-06, "loss": 0.1669, "step": 2259 }, { "epoch": 1.417594480163086, "grad_norm": 0.8680477738380432, "learning_rate": 8.250871867516713e-06, "loss": 0.1392, "step": 2260 }, { "epoch": 1.4182217343578485, "grad_norm": 1.068716287612915, "learning_rate": 8.234433225189393e-06, "loss": 0.1546, "step": 2261 }, { "epoch": 1.418848988552611, "grad_norm": 1.1250039339065552, "learning_rate": 8.218006728870848e-06, "loss": 0.1675, "step": 2262 }, { "epoch": 1.4194762427473733, "grad_norm": 0.9158503413200378, "learning_rate": 8.201592395518732e-06, "loss": 0.1378, "step": 2263 }, { "epoch": 1.4201034969421358, "grad_norm": 0.7678124308586121, "learning_rate": 8.185190242078132e-06, "loss": 0.1241, "step": 2264 }, { "epoch": 1.4207307511368983, "grad_norm": 1.0234214067459106, "learning_rate": 8.168800285481571e-06, "loss": 0.1592, "step": 2265 }, { "epoch": 1.4213580053316606, "grad_norm": 0.8566369414329529, "learning_rate": 8.152422542648988e-06, "loss": 0.157, "step": 2266 }, { "epoch": 1.421985259526423, "grad_norm": 0.9309266805648804, "learning_rate": 8.136057030487684e-06, "loss": 0.1463, "step": 2267 }, { "epoch": 1.4226125137211856, "grad_norm": 0.9169596433639526, "learning_rate": 8.119703765892366e-06, "loss": 0.1627, "step": 2268 }, { "epoch": 1.4232397679159479, "grad_norm": 0.8501594066619873, "learning_rate": 8.103362765745081e-06, "loss": 0.1558, "step": 2269 }, { "epoch": 1.4238670221107104, "grad_norm": 0.902891993522644, "learning_rate": 8.08703404691522e-06, "loss": 0.1365, "step": 2270 }, { "epoch": 1.4244942763054729, "grad_norm": 0.9229341745376587, "learning_rate": 8.070717626259497e-06, "loss": 0.1449, "step": 2271 }, { "epoch": 1.4251215305002352, "grad_norm": 0.7798023819923401, "learning_rate": 8.054413520621929e-06, "loss": 0.1418, "step": 2272 }, { "epoch": 1.4257487846949977, "grad_norm": 0.9616074562072754, "learning_rate": 8.038121746833807e-06, "loss": 0.1979, "step": 2273 }, { "epoch": 1.4263760388897602, "grad_norm": 0.8707701563835144, "learning_rate": 8.021842321713718e-06, "loss": 0.1491, "step": 2274 }, { "epoch": 1.4270032930845225, "grad_norm": 0.9088772535324097, "learning_rate": 8.005575262067466e-06, "loss": 0.1491, "step": 2275 }, { "epoch": 1.427630547279285, "grad_norm": 0.891663134098053, "learning_rate": 7.989320584688125e-06, "loss": 0.1414, "step": 2276 }, { "epoch": 1.4282578014740475, "grad_norm": 0.9096094965934753, "learning_rate": 7.973078306355965e-06, "loss": 0.1336, "step": 2277 }, { "epoch": 1.4288850556688097, "grad_norm": 0.8948990702629089, "learning_rate": 7.956848443838469e-06, "loss": 0.1348, "step": 2278 }, { "epoch": 1.4295123098635723, "grad_norm": 1.0273152589797974, "learning_rate": 7.940631013890283e-06, "loss": 0.154, "step": 2279 }, { "epoch": 1.4301395640583348, "grad_norm": 0.768093466758728, "learning_rate": 7.924426033253239e-06, "loss": 0.13, "step": 2280 }, { "epoch": 1.430766818253097, "grad_norm": 0.8098708391189575, "learning_rate": 7.908233518656304e-06, "loss": 0.1386, "step": 2281 }, { "epoch": 1.4313940724478595, "grad_norm": 0.990982174873352, "learning_rate": 7.892053486815585e-06, "loss": 0.1493, "step": 2282 }, { "epoch": 1.432021326642622, "grad_norm": 0.8665327429771423, "learning_rate": 7.8758859544343e-06, "loss": 0.1387, "step": 2283 }, { "epoch": 1.4326485808373843, "grad_norm": 0.9492656588554382, "learning_rate": 7.859730938202763e-06, "loss": 0.1488, "step": 2284 }, { "epoch": 1.4332758350321468, "grad_norm": 0.8344292044639587, "learning_rate": 7.843588454798357e-06, "loss": 0.1373, "step": 2285 }, { "epoch": 1.4339030892269091, "grad_norm": 0.7794461250305176, "learning_rate": 7.827458520885541e-06, "loss": 0.1182, "step": 2286 }, { "epoch": 1.4345303434216716, "grad_norm": 0.8400171995162964, "learning_rate": 7.811341153115813e-06, "loss": 0.121, "step": 2287 }, { "epoch": 1.4351575976164341, "grad_norm": 0.9185622930526733, "learning_rate": 7.795236368127698e-06, "loss": 0.1426, "step": 2288 }, { "epoch": 1.4357848518111964, "grad_norm": 0.8727949261665344, "learning_rate": 7.779144182546746e-06, "loss": 0.1224, "step": 2289 }, { "epoch": 1.436412106005959, "grad_norm": 0.7786707878112793, "learning_rate": 7.763064612985465e-06, "loss": 0.1445, "step": 2290 }, { "epoch": 1.4370393602007212, "grad_norm": 0.8271812200546265, "learning_rate": 7.746997676043372e-06, "loss": 0.1361, "step": 2291 }, { "epoch": 1.4376666143954837, "grad_norm": 0.7741566300392151, "learning_rate": 7.73094338830693e-06, "loss": 0.1596, "step": 2292 }, { "epoch": 1.4382938685902462, "grad_norm": 0.8974960446357727, "learning_rate": 7.714901766349548e-06, "loss": 0.1529, "step": 2293 }, { "epoch": 1.4389211227850085, "grad_norm": 0.9158388376235962, "learning_rate": 7.698872826731551e-06, "loss": 0.1371, "step": 2294 }, { "epoch": 1.439548376979771, "grad_norm": 0.9374398589134216, "learning_rate": 7.682856586000192e-06, "loss": 0.1659, "step": 2295 }, { "epoch": 1.4401756311745335, "grad_norm": 0.8173784613609314, "learning_rate": 7.666853060689581e-06, "loss": 0.1652, "step": 2296 }, { "epoch": 1.4408028853692958, "grad_norm": 0.9208851456642151, "learning_rate": 7.650862267320731e-06, "loss": 0.1512, "step": 2297 }, { "epoch": 1.4414301395640583, "grad_norm": 0.8968892097473145, "learning_rate": 7.634884222401497e-06, "loss": 0.1545, "step": 2298 }, { "epoch": 1.4420573937588208, "grad_norm": 0.8849417567253113, "learning_rate": 7.6189189424265805e-06, "loss": 0.1416, "step": 2299 }, { "epoch": 1.442684647953583, "grad_norm": 0.7522055506706238, "learning_rate": 7.602966443877509e-06, "loss": 0.1145, "step": 2300 }, { "epoch": 1.4433119021483456, "grad_norm": 0.8512992858886719, "learning_rate": 7.587026743222592e-06, "loss": 0.1479, "step": 2301 }, { "epoch": 1.443939156343108, "grad_norm": 1.0570125579833984, "learning_rate": 7.5710998569169595e-06, "loss": 0.1714, "step": 2302 }, { "epoch": 1.4445664105378704, "grad_norm": 0.8478622436523438, "learning_rate": 7.555185801402483e-06, "loss": 0.138, "step": 2303 }, { "epoch": 1.4451936647326329, "grad_norm": 0.7958086133003235, "learning_rate": 7.539284593107807e-06, "loss": 0.1362, "step": 2304 }, { "epoch": 1.4458209189273954, "grad_norm": 1.0083904266357422, "learning_rate": 7.523396248448314e-06, "loss": 0.1692, "step": 2305 }, { "epoch": 1.4464481731221577, "grad_norm": 0.5430143475532532, "learning_rate": 7.5075207838260945e-06, "loss": 0.1385, "step": 2306 }, { "epoch": 1.4470754273169202, "grad_norm": 0.7983195781707764, "learning_rate": 7.49165821562996e-06, "loss": 0.1292, "step": 2307 }, { "epoch": 1.4477026815116827, "grad_norm": 0.9012365341186523, "learning_rate": 7.475808560235385e-06, "loss": 0.1676, "step": 2308 }, { "epoch": 1.448329935706445, "grad_norm": 0.8816755414009094, "learning_rate": 7.4599718340045315e-06, "loss": 0.1437, "step": 2309 }, { "epoch": 1.4489571899012075, "grad_norm": 0.8799365758895874, "learning_rate": 7.444148053286211e-06, "loss": 0.1346, "step": 2310 }, { "epoch": 1.44958444409597, "grad_norm": 0.8972303867340088, "learning_rate": 7.42833723441587e-06, "loss": 0.153, "step": 2311 }, { "epoch": 1.4502116982907323, "grad_norm": 0.9287340044975281, "learning_rate": 7.41253939371557e-06, "loss": 0.1559, "step": 2312 }, { "epoch": 1.4508389524854948, "grad_norm": 0.7804572582244873, "learning_rate": 7.396754547493989e-06, "loss": 0.1198, "step": 2313 }, { "epoch": 1.4514662066802573, "grad_norm": 0.8070683479309082, "learning_rate": 7.380982712046365e-06, "loss": 0.1202, "step": 2314 }, { "epoch": 1.4520934608750196, "grad_norm": 0.8352468013763428, "learning_rate": 7.365223903654524e-06, "loss": 0.1347, "step": 2315 }, { "epoch": 1.452720715069782, "grad_norm": 0.7718518376350403, "learning_rate": 7.349478138586841e-06, "loss": 0.1399, "step": 2316 }, { "epoch": 1.4533479692645446, "grad_norm": 0.8753253221511841, "learning_rate": 7.333745433098221e-06, "loss": 0.1415, "step": 2317 }, { "epoch": 1.4539752234593069, "grad_norm": 0.805276095867157, "learning_rate": 7.318025803430089e-06, "loss": 0.131, "step": 2318 }, { "epoch": 1.4546024776540694, "grad_norm": 0.9128892421722412, "learning_rate": 7.302319265810383e-06, "loss": 0.1329, "step": 2319 }, { "epoch": 1.4552297318488319, "grad_norm": 0.8427088856697083, "learning_rate": 7.286625836453498e-06, "loss": 0.1357, "step": 2320 }, { "epoch": 1.4558569860435941, "grad_norm": 0.8445613384246826, "learning_rate": 7.270945531560323e-06, "loss": 0.1459, "step": 2321 }, { "epoch": 1.4564842402383567, "grad_norm": 0.8637903928756714, "learning_rate": 7.255278367318186e-06, "loss": 0.1341, "step": 2322 }, { "epoch": 1.4571114944331192, "grad_norm": 0.8728572726249695, "learning_rate": 7.239624359900852e-06, "loss": 0.1483, "step": 2323 }, { "epoch": 1.4577387486278814, "grad_norm": 1.0005639791488647, "learning_rate": 7.223983525468514e-06, "loss": 0.1511, "step": 2324 }, { "epoch": 1.458366002822644, "grad_norm": 0.823235034942627, "learning_rate": 7.208355880167746e-06, "loss": 0.1274, "step": 2325 }, { "epoch": 1.4589932570174062, "grad_norm": 0.9102848172187805, "learning_rate": 7.192741440131512e-06, "loss": 0.1474, "step": 2326 }, { "epoch": 1.4596205112121687, "grad_norm": 0.808292031288147, "learning_rate": 7.177140221479153e-06, "loss": 0.1299, "step": 2327 }, { "epoch": 1.4602477654069312, "grad_norm": 0.9165015816688538, "learning_rate": 7.161552240316356e-06, "loss": 0.1385, "step": 2328 }, { "epoch": 1.4608750196016935, "grad_norm": 0.8659415245056152, "learning_rate": 7.1459775127351446e-06, "loss": 0.1426, "step": 2329 }, { "epoch": 1.461502273796456, "grad_norm": 0.816756010055542, "learning_rate": 7.130416054813858e-06, "loss": 0.1402, "step": 2330 }, { "epoch": 1.4621295279912183, "grad_norm": 0.9069549441337585, "learning_rate": 7.114867882617143e-06, "loss": 0.1424, "step": 2331 }, { "epoch": 1.4627567821859808, "grad_norm": 0.9110038876533508, "learning_rate": 7.099333012195913e-06, "loss": 0.1834, "step": 2332 }, { "epoch": 1.4633840363807433, "grad_norm": 0.7811709046363831, "learning_rate": 7.083811459587364e-06, "loss": 0.1143, "step": 2333 }, { "epoch": 1.4640112905755056, "grad_norm": 1.0482608079910278, "learning_rate": 7.068303240814947e-06, "loss": 0.1392, "step": 2334 }, { "epoch": 1.4646385447702681, "grad_norm": 0.7268643379211426, "learning_rate": 7.05280837188834e-06, "loss": 0.124, "step": 2335 }, { "epoch": 1.4652657989650306, "grad_norm": 1.0208662748336792, "learning_rate": 7.03732686880344e-06, "loss": 0.1467, "step": 2336 }, { "epoch": 1.465893053159793, "grad_norm": 0.8626693487167358, "learning_rate": 7.021858747542356e-06, "loss": 0.1407, "step": 2337 }, { "epoch": 1.4665203073545554, "grad_norm": 0.7776216268539429, "learning_rate": 7.006404024073359e-06, "loss": 0.1161, "step": 2338 }, { "epoch": 1.467147561549318, "grad_norm": 0.8511480689048767, "learning_rate": 6.990962714350913e-06, "loss": 0.1597, "step": 2339 }, { "epoch": 1.4677748157440802, "grad_norm": 0.9013397693634033, "learning_rate": 6.975534834315625e-06, "loss": 0.1465, "step": 2340 }, { "epoch": 1.4684020699388427, "grad_norm": 1.035351037979126, "learning_rate": 6.9601203998942394e-06, "loss": 0.1426, "step": 2341 }, { "epoch": 1.4690293241336052, "grad_norm": 1.0567203760147095, "learning_rate": 6.944719426999624e-06, "loss": 0.1542, "step": 2342 }, { "epoch": 1.4696565783283675, "grad_norm": 0.9788340926170349, "learning_rate": 6.929331931530734e-06, "loss": 0.1605, "step": 2343 }, { "epoch": 1.47028383252313, "grad_norm": 0.8828837275505066, "learning_rate": 6.913957929372628e-06, "loss": 0.13, "step": 2344 }, { "epoch": 1.4709110867178925, "grad_norm": 0.8553276062011719, "learning_rate": 6.898597436396428e-06, "loss": 0.1285, "step": 2345 }, { "epoch": 1.4715383409126548, "grad_norm": 0.8594926595687866, "learning_rate": 6.883250468459317e-06, "loss": 0.1283, "step": 2346 }, { "epoch": 1.4721655951074173, "grad_norm": 0.8016849160194397, "learning_rate": 6.867917041404506e-06, "loss": 0.1166, "step": 2347 }, { "epoch": 1.4727928493021798, "grad_norm": 0.8469088077545166, "learning_rate": 6.852597171061239e-06, "loss": 0.1329, "step": 2348 }, { "epoch": 1.473420103496942, "grad_norm": 0.7968443632125854, "learning_rate": 6.837290873244744e-06, "loss": 0.1445, "step": 2349 }, { "epoch": 1.4740473576917046, "grad_norm": 0.8820021152496338, "learning_rate": 6.821998163756258e-06, "loss": 0.1442, "step": 2350 }, { "epoch": 1.474674611886467, "grad_norm": 0.8443328738212585, "learning_rate": 6.806719058382991e-06, "loss": 0.1397, "step": 2351 }, { "epoch": 1.4753018660812294, "grad_norm": 0.8024336695671082, "learning_rate": 6.791453572898088e-06, "loss": 0.1443, "step": 2352 }, { "epoch": 1.4759291202759919, "grad_norm": 0.7728897333145142, "learning_rate": 6.776201723060651e-06, "loss": 0.1235, "step": 2353 }, { "epoch": 1.4765563744707544, "grad_norm": 0.8861445188522339, "learning_rate": 6.760963524615711e-06, "loss": 0.1513, "step": 2354 }, { "epoch": 1.4771836286655167, "grad_norm": 0.9280038475990295, "learning_rate": 6.745738993294184e-06, "loss": 0.1581, "step": 2355 }, { "epoch": 1.4778108828602792, "grad_norm": 0.9169256687164307, "learning_rate": 6.730528144812893e-06, "loss": 0.1441, "step": 2356 }, { "epoch": 1.4784381370550417, "grad_norm": 0.9574286937713623, "learning_rate": 6.715330994874534e-06, "loss": 0.1394, "step": 2357 }, { "epoch": 1.479065391249804, "grad_norm": 0.9301524758338928, "learning_rate": 6.700147559167662e-06, "loss": 0.149, "step": 2358 }, { "epoch": 1.4796926454445665, "grad_norm": 0.8063904643058777, "learning_rate": 6.684977853366667e-06, "loss": 0.1208, "step": 2359 }, { "epoch": 1.480319899639329, "grad_norm": 0.927034854888916, "learning_rate": 6.669821893131783e-06, "loss": 0.1394, "step": 2360 }, { "epoch": 1.4809471538340913, "grad_norm": 0.7899417877197266, "learning_rate": 6.654679694109025e-06, "loss": 0.1121, "step": 2361 }, { "epoch": 1.4815744080288538, "grad_norm": 0.8486856818199158, "learning_rate": 6.639551271930225e-06, "loss": 0.1416, "step": 2362 }, { "epoch": 1.482201662223616, "grad_norm": 0.7845723032951355, "learning_rate": 6.624436642212983e-06, "loss": 0.1247, "step": 2363 }, { "epoch": 1.4828289164183786, "grad_norm": 0.8707875609397888, "learning_rate": 6.609335820560669e-06, "loss": 0.1515, "step": 2364 }, { "epoch": 1.483456170613141, "grad_norm": 0.8075608611106873, "learning_rate": 6.59424882256239e-06, "loss": 0.1463, "step": 2365 }, { "epoch": 1.4840834248079033, "grad_norm": 0.9238713979721069, "learning_rate": 6.579175663792991e-06, "loss": 0.1459, "step": 2366 }, { "epoch": 1.4847106790026658, "grad_norm": 0.832962155342102, "learning_rate": 6.564116359813011e-06, "loss": 0.1452, "step": 2367 }, { "epoch": 1.4853379331974281, "grad_norm": 0.9457533955574036, "learning_rate": 6.549070926168708e-06, "loss": 0.1586, "step": 2368 }, { "epoch": 1.4859651873921906, "grad_norm": 0.744782567024231, "learning_rate": 6.534039378392012e-06, "loss": 0.1159, "step": 2369 }, { "epoch": 1.4865924415869531, "grad_norm": 0.8590942621231079, "learning_rate": 6.519021732000519e-06, "loss": 0.1231, "step": 2370 }, { "epoch": 1.4872196957817154, "grad_norm": 0.8148005604743958, "learning_rate": 6.504018002497481e-06, "loss": 0.1512, "step": 2371 }, { "epoch": 1.487846949976478, "grad_norm": 0.8527929782867432, "learning_rate": 6.489028205371762e-06, "loss": 0.1511, "step": 2372 }, { "epoch": 1.4884742041712404, "grad_norm": 0.9304593205451965, "learning_rate": 6.4740523560978644e-06, "loss": 0.1425, "step": 2373 }, { "epoch": 1.4891014583660027, "grad_norm": 0.9466644525527954, "learning_rate": 6.4590904701358845e-06, "loss": 0.1528, "step": 2374 }, { "epoch": 1.4897287125607652, "grad_norm": 0.772213339805603, "learning_rate": 6.444142562931503e-06, "loss": 0.1157, "step": 2375 }, { "epoch": 1.4903559667555277, "grad_norm": 0.8148625493049622, "learning_rate": 6.429208649915974e-06, "loss": 0.1354, "step": 2376 }, { "epoch": 1.49098322095029, "grad_norm": 0.8912999629974365, "learning_rate": 6.414288746506106e-06, "loss": 0.1302, "step": 2377 }, { "epoch": 1.4916104751450525, "grad_norm": 0.9323480129241943, "learning_rate": 6.399382868104231e-06, "loss": 0.1335, "step": 2378 }, { "epoch": 1.492237729339815, "grad_norm": 0.8342500329017639, "learning_rate": 6.3844910300982095e-06, "loss": 0.1314, "step": 2379 }, { "epoch": 1.4928649835345773, "grad_norm": 0.8314619064331055, "learning_rate": 6.369613247861417e-06, "loss": 0.1294, "step": 2380 }, { "epoch": 1.4934922377293398, "grad_norm": 0.8739069700241089, "learning_rate": 6.354749536752709e-06, "loss": 0.1407, "step": 2381 }, { "epoch": 1.4941194919241023, "grad_norm": 0.9383721351623535, "learning_rate": 6.3398999121164185e-06, "loss": 0.1653, "step": 2382 }, { "epoch": 1.4947467461188646, "grad_norm": 1.0760291814804077, "learning_rate": 6.325064389282338e-06, "loss": 0.159, "step": 2383 }, { "epoch": 1.495374000313627, "grad_norm": 0.8461733460426331, "learning_rate": 6.310242983565704e-06, "loss": 0.1215, "step": 2384 }, { "epoch": 1.4960012545083896, "grad_norm": 0.7973076701164246, "learning_rate": 6.2954357102671636e-06, "loss": 0.1161, "step": 2385 }, { "epoch": 1.496628508703152, "grad_norm": 0.8130058646202087, "learning_rate": 6.2806425846727914e-06, "loss": 0.1276, "step": 2386 }, { "epoch": 1.4972557628979144, "grad_norm": 0.7450674176216125, "learning_rate": 6.265863622054053e-06, "loss": 0.1151, "step": 2387 }, { "epoch": 1.497883017092677, "grad_norm": 0.9922374486923218, "learning_rate": 6.251098837667792e-06, "loss": 0.1458, "step": 2388 }, { "epoch": 1.4985102712874392, "grad_norm": 0.8384523391723633, "learning_rate": 6.236348246756223e-06, "loss": 0.1408, "step": 2389 }, { "epoch": 1.4991375254822017, "grad_norm": 0.9825587868690491, "learning_rate": 6.221611864546884e-06, "loss": 0.1508, "step": 2390 }, { "epoch": 1.4997647796769642, "grad_norm": 0.6755114793777466, "learning_rate": 6.206889706252668e-06, "loss": 0.1055, "step": 2391 }, { "epoch": 1.5003920338717265, "grad_norm": 0.788532018661499, "learning_rate": 6.192181787071781e-06, "loss": 0.1165, "step": 2392 }, { "epoch": 1.501019288066489, "grad_norm": 0.9015203714370728, "learning_rate": 6.17748812218772e-06, "loss": 0.1237, "step": 2393 }, { "epoch": 1.5016465422612515, "grad_norm": 0.8451139330863953, "learning_rate": 6.162808726769276e-06, "loss": 0.1367, "step": 2394 }, { "epoch": 1.5022737964560138, "grad_norm": 0.7379468679428101, "learning_rate": 6.148143615970514e-06, "loss": 0.1031, "step": 2395 }, { "epoch": 1.5029010506507763, "grad_norm": 0.932864248752594, "learning_rate": 6.133492804930728e-06, "loss": 0.1188, "step": 2396 }, { "epoch": 1.5035283048455388, "grad_norm": 0.9268395304679871, "learning_rate": 6.118856308774472e-06, "loss": 0.136, "step": 2397 }, { "epoch": 1.504155559040301, "grad_norm": 0.7807483077049255, "learning_rate": 6.10423414261152e-06, "loss": 0.1183, "step": 2398 }, { "epoch": 1.5047828132350634, "grad_norm": 0.9302142858505249, "learning_rate": 6.0896263215368454e-06, "loss": 0.1417, "step": 2399 }, { "epoch": 1.505410067429826, "grad_norm": 0.8320773839950562, "learning_rate": 6.075032860630625e-06, "loss": 0.1358, "step": 2400 }, { "epoch": 1.5060373216245884, "grad_norm": 0.7353841066360474, "learning_rate": 6.060453774958186e-06, "loss": 0.1291, "step": 2401 }, { "epoch": 1.5066645758193506, "grad_norm": 0.7645012736320496, "learning_rate": 6.045889079570042e-06, "loss": 0.1086, "step": 2402 }, { "epoch": 1.5072918300141134, "grad_norm": 0.9877032041549683, "learning_rate": 6.0313387895018435e-06, "loss": 0.1613, "step": 2403 }, { "epoch": 1.5079190842088757, "grad_norm": 0.9352614283561707, "learning_rate": 6.016802919774358e-06, "loss": 0.1537, "step": 2404 }, { "epoch": 1.508546338403638, "grad_norm": 0.8109005093574524, "learning_rate": 6.002281485393482e-06, "loss": 0.1295, "step": 2405 }, { "epoch": 1.5091735925984004, "grad_norm": 0.8273382782936096, "learning_rate": 5.987774501350198e-06, "loss": 0.1185, "step": 2406 }, { "epoch": 1.509800846793163, "grad_norm": 0.8394461274147034, "learning_rate": 5.973281982620589e-06, "loss": 0.1049, "step": 2407 }, { "epoch": 1.5104281009879252, "grad_norm": 0.537996768951416, "learning_rate": 5.9588039441657766e-06, "loss": 0.1327, "step": 2408 }, { "epoch": 1.5110553551826877, "grad_norm": 0.9410959482192993, "learning_rate": 5.9443404009319565e-06, "loss": 0.1489, "step": 2409 }, { "epoch": 1.5116826093774502, "grad_norm": 0.9202421307563782, "learning_rate": 5.929891367850354e-06, "loss": 0.1627, "step": 2410 }, { "epoch": 1.5123098635722125, "grad_norm": 0.8550167083740234, "learning_rate": 5.915456859837212e-06, "loss": 0.1599, "step": 2411 }, { "epoch": 1.512937117766975, "grad_norm": 0.9108312726020813, "learning_rate": 5.901036891793785e-06, "loss": 0.1359, "step": 2412 }, { "epoch": 1.5135643719617375, "grad_norm": 0.8570768237113953, "learning_rate": 5.886631478606317e-06, "loss": 0.1366, "step": 2413 }, { "epoch": 1.5141916261564998, "grad_norm": 1.050724983215332, "learning_rate": 5.872240635146011e-06, "loss": 0.1494, "step": 2414 }, { "epoch": 1.5148188803512623, "grad_norm": 0.91434645652771, "learning_rate": 5.857864376269051e-06, "loss": 0.1391, "step": 2415 }, { "epoch": 1.5154461345460248, "grad_norm": 0.7957209348678589, "learning_rate": 5.843502716816552e-06, "loss": 0.1419, "step": 2416 }, { "epoch": 1.5160733887407871, "grad_norm": 0.881434977054596, "learning_rate": 5.8291556716145595e-06, "loss": 0.1381, "step": 2417 }, { "epoch": 1.5167006429355496, "grad_norm": 0.8157554268836975, "learning_rate": 5.814823255474043e-06, "loss": 0.1319, "step": 2418 }, { "epoch": 1.5173278971303121, "grad_norm": 0.9174538254737854, "learning_rate": 5.800505483190846e-06, "loss": 0.1293, "step": 2419 }, { "epoch": 1.5179551513250744, "grad_norm": 0.9651191830635071, "learning_rate": 5.786202369545712e-06, "loss": 0.1572, "step": 2420 }, { "epoch": 1.518582405519837, "grad_norm": 0.8735745549201965, "learning_rate": 5.771913929304256e-06, "loss": 0.121, "step": 2421 }, { "epoch": 1.5192096597145994, "grad_norm": 0.889881432056427, "learning_rate": 5.757640177216932e-06, "loss": 0.137, "step": 2422 }, { "epoch": 1.5198369139093617, "grad_norm": 0.784126341342926, "learning_rate": 5.743381128019036e-06, "loss": 0.137, "step": 2423 }, { "epoch": 1.5204641681041242, "grad_norm": 0.9778151512145996, "learning_rate": 5.7291367964306985e-06, "loss": 0.1468, "step": 2424 }, { "epoch": 1.5210914222988867, "grad_norm": 0.8209559321403503, "learning_rate": 5.71490719715683e-06, "loss": 0.1224, "step": 2425 }, { "epoch": 1.521718676493649, "grad_norm": 0.9283830523490906, "learning_rate": 5.700692344887149e-06, "loss": 0.1296, "step": 2426 }, { "epoch": 1.5223459306884115, "grad_norm": 0.8546496033668518, "learning_rate": 5.6864922542961496e-06, "loss": 0.122, "step": 2427 }, { "epoch": 1.522973184883174, "grad_norm": 0.856823742389679, "learning_rate": 5.672306940043093e-06, "loss": 0.1161, "step": 2428 }, { "epoch": 1.5236004390779363, "grad_norm": 0.8136732578277588, "learning_rate": 5.658136416771966e-06, "loss": 0.1287, "step": 2429 }, { "epoch": 1.5242276932726988, "grad_norm": 0.9049329161643982, "learning_rate": 5.643980699111507e-06, "loss": 0.1346, "step": 2430 }, { "epoch": 1.5248549474674613, "grad_norm": 0.8317587971687317, "learning_rate": 5.629839801675161e-06, "loss": 0.1389, "step": 2431 }, { "epoch": 1.5254822016622236, "grad_norm": 0.8538785576820374, "learning_rate": 5.615713739061068e-06, "loss": 0.1425, "step": 2432 }, { "epoch": 1.526109455856986, "grad_norm": 1.051171064376831, "learning_rate": 5.601602525852063e-06, "loss": 0.147, "step": 2433 }, { "epoch": 1.5267367100517486, "grad_norm": 0.8037875294685364, "learning_rate": 5.587506176615651e-06, "loss": 0.1166, "step": 2434 }, { "epoch": 1.5273639642465109, "grad_norm": 0.7876270413398743, "learning_rate": 5.573424705903987e-06, "loss": 0.1363, "step": 2435 }, { "epoch": 1.5279912184412732, "grad_norm": 0.8768879771232605, "learning_rate": 5.5593581282538755e-06, "loss": 0.1276, "step": 2436 }, { "epoch": 1.528618472636036, "grad_norm": 0.8771581649780273, "learning_rate": 5.545306458186728e-06, "loss": 0.1408, "step": 2437 }, { "epoch": 1.5292457268307982, "grad_norm": 0.8572736382484436, "learning_rate": 5.531269710208582e-06, "loss": 0.1241, "step": 2438 }, { "epoch": 1.5298729810255605, "grad_norm": 0.8630357980728149, "learning_rate": 5.517247898810072e-06, "loss": 0.1242, "step": 2439 }, { "epoch": 1.5305002352203232, "grad_norm": 0.9446988105773926, "learning_rate": 5.503241038466403e-06, "loss": 0.1338, "step": 2440 }, { "epoch": 1.5311274894150855, "grad_norm": 0.9041091799736023, "learning_rate": 5.48924914363735e-06, "loss": 0.1302, "step": 2441 }, { "epoch": 1.5317547436098478, "grad_norm": 0.9294798970222473, "learning_rate": 5.475272228767248e-06, "loss": 0.1445, "step": 2442 }, { "epoch": 1.5323819978046103, "grad_norm": 0.8078564405441284, "learning_rate": 5.461310308284941e-06, "loss": 0.1208, "step": 2443 }, { "epoch": 1.5330092519993728, "grad_norm": 0.8502971529960632, "learning_rate": 5.447363396603822e-06, "loss": 0.1247, "step": 2444 }, { "epoch": 1.533636506194135, "grad_norm": 0.8516446948051453, "learning_rate": 5.433431508121778e-06, "loss": 0.1428, "step": 2445 }, { "epoch": 1.5342637603888976, "grad_norm": 0.9445379376411438, "learning_rate": 5.419514657221185e-06, "loss": 0.1303, "step": 2446 }, { "epoch": 1.53489101458366, "grad_norm": 1.029537558555603, "learning_rate": 5.405612858268907e-06, "loss": 0.1331, "step": 2447 }, { "epoch": 1.5355182687784223, "grad_norm": 0.8077148795127869, "learning_rate": 5.391726125616248e-06, "loss": 0.1246, "step": 2448 }, { "epoch": 1.5361455229731849, "grad_norm": 0.8353478908538818, "learning_rate": 5.377854473598976e-06, "loss": 0.1435, "step": 2449 }, { "epoch": 1.5367727771679474, "grad_norm": 0.8239552974700928, "learning_rate": 5.363997916537287e-06, "loss": 0.1339, "step": 2450 }, { "epoch": 1.5374000313627096, "grad_norm": 0.7541061639785767, "learning_rate": 5.350156468735792e-06, "loss": 0.1248, "step": 2451 }, { "epoch": 1.5380272855574721, "grad_norm": 0.7956366539001465, "learning_rate": 5.3363301444835035e-06, "loss": 0.1181, "step": 2452 }, { "epoch": 1.5386545397522347, "grad_norm": 0.9982238411903381, "learning_rate": 5.3225189580538304e-06, "loss": 0.1423, "step": 2453 }, { "epoch": 1.539281793946997, "grad_norm": 0.9753595590591431, "learning_rate": 5.308722923704542e-06, "loss": 0.1282, "step": 2454 }, { "epoch": 1.5399090481417594, "grad_norm": 0.9468235969543457, "learning_rate": 5.294942055677759e-06, "loss": 0.1324, "step": 2455 }, { "epoch": 1.540536302336522, "grad_norm": 1.0198054313659668, "learning_rate": 5.281176368199967e-06, "loss": 0.1355, "step": 2456 }, { "epoch": 1.5411635565312842, "grad_norm": 1.0024781227111816, "learning_rate": 5.267425875481971e-06, "loss": 0.1334, "step": 2457 }, { "epoch": 1.5417908107260467, "grad_norm": 0.9851219654083252, "learning_rate": 5.253690591718885e-06, "loss": 0.1347, "step": 2458 }, { "epoch": 1.5424180649208092, "grad_norm": 0.789305567741394, "learning_rate": 5.239970531090128e-06, "loss": 0.1562, "step": 2459 }, { "epoch": 1.5430453191155715, "grad_norm": 0.7955427169799805, "learning_rate": 5.226265707759408e-06, "loss": 0.1251, "step": 2460 }, { "epoch": 1.543672573310334, "grad_norm": 0.8244566917419434, "learning_rate": 5.212576135874683e-06, "loss": 0.1199, "step": 2461 }, { "epoch": 1.5442998275050965, "grad_norm": 0.8037431240081787, "learning_rate": 5.1989018295681855e-06, "loss": 0.1174, "step": 2462 }, { "epoch": 1.5449270816998588, "grad_norm": 0.8412739038467407, "learning_rate": 5.185242802956387e-06, "loss": 0.1297, "step": 2463 }, { "epoch": 1.5455543358946213, "grad_norm": 0.9513403177261353, "learning_rate": 5.171599070139976e-06, "loss": 0.1319, "step": 2464 }, { "epoch": 1.5461815900893838, "grad_norm": 0.806678056716919, "learning_rate": 5.1579706452038695e-06, "loss": 0.1091, "step": 2465 }, { "epoch": 1.5468088442841461, "grad_norm": 0.8771329522132874, "learning_rate": 5.144357542217155e-06, "loss": 0.1339, "step": 2466 }, { "epoch": 1.5474360984789086, "grad_norm": 0.8276905417442322, "learning_rate": 5.130759775233125e-06, "loss": 0.1428, "step": 2467 }, { "epoch": 1.5480633526736711, "grad_norm": 0.8733172416687012, "learning_rate": 5.117177358289234e-06, "loss": 0.1434, "step": 2468 }, { "epoch": 1.5486906068684334, "grad_norm": 0.8888440728187561, "learning_rate": 5.1036103054070876e-06, "loss": 0.1376, "step": 2469 }, { "epoch": 1.549317861063196, "grad_norm": 0.810010552406311, "learning_rate": 5.090058630592434e-06, "loss": 0.1232, "step": 2470 }, { "epoch": 1.5499451152579584, "grad_norm": 0.8017184734344482, "learning_rate": 5.076522347835147e-06, "loss": 0.1197, "step": 2471 }, { "epoch": 1.5505723694527207, "grad_norm": 0.8911049365997314, "learning_rate": 5.0630014711091965e-06, "loss": 0.14, "step": 2472 }, { "epoch": 1.551199623647483, "grad_norm": 0.7320021390914917, "learning_rate": 5.04949601437267e-06, "loss": 0.1095, "step": 2473 }, { "epoch": 1.5518268778422457, "grad_norm": 0.9221493601799011, "learning_rate": 5.03600599156772e-06, "loss": 0.1368, "step": 2474 }, { "epoch": 1.552454132037008, "grad_norm": 0.8870469927787781, "learning_rate": 5.022531416620577e-06, "loss": 0.1362, "step": 2475 }, { "epoch": 1.5530813862317703, "grad_norm": 0.9370519518852234, "learning_rate": 5.009072303441516e-06, "loss": 0.1277, "step": 2476 }, { "epoch": 1.553708640426533, "grad_norm": 0.9179979562759399, "learning_rate": 4.9956286659248585e-06, "loss": 0.1386, "step": 2477 }, { "epoch": 1.5543358946212953, "grad_norm": 0.7572410106658936, "learning_rate": 4.9822005179489365e-06, "loss": 0.1081, "step": 2478 }, { "epoch": 1.5549631488160576, "grad_norm": 0.9240134954452515, "learning_rate": 4.968787873376102e-06, "loss": 0.1454, "step": 2479 }, { "epoch": 1.5555904030108203, "grad_norm": 0.7326767444610596, "learning_rate": 4.9553907460527084e-06, "loss": 0.1135, "step": 2480 }, { "epoch": 1.5562176572055826, "grad_norm": 0.825032114982605, "learning_rate": 4.942009149809068e-06, "loss": 0.1298, "step": 2481 }, { "epoch": 1.5568449114003449, "grad_norm": 0.9178546667098999, "learning_rate": 4.928643098459485e-06, "loss": 0.1362, "step": 2482 }, { "epoch": 1.5574721655951074, "grad_norm": 0.7995597720146179, "learning_rate": 4.915292605802202e-06, "loss": 0.1216, "step": 2483 }, { "epoch": 1.5580994197898699, "grad_norm": 0.8662647008895874, "learning_rate": 4.901957685619398e-06, "loss": 0.1247, "step": 2484 }, { "epoch": 1.5587266739846322, "grad_norm": 0.8464028835296631, "learning_rate": 4.888638351677184e-06, "loss": 0.15, "step": 2485 }, { "epoch": 1.5593539281793947, "grad_norm": 0.8262073993682861, "learning_rate": 4.8753346177255755e-06, "loss": 0.1082, "step": 2486 }, { "epoch": 1.5599811823741572, "grad_norm": 0.9026674032211304, "learning_rate": 4.86204649749849e-06, "loss": 0.1163, "step": 2487 }, { "epoch": 1.5606084365689195, "grad_norm": 0.8266077637672424, "learning_rate": 4.848774004713714e-06, "loss": 0.122, "step": 2488 }, { "epoch": 1.561235690763682, "grad_norm": 0.955435574054718, "learning_rate": 4.835517153072924e-06, "loss": 0.1325, "step": 2489 }, { "epoch": 1.5618629449584445, "grad_norm": 0.8240264654159546, "learning_rate": 4.822275956261617e-06, "loss": 0.147, "step": 2490 }, { "epoch": 1.5624901991532067, "grad_norm": 0.9496490359306335, "learning_rate": 4.809050427949156e-06, "loss": 0.1397, "step": 2491 }, { "epoch": 1.5631174533479693, "grad_norm": 0.9616060256958008, "learning_rate": 4.795840581788715e-06, "loss": 0.1487, "step": 2492 }, { "epoch": 1.5637447075427318, "grad_norm": 0.736541211605072, "learning_rate": 4.782646431417288e-06, "loss": 0.1207, "step": 2493 }, { "epoch": 1.564371961737494, "grad_norm": 0.845628023147583, "learning_rate": 4.769467990455663e-06, "loss": 0.1271, "step": 2494 }, { "epoch": 1.5649992159322565, "grad_norm": 0.9478265047073364, "learning_rate": 4.7563052725083995e-06, "loss": 0.1478, "step": 2495 }, { "epoch": 1.565626470127019, "grad_norm": 0.8801705837249756, "learning_rate": 4.743158291163838e-06, "loss": 0.1515, "step": 2496 }, { "epoch": 1.5662537243217813, "grad_norm": 0.9115733504295349, "learning_rate": 4.730027059994073e-06, "loss": 0.1133, "step": 2497 }, { "epoch": 1.5668809785165438, "grad_norm": 0.8305857181549072, "learning_rate": 4.7169115925549356e-06, "loss": 0.1234, "step": 2498 }, { "epoch": 1.5675082327113063, "grad_norm": 0.8001347184181213, "learning_rate": 4.703811902385984e-06, "loss": 0.1146, "step": 2499 }, { "epoch": 1.5681354869060686, "grad_norm": 0.9758232235908508, "learning_rate": 4.690728003010496e-06, "loss": 0.1534, "step": 2500 }, { "epoch": 1.5687627411008311, "grad_norm": 0.8125451803207397, "learning_rate": 4.677659907935428e-06, "loss": 0.1211, "step": 2501 }, { "epoch": 1.5693899952955936, "grad_norm": 0.7471377849578857, "learning_rate": 4.664607630651443e-06, "loss": 0.1113, "step": 2502 }, { "epoch": 1.570017249490356, "grad_norm": 0.9791470170021057, "learning_rate": 4.651571184632866e-06, "loss": 0.1512, "step": 2503 }, { "epoch": 1.5706445036851184, "grad_norm": 0.7867892980575562, "learning_rate": 4.6385505833376755e-06, "loss": 0.1191, "step": 2504 }, { "epoch": 1.571271757879881, "grad_norm": 0.8695036172866821, "learning_rate": 4.625545840207501e-06, "loss": 0.122, "step": 2505 }, { "epoch": 1.5718990120746432, "grad_norm": 0.8763251304626465, "learning_rate": 4.612556968667603e-06, "loss": 0.1349, "step": 2506 }, { "epoch": 1.5725262662694057, "grad_norm": 0.9683310389518738, "learning_rate": 4.59958398212684e-06, "loss": 0.153, "step": 2507 }, { "epoch": 1.5731535204641682, "grad_norm": 0.777633547782898, "learning_rate": 4.586626893977681e-06, "loss": 0.1204, "step": 2508 }, { "epoch": 1.5737807746589305, "grad_norm": 0.9412129521369934, "learning_rate": 4.57368571759619e-06, "loss": 0.1661, "step": 2509 }, { "epoch": 1.5744080288536928, "grad_norm": 0.9540891647338867, "learning_rate": 4.5607604663419955e-06, "loss": 0.1441, "step": 2510 }, { "epoch": 1.5750352830484555, "grad_norm": 0.8474165201187134, "learning_rate": 4.547851153558293e-06, "loss": 0.1125, "step": 2511 }, { "epoch": 1.5756625372432178, "grad_norm": 0.7620456218719482, "learning_rate": 4.534957792571826e-06, "loss": 0.1125, "step": 2512 }, { "epoch": 1.57628979143798, "grad_norm": 0.7852665185928345, "learning_rate": 4.522080396692852e-06, "loss": 0.1172, "step": 2513 }, { "epoch": 1.5769170456327428, "grad_norm": 0.8036596775054932, "learning_rate": 4.509218979215166e-06, "loss": 0.1255, "step": 2514 }, { "epoch": 1.577544299827505, "grad_norm": 0.7978661060333252, "learning_rate": 4.4963735534160625e-06, "loss": 0.1309, "step": 2515 }, { "epoch": 1.5781715540222674, "grad_norm": 0.7998348474502563, "learning_rate": 4.483544132556328e-06, "loss": 0.1087, "step": 2516 }, { "epoch": 1.5787988082170301, "grad_norm": 0.8894103169441223, "learning_rate": 4.470730729880222e-06, "loss": 0.1476, "step": 2517 }, { "epoch": 1.5794260624117924, "grad_norm": 0.8098848462104797, "learning_rate": 4.457933358615478e-06, "loss": 0.1247, "step": 2518 }, { "epoch": 1.5800533166065547, "grad_norm": 0.7475268840789795, "learning_rate": 4.445152031973263e-06, "loss": 0.1193, "step": 2519 }, { "epoch": 1.5806805708013172, "grad_norm": 0.8451122641563416, "learning_rate": 4.432386763148197e-06, "loss": 0.1528, "step": 2520 }, { "epoch": 1.5813078249960797, "grad_norm": 0.8940575122833252, "learning_rate": 4.419637565318313e-06, "loss": 0.1447, "step": 2521 }, { "epoch": 1.581935079190842, "grad_norm": 1.0932834148406982, "learning_rate": 4.40690445164506e-06, "loss": 0.1416, "step": 2522 }, { "epoch": 1.5825623333856045, "grad_norm": 0.8093680143356323, "learning_rate": 4.394187435273278e-06, "loss": 0.1442, "step": 2523 }, { "epoch": 1.583189587580367, "grad_norm": 0.7737075686454773, "learning_rate": 4.381486529331196e-06, "loss": 0.112, "step": 2524 }, { "epoch": 1.5838168417751293, "grad_norm": 0.8742368221282959, "learning_rate": 4.368801746930396e-06, "loss": 0.1209, "step": 2525 }, { "epoch": 1.5844440959698918, "grad_norm": 0.8883172869682312, "learning_rate": 4.356133101165829e-06, "loss": 0.1252, "step": 2526 }, { "epoch": 1.5850713501646543, "grad_norm": 0.8819630146026611, "learning_rate": 4.343480605115787e-06, "loss": 0.1195, "step": 2527 }, { "epoch": 1.5856986043594166, "grad_norm": 0.8639972805976868, "learning_rate": 4.330844271841885e-06, "loss": 0.1405, "step": 2528 }, { "epoch": 1.586325858554179, "grad_norm": 0.8901621103286743, "learning_rate": 4.3182241143890605e-06, "loss": 0.1451, "step": 2529 }, { "epoch": 1.5869531127489416, "grad_norm": 0.7380594611167908, "learning_rate": 4.305620145785536e-06, "loss": 0.1122, "step": 2530 }, { "epoch": 1.5875803669437039, "grad_norm": 0.8391431570053101, "learning_rate": 4.2930323790428404e-06, "loss": 0.1464, "step": 2531 }, { "epoch": 1.5882076211384664, "grad_norm": 0.7736996412277222, "learning_rate": 4.280460827155764e-06, "loss": 0.1121, "step": 2532 }, { "epoch": 1.5888348753332289, "grad_norm": 0.7368905544281006, "learning_rate": 4.267905503102358e-06, "loss": 0.1255, "step": 2533 }, { "epoch": 1.5894621295279912, "grad_norm": 0.7692631483078003, "learning_rate": 4.2553664198439356e-06, "loss": 0.1386, "step": 2534 }, { "epoch": 1.5900893837227537, "grad_norm": 0.9218795895576477, "learning_rate": 4.242843590325028e-06, "loss": 0.1337, "step": 2535 }, { "epoch": 1.5907166379175162, "grad_norm": 0.8344318866729736, "learning_rate": 4.230337027473399e-06, "loss": 0.1419, "step": 2536 }, { "epoch": 1.5913438921122784, "grad_norm": 0.8635849952697754, "learning_rate": 4.217846744200003e-06, "loss": 0.1236, "step": 2537 }, { "epoch": 1.591971146307041, "grad_norm": 0.7777072787284851, "learning_rate": 4.205372753399006e-06, "loss": 0.0933, "step": 2538 }, { "epoch": 1.5925984005018035, "grad_norm": 0.9210771322250366, "learning_rate": 4.192915067947747e-06, "loss": 0.1345, "step": 2539 }, { "epoch": 1.5932256546965657, "grad_norm": 0.8880153894424438, "learning_rate": 4.180473700706733e-06, "loss": 0.1363, "step": 2540 }, { "epoch": 1.5938529088913282, "grad_norm": 0.7697297930717468, "learning_rate": 4.1680486645196235e-06, "loss": 0.1174, "step": 2541 }, { "epoch": 1.5944801630860908, "grad_norm": 0.7962117195129395, "learning_rate": 4.15563997221323e-06, "loss": 0.1193, "step": 2542 }, { "epoch": 1.595107417280853, "grad_norm": 0.8743627071380615, "learning_rate": 4.143247636597469e-06, "loss": 0.1508, "step": 2543 }, { "epoch": 1.5957346714756155, "grad_norm": 0.8989070653915405, "learning_rate": 4.1308716704653926e-06, "loss": 0.1421, "step": 2544 }, { "epoch": 1.596361925670378, "grad_norm": 0.8142381310462952, "learning_rate": 4.118512086593143e-06, "loss": 0.1182, "step": 2545 }, { "epoch": 1.5969891798651403, "grad_norm": 0.9252510070800781, "learning_rate": 4.106168897739955e-06, "loss": 0.1669, "step": 2546 }, { "epoch": 1.5976164340599026, "grad_norm": 0.8701350688934326, "learning_rate": 4.093842116648146e-06, "loss": 0.1618, "step": 2547 }, { "epoch": 1.5982436882546653, "grad_norm": 0.9177315831184387, "learning_rate": 4.081531756043069e-06, "loss": 0.1338, "step": 2548 }, { "epoch": 1.5988709424494276, "grad_norm": 0.7954108119010925, "learning_rate": 4.06923782863315e-06, "loss": 0.1295, "step": 2549 }, { "epoch": 1.59949819664419, "grad_norm": 0.8146564364433289, "learning_rate": 4.056960347109846e-06, "loss": 0.1243, "step": 2550 }, { "epoch": 1.6001254508389526, "grad_norm": 0.8106422424316406, "learning_rate": 4.044699324147632e-06, "loss": 0.1272, "step": 2551 }, { "epoch": 1.600752705033715, "grad_norm": 0.7793592214584351, "learning_rate": 4.032454772403993e-06, "loss": 0.1333, "step": 2552 }, { "epoch": 1.6013799592284772, "grad_norm": 0.9862551689147949, "learning_rate": 4.020226704519416e-06, "loss": 0.1681, "step": 2553 }, { "epoch": 1.60200721342324, "grad_norm": 0.852508008480072, "learning_rate": 4.008015133117356e-06, "loss": 0.1148, "step": 2554 }, { "epoch": 1.6026344676180022, "grad_norm": 0.9125605821609497, "learning_rate": 3.995820070804253e-06, "loss": 0.1256, "step": 2555 }, { "epoch": 1.6032617218127645, "grad_norm": 0.8080053329467773, "learning_rate": 3.983641530169497e-06, "loss": 0.1151, "step": 2556 }, { "epoch": 1.603888976007527, "grad_norm": 0.8587335348129272, "learning_rate": 3.971479523785435e-06, "loss": 0.1102, "step": 2557 }, { "epoch": 1.6045162302022895, "grad_norm": 0.9160087704658508, "learning_rate": 3.959334064207318e-06, "loss": 0.1205, "step": 2558 }, { "epoch": 1.6051434843970518, "grad_norm": 0.965299129486084, "learning_rate": 3.947205163973347e-06, "loss": 0.1425, "step": 2559 }, { "epoch": 1.6057707385918143, "grad_norm": 0.8616262674331665, "learning_rate": 3.935092835604597e-06, "loss": 0.1215, "step": 2560 }, { "epoch": 1.6063979927865768, "grad_norm": 0.8299267292022705, "learning_rate": 3.922997091605059e-06, "loss": 0.1329, "step": 2561 }, { "epoch": 1.607025246981339, "grad_norm": 0.8477101922035217, "learning_rate": 3.9109179444615965e-06, "loss": 0.1245, "step": 2562 }, { "epoch": 1.6076525011761016, "grad_norm": 0.8761797547340393, "learning_rate": 3.898855406643935e-06, "loss": 0.1444, "step": 2563 }, { "epoch": 1.608279755370864, "grad_norm": 0.7521011233329773, "learning_rate": 3.886809490604661e-06, "loss": 0.1182, "step": 2564 }, { "epoch": 1.6089070095656264, "grad_norm": 0.8995316624641418, "learning_rate": 3.874780208779201e-06, "loss": 0.1287, "step": 2565 }, { "epoch": 1.6095342637603889, "grad_norm": 0.8634257912635803, "learning_rate": 3.862767573585795e-06, "loss": 0.1505, "step": 2566 }, { "epoch": 1.6101615179551514, "grad_norm": 0.8332936763763428, "learning_rate": 3.850771597425515e-06, "loss": 0.1372, "step": 2567 }, { "epoch": 1.6107887721499137, "grad_norm": 1.0028297901153564, "learning_rate": 3.83879229268223e-06, "loss": 0.1472, "step": 2568 }, { "epoch": 1.6114160263446762, "grad_norm": 0.8762323260307312, "learning_rate": 3.826829671722596e-06, "loss": 0.1237, "step": 2569 }, { "epoch": 1.6120432805394387, "grad_norm": 0.8835314512252808, "learning_rate": 3.81488374689605e-06, "loss": 0.1247, "step": 2570 }, { "epoch": 1.612670534734201, "grad_norm": 0.8781018257141113, "learning_rate": 3.802954530534795e-06, "loss": 0.1283, "step": 2571 }, { "epoch": 1.6132977889289635, "grad_norm": 0.706692636013031, "learning_rate": 3.7910420349537714e-06, "loss": 0.0883, "step": 2572 }, { "epoch": 1.613925043123726, "grad_norm": 0.8565866351127625, "learning_rate": 3.779146272450671e-06, "loss": 0.1276, "step": 2573 }, { "epoch": 1.6145522973184883, "grad_norm": 0.8782655596733093, "learning_rate": 3.767267255305911e-06, "loss": 0.1428, "step": 2574 }, { "epoch": 1.6151795515132508, "grad_norm": 0.8743089437484741, "learning_rate": 3.7554049957826166e-06, "loss": 0.1405, "step": 2575 }, { "epoch": 1.6158068057080133, "grad_norm": 0.9464399218559265, "learning_rate": 3.7435595061266216e-06, "loss": 0.1462, "step": 2576 }, { "epoch": 1.6164340599027756, "grad_norm": 0.9497423768043518, "learning_rate": 3.731730798566433e-06, "loss": 0.1555, "step": 2577 }, { "epoch": 1.617061314097538, "grad_norm": 0.877797544002533, "learning_rate": 3.719918885313247e-06, "loss": 0.1341, "step": 2578 }, { "epoch": 1.6176885682923006, "grad_norm": 0.8285330533981323, "learning_rate": 3.7081237785609147e-06, "loss": 0.1228, "step": 2579 }, { "epoch": 1.6183158224870628, "grad_norm": 0.9165721535682678, "learning_rate": 3.696345490485944e-06, "loss": 0.1289, "step": 2580 }, { "epoch": 1.6189430766818254, "grad_norm": 0.7632472515106201, "learning_rate": 3.6845840332474715e-06, "loss": 0.127, "step": 2581 }, { "epoch": 1.6195703308765879, "grad_norm": 0.8485530018806458, "learning_rate": 3.672839418987275e-06, "loss": 0.1282, "step": 2582 }, { "epoch": 1.6201975850713501, "grad_norm": 0.8329845070838928, "learning_rate": 3.6611116598297213e-06, "loss": 0.1356, "step": 2583 }, { "epoch": 1.6208248392661124, "grad_norm": 0.84341961145401, "learning_rate": 3.64940076788179e-06, "loss": 0.1126, "step": 2584 }, { "epoch": 1.6214520934608752, "grad_norm": 0.9287056922912598, "learning_rate": 3.637706755233048e-06, "loss": 0.1452, "step": 2585 }, { "epoch": 1.6220793476556374, "grad_norm": 0.7329952716827393, "learning_rate": 3.626029633955639e-06, "loss": 0.1081, "step": 2586 }, { "epoch": 1.6227066018503997, "grad_norm": 0.8448726534843445, "learning_rate": 3.6143694161042643e-06, "loss": 0.1279, "step": 2587 }, { "epoch": 1.6233338560451624, "grad_norm": 0.9451857805252075, "learning_rate": 3.6027261137161774e-06, "loss": 0.15, "step": 2588 }, { "epoch": 1.6239611102399247, "grad_norm": 0.9256661534309387, "learning_rate": 3.5910997388111767e-06, "loss": 0.1312, "step": 2589 }, { "epoch": 1.624588364434687, "grad_norm": 0.8317173719406128, "learning_rate": 3.579490303391564e-06, "loss": 0.1181, "step": 2590 }, { "epoch": 1.6252156186294497, "grad_norm": 0.8810442686080933, "learning_rate": 3.5678978194421787e-06, "loss": 0.1285, "step": 2591 }, { "epoch": 1.625842872824212, "grad_norm": 0.8828345537185669, "learning_rate": 3.5563222989303458e-06, "loss": 0.1187, "step": 2592 }, { "epoch": 1.6264701270189743, "grad_norm": 0.8922680616378784, "learning_rate": 3.544763753805882e-06, "loss": 0.142, "step": 2593 }, { "epoch": 1.6270973812137368, "grad_norm": 0.8479432463645935, "learning_rate": 3.5332221960010892e-06, "loss": 0.1437, "step": 2594 }, { "epoch": 1.6277246354084993, "grad_norm": 0.7755201458930969, "learning_rate": 3.5216976374307123e-06, "loss": 0.129, "step": 2595 }, { "epoch": 1.6283518896032616, "grad_norm": 0.7489088773727417, "learning_rate": 3.5101900899919626e-06, "loss": 0.1231, "step": 2596 }, { "epoch": 1.628979143798024, "grad_norm": 0.8229769468307495, "learning_rate": 3.4986995655644874e-06, "loss": 0.1143, "step": 2597 }, { "epoch": 1.6296063979927866, "grad_norm": 0.9471097588539124, "learning_rate": 3.487226076010359e-06, "loss": 0.1612, "step": 2598 }, { "epoch": 1.630233652187549, "grad_norm": 0.8467300534248352, "learning_rate": 3.4757696331740666e-06, "loss": 0.137, "step": 2599 }, { "epoch": 1.6308609063823114, "grad_norm": 0.8977757692337036, "learning_rate": 3.4643302488825017e-06, "loss": 0.1168, "step": 2600 }, { "epoch": 1.631488160577074, "grad_norm": 0.8392802476882935, "learning_rate": 3.4529079349449358e-06, "loss": 0.1483, "step": 2601 }, { "epoch": 1.6321154147718362, "grad_norm": 0.9977639317512512, "learning_rate": 3.441502703153028e-06, "loss": 0.1422, "step": 2602 }, { "epoch": 1.6327426689665987, "grad_norm": 1.0911308526992798, "learning_rate": 3.4301145652808022e-06, "loss": 0.1435, "step": 2603 }, { "epoch": 1.6333699231613612, "grad_norm": 0.6966939568519592, "learning_rate": 3.418743533084634e-06, "loss": 0.1079, "step": 2604 }, { "epoch": 1.6339971773561235, "grad_norm": 0.870735228061676, "learning_rate": 3.407389618303245e-06, "loss": 0.137, "step": 2605 }, { "epoch": 1.634624431550886, "grad_norm": 0.8636801838874817, "learning_rate": 3.3960528326576724e-06, "loss": 0.1127, "step": 2606 }, { "epoch": 1.6352516857456485, "grad_norm": 0.9069063067436218, "learning_rate": 3.3847331878512814e-06, "loss": 0.1503, "step": 2607 }, { "epoch": 1.6358789399404108, "grad_norm": 0.8030480742454529, "learning_rate": 3.3734306955697417e-06, "loss": 0.1182, "step": 2608 }, { "epoch": 1.6365061941351733, "grad_norm": 0.7923745512962341, "learning_rate": 3.36214536748102e-06, "loss": 0.1091, "step": 2609 }, { "epoch": 1.6371334483299358, "grad_norm": 0.843481719493866, "learning_rate": 3.3508772152353464e-06, "loss": 0.1318, "step": 2610 }, { "epoch": 1.637760702524698, "grad_norm": 0.7991217970848083, "learning_rate": 3.3396262504652353e-06, "loss": 0.1253, "step": 2611 }, { "epoch": 1.6383879567194606, "grad_norm": 0.8465505242347717, "learning_rate": 3.3283924847854588e-06, "loss": 0.1455, "step": 2612 }, { "epoch": 1.639015210914223, "grad_norm": 0.7803559303283691, "learning_rate": 3.3171759297930194e-06, "loss": 0.0915, "step": 2613 }, { "epoch": 1.6396424651089854, "grad_norm": 1.0076162815093994, "learning_rate": 3.3059765970671688e-06, "loss": 0.1335, "step": 2614 }, { "epoch": 1.6402697193037479, "grad_norm": 0.9411731362342834, "learning_rate": 3.2947944981693693e-06, "loss": 0.1426, "step": 2615 }, { "epoch": 1.6408969734985104, "grad_norm": 0.8761032223701477, "learning_rate": 3.283629644643296e-06, "loss": 0.1259, "step": 2616 }, { "epoch": 1.6415242276932727, "grad_norm": 0.7741644382476807, "learning_rate": 3.2724820480148202e-06, "loss": 0.1206, "step": 2617 }, { "epoch": 1.6421514818880352, "grad_norm": 0.8049193620681763, "learning_rate": 3.261351719792005e-06, "loss": 0.1224, "step": 2618 }, { "epoch": 1.6427787360827977, "grad_norm": 0.8451152443885803, "learning_rate": 3.250238671465067e-06, "loss": 0.1262, "step": 2619 }, { "epoch": 1.64340599027756, "grad_norm": 0.8271937370300293, "learning_rate": 3.239142914506406e-06, "loss": 0.126, "step": 2620 }, { "epoch": 1.6440332444723225, "grad_norm": 0.7851383090019226, "learning_rate": 3.228064460370559e-06, "loss": 0.1078, "step": 2621 }, { "epoch": 1.644660498667085, "grad_norm": 0.8023667931556702, "learning_rate": 3.2170033204942073e-06, "loss": 0.1078, "step": 2622 }, { "epoch": 1.6452877528618473, "grad_norm": 0.7937674522399902, "learning_rate": 3.2059595062961568e-06, "loss": 0.1263, "step": 2623 }, { "epoch": 1.6459150070566095, "grad_norm": 0.889525830745697, "learning_rate": 3.1949330291773183e-06, "loss": 0.1452, "step": 2624 }, { "epoch": 1.6465422612513723, "grad_norm": 1.006691336631775, "learning_rate": 3.1839239005207156e-06, "loss": 0.157, "step": 2625 }, { "epoch": 1.6471695154461345, "grad_norm": 0.8261495232582092, "learning_rate": 3.1729321316914617e-06, "loss": 0.1197, "step": 2626 }, { "epoch": 1.6477967696408968, "grad_norm": 0.816116988658905, "learning_rate": 3.1619577340367445e-06, "loss": 0.1204, "step": 2627 }, { "epoch": 1.6484240238356596, "grad_norm": 0.8447760343551636, "learning_rate": 3.1510007188858214e-06, "loss": 0.1361, "step": 2628 }, { "epoch": 1.6490512780304218, "grad_norm": 0.8570554256439209, "learning_rate": 3.140061097550011e-06, "loss": 0.1388, "step": 2629 }, { "epoch": 1.6496785322251841, "grad_norm": 0.8310171961784363, "learning_rate": 3.1291388813226574e-06, "loss": 0.1351, "step": 2630 }, { "epoch": 1.6503057864199466, "grad_norm": 0.9378737807273865, "learning_rate": 3.1182340814791567e-06, "loss": 0.1333, "step": 2631 }, { "epoch": 1.6509330406147091, "grad_norm": 0.6849513649940491, "learning_rate": 3.1073467092769148e-06, "loss": 0.0923, "step": 2632 }, { "epoch": 1.6515602948094714, "grad_norm": 0.8584526181221008, "learning_rate": 3.096476775955353e-06, "loss": 0.1311, "step": 2633 }, { "epoch": 1.652187549004234, "grad_norm": 0.8407584428787231, "learning_rate": 3.085624292735887e-06, "loss": 0.1438, "step": 2634 }, { "epoch": 1.6528148031989964, "grad_norm": 0.9150035381317139, "learning_rate": 3.0747892708219095e-06, "loss": 0.1515, "step": 2635 }, { "epoch": 1.6534420573937587, "grad_norm": 0.9339888095855713, "learning_rate": 3.0639717213988064e-06, "loss": 0.1405, "step": 2636 }, { "epoch": 1.6540693115885212, "grad_norm": 0.8367180824279785, "learning_rate": 3.053171655633902e-06, "loss": 0.1204, "step": 2637 }, { "epoch": 1.6546965657832837, "grad_norm": 0.8894864320755005, "learning_rate": 3.0423890846764914e-06, "loss": 0.1461, "step": 2638 }, { "epoch": 1.655323819978046, "grad_norm": 0.7762343287467957, "learning_rate": 3.0316240196577993e-06, "loss": 0.1161, "step": 2639 }, { "epoch": 1.6559510741728085, "grad_norm": 0.910273551940918, "learning_rate": 3.0208764716909854e-06, "loss": 0.1278, "step": 2640 }, { "epoch": 1.656578328367571, "grad_norm": 0.6781446933746338, "learning_rate": 3.0101464518711232e-06, "loss": 0.143, "step": 2641 }, { "epoch": 1.6572055825623333, "grad_norm": 0.8713527917861938, "learning_rate": 2.999433971275181e-06, "loss": 0.1454, "step": 2642 }, { "epoch": 1.6578328367570958, "grad_norm": 0.8391584753990173, "learning_rate": 2.9887390409620363e-06, "loss": 0.1311, "step": 2643 }, { "epoch": 1.6584600909518583, "grad_norm": 0.9159807562828064, "learning_rate": 2.9780616719724407e-06, "loss": 0.1355, "step": 2644 }, { "epoch": 1.6590873451466206, "grad_norm": 0.8163064122200012, "learning_rate": 2.967401875329019e-06, "loss": 0.1267, "step": 2645 }, { "epoch": 1.659714599341383, "grad_norm": 0.8416815996170044, "learning_rate": 2.956759662036253e-06, "loss": 0.1349, "step": 2646 }, { "epoch": 1.6603418535361456, "grad_norm": 0.858839213848114, "learning_rate": 2.94613504308048e-06, "loss": 0.153, "step": 2647 }, { "epoch": 1.660969107730908, "grad_norm": 1.034128189086914, "learning_rate": 2.9355280294298595e-06, "loss": 0.1144, "step": 2648 }, { "epoch": 1.6615963619256704, "grad_norm": 0.9208030700683594, "learning_rate": 2.9249386320343863e-06, "loss": 0.1586, "step": 2649 }, { "epoch": 1.662223616120433, "grad_norm": 0.8803120255470276, "learning_rate": 2.9143668618258723e-06, "loss": 0.1444, "step": 2650 }, { "epoch": 1.6628508703151952, "grad_norm": 0.8457797169685364, "learning_rate": 2.9038127297179273e-06, "loss": 0.1185, "step": 2651 }, { "epoch": 1.6634781245099577, "grad_norm": 0.8965927362442017, "learning_rate": 2.8932762466059563e-06, "loss": 0.1329, "step": 2652 }, { "epoch": 1.6641053787047202, "grad_norm": 0.8928632736206055, "learning_rate": 2.882757423367133e-06, "loss": 0.1345, "step": 2653 }, { "epoch": 1.6647326328994825, "grad_norm": 1.097901463508606, "learning_rate": 2.872256270860414e-06, "loss": 0.1435, "step": 2654 }, { "epoch": 1.665359887094245, "grad_norm": 0.7610005140304565, "learning_rate": 2.861772799926508e-06, "loss": 0.1171, "step": 2655 }, { "epoch": 1.6659871412890075, "grad_norm": 0.8609413504600525, "learning_rate": 2.85130702138787e-06, "loss": 0.1315, "step": 2656 }, { "epoch": 1.6666143954837698, "grad_norm": 0.8173717856407166, "learning_rate": 2.840858946048697e-06, "loss": 0.1396, "step": 2657 }, { "epoch": 1.6672416496785323, "grad_norm": 0.8212631344795227, "learning_rate": 2.830428584694904e-06, "loss": 0.1376, "step": 2658 }, { "epoch": 1.6678689038732948, "grad_norm": 0.8188292384147644, "learning_rate": 2.8200159480941125e-06, "loss": 0.1194, "step": 2659 }, { "epoch": 1.668496158068057, "grad_norm": 0.7810712456703186, "learning_rate": 2.809621046995663e-06, "loss": 0.1218, "step": 2660 }, { "epoch": 1.6691234122628194, "grad_norm": 0.8900648951530457, "learning_rate": 2.79924389213057e-06, "loss": 0.1168, "step": 2661 }, { "epoch": 1.669750666457582, "grad_norm": 0.8871285319328308, "learning_rate": 2.788884494211539e-06, "loss": 0.1426, "step": 2662 }, { "epoch": 1.6703779206523444, "grad_norm": 0.9678382873535156, "learning_rate": 2.7785428639329427e-06, "loss": 0.1472, "step": 2663 }, { "epoch": 1.6710051748471066, "grad_norm": 0.796698808670044, "learning_rate": 2.7682190119708074e-06, "loss": 0.1071, "step": 2664 }, { "epoch": 1.6716324290418694, "grad_norm": 0.9197902083396912, "learning_rate": 2.7579129489828173e-06, "loss": 0.1468, "step": 2665 }, { "epoch": 1.6722596832366317, "grad_norm": 1.0166407823562622, "learning_rate": 2.74762468560827e-06, "loss": 0.1384, "step": 2666 }, { "epoch": 1.672886937431394, "grad_norm": 0.8844588994979858, "learning_rate": 2.7373542324681126e-06, "loss": 0.143, "step": 2667 }, { "epoch": 1.6735141916261567, "grad_norm": 0.7596662044525146, "learning_rate": 2.7271016001648877e-06, "loss": 0.0926, "step": 2668 }, { "epoch": 1.674141445820919, "grad_norm": 0.9629493951797485, "learning_rate": 2.716866799282756e-06, "loss": 0.1499, "step": 2669 }, { "epoch": 1.6747687000156812, "grad_norm": 0.8590734004974365, "learning_rate": 2.706649840387463e-06, "loss": 0.1347, "step": 2670 }, { "epoch": 1.6753959542104437, "grad_norm": 0.7214009165763855, "learning_rate": 2.696450734026328e-06, "loss": 0.115, "step": 2671 }, { "epoch": 1.6760232084052062, "grad_norm": 0.8959335684776306, "learning_rate": 2.6862694907282506e-06, "loss": 0.1403, "step": 2672 }, { "epoch": 1.6766504625999685, "grad_norm": 0.8892949819564819, "learning_rate": 2.6761061210036877e-06, "loss": 0.13, "step": 2673 }, { "epoch": 1.677277716794731, "grad_norm": 0.8894079327583313, "learning_rate": 2.665960635344642e-06, "loss": 0.1321, "step": 2674 }, { "epoch": 1.6779049709894935, "grad_norm": 1.1011821031570435, "learning_rate": 2.655833044224658e-06, "loss": 0.1542, "step": 2675 }, { "epoch": 1.6785322251842558, "grad_norm": 0.945216715335846, "learning_rate": 2.6457233580988083e-06, "loss": 0.1472, "step": 2676 }, { "epoch": 1.6791594793790183, "grad_norm": 0.953877329826355, "learning_rate": 2.6356315874036685e-06, "loss": 0.1247, "step": 2677 }, { "epoch": 1.6797867335737808, "grad_norm": 0.8800573945045471, "learning_rate": 2.625557742557332e-06, "loss": 0.1112, "step": 2678 }, { "epoch": 1.6804139877685431, "grad_norm": 0.7551929950714111, "learning_rate": 2.615501833959384e-06, "loss": 0.1242, "step": 2679 }, { "epoch": 1.6810412419633056, "grad_norm": 0.9266740679740906, "learning_rate": 2.605463871990894e-06, "loss": 0.1424, "step": 2680 }, { "epoch": 1.6816684961580681, "grad_norm": 0.9326827526092529, "learning_rate": 2.5954438670144044e-06, "loss": 0.1296, "step": 2681 }, { "epoch": 1.6822957503528304, "grad_norm": 0.9425510168075562, "learning_rate": 2.5854418293739204e-06, "loss": 0.1474, "step": 2682 }, { "epoch": 1.682923004547593, "grad_norm": 0.9114566445350647, "learning_rate": 2.575457769394891e-06, "loss": 0.1332, "step": 2683 }, { "epoch": 1.6835502587423554, "grad_norm": 0.9489275217056274, "learning_rate": 2.5654916973842168e-06, "loss": 0.1237, "step": 2684 }, { "epoch": 1.6841775129371177, "grad_norm": 0.8633596897125244, "learning_rate": 2.5555436236302254e-06, "loss": 0.1306, "step": 2685 }, { "epoch": 1.6848047671318802, "grad_norm": 0.9675174355506897, "learning_rate": 2.5456135584026664e-06, "loss": 0.1303, "step": 2686 }, { "epoch": 1.6854320213266427, "grad_norm": 0.9195914268493652, "learning_rate": 2.5357015119526882e-06, "loss": 0.1418, "step": 2687 }, { "epoch": 1.686059275521405, "grad_norm": 0.9421305060386658, "learning_rate": 2.525807494512853e-06, "loss": 0.1124, "step": 2688 }, { "epoch": 1.6866865297161675, "grad_norm": 0.8732928037643433, "learning_rate": 2.515931516297094e-06, "loss": 0.1361, "step": 2689 }, { "epoch": 1.68731378391093, "grad_norm": 0.7002175450325012, "learning_rate": 2.5060735875007392e-06, "loss": 0.1036, "step": 2690 }, { "epoch": 1.6879410381056923, "grad_norm": 0.8896955251693726, "learning_rate": 2.4962337183004714e-06, "loss": 0.1205, "step": 2691 }, { "epoch": 1.6885682923004548, "grad_norm": 0.8062140941619873, "learning_rate": 2.4864119188543368e-06, "loss": 0.1283, "step": 2692 }, { "epoch": 1.6891955464952173, "grad_norm": 0.9641050696372986, "learning_rate": 2.476608199301722e-06, "loss": 0.1402, "step": 2693 }, { "epoch": 1.6898228006899796, "grad_norm": 0.9702235460281372, "learning_rate": 2.4668225697633586e-06, "loss": 0.1447, "step": 2694 }, { "epoch": 1.690450054884742, "grad_norm": 0.7521775364875793, "learning_rate": 2.4570550403412854e-06, "loss": 0.1153, "step": 2695 }, { "epoch": 1.6910773090795046, "grad_norm": 0.8183186054229736, "learning_rate": 2.447305621118874e-06, "loss": 0.1217, "step": 2696 }, { "epoch": 1.6917045632742669, "grad_norm": 0.7753691673278809, "learning_rate": 2.437574322160792e-06, "loss": 0.1029, "step": 2697 }, { "epoch": 1.6923318174690292, "grad_norm": 0.8064447641372681, "learning_rate": 2.4278611535130005e-06, "loss": 0.1164, "step": 2698 }, { "epoch": 1.692959071663792, "grad_norm": 0.930521547794342, "learning_rate": 2.418166125202752e-06, "loss": 0.1381, "step": 2699 }, { "epoch": 1.6935863258585542, "grad_norm": 0.869580864906311, "learning_rate": 2.4084892472385545e-06, "loss": 0.1373, "step": 2700 }, { "epoch": 1.6942135800533165, "grad_norm": 0.9124370217323303, "learning_rate": 2.3988305296101965e-06, "loss": 0.171, "step": 2701 }, { "epoch": 1.6948408342480792, "grad_norm": 1.0721293687820435, "learning_rate": 2.389189982288711e-06, "loss": 0.118, "step": 2702 }, { "epoch": 1.6954680884428415, "grad_norm": 0.8098970651626587, "learning_rate": 2.3795676152263746e-06, "loss": 0.119, "step": 2703 }, { "epoch": 1.6960953426376038, "grad_norm": 0.9644606113433838, "learning_rate": 2.3699634383566973e-06, "loss": 0.1485, "step": 2704 }, { "epoch": 1.6967225968323665, "grad_norm": 0.9537535309791565, "learning_rate": 2.3603774615944098e-06, "loss": 0.1513, "step": 2705 }, { "epoch": 1.6973498510271288, "grad_norm": 0.9182994961738586, "learning_rate": 2.3508096948354453e-06, "loss": 0.1406, "step": 2706 }, { "epoch": 1.697977105221891, "grad_norm": 0.9129054546356201, "learning_rate": 2.341260147956952e-06, "loss": 0.129, "step": 2707 }, { "epoch": 1.6986043594166536, "grad_norm": 0.824851930141449, "learning_rate": 2.331728830817264e-06, "loss": 0.1117, "step": 2708 }, { "epoch": 1.699231613611416, "grad_norm": 0.7824007272720337, "learning_rate": 2.3222157532558944e-06, "loss": 0.1115, "step": 2709 }, { "epoch": 1.6998588678061783, "grad_norm": 0.7941312193870544, "learning_rate": 2.312720925093528e-06, "loss": 0.1452, "step": 2710 }, { "epoch": 1.7004861220009408, "grad_norm": 0.9578248858451843, "learning_rate": 2.303244356132015e-06, "loss": 0.1447, "step": 2711 }, { "epoch": 1.7011133761957034, "grad_norm": 0.839382529258728, "learning_rate": 2.2937860561543457e-06, "loss": 0.1208, "step": 2712 }, { "epoch": 1.7017406303904656, "grad_norm": 0.8220772743225098, "learning_rate": 2.284346034924654e-06, "loss": 0.1278, "step": 2713 }, { "epoch": 1.7023678845852281, "grad_norm": 0.7147632241249084, "learning_rate": 2.2749243021882085e-06, "loss": 0.1155, "step": 2714 }, { "epoch": 1.7029951387799906, "grad_norm": 0.8697649836540222, "learning_rate": 2.2655208676713957e-06, "loss": 0.1293, "step": 2715 }, { "epoch": 1.703622392974753, "grad_norm": 0.7797321081161499, "learning_rate": 2.256135741081711e-06, "loss": 0.1135, "step": 2716 }, { "epoch": 1.7042496471695154, "grad_norm": 1.2459222078323364, "learning_rate": 2.246768932107759e-06, "loss": 0.1219, "step": 2717 }, { "epoch": 1.704876901364278, "grad_norm": 0.8825497627258301, "learning_rate": 2.2374204504192163e-06, "loss": 0.134, "step": 2718 }, { "epoch": 1.7055041555590402, "grad_norm": 0.9286799430847168, "learning_rate": 2.2280903056668523e-06, "loss": 0.1337, "step": 2719 }, { "epoch": 1.7061314097538027, "grad_norm": 0.8434159159660339, "learning_rate": 2.218778507482504e-06, "loss": 0.1069, "step": 2720 }, { "epoch": 1.7067586639485652, "grad_norm": 1.0079706907272339, "learning_rate": 2.209485065479069e-06, "loss": 0.1464, "step": 2721 }, { "epoch": 1.7073859181433275, "grad_norm": 0.8292884230613708, "learning_rate": 2.2002099892504967e-06, "loss": 0.1369, "step": 2722 }, { "epoch": 1.70801317233809, "grad_norm": 0.9433373212814331, "learning_rate": 2.1909532883717753e-06, "loss": 0.1464, "step": 2723 }, { "epoch": 1.7086404265328525, "grad_norm": 0.8257498741149902, "learning_rate": 2.1817149723989185e-06, "loss": 0.1345, "step": 2724 }, { "epoch": 1.7092676807276148, "grad_norm": 0.8046967387199402, "learning_rate": 2.1724950508689656e-06, "loss": 0.1067, "step": 2725 }, { "epoch": 1.7098949349223773, "grad_norm": 0.8804117441177368, "learning_rate": 2.1632935332999706e-06, "loss": 0.15, "step": 2726 }, { "epoch": 1.7105221891171398, "grad_norm": 0.9799605011940002, "learning_rate": 2.1541104291909787e-06, "loss": 0.1411, "step": 2727 }, { "epoch": 1.711149443311902, "grad_norm": 0.8501812219619751, "learning_rate": 2.144945748022036e-06, "loss": 0.1242, "step": 2728 }, { "epoch": 1.7117766975066646, "grad_norm": 0.9060745239257812, "learning_rate": 2.1357994992541653e-06, "loss": 0.1176, "step": 2729 }, { "epoch": 1.7124039517014271, "grad_norm": 0.8523778319358826, "learning_rate": 2.1266716923293583e-06, "loss": 0.1443, "step": 2730 }, { "epoch": 1.7130312058961894, "grad_norm": 0.8337844610214233, "learning_rate": 2.117562336670571e-06, "loss": 0.1128, "step": 2731 }, { "epoch": 1.713658460090952, "grad_norm": 0.6823314428329468, "learning_rate": 2.1084714416817144e-06, "loss": 0.1215, "step": 2732 }, { "epoch": 1.7142857142857144, "grad_norm": 0.9481781125068665, "learning_rate": 2.0993990167476387e-06, "loss": 0.1317, "step": 2733 }, { "epoch": 1.7149129684804767, "grad_norm": 0.973198652267456, "learning_rate": 2.09034507123413e-06, "loss": 0.1483, "step": 2734 }, { "epoch": 1.715540222675239, "grad_norm": 0.8245511054992676, "learning_rate": 2.0813096144878895e-06, "loss": 0.1278, "step": 2735 }, { "epoch": 1.7161674768700017, "grad_norm": 0.8046514391899109, "learning_rate": 2.072292655836541e-06, "loss": 0.1166, "step": 2736 }, { "epoch": 1.716794731064764, "grad_norm": 0.9386309385299683, "learning_rate": 2.063294204588606e-06, "loss": 0.1192, "step": 2737 }, { "epoch": 1.7174219852595263, "grad_norm": 0.9409434199333191, "learning_rate": 2.0543142700335106e-06, "loss": 0.1485, "step": 2738 }, { "epoch": 1.718049239454289, "grad_norm": 0.811586320400238, "learning_rate": 2.045352861441545e-06, "loss": 0.114, "step": 2739 }, { "epoch": 1.7186764936490513, "grad_norm": 0.8804102540016174, "learning_rate": 2.036409988063892e-06, "loss": 0.1384, "step": 2740 }, { "epoch": 1.7193037478438136, "grad_norm": 0.8443262577056885, "learning_rate": 2.027485659132602e-06, "loss": 0.1226, "step": 2741 }, { "epoch": 1.7199310020385763, "grad_norm": 0.9686700701713562, "learning_rate": 2.018579883860561e-06, "loss": 0.147, "step": 2742 }, { "epoch": 1.7205582562333386, "grad_norm": 0.915995180606842, "learning_rate": 2.009692671441521e-06, "loss": 0.1287, "step": 2743 }, { "epoch": 1.7211855104281009, "grad_norm": 0.906000018119812, "learning_rate": 2.000824031050064e-06, "loss": 0.1383, "step": 2744 }, { "epoch": 1.7218127646228634, "grad_norm": 0.9242513179779053, "learning_rate": 1.9919739718415987e-06, "loss": 0.1238, "step": 2745 }, { "epoch": 1.7224400188176259, "grad_norm": 0.8009757399559021, "learning_rate": 1.9831425029523534e-06, "loss": 0.1066, "step": 2746 }, { "epoch": 1.7230672730123882, "grad_norm": 0.9236252307891846, "learning_rate": 1.97432963349937e-06, "loss": 0.1482, "step": 2747 }, { "epoch": 1.7236945272071507, "grad_norm": 0.854402482509613, "learning_rate": 1.965535372580474e-06, "loss": 0.1272, "step": 2748 }, { "epoch": 1.7243217814019132, "grad_norm": 0.9159762859344482, "learning_rate": 1.9567597292742934e-06, "loss": 0.145, "step": 2749 }, { "epoch": 1.7249490355966755, "grad_norm": 0.8271155953407288, "learning_rate": 1.9480027126402357e-06, "loss": 0.1165, "step": 2750 }, { "epoch": 1.725576289791438, "grad_norm": 0.7804858088493347, "learning_rate": 1.939264331718478e-06, "loss": 0.1198, "step": 2751 }, { "epoch": 1.7262035439862005, "grad_norm": 0.8453112840652466, "learning_rate": 1.9305445955299594e-06, "loss": 0.1066, "step": 2752 }, { "epoch": 1.7268307981809627, "grad_norm": 0.8296924233436584, "learning_rate": 1.921843513076367e-06, "loss": 0.118, "step": 2753 }, { "epoch": 1.7274580523757253, "grad_norm": 0.9044693112373352, "learning_rate": 1.9131610933401366e-06, "loss": 0.141, "step": 2754 }, { "epoch": 1.7280853065704878, "grad_norm": 0.791060209274292, "learning_rate": 1.904497345284433e-06, "loss": 0.1169, "step": 2755 }, { "epoch": 1.72871256076525, "grad_norm": 0.8272935152053833, "learning_rate": 1.8958522778531518e-06, "loss": 0.1336, "step": 2756 }, { "epoch": 1.7293398149600125, "grad_norm": 0.8372897505760193, "learning_rate": 1.8872258999708992e-06, "loss": 0.125, "step": 2757 }, { "epoch": 1.729967069154775, "grad_norm": 0.9450139403343201, "learning_rate": 1.8786182205429938e-06, "loss": 0.1385, "step": 2758 }, { "epoch": 1.7305943233495373, "grad_norm": 0.8529419898986816, "learning_rate": 1.8700292484554362e-06, "loss": 0.1308, "step": 2759 }, { "epoch": 1.7312215775442998, "grad_norm": 0.8922459483146667, "learning_rate": 1.8614589925749293e-06, "loss": 0.1282, "step": 2760 }, { "epoch": 1.7318488317390623, "grad_norm": 0.7917664051055908, "learning_rate": 1.8529074617488497e-06, "loss": 0.122, "step": 2761 }, { "epoch": 1.7324760859338246, "grad_norm": 1.03391432762146, "learning_rate": 1.844374664805244e-06, "loss": 0.1622, "step": 2762 }, { "epoch": 1.7331033401285871, "grad_norm": 0.8683478832244873, "learning_rate": 1.8358606105528242e-06, "loss": 0.1234, "step": 2763 }, { "epoch": 1.7337305943233496, "grad_norm": 0.8627936840057373, "learning_rate": 1.8273653077809395e-06, "loss": 0.1297, "step": 2764 }, { "epoch": 1.734357848518112, "grad_norm": 0.8438630700111389, "learning_rate": 1.8188887652595966e-06, "loss": 0.1156, "step": 2765 }, { "epoch": 1.7349851027128744, "grad_norm": 0.7723935842514038, "learning_rate": 1.8104309917394248e-06, "loss": 0.1155, "step": 2766 }, { "epoch": 1.735612356907637, "grad_norm": 0.8524870872497559, "learning_rate": 1.8019919959516796e-06, "loss": 0.1332, "step": 2767 }, { "epoch": 1.7362396111023992, "grad_norm": 0.8028182983398438, "learning_rate": 1.793571786608237e-06, "loss": 0.1027, "step": 2768 }, { "epoch": 1.7368668652971617, "grad_norm": 0.9504607915878296, "learning_rate": 1.7851703724015745e-06, "loss": 0.1234, "step": 2769 }, { "epoch": 1.7374941194919242, "grad_norm": 0.7929568886756897, "learning_rate": 1.7767877620047723e-06, "loss": 0.1258, "step": 2770 }, { "epoch": 1.7381213736866865, "grad_norm": 0.9246236085891724, "learning_rate": 1.7684239640714862e-06, "loss": 0.1306, "step": 2771 }, { "epoch": 1.7387486278814488, "grad_norm": 0.8959844708442688, "learning_rate": 1.7600789872359624e-06, "loss": 0.1223, "step": 2772 }, { "epoch": 1.7393758820762115, "grad_norm": 0.8668091893196106, "learning_rate": 1.7517528401130168e-06, "loss": 0.1261, "step": 2773 }, { "epoch": 1.7400031362709738, "grad_norm": 0.8081668615341187, "learning_rate": 1.7434455312980203e-06, "loss": 0.1152, "step": 2774 }, { "epoch": 1.740630390465736, "grad_norm": 0.9705616235733032, "learning_rate": 1.735157069366904e-06, "loss": 0.136, "step": 2775 }, { "epoch": 1.7412576446604988, "grad_norm": 0.8360944390296936, "learning_rate": 1.7268874628761389e-06, "loss": 0.1317, "step": 2776 }, { "epoch": 1.741884898855261, "grad_norm": 0.7722758054733276, "learning_rate": 1.7186367203627274e-06, "loss": 0.1298, "step": 2777 }, { "epoch": 1.7425121530500234, "grad_norm": 1.0844967365264893, "learning_rate": 1.7104048503442028e-06, "loss": 0.1545, "step": 2778 }, { "epoch": 1.743139407244786, "grad_norm": 0.7706978917121887, "learning_rate": 1.702191861318614e-06, "loss": 0.1155, "step": 2779 }, { "epoch": 1.7437666614395484, "grad_norm": 0.8536537885665894, "learning_rate": 1.6939977617645186e-06, "loss": 0.1042, "step": 2780 }, { "epoch": 1.7443939156343107, "grad_norm": 0.8721825480461121, "learning_rate": 1.6858225601409794e-06, "loss": 0.1234, "step": 2781 }, { "epoch": 1.7450211698290732, "grad_norm": 0.8884703516960144, "learning_rate": 1.6776662648875386e-06, "loss": 0.1409, "step": 2782 }, { "epoch": 1.7456484240238357, "grad_norm": 0.8294079899787903, "learning_rate": 1.6695288844242274e-06, "loss": 0.1196, "step": 2783 }, { "epoch": 1.746275678218598, "grad_norm": 1.0719852447509766, "learning_rate": 1.6614104271515508e-06, "loss": 0.1359, "step": 2784 }, { "epoch": 1.7469029324133605, "grad_norm": 0.9199581146240234, "learning_rate": 1.6533109014504777e-06, "loss": 0.1251, "step": 2785 }, { "epoch": 1.747530186608123, "grad_norm": 0.8534532785415649, "learning_rate": 1.6452303156824357e-06, "loss": 0.1343, "step": 2786 }, { "epoch": 1.7481574408028853, "grad_norm": 0.9075412154197693, "learning_rate": 1.6371686781893003e-06, "loss": 0.1252, "step": 2787 }, { "epoch": 1.7487846949976478, "grad_norm": 0.8409426808357239, "learning_rate": 1.629125997293377e-06, "loss": 0.1122, "step": 2788 }, { "epoch": 1.7494119491924103, "grad_norm": 0.8862391114234924, "learning_rate": 1.6211022812974152e-06, "loss": 0.1331, "step": 2789 }, { "epoch": 1.7500392033871726, "grad_norm": 0.9557252526283264, "learning_rate": 1.6130975384845715e-06, "loss": 0.1566, "step": 2790 }, { "epoch": 1.750666457581935, "grad_norm": 0.7308382391929626, "learning_rate": 1.605111777118429e-06, "loss": 0.1056, "step": 2791 }, { "epoch": 1.7512937117766976, "grad_norm": 0.9058966040611267, "learning_rate": 1.5971450054429682e-06, "loss": 0.1309, "step": 2792 }, { "epoch": 1.7519209659714599, "grad_norm": 0.9257788062095642, "learning_rate": 1.5891972316825688e-06, "loss": 0.1473, "step": 2793 }, { "epoch": 1.7525482201662224, "grad_norm": 1.0357989072799683, "learning_rate": 1.5812684640420028e-06, "loss": 0.1596, "step": 2794 }, { "epoch": 1.7531754743609849, "grad_norm": 0.8932484984397888, "learning_rate": 1.5733587107064051e-06, "loss": 0.1342, "step": 2795 }, { "epoch": 1.7538027285557471, "grad_norm": 0.908905029296875, "learning_rate": 1.5654679798412997e-06, "loss": 0.137, "step": 2796 }, { "epoch": 1.7544299827505097, "grad_norm": 0.84003084897995, "learning_rate": 1.557596279592566e-06, "loss": 0.1196, "step": 2797 }, { "epoch": 1.7550572369452722, "grad_norm": 0.8938060402870178, "learning_rate": 1.5497436180864344e-06, "loss": 0.134, "step": 2798 }, { "epoch": 1.7556844911400344, "grad_norm": 0.9037652611732483, "learning_rate": 1.5419100034294876e-06, "loss": 0.1254, "step": 2799 }, { "epoch": 1.756311745334797, "grad_norm": 0.9804080128669739, "learning_rate": 1.5340954437086387e-06, "loss": 0.1376, "step": 2800 }, { "epoch": 1.7569389995295595, "grad_norm": 0.8713802695274353, "learning_rate": 1.5262999469911322e-06, "loss": 0.1367, "step": 2801 }, { "epoch": 1.7575662537243217, "grad_norm": 0.9325335025787354, "learning_rate": 1.5185235213245352e-06, "loss": 0.1314, "step": 2802 }, { "epoch": 1.7581935079190842, "grad_norm": 0.7857791185379028, "learning_rate": 1.5107661747367263e-06, "loss": 0.119, "step": 2803 }, { "epoch": 1.7588207621138467, "grad_norm": 0.8473135232925415, "learning_rate": 1.5030279152358862e-06, "loss": 0.1118, "step": 2804 }, { "epoch": 1.759448016308609, "grad_norm": 0.9458979368209839, "learning_rate": 1.4953087508104958e-06, "loss": 0.1407, "step": 2805 }, { "epoch": 1.7600752705033715, "grad_norm": 1.1570056676864624, "learning_rate": 1.4876086894293163e-06, "loss": 0.1667, "step": 2806 }, { "epoch": 1.760702524698134, "grad_norm": 0.7316725254058838, "learning_rate": 1.4799277390413913e-06, "loss": 0.1002, "step": 2807 }, { "epoch": 1.7613297788928963, "grad_norm": 0.969429075717926, "learning_rate": 1.4722659075760404e-06, "loss": 0.1343, "step": 2808 }, { "epoch": 1.7619570330876586, "grad_norm": 0.8668022751808167, "learning_rate": 1.4646232029428387e-06, "loss": 0.1336, "step": 2809 }, { "epoch": 1.7625842872824213, "grad_norm": 0.8704190850257874, "learning_rate": 1.4569996330316238e-06, "loss": 0.129, "step": 2810 }, { "epoch": 1.7632115414771836, "grad_norm": 0.829531729221344, "learning_rate": 1.4493952057124715e-06, "loss": 0.1453, "step": 2811 }, { "epoch": 1.763838795671946, "grad_norm": 0.8531040549278259, "learning_rate": 1.4418099288356979e-06, "loss": 0.1488, "step": 2812 }, { "epoch": 1.7644660498667086, "grad_norm": 0.7893531918525696, "learning_rate": 1.4342438102318546e-06, "loss": 0.1131, "step": 2813 }, { "epoch": 1.765093304061471, "grad_norm": 0.788480818271637, "learning_rate": 1.4266968577117114e-06, "loss": 0.1316, "step": 2814 }, { "epoch": 1.7657205582562332, "grad_norm": 0.8491449356079102, "learning_rate": 1.419169079066256e-06, "loss": 0.1291, "step": 2815 }, { "epoch": 1.766347812450996, "grad_norm": 0.9434386491775513, "learning_rate": 1.4116604820666746e-06, "loss": 0.151, "step": 2816 }, { "epoch": 1.7669750666457582, "grad_norm": 0.8945111036300659, "learning_rate": 1.40417107446436e-06, "loss": 0.1329, "step": 2817 }, { "epoch": 1.7676023208405205, "grad_norm": 0.8145248889923096, "learning_rate": 1.3967008639908851e-06, "loss": 0.1332, "step": 2818 }, { "epoch": 1.768229575035283, "grad_norm": 0.8825253844261169, "learning_rate": 1.389249858358017e-06, "loss": 0.134, "step": 2819 }, { "epoch": 1.7688568292300455, "grad_norm": 0.9387741684913635, "learning_rate": 1.3818180652576895e-06, "loss": 0.1431, "step": 2820 }, { "epoch": 1.7694840834248078, "grad_norm": 0.8380280137062073, "learning_rate": 1.3744054923620053e-06, "loss": 0.1157, "step": 2821 }, { "epoch": 1.7701113376195703, "grad_norm": 0.781251847743988, "learning_rate": 1.3670121473232211e-06, "loss": 0.1094, "step": 2822 }, { "epoch": 1.7707385918143328, "grad_norm": 0.965782880783081, "learning_rate": 1.3596380377737539e-06, "loss": 0.1408, "step": 2823 }, { "epoch": 1.771365846009095, "grad_norm": 0.8496629595756531, "learning_rate": 1.3522831713261498e-06, "loss": 0.1261, "step": 2824 }, { "epoch": 1.7719931002038576, "grad_norm": 0.9037855267524719, "learning_rate": 1.3449475555730973e-06, "loss": 0.127, "step": 2825 }, { "epoch": 1.77262035439862, "grad_norm": 0.8562514781951904, "learning_rate": 1.3376311980874146e-06, "loss": 0.1256, "step": 2826 }, { "epoch": 1.7732476085933824, "grad_norm": 0.5597630143165588, "learning_rate": 1.330334106422031e-06, "loss": 0.1119, "step": 2827 }, { "epoch": 1.7738748627881449, "grad_norm": 0.8352013826370239, "learning_rate": 1.3230562881099963e-06, "loss": 0.1163, "step": 2828 }, { "epoch": 1.7745021169829074, "grad_norm": 0.9308125972747803, "learning_rate": 1.3157977506644515e-06, "loss": 0.1497, "step": 2829 }, { "epoch": 1.7751293711776697, "grad_norm": 0.8582794070243835, "learning_rate": 1.308558501578643e-06, "loss": 0.1329, "step": 2830 }, { "epoch": 1.7757566253724322, "grad_norm": 0.8619057536125183, "learning_rate": 1.3013385483259055e-06, "loss": 0.1218, "step": 2831 }, { "epoch": 1.7763838795671947, "grad_norm": 0.8505441546440125, "learning_rate": 1.2941378983596464e-06, "loss": 0.1488, "step": 2832 }, { "epoch": 1.777011133761957, "grad_norm": 0.714715838432312, "learning_rate": 1.2869565591133548e-06, "loss": 0.0959, "step": 2833 }, { "epoch": 1.7776383879567195, "grad_norm": 0.8384492993354797, "learning_rate": 1.2797945380005805e-06, "loss": 0.1251, "step": 2834 }, { "epoch": 1.778265642151482, "grad_norm": 0.9021497368812561, "learning_rate": 1.2726518424149247e-06, "loss": 0.1364, "step": 2835 }, { "epoch": 1.7788928963462443, "grad_norm": 1.215956687927246, "learning_rate": 1.2655284797300449e-06, "loss": 0.1395, "step": 2836 }, { "epoch": 1.7795201505410068, "grad_norm": 0.9060066342353821, "learning_rate": 1.2584244572996406e-06, "loss": 0.1421, "step": 2837 }, { "epoch": 1.7801474047357693, "grad_norm": 0.8602017760276794, "learning_rate": 1.2513397824574436e-06, "loss": 0.1158, "step": 2838 }, { "epoch": 1.7807746589305316, "grad_norm": 0.9450860023498535, "learning_rate": 1.2442744625172165e-06, "loss": 0.1341, "step": 2839 }, { "epoch": 1.781401913125294, "grad_norm": 0.9497483372688293, "learning_rate": 1.237228504772734e-06, "loss": 0.1579, "step": 2840 }, { "epoch": 1.7820291673200566, "grad_norm": 1.0387485027313232, "learning_rate": 1.2302019164977886e-06, "loss": 0.1303, "step": 2841 }, { "epoch": 1.7826564215148188, "grad_norm": 0.9795166850090027, "learning_rate": 1.2231947049461712e-06, "loss": 0.1569, "step": 2842 }, { "epoch": 1.7832836757095814, "grad_norm": 0.908534824848175, "learning_rate": 1.2162068773516732e-06, "loss": 0.1184, "step": 2843 }, { "epoch": 1.7839109299043439, "grad_norm": 0.8440238237380981, "learning_rate": 1.2092384409280778e-06, "loss": 0.1279, "step": 2844 }, { "epoch": 1.7845381840991061, "grad_norm": 0.9186972379684448, "learning_rate": 1.202289402869148e-06, "loss": 0.1268, "step": 2845 }, { "epoch": 1.7851654382938686, "grad_norm": 0.8728666305541992, "learning_rate": 1.1953597703486208e-06, "loss": 0.1273, "step": 2846 }, { "epoch": 1.7857926924886312, "grad_norm": 0.7897111177444458, "learning_rate": 1.1884495505201944e-06, "loss": 0.1269, "step": 2847 }, { "epoch": 1.7864199466833934, "grad_norm": 0.8890313506126404, "learning_rate": 1.1815587505175351e-06, "loss": 0.1314, "step": 2848 }, { "epoch": 1.7870472008781557, "grad_norm": 0.9216718077659607, "learning_rate": 1.1746873774542578e-06, "loss": 0.1202, "step": 2849 }, { "epoch": 1.7876744550729184, "grad_norm": 1.0509883165359497, "learning_rate": 1.1678354384239255e-06, "loss": 0.1229, "step": 2850 }, { "epoch": 1.7883017092676807, "grad_norm": 0.8760930299758911, "learning_rate": 1.1610029405000335e-06, "loss": 0.1305, "step": 2851 }, { "epoch": 1.788928963462443, "grad_norm": 0.9807936549186707, "learning_rate": 1.1541898907360128e-06, "loss": 0.1388, "step": 2852 }, { "epoch": 1.7895562176572057, "grad_norm": 0.8667734265327454, "learning_rate": 1.1473962961652107e-06, "loss": 0.1146, "step": 2853 }, { "epoch": 1.790183471851968, "grad_norm": 1.0074526071548462, "learning_rate": 1.1406221638008952e-06, "loss": 0.1526, "step": 2854 }, { "epoch": 1.7908107260467303, "grad_norm": 0.736807107925415, "learning_rate": 1.1338675006362432e-06, "loss": 0.1202, "step": 2855 }, { "epoch": 1.7914379802414928, "grad_norm": 0.8689996004104614, "learning_rate": 1.127132313644328e-06, "loss": 0.1405, "step": 2856 }, { "epoch": 1.7920652344362553, "grad_norm": 0.9014766812324524, "learning_rate": 1.1204166097781278e-06, "loss": 0.1191, "step": 2857 }, { "epoch": 1.7926924886310176, "grad_norm": 0.9162267446517944, "learning_rate": 1.1137203959704901e-06, "loss": 0.113, "step": 2858 }, { "epoch": 1.79331974282578, "grad_norm": 0.8885189890861511, "learning_rate": 1.1070436791341566e-06, "loss": 0.115, "step": 2859 }, { "epoch": 1.7939469970205426, "grad_norm": 1.0271906852722168, "learning_rate": 1.100386466161738e-06, "loss": 0.1321, "step": 2860 }, { "epoch": 1.794574251215305, "grad_norm": 0.8138547539710999, "learning_rate": 1.0937487639257083e-06, "loss": 0.1115, "step": 2861 }, { "epoch": 1.7952015054100674, "grad_norm": 0.8766812086105347, "learning_rate": 1.0871305792784015e-06, "loss": 0.1207, "step": 2862 }, { "epoch": 1.79582875960483, "grad_norm": 0.7150856256484985, "learning_rate": 1.080531919052008e-06, "loss": 0.1062, "step": 2863 }, { "epoch": 1.7964560137995922, "grad_norm": 0.9094497561454773, "learning_rate": 1.0739527900585478e-06, "loss": 0.1241, "step": 2864 }, { "epoch": 1.7970832679943547, "grad_norm": 0.8997982740402222, "learning_rate": 1.06739319908989e-06, "loss": 0.1271, "step": 2865 }, { "epoch": 1.7977105221891172, "grad_norm": 0.8818061351776123, "learning_rate": 1.060853152917738e-06, "loss": 0.1137, "step": 2866 }, { "epoch": 1.7983377763838795, "grad_norm": 0.8932895660400391, "learning_rate": 1.054332658293602e-06, "loss": 0.1255, "step": 2867 }, { "epoch": 1.798965030578642, "grad_norm": 0.7263736128807068, "learning_rate": 1.0478317219488244e-06, "loss": 0.1058, "step": 2868 }, { "epoch": 1.7995922847734045, "grad_norm": 0.8277838230133057, "learning_rate": 1.0413503505945477e-06, "loss": 0.1432, "step": 2869 }, { "epoch": 1.8002195389681668, "grad_norm": 0.8268225193023682, "learning_rate": 1.0348885509217222e-06, "loss": 0.1216, "step": 2870 }, { "epoch": 1.8008467931629293, "grad_norm": 0.8564212322235107, "learning_rate": 1.0284463296010893e-06, "loss": 0.1059, "step": 2871 }, { "epoch": 1.8014740473576918, "grad_norm": 0.9777878522872925, "learning_rate": 1.0220236932831807e-06, "loss": 0.1444, "step": 2872 }, { "epoch": 1.802101301552454, "grad_norm": 0.9540963172912598, "learning_rate": 1.0156206485983123e-06, "loss": 0.1415, "step": 2873 }, { "epoch": 1.8027285557472166, "grad_norm": 0.9117012619972229, "learning_rate": 1.0092372021565678e-06, "loss": 0.1196, "step": 2874 }, { "epoch": 1.803355809941979, "grad_norm": 0.965158224105835, "learning_rate": 1.0028733605478115e-06, "loss": 0.1353, "step": 2875 }, { "epoch": 1.8039830641367414, "grad_norm": 0.9235615134239197, "learning_rate": 9.965291303416547e-07, "loss": 0.1256, "step": 2876 }, { "epoch": 1.8046103183315039, "grad_norm": 0.8588432669639587, "learning_rate": 9.902045180874699e-07, "loss": 0.1395, "step": 2877 }, { "epoch": 1.8052375725262664, "grad_norm": 0.8039693236351013, "learning_rate": 9.838995303143784e-07, "loss": 0.1267, "step": 2878 }, { "epoch": 1.8058648267210287, "grad_norm": 0.8312050700187683, "learning_rate": 9.776141735312428e-07, "loss": 0.0992, "step": 2879 }, { "epoch": 1.8064920809157912, "grad_norm": 0.9416694045066833, "learning_rate": 9.71348454226655e-07, "loss": 0.1207, "step": 2880 }, { "epoch": 1.8071193351105537, "grad_norm": 0.7612276673316956, "learning_rate": 9.651023788689406e-07, "loss": 0.1089, "step": 2881 }, { "epoch": 1.807746589305316, "grad_norm": 1.1019048690795898, "learning_rate": 9.58875953906142e-07, "loss": 0.1338, "step": 2882 }, { "epoch": 1.8083738435000785, "grad_norm": 0.7640385031700134, "learning_rate": 9.526691857660131e-07, "loss": 0.1003, "step": 2883 }, { "epoch": 1.809001097694841, "grad_norm": 0.9395763874053955, "learning_rate": 9.46482080856026e-07, "loss": 0.1448, "step": 2884 }, { "epoch": 1.8096283518896032, "grad_norm": 0.8155563473701477, "learning_rate": 9.403146455633405e-07, "loss": 0.1337, "step": 2885 }, { "epoch": 1.8102556060843655, "grad_norm": 0.8867181539535522, "learning_rate": 9.341668862548214e-07, "loss": 0.141, "step": 2886 }, { "epoch": 1.8108828602791283, "grad_norm": 0.7554093599319458, "learning_rate": 9.280388092770165e-07, "loss": 0.1071, "step": 2887 }, { "epoch": 1.8115101144738905, "grad_norm": 0.9735627770423889, "learning_rate": 9.219304209561541e-07, "loss": 0.1491, "step": 2888 }, { "epoch": 1.8121373686686528, "grad_norm": 0.9940028786659241, "learning_rate": 9.15841727598139e-07, "loss": 0.1551, "step": 2889 }, { "epoch": 1.8127646228634156, "grad_norm": 0.8162161707878113, "learning_rate": 9.097727354885432e-07, "loss": 0.1358, "step": 2890 }, { "epoch": 1.8133918770581778, "grad_norm": 0.7892768979072571, "learning_rate": 9.037234508926018e-07, "loss": 0.116, "step": 2891 }, { "epoch": 1.8140191312529401, "grad_norm": 0.8267726302146912, "learning_rate": 8.976938800552059e-07, "loss": 0.1225, "step": 2892 }, { "epoch": 1.8146463854477028, "grad_norm": 0.8965389728546143, "learning_rate": 8.916840292008921e-07, "loss": 0.1145, "step": 2893 }, { "epoch": 1.8152736396424651, "grad_norm": 1.0514887571334839, "learning_rate": 8.856939045338375e-07, "loss": 0.1642, "step": 2894 }, { "epoch": 1.8159008938372274, "grad_norm": 0.8502829074859619, "learning_rate": 8.797235122378623e-07, "loss": 0.1234, "step": 2895 }, { "epoch": 1.81652814803199, "grad_norm": 0.9405344724655151, "learning_rate": 8.737728584764116e-07, "loss": 0.1415, "step": 2896 }, { "epoch": 1.8171554022267524, "grad_norm": 0.867764949798584, "learning_rate": 8.67841949392556e-07, "loss": 0.1112, "step": 2897 }, { "epoch": 1.8177826564215147, "grad_norm": 0.9235755205154419, "learning_rate": 8.619307911089803e-07, "loss": 0.1453, "step": 2898 }, { "epoch": 1.8184099106162772, "grad_norm": 0.9043789505958557, "learning_rate": 8.560393897279851e-07, "loss": 0.1333, "step": 2899 }, { "epoch": 1.8190371648110397, "grad_norm": 0.7508955001831055, "learning_rate": 8.501677513314654e-07, "loss": 0.1076, "step": 2900 }, { "epoch": 1.819664419005802, "grad_norm": 0.8686279058456421, "learning_rate": 8.443158819809239e-07, "loss": 0.1297, "step": 2901 }, { "epoch": 1.8202916732005645, "grad_norm": 0.895136296749115, "learning_rate": 8.384837877174501e-07, "loss": 0.1353, "step": 2902 }, { "epoch": 1.820918927395327, "grad_norm": 0.8528735041618347, "learning_rate": 8.326714745617193e-07, "loss": 0.1298, "step": 2903 }, { "epoch": 1.8215461815900893, "grad_norm": 0.7849574089050293, "learning_rate": 8.268789485139916e-07, "loss": 0.1142, "step": 2904 }, { "epoch": 1.8221734357848518, "grad_norm": 0.9688342809677124, "learning_rate": 8.211062155540928e-07, "loss": 0.1088, "step": 2905 }, { "epoch": 1.8228006899796143, "grad_norm": 0.901724100112915, "learning_rate": 8.153532816414156e-07, "loss": 0.1269, "step": 2906 }, { "epoch": 1.8234279441743766, "grad_norm": 0.8248329758644104, "learning_rate": 8.096201527149183e-07, "loss": 0.1192, "step": 2907 }, { "epoch": 1.824055198369139, "grad_norm": 0.8081274628639221, "learning_rate": 8.039068346931134e-07, "loss": 0.1206, "step": 2908 }, { "epoch": 1.8246824525639016, "grad_norm": 0.88519686460495, "learning_rate": 7.982133334740583e-07, "loss": 0.1178, "step": 2909 }, { "epoch": 1.8253097067586639, "grad_norm": 0.9536784291267395, "learning_rate": 7.925396549353559e-07, "loss": 0.1379, "step": 2910 }, { "epoch": 1.8259369609534264, "grad_norm": 0.8239837288856506, "learning_rate": 7.868858049341432e-07, "loss": 0.1202, "step": 2911 }, { "epoch": 1.826564215148189, "grad_norm": 0.8505097031593323, "learning_rate": 7.812517893070892e-07, "loss": 0.124, "step": 2912 }, { "epoch": 1.8271914693429512, "grad_norm": 0.8420546650886536, "learning_rate": 7.756376138703859e-07, "loss": 0.134, "step": 2913 }, { "epoch": 1.8278187235377137, "grad_norm": 1.1077483892440796, "learning_rate": 7.700432844197436e-07, "loss": 0.1689, "step": 2914 }, { "epoch": 1.8284459777324762, "grad_norm": 0.8993534445762634, "learning_rate": 7.644688067303874e-07, "loss": 0.1239, "step": 2915 }, { "epoch": 1.8290732319272385, "grad_norm": 0.8536310195922852, "learning_rate": 7.589141865570493e-07, "loss": 0.1358, "step": 2916 }, { "epoch": 1.829700486122001, "grad_norm": 0.8410910964012146, "learning_rate": 7.533794296339536e-07, "loss": 0.1039, "step": 2917 }, { "epoch": 1.8303277403167635, "grad_norm": 0.916085422039032, "learning_rate": 7.478645416748298e-07, "loss": 0.1362, "step": 2918 }, { "epoch": 1.8309549945115258, "grad_norm": 0.8140565752983093, "learning_rate": 7.423695283728838e-07, "loss": 0.1255, "step": 2919 }, { "epoch": 1.8315822487062883, "grad_norm": 0.9491639137268066, "learning_rate": 7.368943954008179e-07, "loss": 0.1356, "step": 2920 }, { "epoch": 1.8322095029010508, "grad_norm": 0.8765885829925537, "learning_rate": 7.314391484108019e-07, "loss": 0.1404, "step": 2921 }, { "epoch": 1.832836757095813, "grad_norm": 0.9048905372619629, "learning_rate": 7.260037930344821e-07, "loss": 0.1324, "step": 2922 }, { "epoch": 1.8334640112905753, "grad_norm": 0.9376583099365234, "learning_rate": 7.205883348829612e-07, "loss": 0.1256, "step": 2923 }, { "epoch": 1.834091265485338, "grad_norm": 0.9348058104515076, "learning_rate": 7.151927795468139e-07, "loss": 0.1406, "step": 2924 }, { "epoch": 1.8347185196801004, "grad_norm": 0.8614555597305298, "learning_rate": 7.098171325960579e-07, "loss": 0.1179, "step": 2925 }, { "epoch": 1.8353457738748626, "grad_norm": 0.7639898061752319, "learning_rate": 7.04461399580163e-07, "loss": 0.1206, "step": 2926 }, { "epoch": 1.8359730280696254, "grad_norm": 0.7343938946723938, "learning_rate": 6.991255860280444e-07, "loss": 0.0943, "step": 2927 }, { "epoch": 1.8366002822643877, "grad_norm": 0.9248484969139099, "learning_rate": 6.938096974480468e-07, "loss": 0.1055, "step": 2928 }, { "epoch": 1.83722753645915, "grad_norm": 0.7805013656616211, "learning_rate": 6.885137393279495e-07, "loss": 0.1135, "step": 2929 }, { "epoch": 1.8378547906539127, "grad_norm": 0.9166563153266907, "learning_rate": 6.832377171349569e-07, "loss": 0.1192, "step": 2930 }, { "epoch": 1.838482044848675, "grad_norm": 1.0043163299560547, "learning_rate": 6.779816363156944e-07, "loss": 0.1586, "step": 2931 }, { "epoch": 1.8391092990434372, "grad_norm": 0.894653856754303, "learning_rate": 6.727455022961948e-07, "loss": 0.1547, "step": 2932 }, { "epoch": 1.8397365532381997, "grad_norm": 0.8928349614143372, "learning_rate": 6.675293204819078e-07, "loss": 0.1315, "step": 2933 }, { "epoch": 1.8403638074329622, "grad_norm": 0.883644700050354, "learning_rate": 6.623330962576835e-07, "loss": 0.1495, "step": 2934 }, { "epoch": 1.8409910616277245, "grad_norm": 0.9369957447052002, "learning_rate": 6.571568349877578e-07, "loss": 0.1354, "step": 2935 }, { "epoch": 1.841618315822487, "grad_norm": 0.8621444702148438, "learning_rate": 6.52000542015776e-07, "loss": 0.1201, "step": 2936 }, { "epoch": 1.8422455700172495, "grad_norm": 0.8870798349380493, "learning_rate": 6.468642226647582e-07, "loss": 0.151, "step": 2937 }, { "epoch": 1.8428728242120118, "grad_norm": 0.8756100535392761, "learning_rate": 6.417478822371071e-07, "loss": 0.1324, "step": 2938 }, { "epoch": 1.8435000784067743, "grad_norm": 0.8785619139671326, "learning_rate": 6.366515260146022e-07, "loss": 0.1262, "step": 2939 }, { "epoch": 1.8441273326015368, "grad_norm": 0.8960946798324585, "learning_rate": 6.315751592583908e-07, "loss": 0.1395, "step": 2940 }, { "epoch": 1.8447545867962991, "grad_norm": 0.9922255277633667, "learning_rate": 6.265187872089851e-07, "loss": 0.1409, "step": 2941 }, { "epoch": 1.8453818409910616, "grad_norm": 0.767423152923584, "learning_rate": 6.214824150862542e-07, "loss": 0.1037, "step": 2942 }, { "epoch": 1.8460090951858241, "grad_norm": 0.9177699685096741, "learning_rate": 6.164660480894235e-07, "loss": 0.1525, "step": 2943 }, { "epoch": 1.8466363493805864, "grad_norm": 0.9612424373626709, "learning_rate": 6.114696913970708e-07, "loss": 0.1493, "step": 2944 }, { "epoch": 1.847263603575349, "grad_norm": 0.6578683257102966, "learning_rate": 6.064933501671011e-07, "loss": 0.1368, "step": 2945 }, { "epoch": 1.8478908577701114, "grad_norm": 0.9469631910324097, "learning_rate": 6.015370295367739e-07, "loss": 0.1342, "step": 2946 }, { "epoch": 1.8485181119648737, "grad_norm": 0.9297729134559631, "learning_rate": 5.966007346226699e-07, "loss": 0.1198, "step": 2947 }, { "epoch": 1.8491453661596362, "grad_norm": 0.9222840666770935, "learning_rate": 5.916844705207014e-07, "loss": 0.1262, "step": 2948 }, { "epoch": 1.8497726203543987, "grad_norm": 0.8505162596702576, "learning_rate": 5.867882423061e-07, "loss": 0.1204, "step": 2949 }, { "epoch": 1.850399874549161, "grad_norm": 0.8304736018180847, "learning_rate": 5.819120550334157e-07, "loss": 0.12, "step": 2950 }, { "epoch": 1.8510271287439235, "grad_norm": 0.816307008266449, "learning_rate": 5.770559137365084e-07, "loss": 0.1164, "step": 2951 }, { "epoch": 1.851654382938686, "grad_norm": 0.8012158274650574, "learning_rate": 5.722198234285436e-07, "loss": 0.123, "step": 2952 }, { "epoch": 1.8522816371334483, "grad_norm": 0.7665334343910217, "learning_rate": 5.674037891019857e-07, "loss": 0.12, "step": 2953 }, { "epoch": 1.8529088913282108, "grad_norm": 0.87762451171875, "learning_rate": 5.626078157285996e-07, "loss": 0.116, "step": 2954 }, { "epoch": 1.8535361455229733, "grad_norm": 0.9224951863288879, "learning_rate": 5.578319082594341e-07, "loss": 0.1446, "step": 2955 }, { "epoch": 1.8541633997177356, "grad_norm": 0.8734859228134155, "learning_rate": 5.530760716248274e-07, "loss": 0.1242, "step": 2956 }, { "epoch": 1.854790653912498, "grad_norm": 0.9852175712585449, "learning_rate": 5.48340310734401e-07, "loss": 0.1414, "step": 2957 }, { "epoch": 1.8554179081072606, "grad_norm": 0.8510518670082092, "learning_rate": 5.43624630477042e-07, "loss": 0.1275, "step": 2958 }, { "epoch": 1.8560451623020229, "grad_norm": 0.8023516535758972, "learning_rate": 5.389290357209165e-07, "loss": 0.0947, "step": 2959 }, { "epoch": 1.8566724164967852, "grad_norm": 1.0212147235870361, "learning_rate": 5.342535313134511e-07, "loss": 0.1418, "step": 2960 }, { "epoch": 1.8572996706915479, "grad_norm": 0.8918972015380859, "learning_rate": 5.29598122081334e-07, "loss": 0.1208, "step": 2961 }, { "epoch": 1.8579269248863102, "grad_norm": 0.8254719376564026, "learning_rate": 5.249628128305073e-07, "loss": 0.1187, "step": 2962 }, { "epoch": 1.8585541790810725, "grad_norm": 0.8412727117538452, "learning_rate": 5.203476083461678e-07, "loss": 0.1292, "step": 2963 }, { "epoch": 1.8591814332758352, "grad_norm": 0.7970728874206543, "learning_rate": 5.157525133927465e-07, "loss": 0.1186, "step": 2964 }, { "epoch": 1.8598086874705975, "grad_norm": 0.8740736246109009, "learning_rate": 5.111775327139268e-07, "loss": 0.1425, "step": 2965 }, { "epoch": 1.8604359416653597, "grad_norm": 0.7795693278312683, "learning_rate": 5.066226710326216e-07, "loss": 0.1175, "step": 2966 }, { "epoch": 1.8610631958601225, "grad_norm": 0.770154595375061, "learning_rate": 5.02087933050972e-07, "loss": 0.1143, "step": 2967 }, { "epoch": 1.8616904500548848, "grad_norm": 0.9342362880706787, "learning_rate": 4.975733234503532e-07, "loss": 0.1372, "step": 2968 }, { "epoch": 1.862317704249647, "grad_norm": 0.9226587414741516, "learning_rate": 4.930788468913528e-07, "loss": 0.159, "step": 2969 }, { "epoch": 1.8629449584444095, "grad_norm": 0.8717913627624512, "learning_rate": 4.886045080137769e-07, "loss": 0.1161, "step": 2970 }, { "epoch": 1.863572212639172, "grad_norm": 0.8667058348655701, "learning_rate": 4.841503114366397e-07, "loss": 0.1362, "step": 2971 }, { "epoch": 1.8641994668339343, "grad_norm": 0.864131510257721, "learning_rate": 4.797162617581696e-07, "loss": 0.1247, "step": 2972 }, { "epoch": 1.8648267210286968, "grad_norm": 0.6426189541816711, "learning_rate": 4.7530236355579186e-07, "loss": 0.0818, "step": 2973 }, { "epoch": 1.8654539752234593, "grad_norm": 0.78322434425354, "learning_rate": 4.70908621386128e-07, "loss": 0.1062, "step": 2974 }, { "epoch": 1.8660812294182216, "grad_norm": 0.8293128609657288, "learning_rate": 4.665350397849922e-07, "loss": 0.1242, "step": 2975 }, { "epoch": 1.8667084836129841, "grad_norm": 0.8424291610717773, "learning_rate": 4.621816232673881e-07, "loss": 0.1294, "step": 2976 }, { "epoch": 1.8673357378077466, "grad_norm": 0.8814021348953247, "learning_rate": 4.578483763274988e-07, "loss": 0.1292, "step": 2977 }, { "epoch": 1.867962992002509, "grad_norm": 0.8511276245117188, "learning_rate": 4.5353530343869025e-07, "loss": 0.1195, "step": 2978 }, { "epoch": 1.8685902461972714, "grad_norm": 0.8883662819862366, "learning_rate": 4.4924240905349635e-07, "loss": 0.1269, "step": 2979 }, { "epoch": 1.869217500392034, "grad_norm": 0.899042546749115, "learning_rate": 4.449696976036277e-07, "loss": 0.1163, "step": 2980 }, { "epoch": 1.8698447545867962, "grad_norm": 0.888177216053009, "learning_rate": 4.4071717349995157e-07, "loss": 0.1195, "step": 2981 }, { "epoch": 1.8704720087815587, "grad_norm": 0.8384679555892944, "learning_rate": 4.364848411324962e-07, "loss": 0.1324, "step": 2982 }, { "epoch": 1.8710992629763212, "grad_norm": 0.9146760702133179, "learning_rate": 4.3227270487045114e-07, "loss": 0.121, "step": 2983 }, { "epoch": 1.8717265171710835, "grad_norm": 0.9058627486228943, "learning_rate": 4.2808076906215135e-07, "loss": 0.1149, "step": 2984 }, { "epoch": 1.872353771365846, "grad_norm": 0.8663889169692993, "learning_rate": 4.2390903803507965e-07, "loss": 0.133, "step": 2985 }, { "epoch": 1.8729810255606085, "grad_norm": 0.7832933664321899, "learning_rate": 4.1975751609586225e-07, "loss": 0.0965, "step": 2986 }, { "epoch": 1.8736082797553708, "grad_norm": 0.8505681157112122, "learning_rate": 4.1562620753025975e-07, "loss": 0.1207, "step": 2987 }, { "epoch": 1.8742355339501333, "grad_norm": 0.8499687910079956, "learning_rate": 4.115151166031672e-07, "loss": 0.1255, "step": 2988 }, { "epoch": 1.8748627881448958, "grad_norm": 0.8543071746826172, "learning_rate": 4.074242475586099e-07, "loss": 0.1346, "step": 2989 }, { "epoch": 1.875490042339658, "grad_norm": 0.8508833050727844, "learning_rate": 4.0335360461973395e-07, "loss": 0.115, "step": 2990 }, { "epoch": 1.8761172965344206, "grad_norm": 0.8167310953140259, "learning_rate": 3.993031919888113e-07, "loss": 0.1202, "step": 2991 }, { "epoch": 1.8767445507291831, "grad_norm": 0.8891164660453796, "learning_rate": 3.952730138472216e-07, "loss": 0.1417, "step": 2992 }, { "epoch": 1.8773718049239454, "grad_norm": 0.7716173529624939, "learning_rate": 3.912630743554613e-07, "loss": 0.1173, "step": 2993 }, { "epoch": 1.877999059118708, "grad_norm": 1.014568567276001, "learning_rate": 3.8727337765313013e-07, "loss": 0.143, "step": 2994 }, { "epoch": 1.8786263133134704, "grad_norm": 0.8445636630058289, "learning_rate": 3.833039278589379e-07, "loss": 0.1265, "step": 2995 }, { "epoch": 1.8792535675082327, "grad_norm": 0.7436814904212952, "learning_rate": 3.7935472907068005e-07, "loss": 0.1048, "step": 2996 }, { "epoch": 1.879880821702995, "grad_norm": 0.8433228135108948, "learning_rate": 3.7542578536525763e-07, "loss": 0.1154, "step": 2997 }, { "epoch": 1.8805080758977577, "grad_norm": 0.8996210098266602, "learning_rate": 3.715171007986551e-07, "loss": 0.133, "step": 2998 }, { "epoch": 1.88113533009252, "grad_norm": 0.83963543176651, "learning_rate": 3.676286794059514e-07, "loss": 0.1017, "step": 2999 }, { "epoch": 1.8817625842872823, "grad_norm": 0.823365330696106, "learning_rate": 3.637605252012932e-07, "loss": 0.1167, "step": 3000 }, { "epoch": 1.882389838482045, "grad_norm": 0.7450454235076904, "learning_rate": 3.5991264217791535e-07, "loss": 0.117, "step": 3001 }, { "epoch": 1.8830170926768073, "grad_norm": 0.8203656077384949, "learning_rate": 3.560850343081246e-07, "loss": 0.1154, "step": 3002 }, { "epoch": 1.8836443468715696, "grad_norm": 0.8934017419815063, "learning_rate": 3.5227770554329134e-07, "loss": 0.1252, "step": 3003 }, { "epoch": 1.8842716010663323, "grad_norm": 0.8821412324905396, "learning_rate": 3.484906598138604e-07, "loss": 0.1241, "step": 3004 }, { "epoch": 1.8848988552610946, "grad_norm": 0.8018887042999268, "learning_rate": 3.4472390102932904e-07, "loss": 0.1273, "step": 3005 }, { "epoch": 1.8855261094558569, "grad_norm": 1.057244896888733, "learning_rate": 3.409774330782534e-07, "loss": 0.1173, "step": 3006 }, { "epoch": 1.8861533636506194, "grad_norm": 0.8295458555221558, "learning_rate": 3.372512598282462e-07, "loss": 0.1212, "step": 3007 }, { "epoch": 1.8867806178453819, "grad_norm": 1.0016921758651733, "learning_rate": 3.3354538512596847e-07, "loss": 0.1569, "step": 3008 }, { "epoch": 1.8874078720401442, "grad_norm": 0.8008216023445129, "learning_rate": 3.298598127971242e-07, "loss": 0.1238, "step": 3009 }, { "epoch": 1.8880351262349067, "grad_norm": 0.8262543082237244, "learning_rate": 3.261945466464611e-07, "loss": 0.1132, "step": 3010 }, { "epoch": 1.8886623804296692, "grad_norm": 0.9967755675315857, "learning_rate": 3.225495904577569e-07, "loss": 0.1236, "step": 3011 }, { "epoch": 1.8892896346244314, "grad_norm": 0.8778575658798218, "learning_rate": 3.1892494799383276e-07, "loss": 0.1194, "step": 3012 }, { "epoch": 1.889916888819194, "grad_norm": 0.7905102968215942, "learning_rate": 3.1532062299653334e-07, "loss": 0.1027, "step": 3013 }, { "epoch": 1.8905441430139565, "grad_norm": 0.8325642347335815, "learning_rate": 3.1173661918673326e-07, "loss": 0.102, "step": 3014 }, { "epoch": 1.8911713972087187, "grad_norm": 0.9177772998809814, "learning_rate": 3.081729402643219e-07, "loss": 0.1741, "step": 3015 }, { "epoch": 1.8917986514034812, "grad_norm": 0.8657806515693665, "learning_rate": 3.046295899082119e-07, "loss": 0.1316, "step": 3016 }, { "epoch": 1.8924259055982438, "grad_norm": 0.834739089012146, "learning_rate": 3.0110657177632616e-07, "loss": 0.1177, "step": 3017 }, { "epoch": 1.893053159793006, "grad_norm": 0.7131056189537048, "learning_rate": 2.976038895056044e-07, "loss": 0.1066, "step": 3018 }, { "epoch": 1.8936804139877685, "grad_norm": 1.0009664297103882, "learning_rate": 2.9412154671198735e-07, "loss": 0.1511, "step": 3019 }, { "epoch": 1.894307668182531, "grad_norm": 0.7970536351203918, "learning_rate": 2.906595469904194e-07, "loss": 0.1213, "step": 3020 }, { "epoch": 1.8949349223772933, "grad_norm": 0.8229966163635254, "learning_rate": 2.8721789391484623e-07, "loss": 0.1176, "step": 3021 }, { "epoch": 1.8955621765720558, "grad_norm": 0.9330523610115051, "learning_rate": 2.837965910382057e-07, "loss": 0.13, "step": 3022 }, { "epoch": 1.8961894307668183, "grad_norm": 0.9967787265777588, "learning_rate": 2.8039564189243027e-07, "loss": 0.1529, "step": 3023 }, { "epoch": 1.8968166849615806, "grad_norm": 0.7935978174209595, "learning_rate": 2.7701504998843834e-07, "loss": 0.1405, "step": 3024 }, { "epoch": 1.8974439391563431, "grad_norm": 0.8954715132713318, "learning_rate": 2.7365481881613363e-07, "loss": 0.1269, "step": 3025 }, { "epoch": 1.8980711933511056, "grad_norm": 0.8750838041305542, "learning_rate": 2.703149518444037e-07, "loss": 0.1174, "step": 3026 }, { "epoch": 1.898698447545868, "grad_norm": 0.9578278660774231, "learning_rate": 2.6699545252110824e-07, "loss": 0.1426, "step": 3027 }, { "epoch": 1.8993257017406304, "grad_norm": 0.8749047517776489, "learning_rate": 2.636963242730861e-07, "loss": 0.1159, "step": 3028 }, { "epoch": 1.899952955935393, "grad_norm": 0.7772375345230103, "learning_rate": 2.604175705061396e-07, "loss": 0.1023, "step": 3029 }, { "epoch": 1.9005802101301552, "grad_norm": 0.9160566926002502, "learning_rate": 2.5715919460504336e-07, "loss": 0.1263, "step": 3030 }, { "epoch": 1.9012074643249177, "grad_norm": 0.7499338388442993, "learning_rate": 2.539211999335356e-07, "loss": 0.1117, "step": 3031 }, { "epoch": 1.9018347185196802, "grad_norm": 0.8376059532165527, "learning_rate": 2.5070358983430907e-07, "loss": 0.1137, "step": 3032 }, { "epoch": 1.9024619727144425, "grad_norm": 0.9330485463142395, "learning_rate": 2.475063676290179e-07, "loss": 0.1388, "step": 3033 }, { "epoch": 1.9030892269092048, "grad_norm": 0.926713228225708, "learning_rate": 2.443295366182663e-07, "loss": 0.12, "step": 3034 }, { "epoch": 1.9037164811039675, "grad_norm": 0.8909222483634949, "learning_rate": 2.411731000816109e-07, "loss": 0.1291, "step": 3035 }, { "epoch": 1.9043437352987298, "grad_norm": 0.9053573608398438, "learning_rate": 2.3803706127754732e-07, "loss": 0.1333, "step": 3036 }, { "epoch": 1.904970989493492, "grad_norm": 0.8453134894371033, "learning_rate": 2.3492142344352375e-07, "loss": 0.1175, "step": 3037 }, { "epoch": 1.9055982436882548, "grad_norm": 0.9282842874526978, "learning_rate": 2.3182618979592065e-07, "loss": 0.1426, "step": 3038 }, { "epoch": 1.906225497883017, "grad_norm": 0.7932876944541931, "learning_rate": 2.2875136353005755e-07, "loss": 0.113, "step": 3039 }, { "epoch": 1.9068527520777794, "grad_norm": 0.7805168032646179, "learning_rate": 2.2569694782018426e-07, "loss": 0.139, "step": 3040 }, { "epoch": 1.907480006272542, "grad_norm": 1.2216638326644897, "learning_rate": 2.226629458194851e-07, "loss": 0.1432, "step": 3041 }, { "epoch": 1.9081072604673044, "grad_norm": 0.9536857604980469, "learning_rate": 2.1964936066006138e-07, "loss": 0.1372, "step": 3042 }, { "epoch": 1.9087345146620667, "grad_norm": 0.8344295024871826, "learning_rate": 2.166561954529489e-07, "loss": 0.108, "step": 3043 }, { "epoch": 1.9093617688568292, "grad_norm": 0.7920276522636414, "learning_rate": 2.1368345328809382e-07, "loss": 0.1274, "step": 3044 }, { "epoch": 1.9099890230515917, "grad_norm": 0.8191457986831665, "learning_rate": 2.1073113723436566e-07, "loss": 0.1308, "step": 3045 }, { "epoch": 1.910616277246354, "grad_norm": 0.7653352618217468, "learning_rate": 2.0779925033953985e-07, "loss": 0.1087, "step": 3046 }, { "epoch": 1.9112435314411165, "grad_norm": 0.8056324124336243, "learning_rate": 2.048877956303108e-07, "loss": 0.1159, "step": 3047 }, { "epoch": 1.911870785635879, "grad_norm": 1.0009253025054932, "learning_rate": 2.0199677611227209e-07, "loss": 0.1403, "step": 3048 }, { "epoch": 1.9124980398306413, "grad_norm": 1.1077009439468384, "learning_rate": 1.9912619476992745e-07, "loss": 0.1588, "step": 3049 }, { "epoch": 1.9131252940254038, "grad_norm": 0.7939375638961792, "learning_rate": 1.9627605456667975e-07, "loss": 0.1107, "step": 3050 }, { "epoch": 1.9137525482201663, "grad_norm": 0.9140084981918335, "learning_rate": 1.934463584448265e-07, "loss": 0.1457, "step": 3051 }, { "epoch": 1.9143798024149286, "grad_norm": 0.8728494644165039, "learning_rate": 1.9063710932556435e-07, "loss": 0.1368, "step": 3052 }, { "epoch": 1.915007056609691, "grad_norm": 0.9430707693099976, "learning_rate": 1.8784831010897787e-07, "loss": 0.1518, "step": 3053 }, { "epoch": 1.9156343108044536, "grad_norm": 0.8481541872024536, "learning_rate": 1.8507996367404413e-07, "loss": 0.1127, "step": 3054 }, { "epoch": 1.9162615649992158, "grad_norm": 0.9993071556091309, "learning_rate": 1.8233207287862597e-07, "loss": 0.1533, "step": 3055 }, { "epoch": 1.9168888191939784, "grad_norm": 0.9163568019866943, "learning_rate": 1.7960464055946536e-07, "loss": 0.104, "step": 3056 }, { "epoch": 1.9175160733887409, "grad_norm": 1.0502631664276123, "learning_rate": 1.7689766953219e-07, "loss": 0.135, "step": 3057 }, { "epoch": 1.9181433275835031, "grad_norm": 0.837598443031311, "learning_rate": 1.742111625912979e-07, "loss": 0.1222, "step": 3058 }, { "epoch": 1.9187705817782656, "grad_norm": 0.865238606929779, "learning_rate": 1.7154512251016608e-07, "loss": 0.1226, "step": 3059 }, { "epoch": 1.9193978359730282, "grad_norm": 0.9549113512039185, "learning_rate": 1.688995520410397e-07, "loss": 0.1448, "step": 3060 }, { "epoch": 1.9200250901677904, "grad_norm": 0.9464938044548035, "learning_rate": 1.662744539150385e-07, "loss": 0.1243, "step": 3061 }, { "epoch": 1.920652344362553, "grad_norm": 0.9034533500671387, "learning_rate": 1.6366983084214139e-07, "loss": 0.147, "step": 3062 }, { "epoch": 1.9212795985573154, "grad_norm": 0.9751142263412476, "learning_rate": 1.6108568551119308e-07, "loss": 0.1495, "step": 3063 }, { "epoch": 1.9219068527520777, "grad_norm": 0.880317747592926, "learning_rate": 1.585220205898952e-07, "loss": 0.1256, "step": 3064 }, { "epoch": 1.9225341069468402, "grad_norm": 0.8028441667556763, "learning_rate": 1.559788387248129e-07, "loss": 0.1176, "step": 3065 }, { "epoch": 1.9231613611416027, "grad_norm": 0.8881433010101318, "learning_rate": 1.5345614254135944e-07, "loss": 0.1104, "step": 3066 }, { "epoch": 1.923788615336365, "grad_norm": 0.8313564658164978, "learning_rate": 1.5095393464380493e-07, "loss": 0.138, "step": 3067 }, { "epoch": 1.9244158695311275, "grad_norm": 0.9249715209007263, "learning_rate": 1.484722176152653e-07, "loss": 0.1298, "step": 3068 }, { "epoch": 1.92504312372589, "grad_norm": 1.0869942903518677, "learning_rate": 1.4601099401770458e-07, "loss": 0.115, "step": 3069 }, { "epoch": 1.9256703779206523, "grad_norm": 0.8350560069084167, "learning_rate": 1.435702663919325e-07, "loss": 0.1181, "step": 3070 }, { "epoch": 1.9262976321154148, "grad_norm": 0.8322076797485352, "learning_rate": 1.411500372575958e-07, "loss": 0.1208, "step": 3071 }, { "epoch": 1.9269248863101773, "grad_norm": 0.732231855392456, "learning_rate": 1.3875030911318256e-07, "loss": 0.1001, "step": 3072 }, { "epoch": 1.9275521405049396, "grad_norm": 0.8648920059204102, "learning_rate": 1.3637108443601776e-07, "loss": 0.1344, "step": 3073 }, { "epoch": 1.928179394699702, "grad_norm": 0.9535402059555054, "learning_rate": 1.3401236568225674e-07, "loss": 0.1525, "step": 3074 }, { "epoch": 1.9288066488944646, "grad_norm": 0.8213782906532288, "learning_rate": 1.3167415528689165e-07, "loss": 0.1296, "step": 3075 }, { "epoch": 1.929433903089227, "grad_norm": 0.8394836187362671, "learning_rate": 1.293564556637339e-07, "loss": 0.1157, "step": 3076 }, { "epoch": 1.9300611572839892, "grad_norm": 0.9124313592910767, "learning_rate": 1.2705926920543177e-07, "loss": 0.1365, "step": 3077 }, { "epoch": 1.930688411478752, "grad_norm": 0.9417162537574768, "learning_rate": 1.247825982834483e-07, "loss": 0.1227, "step": 3078 }, { "epoch": 1.9313156656735142, "grad_norm": 1.0487611293792725, "learning_rate": 1.2252644524807233e-07, "loss": 0.136, "step": 3079 }, { "epoch": 1.9319429198682765, "grad_norm": 0.7279402017593384, "learning_rate": 1.2029081242840968e-07, "loss": 0.0945, "step": 3080 }, { "epoch": 1.932570174063039, "grad_norm": 0.7787689566612244, "learning_rate": 1.1807570213238307e-07, "loss": 0.1133, "step": 3081 }, { "epoch": 1.9331974282578015, "grad_norm": 0.8247366547584534, "learning_rate": 1.1588111664672996e-07, "loss": 0.0978, "step": 3082 }, { "epoch": 1.9338246824525638, "grad_norm": 0.9361110329627991, "learning_rate": 1.137070582369959e-07, "loss": 0.1098, "step": 3083 }, { "epoch": 1.9344519366473263, "grad_norm": 0.896119236946106, "learning_rate": 1.1155352914753892e-07, "loss": 0.1405, "step": 3084 }, { "epoch": 1.9350791908420888, "grad_norm": 0.973166823387146, "learning_rate": 1.0942053160152289e-07, "loss": 0.1345, "step": 3085 }, { "epoch": 1.935706445036851, "grad_norm": 0.8656417727470398, "learning_rate": 1.0730806780091751e-07, "loss": 0.1383, "step": 3086 }, { "epoch": 1.9363336992316136, "grad_norm": 0.8609120845794678, "learning_rate": 1.0521613992649171e-07, "loss": 0.1187, "step": 3087 }, { "epoch": 1.936960953426376, "grad_norm": 0.8523600697517395, "learning_rate": 1.0314475013781578e-07, "loss": 0.1143, "step": 3088 }, { "epoch": 1.9375882076211384, "grad_norm": 0.8735218048095703, "learning_rate": 1.010939005732614e-07, "loss": 0.1231, "step": 3089 }, { "epoch": 1.9382154618159009, "grad_norm": 0.8490827679634094, "learning_rate": 9.906359334999061e-08, "loss": 0.1266, "step": 3090 }, { "epoch": 1.9388427160106634, "grad_norm": 0.8786249160766602, "learning_rate": 9.705383056396012e-08, "loss": 0.124, "step": 3091 }, { "epoch": 1.9394699702054257, "grad_norm": 0.9569567441940308, "learning_rate": 9.506461428992142e-08, "loss": 0.1358, "step": 3092 }, { "epoch": 1.9400972244001882, "grad_norm": 0.7432516813278198, "learning_rate": 9.309594658141185e-08, "loss": 0.1143, "step": 3093 }, { "epoch": 1.9407244785949507, "grad_norm": 0.9143378138542175, "learning_rate": 9.114782947075462e-08, "loss": 0.1389, "step": 3094 }, { "epoch": 1.941351732789713, "grad_norm": 0.8691182732582092, "learning_rate": 8.922026496905878e-08, "loss": 0.1154, "step": 3095 }, { "epoch": 1.9419789869844755, "grad_norm": 0.9375434517860413, "learning_rate": 8.731325506622145e-08, "loss": 0.1266, "step": 3096 }, { "epoch": 1.942606241179238, "grad_norm": 0.7888829112052917, "learning_rate": 8.542680173091233e-08, "loss": 0.1137, "step": 3097 }, { "epoch": 1.9432334953740003, "grad_norm": 0.8019591569900513, "learning_rate": 8.356090691058693e-08, "loss": 0.1047, "step": 3098 }, { "epoch": 1.9438607495687628, "grad_norm": 1.0162184238433838, "learning_rate": 8.171557253146889e-08, "loss": 0.153, "step": 3099 }, { "epoch": 1.9444880037635253, "grad_norm": 0.8386593461036682, "learning_rate": 7.989080049856546e-08, "loss": 0.1238, "step": 3100 }, { "epoch": 1.9451152579582875, "grad_norm": 0.9435861110687256, "learning_rate": 7.808659269565422e-08, "loss": 0.1662, "step": 3101 }, { "epoch": 1.94574251215305, "grad_norm": 0.7505217790603638, "learning_rate": 7.630295098527862e-08, "loss": 0.0964, "step": 3102 }, { "epoch": 1.9463697663478126, "grad_norm": 0.9808968901634216, "learning_rate": 7.45398772087591e-08, "loss": 0.153, "step": 3103 }, { "epoch": 1.9469970205425748, "grad_norm": 0.8741431832313538, "learning_rate": 7.27973731861753e-08, "loss": 0.1229, "step": 3104 }, { "epoch": 1.9476242747373373, "grad_norm": 0.9125285148620605, "learning_rate": 7.107544071637718e-08, "loss": 0.1276, "step": 3105 }, { "epoch": 1.9482515289320999, "grad_norm": 0.9317863583564758, "learning_rate": 6.937408157697612e-08, "loss": 0.1529, "step": 3106 }, { "epoch": 1.9488787831268621, "grad_norm": 0.9088873267173767, "learning_rate": 6.769329752434272e-08, "loss": 0.151, "step": 3107 }, { "epoch": 1.9495060373216246, "grad_norm": 0.9075539708137512, "learning_rate": 6.603309029361349e-08, "loss": 0.1226, "step": 3108 }, { "epoch": 1.9501332915163871, "grad_norm": 0.9500503540039062, "learning_rate": 6.439346159867743e-08, "loss": 0.1379, "step": 3109 }, { "epoch": 1.9507605457111494, "grad_norm": 0.9206661581993103, "learning_rate": 6.277441313218058e-08, "loss": 0.1397, "step": 3110 }, { "epoch": 1.9513877999059117, "grad_norm": 0.8864201307296753, "learning_rate": 6.11759465655215e-08, "loss": 0.111, "step": 3111 }, { "epoch": 1.9520150541006744, "grad_norm": 0.7336342334747314, "learning_rate": 5.9598063548855776e-08, "loss": 0.1047, "step": 3112 }, { "epoch": 1.9526423082954367, "grad_norm": 0.8321946859359741, "learning_rate": 5.80407657110893e-08, "loss": 0.1122, "step": 3113 }, { "epoch": 1.953269562490199, "grad_norm": 0.8356947302818298, "learning_rate": 5.650405465987163e-08, "loss": 0.1242, "step": 3114 }, { "epoch": 1.9538968166849617, "grad_norm": 0.838462233543396, "learning_rate": 5.4987931981607124e-08, "loss": 0.1213, "step": 3115 }, { "epoch": 1.954524070879724, "grad_norm": 1.0194828510284424, "learning_rate": 5.349239924143934e-08, "loss": 0.167, "step": 3116 }, { "epoch": 1.9551513250744863, "grad_norm": 0.7172075510025024, "learning_rate": 5.2017457983259964e-08, "loss": 0.1136, "step": 3117 }, { "epoch": 1.955778579269249, "grad_norm": 0.8290290236473083, "learning_rate": 5.0563109729706575e-08, "loss": 0.1233, "step": 3118 }, { "epoch": 1.9564058334640113, "grad_norm": 0.8827482461929321, "learning_rate": 4.9129355982149294e-08, "loss": 0.127, "step": 3119 }, { "epoch": 1.9570330876587736, "grad_norm": 0.8358291983604431, "learning_rate": 4.771619822070639e-08, "loss": 0.128, "step": 3120 }, { "epoch": 1.957660341853536, "grad_norm": 0.8537582159042358, "learning_rate": 4.632363790422867e-08, "loss": 0.1241, "step": 3121 }, { "epoch": 1.9582875960482986, "grad_norm": 0.8248356580734253, "learning_rate": 4.495167647030396e-08, "loss": 0.1158, "step": 3122 }, { "epoch": 1.958914850243061, "grad_norm": 0.7904495000839233, "learning_rate": 4.36003153352571e-08, "loss": 0.1081, "step": 3123 }, { "epoch": 1.9595421044378234, "grad_norm": 0.8422828316688538, "learning_rate": 4.226955589414772e-08, "loss": 0.1284, "step": 3124 }, { "epoch": 1.960169358632586, "grad_norm": 0.8301834464073181, "learning_rate": 4.095939952076355e-08, "loss": 0.1059, "step": 3125 }, { "epoch": 1.9607966128273482, "grad_norm": 0.8255082368850708, "learning_rate": 3.966984756762493e-08, "loss": 0.1056, "step": 3126 }, { "epoch": 1.9614238670221107, "grad_norm": 0.8115298748016357, "learning_rate": 3.8400901365982514e-08, "loss": 0.1245, "step": 3127 }, { "epoch": 1.9620511212168732, "grad_norm": 0.7331147193908691, "learning_rate": 3.7152562225815094e-08, "loss": 0.0873, "step": 3128 }, { "epoch": 1.9626783754116355, "grad_norm": 0.8912408947944641, "learning_rate": 3.592483143582515e-08, "loss": 0.1511, "step": 3129 }, { "epoch": 1.963305629606398, "grad_norm": 0.8669161796569824, "learning_rate": 3.471771026344328e-08, "loss": 0.1191, "step": 3130 }, { "epoch": 1.9639328838011605, "grad_norm": 0.9340421557426453, "learning_rate": 3.353119995482379e-08, "loss": 0.1274, "step": 3131 }, { "epoch": 1.9645601379959228, "grad_norm": 0.9183062314987183, "learning_rate": 3.2365301734842427e-08, "loss": 0.1455, "step": 3132 }, { "epoch": 1.9651873921906853, "grad_norm": 0.9617796540260315, "learning_rate": 3.122001680709863e-08, "loss": 0.1298, "step": 3133 }, { "epoch": 1.9658146463854478, "grad_norm": 0.8618515133857727, "learning_rate": 3.0095346353906654e-08, "loss": 0.1308, "step": 3134 }, { "epoch": 1.96644190058021, "grad_norm": 0.944823682308197, "learning_rate": 2.899129153630664e-08, "loss": 0.1459, "step": 3135 }, { "epoch": 1.9670691547749726, "grad_norm": 0.9516502022743225, "learning_rate": 2.790785349405134e-08, "loss": 0.1519, "step": 3136 }, { "epoch": 1.967696408969735, "grad_norm": 0.8677295446395874, "learning_rate": 2.6845033345612726e-08, "loss": 0.1106, "step": 3137 }, { "epoch": 1.9683236631644974, "grad_norm": 0.7925218939781189, "learning_rate": 2.5802832188177583e-08, "loss": 0.1048, "step": 3138 }, { "epoch": 1.9689509173592599, "grad_norm": 0.9727739691734314, "learning_rate": 2.4781251097647506e-08, "loss": 0.1396, "step": 3139 }, { "epoch": 1.9695781715540224, "grad_norm": 0.9012995362281799, "learning_rate": 2.378029112863445e-08, "loss": 0.1292, "step": 3140 }, { "epoch": 1.9702054257487847, "grad_norm": 0.8515464663505554, "learning_rate": 2.279995331446738e-08, "loss": 0.1184, "step": 3141 }, { "epoch": 1.9708326799435472, "grad_norm": 0.8222053050994873, "learning_rate": 2.1840238667181213e-08, "loss": 0.1283, "step": 3142 }, { "epoch": 1.9714599341383097, "grad_norm": 0.948520302772522, "learning_rate": 2.0901148177525642e-08, "loss": 0.1469, "step": 3143 }, { "epoch": 1.972087188333072, "grad_norm": 0.8818579316139221, "learning_rate": 1.998268281495408e-08, "loss": 0.1415, "step": 3144 }, { "epoch": 1.9727144425278345, "grad_norm": 0.8317146897315979, "learning_rate": 1.9084843527632514e-08, "loss": 0.1138, "step": 3145 }, { "epoch": 1.973341696722597, "grad_norm": 0.9792901873588562, "learning_rate": 1.8207631242430635e-08, "loss": 0.12, "step": 3146 }, { "epoch": 1.9739689509173592, "grad_norm": 0.9085427522659302, "learning_rate": 1.735104686492628e-08, "loss": 0.1239, "step": 3147 }, { "epoch": 1.9745962051121215, "grad_norm": 0.8495365381240845, "learning_rate": 1.6515091279403206e-08, "loss": 0.1564, "step": 3148 }, { "epoch": 1.9752234593068843, "grad_norm": 0.931058943271637, "learning_rate": 1.569976534884443e-08, "loss": 0.1342, "step": 3149 }, { "epoch": 1.9758507135016465, "grad_norm": 0.7746802568435669, "learning_rate": 1.4905069914943336e-08, "loss": 0.1118, "step": 3150 }, { "epoch": 1.9764779676964088, "grad_norm": 0.7811020612716675, "learning_rate": 1.4131005798090347e-08, "loss": 0.1278, "step": 3151 }, { "epoch": 1.9771052218911715, "grad_norm": 0.9032891392707825, "learning_rate": 1.3377573797377363e-08, "loss": 0.1304, "step": 3152 }, { "epoch": 1.9777324760859338, "grad_norm": 0.8794789910316467, "learning_rate": 1.2644774690602214e-08, "loss": 0.152, "step": 3153 }, { "epoch": 1.9783597302806961, "grad_norm": 0.8922184705734253, "learning_rate": 1.193260923425532e-08, "loss": 0.129, "step": 3154 }, { "epoch": 1.9789869844754588, "grad_norm": 0.84577476978302, "learning_rate": 1.1241078163533038e-08, "loss": 0.1283, "step": 3155 }, { "epoch": 1.9796142386702211, "grad_norm": 0.8250657916069031, "learning_rate": 1.0570182192326528e-08, "loss": 0.1193, "step": 3156 }, { "epoch": 1.9802414928649834, "grad_norm": 0.9372608661651611, "learning_rate": 9.919922013226225e-09, "loss": 0.1617, "step": 3157 }, { "epoch": 1.980868747059746, "grad_norm": 0.8537949919700623, "learning_rate": 9.290298297515154e-09, "loss": 0.1299, "step": 3158 }, { "epoch": 1.9814960012545084, "grad_norm": 0.8301073908805847, "learning_rate": 8.68131169518005e-09, "loss": 0.1278, "step": 3159 }, { "epoch": 1.9821232554492707, "grad_norm": 0.940880537033081, "learning_rate": 8.092962834898022e-09, "loss": 0.1358, "step": 3160 }, { "epoch": 1.9827505096440332, "grad_norm": 0.8299143314361572, "learning_rate": 7.525252324041e-09, "loss": 0.112, "step": 3161 }, { "epoch": 1.9833777638387957, "grad_norm": 0.9687228798866272, "learning_rate": 6.978180748677954e-09, "loss": 0.1346, "step": 3162 }, { "epoch": 1.984005018033558, "grad_norm": 0.8207062482833862, "learning_rate": 6.4517486735682366e-09, "loss": 0.1169, "step": 3163 }, { "epoch": 1.9846322722283205, "grad_norm": 0.9281582236289978, "learning_rate": 5.945956642168238e-09, "loss": 0.1194, "step": 3164 }, { "epoch": 1.985259526423083, "grad_norm": 0.8668698668479919, "learning_rate": 5.460805176624729e-09, "loss": 0.1248, "step": 3165 }, { "epoch": 1.9858867806178453, "grad_norm": 0.9078335165977478, "learning_rate": 4.9962947777770775e-09, "loss": 0.1331, "step": 3166 }, { "epoch": 1.9865140348126078, "grad_norm": 0.911410391330719, "learning_rate": 4.552425925152815e-09, "loss": 0.1121, "step": 3167 }, { "epoch": 1.9871412890073703, "grad_norm": 1.0109318494796753, "learning_rate": 4.129199076976509e-09, "loss": 0.1333, "step": 3168 }, { "epoch": 1.9877685432021326, "grad_norm": 0.7333261966705322, "learning_rate": 3.726614670158668e-09, "loss": 0.1175, "step": 3169 }, { "epoch": 1.988395797396895, "grad_norm": 0.8047865033149719, "learning_rate": 3.3446731203023995e-09, "loss": 0.1159, "step": 3170 }, { "epoch": 1.9890230515916576, "grad_norm": 0.7697649598121643, "learning_rate": 2.9833748216967494e-09, "loss": 0.1023, "step": 3171 }, { "epoch": 1.9896503057864199, "grad_norm": 0.8524652719497681, "learning_rate": 2.642720147325584e-09, "loss": 0.125, "step": 3172 }, { "epoch": 1.9902775599811824, "grad_norm": 0.9473152756690979, "learning_rate": 2.3227094488587066e-09, "loss": 0.1342, "step": 3173 }, { "epoch": 1.990904814175945, "grad_norm": 0.9332677125930786, "learning_rate": 2.023343056651861e-09, "loss": 0.1433, "step": 3174 }, { "epoch": 1.9915320683707072, "grad_norm": 0.8399542570114136, "learning_rate": 1.744621279753389e-09, "loss": 0.1079, "step": 3175 }, { "epoch": 1.9921593225654697, "grad_norm": 0.9552322626113892, "learning_rate": 1.4865444058997924e-09, "loss": 0.1257, "step": 3176 }, { "epoch": 1.9927865767602322, "grad_norm": 1.0563658475875854, "learning_rate": 1.2491127015068494e-09, "loss": 0.1199, "step": 3177 }, { "epoch": 1.9934138309549945, "grad_norm": 0.8615870475769043, "learning_rate": 1.0323264116895991e-09, "loss": 0.1241, "step": 3178 }, { "epoch": 1.994041085149757, "grad_norm": 0.9080129265785217, "learning_rate": 8.36185760240138e-10, "loss": 0.1463, "step": 3179 }, { "epoch": 1.9946683393445195, "grad_norm": 0.9104377627372742, "learning_rate": 6.606909496453817e-10, "loss": 0.1264, "step": 3180 }, { "epoch": 1.9952955935392818, "grad_norm": 0.9355060458183289, "learning_rate": 5.058421610715237e-10, "loss": 0.1359, "step": 3181 }, { "epoch": 1.9959228477340443, "grad_norm": 0.8101538419723511, "learning_rate": 3.7163955437291567e-10, "loss": 0.1157, "step": 3182 }, { "epoch": 1.9965501019288068, "grad_norm": 0.9194393754005432, "learning_rate": 2.5808326809650954e-10, "loss": 0.1287, "step": 3183 }, { "epoch": 1.997177356123569, "grad_norm": 0.7908146977424622, "learning_rate": 1.6517341946853394e-10, "loss": 0.1039, "step": 3184 }, { "epoch": 1.9978046103183313, "grad_norm": 0.9933124780654907, "learning_rate": 9.2910104401156e-11, "loss": 0.1204, "step": 3185 }, { "epoch": 1.998431864513094, "grad_norm": 0.7302822470664978, "learning_rate": 4.1293397494701624e-11, "loss": 0.1045, "step": 3186 }, { "epoch": 1.9990591187078564, "grad_norm": 0.8666970133781433, "learning_rate": 1.0323352037655555e-11, "loss": 0.1419, "step": 3187 }, { "epoch": 1.9996863729026186, "grad_norm": 0.8657733201980591, "learning_rate": 0.0, "loss": 0.1267, "step": 3188 } ], "logging_steps": 1.0, "max_steps": 3188, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.579466809106206e+22, "train_batch_size": 4, "trial_name": null, "trial_params": null }