{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3503212552996197, "eval_steps": 500, "global_step": 8015, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.3708204029896414e-05, "grad_norm": 23.75, "learning_rate": 1e-05, "loss": 5.1721, "step": 1 }, { "epoch": 8.741640805979283e-05, "grad_norm": 25.625, "learning_rate": 2e-05, "loss": 5.2813, "step": 2 }, { "epoch": 0.00013112461208968922, "grad_norm": 26.75, "learning_rate": 3e-05, "loss": 5.1543, "step": 3 }, { "epoch": 0.00017483281611958566, "grad_norm": 20.0, "learning_rate": 4e-05, "loss": 5.2777, "step": 4 }, { "epoch": 0.00021854102014948207, "grad_norm": 15.3125, "learning_rate": 5e-05, "loss": 4.738, "step": 5 }, { "epoch": 0.00026224922417937845, "grad_norm": 15.875, "learning_rate": 6e-05, "loss": 4.0695, "step": 6 }, { "epoch": 0.0003059574282092749, "grad_norm": 13.625, "learning_rate": 7e-05, "loss": 4.3845, "step": 7 }, { "epoch": 0.0003496656322391713, "grad_norm": 12.3125, "learning_rate": 8e-05, "loss": 4.2192, "step": 8 }, { "epoch": 0.0003933738362690677, "grad_norm": 8.5625, "learning_rate": 9e-05, "loss": 3.635, "step": 9 }, { "epoch": 0.00043708204029896413, "grad_norm": 8.875, "learning_rate": 0.0001, "loss": 3.798, "step": 10 }, { "epoch": 0.0004807902443288605, "grad_norm": 8.25, "learning_rate": 9.999999952821362e-05, "loss": 3.4472, "step": 11 }, { "epoch": 0.0005244984483587569, "grad_norm": 9.4375, "learning_rate": 9.999999811285448e-05, "loss": 3.7404, "step": 12 }, { "epoch": 0.0005682066523886533, "grad_norm": 8.1875, "learning_rate": 9.999999575392258e-05, "loss": 3.5688, "step": 13 }, { "epoch": 0.0006119148564185498, "grad_norm": 6.46875, "learning_rate": 9.9999992451418e-05, "loss": 3.6849, "step": 14 }, { "epoch": 0.0006556230604484462, "grad_norm": 7.0, "learning_rate": 9.99999882053408e-05, "loss": 3.6462, "step": 15 }, { "epoch": 0.0006993312644783426, "grad_norm": 5.40625, "learning_rate": 9.999998301569104e-05, "loss": 3.1855, "step": 16 }, { "epoch": 0.000743039468508239, "grad_norm": 5.75, "learning_rate": 9.999997688246885e-05, "loss": 3.388, "step": 17 }, { "epoch": 0.0007867476725381354, "grad_norm": 5.5625, "learning_rate": 9.999996980567431e-05, "loss": 3.2684, "step": 18 }, { "epoch": 0.0008304558765680318, "grad_norm": 5.6875, "learning_rate": 9.999996178530757e-05, "loss": 2.8991, "step": 19 }, { "epoch": 0.0008741640805979283, "grad_norm": 6.3125, "learning_rate": 9.999995282136878e-05, "loss": 3.6678, "step": 20 }, { "epoch": 0.0009178722846278247, "grad_norm": 6.4375, "learning_rate": 9.99999429138581e-05, "loss": 3.3619, "step": 21 }, { "epoch": 0.000961580488657721, "grad_norm": 7.34375, "learning_rate": 9.999993206277573e-05, "loss": 3.2241, "step": 22 }, { "epoch": 0.0010052886926876176, "grad_norm": 38.0, "learning_rate": 9.999992026812187e-05, "loss": 3.4659, "step": 23 }, { "epoch": 0.0010489968967175138, "grad_norm": 7.03125, "learning_rate": 9.999990752989675e-05, "loss": 3.2605, "step": 24 }, { "epoch": 0.0010927051007474102, "grad_norm": 14.625, "learning_rate": 9.99998938481006e-05, "loss": 3.3462, "step": 25 }, { "epoch": 0.0011364133047773067, "grad_norm": 6.03125, "learning_rate": 9.999987922273368e-05, "loss": 3.2599, "step": 26 }, { "epoch": 0.001180121508807203, "grad_norm": 6.4375, "learning_rate": 9.999986365379628e-05, "loss": 3.0645, "step": 27 }, { "epoch": 0.0012238297128370995, "grad_norm": 5.75, "learning_rate": 9.999984714128867e-05, "loss": 3.1231, "step": 28 }, { "epoch": 0.001267537916866996, "grad_norm": 5.4375, "learning_rate": 9.999982968521116e-05, "loss": 2.9634, "step": 29 }, { "epoch": 0.0013112461208968924, "grad_norm": 4.8125, "learning_rate": 9.999981128556411e-05, "loss": 2.902, "step": 30 }, { "epoch": 0.0013549543249267888, "grad_norm": 21.0, "learning_rate": 9.999979194234786e-05, "loss": 3.1391, "step": 31 }, { "epoch": 0.0013986625289566853, "grad_norm": 5.40625, "learning_rate": 9.999977165556273e-05, "loss": 2.9417, "step": 32 }, { "epoch": 0.0014423707329865815, "grad_norm": 6.5, "learning_rate": 9.999975042520917e-05, "loss": 2.8276, "step": 33 }, { "epoch": 0.001486078937016478, "grad_norm": 6.4375, "learning_rate": 9.999972825128754e-05, "loss": 3.7324, "step": 34 }, { "epoch": 0.0015297871410463744, "grad_norm": 6.28125, "learning_rate": 9.999970513379826e-05, "loss": 3.2154, "step": 35 }, { "epoch": 0.0015734953450762708, "grad_norm": 5.8125, "learning_rate": 9.999968107274177e-05, "loss": 3.2734, "step": 36 }, { "epoch": 0.0016172035491061672, "grad_norm": 4.65625, "learning_rate": 9.999965606811854e-05, "loss": 2.8065, "step": 37 }, { "epoch": 0.0016609117531360637, "grad_norm": 5.59375, "learning_rate": 9.999963011992902e-05, "loss": 2.8479, "step": 38 }, { "epoch": 0.00170461995716596, "grad_norm": 12.875, "learning_rate": 9.99996032281737e-05, "loss": 3.0187, "step": 39 }, { "epoch": 0.0017483281611958565, "grad_norm": 5.40625, "learning_rate": 9.999957539285312e-05, "loss": 3.0339, "step": 40 }, { "epoch": 0.001792036365225753, "grad_norm": 8.4375, "learning_rate": 9.999954661396777e-05, "loss": 3.1146, "step": 41 }, { "epoch": 0.0018357445692556494, "grad_norm": 8.875, "learning_rate": 9.99995168915182e-05, "loss": 2.7607, "step": 42 }, { "epoch": 0.0018794527732855456, "grad_norm": 5.1875, "learning_rate": 9.999948622550497e-05, "loss": 3.1231, "step": 43 }, { "epoch": 0.001923160977315442, "grad_norm": 4.65625, "learning_rate": 9.999945461592867e-05, "loss": 3.0816, "step": 44 }, { "epoch": 0.0019668691813453385, "grad_norm": 5.125, "learning_rate": 9.99994220627899e-05, "loss": 3.6489, "step": 45 }, { "epoch": 0.002010577385375235, "grad_norm": 4.53125, "learning_rate": 9.999938856608926e-05, "loss": 3.0423, "step": 46 }, { "epoch": 0.0020542855894051314, "grad_norm": 5.28125, "learning_rate": 9.999935412582738e-05, "loss": 3.1826, "step": 47 }, { "epoch": 0.0020979937934350276, "grad_norm": 5.0, "learning_rate": 9.999931874200492e-05, "loss": 3.0762, "step": 48 }, { "epoch": 0.0021417019974649242, "grad_norm": 4.46875, "learning_rate": 9.999928241462255e-05, "loss": 3.4191, "step": 49 }, { "epoch": 0.0021854102014948204, "grad_norm": 6.90625, "learning_rate": 9.999924514368095e-05, "loss": 3.2524, "step": 50 }, { "epoch": 0.002229118405524717, "grad_norm": 4.0625, "learning_rate": 9.999920692918082e-05, "loss": 2.9654, "step": 51 }, { "epoch": 0.0022728266095546133, "grad_norm": 5.75, "learning_rate": 9.999916777112288e-05, "loss": 3.1085, "step": 52 }, { "epoch": 0.00231653481358451, "grad_norm": 4.75, "learning_rate": 9.999912766950789e-05, "loss": 3.0036, "step": 53 }, { "epoch": 0.002360243017614406, "grad_norm": 4.96875, "learning_rate": 9.999908662433657e-05, "loss": 3.0041, "step": 54 }, { "epoch": 0.002403951221644303, "grad_norm": 5.53125, "learning_rate": 9.999904463560975e-05, "loss": 2.6111, "step": 55 }, { "epoch": 0.002447659425674199, "grad_norm": 4.28125, "learning_rate": 9.999900170332814e-05, "loss": 2.7588, "step": 56 }, { "epoch": 0.0024913676297040953, "grad_norm": 4.53125, "learning_rate": 9.999895782749263e-05, "loss": 3.0982, "step": 57 }, { "epoch": 0.002535075833733992, "grad_norm": 7.5625, "learning_rate": 9.9998913008104e-05, "loss": 3.2029, "step": 58 }, { "epoch": 0.002578784037763888, "grad_norm": 6.125, "learning_rate": 9.999886724516312e-05, "loss": 2.601, "step": 59 }, { "epoch": 0.002622492241793785, "grad_norm": 5.03125, "learning_rate": 9.999882053867085e-05, "loss": 2.5674, "step": 60 }, { "epoch": 0.002666200445823681, "grad_norm": 5.15625, "learning_rate": 9.999877288862806e-05, "loss": 2.8234, "step": 61 }, { "epoch": 0.0027099086498535777, "grad_norm": 5.34375, "learning_rate": 9.999872429503565e-05, "loss": 2.9025, "step": 62 }, { "epoch": 0.002753616853883474, "grad_norm": 4.625, "learning_rate": 9.999867475789455e-05, "loss": 2.7504, "step": 63 }, { "epoch": 0.0027973250579133705, "grad_norm": 10.125, "learning_rate": 9.999862427720568e-05, "loss": 3.6081, "step": 64 }, { "epoch": 0.0028410332619432667, "grad_norm": 4.9375, "learning_rate": 9.999857285297e-05, "loss": 2.7429, "step": 65 }, { "epoch": 0.002884741465973163, "grad_norm": 8.3125, "learning_rate": 9.999852048518849e-05, "loss": 2.8513, "step": 66 }, { "epoch": 0.0029284496700030596, "grad_norm": 4.84375, "learning_rate": 9.999846717386214e-05, "loss": 2.7622, "step": 67 }, { "epoch": 0.002972157874032956, "grad_norm": 6.90625, "learning_rate": 9.999841291899193e-05, "loss": 3.102, "step": 68 }, { "epoch": 0.0030158660780628525, "grad_norm": 4.3125, "learning_rate": 9.999835772057891e-05, "loss": 2.6154, "step": 69 }, { "epoch": 0.0030595742820927487, "grad_norm": 5.9375, "learning_rate": 9.99983015786241e-05, "loss": 2.9899, "step": 70 }, { "epoch": 0.0031032824861226454, "grad_norm": 4.59375, "learning_rate": 9.999824449312856e-05, "loss": 2.8435, "step": 71 }, { "epoch": 0.0031469906901525416, "grad_norm": 13.375, "learning_rate": 9.999818646409339e-05, "loss": 4.1482, "step": 72 }, { "epoch": 0.0031906988941824382, "grad_norm": 4.40625, "learning_rate": 9.999812749151966e-05, "loss": 2.5349, "step": 73 }, { "epoch": 0.0032344070982123344, "grad_norm": 4.15625, "learning_rate": 9.999806757540851e-05, "loss": 2.5921, "step": 74 }, { "epoch": 0.0032781153022422307, "grad_norm": 4.46875, "learning_rate": 9.999800671576106e-05, "loss": 2.6727, "step": 75 }, { "epoch": 0.0033218235062721273, "grad_norm": 4.0625, "learning_rate": 9.999794491257845e-05, "loss": 2.7172, "step": 76 }, { "epoch": 0.0033655317103020235, "grad_norm": 5.34375, "learning_rate": 9.999788216586186e-05, "loss": 3.1049, "step": 77 }, { "epoch": 0.00340923991433192, "grad_norm": 5.1875, "learning_rate": 9.999781847561245e-05, "loss": 2.8463, "step": 78 }, { "epoch": 0.0034529481183618164, "grad_norm": 5.34375, "learning_rate": 9.999775384183143e-05, "loss": 2.6217, "step": 79 }, { "epoch": 0.003496656322391713, "grad_norm": 4.34375, "learning_rate": 9.999768826452004e-05, "loss": 2.9236, "step": 80 }, { "epoch": 0.0035403645264216093, "grad_norm": 3.875, "learning_rate": 9.99976217436795e-05, "loss": 2.4763, "step": 81 }, { "epoch": 0.003584072730451506, "grad_norm": 6.46875, "learning_rate": 9.999755427931107e-05, "loss": 2.5611, "step": 82 }, { "epoch": 0.003627780934481402, "grad_norm": 6.84375, "learning_rate": 9.999748587141602e-05, "loss": 2.4349, "step": 83 }, { "epoch": 0.003671489138511299, "grad_norm": 4.78125, "learning_rate": 9.999741651999566e-05, "loss": 2.5408, "step": 84 }, { "epoch": 0.003715197342541195, "grad_norm": 4.875, "learning_rate": 9.999734622505126e-05, "loss": 2.9666, "step": 85 }, { "epoch": 0.0037589055465710912, "grad_norm": 5.03125, "learning_rate": 9.999727498658417e-05, "loss": 2.679, "step": 86 }, { "epoch": 0.003802613750600988, "grad_norm": 3.796875, "learning_rate": 9.999720280459576e-05, "loss": 2.6728, "step": 87 }, { "epoch": 0.003846321954630884, "grad_norm": 4.71875, "learning_rate": 9.999712967908735e-05, "loss": 3.1564, "step": 88 }, { "epoch": 0.0038900301586607807, "grad_norm": 6.40625, "learning_rate": 9.999705561006034e-05, "loss": 2.4467, "step": 89 }, { "epoch": 0.003933738362690677, "grad_norm": 5.28125, "learning_rate": 9.999698059751609e-05, "loss": 2.8369, "step": 90 }, { "epoch": 0.003977446566720573, "grad_norm": 4.59375, "learning_rate": 9.999690464145609e-05, "loss": 2.6195, "step": 91 }, { "epoch": 0.00402115477075047, "grad_norm": 5.34375, "learning_rate": 9.999682774188173e-05, "loss": 2.7844, "step": 92 }, { "epoch": 0.0040648629747803665, "grad_norm": 4.375, "learning_rate": 9.999674989879444e-05, "loss": 2.6323, "step": 93 }, { "epoch": 0.004108571178810263, "grad_norm": 4.125, "learning_rate": 9.999667111219573e-05, "loss": 2.692, "step": 94 }, { "epoch": 0.004152279382840159, "grad_norm": 4.96875, "learning_rate": 9.999659138208705e-05, "loss": 2.4742, "step": 95 }, { "epoch": 0.004195987586870055, "grad_norm": 4.53125, "learning_rate": 9.999651070846995e-05, "loss": 2.5278, "step": 96 }, { "epoch": 0.004239695790899952, "grad_norm": 4.3125, "learning_rate": 9.999642909134592e-05, "loss": 2.8639, "step": 97 }, { "epoch": 0.0042834039949298484, "grad_norm": 5.75, "learning_rate": 9.99963465307165e-05, "loss": 2.6517, "step": 98 }, { "epoch": 0.004327112198959745, "grad_norm": 5.6875, "learning_rate": 9.999626302658324e-05, "loss": 2.7603, "step": 99 }, { "epoch": 0.004370820402989641, "grad_norm": 4.375, "learning_rate": 9.999617857894777e-05, "loss": 2.3816, "step": 100 }, { "epoch": 0.004414528607019538, "grad_norm": 3.796875, "learning_rate": 9.99960931878116e-05, "loss": 2.6044, "step": 101 }, { "epoch": 0.004458236811049434, "grad_norm": 3.703125, "learning_rate": 9.999600685317642e-05, "loss": 2.3602, "step": 102 }, { "epoch": 0.00450194501507933, "grad_norm": 9.0, "learning_rate": 9.99959195750438e-05, "loss": 2.4109, "step": 103 }, { "epoch": 0.004545653219109227, "grad_norm": 6.0625, "learning_rate": 9.999583135341544e-05, "loss": 2.9449, "step": 104 }, { "epoch": 0.004589361423139123, "grad_norm": 5.96875, "learning_rate": 9.999574218829295e-05, "loss": 2.4216, "step": 105 }, { "epoch": 0.00463306962716902, "grad_norm": 5.46875, "learning_rate": 9.999565207967805e-05, "loss": 2.8841, "step": 106 }, { "epoch": 0.004676777831198916, "grad_norm": 3.875, "learning_rate": 9.999556102757244e-05, "loss": 2.6692, "step": 107 }, { "epoch": 0.004720486035228812, "grad_norm": 5.90625, "learning_rate": 9.99954690319778e-05, "loss": 2.6152, "step": 108 }, { "epoch": 0.004764194239258709, "grad_norm": 4.46875, "learning_rate": 9.999537609289592e-05, "loss": 2.6917, "step": 109 }, { "epoch": 0.004807902443288606, "grad_norm": 4.03125, "learning_rate": 9.999528221032852e-05, "loss": 2.2566, "step": 110 }, { "epoch": 0.004851610647318502, "grad_norm": 4.84375, "learning_rate": 9.999518738427737e-05, "loss": 2.2956, "step": 111 }, { "epoch": 0.004895318851348398, "grad_norm": 6.625, "learning_rate": 9.99950916147443e-05, "loss": 2.4404, "step": 112 }, { "epoch": 0.004939027055378294, "grad_norm": 5.0, "learning_rate": 9.999499490173106e-05, "loss": 2.5939, "step": 113 }, { "epoch": 0.0049827352594081905, "grad_norm": 5.0625, "learning_rate": 9.999489724523951e-05, "loss": 3.225, "step": 114 }, { "epoch": 0.005026443463438088, "grad_norm": 4.59375, "learning_rate": 9.999479864527148e-05, "loss": 2.8029, "step": 115 }, { "epoch": 0.005070151667467984, "grad_norm": 3.828125, "learning_rate": 9.999469910182885e-05, "loss": 2.5389, "step": 116 }, { "epoch": 0.00511385987149788, "grad_norm": 4.40625, "learning_rate": 9.999459861491348e-05, "loss": 2.9194, "step": 117 }, { "epoch": 0.005157568075527776, "grad_norm": 4.65625, "learning_rate": 9.999449718452725e-05, "loss": 2.623, "step": 118 }, { "epoch": 0.005201276279557673, "grad_norm": 6.46875, "learning_rate": 9.999439481067212e-05, "loss": 3.8513, "step": 119 }, { "epoch": 0.00524498448358757, "grad_norm": 3.90625, "learning_rate": 9.999429149334998e-05, "loss": 2.4835, "step": 120 }, { "epoch": 0.005288692687617466, "grad_norm": 4.71875, "learning_rate": 9.999418723256279e-05, "loss": 2.4446, "step": 121 }, { "epoch": 0.005332400891647362, "grad_norm": 3.921875, "learning_rate": 9.999408202831255e-05, "loss": 3.1651, "step": 122 }, { "epoch": 0.005376109095677258, "grad_norm": 3.625, "learning_rate": 9.99939758806012e-05, "loss": 2.908, "step": 123 }, { "epoch": 0.005419817299707155, "grad_norm": 3.953125, "learning_rate": 9.999386878943077e-05, "loss": 2.483, "step": 124 }, { "epoch": 0.0054635255037370515, "grad_norm": 4.09375, "learning_rate": 9.999376075480327e-05, "loss": 2.8223, "step": 125 }, { "epoch": 0.005507233707766948, "grad_norm": 4.28125, "learning_rate": 9.999365177672075e-05, "loss": 2.6433, "step": 126 }, { "epoch": 0.005550941911796844, "grad_norm": 4.53125, "learning_rate": 9.999354185518525e-05, "loss": 3.3299, "step": 127 }, { "epoch": 0.005594650115826741, "grad_norm": 4.3125, "learning_rate": 9.999343099019884e-05, "loss": 2.6354, "step": 128 }, { "epoch": 0.005638358319856637, "grad_norm": 4.0625, "learning_rate": 9.999331918176365e-05, "loss": 2.099, "step": 129 }, { "epoch": 0.0056820665238865335, "grad_norm": 4.15625, "learning_rate": 9.999320642988175e-05, "loss": 2.7426, "step": 130 }, { "epoch": 0.00572577472791643, "grad_norm": 4.84375, "learning_rate": 9.999309273455528e-05, "loss": 3.2764, "step": 131 }, { "epoch": 0.005769482931946326, "grad_norm": 3.78125, "learning_rate": 9.99929780957864e-05, "loss": 2.2569, "step": 132 }, { "epoch": 0.005813191135976223, "grad_norm": 4.28125, "learning_rate": 9.999286251357727e-05, "loss": 2.6164, "step": 133 }, { "epoch": 0.005856899340006119, "grad_norm": 4.21875, "learning_rate": 9.999274598793005e-05, "loss": 2.2387, "step": 134 }, { "epoch": 0.0059006075440360154, "grad_norm": 14.125, "learning_rate": 9.999262851884695e-05, "loss": 2.7666, "step": 135 }, { "epoch": 0.005944315748065912, "grad_norm": 4.09375, "learning_rate": 9.99925101063302e-05, "loss": 2.6582, "step": 136 }, { "epoch": 0.005988023952095809, "grad_norm": 4.53125, "learning_rate": 9.9992390750382e-05, "loss": 2.0556, "step": 137 }, { "epoch": 0.006031732156125705, "grad_norm": 4.4375, "learning_rate": 9.999227045100465e-05, "loss": 2.7892, "step": 138 }, { "epoch": 0.006075440360155601, "grad_norm": 4.8125, "learning_rate": 9.999214920820039e-05, "loss": 2.4652, "step": 139 }, { "epoch": 0.006119148564185497, "grad_norm": 5.25, "learning_rate": 9.999202702197151e-05, "loss": 3.0553, "step": 140 }, { "epoch": 0.006162856768215394, "grad_norm": 4.53125, "learning_rate": 9.999190389232032e-05, "loss": 2.8345, "step": 141 }, { "epoch": 0.006206564972245291, "grad_norm": 9.8125, "learning_rate": 9.999177981924916e-05, "loss": 2.97, "step": 142 }, { "epoch": 0.006250273176275187, "grad_norm": 6.0625, "learning_rate": 9.999165480276034e-05, "loss": 2.718, "step": 143 }, { "epoch": 0.006293981380305083, "grad_norm": 5.28125, "learning_rate": 9.999152884285622e-05, "loss": 2.0698, "step": 144 }, { "epoch": 0.006337689584334979, "grad_norm": 4.875, "learning_rate": 9.999140193953921e-05, "loss": 2.4787, "step": 145 }, { "epoch": 0.0063813977883648764, "grad_norm": 4.40625, "learning_rate": 9.99912740928117e-05, "loss": 2.3143, "step": 146 }, { "epoch": 0.006425105992394773, "grad_norm": 10.4375, "learning_rate": 9.999114530267607e-05, "loss": 2.3755, "step": 147 }, { "epoch": 0.006468814196424669, "grad_norm": 4.46875, "learning_rate": 9.999101556913477e-05, "loss": 2.5425, "step": 148 }, { "epoch": 0.006512522400454565, "grad_norm": 5.3125, "learning_rate": 9.999088489219027e-05, "loss": 2.6765, "step": 149 }, { "epoch": 0.006556230604484461, "grad_norm": 4.28125, "learning_rate": 9.999075327184499e-05, "loss": 2.6608, "step": 150 }, { "epoch": 0.006599938808514358, "grad_norm": 4.28125, "learning_rate": 9.999062070810144e-05, "loss": 2.8477, "step": 151 }, { "epoch": 0.006643647012544255, "grad_norm": 5.375, "learning_rate": 9.999048720096212e-05, "loss": 2.8137, "step": 152 }, { "epoch": 0.006687355216574151, "grad_norm": 4.375, "learning_rate": 9.999035275042954e-05, "loss": 2.6685, "step": 153 }, { "epoch": 0.006731063420604047, "grad_norm": 7.09375, "learning_rate": 9.999021735650627e-05, "loss": 3.1262, "step": 154 }, { "epoch": 0.006774771624633944, "grad_norm": 4.53125, "learning_rate": 9.999008101919482e-05, "loss": 2.5198, "step": 155 }, { "epoch": 0.00681847982866384, "grad_norm": 6.625, "learning_rate": 9.99899437384978e-05, "loss": 3.0499, "step": 156 }, { "epoch": 0.006862188032693737, "grad_norm": 4.90625, "learning_rate": 9.998980551441776e-05, "loss": 2.6218, "step": 157 }, { "epoch": 0.006905896236723633, "grad_norm": 4.15625, "learning_rate": 9.998966634695737e-05, "loss": 2.47, "step": 158 }, { "epoch": 0.006949604440753529, "grad_norm": 4.59375, "learning_rate": 9.99895262361192e-05, "loss": 2.1887, "step": 159 }, { "epoch": 0.006993312644783426, "grad_norm": 4.25, "learning_rate": 9.998938518190591e-05, "loss": 3.6236, "step": 160 }, { "epoch": 0.007037020848813322, "grad_norm": 3.546875, "learning_rate": 9.998924318432016e-05, "loss": 2.6418, "step": 161 }, { "epoch": 0.0070807290528432185, "grad_norm": 4.0, "learning_rate": 9.998910024336466e-05, "loss": 2.4386, "step": 162 }, { "epoch": 0.007124437256873115, "grad_norm": 5.5625, "learning_rate": 9.998895635904205e-05, "loss": 2.7331, "step": 163 }, { "epoch": 0.007168145460903012, "grad_norm": 4.46875, "learning_rate": 9.99888115313551e-05, "loss": 2.7163, "step": 164 }, { "epoch": 0.007211853664932908, "grad_norm": 3.484375, "learning_rate": 9.998866576030651e-05, "loss": 2.123, "step": 165 }, { "epoch": 0.007255561868962804, "grad_norm": 5.375, "learning_rate": 9.998851904589905e-05, "loss": 2.7204, "step": 166 }, { "epoch": 0.0072992700729927005, "grad_norm": 4.21875, "learning_rate": 9.998837138813549e-05, "loss": 2.6734, "step": 167 }, { "epoch": 0.007342978277022598, "grad_norm": 4.65625, "learning_rate": 9.998822278701858e-05, "loss": 2.7616, "step": 168 }, { "epoch": 0.007386686481052494, "grad_norm": 4.34375, "learning_rate": 9.998807324255118e-05, "loss": 2.556, "step": 169 }, { "epoch": 0.00743039468508239, "grad_norm": 5.34375, "learning_rate": 9.998792275473607e-05, "loss": 2.5006, "step": 170 }, { "epoch": 0.007474102889112286, "grad_norm": 4.09375, "learning_rate": 9.99877713235761e-05, "loss": 2.8722, "step": 171 }, { "epoch": 0.0075178110931421824, "grad_norm": 5.03125, "learning_rate": 9.998761894907414e-05, "loss": 3.0658, "step": 172 }, { "epoch": 0.0075615192971720795, "grad_norm": 4.4375, "learning_rate": 9.998746563123305e-05, "loss": 2.8287, "step": 173 }, { "epoch": 0.007605227501201976, "grad_norm": 4.9375, "learning_rate": 9.998731137005572e-05, "loss": 2.5613, "step": 174 }, { "epoch": 0.007648935705231872, "grad_norm": 4.09375, "learning_rate": 9.99871561655451e-05, "loss": 2.4192, "step": 175 }, { "epoch": 0.007692643909261768, "grad_norm": 9.875, "learning_rate": 9.998700001770406e-05, "loss": 3.3212, "step": 176 }, { "epoch": 0.007736352113291665, "grad_norm": 5.3125, "learning_rate": 9.998684292653559e-05, "loss": 2.8366, "step": 177 }, { "epoch": 0.0077800603173215615, "grad_norm": 4.75, "learning_rate": 9.998668489204266e-05, "loss": 2.4589, "step": 178 }, { "epoch": 0.007823768521351457, "grad_norm": 3.859375, "learning_rate": 9.998652591422821e-05, "loss": 2.6667, "step": 179 }, { "epoch": 0.007867476725381354, "grad_norm": 4.53125, "learning_rate": 9.998636599309527e-05, "loss": 2.7617, "step": 180 }, { "epoch": 0.007911184929411251, "grad_norm": 4.1875, "learning_rate": 9.998620512864686e-05, "loss": 2.5969, "step": 181 }, { "epoch": 0.007954893133441146, "grad_norm": 4.59375, "learning_rate": 9.9986043320886e-05, "loss": 2.214, "step": 182 }, { "epoch": 0.007998601337471043, "grad_norm": 4.28125, "learning_rate": 9.998588056981575e-05, "loss": 2.6208, "step": 183 }, { "epoch": 0.00804230954150094, "grad_norm": 4.375, "learning_rate": 9.998571687543918e-05, "loss": 2.8798, "step": 184 }, { "epoch": 0.008086017745530836, "grad_norm": 4.3125, "learning_rate": 9.99855522377594e-05, "loss": 2.4923, "step": 185 }, { "epoch": 0.008129725949560733, "grad_norm": 3.703125, "learning_rate": 9.998538665677948e-05, "loss": 2.055, "step": 186 }, { "epoch": 0.008173434153590628, "grad_norm": 4.78125, "learning_rate": 9.998522013250257e-05, "loss": 2.6016, "step": 187 }, { "epoch": 0.008217142357620525, "grad_norm": 4.09375, "learning_rate": 9.998505266493181e-05, "loss": 2.4205, "step": 188 }, { "epoch": 0.008260850561650422, "grad_norm": 5.25, "learning_rate": 9.998488425407035e-05, "loss": 3.2423, "step": 189 }, { "epoch": 0.008304558765680318, "grad_norm": 3.703125, "learning_rate": 9.998471489992138e-05, "loss": 2.3305, "step": 190 }, { "epoch": 0.008348266969710215, "grad_norm": 3.40625, "learning_rate": 9.998454460248809e-05, "loss": 2.736, "step": 191 }, { "epoch": 0.00839197517374011, "grad_norm": 4.46875, "learning_rate": 9.998437336177369e-05, "loss": 2.8134, "step": 192 }, { "epoch": 0.008435683377770007, "grad_norm": 5.03125, "learning_rate": 9.998420117778141e-05, "loss": 2.3994, "step": 193 }, { "epoch": 0.008479391581799904, "grad_norm": 5.6875, "learning_rate": 9.998402805051452e-05, "loss": 2.5559, "step": 194 }, { "epoch": 0.0085230997858298, "grad_norm": 3.4375, "learning_rate": 9.998385397997625e-05, "loss": 2.3871, "step": 195 }, { "epoch": 0.008566807989859697, "grad_norm": 4.28125, "learning_rate": 9.998367896616993e-05, "loss": 2.1715, "step": 196 }, { "epoch": 0.008610516193889592, "grad_norm": 6.15625, "learning_rate": 9.998350300909883e-05, "loss": 2.7056, "step": 197 }, { "epoch": 0.00865422439791949, "grad_norm": 10.3125, "learning_rate": 9.998332610876629e-05, "loss": 2.2428, "step": 198 }, { "epoch": 0.008697932601949386, "grad_norm": 6.40625, "learning_rate": 9.998314826517563e-05, "loss": 2.7718, "step": 199 }, { "epoch": 0.008741640805979282, "grad_norm": 7.84375, "learning_rate": 9.998296947833021e-05, "loss": 3.1103, "step": 200 }, { "epoch": 0.008785349010009179, "grad_norm": 4.96875, "learning_rate": 9.998278974823342e-05, "loss": 2.9857, "step": 201 }, { "epoch": 0.008829057214039076, "grad_norm": 4.3125, "learning_rate": 9.998260907488863e-05, "loss": 2.2535, "step": 202 }, { "epoch": 0.008872765418068971, "grad_norm": 3.625, "learning_rate": 9.998242745829927e-05, "loss": 2.3132, "step": 203 }, { "epoch": 0.008916473622098868, "grad_norm": 5.46875, "learning_rate": 9.998224489846877e-05, "loss": 2.4809, "step": 204 }, { "epoch": 0.008960181826128764, "grad_norm": 5.125, "learning_rate": 9.998206139540054e-05, "loss": 3.1461, "step": 205 }, { "epoch": 0.00900389003015866, "grad_norm": 4.15625, "learning_rate": 9.998187694909807e-05, "loss": 2.5521, "step": 206 }, { "epoch": 0.009047598234188558, "grad_norm": 5.6875, "learning_rate": 9.998169155956485e-05, "loss": 2.6275, "step": 207 }, { "epoch": 0.009091306438218453, "grad_norm": 5.09375, "learning_rate": 9.998150522680437e-05, "loss": 2.3942, "step": 208 }, { "epoch": 0.00913501464224835, "grad_norm": 19.0, "learning_rate": 9.998131795082011e-05, "loss": 2.7506, "step": 209 }, { "epoch": 0.009178722846278246, "grad_norm": 4.125, "learning_rate": 9.998112973161566e-05, "loss": 2.8633, "step": 210 }, { "epoch": 0.009222431050308143, "grad_norm": 4.59375, "learning_rate": 9.998094056919454e-05, "loss": 2.8713, "step": 211 }, { "epoch": 0.00926613925433804, "grad_norm": 3.9375, "learning_rate": 9.998075046356033e-05, "loss": 2.5957, "step": 212 }, { "epoch": 0.009309847458367935, "grad_norm": 3.359375, "learning_rate": 9.998055941471662e-05, "loss": 2.4896, "step": 213 }, { "epoch": 0.009353555662397832, "grad_norm": 4.5, "learning_rate": 9.998036742266701e-05, "loss": 3.1921, "step": 214 }, { "epoch": 0.009397263866427728, "grad_norm": 4.375, "learning_rate": 9.998017448741512e-05, "loss": 2.8811, "step": 215 }, { "epoch": 0.009440972070457625, "grad_norm": 3.671875, "learning_rate": 9.997998060896458e-05, "loss": 2.6873, "step": 216 }, { "epoch": 0.009484680274487522, "grad_norm": 4.0625, "learning_rate": 9.997978578731908e-05, "loss": 2.5835, "step": 217 }, { "epoch": 0.009528388478517417, "grad_norm": 7.0625, "learning_rate": 9.997959002248229e-05, "loss": 2.7929, "step": 218 }, { "epoch": 0.009572096682547314, "grad_norm": 3.890625, "learning_rate": 9.997939331445787e-05, "loss": 2.1056, "step": 219 }, { "epoch": 0.009615804886577211, "grad_norm": 4.28125, "learning_rate": 9.997919566324959e-05, "loss": 3.3739, "step": 220 }, { "epoch": 0.009659513090607107, "grad_norm": 3.78125, "learning_rate": 9.99789970688611e-05, "loss": 2.6079, "step": 221 }, { "epoch": 0.009703221294637004, "grad_norm": 3.796875, "learning_rate": 9.997879753129624e-05, "loss": 2.4539, "step": 222 }, { "epoch": 0.009746929498666899, "grad_norm": 4.21875, "learning_rate": 9.99785970505587e-05, "loss": 2.2522, "step": 223 }, { "epoch": 0.009790637702696796, "grad_norm": 3.84375, "learning_rate": 9.99783956266523e-05, "loss": 2.6285, "step": 224 }, { "epoch": 0.009834345906726693, "grad_norm": 4.71875, "learning_rate": 9.997819325958085e-05, "loss": 2.7673, "step": 225 }, { "epoch": 0.009878054110756589, "grad_norm": 7.5, "learning_rate": 9.997798994934813e-05, "loss": 2.7084, "step": 226 }, { "epoch": 0.009921762314786486, "grad_norm": 4.03125, "learning_rate": 9.997778569595801e-05, "loss": 2.9379, "step": 227 }, { "epoch": 0.009965470518816381, "grad_norm": 4.46875, "learning_rate": 9.997758049941435e-05, "loss": 2.7648, "step": 228 }, { "epoch": 0.010009178722846278, "grad_norm": 3.71875, "learning_rate": 9.997737435972099e-05, "loss": 2.381, "step": 229 }, { "epoch": 0.010052886926876175, "grad_norm": 3.40625, "learning_rate": 9.997716727688183e-05, "loss": 2.2979, "step": 230 }, { "epoch": 0.01009659513090607, "grad_norm": 4.15625, "learning_rate": 9.99769592509008e-05, "loss": 2.7071, "step": 231 }, { "epoch": 0.010140303334935968, "grad_norm": 4.875, "learning_rate": 9.99767502817818e-05, "loss": 3.5704, "step": 232 }, { "epoch": 0.010184011538965865, "grad_norm": 3.421875, "learning_rate": 9.997654036952879e-05, "loss": 1.8784, "step": 233 }, { "epoch": 0.01022771974299576, "grad_norm": 4.5, "learning_rate": 9.997632951414573e-05, "loss": 2.5868, "step": 234 }, { "epoch": 0.010271427947025657, "grad_norm": 4.21875, "learning_rate": 9.99761177156366e-05, "loss": 2.1905, "step": 235 }, { "epoch": 0.010315136151055553, "grad_norm": 4.25, "learning_rate": 9.997590497400538e-05, "loss": 3.011, "step": 236 }, { "epoch": 0.01035884435508545, "grad_norm": 3.765625, "learning_rate": 9.997569128925611e-05, "loss": 2.5319, "step": 237 }, { "epoch": 0.010402552559115347, "grad_norm": 4.09375, "learning_rate": 9.997547666139279e-05, "loss": 2.0043, "step": 238 }, { "epoch": 0.010446260763145242, "grad_norm": 3.640625, "learning_rate": 9.99752610904195e-05, "loss": 2.708, "step": 239 }, { "epoch": 0.01048996896717514, "grad_norm": 3.28125, "learning_rate": 9.997504457634029e-05, "loss": 2.214, "step": 240 }, { "epoch": 0.010533677171205034, "grad_norm": 3.140625, "learning_rate": 9.997482711915927e-05, "loss": 2.275, "step": 241 }, { "epoch": 0.010577385375234932, "grad_norm": 4.6875, "learning_rate": 9.997460871888052e-05, "loss": 2.7156, "step": 242 }, { "epoch": 0.010621093579264829, "grad_norm": 5.0625, "learning_rate": 9.997438937550816e-05, "loss": 3.3969, "step": 243 }, { "epoch": 0.010664801783294724, "grad_norm": 4.59375, "learning_rate": 9.997416908904633e-05, "loss": 2.7685, "step": 244 }, { "epoch": 0.010708509987324621, "grad_norm": 3.453125, "learning_rate": 9.997394785949922e-05, "loss": 2.6075, "step": 245 }, { "epoch": 0.010752218191354516, "grad_norm": 3.625, "learning_rate": 9.997372568687097e-05, "loss": 2.2984, "step": 246 }, { "epoch": 0.010795926395384414, "grad_norm": 4.375, "learning_rate": 9.997350257116578e-05, "loss": 2.589, "step": 247 }, { "epoch": 0.01083963459941431, "grad_norm": 3.75, "learning_rate": 9.997327851238788e-05, "loss": 2.0634, "step": 248 }, { "epoch": 0.010883342803444206, "grad_norm": 9.1875, "learning_rate": 9.997305351054146e-05, "loss": 2.4742, "step": 249 }, { "epoch": 0.010927051007474103, "grad_norm": 4.0, "learning_rate": 9.99728275656308e-05, "loss": 2.9405, "step": 250 }, { "epoch": 0.010970759211504, "grad_norm": 3.953125, "learning_rate": 9.997260067766014e-05, "loss": 2.304, "step": 251 }, { "epoch": 0.011014467415533895, "grad_norm": 4.4375, "learning_rate": 9.997237284663379e-05, "loss": 2.9572, "step": 252 }, { "epoch": 0.011058175619563793, "grad_norm": 3.65625, "learning_rate": 9.997214407255602e-05, "loss": 2.3134, "step": 253 }, { "epoch": 0.011101883823593688, "grad_norm": 5.125, "learning_rate": 9.997191435543117e-05, "loss": 3.2421, "step": 254 }, { "epoch": 0.011145592027623585, "grad_norm": 3.453125, "learning_rate": 9.997168369526355e-05, "loss": 2.6112, "step": 255 }, { "epoch": 0.011189300231653482, "grad_norm": 5.96875, "learning_rate": 9.997145209205754e-05, "loss": 2.4515, "step": 256 }, { "epoch": 0.011233008435683377, "grad_norm": 5.28125, "learning_rate": 9.99712195458175e-05, "loss": 2.8503, "step": 257 }, { "epoch": 0.011276716639713275, "grad_norm": 3.9375, "learning_rate": 9.997098605654782e-05, "loss": 2.4916, "step": 258 }, { "epoch": 0.01132042484374317, "grad_norm": 3.59375, "learning_rate": 9.99707516242529e-05, "loss": 2.2976, "step": 259 }, { "epoch": 0.011364133047773067, "grad_norm": 4.40625, "learning_rate": 9.997051624893716e-05, "loss": 2.5726, "step": 260 }, { "epoch": 0.011407841251802964, "grad_norm": 4.0625, "learning_rate": 9.997027993060506e-05, "loss": 2.0185, "step": 261 }, { "epoch": 0.01145154945583286, "grad_norm": 3.96875, "learning_rate": 9.997004266926105e-05, "loss": 3.0021, "step": 262 }, { "epoch": 0.011495257659862756, "grad_norm": 4.0625, "learning_rate": 9.99698044649096e-05, "loss": 2.7337, "step": 263 }, { "epoch": 0.011538965863892652, "grad_norm": 3.359375, "learning_rate": 9.996956531755521e-05, "loss": 2.3048, "step": 264 }, { "epoch": 0.011582674067922549, "grad_norm": 5.34375, "learning_rate": 9.996932522720242e-05, "loss": 3.3072, "step": 265 }, { "epoch": 0.011626382271952446, "grad_norm": 8.625, "learning_rate": 9.996908419385571e-05, "loss": 3.0222, "step": 266 }, { "epoch": 0.011670090475982341, "grad_norm": 4.5625, "learning_rate": 9.996884221751966e-05, "loss": 2.5027, "step": 267 }, { "epoch": 0.011713798680012238, "grad_norm": 4.15625, "learning_rate": 9.996859929819882e-05, "loss": 2.2139, "step": 268 }, { "epoch": 0.011757506884042136, "grad_norm": 62.75, "learning_rate": 9.996835543589781e-05, "loss": 3.957, "step": 269 }, { "epoch": 0.011801215088072031, "grad_norm": 4.5, "learning_rate": 9.996811063062119e-05, "loss": 2.7251, "step": 270 }, { "epoch": 0.011844923292101928, "grad_norm": 4.75, "learning_rate": 9.99678648823736e-05, "loss": 2.8835, "step": 271 }, { "epoch": 0.011888631496131823, "grad_norm": 4.5, "learning_rate": 9.996761819115968e-05, "loss": 2.8296, "step": 272 }, { "epoch": 0.01193233970016172, "grad_norm": 4.9375, "learning_rate": 9.996737055698409e-05, "loss": 2.8128, "step": 273 }, { "epoch": 0.011976047904191617, "grad_norm": 4.125, "learning_rate": 9.996712197985147e-05, "loss": 2.365, "step": 274 }, { "epoch": 0.012019756108221513, "grad_norm": 6.3125, "learning_rate": 9.996687245976655e-05, "loss": 2.4859, "step": 275 }, { "epoch": 0.01206346431225141, "grad_norm": 3.15625, "learning_rate": 9.996662199673401e-05, "loss": 2.6919, "step": 276 }, { "epoch": 0.012107172516281305, "grad_norm": 4.5, "learning_rate": 9.996637059075861e-05, "loss": 2.649, "step": 277 }, { "epoch": 0.012150880720311202, "grad_norm": 7.6875, "learning_rate": 9.996611824184505e-05, "loss": 3.3843, "step": 278 }, { "epoch": 0.0121945889243411, "grad_norm": 3.703125, "learning_rate": 9.996586494999814e-05, "loss": 2.5776, "step": 279 }, { "epoch": 0.012238297128370995, "grad_norm": 3.734375, "learning_rate": 9.996561071522264e-05, "loss": 2.43, "step": 280 }, { "epoch": 0.012282005332400892, "grad_norm": 5.3125, "learning_rate": 9.996535553752331e-05, "loss": 2.3819, "step": 281 }, { "epoch": 0.012325713536430787, "grad_norm": 3.359375, "learning_rate": 9.996509941690503e-05, "loss": 2.1984, "step": 282 }, { "epoch": 0.012369421740460684, "grad_norm": 3.6875, "learning_rate": 9.99648423533726e-05, "loss": 2.7669, "step": 283 }, { "epoch": 0.012413129944490581, "grad_norm": 3.3125, "learning_rate": 9.996458434693086e-05, "loss": 2.799, "step": 284 }, { "epoch": 0.012456838148520477, "grad_norm": 4.0625, "learning_rate": 9.99643253975847e-05, "loss": 2.7216, "step": 285 }, { "epoch": 0.012500546352550374, "grad_norm": 3.390625, "learning_rate": 9.996406550533901e-05, "loss": 2.8937, "step": 286 }, { "epoch": 0.012544254556580271, "grad_norm": 6.0625, "learning_rate": 9.996380467019868e-05, "loss": 2.3943, "step": 287 }, { "epoch": 0.012587962760610166, "grad_norm": 4.15625, "learning_rate": 9.996354289216863e-05, "loss": 2.9379, "step": 288 }, { "epoch": 0.012631670964640063, "grad_norm": 3.71875, "learning_rate": 9.996328017125381e-05, "loss": 2.5333, "step": 289 }, { "epoch": 0.012675379168669959, "grad_norm": 5.09375, "learning_rate": 9.996301650745917e-05, "loss": 2.6082, "step": 290 }, { "epoch": 0.012719087372699856, "grad_norm": 4.28125, "learning_rate": 9.99627519007897e-05, "loss": 3.1054, "step": 291 }, { "epoch": 0.012762795576729753, "grad_norm": 4.5, "learning_rate": 9.996248635125039e-05, "loss": 2.423, "step": 292 }, { "epoch": 0.012806503780759648, "grad_norm": 7.8125, "learning_rate": 9.996221985884623e-05, "loss": 2.3539, "step": 293 }, { "epoch": 0.012850211984789545, "grad_norm": 4.6875, "learning_rate": 9.996195242358226e-05, "loss": 2.8162, "step": 294 }, { "epoch": 0.01289392018881944, "grad_norm": 4.0625, "learning_rate": 9.996168404546356e-05, "loss": 2.4511, "step": 295 }, { "epoch": 0.012937628392849338, "grad_norm": 3.46875, "learning_rate": 9.996141472449514e-05, "loss": 2.3848, "step": 296 }, { "epoch": 0.012981336596879235, "grad_norm": 3.515625, "learning_rate": 9.996114446068212e-05, "loss": 2.4308, "step": 297 }, { "epoch": 0.01302504480090913, "grad_norm": 5.0, "learning_rate": 9.996087325402959e-05, "loss": 2.6241, "step": 298 }, { "epoch": 0.013068753004939027, "grad_norm": 3.453125, "learning_rate": 9.996060110454266e-05, "loss": 2.1833, "step": 299 }, { "epoch": 0.013112461208968923, "grad_norm": 3.625, "learning_rate": 9.996032801222648e-05, "loss": 2.782, "step": 300 }, { "epoch": 0.01315616941299882, "grad_norm": 3.046875, "learning_rate": 9.996005397708619e-05, "loss": 2.0714, "step": 301 }, { "epoch": 0.013199877617028717, "grad_norm": 4.59375, "learning_rate": 9.995977899912697e-05, "loss": 2.7152, "step": 302 }, { "epoch": 0.013243585821058612, "grad_norm": 3.765625, "learning_rate": 9.995950307835401e-05, "loss": 2.8984, "step": 303 }, { "epoch": 0.01328729402508851, "grad_norm": 3.828125, "learning_rate": 9.995922621477252e-05, "loss": 2.3674, "step": 304 }, { "epoch": 0.013331002229118406, "grad_norm": 6.0625, "learning_rate": 9.995894840838771e-05, "loss": 2.7993, "step": 305 }, { "epoch": 0.013374710433148302, "grad_norm": 5.78125, "learning_rate": 9.995866965920485e-05, "loss": 2.4223, "step": 306 }, { "epoch": 0.013418418637178199, "grad_norm": 4.6875, "learning_rate": 9.995838996722914e-05, "loss": 2.8455, "step": 307 }, { "epoch": 0.013462126841208094, "grad_norm": 3.71875, "learning_rate": 9.995810933246594e-05, "loss": 2.0036, "step": 308 }, { "epoch": 0.013505835045237991, "grad_norm": 4.03125, "learning_rate": 9.995782775492048e-05, "loss": 2.5858, "step": 309 }, { "epoch": 0.013549543249267888, "grad_norm": 11.75, "learning_rate": 9.995754523459813e-05, "loss": 2.921, "step": 310 }, { "epoch": 0.013593251453297784, "grad_norm": 27.875, "learning_rate": 9.995726177150418e-05, "loss": 2.7016, "step": 311 }, { "epoch": 0.01363695965732768, "grad_norm": 5.09375, "learning_rate": 9.995697736564397e-05, "loss": 2.4968, "step": 312 }, { "epoch": 0.013680667861357576, "grad_norm": 3.9375, "learning_rate": 9.995669201702291e-05, "loss": 3.1727, "step": 313 }, { "epoch": 0.013724376065387473, "grad_norm": 4.34375, "learning_rate": 9.995640572564635e-05, "loss": 2.812, "step": 314 }, { "epoch": 0.01376808426941737, "grad_norm": 3.296875, "learning_rate": 9.995611849151971e-05, "loss": 2.3059, "step": 315 }, { "epoch": 0.013811792473447266, "grad_norm": 5.40625, "learning_rate": 9.995583031464842e-05, "loss": 3.082, "step": 316 }, { "epoch": 0.013855500677477163, "grad_norm": 4.0, "learning_rate": 9.99555411950379e-05, "loss": 2.0426, "step": 317 }, { "epoch": 0.013899208881507058, "grad_norm": 4.65625, "learning_rate": 9.99552511326936e-05, "loss": 2.8893, "step": 318 }, { "epoch": 0.013942917085536955, "grad_norm": 3.734375, "learning_rate": 9.9954960127621e-05, "loss": 2.4564, "step": 319 }, { "epoch": 0.013986625289566852, "grad_norm": 3.984375, "learning_rate": 9.995466817982562e-05, "loss": 2.5178, "step": 320 }, { "epoch": 0.014030333493596748, "grad_norm": 15.625, "learning_rate": 9.995437528931293e-05, "loss": 3.7793, "step": 321 }, { "epoch": 0.014074041697626645, "grad_norm": 3.40625, "learning_rate": 9.995408145608847e-05, "loss": 2.3784, "step": 322 }, { "epoch": 0.014117749901656542, "grad_norm": 3.890625, "learning_rate": 9.99537866801578e-05, "loss": 2.2287, "step": 323 }, { "epoch": 0.014161458105686437, "grad_norm": 3.984375, "learning_rate": 9.995349096152645e-05, "loss": 2.4412, "step": 324 }, { "epoch": 0.014205166309716334, "grad_norm": 4.09375, "learning_rate": 9.995319430020003e-05, "loss": 2.3395, "step": 325 }, { "epoch": 0.01424887451374623, "grad_norm": 4.5, "learning_rate": 9.995289669618415e-05, "loss": 2.7861, "step": 326 }, { "epoch": 0.014292582717776127, "grad_norm": 4.34375, "learning_rate": 9.995259814948439e-05, "loss": 2.4553, "step": 327 }, { "epoch": 0.014336290921806024, "grad_norm": 4.78125, "learning_rate": 9.99522986601064e-05, "loss": 2.9104, "step": 328 }, { "epoch": 0.014379999125835919, "grad_norm": 4.03125, "learning_rate": 9.995199822805583e-05, "loss": 2.8891, "step": 329 }, { "epoch": 0.014423707329865816, "grad_norm": 4.34375, "learning_rate": 9.995169685333836e-05, "loss": 2.8756, "step": 330 }, { "epoch": 0.014467415533895711, "grad_norm": 3.140625, "learning_rate": 9.995139453595968e-05, "loss": 2.123, "step": 331 }, { "epoch": 0.014511123737925609, "grad_norm": 4.1875, "learning_rate": 9.995109127592546e-05, "loss": 2.7999, "step": 332 }, { "epoch": 0.014554831941955506, "grad_norm": 3.359375, "learning_rate": 9.995078707324146e-05, "loss": 2.3765, "step": 333 }, { "epoch": 0.014598540145985401, "grad_norm": 3.515625, "learning_rate": 9.99504819279134e-05, "loss": 2.0792, "step": 334 }, { "epoch": 0.014642248350015298, "grad_norm": 4.125, "learning_rate": 9.995017583994706e-05, "loss": 2.5463, "step": 335 }, { "epoch": 0.014685956554045195, "grad_norm": 4.71875, "learning_rate": 9.99498688093482e-05, "loss": 3.4456, "step": 336 }, { "epoch": 0.01472966475807509, "grad_norm": 3.765625, "learning_rate": 9.994956083612261e-05, "loss": 2.3138, "step": 337 }, { "epoch": 0.014773372962104988, "grad_norm": 3.359375, "learning_rate": 9.99492519202761e-05, "loss": 2.4555, "step": 338 }, { "epoch": 0.014817081166134883, "grad_norm": 3.453125, "learning_rate": 9.994894206181452e-05, "loss": 2.4877, "step": 339 }, { "epoch": 0.01486078937016478, "grad_norm": 4.875, "learning_rate": 9.994863126074371e-05, "loss": 2.9761, "step": 340 }, { "epoch": 0.014904497574194677, "grad_norm": 4.09375, "learning_rate": 9.994831951706953e-05, "loss": 2.5688, "step": 341 }, { "epoch": 0.014948205778224572, "grad_norm": 3.171875, "learning_rate": 9.994800683079786e-05, "loss": 2.6589, "step": 342 }, { "epoch": 0.01499191398225447, "grad_norm": 4.65625, "learning_rate": 9.99476932019346e-05, "loss": 2.4292, "step": 343 }, { "epoch": 0.015035622186284365, "grad_norm": 3.703125, "learning_rate": 9.994737863048567e-05, "loss": 2.5704, "step": 344 }, { "epoch": 0.015079330390314262, "grad_norm": 4.15625, "learning_rate": 9.994706311645703e-05, "loss": 2.5487, "step": 345 }, { "epoch": 0.015123038594344159, "grad_norm": 3.65625, "learning_rate": 9.994674665985461e-05, "loss": 2.1273, "step": 346 }, { "epoch": 0.015166746798374054, "grad_norm": 4.59375, "learning_rate": 9.994642926068438e-05, "loss": 2.0096, "step": 347 }, { "epoch": 0.015210455002403951, "grad_norm": 4.8125, "learning_rate": 9.994611091895234e-05, "loss": 2.841, "step": 348 }, { "epoch": 0.015254163206433847, "grad_norm": 3.375, "learning_rate": 9.994579163466448e-05, "loss": 2.3199, "step": 349 }, { "epoch": 0.015297871410463744, "grad_norm": 3.171875, "learning_rate": 9.994547140782686e-05, "loss": 2.1122, "step": 350 }, { "epoch": 0.015341579614493641, "grad_norm": 3.6875, "learning_rate": 9.994515023844548e-05, "loss": 2.705, "step": 351 }, { "epoch": 0.015385287818523536, "grad_norm": 3.828125, "learning_rate": 9.994482812652645e-05, "loss": 1.9516, "step": 352 }, { "epoch": 0.015428996022553433, "grad_norm": 3.453125, "learning_rate": 9.99445050720758e-05, "loss": 2.2709, "step": 353 }, { "epoch": 0.01547270422658333, "grad_norm": 3.84375, "learning_rate": 9.994418107509966e-05, "loss": 2.2912, "step": 354 }, { "epoch": 0.015516412430613226, "grad_norm": 4.4375, "learning_rate": 9.994385613560413e-05, "loss": 2.6541, "step": 355 }, { "epoch": 0.015560120634643123, "grad_norm": 3.953125, "learning_rate": 9.994353025359535e-05, "loss": 2.1626, "step": 356 }, { "epoch": 0.015603828838673018, "grad_norm": 3.890625, "learning_rate": 9.994320342907945e-05, "loss": 2.693, "step": 357 }, { "epoch": 0.015647537042702914, "grad_norm": 3.5625, "learning_rate": 9.994287566206264e-05, "loss": 2.8311, "step": 358 }, { "epoch": 0.01569124524673281, "grad_norm": 3.59375, "learning_rate": 9.994254695255105e-05, "loss": 2.4746, "step": 359 }, { "epoch": 0.015734953450762708, "grad_norm": 3.375, "learning_rate": 9.994221730055091e-05, "loss": 2.3147, "step": 360 }, { "epoch": 0.015778661654792605, "grad_norm": 4.09375, "learning_rate": 9.994188670606846e-05, "loss": 2.7883, "step": 361 }, { "epoch": 0.015822369858822502, "grad_norm": 3.34375, "learning_rate": 9.994155516910991e-05, "loss": 2.2414, "step": 362 }, { "epoch": 0.0158660780628524, "grad_norm": 3.1875, "learning_rate": 9.994122268968154e-05, "loss": 2.3983, "step": 363 }, { "epoch": 0.015909786266882293, "grad_norm": 3.859375, "learning_rate": 9.99408892677896e-05, "loss": 2.6232, "step": 364 }, { "epoch": 0.01595349447091219, "grad_norm": 6.09375, "learning_rate": 9.99405549034404e-05, "loss": 2.3536, "step": 365 }, { "epoch": 0.015997202674942087, "grad_norm": 4.9375, "learning_rate": 9.994021959664024e-05, "loss": 2.8419, "step": 366 }, { "epoch": 0.016040910878971984, "grad_norm": 3.703125, "learning_rate": 9.993988334739544e-05, "loss": 2.9319, "step": 367 }, { "epoch": 0.01608461908300188, "grad_norm": 4.53125, "learning_rate": 9.993954615571238e-05, "loss": 2.4233, "step": 368 }, { "epoch": 0.016128327287031775, "grad_norm": 5.84375, "learning_rate": 9.993920802159739e-05, "loss": 3.2623, "step": 369 }, { "epoch": 0.016172035491061672, "grad_norm": 3.28125, "learning_rate": 9.993886894505686e-05, "loss": 2.6416, "step": 370 }, { "epoch": 0.01621574369509157, "grad_norm": 4.34375, "learning_rate": 9.993852892609718e-05, "loss": 2.7184, "step": 371 }, { "epoch": 0.016259451899121466, "grad_norm": 4.0625, "learning_rate": 9.99381879647248e-05, "loss": 2.9044, "step": 372 }, { "epoch": 0.016303160103151363, "grad_norm": 4.5, "learning_rate": 9.993784606094612e-05, "loss": 2.1336, "step": 373 }, { "epoch": 0.016346868307181257, "grad_norm": 6.34375, "learning_rate": 9.99375032147676e-05, "loss": 2.2281, "step": 374 }, { "epoch": 0.016390576511211154, "grad_norm": 3.21875, "learning_rate": 9.993715942619573e-05, "loss": 2.3788, "step": 375 }, { "epoch": 0.01643428471524105, "grad_norm": 3.21875, "learning_rate": 9.993681469523697e-05, "loss": 1.9586, "step": 376 }, { "epoch": 0.016477992919270948, "grad_norm": 4.3125, "learning_rate": 9.993646902189784e-05, "loss": 3.2296, "step": 377 }, { "epoch": 0.016521701123300845, "grad_norm": 3.984375, "learning_rate": 9.993612240618485e-05, "loss": 2.4422, "step": 378 }, { "epoch": 0.01656540932733074, "grad_norm": 5.28125, "learning_rate": 9.993577484810455e-05, "loss": 2.8639, "step": 379 }, { "epoch": 0.016609117531360636, "grad_norm": 5.34375, "learning_rate": 9.993542634766352e-05, "loss": 2.9119, "step": 380 }, { "epoch": 0.016652825735390533, "grad_norm": 4.25, "learning_rate": 9.993507690486831e-05, "loss": 2.5102, "step": 381 }, { "epoch": 0.01669653393942043, "grad_norm": 3.53125, "learning_rate": 9.99347265197255e-05, "loss": 2.5379, "step": 382 }, { "epoch": 0.016740242143450327, "grad_norm": 3.53125, "learning_rate": 9.993437519224176e-05, "loss": 2.2643, "step": 383 }, { "epoch": 0.01678395034748022, "grad_norm": 3.625, "learning_rate": 9.993402292242367e-05, "loss": 2.3072, "step": 384 }, { "epoch": 0.016827658551510118, "grad_norm": 3.90625, "learning_rate": 9.993366971027788e-05, "loss": 2.592, "step": 385 }, { "epoch": 0.016871366755540015, "grad_norm": 4.0625, "learning_rate": 9.993331555581108e-05, "loss": 2.6759, "step": 386 }, { "epoch": 0.016915074959569912, "grad_norm": 3.296875, "learning_rate": 9.993296045902994e-05, "loss": 2.2537, "step": 387 }, { "epoch": 0.01695878316359981, "grad_norm": 4.625, "learning_rate": 9.993260441994116e-05, "loss": 2.7636, "step": 388 }, { "epoch": 0.017002491367629703, "grad_norm": 3.671875, "learning_rate": 9.993224743855145e-05, "loss": 2.5618, "step": 389 }, { "epoch": 0.0170461995716596, "grad_norm": 3.09375, "learning_rate": 9.993188951486758e-05, "loss": 2.3234, "step": 390 }, { "epoch": 0.017089907775689497, "grad_norm": 3.453125, "learning_rate": 9.993153064889626e-05, "loss": 3.1043, "step": 391 }, { "epoch": 0.017133615979719394, "grad_norm": 4.28125, "learning_rate": 9.99311708406443e-05, "loss": 2.9755, "step": 392 }, { "epoch": 0.01717732418374929, "grad_norm": 3.890625, "learning_rate": 9.993081009011847e-05, "loss": 2.5243, "step": 393 }, { "epoch": 0.017221032387779184, "grad_norm": 3.6875, "learning_rate": 9.993044839732559e-05, "loss": 2.388, "step": 394 }, { "epoch": 0.01726474059180908, "grad_norm": 3.09375, "learning_rate": 9.993008576227247e-05, "loss": 2.1978, "step": 395 }, { "epoch": 0.01730844879583898, "grad_norm": 3.359375, "learning_rate": 9.992972218496597e-05, "loss": 2.2283, "step": 396 }, { "epoch": 0.017352156999868876, "grad_norm": 4.09375, "learning_rate": 9.992935766541294e-05, "loss": 2.2622, "step": 397 }, { "epoch": 0.017395865203898773, "grad_norm": 3.625, "learning_rate": 9.992899220362025e-05, "loss": 2.4432, "step": 398 }, { "epoch": 0.01743957340792867, "grad_norm": 4.0625, "learning_rate": 9.992862579959481e-05, "loss": 2.408, "step": 399 }, { "epoch": 0.017483281611958564, "grad_norm": 3.625, "learning_rate": 9.992825845334355e-05, "loss": 1.867, "step": 400 }, { "epoch": 0.01752698981598846, "grad_norm": 4.59375, "learning_rate": 9.992789016487337e-05, "loss": 2.1599, "step": 401 }, { "epoch": 0.017570698020018358, "grad_norm": 7.4375, "learning_rate": 9.992752093419124e-05, "loss": 2.4313, "step": 402 }, { "epoch": 0.017614406224048255, "grad_norm": 6.5, "learning_rate": 9.992715076130414e-05, "loss": 2.2794, "step": 403 }, { "epoch": 0.017658114428078152, "grad_norm": 4.25, "learning_rate": 9.992677964621901e-05, "loss": 2.8487, "step": 404 }, { "epoch": 0.017701822632108045, "grad_norm": 3.203125, "learning_rate": 9.992640758894292e-05, "loss": 2.3595, "step": 405 }, { "epoch": 0.017745530836137943, "grad_norm": 3.34375, "learning_rate": 9.992603458948281e-05, "loss": 2.5555, "step": 406 }, { "epoch": 0.01778923904016784, "grad_norm": 3.3125, "learning_rate": 9.992566064784581e-05, "loss": 2.0888, "step": 407 }, { "epoch": 0.017832947244197737, "grad_norm": 4.5, "learning_rate": 9.99252857640389e-05, "loss": 2.4566, "step": 408 }, { "epoch": 0.017876655448227634, "grad_norm": 3.125, "learning_rate": 9.99249099380692e-05, "loss": 2.1311, "step": 409 }, { "epoch": 0.017920363652257527, "grad_norm": 3.875, "learning_rate": 9.992453316994377e-05, "loss": 2.4535, "step": 410 }, { "epoch": 0.017964071856287425, "grad_norm": 3.9375, "learning_rate": 9.992415545966976e-05, "loss": 2.1546, "step": 411 }, { "epoch": 0.01800778006031732, "grad_norm": 3.484375, "learning_rate": 9.992377680725425e-05, "loss": 2.4082, "step": 412 }, { "epoch": 0.01805148826434722, "grad_norm": 3.40625, "learning_rate": 9.992339721270443e-05, "loss": 2.7536, "step": 413 }, { "epoch": 0.018095196468377116, "grad_norm": 3.703125, "learning_rate": 9.992301667602743e-05, "loss": 2.5332, "step": 414 }, { "epoch": 0.01813890467240701, "grad_norm": 3.109375, "learning_rate": 9.992263519723046e-05, "loss": 2.2629, "step": 415 }, { "epoch": 0.018182612876436906, "grad_norm": 3.671875, "learning_rate": 9.99222527763207e-05, "loss": 2.08, "step": 416 }, { "epoch": 0.018226321080466804, "grad_norm": 3.515625, "learning_rate": 9.992186941330537e-05, "loss": 2.2829, "step": 417 }, { "epoch": 0.0182700292844967, "grad_norm": 3.0, "learning_rate": 9.99214851081917e-05, "loss": 2.5898, "step": 418 }, { "epoch": 0.018313737488526598, "grad_norm": 3.171875, "learning_rate": 9.992109986098696e-05, "loss": 2.425, "step": 419 }, { "epoch": 0.01835744569255649, "grad_norm": 3.59375, "learning_rate": 9.992071367169839e-05, "loss": 2.2076, "step": 420 }, { "epoch": 0.01840115389658639, "grad_norm": 3.203125, "learning_rate": 9.992032654033333e-05, "loss": 2.2598, "step": 421 }, { "epoch": 0.018444862100616286, "grad_norm": 2.96875, "learning_rate": 9.991993846689902e-05, "loss": 2.3534, "step": 422 }, { "epoch": 0.018488570304646183, "grad_norm": 2.84375, "learning_rate": 9.991954945140284e-05, "loss": 2.023, "step": 423 }, { "epoch": 0.01853227850867608, "grad_norm": 3.578125, "learning_rate": 9.99191594938521e-05, "loss": 2.5662, "step": 424 }, { "epoch": 0.018575986712705973, "grad_norm": 3.5625, "learning_rate": 9.991876859425415e-05, "loss": 2.2721, "step": 425 }, { "epoch": 0.01861969491673587, "grad_norm": 3.6875, "learning_rate": 9.991837675261641e-05, "loss": 2.6638, "step": 426 }, { "epoch": 0.018663403120765767, "grad_norm": 3.359375, "learning_rate": 9.991798396894622e-05, "loss": 2.6768, "step": 427 }, { "epoch": 0.018707111324795665, "grad_norm": 3.890625, "learning_rate": 9.991759024325104e-05, "loss": 3.2616, "step": 428 }, { "epoch": 0.01875081952882556, "grad_norm": 3.109375, "learning_rate": 9.99171955755383e-05, "loss": 2.2708, "step": 429 }, { "epoch": 0.018794527732855455, "grad_norm": 4.03125, "learning_rate": 9.991679996581539e-05, "loss": 2.6224, "step": 430 }, { "epoch": 0.018838235936885352, "grad_norm": 5.28125, "learning_rate": 9.991640341408984e-05, "loss": 1.7391, "step": 431 }, { "epoch": 0.01888194414091525, "grad_norm": 4.09375, "learning_rate": 9.991600592036908e-05, "loss": 2.494, "step": 432 }, { "epoch": 0.018925652344945147, "grad_norm": 3.59375, "learning_rate": 9.991560748466067e-05, "loss": 2.703, "step": 433 }, { "epoch": 0.018969360548975044, "grad_norm": 8.625, "learning_rate": 9.991520810697208e-05, "loss": 2.121, "step": 434 }, { "epoch": 0.01901306875300494, "grad_norm": 7.78125, "learning_rate": 9.991480778731086e-05, "loss": 1.7441, "step": 435 }, { "epoch": 0.019056776957034834, "grad_norm": 3.546875, "learning_rate": 9.991440652568458e-05, "loss": 1.911, "step": 436 }, { "epoch": 0.01910048516106473, "grad_norm": 3.0, "learning_rate": 9.99140043221008e-05, "loss": 2.441, "step": 437 }, { "epoch": 0.01914419336509463, "grad_norm": 4.53125, "learning_rate": 9.991360117656712e-05, "loss": 2.7907, "step": 438 }, { "epoch": 0.019187901569124526, "grad_norm": 4.125, "learning_rate": 9.991319708909113e-05, "loss": 2.7946, "step": 439 }, { "epoch": 0.019231609773154423, "grad_norm": 4.0625, "learning_rate": 9.991279205968046e-05, "loss": 2.985, "step": 440 }, { "epoch": 0.019275317977184316, "grad_norm": 3.15625, "learning_rate": 9.991238608834276e-05, "loss": 2.6155, "step": 441 }, { "epoch": 0.019319026181214213, "grad_norm": 3.40625, "learning_rate": 9.99119791750857e-05, "loss": 2.3012, "step": 442 }, { "epoch": 0.01936273438524411, "grad_norm": 6.09375, "learning_rate": 9.991157131991695e-05, "loss": 3.8634, "step": 443 }, { "epoch": 0.019406442589274008, "grad_norm": 3.09375, "learning_rate": 9.991116252284421e-05, "loss": 2.4776, "step": 444 }, { "epoch": 0.019450150793303905, "grad_norm": 6.625, "learning_rate": 9.991075278387518e-05, "loss": 2.2461, "step": 445 }, { "epoch": 0.019493858997333798, "grad_norm": 3.21875, "learning_rate": 9.99103421030176e-05, "loss": 2.2593, "step": 446 }, { "epoch": 0.019537567201363695, "grad_norm": 10.375, "learning_rate": 9.990993048027923e-05, "loss": 2.8026, "step": 447 }, { "epoch": 0.019581275405393592, "grad_norm": 3.8125, "learning_rate": 9.990951791566784e-05, "loss": 2.234, "step": 448 }, { "epoch": 0.01962498360942349, "grad_norm": 4.53125, "learning_rate": 9.99091044091912e-05, "loss": 2.7442, "step": 449 }, { "epoch": 0.019668691813453387, "grad_norm": 4.125, "learning_rate": 9.990868996085712e-05, "loss": 2.5416, "step": 450 }, { "epoch": 0.01971240001748328, "grad_norm": 3.28125, "learning_rate": 9.990827457067343e-05, "loss": 2.4102, "step": 451 }, { "epoch": 0.019756108221513177, "grad_norm": 4.34375, "learning_rate": 9.990785823864795e-05, "loss": 2.8615, "step": 452 }, { "epoch": 0.019799816425543074, "grad_norm": 3.125, "learning_rate": 9.990744096478855e-05, "loss": 2.4941, "step": 453 }, { "epoch": 0.01984352462957297, "grad_norm": 5.0, "learning_rate": 9.990702274910309e-05, "loss": 3.4405, "step": 454 }, { "epoch": 0.01988723283360287, "grad_norm": 4.53125, "learning_rate": 9.990660359159949e-05, "loss": 2.8826, "step": 455 }, { "epoch": 0.019930941037632762, "grad_norm": 4.03125, "learning_rate": 9.990618349228564e-05, "loss": 2.3629, "step": 456 }, { "epoch": 0.01997464924166266, "grad_norm": 5.96875, "learning_rate": 9.990576245116947e-05, "loss": 3.5398, "step": 457 }, { "epoch": 0.020018357445692556, "grad_norm": 4.4375, "learning_rate": 9.990534046825893e-05, "loss": 2.4665, "step": 458 }, { "epoch": 0.020062065649722453, "grad_norm": 3.046875, "learning_rate": 9.990491754356199e-05, "loss": 2.6201, "step": 459 }, { "epoch": 0.02010577385375235, "grad_norm": 4.0, "learning_rate": 9.990449367708661e-05, "loss": 2.3883, "step": 460 }, { "epoch": 0.020149482057782244, "grad_norm": 3.296875, "learning_rate": 9.99040688688408e-05, "loss": 2.013, "step": 461 }, { "epoch": 0.02019319026181214, "grad_norm": 3.15625, "learning_rate": 9.99036431188326e-05, "loss": 2.3463, "step": 462 }, { "epoch": 0.020236898465842038, "grad_norm": 3.984375, "learning_rate": 9.990321642707001e-05, "loss": 1.838, "step": 463 }, { "epoch": 0.020280606669871935, "grad_norm": 3.90625, "learning_rate": 9.99027887935611e-05, "loss": 2.5029, "step": 464 }, { "epoch": 0.020324314873901832, "grad_norm": 3.609375, "learning_rate": 9.990236021831391e-05, "loss": 2.2701, "step": 465 }, { "epoch": 0.02036802307793173, "grad_norm": 4.125, "learning_rate": 9.990193070133659e-05, "loss": 2.177, "step": 466 }, { "epoch": 0.020411731281961623, "grad_norm": 3.90625, "learning_rate": 9.99015002426372e-05, "loss": 2.4276, "step": 467 }, { "epoch": 0.02045543948599152, "grad_norm": 3.578125, "learning_rate": 9.990106884222385e-05, "loss": 2.2515, "step": 468 }, { "epoch": 0.020499147690021417, "grad_norm": 3.390625, "learning_rate": 9.990063650010473e-05, "loss": 2.238, "step": 469 }, { "epoch": 0.020542855894051314, "grad_norm": 3.859375, "learning_rate": 9.990020321628794e-05, "loss": 2.5933, "step": 470 }, { "epoch": 0.02058656409808121, "grad_norm": 10.1875, "learning_rate": 9.989976899078172e-05, "loss": 3.4384, "step": 471 }, { "epoch": 0.020630272302111105, "grad_norm": 3.3125, "learning_rate": 9.989933382359422e-05, "loss": 2.2785, "step": 472 }, { "epoch": 0.020673980506141002, "grad_norm": 4.125, "learning_rate": 9.989889771473367e-05, "loss": 2.3461, "step": 473 }, { "epoch": 0.0207176887101709, "grad_norm": 3.984375, "learning_rate": 9.989846066420829e-05, "loss": 2.3127, "step": 474 }, { "epoch": 0.020761396914200796, "grad_norm": 3.421875, "learning_rate": 9.989802267202635e-05, "loss": 3.1751, "step": 475 }, { "epoch": 0.020805105118230693, "grad_norm": 4.96875, "learning_rate": 9.989758373819608e-05, "loss": 2.5285, "step": 476 }, { "epoch": 0.020848813322260587, "grad_norm": 4.4375, "learning_rate": 9.989714386272579e-05, "loss": 2.064, "step": 477 }, { "epoch": 0.020892521526290484, "grad_norm": 3.40625, "learning_rate": 9.989670304562377e-05, "loss": 2.2734, "step": 478 }, { "epoch": 0.02093622973032038, "grad_norm": 3.515625, "learning_rate": 9.989626128689835e-05, "loss": 2.8476, "step": 479 }, { "epoch": 0.02097993793435028, "grad_norm": 4.71875, "learning_rate": 9.989581858655785e-05, "loss": 2.5702, "step": 480 }, { "epoch": 0.021023646138380175, "grad_norm": 3.3125, "learning_rate": 9.989537494461064e-05, "loss": 2.4645, "step": 481 }, { "epoch": 0.02106735434241007, "grad_norm": 7.40625, "learning_rate": 9.989493036106507e-05, "loss": 3.0028, "step": 482 }, { "epoch": 0.021111062546439966, "grad_norm": 3.515625, "learning_rate": 9.989448483592957e-05, "loss": 2.9433, "step": 483 }, { "epoch": 0.021154770750469863, "grad_norm": 3.71875, "learning_rate": 9.989403836921251e-05, "loss": 2.474, "step": 484 }, { "epoch": 0.02119847895449976, "grad_norm": 3.5625, "learning_rate": 9.989359096092233e-05, "loss": 2.9873, "step": 485 }, { "epoch": 0.021242187158529657, "grad_norm": 3.796875, "learning_rate": 9.989314261106749e-05, "loss": 2.3552, "step": 486 }, { "epoch": 0.02128589536255955, "grad_norm": 3.09375, "learning_rate": 9.98926933196564e-05, "loss": 2.4021, "step": 487 }, { "epoch": 0.021329603566589448, "grad_norm": 3.078125, "learning_rate": 9.989224308669758e-05, "loss": 2.3827, "step": 488 }, { "epoch": 0.021373311770619345, "grad_norm": 2.828125, "learning_rate": 9.989179191219952e-05, "loss": 2.1144, "step": 489 }, { "epoch": 0.021417019974649242, "grad_norm": 3.765625, "learning_rate": 9.989133979617074e-05, "loss": 2.1834, "step": 490 }, { "epoch": 0.02146072817867914, "grad_norm": 3.390625, "learning_rate": 9.989088673861977e-05, "loss": 2.3981, "step": 491 }, { "epoch": 0.021504436382709033, "grad_norm": 3.234375, "learning_rate": 9.989043273955513e-05, "loss": 2.8218, "step": 492 }, { "epoch": 0.02154814458673893, "grad_norm": 3.65625, "learning_rate": 9.988997779898545e-05, "loss": 2.0585, "step": 493 }, { "epoch": 0.021591852790768827, "grad_norm": 3.859375, "learning_rate": 9.988952191691925e-05, "loss": 2.5417, "step": 494 }, { "epoch": 0.021635560994798724, "grad_norm": 3.390625, "learning_rate": 9.988906509336518e-05, "loss": 1.9448, "step": 495 }, { "epoch": 0.02167926919882862, "grad_norm": 3.28125, "learning_rate": 9.988860732833182e-05, "loss": 2.2873, "step": 496 }, { "epoch": 0.021722977402858515, "grad_norm": 3.71875, "learning_rate": 9.988814862182783e-05, "loss": 2.8405, "step": 497 }, { "epoch": 0.021766685606888412, "grad_norm": 3.359375, "learning_rate": 9.988768897386188e-05, "loss": 2.4951, "step": 498 }, { "epoch": 0.02181039381091831, "grad_norm": 3.03125, "learning_rate": 9.988722838444262e-05, "loss": 2.1855, "step": 499 }, { "epoch": 0.021854102014948206, "grad_norm": 3.4375, "learning_rate": 9.988676685357876e-05, "loss": 1.9974, "step": 500 }, { "epoch": 0.021897810218978103, "grad_norm": 3.21875, "learning_rate": 9.988630438127901e-05, "loss": 1.9175, "step": 501 }, { "epoch": 0.021941518423008, "grad_norm": 3.453125, "learning_rate": 9.988584096755208e-05, "loss": 2.3125, "step": 502 }, { "epoch": 0.021985226627037894, "grad_norm": 4.53125, "learning_rate": 9.988537661240673e-05, "loss": 2.1997, "step": 503 }, { "epoch": 0.02202893483106779, "grad_norm": 3.4375, "learning_rate": 9.988491131585171e-05, "loss": 2.3069, "step": 504 }, { "epoch": 0.022072643035097688, "grad_norm": 4.0625, "learning_rate": 9.988444507789582e-05, "loss": 2.2246, "step": 505 }, { "epoch": 0.022116351239127585, "grad_norm": 3.203125, "learning_rate": 9.988397789854784e-05, "loss": 2.7865, "step": 506 }, { "epoch": 0.022160059443157482, "grad_norm": 3.34375, "learning_rate": 9.98835097778166e-05, "loss": 2.2723, "step": 507 }, { "epoch": 0.022203767647187376, "grad_norm": 4.3125, "learning_rate": 9.988304071571093e-05, "loss": 2.3632, "step": 508 }, { "epoch": 0.022247475851217273, "grad_norm": 4.71875, "learning_rate": 9.988257071223968e-05, "loss": 2.6076, "step": 509 }, { "epoch": 0.02229118405524717, "grad_norm": 3.265625, "learning_rate": 9.988209976741172e-05, "loss": 2.2625, "step": 510 }, { "epoch": 0.022334892259277067, "grad_norm": 3.34375, "learning_rate": 9.988162788123594e-05, "loss": 2.5188, "step": 511 }, { "epoch": 0.022378600463306964, "grad_norm": 4.53125, "learning_rate": 9.988115505372123e-05, "loss": 2.7811, "step": 512 }, { "epoch": 0.022422308667336858, "grad_norm": 3.28125, "learning_rate": 9.988068128487654e-05, "loss": 2.3825, "step": 513 }, { "epoch": 0.022466016871366755, "grad_norm": 3.84375, "learning_rate": 9.988020657471077e-05, "loss": 2.4349, "step": 514 }, { "epoch": 0.022509725075396652, "grad_norm": 3.875, "learning_rate": 9.987973092323293e-05, "loss": 2.6618, "step": 515 }, { "epoch": 0.02255343327942655, "grad_norm": 3.671875, "learning_rate": 9.987925433045197e-05, "loss": 1.955, "step": 516 }, { "epoch": 0.022597141483456446, "grad_norm": 7.5625, "learning_rate": 9.987877679637688e-05, "loss": 2.9896, "step": 517 }, { "epoch": 0.02264084968748634, "grad_norm": 3.671875, "learning_rate": 9.987829832101667e-05, "loss": 2.0962, "step": 518 }, { "epoch": 0.022684557891516237, "grad_norm": 4.25, "learning_rate": 9.987781890438039e-05, "loss": 2.0316, "step": 519 }, { "epoch": 0.022728266095546134, "grad_norm": 3.359375, "learning_rate": 9.987733854647707e-05, "loss": 2.8837, "step": 520 }, { "epoch": 0.02277197429957603, "grad_norm": 3.421875, "learning_rate": 9.987685724731577e-05, "loss": 2.0682, "step": 521 }, { "epoch": 0.022815682503605928, "grad_norm": 4.125, "learning_rate": 9.987637500690559e-05, "loss": 2.4832, "step": 522 }, { "epoch": 0.02285939070763582, "grad_norm": 4.21875, "learning_rate": 9.987589182525561e-05, "loss": 3.3895, "step": 523 }, { "epoch": 0.02290309891166572, "grad_norm": 2.921875, "learning_rate": 9.987540770237498e-05, "loss": 2.2143, "step": 524 }, { "epoch": 0.022946807115695616, "grad_norm": 3.671875, "learning_rate": 9.98749226382728e-05, "loss": 2.1937, "step": 525 }, { "epoch": 0.022990515319725513, "grad_norm": 4.0625, "learning_rate": 9.987443663295825e-05, "loss": 3.2734, "step": 526 }, { "epoch": 0.02303422352375541, "grad_norm": 3.640625, "learning_rate": 9.987394968644049e-05, "loss": 2.7082, "step": 527 }, { "epoch": 0.023077931727785304, "grad_norm": 3.25, "learning_rate": 9.987346179872869e-05, "loss": 2.4288, "step": 528 }, { "epoch": 0.0231216399318152, "grad_norm": 4.5625, "learning_rate": 9.987297296983211e-05, "loss": 2.2455, "step": 529 }, { "epoch": 0.023165348135845098, "grad_norm": 3.40625, "learning_rate": 9.987248319975993e-05, "loss": 2.1247, "step": 530 }, { "epoch": 0.023209056339874995, "grad_norm": 4.90625, "learning_rate": 9.987199248852141e-05, "loss": 2.9593, "step": 531 }, { "epoch": 0.023252764543904892, "grad_norm": 4.21875, "learning_rate": 9.987150083612579e-05, "loss": 2.4991, "step": 532 }, { "epoch": 0.023296472747934786, "grad_norm": 3.59375, "learning_rate": 9.987100824258239e-05, "loss": 2.68, "step": 533 }, { "epoch": 0.023340180951964683, "grad_norm": 3.296875, "learning_rate": 9.987051470790048e-05, "loss": 2.7721, "step": 534 }, { "epoch": 0.02338388915599458, "grad_norm": 3.5, "learning_rate": 9.987002023208935e-05, "loss": 2.1934, "step": 535 }, { "epoch": 0.023427597360024477, "grad_norm": 3.5, "learning_rate": 9.986952481515836e-05, "loss": 2.1404, "step": 536 }, { "epoch": 0.023471305564054374, "grad_norm": 2.953125, "learning_rate": 9.986902845711687e-05, "loss": 2.2057, "step": 537 }, { "epoch": 0.02351501376808427, "grad_norm": 3.4375, "learning_rate": 9.986853115797423e-05, "loss": 2.0918, "step": 538 }, { "epoch": 0.023558721972114165, "grad_norm": 3.25, "learning_rate": 9.986803291773982e-05, "loss": 2.3491, "step": 539 }, { "epoch": 0.023602430176144062, "grad_norm": 3.25, "learning_rate": 9.986753373642306e-05, "loss": 3.0529, "step": 540 }, { "epoch": 0.02364613838017396, "grad_norm": 2.765625, "learning_rate": 9.986703361403335e-05, "loss": 2.1784, "step": 541 }, { "epoch": 0.023689846584203856, "grad_norm": 3.265625, "learning_rate": 9.986653255058014e-05, "loss": 2.5431, "step": 542 }, { "epoch": 0.023733554788233753, "grad_norm": 3.515625, "learning_rate": 9.986603054607288e-05, "loss": 2.2682, "step": 543 }, { "epoch": 0.023777262992263647, "grad_norm": 3.234375, "learning_rate": 9.986552760052105e-05, "loss": 2.5113, "step": 544 }, { "epoch": 0.023820971196293544, "grad_norm": 4.28125, "learning_rate": 9.986502371393413e-05, "loss": 2.7172, "step": 545 }, { "epoch": 0.02386467940032344, "grad_norm": 4.1875, "learning_rate": 9.986451888632165e-05, "loss": 2.6705, "step": 546 }, { "epoch": 0.023908387604353338, "grad_norm": 3.4375, "learning_rate": 9.986401311769312e-05, "loss": 2.2219, "step": 547 }, { "epoch": 0.023952095808383235, "grad_norm": 3.875, "learning_rate": 9.98635064080581e-05, "loss": 2.3801, "step": 548 }, { "epoch": 0.02399580401241313, "grad_norm": 3.46875, "learning_rate": 9.986299875742613e-05, "loss": 2.2103, "step": 549 }, { "epoch": 0.024039512216443026, "grad_norm": 4.75, "learning_rate": 9.98624901658068e-05, "loss": 2.581, "step": 550 }, { "epoch": 0.024083220420472923, "grad_norm": 3.109375, "learning_rate": 9.986198063320971e-05, "loss": 2.2305, "step": 551 }, { "epoch": 0.02412692862450282, "grad_norm": 3.515625, "learning_rate": 9.986147015964446e-05, "loss": 2.5085, "step": 552 }, { "epoch": 0.024170636828532717, "grad_norm": 3.640625, "learning_rate": 9.986095874512072e-05, "loss": 2.3899, "step": 553 }, { "epoch": 0.02421434503256261, "grad_norm": 3.21875, "learning_rate": 9.986044638964811e-05, "loss": 2.2853, "step": 554 }, { "epoch": 0.024258053236592508, "grad_norm": 4.65625, "learning_rate": 9.985993309323631e-05, "loss": 2.8576, "step": 555 }, { "epoch": 0.024301761440622405, "grad_norm": 4.09375, "learning_rate": 9.985941885589502e-05, "loss": 2.8423, "step": 556 }, { "epoch": 0.024345469644652302, "grad_norm": 6.40625, "learning_rate": 9.985890367763391e-05, "loss": 2.9808, "step": 557 }, { "epoch": 0.0243891778486822, "grad_norm": 2.9375, "learning_rate": 9.985838755846273e-05, "loss": 2.2293, "step": 558 }, { "epoch": 0.024432886052712093, "grad_norm": 4.40625, "learning_rate": 9.98578704983912e-05, "loss": 3.302, "step": 559 }, { "epoch": 0.02447659425674199, "grad_norm": 13.1875, "learning_rate": 9.98573524974291e-05, "loss": 3.9046, "step": 560 }, { "epoch": 0.024520302460771887, "grad_norm": 3.0625, "learning_rate": 9.98568335555862e-05, "loss": 2.1789, "step": 561 }, { "epoch": 0.024564010664801784, "grad_norm": 3.640625, "learning_rate": 9.985631367287226e-05, "loss": 2.3703, "step": 562 }, { "epoch": 0.02460771886883168, "grad_norm": 4.78125, "learning_rate": 9.985579284929715e-05, "loss": 2.5617, "step": 563 }, { "epoch": 0.024651427072861574, "grad_norm": 2.8125, "learning_rate": 9.985527108487065e-05, "loss": 2.1361, "step": 564 }, { "epoch": 0.02469513527689147, "grad_norm": 4.125, "learning_rate": 9.985474837960263e-05, "loss": 2.3338, "step": 565 }, { "epoch": 0.02473884348092137, "grad_norm": 2.984375, "learning_rate": 9.985422473350295e-05, "loss": 2.0618, "step": 566 }, { "epoch": 0.024782551684951266, "grad_norm": 3.0625, "learning_rate": 9.985370014658148e-05, "loss": 2.133, "step": 567 }, { "epoch": 0.024826259888981163, "grad_norm": 3.359375, "learning_rate": 9.985317461884814e-05, "loss": 2.9443, "step": 568 }, { "epoch": 0.02486996809301106, "grad_norm": 3.25, "learning_rate": 9.985264815031283e-05, "loss": 2.6085, "step": 569 }, { "epoch": 0.024913676297040954, "grad_norm": 4.625, "learning_rate": 9.98521207409855e-05, "loss": 2.6604, "step": 570 }, { "epoch": 0.02495738450107085, "grad_norm": 3.078125, "learning_rate": 9.985159239087609e-05, "loss": 2.368, "step": 571 }, { "epoch": 0.025001092705100748, "grad_norm": 3.609375, "learning_rate": 9.985106309999458e-05, "loss": 2.2466, "step": 572 }, { "epoch": 0.025044800909130645, "grad_norm": 3.15625, "learning_rate": 9.985053286835095e-05, "loss": 2.3146, "step": 573 }, { "epoch": 0.025088509113160542, "grad_norm": 2.953125, "learning_rate": 9.985000169595521e-05, "loss": 2.3523, "step": 574 }, { "epoch": 0.025132217317190435, "grad_norm": 3.328125, "learning_rate": 9.984946958281739e-05, "loss": 2.6499, "step": 575 }, { "epoch": 0.025175925521220333, "grad_norm": 3.421875, "learning_rate": 9.984893652894753e-05, "loss": 1.8698, "step": 576 }, { "epoch": 0.02521963372525023, "grad_norm": 3.453125, "learning_rate": 9.984840253435568e-05, "loss": 2.1902, "step": 577 }, { "epoch": 0.025263341929280127, "grad_norm": 3.6875, "learning_rate": 9.984786759905191e-05, "loss": 2.1288, "step": 578 }, { "epoch": 0.025307050133310024, "grad_norm": 3.25, "learning_rate": 9.984733172304634e-05, "loss": 2.2317, "step": 579 }, { "epoch": 0.025350758337339917, "grad_norm": 10.75, "learning_rate": 9.984679490634907e-05, "loss": 2.4892, "step": 580 }, { "epoch": 0.025394466541369815, "grad_norm": 4.75, "learning_rate": 9.984625714897024e-05, "loss": 2.6579, "step": 581 }, { "epoch": 0.02543817474539971, "grad_norm": 3.375, "learning_rate": 9.984571845091999e-05, "loss": 2.2262, "step": 582 }, { "epoch": 0.02548188294942961, "grad_norm": 3.296875, "learning_rate": 9.984517881220848e-05, "loss": 2.4475, "step": 583 }, { "epoch": 0.025525591153459506, "grad_norm": 3.046875, "learning_rate": 9.984463823284589e-05, "loss": 2.2006, "step": 584 }, { "epoch": 0.0255692993574894, "grad_norm": 2.984375, "learning_rate": 9.984409671284243e-05, "loss": 2.0782, "step": 585 }, { "epoch": 0.025613007561519296, "grad_norm": 3.328125, "learning_rate": 9.984355425220835e-05, "loss": 2.7302, "step": 586 }, { "epoch": 0.025656715765549194, "grad_norm": 3.328125, "learning_rate": 9.984301085095382e-05, "loss": 2.5203, "step": 587 }, { "epoch": 0.02570042396957909, "grad_norm": 3.234375, "learning_rate": 9.984246650908915e-05, "loss": 2.5343, "step": 588 }, { "epoch": 0.025744132173608988, "grad_norm": 5.5625, "learning_rate": 9.98419212266246e-05, "loss": 2.0457, "step": 589 }, { "epoch": 0.02578784037763888, "grad_norm": 3.59375, "learning_rate": 9.984137500357044e-05, "loss": 2.2221, "step": 590 }, { "epoch": 0.02583154858166878, "grad_norm": 3.71875, "learning_rate": 9.984082783993703e-05, "loss": 2.0484, "step": 591 }, { "epoch": 0.025875256785698676, "grad_norm": 3.140625, "learning_rate": 9.984027973573462e-05, "loss": 2.1899, "step": 592 }, { "epoch": 0.025918964989728573, "grad_norm": 4.5625, "learning_rate": 9.983973069097359e-05, "loss": 2.5412, "step": 593 }, { "epoch": 0.02596267319375847, "grad_norm": 3.8125, "learning_rate": 9.983918070566433e-05, "loss": 2.7712, "step": 594 }, { "epoch": 0.026006381397788363, "grad_norm": 3.203125, "learning_rate": 9.983862977981718e-05, "loss": 2.521, "step": 595 }, { "epoch": 0.02605008960181826, "grad_norm": 3.5, "learning_rate": 9.983807791344255e-05, "loss": 2.3225, "step": 596 }, { "epoch": 0.026093797805848157, "grad_norm": 3.90625, "learning_rate": 9.983752510655084e-05, "loss": 2.5047, "step": 597 }, { "epoch": 0.026137506009878055, "grad_norm": 3.390625, "learning_rate": 9.983697135915252e-05, "loss": 2.0369, "step": 598 }, { "epoch": 0.02618121421390795, "grad_norm": 2.859375, "learning_rate": 9.9836416671258e-05, "loss": 2.3224, "step": 599 }, { "epoch": 0.026224922417937845, "grad_norm": 3.203125, "learning_rate": 9.983586104287778e-05, "loss": 1.9921, "step": 600 }, { "epoch": 0.026268630621967742, "grad_norm": 3.484375, "learning_rate": 9.983530447402231e-05, "loss": 1.903, "step": 601 }, { "epoch": 0.02631233882599764, "grad_norm": 3.265625, "learning_rate": 9.983474696470212e-05, "loss": 2.197, "step": 602 }, { "epoch": 0.026356047030027537, "grad_norm": 5.21875, "learning_rate": 9.983418851492773e-05, "loss": 2.5317, "step": 603 }, { "epoch": 0.026399755234057434, "grad_norm": 4.375, "learning_rate": 9.983362912470966e-05, "loss": 2.7071, "step": 604 }, { "epoch": 0.02644346343808733, "grad_norm": 6.6875, "learning_rate": 9.98330687940585e-05, "loss": 2.3881, "step": 605 }, { "epoch": 0.026487171642117224, "grad_norm": 4.21875, "learning_rate": 9.983250752298478e-05, "loss": 2.201, "step": 606 }, { "epoch": 0.02653087984614712, "grad_norm": 4.3125, "learning_rate": 9.983194531149914e-05, "loss": 2.5151, "step": 607 }, { "epoch": 0.02657458805017702, "grad_norm": 3.9375, "learning_rate": 9.983138215961214e-05, "loss": 2.4005, "step": 608 }, { "epoch": 0.026618296254206916, "grad_norm": 3.359375, "learning_rate": 9.983081806733444e-05, "loss": 2.1401, "step": 609 }, { "epoch": 0.026662004458236813, "grad_norm": 3.25, "learning_rate": 9.983025303467668e-05, "loss": 2.2363, "step": 610 }, { "epoch": 0.026705712662266706, "grad_norm": 2.84375, "learning_rate": 9.982968706164953e-05, "loss": 1.9678, "step": 611 }, { "epoch": 0.026749420866296603, "grad_norm": 3.90625, "learning_rate": 9.982912014826365e-05, "loss": 2.2302, "step": 612 }, { "epoch": 0.0267931290703265, "grad_norm": 3.796875, "learning_rate": 9.982855229452975e-05, "loss": 2.2146, "step": 613 }, { "epoch": 0.026836837274356398, "grad_norm": 3.265625, "learning_rate": 9.982798350045854e-05, "loss": 2.1537, "step": 614 }, { "epoch": 0.026880545478386295, "grad_norm": 4.0625, "learning_rate": 9.982741376606078e-05, "loss": 2.3415, "step": 615 }, { "epoch": 0.026924253682416188, "grad_norm": 3.1875, "learning_rate": 9.982684309134719e-05, "loss": 1.9102, "step": 616 }, { "epoch": 0.026967961886446085, "grad_norm": 3.671875, "learning_rate": 9.982627147632855e-05, "loss": 2.2455, "step": 617 }, { "epoch": 0.027011670090475982, "grad_norm": 3.265625, "learning_rate": 9.982569892101565e-05, "loss": 2.1046, "step": 618 }, { "epoch": 0.02705537829450588, "grad_norm": 4.40625, "learning_rate": 9.982512542541929e-05, "loss": 2.8151, "step": 619 }, { "epoch": 0.027099086498535777, "grad_norm": 3.34375, "learning_rate": 9.98245509895503e-05, "loss": 2.7252, "step": 620 }, { "epoch": 0.02714279470256567, "grad_norm": 2.875, "learning_rate": 9.982397561341952e-05, "loss": 1.9278, "step": 621 }, { "epoch": 0.027186502906595567, "grad_norm": 2.921875, "learning_rate": 9.982339929703781e-05, "loss": 2.1459, "step": 622 }, { "epoch": 0.027230211110625464, "grad_norm": 3.46875, "learning_rate": 9.982282204041604e-05, "loss": 2.5963, "step": 623 }, { "epoch": 0.02727391931465536, "grad_norm": 7.90625, "learning_rate": 9.982224384356508e-05, "loss": 2.058, "step": 624 }, { "epoch": 0.02731762751868526, "grad_norm": 2.859375, "learning_rate": 9.98216647064959e-05, "loss": 2.4754, "step": 625 }, { "epoch": 0.027361335722715152, "grad_norm": 3.640625, "learning_rate": 9.982108462921937e-05, "loss": 2.964, "step": 626 }, { "epoch": 0.02740504392674505, "grad_norm": 3.4375, "learning_rate": 9.982050361174647e-05, "loss": 2.1477, "step": 627 }, { "epoch": 0.027448752130774946, "grad_norm": 2.75, "learning_rate": 9.981992165408816e-05, "loss": 2.2316, "step": 628 }, { "epoch": 0.027492460334804843, "grad_norm": 2.984375, "learning_rate": 9.981933875625542e-05, "loss": 2.4692, "step": 629 }, { "epoch": 0.02753616853883474, "grad_norm": 3.328125, "learning_rate": 9.981875491825924e-05, "loss": 2.175, "step": 630 }, { "epoch": 0.027579876742864634, "grad_norm": 5.15625, "learning_rate": 9.981817014011066e-05, "loss": 2.2929, "step": 631 }, { "epoch": 0.02762358494689453, "grad_norm": 3.59375, "learning_rate": 9.981758442182068e-05, "loss": 2.5712, "step": 632 }, { "epoch": 0.027667293150924428, "grad_norm": 3.265625, "learning_rate": 9.981699776340039e-05, "loss": 1.9267, "step": 633 }, { "epoch": 0.027711001354954325, "grad_norm": 3.390625, "learning_rate": 9.981641016486085e-05, "loss": 2.7868, "step": 634 }, { "epoch": 0.027754709558984222, "grad_norm": 3.84375, "learning_rate": 9.981582162621314e-05, "loss": 2.5112, "step": 635 }, { "epoch": 0.027798417763014116, "grad_norm": 3.46875, "learning_rate": 9.981523214746837e-05, "loss": 2.2265, "step": 636 }, { "epoch": 0.027842125967044013, "grad_norm": 4.125, "learning_rate": 9.981464172863768e-05, "loss": 2.7183, "step": 637 }, { "epoch": 0.02788583417107391, "grad_norm": 5.0, "learning_rate": 9.981405036973219e-05, "loss": 2.5362, "step": 638 }, { "epoch": 0.027929542375103807, "grad_norm": 3.8125, "learning_rate": 9.981345807076307e-05, "loss": 3.1845, "step": 639 }, { "epoch": 0.027973250579133704, "grad_norm": 3.25, "learning_rate": 9.98128648317415e-05, "loss": 2.0682, "step": 640 }, { "epoch": 0.0280169587831636, "grad_norm": 3.265625, "learning_rate": 9.981227065267867e-05, "loss": 2.0779, "step": 641 }, { "epoch": 0.028060666987193495, "grad_norm": 3.1875, "learning_rate": 9.981167553358579e-05, "loss": 2.5607, "step": 642 }, { "epoch": 0.028104375191223392, "grad_norm": 3.21875, "learning_rate": 9.981107947447409e-05, "loss": 2.2148, "step": 643 }, { "epoch": 0.02814808339525329, "grad_norm": 3.265625, "learning_rate": 9.981048247535483e-05, "loss": 2.6086, "step": 644 }, { "epoch": 0.028191791599283186, "grad_norm": 3.453125, "learning_rate": 9.980988453623928e-05, "loss": 2.6902, "step": 645 }, { "epoch": 0.028235499803313083, "grad_norm": 4.9375, "learning_rate": 9.98092856571387e-05, "loss": 2.5818, "step": 646 }, { "epoch": 0.028279208007342977, "grad_norm": 3.5, "learning_rate": 9.98086858380644e-05, "loss": 2.1797, "step": 647 }, { "epoch": 0.028322916211372874, "grad_norm": 3.578125, "learning_rate": 9.980808507902773e-05, "loss": 2.1088, "step": 648 }, { "epoch": 0.02836662441540277, "grad_norm": 3.296875, "learning_rate": 9.980748338003998e-05, "loss": 2.5253, "step": 649 }, { "epoch": 0.02841033261943267, "grad_norm": 4.75, "learning_rate": 9.980688074111253e-05, "loss": 2.7148, "step": 650 }, { "epoch": 0.028454040823462565, "grad_norm": 3.21875, "learning_rate": 9.980627716225675e-05, "loss": 2.6032, "step": 651 }, { "epoch": 0.02849774902749246, "grad_norm": 15.375, "learning_rate": 9.980567264348404e-05, "loss": 2.927, "step": 652 }, { "epoch": 0.028541457231522356, "grad_norm": 3.421875, "learning_rate": 9.98050671848058e-05, "loss": 2.7096, "step": 653 }, { "epoch": 0.028585165435552253, "grad_norm": 6.0625, "learning_rate": 9.980446078623345e-05, "loss": 1.9591, "step": 654 }, { "epoch": 0.02862887363958215, "grad_norm": 3.53125, "learning_rate": 9.980385344777842e-05, "loss": 2.5283, "step": 655 }, { "epoch": 0.028672581843612047, "grad_norm": 3.125, "learning_rate": 9.980324516945221e-05, "loss": 1.8712, "step": 656 }, { "epoch": 0.02871629004764194, "grad_norm": 2.765625, "learning_rate": 9.980263595126629e-05, "loss": 2.1822, "step": 657 }, { "epoch": 0.028759998251671838, "grad_norm": 4.03125, "learning_rate": 9.980202579323212e-05, "loss": 2.3666, "step": 658 }, { "epoch": 0.028803706455701735, "grad_norm": 3.171875, "learning_rate": 9.980141469536125e-05, "loss": 2.4666, "step": 659 }, { "epoch": 0.028847414659731632, "grad_norm": 3.46875, "learning_rate": 9.98008026576652e-05, "loss": 2.176, "step": 660 }, { "epoch": 0.02889112286376153, "grad_norm": 3.25, "learning_rate": 9.980018968015552e-05, "loss": 1.8907, "step": 661 }, { "epoch": 0.028934831067791423, "grad_norm": 3.890625, "learning_rate": 9.979957576284379e-05, "loss": 1.7983, "step": 662 }, { "epoch": 0.02897853927182132, "grad_norm": 3.6875, "learning_rate": 9.979896090574157e-05, "loss": 3.0609, "step": 663 }, { "epoch": 0.029022247475851217, "grad_norm": 3.515625, "learning_rate": 9.97983451088605e-05, "loss": 2.1672, "step": 664 }, { "epoch": 0.029065955679881114, "grad_norm": 10.0625, "learning_rate": 9.979772837221216e-05, "loss": 2.3753, "step": 665 }, { "epoch": 0.02910966388391101, "grad_norm": 2.875, "learning_rate": 9.979711069580821e-05, "loss": 2.6742, "step": 666 }, { "epoch": 0.029153372087940905, "grad_norm": 3.890625, "learning_rate": 9.979649207966031e-05, "loss": 2.415, "step": 667 }, { "epoch": 0.029197080291970802, "grad_norm": 5.78125, "learning_rate": 9.979587252378013e-05, "loss": 2.6886, "step": 668 }, { "epoch": 0.0292407884960007, "grad_norm": 3.953125, "learning_rate": 9.979525202817936e-05, "loss": 2.5185, "step": 669 }, { "epoch": 0.029284496700030596, "grad_norm": 3.703125, "learning_rate": 9.979463059286972e-05, "loss": 2.3531, "step": 670 }, { "epoch": 0.029328204904060493, "grad_norm": 3.3125, "learning_rate": 9.979400821786291e-05, "loss": 2.2063, "step": 671 }, { "epoch": 0.02937191310809039, "grad_norm": 3.46875, "learning_rate": 9.979338490317072e-05, "loss": 2.5651, "step": 672 }, { "epoch": 0.029415621312120284, "grad_norm": 2.75, "learning_rate": 9.979276064880486e-05, "loss": 2.0795, "step": 673 }, { "epoch": 0.02945932951615018, "grad_norm": 3.03125, "learning_rate": 9.979213545477715e-05, "loss": 2.2151, "step": 674 }, { "epoch": 0.029503037720180078, "grad_norm": 3.109375, "learning_rate": 9.979150932109937e-05, "loss": 2.5187, "step": 675 }, { "epoch": 0.029546745924209975, "grad_norm": 2.875, "learning_rate": 9.979088224778335e-05, "loss": 2.2578, "step": 676 }, { "epoch": 0.029590454128239872, "grad_norm": 3.421875, "learning_rate": 9.97902542348409e-05, "loss": 2.3404, "step": 677 }, { "epoch": 0.029634162332269766, "grad_norm": 3.140625, "learning_rate": 9.978962528228388e-05, "loss": 2.2639, "step": 678 }, { "epoch": 0.029677870536299663, "grad_norm": 3.296875, "learning_rate": 9.978899539012418e-05, "loss": 1.9094, "step": 679 }, { "epoch": 0.02972157874032956, "grad_norm": 3.09375, "learning_rate": 9.978836455837368e-05, "loss": 2.413, "step": 680 }, { "epoch": 0.029765286944359457, "grad_norm": 4.5625, "learning_rate": 9.978773278704426e-05, "loss": 2.5859, "step": 681 }, { "epoch": 0.029808995148389354, "grad_norm": 3.203125, "learning_rate": 9.978710007614786e-05, "loss": 2.1896, "step": 682 }, { "epoch": 0.029852703352419248, "grad_norm": 3.59375, "learning_rate": 9.978646642569644e-05, "loss": 2.4116, "step": 683 }, { "epoch": 0.029896411556449145, "grad_norm": 3.453125, "learning_rate": 9.978583183570193e-05, "loss": 2.1242, "step": 684 }, { "epoch": 0.029940119760479042, "grad_norm": 2.8125, "learning_rate": 9.97851963061763e-05, "loss": 2.1152, "step": 685 }, { "epoch": 0.02998382796450894, "grad_norm": 3.046875, "learning_rate": 9.978455983713157e-05, "loss": 2.6089, "step": 686 }, { "epoch": 0.030027536168538836, "grad_norm": 3.40625, "learning_rate": 9.978392242857973e-05, "loss": 2.4736, "step": 687 }, { "epoch": 0.03007124437256873, "grad_norm": 3.546875, "learning_rate": 9.978328408053282e-05, "loss": 2.5537, "step": 688 }, { "epoch": 0.030114952576598627, "grad_norm": 2.609375, "learning_rate": 9.978264479300289e-05, "loss": 1.7285, "step": 689 }, { "epoch": 0.030158660780628524, "grad_norm": 4.84375, "learning_rate": 9.978200456600198e-05, "loss": 2.3336, "step": 690 }, { "epoch": 0.03020236898465842, "grad_norm": 2.859375, "learning_rate": 9.97813633995422e-05, "loss": 1.8666, "step": 691 }, { "epoch": 0.030246077188688318, "grad_norm": 3.4375, "learning_rate": 9.978072129363564e-05, "loss": 2.2779, "step": 692 }, { "epoch": 0.030289785392718212, "grad_norm": 3.21875, "learning_rate": 9.978007824829442e-05, "loss": 2.6795, "step": 693 }, { "epoch": 0.03033349359674811, "grad_norm": 3.3125, "learning_rate": 9.977943426353067e-05, "loss": 2.7468, "step": 694 }, { "epoch": 0.030377201800778006, "grad_norm": 4.96875, "learning_rate": 9.977878933935655e-05, "loss": 2.453, "step": 695 }, { "epoch": 0.030420910004807903, "grad_norm": 3.546875, "learning_rate": 9.977814347578421e-05, "loss": 2.216, "step": 696 }, { "epoch": 0.0304646182088378, "grad_norm": 3.09375, "learning_rate": 9.977749667282588e-05, "loss": 2.5925, "step": 697 }, { "epoch": 0.030508326412867694, "grad_norm": 3.21875, "learning_rate": 9.977684893049371e-05, "loss": 2.7342, "step": 698 }, { "epoch": 0.03055203461689759, "grad_norm": 3.3125, "learning_rate": 9.977620024879997e-05, "loss": 2.2558, "step": 699 }, { "epoch": 0.030595742820927488, "grad_norm": 3.53125, "learning_rate": 9.977555062775688e-05, "loss": 2.322, "step": 700 }, { "epoch": 0.030639451024957385, "grad_norm": 3.1875, "learning_rate": 9.97749000673767e-05, "loss": 2.3314, "step": 701 }, { "epoch": 0.030683159228987282, "grad_norm": 3.390625, "learning_rate": 9.977424856767172e-05, "loss": 2.0297, "step": 702 }, { "epoch": 0.030726867433017176, "grad_norm": 3.203125, "learning_rate": 9.977359612865423e-05, "loss": 2.2477, "step": 703 }, { "epoch": 0.030770575637047073, "grad_norm": 3.4375, "learning_rate": 9.977294275033654e-05, "loss": 2.28, "step": 704 }, { "epoch": 0.03081428384107697, "grad_norm": 11.4375, "learning_rate": 9.977228843273098e-05, "loss": 6.5621, "step": 705 }, { "epoch": 0.030857992045106867, "grad_norm": 4.125, "learning_rate": 9.977163317584988e-05, "loss": 2.6921, "step": 706 }, { "epoch": 0.030901700249136764, "grad_norm": 6.71875, "learning_rate": 9.977097697970563e-05, "loss": 2.1848, "step": 707 }, { "epoch": 0.03094540845316666, "grad_norm": 3.34375, "learning_rate": 9.977031984431063e-05, "loss": 2.3677, "step": 708 }, { "epoch": 0.030989116657196555, "grad_norm": 4.0, "learning_rate": 9.976966176967722e-05, "loss": 2.5439, "step": 709 }, { "epoch": 0.031032824861226452, "grad_norm": 2.9375, "learning_rate": 9.976900275581789e-05, "loss": 2.305, "step": 710 }, { "epoch": 0.03107653306525635, "grad_norm": 3.375, "learning_rate": 9.976834280274502e-05, "loss": 2.5969, "step": 711 }, { "epoch": 0.031120241269286246, "grad_norm": 4.75, "learning_rate": 9.976768191047109e-05, "loss": 2.6965, "step": 712 }, { "epoch": 0.031163949473316143, "grad_norm": 3.046875, "learning_rate": 9.976702007900857e-05, "loss": 1.9579, "step": 713 }, { "epoch": 0.031207657677346037, "grad_norm": 3.8125, "learning_rate": 9.976635730836995e-05, "loss": 1.9571, "step": 714 }, { "epoch": 0.031251365881375934, "grad_norm": 3.234375, "learning_rate": 9.976569359856773e-05, "loss": 2.0944, "step": 715 }, { "epoch": 0.03129507408540583, "grad_norm": 3.140625, "learning_rate": 9.976502894961445e-05, "loss": 2.0834, "step": 716 }, { "epoch": 0.03133878228943573, "grad_norm": 3.890625, "learning_rate": 9.976436336152265e-05, "loss": 2.4847, "step": 717 }, { "epoch": 0.03138249049346562, "grad_norm": 2.953125, "learning_rate": 9.976369683430487e-05, "loss": 2.2834, "step": 718 }, { "epoch": 0.03142619869749552, "grad_norm": 3.4375, "learning_rate": 9.976302936797371e-05, "loss": 2.1861, "step": 719 }, { "epoch": 0.031469906901525416, "grad_norm": 4.71875, "learning_rate": 9.976236096254177e-05, "loss": 3.4581, "step": 720 }, { "epoch": 0.03151361510555531, "grad_norm": 5.09375, "learning_rate": 9.976169161802164e-05, "loss": 2.4288, "step": 721 }, { "epoch": 0.03155732330958521, "grad_norm": 3.140625, "learning_rate": 9.976102133442596e-05, "loss": 2.3548, "step": 722 }, { "epoch": 0.031601031513615103, "grad_norm": 3.390625, "learning_rate": 9.976035011176738e-05, "loss": 2.1473, "step": 723 }, { "epoch": 0.031644739717645004, "grad_norm": 4.71875, "learning_rate": 9.975967795005859e-05, "loss": 2.6153, "step": 724 }, { "epoch": 0.0316884479216749, "grad_norm": 3.6875, "learning_rate": 9.975900484931225e-05, "loss": 2.9447, "step": 725 }, { "epoch": 0.0317321561257048, "grad_norm": 2.984375, "learning_rate": 9.975833080954107e-05, "loss": 2.2077, "step": 726 }, { "epoch": 0.03177586432973469, "grad_norm": 2.921875, "learning_rate": 9.975765583075776e-05, "loss": 2.2345, "step": 727 }, { "epoch": 0.031819572533764585, "grad_norm": 3.15625, "learning_rate": 9.975697991297506e-05, "loss": 2.1691, "step": 728 }, { "epoch": 0.031863280737794486, "grad_norm": 3.25, "learning_rate": 9.975630305620574e-05, "loss": 2.5123, "step": 729 }, { "epoch": 0.03190698894182438, "grad_norm": 3.234375, "learning_rate": 9.975562526046256e-05, "loss": 2.519, "step": 730 }, { "epoch": 0.03195069714585428, "grad_norm": 2.796875, "learning_rate": 9.975494652575832e-05, "loss": 2.0585, "step": 731 }, { "epoch": 0.031994405349884174, "grad_norm": 2.65625, "learning_rate": 9.975426685210582e-05, "loss": 2.1076, "step": 732 }, { "epoch": 0.03203811355391407, "grad_norm": 3.4375, "learning_rate": 9.975358623951789e-05, "loss": 2.4889, "step": 733 }, { "epoch": 0.03208182175794397, "grad_norm": 3.265625, "learning_rate": 9.975290468800739e-05, "loss": 2.576, "step": 734 }, { "epoch": 0.03212552996197386, "grad_norm": 2.828125, "learning_rate": 9.975222219758716e-05, "loss": 2.4601, "step": 735 }, { "epoch": 0.03216923816600376, "grad_norm": 3.15625, "learning_rate": 9.975153876827008e-05, "loss": 2.4984, "step": 736 }, { "epoch": 0.032212946370033656, "grad_norm": 3.328125, "learning_rate": 9.975085440006904e-05, "loss": 2.5157, "step": 737 }, { "epoch": 0.03225665457406355, "grad_norm": 4.28125, "learning_rate": 9.9750169092997e-05, "loss": 1.9031, "step": 738 }, { "epoch": 0.03230036277809345, "grad_norm": 3.09375, "learning_rate": 9.974948284706685e-05, "loss": 2.3383, "step": 739 }, { "epoch": 0.032344070982123344, "grad_norm": 2.8125, "learning_rate": 9.974879566229154e-05, "loss": 2.1407, "step": 740 }, { "epoch": 0.032387779186153244, "grad_norm": 3.9375, "learning_rate": 9.974810753868405e-05, "loss": 2.9169, "step": 741 }, { "epoch": 0.03243148739018314, "grad_norm": 3.1875, "learning_rate": 9.974741847625737e-05, "loss": 2.4408, "step": 742 }, { "epoch": 0.03247519559421303, "grad_norm": 4.0, "learning_rate": 9.974672847502451e-05, "loss": 2.501, "step": 743 }, { "epoch": 0.03251890379824293, "grad_norm": 4.65625, "learning_rate": 9.974603753499847e-05, "loss": 2.9058, "step": 744 }, { "epoch": 0.032562612002272825, "grad_norm": 3.203125, "learning_rate": 9.97453456561923e-05, "loss": 2.0979, "step": 745 }, { "epoch": 0.032606320206302726, "grad_norm": 3.0625, "learning_rate": 9.974465283861905e-05, "loss": 2.2657, "step": 746 }, { "epoch": 0.03265002841033262, "grad_norm": 3.125, "learning_rate": 9.974395908229181e-05, "loss": 2.202, "step": 747 }, { "epoch": 0.03269373661436251, "grad_norm": 3.28125, "learning_rate": 9.974326438722367e-05, "loss": 2.4637, "step": 748 }, { "epoch": 0.032737444818392414, "grad_norm": 3.109375, "learning_rate": 9.974256875342772e-05, "loss": 2.0341, "step": 749 }, { "epoch": 0.03278115302242231, "grad_norm": 3.453125, "learning_rate": 9.974187218091711e-05, "loss": 2.1654, "step": 750 }, { "epoch": 0.03282486122645221, "grad_norm": 5.125, "learning_rate": 9.974117466970496e-05, "loss": 3.255, "step": 751 }, { "epoch": 0.0328685694304821, "grad_norm": 3.4375, "learning_rate": 9.974047621980447e-05, "loss": 2.4065, "step": 752 }, { "epoch": 0.032912277634511995, "grad_norm": 3.4375, "learning_rate": 9.97397768312288e-05, "loss": 2.6438, "step": 753 }, { "epoch": 0.032955985838541896, "grad_norm": 3.53125, "learning_rate": 9.973907650399113e-05, "loss": 2.4801, "step": 754 }, { "epoch": 0.03299969404257179, "grad_norm": 2.96875, "learning_rate": 9.973837523810471e-05, "loss": 2.4131, "step": 755 }, { "epoch": 0.03304340224660169, "grad_norm": 4.1875, "learning_rate": 9.973767303358275e-05, "loss": 2.4873, "step": 756 }, { "epoch": 0.033087110450631584, "grad_norm": 3.125, "learning_rate": 9.973696989043852e-05, "loss": 2.5531, "step": 757 }, { "epoch": 0.03313081865466148, "grad_norm": 3.5, "learning_rate": 9.973626580868527e-05, "loss": 2.2931, "step": 758 }, { "epoch": 0.03317452685869138, "grad_norm": 4.65625, "learning_rate": 9.97355607883363e-05, "loss": 1.8566, "step": 759 }, { "epoch": 0.03321823506272127, "grad_norm": 3.71875, "learning_rate": 9.97348548294049e-05, "loss": 3.2503, "step": 760 }, { "epoch": 0.03326194326675117, "grad_norm": 4.21875, "learning_rate": 9.973414793190441e-05, "loss": 2.9089, "step": 761 }, { "epoch": 0.033305651470781066, "grad_norm": 3.5, "learning_rate": 9.973344009584818e-05, "loss": 2.7956, "step": 762 }, { "epoch": 0.03334935967481096, "grad_norm": 4.0625, "learning_rate": 9.973273132124954e-05, "loss": 1.8721, "step": 763 }, { "epoch": 0.03339306787884086, "grad_norm": 3.515625, "learning_rate": 9.973202160812187e-05, "loss": 2.4582, "step": 764 }, { "epoch": 0.03343677608287075, "grad_norm": 3.109375, "learning_rate": 9.973131095647858e-05, "loss": 1.904, "step": 765 }, { "epoch": 0.033480484286900654, "grad_norm": 3.296875, "learning_rate": 9.973059936633306e-05, "loss": 2.0774, "step": 766 }, { "epoch": 0.03352419249093055, "grad_norm": 3.015625, "learning_rate": 9.972988683769877e-05, "loss": 2.5509, "step": 767 }, { "epoch": 0.03356790069496044, "grad_norm": 4.15625, "learning_rate": 9.972917337058913e-05, "loss": 2.0877, "step": 768 }, { "epoch": 0.03361160889899034, "grad_norm": 4.3125, "learning_rate": 9.972845896501761e-05, "loss": 2.6669, "step": 769 }, { "epoch": 0.033655317103020235, "grad_norm": 3.15625, "learning_rate": 9.972774362099768e-05, "loss": 2.1274, "step": 770 }, { "epoch": 0.033699025307050136, "grad_norm": 6.4375, "learning_rate": 9.972702733854286e-05, "loss": 2.6224, "step": 771 }, { "epoch": 0.03374273351108003, "grad_norm": 3.296875, "learning_rate": 9.972631011766668e-05, "loss": 2.4328, "step": 772 }, { "epoch": 0.03378644171510992, "grad_norm": 2.703125, "learning_rate": 9.972559195838263e-05, "loss": 2.3105, "step": 773 }, { "epoch": 0.033830149919139824, "grad_norm": 3.1875, "learning_rate": 9.97248728607043e-05, "loss": 1.9875, "step": 774 }, { "epoch": 0.03387385812316972, "grad_norm": 6.21875, "learning_rate": 9.972415282464524e-05, "loss": 3.3821, "step": 775 }, { "epoch": 0.03391756632719962, "grad_norm": 3.234375, "learning_rate": 9.972343185021906e-05, "loss": 2.2215, "step": 776 }, { "epoch": 0.03396127453122951, "grad_norm": 2.71875, "learning_rate": 9.972270993743934e-05, "loss": 1.8742, "step": 777 }, { "epoch": 0.034004982735259405, "grad_norm": 2.828125, "learning_rate": 9.972198708631972e-05, "loss": 2.1591, "step": 778 }, { "epoch": 0.034048690939289306, "grad_norm": 3.984375, "learning_rate": 9.972126329687384e-05, "loss": 3.2081, "step": 779 }, { "epoch": 0.0340923991433192, "grad_norm": 4.53125, "learning_rate": 9.972053856911534e-05, "loss": 1.9423, "step": 780 }, { "epoch": 0.0341361073473491, "grad_norm": 9.4375, "learning_rate": 9.971981290305792e-05, "loss": 1.6246, "step": 781 }, { "epoch": 0.03417981555137899, "grad_norm": 3.453125, "learning_rate": 9.971908629871527e-05, "loss": 2.1293, "step": 782 }, { "epoch": 0.03422352375540889, "grad_norm": 3.171875, "learning_rate": 9.971835875610109e-05, "loss": 2.006, "step": 783 }, { "epoch": 0.03426723195943879, "grad_norm": 2.859375, "learning_rate": 9.971763027522914e-05, "loss": 2.1156, "step": 784 }, { "epoch": 0.03431094016346868, "grad_norm": 4.375, "learning_rate": 9.971690085611314e-05, "loss": 3.3214, "step": 785 }, { "epoch": 0.03435464836749858, "grad_norm": 3.453125, "learning_rate": 9.971617049876684e-05, "loss": 2.8245, "step": 786 }, { "epoch": 0.034398356571528475, "grad_norm": 3.125, "learning_rate": 9.971543920320407e-05, "loss": 2.7465, "step": 787 }, { "epoch": 0.03444206477555837, "grad_norm": 3.046875, "learning_rate": 9.971470696943859e-05, "loss": 2.3266, "step": 788 }, { "epoch": 0.03448577297958827, "grad_norm": 3.203125, "learning_rate": 9.971397379748424e-05, "loss": 1.9464, "step": 789 }, { "epoch": 0.03452948118361816, "grad_norm": 3.09375, "learning_rate": 9.971323968735484e-05, "loss": 2.7519, "step": 790 }, { "epoch": 0.034573189387648064, "grad_norm": 4.4375, "learning_rate": 9.971250463906426e-05, "loss": 2.2776, "step": 791 }, { "epoch": 0.03461689759167796, "grad_norm": 3.734375, "learning_rate": 9.971176865262635e-05, "loss": 1.9055, "step": 792 }, { "epoch": 0.03466060579570786, "grad_norm": 4.65625, "learning_rate": 9.971103172805503e-05, "loss": 2.7596, "step": 793 }, { "epoch": 0.03470431399973775, "grad_norm": 3.234375, "learning_rate": 9.971029386536419e-05, "loss": 2.2475, "step": 794 }, { "epoch": 0.034748022203767645, "grad_norm": 3.265625, "learning_rate": 9.970955506456776e-05, "loss": 2.4918, "step": 795 }, { "epoch": 0.034791730407797546, "grad_norm": 5.15625, "learning_rate": 9.970881532567967e-05, "loss": 3.1343, "step": 796 }, { "epoch": 0.03483543861182744, "grad_norm": 2.875, "learning_rate": 9.970807464871387e-05, "loss": 1.9314, "step": 797 }, { "epoch": 0.03487914681585734, "grad_norm": 3.734375, "learning_rate": 9.970733303368438e-05, "loss": 2.3322, "step": 798 }, { "epoch": 0.03492285501988723, "grad_norm": 3.734375, "learning_rate": 9.970659048060515e-05, "loss": 2.1469, "step": 799 }, { "epoch": 0.03496656322391713, "grad_norm": 3.109375, "learning_rate": 9.970584698949023e-05, "loss": 2.778, "step": 800 }, { "epoch": 0.03501027142794703, "grad_norm": 5.0625, "learning_rate": 9.970510256035364e-05, "loss": 2.5041, "step": 801 }, { "epoch": 0.03505397963197692, "grad_norm": 3.203125, "learning_rate": 9.970435719320941e-05, "loss": 2.6473, "step": 802 }, { "epoch": 0.03509768783600682, "grad_norm": 5.0, "learning_rate": 9.97036108880716e-05, "loss": 2.6472, "step": 803 }, { "epoch": 0.035141396040036715, "grad_norm": 3.5, "learning_rate": 9.970286364495434e-05, "loss": 2.0429, "step": 804 }, { "epoch": 0.03518510424406661, "grad_norm": 3.6875, "learning_rate": 9.970211546387169e-05, "loss": 2.8479, "step": 805 }, { "epoch": 0.03522881244809651, "grad_norm": 3.515625, "learning_rate": 9.970136634483779e-05, "loss": 2.3924, "step": 806 }, { "epoch": 0.0352725206521264, "grad_norm": 3.546875, "learning_rate": 9.970061628786678e-05, "loss": 2.8506, "step": 807 }, { "epoch": 0.035316228856156304, "grad_norm": 2.6875, "learning_rate": 9.96998652929728e-05, "loss": 1.8322, "step": 808 }, { "epoch": 0.0353599370601862, "grad_norm": 3.53125, "learning_rate": 9.969911336017e-05, "loss": 2.0667, "step": 809 }, { "epoch": 0.03540364526421609, "grad_norm": 3.4375, "learning_rate": 9.969836048947263e-05, "loss": 2.4905, "step": 810 }, { "epoch": 0.03544735346824599, "grad_norm": 3.40625, "learning_rate": 9.969760668089486e-05, "loss": 2.3651, "step": 811 }, { "epoch": 0.035491061672275885, "grad_norm": 3.375, "learning_rate": 9.969685193445091e-05, "loss": 2.1256, "step": 812 }, { "epoch": 0.035534769876305786, "grad_norm": 4.34375, "learning_rate": 9.969609625015502e-05, "loss": 2.6686, "step": 813 }, { "epoch": 0.03557847808033568, "grad_norm": 5.28125, "learning_rate": 9.96953396280215e-05, "loss": 2.7127, "step": 814 }, { "epoch": 0.03562218628436557, "grad_norm": 2.75, "learning_rate": 9.969458206806456e-05, "loss": 1.9587, "step": 815 }, { "epoch": 0.03566589448839547, "grad_norm": 2.8125, "learning_rate": 9.969382357029856e-05, "loss": 2.139, "step": 816 }, { "epoch": 0.03570960269242537, "grad_norm": 5.84375, "learning_rate": 9.969306413473776e-05, "loss": 2.1272, "step": 817 }, { "epoch": 0.03575331089645527, "grad_norm": 3.390625, "learning_rate": 9.969230376139651e-05, "loss": 2.8055, "step": 818 }, { "epoch": 0.03579701910048516, "grad_norm": 3.984375, "learning_rate": 9.969154245028917e-05, "loss": 1.7156, "step": 819 }, { "epoch": 0.035840727304515055, "grad_norm": 6.0, "learning_rate": 9.969078020143012e-05, "loss": 2.3962, "step": 820 }, { "epoch": 0.035884435508544955, "grad_norm": 2.921875, "learning_rate": 9.969001701483372e-05, "loss": 1.9315, "step": 821 }, { "epoch": 0.03592814371257485, "grad_norm": 2.8125, "learning_rate": 9.968925289051436e-05, "loss": 2.0727, "step": 822 }, { "epoch": 0.03597185191660475, "grad_norm": 2.859375, "learning_rate": 9.96884878284865e-05, "loss": 1.8745, "step": 823 }, { "epoch": 0.03601556012063464, "grad_norm": 3.21875, "learning_rate": 9.968772182876454e-05, "loss": 2.4207, "step": 824 }, { "epoch": 0.03605926832466454, "grad_norm": 2.875, "learning_rate": 9.968695489136296e-05, "loss": 1.7213, "step": 825 }, { "epoch": 0.03610297652869444, "grad_norm": 3.359375, "learning_rate": 9.968618701629623e-05, "loss": 2.442, "step": 826 }, { "epoch": 0.03614668473272433, "grad_norm": 3.515625, "learning_rate": 9.968541820357883e-05, "loss": 2.593, "step": 827 }, { "epoch": 0.03619039293675423, "grad_norm": 3.796875, "learning_rate": 9.968464845322527e-05, "loss": 2.513, "step": 828 }, { "epoch": 0.036234101140784125, "grad_norm": 4.34375, "learning_rate": 9.96838777652501e-05, "loss": 2.9353, "step": 829 }, { "epoch": 0.03627780934481402, "grad_norm": 3.34375, "learning_rate": 9.968310613966783e-05, "loss": 2.7721, "step": 830 }, { "epoch": 0.03632151754884392, "grad_norm": 3.8125, "learning_rate": 9.968233357649303e-05, "loss": 1.9949, "step": 831 }, { "epoch": 0.03636522575287381, "grad_norm": 2.84375, "learning_rate": 9.96815600757403e-05, "loss": 1.9257, "step": 832 }, { "epoch": 0.036408933956903713, "grad_norm": 3.390625, "learning_rate": 9.968078563742421e-05, "loss": 2.6905, "step": 833 }, { "epoch": 0.03645264216093361, "grad_norm": 3.859375, "learning_rate": 9.968001026155939e-05, "loss": 1.9883, "step": 834 }, { "epoch": 0.0364963503649635, "grad_norm": 3.609375, "learning_rate": 9.967923394816049e-05, "loss": 3.019, "step": 835 }, { "epoch": 0.0365400585689934, "grad_norm": 3.296875, "learning_rate": 9.967845669724212e-05, "loss": 2.1879, "step": 836 }, { "epoch": 0.036583766773023295, "grad_norm": 3.859375, "learning_rate": 9.967767850881898e-05, "loss": 2.161, "step": 837 }, { "epoch": 0.036627474977053195, "grad_norm": 3.421875, "learning_rate": 9.967689938290574e-05, "loss": 2.8632, "step": 838 }, { "epoch": 0.03667118318108309, "grad_norm": 5.1875, "learning_rate": 9.96761193195171e-05, "loss": 2.971, "step": 839 }, { "epoch": 0.03671489138511298, "grad_norm": 2.984375, "learning_rate": 9.96753383186678e-05, "loss": 2.5535, "step": 840 }, { "epoch": 0.03675859958914288, "grad_norm": 2.890625, "learning_rate": 9.967455638037257e-05, "loss": 2.0311, "step": 841 }, { "epoch": 0.03680230779317278, "grad_norm": 2.734375, "learning_rate": 9.967377350464615e-05, "loss": 1.9492, "step": 842 }, { "epoch": 0.03684601599720268, "grad_norm": 3.078125, "learning_rate": 9.967298969150334e-05, "loss": 2.2931, "step": 843 }, { "epoch": 0.03688972420123257, "grad_norm": 2.96875, "learning_rate": 9.96722049409589e-05, "loss": 1.7512, "step": 844 }, { "epoch": 0.036933432405262465, "grad_norm": 2.96875, "learning_rate": 9.967141925302768e-05, "loss": 2.3825, "step": 845 }, { "epoch": 0.036977140609292365, "grad_norm": 3.84375, "learning_rate": 9.967063262772447e-05, "loss": 2.2133, "step": 846 }, { "epoch": 0.03702084881332226, "grad_norm": 3.046875, "learning_rate": 9.966984506506413e-05, "loss": 2.1161, "step": 847 }, { "epoch": 0.03706455701735216, "grad_norm": 3.28125, "learning_rate": 9.966905656506154e-05, "loss": 2.2078, "step": 848 }, { "epoch": 0.03710826522138205, "grad_norm": 3.625, "learning_rate": 9.966826712773155e-05, "loss": 2.5632, "step": 849 }, { "epoch": 0.03715197342541195, "grad_norm": 3.421875, "learning_rate": 9.966747675308907e-05, "loss": 2.3147, "step": 850 }, { "epoch": 0.03719568162944185, "grad_norm": 3.15625, "learning_rate": 9.966668544114902e-05, "loss": 2.1258, "step": 851 }, { "epoch": 0.03723938983347174, "grad_norm": 3.40625, "learning_rate": 9.966589319192633e-05, "loss": 2.3756, "step": 852 }, { "epoch": 0.03728309803750164, "grad_norm": 4.125, "learning_rate": 9.966510000543594e-05, "loss": 2.3031, "step": 853 }, { "epoch": 0.037326806241531535, "grad_norm": 2.8125, "learning_rate": 9.966430588169285e-05, "loss": 2.0828, "step": 854 }, { "epoch": 0.03737051444556143, "grad_norm": 3.03125, "learning_rate": 9.966351082071201e-05, "loss": 2.4494, "step": 855 }, { "epoch": 0.03741422264959133, "grad_norm": 3.265625, "learning_rate": 9.966271482250845e-05, "loss": 2.3533, "step": 856 }, { "epoch": 0.03745793085362122, "grad_norm": 3.265625, "learning_rate": 9.966191788709716e-05, "loss": 3.1356, "step": 857 }, { "epoch": 0.03750163905765112, "grad_norm": 2.796875, "learning_rate": 9.966112001449321e-05, "loss": 2.1622, "step": 858 }, { "epoch": 0.03754534726168102, "grad_norm": 4.28125, "learning_rate": 9.966032120471165e-05, "loss": 2.6896, "step": 859 }, { "epoch": 0.03758905546571091, "grad_norm": 4.03125, "learning_rate": 9.965952145776754e-05, "loss": 2.0189, "step": 860 }, { "epoch": 0.03763276366974081, "grad_norm": 3.40625, "learning_rate": 9.9658720773676e-05, "loss": 2.3372, "step": 861 }, { "epoch": 0.037676471873770705, "grad_norm": 3.28125, "learning_rate": 9.96579191524521e-05, "loss": 2.0823, "step": 862 }, { "epoch": 0.037720180077800605, "grad_norm": 3.296875, "learning_rate": 9.965711659411102e-05, "loss": 2.822, "step": 863 }, { "epoch": 0.0377638882818305, "grad_norm": 3.015625, "learning_rate": 9.965631309866788e-05, "loss": 2.695, "step": 864 }, { "epoch": 0.0378075964858604, "grad_norm": 3.234375, "learning_rate": 9.965550866613782e-05, "loss": 2.7255, "step": 865 }, { "epoch": 0.03785130468989029, "grad_norm": 3.25, "learning_rate": 9.965470329653604e-05, "loss": 2.4268, "step": 866 }, { "epoch": 0.03789501289392019, "grad_norm": 4.78125, "learning_rate": 9.965389698987775e-05, "loss": 2.2879, "step": 867 }, { "epoch": 0.03793872109795009, "grad_norm": 3.34375, "learning_rate": 9.965308974617816e-05, "loss": 2.4958, "step": 868 }, { "epoch": 0.03798242930197998, "grad_norm": 3.34375, "learning_rate": 9.96522815654525e-05, "loss": 2.387, "step": 869 }, { "epoch": 0.03802613750600988, "grad_norm": 2.703125, "learning_rate": 9.965147244771601e-05, "loss": 1.918, "step": 870 }, { "epoch": 0.038069845710039775, "grad_norm": 3.25, "learning_rate": 9.965066239298398e-05, "loss": 2.7065, "step": 871 }, { "epoch": 0.03811355391406967, "grad_norm": 3.484375, "learning_rate": 9.964985140127168e-05, "loss": 2.3908, "step": 872 }, { "epoch": 0.03815726211809957, "grad_norm": 2.71875, "learning_rate": 9.964903947259443e-05, "loss": 1.9557, "step": 873 }, { "epoch": 0.03820097032212946, "grad_norm": 2.4375, "learning_rate": 9.964822660696753e-05, "loss": 2.1748, "step": 874 }, { "epoch": 0.03824467852615936, "grad_norm": 3.96875, "learning_rate": 9.964741280440634e-05, "loss": 1.9483, "step": 875 }, { "epoch": 0.03828838673018926, "grad_norm": 8.25, "learning_rate": 9.964659806492624e-05, "loss": 4.1064, "step": 876 }, { "epoch": 0.03833209493421915, "grad_norm": 4.625, "learning_rate": 9.964578238854254e-05, "loss": 2.6809, "step": 877 }, { "epoch": 0.03837580313824905, "grad_norm": 3.859375, "learning_rate": 9.964496577527069e-05, "loss": 1.873, "step": 878 }, { "epoch": 0.038419511342278945, "grad_norm": 4.875, "learning_rate": 9.964414822512607e-05, "loss": 2.5123, "step": 879 }, { "epoch": 0.038463219546308845, "grad_norm": 3.203125, "learning_rate": 9.964332973812414e-05, "loss": 2.3094, "step": 880 }, { "epoch": 0.03850692775033874, "grad_norm": 5.0625, "learning_rate": 9.96425103142803e-05, "loss": 1.6306, "step": 881 }, { "epoch": 0.03855063595436863, "grad_norm": 2.984375, "learning_rate": 9.964168995361005e-05, "loss": 2.1723, "step": 882 }, { "epoch": 0.03859434415839853, "grad_norm": 3.046875, "learning_rate": 9.964086865612887e-05, "loss": 2.1385, "step": 883 }, { "epoch": 0.03863805236242843, "grad_norm": 3.1875, "learning_rate": 9.964004642185223e-05, "loss": 2.2899, "step": 884 }, { "epoch": 0.03868176056645833, "grad_norm": 3.578125, "learning_rate": 9.963922325079567e-05, "loss": 2.6071, "step": 885 }, { "epoch": 0.03872546877048822, "grad_norm": 2.953125, "learning_rate": 9.963839914297473e-05, "loss": 2.1812, "step": 886 }, { "epoch": 0.038769176974518114, "grad_norm": 3.34375, "learning_rate": 9.963757409840495e-05, "loss": 2.4299, "step": 887 }, { "epoch": 0.038812885178548015, "grad_norm": 3.421875, "learning_rate": 9.963674811710191e-05, "loss": 2.3144, "step": 888 }, { "epoch": 0.03885659338257791, "grad_norm": 2.953125, "learning_rate": 9.963592119908119e-05, "loss": 2.3799, "step": 889 }, { "epoch": 0.03890030158660781, "grad_norm": 3.078125, "learning_rate": 9.963509334435838e-05, "loss": 2.4469, "step": 890 }, { "epoch": 0.0389440097906377, "grad_norm": 3.875, "learning_rate": 9.963426455294913e-05, "loss": 2.425, "step": 891 }, { "epoch": 0.038987717994667596, "grad_norm": 3.484375, "learning_rate": 9.963343482486906e-05, "loss": 2.833, "step": 892 }, { "epoch": 0.0390314261986975, "grad_norm": 4.71875, "learning_rate": 9.963260416013385e-05, "loss": 2.5376, "step": 893 }, { "epoch": 0.03907513440272739, "grad_norm": 3.96875, "learning_rate": 9.963177255875915e-05, "loss": 2.5258, "step": 894 }, { "epoch": 0.03911884260675729, "grad_norm": 7.4375, "learning_rate": 9.963094002076066e-05, "loss": 3.2763, "step": 895 }, { "epoch": 0.039162550810787185, "grad_norm": 4.96875, "learning_rate": 9.963010654615411e-05, "loss": 3.1825, "step": 896 }, { "epoch": 0.03920625901481708, "grad_norm": 3.21875, "learning_rate": 9.962927213495521e-05, "loss": 2.2391, "step": 897 }, { "epoch": 0.03924996721884698, "grad_norm": 3.125, "learning_rate": 9.962843678717972e-05, "loss": 2.2213, "step": 898 }, { "epoch": 0.03929367542287687, "grad_norm": 3.6875, "learning_rate": 9.96276005028434e-05, "loss": 2.3569, "step": 899 }, { "epoch": 0.03933738362690677, "grad_norm": 3.015625, "learning_rate": 9.962676328196202e-05, "loss": 1.9611, "step": 900 }, { "epoch": 0.03938109183093667, "grad_norm": 2.53125, "learning_rate": 9.962592512455138e-05, "loss": 1.869, "step": 901 }, { "epoch": 0.03942480003496656, "grad_norm": 3.640625, "learning_rate": 9.962508603062732e-05, "loss": 2.3317, "step": 902 }, { "epoch": 0.03946850823899646, "grad_norm": 3.03125, "learning_rate": 9.962424600020567e-05, "loss": 1.9824, "step": 903 }, { "epoch": 0.039512216443026354, "grad_norm": 3.015625, "learning_rate": 9.962340503330226e-05, "loss": 2.6484, "step": 904 }, { "epoch": 0.039555924647056255, "grad_norm": 2.96875, "learning_rate": 9.962256312993297e-05, "loss": 2.3466, "step": 905 }, { "epoch": 0.03959963285108615, "grad_norm": 2.890625, "learning_rate": 9.96217202901137e-05, "loss": 2.0264, "step": 906 }, { "epoch": 0.03964334105511604, "grad_norm": 3.328125, "learning_rate": 9.962087651386034e-05, "loss": 2.5601, "step": 907 }, { "epoch": 0.03968704925914594, "grad_norm": 3.265625, "learning_rate": 9.962003180118882e-05, "loss": 2.2833, "step": 908 }, { "epoch": 0.039730757463175836, "grad_norm": 3.15625, "learning_rate": 9.96191861521151e-05, "loss": 2.6963, "step": 909 }, { "epoch": 0.03977446566720574, "grad_norm": 2.71875, "learning_rate": 9.961833956665509e-05, "loss": 2.1049, "step": 910 }, { "epoch": 0.03981817387123563, "grad_norm": 3.171875, "learning_rate": 9.961749204482483e-05, "loss": 3.2084, "step": 911 }, { "epoch": 0.039861882075265524, "grad_norm": 2.75, "learning_rate": 9.961664358664024e-05, "loss": 2.6011, "step": 912 }, { "epoch": 0.039905590279295425, "grad_norm": 2.796875, "learning_rate": 9.961579419211741e-05, "loss": 2.6555, "step": 913 }, { "epoch": 0.03994929848332532, "grad_norm": 3.09375, "learning_rate": 9.961494386127231e-05, "loss": 1.9832, "step": 914 }, { "epoch": 0.03999300668735522, "grad_norm": 4.5, "learning_rate": 9.961409259412102e-05, "loss": 2.9627, "step": 915 }, { "epoch": 0.04003671489138511, "grad_norm": 5.40625, "learning_rate": 9.961324039067958e-05, "loss": 2.9962, "step": 916 }, { "epoch": 0.040080423095415006, "grad_norm": 2.984375, "learning_rate": 9.961238725096409e-05, "loss": 2.2526, "step": 917 }, { "epoch": 0.04012413129944491, "grad_norm": 3.828125, "learning_rate": 9.961153317499065e-05, "loss": 2.5092, "step": 918 }, { "epoch": 0.0401678395034748, "grad_norm": 2.828125, "learning_rate": 9.961067816277536e-05, "loss": 1.9513, "step": 919 }, { "epoch": 0.0402115477075047, "grad_norm": 3.734375, "learning_rate": 9.960982221433439e-05, "loss": 2.3512, "step": 920 }, { "epoch": 0.040255255911534595, "grad_norm": 3.15625, "learning_rate": 9.960896532968385e-05, "loss": 2.5303, "step": 921 }, { "epoch": 0.04029896411556449, "grad_norm": 2.84375, "learning_rate": 9.960810750883995e-05, "loss": 1.8781, "step": 922 }, { "epoch": 0.04034267231959439, "grad_norm": 2.984375, "learning_rate": 9.960724875181885e-05, "loss": 2.3986, "step": 923 }, { "epoch": 0.04038638052362428, "grad_norm": 2.65625, "learning_rate": 9.960638905863676e-05, "loss": 2.0327, "step": 924 }, { "epoch": 0.04043008872765418, "grad_norm": 6.15625, "learning_rate": 9.960552842930992e-05, "loss": 1.6378, "step": 925 }, { "epoch": 0.040473796931684076, "grad_norm": 9.0, "learning_rate": 9.960466686385456e-05, "loss": 2.1269, "step": 926 }, { "epoch": 0.04051750513571397, "grad_norm": 2.90625, "learning_rate": 9.960380436228693e-05, "loss": 2.3509, "step": 927 }, { "epoch": 0.04056121333974387, "grad_norm": 3.109375, "learning_rate": 9.960294092462332e-05, "loss": 2.1872, "step": 928 }, { "epoch": 0.040604921543773764, "grad_norm": 3.828125, "learning_rate": 9.960207655088003e-05, "loss": 2.3827, "step": 929 }, { "epoch": 0.040648629747803665, "grad_norm": 3.328125, "learning_rate": 9.960121124107336e-05, "loss": 1.9355, "step": 930 }, { "epoch": 0.04069233795183356, "grad_norm": 3.3125, "learning_rate": 9.960034499521964e-05, "loss": 1.8716, "step": 931 }, { "epoch": 0.04073604615586346, "grad_norm": 3.1875, "learning_rate": 9.959947781333522e-05, "loss": 1.9923, "step": 932 }, { "epoch": 0.04077975435989335, "grad_norm": 3.0, "learning_rate": 9.959860969543648e-05, "loss": 2.2197, "step": 933 }, { "epoch": 0.040823462563923246, "grad_norm": 5.15625, "learning_rate": 9.959774064153977e-05, "loss": 2.2632, "step": 934 }, { "epoch": 0.04086717076795315, "grad_norm": 2.796875, "learning_rate": 9.959687065166151e-05, "loss": 2.3649, "step": 935 }, { "epoch": 0.04091087897198304, "grad_norm": 3.890625, "learning_rate": 9.959599972581812e-05, "loss": 2.6295, "step": 936 }, { "epoch": 0.04095458717601294, "grad_norm": 3.6875, "learning_rate": 9.959512786402603e-05, "loss": 2.1709, "step": 937 }, { "epoch": 0.040998295380042835, "grad_norm": 3.9375, "learning_rate": 9.95942550663017e-05, "loss": 2.1453, "step": 938 }, { "epoch": 0.04104200358407273, "grad_norm": 3.578125, "learning_rate": 9.959338133266158e-05, "loss": 2.5455, "step": 939 }, { "epoch": 0.04108571178810263, "grad_norm": 2.78125, "learning_rate": 9.959250666312219e-05, "loss": 2.0837, "step": 940 }, { "epoch": 0.04112941999213252, "grad_norm": 2.984375, "learning_rate": 9.959163105770002e-05, "loss": 2.2131, "step": 941 }, { "epoch": 0.04117312819616242, "grad_norm": 2.65625, "learning_rate": 9.959075451641159e-05, "loss": 2.1649, "step": 942 }, { "epoch": 0.04121683640019232, "grad_norm": 3.5, "learning_rate": 9.958987703927345e-05, "loss": 2.245, "step": 943 }, { "epoch": 0.04126054460422221, "grad_norm": 3.0, "learning_rate": 9.958899862630216e-05, "loss": 2.1328, "step": 944 }, { "epoch": 0.04130425280825211, "grad_norm": 3.25, "learning_rate": 9.958811927751428e-05, "loss": 2.9265, "step": 945 }, { "epoch": 0.041347961012282004, "grad_norm": 2.890625, "learning_rate": 9.958723899292641e-05, "loss": 2.4016, "step": 946 }, { "epoch": 0.041391669216311905, "grad_norm": 3.015625, "learning_rate": 9.958635777255518e-05, "loss": 2.2132, "step": 947 }, { "epoch": 0.0414353774203418, "grad_norm": 2.484375, "learning_rate": 9.958547561641722e-05, "loss": 2.1233, "step": 948 }, { "epoch": 0.04147908562437169, "grad_norm": 7.8125, "learning_rate": 9.958459252452916e-05, "loss": 2.8399, "step": 949 }, { "epoch": 0.04152279382840159, "grad_norm": 3.1875, "learning_rate": 9.958370849690767e-05, "loss": 2.7092, "step": 950 }, { "epoch": 0.041566502032431486, "grad_norm": 3.296875, "learning_rate": 9.958282353356943e-05, "loss": 2.0668, "step": 951 }, { "epoch": 0.04161021023646139, "grad_norm": 2.734375, "learning_rate": 9.958193763453116e-05, "loss": 2.2635, "step": 952 }, { "epoch": 0.04165391844049128, "grad_norm": 3.046875, "learning_rate": 9.958105079980954e-05, "loss": 2.0128, "step": 953 }, { "epoch": 0.041697626644521174, "grad_norm": 2.6875, "learning_rate": 9.958016302942135e-05, "loss": 2.1925, "step": 954 }, { "epoch": 0.041741334848551075, "grad_norm": 3.25, "learning_rate": 9.957927432338332e-05, "loss": 2.0548, "step": 955 }, { "epoch": 0.04178504305258097, "grad_norm": 3.421875, "learning_rate": 9.957838468171223e-05, "loss": 2.3297, "step": 956 }, { "epoch": 0.04182875125661087, "grad_norm": 2.921875, "learning_rate": 9.957749410442485e-05, "loss": 2.3113, "step": 957 }, { "epoch": 0.04187245946064076, "grad_norm": 2.734375, "learning_rate": 9.9576602591538e-05, "loss": 2.4494, "step": 958 }, { "epoch": 0.041916167664670656, "grad_norm": 2.890625, "learning_rate": 9.957571014306852e-05, "loss": 2.2834, "step": 959 }, { "epoch": 0.04195987586870056, "grad_norm": 3.359375, "learning_rate": 9.957481675903321e-05, "loss": 1.9757, "step": 960 }, { "epoch": 0.04200358407273045, "grad_norm": 3.296875, "learning_rate": 9.957392243944898e-05, "loss": 1.9881, "step": 961 }, { "epoch": 0.04204729227676035, "grad_norm": 2.890625, "learning_rate": 9.957302718433266e-05, "loss": 2.0498, "step": 962 }, { "epoch": 0.042091000480790244, "grad_norm": 2.546875, "learning_rate": 9.957213099370117e-05, "loss": 2.2652, "step": 963 }, { "epoch": 0.04213470868482014, "grad_norm": 3.234375, "learning_rate": 9.957123386757144e-05, "loss": 2.2596, "step": 964 }, { "epoch": 0.04217841688885004, "grad_norm": 3.234375, "learning_rate": 9.957033580596036e-05, "loss": 2.7782, "step": 965 }, { "epoch": 0.04222212509287993, "grad_norm": 2.796875, "learning_rate": 9.95694368088849e-05, "loss": 2.0644, "step": 966 }, { "epoch": 0.04226583329690983, "grad_norm": 2.5625, "learning_rate": 9.956853687636203e-05, "loss": 2.149, "step": 967 }, { "epoch": 0.042309541500939726, "grad_norm": 3.3125, "learning_rate": 9.956763600840873e-05, "loss": 2.8036, "step": 968 }, { "epoch": 0.04235324970496962, "grad_norm": 3.125, "learning_rate": 9.9566734205042e-05, "loss": 1.844, "step": 969 }, { "epoch": 0.04239695790899952, "grad_norm": 3.4375, "learning_rate": 9.956583146627883e-05, "loss": 3.511, "step": 970 }, { "epoch": 0.042440666113029414, "grad_norm": 3.296875, "learning_rate": 9.956492779213629e-05, "loss": 2.1949, "step": 971 }, { "epoch": 0.042484374317059315, "grad_norm": 3.828125, "learning_rate": 9.956402318263143e-05, "loss": 2.4102, "step": 972 }, { "epoch": 0.04252808252108921, "grad_norm": 3.296875, "learning_rate": 9.95631176377813e-05, "loss": 2.4406, "step": 973 }, { "epoch": 0.0425717907251191, "grad_norm": 3.234375, "learning_rate": 9.956221115760302e-05, "loss": 2.7419, "step": 974 }, { "epoch": 0.042615498929149, "grad_norm": 3.328125, "learning_rate": 9.956130374211369e-05, "loss": 2.8455, "step": 975 }, { "epoch": 0.042659207133178896, "grad_norm": 3.171875, "learning_rate": 9.956039539133042e-05, "loss": 2.433, "step": 976 }, { "epoch": 0.0427029153372088, "grad_norm": 3.078125, "learning_rate": 9.955948610527035e-05, "loss": 2.2266, "step": 977 }, { "epoch": 0.04274662354123869, "grad_norm": 2.84375, "learning_rate": 9.955857588395065e-05, "loss": 2.2863, "step": 978 }, { "epoch": 0.042790331745268584, "grad_norm": 3.21875, "learning_rate": 9.955766472738847e-05, "loss": 1.9107, "step": 979 }, { "epoch": 0.042834039949298484, "grad_norm": 4.53125, "learning_rate": 9.955675263560107e-05, "loss": 1.9685, "step": 980 }, { "epoch": 0.04287774815332838, "grad_norm": 2.71875, "learning_rate": 9.95558396086056e-05, "loss": 1.9, "step": 981 }, { "epoch": 0.04292145635735828, "grad_norm": 2.65625, "learning_rate": 9.955492564641931e-05, "loss": 1.9955, "step": 982 }, { "epoch": 0.04296516456138817, "grad_norm": 3.203125, "learning_rate": 9.955401074905945e-05, "loss": 2.3646, "step": 983 }, { "epoch": 0.043008872765418066, "grad_norm": 2.84375, "learning_rate": 9.95530949165433e-05, "loss": 1.7669, "step": 984 }, { "epoch": 0.043052580969447966, "grad_norm": 3.125, "learning_rate": 9.955217814888811e-05, "loss": 2.2699, "step": 985 }, { "epoch": 0.04309628917347786, "grad_norm": 6.0625, "learning_rate": 9.955126044611121e-05, "loss": 2.6444, "step": 986 }, { "epoch": 0.04313999737750776, "grad_norm": 3.015625, "learning_rate": 9.955034180822988e-05, "loss": 2.1632, "step": 987 }, { "epoch": 0.043183705581537654, "grad_norm": 2.96875, "learning_rate": 9.954942223526152e-05, "loss": 1.9997, "step": 988 }, { "epoch": 0.04322741378556755, "grad_norm": 2.859375, "learning_rate": 9.954850172722344e-05, "loss": 2.6184, "step": 989 }, { "epoch": 0.04327112198959745, "grad_norm": 3.515625, "learning_rate": 9.9547580284133e-05, "loss": 2.7852, "step": 990 }, { "epoch": 0.04331483019362734, "grad_norm": 3.609375, "learning_rate": 9.954665790600761e-05, "loss": 2.9001, "step": 991 }, { "epoch": 0.04335853839765724, "grad_norm": 5.5625, "learning_rate": 9.954573459286468e-05, "loss": 2.6454, "step": 992 }, { "epoch": 0.043402246601687136, "grad_norm": 3.96875, "learning_rate": 9.954481034472163e-05, "loss": 2.3826, "step": 993 }, { "epoch": 0.04344595480571703, "grad_norm": 2.9375, "learning_rate": 9.95438851615959e-05, "loss": 2.5778, "step": 994 }, { "epoch": 0.04348966300974693, "grad_norm": 5.34375, "learning_rate": 9.954295904350495e-05, "loss": 2.5694, "step": 995 }, { "epoch": 0.043533371213776824, "grad_norm": 3.703125, "learning_rate": 9.954203199046624e-05, "loss": 2.0787, "step": 996 }, { "epoch": 0.043577079417806724, "grad_norm": 3.203125, "learning_rate": 9.95411040024973e-05, "loss": 2.1673, "step": 997 }, { "epoch": 0.04362078762183662, "grad_norm": 2.9375, "learning_rate": 9.954017507961561e-05, "loss": 2.8185, "step": 998 }, { "epoch": 0.04366449582586651, "grad_norm": 3.828125, "learning_rate": 9.953924522183872e-05, "loss": 2.495, "step": 999 }, { "epoch": 0.04370820402989641, "grad_norm": 3.140625, "learning_rate": 9.953831442918418e-05, "loss": 1.975, "step": 1000 }, { "epoch": 0.043751912233926306, "grad_norm": 3.375, "learning_rate": 9.953738270166954e-05, "loss": 2.072, "step": 1001 }, { "epoch": 0.043795620437956206, "grad_norm": 2.609375, "learning_rate": 9.953645003931239e-05, "loss": 2.2948, "step": 1002 }, { "epoch": 0.0438393286419861, "grad_norm": 2.796875, "learning_rate": 9.953551644213033e-05, "loss": 2.4571, "step": 1003 }, { "epoch": 0.043883036846016, "grad_norm": 2.96875, "learning_rate": 9.953458191014098e-05, "loss": 2.5195, "step": 1004 }, { "epoch": 0.043926745050045894, "grad_norm": 2.796875, "learning_rate": 9.953364644336199e-05, "loss": 2.5952, "step": 1005 }, { "epoch": 0.04397045325407579, "grad_norm": 2.734375, "learning_rate": 9.953271004181097e-05, "loss": 2.1683, "step": 1006 }, { "epoch": 0.04401416145810569, "grad_norm": 2.765625, "learning_rate": 9.953177270550564e-05, "loss": 1.818, "step": 1007 }, { "epoch": 0.04405786966213558, "grad_norm": 3.375, "learning_rate": 9.953083443446366e-05, "loss": 2.5001, "step": 1008 }, { "epoch": 0.04410157786616548, "grad_norm": 2.640625, "learning_rate": 9.952989522870275e-05, "loss": 2.2333, "step": 1009 }, { "epoch": 0.044145286070195376, "grad_norm": 4.1875, "learning_rate": 9.952895508824063e-05, "loss": 2.1663, "step": 1010 }, { "epoch": 0.04418899427422527, "grad_norm": 16.25, "learning_rate": 9.952801401309503e-05, "loss": 2.8942, "step": 1011 }, { "epoch": 0.04423270247825517, "grad_norm": 3.109375, "learning_rate": 9.952707200328374e-05, "loss": 1.8845, "step": 1012 }, { "epoch": 0.044276410682285064, "grad_norm": 2.84375, "learning_rate": 9.952612905882451e-05, "loss": 2.5532, "step": 1013 }, { "epoch": 0.044320118886314964, "grad_norm": 2.84375, "learning_rate": 9.952518517973515e-05, "loss": 2.4217, "step": 1014 }, { "epoch": 0.04436382709034486, "grad_norm": 3.140625, "learning_rate": 9.952424036603345e-05, "loss": 1.7717, "step": 1015 }, { "epoch": 0.04440753529437475, "grad_norm": 2.5625, "learning_rate": 9.952329461773726e-05, "loss": 2.0673, "step": 1016 }, { "epoch": 0.04445124349840465, "grad_norm": 2.90625, "learning_rate": 9.952234793486443e-05, "loss": 1.9221, "step": 1017 }, { "epoch": 0.044494951702434546, "grad_norm": 2.953125, "learning_rate": 9.95214003174328e-05, "loss": 2.2079, "step": 1018 }, { "epoch": 0.044538659906464446, "grad_norm": 3.703125, "learning_rate": 9.95204517654603e-05, "loss": 2.5225, "step": 1019 }, { "epoch": 0.04458236811049434, "grad_norm": 3.109375, "learning_rate": 9.951950227896478e-05, "loss": 2.343, "step": 1020 }, { "epoch": 0.044626076314524234, "grad_norm": 7.15625, "learning_rate": 9.95185518579642e-05, "loss": 2.4092, "step": 1021 }, { "epoch": 0.044669784518554134, "grad_norm": 3.75, "learning_rate": 9.951760050247646e-05, "loss": 2.3665, "step": 1022 }, { "epoch": 0.04471349272258403, "grad_norm": 2.9375, "learning_rate": 9.951664821251952e-05, "loss": 2.2941, "step": 1023 }, { "epoch": 0.04475720092661393, "grad_norm": 3.078125, "learning_rate": 9.951569498811137e-05, "loss": 2.8114, "step": 1024 }, { "epoch": 0.04480090913064382, "grad_norm": 3.15625, "learning_rate": 9.951474082927e-05, "loss": 2.7321, "step": 1025 }, { "epoch": 0.044844617334673716, "grad_norm": 3.0, "learning_rate": 9.951378573601338e-05, "loss": 2.2982, "step": 1026 }, { "epoch": 0.044888325538703616, "grad_norm": 3.25, "learning_rate": 9.951282970835957e-05, "loss": 2.0281, "step": 1027 }, { "epoch": 0.04493203374273351, "grad_norm": 3.015625, "learning_rate": 9.951187274632661e-05, "loss": 2.1427, "step": 1028 }, { "epoch": 0.04497574194676341, "grad_norm": 2.78125, "learning_rate": 9.951091484993256e-05, "loss": 2.1392, "step": 1029 }, { "epoch": 0.045019450150793304, "grad_norm": 2.78125, "learning_rate": 9.950995601919546e-05, "loss": 2.1285, "step": 1030 }, { "epoch": 0.0450631583548232, "grad_norm": 3.15625, "learning_rate": 9.950899625413345e-05, "loss": 1.7744, "step": 1031 }, { "epoch": 0.0451068665588531, "grad_norm": 4.875, "learning_rate": 9.950803555476463e-05, "loss": 3.7282, "step": 1032 }, { "epoch": 0.04515057476288299, "grad_norm": 3.234375, "learning_rate": 9.950707392110709e-05, "loss": 3.1688, "step": 1033 }, { "epoch": 0.04519428296691289, "grad_norm": 3.09375, "learning_rate": 9.950611135317904e-05, "loss": 1.9093, "step": 1034 }, { "epoch": 0.045237991170942786, "grad_norm": 3.1875, "learning_rate": 9.95051478509986e-05, "loss": 2.6153, "step": 1035 }, { "epoch": 0.04528169937497268, "grad_norm": 2.90625, "learning_rate": 9.950418341458398e-05, "loss": 2.0785, "step": 1036 }, { "epoch": 0.04532540757900258, "grad_norm": 4.96875, "learning_rate": 9.950321804395338e-05, "loss": 2.2215, "step": 1037 }, { "epoch": 0.045369115783032474, "grad_norm": 3.625, "learning_rate": 9.950225173912499e-05, "loss": 3.471, "step": 1038 }, { "epoch": 0.045412823987062374, "grad_norm": 4.3125, "learning_rate": 9.950128450011706e-05, "loss": 2.7834, "step": 1039 }, { "epoch": 0.04545653219109227, "grad_norm": 2.953125, "learning_rate": 9.950031632694785e-05, "loss": 2.182, "step": 1040 }, { "epoch": 0.04550024039512216, "grad_norm": 2.90625, "learning_rate": 9.949934721963563e-05, "loss": 2.1175, "step": 1041 }, { "epoch": 0.04554394859915206, "grad_norm": 3.28125, "learning_rate": 9.949837717819868e-05, "loss": 2.6425, "step": 1042 }, { "epoch": 0.045587656803181956, "grad_norm": 2.828125, "learning_rate": 9.949740620265532e-05, "loss": 2.0681, "step": 1043 }, { "epoch": 0.045631365007211856, "grad_norm": 3.96875, "learning_rate": 9.949643429302386e-05, "loss": 3.4625, "step": 1044 }, { "epoch": 0.04567507321124175, "grad_norm": 2.828125, "learning_rate": 9.949546144932265e-05, "loss": 1.965, "step": 1045 }, { "epoch": 0.04571878141527164, "grad_norm": 3.484375, "learning_rate": 9.949448767157003e-05, "loss": 2.3129, "step": 1046 }, { "epoch": 0.045762489619301544, "grad_norm": 3.34375, "learning_rate": 9.949351295978441e-05, "loss": 1.81, "step": 1047 }, { "epoch": 0.04580619782333144, "grad_norm": 3.78125, "learning_rate": 9.949253731398416e-05, "loss": 2.5846, "step": 1048 }, { "epoch": 0.04584990602736134, "grad_norm": 4.25, "learning_rate": 9.949156073418769e-05, "loss": 2.8508, "step": 1049 }, { "epoch": 0.04589361423139123, "grad_norm": 2.75, "learning_rate": 9.949058322041345e-05, "loss": 2.2982, "step": 1050 }, { "epoch": 0.045937322435421125, "grad_norm": 3.65625, "learning_rate": 9.948960477267986e-05, "loss": 3.0965, "step": 1051 }, { "epoch": 0.045981030639451026, "grad_norm": 2.578125, "learning_rate": 9.948862539100541e-05, "loss": 2.16, "step": 1052 }, { "epoch": 0.04602473884348092, "grad_norm": 3.421875, "learning_rate": 9.948764507540858e-05, "loss": 2.1271, "step": 1053 }, { "epoch": 0.04606844704751082, "grad_norm": 2.875, "learning_rate": 9.948666382590785e-05, "loss": 2.0429, "step": 1054 }, { "epoch": 0.046112155251540714, "grad_norm": 2.6875, "learning_rate": 9.948568164252174e-05, "loss": 2.3033, "step": 1055 }, { "epoch": 0.04615586345557061, "grad_norm": 7.3125, "learning_rate": 9.948469852526881e-05, "loss": 2.4171, "step": 1056 }, { "epoch": 0.04619957165960051, "grad_norm": 3.5, "learning_rate": 9.948371447416758e-05, "loss": 2.5804, "step": 1057 }, { "epoch": 0.0462432798636304, "grad_norm": 2.78125, "learning_rate": 9.948272948923666e-05, "loss": 2.2676, "step": 1058 }, { "epoch": 0.0462869880676603, "grad_norm": 2.984375, "learning_rate": 9.94817435704946e-05, "loss": 2.327, "step": 1059 }, { "epoch": 0.046330696271690196, "grad_norm": 3.359375, "learning_rate": 9.948075671796004e-05, "loss": 1.9911, "step": 1060 }, { "epoch": 0.04637440447572009, "grad_norm": 3.71875, "learning_rate": 9.947976893165156e-05, "loss": 2.0437, "step": 1061 }, { "epoch": 0.04641811267974999, "grad_norm": 3.0, "learning_rate": 9.947878021158784e-05, "loss": 2.4868, "step": 1062 }, { "epoch": 0.046461820883779884, "grad_norm": 3.5625, "learning_rate": 9.947779055778752e-05, "loss": 2.313, "step": 1063 }, { "epoch": 0.046505529087809784, "grad_norm": 2.671875, "learning_rate": 9.947679997026929e-05, "loss": 2.0247, "step": 1064 }, { "epoch": 0.04654923729183968, "grad_norm": 12.3125, "learning_rate": 9.947580844905182e-05, "loss": 3.1176, "step": 1065 }, { "epoch": 0.04659294549586957, "grad_norm": 3.015625, "learning_rate": 9.947481599415384e-05, "loss": 2.522, "step": 1066 }, { "epoch": 0.04663665369989947, "grad_norm": 3.5, "learning_rate": 9.947382260559408e-05, "loss": 2.9242, "step": 1067 }, { "epoch": 0.046680361903929365, "grad_norm": 2.84375, "learning_rate": 9.947282828339128e-05, "loss": 1.9872, "step": 1068 }, { "epoch": 0.046724070107959266, "grad_norm": 2.828125, "learning_rate": 9.947183302756422e-05, "loss": 2.146, "step": 1069 }, { "epoch": 0.04676777831198916, "grad_norm": 2.921875, "learning_rate": 9.947083683813165e-05, "loss": 2.3677, "step": 1070 }, { "epoch": 0.04681148651601906, "grad_norm": 3.09375, "learning_rate": 9.946983971511239e-05, "loss": 2.5506, "step": 1071 }, { "epoch": 0.046855194720048954, "grad_norm": 2.703125, "learning_rate": 9.946884165852525e-05, "loss": 2.0168, "step": 1072 }, { "epoch": 0.04689890292407885, "grad_norm": 2.625, "learning_rate": 9.946784266838909e-05, "loss": 1.9957, "step": 1073 }, { "epoch": 0.04694261112810875, "grad_norm": 2.6875, "learning_rate": 9.94668427447227e-05, "loss": 2.315, "step": 1074 }, { "epoch": 0.04698631933213864, "grad_norm": 3.109375, "learning_rate": 9.946584188754504e-05, "loss": 2.1152, "step": 1075 }, { "epoch": 0.04703002753616854, "grad_norm": 2.703125, "learning_rate": 9.946484009687493e-05, "loss": 2.0469, "step": 1076 }, { "epoch": 0.047073735740198436, "grad_norm": 4.21875, "learning_rate": 9.946383737273129e-05, "loss": 2.7682, "step": 1077 }, { "epoch": 0.04711744394422833, "grad_norm": 3.90625, "learning_rate": 9.946283371513305e-05, "loss": 3.337, "step": 1078 }, { "epoch": 0.04716115214825823, "grad_norm": 3.140625, "learning_rate": 9.946182912409915e-05, "loss": 2.2733, "step": 1079 }, { "epoch": 0.047204860352288124, "grad_norm": 3.578125, "learning_rate": 9.946082359964855e-05, "loss": 2.0077, "step": 1080 }, { "epoch": 0.047248568556318024, "grad_norm": 3.046875, "learning_rate": 9.945981714180021e-05, "loss": 2.0198, "step": 1081 }, { "epoch": 0.04729227676034792, "grad_norm": 2.4375, "learning_rate": 9.945880975057315e-05, "loss": 1.9136, "step": 1082 }, { "epoch": 0.04733598496437781, "grad_norm": 3.8125, "learning_rate": 9.945780142598636e-05, "loss": 2.7276, "step": 1083 }, { "epoch": 0.04737969316840771, "grad_norm": 3.875, "learning_rate": 9.945679216805887e-05, "loss": 2.2242, "step": 1084 }, { "epoch": 0.047423401372437606, "grad_norm": 3.0, "learning_rate": 9.945578197680974e-05, "loss": 2.5395, "step": 1085 }, { "epoch": 0.047467109576467506, "grad_norm": 2.75, "learning_rate": 9.9454770852258e-05, "loss": 2.159, "step": 1086 }, { "epoch": 0.0475108177804974, "grad_norm": 3.25, "learning_rate": 9.945375879442277e-05, "loss": 1.941, "step": 1087 }, { "epoch": 0.04755452598452729, "grad_norm": 4.84375, "learning_rate": 9.945274580332316e-05, "loss": 2.7804, "step": 1088 }, { "epoch": 0.047598234188557194, "grad_norm": 2.96875, "learning_rate": 9.945173187897823e-05, "loss": 2.6826, "step": 1089 }, { "epoch": 0.04764194239258709, "grad_norm": 3.734375, "learning_rate": 9.945071702140716e-05, "loss": 2.7705, "step": 1090 }, { "epoch": 0.04768565059661699, "grad_norm": 3.375, "learning_rate": 9.94497012306291e-05, "loss": 2.5417, "step": 1091 }, { "epoch": 0.04772935880064688, "grad_norm": 4.09375, "learning_rate": 9.944868450666318e-05, "loss": 3.0292, "step": 1092 }, { "epoch": 0.047773067004676775, "grad_norm": 3.09375, "learning_rate": 9.944766684952863e-05, "loss": 2.1641, "step": 1093 }, { "epoch": 0.047816775208706676, "grad_norm": 3.53125, "learning_rate": 9.944664825924463e-05, "loss": 2.5355, "step": 1094 }, { "epoch": 0.04786048341273657, "grad_norm": 3.09375, "learning_rate": 9.944562873583042e-05, "loss": 2.6627, "step": 1095 }, { "epoch": 0.04790419161676647, "grad_norm": 2.65625, "learning_rate": 9.944460827930521e-05, "loss": 2.0701, "step": 1096 }, { "epoch": 0.047947899820796364, "grad_norm": 3.671875, "learning_rate": 9.944358688968831e-05, "loss": 2.3189, "step": 1097 }, { "epoch": 0.04799160802482626, "grad_norm": 3.265625, "learning_rate": 9.944256456699895e-05, "loss": 2.0858, "step": 1098 }, { "epoch": 0.04803531622885616, "grad_norm": 2.640625, "learning_rate": 9.944154131125642e-05, "loss": 2.1744, "step": 1099 }, { "epoch": 0.04807902443288605, "grad_norm": 2.9375, "learning_rate": 9.944051712248009e-05, "loss": 2.2945, "step": 1100 }, { "epoch": 0.04812273263691595, "grad_norm": 4.0, "learning_rate": 9.943949200068921e-05, "loss": 3.715, "step": 1101 }, { "epoch": 0.048166440840945846, "grad_norm": 2.84375, "learning_rate": 9.943846594590316e-05, "loss": 2.1754, "step": 1102 }, { "epoch": 0.04821014904497574, "grad_norm": 3.15625, "learning_rate": 9.943743895814131e-05, "loss": 2.2889, "step": 1103 }, { "epoch": 0.04825385724900564, "grad_norm": 4.1875, "learning_rate": 9.943641103742303e-05, "loss": 2.7616, "step": 1104 }, { "epoch": 0.04829756545303553, "grad_norm": 3.34375, "learning_rate": 9.943538218376773e-05, "loss": 2.4096, "step": 1105 }, { "epoch": 0.048341273657065434, "grad_norm": 2.75, "learning_rate": 9.943435239719482e-05, "loss": 2.522, "step": 1106 }, { "epoch": 0.04838498186109533, "grad_norm": 3.421875, "learning_rate": 9.943332167772372e-05, "loss": 2.0895, "step": 1107 }, { "epoch": 0.04842869006512522, "grad_norm": 3.640625, "learning_rate": 9.943229002537391e-05, "loss": 2.8614, "step": 1108 }, { "epoch": 0.04847239826915512, "grad_norm": 3.125, "learning_rate": 9.943125744016483e-05, "loss": 2.4255, "step": 1109 }, { "epoch": 0.048516106473185015, "grad_norm": 2.90625, "learning_rate": 9.943022392211599e-05, "loss": 2.4272, "step": 1110 }, { "epoch": 0.048559814677214916, "grad_norm": 3.625, "learning_rate": 9.942918947124686e-05, "loss": 3.2011, "step": 1111 }, { "epoch": 0.04860352288124481, "grad_norm": 3.46875, "learning_rate": 9.942815408757699e-05, "loss": 1.8, "step": 1112 }, { "epoch": 0.0486472310852747, "grad_norm": 2.734375, "learning_rate": 9.942711777112594e-05, "loss": 2.1547, "step": 1113 }, { "epoch": 0.048690939289304604, "grad_norm": 3.0625, "learning_rate": 9.942608052191321e-05, "loss": 2.4241, "step": 1114 }, { "epoch": 0.0487346474933345, "grad_norm": 3.359375, "learning_rate": 9.942504233995842e-05, "loss": 1.774, "step": 1115 }, { "epoch": 0.0487783556973644, "grad_norm": 7.46875, "learning_rate": 9.942400322528114e-05, "loss": 2.6383, "step": 1116 }, { "epoch": 0.04882206390139429, "grad_norm": 2.9375, "learning_rate": 9.942296317790099e-05, "loss": 1.941, "step": 1117 }, { "epoch": 0.048865772105424185, "grad_norm": 2.796875, "learning_rate": 9.942192219783758e-05, "loss": 1.9, "step": 1118 }, { "epoch": 0.048909480309454086, "grad_norm": 2.796875, "learning_rate": 9.942088028511059e-05, "loss": 2.382, "step": 1119 }, { "epoch": 0.04895318851348398, "grad_norm": 2.8125, "learning_rate": 9.941983743973964e-05, "loss": 2.1542, "step": 1120 }, { "epoch": 0.04899689671751388, "grad_norm": 3.640625, "learning_rate": 9.941879366174444e-05, "loss": 2.5014, "step": 1121 }, { "epoch": 0.04904060492154377, "grad_norm": 3.0625, "learning_rate": 9.941774895114467e-05, "loss": 2.2071, "step": 1122 }, { "epoch": 0.04908431312557367, "grad_norm": 3.921875, "learning_rate": 9.941670330796007e-05, "loss": 2.6761, "step": 1123 }, { "epoch": 0.04912802132960357, "grad_norm": 6.0625, "learning_rate": 9.941565673221034e-05, "loss": 2.7923, "step": 1124 }, { "epoch": 0.04917172953363346, "grad_norm": 2.953125, "learning_rate": 9.941460922391526e-05, "loss": 2.3441, "step": 1125 }, { "epoch": 0.04921543773766336, "grad_norm": 3.15625, "learning_rate": 9.941356078309457e-05, "loss": 2.1081, "step": 1126 }, { "epoch": 0.049259145941693255, "grad_norm": 2.546875, "learning_rate": 9.941251140976807e-05, "loss": 1.9443, "step": 1127 }, { "epoch": 0.04930285414572315, "grad_norm": 2.671875, "learning_rate": 9.941146110395557e-05, "loss": 2.0367, "step": 1128 }, { "epoch": 0.04934656234975305, "grad_norm": 3.265625, "learning_rate": 9.941040986567689e-05, "loss": 2.5013, "step": 1129 }, { "epoch": 0.04939027055378294, "grad_norm": 7.71875, "learning_rate": 9.940935769495186e-05, "loss": 1.9259, "step": 1130 }, { "epoch": 0.049433978757812844, "grad_norm": 4.6875, "learning_rate": 9.940830459180033e-05, "loss": 1.9416, "step": 1131 }, { "epoch": 0.04947768696184274, "grad_norm": 5.28125, "learning_rate": 9.940725055624218e-05, "loss": 2.3391, "step": 1132 }, { "epoch": 0.04952139516587263, "grad_norm": 3.78125, "learning_rate": 9.940619558829731e-05, "loss": 2.6132, "step": 1133 }, { "epoch": 0.04956510336990253, "grad_norm": 3.015625, "learning_rate": 9.940513968798563e-05, "loss": 2.4921, "step": 1134 }, { "epoch": 0.049608811573932425, "grad_norm": 2.96875, "learning_rate": 9.940408285532705e-05, "loss": 2.0902, "step": 1135 }, { "epoch": 0.049652519777962326, "grad_norm": 2.828125, "learning_rate": 9.940302509034152e-05, "loss": 2.2618, "step": 1136 }, { "epoch": 0.04969622798199222, "grad_norm": 3.578125, "learning_rate": 9.9401966393049e-05, "loss": 2.1943, "step": 1137 }, { "epoch": 0.04973993618602212, "grad_norm": 2.8125, "learning_rate": 9.940090676346948e-05, "loss": 2.2639, "step": 1138 }, { "epoch": 0.04978364439005201, "grad_norm": 3.0, "learning_rate": 9.939984620162295e-05, "loss": 2.3306, "step": 1139 }, { "epoch": 0.04982735259408191, "grad_norm": 5.375, "learning_rate": 9.939878470752942e-05, "loss": 1.9926, "step": 1140 }, { "epoch": 0.04987106079811181, "grad_norm": 2.96875, "learning_rate": 9.939772228120893e-05, "loss": 2.2294, "step": 1141 }, { "epoch": 0.0499147690021417, "grad_norm": 2.75, "learning_rate": 9.939665892268152e-05, "loss": 1.99, "step": 1142 }, { "epoch": 0.0499584772061716, "grad_norm": 3.34375, "learning_rate": 9.939559463196727e-05, "loss": 2.5241, "step": 1143 }, { "epoch": 0.050002185410201495, "grad_norm": 3.3125, "learning_rate": 9.939452940908626e-05, "loss": 2.1868, "step": 1144 }, { "epoch": 0.05004589361423139, "grad_norm": 3.890625, "learning_rate": 9.939346325405858e-05, "loss": 3.2205, "step": 1145 }, { "epoch": 0.05008960181826129, "grad_norm": 5.03125, "learning_rate": 9.939239616690436e-05, "loss": 3.0481, "step": 1146 }, { "epoch": 0.05013331002229118, "grad_norm": 3.09375, "learning_rate": 9.939132814764375e-05, "loss": 2.1522, "step": 1147 }, { "epoch": 0.050177018226321084, "grad_norm": 3.171875, "learning_rate": 9.939025919629688e-05, "loss": 2.2517, "step": 1148 }, { "epoch": 0.05022072643035098, "grad_norm": 3.046875, "learning_rate": 9.938918931288395e-05, "loss": 1.72, "step": 1149 }, { "epoch": 0.05026443463438087, "grad_norm": 3.125, "learning_rate": 9.93881184974251e-05, "loss": 2.559, "step": 1150 }, { "epoch": 0.05030814283841077, "grad_norm": 2.59375, "learning_rate": 9.938704674994062e-05, "loss": 2.2228, "step": 1151 }, { "epoch": 0.050351851042440665, "grad_norm": 3.1875, "learning_rate": 9.938597407045065e-05, "loss": 2.6145, "step": 1152 }, { "epoch": 0.050395559246470566, "grad_norm": 2.984375, "learning_rate": 9.93849004589755e-05, "loss": 2.0967, "step": 1153 }, { "epoch": 0.05043926745050046, "grad_norm": 4.125, "learning_rate": 9.938382591553538e-05, "loss": 2.0268, "step": 1154 }, { "epoch": 0.05048297565453035, "grad_norm": 3.640625, "learning_rate": 9.938275044015059e-05, "loss": 2.994, "step": 1155 }, { "epoch": 0.05052668385856025, "grad_norm": 3.90625, "learning_rate": 9.938167403284144e-05, "loss": 3.0414, "step": 1156 }, { "epoch": 0.05057039206259015, "grad_norm": 3.265625, "learning_rate": 9.938059669362822e-05, "loss": 2.5288, "step": 1157 }, { "epoch": 0.05061410026662005, "grad_norm": 2.640625, "learning_rate": 9.937951842253127e-05, "loss": 1.7965, "step": 1158 }, { "epoch": 0.05065780847064994, "grad_norm": 3.1875, "learning_rate": 9.937843921957095e-05, "loss": 2.2143, "step": 1159 }, { "epoch": 0.050701516674679835, "grad_norm": 3.3125, "learning_rate": 9.937735908476762e-05, "loss": 2.3242, "step": 1160 }, { "epoch": 0.050745224878709735, "grad_norm": 5.21875, "learning_rate": 9.937627801814165e-05, "loss": 1.9423, "step": 1161 }, { "epoch": 0.05078893308273963, "grad_norm": 3.59375, "learning_rate": 9.937519601971343e-05, "loss": 2.6635, "step": 1162 }, { "epoch": 0.05083264128676953, "grad_norm": 6.1875, "learning_rate": 9.937411308950342e-05, "loss": 2.6874, "step": 1163 }, { "epoch": 0.05087634949079942, "grad_norm": 2.6875, "learning_rate": 9.937302922753203e-05, "loss": 2.2795, "step": 1164 }, { "epoch": 0.05092005769482932, "grad_norm": 3.328125, "learning_rate": 9.937194443381972e-05, "loss": 1.9133, "step": 1165 }, { "epoch": 0.05096376589885922, "grad_norm": 2.9375, "learning_rate": 9.937085870838695e-05, "loss": 1.9703, "step": 1166 }, { "epoch": 0.05100747410288911, "grad_norm": 2.53125, "learning_rate": 9.936977205125424e-05, "loss": 1.8399, "step": 1167 }, { "epoch": 0.05105118230691901, "grad_norm": 3.703125, "learning_rate": 9.936868446244208e-05, "loss": 2.7845, "step": 1168 }, { "epoch": 0.051094890510948905, "grad_norm": 3.359375, "learning_rate": 9.936759594197098e-05, "loss": 2.5411, "step": 1169 }, { "epoch": 0.0511385987149788, "grad_norm": 4.1875, "learning_rate": 9.936650648986148e-05, "loss": 2.3818, "step": 1170 }, { "epoch": 0.0511823069190087, "grad_norm": 3.484375, "learning_rate": 9.936541610613416e-05, "loss": 2.4754, "step": 1171 }, { "epoch": 0.05122601512303859, "grad_norm": 3.140625, "learning_rate": 9.936432479080961e-05, "loss": 1.9356, "step": 1172 }, { "epoch": 0.051269723327068494, "grad_norm": 3.65625, "learning_rate": 9.93632325439084e-05, "loss": 2.1584, "step": 1173 }, { "epoch": 0.05131343153109839, "grad_norm": 2.5, "learning_rate": 9.936213936545113e-05, "loss": 2.1896, "step": 1174 }, { "epoch": 0.05135713973512828, "grad_norm": 2.671875, "learning_rate": 9.936104525545846e-05, "loss": 2.0162, "step": 1175 }, { "epoch": 0.05140084793915818, "grad_norm": 2.625, "learning_rate": 9.935995021395102e-05, "loss": 2.0853, "step": 1176 }, { "epoch": 0.051444556143188075, "grad_norm": 3.078125, "learning_rate": 9.935885424094948e-05, "loss": 1.9196, "step": 1177 }, { "epoch": 0.051488264347217975, "grad_norm": 2.734375, "learning_rate": 9.935775733647452e-05, "loss": 2.0876, "step": 1178 }, { "epoch": 0.05153197255124787, "grad_norm": 3.765625, "learning_rate": 9.935665950054684e-05, "loss": 2.6171, "step": 1179 }, { "epoch": 0.05157568075527776, "grad_norm": 2.875, "learning_rate": 9.935556073318716e-05, "loss": 2.4073, "step": 1180 }, { "epoch": 0.05161938895930766, "grad_norm": 3.546875, "learning_rate": 9.935446103441623e-05, "loss": 2.6037, "step": 1181 }, { "epoch": 0.05166309716333756, "grad_norm": 2.796875, "learning_rate": 9.935336040425478e-05, "loss": 2.2674, "step": 1182 }, { "epoch": 0.05170680536736746, "grad_norm": 8.0625, "learning_rate": 9.935225884272359e-05, "loss": 3.6372, "step": 1183 }, { "epoch": 0.05175051357139735, "grad_norm": 3.03125, "learning_rate": 9.935115634984345e-05, "loss": 2.1184, "step": 1184 }, { "epoch": 0.051794221775427245, "grad_norm": 6.875, "learning_rate": 9.935005292563515e-05, "loss": 2.7781, "step": 1185 }, { "epoch": 0.051837929979457145, "grad_norm": 2.765625, "learning_rate": 9.934894857011953e-05, "loss": 2.1325, "step": 1186 }, { "epoch": 0.05188163818348704, "grad_norm": 2.640625, "learning_rate": 9.934784328331743e-05, "loss": 2.4299, "step": 1187 }, { "epoch": 0.05192534638751694, "grad_norm": 3.21875, "learning_rate": 9.934673706524969e-05, "loss": 2.3853, "step": 1188 }, { "epoch": 0.05196905459154683, "grad_norm": 2.84375, "learning_rate": 9.934562991593722e-05, "loss": 1.9624, "step": 1189 }, { "epoch": 0.05201276279557673, "grad_norm": 8.625, "learning_rate": 9.934452183540089e-05, "loss": 2.052, "step": 1190 }, { "epoch": 0.05205647099960663, "grad_norm": 2.96875, "learning_rate": 9.934341282366162e-05, "loss": 2.0976, "step": 1191 }, { "epoch": 0.05210017920363652, "grad_norm": 2.734375, "learning_rate": 9.934230288074032e-05, "loss": 2.397, "step": 1192 }, { "epoch": 0.05214388740766642, "grad_norm": 2.828125, "learning_rate": 9.934119200665795e-05, "loss": 2.1238, "step": 1193 }, { "epoch": 0.052187595611696315, "grad_norm": 3.4375, "learning_rate": 9.934008020143548e-05, "loss": 2.1589, "step": 1194 }, { "epoch": 0.05223130381572621, "grad_norm": 2.796875, "learning_rate": 9.933896746509391e-05, "loss": 2.0372, "step": 1195 }, { "epoch": 0.05227501201975611, "grad_norm": 2.8125, "learning_rate": 9.933785379765417e-05, "loss": 2.1988, "step": 1196 }, { "epoch": 0.052318720223786, "grad_norm": 3.25, "learning_rate": 9.933673919913735e-05, "loss": 2.2642, "step": 1197 }, { "epoch": 0.0523624284278159, "grad_norm": 2.5625, "learning_rate": 9.933562366956445e-05, "loss": 1.705, "step": 1198 }, { "epoch": 0.0524061366318458, "grad_norm": 2.8125, "learning_rate": 9.933450720895651e-05, "loss": 2.5751, "step": 1199 }, { "epoch": 0.05244984483587569, "grad_norm": 2.71875, "learning_rate": 9.933338981733464e-05, "loss": 2.3266, "step": 1200 }, { "epoch": 0.05249355303990559, "grad_norm": 3.015625, "learning_rate": 9.933227149471991e-05, "loss": 2.8926, "step": 1201 }, { "epoch": 0.052537261243935485, "grad_norm": 2.6875, "learning_rate": 9.933115224113338e-05, "loss": 2.0426, "step": 1202 }, { "epoch": 0.052580969447965385, "grad_norm": 2.578125, "learning_rate": 9.933003205659623e-05, "loss": 2.0112, "step": 1203 }, { "epoch": 0.05262467765199528, "grad_norm": 5.15625, "learning_rate": 9.93289109411296e-05, "loss": 1.9949, "step": 1204 }, { "epoch": 0.05266838585602517, "grad_norm": 3.09375, "learning_rate": 9.93277888947546e-05, "loss": 2.1734, "step": 1205 }, { "epoch": 0.05271209406005507, "grad_norm": 3.25, "learning_rate": 9.932666591749242e-05, "loss": 3.1198, "step": 1206 }, { "epoch": 0.05275580226408497, "grad_norm": 2.84375, "learning_rate": 9.932554200936429e-05, "loss": 2.1966, "step": 1207 }, { "epoch": 0.05279951046811487, "grad_norm": 3.546875, "learning_rate": 9.932441717039138e-05, "loss": 2.2443, "step": 1208 }, { "epoch": 0.05284321867214476, "grad_norm": 9.1875, "learning_rate": 9.932329140059494e-05, "loss": 2.2764, "step": 1209 }, { "epoch": 0.05288692687617466, "grad_norm": 3.375, "learning_rate": 9.932216469999618e-05, "loss": 2.5796, "step": 1210 }, { "epoch": 0.052930635080204555, "grad_norm": 3.109375, "learning_rate": 9.93210370686164e-05, "loss": 2.1995, "step": 1211 }, { "epoch": 0.05297434328423445, "grad_norm": 2.984375, "learning_rate": 9.931990850647688e-05, "loss": 2.3949, "step": 1212 }, { "epoch": 0.05301805148826435, "grad_norm": 2.71875, "learning_rate": 9.931877901359888e-05, "loss": 1.9585, "step": 1213 }, { "epoch": 0.05306175969229424, "grad_norm": 3.140625, "learning_rate": 9.931764859000375e-05, "loss": 2.6056, "step": 1214 }, { "epoch": 0.05310546789632414, "grad_norm": 3.015625, "learning_rate": 9.931651723571282e-05, "loss": 2.7266, "step": 1215 }, { "epoch": 0.05314917610035404, "grad_norm": 3.609375, "learning_rate": 9.931538495074743e-05, "loss": 2.2353, "step": 1216 }, { "epoch": 0.05319288430438393, "grad_norm": 3.078125, "learning_rate": 9.931425173512895e-05, "loss": 2.2246, "step": 1217 }, { "epoch": 0.05323659250841383, "grad_norm": 2.5625, "learning_rate": 9.931311758887877e-05, "loss": 1.8312, "step": 1218 }, { "epoch": 0.053280300712443725, "grad_norm": 2.625, "learning_rate": 9.931198251201828e-05, "loss": 1.859, "step": 1219 }, { "epoch": 0.053324008916473625, "grad_norm": 3.265625, "learning_rate": 9.931084650456892e-05, "loss": 2.4562, "step": 1220 }, { "epoch": 0.05336771712050352, "grad_norm": 5.03125, "learning_rate": 9.930970956655212e-05, "loss": 3.3246, "step": 1221 }, { "epoch": 0.05341142532453341, "grad_norm": 2.828125, "learning_rate": 9.930857169798931e-05, "loss": 2.1604, "step": 1222 }, { "epoch": 0.05345513352856331, "grad_norm": 3.65625, "learning_rate": 9.9307432898902e-05, "loss": 2.7602, "step": 1223 }, { "epoch": 0.05349884173259321, "grad_norm": 3.015625, "learning_rate": 9.930629316931168e-05, "loss": 2.5866, "step": 1224 }, { "epoch": 0.05354254993662311, "grad_norm": 2.609375, "learning_rate": 9.930515250923984e-05, "loss": 1.9332, "step": 1225 }, { "epoch": 0.053586258140653, "grad_norm": 2.609375, "learning_rate": 9.9304010918708e-05, "loss": 1.9111, "step": 1226 }, { "epoch": 0.053629966344682894, "grad_norm": 2.859375, "learning_rate": 9.930286839773773e-05, "loss": 2.2201, "step": 1227 }, { "epoch": 0.053673674548712795, "grad_norm": 2.953125, "learning_rate": 9.930172494635057e-05, "loss": 2.6325, "step": 1228 }, { "epoch": 0.05371738275274269, "grad_norm": 3.953125, "learning_rate": 9.93005805645681e-05, "loss": 2.6803, "step": 1229 }, { "epoch": 0.05376109095677259, "grad_norm": 4.34375, "learning_rate": 9.929943525241194e-05, "loss": 3.2208, "step": 1230 }, { "epoch": 0.05380479916080248, "grad_norm": 2.90625, "learning_rate": 9.929828900990367e-05, "loss": 2.3413, "step": 1231 }, { "epoch": 0.053848507364832376, "grad_norm": 3.296875, "learning_rate": 9.929714183706493e-05, "loss": 2.2428, "step": 1232 }, { "epoch": 0.05389221556886228, "grad_norm": 3.984375, "learning_rate": 9.929599373391738e-05, "loss": 2.9937, "step": 1233 }, { "epoch": 0.05393592377289217, "grad_norm": 2.953125, "learning_rate": 9.92948447004827e-05, "loss": 2.2362, "step": 1234 }, { "epoch": 0.05397963197692207, "grad_norm": 2.890625, "learning_rate": 9.929369473678253e-05, "loss": 3.1684, "step": 1235 }, { "epoch": 0.054023340180951965, "grad_norm": 2.671875, "learning_rate": 9.92925438428386e-05, "loss": 2.0654, "step": 1236 }, { "epoch": 0.05406704838498186, "grad_norm": 2.796875, "learning_rate": 9.929139201867263e-05, "loss": 2.3591, "step": 1237 }, { "epoch": 0.05411075658901176, "grad_norm": 2.421875, "learning_rate": 9.929023926430636e-05, "loss": 2.0845, "step": 1238 }, { "epoch": 0.05415446479304165, "grad_norm": 2.75, "learning_rate": 9.928908557976153e-05, "loss": 2.1563, "step": 1239 }, { "epoch": 0.05419817299707155, "grad_norm": 2.8125, "learning_rate": 9.928793096505992e-05, "loss": 1.9936, "step": 1240 }, { "epoch": 0.05424188120110145, "grad_norm": 4.21875, "learning_rate": 9.928677542022331e-05, "loss": 2.7263, "step": 1241 }, { "epoch": 0.05428558940513134, "grad_norm": 2.796875, "learning_rate": 9.928561894527353e-05, "loss": 1.5295, "step": 1242 }, { "epoch": 0.05432929760916124, "grad_norm": 2.84375, "learning_rate": 9.928446154023238e-05, "loss": 2.5526, "step": 1243 }, { "epoch": 0.054373005813191135, "grad_norm": 3.0, "learning_rate": 9.928330320512171e-05, "loss": 2.418, "step": 1244 }, { "epoch": 0.054416714017221035, "grad_norm": 2.71875, "learning_rate": 9.928214393996339e-05, "loss": 2.2359, "step": 1245 }, { "epoch": 0.05446042222125093, "grad_norm": 2.578125, "learning_rate": 9.928098374477927e-05, "loss": 2.3314, "step": 1246 }, { "epoch": 0.05450413042528082, "grad_norm": 2.40625, "learning_rate": 9.927982261959127e-05, "loss": 1.9215, "step": 1247 }, { "epoch": 0.05454783862931072, "grad_norm": 3.40625, "learning_rate": 9.927866056442128e-05, "loss": 1.9057, "step": 1248 }, { "epoch": 0.054591546833340616, "grad_norm": 3.125, "learning_rate": 9.927749757929125e-05, "loss": 2.2604, "step": 1249 }, { "epoch": 0.05463525503737052, "grad_norm": 3.71875, "learning_rate": 9.927633366422314e-05, "loss": 2.3643, "step": 1250 }, { "epoch": 0.05467896324140041, "grad_norm": 4.03125, "learning_rate": 9.927516881923889e-05, "loss": 2.77, "step": 1251 }, { "epoch": 0.054722671445430304, "grad_norm": 6.625, "learning_rate": 9.927400304436047e-05, "loss": 1.8398, "step": 1252 }, { "epoch": 0.054766379649460205, "grad_norm": 2.765625, "learning_rate": 9.92728363396099e-05, "loss": 2.0013, "step": 1253 }, { "epoch": 0.0548100878534901, "grad_norm": 3.125, "learning_rate": 9.927166870500922e-05, "loss": 2.3079, "step": 1254 }, { "epoch": 0.05485379605752, "grad_norm": 2.875, "learning_rate": 9.927050014058042e-05, "loss": 2.1234, "step": 1255 }, { "epoch": 0.05489750426154989, "grad_norm": 2.953125, "learning_rate": 9.926933064634558e-05, "loss": 2.1321, "step": 1256 }, { "epoch": 0.054941212465579786, "grad_norm": 2.984375, "learning_rate": 9.926816022232675e-05, "loss": 2.8561, "step": 1257 }, { "epoch": 0.05498492066960969, "grad_norm": 3.234375, "learning_rate": 9.926698886854604e-05, "loss": 2.4861, "step": 1258 }, { "epoch": 0.05502862887363958, "grad_norm": 3.3125, "learning_rate": 9.926581658502554e-05, "loss": 2.2771, "step": 1259 }, { "epoch": 0.05507233707766948, "grad_norm": 2.734375, "learning_rate": 9.926464337178738e-05, "loss": 2.2922, "step": 1260 }, { "epoch": 0.055116045281699375, "grad_norm": 3.5, "learning_rate": 9.92634692288537e-05, "loss": 2.449, "step": 1261 }, { "epoch": 0.05515975348572927, "grad_norm": 3.296875, "learning_rate": 9.926229415624666e-05, "loss": 2.2916, "step": 1262 }, { "epoch": 0.05520346168975917, "grad_norm": 2.765625, "learning_rate": 9.926111815398843e-05, "loss": 2.6731, "step": 1263 }, { "epoch": 0.05524716989378906, "grad_norm": 32.25, "learning_rate": 9.92599412221012e-05, "loss": 2.2843, "step": 1264 }, { "epoch": 0.05529087809781896, "grad_norm": 2.640625, "learning_rate": 9.925876336060719e-05, "loss": 1.9131, "step": 1265 }, { "epoch": 0.055334586301848857, "grad_norm": 3.421875, "learning_rate": 9.925758456952862e-05, "loss": 1.8637, "step": 1266 }, { "epoch": 0.05537829450587875, "grad_norm": 3.109375, "learning_rate": 9.925640484888774e-05, "loss": 2.5708, "step": 1267 }, { "epoch": 0.05542200270990865, "grad_norm": 2.515625, "learning_rate": 9.925522419870681e-05, "loss": 2.1795, "step": 1268 }, { "epoch": 0.055465710913938544, "grad_norm": 3.1875, "learning_rate": 9.92540426190081e-05, "loss": 2.3094, "step": 1269 }, { "epoch": 0.055509419117968445, "grad_norm": 2.9375, "learning_rate": 9.925286010981394e-05, "loss": 2.4622, "step": 1270 }, { "epoch": 0.05555312732199834, "grad_norm": 2.5625, "learning_rate": 9.925167667114661e-05, "loss": 1.9271, "step": 1271 }, { "epoch": 0.05559683552602823, "grad_norm": 2.765625, "learning_rate": 9.925049230302846e-05, "loss": 2.2545, "step": 1272 }, { "epoch": 0.05564054373005813, "grad_norm": 3.0625, "learning_rate": 9.924930700548185e-05, "loss": 2.7818, "step": 1273 }, { "epoch": 0.055684251934088026, "grad_norm": 2.875, "learning_rate": 9.924812077852913e-05, "loss": 1.8465, "step": 1274 }, { "epoch": 0.05572796013811793, "grad_norm": 3.234375, "learning_rate": 9.924693362219269e-05, "loss": 2.0316, "step": 1275 }, { "epoch": 0.05577166834214782, "grad_norm": 2.953125, "learning_rate": 9.924574553649496e-05, "loss": 1.9583, "step": 1276 }, { "epoch": 0.05581537654617772, "grad_norm": 3.546875, "learning_rate": 9.924455652145831e-05, "loss": 2.0286, "step": 1277 }, { "epoch": 0.055859084750207615, "grad_norm": 3.328125, "learning_rate": 9.924336657710522e-05, "loss": 1.9079, "step": 1278 }, { "epoch": 0.05590279295423751, "grad_norm": 2.859375, "learning_rate": 9.924217570345813e-05, "loss": 2.3216, "step": 1279 }, { "epoch": 0.05594650115826741, "grad_norm": 2.4375, "learning_rate": 9.924098390053951e-05, "loss": 2.2232, "step": 1280 }, { "epoch": 0.0559902093622973, "grad_norm": 2.53125, "learning_rate": 9.923979116837185e-05, "loss": 2.5149, "step": 1281 }, { "epoch": 0.0560339175663272, "grad_norm": 5.0, "learning_rate": 9.923859750697768e-05, "loss": 2.9284, "step": 1282 }, { "epoch": 0.0560776257703571, "grad_norm": 24.25, "learning_rate": 9.923740291637951e-05, "loss": 0.6024, "step": 1283 }, { "epoch": 0.05612133397438699, "grad_norm": 3.421875, "learning_rate": 9.923620739659989e-05, "loss": 3.0348, "step": 1284 }, { "epoch": 0.05616504217841689, "grad_norm": 2.71875, "learning_rate": 9.923501094766136e-05, "loss": 2.269, "step": 1285 }, { "epoch": 0.056208750382446784, "grad_norm": 2.890625, "learning_rate": 9.923381356958654e-05, "loss": 2.0336, "step": 1286 }, { "epoch": 0.056252458586476685, "grad_norm": 4.6875, "learning_rate": 9.923261526239798e-05, "loss": 2.3973, "step": 1287 }, { "epoch": 0.05629616679050658, "grad_norm": 2.734375, "learning_rate": 9.923141602611834e-05, "loss": 2.2979, "step": 1288 }, { "epoch": 0.05633987499453647, "grad_norm": 2.515625, "learning_rate": 9.92302158607702e-05, "loss": 2.3606, "step": 1289 }, { "epoch": 0.05638358319856637, "grad_norm": 4.6875, "learning_rate": 9.922901476637625e-05, "loss": 2.5858, "step": 1290 }, { "epoch": 0.056427291402596266, "grad_norm": 2.921875, "learning_rate": 9.922781274295913e-05, "loss": 2.6504, "step": 1291 }, { "epoch": 0.05647099960662617, "grad_norm": 2.78125, "learning_rate": 9.922660979054155e-05, "loss": 2.3354, "step": 1292 }, { "epoch": 0.05651470781065606, "grad_norm": 2.203125, "learning_rate": 9.922540590914619e-05, "loss": 1.7236, "step": 1293 }, { "epoch": 0.056558416014685954, "grad_norm": 2.71875, "learning_rate": 9.922420109879578e-05, "loss": 1.9761, "step": 1294 }, { "epoch": 0.056602124218715855, "grad_norm": 3.0625, "learning_rate": 9.922299535951305e-05, "loss": 2.3798, "step": 1295 }, { "epoch": 0.05664583242274575, "grad_norm": 2.609375, "learning_rate": 9.922178869132075e-05, "loss": 2.1449, "step": 1296 }, { "epoch": 0.05668954062677565, "grad_norm": 3.171875, "learning_rate": 9.922058109424167e-05, "loss": 2.036, "step": 1297 }, { "epoch": 0.05673324883080554, "grad_norm": 3.0, "learning_rate": 9.921937256829859e-05, "loss": 2.653, "step": 1298 }, { "epoch": 0.056776957034835436, "grad_norm": 3.0625, "learning_rate": 9.921816311351431e-05, "loss": 2.4909, "step": 1299 }, { "epoch": 0.05682066523886534, "grad_norm": 3.046875, "learning_rate": 9.921695272991165e-05, "loss": 2.1007, "step": 1300 }, { "epoch": 0.05686437344289523, "grad_norm": 2.40625, "learning_rate": 9.921574141751346e-05, "loss": 1.91, "step": 1301 }, { "epoch": 0.05690808164692513, "grad_norm": 3.34375, "learning_rate": 9.921452917634261e-05, "loss": 2.7349, "step": 1302 }, { "epoch": 0.056951789850955024, "grad_norm": 2.84375, "learning_rate": 9.921331600642196e-05, "loss": 1.8035, "step": 1303 }, { "epoch": 0.05699549805498492, "grad_norm": 3.390625, "learning_rate": 9.921210190777441e-05, "loss": 2.4165, "step": 1304 }, { "epoch": 0.05703920625901482, "grad_norm": 3.171875, "learning_rate": 9.921088688042287e-05, "loss": 2.0328, "step": 1305 }, { "epoch": 0.05708291446304471, "grad_norm": 3.171875, "learning_rate": 9.920967092439027e-05, "loss": 2.5423, "step": 1306 }, { "epoch": 0.05712662266707461, "grad_norm": 6.59375, "learning_rate": 9.920845403969957e-05, "loss": 2.742, "step": 1307 }, { "epoch": 0.057170330871104506, "grad_norm": 3.046875, "learning_rate": 9.920723622637371e-05, "loss": 2.1735, "step": 1308 }, { "epoch": 0.0572140390751344, "grad_norm": 2.984375, "learning_rate": 9.92060174844357e-05, "loss": 2.5959, "step": 1309 }, { "epoch": 0.0572577472791643, "grad_norm": 2.71875, "learning_rate": 9.920479781390852e-05, "loss": 2.1141, "step": 1310 }, { "epoch": 0.057301455483194194, "grad_norm": 2.421875, "learning_rate": 9.920357721481518e-05, "loss": 1.6989, "step": 1311 }, { "epoch": 0.057345163687224095, "grad_norm": 2.546875, "learning_rate": 9.920235568717873e-05, "loss": 1.9488, "step": 1312 }, { "epoch": 0.05738887189125399, "grad_norm": 2.859375, "learning_rate": 9.920113323102223e-05, "loss": 2.0413, "step": 1313 }, { "epoch": 0.05743258009528388, "grad_norm": 3.03125, "learning_rate": 9.919990984636871e-05, "loss": 2.6842, "step": 1314 }, { "epoch": 0.05747628829931378, "grad_norm": 4.0625, "learning_rate": 9.91986855332413e-05, "loss": 2.3465, "step": 1315 }, { "epoch": 0.057519996503343676, "grad_norm": 3.21875, "learning_rate": 9.919746029166311e-05, "loss": 2.2191, "step": 1316 }, { "epoch": 0.05756370470737358, "grad_norm": 3.46875, "learning_rate": 9.91962341216572e-05, "loss": 2.654, "step": 1317 }, { "epoch": 0.05760741291140347, "grad_norm": 2.625, "learning_rate": 9.919500702324677e-05, "loss": 1.9608, "step": 1318 }, { "epoch": 0.057651121115433364, "grad_norm": 2.734375, "learning_rate": 9.919377899645497e-05, "loss": 1.9015, "step": 1319 }, { "epoch": 0.057694829319463264, "grad_norm": 3.3125, "learning_rate": 9.919255004130494e-05, "loss": 2.0449, "step": 1320 }, { "epoch": 0.05773853752349316, "grad_norm": 3.03125, "learning_rate": 9.919132015781991e-05, "loss": 2.9302, "step": 1321 }, { "epoch": 0.05778224572752306, "grad_norm": 2.65625, "learning_rate": 9.919008934602307e-05, "loss": 1.5977, "step": 1322 }, { "epoch": 0.05782595393155295, "grad_norm": 3.34375, "learning_rate": 9.918885760593764e-05, "loss": 2.4274, "step": 1323 }, { "epoch": 0.057869662135582846, "grad_norm": 3.265625, "learning_rate": 9.918762493758689e-05, "loss": 1.7857, "step": 1324 }, { "epoch": 0.057913370339612746, "grad_norm": 2.734375, "learning_rate": 9.918639134099407e-05, "loss": 2.3214, "step": 1325 }, { "epoch": 0.05795707854364264, "grad_norm": 2.703125, "learning_rate": 9.918515681618246e-05, "loss": 2.0117, "step": 1326 }, { "epoch": 0.05800078674767254, "grad_norm": 2.890625, "learning_rate": 9.918392136317533e-05, "loss": 2.0574, "step": 1327 }, { "epoch": 0.058044494951702434, "grad_norm": 3.84375, "learning_rate": 9.918268498199604e-05, "loss": 1.8831, "step": 1328 }, { "epoch": 0.05808820315573233, "grad_norm": 2.78125, "learning_rate": 9.918144767266791e-05, "loss": 1.9005, "step": 1329 }, { "epoch": 0.05813191135976223, "grad_norm": 2.53125, "learning_rate": 9.918020943521427e-05, "loss": 1.7483, "step": 1330 }, { "epoch": 0.05817561956379212, "grad_norm": 2.796875, "learning_rate": 9.91789702696585e-05, "loss": 1.9886, "step": 1331 }, { "epoch": 0.05821932776782202, "grad_norm": 2.984375, "learning_rate": 9.917773017602399e-05, "loss": 2.307, "step": 1332 }, { "epoch": 0.058263035971851916, "grad_norm": 2.578125, "learning_rate": 9.917648915433413e-05, "loss": 2.0734, "step": 1333 }, { "epoch": 0.05830674417588181, "grad_norm": 2.4375, "learning_rate": 9.917524720461234e-05, "loss": 2.1241, "step": 1334 }, { "epoch": 0.05835045237991171, "grad_norm": 2.640625, "learning_rate": 9.917400432688208e-05, "loss": 1.9862, "step": 1335 }, { "epoch": 0.058394160583941604, "grad_norm": 2.484375, "learning_rate": 9.917276052116677e-05, "loss": 2.1588, "step": 1336 }, { "epoch": 0.058437868787971504, "grad_norm": 2.734375, "learning_rate": 9.917151578748994e-05, "loss": 2.2892, "step": 1337 }, { "epoch": 0.0584815769920014, "grad_norm": 2.40625, "learning_rate": 9.9170270125875e-05, "loss": 2.1376, "step": 1338 }, { "epoch": 0.05852528519603129, "grad_norm": 3.03125, "learning_rate": 9.916902353634552e-05, "loss": 2.2808, "step": 1339 }, { "epoch": 0.05856899340006119, "grad_norm": 4.0625, "learning_rate": 9.916777601892499e-05, "loss": 1.8809, "step": 1340 }, { "epoch": 0.058612701604091086, "grad_norm": 3.953125, "learning_rate": 9.916652757363698e-05, "loss": 3.0737, "step": 1341 }, { "epoch": 0.058656409808120986, "grad_norm": 2.453125, "learning_rate": 9.916527820050504e-05, "loss": 2.0382, "step": 1342 }, { "epoch": 0.05870011801215088, "grad_norm": 2.515625, "learning_rate": 9.916402789955272e-05, "loss": 2.1752, "step": 1343 }, { "epoch": 0.05874382621618078, "grad_norm": 2.765625, "learning_rate": 9.916277667080365e-05, "loss": 2.1868, "step": 1344 }, { "epoch": 0.058787534420210674, "grad_norm": 2.625, "learning_rate": 9.916152451428144e-05, "loss": 1.9418, "step": 1345 }, { "epoch": 0.05883124262424057, "grad_norm": 2.4375, "learning_rate": 9.91602714300097e-05, "loss": 1.4852, "step": 1346 }, { "epoch": 0.05887495082827047, "grad_norm": 2.734375, "learning_rate": 9.91590174180121e-05, "loss": 1.6127, "step": 1347 }, { "epoch": 0.05891865903230036, "grad_norm": 3.21875, "learning_rate": 9.915776247831227e-05, "loss": 2.0565, "step": 1348 }, { "epoch": 0.05896236723633026, "grad_norm": 2.890625, "learning_rate": 9.915650661093395e-05, "loss": 2.064, "step": 1349 }, { "epoch": 0.059006075440360156, "grad_norm": 2.71875, "learning_rate": 9.915524981590079e-05, "loss": 1.8135, "step": 1350 }, { "epoch": 0.05904978364439005, "grad_norm": 2.59375, "learning_rate": 9.915399209323652e-05, "loss": 2.0698, "step": 1351 }, { "epoch": 0.05909349184841995, "grad_norm": 2.46875, "learning_rate": 9.915273344296488e-05, "loss": 2.1185, "step": 1352 }, { "epoch": 0.059137200052449844, "grad_norm": 3.90625, "learning_rate": 9.915147386510964e-05, "loss": 2.5517, "step": 1353 }, { "epoch": 0.059180908256479745, "grad_norm": 3.15625, "learning_rate": 9.915021335969452e-05, "loss": 2.5902, "step": 1354 }, { "epoch": 0.05922461646050964, "grad_norm": 3.78125, "learning_rate": 9.914895192674336e-05, "loss": 2.0517, "step": 1355 }, { "epoch": 0.05926832466453953, "grad_norm": 2.859375, "learning_rate": 9.914768956627994e-05, "loss": 2.1344, "step": 1356 }, { "epoch": 0.05931203286856943, "grad_norm": 2.546875, "learning_rate": 9.914642627832808e-05, "loss": 2.0166, "step": 1357 }, { "epoch": 0.059355741072599326, "grad_norm": 3.0, "learning_rate": 9.914516206291165e-05, "loss": 1.9574, "step": 1358 }, { "epoch": 0.059399449276629226, "grad_norm": 3.5, "learning_rate": 9.914389692005446e-05, "loss": 2.3096, "step": 1359 }, { "epoch": 0.05944315748065912, "grad_norm": 3.234375, "learning_rate": 9.914263084978042e-05, "loss": 1.9114, "step": 1360 }, { "epoch": 0.059486865684689014, "grad_norm": 2.453125, "learning_rate": 9.914136385211341e-05, "loss": 1.9104, "step": 1361 }, { "epoch": 0.059530573888718914, "grad_norm": 3.515625, "learning_rate": 9.914009592707733e-05, "loss": 2.0492, "step": 1362 }, { "epoch": 0.05957428209274881, "grad_norm": 3.421875, "learning_rate": 9.913882707469614e-05, "loss": 2.3943, "step": 1363 }, { "epoch": 0.05961799029677871, "grad_norm": 2.78125, "learning_rate": 9.913755729499376e-05, "loss": 2.0374, "step": 1364 }, { "epoch": 0.0596616985008086, "grad_norm": 2.375, "learning_rate": 9.913628658799415e-05, "loss": 2.1342, "step": 1365 }, { "epoch": 0.059705406704838496, "grad_norm": 3.6875, "learning_rate": 9.91350149537213e-05, "loss": 2.3227, "step": 1366 }, { "epoch": 0.059749114908868396, "grad_norm": 3.03125, "learning_rate": 9.913374239219922e-05, "loss": 2.3847, "step": 1367 }, { "epoch": 0.05979282311289829, "grad_norm": 2.453125, "learning_rate": 9.913246890345189e-05, "loss": 1.9194, "step": 1368 }, { "epoch": 0.05983653131692819, "grad_norm": 3.125, "learning_rate": 9.913119448750337e-05, "loss": 2.4296, "step": 1369 }, { "epoch": 0.059880239520958084, "grad_norm": 2.609375, "learning_rate": 9.912991914437771e-05, "loss": 1.7472, "step": 1370 }, { "epoch": 0.05992394772498798, "grad_norm": 3.21875, "learning_rate": 9.912864287409896e-05, "loss": 1.9631, "step": 1371 }, { "epoch": 0.05996765592901788, "grad_norm": 3.921875, "learning_rate": 9.912736567669121e-05, "loss": 1.94, "step": 1372 }, { "epoch": 0.06001136413304777, "grad_norm": 3.859375, "learning_rate": 9.912608755217859e-05, "loss": 2.1358, "step": 1373 }, { "epoch": 0.06005507233707767, "grad_norm": 2.546875, "learning_rate": 9.912480850058516e-05, "loss": 1.6588, "step": 1374 }, { "epoch": 0.060098780541107566, "grad_norm": 2.421875, "learning_rate": 9.912352852193514e-05, "loss": 2.0139, "step": 1375 }, { "epoch": 0.06014248874513746, "grad_norm": 4.125, "learning_rate": 9.912224761625262e-05, "loss": 2.7426, "step": 1376 }, { "epoch": 0.06018619694916736, "grad_norm": 3.46875, "learning_rate": 9.912096578356179e-05, "loss": 2.7015, "step": 1377 }, { "epoch": 0.060229905153197254, "grad_norm": 3.09375, "learning_rate": 9.911968302388685e-05, "loss": 2.041, "step": 1378 }, { "epoch": 0.060273613357227154, "grad_norm": 2.96875, "learning_rate": 9.9118399337252e-05, "loss": 2.2014, "step": 1379 }, { "epoch": 0.06031732156125705, "grad_norm": 3.21875, "learning_rate": 9.911711472368148e-05, "loss": 1.9344, "step": 1380 }, { "epoch": 0.06036102976528694, "grad_norm": 2.640625, "learning_rate": 9.91158291831995e-05, "loss": 1.7271, "step": 1381 }, { "epoch": 0.06040473796931684, "grad_norm": 7.03125, "learning_rate": 9.911454271583034e-05, "loss": 2.765, "step": 1382 }, { "epoch": 0.060448446173346736, "grad_norm": 3.109375, "learning_rate": 9.911325532159828e-05, "loss": 2.8126, "step": 1383 }, { "epoch": 0.060492154377376636, "grad_norm": 2.625, "learning_rate": 9.91119670005276e-05, "loss": 2.4261, "step": 1384 }, { "epoch": 0.06053586258140653, "grad_norm": 2.40625, "learning_rate": 9.911067775264264e-05, "loss": 1.7767, "step": 1385 }, { "epoch": 0.060579570785436423, "grad_norm": 3.3125, "learning_rate": 9.91093875779677e-05, "loss": 2.9243, "step": 1386 }, { "epoch": 0.060623278989466324, "grad_norm": 4.0, "learning_rate": 9.910809647652715e-05, "loss": 3.1425, "step": 1387 }, { "epoch": 0.06066698719349622, "grad_norm": 3.6875, "learning_rate": 9.910680444834535e-05, "loss": 1.9814, "step": 1388 }, { "epoch": 0.06071069539752612, "grad_norm": 3.734375, "learning_rate": 9.910551149344669e-05, "loss": 2.4808, "step": 1389 }, { "epoch": 0.06075440360155601, "grad_norm": 2.71875, "learning_rate": 9.910421761185553e-05, "loss": 1.9118, "step": 1390 }, { "epoch": 0.060798111805585905, "grad_norm": 2.65625, "learning_rate": 9.910292280359631e-05, "loss": 2.3266, "step": 1391 }, { "epoch": 0.060841820009615806, "grad_norm": 3.03125, "learning_rate": 9.91016270686935e-05, "loss": 2.2481, "step": 1392 }, { "epoch": 0.0608855282136457, "grad_norm": 2.515625, "learning_rate": 9.910033040717152e-05, "loss": 1.938, "step": 1393 }, { "epoch": 0.0609292364176756, "grad_norm": 3.25, "learning_rate": 9.909903281905484e-05, "loss": 2.3012, "step": 1394 }, { "epoch": 0.060972944621705494, "grad_norm": 3.234375, "learning_rate": 9.909773430436794e-05, "loss": 2.2996, "step": 1395 }, { "epoch": 0.06101665282573539, "grad_norm": 2.53125, "learning_rate": 9.909643486313533e-05, "loss": 2.4493, "step": 1396 }, { "epoch": 0.06106036102976529, "grad_norm": 2.859375, "learning_rate": 9.909513449538156e-05, "loss": 2.1811, "step": 1397 }, { "epoch": 0.06110406923379518, "grad_norm": 2.421875, "learning_rate": 9.909383320113113e-05, "loss": 1.8874, "step": 1398 }, { "epoch": 0.06114777743782508, "grad_norm": 2.828125, "learning_rate": 9.909253098040863e-05, "loss": 2.762, "step": 1399 }, { "epoch": 0.061191485641854976, "grad_norm": 2.609375, "learning_rate": 9.909122783323861e-05, "loss": 2.0106, "step": 1400 }, { "epoch": 0.06123519384588487, "grad_norm": 3.5, "learning_rate": 9.908992375964568e-05, "loss": 1.8271, "step": 1401 }, { "epoch": 0.06127890204991477, "grad_norm": 3.8125, "learning_rate": 9.908861875965443e-05, "loss": 2.2767, "step": 1402 }, { "epoch": 0.061322610253944664, "grad_norm": 4.4375, "learning_rate": 9.908731283328949e-05, "loss": 2.3544, "step": 1403 }, { "epoch": 0.061366318457974564, "grad_norm": 2.296875, "learning_rate": 9.908600598057554e-05, "loss": 1.7036, "step": 1404 }, { "epoch": 0.06141002666200446, "grad_norm": 2.65625, "learning_rate": 9.90846982015372e-05, "loss": 2.2406, "step": 1405 }, { "epoch": 0.06145373486603435, "grad_norm": 3.78125, "learning_rate": 9.908338949619917e-05, "loss": 2.2038, "step": 1406 }, { "epoch": 0.06149744307006425, "grad_norm": 2.578125, "learning_rate": 9.908207986458613e-05, "loss": 2.0234, "step": 1407 }, { "epoch": 0.061541151274094145, "grad_norm": 3.046875, "learning_rate": 9.908076930672282e-05, "loss": 2.0903, "step": 1408 }, { "epoch": 0.061584859478124046, "grad_norm": 2.796875, "learning_rate": 9.907945782263396e-05, "loss": 2.2869, "step": 1409 }, { "epoch": 0.06162856768215394, "grad_norm": 2.90625, "learning_rate": 9.907814541234429e-05, "loss": 2.3249, "step": 1410 }, { "epoch": 0.06167227588618383, "grad_norm": 2.578125, "learning_rate": 9.907683207587859e-05, "loss": 2.0903, "step": 1411 }, { "epoch": 0.061715984090213734, "grad_norm": 2.828125, "learning_rate": 9.907551781326165e-05, "loss": 1.7549, "step": 1412 }, { "epoch": 0.06175969229424363, "grad_norm": 2.65625, "learning_rate": 9.907420262451826e-05, "loss": 2.3239, "step": 1413 }, { "epoch": 0.06180340049827353, "grad_norm": 2.734375, "learning_rate": 9.907288650967324e-05, "loss": 2.0301, "step": 1414 }, { "epoch": 0.06184710870230342, "grad_norm": 2.28125, "learning_rate": 9.907156946875142e-05, "loss": 1.8346, "step": 1415 }, { "epoch": 0.06189081690633332, "grad_norm": 3.09375, "learning_rate": 9.907025150177768e-05, "loss": 2.1143, "step": 1416 }, { "epoch": 0.061934525110363216, "grad_norm": 3.015625, "learning_rate": 9.906893260877686e-05, "loss": 2.0177, "step": 1417 }, { "epoch": 0.06197823331439311, "grad_norm": 3.140625, "learning_rate": 9.906761278977387e-05, "loss": 3.0162, "step": 1418 }, { "epoch": 0.06202194151842301, "grad_norm": 3.046875, "learning_rate": 9.906629204479362e-05, "loss": 2.8375, "step": 1419 }, { "epoch": 0.062065649722452904, "grad_norm": 4.1875, "learning_rate": 9.906497037386102e-05, "loss": 1.6367, "step": 1420 }, { "epoch": 0.062109357926482804, "grad_norm": 3.421875, "learning_rate": 9.906364777700104e-05, "loss": 2.1294, "step": 1421 }, { "epoch": 0.0621530661305127, "grad_norm": 4.0625, "learning_rate": 9.906232425423858e-05, "loss": 2.181, "step": 1422 }, { "epoch": 0.06219677433454259, "grad_norm": 2.78125, "learning_rate": 9.906099980559868e-05, "loss": 1.8974, "step": 1423 }, { "epoch": 0.06224048253857249, "grad_norm": 3.828125, "learning_rate": 9.90596744311063e-05, "loss": 2.1287, "step": 1424 }, { "epoch": 0.062284190742602386, "grad_norm": 2.421875, "learning_rate": 9.905834813078646e-05, "loss": 2.0806, "step": 1425 }, { "epoch": 0.062327898946632286, "grad_norm": 3.0625, "learning_rate": 9.905702090466419e-05, "loss": 2.4359, "step": 1426 }, { "epoch": 0.06237160715066218, "grad_norm": 3.5, "learning_rate": 9.905569275276454e-05, "loss": 2.2533, "step": 1427 }, { "epoch": 0.06241531535469207, "grad_norm": 3.390625, "learning_rate": 9.905436367511256e-05, "loss": 1.8447, "step": 1428 }, { "epoch": 0.062459023558721974, "grad_norm": 2.671875, "learning_rate": 9.905303367173336e-05, "loss": 2.7585, "step": 1429 }, { "epoch": 0.06250273176275187, "grad_norm": 2.671875, "learning_rate": 9.9051702742652e-05, "loss": 1.7898, "step": 1430 }, { "epoch": 0.06254643996678176, "grad_norm": 3.59375, "learning_rate": 9.905037088789363e-05, "loss": 2.5768, "step": 1431 }, { "epoch": 0.06259014817081165, "grad_norm": 3.34375, "learning_rate": 9.904903810748339e-05, "loss": 2.9688, "step": 1432 }, { "epoch": 0.06263385637484156, "grad_norm": 2.53125, "learning_rate": 9.904770440144638e-05, "loss": 1.9799, "step": 1433 }, { "epoch": 0.06267756457887146, "grad_norm": 3.125, "learning_rate": 9.904636976980782e-05, "loss": 2.2924, "step": 1434 }, { "epoch": 0.06272127278290135, "grad_norm": 2.71875, "learning_rate": 9.904503421259288e-05, "loss": 2.0099, "step": 1435 }, { "epoch": 0.06276498098693124, "grad_norm": 2.671875, "learning_rate": 9.904369772982676e-05, "loss": 2.7724, "step": 1436 }, { "epoch": 0.06280868919096114, "grad_norm": 5.03125, "learning_rate": 9.904236032153469e-05, "loss": 2.749, "step": 1437 }, { "epoch": 0.06285239739499104, "grad_norm": 2.65625, "learning_rate": 9.904102198774188e-05, "loss": 1.9447, "step": 1438 }, { "epoch": 0.06289610559902094, "grad_norm": 3.8125, "learning_rate": 9.903968272847363e-05, "loss": 2.5455, "step": 1439 }, { "epoch": 0.06293981380305083, "grad_norm": 3.609375, "learning_rate": 9.90383425437552e-05, "loss": 2.74, "step": 1440 }, { "epoch": 0.06298352200708073, "grad_norm": 2.5, "learning_rate": 9.903700143361185e-05, "loss": 1.8687, "step": 1441 }, { "epoch": 0.06302723021111062, "grad_norm": 2.421875, "learning_rate": 9.903565939806893e-05, "loss": 1.8802, "step": 1442 }, { "epoch": 0.06307093841514053, "grad_norm": 2.5, "learning_rate": 9.903431643715175e-05, "loss": 1.7479, "step": 1443 }, { "epoch": 0.06311464661917042, "grad_norm": 2.734375, "learning_rate": 9.903297255088563e-05, "loss": 2.1058, "step": 1444 }, { "epoch": 0.06315835482320031, "grad_norm": 3.515625, "learning_rate": 9.903162773929599e-05, "loss": 2.3713, "step": 1445 }, { "epoch": 0.06320206302723021, "grad_norm": 3.015625, "learning_rate": 9.903028200240815e-05, "loss": 2.5098, "step": 1446 }, { "epoch": 0.0632457712312601, "grad_norm": 2.84375, "learning_rate": 9.902893534024753e-05, "loss": 2.1725, "step": 1447 }, { "epoch": 0.06328947943529001, "grad_norm": 2.828125, "learning_rate": 9.902758775283955e-05, "loss": 2.1446, "step": 1448 }, { "epoch": 0.0633331876393199, "grad_norm": 2.640625, "learning_rate": 9.902623924020962e-05, "loss": 1.8664, "step": 1449 }, { "epoch": 0.0633768958433498, "grad_norm": 2.890625, "learning_rate": 9.902488980238322e-05, "loss": 2.2319, "step": 1450 }, { "epoch": 0.06342060404737969, "grad_norm": 2.5, "learning_rate": 9.902353943938578e-05, "loss": 1.9501, "step": 1451 }, { "epoch": 0.0634643122514096, "grad_norm": 2.671875, "learning_rate": 9.90221881512428e-05, "loss": 1.8332, "step": 1452 }, { "epoch": 0.06350802045543949, "grad_norm": 3.078125, "learning_rate": 9.902083593797979e-05, "loss": 2.0205, "step": 1453 }, { "epoch": 0.06355172865946938, "grad_norm": 3.109375, "learning_rate": 9.901948279962226e-05, "loss": 2.2535, "step": 1454 }, { "epoch": 0.06359543686349928, "grad_norm": 3.625, "learning_rate": 9.901812873619574e-05, "loss": 2.2449, "step": 1455 }, { "epoch": 0.06363914506752917, "grad_norm": 4.03125, "learning_rate": 9.901677374772579e-05, "loss": 2.5408, "step": 1456 }, { "epoch": 0.06368285327155908, "grad_norm": 3.453125, "learning_rate": 9.901541783423798e-05, "loss": 2.0815, "step": 1457 }, { "epoch": 0.06372656147558897, "grad_norm": 2.53125, "learning_rate": 9.90140609957579e-05, "loss": 2.0225, "step": 1458 }, { "epoch": 0.06377026967961887, "grad_norm": 2.390625, "learning_rate": 9.901270323231115e-05, "loss": 1.7129, "step": 1459 }, { "epoch": 0.06381397788364876, "grad_norm": 3.734375, "learning_rate": 9.901134454392334e-05, "loss": 2.7482, "step": 1460 }, { "epoch": 0.06385768608767865, "grad_norm": 2.796875, "learning_rate": 9.900998493062015e-05, "loss": 2.3469, "step": 1461 }, { "epoch": 0.06390139429170856, "grad_norm": 2.96875, "learning_rate": 9.900862439242719e-05, "loss": 1.9815, "step": 1462 }, { "epoch": 0.06394510249573845, "grad_norm": 3.03125, "learning_rate": 9.900726292937018e-05, "loss": 2.0003, "step": 1463 }, { "epoch": 0.06398881069976835, "grad_norm": 3.21875, "learning_rate": 9.900590054147478e-05, "loss": 2.2793, "step": 1464 }, { "epoch": 0.06403251890379824, "grad_norm": 2.734375, "learning_rate": 9.900453722876672e-05, "loss": 1.8329, "step": 1465 }, { "epoch": 0.06407622710782813, "grad_norm": 2.75, "learning_rate": 9.900317299127171e-05, "loss": 1.8097, "step": 1466 }, { "epoch": 0.06411993531185804, "grad_norm": 3.5, "learning_rate": 9.900180782901551e-05, "loss": 2.5161, "step": 1467 }, { "epoch": 0.06416364351588794, "grad_norm": 2.8125, "learning_rate": 9.900044174202388e-05, "loss": 1.8867, "step": 1468 }, { "epoch": 0.06420735171991783, "grad_norm": 3.328125, "learning_rate": 9.899907473032259e-05, "loss": 2.3157, "step": 1469 }, { "epoch": 0.06425105992394772, "grad_norm": 2.28125, "learning_rate": 9.899770679393747e-05, "loss": 1.8642, "step": 1470 }, { "epoch": 0.06429476812797762, "grad_norm": 2.46875, "learning_rate": 9.899633793289427e-05, "loss": 1.941, "step": 1471 }, { "epoch": 0.06433847633200752, "grad_norm": 4.28125, "learning_rate": 9.89949681472189e-05, "loss": 2.7477, "step": 1472 }, { "epoch": 0.06438218453603742, "grad_norm": 2.796875, "learning_rate": 9.899359743693714e-05, "loss": 2.2268, "step": 1473 }, { "epoch": 0.06442589274006731, "grad_norm": 2.921875, "learning_rate": 9.899222580207492e-05, "loss": 2.1666, "step": 1474 }, { "epoch": 0.0644696009440972, "grad_norm": 3.4375, "learning_rate": 9.899085324265807e-05, "loss": 2.0582, "step": 1475 }, { "epoch": 0.0645133091481271, "grad_norm": 2.515625, "learning_rate": 9.898947975871253e-05, "loss": 1.966, "step": 1476 }, { "epoch": 0.064557017352157, "grad_norm": 3.015625, "learning_rate": 9.89881053502642e-05, "loss": 2.102, "step": 1477 }, { "epoch": 0.0646007255561869, "grad_norm": 2.484375, "learning_rate": 9.898673001733902e-05, "loss": 2.1326, "step": 1478 }, { "epoch": 0.0646444337602168, "grad_norm": 3.203125, "learning_rate": 9.898535375996296e-05, "loss": 1.7869, "step": 1479 }, { "epoch": 0.06468814196424669, "grad_norm": 2.578125, "learning_rate": 9.898397657816198e-05, "loss": 2.1237, "step": 1480 }, { "epoch": 0.06473185016827658, "grad_norm": 2.953125, "learning_rate": 9.898259847196205e-05, "loss": 2.1493, "step": 1481 }, { "epoch": 0.06477555837230649, "grad_norm": 3.65625, "learning_rate": 9.89812194413892e-05, "loss": 2.0955, "step": 1482 }, { "epoch": 0.06481926657633638, "grad_norm": 2.875, "learning_rate": 9.897983948646948e-05, "loss": 1.7992, "step": 1483 }, { "epoch": 0.06486297478036628, "grad_norm": 2.71875, "learning_rate": 9.897845860722888e-05, "loss": 2.1167, "step": 1484 }, { "epoch": 0.06490668298439617, "grad_norm": 3.03125, "learning_rate": 9.897707680369348e-05, "loss": 2.0633, "step": 1485 }, { "epoch": 0.06495039118842606, "grad_norm": 5.28125, "learning_rate": 9.897569407588935e-05, "loss": 3.077, "step": 1486 }, { "epoch": 0.06499409939245597, "grad_norm": 3.25, "learning_rate": 9.897431042384261e-05, "loss": 2.4853, "step": 1487 }, { "epoch": 0.06503780759648586, "grad_norm": 3.03125, "learning_rate": 9.897292584757934e-05, "loss": 2.4062, "step": 1488 }, { "epoch": 0.06508151580051576, "grad_norm": 2.484375, "learning_rate": 9.897154034712568e-05, "loss": 1.9607, "step": 1489 }, { "epoch": 0.06512522400454565, "grad_norm": 2.984375, "learning_rate": 9.897015392250779e-05, "loss": 2.6984, "step": 1490 }, { "epoch": 0.06516893220857554, "grad_norm": 3.0, "learning_rate": 9.896876657375183e-05, "loss": 2.8004, "step": 1491 }, { "epoch": 0.06521264041260545, "grad_norm": 3.75, "learning_rate": 9.896737830088396e-05, "loss": 1.97, "step": 1492 }, { "epoch": 0.06525634861663535, "grad_norm": 4.625, "learning_rate": 9.89659891039304e-05, "loss": 2.434, "step": 1493 }, { "epoch": 0.06530005682066524, "grad_norm": 2.421875, "learning_rate": 9.896459898291734e-05, "loss": 1.8298, "step": 1494 }, { "epoch": 0.06534376502469513, "grad_norm": 2.671875, "learning_rate": 9.896320793787106e-05, "loss": 2.0294, "step": 1495 }, { "epoch": 0.06538747322872503, "grad_norm": 4.875, "learning_rate": 9.896181596881777e-05, "loss": 1.8582, "step": 1496 }, { "epoch": 0.06543118143275493, "grad_norm": 3.0, "learning_rate": 9.896042307578376e-05, "loss": 2.4271, "step": 1497 }, { "epoch": 0.06547488963678483, "grad_norm": 3.109375, "learning_rate": 9.89590292587953e-05, "loss": 2.879, "step": 1498 }, { "epoch": 0.06551859784081472, "grad_norm": 2.875, "learning_rate": 9.895763451787869e-05, "loss": 2.1706, "step": 1499 }, { "epoch": 0.06556230604484461, "grad_norm": 2.90625, "learning_rate": 9.895623885306029e-05, "loss": 1.8151, "step": 1500 }, { "epoch": 0.06560601424887451, "grad_norm": 3.203125, "learning_rate": 9.89548422643664e-05, "loss": 2.4741, "step": 1501 }, { "epoch": 0.06564972245290442, "grad_norm": 2.671875, "learning_rate": 9.895344475182338e-05, "loss": 1.8052, "step": 1502 }, { "epoch": 0.06569343065693431, "grad_norm": 3.625, "learning_rate": 9.89520463154576e-05, "loss": 2.1158, "step": 1503 }, { "epoch": 0.0657371388609642, "grad_norm": 3.515625, "learning_rate": 9.895064695529548e-05, "loss": 1.9689, "step": 1504 }, { "epoch": 0.0657808470649941, "grad_norm": 4.0, "learning_rate": 9.89492466713634e-05, "loss": 2.3131, "step": 1505 }, { "epoch": 0.06582455526902399, "grad_norm": 2.984375, "learning_rate": 9.894784546368779e-05, "loss": 2.4752, "step": 1506 }, { "epoch": 0.0658682634730539, "grad_norm": 3.125, "learning_rate": 9.894644333229511e-05, "loss": 2.025, "step": 1507 }, { "epoch": 0.06591197167708379, "grad_norm": 2.65625, "learning_rate": 9.894504027721179e-05, "loss": 2.1173, "step": 1508 }, { "epoch": 0.06595567988111369, "grad_norm": 2.984375, "learning_rate": 9.894363629846432e-05, "loss": 2.4434, "step": 1509 }, { "epoch": 0.06599938808514358, "grad_norm": 2.640625, "learning_rate": 9.894223139607921e-05, "loss": 1.9026, "step": 1510 }, { "epoch": 0.06604309628917347, "grad_norm": 2.953125, "learning_rate": 9.894082557008296e-05, "loss": 1.9825, "step": 1511 }, { "epoch": 0.06608680449320338, "grad_norm": 2.890625, "learning_rate": 9.893941882050209e-05, "loss": 2.8117, "step": 1512 }, { "epoch": 0.06613051269723327, "grad_norm": 2.90625, "learning_rate": 9.893801114736318e-05, "loss": 2.2047, "step": 1513 }, { "epoch": 0.06617422090126317, "grad_norm": 5.40625, "learning_rate": 9.893660255069275e-05, "loss": 2.2197, "step": 1514 }, { "epoch": 0.06621792910529306, "grad_norm": 3.1875, "learning_rate": 9.893519303051742e-05, "loss": 2.2841, "step": 1515 }, { "epoch": 0.06626163730932295, "grad_norm": 6.5625, "learning_rate": 9.893378258686377e-05, "loss": 2.3869, "step": 1516 }, { "epoch": 0.06630534551335286, "grad_norm": 2.859375, "learning_rate": 9.893237121975843e-05, "loss": 2.418, "step": 1517 }, { "epoch": 0.06634905371738276, "grad_norm": 3.21875, "learning_rate": 9.893095892922803e-05, "loss": 2.641, "step": 1518 }, { "epoch": 0.06639276192141265, "grad_norm": 7.0, "learning_rate": 9.89295457152992e-05, "loss": 2.23, "step": 1519 }, { "epoch": 0.06643647012544254, "grad_norm": 2.609375, "learning_rate": 9.892813157799864e-05, "loss": 1.9, "step": 1520 }, { "epoch": 0.06648017832947244, "grad_norm": 3.71875, "learning_rate": 9.892671651735304e-05, "loss": 2.3359, "step": 1521 }, { "epoch": 0.06652388653350234, "grad_norm": 3.09375, "learning_rate": 9.892530053338909e-05, "loss": 2.4135, "step": 1522 }, { "epoch": 0.06656759473753224, "grad_norm": 2.921875, "learning_rate": 9.89238836261335e-05, "loss": 2.7742, "step": 1523 }, { "epoch": 0.06661130294156213, "grad_norm": 2.53125, "learning_rate": 9.892246579561302e-05, "loss": 1.8957, "step": 1524 }, { "epoch": 0.06665501114559202, "grad_norm": 2.859375, "learning_rate": 9.89210470418544e-05, "loss": 2.5296, "step": 1525 }, { "epoch": 0.06669871934962192, "grad_norm": 2.6875, "learning_rate": 9.891962736488443e-05, "loss": 2.2513, "step": 1526 }, { "epoch": 0.06674242755365183, "grad_norm": 2.734375, "learning_rate": 9.89182067647299e-05, "loss": 2.5814, "step": 1527 }, { "epoch": 0.06678613575768172, "grad_norm": 2.234375, "learning_rate": 9.891678524141758e-05, "loss": 1.811, "step": 1528 }, { "epoch": 0.06682984396171161, "grad_norm": 2.921875, "learning_rate": 9.891536279497436e-05, "loss": 2.0597, "step": 1529 }, { "epoch": 0.0668735521657415, "grad_norm": 3.765625, "learning_rate": 9.891393942542704e-05, "loss": 2.3774, "step": 1530 }, { "epoch": 0.0669172603697714, "grad_norm": 3.671875, "learning_rate": 9.891251513280248e-05, "loss": 2.5181, "step": 1531 }, { "epoch": 0.06696096857380131, "grad_norm": 2.375, "learning_rate": 9.891108991712759e-05, "loss": 2.0023, "step": 1532 }, { "epoch": 0.0670046767778312, "grad_norm": 2.65625, "learning_rate": 9.890966377842925e-05, "loss": 1.9709, "step": 1533 }, { "epoch": 0.0670483849818611, "grad_norm": 2.53125, "learning_rate": 9.890823671673436e-05, "loss": 2.2626, "step": 1534 }, { "epoch": 0.06709209318589099, "grad_norm": 2.453125, "learning_rate": 9.890680873206986e-05, "loss": 2.0042, "step": 1535 }, { "epoch": 0.06713580138992088, "grad_norm": 2.65625, "learning_rate": 9.89053798244627e-05, "loss": 2.0635, "step": 1536 }, { "epoch": 0.06717950959395079, "grad_norm": 4.0625, "learning_rate": 9.890394999393984e-05, "loss": 2.7742, "step": 1537 }, { "epoch": 0.06722321779798068, "grad_norm": 2.671875, "learning_rate": 9.890251924052827e-05, "loss": 2.0707, "step": 1538 }, { "epoch": 0.06726692600201058, "grad_norm": 2.796875, "learning_rate": 9.8901087564255e-05, "loss": 2.774, "step": 1539 }, { "epoch": 0.06731063420604047, "grad_norm": 3.546875, "learning_rate": 9.889965496514702e-05, "loss": 3.196, "step": 1540 }, { "epoch": 0.06735434241007036, "grad_norm": 3.0625, "learning_rate": 9.889822144323137e-05, "loss": 2.3068, "step": 1541 }, { "epoch": 0.06739805061410027, "grad_norm": 2.65625, "learning_rate": 9.889678699853514e-05, "loss": 2.4428, "step": 1542 }, { "epoch": 0.06744175881813017, "grad_norm": 2.5, "learning_rate": 9.889535163108537e-05, "loss": 2.0466, "step": 1543 }, { "epoch": 0.06748546702216006, "grad_norm": 2.359375, "learning_rate": 9.889391534090912e-05, "loss": 1.907, "step": 1544 }, { "epoch": 0.06752917522618995, "grad_norm": 12.0625, "learning_rate": 9.889247812803356e-05, "loss": 6.6027, "step": 1545 }, { "epoch": 0.06757288343021985, "grad_norm": 3.125, "learning_rate": 9.889103999248576e-05, "loss": 2.2651, "step": 1546 }, { "epoch": 0.06761659163424975, "grad_norm": 2.828125, "learning_rate": 9.88896009342929e-05, "loss": 1.9441, "step": 1547 }, { "epoch": 0.06766029983827965, "grad_norm": 3.09375, "learning_rate": 9.888816095348209e-05, "loss": 2.6584, "step": 1548 }, { "epoch": 0.06770400804230954, "grad_norm": 3.015625, "learning_rate": 9.888672005008054e-05, "loss": 2.5327, "step": 1549 }, { "epoch": 0.06774771624633943, "grad_norm": 2.703125, "learning_rate": 9.888527822411543e-05, "loss": 2.1271, "step": 1550 }, { "epoch": 0.06779142445036933, "grad_norm": 2.953125, "learning_rate": 9.888383547561398e-05, "loss": 2.3199, "step": 1551 }, { "epoch": 0.06783513265439924, "grad_norm": 4.5625, "learning_rate": 9.888239180460339e-05, "loss": 2.153, "step": 1552 }, { "epoch": 0.06787884085842913, "grad_norm": 2.625, "learning_rate": 9.888094721111093e-05, "loss": 2.5418, "step": 1553 }, { "epoch": 0.06792254906245902, "grad_norm": 3.296875, "learning_rate": 9.887950169516386e-05, "loss": 2.1519, "step": 1554 }, { "epoch": 0.06796625726648892, "grad_norm": 3.25, "learning_rate": 9.887805525678943e-05, "loss": 2.2746, "step": 1555 }, { "epoch": 0.06800996547051881, "grad_norm": 2.53125, "learning_rate": 9.887660789601499e-05, "loss": 1.8309, "step": 1556 }, { "epoch": 0.06805367367454872, "grad_norm": 2.84375, "learning_rate": 9.88751596128678e-05, "loss": 2.4382, "step": 1557 }, { "epoch": 0.06809738187857861, "grad_norm": 3.0625, "learning_rate": 9.887371040737523e-05, "loss": 2.1003, "step": 1558 }, { "epoch": 0.0681410900826085, "grad_norm": 3.140625, "learning_rate": 9.88722602795646e-05, "loss": 2.383, "step": 1559 }, { "epoch": 0.0681847982866384, "grad_norm": 2.78125, "learning_rate": 9.887080922946329e-05, "loss": 2.1193, "step": 1560 }, { "epoch": 0.06822850649066829, "grad_norm": 3.171875, "learning_rate": 9.886935725709868e-05, "loss": 2.1573, "step": 1561 }, { "epoch": 0.0682722146946982, "grad_norm": 2.921875, "learning_rate": 9.886790436249818e-05, "loss": 2.4051, "step": 1562 }, { "epoch": 0.0683159228987281, "grad_norm": 2.6875, "learning_rate": 9.886645054568919e-05, "loss": 1.9849, "step": 1563 }, { "epoch": 0.06835963110275799, "grad_norm": 3.0625, "learning_rate": 9.886499580669917e-05, "loss": 1.915, "step": 1564 }, { "epoch": 0.06840333930678788, "grad_norm": 2.75, "learning_rate": 9.886354014555554e-05, "loss": 2.0232, "step": 1565 }, { "epoch": 0.06844704751081777, "grad_norm": 2.4375, "learning_rate": 9.886208356228581e-05, "loss": 2.0822, "step": 1566 }, { "epoch": 0.06849075571484768, "grad_norm": 2.75, "learning_rate": 9.886062605691743e-05, "loss": 1.6554, "step": 1567 }, { "epoch": 0.06853446391887758, "grad_norm": 2.734375, "learning_rate": 9.885916762947795e-05, "loss": 2.097, "step": 1568 }, { "epoch": 0.06857817212290747, "grad_norm": 2.78125, "learning_rate": 9.885770827999484e-05, "loss": 1.9294, "step": 1569 }, { "epoch": 0.06862188032693736, "grad_norm": 3.21875, "learning_rate": 9.885624800849567e-05, "loss": 2.3477, "step": 1570 }, { "epoch": 0.06866558853096726, "grad_norm": 2.46875, "learning_rate": 9.8854786815008e-05, "loss": 2.2106, "step": 1571 }, { "epoch": 0.06870929673499716, "grad_norm": 2.65625, "learning_rate": 9.88533246995594e-05, "loss": 1.9569, "step": 1572 }, { "epoch": 0.06875300493902706, "grad_norm": 2.671875, "learning_rate": 9.885186166217746e-05, "loss": 1.7803, "step": 1573 }, { "epoch": 0.06879671314305695, "grad_norm": 2.75, "learning_rate": 9.885039770288979e-05, "loss": 2.2029, "step": 1574 }, { "epoch": 0.06884042134708684, "grad_norm": 2.71875, "learning_rate": 9.884893282172401e-05, "loss": 2.0134, "step": 1575 }, { "epoch": 0.06888412955111674, "grad_norm": 2.765625, "learning_rate": 9.884746701870777e-05, "loss": 2.7702, "step": 1576 }, { "epoch": 0.06892783775514665, "grad_norm": 2.546875, "learning_rate": 9.884600029386875e-05, "loss": 2.3747, "step": 1577 }, { "epoch": 0.06897154595917654, "grad_norm": 2.78125, "learning_rate": 9.884453264723459e-05, "loss": 2.3949, "step": 1578 }, { "epoch": 0.06901525416320643, "grad_norm": 5.78125, "learning_rate": 9.884306407883301e-05, "loss": 2.6275, "step": 1579 }, { "epoch": 0.06905896236723633, "grad_norm": 2.71875, "learning_rate": 9.884159458869173e-05, "loss": 2.6411, "step": 1580 }, { "epoch": 0.06910267057126622, "grad_norm": 3.859375, "learning_rate": 9.884012417683849e-05, "loss": 2.56, "step": 1581 }, { "epoch": 0.06914637877529613, "grad_norm": 2.65625, "learning_rate": 9.8838652843301e-05, "loss": 1.8407, "step": 1582 }, { "epoch": 0.06919008697932602, "grad_norm": 2.84375, "learning_rate": 9.883718058810707e-05, "loss": 2.2617, "step": 1583 }, { "epoch": 0.06923379518335591, "grad_norm": 2.828125, "learning_rate": 9.883570741128446e-05, "loss": 2.1872, "step": 1584 }, { "epoch": 0.06927750338738581, "grad_norm": 3.21875, "learning_rate": 9.883423331286096e-05, "loss": 3.165, "step": 1585 }, { "epoch": 0.06932121159141572, "grad_norm": 2.265625, "learning_rate": 9.88327582928644e-05, "loss": 2.0316, "step": 1586 }, { "epoch": 0.06936491979544561, "grad_norm": 4.0625, "learning_rate": 9.883128235132264e-05, "loss": 2.3265, "step": 1587 }, { "epoch": 0.0694086279994755, "grad_norm": 2.75, "learning_rate": 9.882980548826349e-05, "loss": 2.2151, "step": 1588 }, { "epoch": 0.0694523362035054, "grad_norm": 3.65625, "learning_rate": 9.882832770371487e-05, "loss": 2.3292, "step": 1589 }, { "epoch": 0.06949604440753529, "grad_norm": 2.78125, "learning_rate": 9.88268489977046e-05, "loss": 2.4083, "step": 1590 }, { "epoch": 0.0695397526115652, "grad_norm": 4.90625, "learning_rate": 9.882536937026066e-05, "loss": 2.4895, "step": 1591 }, { "epoch": 0.06958346081559509, "grad_norm": 4.53125, "learning_rate": 9.882388882141092e-05, "loss": 2.478, "step": 1592 }, { "epoch": 0.06962716901962498, "grad_norm": 5.28125, "learning_rate": 9.882240735118334e-05, "loss": 1.8026, "step": 1593 }, { "epoch": 0.06967087722365488, "grad_norm": 2.375, "learning_rate": 9.882092495960589e-05, "loss": 1.8172, "step": 1594 }, { "epoch": 0.06971458542768477, "grad_norm": 3.90625, "learning_rate": 9.881944164670651e-05, "loss": 2.2879, "step": 1595 }, { "epoch": 0.06975829363171468, "grad_norm": 2.828125, "learning_rate": 9.881795741251323e-05, "loss": 2.2479, "step": 1596 }, { "epoch": 0.06980200183574457, "grad_norm": 2.796875, "learning_rate": 9.881647225705403e-05, "loss": 2.1813, "step": 1597 }, { "epoch": 0.06984571003977447, "grad_norm": 2.984375, "learning_rate": 9.881498618035695e-05, "loss": 2.2546, "step": 1598 }, { "epoch": 0.06988941824380436, "grad_norm": 3.59375, "learning_rate": 9.881349918245005e-05, "loss": 2.346, "step": 1599 }, { "epoch": 0.06993312644783425, "grad_norm": 2.984375, "learning_rate": 9.881201126336135e-05, "loss": 2.2346, "step": 1600 }, { "epoch": 0.06997683465186416, "grad_norm": 2.375, "learning_rate": 9.881052242311896e-05, "loss": 1.7978, "step": 1601 }, { "epoch": 0.07002054285589406, "grad_norm": 2.8125, "learning_rate": 9.880903266175098e-05, "loss": 1.906, "step": 1602 }, { "epoch": 0.07006425105992395, "grad_norm": 2.96875, "learning_rate": 9.880754197928553e-05, "loss": 2.1055, "step": 1603 }, { "epoch": 0.07010795926395384, "grad_norm": 2.6875, "learning_rate": 9.88060503757507e-05, "loss": 2.0959, "step": 1604 }, { "epoch": 0.07015166746798374, "grad_norm": 2.65625, "learning_rate": 9.880455785117469e-05, "loss": 2.2334, "step": 1605 }, { "epoch": 0.07019537567201364, "grad_norm": 2.453125, "learning_rate": 9.880306440558562e-05, "loss": 2.0596, "step": 1606 }, { "epoch": 0.07023908387604354, "grad_norm": 2.453125, "learning_rate": 9.880157003901171e-05, "loss": 2.3092, "step": 1607 }, { "epoch": 0.07028279208007343, "grad_norm": 2.75, "learning_rate": 9.880007475148114e-05, "loss": 1.8754, "step": 1608 }, { "epoch": 0.07032650028410332, "grad_norm": 2.53125, "learning_rate": 9.879857854302214e-05, "loss": 1.8996, "step": 1609 }, { "epoch": 0.07037020848813322, "grad_norm": 3.609375, "learning_rate": 9.879708141366293e-05, "loss": 2.2173, "step": 1610 }, { "epoch": 0.07041391669216313, "grad_norm": 2.765625, "learning_rate": 9.879558336343177e-05, "loss": 2.2584, "step": 1611 }, { "epoch": 0.07045762489619302, "grad_norm": 2.59375, "learning_rate": 9.879408439235696e-05, "loss": 1.9219, "step": 1612 }, { "epoch": 0.07050133310022291, "grad_norm": 2.25, "learning_rate": 9.879258450046673e-05, "loss": 1.9287, "step": 1613 }, { "epoch": 0.0705450413042528, "grad_norm": 2.453125, "learning_rate": 9.879108368778943e-05, "loss": 2.1835, "step": 1614 }, { "epoch": 0.0705887495082827, "grad_norm": 3.703125, "learning_rate": 9.878958195435338e-05, "loss": 2.613, "step": 1615 }, { "epoch": 0.07063245771231261, "grad_norm": 2.921875, "learning_rate": 9.878807930018689e-05, "loss": 2.775, "step": 1616 }, { "epoch": 0.0706761659163425, "grad_norm": 2.3125, "learning_rate": 9.878657572531833e-05, "loss": 2.0104, "step": 1617 }, { "epoch": 0.0707198741203724, "grad_norm": 3.90625, "learning_rate": 9.878507122977609e-05, "loss": 2.1273, "step": 1618 }, { "epoch": 0.07076358232440229, "grad_norm": 3.109375, "learning_rate": 9.878356581358856e-05, "loss": 2.3979, "step": 1619 }, { "epoch": 0.07080729052843218, "grad_norm": 2.65625, "learning_rate": 9.878205947678414e-05, "loss": 2.65, "step": 1620 }, { "epoch": 0.07085099873246209, "grad_norm": 3.046875, "learning_rate": 9.878055221939127e-05, "loss": 2.9008, "step": 1621 }, { "epoch": 0.07089470693649198, "grad_norm": 2.28125, "learning_rate": 9.877904404143837e-05, "loss": 1.728, "step": 1622 }, { "epoch": 0.07093841514052188, "grad_norm": 2.34375, "learning_rate": 9.87775349429539e-05, "loss": 2.2248, "step": 1623 }, { "epoch": 0.07098212334455177, "grad_norm": 2.9375, "learning_rate": 9.877602492396636e-05, "loss": 2.0064, "step": 1624 }, { "epoch": 0.07102583154858166, "grad_norm": 3.25, "learning_rate": 9.877451398450426e-05, "loss": 2.1671, "step": 1625 }, { "epoch": 0.07106953975261157, "grad_norm": 2.515625, "learning_rate": 9.877300212459608e-05, "loss": 2.1163, "step": 1626 }, { "epoch": 0.07111324795664146, "grad_norm": 2.5625, "learning_rate": 9.877148934427037e-05, "loss": 2.1899, "step": 1627 }, { "epoch": 0.07115695616067136, "grad_norm": 4.96875, "learning_rate": 9.876997564355565e-05, "loss": 2.778, "step": 1628 }, { "epoch": 0.07120066436470125, "grad_norm": 3.015625, "learning_rate": 9.876846102248053e-05, "loss": 2.4448, "step": 1629 }, { "epoch": 0.07124437256873115, "grad_norm": 2.984375, "learning_rate": 9.876694548107357e-05, "loss": 2.8018, "step": 1630 }, { "epoch": 0.07128808077276105, "grad_norm": 3.234375, "learning_rate": 9.876542901936336e-05, "loss": 2.128, "step": 1631 }, { "epoch": 0.07133178897679095, "grad_norm": 2.6875, "learning_rate": 9.876391163737853e-05, "loss": 2.2933, "step": 1632 }, { "epoch": 0.07137549718082084, "grad_norm": 2.90625, "learning_rate": 9.876239333514772e-05, "loss": 1.9584, "step": 1633 }, { "epoch": 0.07141920538485073, "grad_norm": 3.75, "learning_rate": 9.876087411269959e-05, "loss": 2.2731, "step": 1634 }, { "epoch": 0.07146291358888063, "grad_norm": 3.46875, "learning_rate": 9.875935397006278e-05, "loss": 2.5422, "step": 1635 }, { "epoch": 0.07150662179291054, "grad_norm": 3.15625, "learning_rate": 9.875783290726601e-05, "loss": 2.1556, "step": 1636 }, { "epoch": 0.07155032999694043, "grad_norm": 3.0625, "learning_rate": 9.875631092433795e-05, "loss": 2.452, "step": 1637 }, { "epoch": 0.07159403820097032, "grad_norm": 2.34375, "learning_rate": 9.875478802130736e-05, "loss": 1.9157, "step": 1638 }, { "epoch": 0.07163774640500022, "grad_norm": 2.890625, "learning_rate": 9.875326419820296e-05, "loss": 2.4768, "step": 1639 }, { "epoch": 0.07168145460903011, "grad_norm": 3.5625, "learning_rate": 9.87517394550535e-05, "loss": 2.0934, "step": 1640 }, { "epoch": 0.07172516281306002, "grad_norm": 3.765625, "learning_rate": 9.875021379188776e-05, "loss": 2.2172, "step": 1641 }, { "epoch": 0.07176887101708991, "grad_norm": 2.390625, "learning_rate": 9.874868720873454e-05, "loss": 2.0909, "step": 1642 }, { "epoch": 0.0718125792211198, "grad_norm": 3.40625, "learning_rate": 9.874715970562262e-05, "loss": 2.1855, "step": 1643 }, { "epoch": 0.0718562874251497, "grad_norm": 2.59375, "learning_rate": 9.874563128258087e-05, "loss": 2.1557, "step": 1644 }, { "epoch": 0.07189999562917959, "grad_norm": 2.953125, "learning_rate": 9.874410193963813e-05, "loss": 2.1594, "step": 1645 }, { "epoch": 0.0719437038332095, "grad_norm": 3.09375, "learning_rate": 9.874257167682321e-05, "loss": 2.2683, "step": 1646 }, { "epoch": 0.07198741203723939, "grad_norm": 4.09375, "learning_rate": 9.874104049416502e-05, "loss": 2.4891, "step": 1647 }, { "epoch": 0.07203112024126929, "grad_norm": 3.0625, "learning_rate": 9.873950839169248e-05, "loss": 2.486, "step": 1648 }, { "epoch": 0.07207482844529918, "grad_norm": 2.53125, "learning_rate": 9.873797536943447e-05, "loss": 2.0965, "step": 1649 }, { "epoch": 0.07211853664932907, "grad_norm": 2.796875, "learning_rate": 9.873644142741992e-05, "loss": 2.0246, "step": 1650 }, { "epoch": 0.07216224485335898, "grad_norm": 3.953125, "learning_rate": 9.87349065656778e-05, "loss": 2.8423, "step": 1651 }, { "epoch": 0.07220595305738887, "grad_norm": 2.765625, "learning_rate": 9.873337078423706e-05, "loss": 2.1673, "step": 1652 }, { "epoch": 0.07224966126141877, "grad_norm": 2.890625, "learning_rate": 9.873183408312668e-05, "loss": 2.2967, "step": 1653 }, { "epoch": 0.07229336946544866, "grad_norm": 2.546875, "learning_rate": 9.873029646237567e-05, "loss": 1.8027, "step": 1654 }, { "epoch": 0.07233707766947856, "grad_norm": 2.625, "learning_rate": 9.872875792201304e-05, "loss": 2.0913, "step": 1655 }, { "epoch": 0.07238078587350846, "grad_norm": 3.234375, "learning_rate": 9.872721846206783e-05, "loss": 2.0012, "step": 1656 }, { "epoch": 0.07242449407753836, "grad_norm": 2.953125, "learning_rate": 9.872567808256909e-05, "loss": 2.3273, "step": 1657 }, { "epoch": 0.07246820228156825, "grad_norm": 3.046875, "learning_rate": 9.872413678354589e-05, "loss": 2.4328, "step": 1658 }, { "epoch": 0.07251191048559814, "grad_norm": 4.6875, "learning_rate": 9.87225945650273e-05, "loss": 2.1735, "step": 1659 }, { "epoch": 0.07255561868962804, "grad_norm": 2.421875, "learning_rate": 9.872105142704244e-05, "loss": 2.0066, "step": 1660 }, { "epoch": 0.07259932689365795, "grad_norm": 2.71875, "learning_rate": 9.871950736962044e-05, "loss": 2.249, "step": 1661 }, { "epoch": 0.07264303509768784, "grad_norm": 2.71875, "learning_rate": 9.871796239279043e-05, "loss": 2.347, "step": 1662 }, { "epoch": 0.07268674330171773, "grad_norm": 4.375, "learning_rate": 9.871641649658155e-05, "loss": 2.0257, "step": 1663 }, { "epoch": 0.07273045150574763, "grad_norm": 2.859375, "learning_rate": 9.871486968102299e-05, "loss": 2.1046, "step": 1664 }, { "epoch": 0.07277415970977752, "grad_norm": 3.640625, "learning_rate": 9.871332194614395e-05, "loss": 2.882, "step": 1665 }, { "epoch": 0.07281786791380743, "grad_norm": 3.03125, "learning_rate": 9.871177329197362e-05, "loss": 1.7936, "step": 1666 }, { "epoch": 0.07286157611783732, "grad_norm": 2.484375, "learning_rate": 9.871022371854123e-05, "loss": 1.7715, "step": 1667 }, { "epoch": 0.07290528432186721, "grad_norm": 3.546875, "learning_rate": 9.870867322587602e-05, "loss": 2.7403, "step": 1668 }, { "epoch": 0.07294899252589711, "grad_norm": 3.0, "learning_rate": 9.870712181400726e-05, "loss": 2.8774, "step": 1669 }, { "epoch": 0.072992700729927, "grad_norm": 3.375, "learning_rate": 9.870556948296423e-05, "loss": 2.4485, "step": 1670 }, { "epoch": 0.07303640893395691, "grad_norm": 3.046875, "learning_rate": 9.87040162327762e-05, "loss": 1.856, "step": 1671 }, { "epoch": 0.0730801171379868, "grad_norm": 3.0, "learning_rate": 9.870246206347252e-05, "loss": 2.0979, "step": 1672 }, { "epoch": 0.0731238253420167, "grad_norm": 2.703125, "learning_rate": 9.870090697508248e-05, "loss": 1.9364, "step": 1673 }, { "epoch": 0.07316753354604659, "grad_norm": 4.96875, "learning_rate": 9.869935096763543e-05, "loss": 2.922, "step": 1674 }, { "epoch": 0.07321124175007648, "grad_norm": 2.625, "learning_rate": 9.869779404116078e-05, "loss": 2.2812, "step": 1675 }, { "epoch": 0.07325494995410639, "grad_norm": 3.0, "learning_rate": 9.869623619568786e-05, "loss": 2.1977, "step": 1676 }, { "epoch": 0.07329865815813628, "grad_norm": 3.03125, "learning_rate": 9.86946774312461e-05, "loss": 1.9256, "step": 1677 }, { "epoch": 0.07334236636216618, "grad_norm": 3.578125, "learning_rate": 9.86931177478649e-05, "loss": 2.031, "step": 1678 }, { "epoch": 0.07338607456619607, "grad_norm": 2.703125, "learning_rate": 9.86915571455737e-05, "loss": 2.2388, "step": 1679 }, { "epoch": 0.07342978277022597, "grad_norm": 2.828125, "learning_rate": 9.868999562440194e-05, "loss": 2.7069, "step": 1680 }, { "epoch": 0.07347349097425587, "grad_norm": 2.875, "learning_rate": 9.86884331843791e-05, "loss": 2.6101, "step": 1681 }, { "epoch": 0.07351719917828577, "grad_norm": 3.34375, "learning_rate": 9.868686982553468e-05, "loss": 2.801, "step": 1682 }, { "epoch": 0.07356090738231566, "grad_norm": 2.859375, "learning_rate": 9.868530554789815e-05, "loss": 2.0419, "step": 1683 }, { "epoch": 0.07360461558634555, "grad_norm": 2.453125, "learning_rate": 9.868374035149905e-05, "loss": 2.0969, "step": 1684 }, { "epoch": 0.07364832379037545, "grad_norm": 2.5625, "learning_rate": 9.868217423636693e-05, "loss": 2.2815, "step": 1685 }, { "epoch": 0.07369203199440535, "grad_norm": 3.671875, "learning_rate": 9.86806072025313e-05, "loss": 2.4339, "step": 1686 }, { "epoch": 0.07373574019843525, "grad_norm": 11.3125, "learning_rate": 9.867903925002178e-05, "loss": 2.8798, "step": 1687 }, { "epoch": 0.07377944840246514, "grad_norm": 2.828125, "learning_rate": 9.867747037886793e-05, "loss": 2.0418, "step": 1688 }, { "epoch": 0.07382315660649504, "grad_norm": 2.359375, "learning_rate": 9.867590058909936e-05, "loss": 2.1559, "step": 1689 }, { "epoch": 0.07386686481052493, "grad_norm": 3.53125, "learning_rate": 9.867432988074572e-05, "loss": 2.198, "step": 1690 }, { "epoch": 0.07391057301455484, "grad_norm": 2.46875, "learning_rate": 9.867275825383664e-05, "loss": 1.8589, "step": 1691 }, { "epoch": 0.07395428121858473, "grad_norm": 2.875, "learning_rate": 9.867118570840175e-05, "loss": 2.4594, "step": 1692 }, { "epoch": 0.07399798942261462, "grad_norm": 2.625, "learning_rate": 9.866961224447075e-05, "loss": 2.5562, "step": 1693 }, { "epoch": 0.07404169762664452, "grad_norm": 2.96875, "learning_rate": 9.866803786207335e-05, "loss": 2.3999, "step": 1694 }, { "epoch": 0.07408540583067441, "grad_norm": 2.9375, "learning_rate": 9.866646256123922e-05, "loss": 1.985, "step": 1695 }, { "epoch": 0.07412911403470432, "grad_norm": 3.03125, "learning_rate": 9.866488634199813e-05, "loss": 2.6449, "step": 1696 }, { "epoch": 0.07417282223873421, "grad_norm": 2.75, "learning_rate": 9.866330920437979e-05, "loss": 2.1322, "step": 1697 }, { "epoch": 0.0742165304427641, "grad_norm": 2.25, "learning_rate": 9.8661731148414e-05, "loss": 1.5896, "step": 1698 }, { "epoch": 0.074260238646794, "grad_norm": 2.59375, "learning_rate": 9.86601521741305e-05, "loss": 2.0734, "step": 1699 }, { "epoch": 0.0743039468508239, "grad_norm": 2.859375, "learning_rate": 9.865857228155911e-05, "loss": 2.4698, "step": 1700 }, { "epoch": 0.0743476550548538, "grad_norm": 2.796875, "learning_rate": 9.865699147072964e-05, "loss": 2.0935, "step": 1701 }, { "epoch": 0.0743913632588837, "grad_norm": 3.0, "learning_rate": 9.865540974167193e-05, "loss": 2.4411, "step": 1702 }, { "epoch": 0.07443507146291359, "grad_norm": 2.875, "learning_rate": 9.865382709441584e-05, "loss": 2.3764, "step": 1703 }, { "epoch": 0.07447877966694348, "grad_norm": 2.75, "learning_rate": 9.865224352899119e-05, "loss": 2.1394, "step": 1704 }, { "epoch": 0.07452248787097338, "grad_norm": 3.125, "learning_rate": 9.865065904542792e-05, "loss": 2.3577, "step": 1705 }, { "epoch": 0.07456619607500328, "grad_norm": 3.359375, "learning_rate": 9.864907364375589e-05, "loss": 2.5026, "step": 1706 }, { "epoch": 0.07460990427903318, "grad_norm": 3.234375, "learning_rate": 9.864748732400504e-05, "loss": 2.5778, "step": 1707 }, { "epoch": 0.07465361248306307, "grad_norm": 2.9375, "learning_rate": 9.86459000862053e-05, "loss": 2.1946, "step": 1708 }, { "epoch": 0.07469732068709296, "grad_norm": 3.390625, "learning_rate": 9.864431193038662e-05, "loss": 2.1463, "step": 1709 }, { "epoch": 0.07474102889112286, "grad_norm": 3.171875, "learning_rate": 9.864272285657898e-05, "loss": 2.2934, "step": 1710 }, { "epoch": 0.07478473709515276, "grad_norm": 2.6875, "learning_rate": 9.864113286481237e-05, "loss": 2.0675, "step": 1711 }, { "epoch": 0.07482844529918266, "grad_norm": 2.5, "learning_rate": 9.863954195511677e-05, "loss": 2.022, "step": 1712 }, { "epoch": 0.07487215350321255, "grad_norm": 2.953125, "learning_rate": 9.863795012752224e-05, "loss": 2.0623, "step": 1713 }, { "epoch": 0.07491586170724245, "grad_norm": 2.640625, "learning_rate": 9.863635738205881e-05, "loss": 2.5347, "step": 1714 }, { "epoch": 0.07495956991127234, "grad_norm": 6.125, "learning_rate": 9.863476371875651e-05, "loss": 2.5752, "step": 1715 }, { "epoch": 0.07500327811530225, "grad_norm": 3.765625, "learning_rate": 9.863316913764545e-05, "loss": 2.5674, "step": 1716 }, { "epoch": 0.07504698631933214, "grad_norm": 3.5, "learning_rate": 9.863157363875568e-05, "loss": 2.4708, "step": 1717 }, { "epoch": 0.07509069452336203, "grad_norm": 21.0, "learning_rate": 9.862997722211735e-05, "loss": 7.6248, "step": 1718 }, { "epoch": 0.07513440272739193, "grad_norm": 2.296875, "learning_rate": 9.862837988776059e-05, "loss": 1.906, "step": 1719 }, { "epoch": 0.07517811093142182, "grad_norm": 3.046875, "learning_rate": 9.86267816357155e-05, "loss": 2.6795, "step": 1720 }, { "epoch": 0.07522181913545173, "grad_norm": 2.671875, "learning_rate": 9.86251824660123e-05, "loss": 3.0829, "step": 1721 }, { "epoch": 0.07526552733948162, "grad_norm": 2.6875, "learning_rate": 9.86235823786811e-05, "loss": 2.1595, "step": 1722 }, { "epoch": 0.07530923554351152, "grad_norm": 3.171875, "learning_rate": 9.862198137375215e-05, "loss": 2.2193, "step": 1723 }, { "epoch": 0.07535294374754141, "grad_norm": 4.46875, "learning_rate": 9.862037945125564e-05, "loss": 2.0731, "step": 1724 }, { "epoch": 0.07539665195157132, "grad_norm": 2.875, "learning_rate": 9.86187766112218e-05, "loss": 1.9634, "step": 1725 }, { "epoch": 0.07544036015560121, "grad_norm": 2.96875, "learning_rate": 9.861717285368091e-05, "loss": 2.0719, "step": 1726 }, { "epoch": 0.0754840683596311, "grad_norm": 3.40625, "learning_rate": 9.861556817866318e-05, "loss": 2.5894, "step": 1727 }, { "epoch": 0.075527776563661, "grad_norm": 3.703125, "learning_rate": 9.861396258619894e-05, "loss": 2.2967, "step": 1728 }, { "epoch": 0.07557148476769089, "grad_norm": 2.65625, "learning_rate": 9.861235607631847e-05, "loss": 1.8993, "step": 1729 }, { "epoch": 0.0756151929717208, "grad_norm": 3.140625, "learning_rate": 9.861074864905207e-05, "loss": 1.9114, "step": 1730 }, { "epoch": 0.07565890117575069, "grad_norm": 3.203125, "learning_rate": 9.860914030443012e-05, "loss": 1.9079, "step": 1731 }, { "epoch": 0.07570260937978059, "grad_norm": 3.59375, "learning_rate": 9.860753104248292e-05, "loss": 2.5769, "step": 1732 }, { "epoch": 0.07574631758381048, "grad_norm": 2.890625, "learning_rate": 9.860592086324088e-05, "loss": 2.4568, "step": 1733 }, { "epoch": 0.07579002578784037, "grad_norm": 2.703125, "learning_rate": 9.860430976673436e-05, "loss": 2.1262, "step": 1734 }, { "epoch": 0.07583373399187028, "grad_norm": 2.625, "learning_rate": 9.86026977529938e-05, "loss": 1.8477, "step": 1735 }, { "epoch": 0.07587744219590017, "grad_norm": 2.3125, "learning_rate": 9.860108482204957e-05, "loss": 2.0501, "step": 1736 }, { "epoch": 0.07592115039993007, "grad_norm": 2.796875, "learning_rate": 9.859947097393215e-05, "loss": 2.7278, "step": 1737 }, { "epoch": 0.07596485860395996, "grad_norm": 2.453125, "learning_rate": 9.859785620867197e-05, "loss": 2.0078, "step": 1738 }, { "epoch": 0.07600856680798986, "grad_norm": 2.65625, "learning_rate": 9.859624052629951e-05, "loss": 2.1657, "step": 1739 }, { "epoch": 0.07605227501201976, "grad_norm": 2.40625, "learning_rate": 9.859462392684526e-05, "loss": 2.1615, "step": 1740 }, { "epoch": 0.07609598321604966, "grad_norm": 2.546875, "learning_rate": 9.859300641033974e-05, "loss": 2.0125, "step": 1741 }, { "epoch": 0.07613969142007955, "grad_norm": 2.78125, "learning_rate": 9.859138797681347e-05, "loss": 2.6558, "step": 1742 }, { "epoch": 0.07618339962410944, "grad_norm": 5.34375, "learning_rate": 9.858976862629698e-05, "loss": 2.9906, "step": 1743 }, { "epoch": 0.07622710782813934, "grad_norm": 2.390625, "learning_rate": 9.858814835882085e-05, "loss": 2.4754, "step": 1744 }, { "epoch": 0.07627081603216924, "grad_norm": 3.0, "learning_rate": 9.85865271744156e-05, "loss": 2.1856, "step": 1745 }, { "epoch": 0.07631452423619914, "grad_norm": 3.203125, "learning_rate": 9.85849050731119e-05, "loss": 2.4571, "step": 1746 }, { "epoch": 0.07635823244022903, "grad_norm": 2.578125, "learning_rate": 9.858328205494035e-05, "loss": 1.9271, "step": 1747 }, { "epoch": 0.07640194064425893, "grad_norm": 2.75, "learning_rate": 9.858165811993153e-05, "loss": 2.4178, "step": 1748 }, { "epoch": 0.07644564884828882, "grad_norm": 2.71875, "learning_rate": 9.858003326811611e-05, "loss": 2.3161, "step": 1749 }, { "epoch": 0.07648935705231873, "grad_norm": 5.15625, "learning_rate": 9.857840749952478e-05, "loss": 1.7724, "step": 1750 }, { "epoch": 0.07653306525634862, "grad_norm": 2.8125, "learning_rate": 9.857678081418818e-05, "loss": 2.2475, "step": 1751 }, { "epoch": 0.07657677346037851, "grad_norm": 2.953125, "learning_rate": 9.857515321213704e-05, "loss": 1.9093, "step": 1752 }, { "epoch": 0.07662048166440841, "grad_norm": 2.921875, "learning_rate": 9.857352469340204e-05, "loss": 2.1066, "step": 1753 }, { "epoch": 0.0766641898684383, "grad_norm": 2.671875, "learning_rate": 9.857189525801396e-05, "loss": 1.8703, "step": 1754 }, { "epoch": 0.07670789807246821, "grad_norm": 2.5625, "learning_rate": 9.857026490600349e-05, "loss": 2.1322, "step": 1755 }, { "epoch": 0.0767516062764981, "grad_norm": 3.21875, "learning_rate": 9.856863363740145e-05, "loss": 2.6255, "step": 1756 }, { "epoch": 0.076795314480528, "grad_norm": 2.484375, "learning_rate": 9.856700145223862e-05, "loss": 1.8698, "step": 1757 }, { "epoch": 0.07683902268455789, "grad_norm": 3.03125, "learning_rate": 9.856536835054577e-05, "loss": 3.1263, "step": 1758 }, { "epoch": 0.07688273088858778, "grad_norm": 3.859375, "learning_rate": 9.856373433235373e-05, "loss": 1.8888, "step": 1759 }, { "epoch": 0.07692643909261769, "grad_norm": 2.5, "learning_rate": 9.856209939769335e-05, "loss": 2.0877, "step": 1760 }, { "epoch": 0.07697014729664758, "grad_norm": 2.59375, "learning_rate": 9.856046354659547e-05, "loss": 2.0977, "step": 1761 }, { "epoch": 0.07701385550067748, "grad_norm": 2.625, "learning_rate": 9.855882677909099e-05, "loss": 2.3225, "step": 1762 }, { "epoch": 0.07705756370470737, "grad_norm": 2.375, "learning_rate": 9.855718909521075e-05, "loss": 1.7009, "step": 1763 }, { "epoch": 0.07710127190873726, "grad_norm": 2.71875, "learning_rate": 9.855555049498568e-05, "loss": 2.2034, "step": 1764 }, { "epoch": 0.07714498011276717, "grad_norm": 2.84375, "learning_rate": 9.85539109784467e-05, "loss": 1.5595, "step": 1765 }, { "epoch": 0.07718868831679707, "grad_norm": 3.15625, "learning_rate": 9.855227054562476e-05, "loss": 2.9596, "step": 1766 }, { "epoch": 0.07723239652082696, "grad_norm": 2.84375, "learning_rate": 9.855062919655083e-05, "loss": 1.9394, "step": 1767 }, { "epoch": 0.07727610472485685, "grad_norm": 2.765625, "learning_rate": 9.854898693125586e-05, "loss": 2.1817, "step": 1768 }, { "epoch": 0.07731981292888675, "grad_norm": 3.8125, "learning_rate": 9.854734374977081e-05, "loss": 2.0313, "step": 1769 }, { "epoch": 0.07736352113291665, "grad_norm": 2.390625, "learning_rate": 9.854569965212676e-05, "loss": 1.8892, "step": 1770 }, { "epoch": 0.07740722933694655, "grad_norm": 2.578125, "learning_rate": 9.854405463835468e-05, "loss": 2.2463, "step": 1771 }, { "epoch": 0.07745093754097644, "grad_norm": 2.765625, "learning_rate": 9.854240870848565e-05, "loss": 1.8787, "step": 1772 }, { "epoch": 0.07749464574500634, "grad_norm": 3.046875, "learning_rate": 9.854076186255072e-05, "loss": 2.0774, "step": 1773 }, { "epoch": 0.07753835394903623, "grad_norm": 2.421875, "learning_rate": 9.853911410058097e-05, "loss": 2.1188, "step": 1774 }, { "epoch": 0.07758206215306614, "grad_norm": 5.28125, "learning_rate": 9.853746542260749e-05, "loss": 2.0926, "step": 1775 }, { "epoch": 0.07762577035709603, "grad_norm": 3.390625, "learning_rate": 9.853581582866139e-05, "loss": 2.7162, "step": 1776 }, { "epoch": 0.07766947856112592, "grad_norm": 3.09375, "learning_rate": 9.85341653187738e-05, "loss": 2.7719, "step": 1777 }, { "epoch": 0.07771318676515582, "grad_norm": 2.828125, "learning_rate": 9.853251389297587e-05, "loss": 2.4834, "step": 1778 }, { "epoch": 0.07775689496918571, "grad_norm": 3.59375, "learning_rate": 9.853086155129878e-05, "loss": 2.0547, "step": 1779 }, { "epoch": 0.07780060317321562, "grad_norm": 2.5, "learning_rate": 9.852920829377369e-05, "loss": 2.4811, "step": 1780 }, { "epoch": 0.07784431137724551, "grad_norm": 3.25, "learning_rate": 9.85275541204318e-05, "loss": 2.6087, "step": 1781 }, { "epoch": 0.0778880195812754, "grad_norm": 3.078125, "learning_rate": 9.852589903130435e-05, "loss": 2.869, "step": 1782 }, { "epoch": 0.0779317277853053, "grad_norm": 2.4375, "learning_rate": 9.852424302642256e-05, "loss": 1.683, "step": 1783 }, { "epoch": 0.07797543598933519, "grad_norm": 2.75, "learning_rate": 9.852258610581768e-05, "loss": 2.5439, "step": 1784 }, { "epoch": 0.0780191441933651, "grad_norm": 3.265625, "learning_rate": 9.852092826952097e-05, "loss": 1.7083, "step": 1785 }, { "epoch": 0.078062852397395, "grad_norm": 3.703125, "learning_rate": 9.851926951756374e-05, "loss": 2.2147, "step": 1786 }, { "epoch": 0.07810656060142489, "grad_norm": 2.53125, "learning_rate": 9.851760984997727e-05, "loss": 1.9388, "step": 1787 }, { "epoch": 0.07815026880545478, "grad_norm": 4.125, "learning_rate": 9.851594926679287e-05, "loss": 2.1677, "step": 1788 }, { "epoch": 0.07819397700948467, "grad_norm": 4.0625, "learning_rate": 9.851428776804191e-05, "loss": 3.2133, "step": 1789 }, { "epoch": 0.07823768521351458, "grad_norm": 2.5625, "learning_rate": 9.851262535375574e-05, "loss": 2.3131, "step": 1790 }, { "epoch": 0.07828139341754448, "grad_norm": 3.359375, "learning_rate": 9.851096202396572e-05, "loss": 2.0723, "step": 1791 }, { "epoch": 0.07832510162157437, "grad_norm": 2.546875, "learning_rate": 9.850929777870324e-05, "loss": 2.3914, "step": 1792 }, { "epoch": 0.07836880982560426, "grad_norm": 2.84375, "learning_rate": 9.850763261799969e-05, "loss": 1.8671, "step": 1793 }, { "epoch": 0.07841251802963416, "grad_norm": 3.1875, "learning_rate": 9.850596654188653e-05, "loss": 2.0209, "step": 1794 }, { "epoch": 0.07845622623366406, "grad_norm": 2.765625, "learning_rate": 9.850429955039518e-05, "loss": 2.0369, "step": 1795 }, { "epoch": 0.07849993443769396, "grad_norm": 2.453125, "learning_rate": 9.85026316435571e-05, "loss": 2.1397, "step": 1796 }, { "epoch": 0.07854364264172385, "grad_norm": 2.53125, "learning_rate": 9.850096282140379e-05, "loss": 1.6577, "step": 1797 }, { "epoch": 0.07858735084575375, "grad_norm": 3.296875, "learning_rate": 9.84992930839667e-05, "loss": 2.19, "step": 1798 }, { "epoch": 0.07863105904978364, "grad_norm": 3.375, "learning_rate": 9.849762243127737e-05, "loss": 1.9441, "step": 1799 }, { "epoch": 0.07867476725381355, "grad_norm": 2.6875, "learning_rate": 9.849595086336732e-05, "loss": 2.5172, "step": 1800 }, { "epoch": 0.07871847545784344, "grad_norm": 2.703125, "learning_rate": 9.84942783802681e-05, "loss": 1.7464, "step": 1801 }, { "epoch": 0.07876218366187333, "grad_norm": 2.546875, "learning_rate": 9.849260498201126e-05, "loss": 2.0358, "step": 1802 }, { "epoch": 0.07880589186590323, "grad_norm": 2.75, "learning_rate": 9.849093066862837e-05, "loss": 2.3709, "step": 1803 }, { "epoch": 0.07884960006993312, "grad_norm": 2.6875, "learning_rate": 9.848925544015106e-05, "loss": 2.3652, "step": 1804 }, { "epoch": 0.07889330827396303, "grad_norm": 2.296875, "learning_rate": 9.848757929661095e-05, "loss": 1.8576, "step": 1805 }, { "epoch": 0.07893701647799292, "grad_norm": 3.0625, "learning_rate": 9.848590223803961e-05, "loss": 1.9136, "step": 1806 }, { "epoch": 0.07898072468202282, "grad_norm": 4.46875, "learning_rate": 9.848422426446875e-05, "loss": 2.979, "step": 1807 }, { "epoch": 0.07902443288605271, "grad_norm": 2.828125, "learning_rate": 9.848254537593e-05, "loss": 2.4617, "step": 1808 }, { "epoch": 0.0790681410900826, "grad_norm": 2.265625, "learning_rate": 9.848086557245507e-05, "loss": 1.9116, "step": 1809 }, { "epoch": 0.07911184929411251, "grad_norm": 2.890625, "learning_rate": 9.847918485407563e-05, "loss": 2.1013, "step": 1810 }, { "epoch": 0.0791555574981424, "grad_norm": 2.75, "learning_rate": 9.847750322082341e-05, "loss": 2.1538, "step": 1811 }, { "epoch": 0.0791992657021723, "grad_norm": 2.53125, "learning_rate": 9.847582067273015e-05, "loss": 2.1033, "step": 1812 }, { "epoch": 0.07924297390620219, "grad_norm": 2.453125, "learning_rate": 9.847413720982763e-05, "loss": 2.1661, "step": 1813 }, { "epoch": 0.07928668211023208, "grad_norm": 2.375, "learning_rate": 9.847245283214757e-05, "loss": 1.8955, "step": 1814 }, { "epoch": 0.07933039031426199, "grad_norm": 2.484375, "learning_rate": 9.847076753972176e-05, "loss": 1.9043, "step": 1815 }, { "epoch": 0.07937409851829189, "grad_norm": 3.28125, "learning_rate": 9.846908133258204e-05, "loss": 1.8656, "step": 1816 }, { "epoch": 0.07941780672232178, "grad_norm": 2.984375, "learning_rate": 9.846739421076022e-05, "loss": 2.4502, "step": 1817 }, { "epoch": 0.07946151492635167, "grad_norm": 2.4375, "learning_rate": 9.846570617428811e-05, "loss": 2.2399, "step": 1818 }, { "epoch": 0.07950522313038157, "grad_norm": 2.53125, "learning_rate": 9.84640172231976e-05, "loss": 2.2902, "step": 1819 }, { "epoch": 0.07954893133441147, "grad_norm": 5.0625, "learning_rate": 9.846232735752055e-05, "loss": 1.7842, "step": 1820 }, { "epoch": 0.07959263953844137, "grad_norm": 2.953125, "learning_rate": 9.846063657728884e-05, "loss": 1.8978, "step": 1821 }, { "epoch": 0.07963634774247126, "grad_norm": 3.078125, "learning_rate": 9.845894488253438e-05, "loss": 2.3265, "step": 1822 }, { "epoch": 0.07968005594650115, "grad_norm": 3.921875, "learning_rate": 9.84572522732891e-05, "loss": 1.7457, "step": 1823 }, { "epoch": 0.07972376415053105, "grad_norm": 3.46875, "learning_rate": 9.845555874958496e-05, "loss": 2.2468, "step": 1824 }, { "epoch": 0.07976747235456096, "grad_norm": 2.9375, "learning_rate": 9.84538643114539e-05, "loss": 2.579, "step": 1825 }, { "epoch": 0.07981118055859085, "grad_norm": 12.9375, "learning_rate": 9.84521689589279e-05, "loss": 1.3026, "step": 1826 }, { "epoch": 0.07985488876262074, "grad_norm": 3.625, "learning_rate": 9.845047269203895e-05, "loss": 2.2743, "step": 1827 }, { "epoch": 0.07989859696665064, "grad_norm": 2.296875, "learning_rate": 9.844877551081906e-05, "loss": 1.8684, "step": 1828 }, { "epoch": 0.07994230517068053, "grad_norm": 3.265625, "learning_rate": 9.844707741530026e-05, "loss": 2.2146, "step": 1829 }, { "epoch": 0.07998601337471044, "grad_norm": 3.203125, "learning_rate": 9.844537840551462e-05, "loss": 2.2717, "step": 1830 }, { "epoch": 0.08002972157874033, "grad_norm": 2.578125, "learning_rate": 9.844367848149417e-05, "loss": 1.9394, "step": 1831 }, { "epoch": 0.08007342978277023, "grad_norm": 2.40625, "learning_rate": 9.844197764327097e-05, "loss": 2.0863, "step": 1832 }, { "epoch": 0.08011713798680012, "grad_norm": 2.75, "learning_rate": 9.844027589087719e-05, "loss": 2.6755, "step": 1833 }, { "epoch": 0.08016084619083001, "grad_norm": 2.671875, "learning_rate": 9.843857322434487e-05, "loss": 2.1548, "step": 1834 }, { "epoch": 0.08020455439485992, "grad_norm": 2.859375, "learning_rate": 9.84368696437062e-05, "loss": 2.247, "step": 1835 }, { "epoch": 0.08024826259888981, "grad_norm": 2.96875, "learning_rate": 9.843516514899329e-05, "loss": 1.8082, "step": 1836 }, { "epoch": 0.08029197080291971, "grad_norm": 2.734375, "learning_rate": 9.843345974023832e-05, "loss": 2.3728, "step": 1837 }, { "epoch": 0.0803356790069496, "grad_norm": 3.546875, "learning_rate": 9.843175341747348e-05, "loss": 2.3455, "step": 1838 }, { "epoch": 0.0803793872109795, "grad_norm": 3.34375, "learning_rate": 9.843004618073096e-05, "loss": 2.3092, "step": 1839 }, { "epoch": 0.0804230954150094, "grad_norm": 2.90625, "learning_rate": 9.842833803004298e-05, "loss": 1.8681, "step": 1840 }, { "epoch": 0.0804668036190393, "grad_norm": 2.578125, "learning_rate": 9.842662896544176e-05, "loss": 1.9617, "step": 1841 }, { "epoch": 0.08051051182306919, "grad_norm": 3.421875, "learning_rate": 9.84249189869596e-05, "loss": 2.3176, "step": 1842 }, { "epoch": 0.08055422002709908, "grad_norm": 2.3125, "learning_rate": 9.842320809462873e-05, "loss": 2.0864, "step": 1843 }, { "epoch": 0.08059792823112898, "grad_norm": 3.125, "learning_rate": 9.842149628848145e-05, "loss": 1.9941, "step": 1844 }, { "epoch": 0.08064163643515888, "grad_norm": 4.25, "learning_rate": 9.841978356855005e-05, "loss": 2.5347, "step": 1845 }, { "epoch": 0.08068534463918878, "grad_norm": 2.65625, "learning_rate": 9.841806993486686e-05, "loss": 2.5548, "step": 1846 }, { "epoch": 0.08072905284321867, "grad_norm": 2.3125, "learning_rate": 9.841635538746424e-05, "loss": 2.0899, "step": 1847 }, { "epoch": 0.08077276104724856, "grad_norm": 2.625, "learning_rate": 9.841463992637451e-05, "loss": 2.5336, "step": 1848 }, { "epoch": 0.08081646925127846, "grad_norm": 2.703125, "learning_rate": 9.841292355163008e-05, "loss": 1.8719, "step": 1849 }, { "epoch": 0.08086017745530837, "grad_norm": 3.21875, "learning_rate": 9.84112062632633e-05, "loss": 2.7897, "step": 1850 }, { "epoch": 0.08090388565933826, "grad_norm": 4.03125, "learning_rate": 9.84094880613066e-05, "loss": 3.1195, "step": 1851 }, { "epoch": 0.08094759386336815, "grad_norm": 3.03125, "learning_rate": 9.840776894579241e-05, "loss": 1.8838, "step": 1852 }, { "epoch": 0.08099130206739805, "grad_norm": 2.609375, "learning_rate": 9.840604891675317e-05, "loss": 1.8534, "step": 1853 }, { "epoch": 0.08103501027142794, "grad_norm": 3.609375, "learning_rate": 9.840432797422132e-05, "loss": 1.9414, "step": 1854 }, { "epoch": 0.08107871847545785, "grad_norm": 3.1875, "learning_rate": 9.840260611822938e-05, "loss": 2.4429, "step": 1855 }, { "epoch": 0.08112242667948774, "grad_norm": 2.953125, "learning_rate": 9.84008833488098e-05, "loss": 2.4672, "step": 1856 }, { "epoch": 0.08116613488351763, "grad_norm": 2.84375, "learning_rate": 9.83991596659951e-05, "loss": 2.3259, "step": 1857 }, { "epoch": 0.08120984308754753, "grad_norm": 5.65625, "learning_rate": 9.839743506981782e-05, "loss": 1.7876, "step": 1858 }, { "epoch": 0.08125355129157742, "grad_norm": 3.046875, "learning_rate": 9.83957095603105e-05, "loss": 2.2488, "step": 1859 }, { "epoch": 0.08129725949560733, "grad_norm": 3.984375, "learning_rate": 9.839398313750571e-05, "loss": 2.2605, "step": 1860 }, { "epoch": 0.08134096769963722, "grad_norm": 2.453125, "learning_rate": 9.839225580143601e-05, "loss": 1.7144, "step": 1861 }, { "epoch": 0.08138467590366712, "grad_norm": 2.515625, "learning_rate": 9.839052755213403e-05, "loss": 2.2738, "step": 1862 }, { "epoch": 0.08142838410769701, "grad_norm": 2.828125, "learning_rate": 9.838879838963235e-05, "loss": 1.904, "step": 1863 }, { "epoch": 0.08147209231172692, "grad_norm": 3.078125, "learning_rate": 9.838706831396362e-05, "loss": 2.7066, "step": 1864 }, { "epoch": 0.08151580051575681, "grad_norm": 2.78125, "learning_rate": 9.838533732516051e-05, "loss": 1.9853, "step": 1865 }, { "epoch": 0.0815595087197867, "grad_norm": 2.765625, "learning_rate": 9.838360542325565e-05, "loss": 2.0883, "step": 1866 }, { "epoch": 0.0816032169238166, "grad_norm": 3.046875, "learning_rate": 9.838187260828173e-05, "loss": 2.198, "step": 1867 }, { "epoch": 0.08164692512784649, "grad_norm": 2.46875, "learning_rate": 9.838013888027145e-05, "loss": 2.0494, "step": 1868 }, { "epoch": 0.0816906333318764, "grad_norm": 2.484375, "learning_rate": 9.837840423925755e-05, "loss": 1.6796, "step": 1869 }, { "epoch": 0.0817343415359063, "grad_norm": 2.84375, "learning_rate": 9.837666868527274e-05, "loss": 2.2668, "step": 1870 }, { "epoch": 0.08177804973993619, "grad_norm": 2.375, "learning_rate": 9.837493221834979e-05, "loss": 2.0788, "step": 1871 }, { "epoch": 0.08182175794396608, "grad_norm": 2.59375, "learning_rate": 9.837319483852147e-05, "loss": 1.9986, "step": 1872 }, { "epoch": 0.08186546614799597, "grad_norm": 2.578125, "learning_rate": 9.837145654582054e-05, "loss": 2.0753, "step": 1873 }, { "epoch": 0.08190917435202588, "grad_norm": 2.53125, "learning_rate": 9.836971734027985e-05, "loss": 2.1466, "step": 1874 }, { "epoch": 0.08195288255605578, "grad_norm": 2.703125, "learning_rate": 9.836797722193217e-05, "loss": 2.1172, "step": 1875 }, { "epoch": 0.08199659076008567, "grad_norm": 2.84375, "learning_rate": 9.836623619081039e-05, "loss": 2.0186, "step": 1876 }, { "epoch": 0.08204029896411556, "grad_norm": 2.5625, "learning_rate": 9.836449424694732e-05, "loss": 1.7065, "step": 1877 }, { "epoch": 0.08208400716814546, "grad_norm": 3.609375, "learning_rate": 9.836275139037585e-05, "loss": 2.7346, "step": 1878 }, { "epoch": 0.08212771537217536, "grad_norm": 2.546875, "learning_rate": 9.836100762112888e-05, "loss": 2.1246, "step": 1879 }, { "epoch": 0.08217142357620526, "grad_norm": 2.671875, "learning_rate": 9.83592629392393e-05, "loss": 2.2013, "step": 1880 }, { "epoch": 0.08221513178023515, "grad_norm": 3.453125, "learning_rate": 9.835751734474005e-05, "loss": 2.2353, "step": 1881 }, { "epoch": 0.08225883998426504, "grad_norm": 2.90625, "learning_rate": 9.835577083766407e-05, "loss": 2.8191, "step": 1882 }, { "epoch": 0.08230254818829494, "grad_norm": 3.25, "learning_rate": 9.835402341804432e-05, "loss": 2.6574, "step": 1883 }, { "epoch": 0.08234625639232485, "grad_norm": 2.59375, "learning_rate": 9.835227508591376e-05, "loss": 2.3453, "step": 1884 }, { "epoch": 0.08238996459635474, "grad_norm": 2.65625, "learning_rate": 9.835052584130541e-05, "loss": 1.847, "step": 1885 }, { "epoch": 0.08243367280038463, "grad_norm": 3.140625, "learning_rate": 9.834877568425225e-05, "loss": 1.968, "step": 1886 }, { "epoch": 0.08247738100441453, "grad_norm": 2.46875, "learning_rate": 9.834702461478733e-05, "loss": 1.6705, "step": 1887 }, { "epoch": 0.08252108920844442, "grad_norm": 3.125, "learning_rate": 9.834527263294371e-05, "loss": 2.1591, "step": 1888 }, { "epoch": 0.08256479741247433, "grad_norm": 2.359375, "learning_rate": 9.834351973875441e-05, "loss": 1.9738, "step": 1889 }, { "epoch": 0.08260850561650422, "grad_norm": 4.21875, "learning_rate": 9.834176593225254e-05, "loss": 2.5599, "step": 1890 }, { "epoch": 0.08265221382053412, "grad_norm": 3.28125, "learning_rate": 9.834001121347119e-05, "loss": 1.9454, "step": 1891 }, { "epoch": 0.08269592202456401, "grad_norm": 2.515625, "learning_rate": 9.833825558244347e-05, "loss": 1.8525, "step": 1892 }, { "epoch": 0.0827396302285939, "grad_norm": 3.140625, "learning_rate": 9.83364990392025e-05, "loss": 2.792, "step": 1893 }, { "epoch": 0.08278333843262381, "grad_norm": 2.609375, "learning_rate": 9.833474158378147e-05, "loss": 2.171, "step": 1894 }, { "epoch": 0.0828270466366537, "grad_norm": 3.53125, "learning_rate": 9.833298321621349e-05, "loss": 2.5549, "step": 1895 }, { "epoch": 0.0828707548406836, "grad_norm": 2.625, "learning_rate": 9.833122393653178e-05, "loss": 2.6365, "step": 1896 }, { "epoch": 0.08291446304471349, "grad_norm": 3.234375, "learning_rate": 9.832946374476954e-05, "loss": 2.2036, "step": 1897 }, { "epoch": 0.08295817124874338, "grad_norm": 2.6875, "learning_rate": 9.832770264095998e-05, "loss": 2.0949, "step": 1898 }, { "epoch": 0.08300187945277329, "grad_norm": 2.484375, "learning_rate": 9.832594062513634e-05, "loss": 1.9104, "step": 1899 }, { "epoch": 0.08304558765680319, "grad_norm": 3.25, "learning_rate": 9.832417769733185e-05, "loss": 2.0378, "step": 1900 }, { "epoch": 0.08308929586083308, "grad_norm": 2.625, "learning_rate": 9.832241385757981e-05, "loss": 2.1519, "step": 1901 }, { "epoch": 0.08313300406486297, "grad_norm": 2.9375, "learning_rate": 9.832064910591348e-05, "loss": 2.804, "step": 1902 }, { "epoch": 0.08317671226889287, "grad_norm": 2.40625, "learning_rate": 9.831888344236617e-05, "loss": 1.7018, "step": 1903 }, { "epoch": 0.08322042047292277, "grad_norm": 2.46875, "learning_rate": 9.83171168669712e-05, "loss": 2.1303, "step": 1904 }, { "epoch": 0.08326412867695267, "grad_norm": 3.109375, "learning_rate": 9.831534937976192e-05, "loss": 2.5991, "step": 1905 }, { "epoch": 0.08330783688098256, "grad_norm": 2.453125, "learning_rate": 9.831358098077168e-05, "loss": 2.0354, "step": 1906 }, { "epoch": 0.08335154508501245, "grad_norm": 2.1875, "learning_rate": 9.831181167003385e-05, "loss": 1.8323, "step": 1907 }, { "epoch": 0.08339525328904235, "grad_norm": 2.421875, "learning_rate": 9.831004144758183e-05, "loss": 1.5327, "step": 1908 }, { "epoch": 0.08343896149307226, "grad_norm": 2.265625, "learning_rate": 9.8308270313449e-05, "loss": 1.8952, "step": 1909 }, { "epoch": 0.08348266969710215, "grad_norm": 3.140625, "learning_rate": 9.83064982676688e-05, "loss": 2.3476, "step": 1910 }, { "epoch": 0.08352637790113204, "grad_norm": 2.328125, "learning_rate": 9.830472531027468e-05, "loss": 2.3885, "step": 1911 }, { "epoch": 0.08357008610516194, "grad_norm": 2.78125, "learning_rate": 9.830295144130009e-05, "loss": 2.0377, "step": 1912 }, { "epoch": 0.08361379430919183, "grad_norm": 2.90625, "learning_rate": 9.830117666077849e-05, "loss": 3.0738, "step": 1913 }, { "epoch": 0.08365750251322174, "grad_norm": 3.578125, "learning_rate": 9.82994009687434e-05, "loss": 2.0192, "step": 1914 }, { "epoch": 0.08370121071725163, "grad_norm": 2.484375, "learning_rate": 9.829762436522831e-05, "loss": 2.0367, "step": 1915 }, { "epoch": 0.08374491892128152, "grad_norm": 2.46875, "learning_rate": 9.829584685026676e-05, "loss": 2.0879, "step": 1916 }, { "epoch": 0.08378862712531142, "grad_norm": 3.765625, "learning_rate": 9.829406842389229e-05, "loss": 3.2253, "step": 1917 }, { "epoch": 0.08383233532934131, "grad_norm": 2.9375, "learning_rate": 9.829228908613845e-05, "loss": 1.9372, "step": 1918 }, { "epoch": 0.08387604353337122, "grad_norm": 3.109375, "learning_rate": 9.829050883703882e-05, "loss": 1.9538, "step": 1919 }, { "epoch": 0.08391975173740111, "grad_norm": 2.75, "learning_rate": 9.828872767662705e-05, "loss": 2.3358, "step": 1920 }, { "epoch": 0.083963459941431, "grad_norm": 2.828125, "learning_rate": 9.828694560493667e-05, "loss": 2.8138, "step": 1921 }, { "epoch": 0.0840071681454609, "grad_norm": 8.0, "learning_rate": 9.828516262200135e-05, "loss": 2.7766, "step": 1922 }, { "epoch": 0.0840508763494908, "grad_norm": 2.671875, "learning_rate": 9.828337872785475e-05, "loss": 2.4589, "step": 1923 }, { "epoch": 0.0840945845535207, "grad_norm": 2.765625, "learning_rate": 9.828159392253052e-05, "loss": 2.168, "step": 1924 }, { "epoch": 0.0841382927575506, "grad_norm": 3.171875, "learning_rate": 9.827980820606232e-05, "loss": 2.1371, "step": 1925 }, { "epoch": 0.08418200096158049, "grad_norm": 3.328125, "learning_rate": 9.82780215784839e-05, "loss": 2.5149, "step": 1926 }, { "epoch": 0.08422570916561038, "grad_norm": 2.765625, "learning_rate": 9.827623403982892e-05, "loss": 2.0682, "step": 1927 }, { "epoch": 0.08426941736964028, "grad_norm": 2.953125, "learning_rate": 9.827444559013115e-05, "loss": 2.2317, "step": 1928 }, { "epoch": 0.08431312557367018, "grad_norm": 2.140625, "learning_rate": 9.827265622942434e-05, "loss": 1.63, "step": 1929 }, { "epoch": 0.08435683377770008, "grad_norm": 2.75, "learning_rate": 9.827086595774225e-05, "loss": 1.8615, "step": 1930 }, { "epoch": 0.08440054198172997, "grad_norm": 2.6875, "learning_rate": 9.826907477511865e-05, "loss": 2.2187, "step": 1931 }, { "epoch": 0.08444425018575986, "grad_norm": 2.796875, "learning_rate": 9.826728268158737e-05, "loss": 2.1593, "step": 1932 }, { "epoch": 0.08448795838978976, "grad_norm": 2.734375, "learning_rate": 9.826548967718221e-05, "loss": 2.1498, "step": 1933 }, { "epoch": 0.08453166659381967, "grad_norm": 2.40625, "learning_rate": 9.826369576193701e-05, "loss": 2.1045, "step": 1934 }, { "epoch": 0.08457537479784956, "grad_norm": 2.765625, "learning_rate": 9.826190093588563e-05, "loss": 2.108, "step": 1935 }, { "epoch": 0.08461908300187945, "grad_norm": 2.65625, "learning_rate": 9.826010519906194e-05, "loss": 2.1962, "step": 1936 }, { "epoch": 0.08466279120590935, "grad_norm": 2.828125, "learning_rate": 9.825830855149983e-05, "loss": 2.0458, "step": 1937 }, { "epoch": 0.08470649940993924, "grad_norm": 2.71875, "learning_rate": 9.82565109932332e-05, "loss": 2.0363, "step": 1938 }, { "epoch": 0.08475020761396915, "grad_norm": 4.34375, "learning_rate": 9.825471252429596e-05, "loss": 1.9519, "step": 1939 }, { "epoch": 0.08479391581799904, "grad_norm": 2.734375, "learning_rate": 9.825291314472208e-05, "loss": 2.0237, "step": 1940 }, { "epoch": 0.08483762402202893, "grad_norm": 3.578125, "learning_rate": 9.82511128545455e-05, "loss": 2.245, "step": 1941 }, { "epoch": 0.08488133222605883, "grad_norm": 3.640625, "learning_rate": 9.824931165380018e-05, "loss": 3.7645, "step": 1942 }, { "epoch": 0.08492504043008872, "grad_norm": 2.65625, "learning_rate": 9.824750954252014e-05, "loss": 2.355, "step": 1943 }, { "epoch": 0.08496874863411863, "grad_norm": 3.28125, "learning_rate": 9.824570652073935e-05, "loss": 2.4083, "step": 1944 }, { "epoch": 0.08501245683814852, "grad_norm": 3.1875, "learning_rate": 9.824390258849188e-05, "loss": 2.2051, "step": 1945 }, { "epoch": 0.08505616504217842, "grad_norm": 3.0625, "learning_rate": 9.824209774581174e-05, "loss": 2.7646, "step": 1946 }, { "epoch": 0.08509987324620831, "grad_norm": 3.6875, "learning_rate": 9.824029199273302e-05, "loss": 3.1808, "step": 1947 }, { "epoch": 0.0851435814502382, "grad_norm": 2.84375, "learning_rate": 9.823848532928978e-05, "loss": 2.0116, "step": 1948 }, { "epoch": 0.08518728965426811, "grad_norm": 2.609375, "learning_rate": 9.823667775551611e-05, "loss": 2.1517, "step": 1949 }, { "epoch": 0.085230997858298, "grad_norm": 2.6875, "learning_rate": 9.823486927144612e-05, "loss": 2.1716, "step": 1950 }, { "epoch": 0.0852747060623279, "grad_norm": 3.515625, "learning_rate": 9.823305987711393e-05, "loss": 1.7987, "step": 1951 }, { "epoch": 0.08531841426635779, "grad_norm": 8.6875, "learning_rate": 9.823124957255372e-05, "loss": 1.7863, "step": 1952 }, { "epoch": 0.08536212247038769, "grad_norm": 2.3125, "learning_rate": 9.822943835779963e-05, "loss": 1.958, "step": 1953 }, { "epoch": 0.0854058306744176, "grad_norm": 2.734375, "learning_rate": 9.822762623288584e-05, "loss": 2.1187, "step": 1954 }, { "epoch": 0.08544953887844749, "grad_norm": 2.40625, "learning_rate": 9.822581319784656e-05, "loss": 1.9922, "step": 1955 }, { "epoch": 0.08549324708247738, "grad_norm": 3.015625, "learning_rate": 9.822399925271598e-05, "loss": 2.26, "step": 1956 }, { "epoch": 0.08553695528650727, "grad_norm": 2.453125, "learning_rate": 9.822218439752834e-05, "loss": 1.9014, "step": 1957 }, { "epoch": 0.08558066349053717, "grad_norm": 3.3125, "learning_rate": 9.822036863231793e-05, "loss": 1.8039, "step": 1958 }, { "epoch": 0.08562437169456708, "grad_norm": 2.90625, "learning_rate": 9.821855195711897e-05, "loss": 2.4106, "step": 1959 }, { "epoch": 0.08566807989859697, "grad_norm": 2.828125, "learning_rate": 9.821673437196574e-05, "loss": 1.9691, "step": 1960 }, { "epoch": 0.08571178810262686, "grad_norm": 3.703125, "learning_rate": 9.821491587689257e-05, "loss": 1.9763, "step": 1961 }, { "epoch": 0.08575549630665676, "grad_norm": 3.09375, "learning_rate": 9.821309647193376e-05, "loss": 2.3648, "step": 1962 }, { "epoch": 0.08579920451068665, "grad_norm": 2.953125, "learning_rate": 9.821127615712364e-05, "loss": 2.5316, "step": 1963 }, { "epoch": 0.08584291271471656, "grad_norm": 2.453125, "learning_rate": 9.820945493249659e-05, "loss": 1.8494, "step": 1964 }, { "epoch": 0.08588662091874645, "grad_norm": 2.546875, "learning_rate": 9.820763279808695e-05, "loss": 2.0713, "step": 1965 }, { "epoch": 0.08593032912277634, "grad_norm": 2.453125, "learning_rate": 9.820580975392912e-05, "loss": 1.7838, "step": 1966 }, { "epoch": 0.08597403732680624, "grad_norm": 3.515625, "learning_rate": 9.820398580005749e-05, "loss": 3.0606, "step": 1967 }, { "epoch": 0.08601774553083613, "grad_norm": 2.359375, "learning_rate": 9.820216093650649e-05, "loss": 1.7928, "step": 1968 }, { "epoch": 0.08606145373486604, "grad_norm": 2.328125, "learning_rate": 9.820033516331057e-05, "loss": 1.9501, "step": 1969 }, { "epoch": 0.08610516193889593, "grad_norm": 2.40625, "learning_rate": 9.819850848050419e-05, "loss": 2.061, "step": 1970 }, { "epoch": 0.08614887014292583, "grad_norm": 3.15625, "learning_rate": 9.819668088812177e-05, "loss": 1.9728, "step": 1971 }, { "epoch": 0.08619257834695572, "grad_norm": 3.375, "learning_rate": 9.819485238619784e-05, "loss": 2.0728, "step": 1972 }, { "epoch": 0.08623628655098561, "grad_norm": 2.71875, "learning_rate": 9.819302297476692e-05, "loss": 1.947, "step": 1973 }, { "epoch": 0.08627999475501552, "grad_norm": 3.0625, "learning_rate": 9.819119265386349e-05, "loss": 2.1996, "step": 1974 }, { "epoch": 0.08632370295904541, "grad_norm": 3.15625, "learning_rate": 9.818936142352212e-05, "loss": 2.351, "step": 1975 }, { "epoch": 0.08636741116307531, "grad_norm": 2.578125, "learning_rate": 9.81875292837774e-05, "loss": 1.9119, "step": 1976 }, { "epoch": 0.0864111193671052, "grad_norm": 2.609375, "learning_rate": 9.818569623466383e-05, "loss": 2.0526, "step": 1977 }, { "epoch": 0.0864548275711351, "grad_norm": 3.03125, "learning_rate": 9.818386227621605e-05, "loss": 2.6655, "step": 1978 }, { "epoch": 0.086498535775165, "grad_norm": 3.40625, "learning_rate": 9.818202740846867e-05, "loss": 2.1808, "step": 1979 }, { "epoch": 0.0865422439791949, "grad_norm": 4.21875, "learning_rate": 9.818019163145631e-05, "loss": 2.8019, "step": 1980 }, { "epoch": 0.08658595218322479, "grad_norm": 3.546875, "learning_rate": 9.81783549452136e-05, "loss": 2.1154, "step": 1981 }, { "epoch": 0.08662966038725468, "grad_norm": 2.5, "learning_rate": 9.81765173497752e-05, "loss": 2.0534, "step": 1982 }, { "epoch": 0.08667336859128458, "grad_norm": 2.75, "learning_rate": 9.817467884517582e-05, "loss": 2.0711, "step": 1983 }, { "epoch": 0.08671707679531448, "grad_norm": 3.5625, "learning_rate": 9.817283943145013e-05, "loss": 2.0547, "step": 1984 }, { "epoch": 0.08676078499934438, "grad_norm": 2.71875, "learning_rate": 9.817099910863285e-05, "loss": 2.4254, "step": 1985 }, { "epoch": 0.08680449320337427, "grad_norm": 2.6875, "learning_rate": 9.81691578767587e-05, "loss": 2.117, "step": 1986 }, { "epoch": 0.08684820140740417, "grad_norm": 2.671875, "learning_rate": 9.816731573586245e-05, "loss": 2.1401, "step": 1987 }, { "epoch": 0.08689190961143406, "grad_norm": 2.75, "learning_rate": 9.816547268597882e-05, "loss": 2.3913, "step": 1988 }, { "epoch": 0.08693561781546397, "grad_norm": 2.796875, "learning_rate": 9.816362872714264e-05, "loss": 2.3797, "step": 1989 }, { "epoch": 0.08697932601949386, "grad_norm": 2.65625, "learning_rate": 9.816178385938868e-05, "loss": 2.1, "step": 1990 }, { "epoch": 0.08702303422352375, "grad_norm": 2.421875, "learning_rate": 9.815993808275177e-05, "loss": 2.0036, "step": 1991 }, { "epoch": 0.08706674242755365, "grad_norm": 3.78125, "learning_rate": 9.815809139726673e-05, "loss": 1.5592, "step": 1992 }, { "epoch": 0.08711045063158354, "grad_norm": 3.09375, "learning_rate": 9.815624380296841e-05, "loss": 2.726, "step": 1993 }, { "epoch": 0.08715415883561345, "grad_norm": 3.03125, "learning_rate": 9.815439529989168e-05, "loss": 2.5015, "step": 1994 }, { "epoch": 0.08719786703964334, "grad_norm": 3.125, "learning_rate": 9.815254588807143e-05, "loss": 1.9638, "step": 1995 }, { "epoch": 0.08724157524367324, "grad_norm": 3.203125, "learning_rate": 9.815069556754256e-05, "loss": 2.2552, "step": 1996 }, { "epoch": 0.08728528344770313, "grad_norm": 2.625, "learning_rate": 9.814884433833999e-05, "loss": 2.0396, "step": 1997 }, { "epoch": 0.08732899165173302, "grad_norm": 9.1875, "learning_rate": 9.814699220049863e-05, "loss": 1.9973, "step": 1998 }, { "epoch": 0.08737269985576293, "grad_norm": 2.5625, "learning_rate": 9.814513915405346e-05, "loss": 2.2719, "step": 1999 }, { "epoch": 0.08741640805979282, "grad_norm": 3.625, "learning_rate": 9.814328519903946e-05, "loss": 2.6787, "step": 2000 }, { "epoch": 0.08746011626382272, "grad_norm": 2.84375, "learning_rate": 9.814143033549157e-05, "loss": 2.5491, "step": 2001 }, { "epoch": 0.08750382446785261, "grad_norm": 3.125, "learning_rate": 9.813957456344485e-05, "loss": 3.0051, "step": 2002 }, { "epoch": 0.08754753267188252, "grad_norm": 3.703125, "learning_rate": 9.813771788293429e-05, "loss": 1.8316, "step": 2003 }, { "epoch": 0.08759124087591241, "grad_norm": 2.390625, "learning_rate": 9.813586029399492e-05, "loss": 1.6633, "step": 2004 }, { "epoch": 0.0876349490799423, "grad_norm": 4.375, "learning_rate": 9.813400179666181e-05, "loss": 2.5481, "step": 2005 }, { "epoch": 0.0876786572839722, "grad_norm": 2.984375, "learning_rate": 9.813214239097004e-05, "loss": 2.0448, "step": 2006 }, { "epoch": 0.0877223654880021, "grad_norm": 3.25, "learning_rate": 9.813028207695467e-05, "loss": 1.9342, "step": 2007 }, { "epoch": 0.087766073692032, "grad_norm": 3.265625, "learning_rate": 9.812842085465085e-05, "loss": 2.5093, "step": 2008 }, { "epoch": 0.0878097818960619, "grad_norm": 2.53125, "learning_rate": 9.812655872409366e-05, "loss": 2.2058, "step": 2009 }, { "epoch": 0.08785349010009179, "grad_norm": 3.453125, "learning_rate": 9.812469568531828e-05, "loss": 2.2205, "step": 2010 }, { "epoch": 0.08789719830412168, "grad_norm": 4.46875, "learning_rate": 9.812283173835985e-05, "loss": 2.2833, "step": 2011 }, { "epoch": 0.08794090650815158, "grad_norm": 2.4375, "learning_rate": 9.812096688325354e-05, "loss": 2.1205, "step": 2012 }, { "epoch": 0.08798461471218148, "grad_norm": 3.1875, "learning_rate": 9.811910112003455e-05, "loss": 1.9461, "step": 2013 }, { "epoch": 0.08802832291621138, "grad_norm": 42.5, "learning_rate": 9.811723444873809e-05, "loss": 2.3175, "step": 2014 }, { "epoch": 0.08807203112024127, "grad_norm": 2.40625, "learning_rate": 9.811536686939937e-05, "loss": 1.8189, "step": 2015 }, { "epoch": 0.08811573932427116, "grad_norm": 2.953125, "learning_rate": 9.811349838205366e-05, "loss": 2.5017, "step": 2016 }, { "epoch": 0.08815944752830106, "grad_norm": 2.59375, "learning_rate": 9.81116289867362e-05, "loss": 1.8558, "step": 2017 }, { "epoch": 0.08820315573233097, "grad_norm": 2.375, "learning_rate": 9.81097586834823e-05, "loss": 2.0018, "step": 2018 }, { "epoch": 0.08824686393636086, "grad_norm": 3.15625, "learning_rate": 9.810788747232721e-05, "loss": 2.0362, "step": 2019 }, { "epoch": 0.08829057214039075, "grad_norm": 3.28125, "learning_rate": 9.810601535330627e-05, "loss": 2.3536, "step": 2020 }, { "epoch": 0.08833428034442065, "grad_norm": 2.5625, "learning_rate": 9.810414232645482e-05, "loss": 2.0757, "step": 2021 }, { "epoch": 0.08837798854845054, "grad_norm": 2.4375, "learning_rate": 9.810226839180817e-05, "loss": 2.1589, "step": 2022 }, { "epoch": 0.08842169675248045, "grad_norm": 3.5, "learning_rate": 9.810039354940173e-05, "loss": 2.8814, "step": 2023 }, { "epoch": 0.08846540495651034, "grad_norm": 3.1875, "learning_rate": 9.809851779927084e-05, "loss": 2.5291, "step": 2024 }, { "epoch": 0.08850911316054023, "grad_norm": 2.8125, "learning_rate": 9.809664114145091e-05, "loss": 2.4861, "step": 2025 }, { "epoch": 0.08855282136457013, "grad_norm": 2.6875, "learning_rate": 9.809476357597738e-05, "loss": 2.0149, "step": 2026 }, { "epoch": 0.08859652956860002, "grad_norm": 4.34375, "learning_rate": 9.809288510288566e-05, "loss": 3.2423, "step": 2027 }, { "epoch": 0.08864023777262993, "grad_norm": 2.984375, "learning_rate": 9.809100572221118e-05, "loss": 1.4191, "step": 2028 }, { "epoch": 0.08868394597665982, "grad_norm": 7.0625, "learning_rate": 9.808912543398945e-05, "loss": 2.0518, "step": 2029 }, { "epoch": 0.08872765418068972, "grad_norm": 3.609375, "learning_rate": 9.808724423825592e-05, "loss": 1.9862, "step": 2030 }, { "epoch": 0.08877136238471961, "grad_norm": 2.234375, "learning_rate": 9.80853621350461e-05, "loss": 1.978, "step": 2031 }, { "epoch": 0.0888150705887495, "grad_norm": 2.28125, "learning_rate": 9.808347912439554e-05, "loss": 2.0013, "step": 2032 }, { "epoch": 0.08885877879277941, "grad_norm": 2.671875, "learning_rate": 9.808159520633973e-05, "loss": 2.1639, "step": 2033 }, { "epoch": 0.0889024869968093, "grad_norm": 2.96875, "learning_rate": 9.807971038091423e-05, "loss": 2.9512, "step": 2034 }, { "epoch": 0.0889461952008392, "grad_norm": 2.796875, "learning_rate": 9.807782464815463e-05, "loss": 2.0199, "step": 2035 }, { "epoch": 0.08898990340486909, "grad_norm": 2.5, "learning_rate": 9.80759380080965e-05, "loss": 2.0716, "step": 2036 }, { "epoch": 0.08903361160889899, "grad_norm": 2.5, "learning_rate": 9.807405046077545e-05, "loss": 2.1774, "step": 2037 }, { "epoch": 0.08907731981292889, "grad_norm": 4.8125, "learning_rate": 9.80721620062271e-05, "loss": 2.5221, "step": 2038 }, { "epoch": 0.08912102801695879, "grad_norm": 3.359375, "learning_rate": 9.807027264448708e-05, "loss": 2.0483, "step": 2039 }, { "epoch": 0.08916473622098868, "grad_norm": 2.9375, "learning_rate": 9.806838237559107e-05, "loss": 1.8315, "step": 2040 }, { "epoch": 0.08920844442501857, "grad_norm": 3.21875, "learning_rate": 9.80664911995747e-05, "loss": 2.7907, "step": 2041 }, { "epoch": 0.08925215262904847, "grad_norm": 2.890625, "learning_rate": 9.80645991164737e-05, "loss": 2.3077, "step": 2042 }, { "epoch": 0.08929586083307837, "grad_norm": 2.640625, "learning_rate": 9.806270612632375e-05, "loss": 2.032, "step": 2043 }, { "epoch": 0.08933956903710827, "grad_norm": 2.78125, "learning_rate": 9.806081222916059e-05, "loss": 2.1644, "step": 2044 }, { "epoch": 0.08938327724113816, "grad_norm": 5.5625, "learning_rate": 9.805891742501996e-05, "loss": 2.7271, "step": 2045 }, { "epoch": 0.08942698544516806, "grad_norm": 2.484375, "learning_rate": 9.80570217139376e-05, "loss": 1.9969, "step": 2046 }, { "epoch": 0.08947069364919795, "grad_norm": 3.046875, "learning_rate": 9.80551250959493e-05, "loss": 2.655, "step": 2047 }, { "epoch": 0.08951440185322786, "grad_norm": 2.640625, "learning_rate": 9.805322757109086e-05, "loss": 2.6689, "step": 2048 }, { "epoch": 0.08955811005725775, "grad_norm": 2.421875, "learning_rate": 9.805132913939807e-05, "loss": 2.1874, "step": 2049 }, { "epoch": 0.08960181826128764, "grad_norm": 2.578125, "learning_rate": 9.804942980090676e-05, "loss": 2.3881, "step": 2050 }, { "epoch": 0.08964552646531754, "grad_norm": 2.28125, "learning_rate": 9.804752955565278e-05, "loss": 2.0989, "step": 2051 }, { "epoch": 0.08968923466934743, "grad_norm": 3.125, "learning_rate": 9.804562840367198e-05, "loss": 2.2578, "step": 2052 }, { "epoch": 0.08973294287337734, "grad_norm": 2.625, "learning_rate": 9.804372634500026e-05, "loss": 1.8339, "step": 2053 }, { "epoch": 0.08977665107740723, "grad_norm": 2.96875, "learning_rate": 9.804182337967349e-05, "loss": 2.4403, "step": 2054 }, { "epoch": 0.08982035928143713, "grad_norm": 3.03125, "learning_rate": 9.80399195077276e-05, "loss": 2.6908, "step": 2055 }, { "epoch": 0.08986406748546702, "grad_norm": 2.203125, "learning_rate": 9.803801472919849e-05, "loss": 2.0668, "step": 2056 }, { "epoch": 0.08990777568949691, "grad_norm": 2.421875, "learning_rate": 9.803610904412214e-05, "loss": 2.2085, "step": 2057 }, { "epoch": 0.08995148389352682, "grad_norm": 2.703125, "learning_rate": 9.803420245253449e-05, "loss": 2.0336, "step": 2058 }, { "epoch": 0.08999519209755671, "grad_norm": 3.125, "learning_rate": 9.803229495447154e-05, "loss": 2.9318, "step": 2059 }, { "epoch": 0.09003890030158661, "grad_norm": 2.484375, "learning_rate": 9.803038654996927e-05, "loss": 1.6366, "step": 2060 }, { "epoch": 0.0900826085056165, "grad_norm": 2.375, "learning_rate": 9.802847723906371e-05, "loss": 1.9645, "step": 2061 }, { "epoch": 0.0901263167096464, "grad_norm": 3.9375, "learning_rate": 9.802656702179088e-05, "loss": 2.3127, "step": 2062 }, { "epoch": 0.0901700249136763, "grad_norm": 3.109375, "learning_rate": 9.802465589818683e-05, "loss": 1.8978, "step": 2063 }, { "epoch": 0.0902137331177062, "grad_norm": 2.28125, "learning_rate": 9.802274386828762e-05, "loss": 1.9738, "step": 2064 }, { "epoch": 0.09025744132173609, "grad_norm": 3.34375, "learning_rate": 9.802083093212935e-05, "loss": 2.8582, "step": 2065 }, { "epoch": 0.09030114952576598, "grad_norm": 3.125, "learning_rate": 9.801891708974809e-05, "loss": 2.021, "step": 2066 }, { "epoch": 0.09034485772979588, "grad_norm": 3.71875, "learning_rate": 9.801700234117999e-05, "loss": 2.0541, "step": 2067 }, { "epoch": 0.09038856593382578, "grad_norm": 4.03125, "learning_rate": 9.801508668646118e-05, "loss": 1.8612, "step": 2068 }, { "epoch": 0.09043227413785568, "grad_norm": 2.921875, "learning_rate": 9.801317012562779e-05, "loss": 2.3819, "step": 2069 }, { "epoch": 0.09047598234188557, "grad_norm": 2.546875, "learning_rate": 9.8011252658716e-05, "loss": 2.1188, "step": 2070 }, { "epoch": 0.09051969054591547, "grad_norm": 2.359375, "learning_rate": 9.8009334285762e-05, "loss": 2.0454, "step": 2071 }, { "epoch": 0.09056339874994536, "grad_norm": 2.65625, "learning_rate": 9.800741500680197e-05, "loss": 2.1398, "step": 2072 }, { "epoch": 0.09060710695397527, "grad_norm": 4.8125, "learning_rate": 9.800549482187216e-05, "loss": 1.7601, "step": 2073 }, { "epoch": 0.09065081515800516, "grad_norm": 6.40625, "learning_rate": 9.800357373100882e-05, "loss": 2.1453, "step": 2074 }, { "epoch": 0.09069452336203505, "grad_norm": 2.8125, "learning_rate": 9.800165173424814e-05, "loss": 2.3561, "step": 2075 }, { "epoch": 0.09073823156606495, "grad_norm": 2.8125, "learning_rate": 9.799972883162645e-05, "loss": 1.8398, "step": 2076 }, { "epoch": 0.09078193977009484, "grad_norm": 2.765625, "learning_rate": 9.799780502318e-05, "loss": 2.2264, "step": 2077 }, { "epoch": 0.09082564797412475, "grad_norm": 2.703125, "learning_rate": 9.799588030894512e-05, "loss": 2.0077, "step": 2078 }, { "epoch": 0.09086935617815464, "grad_norm": 2.53125, "learning_rate": 9.799395468895812e-05, "loss": 2.0867, "step": 2079 }, { "epoch": 0.09091306438218454, "grad_norm": 2.578125, "learning_rate": 9.799202816325534e-05, "loss": 2.1293, "step": 2080 }, { "epoch": 0.09095677258621443, "grad_norm": 2.40625, "learning_rate": 9.799010073187316e-05, "loss": 2.2106, "step": 2081 }, { "epoch": 0.09100048079024432, "grad_norm": 2.453125, "learning_rate": 9.798817239484792e-05, "loss": 1.7821, "step": 2082 }, { "epoch": 0.09104418899427423, "grad_norm": 3.65625, "learning_rate": 9.798624315221603e-05, "loss": 2.1652, "step": 2083 }, { "epoch": 0.09108789719830412, "grad_norm": 2.3125, "learning_rate": 9.798431300401388e-05, "loss": 1.9395, "step": 2084 }, { "epoch": 0.09113160540233402, "grad_norm": 2.84375, "learning_rate": 9.79823819502779e-05, "loss": 2.8928, "step": 2085 }, { "epoch": 0.09117531360636391, "grad_norm": 2.375, "learning_rate": 9.798044999104456e-05, "loss": 2.069, "step": 2086 }, { "epoch": 0.0912190218103938, "grad_norm": 3.15625, "learning_rate": 9.797851712635028e-05, "loss": 2.0174, "step": 2087 }, { "epoch": 0.09126273001442371, "grad_norm": 2.34375, "learning_rate": 9.797658335623155e-05, "loss": 2.1882, "step": 2088 }, { "epoch": 0.0913064382184536, "grad_norm": 2.75, "learning_rate": 9.797464868072488e-05, "loss": 2.2737, "step": 2089 }, { "epoch": 0.0913501464224835, "grad_norm": 2.40625, "learning_rate": 9.797271309986676e-05, "loss": 2.0482, "step": 2090 }, { "epoch": 0.0913938546265134, "grad_norm": 3.515625, "learning_rate": 9.797077661369372e-05, "loss": 2.1115, "step": 2091 }, { "epoch": 0.09143756283054329, "grad_norm": 2.9375, "learning_rate": 9.79688392222423e-05, "loss": 2.2927, "step": 2092 }, { "epoch": 0.0914812710345732, "grad_norm": 2.546875, "learning_rate": 9.796690092554908e-05, "loss": 1.8428, "step": 2093 }, { "epoch": 0.09152497923860309, "grad_norm": 2.421875, "learning_rate": 9.796496172365061e-05, "loss": 1.911, "step": 2094 }, { "epoch": 0.09156868744263298, "grad_norm": 3.75, "learning_rate": 9.796302161658353e-05, "loss": 2.5428, "step": 2095 }, { "epoch": 0.09161239564666288, "grad_norm": 2.875, "learning_rate": 9.79610806043844e-05, "loss": 1.9935, "step": 2096 }, { "epoch": 0.09165610385069277, "grad_norm": 2.46875, "learning_rate": 9.795913868708989e-05, "loss": 2.322, "step": 2097 }, { "epoch": 0.09169981205472268, "grad_norm": 3.9375, "learning_rate": 9.795719586473663e-05, "loss": 2.0394, "step": 2098 }, { "epoch": 0.09174352025875257, "grad_norm": 5.03125, "learning_rate": 9.795525213736128e-05, "loss": 2.4226, "step": 2099 }, { "epoch": 0.09178722846278246, "grad_norm": 12.25, "learning_rate": 9.795330750500054e-05, "loss": 1.4187, "step": 2100 }, { "epoch": 0.09183093666681236, "grad_norm": 2.5, "learning_rate": 9.795136196769107e-05, "loss": 1.8698, "step": 2101 }, { "epoch": 0.09187464487084225, "grad_norm": 3.0, "learning_rate": 9.794941552546963e-05, "loss": 2.5359, "step": 2102 }, { "epoch": 0.09191835307487216, "grad_norm": 3.171875, "learning_rate": 9.794746817837293e-05, "loss": 2.3038, "step": 2103 }, { "epoch": 0.09196206127890205, "grad_norm": 2.796875, "learning_rate": 9.794551992643772e-05, "loss": 2.0825, "step": 2104 }, { "epoch": 0.09200576948293195, "grad_norm": 3.546875, "learning_rate": 9.794357076970076e-05, "loss": 2.0303, "step": 2105 }, { "epoch": 0.09204947768696184, "grad_norm": 2.328125, "learning_rate": 9.794162070819885e-05, "loss": 1.7951, "step": 2106 }, { "epoch": 0.09209318589099173, "grad_norm": 2.3125, "learning_rate": 9.793966974196878e-05, "loss": 2.0509, "step": 2107 }, { "epoch": 0.09213689409502164, "grad_norm": 2.65625, "learning_rate": 9.793771787104735e-05, "loss": 2.2638, "step": 2108 }, { "epoch": 0.09218060229905153, "grad_norm": 2.875, "learning_rate": 9.793576509547144e-05, "loss": 2.2946, "step": 2109 }, { "epoch": 0.09222431050308143, "grad_norm": 3.171875, "learning_rate": 9.793381141527786e-05, "loss": 2.5537, "step": 2110 }, { "epoch": 0.09226801870711132, "grad_norm": 2.515625, "learning_rate": 9.79318568305035e-05, "loss": 2.0841, "step": 2111 }, { "epoch": 0.09231172691114121, "grad_norm": 2.578125, "learning_rate": 9.792990134118525e-05, "loss": 2.1249, "step": 2112 }, { "epoch": 0.09235543511517112, "grad_norm": 4.125, "learning_rate": 9.792794494736e-05, "loss": 2.4162, "step": 2113 }, { "epoch": 0.09239914331920102, "grad_norm": 2.625, "learning_rate": 9.792598764906466e-05, "loss": 2.1053, "step": 2114 }, { "epoch": 0.09244285152323091, "grad_norm": 2.71875, "learning_rate": 9.792402944633619e-05, "loss": 2.5305, "step": 2115 }, { "epoch": 0.0924865597272608, "grad_norm": 2.390625, "learning_rate": 9.792207033921152e-05, "loss": 2.1751, "step": 2116 }, { "epoch": 0.0925302679312907, "grad_norm": 2.484375, "learning_rate": 9.792011032772765e-05, "loss": 1.9627, "step": 2117 }, { "epoch": 0.0925739761353206, "grad_norm": 2.75, "learning_rate": 9.791814941192155e-05, "loss": 2.2857, "step": 2118 }, { "epoch": 0.0926176843393505, "grad_norm": 2.703125, "learning_rate": 9.791618759183023e-05, "loss": 2.2022, "step": 2119 }, { "epoch": 0.09266139254338039, "grad_norm": 3.9375, "learning_rate": 9.791422486749071e-05, "loss": 2.8579, "step": 2120 }, { "epoch": 0.09270510074741029, "grad_norm": 3.34375, "learning_rate": 9.791226123894003e-05, "loss": 2.7253, "step": 2121 }, { "epoch": 0.09274880895144018, "grad_norm": 3.0, "learning_rate": 9.791029670621525e-05, "loss": 1.9731, "step": 2122 }, { "epoch": 0.09279251715547009, "grad_norm": 2.609375, "learning_rate": 9.790833126935343e-05, "loss": 1.9945, "step": 2123 }, { "epoch": 0.09283622535949998, "grad_norm": 4.84375, "learning_rate": 9.79063649283917e-05, "loss": 1.9241, "step": 2124 }, { "epoch": 0.09287993356352987, "grad_norm": 3.015625, "learning_rate": 9.79043976833671e-05, "loss": 2.9551, "step": 2125 }, { "epoch": 0.09292364176755977, "grad_norm": 2.859375, "learning_rate": 9.790242953431682e-05, "loss": 1.9754, "step": 2126 }, { "epoch": 0.09296734997158966, "grad_norm": 2.953125, "learning_rate": 9.790046048127797e-05, "loss": 1.9089, "step": 2127 }, { "epoch": 0.09301105817561957, "grad_norm": 3.078125, "learning_rate": 9.789849052428772e-05, "loss": 2.0825, "step": 2128 }, { "epoch": 0.09305476637964946, "grad_norm": 2.390625, "learning_rate": 9.789651966338323e-05, "loss": 2.5024, "step": 2129 }, { "epoch": 0.09309847458367936, "grad_norm": 2.84375, "learning_rate": 9.789454789860171e-05, "loss": 1.8926, "step": 2130 }, { "epoch": 0.09314218278770925, "grad_norm": 2.90625, "learning_rate": 9.789257522998037e-05, "loss": 2.1313, "step": 2131 }, { "epoch": 0.09318589099173914, "grad_norm": 2.625, "learning_rate": 9.789060165755643e-05, "loss": 2.2453, "step": 2132 }, { "epoch": 0.09322959919576905, "grad_norm": 2.6875, "learning_rate": 9.788862718136713e-05, "loss": 2.0841, "step": 2133 }, { "epoch": 0.09327330739979894, "grad_norm": 3.71875, "learning_rate": 9.788665180144973e-05, "loss": 2.1431, "step": 2134 }, { "epoch": 0.09331701560382884, "grad_norm": 2.265625, "learning_rate": 9.788467551784153e-05, "loss": 2.3447, "step": 2135 }, { "epoch": 0.09336072380785873, "grad_norm": 2.28125, "learning_rate": 9.788269833057979e-05, "loss": 2.0081, "step": 2136 }, { "epoch": 0.09340443201188864, "grad_norm": 2.765625, "learning_rate": 9.788072023970187e-05, "loss": 2.434, "step": 2137 }, { "epoch": 0.09344814021591853, "grad_norm": 2.90625, "learning_rate": 9.787874124524505e-05, "loss": 1.61, "step": 2138 }, { "epoch": 0.09349184841994843, "grad_norm": 2.25, "learning_rate": 9.78767613472467e-05, "loss": 1.7895, "step": 2139 }, { "epoch": 0.09353555662397832, "grad_norm": 2.984375, "learning_rate": 9.787478054574419e-05, "loss": 1.8193, "step": 2140 }, { "epoch": 0.09357926482800821, "grad_norm": 3.484375, "learning_rate": 9.787279884077489e-05, "loss": 2.2462, "step": 2141 }, { "epoch": 0.09362297303203812, "grad_norm": 2.359375, "learning_rate": 9.78708162323762e-05, "loss": 1.8915, "step": 2142 }, { "epoch": 0.09366668123606801, "grad_norm": 2.375, "learning_rate": 9.786883272058554e-05, "loss": 2.1082, "step": 2143 }, { "epoch": 0.09371038944009791, "grad_norm": 2.40625, "learning_rate": 9.786684830544033e-05, "loss": 1.9747, "step": 2144 }, { "epoch": 0.0937540976441278, "grad_norm": 2.21875, "learning_rate": 9.786486298697803e-05, "loss": 1.9774, "step": 2145 }, { "epoch": 0.0937978058481577, "grad_norm": 3.484375, "learning_rate": 9.786287676523609e-05, "loss": 2.7382, "step": 2146 }, { "epoch": 0.0938415140521876, "grad_norm": 2.515625, "learning_rate": 9.786088964025201e-05, "loss": 1.9554, "step": 2147 }, { "epoch": 0.0938852222562175, "grad_norm": 2.875, "learning_rate": 9.78589016120633e-05, "loss": 1.9873, "step": 2148 }, { "epoch": 0.09392893046024739, "grad_norm": 3.359375, "learning_rate": 9.785691268070745e-05, "loss": 2.1718, "step": 2149 }, { "epoch": 0.09397263866427728, "grad_norm": 2.453125, "learning_rate": 9.7854922846222e-05, "loss": 2.5165, "step": 2150 }, { "epoch": 0.09401634686830718, "grad_norm": 2.3125, "learning_rate": 9.785293210864451e-05, "loss": 1.8265, "step": 2151 }, { "epoch": 0.09406005507233708, "grad_norm": 2.359375, "learning_rate": 9.785094046801256e-05, "loss": 2.2153, "step": 2152 }, { "epoch": 0.09410376327636698, "grad_norm": 2.296875, "learning_rate": 9.78489479243637e-05, "loss": 1.8579, "step": 2153 }, { "epoch": 0.09414747148039687, "grad_norm": 2.875, "learning_rate": 9.784695447773555e-05, "loss": 3.1056, "step": 2154 }, { "epoch": 0.09419117968442677, "grad_norm": 3.59375, "learning_rate": 9.784496012816573e-05, "loss": 1.8621, "step": 2155 }, { "epoch": 0.09423488788845666, "grad_norm": 2.359375, "learning_rate": 9.78429648756919e-05, "loss": 1.3941, "step": 2156 }, { "epoch": 0.09427859609248657, "grad_norm": 3.34375, "learning_rate": 9.784096872035167e-05, "loss": 2.677, "step": 2157 }, { "epoch": 0.09432230429651646, "grad_norm": 2.71875, "learning_rate": 9.783897166218273e-05, "loss": 2.3034, "step": 2158 }, { "epoch": 0.09436601250054635, "grad_norm": 3.5625, "learning_rate": 9.783697370122278e-05, "loss": 2.7365, "step": 2159 }, { "epoch": 0.09440972070457625, "grad_norm": 2.609375, "learning_rate": 9.78349748375095e-05, "loss": 2.675, "step": 2160 }, { "epoch": 0.09445342890860614, "grad_norm": 2.453125, "learning_rate": 9.783297507108065e-05, "loss": 2.2508, "step": 2161 }, { "epoch": 0.09449713711263605, "grad_norm": 2.4375, "learning_rate": 9.78309744019739e-05, "loss": 2.5425, "step": 2162 }, { "epoch": 0.09454084531666594, "grad_norm": 2.890625, "learning_rate": 9.78289728302271e-05, "loss": 1.9132, "step": 2163 }, { "epoch": 0.09458455352069584, "grad_norm": 2.53125, "learning_rate": 9.782697035587793e-05, "loss": 1.9358, "step": 2164 }, { "epoch": 0.09462826172472573, "grad_norm": 3.40625, "learning_rate": 9.782496697896424e-05, "loss": 2.3742, "step": 2165 }, { "epoch": 0.09467196992875562, "grad_norm": 2.46875, "learning_rate": 9.78229626995238e-05, "loss": 2.5638, "step": 2166 }, { "epoch": 0.09471567813278553, "grad_norm": 2.328125, "learning_rate": 9.782095751759448e-05, "loss": 2.0113, "step": 2167 }, { "epoch": 0.09475938633681542, "grad_norm": 2.640625, "learning_rate": 9.781895143321406e-05, "loss": 2.1018, "step": 2168 }, { "epoch": 0.09480309454084532, "grad_norm": 2.203125, "learning_rate": 9.781694444642044e-05, "loss": 1.7566, "step": 2169 }, { "epoch": 0.09484680274487521, "grad_norm": 3.15625, "learning_rate": 9.781493655725149e-05, "loss": 2.4728, "step": 2170 }, { "epoch": 0.0948905109489051, "grad_norm": 3.84375, "learning_rate": 9.781292776574509e-05, "loss": 1.4731, "step": 2171 }, { "epoch": 0.09493421915293501, "grad_norm": 2.3125, "learning_rate": 9.781091807193916e-05, "loss": 2.251, "step": 2172 }, { "epoch": 0.0949779273569649, "grad_norm": 2.4375, "learning_rate": 9.780890747587164e-05, "loss": 1.8029, "step": 2173 }, { "epoch": 0.0950216355609948, "grad_norm": 3.1875, "learning_rate": 9.780689597758041e-05, "loss": 1.749, "step": 2174 }, { "epoch": 0.09506534376502469, "grad_norm": 3.171875, "learning_rate": 9.78048835771035e-05, "loss": 2.6273, "step": 2175 }, { "epoch": 0.09510905196905459, "grad_norm": 3.1875, "learning_rate": 9.780287027447886e-05, "loss": 2.1231, "step": 2176 }, { "epoch": 0.0951527601730845, "grad_norm": 3.390625, "learning_rate": 9.780085606974448e-05, "loss": 2.3221, "step": 2177 }, { "epoch": 0.09519646837711439, "grad_norm": 2.6875, "learning_rate": 9.779884096293838e-05, "loss": 2.2185, "step": 2178 }, { "epoch": 0.09524017658114428, "grad_norm": 3.265625, "learning_rate": 9.779682495409857e-05, "loss": 2.3901, "step": 2179 }, { "epoch": 0.09528388478517417, "grad_norm": 2.40625, "learning_rate": 9.779480804326313e-05, "loss": 2.7555, "step": 2180 }, { "epoch": 0.09532759298920407, "grad_norm": 2.828125, "learning_rate": 9.779279023047008e-05, "loss": 2.0593, "step": 2181 }, { "epoch": 0.09537130119323398, "grad_norm": 3.078125, "learning_rate": 9.779077151575753e-05, "loss": 2.1978, "step": 2182 }, { "epoch": 0.09541500939726387, "grad_norm": 3.9375, "learning_rate": 9.778875189916356e-05, "loss": 2.2172, "step": 2183 }, { "epoch": 0.09545871760129376, "grad_norm": 2.84375, "learning_rate": 9.778673138072629e-05, "loss": 1.9492, "step": 2184 }, { "epoch": 0.09550242580532366, "grad_norm": 3.234375, "learning_rate": 9.778470996048383e-05, "loss": 2.308, "step": 2185 }, { "epoch": 0.09554613400935355, "grad_norm": 2.515625, "learning_rate": 9.778268763847438e-05, "loss": 2.4126, "step": 2186 }, { "epoch": 0.09558984221338346, "grad_norm": 3.390625, "learning_rate": 9.778066441473604e-05, "loss": 1.7932, "step": 2187 }, { "epoch": 0.09563355041741335, "grad_norm": 3.109375, "learning_rate": 9.777864028930705e-05, "loss": 2.7622, "step": 2188 }, { "epoch": 0.09567725862144325, "grad_norm": 2.59375, "learning_rate": 9.777661526222556e-05, "loss": 2.0042, "step": 2189 }, { "epoch": 0.09572096682547314, "grad_norm": 3.015625, "learning_rate": 9.777458933352981e-05, "loss": 2.0112, "step": 2190 }, { "epoch": 0.09576467502950303, "grad_norm": 2.578125, "learning_rate": 9.777256250325801e-05, "loss": 2.1095, "step": 2191 }, { "epoch": 0.09580838323353294, "grad_norm": 2.796875, "learning_rate": 9.777053477144844e-05, "loss": 2.1284, "step": 2192 }, { "epoch": 0.09585209143756283, "grad_norm": 3.109375, "learning_rate": 9.776850613813936e-05, "loss": 2.2209, "step": 2193 }, { "epoch": 0.09589579964159273, "grad_norm": 2.921875, "learning_rate": 9.776647660336903e-05, "loss": 2.518, "step": 2194 }, { "epoch": 0.09593950784562262, "grad_norm": 2.84375, "learning_rate": 9.776444616717578e-05, "loss": 2.4591, "step": 2195 }, { "epoch": 0.09598321604965251, "grad_norm": 2.1875, "learning_rate": 9.776241482959791e-05, "loss": 2.046, "step": 2196 }, { "epoch": 0.09602692425368242, "grad_norm": 2.40625, "learning_rate": 9.776038259067375e-05, "loss": 2.0352, "step": 2197 }, { "epoch": 0.09607063245771232, "grad_norm": 3.1875, "learning_rate": 9.775834945044167e-05, "loss": 2.0531, "step": 2198 }, { "epoch": 0.09611434066174221, "grad_norm": 3.046875, "learning_rate": 9.775631540894002e-05, "loss": 2.5601, "step": 2199 }, { "epoch": 0.0961580488657721, "grad_norm": 3.5625, "learning_rate": 9.775428046620718e-05, "loss": 2.8106, "step": 2200 }, { "epoch": 0.096201757069802, "grad_norm": 2.75, "learning_rate": 9.775224462228159e-05, "loss": 2.3545, "step": 2201 }, { "epoch": 0.0962454652738319, "grad_norm": 6.71875, "learning_rate": 9.775020787720162e-05, "loss": 1.8256, "step": 2202 }, { "epoch": 0.0962891734778618, "grad_norm": 2.71875, "learning_rate": 9.774817023100575e-05, "loss": 2.4899, "step": 2203 }, { "epoch": 0.09633288168189169, "grad_norm": 2.546875, "learning_rate": 9.77461316837324e-05, "loss": 2.3327, "step": 2204 }, { "epoch": 0.09637658988592158, "grad_norm": 3.296875, "learning_rate": 9.774409223542006e-05, "loss": 2.6428, "step": 2205 }, { "epoch": 0.09642029808995148, "grad_norm": 2.421875, "learning_rate": 9.774205188610721e-05, "loss": 1.8124, "step": 2206 }, { "epoch": 0.09646400629398139, "grad_norm": 2.546875, "learning_rate": 9.774001063583235e-05, "loss": 2.2997, "step": 2207 }, { "epoch": 0.09650771449801128, "grad_norm": 2.4375, "learning_rate": 9.773796848463402e-05, "loss": 2.1743, "step": 2208 }, { "epoch": 0.09655142270204117, "grad_norm": 2.546875, "learning_rate": 9.773592543255074e-05, "loss": 2.0742, "step": 2209 }, { "epoch": 0.09659513090607107, "grad_norm": 2.421875, "learning_rate": 9.773388147962106e-05, "loss": 1.9118, "step": 2210 }, { "epoch": 0.09663883911010096, "grad_norm": 2.34375, "learning_rate": 9.773183662588356e-05, "loss": 1.9707, "step": 2211 }, { "epoch": 0.09668254731413087, "grad_norm": 2.9375, "learning_rate": 9.772979087137686e-05, "loss": 2.2365, "step": 2212 }, { "epoch": 0.09672625551816076, "grad_norm": 2.203125, "learning_rate": 9.772774421613951e-05, "loss": 1.9471, "step": 2213 }, { "epoch": 0.09676996372219066, "grad_norm": 2.453125, "learning_rate": 9.772569666021018e-05, "loss": 2.2223, "step": 2214 }, { "epoch": 0.09681367192622055, "grad_norm": 2.609375, "learning_rate": 9.772364820362749e-05, "loss": 2.2746, "step": 2215 }, { "epoch": 0.09685738013025044, "grad_norm": 2.375, "learning_rate": 9.77215988464301e-05, "loss": 1.7533, "step": 2216 }, { "epoch": 0.09690108833428035, "grad_norm": 3.3125, "learning_rate": 9.771954858865668e-05, "loss": 2.0964, "step": 2217 }, { "epoch": 0.09694479653831024, "grad_norm": 3.65625, "learning_rate": 9.771749743034592e-05, "loss": 2.1936, "step": 2218 }, { "epoch": 0.09698850474234014, "grad_norm": 4.34375, "learning_rate": 9.771544537153653e-05, "loss": 2.7054, "step": 2219 }, { "epoch": 0.09703221294637003, "grad_norm": 2.484375, "learning_rate": 9.771339241226726e-05, "loss": 2.2081, "step": 2220 }, { "epoch": 0.09707592115039992, "grad_norm": 2.703125, "learning_rate": 9.771133855257684e-05, "loss": 2.1181, "step": 2221 }, { "epoch": 0.09711962935442983, "grad_norm": 13.4375, "learning_rate": 9.770928379250399e-05, "loss": 2.4265, "step": 2222 }, { "epoch": 0.09716333755845973, "grad_norm": 10.25, "learning_rate": 9.770722813208754e-05, "loss": 6.5293, "step": 2223 }, { "epoch": 0.09720704576248962, "grad_norm": 3.453125, "learning_rate": 9.770517157136625e-05, "loss": 2.4392, "step": 2224 }, { "epoch": 0.09725075396651951, "grad_norm": 3.0625, "learning_rate": 9.770311411037894e-05, "loss": 2.3015, "step": 2225 }, { "epoch": 0.0972944621705494, "grad_norm": 2.8125, "learning_rate": 9.770105574916443e-05, "loss": 2.1963, "step": 2226 }, { "epoch": 0.09733817037457931, "grad_norm": 3.421875, "learning_rate": 9.76989964877616e-05, "loss": 1.7115, "step": 2227 }, { "epoch": 0.09738187857860921, "grad_norm": 4.0, "learning_rate": 9.769693632620926e-05, "loss": 1.7496, "step": 2228 }, { "epoch": 0.0974255867826391, "grad_norm": 2.65625, "learning_rate": 9.769487526454631e-05, "loss": 2.2873, "step": 2229 }, { "epoch": 0.097469294986669, "grad_norm": 2.84375, "learning_rate": 9.769281330281165e-05, "loss": 2.1804, "step": 2230 }, { "epoch": 0.09751300319069889, "grad_norm": 2.484375, "learning_rate": 9.76907504410442e-05, "loss": 2.0584, "step": 2231 }, { "epoch": 0.0975567113947288, "grad_norm": 3.390625, "learning_rate": 9.768868667928288e-05, "loss": 1.7148, "step": 2232 }, { "epoch": 0.09760041959875869, "grad_norm": 2.6875, "learning_rate": 9.768662201756662e-05, "loss": 1.9899, "step": 2233 }, { "epoch": 0.09764412780278858, "grad_norm": 2.78125, "learning_rate": 9.768455645593441e-05, "loss": 2.4444, "step": 2234 }, { "epoch": 0.09768783600681848, "grad_norm": 2.703125, "learning_rate": 9.768248999442522e-05, "loss": 1.8971, "step": 2235 }, { "epoch": 0.09773154421084837, "grad_norm": 2.890625, "learning_rate": 9.768042263307804e-05, "loss": 2.4553, "step": 2236 }, { "epoch": 0.09777525241487828, "grad_norm": 2.65625, "learning_rate": 9.767835437193187e-05, "loss": 1.7158, "step": 2237 }, { "epoch": 0.09781896061890817, "grad_norm": 2.859375, "learning_rate": 9.767628521102578e-05, "loss": 2.3434, "step": 2238 }, { "epoch": 0.09786266882293806, "grad_norm": 2.953125, "learning_rate": 9.767421515039881e-05, "loss": 1.9139, "step": 2239 }, { "epoch": 0.09790637702696796, "grad_norm": 2.328125, "learning_rate": 9.767214419009e-05, "loss": 2.0438, "step": 2240 }, { "epoch": 0.09795008523099785, "grad_norm": 2.375, "learning_rate": 9.767007233013845e-05, "loss": 1.9096, "step": 2241 }, { "epoch": 0.09799379343502776, "grad_norm": 2.609375, "learning_rate": 9.766799957058324e-05, "loss": 2.0401, "step": 2242 }, { "epoch": 0.09803750163905765, "grad_norm": 3.796875, "learning_rate": 9.766592591146352e-05, "loss": 3.3079, "step": 2243 }, { "epoch": 0.09808120984308755, "grad_norm": 2.84375, "learning_rate": 9.766385135281839e-05, "loss": 2.1826, "step": 2244 }, { "epoch": 0.09812491804711744, "grad_norm": 2.40625, "learning_rate": 9.766177589468701e-05, "loss": 1.8902, "step": 2245 }, { "epoch": 0.09816862625114733, "grad_norm": 2.515625, "learning_rate": 9.765969953710857e-05, "loss": 1.9289, "step": 2246 }, { "epoch": 0.09821233445517724, "grad_norm": 2.484375, "learning_rate": 9.765762228012222e-05, "loss": 1.9364, "step": 2247 }, { "epoch": 0.09825604265920714, "grad_norm": 18.75, "learning_rate": 9.765554412376719e-05, "loss": 6.6763, "step": 2248 }, { "epoch": 0.09829975086323703, "grad_norm": 2.828125, "learning_rate": 9.765346506808266e-05, "loss": 1.8456, "step": 2249 }, { "epoch": 0.09834345906726692, "grad_norm": 2.515625, "learning_rate": 9.765138511310791e-05, "loss": 2.7566, "step": 2250 }, { "epoch": 0.09838716727129682, "grad_norm": 2.546875, "learning_rate": 9.764930425888215e-05, "loss": 1.8674, "step": 2251 }, { "epoch": 0.09843087547532672, "grad_norm": 2.8125, "learning_rate": 9.764722250544469e-05, "loss": 2.1064, "step": 2252 }, { "epoch": 0.09847458367935662, "grad_norm": 2.53125, "learning_rate": 9.764513985283478e-05, "loss": 2.2086, "step": 2253 }, { "epoch": 0.09851829188338651, "grad_norm": 2.625, "learning_rate": 9.764305630109175e-05, "loss": 2.2642, "step": 2254 }, { "epoch": 0.0985620000874164, "grad_norm": 3.078125, "learning_rate": 9.764097185025489e-05, "loss": 2.1661, "step": 2255 }, { "epoch": 0.0986057082914463, "grad_norm": 2.375, "learning_rate": 9.763888650036354e-05, "loss": 1.985, "step": 2256 }, { "epoch": 0.0986494164954762, "grad_norm": 3.15625, "learning_rate": 9.76368002514571e-05, "loss": 2.8063, "step": 2257 }, { "epoch": 0.0986931246995061, "grad_norm": 3.0625, "learning_rate": 9.763471310357488e-05, "loss": 1.8268, "step": 2258 }, { "epoch": 0.09873683290353599, "grad_norm": 3.265625, "learning_rate": 9.763262505675632e-05, "loss": 2.2688, "step": 2259 }, { "epoch": 0.09878054110756589, "grad_norm": 6.71875, "learning_rate": 9.763053611104079e-05, "loss": 2.1481, "step": 2260 }, { "epoch": 0.09882424931159578, "grad_norm": 3.3125, "learning_rate": 9.76284462664677e-05, "loss": 1.79, "step": 2261 }, { "epoch": 0.09886795751562569, "grad_norm": 3.265625, "learning_rate": 9.762635552307653e-05, "loss": 2.7382, "step": 2262 }, { "epoch": 0.09891166571965558, "grad_norm": 2.5, "learning_rate": 9.762426388090671e-05, "loss": 2.0873, "step": 2263 }, { "epoch": 0.09895537392368547, "grad_norm": 3.65625, "learning_rate": 9.762217133999771e-05, "loss": 2.393, "step": 2264 }, { "epoch": 0.09899908212771537, "grad_norm": 2.9375, "learning_rate": 9.762007790038904e-05, "loss": 1.5162, "step": 2265 }, { "epoch": 0.09904279033174526, "grad_norm": 3.875, "learning_rate": 9.761798356212019e-05, "loss": 3.1219, "step": 2266 }, { "epoch": 0.09908649853577517, "grad_norm": 2.46875, "learning_rate": 9.761588832523067e-05, "loss": 2.3465, "step": 2267 }, { "epoch": 0.09913020673980506, "grad_norm": 2.5625, "learning_rate": 9.761379218976005e-05, "loss": 1.912, "step": 2268 }, { "epoch": 0.09917391494383496, "grad_norm": 2.484375, "learning_rate": 9.761169515574786e-05, "loss": 1.8608, "step": 2269 }, { "epoch": 0.09921762314786485, "grad_norm": 2.515625, "learning_rate": 9.760959722323371e-05, "loss": 1.9141, "step": 2270 }, { "epoch": 0.09926133135189474, "grad_norm": 3.421875, "learning_rate": 9.760749839225714e-05, "loss": 1.937, "step": 2271 }, { "epoch": 0.09930503955592465, "grad_norm": 3.578125, "learning_rate": 9.760539866285781e-05, "loss": 2.1976, "step": 2272 }, { "epoch": 0.09934874775995454, "grad_norm": 2.234375, "learning_rate": 9.76032980350753e-05, "loss": 1.4496, "step": 2273 }, { "epoch": 0.09939245596398444, "grad_norm": 3.234375, "learning_rate": 9.760119650894929e-05, "loss": 2.3222, "step": 2274 }, { "epoch": 0.09943616416801433, "grad_norm": 2.8125, "learning_rate": 9.759909408451942e-05, "loss": 2.3326, "step": 2275 }, { "epoch": 0.09947987237204424, "grad_norm": 2.4375, "learning_rate": 9.759699076182536e-05, "loss": 1.8843, "step": 2276 }, { "epoch": 0.09952358057607413, "grad_norm": 6.25, "learning_rate": 9.759488654090681e-05, "loss": 2.3823, "step": 2277 }, { "epoch": 0.09956728878010403, "grad_norm": 2.671875, "learning_rate": 9.759278142180348e-05, "loss": 2.1267, "step": 2278 }, { "epoch": 0.09961099698413392, "grad_norm": 2.59375, "learning_rate": 9.759067540455511e-05, "loss": 1.9924, "step": 2279 }, { "epoch": 0.09965470518816381, "grad_norm": 6.0625, "learning_rate": 9.758856848920142e-05, "loss": 2.2601, "step": 2280 }, { "epoch": 0.09969841339219372, "grad_norm": 2.390625, "learning_rate": 9.758646067578216e-05, "loss": 2.5403, "step": 2281 }, { "epoch": 0.09974212159622362, "grad_norm": 3.765625, "learning_rate": 9.758435196433716e-05, "loss": 2.2937, "step": 2282 }, { "epoch": 0.09978582980025351, "grad_norm": 5.125, "learning_rate": 9.758224235490618e-05, "loss": 1.7025, "step": 2283 }, { "epoch": 0.0998295380042834, "grad_norm": 2.671875, "learning_rate": 9.758013184752901e-05, "loss": 2.2129, "step": 2284 }, { "epoch": 0.0998732462083133, "grad_norm": 2.5, "learning_rate": 9.757802044224553e-05, "loss": 2.5303, "step": 2285 }, { "epoch": 0.0999169544123432, "grad_norm": 2.34375, "learning_rate": 9.757590813909554e-05, "loss": 1.7859, "step": 2286 }, { "epoch": 0.0999606626163731, "grad_norm": 2.375, "learning_rate": 9.757379493811892e-05, "loss": 2.1649, "step": 2287 }, { "epoch": 0.10000437082040299, "grad_norm": 2.84375, "learning_rate": 9.757168083935556e-05, "loss": 1.764, "step": 2288 }, { "epoch": 0.10004807902443288, "grad_norm": 2.296875, "learning_rate": 9.756956584284533e-05, "loss": 1.8986, "step": 2289 }, { "epoch": 0.10009178722846278, "grad_norm": 2.515625, "learning_rate": 9.756744994862817e-05, "loss": 2.2794, "step": 2290 }, { "epoch": 0.10013549543249269, "grad_norm": 3.234375, "learning_rate": 9.7565333156744e-05, "loss": 1.987, "step": 2291 }, { "epoch": 0.10017920363652258, "grad_norm": 2.59375, "learning_rate": 9.756321546723277e-05, "loss": 2.2158, "step": 2292 }, { "epoch": 0.10022291184055247, "grad_norm": 2.828125, "learning_rate": 9.756109688013442e-05, "loss": 1.4978, "step": 2293 }, { "epoch": 0.10026662004458237, "grad_norm": 2.671875, "learning_rate": 9.755897739548896e-05, "loss": 2.0564, "step": 2294 }, { "epoch": 0.10031032824861226, "grad_norm": 2.84375, "learning_rate": 9.75568570133364e-05, "loss": 1.9511, "step": 2295 }, { "epoch": 0.10035403645264217, "grad_norm": 2.671875, "learning_rate": 9.75547357337167e-05, "loss": 2.2555, "step": 2296 }, { "epoch": 0.10039774465667206, "grad_norm": 2.375, "learning_rate": 9.755261355666994e-05, "loss": 1.7196, "step": 2297 }, { "epoch": 0.10044145286070195, "grad_norm": 3.0625, "learning_rate": 9.755049048223615e-05, "loss": 1.6631, "step": 2298 }, { "epoch": 0.10048516106473185, "grad_norm": 2.625, "learning_rate": 9.754836651045538e-05, "loss": 1.9797, "step": 2299 }, { "epoch": 0.10052886926876174, "grad_norm": 3.265625, "learning_rate": 9.754624164136774e-05, "loss": 2.3664, "step": 2300 }, { "epoch": 0.10057257747279165, "grad_norm": 2.09375, "learning_rate": 9.754411587501333e-05, "loss": 1.7274, "step": 2301 }, { "epoch": 0.10061628567682154, "grad_norm": 2.59375, "learning_rate": 9.754198921143226e-05, "loss": 1.8973, "step": 2302 }, { "epoch": 0.10065999388085144, "grad_norm": 2.578125, "learning_rate": 9.753986165066464e-05, "loss": 2.0499, "step": 2303 }, { "epoch": 0.10070370208488133, "grad_norm": 2.578125, "learning_rate": 9.753773319275065e-05, "loss": 1.7278, "step": 2304 }, { "epoch": 0.10074741028891122, "grad_norm": 2.875, "learning_rate": 9.753560383773046e-05, "loss": 2.5053, "step": 2305 }, { "epoch": 0.10079111849294113, "grad_norm": 14.0, "learning_rate": 9.753347358564423e-05, "loss": 5.9202, "step": 2306 }, { "epoch": 0.10083482669697102, "grad_norm": 2.703125, "learning_rate": 9.753134243653217e-05, "loss": 1.9736, "step": 2307 }, { "epoch": 0.10087853490100092, "grad_norm": 2.8125, "learning_rate": 9.75292103904345e-05, "loss": 2.354, "step": 2308 }, { "epoch": 0.10092224310503081, "grad_norm": 2.828125, "learning_rate": 9.752707744739145e-05, "loss": 3.0874, "step": 2309 }, { "epoch": 0.1009659513090607, "grad_norm": 2.796875, "learning_rate": 9.752494360744329e-05, "loss": 2.251, "step": 2310 }, { "epoch": 0.10100965951309061, "grad_norm": 3.078125, "learning_rate": 9.752280887063026e-05, "loss": 2.0361, "step": 2311 }, { "epoch": 0.1010533677171205, "grad_norm": 2.734375, "learning_rate": 9.752067323699267e-05, "loss": 2.2617, "step": 2312 }, { "epoch": 0.1010970759211504, "grad_norm": 3.140625, "learning_rate": 9.751853670657081e-05, "loss": 1.9537, "step": 2313 }, { "epoch": 0.1011407841251803, "grad_norm": 2.875, "learning_rate": 9.7516399279405e-05, "loss": 2.3131, "step": 2314 }, { "epoch": 0.10118449232921019, "grad_norm": 2.9375, "learning_rate": 9.751426095553557e-05, "loss": 3.22, "step": 2315 }, { "epoch": 0.1012282005332401, "grad_norm": 2.5625, "learning_rate": 9.751212173500291e-05, "loss": 1.9288, "step": 2316 }, { "epoch": 0.10127190873726999, "grad_norm": 2.625, "learning_rate": 9.750998161784734e-05, "loss": 2.2181, "step": 2317 }, { "epoch": 0.10131561694129988, "grad_norm": 3.578125, "learning_rate": 9.750784060410927e-05, "loss": 2.2256, "step": 2318 }, { "epoch": 0.10135932514532978, "grad_norm": 3.328125, "learning_rate": 9.750569869382911e-05, "loss": 2.3126, "step": 2319 }, { "epoch": 0.10140303334935967, "grad_norm": 2.46875, "learning_rate": 9.750355588704727e-05, "loss": 2.6601, "step": 2320 }, { "epoch": 0.10144674155338958, "grad_norm": 3.375, "learning_rate": 9.750141218380419e-05, "loss": 2.0413, "step": 2321 }, { "epoch": 0.10149044975741947, "grad_norm": 2.6875, "learning_rate": 9.749926758414035e-05, "loss": 2.0241, "step": 2322 }, { "epoch": 0.10153415796144936, "grad_norm": 2.5625, "learning_rate": 9.749712208809618e-05, "loss": 2.1908, "step": 2323 }, { "epoch": 0.10157786616547926, "grad_norm": 3.25, "learning_rate": 9.749497569571217e-05, "loss": 1.5666, "step": 2324 }, { "epoch": 0.10162157436950915, "grad_norm": 2.359375, "learning_rate": 9.749282840702887e-05, "loss": 2.0106, "step": 2325 }, { "epoch": 0.10166528257353906, "grad_norm": 5.21875, "learning_rate": 9.749068022208676e-05, "loss": 2.8774, "step": 2326 }, { "epoch": 0.10170899077756895, "grad_norm": 2.59375, "learning_rate": 9.748853114092639e-05, "loss": 1.7896, "step": 2327 }, { "epoch": 0.10175269898159885, "grad_norm": 2.84375, "learning_rate": 9.748638116358834e-05, "loss": 2.3381, "step": 2328 }, { "epoch": 0.10179640718562874, "grad_norm": 2.53125, "learning_rate": 9.748423029011317e-05, "loss": 1.9284, "step": 2329 }, { "epoch": 0.10184011538965863, "grad_norm": 2.8125, "learning_rate": 9.748207852054144e-05, "loss": 1.5755, "step": 2330 }, { "epoch": 0.10188382359368854, "grad_norm": 2.703125, "learning_rate": 9.747992585491379e-05, "loss": 2.5287, "step": 2331 }, { "epoch": 0.10192753179771843, "grad_norm": 2.640625, "learning_rate": 9.747777229327084e-05, "loss": 2.5582, "step": 2332 }, { "epoch": 0.10197124000174833, "grad_norm": 2.296875, "learning_rate": 9.747561783565323e-05, "loss": 1.9647, "step": 2333 }, { "epoch": 0.10201494820577822, "grad_norm": 3.046875, "learning_rate": 9.747346248210161e-05, "loss": 2.9669, "step": 2334 }, { "epoch": 0.10205865640980812, "grad_norm": 3.265625, "learning_rate": 9.747130623265665e-05, "loss": 2.9084, "step": 2335 }, { "epoch": 0.10210236461383802, "grad_norm": 3.375, "learning_rate": 9.746914908735906e-05, "loss": 1.8825, "step": 2336 }, { "epoch": 0.10214607281786792, "grad_norm": 2.671875, "learning_rate": 9.746699104624953e-05, "loss": 2.0626, "step": 2337 }, { "epoch": 0.10218978102189781, "grad_norm": 3.359375, "learning_rate": 9.746483210936881e-05, "loss": 2.3356, "step": 2338 }, { "epoch": 0.1022334892259277, "grad_norm": 2.765625, "learning_rate": 9.74626722767576e-05, "loss": 2.2161, "step": 2339 }, { "epoch": 0.1022771974299576, "grad_norm": 2.671875, "learning_rate": 9.74605115484567e-05, "loss": 2.2453, "step": 2340 }, { "epoch": 0.1023209056339875, "grad_norm": 2.5, "learning_rate": 9.745834992450689e-05, "loss": 1.6678, "step": 2341 }, { "epoch": 0.1023646138380174, "grad_norm": 2.453125, "learning_rate": 9.745618740494892e-05, "loss": 1.775, "step": 2342 }, { "epoch": 0.10240832204204729, "grad_norm": 2.390625, "learning_rate": 9.745402398982363e-05, "loss": 1.9103, "step": 2343 }, { "epoch": 0.10245203024607719, "grad_norm": 2.34375, "learning_rate": 9.745185967917184e-05, "loss": 1.9037, "step": 2344 }, { "epoch": 0.10249573845010708, "grad_norm": 2.34375, "learning_rate": 9.74496944730344e-05, "loss": 2.4936, "step": 2345 }, { "epoch": 0.10253944665413699, "grad_norm": 2.6875, "learning_rate": 9.744752837145217e-05, "loss": 1.9646, "step": 2346 }, { "epoch": 0.10258315485816688, "grad_norm": 2.578125, "learning_rate": 9.744536137446601e-05, "loss": 1.7475, "step": 2347 }, { "epoch": 0.10262686306219677, "grad_norm": 2.6875, "learning_rate": 9.744319348211684e-05, "loss": 2.0058, "step": 2348 }, { "epoch": 0.10267057126622667, "grad_norm": 2.515625, "learning_rate": 9.744102469444555e-05, "loss": 2.3312, "step": 2349 }, { "epoch": 0.10271427947025656, "grad_norm": 2.8125, "learning_rate": 9.743885501149308e-05, "loss": 2.1554, "step": 2350 }, { "epoch": 0.10275798767428647, "grad_norm": 2.890625, "learning_rate": 9.743668443330037e-05, "loss": 2.3322, "step": 2351 }, { "epoch": 0.10280169587831636, "grad_norm": 2.375, "learning_rate": 9.743451295990837e-05, "loss": 1.9326, "step": 2352 }, { "epoch": 0.10284540408234626, "grad_norm": 2.265625, "learning_rate": 9.743234059135811e-05, "loss": 2.1341, "step": 2353 }, { "epoch": 0.10288911228637615, "grad_norm": 2.421875, "learning_rate": 9.743016732769053e-05, "loss": 2.1587, "step": 2354 }, { "epoch": 0.10293282049040604, "grad_norm": 2.9375, "learning_rate": 9.742799316894663e-05, "loss": 2.2016, "step": 2355 }, { "epoch": 0.10297652869443595, "grad_norm": 2.59375, "learning_rate": 9.742581811516751e-05, "loss": 1.9144, "step": 2356 }, { "epoch": 0.10302023689846584, "grad_norm": 2.984375, "learning_rate": 9.742364216639416e-05, "loss": 2.4872, "step": 2357 }, { "epoch": 0.10306394510249574, "grad_norm": 2.28125, "learning_rate": 9.742146532266767e-05, "loss": 1.7991, "step": 2358 }, { "epoch": 0.10310765330652563, "grad_norm": 2.609375, "learning_rate": 9.741928758402912e-05, "loss": 2.3372, "step": 2359 }, { "epoch": 0.10315136151055553, "grad_norm": 2.578125, "learning_rate": 9.741710895051958e-05, "loss": 1.8571, "step": 2360 }, { "epoch": 0.10319506971458543, "grad_norm": 2.953125, "learning_rate": 9.741492942218018e-05, "loss": 1.9904, "step": 2361 }, { "epoch": 0.10323877791861533, "grad_norm": 2.765625, "learning_rate": 9.741274899905207e-05, "loss": 2.087, "step": 2362 }, { "epoch": 0.10328248612264522, "grad_norm": 2.546875, "learning_rate": 9.741056768117636e-05, "loss": 2.236, "step": 2363 }, { "epoch": 0.10332619432667511, "grad_norm": 2.90625, "learning_rate": 9.740838546859426e-05, "loss": 1.9287, "step": 2364 }, { "epoch": 0.10336990253070501, "grad_norm": 2.453125, "learning_rate": 9.74062023613469e-05, "loss": 2.3908, "step": 2365 }, { "epoch": 0.10341361073473491, "grad_norm": 2.46875, "learning_rate": 9.740401835947551e-05, "loss": 1.9419, "step": 2366 }, { "epoch": 0.10345731893876481, "grad_norm": 2.578125, "learning_rate": 9.740183346302131e-05, "loss": 2.2898, "step": 2367 }, { "epoch": 0.1035010271427947, "grad_norm": 2.375, "learning_rate": 9.739964767202552e-05, "loss": 2.1998, "step": 2368 }, { "epoch": 0.1035447353468246, "grad_norm": 2.390625, "learning_rate": 9.739746098652939e-05, "loss": 2.2489, "step": 2369 }, { "epoch": 0.10358844355085449, "grad_norm": 3.015625, "learning_rate": 9.73952734065742e-05, "loss": 1.9166, "step": 2370 }, { "epoch": 0.1036321517548844, "grad_norm": 2.1875, "learning_rate": 9.73930849322012e-05, "loss": 1.8404, "step": 2371 }, { "epoch": 0.10367585995891429, "grad_norm": 2.4375, "learning_rate": 9.739089556345171e-05, "loss": 2.4283, "step": 2372 }, { "epoch": 0.10371956816294418, "grad_norm": 2.671875, "learning_rate": 9.738870530036706e-05, "loss": 1.9826, "step": 2373 }, { "epoch": 0.10376327636697408, "grad_norm": 2.765625, "learning_rate": 9.738651414298857e-05, "loss": 2.3027, "step": 2374 }, { "epoch": 0.10380698457100397, "grad_norm": 2.625, "learning_rate": 9.738432209135757e-05, "loss": 1.8125, "step": 2375 }, { "epoch": 0.10385069277503388, "grad_norm": 2.234375, "learning_rate": 9.738212914551547e-05, "loss": 1.6889, "step": 2376 }, { "epoch": 0.10389440097906377, "grad_norm": 2.84375, "learning_rate": 9.737993530550362e-05, "loss": 2.5132, "step": 2377 }, { "epoch": 0.10393810918309367, "grad_norm": 4.3125, "learning_rate": 9.737774057136344e-05, "loss": 2.1089, "step": 2378 }, { "epoch": 0.10398181738712356, "grad_norm": 5.0, "learning_rate": 9.737554494313635e-05, "loss": 2.1395, "step": 2379 }, { "epoch": 0.10402552559115345, "grad_norm": 2.78125, "learning_rate": 9.737334842086374e-05, "loss": 2.4614, "step": 2380 }, { "epoch": 0.10406923379518336, "grad_norm": 7.59375, "learning_rate": 9.737115100458713e-05, "loss": 1.8017, "step": 2381 }, { "epoch": 0.10411294199921325, "grad_norm": 2.65625, "learning_rate": 9.736895269434794e-05, "loss": 2.0097, "step": 2382 }, { "epoch": 0.10415665020324315, "grad_norm": 2.546875, "learning_rate": 9.736675349018767e-05, "loss": 1.8963, "step": 2383 }, { "epoch": 0.10420035840727304, "grad_norm": 2.9375, "learning_rate": 9.736455339214783e-05, "loss": 2.386, "step": 2384 }, { "epoch": 0.10424406661130294, "grad_norm": 2.203125, "learning_rate": 9.736235240026993e-05, "loss": 2.0695, "step": 2385 }, { "epoch": 0.10428777481533284, "grad_norm": 3.40625, "learning_rate": 9.73601505145955e-05, "loss": 2.3432, "step": 2386 }, { "epoch": 0.10433148301936274, "grad_norm": 3.25, "learning_rate": 9.735794773516611e-05, "loss": 2.3339, "step": 2387 }, { "epoch": 0.10437519122339263, "grad_norm": 3.421875, "learning_rate": 9.735574406202332e-05, "loss": 1.8667, "step": 2388 }, { "epoch": 0.10441889942742252, "grad_norm": 2.78125, "learning_rate": 9.735353949520871e-05, "loss": 2.2035, "step": 2389 }, { "epoch": 0.10446260763145242, "grad_norm": 4.1875, "learning_rate": 9.73513340347639e-05, "loss": 1.8563, "step": 2390 }, { "epoch": 0.10450631583548232, "grad_norm": 2.109375, "learning_rate": 9.73491276807305e-05, "loss": 1.6536, "step": 2391 }, { "epoch": 0.10455002403951222, "grad_norm": 2.796875, "learning_rate": 9.734692043315012e-05, "loss": 2.2219, "step": 2392 }, { "epoch": 0.10459373224354211, "grad_norm": 2.859375, "learning_rate": 9.734471229206448e-05, "loss": 2.2594, "step": 2393 }, { "epoch": 0.104637440447572, "grad_norm": 2.640625, "learning_rate": 9.73425032575152e-05, "loss": 2.0679, "step": 2394 }, { "epoch": 0.1046811486516019, "grad_norm": 2.265625, "learning_rate": 9.734029332954395e-05, "loss": 2.0133, "step": 2395 }, { "epoch": 0.1047248568556318, "grad_norm": 2.46875, "learning_rate": 9.73380825081925e-05, "loss": 1.9773, "step": 2396 }, { "epoch": 0.1047685650596617, "grad_norm": 3.109375, "learning_rate": 9.733587079350252e-05, "loss": 2.7695, "step": 2397 }, { "epoch": 0.1048122732636916, "grad_norm": 2.328125, "learning_rate": 9.733365818551576e-05, "loss": 1.9399, "step": 2398 }, { "epoch": 0.10485598146772149, "grad_norm": 2.625, "learning_rate": 9.7331444684274e-05, "loss": 2.1683, "step": 2399 }, { "epoch": 0.10489968967175138, "grad_norm": 3.0, "learning_rate": 9.732923028981897e-05, "loss": 2.5887, "step": 2400 }, { "epoch": 0.10494339787578129, "grad_norm": 2.75, "learning_rate": 9.732701500219251e-05, "loss": 1.808, "step": 2401 }, { "epoch": 0.10498710607981118, "grad_norm": 4.8125, "learning_rate": 9.732479882143636e-05, "loss": 3.8407, "step": 2402 }, { "epoch": 0.10503081428384108, "grad_norm": 7.09375, "learning_rate": 9.732258174759239e-05, "loss": 3.2358, "step": 2403 }, { "epoch": 0.10507452248787097, "grad_norm": 2.921875, "learning_rate": 9.732036378070243e-05, "loss": 1.934, "step": 2404 }, { "epoch": 0.10511823069190086, "grad_norm": 2.453125, "learning_rate": 9.731814492080832e-05, "loss": 2.7517, "step": 2405 }, { "epoch": 0.10516193889593077, "grad_norm": 2.71875, "learning_rate": 9.731592516795197e-05, "loss": 2.9847, "step": 2406 }, { "epoch": 0.10520564709996066, "grad_norm": 2.203125, "learning_rate": 9.731370452217524e-05, "loss": 1.8667, "step": 2407 }, { "epoch": 0.10524935530399056, "grad_norm": 2.359375, "learning_rate": 9.731148298352004e-05, "loss": 2.0742, "step": 2408 }, { "epoch": 0.10529306350802045, "grad_norm": 4.0625, "learning_rate": 9.73092605520283e-05, "loss": 1.9423, "step": 2409 }, { "epoch": 0.10533677171205034, "grad_norm": 4.28125, "learning_rate": 9.730703722774196e-05, "loss": 2.7559, "step": 2410 }, { "epoch": 0.10538047991608025, "grad_norm": 2.640625, "learning_rate": 9.730481301070298e-05, "loss": 2.0713, "step": 2411 }, { "epoch": 0.10542418812011015, "grad_norm": 2.578125, "learning_rate": 9.730258790095331e-05, "loss": 2.1342, "step": 2412 }, { "epoch": 0.10546789632414004, "grad_norm": 2.875, "learning_rate": 9.730036189853498e-05, "loss": 2.3037, "step": 2413 }, { "epoch": 0.10551160452816993, "grad_norm": 2.546875, "learning_rate": 9.729813500348997e-05, "loss": 2.0718, "step": 2414 }, { "epoch": 0.10555531273219984, "grad_norm": 2.921875, "learning_rate": 9.72959072158603e-05, "loss": 3.004, "step": 2415 }, { "epoch": 0.10559902093622973, "grad_norm": 2.3125, "learning_rate": 9.729367853568805e-05, "loss": 1.9165, "step": 2416 }, { "epoch": 0.10564272914025963, "grad_norm": 3.046875, "learning_rate": 9.729144896301524e-05, "loss": 1.8318, "step": 2417 }, { "epoch": 0.10568643734428952, "grad_norm": 3.015625, "learning_rate": 9.728921849788397e-05, "loss": 2.5869, "step": 2418 }, { "epoch": 0.10573014554831942, "grad_norm": 2.828125, "learning_rate": 9.72869871403363e-05, "loss": 2.5118, "step": 2419 }, { "epoch": 0.10577385375234932, "grad_norm": 2.890625, "learning_rate": 9.728475489041438e-05, "loss": 2.0763, "step": 2420 }, { "epoch": 0.10581756195637922, "grad_norm": 2.453125, "learning_rate": 9.728252174816031e-05, "loss": 2.224, "step": 2421 }, { "epoch": 0.10586127016040911, "grad_norm": 2.46875, "learning_rate": 9.728028771361624e-05, "loss": 2.1799, "step": 2422 }, { "epoch": 0.105904978364439, "grad_norm": 2.515625, "learning_rate": 9.727805278682431e-05, "loss": 1.9667, "step": 2423 }, { "epoch": 0.1059486865684689, "grad_norm": 2.421875, "learning_rate": 9.727581696782673e-05, "loss": 2.0938, "step": 2424 }, { "epoch": 0.1059923947724988, "grad_norm": 2.5625, "learning_rate": 9.727358025666568e-05, "loss": 2.1156, "step": 2425 }, { "epoch": 0.1060361029765287, "grad_norm": 2.53125, "learning_rate": 9.727134265338335e-05, "loss": 1.8436, "step": 2426 }, { "epoch": 0.10607981118055859, "grad_norm": 2.40625, "learning_rate": 9.7269104158022e-05, "loss": 2.3787, "step": 2427 }, { "epoch": 0.10612351938458849, "grad_norm": 2.359375, "learning_rate": 9.726686477062386e-05, "loss": 1.9247, "step": 2428 }, { "epoch": 0.10616722758861838, "grad_norm": 3.1875, "learning_rate": 9.726462449123117e-05, "loss": 2.2084, "step": 2429 }, { "epoch": 0.10621093579264829, "grad_norm": 2.375, "learning_rate": 9.726238331988624e-05, "loss": 2.3032, "step": 2430 }, { "epoch": 0.10625464399667818, "grad_norm": 2.3125, "learning_rate": 9.726014125663135e-05, "loss": 1.9042, "step": 2431 }, { "epoch": 0.10629835220070807, "grad_norm": 2.6875, "learning_rate": 9.725789830150882e-05, "loss": 2.1058, "step": 2432 }, { "epoch": 0.10634206040473797, "grad_norm": 2.4375, "learning_rate": 9.725565445456095e-05, "loss": 1.7417, "step": 2433 }, { "epoch": 0.10638576860876786, "grad_norm": 7.0, "learning_rate": 9.72534097158301e-05, "loss": 2.8365, "step": 2434 }, { "epoch": 0.10642947681279777, "grad_norm": 2.484375, "learning_rate": 9.725116408535864e-05, "loss": 2.0676, "step": 2435 }, { "epoch": 0.10647318501682766, "grad_norm": 2.640625, "learning_rate": 9.724891756318895e-05, "loss": 2.0782, "step": 2436 }, { "epoch": 0.10651689322085756, "grad_norm": 2.21875, "learning_rate": 9.724667014936342e-05, "loss": 1.8681, "step": 2437 }, { "epoch": 0.10656060142488745, "grad_norm": 2.484375, "learning_rate": 9.724442184392445e-05, "loss": 2.1118, "step": 2438 }, { "epoch": 0.10660430962891734, "grad_norm": 2.625, "learning_rate": 9.724217264691448e-05, "loss": 2.16, "step": 2439 }, { "epoch": 0.10664801783294725, "grad_norm": 3.0625, "learning_rate": 9.723992255837596e-05, "loss": 1.5494, "step": 2440 }, { "epoch": 0.10669172603697714, "grad_norm": 2.859375, "learning_rate": 9.723767157835135e-05, "loss": 2.2222, "step": 2441 }, { "epoch": 0.10673543424100704, "grad_norm": 3.671875, "learning_rate": 9.723541970688311e-05, "loss": 2.3099, "step": 2442 }, { "epoch": 0.10677914244503693, "grad_norm": 2.953125, "learning_rate": 9.723316694401377e-05, "loss": 2.4099, "step": 2443 }, { "epoch": 0.10682285064906683, "grad_norm": 4.59375, "learning_rate": 9.723091328978581e-05, "loss": 2.0523, "step": 2444 }, { "epoch": 0.10686655885309673, "grad_norm": 2.75, "learning_rate": 9.722865874424178e-05, "loss": 2.5832, "step": 2445 }, { "epoch": 0.10691026705712663, "grad_norm": 2.5625, "learning_rate": 9.722640330742423e-05, "loss": 2.1906, "step": 2446 }, { "epoch": 0.10695397526115652, "grad_norm": 2.4375, "learning_rate": 9.722414697937572e-05, "loss": 2.4322, "step": 2447 }, { "epoch": 0.10699768346518641, "grad_norm": 2.53125, "learning_rate": 9.72218897601388e-05, "loss": 1.8567, "step": 2448 }, { "epoch": 0.10704139166921631, "grad_norm": 2.515625, "learning_rate": 9.721963164975612e-05, "loss": 1.9813, "step": 2449 }, { "epoch": 0.10708509987324621, "grad_norm": 2.625, "learning_rate": 9.721737264827025e-05, "loss": 2.5686, "step": 2450 }, { "epoch": 0.10712880807727611, "grad_norm": 2.4375, "learning_rate": 9.721511275572384e-05, "loss": 1.8549, "step": 2451 }, { "epoch": 0.107172516281306, "grad_norm": 2.28125, "learning_rate": 9.721285197215952e-05, "loss": 2.0145, "step": 2452 }, { "epoch": 0.1072162244853359, "grad_norm": 2.09375, "learning_rate": 9.721059029761999e-05, "loss": 1.6011, "step": 2453 }, { "epoch": 0.10725993268936579, "grad_norm": 2.15625, "learning_rate": 9.720832773214789e-05, "loss": 1.7277, "step": 2454 }, { "epoch": 0.1073036408933957, "grad_norm": 3.203125, "learning_rate": 9.720606427578595e-05, "loss": 2.0673, "step": 2455 }, { "epoch": 0.10734734909742559, "grad_norm": 2.75, "learning_rate": 9.720379992857687e-05, "loss": 2.2455, "step": 2456 }, { "epoch": 0.10739105730145548, "grad_norm": 2.375, "learning_rate": 9.720153469056338e-05, "loss": 2.2157, "step": 2457 }, { "epoch": 0.10743476550548538, "grad_norm": 2.734375, "learning_rate": 9.719926856178823e-05, "loss": 1.8761, "step": 2458 }, { "epoch": 0.10747847370951527, "grad_norm": 2.484375, "learning_rate": 9.71970015422942e-05, "loss": 2.5707, "step": 2459 }, { "epoch": 0.10752218191354518, "grad_norm": 2.65625, "learning_rate": 9.719473363212405e-05, "loss": 2.0183, "step": 2460 }, { "epoch": 0.10756589011757507, "grad_norm": 2.359375, "learning_rate": 9.719246483132058e-05, "loss": 2.1769, "step": 2461 }, { "epoch": 0.10760959832160497, "grad_norm": 2.25, "learning_rate": 9.719019513992662e-05, "loss": 1.8736, "step": 2462 }, { "epoch": 0.10765330652563486, "grad_norm": 2.28125, "learning_rate": 9.7187924557985e-05, "loss": 2.0864, "step": 2463 }, { "epoch": 0.10769701472966475, "grad_norm": 2.359375, "learning_rate": 9.718565308553857e-05, "loss": 1.8813, "step": 2464 }, { "epoch": 0.10774072293369466, "grad_norm": 2.3125, "learning_rate": 9.718338072263017e-05, "loss": 1.9003, "step": 2465 }, { "epoch": 0.10778443113772455, "grad_norm": 3.75, "learning_rate": 9.718110746930272e-05, "loss": 1.6998, "step": 2466 }, { "epoch": 0.10782813934175445, "grad_norm": 2.40625, "learning_rate": 9.71788333255991e-05, "loss": 1.8179, "step": 2467 }, { "epoch": 0.10787184754578434, "grad_norm": 2.8125, "learning_rate": 9.717655829156222e-05, "loss": 1.872, "step": 2468 }, { "epoch": 0.10791555574981423, "grad_norm": 6.28125, "learning_rate": 9.717428236723505e-05, "loss": 2.1897, "step": 2469 }, { "epoch": 0.10795926395384414, "grad_norm": 2.734375, "learning_rate": 9.717200555266049e-05, "loss": 1.6204, "step": 2470 }, { "epoch": 0.10800297215787404, "grad_norm": 2.671875, "learning_rate": 9.716972784788152e-05, "loss": 2.1799, "step": 2471 }, { "epoch": 0.10804668036190393, "grad_norm": 2.71875, "learning_rate": 9.716744925294116e-05, "loss": 2.0839, "step": 2472 }, { "epoch": 0.10809038856593382, "grad_norm": 2.78125, "learning_rate": 9.716516976788236e-05, "loss": 2.656, "step": 2473 }, { "epoch": 0.10813409676996372, "grad_norm": 2.625, "learning_rate": 9.716288939274819e-05, "loss": 2.4926, "step": 2474 }, { "epoch": 0.10817780497399362, "grad_norm": 2.328125, "learning_rate": 9.716060812758163e-05, "loss": 2.2936, "step": 2475 }, { "epoch": 0.10822151317802352, "grad_norm": 2.53125, "learning_rate": 9.715832597242576e-05, "loss": 1.5035, "step": 2476 }, { "epoch": 0.10826522138205341, "grad_norm": 2.921875, "learning_rate": 9.715604292732366e-05, "loss": 2.7127, "step": 2477 }, { "epoch": 0.1083089295860833, "grad_norm": 2.90625, "learning_rate": 9.715375899231837e-05, "loss": 2.8202, "step": 2478 }, { "epoch": 0.1083526377901132, "grad_norm": 2.765625, "learning_rate": 9.715147416745303e-05, "loss": 1.6242, "step": 2479 }, { "epoch": 0.1083963459941431, "grad_norm": 2.359375, "learning_rate": 9.714918845277075e-05, "loss": 2.0765, "step": 2480 }, { "epoch": 0.108440054198173, "grad_norm": 2.671875, "learning_rate": 9.714690184831465e-05, "loss": 2.0368, "step": 2481 }, { "epoch": 0.1084837624022029, "grad_norm": 3.265625, "learning_rate": 9.714461435412792e-05, "loss": 2.2531, "step": 2482 }, { "epoch": 0.10852747060623279, "grad_norm": 2.5625, "learning_rate": 9.714232597025368e-05, "loss": 1.8642, "step": 2483 }, { "epoch": 0.10857117881026268, "grad_norm": 2.46875, "learning_rate": 9.714003669673515e-05, "loss": 2.1971, "step": 2484 }, { "epoch": 0.10861488701429259, "grad_norm": 2.171875, "learning_rate": 9.713774653361549e-05, "loss": 1.6596, "step": 2485 }, { "epoch": 0.10865859521832248, "grad_norm": 2.703125, "learning_rate": 9.713545548093797e-05, "loss": 2.9122, "step": 2486 }, { "epoch": 0.10870230342235238, "grad_norm": 2.4375, "learning_rate": 9.713316353874581e-05, "loss": 1.8774, "step": 2487 }, { "epoch": 0.10874601162638227, "grad_norm": 2.3125, "learning_rate": 9.713087070708224e-05, "loss": 2.1811, "step": 2488 }, { "epoch": 0.10878971983041216, "grad_norm": 3.265625, "learning_rate": 9.712857698599054e-05, "loss": 1.9505, "step": 2489 }, { "epoch": 0.10883342803444207, "grad_norm": 2.3125, "learning_rate": 9.712628237551402e-05, "loss": 1.8161, "step": 2490 }, { "epoch": 0.10887713623847196, "grad_norm": 2.359375, "learning_rate": 9.712398687569595e-05, "loss": 1.9934, "step": 2491 }, { "epoch": 0.10892084444250186, "grad_norm": 2.609375, "learning_rate": 9.712169048657966e-05, "loss": 2.0326, "step": 2492 }, { "epoch": 0.10896455264653175, "grad_norm": 3.140625, "learning_rate": 9.711939320820848e-05, "loss": 1.9797, "step": 2493 }, { "epoch": 0.10900826085056164, "grad_norm": 2.171875, "learning_rate": 9.71170950406258e-05, "loss": 1.9381, "step": 2494 }, { "epoch": 0.10905196905459155, "grad_norm": 3.328125, "learning_rate": 9.711479598387494e-05, "loss": 2.5, "step": 2495 }, { "epoch": 0.10909567725862145, "grad_norm": 2.34375, "learning_rate": 9.71124960379993e-05, "loss": 1.9666, "step": 2496 }, { "epoch": 0.10913938546265134, "grad_norm": 2.859375, "learning_rate": 9.711019520304231e-05, "loss": 2.0122, "step": 2497 }, { "epoch": 0.10918309366668123, "grad_norm": 2.421875, "learning_rate": 9.710789347904736e-05, "loss": 2.2053, "step": 2498 }, { "epoch": 0.10922680187071113, "grad_norm": 2.4375, "learning_rate": 9.71055908660579e-05, "loss": 2.1717, "step": 2499 }, { "epoch": 0.10927051007474103, "grad_norm": 3.0625, "learning_rate": 9.710328736411737e-05, "loss": 2.2256, "step": 2500 }, { "epoch": 0.10931421827877093, "grad_norm": 2.1875, "learning_rate": 9.710098297326928e-05, "loss": 1.8244, "step": 2501 }, { "epoch": 0.10935792648280082, "grad_norm": 2.953125, "learning_rate": 9.709867769355707e-05, "loss": 2.2914, "step": 2502 }, { "epoch": 0.10940163468683071, "grad_norm": 2.484375, "learning_rate": 9.709637152502427e-05, "loss": 2.2509, "step": 2503 }, { "epoch": 0.10944534289086061, "grad_norm": 2.546875, "learning_rate": 9.709406446771439e-05, "loss": 2.1208, "step": 2504 }, { "epoch": 0.10948905109489052, "grad_norm": 2.734375, "learning_rate": 9.709175652167096e-05, "loss": 1.6196, "step": 2505 }, { "epoch": 0.10953275929892041, "grad_norm": 2.859375, "learning_rate": 9.708944768693755e-05, "loss": 2.3912, "step": 2506 }, { "epoch": 0.1095764675029503, "grad_norm": 3.5625, "learning_rate": 9.708713796355773e-05, "loss": 2.3145, "step": 2507 }, { "epoch": 0.1096201757069802, "grad_norm": 3.109375, "learning_rate": 9.708482735157509e-05, "loss": 2.4361, "step": 2508 }, { "epoch": 0.10966388391101009, "grad_norm": 2.28125, "learning_rate": 9.708251585103322e-05, "loss": 2.2467, "step": 2509 }, { "epoch": 0.10970759211504, "grad_norm": 4.375, "learning_rate": 9.708020346197577e-05, "loss": 2.3246, "step": 2510 }, { "epoch": 0.10975130031906989, "grad_norm": 3.03125, "learning_rate": 9.707789018444636e-05, "loss": 2.2745, "step": 2511 }, { "epoch": 0.10979500852309979, "grad_norm": 3.09375, "learning_rate": 9.707557601848862e-05, "loss": 2.3604, "step": 2512 }, { "epoch": 0.10983871672712968, "grad_norm": 2.46875, "learning_rate": 9.707326096414625e-05, "loss": 1.7883, "step": 2513 }, { "epoch": 0.10988242493115957, "grad_norm": 2.328125, "learning_rate": 9.707094502146294e-05, "loss": 1.9555, "step": 2514 }, { "epoch": 0.10992613313518948, "grad_norm": 3.078125, "learning_rate": 9.706862819048239e-05, "loss": 2.4865, "step": 2515 }, { "epoch": 0.10996984133921937, "grad_norm": 2.40625, "learning_rate": 9.706631047124833e-05, "loss": 2.1589, "step": 2516 }, { "epoch": 0.11001354954324927, "grad_norm": 2.53125, "learning_rate": 9.706399186380446e-05, "loss": 1.9848, "step": 2517 }, { "epoch": 0.11005725774727916, "grad_norm": 2.9375, "learning_rate": 9.706167236819459e-05, "loss": 2.1828, "step": 2518 }, { "epoch": 0.11010096595130905, "grad_norm": 2.65625, "learning_rate": 9.705935198446246e-05, "loss": 1.9501, "step": 2519 }, { "epoch": 0.11014467415533896, "grad_norm": 2.671875, "learning_rate": 9.705703071265187e-05, "loss": 2.0098, "step": 2520 }, { "epoch": 0.11018838235936886, "grad_norm": 2.640625, "learning_rate": 9.70547085528066e-05, "loss": 1.8923, "step": 2521 }, { "epoch": 0.11023209056339875, "grad_norm": 2.84375, "learning_rate": 9.705238550497053e-05, "loss": 2.8409, "step": 2522 }, { "epoch": 0.11027579876742864, "grad_norm": 2.90625, "learning_rate": 9.705006156918744e-05, "loss": 1.8483, "step": 2523 }, { "epoch": 0.11031950697145854, "grad_norm": 2.625, "learning_rate": 9.704773674550123e-05, "loss": 2.0112, "step": 2524 }, { "epoch": 0.11036321517548844, "grad_norm": 2.8125, "learning_rate": 9.704541103395574e-05, "loss": 2.122, "step": 2525 }, { "epoch": 0.11040692337951834, "grad_norm": 3.1875, "learning_rate": 9.704308443459487e-05, "loss": 2.662, "step": 2526 }, { "epoch": 0.11045063158354823, "grad_norm": 2.46875, "learning_rate": 9.704075694746253e-05, "loss": 2.0194, "step": 2527 }, { "epoch": 0.11049433978757812, "grad_norm": 2.40625, "learning_rate": 9.703842857260263e-05, "loss": 1.7063, "step": 2528 }, { "epoch": 0.11053804799160802, "grad_norm": 2.421875, "learning_rate": 9.703609931005914e-05, "loss": 1.9463, "step": 2529 }, { "epoch": 0.11058175619563793, "grad_norm": 2.671875, "learning_rate": 9.703376915987601e-05, "loss": 1.9706, "step": 2530 }, { "epoch": 0.11062546439966782, "grad_norm": 3.265625, "learning_rate": 9.703143812209718e-05, "loss": 2.4519, "step": 2531 }, { "epoch": 0.11066917260369771, "grad_norm": 3.15625, "learning_rate": 9.702910619676667e-05, "loss": 2.0152, "step": 2532 }, { "epoch": 0.1107128808077276, "grad_norm": 3.03125, "learning_rate": 9.702677338392847e-05, "loss": 2.2778, "step": 2533 }, { "epoch": 0.1107565890117575, "grad_norm": 2.21875, "learning_rate": 9.702443968362662e-05, "loss": 1.9025, "step": 2534 }, { "epoch": 0.11080029721578741, "grad_norm": 2.84375, "learning_rate": 9.702210509590514e-05, "loss": 2.1866, "step": 2535 }, { "epoch": 0.1108440054198173, "grad_norm": 2.421875, "learning_rate": 9.701976962080812e-05, "loss": 1.7364, "step": 2536 }, { "epoch": 0.1108877136238472, "grad_norm": 3.140625, "learning_rate": 9.70174332583796e-05, "loss": 2.5829, "step": 2537 }, { "epoch": 0.11093142182787709, "grad_norm": 5.65625, "learning_rate": 9.701509600866368e-05, "loss": 2.1956, "step": 2538 }, { "epoch": 0.11097513003190698, "grad_norm": 4.53125, "learning_rate": 9.701275787170448e-05, "loss": 2.4632, "step": 2539 }, { "epoch": 0.11101883823593689, "grad_norm": 3.109375, "learning_rate": 9.701041884754612e-05, "loss": 2.389, "step": 2540 }, { "epoch": 0.11106254643996678, "grad_norm": 2.53125, "learning_rate": 9.700807893623272e-05, "loss": 2.3511, "step": 2541 }, { "epoch": 0.11110625464399668, "grad_norm": 2.421875, "learning_rate": 9.700573813780847e-05, "loss": 2.1516, "step": 2542 }, { "epoch": 0.11114996284802657, "grad_norm": 2.671875, "learning_rate": 9.700339645231751e-05, "loss": 2.1829, "step": 2543 }, { "epoch": 0.11119367105205646, "grad_norm": 2.828125, "learning_rate": 9.700105387980406e-05, "loss": 2.7425, "step": 2544 }, { "epoch": 0.11123737925608637, "grad_norm": 3.0, "learning_rate": 9.699871042031232e-05, "loss": 2.3237, "step": 2545 }, { "epoch": 0.11128108746011627, "grad_norm": 2.765625, "learning_rate": 9.69963660738865e-05, "loss": 1.8203, "step": 2546 }, { "epoch": 0.11132479566414616, "grad_norm": 2.65625, "learning_rate": 9.699402084057086e-05, "loss": 1.8776, "step": 2547 }, { "epoch": 0.11136850386817605, "grad_norm": 3.1875, "learning_rate": 9.699167472040964e-05, "loss": 2.8036, "step": 2548 }, { "epoch": 0.11141221207220595, "grad_norm": 3.03125, "learning_rate": 9.698932771344715e-05, "loss": 2.2721, "step": 2549 }, { "epoch": 0.11145592027623585, "grad_norm": 2.109375, "learning_rate": 9.698697981972763e-05, "loss": 1.9911, "step": 2550 }, { "epoch": 0.11149962848026575, "grad_norm": 7.59375, "learning_rate": 9.698463103929542e-05, "loss": 2.914, "step": 2551 }, { "epoch": 0.11154333668429564, "grad_norm": 2.546875, "learning_rate": 9.698228137219485e-05, "loss": 2.3899, "step": 2552 }, { "epoch": 0.11158704488832553, "grad_norm": 2.78125, "learning_rate": 9.697993081847024e-05, "loss": 2.4362, "step": 2553 }, { "epoch": 0.11163075309235544, "grad_norm": 2.375, "learning_rate": 9.697757937816596e-05, "loss": 2.0962, "step": 2554 }, { "epoch": 0.11167446129638534, "grad_norm": 11.1875, "learning_rate": 9.69752270513264e-05, "loss": 2.4154, "step": 2555 }, { "epoch": 0.11171816950041523, "grad_norm": 3.140625, "learning_rate": 9.697287383799592e-05, "loss": 1.3796, "step": 2556 }, { "epoch": 0.11176187770444512, "grad_norm": 2.65625, "learning_rate": 9.697051973821895e-05, "loss": 1.9024, "step": 2557 }, { "epoch": 0.11180558590847502, "grad_norm": 2.53125, "learning_rate": 9.696816475203992e-05, "loss": 2.4041, "step": 2558 }, { "epoch": 0.11184929411250492, "grad_norm": 2.328125, "learning_rate": 9.696580887950324e-05, "loss": 2.0977, "step": 2559 }, { "epoch": 0.11189300231653482, "grad_norm": 2.71875, "learning_rate": 9.69634521206534e-05, "loss": 2.501, "step": 2560 }, { "epoch": 0.11193671052056471, "grad_norm": 2.890625, "learning_rate": 9.696109447553488e-05, "loss": 1.9393, "step": 2561 }, { "epoch": 0.1119804187245946, "grad_norm": 3.046875, "learning_rate": 9.695873594419213e-05, "loss": 1.7693, "step": 2562 }, { "epoch": 0.1120241269286245, "grad_norm": 3.078125, "learning_rate": 9.695637652666972e-05, "loss": 2.4633, "step": 2563 }, { "epoch": 0.1120678351326544, "grad_norm": 2.296875, "learning_rate": 9.695401622301212e-05, "loss": 1.7753, "step": 2564 }, { "epoch": 0.1121115433366843, "grad_norm": 3.75, "learning_rate": 9.69516550332639e-05, "loss": 3.1476, "step": 2565 }, { "epoch": 0.1121552515407142, "grad_norm": 3.546875, "learning_rate": 9.694929295746963e-05, "loss": 2.3226, "step": 2566 }, { "epoch": 0.11219895974474409, "grad_norm": 2.5625, "learning_rate": 9.694692999567386e-05, "loss": 2.6497, "step": 2567 }, { "epoch": 0.11224266794877398, "grad_norm": 2.703125, "learning_rate": 9.694456614792119e-05, "loss": 1.9439, "step": 2568 }, { "epoch": 0.11228637615280389, "grad_norm": 2.5, "learning_rate": 9.694220141425623e-05, "loss": 2.1219, "step": 2569 }, { "epoch": 0.11233008435683378, "grad_norm": 2.296875, "learning_rate": 9.69398357947236e-05, "loss": 1.9452, "step": 2570 }, { "epoch": 0.11237379256086368, "grad_norm": 2.015625, "learning_rate": 9.693746928936798e-05, "loss": 1.7742, "step": 2571 }, { "epoch": 0.11241750076489357, "grad_norm": 2.390625, "learning_rate": 9.693510189823398e-05, "loss": 2.0195, "step": 2572 }, { "epoch": 0.11246120896892346, "grad_norm": 2.71875, "learning_rate": 9.69327336213663e-05, "loss": 2.213, "step": 2573 }, { "epoch": 0.11250491717295337, "grad_norm": 3.125, "learning_rate": 9.693036445880963e-05, "loss": 1.6811, "step": 2574 }, { "epoch": 0.11254862537698326, "grad_norm": 3.4375, "learning_rate": 9.692799441060868e-05, "loss": 2.2884, "step": 2575 }, { "epoch": 0.11259233358101316, "grad_norm": 2.65625, "learning_rate": 9.692562347680817e-05, "loss": 2.2061, "step": 2576 }, { "epoch": 0.11263604178504305, "grad_norm": 2.359375, "learning_rate": 9.692325165745285e-05, "loss": 1.8484, "step": 2577 }, { "epoch": 0.11267974998907294, "grad_norm": 3.015625, "learning_rate": 9.692087895258748e-05, "loss": 2.6623, "step": 2578 }, { "epoch": 0.11272345819310285, "grad_norm": 2.328125, "learning_rate": 9.691850536225684e-05, "loss": 2.2441, "step": 2579 }, { "epoch": 0.11276716639713275, "grad_norm": 3.359375, "learning_rate": 9.691613088650571e-05, "loss": 1.9522, "step": 2580 }, { "epoch": 0.11281087460116264, "grad_norm": 2.75, "learning_rate": 9.69137555253789e-05, "loss": 2.5621, "step": 2581 }, { "epoch": 0.11285458280519253, "grad_norm": 2.9375, "learning_rate": 9.691137927892125e-05, "loss": 2.6895, "step": 2582 }, { "epoch": 0.11289829100922243, "grad_norm": 2.484375, "learning_rate": 9.69090021471776e-05, "loss": 1.9568, "step": 2583 }, { "epoch": 0.11294199921325233, "grad_norm": 2.671875, "learning_rate": 9.69066241301928e-05, "loss": 1.9114, "step": 2584 }, { "epoch": 0.11298570741728223, "grad_norm": 2.90625, "learning_rate": 9.690424522801173e-05, "loss": 2.1984, "step": 2585 }, { "epoch": 0.11302941562131212, "grad_norm": 2.25, "learning_rate": 9.69018654406793e-05, "loss": 2.0646, "step": 2586 }, { "epoch": 0.11307312382534201, "grad_norm": 2.828125, "learning_rate": 9.68994847682404e-05, "loss": 1.6406, "step": 2587 }, { "epoch": 0.11311683202937191, "grad_norm": 3.765625, "learning_rate": 9.689710321073997e-05, "loss": 2.687, "step": 2588 }, { "epoch": 0.11316054023340182, "grad_norm": 2.40625, "learning_rate": 9.689472076822295e-05, "loss": 1.8021, "step": 2589 }, { "epoch": 0.11320424843743171, "grad_norm": 2.875, "learning_rate": 9.689233744073427e-05, "loss": 2.06, "step": 2590 }, { "epoch": 0.1132479566414616, "grad_norm": 2.96875, "learning_rate": 9.688995322831895e-05, "loss": 2.283, "step": 2591 }, { "epoch": 0.1132916648454915, "grad_norm": 2.46875, "learning_rate": 9.688756813102197e-05, "loss": 2.036, "step": 2592 }, { "epoch": 0.11333537304952139, "grad_norm": 2.203125, "learning_rate": 9.688518214888836e-05, "loss": 2.0899, "step": 2593 }, { "epoch": 0.1133790812535513, "grad_norm": 2.5, "learning_rate": 9.688279528196309e-05, "loss": 2.0154, "step": 2594 }, { "epoch": 0.11342278945758119, "grad_norm": 2.484375, "learning_rate": 9.688040753029125e-05, "loss": 1.6289, "step": 2595 }, { "epoch": 0.11346649766161108, "grad_norm": 2.765625, "learning_rate": 9.687801889391789e-05, "loss": 2.3163, "step": 2596 }, { "epoch": 0.11351020586564098, "grad_norm": 3.984375, "learning_rate": 9.687562937288807e-05, "loss": 2.118, "step": 2597 }, { "epoch": 0.11355391406967087, "grad_norm": 4.09375, "learning_rate": 9.687323896724693e-05, "loss": 2.3258, "step": 2598 }, { "epoch": 0.11359762227370078, "grad_norm": 2.515625, "learning_rate": 9.687084767703954e-05, "loss": 2.6265, "step": 2599 }, { "epoch": 0.11364133047773067, "grad_norm": 2.875, "learning_rate": 9.686845550231102e-05, "loss": 2.2793, "step": 2600 }, { "epoch": 0.11368503868176057, "grad_norm": 2.5, "learning_rate": 9.686606244310654e-05, "loss": 1.9895, "step": 2601 }, { "epoch": 0.11372874688579046, "grad_norm": 5.4375, "learning_rate": 9.686366849947126e-05, "loss": 2.4364, "step": 2602 }, { "epoch": 0.11377245508982035, "grad_norm": 2.921875, "learning_rate": 9.686127367145034e-05, "loss": 2.4419, "step": 2603 }, { "epoch": 0.11381616329385026, "grad_norm": 4.59375, "learning_rate": 9.685887795908899e-05, "loss": 1.8057, "step": 2604 }, { "epoch": 0.11385987149788016, "grad_norm": 2.6875, "learning_rate": 9.68564813624324e-05, "loss": 1.5621, "step": 2605 }, { "epoch": 0.11390357970191005, "grad_norm": 2.6875, "learning_rate": 9.685408388152581e-05, "loss": 1.7561, "step": 2606 }, { "epoch": 0.11394728790593994, "grad_norm": 2.921875, "learning_rate": 9.685168551641448e-05, "loss": 2.0413, "step": 2607 }, { "epoch": 0.11399099610996984, "grad_norm": 3.546875, "learning_rate": 9.684928626714365e-05, "loss": 1.4987, "step": 2608 }, { "epoch": 0.11403470431399974, "grad_norm": 2.671875, "learning_rate": 9.68468861337586e-05, "loss": 2.4914, "step": 2609 }, { "epoch": 0.11407841251802964, "grad_norm": 2.203125, "learning_rate": 9.684448511630461e-05, "loss": 2.1296, "step": 2610 }, { "epoch": 0.11412212072205953, "grad_norm": 2.390625, "learning_rate": 9.684208321482704e-05, "loss": 1.8615, "step": 2611 }, { "epoch": 0.11416582892608942, "grad_norm": 2.875, "learning_rate": 9.683968042937117e-05, "loss": 2.1629, "step": 2612 }, { "epoch": 0.11420953713011932, "grad_norm": 2.515625, "learning_rate": 9.683727675998236e-05, "loss": 1.9825, "step": 2613 }, { "epoch": 0.11425324533414923, "grad_norm": 2.9375, "learning_rate": 9.683487220670595e-05, "loss": 2.6845, "step": 2614 }, { "epoch": 0.11429695353817912, "grad_norm": 2.53125, "learning_rate": 9.683246676958735e-05, "loss": 2.0385, "step": 2615 }, { "epoch": 0.11434066174220901, "grad_norm": 2.734375, "learning_rate": 9.683006044867194e-05, "loss": 2.3356, "step": 2616 }, { "epoch": 0.1143843699462389, "grad_norm": 2.28125, "learning_rate": 9.682765324400514e-05, "loss": 1.81, "step": 2617 }, { "epoch": 0.1144280781502688, "grad_norm": 2.515625, "learning_rate": 9.682524515563236e-05, "loss": 2.5127, "step": 2618 }, { "epoch": 0.11447178635429871, "grad_norm": 3.984375, "learning_rate": 9.682283618359905e-05, "loss": 2.0217, "step": 2619 }, { "epoch": 0.1145154945583286, "grad_norm": 2.5625, "learning_rate": 9.682042632795067e-05, "loss": 1.9385, "step": 2620 }, { "epoch": 0.1145592027623585, "grad_norm": 3.28125, "learning_rate": 9.681801558873272e-05, "loss": 2.1403, "step": 2621 }, { "epoch": 0.11460291096638839, "grad_norm": 3.0, "learning_rate": 9.681560396599068e-05, "loss": 1.5651, "step": 2622 }, { "epoch": 0.11464661917041828, "grad_norm": 2.65625, "learning_rate": 9.681319145977003e-05, "loss": 2.7997, "step": 2623 }, { "epoch": 0.11469032737444819, "grad_norm": 2.203125, "learning_rate": 9.681077807011634e-05, "loss": 2.129, "step": 2624 }, { "epoch": 0.11473403557847808, "grad_norm": 2.703125, "learning_rate": 9.680836379707513e-05, "loss": 2.0857, "step": 2625 }, { "epoch": 0.11477774378250798, "grad_norm": 3.453125, "learning_rate": 9.680594864069197e-05, "loss": 1.9064, "step": 2626 }, { "epoch": 0.11482145198653787, "grad_norm": 2.265625, "learning_rate": 9.680353260101245e-05, "loss": 1.6006, "step": 2627 }, { "epoch": 0.11486516019056776, "grad_norm": 2.390625, "learning_rate": 9.680111567808213e-05, "loss": 1.6344, "step": 2628 }, { "epoch": 0.11490886839459767, "grad_norm": 2.53125, "learning_rate": 9.679869787194664e-05, "loss": 2.1607, "step": 2629 }, { "epoch": 0.11495257659862756, "grad_norm": 3.265625, "learning_rate": 9.679627918265163e-05, "loss": 2.1579, "step": 2630 }, { "epoch": 0.11499628480265746, "grad_norm": 2.34375, "learning_rate": 9.679385961024271e-05, "loss": 2.589, "step": 2631 }, { "epoch": 0.11503999300668735, "grad_norm": 3.34375, "learning_rate": 9.679143915476556e-05, "loss": 3.4489, "step": 2632 }, { "epoch": 0.11508370121071725, "grad_norm": 3.609375, "learning_rate": 9.678901781626584e-05, "loss": 1.725, "step": 2633 }, { "epoch": 0.11512740941474715, "grad_norm": 5.25, "learning_rate": 9.678659559478926e-05, "loss": 2.3209, "step": 2634 }, { "epoch": 0.11517111761877705, "grad_norm": 2.921875, "learning_rate": 9.678417249038154e-05, "loss": 2.0092, "step": 2635 }, { "epoch": 0.11521482582280694, "grad_norm": 2.515625, "learning_rate": 9.678174850308839e-05, "loss": 2.1935, "step": 2636 }, { "epoch": 0.11525853402683683, "grad_norm": 3.25, "learning_rate": 9.677932363295555e-05, "loss": 2.3839, "step": 2637 }, { "epoch": 0.11530224223086673, "grad_norm": 2.578125, "learning_rate": 9.677689788002879e-05, "loss": 2.3866, "step": 2638 }, { "epoch": 0.11534595043489664, "grad_norm": 2.28125, "learning_rate": 9.677447124435389e-05, "loss": 1.7951, "step": 2639 }, { "epoch": 0.11538965863892653, "grad_norm": 2.734375, "learning_rate": 9.677204372597663e-05, "loss": 2.4154, "step": 2640 }, { "epoch": 0.11543336684295642, "grad_norm": 2.203125, "learning_rate": 9.676961532494284e-05, "loss": 2.1352, "step": 2641 }, { "epoch": 0.11547707504698632, "grad_norm": 2.703125, "learning_rate": 9.676718604129832e-05, "loss": 2.3192, "step": 2642 }, { "epoch": 0.11552078325101621, "grad_norm": 2.125, "learning_rate": 9.676475587508897e-05, "loss": 2.0761, "step": 2643 }, { "epoch": 0.11556449145504612, "grad_norm": 2.375, "learning_rate": 9.67623248263606e-05, "loss": 1.6954, "step": 2644 }, { "epoch": 0.11560819965907601, "grad_norm": 2.359375, "learning_rate": 9.675989289515908e-05, "loss": 2.0882, "step": 2645 }, { "epoch": 0.1156519078631059, "grad_norm": 3.25, "learning_rate": 9.675746008153035e-05, "loss": 2.324, "step": 2646 }, { "epoch": 0.1156956160671358, "grad_norm": 3.078125, "learning_rate": 9.675502638552029e-05, "loss": 2.7426, "step": 2647 }, { "epoch": 0.11573932427116569, "grad_norm": 2.609375, "learning_rate": 9.675259180717482e-05, "loss": 2.0731, "step": 2648 }, { "epoch": 0.1157830324751956, "grad_norm": 2.65625, "learning_rate": 9.675015634653992e-05, "loss": 2.5737, "step": 2649 }, { "epoch": 0.11582674067922549, "grad_norm": 2.671875, "learning_rate": 9.674772000366151e-05, "loss": 2.3047, "step": 2650 }, { "epoch": 0.11587044888325539, "grad_norm": 2.765625, "learning_rate": 9.674528277858559e-05, "loss": 1.3378, "step": 2651 }, { "epoch": 0.11591415708728528, "grad_norm": 3.0625, "learning_rate": 9.674284467135816e-05, "loss": 2.2169, "step": 2652 }, { "epoch": 0.11595786529131517, "grad_norm": 2.40625, "learning_rate": 9.67404056820252e-05, "loss": 2.0575, "step": 2653 }, { "epoch": 0.11600157349534508, "grad_norm": 2.4375, "learning_rate": 9.673796581063278e-05, "loss": 2.0379, "step": 2654 }, { "epoch": 0.11604528169937497, "grad_norm": 2.40625, "learning_rate": 9.67355250572269e-05, "loss": 1.7116, "step": 2655 }, { "epoch": 0.11608898990340487, "grad_norm": 2.296875, "learning_rate": 9.673308342185365e-05, "loss": 1.9168, "step": 2656 }, { "epoch": 0.11613269810743476, "grad_norm": 2.34375, "learning_rate": 9.673064090455911e-05, "loss": 2.8693, "step": 2657 }, { "epoch": 0.11617640631146466, "grad_norm": 2.375, "learning_rate": 9.672819750538935e-05, "loss": 2.0332, "step": 2658 }, { "epoch": 0.11622011451549456, "grad_norm": 2.515625, "learning_rate": 9.67257532243905e-05, "loss": 2.4218, "step": 2659 }, { "epoch": 0.11626382271952446, "grad_norm": 2.4375, "learning_rate": 9.672330806160868e-05, "loss": 1.8975, "step": 2660 }, { "epoch": 0.11630753092355435, "grad_norm": 3.0, "learning_rate": 9.672086201709003e-05, "loss": 2.4923, "step": 2661 }, { "epoch": 0.11635123912758424, "grad_norm": 3.15625, "learning_rate": 9.671841509088073e-05, "loss": 2.2724, "step": 2662 }, { "epoch": 0.11639494733161414, "grad_norm": 2.859375, "learning_rate": 9.671596728302692e-05, "loss": 2.2291, "step": 2663 }, { "epoch": 0.11643865553564405, "grad_norm": 2.828125, "learning_rate": 9.671351859357483e-05, "loss": 2.263, "step": 2664 }, { "epoch": 0.11648236373967394, "grad_norm": 2.6875, "learning_rate": 9.671106902257065e-05, "loss": 2.7241, "step": 2665 }, { "epoch": 0.11652607194370383, "grad_norm": 2.578125, "learning_rate": 9.670861857006061e-05, "loss": 2.1171, "step": 2666 }, { "epoch": 0.11656978014773373, "grad_norm": 2.828125, "learning_rate": 9.670616723609096e-05, "loss": 2.266, "step": 2667 }, { "epoch": 0.11661348835176362, "grad_norm": 2.5625, "learning_rate": 9.670371502070795e-05, "loss": 1.9497, "step": 2668 }, { "epoch": 0.11665719655579353, "grad_norm": 2.875, "learning_rate": 9.670126192395787e-05, "loss": 2.612, "step": 2669 }, { "epoch": 0.11670090475982342, "grad_norm": 2.921875, "learning_rate": 9.669880794588701e-05, "loss": 2.1332, "step": 2670 }, { "epoch": 0.11674461296385331, "grad_norm": 3.046875, "learning_rate": 9.669635308654166e-05, "loss": 2.334, "step": 2671 }, { "epoch": 0.11678832116788321, "grad_norm": 2.78125, "learning_rate": 9.669389734596819e-05, "loss": 2.0459, "step": 2672 }, { "epoch": 0.1168320293719131, "grad_norm": 2.859375, "learning_rate": 9.66914407242129e-05, "loss": 2.9586, "step": 2673 }, { "epoch": 0.11687573757594301, "grad_norm": 2.28125, "learning_rate": 9.668898322132218e-05, "loss": 1.893, "step": 2674 }, { "epoch": 0.1169194457799729, "grad_norm": 2.40625, "learning_rate": 9.668652483734237e-05, "loss": 2.3674, "step": 2675 }, { "epoch": 0.1169631539840028, "grad_norm": 2.28125, "learning_rate": 9.668406557231991e-05, "loss": 2.0931, "step": 2676 }, { "epoch": 0.11700686218803269, "grad_norm": 2.3125, "learning_rate": 9.668160542630118e-05, "loss": 1.92, "step": 2677 }, { "epoch": 0.11705057039206258, "grad_norm": 2.203125, "learning_rate": 9.667914439933262e-05, "loss": 1.6033, "step": 2678 }, { "epoch": 0.11709427859609249, "grad_norm": 3.296875, "learning_rate": 9.667668249146067e-05, "loss": 2.6922, "step": 2679 }, { "epoch": 0.11713798680012238, "grad_norm": 2.40625, "learning_rate": 9.667421970273177e-05, "loss": 1.8012, "step": 2680 }, { "epoch": 0.11718169500415228, "grad_norm": 2.84375, "learning_rate": 9.667175603319243e-05, "loss": 2.2011, "step": 2681 }, { "epoch": 0.11722540320818217, "grad_norm": 2.46875, "learning_rate": 9.66692914828891e-05, "loss": 2.2007, "step": 2682 }, { "epoch": 0.11726911141221207, "grad_norm": 2.90625, "learning_rate": 9.666682605186835e-05, "loss": 2.1641, "step": 2683 }, { "epoch": 0.11731281961624197, "grad_norm": 2.265625, "learning_rate": 9.666435974017665e-05, "loss": 2.4392, "step": 2684 }, { "epoch": 0.11735652782027187, "grad_norm": 2.234375, "learning_rate": 9.666189254786056e-05, "loss": 1.8101, "step": 2685 }, { "epoch": 0.11740023602430176, "grad_norm": 2.84375, "learning_rate": 9.665942447496666e-05, "loss": 2.6438, "step": 2686 }, { "epoch": 0.11744394422833165, "grad_norm": 8.3125, "learning_rate": 9.66569555215415e-05, "loss": 2.9357, "step": 2687 }, { "epoch": 0.11748765243236156, "grad_norm": 3.1875, "learning_rate": 9.665448568763169e-05, "loss": 2.0782, "step": 2688 }, { "epoch": 0.11753136063639145, "grad_norm": 2.328125, "learning_rate": 9.665201497328384e-05, "loss": 2.1359, "step": 2689 }, { "epoch": 0.11757506884042135, "grad_norm": 2.296875, "learning_rate": 9.664954337854455e-05, "loss": 1.7294, "step": 2690 }, { "epoch": 0.11761877704445124, "grad_norm": 2.625, "learning_rate": 9.66470709034605e-05, "loss": 2.2099, "step": 2691 }, { "epoch": 0.11766248524848114, "grad_norm": 2.890625, "learning_rate": 9.664459754807832e-05, "loss": 3.201, "step": 2692 }, { "epoch": 0.11770619345251104, "grad_norm": 2.734375, "learning_rate": 9.66421233124447e-05, "loss": 2.0315, "step": 2693 }, { "epoch": 0.11774990165654094, "grad_norm": 3.078125, "learning_rate": 9.663964819660633e-05, "loss": 1.792, "step": 2694 }, { "epoch": 0.11779360986057083, "grad_norm": 2.75, "learning_rate": 9.663717220060991e-05, "loss": 2.191, "step": 2695 }, { "epoch": 0.11783731806460072, "grad_norm": 2.5, "learning_rate": 9.663469532450218e-05, "loss": 2.0232, "step": 2696 }, { "epoch": 0.11788102626863062, "grad_norm": 2.859375, "learning_rate": 9.663221756832988e-05, "loss": 1.938, "step": 2697 }, { "epoch": 0.11792473447266053, "grad_norm": 2.71875, "learning_rate": 9.662973893213976e-05, "loss": 2.7432, "step": 2698 }, { "epoch": 0.11796844267669042, "grad_norm": 2.515625, "learning_rate": 9.662725941597859e-05, "loss": 1.9639, "step": 2699 }, { "epoch": 0.11801215088072031, "grad_norm": 2.078125, "learning_rate": 9.662477901989318e-05, "loss": 1.9061, "step": 2700 }, { "epoch": 0.1180558590847502, "grad_norm": 2.59375, "learning_rate": 9.662229774393032e-05, "loss": 2.5382, "step": 2701 }, { "epoch": 0.1180995672887801, "grad_norm": 2.8125, "learning_rate": 9.661981558813687e-05, "loss": 2.3339, "step": 2702 }, { "epoch": 0.11814327549281001, "grad_norm": 2.1875, "learning_rate": 9.661733255255963e-05, "loss": 1.7774, "step": 2703 }, { "epoch": 0.1181869836968399, "grad_norm": 4.53125, "learning_rate": 9.661484863724549e-05, "loss": 1.8731, "step": 2704 }, { "epoch": 0.1182306919008698, "grad_norm": 3.09375, "learning_rate": 9.661236384224129e-05, "loss": 2.4805, "step": 2705 }, { "epoch": 0.11827440010489969, "grad_norm": 2.40625, "learning_rate": 9.660987816759396e-05, "loss": 1.7, "step": 2706 }, { "epoch": 0.11831810830892958, "grad_norm": 2.921875, "learning_rate": 9.66073916133504e-05, "loss": 2.4671, "step": 2707 }, { "epoch": 0.11836181651295949, "grad_norm": 2.890625, "learning_rate": 9.660490417955749e-05, "loss": 2.5451, "step": 2708 }, { "epoch": 0.11840552471698938, "grad_norm": 2.8125, "learning_rate": 9.660241586626224e-05, "loss": 2.3707, "step": 2709 }, { "epoch": 0.11844923292101928, "grad_norm": 3.046875, "learning_rate": 9.659992667351157e-05, "loss": 2.45, "step": 2710 }, { "epoch": 0.11849294112504917, "grad_norm": 2.71875, "learning_rate": 9.659743660135245e-05, "loss": 2.5622, "step": 2711 }, { "epoch": 0.11853664932907906, "grad_norm": 2.421875, "learning_rate": 9.659494564983191e-05, "loss": 2.4197, "step": 2712 }, { "epoch": 0.11858035753310897, "grad_norm": 2.953125, "learning_rate": 9.659245381899691e-05, "loss": 2.1187, "step": 2713 }, { "epoch": 0.11862406573713886, "grad_norm": 2.296875, "learning_rate": 9.658996110889449e-05, "loss": 2.2612, "step": 2714 }, { "epoch": 0.11866777394116876, "grad_norm": 2.25, "learning_rate": 9.65874675195717e-05, "loss": 1.9677, "step": 2715 }, { "epoch": 0.11871148214519865, "grad_norm": 2.84375, "learning_rate": 9.658497305107558e-05, "loss": 1.9515, "step": 2716 }, { "epoch": 0.11875519034922855, "grad_norm": 2.1875, "learning_rate": 9.658247770345323e-05, "loss": 1.9262, "step": 2717 }, { "epoch": 0.11879889855325845, "grad_norm": 2.875, "learning_rate": 9.657998147675173e-05, "loss": 2.469, "step": 2718 }, { "epoch": 0.11884260675728835, "grad_norm": 2.390625, "learning_rate": 9.65774843710182e-05, "loss": 2.1998, "step": 2719 }, { "epoch": 0.11888631496131824, "grad_norm": 2.15625, "learning_rate": 9.657498638629972e-05, "loss": 1.9848, "step": 2720 }, { "epoch": 0.11893002316534813, "grad_norm": 2.78125, "learning_rate": 9.657248752264348e-05, "loss": 2.4081, "step": 2721 }, { "epoch": 0.11897373136937803, "grad_norm": 2.1875, "learning_rate": 9.656998778009661e-05, "loss": 1.5549, "step": 2722 }, { "epoch": 0.11901743957340793, "grad_norm": 2.484375, "learning_rate": 9.656748715870629e-05, "loss": 1.8315, "step": 2723 }, { "epoch": 0.11906114777743783, "grad_norm": 2.40625, "learning_rate": 9.656498565851972e-05, "loss": 1.9119, "step": 2724 }, { "epoch": 0.11910485598146772, "grad_norm": 2.625, "learning_rate": 9.656248327958409e-05, "loss": 1.7329, "step": 2725 }, { "epoch": 0.11914856418549762, "grad_norm": 3.3125, "learning_rate": 9.655998002194663e-05, "loss": 2.5823, "step": 2726 }, { "epoch": 0.11919227238952751, "grad_norm": 2.140625, "learning_rate": 9.65574758856546e-05, "loss": 1.5248, "step": 2727 }, { "epoch": 0.11923598059355742, "grad_norm": 2.765625, "learning_rate": 9.655497087075522e-05, "loss": 2.3173, "step": 2728 }, { "epoch": 0.11927968879758731, "grad_norm": 2.5625, "learning_rate": 9.655246497729578e-05, "loss": 1.7577, "step": 2729 }, { "epoch": 0.1193233970016172, "grad_norm": 4.5625, "learning_rate": 9.654995820532359e-05, "loss": 1.9014, "step": 2730 }, { "epoch": 0.1193671052056471, "grad_norm": 2.3125, "learning_rate": 9.654745055488592e-05, "loss": 1.8937, "step": 2731 }, { "epoch": 0.11941081340967699, "grad_norm": 2.265625, "learning_rate": 9.654494202603013e-05, "loss": 1.7926, "step": 2732 }, { "epoch": 0.1194545216137069, "grad_norm": 2.65625, "learning_rate": 9.654243261880353e-05, "loss": 1.5646, "step": 2733 }, { "epoch": 0.11949822981773679, "grad_norm": 2.890625, "learning_rate": 9.653992233325348e-05, "loss": 3.3764, "step": 2734 }, { "epoch": 0.11954193802176669, "grad_norm": 3.21875, "learning_rate": 9.653741116942738e-05, "loss": 2.5024, "step": 2735 }, { "epoch": 0.11958564622579658, "grad_norm": 2.859375, "learning_rate": 9.653489912737258e-05, "loss": 2.5474, "step": 2736 }, { "epoch": 0.11962935442982647, "grad_norm": 2.40625, "learning_rate": 9.653238620713652e-05, "loss": 2.585, "step": 2737 }, { "epoch": 0.11967306263385638, "grad_norm": 2.59375, "learning_rate": 9.652987240876659e-05, "loss": 2.2581, "step": 2738 }, { "epoch": 0.11971677083788627, "grad_norm": 2.96875, "learning_rate": 9.652735773231025e-05, "loss": 2.341, "step": 2739 }, { "epoch": 0.11976047904191617, "grad_norm": 2.578125, "learning_rate": 9.652484217781497e-05, "loss": 2.4574, "step": 2740 }, { "epoch": 0.11980418724594606, "grad_norm": 3.71875, "learning_rate": 9.65223257453282e-05, "loss": 2.1156, "step": 2741 }, { "epoch": 0.11984789544997596, "grad_norm": 2.40625, "learning_rate": 9.651980843489742e-05, "loss": 1.846, "step": 2742 }, { "epoch": 0.11989160365400586, "grad_norm": 3.0625, "learning_rate": 9.651729024657014e-05, "loss": 2.0689, "step": 2743 }, { "epoch": 0.11993531185803576, "grad_norm": 2.53125, "learning_rate": 9.651477118039391e-05, "loss": 1.9795, "step": 2744 }, { "epoch": 0.11997902006206565, "grad_norm": 3.171875, "learning_rate": 9.651225123641625e-05, "loss": 2.5617, "step": 2745 }, { "epoch": 0.12002272826609554, "grad_norm": 2.71875, "learning_rate": 9.65097304146847e-05, "loss": 2.7336, "step": 2746 }, { "epoch": 0.12006643647012544, "grad_norm": 2.875, "learning_rate": 9.650720871524686e-05, "loss": 1.7494, "step": 2747 }, { "epoch": 0.12011014467415534, "grad_norm": 2.84375, "learning_rate": 9.65046861381503e-05, "loss": 1.8763, "step": 2748 }, { "epoch": 0.12015385287818524, "grad_norm": 2.3125, "learning_rate": 9.650216268344262e-05, "loss": 1.9197, "step": 2749 }, { "epoch": 0.12019756108221513, "grad_norm": 2.28125, "learning_rate": 9.649963835117147e-05, "loss": 2.1772, "step": 2750 }, { "epoch": 0.12024126928624503, "grad_norm": 2.3125, "learning_rate": 9.649711314138443e-05, "loss": 1.8606, "step": 2751 }, { "epoch": 0.12028497749027492, "grad_norm": 2.515625, "learning_rate": 9.649458705412921e-05, "loss": 1.6259, "step": 2752 }, { "epoch": 0.12032868569430483, "grad_norm": 2.046875, "learning_rate": 9.649206008945348e-05, "loss": 1.7527, "step": 2753 }, { "epoch": 0.12037239389833472, "grad_norm": 7.90625, "learning_rate": 9.648953224740489e-05, "loss": 3.3193, "step": 2754 }, { "epoch": 0.12041610210236461, "grad_norm": 2.828125, "learning_rate": 9.648700352803118e-05, "loss": 2.3352, "step": 2755 }, { "epoch": 0.12045981030639451, "grad_norm": 2.875, "learning_rate": 9.648447393138005e-05, "loss": 2.2762, "step": 2756 }, { "epoch": 0.1205035185104244, "grad_norm": 2.53125, "learning_rate": 9.648194345749923e-05, "loss": 2.3559, "step": 2757 }, { "epoch": 0.12054722671445431, "grad_norm": 2.625, "learning_rate": 9.64794121064365e-05, "loss": 2.6152, "step": 2758 }, { "epoch": 0.1205909349184842, "grad_norm": 2.25, "learning_rate": 9.64768798782396e-05, "loss": 1.7972, "step": 2759 }, { "epoch": 0.1206346431225141, "grad_norm": 3.171875, "learning_rate": 9.647434677295635e-05, "loss": 1.8208, "step": 2760 }, { "epoch": 0.12067835132654399, "grad_norm": 2.984375, "learning_rate": 9.647181279063453e-05, "loss": 2.6227, "step": 2761 }, { "epoch": 0.12072205953057388, "grad_norm": 2.3125, "learning_rate": 9.646927793132195e-05, "loss": 2.2009, "step": 2762 }, { "epoch": 0.12076576773460379, "grad_norm": 2.78125, "learning_rate": 9.646674219506648e-05, "loss": 2.5652, "step": 2763 }, { "epoch": 0.12080947593863368, "grad_norm": 2.46875, "learning_rate": 9.646420558191596e-05, "loss": 2.5864, "step": 2764 }, { "epoch": 0.12085318414266358, "grad_norm": 2.328125, "learning_rate": 9.646166809191824e-05, "loss": 2.542, "step": 2765 }, { "epoch": 0.12089689234669347, "grad_norm": 2.375, "learning_rate": 9.645912972512124e-05, "loss": 1.6604, "step": 2766 }, { "epoch": 0.12094060055072337, "grad_norm": 2.21875, "learning_rate": 9.645659048157282e-05, "loss": 1.5808, "step": 2767 }, { "epoch": 0.12098430875475327, "grad_norm": 3.0625, "learning_rate": 9.645405036132093e-05, "loss": 1.7339, "step": 2768 }, { "epoch": 0.12102801695878317, "grad_norm": 2.65625, "learning_rate": 9.645150936441351e-05, "loss": 2.326, "step": 2769 }, { "epoch": 0.12107172516281306, "grad_norm": 2.640625, "learning_rate": 9.64489674908985e-05, "loss": 1.763, "step": 2770 }, { "epoch": 0.12111543336684295, "grad_norm": 2.578125, "learning_rate": 9.644642474082386e-05, "loss": 1.7941, "step": 2771 }, { "epoch": 0.12115914157087285, "grad_norm": 2.578125, "learning_rate": 9.644388111423759e-05, "loss": 1.76, "step": 2772 }, { "epoch": 0.12120284977490275, "grad_norm": 2.4375, "learning_rate": 9.644133661118769e-05, "loss": 2.1463, "step": 2773 }, { "epoch": 0.12124655797893265, "grad_norm": 2.390625, "learning_rate": 9.643879123172218e-05, "loss": 1.7819, "step": 2774 }, { "epoch": 0.12129026618296254, "grad_norm": 2.609375, "learning_rate": 9.643624497588908e-05, "loss": 2.1013, "step": 2775 }, { "epoch": 0.12133397438699244, "grad_norm": 2.765625, "learning_rate": 9.643369784373645e-05, "loss": 2.0237, "step": 2776 }, { "epoch": 0.12137768259102233, "grad_norm": 2.359375, "learning_rate": 9.643114983531238e-05, "loss": 1.6875, "step": 2777 }, { "epoch": 0.12142139079505224, "grad_norm": 2.59375, "learning_rate": 9.642860095066493e-05, "loss": 2.4743, "step": 2778 }, { "epoch": 0.12146509899908213, "grad_norm": 2.5625, "learning_rate": 9.64260511898422e-05, "loss": 2.577, "step": 2779 }, { "epoch": 0.12150880720311202, "grad_norm": 3.828125, "learning_rate": 9.642350055289232e-05, "loss": 2.5573, "step": 2780 }, { "epoch": 0.12155251540714192, "grad_norm": 2.25, "learning_rate": 9.642094903986341e-05, "loss": 1.871, "step": 2781 }, { "epoch": 0.12159622361117181, "grad_norm": 3.015625, "learning_rate": 9.641839665080363e-05, "loss": 2.1364, "step": 2782 }, { "epoch": 0.12163993181520172, "grad_norm": 2.234375, "learning_rate": 9.641584338576115e-05, "loss": 1.9107, "step": 2783 }, { "epoch": 0.12168364001923161, "grad_norm": 2.515625, "learning_rate": 9.641328924478416e-05, "loss": 1.7113, "step": 2784 }, { "epoch": 0.1217273482232615, "grad_norm": 2.3125, "learning_rate": 9.641073422792085e-05, "loss": 1.885, "step": 2785 }, { "epoch": 0.1217710564272914, "grad_norm": 2.46875, "learning_rate": 9.640817833521941e-05, "loss": 2.057, "step": 2786 }, { "epoch": 0.12181476463132129, "grad_norm": 3.609375, "learning_rate": 9.640562156672812e-05, "loss": 2.3443, "step": 2787 }, { "epoch": 0.1218584728353512, "grad_norm": 4.1875, "learning_rate": 9.64030639224952e-05, "loss": 1.3211, "step": 2788 }, { "epoch": 0.1219021810393811, "grad_norm": 2.609375, "learning_rate": 9.640050540256896e-05, "loss": 1.985, "step": 2789 }, { "epoch": 0.12194588924341099, "grad_norm": 2.53125, "learning_rate": 9.639794600699761e-05, "loss": 1.9536, "step": 2790 }, { "epoch": 0.12198959744744088, "grad_norm": 2.359375, "learning_rate": 9.639538573582951e-05, "loss": 1.9697, "step": 2791 }, { "epoch": 0.12203330565147077, "grad_norm": 3.296875, "learning_rate": 9.639282458911297e-05, "loss": 1.9432, "step": 2792 }, { "epoch": 0.12207701385550068, "grad_norm": 2.265625, "learning_rate": 9.639026256689628e-05, "loss": 1.8395, "step": 2793 }, { "epoch": 0.12212072205953058, "grad_norm": 3.3125, "learning_rate": 9.638769966922783e-05, "loss": 2.3836, "step": 2794 }, { "epoch": 0.12216443026356047, "grad_norm": 2.984375, "learning_rate": 9.638513589615596e-05, "loss": 2.228, "step": 2795 }, { "epoch": 0.12220813846759036, "grad_norm": 2.359375, "learning_rate": 9.638257124772909e-05, "loss": 2.281, "step": 2796 }, { "epoch": 0.12225184667162026, "grad_norm": 2.296875, "learning_rate": 9.638000572399559e-05, "loss": 2.0127, "step": 2797 }, { "epoch": 0.12229555487565016, "grad_norm": 2.640625, "learning_rate": 9.637743932500385e-05, "loss": 2.5281, "step": 2798 }, { "epoch": 0.12233926307968006, "grad_norm": 2.34375, "learning_rate": 9.637487205080236e-05, "loss": 2.0413, "step": 2799 }, { "epoch": 0.12238297128370995, "grad_norm": 2.234375, "learning_rate": 9.637230390143953e-05, "loss": 2.1581, "step": 2800 }, { "epoch": 0.12242667948773985, "grad_norm": 2.359375, "learning_rate": 9.636973487696383e-05, "loss": 2.5069, "step": 2801 }, { "epoch": 0.12247038769176974, "grad_norm": 2.28125, "learning_rate": 9.636716497742375e-05, "loss": 1.5927, "step": 2802 }, { "epoch": 0.12251409589579965, "grad_norm": 2.09375, "learning_rate": 9.636459420286779e-05, "loss": 1.7826, "step": 2803 }, { "epoch": 0.12255780409982954, "grad_norm": 3.71875, "learning_rate": 9.636202255334444e-05, "loss": 2.4786, "step": 2804 }, { "epoch": 0.12260151230385943, "grad_norm": 2.625, "learning_rate": 9.635945002890225e-05, "loss": 2.1374, "step": 2805 }, { "epoch": 0.12264522050788933, "grad_norm": 2.234375, "learning_rate": 9.635687662958978e-05, "loss": 1.6501, "step": 2806 }, { "epoch": 0.12268892871191922, "grad_norm": 2.625, "learning_rate": 9.635430235545557e-05, "loss": 2.1622, "step": 2807 }, { "epoch": 0.12273263691594913, "grad_norm": 2.0625, "learning_rate": 9.635172720654822e-05, "loss": 1.9891, "step": 2808 }, { "epoch": 0.12277634511997902, "grad_norm": 2.375, "learning_rate": 9.634915118291629e-05, "loss": 1.9462, "step": 2809 }, { "epoch": 0.12282005332400892, "grad_norm": 2.859375, "learning_rate": 9.634657428460844e-05, "loss": 2.5531, "step": 2810 }, { "epoch": 0.12286376152803881, "grad_norm": 2.21875, "learning_rate": 9.634399651167328e-05, "loss": 1.9049, "step": 2811 }, { "epoch": 0.1229074697320687, "grad_norm": 3.078125, "learning_rate": 9.634141786415944e-05, "loss": 2.4264, "step": 2812 }, { "epoch": 0.12295117793609861, "grad_norm": 3.640625, "learning_rate": 9.633883834211562e-05, "loss": 2.2442, "step": 2813 }, { "epoch": 0.1229948861401285, "grad_norm": 3.234375, "learning_rate": 9.633625794559045e-05, "loss": 2.1397, "step": 2814 }, { "epoch": 0.1230385943441584, "grad_norm": 2.71875, "learning_rate": 9.633367667463267e-05, "loss": 2.8998, "step": 2815 }, { "epoch": 0.12308230254818829, "grad_norm": 2.28125, "learning_rate": 9.633109452929097e-05, "loss": 1.9054, "step": 2816 }, { "epoch": 0.12312601075221818, "grad_norm": 2.1875, "learning_rate": 9.632851150961409e-05, "loss": 1.9339, "step": 2817 }, { "epoch": 0.12316971895624809, "grad_norm": 2.34375, "learning_rate": 9.632592761565077e-05, "loss": 1.8701, "step": 2818 }, { "epoch": 0.12321342716027799, "grad_norm": 2.859375, "learning_rate": 9.632334284744978e-05, "loss": 2.0784, "step": 2819 }, { "epoch": 0.12325713536430788, "grad_norm": 7.4375, "learning_rate": 9.632075720505987e-05, "loss": 1.8226, "step": 2820 }, { "epoch": 0.12330084356833777, "grad_norm": 2.28125, "learning_rate": 9.631817068852986e-05, "loss": 1.7425, "step": 2821 }, { "epoch": 0.12334455177236767, "grad_norm": 2.546875, "learning_rate": 9.631558329790857e-05, "loss": 1.9114, "step": 2822 }, { "epoch": 0.12338825997639757, "grad_norm": 3.078125, "learning_rate": 9.63129950332448e-05, "loss": 2.1165, "step": 2823 }, { "epoch": 0.12343196818042747, "grad_norm": 2.234375, "learning_rate": 9.631040589458741e-05, "loss": 1.8682, "step": 2824 }, { "epoch": 0.12347567638445736, "grad_norm": 2.84375, "learning_rate": 9.630781588198526e-05, "loss": 1.7066, "step": 2825 }, { "epoch": 0.12351938458848725, "grad_norm": 2.578125, "learning_rate": 9.630522499548723e-05, "loss": 2.5323, "step": 2826 }, { "epoch": 0.12356309279251716, "grad_norm": 2.640625, "learning_rate": 9.630263323514222e-05, "loss": 2.2879, "step": 2827 }, { "epoch": 0.12360680099654706, "grad_norm": 2.359375, "learning_rate": 9.630004060099911e-05, "loss": 2.0478, "step": 2828 }, { "epoch": 0.12365050920057695, "grad_norm": 2.109375, "learning_rate": 9.629744709310686e-05, "loss": 1.6938, "step": 2829 }, { "epoch": 0.12369421740460684, "grad_norm": 2.53125, "learning_rate": 9.629485271151439e-05, "loss": 1.9106, "step": 2830 }, { "epoch": 0.12373792560863674, "grad_norm": 3.9375, "learning_rate": 9.629225745627069e-05, "loss": 2.8505, "step": 2831 }, { "epoch": 0.12378163381266664, "grad_norm": 2.640625, "learning_rate": 9.628966132742469e-05, "loss": 2.4035, "step": 2832 }, { "epoch": 0.12382534201669654, "grad_norm": 2.75, "learning_rate": 9.628706432502543e-05, "loss": 1.9975, "step": 2833 }, { "epoch": 0.12386905022072643, "grad_norm": 2.234375, "learning_rate": 9.62844664491219e-05, "loss": 1.9244, "step": 2834 }, { "epoch": 0.12391275842475633, "grad_norm": 2.25, "learning_rate": 9.628186769976311e-05, "loss": 1.9185, "step": 2835 }, { "epoch": 0.12395646662878622, "grad_norm": 4.09375, "learning_rate": 9.627926807699812e-05, "loss": 1.9349, "step": 2836 }, { "epoch": 0.12400017483281613, "grad_norm": 4.28125, "learning_rate": 9.6276667580876e-05, "loss": 1.671, "step": 2837 }, { "epoch": 0.12404388303684602, "grad_norm": 2.1875, "learning_rate": 9.627406621144578e-05, "loss": 1.6789, "step": 2838 }, { "epoch": 0.12408759124087591, "grad_norm": 2.53125, "learning_rate": 9.62714639687566e-05, "loss": 1.9697, "step": 2839 }, { "epoch": 0.12413129944490581, "grad_norm": 2.3125, "learning_rate": 9.626886085285755e-05, "loss": 2.0127, "step": 2840 }, { "epoch": 0.1241750076489357, "grad_norm": 2.328125, "learning_rate": 9.626625686379776e-05, "loss": 1.8073, "step": 2841 }, { "epoch": 0.12421871585296561, "grad_norm": 2.140625, "learning_rate": 9.626365200162636e-05, "loss": 1.6166, "step": 2842 }, { "epoch": 0.1242624240569955, "grad_norm": 3.90625, "learning_rate": 9.626104626639252e-05, "loss": 1.9251, "step": 2843 }, { "epoch": 0.1243061322610254, "grad_norm": 3.875, "learning_rate": 9.625843965814539e-05, "loss": 2.5869, "step": 2844 }, { "epoch": 0.12434984046505529, "grad_norm": 2.796875, "learning_rate": 9.62558321769342e-05, "loss": 3.0161, "step": 2845 }, { "epoch": 0.12439354866908518, "grad_norm": 2.375, "learning_rate": 9.625322382280811e-05, "loss": 1.901, "step": 2846 }, { "epoch": 0.12443725687311509, "grad_norm": 2.21875, "learning_rate": 9.625061459581639e-05, "loss": 1.9044, "step": 2847 }, { "epoch": 0.12448096507714498, "grad_norm": 2.453125, "learning_rate": 9.624800449600825e-05, "loss": 2.3202, "step": 2848 }, { "epoch": 0.12452467328117488, "grad_norm": 2.515625, "learning_rate": 9.624539352343295e-05, "loss": 2.5673, "step": 2849 }, { "epoch": 0.12456838148520477, "grad_norm": 2.3125, "learning_rate": 9.624278167813977e-05, "loss": 2.3917, "step": 2850 }, { "epoch": 0.12461208968923466, "grad_norm": 3.53125, "learning_rate": 9.6240168960178e-05, "loss": 3.0122, "step": 2851 }, { "epoch": 0.12465579789326457, "grad_norm": 2.125, "learning_rate": 9.623755536959693e-05, "loss": 1.8228, "step": 2852 }, { "epoch": 0.12469950609729447, "grad_norm": 3.21875, "learning_rate": 9.623494090644591e-05, "loss": 3.0652, "step": 2853 }, { "epoch": 0.12474321430132436, "grad_norm": 2.296875, "learning_rate": 9.623232557077426e-05, "loss": 2.1147, "step": 2854 }, { "epoch": 0.12478692250535425, "grad_norm": 3.34375, "learning_rate": 9.622970936263134e-05, "loss": 2.8506, "step": 2855 }, { "epoch": 0.12483063070938415, "grad_norm": 3.15625, "learning_rate": 9.622709228206651e-05, "loss": 2.3544, "step": 2856 }, { "epoch": 0.12487433891341405, "grad_norm": 2.65625, "learning_rate": 9.622447432912918e-05, "loss": 1.9846, "step": 2857 }, { "epoch": 0.12491804711744395, "grad_norm": 2.46875, "learning_rate": 9.622185550386873e-05, "loss": 1.9084, "step": 2858 }, { "epoch": 0.12496175532147384, "grad_norm": 2.25, "learning_rate": 9.62192358063346e-05, "loss": 1.8372, "step": 2859 }, { "epoch": 0.12500546352550373, "grad_norm": 2.265625, "learning_rate": 9.621661523657623e-05, "loss": 2.4741, "step": 2860 }, { "epoch": 0.12504917172953364, "grad_norm": 2.65625, "learning_rate": 9.621399379464306e-05, "loss": 2.4071, "step": 2861 }, { "epoch": 0.12509287993356352, "grad_norm": 2.796875, "learning_rate": 9.621137148058457e-05, "loss": 2.3149, "step": 2862 }, { "epoch": 0.12513658813759343, "grad_norm": 2.578125, "learning_rate": 9.620874829445023e-05, "loss": 2.3921, "step": 2863 }, { "epoch": 0.1251802963416233, "grad_norm": 2.40625, "learning_rate": 9.620612423628956e-05, "loss": 1.9683, "step": 2864 }, { "epoch": 0.12522400454565322, "grad_norm": 3.4375, "learning_rate": 9.620349930615207e-05, "loss": 2.6925, "step": 2865 }, { "epoch": 0.12526771274968312, "grad_norm": 2.390625, "learning_rate": 9.620087350408732e-05, "loss": 2.2013, "step": 2866 }, { "epoch": 0.125311420953713, "grad_norm": 2.703125, "learning_rate": 9.619824683014484e-05, "loss": 2.3714, "step": 2867 }, { "epoch": 0.1253551291577429, "grad_norm": 2.140625, "learning_rate": 9.61956192843742e-05, "loss": 1.9963, "step": 2868 }, { "epoch": 0.1253988373617728, "grad_norm": 2.4375, "learning_rate": 9.619299086682498e-05, "loss": 1.7972, "step": 2869 }, { "epoch": 0.1254425455658027, "grad_norm": 2.5625, "learning_rate": 9.61903615775468e-05, "loss": 2.12, "step": 2870 }, { "epoch": 0.1254862537698326, "grad_norm": 2.203125, "learning_rate": 9.618773141658927e-05, "loss": 2.1438, "step": 2871 }, { "epoch": 0.12552996197386249, "grad_norm": 2.34375, "learning_rate": 9.618510038400203e-05, "loss": 2.0048, "step": 2872 }, { "epoch": 0.1255736701778924, "grad_norm": 2.734375, "learning_rate": 9.618246847983471e-05, "loss": 2.315, "step": 2873 }, { "epoch": 0.12561737838192227, "grad_norm": 2.234375, "learning_rate": 9.617983570413702e-05, "loss": 1.9862, "step": 2874 }, { "epoch": 0.12566108658595218, "grad_norm": 2.09375, "learning_rate": 9.617720205695862e-05, "loss": 1.7937, "step": 2875 }, { "epoch": 0.1257047947899821, "grad_norm": 2.75, "learning_rate": 9.617456753834919e-05, "loss": 2.5493, "step": 2876 }, { "epoch": 0.12574850299401197, "grad_norm": 2.421875, "learning_rate": 9.617193214835847e-05, "loss": 2.4217, "step": 2877 }, { "epoch": 0.12579221119804188, "grad_norm": 8.375, "learning_rate": 9.616929588703618e-05, "loss": 2.2522, "step": 2878 }, { "epoch": 0.12583591940207176, "grad_norm": 2.390625, "learning_rate": 9.61666587544321e-05, "loss": 1.9273, "step": 2879 }, { "epoch": 0.12587962760610166, "grad_norm": 2.640625, "learning_rate": 9.616402075059597e-05, "loss": 2.637, "step": 2880 }, { "epoch": 0.12592333581013157, "grad_norm": 2.515625, "learning_rate": 9.616138187557758e-05, "loss": 2.0555, "step": 2881 }, { "epoch": 0.12596704401416145, "grad_norm": 3.953125, "learning_rate": 9.615874212942673e-05, "loss": 2.0235, "step": 2882 }, { "epoch": 0.12601075221819136, "grad_norm": 2.390625, "learning_rate": 9.615610151219323e-05, "loss": 2.136, "step": 2883 }, { "epoch": 0.12605446042222124, "grad_norm": 2.453125, "learning_rate": 9.615346002392692e-05, "loss": 2.1353, "step": 2884 }, { "epoch": 0.12609816862625114, "grad_norm": 2.546875, "learning_rate": 9.615081766467764e-05, "loss": 2.4363, "step": 2885 }, { "epoch": 0.12614187683028105, "grad_norm": 2.578125, "learning_rate": 9.614817443449529e-05, "loss": 1.9039, "step": 2886 }, { "epoch": 0.12618558503431093, "grad_norm": 3.046875, "learning_rate": 9.614553033342969e-05, "loss": 2.5831, "step": 2887 }, { "epoch": 0.12622929323834084, "grad_norm": 2.234375, "learning_rate": 9.614288536153078e-05, "loss": 2.0155, "step": 2888 }, { "epoch": 0.12627300144237072, "grad_norm": 2.171875, "learning_rate": 9.614023951884848e-05, "loss": 1.7467, "step": 2889 }, { "epoch": 0.12631670964640063, "grad_norm": 2.6875, "learning_rate": 9.613759280543269e-05, "loss": 1.8073, "step": 2890 }, { "epoch": 0.12636041785043053, "grad_norm": 4.90625, "learning_rate": 9.613494522133337e-05, "loss": 3.2485, "step": 2891 }, { "epoch": 0.12640412605446041, "grad_norm": 2.796875, "learning_rate": 9.613229676660049e-05, "loss": 2.328, "step": 2892 }, { "epoch": 0.12644783425849032, "grad_norm": 2.0, "learning_rate": 9.612964744128404e-05, "loss": 1.8794, "step": 2893 }, { "epoch": 0.1264915424625202, "grad_norm": 2.1875, "learning_rate": 9.6126997245434e-05, "loss": 2.0111, "step": 2894 }, { "epoch": 0.1265352506665501, "grad_norm": 2.25, "learning_rate": 9.612434617910038e-05, "loss": 2.0805, "step": 2895 }, { "epoch": 0.12657895887058002, "grad_norm": 2.375, "learning_rate": 9.612169424233323e-05, "loss": 2.1638, "step": 2896 }, { "epoch": 0.1266226670746099, "grad_norm": 2.96875, "learning_rate": 9.611904143518257e-05, "loss": 2.546, "step": 2897 }, { "epoch": 0.1266663752786398, "grad_norm": 3.78125, "learning_rate": 9.611638775769848e-05, "loss": 2.0348, "step": 2898 }, { "epoch": 0.1267100834826697, "grad_norm": 2.203125, "learning_rate": 9.611373320993104e-05, "loss": 1.8323, "step": 2899 }, { "epoch": 0.1267537916866996, "grad_norm": 2.328125, "learning_rate": 9.611107779193033e-05, "loss": 1.6366, "step": 2900 }, { "epoch": 0.1267974998907295, "grad_norm": 2.328125, "learning_rate": 9.610842150374647e-05, "loss": 1.679, "step": 2901 }, { "epoch": 0.12684120809475938, "grad_norm": 2.09375, "learning_rate": 9.610576434542959e-05, "loss": 1.9237, "step": 2902 }, { "epoch": 0.12688491629878929, "grad_norm": 2.578125, "learning_rate": 9.610310631702983e-05, "loss": 2.8621, "step": 2903 }, { "epoch": 0.1269286245028192, "grad_norm": 2.765625, "learning_rate": 9.610044741859736e-05, "loss": 2.1277, "step": 2904 }, { "epoch": 0.12697233270684907, "grad_norm": 2.40625, "learning_rate": 9.609778765018235e-05, "loss": 1.9442, "step": 2905 }, { "epoch": 0.12701604091087898, "grad_norm": 2.96875, "learning_rate": 9.609512701183499e-05, "loss": 2.3559, "step": 2906 }, { "epoch": 0.12705974911490886, "grad_norm": 2.359375, "learning_rate": 9.609246550360551e-05, "loss": 2.4651, "step": 2907 }, { "epoch": 0.12710345731893877, "grad_norm": 4.90625, "learning_rate": 9.60898031255441e-05, "loss": 2.1018, "step": 2908 }, { "epoch": 0.12714716552296867, "grad_norm": 2.40625, "learning_rate": 9.608713987770103e-05, "loss": 1.988, "step": 2909 }, { "epoch": 0.12719087372699855, "grad_norm": 2.375, "learning_rate": 9.608447576012656e-05, "loss": 2.4612, "step": 2910 }, { "epoch": 0.12723458193102846, "grad_norm": 2.203125, "learning_rate": 9.608181077287098e-05, "loss": 2.0709, "step": 2911 }, { "epoch": 0.12727829013505834, "grad_norm": 2.484375, "learning_rate": 9.607914491598453e-05, "loss": 1.7109, "step": 2912 }, { "epoch": 0.12732199833908825, "grad_norm": 2.921875, "learning_rate": 9.607647818951756e-05, "loss": 2.0656, "step": 2913 }, { "epoch": 0.12736570654311816, "grad_norm": 3.125, "learning_rate": 9.607381059352038e-05, "loss": 3.0216, "step": 2914 }, { "epoch": 0.12740941474714804, "grad_norm": 2.25, "learning_rate": 9.607114212804335e-05, "loss": 1.9264, "step": 2915 }, { "epoch": 0.12745312295117794, "grad_norm": 2.640625, "learning_rate": 9.606847279313681e-05, "loss": 2.1365, "step": 2916 }, { "epoch": 0.12749683115520782, "grad_norm": 2.40625, "learning_rate": 9.606580258885114e-05, "loss": 1.7999, "step": 2917 }, { "epoch": 0.12754053935923773, "grad_norm": 2.84375, "learning_rate": 9.606313151523672e-05, "loss": 1.9917, "step": 2918 }, { "epoch": 0.12758424756326764, "grad_norm": 2.453125, "learning_rate": 9.606045957234398e-05, "loss": 2.5768, "step": 2919 }, { "epoch": 0.12762795576729752, "grad_norm": 2.390625, "learning_rate": 9.605778676022333e-05, "loss": 1.913, "step": 2920 }, { "epoch": 0.12767166397132743, "grad_norm": 2.46875, "learning_rate": 9.605511307892519e-05, "loss": 2.1415, "step": 2921 }, { "epoch": 0.1277153721753573, "grad_norm": 2.40625, "learning_rate": 9.605243852850006e-05, "loss": 1.9214, "step": 2922 }, { "epoch": 0.1277590803793872, "grad_norm": 2.953125, "learning_rate": 9.604976310899837e-05, "loss": 2.1233, "step": 2923 }, { "epoch": 0.12780278858341712, "grad_norm": 2.1875, "learning_rate": 9.604708682047064e-05, "loss": 2.0758, "step": 2924 }, { "epoch": 0.127846496787447, "grad_norm": 3.09375, "learning_rate": 9.604440966296734e-05, "loss": 2.0466, "step": 2925 }, { "epoch": 0.1278902049914769, "grad_norm": 2.8125, "learning_rate": 9.604173163653904e-05, "loss": 2.3425, "step": 2926 }, { "epoch": 0.1279339131955068, "grad_norm": 2.703125, "learning_rate": 9.603905274123626e-05, "loss": 2.4204, "step": 2927 }, { "epoch": 0.1279776213995367, "grad_norm": 2.265625, "learning_rate": 9.603637297710954e-05, "loss": 2.0726, "step": 2928 }, { "epoch": 0.1280213296035666, "grad_norm": 2.109375, "learning_rate": 9.603369234420945e-05, "loss": 1.755, "step": 2929 }, { "epoch": 0.12806503780759648, "grad_norm": 2.890625, "learning_rate": 9.603101084258658e-05, "loss": 2.0132, "step": 2930 }, { "epoch": 0.1281087460116264, "grad_norm": 2.5, "learning_rate": 9.602832847229156e-05, "loss": 1.9451, "step": 2931 }, { "epoch": 0.12815245421565627, "grad_norm": 2.3125, "learning_rate": 9.602564523337498e-05, "loss": 1.7434, "step": 2932 }, { "epoch": 0.12819616241968618, "grad_norm": 2.515625, "learning_rate": 9.602296112588749e-05, "loss": 2.1285, "step": 2933 }, { "epoch": 0.12823987062371608, "grad_norm": 3.09375, "learning_rate": 9.602027614987974e-05, "loss": 2.3753, "step": 2934 }, { "epoch": 0.12828357882774596, "grad_norm": 2.1875, "learning_rate": 9.60175903054024e-05, "loss": 1.763, "step": 2935 }, { "epoch": 0.12832728703177587, "grad_norm": 2.125, "learning_rate": 9.601490359250615e-05, "loss": 1.7454, "step": 2936 }, { "epoch": 0.12837099523580575, "grad_norm": 2.5625, "learning_rate": 9.601221601124172e-05, "loss": 1.8542, "step": 2937 }, { "epoch": 0.12841470343983566, "grad_norm": 2.4375, "learning_rate": 9.600952756165979e-05, "loss": 2.1361, "step": 2938 }, { "epoch": 0.12845841164386557, "grad_norm": 2.75, "learning_rate": 9.600683824381112e-05, "loss": 1.8827, "step": 2939 }, { "epoch": 0.12850211984789545, "grad_norm": 4.40625, "learning_rate": 9.600414805774643e-05, "loss": 2.8128, "step": 2940 }, { "epoch": 0.12854582805192535, "grad_norm": 2.1875, "learning_rate": 9.600145700351652e-05, "loss": 1.7991, "step": 2941 }, { "epoch": 0.12858953625595523, "grad_norm": 2.34375, "learning_rate": 9.599876508117219e-05, "loss": 2.6316, "step": 2942 }, { "epoch": 0.12863324445998514, "grad_norm": 2.46875, "learning_rate": 9.599607229076418e-05, "loss": 1.8649, "step": 2943 }, { "epoch": 0.12867695266401505, "grad_norm": 2.5625, "learning_rate": 9.599337863234335e-05, "loss": 2.396, "step": 2944 }, { "epoch": 0.12872066086804493, "grad_norm": 2.40625, "learning_rate": 9.599068410596053e-05, "loss": 2.2869, "step": 2945 }, { "epoch": 0.12876436907207484, "grad_norm": 2.6875, "learning_rate": 9.598798871166656e-05, "loss": 2.4572, "step": 2946 }, { "epoch": 0.12880807727610472, "grad_norm": 3.09375, "learning_rate": 9.598529244951232e-05, "loss": 2.1541, "step": 2947 }, { "epoch": 0.12885178548013462, "grad_norm": 2.8125, "learning_rate": 9.598259531954868e-05, "loss": 2.0215, "step": 2948 }, { "epoch": 0.12889549368416453, "grad_norm": 2.625, "learning_rate": 9.597989732182654e-05, "loss": 2.2977, "step": 2949 }, { "epoch": 0.1289392018881944, "grad_norm": 2.421875, "learning_rate": 9.597719845639682e-05, "loss": 1.8592, "step": 2950 }, { "epoch": 0.12898291009222432, "grad_norm": 2.375, "learning_rate": 9.597449872331045e-05, "loss": 2.1149, "step": 2951 }, { "epoch": 0.1290266182962542, "grad_norm": 2.015625, "learning_rate": 9.597179812261836e-05, "loss": 1.9048, "step": 2952 }, { "epoch": 0.1290703265002841, "grad_norm": 2.234375, "learning_rate": 9.596909665437155e-05, "loss": 2.1005, "step": 2953 }, { "epoch": 0.129114034704314, "grad_norm": 2.78125, "learning_rate": 9.596639431862098e-05, "loss": 1.4811, "step": 2954 }, { "epoch": 0.1291577429083439, "grad_norm": 2.203125, "learning_rate": 9.596369111541764e-05, "loss": 2.0181, "step": 2955 }, { "epoch": 0.1292014511123738, "grad_norm": 2.53125, "learning_rate": 9.596098704481255e-05, "loss": 2.2414, "step": 2956 }, { "epoch": 0.12924515931640368, "grad_norm": 3.171875, "learning_rate": 9.595828210685675e-05, "loss": 1.9929, "step": 2957 }, { "epoch": 0.1292888675204336, "grad_norm": 2.375, "learning_rate": 9.595557630160127e-05, "loss": 2.1108, "step": 2958 }, { "epoch": 0.1293325757244635, "grad_norm": 2.484375, "learning_rate": 9.595286962909717e-05, "loss": 1.8243, "step": 2959 }, { "epoch": 0.12937628392849337, "grad_norm": 2.109375, "learning_rate": 9.595016208939555e-05, "loss": 2.0417, "step": 2960 }, { "epoch": 0.12941999213252328, "grad_norm": 2.546875, "learning_rate": 9.594745368254751e-05, "loss": 2.0429, "step": 2961 }, { "epoch": 0.12946370033655316, "grad_norm": 4.0, "learning_rate": 9.594474440860412e-05, "loss": 2.2023, "step": 2962 }, { "epoch": 0.12950740854058307, "grad_norm": 3.453125, "learning_rate": 9.594203426761656e-05, "loss": 2.6341, "step": 2963 }, { "epoch": 0.12955111674461298, "grad_norm": 2.390625, "learning_rate": 9.593932325963593e-05, "loss": 2.112, "step": 2964 }, { "epoch": 0.12959482494864286, "grad_norm": 3.03125, "learning_rate": 9.593661138471342e-05, "loss": 1.9946, "step": 2965 }, { "epoch": 0.12963853315267276, "grad_norm": 2.671875, "learning_rate": 9.593389864290018e-05, "loss": 2.502, "step": 2966 }, { "epoch": 0.12968224135670264, "grad_norm": 3.4375, "learning_rate": 9.593118503424743e-05, "loss": 2.1195, "step": 2967 }, { "epoch": 0.12972594956073255, "grad_norm": 2.15625, "learning_rate": 9.592847055880636e-05, "loss": 2.0636, "step": 2968 }, { "epoch": 0.12976965776476246, "grad_norm": 2.734375, "learning_rate": 9.592575521662821e-05, "loss": 2.1656, "step": 2969 }, { "epoch": 0.12981336596879234, "grad_norm": 2.546875, "learning_rate": 9.592303900776422e-05, "loss": 2.2107, "step": 2970 }, { "epoch": 0.12985707417282225, "grad_norm": 3.25, "learning_rate": 9.592032193226564e-05, "loss": 2.9833, "step": 2971 }, { "epoch": 0.12990078237685213, "grad_norm": 2.796875, "learning_rate": 9.591760399018375e-05, "loss": 2.4552, "step": 2972 }, { "epoch": 0.12994449058088203, "grad_norm": 2.25, "learning_rate": 9.591488518156985e-05, "loss": 2.3015, "step": 2973 }, { "epoch": 0.12998819878491194, "grad_norm": 2.5625, "learning_rate": 9.591216550647524e-05, "loss": 1.8688, "step": 2974 }, { "epoch": 0.13003190698894182, "grad_norm": 2.328125, "learning_rate": 9.590944496495124e-05, "loss": 1.8787, "step": 2975 }, { "epoch": 0.13007561519297173, "grad_norm": 2.515625, "learning_rate": 9.59067235570492e-05, "loss": 2.0342, "step": 2976 }, { "epoch": 0.1301193233970016, "grad_norm": 2.703125, "learning_rate": 9.590400128282047e-05, "loss": 2.1873, "step": 2977 }, { "epoch": 0.13016303160103151, "grad_norm": 5.46875, "learning_rate": 9.590127814231642e-05, "loss": 3.4453, "step": 2978 }, { "epoch": 0.13020673980506142, "grad_norm": 2.125, "learning_rate": 9.589855413558846e-05, "loss": 1.825, "step": 2979 }, { "epoch": 0.1302504480090913, "grad_norm": 2.234375, "learning_rate": 9.589582926268798e-05, "loss": 1.9386, "step": 2980 }, { "epoch": 0.1302941562131212, "grad_norm": 2.484375, "learning_rate": 9.589310352366639e-05, "loss": 2.2409, "step": 2981 }, { "epoch": 0.1303378644171511, "grad_norm": 2.875, "learning_rate": 9.589037691857515e-05, "loss": 2.7937, "step": 2982 }, { "epoch": 0.130381572621181, "grad_norm": 2.609375, "learning_rate": 9.588764944746571e-05, "loss": 2.0267, "step": 2983 }, { "epoch": 0.1304252808252109, "grad_norm": 2.609375, "learning_rate": 9.588492111038953e-05, "loss": 2.0755, "step": 2984 }, { "epoch": 0.13046898902924078, "grad_norm": 2.875, "learning_rate": 9.588219190739811e-05, "loss": 2.3942, "step": 2985 }, { "epoch": 0.1305126972332707, "grad_norm": 2.21875, "learning_rate": 9.587946183854295e-05, "loss": 1.7443, "step": 2986 }, { "epoch": 0.13055640543730057, "grad_norm": 2.421875, "learning_rate": 9.587673090387558e-05, "loss": 1.7173, "step": 2987 }, { "epoch": 0.13060011364133048, "grad_norm": 3.328125, "learning_rate": 9.587399910344753e-05, "loss": 2.3119, "step": 2988 }, { "epoch": 0.13064382184536039, "grad_norm": 2.515625, "learning_rate": 9.587126643731033e-05, "loss": 1.8181, "step": 2989 }, { "epoch": 0.13068753004939027, "grad_norm": 2.46875, "learning_rate": 9.586853290551558e-05, "loss": 2.0235, "step": 2990 }, { "epoch": 0.13073123825342017, "grad_norm": 2.578125, "learning_rate": 9.586579850811486e-05, "loss": 1.7663, "step": 2991 }, { "epoch": 0.13077494645745005, "grad_norm": 3.1875, "learning_rate": 9.586306324515976e-05, "loss": 2.0949, "step": 2992 }, { "epoch": 0.13081865466147996, "grad_norm": 2.34375, "learning_rate": 9.58603271167019e-05, "loss": 2.186, "step": 2993 }, { "epoch": 0.13086236286550987, "grad_norm": 2.453125, "learning_rate": 9.585759012279294e-05, "loss": 1.7462, "step": 2994 }, { "epoch": 0.13090607106953975, "grad_norm": 3.984375, "learning_rate": 9.58548522634845e-05, "loss": 2.6174, "step": 2995 }, { "epoch": 0.13094977927356966, "grad_norm": 2.453125, "learning_rate": 9.585211353882826e-05, "loss": 2.5182, "step": 2996 }, { "epoch": 0.13099348747759954, "grad_norm": 2.421875, "learning_rate": 9.58493739488759e-05, "loss": 1.7231, "step": 2997 }, { "epoch": 0.13103719568162944, "grad_norm": 2.71875, "learning_rate": 9.584663349367912e-05, "loss": 2.4883, "step": 2998 }, { "epoch": 0.13108090388565935, "grad_norm": 2.453125, "learning_rate": 9.584389217328966e-05, "loss": 1.7723, "step": 2999 }, { "epoch": 0.13112461208968923, "grad_norm": 2.15625, "learning_rate": 9.584114998775921e-05, "loss": 1.8153, "step": 3000 }, { "epoch": 0.13116832029371914, "grad_norm": 2.578125, "learning_rate": 9.583840693713954e-05, "loss": 2.0988, "step": 3001 }, { "epoch": 0.13121202849774902, "grad_norm": 3.03125, "learning_rate": 9.583566302148244e-05, "loss": 2.2539, "step": 3002 }, { "epoch": 0.13125573670177892, "grad_norm": 2.578125, "learning_rate": 9.583291824083965e-05, "loss": 1.6684, "step": 3003 }, { "epoch": 0.13129944490580883, "grad_norm": 2.96875, "learning_rate": 9.583017259526299e-05, "loss": 2.04, "step": 3004 }, { "epoch": 0.1313431531098387, "grad_norm": 2.453125, "learning_rate": 9.582742608480428e-05, "loss": 2.2517, "step": 3005 }, { "epoch": 0.13138686131386862, "grad_norm": 2.84375, "learning_rate": 9.582467870951533e-05, "loss": 1.9857, "step": 3006 }, { "epoch": 0.1314305695178985, "grad_norm": 2.390625, "learning_rate": 9.5821930469448e-05, "loss": 2.4797, "step": 3007 }, { "epoch": 0.1314742777219284, "grad_norm": 2.640625, "learning_rate": 9.581918136465416e-05, "loss": 2.4298, "step": 3008 }, { "epoch": 0.13151798592595831, "grad_norm": 2.375, "learning_rate": 9.581643139518565e-05, "loss": 1.7604, "step": 3009 }, { "epoch": 0.1315616941299882, "grad_norm": 2.6875, "learning_rate": 9.581368056109443e-05, "loss": 2.3003, "step": 3010 }, { "epoch": 0.1316054023340181, "grad_norm": 2.609375, "learning_rate": 9.581092886243237e-05, "loss": 1.9386, "step": 3011 }, { "epoch": 0.13164911053804798, "grad_norm": 4.4375, "learning_rate": 9.58081762992514e-05, "loss": 2.2748, "step": 3012 }, { "epoch": 0.1316928187420779, "grad_norm": 3.59375, "learning_rate": 9.580542287160348e-05, "loss": 2.9969, "step": 3013 }, { "epoch": 0.1317365269461078, "grad_norm": 2.625, "learning_rate": 9.580266857954057e-05, "loss": 2.2068, "step": 3014 }, { "epoch": 0.13178023515013768, "grad_norm": 2.796875, "learning_rate": 9.579991342311463e-05, "loss": 1.8651, "step": 3015 }, { "epoch": 0.13182394335416758, "grad_norm": 2.4375, "learning_rate": 9.579715740237766e-05, "loss": 1.7948, "step": 3016 }, { "epoch": 0.13186765155819746, "grad_norm": 2.265625, "learning_rate": 9.579440051738168e-05, "loss": 2.0184, "step": 3017 }, { "epoch": 0.13191135976222737, "grad_norm": 2.59375, "learning_rate": 9.579164276817873e-05, "loss": 2.3655, "step": 3018 }, { "epoch": 0.13195506796625728, "grad_norm": 3.5625, "learning_rate": 9.578888415482082e-05, "loss": 1.9905, "step": 3019 }, { "epoch": 0.13199877617028716, "grad_norm": 3.65625, "learning_rate": 9.578612467736004e-05, "loss": 2.846, "step": 3020 }, { "epoch": 0.13204248437431707, "grad_norm": 2.609375, "learning_rate": 9.578336433584842e-05, "loss": 2.0329, "step": 3021 }, { "epoch": 0.13208619257834694, "grad_norm": 2.296875, "learning_rate": 9.57806031303381e-05, "loss": 2.0556, "step": 3022 }, { "epoch": 0.13212990078237685, "grad_norm": 2.3125, "learning_rate": 9.577784106088115e-05, "loss": 2.2177, "step": 3023 }, { "epoch": 0.13217360898640676, "grad_norm": 2.078125, "learning_rate": 9.577507812752972e-05, "loss": 1.7264, "step": 3024 }, { "epoch": 0.13221731719043664, "grad_norm": 2.3125, "learning_rate": 9.577231433033596e-05, "loss": 1.7551, "step": 3025 }, { "epoch": 0.13226102539446655, "grad_norm": 2.21875, "learning_rate": 9.576954966935198e-05, "loss": 1.812, "step": 3026 }, { "epoch": 0.13230473359849643, "grad_norm": 3.578125, "learning_rate": 9.576678414463001e-05, "loss": 3.2237, "step": 3027 }, { "epoch": 0.13234844180252633, "grad_norm": 2.203125, "learning_rate": 9.576401775622222e-05, "loss": 1.7513, "step": 3028 }, { "epoch": 0.13239215000655624, "grad_norm": 2.984375, "learning_rate": 9.576125050418077e-05, "loss": 2.0373, "step": 3029 }, { "epoch": 0.13243585821058612, "grad_norm": 2.578125, "learning_rate": 9.575848238855796e-05, "loss": 2.1212, "step": 3030 }, { "epoch": 0.13247956641461603, "grad_norm": 3.765625, "learning_rate": 9.575571340940597e-05, "loss": 1.8877, "step": 3031 }, { "epoch": 0.1325232746186459, "grad_norm": 3.09375, "learning_rate": 9.575294356677707e-05, "loss": 2.2786, "step": 3032 }, { "epoch": 0.13256698282267582, "grad_norm": 2.890625, "learning_rate": 9.575017286072355e-05, "loss": 1.922, "step": 3033 }, { "epoch": 0.13261069102670572, "grad_norm": 2.609375, "learning_rate": 9.574740129129767e-05, "loss": 2.1338, "step": 3034 }, { "epoch": 0.1326543992307356, "grad_norm": 2.890625, "learning_rate": 9.574462885855174e-05, "loss": 2.4, "step": 3035 }, { "epoch": 0.1326981074347655, "grad_norm": 3.859375, "learning_rate": 9.57418555625381e-05, "loss": 2.177, "step": 3036 }, { "epoch": 0.1327418156387954, "grad_norm": 2.8125, "learning_rate": 9.573908140330905e-05, "loss": 2.1712, "step": 3037 }, { "epoch": 0.1327855238428253, "grad_norm": 2.703125, "learning_rate": 9.573630638091698e-05, "loss": 1.9705, "step": 3038 }, { "epoch": 0.1328292320468552, "grad_norm": 2.21875, "learning_rate": 9.573353049541425e-05, "loss": 1.9445, "step": 3039 }, { "epoch": 0.13287294025088509, "grad_norm": 2.03125, "learning_rate": 9.573075374685323e-05, "loss": 2.0247, "step": 3040 }, { "epoch": 0.132916648454915, "grad_norm": 4.28125, "learning_rate": 9.572797613528633e-05, "loss": 1.9356, "step": 3041 }, { "epoch": 0.13296035665894487, "grad_norm": 3.109375, "learning_rate": 9.572519766076595e-05, "loss": 2.2053, "step": 3042 }, { "epoch": 0.13300406486297478, "grad_norm": 2.921875, "learning_rate": 9.572241832334457e-05, "loss": 2.8439, "step": 3043 }, { "epoch": 0.1330477730670047, "grad_norm": 2.703125, "learning_rate": 9.571963812307459e-05, "loss": 2.3801, "step": 3044 }, { "epoch": 0.13309148127103457, "grad_norm": 2.53125, "learning_rate": 9.57168570600085e-05, "loss": 2.0442, "step": 3045 }, { "epoch": 0.13313518947506447, "grad_norm": 2.1875, "learning_rate": 9.571407513419877e-05, "loss": 1.8136, "step": 3046 }, { "epoch": 0.13317889767909435, "grad_norm": 2.6875, "learning_rate": 9.571129234569792e-05, "loss": 2.2411, "step": 3047 }, { "epoch": 0.13322260588312426, "grad_norm": 2.5625, "learning_rate": 9.570850869455845e-05, "loss": 2.2694, "step": 3048 }, { "epoch": 0.13326631408715417, "grad_norm": 2.484375, "learning_rate": 9.57057241808329e-05, "loss": 1.9991, "step": 3049 }, { "epoch": 0.13331002229118405, "grad_norm": 3.265625, "learning_rate": 9.570293880457382e-05, "loss": 2.7739, "step": 3050 }, { "epoch": 0.13335373049521396, "grad_norm": 2.875, "learning_rate": 9.570015256583375e-05, "loss": 2.8135, "step": 3051 }, { "epoch": 0.13339743869924384, "grad_norm": 2.5, "learning_rate": 9.56973654646653e-05, "loss": 1.9088, "step": 3052 }, { "epoch": 0.13344114690327374, "grad_norm": 2.53125, "learning_rate": 9.569457750112106e-05, "loss": 2.6223, "step": 3053 }, { "epoch": 0.13348485510730365, "grad_norm": 2.515625, "learning_rate": 9.569178867525362e-05, "loss": 2.0422, "step": 3054 }, { "epoch": 0.13352856331133353, "grad_norm": 2.3125, "learning_rate": 9.568899898711563e-05, "loss": 2.0285, "step": 3055 }, { "epoch": 0.13357227151536344, "grad_norm": 2.265625, "learning_rate": 9.568620843675975e-05, "loss": 2.091, "step": 3056 }, { "epoch": 0.13361597971939332, "grad_norm": 2.359375, "learning_rate": 9.56834170242386e-05, "loss": 2.0307, "step": 3057 }, { "epoch": 0.13365968792342323, "grad_norm": 2.453125, "learning_rate": 9.568062474960489e-05, "loss": 2.0899, "step": 3058 }, { "epoch": 0.13370339612745313, "grad_norm": 2.625, "learning_rate": 9.56778316129113e-05, "loss": 1.8432, "step": 3059 }, { "epoch": 0.133747104331483, "grad_norm": 3.671875, "learning_rate": 9.567503761421057e-05, "loss": 1.4322, "step": 3060 }, { "epoch": 0.13379081253551292, "grad_norm": 2.28125, "learning_rate": 9.567224275355539e-05, "loss": 2.1823, "step": 3061 }, { "epoch": 0.1338345207395428, "grad_norm": 2.328125, "learning_rate": 9.566944703099852e-05, "loss": 1.718, "step": 3062 }, { "epoch": 0.1338782289435727, "grad_norm": 2.671875, "learning_rate": 9.56666504465927e-05, "loss": 2.4914, "step": 3063 }, { "epoch": 0.13392193714760262, "grad_norm": 2.640625, "learning_rate": 9.566385300039074e-05, "loss": 2.0368, "step": 3064 }, { "epoch": 0.1339656453516325, "grad_norm": 2.421875, "learning_rate": 9.56610546924454e-05, "loss": 1.9711, "step": 3065 }, { "epoch": 0.1340093535556624, "grad_norm": 2.59375, "learning_rate": 9.56582555228095e-05, "loss": 2.2761, "step": 3066 }, { "epoch": 0.13405306175969228, "grad_norm": 2.3125, "learning_rate": 9.565545549153588e-05, "loss": 1.7822, "step": 3067 }, { "epoch": 0.1340967699637222, "grad_norm": 2.34375, "learning_rate": 9.565265459867736e-05, "loss": 2.0829, "step": 3068 }, { "epoch": 0.1341404781677521, "grad_norm": 2.15625, "learning_rate": 9.564985284428679e-05, "loss": 1.8644, "step": 3069 }, { "epoch": 0.13418418637178198, "grad_norm": 2.1875, "learning_rate": 9.564705022841706e-05, "loss": 1.8726, "step": 3070 }, { "epoch": 0.13422789457581188, "grad_norm": 2.5625, "learning_rate": 9.564424675112106e-05, "loss": 2.0443, "step": 3071 }, { "epoch": 0.13427160277984176, "grad_norm": 2.3125, "learning_rate": 9.564144241245173e-05, "loss": 2.0608, "step": 3072 }, { "epoch": 0.13431531098387167, "grad_norm": 2.921875, "learning_rate": 9.563863721246191e-05, "loss": 1.9922, "step": 3073 }, { "epoch": 0.13435901918790158, "grad_norm": 2.4375, "learning_rate": 9.563583115120458e-05, "loss": 2.3749, "step": 3074 }, { "epoch": 0.13440272739193146, "grad_norm": 2.46875, "learning_rate": 9.563302422873272e-05, "loss": 1.9774, "step": 3075 }, { "epoch": 0.13444643559596137, "grad_norm": 3.96875, "learning_rate": 9.563021644509926e-05, "loss": 2.1169, "step": 3076 }, { "epoch": 0.13449014379999125, "grad_norm": 2.875, "learning_rate": 9.562740780035721e-05, "loss": 2.4444, "step": 3077 }, { "epoch": 0.13453385200402115, "grad_norm": 2.453125, "learning_rate": 9.562459829455957e-05, "loss": 1.5088, "step": 3078 }, { "epoch": 0.13457756020805106, "grad_norm": 2.84375, "learning_rate": 9.562178792775936e-05, "loss": 2.3153, "step": 3079 }, { "epoch": 0.13462126841208094, "grad_norm": 2.53125, "learning_rate": 9.561897670000958e-05, "loss": 2.1347, "step": 3080 }, { "epoch": 0.13466497661611085, "grad_norm": 2.46875, "learning_rate": 9.561616461136336e-05, "loss": 2.0139, "step": 3081 }, { "epoch": 0.13470868482014073, "grad_norm": 2.375, "learning_rate": 9.56133516618737e-05, "loss": 1.9502, "step": 3082 }, { "epoch": 0.13475239302417064, "grad_norm": 2.03125, "learning_rate": 9.561053785159371e-05, "loss": 1.7509, "step": 3083 }, { "epoch": 0.13479610122820054, "grad_norm": 2.875, "learning_rate": 9.56077231805765e-05, "loss": 2.5232, "step": 3084 }, { "epoch": 0.13483980943223042, "grad_norm": 3.359375, "learning_rate": 9.560490764887516e-05, "loss": 2.7191, "step": 3085 }, { "epoch": 0.13488351763626033, "grad_norm": 2.15625, "learning_rate": 9.560209125654282e-05, "loss": 1.7089, "step": 3086 }, { "epoch": 0.1349272258402902, "grad_norm": 3.140625, "learning_rate": 9.559927400363268e-05, "loss": 1.6867, "step": 3087 }, { "epoch": 0.13497093404432012, "grad_norm": 2.875, "learning_rate": 9.559645589019785e-05, "loss": 1.9889, "step": 3088 }, { "epoch": 0.13501464224835003, "grad_norm": 2.5625, "learning_rate": 9.559363691629155e-05, "loss": 1.6799, "step": 3089 }, { "epoch": 0.1350583504523799, "grad_norm": 2.515625, "learning_rate": 9.559081708196696e-05, "loss": 2.2714, "step": 3090 }, { "epoch": 0.1351020586564098, "grad_norm": 5.21875, "learning_rate": 9.55879963872773e-05, "loss": 2.3897, "step": 3091 }, { "epoch": 0.1351457668604397, "grad_norm": 2.328125, "learning_rate": 9.558517483227579e-05, "loss": 2.3887, "step": 3092 }, { "epoch": 0.1351894750644696, "grad_norm": 2.15625, "learning_rate": 9.55823524170157e-05, "loss": 1.8025, "step": 3093 }, { "epoch": 0.1352331832684995, "grad_norm": 3.40625, "learning_rate": 9.557952914155027e-05, "loss": 2.3162, "step": 3094 }, { "epoch": 0.1352768914725294, "grad_norm": 2.21875, "learning_rate": 9.557670500593276e-05, "loss": 1.6999, "step": 3095 }, { "epoch": 0.1353205996765593, "grad_norm": 2.59375, "learning_rate": 9.557388001021653e-05, "loss": 2.1748, "step": 3096 }, { "epoch": 0.13536430788058917, "grad_norm": 2.484375, "learning_rate": 9.557105415445484e-05, "loss": 2.0518, "step": 3097 }, { "epoch": 0.13540801608461908, "grad_norm": 2.609375, "learning_rate": 9.556822743870104e-05, "loss": 2.3651, "step": 3098 }, { "epoch": 0.135451724288649, "grad_norm": 2.46875, "learning_rate": 9.556539986300845e-05, "loss": 1.8946, "step": 3099 }, { "epoch": 0.13549543249267887, "grad_norm": 2.78125, "learning_rate": 9.556257142743046e-05, "loss": 2.0344, "step": 3100 }, { "epoch": 0.13553914069670878, "grad_norm": 2.78125, "learning_rate": 9.555974213202044e-05, "loss": 2.6906, "step": 3101 }, { "epoch": 0.13558284890073866, "grad_norm": 2.203125, "learning_rate": 9.555691197683177e-05, "loss": 2.0669, "step": 3102 }, { "epoch": 0.13562655710476856, "grad_norm": 2.109375, "learning_rate": 9.555408096191786e-05, "loss": 1.9788, "step": 3103 }, { "epoch": 0.13567026530879847, "grad_norm": 2.203125, "learning_rate": 9.555124908733215e-05, "loss": 1.8438, "step": 3104 }, { "epoch": 0.13571397351282835, "grad_norm": 2.140625, "learning_rate": 9.554841635312805e-05, "loss": 2.1955, "step": 3105 }, { "epoch": 0.13575768171685826, "grad_norm": 2.640625, "learning_rate": 9.554558275935907e-05, "loss": 1.9396, "step": 3106 }, { "epoch": 0.13580138992088814, "grad_norm": 2.1875, "learning_rate": 9.554274830607866e-05, "loss": 1.7357, "step": 3107 }, { "epoch": 0.13584509812491805, "grad_norm": 2.859375, "learning_rate": 9.553991299334028e-05, "loss": 1.8541, "step": 3108 }, { "epoch": 0.13588880632894795, "grad_norm": 2.3125, "learning_rate": 9.553707682119746e-05, "loss": 1.9001, "step": 3109 }, { "epoch": 0.13593251453297783, "grad_norm": 2.65625, "learning_rate": 9.553423978970376e-05, "loss": 2.2973, "step": 3110 }, { "epoch": 0.13597622273700774, "grad_norm": 2.78125, "learning_rate": 9.553140189891266e-05, "loss": 2.4227, "step": 3111 }, { "epoch": 0.13601993094103762, "grad_norm": 2.40625, "learning_rate": 9.552856314887772e-05, "loss": 1.8651, "step": 3112 }, { "epoch": 0.13606363914506753, "grad_norm": 3.140625, "learning_rate": 9.552572353965254e-05, "loss": 1.8486, "step": 3113 }, { "epoch": 0.13610734734909744, "grad_norm": 2.6875, "learning_rate": 9.552288307129072e-05, "loss": 2.4455, "step": 3114 }, { "epoch": 0.13615105555312731, "grad_norm": 2.59375, "learning_rate": 9.552004174384583e-05, "loss": 2.3039, "step": 3115 }, { "epoch": 0.13619476375715722, "grad_norm": 2.5, "learning_rate": 9.551719955737148e-05, "loss": 2.0296, "step": 3116 }, { "epoch": 0.1362384719611871, "grad_norm": 2.4375, "learning_rate": 9.551435651192135e-05, "loss": 1.6363, "step": 3117 }, { "epoch": 0.136282180165217, "grad_norm": 3.703125, "learning_rate": 9.551151260754907e-05, "loss": 1.8366, "step": 3118 }, { "epoch": 0.13632588836924692, "grad_norm": 2.9375, "learning_rate": 9.550866784430829e-05, "loss": 1.8424, "step": 3119 }, { "epoch": 0.1363695965732768, "grad_norm": 2.921875, "learning_rate": 9.550582222225273e-05, "loss": 2.4484, "step": 3120 }, { "epoch": 0.1364133047773067, "grad_norm": 2.609375, "learning_rate": 9.550297574143608e-05, "loss": 2.1218, "step": 3121 }, { "epoch": 0.13645701298133658, "grad_norm": 2.265625, "learning_rate": 9.550012840191203e-05, "loss": 1.9442, "step": 3122 }, { "epoch": 0.1365007211853665, "grad_norm": 2.40625, "learning_rate": 9.549728020373434e-05, "loss": 2.4977, "step": 3123 }, { "epoch": 0.1365444293893964, "grad_norm": 2.484375, "learning_rate": 9.549443114695676e-05, "loss": 1.6271, "step": 3124 }, { "epoch": 0.13658813759342628, "grad_norm": 2.203125, "learning_rate": 9.549158123163305e-05, "loss": 1.5913, "step": 3125 }, { "epoch": 0.1366318457974562, "grad_norm": 3.078125, "learning_rate": 9.5488730457817e-05, "loss": 2.578, "step": 3126 }, { "epoch": 0.13667555400148607, "grad_norm": 3.15625, "learning_rate": 9.54858788255624e-05, "loss": 3.1502, "step": 3127 }, { "epoch": 0.13671926220551597, "grad_norm": 2.703125, "learning_rate": 9.548302633492306e-05, "loss": 1.9293, "step": 3128 }, { "epoch": 0.13676297040954588, "grad_norm": 2.359375, "learning_rate": 9.548017298595279e-05, "loss": 1.9183, "step": 3129 }, { "epoch": 0.13680667861357576, "grad_norm": 2.34375, "learning_rate": 9.54773187787055e-05, "loss": 2.0055, "step": 3130 }, { "epoch": 0.13685038681760567, "grad_norm": 2.71875, "learning_rate": 9.547446371323501e-05, "loss": 2.2359, "step": 3131 }, { "epoch": 0.13689409502163555, "grad_norm": 2.796875, "learning_rate": 9.547160778959519e-05, "loss": 2.2192, "step": 3132 }, { "epoch": 0.13693780322566546, "grad_norm": 2.625, "learning_rate": 9.546875100783996e-05, "loss": 2.0813, "step": 3133 }, { "epoch": 0.13698151142969536, "grad_norm": 3.609375, "learning_rate": 9.54658933680232e-05, "loss": 2.0326, "step": 3134 }, { "epoch": 0.13702521963372524, "grad_norm": 2.421875, "learning_rate": 9.546303487019888e-05, "loss": 2.1513, "step": 3135 }, { "epoch": 0.13706892783775515, "grad_norm": 3.421875, "learning_rate": 9.546017551442092e-05, "loss": 2.4596, "step": 3136 }, { "epoch": 0.13711263604178503, "grad_norm": 3.5625, "learning_rate": 9.545731530074328e-05, "loss": 2.2997, "step": 3137 }, { "epoch": 0.13715634424581494, "grad_norm": 2.625, "learning_rate": 9.545445422921996e-05, "loss": 2.3707, "step": 3138 }, { "epoch": 0.13720005244984484, "grad_norm": 3.6875, "learning_rate": 9.545159229990493e-05, "loss": 2.7255, "step": 3139 }, { "epoch": 0.13724376065387472, "grad_norm": 3.90625, "learning_rate": 9.544872951285217e-05, "loss": 3.7178, "step": 3140 }, { "epoch": 0.13728746885790463, "grad_norm": 3.21875, "learning_rate": 9.544586586811576e-05, "loss": 1.2338, "step": 3141 }, { "epoch": 0.1373311770619345, "grad_norm": 2.8125, "learning_rate": 9.544300136574973e-05, "loss": 2.3403, "step": 3142 }, { "epoch": 0.13737488526596442, "grad_norm": 2.515625, "learning_rate": 9.54401360058081e-05, "loss": 2.3907, "step": 3143 }, { "epoch": 0.13741859346999433, "grad_norm": 2.234375, "learning_rate": 9.543726978834497e-05, "loss": 2.0025, "step": 3144 }, { "epoch": 0.1374623016740242, "grad_norm": 2.546875, "learning_rate": 9.543440271341444e-05, "loss": 1.6868, "step": 3145 }, { "epoch": 0.13750600987805411, "grad_norm": 2.296875, "learning_rate": 9.543153478107061e-05, "loss": 1.867, "step": 3146 }, { "epoch": 0.137549718082084, "grad_norm": 2.609375, "learning_rate": 9.542866599136759e-05, "loss": 2.2322, "step": 3147 }, { "epoch": 0.1375934262861139, "grad_norm": 2.203125, "learning_rate": 9.54257963443595e-05, "loss": 2.1401, "step": 3148 }, { "epoch": 0.1376371344901438, "grad_norm": 2.40625, "learning_rate": 9.542292584010056e-05, "loss": 1.5096, "step": 3149 }, { "epoch": 0.1376808426941737, "grad_norm": 3.359375, "learning_rate": 9.542005447864488e-05, "loss": 2.3567, "step": 3150 }, { "epoch": 0.1377245508982036, "grad_norm": 2.484375, "learning_rate": 9.541718226004665e-05, "loss": 2.2019, "step": 3151 }, { "epoch": 0.13776825910223348, "grad_norm": 2.71875, "learning_rate": 9.541430918436011e-05, "loss": 1.9213, "step": 3152 }, { "epoch": 0.13781196730626338, "grad_norm": 2.40625, "learning_rate": 9.541143525163946e-05, "loss": 1.7277, "step": 3153 }, { "epoch": 0.1378556755102933, "grad_norm": 2.390625, "learning_rate": 9.540856046193894e-05, "loss": 2.1676, "step": 3154 }, { "epoch": 0.13789938371432317, "grad_norm": 2.21875, "learning_rate": 9.540568481531277e-05, "loss": 1.7101, "step": 3155 }, { "epoch": 0.13794309191835308, "grad_norm": 2.125, "learning_rate": 9.540280831181525e-05, "loss": 1.8688, "step": 3156 }, { "epoch": 0.13798680012238296, "grad_norm": 2.859375, "learning_rate": 9.539993095150066e-05, "loss": 1.7798, "step": 3157 }, { "epoch": 0.13803050832641287, "grad_norm": 3.765625, "learning_rate": 9.53970527344233e-05, "loss": 2.4279, "step": 3158 }, { "epoch": 0.13807421653044277, "grad_norm": 2.203125, "learning_rate": 9.539417366063748e-05, "loss": 2.1791, "step": 3159 }, { "epoch": 0.13811792473447265, "grad_norm": 2.359375, "learning_rate": 9.539129373019754e-05, "loss": 1.9027, "step": 3160 }, { "epoch": 0.13816163293850256, "grad_norm": 2.078125, "learning_rate": 9.53884129431578e-05, "loss": 1.9052, "step": 3161 }, { "epoch": 0.13820534114253244, "grad_norm": 2.296875, "learning_rate": 9.538553129957268e-05, "loss": 2.1048, "step": 3162 }, { "epoch": 0.13824904934656235, "grad_norm": 2.296875, "learning_rate": 9.538264879949652e-05, "loss": 1.6879, "step": 3163 }, { "epoch": 0.13829275755059225, "grad_norm": 2.6875, "learning_rate": 9.537976544298373e-05, "loss": 2.0213, "step": 3164 }, { "epoch": 0.13833646575462213, "grad_norm": 2.328125, "learning_rate": 9.53768812300887e-05, "loss": 2.1036, "step": 3165 }, { "epoch": 0.13838017395865204, "grad_norm": 2.25, "learning_rate": 9.537399616086588e-05, "loss": 2.0177, "step": 3166 }, { "epoch": 0.13842388216268192, "grad_norm": 2.265625, "learning_rate": 9.537111023536973e-05, "loss": 1.8663, "step": 3167 }, { "epoch": 0.13846759036671183, "grad_norm": 2.34375, "learning_rate": 9.53682234536547e-05, "loss": 1.6764, "step": 3168 }, { "epoch": 0.13851129857074174, "grad_norm": 2.453125, "learning_rate": 9.536533581577525e-05, "loss": 2.2184, "step": 3169 }, { "epoch": 0.13855500677477162, "grad_norm": 2.625, "learning_rate": 9.536244732178588e-05, "loss": 1.9081, "step": 3170 }, { "epoch": 0.13859871497880152, "grad_norm": 2.328125, "learning_rate": 9.535955797174112e-05, "loss": 2.1123, "step": 3171 }, { "epoch": 0.13864242318283143, "grad_norm": 2.546875, "learning_rate": 9.535666776569547e-05, "loss": 2.1118, "step": 3172 }, { "epoch": 0.1386861313868613, "grad_norm": 2.453125, "learning_rate": 9.53537767037035e-05, "loss": 2.053, "step": 3173 }, { "epoch": 0.13872983959089122, "grad_norm": 2.78125, "learning_rate": 9.535088478581975e-05, "loss": 2.8949, "step": 3174 }, { "epoch": 0.1387735477949211, "grad_norm": 2.328125, "learning_rate": 9.53479920120988e-05, "loss": 1.5815, "step": 3175 }, { "epoch": 0.138817255998951, "grad_norm": 2.296875, "learning_rate": 9.534509838259523e-05, "loss": 2.0353, "step": 3176 }, { "epoch": 0.1388609642029809, "grad_norm": 2.796875, "learning_rate": 9.534220389736367e-05, "loss": 1.7666, "step": 3177 }, { "epoch": 0.1389046724070108, "grad_norm": 3.046875, "learning_rate": 9.533930855645872e-05, "loss": 3.2732, "step": 3178 }, { "epoch": 0.1389483806110407, "grad_norm": 2.265625, "learning_rate": 9.533641235993504e-05, "loss": 2.1203, "step": 3179 }, { "epoch": 0.13899208881507058, "grad_norm": 3.265625, "learning_rate": 9.533351530784726e-05, "loss": 2.1847, "step": 3180 }, { "epoch": 0.1390357970191005, "grad_norm": 11.3125, "learning_rate": 9.533061740025008e-05, "loss": 3.1752, "step": 3181 }, { "epoch": 0.1390795052231304, "grad_norm": 2.34375, "learning_rate": 9.532771863719816e-05, "loss": 2.4082, "step": 3182 }, { "epoch": 0.13912321342716027, "grad_norm": 2.125, "learning_rate": 9.532481901874624e-05, "loss": 2.0876, "step": 3183 }, { "epoch": 0.13916692163119018, "grad_norm": 2.203125, "learning_rate": 9.532191854494901e-05, "loss": 2.1728, "step": 3184 }, { "epoch": 0.13921062983522006, "grad_norm": 3.0, "learning_rate": 9.531901721586121e-05, "loss": 2.6058, "step": 3185 }, { "epoch": 0.13925433803924997, "grad_norm": 2.28125, "learning_rate": 9.531611503153759e-05, "loss": 1.9939, "step": 3186 }, { "epoch": 0.13929804624327988, "grad_norm": 2.75, "learning_rate": 9.531321199203292e-05, "loss": 2.3874, "step": 3187 }, { "epoch": 0.13934175444730976, "grad_norm": 2.234375, "learning_rate": 9.5310308097402e-05, "loss": 1.8303, "step": 3188 }, { "epoch": 0.13938546265133966, "grad_norm": 2.234375, "learning_rate": 9.530740334769963e-05, "loss": 2.1354, "step": 3189 }, { "epoch": 0.13942917085536954, "grad_norm": 2.90625, "learning_rate": 9.53044977429806e-05, "loss": 2.4122, "step": 3190 }, { "epoch": 0.13947287905939945, "grad_norm": 2.484375, "learning_rate": 9.530159128329976e-05, "loss": 1.836, "step": 3191 }, { "epoch": 0.13951658726342936, "grad_norm": 2.5625, "learning_rate": 9.529868396871197e-05, "loss": 1.8899, "step": 3192 }, { "epoch": 0.13956029546745924, "grad_norm": 2.734375, "learning_rate": 9.529577579927209e-05, "loss": 2.129, "step": 3193 }, { "epoch": 0.13960400367148915, "grad_norm": 2.890625, "learning_rate": 9.529286677503499e-05, "loss": 2.3418, "step": 3194 }, { "epoch": 0.13964771187551903, "grad_norm": 2.40625, "learning_rate": 9.528995689605556e-05, "loss": 1.5699, "step": 3195 }, { "epoch": 0.13969142007954893, "grad_norm": 2.359375, "learning_rate": 9.528704616238874e-05, "loss": 1.5867, "step": 3196 }, { "epoch": 0.13973512828357884, "grad_norm": 2.59375, "learning_rate": 9.528413457408944e-05, "loss": 1.8923, "step": 3197 }, { "epoch": 0.13977883648760872, "grad_norm": 2.6875, "learning_rate": 9.528122213121262e-05, "loss": 2.0186, "step": 3198 }, { "epoch": 0.13982254469163863, "grad_norm": 2.390625, "learning_rate": 9.527830883381324e-05, "loss": 1.7945, "step": 3199 }, { "epoch": 0.1398662528956685, "grad_norm": 2.1875, "learning_rate": 9.527539468194625e-05, "loss": 1.8891, "step": 3200 }, { "epoch": 0.13990996109969842, "grad_norm": 2.296875, "learning_rate": 9.527247967566668e-05, "loss": 2.1507, "step": 3201 }, { "epoch": 0.13995366930372832, "grad_norm": 2.84375, "learning_rate": 9.526956381502953e-05, "loss": 2.117, "step": 3202 }, { "epoch": 0.1399973775077582, "grad_norm": 2.609375, "learning_rate": 9.526664710008983e-05, "loss": 2.8609, "step": 3203 }, { "epoch": 0.1400410857117881, "grad_norm": 2.640625, "learning_rate": 9.52637295309026e-05, "loss": 2.5578, "step": 3204 }, { "epoch": 0.140084793915818, "grad_norm": 5.8125, "learning_rate": 9.526081110752292e-05, "loss": 2.2185, "step": 3205 }, { "epoch": 0.1401285021198479, "grad_norm": 2.4375, "learning_rate": 9.525789183000588e-05, "loss": 1.6714, "step": 3206 }, { "epoch": 0.1401722103238778, "grad_norm": 2.390625, "learning_rate": 9.525497169840653e-05, "loss": 2.083, "step": 3207 }, { "epoch": 0.14021591852790768, "grad_norm": 2.8125, "learning_rate": 9.525205071278e-05, "loss": 2.0151, "step": 3208 }, { "epoch": 0.1402596267319376, "grad_norm": 2.1875, "learning_rate": 9.524912887318142e-05, "loss": 1.9866, "step": 3209 }, { "epoch": 0.14030333493596747, "grad_norm": 2.671875, "learning_rate": 9.524620617966593e-05, "loss": 2.3579, "step": 3210 }, { "epoch": 0.14034704313999738, "grad_norm": 2.3125, "learning_rate": 9.524328263228865e-05, "loss": 1.9456, "step": 3211 }, { "epoch": 0.1403907513440273, "grad_norm": 2.328125, "learning_rate": 9.52403582311048e-05, "loss": 1.9813, "step": 3212 }, { "epoch": 0.14043445954805717, "grad_norm": 2.859375, "learning_rate": 9.523743297616954e-05, "loss": 2.962, "step": 3213 }, { "epoch": 0.14047816775208707, "grad_norm": 2.328125, "learning_rate": 9.523450686753807e-05, "loss": 1.8951, "step": 3214 }, { "epoch": 0.14052187595611695, "grad_norm": 2.359375, "learning_rate": 9.523157990526564e-05, "loss": 2.1464, "step": 3215 }, { "epoch": 0.14056558416014686, "grad_norm": 2.984375, "learning_rate": 9.522865208940745e-05, "loss": 2.693, "step": 3216 }, { "epoch": 0.14060929236417677, "grad_norm": 3.21875, "learning_rate": 9.522572342001876e-05, "loss": 2.5182, "step": 3217 }, { "epoch": 0.14065300056820665, "grad_norm": 2.515625, "learning_rate": 9.522279389715488e-05, "loss": 2.3654, "step": 3218 }, { "epoch": 0.14069670877223656, "grad_norm": 2.703125, "learning_rate": 9.521986352087102e-05, "loss": 2.6586, "step": 3219 }, { "epoch": 0.14074041697626644, "grad_norm": 2.53125, "learning_rate": 9.521693229122255e-05, "loss": 2.3786, "step": 3220 }, { "epoch": 0.14078412518029634, "grad_norm": 2.375, "learning_rate": 9.521400020826475e-05, "loss": 2.2397, "step": 3221 }, { "epoch": 0.14082783338432625, "grad_norm": 2.65625, "learning_rate": 9.521106727205295e-05, "loss": 1.7273, "step": 3222 }, { "epoch": 0.14087154158835613, "grad_norm": 3.171875, "learning_rate": 9.520813348264252e-05, "loss": 1.6895, "step": 3223 }, { "epoch": 0.14091524979238604, "grad_norm": 2.265625, "learning_rate": 9.520519884008881e-05, "loss": 2.1822, "step": 3224 }, { "epoch": 0.14095895799641592, "grad_norm": 2.375, "learning_rate": 9.52022633444472e-05, "loss": 2.0736, "step": 3225 }, { "epoch": 0.14100266620044583, "grad_norm": 2.90625, "learning_rate": 9.519932699577309e-05, "loss": 2.0192, "step": 3226 }, { "epoch": 0.14104637440447573, "grad_norm": 4.28125, "learning_rate": 9.519638979412191e-05, "loss": 2.1896, "step": 3227 }, { "epoch": 0.1410900826085056, "grad_norm": 3.015625, "learning_rate": 9.519345173954907e-05, "loss": 2.3782, "step": 3228 }, { "epoch": 0.14113379081253552, "grad_norm": 2.328125, "learning_rate": 9.519051283211002e-05, "loss": 2.3049, "step": 3229 }, { "epoch": 0.1411774990165654, "grad_norm": 2.8125, "learning_rate": 9.518757307186021e-05, "loss": 1.8682, "step": 3230 }, { "epoch": 0.1412212072205953, "grad_norm": 10.5, "learning_rate": 9.518463245885513e-05, "loss": 3.0038, "step": 3231 }, { "epoch": 0.14126491542462521, "grad_norm": 2.390625, "learning_rate": 9.518169099315028e-05, "loss": 1.859, "step": 3232 }, { "epoch": 0.1413086236286551, "grad_norm": 2.390625, "learning_rate": 9.517874867480117e-05, "loss": 1.7138, "step": 3233 }, { "epoch": 0.141352331832685, "grad_norm": 2.84375, "learning_rate": 9.517580550386331e-05, "loss": 3.028, "step": 3234 }, { "epoch": 0.14139604003671488, "grad_norm": 2.3125, "learning_rate": 9.517286148039223e-05, "loss": 2.257, "step": 3235 }, { "epoch": 0.1414397482407448, "grad_norm": 3.015625, "learning_rate": 9.516991660444355e-05, "loss": 2.1228, "step": 3236 }, { "epoch": 0.1414834564447747, "grad_norm": 2.4375, "learning_rate": 9.516697087607276e-05, "loss": 2.25, "step": 3237 }, { "epoch": 0.14152716464880458, "grad_norm": 2.59375, "learning_rate": 9.516402429533552e-05, "loss": 2.3626, "step": 3238 }, { "epoch": 0.14157087285283448, "grad_norm": 2.453125, "learning_rate": 9.51610768622874e-05, "loss": 1.8227, "step": 3239 }, { "epoch": 0.14161458105686436, "grad_norm": 2.703125, "learning_rate": 9.515812857698403e-05, "loss": 2.5844, "step": 3240 }, { "epoch": 0.14165828926089427, "grad_norm": 2.140625, "learning_rate": 9.515517943948105e-05, "loss": 1.8612, "step": 3241 }, { "epoch": 0.14170199746492418, "grad_norm": 3.859375, "learning_rate": 9.51522294498341e-05, "loss": 2.173, "step": 3242 }, { "epoch": 0.14174570566895406, "grad_norm": 2.734375, "learning_rate": 9.514927860809888e-05, "loss": 2.1819, "step": 3243 }, { "epoch": 0.14178941387298397, "grad_norm": 2.015625, "learning_rate": 9.514632691433107e-05, "loss": 2.0039, "step": 3244 }, { "epoch": 0.14183312207701385, "grad_norm": 2.453125, "learning_rate": 9.514337436858635e-05, "loss": 2.5136, "step": 3245 }, { "epoch": 0.14187683028104375, "grad_norm": 2.21875, "learning_rate": 9.514042097092045e-05, "loss": 1.9573, "step": 3246 }, { "epoch": 0.14192053848507366, "grad_norm": 2.859375, "learning_rate": 9.513746672138911e-05, "loss": 2.0925, "step": 3247 }, { "epoch": 0.14196424668910354, "grad_norm": 2.5, "learning_rate": 9.513451162004809e-05, "loss": 1.9035, "step": 3248 }, { "epoch": 0.14200795489313345, "grad_norm": 2.859375, "learning_rate": 9.513155566695313e-05, "loss": 2.0891, "step": 3249 }, { "epoch": 0.14205166309716333, "grad_norm": 2.140625, "learning_rate": 9.512859886216003e-05, "loss": 2.0599, "step": 3250 }, { "epoch": 0.14209537130119324, "grad_norm": 2.625, "learning_rate": 9.51256412057246e-05, "loss": 2.3514, "step": 3251 }, { "epoch": 0.14213907950522314, "grad_norm": 2.109375, "learning_rate": 9.512268269770264e-05, "loss": 2.2689, "step": 3252 }, { "epoch": 0.14218278770925302, "grad_norm": 2.625, "learning_rate": 9.511972333814998e-05, "loss": 2.5667, "step": 3253 }, { "epoch": 0.14222649591328293, "grad_norm": 2.4375, "learning_rate": 9.511676312712246e-05, "loss": 1.9769, "step": 3254 }, { "epoch": 0.1422702041173128, "grad_norm": 2.375, "learning_rate": 9.511380206467597e-05, "loss": 2.144, "step": 3255 }, { "epoch": 0.14231391232134272, "grad_norm": 2.59375, "learning_rate": 9.511084015086637e-05, "loss": 2.1737, "step": 3256 }, { "epoch": 0.14235762052537262, "grad_norm": 2.140625, "learning_rate": 9.510787738574958e-05, "loss": 1.7987, "step": 3257 }, { "epoch": 0.1424013287294025, "grad_norm": 2.796875, "learning_rate": 9.510491376938147e-05, "loss": 2.2213, "step": 3258 }, { "epoch": 0.1424450369334324, "grad_norm": 2.328125, "learning_rate": 9.510194930181799e-05, "loss": 1.9827, "step": 3259 }, { "epoch": 0.1424887451374623, "grad_norm": 2.421875, "learning_rate": 9.50989839831151e-05, "loss": 2.4605, "step": 3260 }, { "epoch": 0.1425324533414922, "grad_norm": 2.25, "learning_rate": 9.509601781332873e-05, "loss": 1.6873, "step": 3261 }, { "epoch": 0.1425761615455221, "grad_norm": 2.265625, "learning_rate": 9.509305079251487e-05, "loss": 1.9218, "step": 3262 }, { "epoch": 0.142619869749552, "grad_norm": 2.5, "learning_rate": 9.509008292072951e-05, "loss": 1.5666, "step": 3263 }, { "epoch": 0.1426635779535819, "grad_norm": 2.0625, "learning_rate": 9.508711419802867e-05, "loss": 1.6043, "step": 3264 }, { "epoch": 0.14270728615761177, "grad_norm": 2.15625, "learning_rate": 9.508414462446835e-05, "loss": 1.8474, "step": 3265 }, { "epoch": 0.14275099436164168, "grad_norm": 2.78125, "learning_rate": 9.508117420010462e-05, "loss": 2.0662, "step": 3266 }, { "epoch": 0.1427947025656716, "grad_norm": 2.0625, "learning_rate": 9.507820292499353e-05, "loss": 1.7683, "step": 3267 }, { "epoch": 0.14283841076970147, "grad_norm": 2.390625, "learning_rate": 9.507523079919111e-05, "loss": 1.8974, "step": 3268 }, { "epoch": 0.14288211897373138, "grad_norm": 2.203125, "learning_rate": 9.507225782275349e-05, "loss": 1.9533, "step": 3269 }, { "epoch": 0.14292582717776126, "grad_norm": 2.921875, "learning_rate": 9.506928399573678e-05, "loss": 2.3724, "step": 3270 }, { "epoch": 0.14296953538179116, "grad_norm": 2.578125, "learning_rate": 9.506630931819707e-05, "loss": 2.1634, "step": 3271 }, { "epoch": 0.14301324358582107, "grad_norm": 2.59375, "learning_rate": 9.506333379019052e-05, "loss": 2.2706, "step": 3272 }, { "epoch": 0.14305695178985095, "grad_norm": 2.1875, "learning_rate": 9.506035741177329e-05, "loss": 1.7177, "step": 3273 }, { "epoch": 0.14310065999388086, "grad_norm": 2.328125, "learning_rate": 9.50573801830015e-05, "loss": 1.9437, "step": 3274 }, { "epoch": 0.14314436819791074, "grad_norm": 2.4375, "learning_rate": 9.50544021039314e-05, "loss": 1.9555, "step": 3275 }, { "epoch": 0.14318807640194064, "grad_norm": 2.609375, "learning_rate": 9.505142317461915e-05, "loss": 2.1584, "step": 3276 }, { "epoch": 0.14323178460597055, "grad_norm": 3.125, "learning_rate": 9.504844339512095e-05, "loss": 3.2088, "step": 3277 }, { "epoch": 0.14327549281000043, "grad_norm": 2.34375, "learning_rate": 9.504546276549309e-05, "loss": 1.9269, "step": 3278 }, { "epoch": 0.14331920101403034, "grad_norm": 2.71875, "learning_rate": 9.504248128579177e-05, "loss": 2.091, "step": 3279 }, { "epoch": 0.14336290921806022, "grad_norm": 2.4375, "learning_rate": 9.503949895607329e-05, "loss": 2.1584, "step": 3280 }, { "epoch": 0.14340661742209013, "grad_norm": 2.234375, "learning_rate": 9.50365157763939e-05, "loss": 1.7713, "step": 3281 }, { "epoch": 0.14345032562612003, "grad_norm": 2.8125, "learning_rate": 9.503353174680991e-05, "loss": 2.0846, "step": 3282 }, { "epoch": 0.14349403383014991, "grad_norm": 2.671875, "learning_rate": 9.503054686737763e-05, "loss": 2.443, "step": 3283 }, { "epoch": 0.14353774203417982, "grad_norm": 2.890625, "learning_rate": 9.502756113815338e-05, "loss": 2.3072, "step": 3284 }, { "epoch": 0.1435814502382097, "grad_norm": 2.28125, "learning_rate": 9.502457455919355e-05, "loss": 1.807, "step": 3285 }, { "epoch": 0.1436251584422396, "grad_norm": 2.703125, "learning_rate": 9.502158713055444e-05, "loss": 2.1623, "step": 3286 }, { "epoch": 0.14366886664626952, "grad_norm": 2.4375, "learning_rate": 9.501859885229248e-05, "loss": 1.5188, "step": 3287 }, { "epoch": 0.1437125748502994, "grad_norm": 2.703125, "learning_rate": 9.501560972446402e-05, "loss": 2.3192, "step": 3288 }, { "epoch": 0.1437562830543293, "grad_norm": 2.359375, "learning_rate": 9.501261974712548e-05, "loss": 1.9327, "step": 3289 }, { "epoch": 0.14379999125835918, "grad_norm": 2.359375, "learning_rate": 9.50096289203333e-05, "loss": 2.3821, "step": 3290 }, { "epoch": 0.1438436994623891, "grad_norm": 2.671875, "learning_rate": 9.500663724414392e-05, "loss": 1.9964, "step": 3291 }, { "epoch": 0.143887407666419, "grad_norm": 2.296875, "learning_rate": 9.500364471861378e-05, "loss": 1.928, "step": 3292 }, { "epoch": 0.14393111587044888, "grad_norm": 2.46875, "learning_rate": 9.500065134379939e-05, "loss": 2.0895, "step": 3293 }, { "epoch": 0.14397482407447879, "grad_norm": 3.375, "learning_rate": 9.49976571197572e-05, "loss": 1.7263, "step": 3294 }, { "epoch": 0.14401853227850867, "grad_norm": 3.0, "learning_rate": 9.499466204654372e-05, "loss": 2.0133, "step": 3295 }, { "epoch": 0.14406224048253857, "grad_norm": 2.5625, "learning_rate": 9.499166612421548e-05, "loss": 2.2996, "step": 3296 }, { "epoch": 0.14410594868656848, "grad_norm": 2.1875, "learning_rate": 9.498866935282902e-05, "loss": 2.0241, "step": 3297 }, { "epoch": 0.14414965689059836, "grad_norm": 2.125, "learning_rate": 9.49856717324409e-05, "loss": 2.0614, "step": 3298 }, { "epoch": 0.14419336509462827, "grad_norm": 2.59375, "learning_rate": 9.498267326310768e-05, "loss": 1.7679, "step": 3299 }, { "epoch": 0.14423707329865815, "grad_norm": 2.53125, "learning_rate": 9.497967394488594e-05, "loss": 2.318, "step": 3300 }, { "epoch": 0.14428078150268805, "grad_norm": 3.5625, "learning_rate": 9.497667377783228e-05, "loss": 2.6375, "step": 3301 }, { "epoch": 0.14432448970671796, "grad_norm": 6.15625, "learning_rate": 9.497367276200335e-05, "loss": 1.3413, "step": 3302 }, { "epoch": 0.14436819791074784, "grad_norm": 2.359375, "learning_rate": 9.497067089745572e-05, "loss": 2.1526, "step": 3303 }, { "epoch": 0.14441190611477775, "grad_norm": 2.8125, "learning_rate": 9.496766818424612e-05, "loss": 2.5314, "step": 3304 }, { "epoch": 0.14445561431880763, "grad_norm": 2.53125, "learning_rate": 9.496466462243115e-05, "loss": 2.562, "step": 3305 }, { "epoch": 0.14449932252283754, "grad_norm": 2.4375, "learning_rate": 9.496166021206753e-05, "loss": 1.9449, "step": 3306 }, { "epoch": 0.14454303072686744, "grad_norm": 2.140625, "learning_rate": 9.495865495321194e-05, "loss": 1.7941, "step": 3307 }, { "epoch": 0.14458673893089732, "grad_norm": 2.5625, "learning_rate": 9.495564884592109e-05, "loss": 3.1137, "step": 3308 }, { "epoch": 0.14463044713492723, "grad_norm": 2.25, "learning_rate": 9.495264189025172e-05, "loss": 1.586, "step": 3309 }, { "epoch": 0.1446741553389571, "grad_norm": 3.078125, "learning_rate": 9.494963408626056e-05, "loss": 1.9154, "step": 3310 }, { "epoch": 0.14471786354298702, "grad_norm": 2.453125, "learning_rate": 9.49466254340044e-05, "loss": 1.7515, "step": 3311 }, { "epoch": 0.14476157174701693, "grad_norm": 10.5, "learning_rate": 9.494361593354e-05, "loss": 6.3968, "step": 3312 }, { "epoch": 0.1448052799510468, "grad_norm": 2.625, "learning_rate": 9.494060558492415e-05, "loss": 2.0223, "step": 3313 }, { "epoch": 0.1448489881550767, "grad_norm": 2.515625, "learning_rate": 9.493759438821366e-05, "loss": 1.9596, "step": 3314 }, { "epoch": 0.1448926963591066, "grad_norm": 3.046875, "learning_rate": 9.493458234346537e-05, "loss": 1.9956, "step": 3315 }, { "epoch": 0.1449364045631365, "grad_norm": 2.875, "learning_rate": 9.493156945073611e-05, "loss": 2.5605, "step": 3316 }, { "epoch": 0.1449801127671664, "grad_norm": 2.984375, "learning_rate": 9.492855571008275e-05, "loss": 2.2362, "step": 3317 }, { "epoch": 0.1450238209711963, "grad_norm": 2.625, "learning_rate": 9.492554112156214e-05, "loss": 1.9502, "step": 3318 }, { "epoch": 0.1450675291752262, "grad_norm": 2.21875, "learning_rate": 9.492252568523117e-05, "loss": 1.599, "step": 3319 }, { "epoch": 0.14511123737925608, "grad_norm": 2.640625, "learning_rate": 9.491950940114678e-05, "loss": 1.7509, "step": 3320 }, { "epoch": 0.14515494558328598, "grad_norm": 2.84375, "learning_rate": 9.491649226936585e-05, "loss": 3.2139, "step": 3321 }, { "epoch": 0.1451986537873159, "grad_norm": 4.75, "learning_rate": 9.491347428994536e-05, "loss": 1.9854, "step": 3322 }, { "epoch": 0.14524236199134577, "grad_norm": 3.546875, "learning_rate": 9.491045546294223e-05, "loss": 2.3345, "step": 3323 }, { "epoch": 0.14528607019537568, "grad_norm": 2.390625, "learning_rate": 9.490743578841344e-05, "loss": 2.7734, "step": 3324 }, { "epoch": 0.14532977839940556, "grad_norm": 2.75, "learning_rate": 9.490441526641599e-05, "loss": 1.8867, "step": 3325 }, { "epoch": 0.14537348660343546, "grad_norm": 2.546875, "learning_rate": 9.490139389700685e-05, "loss": 2.8756, "step": 3326 }, { "epoch": 0.14541719480746537, "grad_norm": 2.140625, "learning_rate": 9.489837168024307e-05, "loss": 2.0287, "step": 3327 }, { "epoch": 0.14546090301149525, "grad_norm": 2.640625, "learning_rate": 9.489534861618166e-05, "loss": 2.3343, "step": 3328 }, { "epoch": 0.14550461121552516, "grad_norm": 2.765625, "learning_rate": 9.48923247048797e-05, "loss": 2.1087, "step": 3329 }, { "epoch": 0.14554831941955504, "grad_norm": 2.15625, "learning_rate": 9.488929994639421e-05, "loss": 2.1728, "step": 3330 }, { "epoch": 0.14559202762358495, "grad_norm": 2.5, "learning_rate": 9.488627434078232e-05, "loss": 2.136, "step": 3331 }, { "epoch": 0.14563573582761485, "grad_norm": 2.15625, "learning_rate": 9.488324788810108e-05, "loss": 2.0132, "step": 3332 }, { "epoch": 0.14567944403164473, "grad_norm": 2.5, "learning_rate": 9.488022058840765e-05, "loss": 1.7678, "step": 3333 }, { "epoch": 0.14572315223567464, "grad_norm": 2.4375, "learning_rate": 9.487719244175912e-05, "loss": 1.7634, "step": 3334 }, { "epoch": 0.14576686043970452, "grad_norm": 2.734375, "learning_rate": 9.487416344821267e-05, "loss": 1.8089, "step": 3335 }, { "epoch": 0.14581056864373443, "grad_norm": 2.390625, "learning_rate": 9.487113360782543e-05, "loss": 1.9985, "step": 3336 }, { "epoch": 0.14585427684776434, "grad_norm": 2.78125, "learning_rate": 9.48681029206546e-05, "loss": 1.8382, "step": 3337 }, { "epoch": 0.14589798505179422, "grad_norm": 2.875, "learning_rate": 9.486507138675735e-05, "loss": 2.4906, "step": 3338 }, { "epoch": 0.14594169325582412, "grad_norm": 2.671875, "learning_rate": 9.486203900619092e-05, "loss": 2.1798, "step": 3339 }, { "epoch": 0.145985401459854, "grad_norm": 2.609375, "learning_rate": 9.485900577901252e-05, "loss": 2.698, "step": 3340 }, { "epoch": 0.1460291096638839, "grad_norm": 2.25, "learning_rate": 9.485597170527939e-05, "loss": 2.2058, "step": 3341 }, { "epoch": 0.14607281786791382, "grad_norm": 3.546875, "learning_rate": 9.485293678504879e-05, "loss": 2.5278, "step": 3342 }, { "epoch": 0.1461165260719437, "grad_norm": 2.0625, "learning_rate": 9.484990101837799e-05, "loss": 1.7709, "step": 3343 }, { "epoch": 0.1461602342759736, "grad_norm": 2.25, "learning_rate": 9.484686440532429e-05, "loss": 1.9356, "step": 3344 }, { "epoch": 0.14620394248000348, "grad_norm": 3.25, "learning_rate": 9.484382694594498e-05, "loss": 1.9257, "step": 3345 }, { "epoch": 0.1462476506840334, "grad_norm": 2.28125, "learning_rate": 9.484078864029739e-05, "loss": 1.8498, "step": 3346 }, { "epoch": 0.1462913588880633, "grad_norm": 2.84375, "learning_rate": 9.483774948843884e-05, "loss": 2.0713, "step": 3347 }, { "epoch": 0.14633506709209318, "grad_norm": 2.65625, "learning_rate": 9.483470949042672e-05, "loss": 1.8746, "step": 3348 }, { "epoch": 0.1463787752961231, "grad_norm": 3.25, "learning_rate": 9.483166864631837e-05, "loss": 2.3901, "step": 3349 }, { "epoch": 0.14642248350015297, "grad_norm": 3.15625, "learning_rate": 9.482862695617119e-05, "loss": 2.7898, "step": 3350 }, { "epoch": 0.14646619170418287, "grad_norm": 2.1875, "learning_rate": 9.482558442004257e-05, "loss": 2.0902, "step": 3351 }, { "epoch": 0.14650989990821278, "grad_norm": 2.8125, "learning_rate": 9.482254103798993e-05, "loss": 1.9117, "step": 3352 }, { "epoch": 0.14655360811224266, "grad_norm": 3.1875, "learning_rate": 9.481949681007069e-05, "loss": 2.7748, "step": 3353 }, { "epoch": 0.14659731631627257, "grad_norm": 2.40625, "learning_rate": 9.481645173634234e-05, "loss": 1.8882, "step": 3354 }, { "epoch": 0.14664102452030245, "grad_norm": 2.65625, "learning_rate": 9.48134058168623e-05, "loss": 2.5552, "step": 3355 }, { "epoch": 0.14668473272433236, "grad_norm": 2.21875, "learning_rate": 9.481035905168808e-05, "loss": 1.67, "step": 3356 }, { "epoch": 0.14672844092836226, "grad_norm": 3.453125, "learning_rate": 9.480731144087716e-05, "loss": 1.8939, "step": 3357 }, { "epoch": 0.14677214913239214, "grad_norm": 2.296875, "learning_rate": 9.480426298448706e-05, "loss": 1.988, "step": 3358 }, { "epoch": 0.14681585733642205, "grad_norm": 2.453125, "learning_rate": 9.480121368257531e-05, "loss": 1.9197, "step": 3359 }, { "epoch": 0.14685956554045193, "grad_norm": 2.421875, "learning_rate": 9.479816353519946e-05, "loss": 1.9942, "step": 3360 }, { "epoch": 0.14690327374448184, "grad_norm": 3.015625, "learning_rate": 9.479511254241704e-05, "loss": 2.4083, "step": 3361 }, { "epoch": 0.14694698194851175, "grad_norm": 2.296875, "learning_rate": 9.479206070428568e-05, "loss": 2.3581, "step": 3362 }, { "epoch": 0.14699069015254163, "grad_norm": 2.40625, "learning_rate": 9.478900802086292e-05, "loss": 1.7141, "step": 3363 }, { "epoch": 0.14703439835657153, "grad_norm": 2.453125, "learning_rate": 9.478595449220639e-05, "loss": 1.9283, "step": 3364 }, { "epoch": 0.1470781065606014, "grad_norm": 2.125, "learning_rate": 9.478290011837375e-05, "loss": 1.9525, "step": 3365 }, { "epoch": 0.14712181476463132, "grad_norm": 2.25, "learning_rate": 9.477984489942258e-05, "loss": 2.0672, "step": 3366 }, { "epoch": 0.14716552296866123, "grad_norm": 2.15625, "learning_rate": 9.477678883541055e-05, "loss": 1.9176, "step": 3367 }, { "epoch": 0.1472092311726911, "grad_norm": 2.25, "learning_rate": 9.477373192639536e-05, "loss": 2.0645, "step": 3368 }, { "epoch": 0.14725293937672101, "grad_norm": 2.4375, "learning_rate": 9.477067417243468e-05, "loss": 1.8324, "step": 3369 }, { "epoch": 0.1472966475807509, "grad_norm": 4.5625, "learning_rate": 9.476761557358623e-05, "loss": 2.603, "step": 3370 }, { "epoch": 0.1473403557847808, "grad_norm": 3.234375, "learning_rate": 9.476455612990771e-05, "loss": 2.2431, "step": 3371 }, { "epoch": 0.1473840639888107, "grad_norm": 2.171875, "learning_rate": 9.476149584145687e-05, "loss": 1.86, "step": 3372 }, { "epoch": 0.1474277721928406, "grad_norm": 2.65625, "learning_rate": 9.475843470829145e-05, "loss": 1.998, "step": 3373 }, { "epoch": 0.1474714803968705, "grad_norm": 2.78125, "learning_rate": 9.475537273046922e-05, "loss": 2.1818, "step": 3374 }, { "epoch": 0.14751518860090038, "grad_norm": 2.359375, "learning_rate": 9.475230990804797e-05, "loss": 1.7166, "step": 3375 }, { "epoch": 0.14755889680493028, "grad_norm": 2.78125, "learning_rate": 9.47492462410855e-05, "loss": 1.8999, "step": 3376 }, { "epoch": 0.1476026050089602, "grad_norm": 2.046875, "learning_rate": 9.474618172963963e-05, "loss": 2.1685, "step": 3377 }, { "epoch": 0.14764631321299007, "grad_norm": 2.09375, "learning_rate": 9.474311637376818e-05, "loss": 1.6009, "step": 3378 }, { "epoch": 0.14769002141701998, "grad_norm": 3.0, "learning_rate": 9.474005017352899e-05, "loss": 2.7177, "step": 3379 }, { "epoch": 0.14773372962104986, "grad_norm": 2.328125, "learning_rate": 9.473698312897997e-05, "loss": 2.1018, "step": 3380 }, { "epoch": 0.14777743782507977, "grad_norm": 2.1875, "learning_rate": 9.473391524017894e-05, "loss": 2.058, "step": 3381 }, { "epoch": 0.14782114602910967, "grad_norm": 2.296875, "learning_rate": 9.473084650718382e-05, "loss": 2.4579, "step": 3382 }, { "epoch": 0.14786485423313955, "grad_norm": 2.40625, "learning_rate": 9.472777693005254e-05, "loss": 2.458, "step": 3383 }, { "epoch": 0.14790856243716946, "grad_norm": 2.5, "learning_rate": 9.4724706508843e-05, "loss": 2.1446, "step": 3384 }, { "epoch": 0.14795227064119934, "grad_norm": 2.171875, "learning_rate": 9.472163524361315e-05, "loss": 2.2017, "step": 3385 }, { "epoch": 0.14799597884522925, "grad_norm": 2.78125, "learning_rate": 9.471856313442098e-05, "loss": 2.6495, "step": 3386 }, { "epoch": 0.14803968704925916, "grad_norm": 2.265625, "learning_rate": 9.471549018132442e-05, "loss": 1.7599, "step": 3387 }, { "epoch": 0.14808339525328904, "grad_norm": 2.796875, "learning_rate": 9.471241638438148e-05, "loss": 2.5642, "step": 3388 }, { "epoch": 0.14812710345731894, "grad_norm": 2.578125, "learning_rate": 9.470934174365016e-05, "loss": 1.8783, "step": 3389 }, { "epoch": 0.14817081166134882, "grad_norm": 5.21875, "learning_rate": 9.470626625918851e-05, "loss": 2.3672, "step": 3390 }, { "epoch": 0.14821451986537873, "grad_norm": 3.6875, "learning_rate": 9.470318993105453e-05, "loss": 2.1336, "step": 3391 }, { "epoch": 0.14825822806940864, "grad_norm": 2.703125, "learning_rate": 9.47001127593063e-05, "loss": 1.8938, "step": 3392 }, { "epoch": 0.14830193627343852, "grad_norm": 2.578125, "learning_rate": 9.469703474400188e-05, "loss": 2.27, "step": 3393 }, { "epoch": 0.14834564447746842, "grad_norm": 3.28125, "learning_rate": 9.469395588519939e-05, "loss": 2.4541, "step": 3394 }, { "epoch": 0.1483893526814983, "grad_norm": 2.375, "learning_rate": 9.469087618295687e-05, "loss": 1.814, "step": 3395 }, { "epoch": 0.1484330608855282, "grad_norm": 2.125, "learning_rate": 9.468779563733248e-05, "loss": 1.6851, "step": 3396 }, { "epoch": 0.14847676908955812, "grad_norm": 5.375, "learning_rate": 9.468471424838434e-05, "loss": 1.4152, "step": 3397 }, { "epoch": 0.148520477293588, "grad_norm": 5.53125, "learning_rate": 9.468163201617062e-05, "loss": 1.3591, "step": 3398 }, { "epoch": 0.1485641854976179, "grad_norm": 3.03125, "learning_rate": 9.467854894074945e-05, "loss": 2.6621, "step": 3399 }, { "epoch": 0.1486078937016478, "grad_norm": 2.34375, "learning_rate": 9.467546502217907e-05, "loss": 1.9949, "step": 3400 }, { "epoch": 0.1486516019056777, "grad_norm": 2.546875, "learning_rate": 9.467238026051762e-05, "loss": 2.0779, "step": 3401 }, { "epoch": 0.1486953101097076, "grad_norm": 2.203125, "learning_rate": 9.466929465582335e-05, "loss": 1.8255, "step": 3402 }, { "epoch": 0.14873901831373748, "grad_norm": 2.875, "learning_rate": 9.466620820815446e-05, "loss": 2.032, "step": 3403 }, { "epoch": 0.1487827265177674, "grad_norm": 2.3125, "learning_rate": 9.466312091756922e-05, "loss": 2.1223, "step": 3404 }, { "epoch": 0.14882643472179727, "grad_norm": 2.1875, "learning_rate": 9.46600327841259e-05, "loss": 2.3961, "step": 3405 }, { "epoch": 0.14887014292582718, "grad_norm": 2.609375, "learning_rate": 9.465694380788274e-05, "loss": 2.0143, "step": 3406 }, { "epoch": 0.14891385112985708, "grad_norm": 2.484375, "learning_rate": 9.465385398889806e-05, "loss": 2.1623, "step": 3407 }, { "epoch": 0.14895755933388696, "grad_norm": 3.078125, "learning_rate": 9.465076332723017e-05, "loss": 2.1627, "step": 3408 }, { "epoch": 0.14900126753791687, "grad_norm": 2.921875, "learning_rate": 9.464767182293739e-05, "loss": 1.8762, "step": 3409 }, { "epoch": 0.14904497574194675, "grad_norm": 2.53125, "learning_rate": 9.464457947607805e-05, "loss": 2.4482, "step": 3410 }, { "epoch": 0.14908868394597666, "grad_norm": 2.40625, "learning_rate": 9.464148628671053e-05, "loss": 1.5772, "step": 3411 }, { "epoch": 0.14913239215000657, "grad_norm": 2.328125, "learning_rate": 9.46383922548932e-05, "loss": 2.1683, "step": 3412 }, { "epoch": 0.14917610035403644, "grad_norm": 2.09375, "learning_rate": 9.463529738068441e-05, "loss": 2.185, "step": 3413 }, { "epoch": 0.14921980855806635, "grad_norm": 2.796875, "learning_rate": 9.463220166414262e-05, "loss": 2.1126, "step": 3414 }, { "epoch": 0.14926351676209623, "grad_norm": 2.5, "learning_rate": 9.462910510532621e-05, "loss": 2.6235, "step": 3415 }, { "epoch": 0.14930722496612614, "grad_norm": 2.484375, "learning_rate": 9.462600770429364e-05, "loss": 1.4827, "step": 3416 }, { "epoch": 0.14935093317015605, "grad_norm": 2.1875, "learning_rate": 9.462290946110335e-05, "loss": 2.3322, "step": 3417 }, { "epoch": 0.14939464137418593, "grad_norm": 2.96875, "learning_rate": 9.461981037581383e-05, "loss": 2.0577, "step": 3418 }, { "epoch": 0.14943834957821583, "grad_norm": 2.359375, "learning_rate": 9.461671044848352e-05, "loss": 1.8846, "step": 3419 }, { "epoch": 0.14948205778224571, "grad_norm": 2.328125, "learning_rate": 9.461360967917098e-05, "loss": 1.9623, "step": 3420 }, { "epoch": 0.14952576598627562, "grad_norm": 2.609375, "learning_rate": 9.461050806793468e-05, "loss": 1.7624, "step": 3421 }, { "epoch": 0.14956947419030553, "grad_norm": 2.25, "learning_rate": 9.460740561483314e-05, "loss": 2.1842, "step": 3422 }, { "epoch": 0.1496131823943354, "grad_norm": 5.875, "learning_rate": 9.460430231992496e-05, "loss": 1.1398, "step": 3423 }, { "epoch": 0.14965689059836532, "grad_norm": 2.375, "learning_rate": 9.460119818326866e-05, "loss": 1.8948, "step": 3424 }, { "epoch": 0.1497005988023952, "grad_norm": 2.625, "learning_rate": 9.459809320492286e-05, "loss": 1.7911, "step": 3425 }, { "epoch": 0.1497443070064251, "grad_norm": 2.6875, "learning_rate": 9.459498738494613e-05, "loss": 2.3553, "step": 3426 }, { "epoch": 0.149788015210455, "grad_norm": 2.4375, "learning_rate": 9.459188072339706e-05, "loss": 1.7611, "step": 3427 }, { "epoch": 0.1498317234144849, "grad_norm": 2.734375, "learning_rate": 9.458877322033431e-05, "loss": 2.0372, "step": 3428 }, { "epoch": 0.1498754316185148, "grad_norm": 2.765625, "learning_rate": 9.458566487581653e-05, "loss": 1.9606, "step": 3429 }, { "epoch": 0.14991913982254468, "grad_norm": 2.265625, "learning_rate": 9.458255568990235e-05, "loss": 2.0393, "step": 3430 }, { "epoch": 0.14996284802657459, "grad_norm": 3.046875, "learning_rate": 9.457944566265045e-05, "loss": 2.1543, "step": 3431 }, { "epoch": 0.1500065562306045, "grad_norm": 2.390625, "learning_rate": 9.457633479411952e-05, "loss": 2.36, "step": 3432 }, { "epoch": 0.15005026443463437, "grad_norm": 2.4375, "learning_rate": 9.457322308436828e-05, "loss": 1.8532, "step": 3433 }, { "epoch": 0.15009397263866428, "grad_norm": 2.84375, "learning_rate": 9.457011053345547e-05, "loss": 2.2903, "step": 3434 }, { "epoch": 0.15013768084269416, "grad_norm": 2.53125, "learning_rate": 9.45669971414398e-05, "loss": 2.2657, "step": 3435 }, { "epoch": 0.15018138904672407, "grad_norm": 3.078125, "learning_rate": 9.456388290838e-05, "loss": 1.8505, "step": 3436 }, { "epoch": 0.15022509725075398, "grad_norm": 2.546875, "learning_rate": 9.45607678343349e-05, "loss": 1.9809, "step": 3437 }, { "epoch": 0.15026880545478385, "grad_norm": 2.8125, "learning_rate": 9.455765191936326e-05, "loss": 1.7168, "step": 3438 }, { "epoch": 0.15031251365881376, "grad_norm": 2.46875, "learning_rate": 9.455453516352385e-05, "loss": 2.7301, "step": 3439 }, { "epoch": 0.15035622186284364, "grad_norm": 2.15625, "learning_rate": 9.455141756687554e-05, "loss": 2.3094, "step": 3440 }, { "epoch": 0.15039993006687355, "grad_norm": 3.140625, "learning_rate": 9.454829912947712e-05, "loss": 1.5946, "step": 3441 }, { "epoch": 0.15044363827090346, "grad_norm": 2.421875, "learning_rate": 9.454517985138747e-05, "loss": 1.9504, "step": 3442 }, { "epoch": 0.15048734647493334, "grad_norm": 2.265625, "learning_rate": 9.454205973266543e-05, "loss": 2.2709, "step": 3443 }, { "epoch": 0.15053105467896324, "grad_norm": 2.828125, "learning_rate": 9.453893877336991e-05, "loss": 1.9691, "step": 3444 }, { "epoch": 0.15057476288299312, "grad_norm": 2.609375, "learning_rate": 9.453581697355978e-05, "loss": 2.2119, "step": 3445 }, { "epoch": 0.15061847108702303, "grad_norm": 2.078125, "learning_rate": 9.453269433329398e-05, "loss": 1.9717, "step": 3446 }, { "epoch": 0.15066217929105294, "grad_norm": 2.140625, "learning_rate": 9.452957085263142e-05, "loss": 1.919, "step": 3447 }, { "epoch": 0.15070588749508282, "grad_norm": 2.53125, "learning_rate": 9.452644653163104e-05, "loss": 2.4109, "step": 3448 }, { "epoch": 0.15074959569911273, "grad_norm": 2.46875, "learning_rate": 9.452332137035181e-05, "loss": 1.6877, "step": 3449 }, { "epoch": 0.15079330390314263, "grad_norm": 2.484375, "learning_rate": 9.452019536885271e-05, "loss": 2.4079, "step": 3450 }, { "epoch": 0.1508370121071725, "grad_norm": 2.203125, "learning_rate": 9.451706852719273e-05, "loss": 1.923, "step": 3451 }, { "epoch": 0.15088072031120242, "grad_norm": 2.15625, "learning_rate": 9.451394084543087e-05, "loss": 1.6903, "step": 3452 }, { "epoch": 0.1509244285152323, "grad_norm": 3.765625, "learning_rate": 9.451081232362616e-05, "loss": 1.7523, "step": 3453 }, { "epoch": 0.1509681367192622, "grad_norm": 2.59375, "learning_rate": 9.450768296183765e-05, "loss": 2.4434, "step": 3454 }, { "epoch": 0.15101184492329212, "grad_norm": 2.84375, "learning_rate": 9.450455276012435e-05, "loss": 1.9919, "step": 3455 }, { "epoch": 0.151055553127322, "grad_norm": 3.359375, "learning_rate": 9.45014217185454e-05, "loss": 3.1905, "step": 3456 }, { "epoch": 0.1510992613313519, "grad_norm": 2.25, "learning_rate": 9.449828983715985e-05, "loss": 1.9003, "step": 3457 }, { "epoch": 0.15114296953538178, "grad_norm": 2.140625, "learning_rate": 9.44951571160268e-05, "loss": 1.8811, "step": 3458 }, { "epoch": 0.1511866777394117, "grad_norm": 2.796875, "learning_rate": 9.449202355520537e-05, "loss": 2.2725, "step": 3459 }, { "epoch": 0.1512303859434416, "grad_norm": 2.734375, "learning_rate": 9.448888915475471e-05, "loss": 1.641, "step": 3460 }, { "epoch": 0.15127409414747148, "grad_norm": 4.875, "learning_rate": 9.448575391473396e-05, "loss": 1.6954, "step": 3461 }, { "epoch": 0.15131780235150138, "grad_norm": 2.109375, "learning_rate": 9.448261783520228e-05, "loss": 1.897, "step": 3462 }, { "epoch": 0.15136151055553126, "grad_norm": 2.828125, "learning_rate": 9.447948091621886e-05, "loss": 2.1477, "step": 3463 }, { "epoch": 0.15140521875956117, "grad_norm": 2.125, "learning_rate": 9.44763431578429e-05, "loss": 1.9178, "step": 3464 }, { "epoch": 0.15144892696359108, "grad_norm": 2.796875, "learning_rate": 9.447320456013362e-05, "loss": 2.9479, "step": 3465 }, { "epoch": 0.15149263516762096, "grad_norm": 3.09375, "learning_rate": 9.447006512315025e-05, "loss": 2.2641, "step": 3466 }, { "epoch": 0.15153634337165087, "grad_norm": 2.109375, "learning_rate": 9.4466924846952e-05, "loss": 1.8904, "step": 3467 }, { "epoch": 0.15158005157568075, "grad_norm": 2.546875, "learning_rate": 9.446378373159818e-05, "loss": 1.7885, "step": 3468 }, { "epoch": 0.15162375977971065, "grad_norm": 2.765625, "learning_rate": 9.446064177714804e-05, "loss": 3.0216, "step": 3469 }, { "epoch": 0.15166746798374056, "grad_norm": 2.265625, "learning_rate": 9.445749898366089e-05, "loss": 2.1373, "step": 3470 }, { "epoch": 0.15171117618777044, "grad_norm": 2.34375, "learning_rate": 9.445435535119602e-05, "loss": 1.9214, "step": 3471 }, { "epoch": 0.15175488439180035, "grad_norm": 3.96875, "learning_rate": 9.445121087981277e-05, "loss": 2.1844, "step": 3472 }, { "epoch": 0.15179859259583023, "grad_norm": 2.4375, "learning_rate": 9.444806556957047e-05, "loss": 1.8605, "step": 3473 }, { "epoch": 0.15184230079986014, "grad_norm": 2.09375, "learning_rate": 9.444491942052849e-05, "loss": 1.8039, "step": 3474 }, { "epoch": 0.15188600900389004, "grad_norm": 3.1875, "learning_rate": 9.444177243274618e-05, "loss": 2.4046, "step": 3475 }, { "epoch": 0.15192971720791992, "grad_norm": 2.515625, "learning_rate": 9.443862460628295e-05, "loss": 2.4799, "step": 3476 }, { "epoch": 0.15197342541194983, "grad_norm": 2.375, "learning_rate": 9.44354759411982e-05, "loss": 1.925, "step": 3477 }, { "epoch": 0.1520171336159797, "grad_norm": 2.46875, "learning_rate": 9.443232643755133e-05, "loss": 2.4786, "step": 3478 }, { "epoch": 0.15206084182000962, "grad_norm": 3.21875, "learning_rate": 9.442917609540181e-05, "loss": 1.7428, "step": 3479 }, { "epoch": 0.15210455002403953, "grad_norm": 3.09375, "learning_rate": 9.442602491480906e-05, "loss": 2.5866, "step": 3480 }, { "epoch": 0.1521482582280694, "grad_norm": 2.671875, "learning_rate": 9.442287289583259e-05, "loss": 1.6681, "step": 3481 }, { "epoch": 0.1521919664320993, "grad_norm": 2.546875, "learning_rate": 9.441972003853181e-05, "loss": 2.4017, "step": 3482 }, { "epoch": 0.1522356746361292, "grad_norm": 1.9921875, "learning_rate": 9.44165663429663e-05, "loss": 1.6264, "step": 3483 }, { "epoch": 0.1522793828401591, "grad_norm": 2.3125, "learning_rate": 9.441341180919551e-05, "loss": 1.6506, "step": 3484 }, { "epoch": 0.152323091044189, "grad_norm": 2.09375, "learning_rate": 9.4410256437279e-05, "loss": 1.826, "step": 3485 }, { "epoch": 0.1523667992482189, "grad_norm": 2.75, "learning_rate": 9.440710022727634e-05, "loss": 2.2919, "step": 3486 }, { "epoch": 0.1524105074522488, "grad_norm": 2.890625, "learning_rate": 9.440394317924706e-05, "loss": 1.9157, "step": 3487 }, { "epoch": 0.15245421565627867, "grad_norm": 2.515625, "learning_rate": 9.440078529325073e-05, "loss": 1.9245, "step": 3488 }, { "epoch": 0.15249792386030858, "grad_norm": 2.515625, "learning_rate": 9.439762656934698e-05, "loss": 1.9615, "step": 3489 }, { "epoch": 0.1525416320643385, "grad_norm": 2.28125, "learning_rate": 9.439446700759537e-05, "loss": 2.1101, "step": 3490 }, { "epoch": 0.15258534026836837, "grad_norm": 2.46875, "learning_rate": 9.439130660805558e-05, "loss": 2.4986, "step": 3491 }, { "epoch": 0.15262904847239828, "grad_norm": 2.359375, "learning_rate": 9.438814537078722e-05, "loss": 1.9813, "step": 3492 }, { "epoch": 0.15267275667642816, "grad_norm": 4.3125, "learning_rate": 9.438498329584995e-05, "loss": 1.785, "step": 3493 }, { "epoch": 0.15271646488045806, "grad_norm": 2.125, "learning_rate": 9.438182038330345e-05, "loss": 2.1659, "step": 3494 }, { "epoch": 0.15276017308448797, "grad_norm": 2.234375, "learning_rate": 9.43786566332074e-05, "loss": 1.9646, "step": 3495 }, { "epoch": 0.15280388128851785, "grad_norm": 2.40625, "learning_rate": 9.437549204562151e-05, "loss": 1.9267, "step": 3496 }, { "epoch": 0.15284758949254776, "grad_norm": 2.734375, "learning_rate": 9.43723266206055e-05, "loss": 1.9642, "step": 3497 }, { "epoch": 0.15289129769657764, "grad_norm": 2.0, "learning_rate": 9.43691603582191e-05, "loss": 1.8211, "step": 3498 }, { "epoch": 0.15293500590060755, "grad_norm": 2.515625, "learning_rate": 9.436599325852208e-05, "loss": 1.7812, "step": 3499 }, { "epoch": 0.15297871410463745, "grad_norm": 2.21875, "learning_rate": 9.436282532157419e-05, "loss": 1.7518, "step": 3500 }, { "epoch": 0.15302242230866733, "grad_norm": 2.9375, "learning_rate": 9.435965654743522e-05, "loss": 1.9779, "step": 3501 }, { "epoch": 0.15306613051269724, "grad_norm": 19.5, "learning_rate": 9.435648693616496e-05, "loss": 0.4233, "step": 3502 }, { "epoch": 0.15310983871672712, "grad_norm": 2.40625, "learning_rate": 9.435331648782324e-05, "loss": 2.0467, "step": 3503 }, { "epoch": 0.15315354692075703, "grad_norm": 2.609375, "learning_rate": 9.43501452024699e-05, "loss": 1.829, "step": 3504 }, { "epoch": 0.15319725512478694, "grad_norm": 2.453125, "learning_rate": 9.434697308016475e-05, "loss": 2.502, "step": 3505 }, { "epoch": 0.15324096332881681, "grad_norm": 2.8125, "learning_rate": 9.434380012096768e-05, "loss": 2.0048, "step": 3506 }, { "epoch": 0.15328467153284672, "grad_norm": 3.640625, "learning_rate": 9.434062632493856e-05, "loss": 1.4008, "step": 3507 }, { "epoch": 0.1533283797368766, "grad_norm": 3.109375, "learning_rate": 9.43374516921373e-05, "loss": 2.3127, "step": 3508 }, { "epoch": 0.1533720879409065, "grad_norm": 3.359375, "learning_rate": 9.433427622262379e-05, "loss": 2.145, "step": 3509 }, { "epoch": 0.15341579614493642, "grad_norm": 2.375, "learning_rate": 9.433109991645795e-05, "loss": 1.7612, "step": 3510 }, { "epoch": 0.1534595043489663, "grad_norm": 3.234375, "learning_rate": 9.432792277369974e-05, "loss": 2.0052, "step": 3511 }, { "epoch": 0.1535032125529962, "grad_norm": 2.390625, "learning_rate": 9.432474479440912e-05, "loss": 1.7988, "step": 3512 }, { "epoch": 0.15354692075702608, "grad_norm": 4.25, "learning_rate": 9.432156597864604e-05, "loss": 1.9816, "step": 3513 }, { "epoch": 0.153590628961056, "grad_norm": 2.734375, "learning_rate": 9.431838632647052e-05, "loss": 1.6767, "step": 3514 }, { "epoch": 0.1536343371650859, "grad_norm": 2.046875, "learning_rate": 9.431520583794254e-05, "loss": 1.8451, "step": 3515 }, { "epoch": 0.15367804536911578, "grad_norm": 2.484375, "learning_rate": 9.431202451312211e-05, "loss": 2.6531, "step": 3516 }, { "epoch": 0.1537217535731457, "grad_norm": 2.546875, "learning_rate": 9.43088423520693e-05, "loss": 2.4231, "step": 3517 }, { "epoch": 0.15376546177717557, "grad_norm": 3.359375, "learning_rate": 9.430565935484416e-05, "loss": 2.4119, "step": 3518 }, { "epoch": 0.15380916998120547, "grad_norm": 2.390625, "learning_rate": 9.430247552150673e-05, "loss": 2.4021, "step": 3519 }, { "epoch": 0.15385287818523538, "grad_norm": 4.6875, "learning_rate": 9.42992908521171e-05, "loss": 1.9986, "step": 3520 }, { "epoch": 0.15389658638926526, "grad_norm": 2.1875, "learning_rate": 9.429610534673538e-05, "loss": 1.8919, "step": 3521 }, { "epoch": 0.15394029459329517, "grad_norm": 2.171875, "learning_rate": 9.42929190054217e-05, "loss": 1.787, "step": 3522 }, { "epoch": 0.15398400279732505, "grad_norm": 3.015625, "learning_rate": 9.428973182823616e-05, "loss": 2.095, "step": 3523 }, { "epoch": 0.15402771100135496, "grad_norm": 4.53125, "learning_rate": 9.428654381523892e-05, "loss": 1.0479, "step": 3524 }, { "epoch": 0.15407141920538486, "grad_norm": 3.5, "learning_rate": 9.428335496649014e-05, "loss": 2.9287, "step": 3525 }, { "epoch": 0.15411512740941474, "grad_norm": 2.515625, "learning_rate": 9.428016528205001e-05, "loss": 2.4471, "step": 3526 }, { "epoch": 0.15415883561344465, "grad_norm": 2.8125, "learning_rate": 9.42769747619787e-05, "loss": 1.9132, "step": 3527 }, { "epoch": 0.15420254381747453, "grad_norm": 2.46875, "learning_rate": 9.427378340633645e-05, "loss": 1.7993, "step": 3528 }, { "epoch": 0.15424625202150444, "grad_norm": 3.859375, "learning_rate": 9.427059121518346e-05, "loss": 2.7769, "step": 3529 }, { "epoch": 0.15428996022553434, "grad_norm": 3.625, "learning_rate": 9.426739818857998e-05, "loss": 1.8669, "step": 3530 }, { "epoch": 0.15433366842956422, "grad_norm": 2.84375, "learning_rate": 9.426420432658627e-05, "loss": 2.2612, "step": 3531 }, { "epoch": 0.15437737663359413, "grad_norm": 2.796875, "learning_rate": 9.426100962926261e-05, "loss": 1.631, "step": 3532 }, { "epoch": 0.154421084837624, "grad_norm": 2.3125, "learning_rate": 9.425781409666926e-05, "loss": 1.6795, "step": 3533 }, { "epoch": 0.15446479304165392, "grad_norm": 2.984375, "learning_rate": 9.425461772886656e-05, "loss": 3.0124, "step": 3534 }, { "epoch": 0.15450850124568383, "grad_norm": 2.6875, "learning_rate": 9.42514205259148e-05, "loss": 1.6248, "step": 3535 }, { "epoch": 0.1545522094497137, "grad_norm": 3.015625, "learning_rate": 9.424822248787435e-05, "loss": 2.2115, "step": 3536 }, { "epoch": 0.15459591765374361, "grad_norm": 2.375, "learning_rate": 9.424502361480552e-05, "loss": 2.0061, "step": 3537 }, { "epoch": 0.1546396258577735, "grad_norm": 2.265625, "learning_rate": 9.424182390676872e-05, "loss": 1.5383, "step": 3538 }, { "epoch": 0.1546833340618034, "grad_norm": 2.421875, "learning_rate": 9.423862336382429e-05, "loss": 1.6646, "step": 3539 }, { "epoch": 0.1547270422658333, "grad_norm": 2.015625, "learning_rate": 9.423542198603267e-05, "loss": 1.7199, "step": 3540 }, { "epoch": 0.1547707504698632, "grad_norm": 2.40625, "learning_rate": 9.423221977345424e-05, "loss": 1.8018, "step": 3541 }, { "epoch": 0.1548144586738931, "grad_norm": 2.734375, "learning_rate": 9.422901672614946e-05, "loss": 2.5242, "step": 3542 }, { "epoch": 0.15485816687792298, "grad_norm": 2.875, "learning_rate": 9.422581284417875e-05, "loss": 2.0725, "step": 3543 }, { "epoch": 0.15490187508195288, "grad_norm": 2.640625, "learning_rate": 9.422260812760259e-05, "loss": 2.1044, "step": 3544 }, { "epoch": 0.1549455832859828, "grad_norm": 3.109375, "learning_rate": 9.421940257648146e-05, "loss": 1.7116, "step": 3545 }, { "epoch": 0.15498929149001267, "grad_norm": 2.203125, "learning_rate": 9.421619619087582e-05, "loss": 1.9174, "step": 3546 }, { "epoch": 0.15503299969404258, "grad_norm": 2.09375, "learning_rate": 9.421298897084623e-05, "loss": 1.6679, "step": 3547 }, { "epoch": 0.15507670789807246, "grad_norm": 3.25, "learning_rate": 9.420978091645318e-05, "loss": 2.2195, "step": 3548 }, { "epoch": 0.15512041610210237, "grad_norm": 2.296875, "learning_rate": 9.420657202775722e-05, "loss": 1.989, "step": 3549 }, { "epoch": 0.15516412430613227, "grad_norm": 2.5625, "learning_rate": 9.42033623048189e-05, "loss": 1.9881, "step": 3550 }, { "epoch": 0.15520783251016215, "grad_norm": 3.140625, "learning_rate": 9.420015174769881e-05, "loss": 2.7604, "step": 3551 }, { "epoch": 0.15525154071419206, "grad_norm": 2.375, "learning_rate": 9.419694035645751e-05, "loss": 1.9209, "step": 3552 }, { "epoch": 0.15529524891822194, "grad_norm": 7.90625, "learning_rate": 9.419372813115563e-05, "loss": 2.8501, "step": 3553 }, { "epoch": 0.15533895712225185, "grad_norm": 2.421875, "learning_rate": 9.419051507185378e-05, "loss": 2.2907, "step": 3554 }, { "epoch": 0.15538266532628175, "grad_norm": 2.53125, "learning_rate": 9.418730117861259e-05, "loss": 2.0245, "step": 3555 }, { "epoch": 0.15542637353031163, "grad_norm": 2.34375, "learning_rate": 9.418408645149273e-05, "loss": 1.9516, "step": 3556 }, { "epoch": 0.15547008173434154, "grad_norm": 3.125, "learning_rate": 9.418087089055484e-05, "loss": 2.2403, "step": 3557 }, { "epoch": 0.15551378993837142, "grad_norm": 2.375, "learning_rate": 9.417765449585961e-05, "loss": 2.1481, "step": 3558 }, { "epoch": 0.15555749814240133, "grad_norm": 2.234375, "learning_rate": 9.417443726746776e-05, "loss": 2.1226, "step": 3559 }, { "epoch": 0.15560120634643124, "grad_norm": 2.203125, "learning_rate": 9.417121920543996e-05, "loss": 2.2953, "step": 3560 }, { "epoch": 0.15564491455046112, "grad_norm": 3.515625, "learning_rate": 9.416800030983699e-05, "loss": 1.9059, "step": 3561 }, { "epoch": 0.15568862275449102, "grad_norm": 2.796875, "learning_rate": 9.416478058071956e-05, "loss": 2.1399, "step": 3562 }, { "epoch": 0.1557323309585209, "grad_norm": 2.1875, "learning_rate": 9.416156001814843e-05, "loss": 2.1246, "step": 3563 }, { "epoch": 0.1557760391625508, "grad_norm": 2.65625, "learning_rate": 9.41583386221844e-05, "loss": 1.8416, "step": 3564 }, { "epoch": 0.15581974736658072, "grad_norm": 2.984375, "learning_rate": 9.415511639288826e-05, "loss": 2.3951, "step": 3565 }, { "epoch": 0.1558634555706106, "grad_norm": 2.921875, "learning_rate": 9.41518933303208e-05, "loss": 2.3691, "step": 3566 }, { "epoch": 0.1559071637746405, "grad_norm": 2.609375, "learning_rate": 9.414866943454284e-05, "loss": 2.3471, "step": 3567 }, { "epoch": 0.15595087197867039, "grad_norm": 3.421875, "learning_rate": 9.414544470561524e-05, "loss": 2.5009, "step": 3568 }, { "epoch": 0.1559945801827003, "grad_norm": 2.640625, "learning_rate": 9.414221914359886e-05, "loss": 2.6902, "step": 3569 }, { "epoch": 0.1560382883867302, "grad_norm": 3.34375, "learning_rate": 9.413899274855454e-05, "loss": 2.5366, "step": 3570 }, { "epoch": 0.15608199659076008, "grad_norm": 2.375, "learning_rate": 9.41357655205432e-05, "loss": 1.9691, "step": 3571 }, { "epoch": 0.15612570479479, "grad_norm": 4.6875, "learning_rate": 9.413253745962573e-05, "loss": 2.2237, "step": 3572 }, { "epoch": 0.15616941299881987, "grad_norm": 2.125, "learning_rate": 9.412930856586304e-05, "loss": 2.151, "step": 3573 }, { "epoch": 0.15621312120284978, "grad_norm": 2.34375, "learning_rate": 9.412607883931607e-05, "loss": 1.9646, "step": 3574 }, { "epoch": 0.15625682940687968, "grad_norm": 2.71875, "learning_rate": 9.412284828004577e-05, "loss": 1.9428, "step": 3575 }, { "epoch": 0.15630053761090956, "grad_norm": 2.578125, "learning_rate": 9.41196168881131e-05, "loss": 2.2769, "step": 3576 }, { "epoch": 0.15634424581493947, "grad_norm": 2.078125, "learning_rate": 9.411638466357906e-05, "loss": 1.6582, "step": 3577 }, { "epoch": 0.15638795401896935, "grad_norm": 2.03125, "learning_rate": 9.411315160650462e-05, "loss": 1.6979, "step": 3578 }, { "epoch": 0.15643166222299926, "grad_norm": 2.1875, "learning_rate": 9.410991771695082e-05, "loss": 2.0957, "step": 3579 }, { "epoch": 0.15647537042702916, "grad_norm": 2.9375, "learning_rate": 9.410668299497864e-05, "loss": 1.7496, "step": 3580 }, { "epoch": 0.15651907863105904, "grad_norm": 2.25, "learning_rate": 9.410344744064919e-05, "loss": 1.6589, "step": 3581 }, { "epoch": 0.15656278683508895, "grad_norm": 3.140625, "learning_rate": 9.410021105402348e-05, "loss": 3.1007, "step": 3582 }, { "epoch": 0.15660649503911883, "grad_norm": 2.4375, "learning_rate": 9.409697383516263e-05, "loss": 2.4073, "step": 3583 }, { "epoch": 0.15665020324314874, "grad_norm": 2.09375, "learning_rate": 9.409373578412769e-05, "loss": 2.0086, "step": 3584 }, { "epoch": 0.15669391144717865, "grad_norm": 2.78125, "learning_rate": 9.409049690097977e-05, "loss": 1.5228, "step": 3585 }, { "epoch": 0.15673761965120853, "grad_norm": 3.0, "learning_rate": 9.408725718578e-05, "loss": 2.3846, "step": 3586 }, { "epoch": 0.15678132785523843, "grad_norm": 2.859375, "learning_rate": 9.408401663858953e-05, "loss": 1.5352, "step": 3587 }, { "epoch": 0.1568250360592683, "grad_norm": 2.328125, "learning_rate": 9.408077525946952e-05, "loss": 2.0641, "step": 3588 }, { "epoch": 0.15686874426329822, "grad_norm": 2.375, "learning_rate": 9.40775330484811e-05, "loss": 2.0999, "step": 3589 }, { "epoch": 0.15691245246732813, "grad_norm": 2.09375, "learning_rate": 9.407429000568549e-05, "loss": 1.557, "step": 3590 }, { "epoch": 0.156956160671358, "grad_norm": 2.140625, "learning_rate": 9.407104613114388e-05, "loss": 1.735, "step": 3591 }, { "epoch": 0.15699986887538792, "grad_norm": 2.484375, "learning_rate": 9.406780142491748e-05, "loss": 1.8904, "step": 3592 }, { "epoch": 0.1570435770794178, "grad_norm": 11.9375, "learning_rate": 9.406455588706752e-05, "loss": 2.161, "step": 3593 }, { "epoch": 0.1570872852834477, "grad_norm": 2.640625, "learning_rate": 9.406130951765529e-05, "loss": 2.2186, "step": 3594 }, { "epoch": 0.1571309934874776, "grad_norm": 2.171875, "learning_rate": 9.405806231674202e-05, "loss": 1.8745, "step": 3595 }, { "epoch": 0.1571747016915075, "grad_norm": 2.25, "learning_rate": 9.405481428438896e-05, "loss": 1.8984, "step": 3596 }, { "epoch": 0.1572184098955374, "grad_norm": 2.890625, "learning_rate": 9.405156542065745e-05, "loss": 1.8932, "step": 3597 }, { "epoch": 0.15726211809956728, "grad_norm": 2.15625, "learning_rate": 9.404831572560879e-05, "loss": 2.3254, "step": 3598 }, { "epoch": 0.15730582630359718, "grad_norm": 2.71875, "learning_rate": 9.40450651993043e-05, "loss": 2.3937, "step": 3599 }, { "epoch": 0.1573495345076271, "grad_norm": 2.375, "learning_rate": 9.404181384180532e-05, "loss": 2.4118, "step": 3600 }, { "epoch": 0.15739324271165697, "grad_norm": 2.65625, "learning_rate": 9.403856165317321e-05, "loss": 2.095, "step": 3601 }, { "epoch": 0.15743695091568688, "grad_norm": 2.578125, "learning_rate": 9.403530863346937e-05, "loss": 2.1934, "step": 3602 }, { "epoch": 0.15748065911971676, "grad_norm": 2.875, "learning_rate": 9.403205478275514e-05, "loss": 1.9076, "step": 3603 }, { "epoch": 0.15752436732374667, "grad_norm": 2.4375, "learning_rate": 9.402880010109196e-05, "loss": 2.0982, "step": 3604 }, { "epoch": 0.15756807552777657, "grad_norm": 2.359375, "learning_rate": 9.402554458854125e-05, "loss": 2.1694, "step": 3605 }, { "epoch": 0.15761178373180645, "grad_norm": 2.609375, "learning_rate": 9.402228824516442e-05, "loss": 2.0995, "step": 3606 }, { "epoch": 0.15765549193583636, "grad_norm": 2.546875, "learning_rate": 9.401903107102296e-05, "loss": 2.2792, "step": 3607 }, { "epoch": 0.15769920013986624, "grad_norm": 2.421875, "learning_rate": 9.40157730661783e-05, "loss": 1.9848, "step": 3608 }, { "epoch": 0.15774290834389615, "grad_norm": 2.453125, "learning_rate": 9.401251423069194e-05, "loss": 1.9047, "step": 3609 }, { "epoch": 0.15778661654792606, "grad_norm": 2.21875, "learning_rate": 9.400925456462539e-05, "loss": 1.9244, "step": 3610 }, { "epoch": 0.15783032475195594, "grad_norm": 3.375, "learning_rate": 9.400599406804016e-05, "loss": 2.0467, "step": 3611 }, { "epoch": 0.15787403295598584, "grad_norm": 2.453125, "learning_rate": 9.400273274099776e-05, "loss": 1.9429, "step": 3612 }, { "epoch": 0.15791774116001572, "grad_norm": 2.34375, "learning_rate": 9.399947058355976e-05, "loss": 1.8686, "step": 3613 }, { "epoch": 0.15796144936404563, "grad_norm": 2.328125, "learning_rate": 9.399620759578769e-05, "loss": 1.9697, "step": 3614 }, { "epoch": 0.15800515756807554, "grad_norm": 2.921875, "learning_rate": 9.399294377774318e-05, "loss": 2.0929, "step": 3615 }, { "epoch": 0.15804886577210542, "grad_norm": 2.703125, "learning_rate": 9.398967912948778e-05, "loss": 2.5104, "step": 3616 }, { "epoch": 0.15809257397613533, "grad_norm": 2.296875, "learning_rate": 9.398641365108309e-05, "loss": 2.3617, "step": 3617 }, { "epoch": 0.1581362821801652, "grad_norm": 2.71875, "learning_rate": 9.398314734259078e-05, "loss": 2.0578, "step": 3618 }, { "epoch": 0.1581799903841951, "grad_norm": 6.15625, "learning_rate": 9.397988020407246e-05, "loss": 2.7893, "step": 3619 }, { "epoch": 0.15822369858822502, "grad_norm": 2.34375, "learning_rate": 9.397661223558979e-05, "loss": 1.8977, "step": 3620 }, { "epoch": 0.1582674067922549, "grad_norm": 2.46875, "learning_rate": 9.397334343720445e-05, "loss": 1.6987, "step": 3621 }, { "epoch": 0.1583111149962848, "grad_norm": 3.0, "learning_rate": 9.39700738089781e-05, "loss": 2.5893, "step": 3622 }, { "epoch": 0.1583548232003147, "grad_norm": 2.25, "learning_rate": 9.396680335097247e-05, "loss": 1.7377, "step": 3623 }, { "epoch": 0.1583985314043446, "grad_norm": 2.59375, "learning_rate": 9.396353206324929e-05, "loss": 2.4146, "step": 3624 }, { "epoch": 0.1584422396083745, "grad_norm": 2.046875, "learning_rate": 9.396025994587024e-05, "loss": 1.7168, "step": 3625 }, { "epoch": 0.15848594781240438, "grad_norm": 2.1875, "learning_rate": 9.395698699889713e-05, "loss": 1.5608, "step": 3626 }, { "epoch": 0.1585296560164343, "grad_norm": 2.09375, "learning_rate": 9.395371322239168e-05, "loss": 1.6012, "step": 3627 }, { "epoch": 0.15857336422046417, "grad_norm": 2.421875, "learning_rate": 9.395043861641571e-05, "loss": 2.1641, "step": 3628 }, { "epoch": 0.15861707242449408, "grad_norm": 2.78125, "learning_rate": 9.394716318103098e-05, "loss": 2.3077, "step": 3629 }, { "epoch": 0.15866078062852398, "grad_norm": 2.515625, "learning_rate": 9.394388691629932e-05, "loss": 1.7936, "step": 3630 }, { "epoch": 0.15870448883255386, "grad_norm": 3.484375, "learning_rate": 9.394060982228257e-05, "loss": 1.8558, "step": 3631 }, { "epoch": 0.15874819703658377, "grad_norm": 3.140625, "learning_rate": 9.393733189904254e-05, "loss": 1.8328, "step": 3632 }, { "epoch": 0.15879190524061365, "grad_norm": 2.265625, "learning_rate": 9.393405314664113e-05, "loss": 2.028, "step": 3633 }, { "epoch": 0.15883561344464356, "grad_norm": 2.484375, "learning_rate": 9.393077356514018e-05, "loss": 1.8092, "step": 3634 }, { "epoch": 0.15887932164867347, "grad_norm": 2.453125, "learning_rate": 9.392749315460161e-05, "loss": 1.8547, "step": 3635 }, { "epoch": 0.15892302985270335, "grad_norm": 2.59375, "learning_rate": 9.392421191508729e-05, "loss": 1.7204, "step": 3636 }, { "epoch": 0.15896673805673325, "grad_norm": 2.65625, "learning_rate": 9.392092984665918e-05, "loss": 3.101, "step": 3637 }, { "epoch": 0.15901044626076313, "grad_norm": 2.546875, "learning_rate": 9.391764694937919e-05, "loss": 2.1922, "step": 3638 }, { "epoch": 0.15905415446479304, "grad_norm": 3.0625, "learning_rate": 9.391436322330928e-05, "loss": 1.7696, "step": 3639 }, { "epoch": 0.15909786266882295, "grad_norm": 3.390625, "learning_rate": 9.391107866851143e-05, "loss": 2.4794, "step": 3640 }, { "epoch": 0.15914157087285283, "grad_norm": 2.140625, "learning_rate": 9.390779328504762e-05, "loss": 2.1426, "step": 3641 }, { "epoch": 0.15918527907688274, "grad_norm": 2.359375, "learning_rate": 9.390450707297984e-05, "loss": 1.9995, "step": 3642 }, { "epoch": 0.15922898728091261, "grad_norm": 2.484375, "learning_rate": 9.39012200323701e-05, "loss": 2.6311, "step": 3643 }, { "epoch": 0.15927269548494252, "grad_norm": 2.0625, "learning_rate": 9.389793216328047e-05, "loss": 1.9958, "step": 3644 }, { "epoch": 0.15931640368897243, "grad_norm": 2.03125, "learning_rate": 9.389464346577295e-05, "loss": 1.8677, "step": 3645 }, { "epoch": 0.1593601118930023, "grad_norm": 3.53125, "learning_rate": 9.389135393990962e-05, "loss": 2.6298, "step": 3646 }, { "epoch": 0.15940382009703222, "grad_norm": 3.296875, "learning_rate": 9.388806358575256e-05, "loss": 2.1751, "step": 3647 }, { "epoch": 0.1594475283010621, "grad_norm": 3.546875, "learning_rate": 9.388477240336387e-05, "loss": 2.0166, "step": 3648 }, { "epoch": 0.159491236505092, "grad_norm": 3.140625, "learning_rate": 9.388148039280566e-05, "loss": 2.4338, "step": 3649 }, { "epoch": 0.1595349447091219, "grad_norm": 2.65625, "learning_rate": 9.387818755414004e-05, "loss": 1.426, "step": 3650 }, { "epoch": 0.1595786529131518, "grad_norm": 2.25, "learning_rate": 9.387489388742917e-05, "loss": 1.8721, "step": 3651 }, { "epoch": 0.1596223611171817, "grad_norm": 2.21875, "learning_rate": 9.387159939273518e-05, "loss": 1.9787, "step": 3652 }, { "epoch": 0.15966606932121158, "grad_norm": 3.4375, "learning_rate": 9.386830407012026e-05, "loss": 2.2221, "step": 3653 }, { "epoch": 0.1597097775252415, "grad_norm": 4.90625, "learning_rate": 9.386500791964661e-05, "loss": 2.0422, "step": 3654 }, { "epoch": 0.1597534857292714, "grad_norm": 3.140625, "learning_rate": 9.38617109413764e-05, "loss": 2.3686, "step": 3655 }, { "epoch": 0.15979719393330127, "grad_norm": 3.546875, "learning_rate": 9.385841313537187e-05, "loss": 2.6774, "step": 3656 }, { "epoch": 0.15984090213733118, "grad_norm": 12.375, "learning_rate": 9.385511450169525e-05, "loss": 2.3355, "step": 3657 }, { "epoch": 0.15988461034136106, "grad_norm": 2.671875, "learning_rate": 9.385181504040881e-05, "loss": 2.1482, "step": 3658 }, { "epoch": 0.15992831854539097, "grad_norm": 2.375, "learning_rate": 9.384851475157477e-05, "loss": 2.1601, "step": 3659 }, { "epoch": 0.15997202674942088, "grad_norm": 2.453125, "learning_rate": 9.384521363525546e-05, "loss": 2.2922, "step": 3660 }, { "epoch": 0.16001573495345076, "grad_norm": 2.671875, "learning_rate": 9.384191169151313e-05, "loss": 2.2924, "step": 3661 }, { "epoch": 0.16005944315748066, "grad_norm": 2.6875, "learning_rate": 9.383860892041014e-05, "loss": 2.3593, "step": 3662 }, { "epoch": 0.16010315136151054, "grad_norm": 2.234375, "learning_rate": 9.383530532200879e-05, "loss": 2.0018, "step": 3663 }, { "epoch": 0.16014685956554045, "grad_norm": 2.671875, "learning_rate": 9.383200089637143e-05, "loss": 1.9835, "step": 3664 }, { "epoch": 0.16019056776957036, "grad_norm": 2.4375, "learning_rate": 9.382869564356043e-05, "loss": 2.4303, "step": 3665 }, { "epoch": 0.16023427597360024, "grad_norm": 2.578125, "learning_rate": 9.382538956363813e-05, "loss": 2.014, "step": 3666 }, { "epoch": 0.16027798417763015, "grad_norm": 2.125, "learning_rate": 9.382208265666695e-05, "loss": 1.8586, "step": 3667 }, { "epoch": 0.16032169238166002, "grad_norm": 2.234375, "learning_rate": 9.38187749227093e-05, "loss": 2.2283, "step": 3668 }, { "epoch": 0.16036540058568993, "grad_norm": 3.3125, "learning_rate": 9.381546636182758e-05, "loss": 2.8335, "step": 3669 }, { "epoch": 0.16040910878971984, "grad_norm": 3.5625, "learning_rate": 9.381215697408426e-05, "loss": 1.6542, "step": 3670 }, { "epoch": 0.16045281699374972, "grad_norm": 2.90625, "learning_rate": 9.380884675954176e-05, "loss": 2.2729, "step": 3671 }, { "epoch": 0.16049652519777963, "grad_norm": 2.390625, "learning_rate": 9.380553571826256e-05, "loss": 1.8695, "step": 3672 }, { "epoch": 0.1605402334018095, "grad_norm": 2.3125, "learning_rate": 9.380222385030915e-05, "loss": 1.9721, "step": 3673 }, { "epoch": 0.16058394160583941, "grad_norm": 2.25, "learning_rate": 9.379891115574402e-05, "loss": 2.1319, "step": 3674 }, { "epoch": 0.16062764980986932, "grad_norm": 2.453125, "learning_rate": 9.379559763462968e-05, "loss": 2.5423, "step": 3675 }, { "epoch": 0.1606713580138992, "grad_norm": 2.46875, "learning_rate": 9.379228328702868e-05, "loss": 1.8878, "step": 3676 }, { "epoch": 0.1607150662179291, "grad_norm": 2.59375, "learning_rate": 9.378896811300356e-05, "loss": 2.2672, "step": 3677 }, { "epoch": 0.160758774421959, "grad_norm": 2.671875, "learning_rate": 9.378565211261687e-05, "loss": 1.9762, "step": 3678 }, { "epoch": 0.1608024826259889, "grad_norm": 2.328125, "learning_rate": 9.378233528593121e-05, "loss": 2.1997, "step": 3679 }, { "epoch": 0.1608461908300188, "grad_norm": 2.375, "learning_rate": 9.377901763300916e-05, "loss": 1.9741, "step": 3680 }, { "epoch": 0.16088989903404868, "grad_norm": 2.953125, "learning_rate": 9.377569915391333e-05, "loss": 2.4164, "step": 3681 }, { "epoch": 0.1609336072380786, "grad_norm": 2.40625, "learning_rate": 9.377237984870634e-05, "loss": 2.2117, "step": 3682 }, { "epoch": 0.16097731544210847, "grad_norm": 2.59375, "learning_rate": 9.376905971745085e-05, "loss": 2.0477, "step": 3683 }, { "epoch": 0.16102102364613838, "grad_norm": 2.515625, "learning_rate": 9.376573876020948e-05, "loss": 1.9321, "step": 3684 }, { "epoch": 0.16106473185016829, "grad_norm": 2.703125, "learning_rate": 9.376241697704493e-05, "loss": 2.2536, "step": 3685 }, { "epoch": 0.16110844005419817, "grad_norm": 2.09375, "learning_rate": 9.375909436801988e-05, "loss": 1.8819, "step": 3686 }, { "epoch": 0.16115214825822807, "grad_norm": 2.453125, "learning_rate": 9.375577093319701e-05, "loss": 1.461, "step": 3687 }, { "epoch": 0.16119585646225795, "grad_norm": 3.046875, "learning_rate": 9.37524466726391e-05, "loss": 2.1925, "step": 3688 }, { "epoch": 0.16123956466628786, "grad_norm": 2.453125, "learning_rate": 9.37491215864088e-05, "loss": 1.9754, "step": 3689 }, { "epoch": 0.16128327287031777, "grad_norm": 2.625, "learning_rate": 9.374579567456892e-05, "loss": 1.9294, "step": 3690 }, { "epoch": 0.16132698107434765, "grad_norm": 2.5, "learning_rate": 9.374246893718221e-05, "loss": 1.5906, "step": 3691 }, { "epoch": 0.16137068927837755, "grad_norm": 2.734375, "learning_rate": 9.373914137431146e-05, "loss": 2.2835, "step": 3692 }, { "epoch": 0.16141439748240743, "grad_norm": 2.296875, "learning_rate": 9.373581298601943e-05, "loss": 2.0546, "step": 3693 }, { "epoch": 0.16145810568643734, "grad_norm": 2.453125, "learning_rate": 9.373248377236896e-05, "loss": 1.9919, "step": 3694 }, { "epoch": 0.16150181389046725, "grad_norm": 2.921875, "learning_rate": 9.372915373342288e-05, "loss": 1.7252, "step": 3695 }, { "epoch": 0.16154552209449713, "grad_norm": 2.046875, "learning_rate": 9.3725822869244e-05, "loss": 1.6739, "step": 3696 }, { "epoch": 0.16158923029852704, "grad_norm": 2.796875, "learning_rate": 9.372249117989523e-05, "loss": 2.5599, "step": 3697 }, { "epoch": 0.16163293850255692, "grad_norm": 2.921875, "learning_rate": 9.37191586654394e-05, "loss": 1.8535, "step": 3698 }, { "epoch": 0.16167664670658682, "grad_norm": 2.359375, "learning_rate": 9.371582532593943e-05, "loss": 2.326, "step": 3699 }, { "epoch": 0.16172035491061673, "grad_norm": 2.21875, "learning_rate": 9.37124911614582e-05, "loss": 1.3687, "step": 3700 }, { "epoch": 0.1617640631146466, "grad_norm": 2.40625, "learning_rate": 9.370915617205865e-05, "loss": 1.9865, "step": 3701 }, { "epoch": 0.16180777131867652, "grad_norm": 2.53125, "learning_rate": 9.370582035780371e-05, "loss": 2.1977, "step": 3702 }, { "epoch": 0.1618514795227064, "grad_norm": 2.453125, "learning_rate": 9.370248371875631e-05, "loss": 2.3067, "step": 3703 }, { "epoch": 0.1618951877267363, "grad_norm": 2.328125, "learning_rate": 9.369914625497945e-05, "loss": 1.9207, "step": 3704 }, { "epoch": 0.1619388959307662, "grad_norm": 3.171875, "learning_rate": 9.36958079665361e-05, "loss": 2.5128, "step": 3705 }, { "epoch": 0.1619826041347961, "grad_norm": 3.765625, "learning_rate": 9.369246885348926e-05, "loss": 2.426, "step": 3706 }, { "epoch": 0.162026312338826, "grad_norm": 2.21875, "learning_rate": 9.368912891590192e-05, "loss": 1.9204, "step": 3707 }, { "epoch": 0.16207002054285588, "grad_norm": 3.3125, "learning_rate": 9.368578815383715e-05, "loss": 1.9233, "step": 3708 }, { "epoch": 0.1621137287468858, "grad_norm": 2.265625, "learning_rate": 9.368244656735798e-05, "loss": 1.6875, "step": 3709 }, { "epoch": 0.1621574369509157, "grad_norm": 3.015625, "learning_rate": 9.367910415652745e-05, "loss": 2.2647, "step": 3710 }, { "epoch": 0.16220114515494558, "grad_norm": 2.171875, "learning_rate": 9.367576092140866e-05, "loss": 2.1075, "step": 3711 }, { "epoch": 0.16224485335897548, "grad_norm": 2.4375, "learning_rate": 9.367241686206469e-05, "loss": 1.6637, "step": 3712 }, { "epoch": 0.16228856156300536, "grad_norm": 4.84375, "learning_rate": 9.366907197855868e-05, "loss": 2.8638, "step": 3713 }, { "epoch": 0.16233226976703527, "grad_norm": 3.84375, "learning_rate": 9.366572627095369e-05, "loss": 1.9139, "step": 3714 }, { "epoch": 0.16237597797106518, "grad_norm": 2.65625, "learning_rate": 9.366237973931291e-05, "loss": 2.1883, "step": 3715 }, { "epoch": 0.16241968617509506, "grad_norm": 2.40625, "learning_rate": 9.365903238369946e-05, "loss": 1.3408, "step": 3716 }, { "epoch": 0.16246339437912496, "grad_norm": 2.359375, "learning_rate": 9.365568420417655e-05, "loss": 2.2424, "step": 3717 }, { "epoch": 0.16250710258315484, "grad_norm": 2.546875, "learning_rate": 9.365233520080731e-05, "loss": 2.0933, "step": 3718 }, { "epoch": 0.16255081078718475, "grad_norm": 2.640625, "learning_rate": 9.364898537365501e-05, "loss": 1.9398, "step": 3719 }, { "epoch": 0.16259451899121466, "grad_norm": 2.21875, "learning_rate": 9.36456347227828e-05, "loss": 1.7613, "step": 3720 }, { "epoch": 0.16263822719524454, "grad_norm": 2.546875, "learning_rate": 9.364228324825395e-05, "loss": 1.9097, "step": 3721 }, { "epoch": 0.16268193539927445, "grad_norm": 2.328125, "learning_rate": 9.36389309501317e-05, "loss": 2.2346, "step": 3722 }, { "epoch": 0.16272564360330435, "grad_norm": 2.640625, "learning_rate": 9.36355778284793e-05, "loss": 2.4336, "step": 3723 }, { "epoch": 0.16276935180733423, "grad_norm": 2.453125, "learning_rate": 9.363222388336004e-05, "loss": 2.0523, "step": 3724 }, { "epoch": 0.16281306001136414, "grad_norm": 2.9375, "learning_rate": 9.362886911483722e-05, "loss": 1.5397, "step": 3725 }, { "epoch": 0.16285676821539402, "grad_norm": 5.84375, "learning_rate": 9.362551352297413e-05, "loss": 1.9769, "step": 3726 }, { "epoch": 0.16290047641942393, "grad_norm": 2.265625, "learning_rate": 9.362215710783411e-05, "loss": 1.7988, "step": 3727 }, { "epoch": 0.16294418462345384, "grad_norm": 1.8984375, "learning_rate": 9.36187998694805e-05, "loss": 1.6174, "step": 3728 }, { "epoch": 0.16298789282748372, "grad_norm": 2.296875, "learning_rate": 9.361544180797665e-05, "loss": 2.2354, "step": 3729 }, { "epoch": 0.16303160103151362, "grad_norm": 2.125, "learning_rate": 9.361208292338593e-05, "loss": 2.0043, "step": 3730 }, { "epoch": 0.1630753092355435, "grad_norm": 2.53125, "learning_rate": 9.360872321577174e-05, "loss": 2.1913, "step": 3731 }, { "epoch": 0.1631190174395734, "grad_norm": 2.828125, "learning_rate": 9.360536268519746e-05, "loss": 1.9443, "step": 3732 }, { "epoch": 0.16316272564360332, "grad_norm": 2.640625, "learning_rate": 9.360200133172655e-05, "loss": 2.0476, "step": 3733 }, { "epoch": 0.1632064338476332, "grad_norm": 2.390625, "learning_rate": 9.359863915542238e-05, "loss": 1.656, "step": 3734 }, { "epoch": 0.1632501420516631, "grad_norm": 2.21875, "learning_rate": 9.359527615634844e-05, "loss": 1.7408, "step": 3735 }, { "epoch": 0.16329385025569298, "grad_norm": 3.328125, "learning_rate": 9.359191233456821e-05, "loss": 1.8783, "step": 3736 }, { "epoch": 0.1633375584597229, "grad_norm": 4.53125, "learning_rate": 9.358854769014513e-05, "loss": 2.5117, "step": 3737 }, { "epoch": 0.1633812666637528, "grad_norm": 2.59375, "learning_rate": 9.358518222314272e-05, "loss": 2.0793, "step": 3738 }, { "epoch": 0.16342497486778268, "grad_norm": 2.109375, "learning_rate": 9.35818159336245e-05, "loss": 1.7235, "step": 3739 }, { "epoch": 0.1634686830718126, "grad_norm": 2.375, "learning_rate": 9.357844882165396e-05, "loss": 2.2602, "step": 3740 }, { "epoch": 0.16351239127584247, "grad_norm": 2.75, "learning_rate": 9.357508088729468e-05, "loss": 1.9349, "step": 3741 }, { "epoch": 0.16355609947987237, "grad_norm": 2.46875, "learning_rate": 9.357171213061021e-05, "loss": 1.54, "step": 3742 }, { "epoch": 0.16359980768390228, "grad_norm": 2.71875, "learning_rate": 9.356834255166409e-05, "loss": 2.17, "step": 3743 }, { "epoch": 0.16364351588793216, "grad_norm": 2.34375, "learning_rate": 9.356497215051996e-05, "loss": 2.0412, "step": 3744 }, { "epoch": 0.16368722409196207, "grad_norm": 2.703125, "learning_rate": 9.356160092724138e-05, "loss": 2.0443, "step": 3745 }, { "epoch": 0.16373093229599195, "grad_norm": 5.4375, "learning_rate": 9.355822888189201e-05, "loss": 2.7347, "step": 3746 }, { "epoch": 0.16377464050002186, "grad_norm": 2.109375, "learning_rate": 9.355485601453545e-05, "loss": 2.0591, "step": 3747 }, { "epoch": 0.16381834870405176, "grad_norm": 2.75, "learning_rate": 9.355148232523537e-05, "loss": 1.9002, "step": 3748 }, { "epoch": 0.16386205690808164, "grad_norm": 2.1875, "learning_rate": 9.354810781405543e-05, "loss": 1.8313, "step": 3749 }, { "epoch": 0.16390576511211155, "grad_norm": 2.15625, "learning_rate": 9.354473248105932e-05, "loss": 1.8867, "step": 3750 }, { "epoch": 0.16394947331614143, "grad_norm": 2.265625, "learning_rate": 9.354135632631073e-05, "loss": 2.0169, "step": 3751 }, { "epoch": 0.16399318152017134, "grad_norm": 2.90625, "learning_rate": 9.353797934987338e-05, "loss": 1.8563, "step": 3752 }, { "epoch": 0.16403688972420125, "grad_norm": 3.21875, "learning_rate": 9.353460155181098e-05, "loss": 3.3005, "step": 3753 }, { "epoch": 0.16408059792823113, "grad_norm": 2.328125, "learning_rate": 9.35312229321873e-05, "loss": 1.9956, "step": 3754 }, { "epoch": 0.16412430613226103, "grad_norm": 1.9921875, "learning_rate": 9.352784349106608e-05, "loss": 1.6531, "step": 3755 }, { "epoch": 0.1641680143362909, "grad_norm": 2.40625, "learning_rate": 9.35244632285111e-05, "loss": 1.8427, "step": 3756 }, { "epoch": 0.16421172254032082, "grad_norm": 2.546875, "learning_rate": 9.352108214458616e-05, "loss": 2.2344, "step": 3757 }, { "epoch": 0.16425543074435073, "grad_norm": 3.078125, "learning_rate": 9.351770023935506e-05, "loss": 1.8037, "step": 3758 }, { "epoch": 0.1642991389483806, "grad_norm": 3.4375, "learning_rate": 9.35143175128816e-05, "loss": 1.6607, "step": 3759 }, { "epoch": 0.16434284715241051, "grad_norm": 2.828125, "learning_rate": 9.351093396522965e-05, "loss": 2.2265, "step": 3760 }, { "epoch": 0.1643865553564404, "grad_norm": 2.515625, "learning_rate": 9.350754959646306e-05, "loss": 1.9707, "step": 3761 }, { "epoch": 0.1644302635604703, "grad_norm": 2.484375, "learning_rate": 9.350416440664566e-05, "loss": 2.1641, "step": 3762 }, { "epoch": 0.1644739717645002, "grad_norm": 2.234375, "learning_rate": 9.350077839584138e-05, "loss": 2.1195, "step": 3763 }, { "epoch": 0.1645176799685301, "grad_norm": 2.40625, "learning_rate": 9.34973915641141e-05, "loss": 2.0303, "step": 3764 }, { "epoch": 0.16456138817256, "grad_norm": 2.265625, "learning_rate": 9.349400391152773e-05, "loss": 2.0424, "step": 3765 }, { "epoch": 0.16460509637658988, "grad_norm": 2.265625, "learning_rate": 9.34906154381462e-05, "loss": 1.924, "step": 3766 }, { "epoch": 0.16464880458061978, "grad_norm": 2.25, "learning_rate": 9.348722614403345e-05, "loss": 2.0095, "step": 3767 }, { "epoch": 0.1646925127846497, "grad_norm": 2.546875, "learning_rate": 9.348383602925347e-05, "loss": 1.8124, "step": 3768 }, { "epoch": 0.16473622098867957, "grad_norm": 2.4375, "learning_rate": 9.34804450938702e-05, "loss": 1.9358, "step": 3769 }, { "epoch": 0.16477992919270948, "grad_norm": 2.546875, "learning_rate": 9.347705333794767e-05, "loss": 2.5346, "step": 3770 }, { "epoch": 0.16482363739673936, "grad_norm": 2.359375, "learning_rate": 9.347366076154984e-05, "loss": 1.8833, "step": 3771 }, { "epoch": 0.16486734560076927, "grad_norm": 2.515625, "learning_rate": 9.347026736474078e-05, "loss": 1.9519, "step": 3772 }, { "epoch": 0.16491105380479917, "grad_norm": 2.15625, "learning_rate": 9.346687314758448e-05, "loss": 2.0777, "step": 3773 }, { "epoch": 0.16495476200882905, "grad_norm": 3.9375, "learning_rate": 9.346347811014504e-05, "loss": 2.6569, "step": 3774 }, { "epoch": 0.16499847021285896, "grad_norm": 2.703125, "learning_rate": 9.346008225248651e-05, "loss": 2.0684, "step": 3775 }, { "epoch": 0.16504217841688884, "grad_norm": 2.25, "learning_rate": 9.345668557467298e-05, "loss": 2.0878, "step": 3776 }, { "epoch": 0.16508588662091875, "grad_norm": 2.15625, "learning_rate": 9.345328807676854e-05, "loss": 2.1552, "step": 3777 }, { "epoch": 0.16512959482494866, "grad_norm": 2.625, "learning_rate": 9.34498897588373e-05, "loss": 1.7176, "step": 3778 }, { "epoch": 0.16517330302897854, "grad_norm": 5.34375, "learning_rate": 9.344649062094342e-05, "loss": 2.4368, "step": 3779 }, { "epoch": 0.16521701123300844, "grad_norm": 3.171875, "learning_rate": 9.344309066315101e-05, "loss": 2.6929, "step": 3780 }, { "epoch": 0.16526071943703832, "grad_norm": 2.375, "learning_rate": 9.343968988552426e-05, "loss": 2.2669, "step": 3781 }, { "epoch": 0.16530442764106823, "grad_norm": 2.4375, "learning_rate": 9.343628828812734e-05, "loss": 2.6719, "step": 3782 }, { "epoch": 0.16534813584509814, "grad_norm": 3.703125, "learning_rate": 9.343288587102443e-05, "loss": 2.3055, "step": 3783 }, { "epoch": 0.16539184404912802, "grad_norm": 11.5625, "learning_rate": 9.342948263427977e-05, "loss": 5.9702, "step": 3784 }, { "epoch": 0.16543555225315792, "grad_norm": 3.828125, "learning_rate": 9.342607857795756e-05, "loss": 2.4608, "step": 3785 }, { "epoch": 0.1654792604571878, "grad_norm": 2.640625, "learning_rate": 9.342267370212203e-05, "loss": 2.3413, "step": 3786 }, { "epoch": 0.1655229686612177, "grad_norm": 2.59375, "learning_rate": 9.341926800683745e-05, "loss": 1.8671, "step": 3787 }, { "epoch": 0.16556667686524762, "grad_norm": 2.953125, "learning_rate": 9.34158614921681e-05, "loss": 2.4157, "step": 3788 }, { "epoch": 0.1656103850692775, "grad_norm": 2.640625, "learning_rate": 9.341245415817825e-05, "loss": 2.5063, "step": 3789 }, { "epoch": 0.1656540932733074, "grad_norm": 2.3125, "learning_rate": 9.34090460049322e-05, "loss": 1.7339, "step": 3790 }, { "epoch": 0.1656978014773373, "grad_norm": 3.234375, "learning_rate": 9.340563703249428e-05, "loss": 2.6966, "step": 3791 }, { "epoch": 0.1657415096813672, "grad_norm": 2.140625, "learning_rate": 9.34022272409288e-05, "loss": 2.1976, "step": 3792 }, { "epoch": 0.1657852178853971, "grad_norm": 1.953125, "learning_rate": 9.339881663030014e-05, "loss": 1.7816, "step": 3793 }, { "epoch": 0.16582892608942698, "grad_norm": 2.640625, "learning_rate": 9.339540520067265e-05, "loss": 1.6969, "step": 3794 }, { "epoch": 0.1658726342934569, "grad_norm": 2.046875, "learning_rate": 9.33919929521107e-05, "loss": 1.4362, "step": 3795 }, { "epoch": 0.16591634249748677, "grad_norm": 2.015625, "learning_rate": 9.338857988467868e-05, "loss": 1.5953, "step": 3796 }, { "epoch": 0.16596005070151668, "grad_norm": 2.484375, "learning_rate": 9.338516599844101e-05, "loss": 2.5578, "step": 3797 }, { "epoch": 0.16600375890554658, "grad_norm": 2.734375, "learning_rate": 9.338175129346213e-05, "loss": 2.6275, "step": 3798 }, { "epoch": 0.16604746710957646, "grad_norm": 2.140625, "learning_rate": 9.337833576980646e-05, "loss": 2.0663, "step": 3799 }, { "epoch": 0.16609117531360637, "grad_norm": 2.4375, "learning_rate": 9.337491942753845e-05, "loss": 2.4615, "step": 3800 }, { "epoch": 0.16613488351763625, "grad_norm": 2.578125, "learning_rate": 9.337150226672258e-05, "loss": 1.79, "step": 3801 }, { "epoch": 0.16617859172166616, "grad_norm": 2.90625, "learning_rate": 9.336808428742335e-05, "loss": 2.3343, "step": 3802 }, { "epoch": 0.16622229992569607, "grad_norm": 2.234375, "learning_rate": 9.336466548970522e-05, "loss": 2.0193, "step": 3803 }, { "epoch": 0.16626600812972595, "grad_norm": 2.65625, "learning_rate": 9.336124587363278e-05, "loss": 1.7385, "step": 3804 }, { "epoch": 0.16630971633375585, "grad_norm": 2.1875, "learning_rate": 9.335782543927049e-05, "loss": 1.5442, "step": 3805 }, { "epoch": 0.16635342453778573, "grad_norm": 3.296875, "learning_rate": 9.335440418668294e-05, "loss": 2.1318, "step": 3806 }, { "epoch": 0.16639713274181564, "grad_norm": 2.296875, "learning_rate": 9.335098211593469e-05, "loss": 1.9159, "step": 3807 }, { "epoch": 0.16644084094584555, "grad_norm": 2.109375, "learning_rate": 9.334755922709031e-05, "loss": 1.7075, "step": 3808 }, { "epoch": 0.16648454914987543, "grad_norm": 2.65625, "learning_rate": 9.334413552021439e-05, "loss": 2.5683, "step": 3809 }, { "epoch": 0.16652825735390533, "grad_norm": 2.390625, "learning_rate": 9.334071099537154e-05, "loss": 1.9669, "step": 3810 }, { "epoch": 0.16657196555793521, "grad_norm": 2.234375, "learning_rate": 9.333728565262642e-05, "loss": 2.0282, "step": 3811 }, { "epoch": 0.16661567376196512, "grad_norm": 2.65625, "learning_rate": 9.333385949204363e-05, "loss": 1.8504, "step": 3812 }, { "epoch": 0.16665938196599503, "grad_norm": 2.125, "learning_rate": 9.333043251368784e-05, "loss": 1.7398, "step": 3813 }, { "epoch": 0.1667030901700249, "grad_norm": 2.203125, "learning_rate": 9.332700471762374e-05, "loss": 1.6821, "step": 3814 }, { "epoch": 0.16674679837405482, "grad_norm": 2.21875, "learning_rate": 9.332357610391598e-05, "loss": 2.1855, "step": 3815 }, { "epoch": 0.1667905065780847, "grad_norm": 3.46875, "learning_rate": 9.332014667262929e-05, "loss": 2.1317, "step": 3816 }, { "epoch": 0.1668342147821146, "grad_norm": 2.71875, "learning_rate": 9.33167164238284e-05, "loss": 1.906, "step": 3817 }, { "epoch": 0.1668779229861445, "grad_norm": 2.46875, "learning_rate": 9.331328535757801e-05, "loss": 1.8553, "step": 3818 }, { "epoch": 0.1669216311901744, "grad_norm": 2.453125, "learning_rate": 9.33098534739429e-05, "loss": 1.9632, "step": 3819 }, { "epoch": 0.1669653393942043, "grad_norm": 2.34375, "learning_rate": 9.33064207729878e-05, "loss": 2.1928, "step": 3820 }, { "epoch": 0.16700904759823418, "grad_norm": 2.28125, "learning_rate": 9.330298725477753e-05, "loss": 2.4237, "step": 3821 }, { "epoch": 0.16705275580226409, "grad_norm": 3.09375, "learning_rate": 9.329955291937684e-05, "loss": 3.0202, "step": 3822 }, { "epoch": 0.167096464006294, "grad_norm": 2.578125, "learning_rate": 9.329611776685059e-05, "loss": 2.1526, "step": 3823 }, { "epoch": 0.16714017221032387, "grad_norm": 2.21875, "learning_rate": 9.329268179726359e-05, "loss": 2.0835, "step": 3824 }, { "epoch": 0.16718388041435378, "grad_norm": 2.453125, "learning_rate": 9.328924501068066e-05, "loss": 1.9372, "step": 3825 }, { "epoch": 0.16722758861838366, "grad_norm": 2.328125, "learning_rate": 9.328580740716666e-05, "loss": 2.1097, "step": 3826 }, { "epoch": 0.16727129682241357, "grad_norm": 2.515625, "learning_rate": 9.32823689867865e-05, "loss": 1.9521, "step": 3827 }, { "epoch": 0.16731500502644348, "grad_norm": 2.234375, "learning_rate": 9.327892974960503e-05, "loss": 1.81, "step": 3828 }, { "epoch": 0.16735871323047335, "grad_norm": 2.015625, "learning_rate": 9.327548969568716e-05, "loss": 1.8508, "step": 3829 }, { "epoch": 0.16740242143450326, "grad_norm": 3.203125, "learning_rate": 9.327204882509784e-05, "loss": 3.2697, "step": 3830 }, { "epoch": 0.16744612963853314, "grad_norm": 2.34375, "learning_rate": 9.326860713790195e-05, "loss": 1.9622, "step": 3831 }, { "epoch": 0.16748983784256305, "grad_norm": 2.546875, "learning_rate": 9.326516463416448e-05, "loss": 2.7874, "step": 3832 }, { "epoch": 0.16753354604659296, "grad_norm": 2.921875, "learning_rate": 9.32617213139504e-05, "loss": 1.9411, "step": 3833 }, { "epoch": 0.16757725425062284, "grad_norm": 3.046875, "learning_rate": 9.325827717732465e-05, "loss": 2.0798, "step": 3834 }, { "epoch": 0.16762096245465274, "grad_norm": 3.296875, "learning_rate": 9.325483222435226e-05, "loss": 2.3578, "step": 3835 }, { "epoch": 0.16766467065868262, "grad_norm": 2.25, "learning_rate": 9.325138645509823e-05, "loss": 1.6908, "step": 3836 }, { "epoch": 0.16770837886271253, "grad_norm": 2.25, "learning_rate": 9.324793986962758e-05, "loss": 1.8728, "step": 3837 }, { "epoch": 0.16775208706674244, "grad_norm": 2.3125, "learning_rate": 9.324449246800537e-05, "loss": 1.8805, "step": 3838 }, { "epoch": 0.16779579527077232, "grad_norm": 1.984375, "learning_rate": 9.324104425029665e-05, "loss": 1.7632, "step": 3839 }, { "epoch": 0.16783950347480223, "grad_norm": 3.09375, "learning_rate": 9.32375952165665e-05, "loss": 2.3108, "step": 3840 }, { "epoch": 0.1678832116788321, "grad_norm": 2.296875, "learning_rate": 9.323414536687997e-05, "loss": 2.0386, "step": 3841 }, { "epoch": 0.167926919882862, "grad_norm": 2.671875, "learning_rate": 9.323069470130221e-05, "loss": 2.6539, "step": 3842 }, { "epoch": 0.16797062808689192, "grad_norm": 3.171875, "learning_rate": 9.322724321989833e-05, "loss": 2.3568, "step": 3843 }, { "epoch": 0.1680143362909218, "grad_norm": 2.359375, "learning_rate": 9.322379092273345e-05, "loss": 2.3111, "step": 3844 }, { "epoch": 0.1680580444949517, "grad_norm": 2.21875, "learning_rate": 9.322033780987272e-05, "loss": 1.8544, "step": 3845 }, { "epoch": 0.1681017526989816, "grad_norm": 2.90625, "learning_rate": 9.321688388138132e-05, "loss": 2.1158, "step": 3846 }, { "epoch": 0.1681454609030115, "grad_norm": 2.53125, "learning_rate": 9.321342913732441e-05, "loss": 2.156, "step": 3847 }, { "epoch": 0.1681891691070414, "grad_norm": 2.28125, "learning_rate": 9.32099735777672e-05, "loss": 1.8919, "step": 3848 }, { "epoch": 0.16823287731107128, "grad_norm": 2.390625, "learning_rate": 9.320651720277491e-05, "loss": 1.758, "step": 3849 }, { "epoch": 0.1682765855151012, "grad_norm": 3.125, "learning_rate": 9.320306001241275e-05, "loss": 2.3838, "step": 3850 }, { "epoch": 0.16832029371913107, "grad_norm": 2.078125, "learning_rate": 9.319960200674597e-05, "loss": 2.0288, "step": 3851 }, { "epoch": 0.16836400192316098, "grad_norm": 2.25, "learning_rate": 9.319614318583982e-05, "loss": 2.3708, "step": 3852 }, { "epoch": 0.16840771012719088, "grad_norm": 3.171875, "learning_rate": 9.319268354975959e-05, "loss": 2.506, "step": 3853 }, { "epoch": 0.16845141833122076, "grad_norm": 2.484375, "learning_rate": 9.318922309857055e-05, "loss": 1.9185, "step": 3854 }, { "epoch": 0.16849512653525067, "grad_norm": 2.75, "learning_rate": 9.3185761832338e-05, "loss": 2.2522, "step": 3855 }, { "epoch": 0.16853883473928055, "grad_norm": 2.640625, "learning_rate": 9.318229975112728e-05, "loss": 2.6273, "step": 3856 }, { "epoch": 0.16858254294331046, "grad_norm": 2.375, "learning_rate": 9.317883685500373e-05, "loss": 2.1665, "step": 3857 }, { "epoch": 0.16862625114734037, "grad_norm": 2.15625, "learning_rate": 9.317537314403267e-05, "loss": 1.6587, "step": 3858 }, { "epoch": 0.16866995935137025, "grad_norm": 2.1875, "learning_rate": 9.317190861827949e-05, "loss": 1.9373, "step": 3859 }, { "epoch": 0.16871366755540015, "grad_norm": 2.1875, "learning_rate": 9.316844327780955e-05, "loss": 1.7105, "step": 3860 }, { "epoch": 0.16875737575943003, "grad_norm": 2.828125, "learning_rate": 9.316497712268825e-05, "loss": 1.3369, "step": 3861 }, { "epoch": 0.16880108396345994, "grad_norm": 2.078125, "learning_rate": 9.316151015298103e-05, "loss": 1.8242, "step": 3862 }, { "epoch": 0.16884479216748985, "grad_norm": 2.96875, "learning_rate": 9.315804236875327e-05, "loss": 1.7831, "step": 3863 }, { "epoch": 0.16888850037151973, "grad_norm": 2.625, "learning_rate": 9.315457377007046e-05, "loss": 2.3505, "step": 3864 }, { "epoch": 0.16893220857554964, "grad_norm": 2.546875, "learning_rate": 9.315110435699804e-05, "loss": 2.0785, "step": 3865 }, { "epoch": 0.16897591677957952, "grad_norm": 2.203125, "learning_rate": 9.314763412960144e-05, "loss": 1.6823, "step": 3866 }, { "epoch": 0.16901962498360942, "grad_norm": 2.171875, "learning_rate": 9.314416308794621e-05, "loss": 2.4492, "step": 3867 }, { "epoch": 0.16906333318763933, "grad_norm": 4.59375, "learning_rate": 9.314069123209784e-05, "loss": 1.8297, "step": 3868 }, { "epoch": 0.1691070413916692, "grad_norm": 2.3125, "learning_rate": 9.313721856212181e-05, "loss": 2.1315, "step": 3869 }, { "epoch": 0.16915074959569912, "grad_norm": 2.59375, "learning_rate": 9.313374507808371e-05, "loss": 2.3644, "step": 3870 }, { "epoch": 0.169194457799729, "grad_norm": 2.34375, "learning_rate": 9.313027078004903e-05, "loss": 2.1034, "step": 3871 }, { "epoch": 0.1692381660037589, "grad_norm": 2.625, "learning_rate": 9.31267956680834e-05, "loss": 1.766, "step": 3872 }, { "epoch": 0.1692818742077888, "grad_norm": 2.6875, "learning_rate": 9.312331974225235e-05, "loss": 2.1167, "step": 3873 }, { "epoch": 0.1693255824118187, "grad_norm": 2.4375, "learning_rate": 9.31198430026215e-05, "loss": 2.5551, "step": 3874 }, { "epoch": 0.1693692906158486, "grad_norm": 2.1875, "learning_rate": 9.311636544925645e-05, "loss": 1.8521, "step": 3875 }, { "epoch": 0.16941299881987848, "grad_norm": 2.328125, "learning_rate": 9.311288708222284e-05, "loss": 2.3467, "step": 3876 }, { "epoch": 0.1694567070239084, "grad_norm": 2.109375, "learning_rate": 9.310940790158629e-05, "loss": 1.8095, "step": 3877 }, { "epoch": 0.1695004152279383, "grad_norm": 2.21875, "learning_rate": 9.310592790741248e-05, "loss": 2.0915, "step": 3878 }, { "epoch": 0.16954412343196817, "grad_norm": 1.984375, "learning_rate": 9.310244709976707e-05, "loss": 1.654, "step": 3879 }, { "epoch": 0.16958783163599808, "grad_norm": 2.421875, "learning_rate": 9.309896547871576e-05, "loss": 2.3766, "step": 3880 }, { "epoch": 0.16963153984002796, "grad_norm": 3.5, "learning_rate": 9.309548304432421e-05, "loss": 2.7146, "step": 3881 }, { "epoch": 0.16967524804405787, "grad_norm": 3.125, "learning_rate": 9.30919997966582e-05, "loss": 1.8983, "step": 3882 }, { "epoch": 0.16971895624808778, "grad_norm": 2.171875, "learning_rate": 9.308851573578344e-05, "loss": 1.8319, "step": 3883 }, { "epoch": 0.16976266445211766, "grad_norm": 2.265625, "learning_rate": 9.308503086176565e-05, "loss": 1.8928, "step": 3884 }, { "epoch": 0.16980637265614756, "grad_norm": 3.765625, "learning_rate": 9.308154517467065e-05, "loss": 1.9515, "step": 3885 }, { "epoch": 0.16985008086017744, "grad_norm": 2.84375, "learning_rate": 9.307805867456418e-05, "loss": 2.2746, "step": 3886 }, { "epoch": 0.16989378906420735, "grad_norm": 2.09375, "learning_rate": 9.307457136151204e-05, "loss": 1.6149, "step": 3887 }, { "epoch": 0.16993749726823726, "grad_norm": 3.234375, "learning_rate": 9.307108323558005e-05, "loss": 1.6674, "step": 3888 }, { "epoch": 0.16998120547226714, "grad_norm": 4.0625, "learning_rate": 9.306759429683404e-05, "loss": 1.9972, "step": 3889 }, { "epoch": 0.17002491367629705, "grad_norm": 3.921875, "learning_rate": 9.306410454533982e-05, "loss": 2.2144, "step": 3890 }, { "epoch": 0.17006862188032693, "grad_norm": 2.40625, "learning_rate": 9.306061398116331e-05, "loss": 1.7609, "step": 3891 }, { "epoch": 0.17011233008435683, "grad_norm": 2.90625, "learning_rate": 9.305712260437031e-05, "loss": 2.6109, "step": 3892 }, { "epoch": 0.17015603828838674, "grad_norm": 2.765625, "learning_rate": 9.305363041502675e-05, "loss": 2.5016, "step": 3893 }, { "epoch": 0.17019974649241662, "grad_norm": 2.46875, "learning_rate": 9.305013741319852e-05, "loss": 2.1102, "step": 3894 }, { "epoch": 0.17024345469644653, "grad_norm": 3.3125, "learning_rate": 9.304664359895155e-05, "loss": 2.5799, "step": 3895 }, { "epoch": 0.1702871629004764, "grad_norm": 3.171875, "learning_rate": 9.304314897235176e-05, "loss": 1.9134, "step": 3896 }, { "epoch": 0.17033087110450632, "grad_norm": 2.34375, "learning_rate": 9.303965353346508e-05, "loss": 2.659, "step": 3897 }, { "epoch": 0.17037457930853622, "grad_norm": 2.5, "learning_rate": 9.303615728235753e-05, "loss": 2.0285, "step": 3898 }, { "epoch": 0.1704182875125661, "grad_norm": 2.5625, "learning_rate": 9.303266021909504e-05, "loss": 2.3058, "step": 3899 }, { "epoch": 0.170461995716596, "grad_norm": 2.625, "learning_rate": 9.302916234374361e-05, "loss": 2.6567, "step": 3900 }, { "epoch": 0.1705057039206259, "grad_norm": 2.4375, "learning_rate": 9.302566365636928e-05, "loss": 2.077, "step": 3901 }, { "epoch": 0.1705494121246558, "grad_norm": 2.8125, "learning_rate": 9.302216415703805e-05, "loss": 2.523, "step": 3902 }, { "epoch": 0.1705931203286857, "grad_norm": 2.859375, "learning_rate": 9.301866384581597e-05, "loss": 1.9372, "step": 3903 }, { "epoch": 0.17063682853271558, "grad_norm": 2.71875, "learning_rate": 9.301516272276907e-05, "loss": 2.5272, "step": 3904 }, { "epoch": 0.1706805367367455, "grad_norm": 2.484375, "learning_rate": 9.301166078796347e-05, "loss": 1.977, "step": 3905 }, { "epoch": 0.17072424494077537, "grad_norm": 2.625, "learning_rate": 9.300815804146522e-05, "loss": 2.5269, "step": 3906 }, { "epoch": 0.17076795314480528, "grad_norm": 2.1875, "learning_rate": 9.300465448334044e-05, "loss": 2.3774, "step": 3907 }, { "epoch": 0.1708116613488352, "grad_norm": 2.578125, "learning_rate": 9.300115011365522e-05, "loss": 2.1276, "step": 3908 }, { "epoch": 0.17085536955286507, "grad_norm": 2.78125, "learning_rate": 9.299764493247574e-05, "loss": 1.9362, "step": 3909 }, { "epoch": 0.17089907775689497, "grad_norm": 2.203125, "learning_rate": 9.29941389398681e-05, "loss": 2.0708, "step": 3910 }, { "epoch": 0.17094278596092485, "grad_norm": 2.484375, "learning_rate": 9.299063213589849e-05, "loss": 2.2451, "step": 3911 }, { "epoch": 0.17098649416495476, "grad_norm": 2.234375, "learning_rate": 9.298712452063309e-05, "loss": 1.7547, "step": 3912 }, { "epoch": 0.17103020236898467, "grad_norm": 2.640625, "learning_rate": 9.298361609413805e-05, "loss": 2.5509, "step": 3913 }, { "epoch": 0.17107391057301455, "grad_norm": 2.75, "learning_rate": 9.298010685647966e-05, "loss": 1.9587, "step": 3914 }, { "epoch": 0.17111761877704446, "grad_norm": 2.125, "learning_rate": 9.297659680772408e-05, "loss": 1.6686, "step": 3915 }, { "epoch": 0.17116132698107434, "grad_norm": 2.484375, "learning_rate": 9.297308594793756e-05, "loss": 2.1915, "step": 3916 }, { "epoch": 0.17120503518510424, "grad_norm": 2.125, "learning_rate": 9.296957427718638e-05, "loss": 2.1469, "step": 3917 }, { "epoch": 0.17124874338913415, "grad_norm": 2.171875, "learning_rate": 9.296606179553679e-05, "loss": 2.1142, "step": 3918 }, { "epoch": 0.17129245159316403, "grad_norm": 2.65625, "learning_rate": 9.296254850305506e-05, "loss": 1.9486, "step": 3919 }, { "epoch": 0.17133615979719394, "grad_norm": 2.578125, "learning_rate": 9.295903439980755e-05, "loss": 1.8862, "step": 3920 }, { "epoch": 0.17137986800122382, "grad_norm": 2.671875, "learning_rate": 9.29555194858605e-05, "loss": 3.0025, "step": 3921 }, { "epoch": 0.17142357620525372, "grad_norm": 2.171875, "learning_rate": 9.295200376128031e-05, "loss": 2.0601, "step": 3922 }, { "epoch": 0.17146728440928363, "grad_norm": 2.0625, "learning_rate": 9.294848722613326e-05, "loss": 1.4115, "step": 3923 }, { "epoch": 0.1715109926133135, "grad_norm": 1.9921875, "learning_rate": 9.294496988048578e-05, "loss": 1.6362, "step": 3924 }, { "epoch": 0.17155470081734342, "grad_norm": 2.390625, "learning_rate": 9.29414517244042e-05, "loss": 1.8668, "step": 3925 }, { "epoch": 0.1715984090213733, "grad_norm": 2.3125, "learning_rate": 9.293793275795492e-05, "loss": 1.9503, "step": 3926 }, { "epoch": 0.1716421172254032, "grad_norm": 2.0625, "learning_rate": 9.293441298120436e-05, "loss": 1.7978, "step": 3927 }, { "epoch": 0.17168582542943311, "grad_norm": 2.453125, "learning_rate": 9.293089239421895e-05, "loss": 2.4183, "step": 3928 }, { "epoch": 0.171729533633463, "grad_norm": 1.8828125, "learning_rate": 9.29273709970651e-05, "loss": 1.8488, "step": 3929 }, { "epoch": 0.1717732418374929, "grad_norm": 2.4375, "learning_rate": 9.29238487898093e-05, "loss": 2.9994, "step": 3930 }, { "epoch": 0.17181695004152278, "grad_norm": 7.125, "learning_rate": 9.2920325772518e-05, "loss": 2.1471, "step": 3931 }, { "epoch": 0.1718606582455527, "grad_norm": 2.484375, "learning_rate": 9.291680194525767e-05, "loss": 1.7098, "step": 3932 }, { "epoch": 0.1719043664495826, "grad_norm": 3.40625, "learning_rate": 9.291327730809483e-05, "loss": 1.9738, "step": 3933 }, { "epoch": 0.17194807465361248, "grad_norm": 2.875, "learning_rate": 9.2909751861096e-05, "loss": 1.9738, "step": 3934 }, { "epoch": 0.17199178285764238, "grad_norm": 2.59375, "learning_rate": 9.29062256043277e-05, "loss": 2.3958, "step": 3935 }, { "epoch": 0.17203549106167226, "grad_norm": 2.984375, "learning_rate": 9.290269853785645e-05, "loss": 2.3367, "step": 3936 }, { "epoch": 0.17207919926570217, "grad_norm": 5.375, "learning_rate": 9.289917066174886e-05, "loss": 2.0566, "step": 3937 }, { "epoch": 0.17212290746973208, "grad_norm": 5.46875, "learning_rate": 9.289564197607148e-05, "loss": 1.1127, "step": 3938 }, { "epoch": 0.17216661567376196, "grad_norm": 2.84375, "learning_rate": 9.28921124808909e-05, "loss": 2.359, "step": 3939 }, { "epoch": 0.17221032387779187, "grad_norm": 2.859375, "learning_rate": 9.288858217627374e-05, "loss": 3.0929, "step": 3940 }, { "epoch": 0.17225403208182175, "grad_norm": 2.625, "learning_rate": 9.28850510622866e-05, "loss": 1.8901, "step": 3941 }, { "epoch": 0.17229774028585165, "grad_norm": 2.203125, "learning_rate": 9.288151913899614e-05, "loss": 1.8007, "step": 3942 }, { "epoch": 0.17234144848988156, "grad_norm": 2.859375, "learning_rate": 9.287798640646898e-05, "loss": 3.0978, "step": 3943 }, { "epoch": 0.17238515669391144, "grad_norm": 2.828125, "learning_rate": 9.287445286477184e-05, "loss": 2.0779, "step": 3944 }, { "epoch": 0.17242886489794135, "grad_norm": 2.15625, "learning_rate": 9.287091851397137e-05, "loss": 1.9458, "step": 3945 }, { "epoch": 0.17247257310197123, "grad_norm": 2.03125, "learning_rate": 9.286738335413425e-05, "loss": 1.902, "step": 3946 }, { "epoch": 0.17251628130600113, "grad_norm": 3.0, "learning_rate": 9.286384738532723e-05, "loss": 2.942, "step": 3947 }, { "epoch": 0.17255998951003104, "grad_norm": 4.1875, "learning_rate": 9.286031060761703e-05, "loss": 1.6749, "step": 3948 }, { "epoch": 0.17260369771406092, "grad_norm": 2.6875, "learning_rate": 9.285677302107039e-05, "loss": 2.1474, "step": 3949 }, { "epoch": 0.17264740591809083, "grad_norm": 2.890625, "learning_rate": 9.285323462575406e-05, "loss": 2.5917, "step": 3950 }, { "epoch": 0.1726911141221207, "grad_norm": 2.84375, "learning_rate": 9.284969542173482e-05, "loss": 2.1621, "step": 3951 }, { "epoch": 0.17273482232615062, "grad_norm": 2.671875, "learning_rate": 9.284615540907947e-05, "loss": 2.9806, "step": 3952 }, { "epoch": 0.17277853053018052, "grad_norm": 2.34375, "learning_rate": 9.28426145878548e-05, "loss": 1.9889, "step": 3953 }, { "epoch": 0.1728222387342104, "grad_norm": 2.875, "learning_rate": 9.283907295812765e-05, "loss": 2.4204, "step": 3954 }, { "epoch": 0.1728659469382403, "grad_norm": 2.46875, "learning_rate": 9.283553051996483e-05, "loss": 2.1639, "step": 3955 }, { "epoch": 0.1729096551422702, "grad_norm": 2.859375, "learning_rate": 9.283198727343322e-05, "loss": 3.293, "step": 3956 }, { "epoch": 0.1729533633463001, "grad_norm": 2.6875, "learning_rate": 9.282844321859965e-05, "loss": 2.632, "step": 3957 }, { "epoch": 0.17299707155033, "grad_norm": 2.359375, "learning_rate": 9.282489835553106e-05, "loss": 1.9452, "step": 3958 }, { "epoch": 0.17304077975435989, "grad_norm": 2.96875, "learning_rate": 9.282135268429427e-05, "loss": 1.9549, "step": 3959 }, { "epoch": 0.1730844879583898, "grad_norm": 2.75, "learning_rate": 9.281780620495624e-05, "loss": 2.5024, "step": 3960 }, { "epoch": 0.17312819616241967, "grad_norm": 3.109375, "learning_rate": 9.28142589175839e-05, "loss": 3.4426, "step": 3961 }, { "epoch": 0.17317190436644958, "grad_norm": 2.140625, "learning_rate": 9.281071082224418e-05, "loss": 1.8448, "step": 3962 }, { "epoch": 0.1732156125704795, "grad_norm": 2.765625, "learning_rate": 9.280716191900404e-05, "loss": 1.6593, "step": 3963 }, { "epoch": 0.17325932077450937, "grad_norm": 3.015625, "learning_rate": 9.280361220793044e-05, "loss": 2.7787, "step": 3964 }, { "epoch": 0.17330302897853928, "grad_norm": 2.8125, "learning_rate": 9.280006168909039e-05, "loss": 2.3553, "step": 3965 }, { "epoch": 0.17334673718256915, "grad_norm": 2.28125, "learning_rate": 9.279651036255088e-05, "loss": 1.7812, "step": 3966 }, { "epoch": 0.17339044538659906, "grad_norm": 2.359375, "learning_rate": 9.279295822837893e-05, "loss": 2.0827, "step": 3967 }, { "epoch": 0.17343415359062897, "grad_norm": 2.265625, "learning_rate": 9.278940528664158e-05, "loss": 2.0041, "step": 3968 }, { "epoch": 0.17347786179465885, "grad_norm": 2.359375, "learning_rate": 9.278585153740587e-05, "loss": 2.1094, "step": 3969 }, { "epoch": 0.17352156999868876, "grad_norm": 2.015625, "learning_rate": 9.278229698073888e-05, "loss": 1.5974, "step": 3970 }, { "epoch": 0.17356527820271864, "grad_norm": 2.1875, "learning_rate": 9.277874161670766e-05, "loss": 2.0873, "step": 3971 }, { "epoch": 0.17360898640674854, "grad_norm": 2.1875, "learning_rate": 9.277518544537934e-05, "loss": 1.7993, "step": 3972 }, { "epoch": 0.17365269461077845, "grad_norm": 2.34375, "learning_rate": 9.277162846682102e-05, "loss": 1.8639, "step": 3973 }, { "epoch": 0.17369640281480833, "grad_norm": 2.578125, "learning_rate": 9.276807068109981e-05, "loss": 2.4488, "step": 3974 }, { "epoch": 0.17374011101883824, "grad_norm": 2.125, "learning_rate": 9.276451208828285e-05, "loss": 1.8302, "step": 3975 }, { "epoch": 0.17378381922286812, "grad_norm": 2.390625, "learning_rate": 9.276095268843732e-05, "loss": 2.6344, "step": 3976 }, { "epoch": 0.17382752742689803, "grad_norm": 2.546875, "learning_rate": 9.275739248163037e-05, "loss": 2.0113, "step": 3977 }, { "epoch": 0.17387123563092793, "grad_norm": 2.609375, "learning_rate": 9.27538314679292e-05, "loss": 2.4077, "step": 3978 }, { "epoch": 0.1739149438349578, "grad_norm": 2.53125, "learning_rate": 9.275026964740101e-05, "loss": 1.8287, "step": 3979 }, { "epoch": 0.17395865203898772, "grad_norm": 4.4375, "learning_rate": 9.2746707020113e-05, "loss": 1.3134, "step": 3980 }, { "epoch": 0.1740023602430176, "grad_norm": 2.421875, "learning_rate": 9.274314358613241e-05, "loss": 2.0166, "step": 3981 }, { "epoch": 0.1740460684470475, "grad_norm": 2.28125, "learning_rate": 9.27395793455265e-05, "loss": 2.2178, "step": 3982 }, { "epoch": 0.17408977665107742, "grad_norm": 2.765625, "learning_rate": 9.273601429836253e-05, "loss": 2.4367, "step": 3983 }, { "epoch": 0.1741334848551073, "grad_norm": 2.4375, "learning_rate": 9.273244844470777e-05, "loss": 2.2315, "step": 3984 }, { "epoch": 0.1741771930591372, "grad_norm": 2.484375, "learning_rate": 9.27288817846295e-05, "loss": 2.0803, "step": 3985 }, { "epoch": 0.17422090126316708, "grad_norm": 2.515625, "learning_rate": 9.272531431819504e-05, "loss": 2.5731, "step": 3986 }, { "epoch": 0.174264609467197, "grad_norm": 2.265625, "learning_rate": 9.272174604547172e-05, "loss": 2.1792, "step": 3987 }, { "epoch": 0.1743083176712269, "grad_norm": 2.015625, "learning_rate": 9.271817696652688e-05, "loss": 1.598, "step": 3988 }, { "epoch": 0.17435202587525678, "grad_norm": 2.140625, "learning_rate": 9.271460708142787e-05, "loss": 2.1333, "step": 3989 }, { "epoch": 0.17439573407928669, "grad_norm": 2.40625, "learning_rate": 9.271103639024204e-05, "loss": 2.2194, "step": 3990 }, { "epoch": 0.17443944228331656, "grad_norm": 2.421875, "learning_rate": 9.27074648930368e-05, "loss": 2.4873, "step": 3991 }, { "epoch": 0.17448315048734647, "grad_norm": 2.375, "learning_rate": 9.270389258987956e-05, "loss": 1.8827, "step": 3992 }, { "epoch": 0.17452685869137638, "grad_norm": 3.28125, "learning_rate": 9.270031948083769e-05, "loss": 2.8426, "step": 3993 }, { "epoch": 0.17457056689540626, "grad_norm": 2.59375, "learning_rate": 9.269674556597865e-05, "loss": 2.0483, "step": 3994 }, { "epoch": 0.17461427509943617, "grad_norm": 2.65625, "learning_rate": 9.269317084536988e-05, "loss": 2.9369, "step": 3995 }, { "epoch": 0.17465798330346605, "grad_norm": 2.0, "learning_rate": 9.268959531907883e-05, "loss": 1.7993, "step": 3996 }, { "epoch": 0.17470169150749595, "grad_norm": 2.328125, "learning_rate": 9.268601898717299e-05, "loss": 2.6471, "step": 3997 }, { "epoch": 0.17474539971152586, "grad_norm": 2.0, "learning_rate": 9.268244184971984e-05, "loss": 1.9124, "step": 3998 }, { "epoch": 0.17478910791555574, "grad_norm": 2.265625, "learning_rate": 9.267886390678691e-05, "loss": 1.875, "step": 3999 }, { "epoch": 0.17483281611958565, "grad_norm": 4.75, "learning_rate": 9.267528515844168e-05, "loss": 1.9821, "step": 4000 }, { "epoch": 0.17487652432361556, "grad_norm": 2.296875, "learning_rate": 9.267170560475172e-05, "loss": 1.5852, "step": 4001 }, { "epoch": 0.17492023252764544, "grad_norm": 2.546875, "learning_rate": 9.266812524578457e-05, "loss": 2.142, "step": 4002 }, { "epoch": 0.17496394073167534, "grad_norm": 2.296875, "learning_rate": 9.266454408160779e-05, "loss": 1.7585, "step": 4003 }, { "epoch": 0.17500764893570522, "grad_norm": 2.234375, "learning_rate": 9.266096211228896e-05, "loss": 2.1145, "step": 4004 }, { "epoch": 0.17505135713973513, "grad_norm": 2.3125, "learning_rate": 9.265737933789571e-05, "loss": 1.99, "step": 4005 }, { "epoch": 0.17509506534376504, "grad_norm": 2.328125, "learning_rate": 9.26537957584956e-05, "loss": 2.156, "step": 4006 }, { "epoch": 0.17513877354779492, "grad_norm": 2.84375, "learning_rate": 9.26502113741563e-05, "loss": 1.9717, "step": 4007 }, { "epoch": 0.17518248175182483, "grad_norm": 2.09375, "learning_rate": 9.264662618494544e-05, "loss": 2.2891, "step": 4008 }, { "epoch": 0.1752261899558547, "grad_norm": 2.5625, "learning_rate": 9.264304019093066e-05, "loss": 2.4795, "step": 4009 }, { "epoch": 0.1752698981598846, "grad_norm": 2.296875, "learning_rate": 9.263945339217967e-05, "loss": 2.0334, "step": 4010 }, { "epoch": 0.17531360636391452, "grad_norm": 2.09375, "learning_rate": 9.263586578876011e-05, "loss": 1.9267, "step": 4011 }, { "epoch": 0.1753573145679444, "grad_norm": 1.8984375, "learning_rate": 9.263227738073973e-05, "loss": 1.7599, "step": 4012 }, { "epoch": 0.1754010227719743, "grad_norm": 6.71875, "learning_rate": 9.262868816818622e-05, "loss": 2.0304, "step": 4013 }, { "epoch": 0.1754447309760042, "grad_norm": 2.390625, "learning_rate": 9.262509815116732e-05, "loss": 1.9665, "step": 4014 }, { "epoch": 0.1754884391800341, "grad_norm": 2.734375, "learning_rate": 9.262150732975078e-05, "loss": 2.1552, "step": 4015 }, { "epoch": 0.175532147384064, "grad_norm": 2.203125, "learning_rate": 9.261791570400436e-05, "loss": 2.0909, "step": 4016 }, { "epoch": 0.17557585558809388, "grad_norm": 2.609375, "learning_rate": 9.261432327399583e-05, "loss": 2.7963, "step": 4017 }, { "epoch": 0.1756195637921238, "grad_norm": 2.03125, "learning_rate": 9.261073003979303e-05, "loss": 1.868, "step": 4018 }, { "epoch": 0.17566327199615367, "grad_norm": 2.65625, "learning_rate": 9.260713600146373e-05, "loss": 2.179, "step": 4019 }, { "epoch": 0.17570698020018358, "grad_norm": 2.296875, "learning_rate": 9.260354115907574e-05, "loss": 2.1183, "step": 4020 }, { "epoch": 0.17575068840421348, "grad_norm": 2.5, "learning_rate": 9.259994551269694e-05, "loss": 1.9912, "step": 4021 }, { "epoch": 0.17579439660824336, "grad_norm": 2.734375, "learning_rate": 9.259634906239516e-05, "loss": 1.9105, "step": 4022 }, { "epoch": 0.17583810481227327, "grad_norm": 1.984375, "learning_rate": 9.259275180823829e-05, "loss": 1.8473, "step": 4023 }, { "epoch": 0.17588181301630315, "grad_norm": 2.28125, "learning_rate": 9.258915375029418e-05, "loss": 1.7336, "step": 4024 }, { "epoch": 0.17592552122033306, "grad_norm": 1.9921875, "learning_rate": 9.258555488863078e-05, "loss": 2.0827, "step": 4025 }, { "epoch": 0.17596922942436297, "grad_norm": 2.4375, "learning_rate": 9.258195522331596e-05, "loss": 2.2803, "step": 4026 }, { "epoch": 0.17601293762839285, "grad_norm": 3.234375, "learning_rate": 9.257835475441768e-05, "loss": 2.1713, "step": 4027 }, { "epoch": 0.17605664583242275, "grad_norm": 2.65625, "learning_rate": 9.257475348200387e-05, "loss": 2.4612, "step": 4028 }, { "epoch": 0.17610035403645263, "grad_norm": 2.0, "learning_rate": 9.25711514061425e-05, "loss": 1.716, "step": 4029 }, { "epoch": 0.17614406224048254, "grad_norm": 2.578125, "learning_rate": 9.256754852690152e-05, "loss": 2.4852, "step": 4030 }, { "epoch": 0.17618777044451245, "grad_norm": 3.703125, "learning_rate": 9.256394484434899e-05, "loss": 2.4831, "step": 4031 }, { "epoch": 0.17623147864854233, "grad_norm": 2.5625, "learning_rate": 9.256034035855283e-05, "loss": 2.3615, "step": 4032 }, { "epoch": 0.17627518685257224, "grad_norm": 2.578125, "learning_rate": 9.255673506958113e-05, "loss": 2.2626, "step": 4033 }, { "epoch": 0.17631889505660212, "grad_norm": 2.5625, "learning_rate": 9.255312897750189e-05, "loss": 1.6607, "step": 4034 }, { "epoch": 0.17636260326063202, "grad_norm": 2.78125, "learning_rate": 9.254952208238318e-05, "loss": 1.9265, "step": 4035 }, { "epoch": 0.17640631146466193, "grad_norm": 2.421875, "learning_rate": 9.254591438429306e-05, "loss": 2.1257, "step": 4036 }, { "epoch": 0.1764500196686918, "grad_norm": 5.40625, "learning_rate": 9.25423058832996e-05, "loss": 2.5334, "step": 4037 }, { "epoch": 0.17649372787272172, "grad_norm": 3.359375, "learning_rate": 9.253869657947092e-05, "loss": 2.8139, "step": 4038 }, { "epoch": 0.1765374360767516, "grad_norm": 2.125, "learning_rate": 9.253508647287512e-05, "loss": 1.7314, "step": 4039 }, { "epoch": 0.1765811442807815, "grad_norm": 2.09375, "learning_rate": 9.253147556358034e-05, "loss": 1.8548, "step": 4040 }, { "epoch": 0.1766248524848114, "grad_norm": 2.375, "learning_rate": 9.252786385165471e-05, "loss": 2.5766, "step": 4041 }, { "epoch": 0.1766685606888413, "grad_norm": 2.625, "learning_rate": 9.25242513371664e-05, "loss": 1.997, "step": 4042 }, { "epoch": 0.1767122688928712, "grad_norm": 2.578125, "learning_rate": 9.252063802018356e-05, "loss": 2.0024, "step": 4043 }, { "epoch": 0.17675597709690108, "grad_norm": 2.578125, "learning_rate": 9.251702390077441e-05, "loss": 2.189, "step": 4044 }, { "epoch": 0.176799685300931, "grad_norm": 2.015625, "learning_rate": 9.251340897900713e-05, "loss": 2.05, "step": 4045 }, { "epoch": 0.1768433935049609, "grad_norm": 2.296875, "learning_rate": 9.250979325494995e-05, "loss": 1.6734, "step": 4046 }, { "epoch": 0.17688710170899077, "grad_norm": 2.078125, "learning_rate": 9.250617672867108e-05, "loss": 2.1073, "step": 4047 }, { "epoch": 0.17693080991302068, "grad_norm": 2.265625, "learning_rate": 9.250255940023882e-05, "loss": 1.8738, "step": 4048 }, { "epoch": 0.17697451811705056, "grad_norm": 2.203125, "learning_rate": 9.24989412697214e-05, "loss": 1.796, "step": 4049 }, { "epoch": 0.17701822632108047, "grad_norm": 7.3125, "learning_rate": 9.24953223371871e-05, "loss": 1.5202, "step": 4050 }, { "epoch": 0.17706193452511038, "grad_norm": 2.828125, "learning_rate": 9.249170260270421e-05, "loss": 3.0731, "step": 4051 }, { "epoch": 0.17710564272914026, "grad_norm": 2.25, "learning_rate": 9.248808206634105e-05, "loss": 1.5085, "step": 4052 }, { "epoch": 0.17714935093317016, "grad_norm": 2.328125, "learning_rate": 9.248446072816595e-05, "loss": 1.917, "step": 4053 }, { "epoch": 0.17719305913720004, "grad_norm": 2.359375, "learning_rate": 9.248083858824725e-05, "loss": 2.1638, "step": 4054 }, { "epoch": 0.17723676734122995, "grad_norm": 2.046875, "learning_rate": 9.247721564665329e-05, "loss": 1.7179, "step": 4055 }, { "epoch": 0.17728047554525986, "grad_norm": 2.3125, "learning_rate": 9.247359190345243e-05, "loss": 1.8676, "step": 4056 }, { "epoch": 0.17732418374928974, "grad_norm": 2.484375, "learning_rate": 9.24699673587131e-05, "loss": 1.9322, "step": 4057 }, { "epoch": 0.17736789195331965, "grad_norm": 2.09375, "learning_rate": 9.246634201250366e-05, "loss": 1.9405, "step": 4058 }, { "epoch": 0.17741160015734952, "grad_norm": 2.203125, "learning_rate": 9.246271586489255e-05, "loss": 1.8649, "step": 4059 }, { "epoch": 0.17745530836137943, "grad_norm": 2.515625, "learning_rate": 9.245908891594818e-05, "loss": 2.0666, "step": 4060 }, { "epoch": 0.17749901656540934, "grad_norm": 2.1875, "learning_rate": 9.245546116573901e-05, "loss": 1.7037, "step": 4061 }, { "epoch": 0.17754272476943922, "grad_norm": 2.21875, "learning_rate": 9.245183261433349e-05, "loss": 1.4171, "step": 4062 }, { "epoch": 0.17758643297346913, "grad_norm": 2.828125, "learning_rate": 9.244820326180011e-05, "loss": 1.9897, "step": 4063 }, { "epoch": 0.177630141177499, "grad_norm": 4.59375, "learning_rate": 9.244457310820736e-05, "loss": 1.8852, "step": 4064 }, { "epoch": 0.17767384938152891, "grad_norm": 2.3125, "learning_rate": 9.244094215362373e-05, "loss": 1.8307, "step": 4065 }, { "epoch": 0.17771755758555882, "grad_norm": 3.140625, "learning_rate": 9.243731039811775e-05, "loss": 3.5239, "step": 4066 }, { "epoch": 0.1777612657895887, "grad_norm": 2.28125, "learning_rate": 9.243367784175796e-05, "loss": 2.6461, "step": 4067 }, { "epoch": 0.1778049739936186, "grad_norm": 2.671875, "learning_rate": 9.243004448461293e-05, "loss": 2.6207, "step": 4068 }, { "epoch": 0.1778486821976485, "grad_norm": 2.234375, "learning_rate": 9.242641032675117e-05, "loss": 1.9232, "step": 4069 }, { "epoch": 0.1778923904016784, "grad_norm": 2.0625, "learning_rate": 9.242277536824134e-05, "loss": 1.782, "step": 4070 }, { "epoch": 0.1779360986057083, "grad_norm": 2.859375, "learning_rate": 9.241913960915197e-05, "loss": 1.5369, "step": 4071 }, { "epoch": 0.17797980680973818, "grad_norm": 5.34375, "learning_rate": 9.241550304955168e-05, "loss": 2.1154, "step": 4072 }, { "epoch": 0.1780235150137681, "grad_norm": 2.3125, "learning_rate": 9.241186568950915e-05, "loss": 2.0227, "step": 4073 }, { "epoch": 0.17806722321779797, "grad_norm": 2.546875, "learning_rate": 9.240822752909298e-05, "loss": 2.1406, "step": 4074 }, { "epoch": 0.17811093142182788, "grad_norm": 2.140625, "learning_rate": 9.240458856837182e-05, "loss": 2.0179, "step": 4075 }, { "epoch": 0.17815463962585779, "grad_norm": 2.625, "learning_rate": 9.240094880741437e-05, "loss": 2.2995, "step": 4076 }, { "epoch": 0.17819834782988767, "grad_norm": 2.046875, "learning_rate": 9.23973082462893e-05, "loss": 1.9567, "step": 4077 }, { "epoch": 0.17824205603391757, "grad_norm": 2.1875, "learning_rate": 9.23936668850653e-05, "loss": 2.1861, "step": 4078 }, { "epoch": 0.17828576423794745, "grad_norm": 2.171875, "learning_rate": 9.239002472381113e-05, "loss": 1.9636, "step": 4079 }, { "epoch": 0.17832947244197736, "grad_norm": 2.171875, "learning_rate": 9.238638176259549e-05, "loss": 1.7762, "step": 4080 }, { "epoch": 0.17837318064600727, "grad_norm": 2.546875, "learning_rate": 9.238273800148712e-05, "loss": 1.7434, "step": 4081 }, { "epoch": 0.17841688885003715, "grad_norm": 2.546875, "learning_rate": 9.237909344055482e-05, "loss": 1.8037, "step": 4082 }, { "epoch": 0.17846059705406705, "grad_norm": 2.515625, "learning_rate": 9.237544807986733e-05, "loss": 2.0798, "step": 4083 }, { "epoch": 0.17850430525809693, "grad_norm": 2.25, "learning_rate": 9.237180191949347e-05, "loss": 2.1024, "step": 4084 }, { "epoch": 0.17854801346212684, "grad_norm": 2.4375, "learning_rate": 9.236815495950204e-05, "loss": 2.6516, "step": 4085 }, { "epoch": 0.17859172166615675, "grad_norm": 2.40625, "learning_rate": 9.236450719996185e-05, "loss": 1.988, "step": 4086 }, { "epoch": 0.17863542987018663, "grad_norm": 2.25, "learning_rate": 9.236085864094177e-05, "loss": 2.3926, "step": 4087 }, { "epoch": 0.17867913807421654, "grad_norm": 2.484375, "learning_rate": 9.235720928251063e-05, "loss": 2.2052, "step": 4088 }, { "epoch": 0.17872284627824642, "grad_norm": 2.375, "learning_rate": 9.235355912473729e-05, "loss": 1.9098, "step": 4089 }, { "epoch": 0.17876655448227632, "grad_norm": 2.953125, "learning_rate": 9.234990816769065e-05, "loss": 2.6546, "step": 4090 }, { "epoch": 0.17881026268630623, "grad_norm": 4.5625, "learning_rate": 9.23462564114396e-05, "loss": 2.1178, "step": 4091 }, { "epoch": 0.1788539708903361, "grad_norm": 2.4375, "learning_rate": 9.234260385605308e-05, "loss": 2.0182, "step": 4092 }, { "epoch": 0.17889767909436602, "grad_norm": 2.65625, "learning_rate": 9.233895050159999e-05, "loss": 2.1849, "step": 4093 }, { "epoch": 0.1789413872983959, "grad_norm": 2.234375, "learning_rate": 9.233529634814928e-05, "loss": 2.3742, "step": 4094 }, { "epoch": 0.1789850955024258, "grad_norm": 2.1875, "learning_rate": 9.23316413957699e-05, "loss": 2.0213, "step": 4095 }, { "epoch": 0.1790288037064557, "grad_norm": 2.109375, "learning_rate": 9.232798564453086e-05, "loss": 1.5629, "step": 4096 }, { "epoch": 0.1790725119104856, "grad_norm": 2.546875, "learning_rate": 9.23243290945011e-05, "loss": 1.5609, "step": 4097 }, { "epoch": 0.1791162201145155, "grad_norm": 2.59375, "learning_rate": 9.232067174574968e-05, "loss": 2.1387, "step": 4098 }, { "epoch": 0.17915992831854538, "grad_norm": 2.140625, "learning_rate": 9.231701359834557e-05, "loss": 2.1409, "step": 4099 }, { "epoch": 0.1792036365225753, "grad_norm": 2.84375, "learning_rate": 9.231335465235782e-05, "loss": 2.0755, "step": 4100 }, { "epoch": 0.1792473447266052, "grad_norm": 3.75, "learning_rate": 9.230969490785549e-05, "loss": 2.0952, "step": 4101 }, { "epoch": 0.17929105293063508, "grad_norm": 2.359375, "learning_rate": 9.230603436490763e-05, "loss": 1.9135, "step": 4102 }, { "epoch": 0.17933476113466498, "grad_norm": 2.578125, "learning_rate": 9.230237302358336e-05, "loss": 2.0219, "step": 4103 }, { "epoch": 0.17937846933869486, "grad_norm": 2.640625, "learning_rate": 9.22987108839517e-05, "loss": 1.9435, "step": 4104 }, { "epoch": 0.17942217754272477, "grad_norm": 3.25, "learning_rate": 9.229504794608182e-05, "loss": 1.6264, "step": 4105 }, { "epoch": 0.17946588574675468, "grad_norm": 2.328125, "learning_rate": 9.229138421004284e-05, "loss": 2.1426, "step": 4106 }, { "epoch": 0.17950959395078456, "grad_norm": 2.15625, "learning_rate": 9.228771967590388e-05, "loss": 1.7699, "step": 4107 }, { "epoch": 0.17955330215481446, "grad_norm": 2.140625, "learning_rate": 9.228405434373409e-05, "loss": 2.0044, "step": 4108 }, { "epoch": 0.17959701035884434, "grad_norm": 2.21875, "learning_rate": 9.228038821360268e-05, "loss": 1.7695, "step": 4109 }, { "epoch": 0.17964071856287425, "grad_norm": 2.828125, "learning_rate": 9.227672128557879e-05, "loss": 2.1878, "step": 4110 }, { "epoch": 0.17968442676690416, "grad_norm": 2.28125, "learning_rate": 9.227305355973163e-05, "loss": 2.2419, "step": 4111 }, { "epoch": 0.17972813497093404, "grad_norm": 2.09375, "learning_rate": 9.226938503613043e-05, "loss": 1.7856, "step": 4112 }, { "epoch": 0.17977184317496395, "grad_norm": 2.34375, "learning_rate": 9.226571571484442e-05, "loss": 1.968, "step": 4113 }, { "epoch": 0.17981555137899383, "grad_norm": 2.34375, "learning_rate": 9.226204559594284e-05, "loss": 2.1826, "step": 4114 }, { "epoch": 0.17985925958302373, "grad_norm": 2.234375, "learning_rate": 9.225837467949495e-05, "loss": 2.0379, "step": 4115 }, { "epoch": 0.17990296778705364, "grad_norm": 2.375, "learning_rate": 9.225470296557002e-05, "loss": 1.8321, "step": 4116 }, { "epoch": 0.17994667599108352, "grad_norm": 3.453125, "learning_rate": 9.225103045423735e-05, "loss": 1.9019, "step": 4117 }, { "epoch": 0.17999038419511343, "grad_norm": 2.515625, "learning_rate": 9.224735714556624e-05, "loss": 2.7345, "step": 4118 }, { "epoch": 0.1800340923991433, "grad_norm": 2.375, "learning_rate": 9.2243683039626e-05, "loss": 2.2188, "step": 4119 }, { "epoch": 0.18007780060317322, "grad_norm": 2.234375, "learning_rate": 9.224000813648602e-05, "loss": 1.8773, "step": 4120 }, { "epoch": 0.18012150880720312, "grad_norm": 2.0625, "learning_rate": 9.223633243621556e-05, "loss": 1.8854, "step": 4121 }, { "epoch": 0.180165217011233, "grad_norm": 2.109375, "learning_rate": 9.223265593888405e-05, "loss": 1.7813, "step": 4122 }, { "epoch": 0.1802089252152629, "grad_norm": 1.8671875, "learning_rate": 9.222897864456088e-05, "loss": 1.6252, "step": 4123 }, { "epoch": 0.1802526334192928, "grad_norm": 2.84375, "learning_rate": 9.22253005533154e-05, "loss": 1.9214, "step": 4124 }, { "epoch": 0.1802963416233227, "grad_norm": 2.5, "learning_rate": 9.222162166521704e-05, "loss": 2.6064, "step": 4125 }, { "epoch": 0.1803400498273526, "grad_norm": 2.546875, "learning_rate": 9.221794198033525e-05, "loss": 1.7756, "step": 4126 }, { "epoch": 0.18038375803138249, "grad_norm": 2.140625, "learning_rate": 9.221426149873942e-05, "loss": 1.7808, "step": 4127 }, { "epoch": 0.1804274662354124, "grad_norm": 2.703125, "learning_rate": 9.221058022049906e-05, "loss": 2.3092, "step": 4128 }, { "epoch": 0.18047117443944227, "grad_norm": 2.90625, "learning_rate": 9.220689814568359e-05, "loss": 1.8663, "step": 4129 }, { "epoch": 0.18051488264347218, "grad_norm": 3.015625, "learning_rate": 9.220321527436256e-05, "loss": 2.9884, "step": 4130 }, { "epoch": 0.1805585908475021, "grad_norm": 2.171875, "learning_rate": 9.21995316066054e-05, "loss": 1.7863, "step": 4131 }, { "epoch": 0.18060229905153197, "grad_norm": 2.53125, "learning_rate": 9.219584714248167e-05, "loss": 1.6175, "step": 4132 }, { "epoch": 0.18064600725556187, "grad_norm": 1.8984375, "learning_rate": 9.21921618820609e-05, "loss": 1.6722, "step": 4133 }, { "epoch": 0.18068971545959175, "grad_norm": 4.21875, "learning_rate": 9.21884758254126e-05, "loss": 1.5489, "step": 4134 }, { "epoch": 0.18073342366362166, "grad_norm": 2.671875, "learning_rate": 9.21847889726064e-05, "loss": 2.5853, "step": 4135 }, { "epoch": 0.18077713186765157, "grad_norm": 2.46875, "learning_rate": 9.218110132371182e-05, "loss": 2.0443, "step": 4136 }, { "epoch": 0.18082084007168145, "grad_norm": 3.390625, "learning_rate": 9.217741287879846e-05, "loss": 1.8777, "step": 4137 }, { "epoch": 0.18086454827571136, "grad_norm": 2.5, "learning_rate": 9.217372363793592e-05, "loss": 1.9894, "step": 4138 }, { "epoch": 0.18090825647974124, "grad_norm": 2.203125, "learning_rate": 9.217003360119386e-05, "loss": 1.6585, "step": 4139 }, { "epoch": 0.18095196468377114, "grad_norm": 2.53125, "learning_rate": 9.216634276864188e-05, "loss": 2.5412, "step": 4140 }, { "epoch": 0.18099567288780105, "grad_norm": 2.140625, "learning_rate": 9.216265114034964e-05, "loss": 2.1771, "step": 4141 }, { "epoch": 0.18103938109183093, "grad_norm": 2.71875, "learning_rate": 9.21589587163868e-05, "loss": 1.8523, "step": 4142 }, { "epoch": 0.18108308929586084, "grad_norm": 2.140625, "learning_rate": 9.215526549682307e-05, "loss": 2.1015, "step": 4143 }, { "epoch": 0.18112679749989072, "grad_norm": 3.4375, "learning_rate": 9.21515714817281e-05, "loss": 2.06, "step": 4144 }, { "epoch": 0.18117050570392063, "grad_norm": 2.828125, "learning_rate": 9.214787667117165e-05, "loss": 2.2972, "step": 4145 }, { "epoch": 0.18121421390795053, "grad_norm": 2.390625, "learning_rate": 9.214418106522342e-05, "loss": 2.0532, "step": 4146 }, { "epoch": 0.1812579221119804, "grad_norm": 2.390625, "learning_rate": 9.214048466395316e-05, "loss": 2.1041, "step": 4147 }, { "epoch": 0.18130163031601032, "grad_norm": 2.234375, "learning_rate": 9.21367874674306e-05, "loss": 1.9628, "step": 4148 }, { "epoch": 0.1813453385200402, "grad_norm": 2.28125, "learning_rate": 9.213308947572554e-05, "loss": 2.5201, "step": 4149 }, { "epoch": 0.1813890467240701, "grad_norm": 2.203125, "learning_rate": 9.212939068890778e-05, "loss": 1.8226, "step": 4150 }, { "epoch": 0.18143275492810002, "grad_norm": 2.5625, "learning_rate": 9.212569110704708e-05, "loss": 2.3515, "step": 4151 }, { "epoch": 0.1814764631321299, "grad_norm": 2.984375, "learning_rate": 9.212199073021329e-05, "loss": 1.9236, "step": 4152 }, { "epoch": 0.1815201713361598, "grad_norm": 2.125, "learning_rate": 9.211828955847622e-05, "loss": 1.9031, "step": 4153 }, { "epoch": 0.18156387954018968, "grad_norm": 2.34375, "learning_rate": 9.211458759190573e-05, "loss": 1.7923, "step": 4154 }, { "epoch": 0.1816075877442196, "grad_norm": 3.15625, "learning_rate": 9.211088483057168e-05, "loss": 2.4361, "step": 4155 }, { "epoch": 0.1816512959482495, "grad_norm": 2.328125, "learning_rate": 9.210718127454394e-05, "loss": 2.2487, "step": 4156 }, { "epoch": 0.18169500415227938, "grad_norm": 2.671875, "learning_rate": 9.210347692389241e-05, "loss": 1.8312, "step": 4157 }, { "epoch": 0.18173871235630928, "grad_norm": 3.21875, "learning_rate": 9.209977177868698e-05, "loss": 2.6821, "step": 4158 }, { "epoch": 0.18178242056033916, "grad_norm": 2.328125, "learning_rate": 9.209606583899759e-05, "loss": 1.7224, "step": 4159 }, { "epoch": 0.18182612876436907, "grad_norm": 2.546875, "learning_rate": 9.209235910489418e-05, "loss": 2.8291, "step": 4160 }, { "epoch": 0.18186983696839898, "grad_norm": 2.5, "learning_rate": 9.208865157644668e-05, "loss": 1.6297, "step": 4161 }, { "epoch": 0.18191354517242886, "grad_norm": 2.5, "learning_rate": 9.208494325372508e-05, "loss": 2.0138, "step": 4162 }, { "epoch": 0.18195725337645877, "grad_norm": 2.203125, "learning_rate": 9.208123413679933e-05, "loss": 2.2733, "step": 4163 }, { "epoch": 0.18200096158048865, "grad_norm": 3.125, "learning_rate": 9.207752422573946e-05, "loss": 1.977, "step": 4164 }, { "epoch": 0.18204466978451855, "grad_norm": 2.703125, "learning_rate": 9.207381352061545e-05, "loss": 2.1237, "step": 4165 }, { "epoch": 0.18208837798854846, "grad_norm": 2.78125, "learning_rate": 9.207010202149736e-05, "loss": 2.2442, "step": 4166 }, { "epoch": 0.18213208619257834, "grad_norm": 2.171875, "learning_rate": 9.206638972845522e-05, "loss": 1.8178, "step": 4167 }, { "epoch": 0.18217579439660825, "grad_norm": 2.484375, "learning_rate": 9.206267664155907e-05, "loss": 2.3336, "step": 4168 }, { "epoch": 0.18221950260063813, "grad_norm": 2.765625, "learning_rate": 9.205896276087899e-05, "loss": 1.9392, "step": 4169 }, { "epoch": 0.18226321080466804, "grad_norm": 2.34375, "learning_rate": 9.205524808648507e-05, "loss": 1.5458, "step": 4170 }, { "epoch": 0.18230691900869794, "grad_norm": 3.53125, "learning_rate": 9.205153261844741e-05, "loss": 2.8738, "step": 4171 }, { "epoch": 0.18235062721272782, "grad_norm": 2.828125, "learning_rate": 9.204781635683613e-05, "loss": 2.0505, "step": 4172 }, { "epoch": 0.18239433541675773, "grad_norm": 16.25, "learning_rate": 9.204409930172135e-05, "loss": 2.1514, "step": 4173 }, { "epoch": 0.1824380436207876, "grad_norm": 2.84375, "learning_rate": 9.204038145317324e-05, "loss": 1.4737, "step": 4174 }, { "epoch": 0.18248175182481752, "grad_norm": 2.46875, "learning_rate": 9.203666281126193e-05, "loss": 1.922, "step": 4175 }, { "epoch": 0.18252546002884742, "grad_norm": 2.203125, "learning_rate": 9.203294337605761e-05, "loss": 2.0385, "step": 4176 }, { "epoch": 0.1825691682328773, "grad_norm": 2.109375, "learning_rate": 9.202922314763048e-05, "loss": 1.6214, "step": 4177 }, { "epoch": 0.1826128764369072, "grad_norm": 2.078125, "learning_rate": 9.202550212605074e-05, "loss": 1.7539, "step": 4178 }, { "epoch": 0.1826565846409371, "grad_norm": 2.359375, "learning_rate": 9.202178031138862e-05, "loss": 1.9389, "step": 4179 }, { "epoch": 0.182700292844967, "grad_norm": 2.625, "learning_rate": 9.201805770371432e-05, "loss": 2.0368, "step": 4180 }, { "epoch": 0.1827440010489969, "grad_norm": 2.765625, "learning_rate": 9.201433430309813e-05, "loss": 1.7877, "step": 4181 }, { "epoch": 0.1827877092530268, "grad_norm": 2.625, "learning_rate": 9.20106101096103e-05, "loss": 2.8653, "step": 4182 }, { "epoch": 0.1828314174570567, "grad_norm": 2.46875, "learning_rate": 9.200688512332111e-05, "loss": 2.528, "step": 4183 }, { "epoch": 0.18287512566108657, "grad_norm": 3.671875, "learning_rate": 9.200315934430088e-05, "loss": 1.9375, "step": 4184 }, { "epoch": 0.18291883386511648, "grad_norm": 2.03125, "learning_rate": 9.199943277261989e-05, "loss": 1.5832, "step": 4185 }, { "epoch": 0.1829625420691464, "grad_norm": 2.484375, "learning_rate": 9.199570540834846e-05, "loss": 2.6069, "step": 4186 }, { "epoch": 0.18300625027317627, "grad_norm": 3.375, "learning_rate": 9.199197725155697e-05, "loss": 2.8072, "step": 4187 }, { "epoch": 0.18304995847720618, "grad_norm": 2.5, "learning_rate": 9.198824830231573e-05, "loss": 2.2208, "step": 4188 }, { "epoch": 0.18309366668123606, "grad_norm": 2.234375, "learning_rate": 9.198451856069515e-05, "loss": 2.1902, "step": 4189 }, { "epoch": 0.18313737488526596, "grad_norm": 3.59375, "learning_rate": 9.19807880267656e-05, "loss": 2.2622, "step": 4190 }, { "epoch": 0.18318108308929587, "grad_norm": 2.5625, "learning_rate": 9.197705670059747e-05, "loss": 2.5455, "step": 4191 }, { "epoch": 0.18322479129332575, "grad_norm": 2.21875, "learning_rate": 9.197332458226118e-05, "loss": 1.8177, "step": 4192 }, { "epoch": 0.18326849949735566, "grad_norm": 2.125, "learning_rate": 9.196959167182719e-05, "loss": 2.3912, "step": 4193 }, { "epoch": 0.18331220770138554, "grad_norm": 2.3125, "learning_rate": 9.19658579693659e-05, "loss": 1.7873, "step": 4194 }, { "epoch": 0.18335591590541545, "grad_norm": 2.96875, "learning_rate": 9.196212347494781e-05, "loss": 2.2754, "step": 4195 }, { "epoch": 0.18339962410944535, "grad_norm": 2.515625, "learning_rate": 9.195838818864337e-05, "loss": 2.2531, "step": 4196 }, { "epoch": 0.18344333231347523, "grad_norm": 2.46875, "learning_rate": 9.195465211052306e-05, "loss": 1.9503, "step": 4197 }, { "epoch": 0.18348704051750514, "grad_norm": 2.015625, "learning_rate": 9.195091524065742e-05, "loss": 1.4529, "step": 4198 }, { "epoch": 0.18353074872153502, "grad_norm": 1.96875, "learning_rate": 9.194717757911694e-05, "loss": 1.9256, "step": 4199 }, { "epoch": 0.18357445692556493, "grad_norm": 1.953125, "learning_rate": 9.194343912597218e-05, "loss": 2.1218, "step": 4200 }, { "epoch": 0.18361816512959483, "grad_norm": 2.140625, "learning_rate": 9.193969988129367e-05, "loss": 2.0301, "step": 4201 }, { "epoch": 0.18366187333362471, "grad_norm": 2.3125, "learning_rate": 9.1935959845152e-05, "loss": 2.0649, "step": 4202 }, { "epoch": 0.18370558153765462, "grad_norm": 5.375, "learning_rate": 9.193221901761772e-05, "loss": 3.2402, "step": 4203 }, { "epoch": 0.1837492897416845, "grad_norm": 7.09375, "learning_rate": 9.192847739876142e-05, "loss": 2.1486, "step": 4204 }, { "epoch": 0.1837929979457144, "grad_norm": 2.28125, "learning_rate": 9.192473498865376e-05, "loss": 1.6001, "step": 4205 }, { "epoch": 0.18383670614974432, "grad_norm": 2.3125, "learning_rate": 9.192099178736532e-05, "loss": 1.9152, "step": 4206 }, { "epoch": 0.1838804143537742, "grad_norm": 4.28125, "learning_rate": 9.191724779496675e-05, "loss": 1.463, "step": 4207 }, { "epoch": 0.1839241225578041, "grad_norm": 2.875, "learning_rate": 9.19135030115287e-05, "loss": 1.755, "step": 4208 }, { "epoch": 0.18396783076183398, "grad_norm": 2.3125, "learning_rate": 9.190975743712184e-05, "loss": 1.781, "step": 4209 }, { "epoch": 0.1840115389658639, "grad_norm": 2.203125, "learning_rate": 9.19060110718169e-05, "loss": 2.0729, "step": 4210 }, { "epoch": 0.1840552471698938, "grad_norm": 2.40625, "learning_rate": 9.19022639156845e-05, "loss": 1.388, "step": 4211 }, { "epoch": 0.18409895537392368, "grad_norm": 2.171875, "learning_rate": 9.18985159687954e-05, "loss": 1.774, "step": 4212 }, { "epoch": 0.18414266357795359, "grad_norm": 2.171875, "learning_rate": 9.189476723122034e-05, "loss": 1.8746, "step": 4213 }, { "epoch": 0.18418637178198347, "grad_norm": 2.359375, "learning_rate": 9.189101770303003e-05, "loss": 1.9607, "step": 4214 }, { "epoch": 0.18423007998601337, "grad_norm": 2.125, "learning_rate": 9.188726738429526e-05, "loss": 1.4984, "step": 4215 }, { "epoch": 0.18427378819004328, "grad_norm": 2.046875, "learning_rate": 9.188351627508678e-05, "loss": 1.7226, "step": 4216 }, { "epoch": 0.18431749639407316, "grad_norm": 2.359375, "learning_rate": 9.187976437547538e-05, "loss": 2.1207, "step": 4217 }, { "epoch": 0.18436120459810307, "grad_norm": 2.578125, "learning_rate": 9.187601168553191e-05, "loss": 2.0887, "step": 4218 }, { "epoch": 0.18440491280213295, "grad_norm": 2.234375, "learning_rate": 9.187225820532712e-05, "loss": 2.0781, "step": 4219 }, { "epoch": 0.18444862100616286, "grad_norm": 2.1875, "learning_rate": 9.186850393493188e-05, "loss": 2.0583, "step": 4220 }, { "epoch": 0.18449232921019276, "grad_norm": 2.34375, "learning_rate": 9.186474887441704e-05, "loss": 2.3583, "step": 4221 }, { "epoch": 0.18453603741422264, "grad_norm": 2.515625, "learning_rate": 9.186099302385344e-05, "loss": 2.019, "step": 4222 }, { "epoch": 0.18457974561825255, "grad_norm": 2.515625, "learning_rate": 9.185723638331201e-05, "loss": 2.1852, "step": 4223 }, { "epoch": 0.18462345382228243, "grad_norm": 2.359375, "learning_rate": 9.185347895286358e-05, "loss": 1.8512, "step": 4224 }, { "epoch": 0.18466716202631234, "grad_norm": 1.859375, "learning_rate": 9.184972073257911e-05, "loss": 1.7002, "step": 4225 }, { "epoch": 0.18471087023034224, "grad_norm": 2.703125, "learning_rate": 9.184596172252948e-05, "loss": 1.9231, "step": 4226 }, { "epoch": 0.18475457843437212, "grad_norm": 2.0625, "learning_rate": 9.184220192278565e-05, "loss": 1.7761, "step": 4227 }, { "epoch": 0.18479828663840203, "grad_norm": 2.671875, "learning_rate": 9.183844133341859e-05, "loss": 1.9647, "step": 4228 }, { "epoch": 0.1848419948424319, "grad_norm": 2.03125, "learning_rate": 9.183467995449924e-05, "loss": 1.9758, "step": 4229 }, { "epoch": 0.18488570304646182, "grad_norm": 2.0625, "learning_rate": 9.183091778609858e-05, "loss": 2.1707, "step": 4230 }, { "epoch": 0.18492941125049173, "grad_norm": 2.328125, "learning_rate": 9.182715482828763e-05, "loss": 2.0485, "step": 4231 }, { "epoch": 0.1849731194545216, "grad_norm": 2.546875, "learning_rate": 9.182339108113738e-05, "loss": 1.9269, "step": 4232 }, { "epoch": 0.1850168276585515, "grad_norm": 3.28125, "learning_rate": 9.181962654471888e-05, "loss": 2.4354, "step": 4233 }, { "epoch": 0.1850605358625814, "grad_norm": 2.21875, "learning_rate": 9.181586121910317e-05, "loss": 2.3555, "step": 4234 }, { "epoch": 0.1851042440666113, "grad_norm": 2.953125, "learning_rate": 9.181209510436128e-05, "loss": 2.3343, "step": 4235 }, { "epoch": 0.1851479522706412, "grad_norm": 2.015625, "learning_rate": 9.180832820056431e-05, "loss": 1.7184, "step": 4236 }, { "epoch": 0.1851916604746711, "grad_norm": 2.0, "learning_rate": 9.180456050778334e-05, "loss": 1.6113, "step": 4237 }, { "epoch": 0.185235368678701, "grad_norm": 2.421875, "learning_rate": 9.180079202608947e-05, "loss": 1.749, "step": 4238 }, { "epoch": 0.18527907688273088, "grad_norm": 2.3125, "learning_rate": 9.179702275555382e-05, "loss": 2.1112, "step": 4239 }, { "epoch": 0.18532278508676078, "grad_norm": 2.453125, "learning_rate": 9.17932526962475e-05, "loss": 1.8853, "step": 4240 }, { "epoch": 0.1853664932907907, "grad_norm": 2.53125, "learning_rate": 9.178948184824168e-05, "loss": 2.6173, "step": 4241 }, { "epoch": 0.18541020149482057, "grad_norm": 2.9375, "learning_rate": 9.178571021160753e-05, "loss": 2.3146, "step": 4242 }, { "epoch": 0.18545390969885048, "grad_norm": 3.125, "learning_rate": 9.17819377864162e-05, "loss": 2.4007, "step": 4243 }, { "epoch": 0.18549761790288036, "grad_norm": 2.875, "learning_rate": 9.17781645727389e-05, "loss": 2.725, "step": 4244 }, { "epoch": 0.18554132610691026, "grad_norm": 2.703125, "learning_rate": 9.177439057064683e-05, "loss": 2.5454, "step": 4245 }, { "epoch": 0.18558503431094017, "grad_norm": 2.703125, "learning_rate": 9.17706157802112e-05, "loss": 1.9567, "step": 4246 }, { "epoch": 0.18562874251497005, "grad_norm": 3.0, "learning_rate": 9.176684020150326e-05, "loss": 2.2343, "step": 4247 }, { "epoch": 0.18567245071899996, "grad_norm": 2.6875, "learning_rate": 9.176306383459426e-05, "loss": 2.1333, "step": 4248 }, { "epoch": 0.18571615892302984, "grad_norm": 2.578125, "learning_rate": 9.175928667955546e-05, "loss": 1.7027, "step": 4249 }, { "epoch": 0.18575986712705975, "grad_norm": 3.125, "learning_rate": 9.175550873645816e-05, "loss": 2.0228, "step": 4250 }, { "epoch": 0.18580357533108965, "grad_norm": 2.796875, "learning_rate": 9.175173000537361e-05, "loss": 2.6521, "step": 4251 }, { "epoch": 0.18584728353511953, "grad_norm": 2.15625, "learning_rate": 9.174795048637316e-05, "loss": 2.2313, "step": 4252 }, { "epoch": 0.18589099173914944, "grad_norm": 2.9375, "learning_rate": 9.174417017952812e-05, "loss": 2.1287, "step": 4253 }, { "epoch": 0.18593469994317932, "grad_norm": 2.0625, "learning_rate": 9.174038908490984e-05, "loss": 1.7118, "step": 4254 }, { "epoch": 0.18597840814720923, "grad_norm": 2.171875, "learning_rate": 9.173660720258966e-05, "loss": 1.589, "step": 4255 }, { "epoch": 0.18602211635123914, "grad_norm": 2.21875, "learning_rate": 9.173282453263897e-05, "loss": 1.8921, "step": 4256 }, { "epoch": 0.18606582455526902, "grad_norm": 2.546875, "learning_rate": 9.172904107512911e-05, "loss": 1.9779, "step": 4257 }, { "epoch": 0.18610953275929892, "grad_norm": 2.734375, "learning_rate": 9.172525683013155e-05, "loss": 2.7427, "step": 4258 }, { "epoch": 0.1861532409633288, "grad_norm": 2.734375, "learning_rate": 9.172147179771765e-05, "loss": 2.3742, "step": 4259 }, { "epoch": 0.1861969491673587, "grad_norm": 2.28125, "learning_rate": 9.171768597795885e-05, "loss": 2.1021, "step": 4260 }, { "epoch": 0.18624065737138862, "grad_norm": 2.703125, "learning_rate": 9.17138993709266e-05, "loss": 2.4938, "step": 4261 }, { "epoch": 0.1862843655754185, "grad_norm": 20.25, "learning_rate": 9.171011197669236e-05, "loss": 1.6165, "step": 4262 }, { "epoch": 0.1863280737794484, "grad_norm": 2.453125, "learning_rate": 9.170632379532759e-05, "loss": 1.7445, "step": 4263 }, { "epoch": 0.18637178198347829, "grad_norm": 2.234375, "learning_rate": 9.17025348269038e-05, "loss": 1.8762, "step": 4264 }, { "epoch": 0.1864154901875082, "grad_norm": 2.484375, "learning_rate": 9.16987450714925e-05, "loss": 2.254, "step": 4265 }, { "epoch": 0.1864591983915381, "grad_norm": 3.203125, "learning_rate": 9.169495452916516e-05, "loss": 3.2139, "step": 4266 }, { "epoch": 0.18650290659556798, "grad_norm": 2.25, "learning_rate": 9.169116319999336e-05, "loss": 2.2422, "step": 4267 }, { "epoch": 0.1865466147995979, "grad_norm": 3.34375, "learning_rate": 9.168737108404864e-05, "loss": 1.9404, "step": 4268 }, { "epoch": 0.18659032300362777, "grad_norm": 2.703125, "learning_rate": 9.168357818140255e-05, "loss": 1.9761, "step": 4269 }, { "epoch": 0.18663403120765767, "grad_norm": 2.484375, "learning_rate": 9.167978449212666e-05, "loss": 2.1656, "step": 4270 }, { "epoch": 0.18667773941168758, "grad_norm": 2.359375, "learning_rate": 9.167599001629257e-05, "loss": 2.4656, "step": 4271 }, { "epoch": 0.18672144761571746, "grad_norm": 2.890625, "learning_rate": 9.167219475397191e-05, "loss": 2.2688, "step": 4272 }, { "epoch": 0.18676515581974737, "grad_norm": 2.875, "learning_rate": 9.166839870523627e-05, "loss": 1.9322, "step": 4273 }, { "epoch": 0.18680886402377728, "grad_norm": 2.453125, "learning_rate": 9.16646018701573e-05, "loss": 1.6503, "step": 4274 }, { "epoch": 0.18685257222780716, "grad_norm": 2.328125, "learning_rate": 9.166080424880666e-05, "loss": 1.9766, "step": 4275 }, { "epoch": 0.18689628043183706, "grad_norm": 2.390625, "learning_rate": 9.165700584125601e-05, "loss": 2.035, "step": 4276 }, { "epoch": 0.18693998863586694, "grad_norm": 2.546875, "learning_rate": 9.165320664757705e-05, "loss": 2.16, "step": 4277 }, { "epoch": 0.18698369683989685, "grad_norm": 2.453125, "learning_rate": 9.164940666784143e-05, "loss": 1.9903, "step": 4278 }, { "epoch": 0.18702740504392676, "grad_norm": 2.171875, "learning_rate": 9.164560590212088e-05, "loss": 1.7399, "step": 4279 }, { "epoch": 0.18707111324795664, "grad_norm": 2.640625, "learning_rate": 9.164180435048715e-05, "loss": 2.0793, "step": 4280 }, { "epoch": 0.18711482145198655, "grad_norm": 6.625, "learning_rate": 9.163800201301197e-05, "loss": 2.8651, "step": 4281 }, { "epoch": 0.18715852965601643, "grad_norm": 2.625, "learning_rate": 9.163419888976708e-05, "loss": 2.162, "step": 4282 }, { "epoch": 0.18720223786004633, "grad_norm": 2.0625, "learning_rate": 9.163039498082428e-05, "loss": 1.6775, "step": 4283 }, { "epoch": 0.18724594606407624, "grad_norm": 2.265625, "learning_rate": 9.162659028625531e-05, "loss": 1.9262, "step": 4284 }, { "epoch": 0.18728965426810612, "grad_norm": 2.625, "learning_rate": 9.162278480613203e-05, "loss": 1.9239, "step": 4285 }, { "epoch": 0.18733336247213603, "grad_norm": 2.40625, "learning_rate": 9.161897854052619e-05, "loss": 1.6802, "step": 4286 }, { "epoch": 0.1873770706761659, "grad_norm": 2.265625, "learning_rate": 9.161517148950967e-05, "loss": 1.9088, "step": 4287 }, { "epoch": 0.18742077888019582, "grad_norm": 2.171875, "learning_rate": 9.161136365315428e-05, "loss": 1.9722, "step": 4288 }, { "epoch": 0.18746448708422572, "grad_norm": 2.890625, "learning_rate": 9.160755503153192e-05, "loss": 1.692, "step": 4289 }, { "epoch": 0.1875081952882556, "grad_norm": 2.28125, "learning_rate": 9.16037456247144e-05, "loss": 1.5951, "step": 4290 }, { "epoch": 0.1875519034922855, "grad_norm": 2.890625, "learning_rate": 9.159993543277368e-05, "loss": 2.0796, "step": 4291 }, { "epoch": 0.1875956116963154, "grad_norm": 2.515625, "learning_rate": 9.159612445578163e-05, "loss": 1.3354, "step": 4292 }, { "epoch": 0.1876393199003453, "grad_norm": 2.65625, "learning_rate": 9.159231269381016e-05, "loss": 2.0206, "step": 4293 }, { "epoch": 0.1876830281043752, "grad_norm": 2.9375, "learning_rate": 9.158850014693123e-05, "loss": 1.9006, "step": 4294 }, { "epoch": 0.18772673630840508, "grad_norm": 3.078125, "learning_rate": 9.158468681521676e-05, "loss": 2.1584, "step": 4295 }, { "epoch": 0.187770444512435, "grad_norm": 2.171875, "learning_rate": 9.158087269873871e-05, "loss": 1.8828, "step": 4296 }, { "epoch": 0.18781415271646487, "grad_norm": 2.296875, "learning_rate": 9.15770577975691e-05, "loss": 1.9622, "step": 4297 }, { "epoch": 0.18785786092049478, "grad_norm": 2.203125, "learning_rate": 9.157324211177991e-05, "loss": 1.7722, "step": 4298 }, { "epoch": 0.1879015691245247, "grad_norm": 2.84375, "learning_rate": 9.15694256414431e-05, "loss": 2.0762, "step": 4299 }, { "epoch": 0.18794527732855457, "grad_norm": 2.15625, "learning_rate": 9.156560838663076e-05, "loss": 2.1648, "step": 4300 }, { "epoch": 0.18798898553258447, "grad_norm": 2.15625, "learning_rate": 9.156179034741486e-05, "loss": 1.9935, "step": 4301 }, { "epoch": 0.18803269373661435, "grad_norm": 2.21875, "learning_rate": 9.155797152386752e-05, "loss": 1.7464, "step": 4302 }, { "epoch": 0.18807640194064426, "grad_norm": 2.921875, "learning_rate": 9.155415191606074e-05, "loss": 1.6097, "step": 4303 }, { "epoch": 0.18812011014467417, "grad_norm": 2.078125, "learning_rate": 9.155033152406665e-05, "loss": 1.9678, "step": 4304 }, { "epoch": 0.18816381834870405, "grad_norm": 2.265625, "learning_rate": 9.154651034795734e-05, "loss": 1.6927, "step": 4305 }, { "epoch": 0.18820752655273396, "grad_norm": 3.046875, "learning_rate": 9.154268838780489e-05, "loss": 2.6745, "step": 4306 }, { "epoch": 0.18825123475676384, "grad_norm": 2.375, "learning_rate": 9.153886564368145e-05, "loss": 2.1593, "step": 4307 }, { "epoch": 0.18829494296079374, "grad_norm": 2.0, "learning_rate": 9.153504211565917e-05, "loss": 1.9622, "step": 4308 }, { "epoch": 0.18833865116482365, "grad_norm": 2.078125, "learning_rate": 9.15312178038102e-05, "loss": 1.9805, "step": 4309 }, { "epoch": 0.18838235936885353, "grad_norm": 2.265625, "learning_rate": 9.15273927082067e-05, "loss": 1.6238, "step": 4310 }, { "epoch": 0.18842606757288344, "grad_norm": 2.390625, "learning_rate": 9.152356682892085e-05, "loss": 1.8822, "step": 4311 }, { "epoch": 0.18846977577691332, "grad_norm": 2.515625, "learning_rate": 9.151974016602485e-05, "loss": 1.5081, "step": 4312 }, { "epoch": 0.18851348398094323, "grad_norm": 2.46875, "learning_rate": 9.151591271959094e-05, "loss": 1.7578, "step": 4313 }, { "epoch": 0.18855719218497313, "grad_norm": 2.171875, "learning_rate": 9.151208448969134e-05, "loss": 1.9479, "step": 4314 }, { "epoch": 0.188600900389003, "grad_norm": 2.671875, "learning_rate": 9.150825547639827e-05, "loss": 2.4475, "step": 4315 }, { "epoch": 0.18864460859303292, "grad_norm": 2.109375, "learning_rate": 9.150442567978402e-05, "loss": 1.4966, "step": 4316 }, { "epoch": 0.1886883167970628, "grad_norm": 1.953125, "learning_rate": 9.150059509992085e-05, "loss": 1.818, "step": 4317 }, { "epoch": 0.1887320250010927, "grad_norm": 2.359375, "learning_rate": 9.149676373688105e-05, "loss": 1.8557, "step": 4318 }, { "epoch": 0.18877573320512261, "grad_norm": 2.203125, "learning_rate": 9.149293159073691e-05, "loss": 1.636, "step": 4319 }, { "epoch": 0.1888194414091525, "grad_norm": 2.21875, "learning_rate": 9.148909866156076e-05, "loss": 2.0637, "step": 4320 }, { "epoch": 0.1888631496131824, "grad_norm": 2.171875, "learning_rate": 9.148526494942496e-05, "loss": 1.9292, "step": 4321 }, { "epoch": 0.18890685781721228, "grad_norm": 2.1875, "learning_rate": 9.14814304544018e-05, "loss": 1.9628, "step": 4322 }, { "epoch": 0.1889505660212422, "grad_norm": 2.3125, "learning_rate": 9.147759517656369e-05, "loss": 2.0658, "step": 4323 }, { "epoch": 0.1889942742252721, "grad_norm": 2.03125, "learning_rate": 9.147375911598299e-05, "loss": 1.3896, "step": 4324 }, { "epoch": 0.18903798242930198, "grad_norm": 2.296875, "learning_rate": 9.14699222727321e-05, "loss": 2.1023, "step": 4325 }, { "epoch": 0.18908169063333188, "grad_norm": 2.4375, "learning_rate": 9.146608464688341e-05, "loss": 2.0631, "step": 4326 }, { "epoch": 0.18912539883736176, "grad_norm": 2.265625, "learning_rate": 9.146224623850936e-05, "loss": 2.255, "step": 4327 }, { "epoch": 0.18916910704139167, "grad_norm": 2.640625, "learning_rate": 9.145840704768238e-05, "loss": 1.578, "step": 4328 }, { "epoch": 0.18921281524542158, "grad_norm": 2.515625, "learning_rate": 9.145456707447491e-05, "loss": 1.5111, "step": 4329 }, { "epoch": 0.18925652344945146, "grad_norm": 3.046875, "learning_rate": 9.145072631895942e-05, "loss": 1.6645, "step": 4330 }, { "epoch": 0.18930023165348137, "grad_norm": 2.0625, "learning_rate": 9.14468847812084e-05, "loss": 1.8262, "step": 4331 }, { "epoch": 0.18934393985751125, "grad_norm": 2.1875, "learning_rate": 9.144304246129434e-05, "loss": 1.8456, "step": 4332 }, { "epoch": 0.18938764806154115, "grad_norm": 2.46875, "learning_rate": 9.143919935928975e-05, "loss": 2.0993, "step": 4333 }, { "epoch": 0.18943135626557106, "grad_norm": 2.546875, "learning_rate": 9.143535547526716e-05, "loss": 2.0291, "step": 4334 }, { "epoch": 0.18947506446960094, "grad_norm": 2.40625, "learning_rate": 9.143151080929911e-05, "loss": 2.1035, "step": 4335 }, { "epoch": 0.18951877267363085, "grad_norm": 3.234375, "learning_rate": 9.142766536145815e-05, "loss": 2.0885, "step": 4336 }, { "epoch": 0.18956248087766073, "grad_norm": 2.4375, "learning_rate": 9.142381913181684e-05, "loss": 1.8083, "step": 4337 }, { "epoch": 0.18960618908169063, "grad_norm": 2.609375, "learning_rate": 9.141997212044779e-05, "loss": 1.5989, "step": 4338 }, { "epoch": 0.18964989728572054, "grad_norm": 3.625, "learning_rate": 9.141612432742357e-05, "loss": 2.7321, "step": 4339 }, { "epoch": 0.18969360548975042, "grad_norm": 2.40625, "learning_rate": 9.14122757528168e-05, "loss": 2.4181, "step": 4340 }, { "epoch": 0.18973731369378033, "grad_norm": 2.3125, "learning_rate": 9.140842639670014e-05, "loss": 1.8314, "step": 4341 }, { "epoch": 0.1897810218978102, "grad_norm": 2.21875, "learning_rate": 9.140457625914618e-05, "loss": 2.2134, "step": 4342 }, { "epoch": 0.18982473010184012, "grad_norm": 2.21875, "learning_rate": 9.14007253402276e-05, "loss": 2.1502, "step": 4343 }, { "epoch": 0.18986843830587002, "grad_norm": 2.9375, "learning_rate": 9.13968736400171e-05, "loss": 2.2266, "step": 4344 }, { "epoch": 0.1899121465098999, "grad_norm": 2.4375, "learning_rate": 9.139302115858733e-05, "loss": 1.8241, "step": 4345 }, { "epoch": 0.1899558547139298, "grad_norm": 2.59375, "learning_rate": 9.138916789601102e-05, "loss": 2.5854, "step": 4346 }, { "epoch": 0.1899995629179597, "grad_norm": 2.71875, "learning_rate": 9.138531385236086e-05, "loss": 2.0389, "step": 4347 }, { "epoch": 0.1900432711219896, "grad_norm": 2.453125, "learning_rate": 9.13814590277096e-05, "loss": 2.3824, "step": 4348 }, { "epoch": 0.1900869793260195, "grad_norm": 2.296875, "learning_rate": 9.137760342212997e-05, "loss": 1.815, "step": 4349 }, { "epoch": 0.19013068753004939, "grad_norm": 2.375, "learning_rate": 9.137374703569475e-05, "loss": 2.382, "step": 4350 }, { "epoch": 0.1901743957340793, "grad_norm": 2.15625, "learning_rate": 9.13698898684767e-05, "loss": 1.7854, "step": 4351 }, { "epoch": 0.19021810393810917, "grad_norm": 2.0, "learning_rate": 9.136603192054862e-05, "loss": 1.8529, "step": 4352 }, { "epoch": 0.19026181214213908, "grad_norm": 2.015625, "learning_rate": 9.136217319198332e-05, "loss": 1.661, "step": 4353 }, { "epoch": 0.190305520346169, "grad_norm": 2.34375, "learning_rate": 9.135831368285362e-05, "loss": 1.901, "step": 4354 }, { "epoch": 0.19034922855019887, "grad_norm": 1.984375, "learning_rate": 9.135445339323232e-05, "loss": 1.9566, "step": 4355 }, { "epoch": 0.19039293675422878, "grad_norm": 2.640625, "learning_rate": 9.135059232319232e-05, "loss": 2.8984, "step": 4356 }, { "epoch": 0.19043664495825866, "grad_norm": 2.921875, "learning_rate": 9.134673047280645e-05, "loss": 2.7567, "step": 4357 }, { "epoch": 0.19048035316228856, "grad_norm": 2.75, "learning_rate": 9.134286784214759e-05, "loss": 2.2439, "step": 4358 }, { "epoch": 0.19052406136631847, "grad_norm": 2.328125, "learning_rate": 9.133900443128864e-05, "loss": 1.7812, "step": 4359 }, { "epoch": 0.19056776957034835, "grad_norm": 2.5, "learning_rate": 9.133514024030252e-05, "loss": 2.3759, "step": 4360 }, { "epoch": 0.19061147777437826, "grad_norm": 2.515625, "learning_rate": 9.133127526926215e-05, "loss": 2.232, "step": 4361 }, { "epoch": 0.19065518597840814, "grad_norm": 2.5625, "learning_rate": 9.132740951824046e-05, "loss": 1.6692, "step": 4362 }, { "epoch": 0.19069889418243804, "grad_norm": 2.59375, "learning_rate": 9.13235429873104e-05, "loss": 2.4393, "step": 4363 }, { "epoch": 0.19074260238646795, "grad_norm": 2.5, "learning_rate": 9.131967567654493e-05, "loss": 2.0248, "step": 4364 }, { "epoch": 0.19078631059049783, "grad_norm": 2.25, "learning_rate": 9.131580758601705e-05, "loss": 1.9735, "step": 4365 }, { "epoch": 0.19083001879452774, "grad_norm": 2.390625, "learning_rate": 9.131193871579975e-05, "loss": 2.3554, "step": 4366 }, { "epoch": 0.19087372699855762, "grad_norm": 2.65625, "learning_rate": 9.130806906596603e-05, "loss": 2.4335, "step": 4367 }, { "epoch": 0.19091743520258753, "grad_norm": 1.9765625, "learning_rate": 9.130419863658894e-05, "loss": 1.9289, "step": 4368 }, { "epoch": 0.19096114340661743, "grad_norm": 2.203125, "learning_rate": 9.13003274277415e-05, "loss": 2.2863, "step": 4369 }, { "epoch": 0.1910048516106473, "grad_norm": 2.3125, "learning_rate": 9.129645543949676e-05, "loss": 2.1855, "step": 4370 }, { "epoch": 0.19104855981467722, "grad_norm": 2.0, "learning_rate": 9.129258267192783e-05, "loss": 1.8109, "step": 4371 }, { "epoch": 0.1910922680187071, "grad_norm": 2.609375, "learning_rate": 9.128870912510774e-05, "loss": 1.9122, "step": 4372 }, { "epoch": 0.191135976222737, "grad_norm": 2.109375, "learning_rate": 9.128483479910963e-05, "loss": 1.843, "step": 4373 }, { "epoch": 0.19117968442676692, "grad_norm": 2.5625, "learning_rate": 9.12809596940066e-05, "loss": 2.258, "step": 4374 }, { "epoch": 0.1912233926307968, "grad_norm": 2.5, "learning_rate": 9.127708380987176e-05, "loss": 1.8263, "step": 4375 }, { "epoch": 0.1912671008348267, "grad_norm": 2.625, "learning_rate": 9.12732071467783e-05, "loss": 1.9707, "step": 4376 }, { "epoch": 0.19131080903885658, "grad_norm": 2.390625, "learning_rate": 9.126932970479933e-05, "loss": 2.1258, "step": 4377 }, { "epoch": 0.1913545172428865, "grad_norm": 2.4375, "learning_rate": 9.126545148400807e-05, "loss": 1.8641, "step": 4378 }, { "epoch": 0.1913982254469164, "grad_norm": 2.1875, "learning_rate": 9.126157248447766e-05, "loss": 1.7545, "step": 4379 }, { "epoch": 0.19144193365094628, "grad_norm": 2.328125, "learning_rate": 9.125769270628133e-05, "loss": 1.8299, "step": 4380 }, { "epoch": 0.19148564185497619, "grad_norm": 2.125, "learning_rate": 9.125381214949229e-05, "loss": 2.2791, "step": 4381 }, { "epoch": 0.19152935005900606, "grad_norm": 3.890625, "learning_rate": 9.124993081418375e-05, "loss": 2.0627, "step": 4382 }, { "epoch": 0.19157305826303597, "grad_norm": 2.6875, "learning_rate": 9.124604870042901e-05, "loss": 2.8771, "step": 4383 }, { "epoch": 0.19161676646706588, "grad_norm": 2.46875, "learning_rate": 9.12421658083013e-05, "loss": 1.8637, "step": 4384 }, { "epoch": 0.19166047467109576, "grad_norm": 1.890625, "learning_rate": 9.123828213787389e-05, "loss": 2.0092, "step": 4385 }, { "epoch": 0.19170418287512567, "grad_norm": 1.8828125, "learning_rate": 9.123439768922006e-05, "loss": 1.5351, "step": 4386 }, { "epoch": 0.19174789107915555, "grad_norm": 3.453125, "learning_rate": 9.123051246241314e-05, "loss": 2.1595, "step": 4387 }, { "epoch": 0.19179159928318545, "grad_norm": 3.203125, "learning_rate": 9.122662645752646e-05, "loss": 1.3175, "step": 4388 }, { "epoch": 0.19183530748721536, "grad_norm": 2.671875, "learning_rate": 9.122273967463331e-05, "loss": 1.9585, "step": 4389 }, { "epoch": 0.19187901569124524, "grad_norm": 2.296875, "learning_rate": 9.121885211380708e-05, "loss": 1.9942, "step": 4390 }, { "epoch": 0.19192272389527515, "grad_norm": 1.8515625, "learning_rate": 9.121496377512114e-05, "loss": 1.6896, "step": 4391 }, { "epoch": 0.19196643209930503, "grad_norm": 2.796875, "learning_rate": 9.121107465864882e-05, "loss": 2.2896, "step": 4392 }, { "epoch": 0.19201014030333494, "grad_norm": 2.15625, "learning_rate": 9.120718476446354e-05, "loss": 2.1606, "step": 4393 }, { "epoch": 0.19205384850736484, "grad_norm": 2.140625, "learning_rate": 9.120329409263871e-05, "loss": 1.8183, "step": 4394 }, { "epoch": 0.19209755671139472, "grad_norm": 2.359375, "learning_rate": 9.119940264324776e-05, "loss": 1.8949, "step": 4395 }, { "epoch": 0.19214126491542463, "grad_norm": 3.21875, "learning_rate": 9.119551041636412e-05, "loss": 2.3729, "step": 4396 }, { "epoch": 0.1921849731194545, "grad_norm": 1.9375, "learning_rate": 9.119161741206123e-05, "loss": 1.9106, "step": 4397 }, { "epoch": 0.19222868132348442, "grad_norm": 2.453125, "learning_rate": 9.11877236304126e-05, "loss": 2.1901, "step": 4398 }, { "epoch": 0.19227238952751433, "grad_norm": 2.25, "learning_rate": 9.118382907149165e-05, "loss": 2.2365, "step": 4399 }, { "epoch": 0.1923160977315442, "grad_norm": 1.9140625, "learning_rate": 9.11799337353719e-05, "loss": 1.8551, "step": 4400 }, { "epoch": 0.1923598059355741, "grad_norm": 2.015625, "learning_rate": 9.117603762212687e-05, "loss": 1.8847, "step": 4401 }, { "epoch": 0.192403514139604, "grad_norm": 2.484375, "learning_rate": 9.11721407318301e-05, "loss": 3.003, "step": 4402 }, { "epoch": 0.1924472223436339, "grad_norm": 2.03125, "learning_rate": 9.116824306455509e-05, "loss": 1.9676, "step": 4403 }, { "epoch": 0.1924909305476638, "grad_norm": 2.125, "learning_rate": 9.116434462037545e-05, "loss": 1.9876, "step": 4404 }, { "epoch": 0.1925346387516937, "grad_norm": 3.40625, "learning_rate": 9.116044539936468e-05, "loss": 1.6742, "step": 4405 }, { "epoch": 0.1925783469557236, "grad_norm": 2.265625, "learning_rate": 9.115654540159641e-05, "loss": 2.2874, "step": 4406 }, { "epoch": 0.19262205515975347, "grad_norm": 2.15625, "learning_rate": 9.115264462714421e-05, "loss": 1.6118, "step": 4407 }, { "epoch": 0.19266576336378338, "grad_norm": 2.203125, "learning_rate": 9.114874307608175e-05, "loss": 1.7595, "step": 4408 }, { "epoch": 0.1927094715678133, "grad_norm": 2.8125, "learning_rate": 9.114484074848259e-05, "loss": 2.242, "step": 4409 }, { "epoch": 0.19275317977184317, "grad_norm": 2.296875, "learning_rate": 9.11409376444204e-05, "loss": 1.8576, "step": 4410 }, { "epoch": 0.19279688797587308, "grad_norm": 3.46875, "learning_rate": 9.113703376396885e-05, "loss": 2.2897, "step": 4411 }, { "epoch": 0.19284059617990296, "grad_norm": 2.3125, "learning_rate": 9.11331291072016e-05, "loss": 2.1501, "step": 4412 }, { "epoch": 0.19288430438393286, "grad_norm": 2.125, "learning_rate": 9.112922367419234e-05, "loss": 2.0837, "step": 4413 }, { "epoch": 0.19292801258796277, "grad_norm": 2.453125, "learning_rate": 9.112531746501476e-05, "loss": 1.7882, "step": 4414 }, { "epoch": 0.19297172079199265, "grad_norm": 2.921875, "learning_rate": 9.112141047974259e-05, "loss": 2.9361, "step": 4415 }, { "epoch": 0.19301542899602256, "grad_norm": 2.21875, "learning_rate": 9.111750271844957e-05, "loss": 2.1117, "step": 4416 }, { "epoch": 0.19305913720005244, "grad_norm": 2.828125, "learning_rate": 9.11135941812094e-05, "loss": 2.7356, "step": 4417 }, { "epoch": 0.19310284540408235, "grad_norm": 2.125, "learning_rate": 9.110968486809588e-05, "loss": 1.5091, "step": 4418 }, { "epoch": 0.19314655360811225, "grad_norm": 2.28125, "learning_rate": 9.110577477918279e-05, "loss": 2.2072, "step": 4419 }, { "epoch": 0.19319026181214213, "grad_norm": 2.265625, "learning_rate": 9.110186391454389e-05, "loss": 1.8448, "step": 4420 }, { "epoch": 0.19323397001617204, "grad_norm": 2.234375, "learning_rate": 9.1097952274253e-05, "loss": 1.9389, "step": 4421 }, { "epoch": 0.19327767822020192, "grad_norm": 2.140625, "learning_rate": 9.109403985838392e-05, "loss": 2.0713, "step": 4422 }, { "epoch": 0.19332138642423183, "grad_norm": 2.40625, "learning_rate": 9.109012666701051e-05, "loss": 2.0326, "step": 4423 }, { "epoch": 0.19336509462826174, "grad_norm": 2.5, "learning_rate": 9.108621270020661e-05, "loss": 2.5237, "step": 4424 }, { "epoch": 0.19340880283229162, "grad_norm": 2.265625, "learning_rate": 9.108229795804609e-05, "loss": 1.8756, "step": 4425 }, { "epoch": 0.19345251103632152, "grad_norm": 1.9921875, "learning_rate": 9.10783824406028e-05, "loss": 1.8804, "step": 4426 }, { "epoch": 0.1934962192403514, "grad_norm": 3.484375, "learning_rate": 9.107446614795063e-05, "loss": 2.3902, "step": 4427 }, { "epoch": 0.1935399274443813, "grad_norm": 2.171875, "learning_rate": 9.107054908016352e-05, "loss": 2.1118, "step": 4428 }, { "epoch": 0.19358363564841122, "grad_norm": 2.921875, "learning_rate": 9.106663123731539e-05, "loss": 1.6956, "step": 4429 }, { "epoch": 0.1936273438524411, "grad_norm": 2.984375, "learning_rate": 9.106271261948014e-05, "loss": 2.6526, "step": 4430 }, { "epoch": 0.193671052056471, "grad_norm": 2.15625, "learning_rate": 9.105879322673174e-05, "loss": 1.9012, "step": 4431 }, { "epoch": 0.19371476026050088, "grad_norm": 2.25, "learning_rate": 9.105487305914416e-05, "loss": 1.5692, "step": 4432 }, { "epoch": 0.1937584684645308, "grad_norm": 2.140625, "learning_rate": 9.105095211679135e-05, "loss": 2.1344, "step": 4433 }, { "epoch": 0.1938021766685607, "grad_norm": 2.171875, "learning_rate": 9.104703039974736e-05, "loss": 2.1873, "step": 4434 }, { "epoch": 0.19384588487259058, "grad_norm": 2.421875, "learning_rate": 9.104310790808614e-05, "loss": 2.5386, "step": 4435 }, { "epoch": 0.1938895930766205, "grad_norm": 2.4375, "learning_rate": 9.103918464188175e-05, "loss": 2.2476, "step": 4436 }, { "epoch": 0.19393330128065037, "grad_norm": 2.265625, "learning_rate": 9.103526060120821e-05, "loss": 1.8868, "step": 4437 }, { "epoch": 0.19397700948468027, "grad_norm": 2.09375, "learning_rate": 9.103133578613959e-05, "loss": 1.8679, "step": 4438 }, { "epoch": 0.19402071768871018, "grad_norm": 2.34375, "learning_rate": 9.102741019674993e-05, "loss": 1.5364, "step": 4439 }, { "epoch": 0.19406442589274006, "grad_norm": 2.640625, "learning_rate": 9.102348383311334e-05, "loss": 1.9936, "step": 4440 }, { "epoch": 0.19410813409676997, "grad_norm": 2.484375, "learning_rate": 9.101955669530391e-05, "loss": 2.3603, "step": 4441 }, { "epoch": 0.19415184230079985, "grad_norm": 2.09375, "learning_rate": 9.101562878339572e-05, "loss": 1.5734, "step": 4442 }, { "epoch": 0.19419555050482976, "grad_norm": 2.125, "learning_rate": 9.101170009746294e-05, "loss": 1.8535, "step": 4443 }, { "epoch": 0.19423925870885966, "grad_norm": 3.09375, "learning_rate": 9.100777063757969e-05, "loss": 2.6233, "step": 4444 }, { "epoch": 0.19428296691288954, "grad_norm": 3.453125, "learning_rate": 9.100384040382011e-05, "loss": 3.0462, "step": 4445 }, { "epoch": 0.19432667511691945, "grad_norm": 2.75, "learning_rate": 9.099990939625838e-05, "loss": 2.0341, "step": 4446 }, { "epoch": 0.19437038332094933, "grad_norm": 2.4375, "learning_rate": 9.099597761496869e-05, "loss": 2.5488, "step": 4447 }, { "epoch": 0.19441409152497924, "grad_norm": 2.09375, "learning_rate": 9.099204506002525e-05, "loss": 1.9444, "step": 4448 }, { "epoch": 0.19445779972900915, "grad_norm": 2.328125, "learning_rate": 9.098811173150225e-05, "loss": 1.9015, "step": 4449 }, { "epoch": 0.19450150793303903, "grad_norm": 2.625, "learning_rate": 9.098417762947394e-05, "loss": 1.7492, "step": 4450 }, { "epoch": 0.19454521613706893, "grad_norm": 2.828125, "learning_rate": 9.098024275401454e-05, "loss": 2.0373, "step": 4451 }, { "epoch": 0.1945889243410988, "grad_norm": 2.25, "learning_rate": 9.097630710519829e-05, "loss": 2.2535, "step": 4452 }, { "epoch": 0.19463263254512872, "grad_norm": 2.578125, "learning_rate": 9.097237068309951e-05, "loss": 1.9008, "step": 4453 }, { "epoch": 0.19467634074915863, "grad_norm": 2.59375, "learning_rate": 9.096843348779247e-05, "loss": 2.2965, "step": 4454 }, { "epoch": 0.1947200489531885, "grad_norm": 3.046875, "learning_rate": 9.096449551935144e-05, "loss": 2.1173, "step": 4455 }, { "epoch": 0.19476375715721841, "grad_norm": 2.0, "learning_rate": 9.096055677785078e-05, "loss": 1.7159, "step": 4456 }, { "epoch": 0.1948074653612483, "grad_norm": 1.8671875, "learning_rate": 9.09566172633648e-05, "loss": 1.6598, "step": 4457 }, { "epoch": 0.1948511735652782, "grad_norm": 2.125, "learning_rate": 9.095267697596782e-05, "loss": 1.8245, "step": 4458 }, { "epoch": 0.1948948817693081, "grad_norm": 2.375, "learning_rate": 9.094873591573423e-05, "loss": 2.1265, "step": 4459 }, { "epoch": 0.194938589973338, "grad_norm": 2.3125, "learning_rate": 9.094479408273841e-05, "loss": 2.1805, "step": 4460 }, { "epoch": 0.1949822981773679, "grad_norm": 2.421875, "learning_rate": 9.094085147705472e-05, "loss": 2.1034, "step": 4461 }, { "epoch": 0.19502600638139778, "grad_norm": 2.484375, "learning_rate": 9.093690809875758e-05, "loss": 1.6653, "step": 4462 }, { "epoch": 0.19506971458542768, "grad_norm": 2.078125, "learning_rate": 9.09329639479214e-05, "loss": 1.5786, "step": 4463 }, { "epoch": 0.1951134227894576, "grad_norm": 2.4375, "learning_rate": 9.092901902462062e-05, "loss": 1.8306, "step": 4464 }, { "epoch": 0.19515713099348747, "grad_norm": 3.484375, "learning_rate": 9.092507332892968e-05, "loss": 2.9709, "step": 4465 }, { "epoch": 0.19520083919751738, "grad_norm": 2.28125, "learning_rate": 9.092112686092304e-05, "loss": 2.0711, "step": 4466 }, { "epoch": 0.19524454740154726, "grad_norm": 2.1875, "learning_rate": 9.091717962067518e-05, "loss": 2.3881, "step": 4467 }, { "epoch": 0.19528825560557717, "grad_norm": 2.296875, "learning_rate": 9.09132316082606e-05, "loss": 2.128, "step": 4468 }, { "epoch": 0.19533196380960707, "grad_norm": 2.53125, "learning_rate": 9.090928282375378e-05, "loss": 1.5185, "step": 4469 }, { "epoch": 0.19537567201363695, "grad_norm": 2.4375, "learning_rate": 9.090533326722924e-05, "loss": 2.0833, "step": 4470 }, { "epoch": 0.19541938021766686, "grad_norm": 2.578125, "learning_rate": 9.090138293876155e-05, "loss": 2.3409, "step": 4471 }, { "epoch": 0.19546308842169674, "grad_norm": 2.328125, "learning_rate": 9.089743183842523e-05, "loss": 1.8522, "step": 4472 }, { "epoch": 0.19550679662572665, "grad_norm": 2.0, "learning_rate": 9.089347996629484e-05, "loss": 1.7542, "step": 4473 }, { "epoch": 0.19555050482975656, "grad_norm": 2.203125, "learning_rate": 9.088952732244495e-05, "loss": 1.8944, "step": 4474 }, { "epoch": 0.19559421303378643, "grad_norm": 2.34375, "learning_rate": 9.088557390695021e-05, "loss": 1.8428, "step": 4475 }, { "epoch": 0.19563792123781634, "grad_norm": 2.28125, "learning_rate": 9.088161971988516e-05, "loss": 1.6356, "step": 4476 }, { "epoch": 0.19568162944184622, "grad_norm": 2.0625, "learning_rate": 9.087766476132444e-05, "loss": 1.7155, "step": 4477 }, { "epoch": 0.19572533764587613, "grad_norm": 5.75, "learning_rate": 9.08737090313427e-05, "loss": 2.7836, "step": 4478 }, { "epoch": 0.19576904584990604, "grad_norm": 2.0625, "learning_rate": 9.086975253001457e-05, "loss": 1.6453, "step": 4479 }, { "epoch": 0.19581275405393592, "grad_norm": 2.15625, "learning_rate": 9.086579525741475e-05, "loss": 1.6787, "step": 4480 }, { "epoch": 0.19585646225796582, "grad_norm": 2.140625, "learning_rate": 9.086183721361787e-05, "loss": 2.1751, "step": 4481 }, { "epoch": 0.1959001704619957, "grad_norm": 2.359375, "learning_rate": 9.085787839869866e-05, "loss": 2.2441, "step": 4482 }, { "epoch": 0.1959438786660256, "grad_norm": 2.125, "learning_rate": 9.085391881273182e-05, "loss": 1.4872, "step": 4483 }, { "epoch": 0.19598758687005552, "grad_norm": 2.578125, "learning_rate": 9.084995845579208e-05, "loss": 1.9066, "step": 4484 }, { "epoch": 0.1960312950740854, "grad_norm": 2.25, "learning_rate": 9.084599732795415e-05, "loss": 2.2245, "step": 4485 }, { "epoch": 0.1960750032781153, "grad_norm": 2.421875, "learning_rate": 9.084203542929282e-05, "loss": 2.049, "step": 4486 }, { "epoch": 0.19611871148214519, "grad_norm": 2.234375, "learning_rate": 9.083807275988284e-05, "loss": 2.4972, "step": 4487 }, { "epoch": 0.1961624196861751, "grad_norm": 2.078125, "learning_rate": 9.083410931979899e-05, "loss": 1.5624, "step": 4488 }, { "epoch": 0.196206127890205, "grad_norm": 2.234375, "learning_rate": 9.083014510911604e-05, "loss": 1.945, "step": 4489 }, { "epoch": 0.19624983609423488, "grad_norm": 2.21875, "learning_rate": 9.082618012790886e-05, "loss": 2.0894, "step": 4490 }, { "epoch": 0.1962935442982648, "grad_norm": 2.203125, "learning_rate": 9.082221437625223e-05, "loss": 2.0207, "step": 4491 }, { "epoch": 0.19633725250229467, "grad_norm": 2.15625, "learning_rate": 9.081824785422099e-05, "loss": 1.7351, "step": 4492 }, { "epoch": 0.19638096070632458, "grad_norm": 2.859375, "learning_rate": 9.081428056189e-05, "loss": 3.121, "step": 4493 }, { "epoch": 0.19642466891035448, "grad_norm": 2.21875, "learning_rate": 9.081031249933416e-05, "loss": 2.1267, "step": 4494 }, { "epoch": 0.19646837711438436, "grad_norm": 2.046875, "learning_rate": 9.080634366662832e-05, "loss": 1.6989, "step": 4495 }, { "epoch": 0.19651208531841427, "grad_norm": 2.46875, "learning_rate": 9.080237406384738e-05, "loss": 1.9422, "step": 4496 }, { "epoch": 0.19655579352244415, "grad_norm": 2.3125, "learning_rate": 9.079840369106625e-05, "loss": 2.0947, "step": 4497 }, { "epoch": 0.19659950172647406, "grad_norm": 3.421875, "learning_rate": 9.079443254835987e-05, "loss": 1.5288, "step": 4498 }, { "epoch": 0.19664320993050396, "grad_norm": 2.640625, "learning_rate": 9.079046063580316e-05, "loss": 1.7854, "step": 4499 }, { "epoch": 0.19668691813453384, "grad_norm": 2.328125, "learning_rate": 9.07864879534711e-05, "loss": 1.9261, "step": 4500 }, { "epoch": 0.19673062633856375, "grad_norm": 2.140625, "learning_rate": 9.078251450143866e-05, "loss": 1.7282, "step": 4501 }, { "epoch": 0.19677433454259363, "grad_norm": 2.203125, "learning_rate": 9.077854027978078e-05, "loss": 1.9388, "step": 4502 }, { "epoch": 0.19681804274662354, "grad_norm": 2.875, "learning_rate": 9.077456528857254e-05, "loss": 1.6096, "step": 4503 }, { "epoch": 0.19686175095065345, "grad_norm": 3.625, "learning_rate": 9.077058952788888e-05, "loss": 1.4145, "step": 4504 }, { "epoch": 0.19690545915468333, "grad_norm": 2.046875, "learning_rate": 9.076661299780486e-05, "loss": 2.0563, "step": 4505 }, { "epoch": 0.19694916735871323, "grad_norm": 2.5625, "learning_rate": 9.076263569839553e-05, "loss": 1.9754, "step": 4506 }, { "epoch": 0.1969928755627431, "grad_norm": 2.21875, "learning_rate": 9.075865762973595e-05, "loss": 1.9317, "step": 4507 }, { "epoch": 0.19703658376677302, "grad_norm": 2.09375, "learning_rate": 9.075467879190117e-05, "loss": 1.9321, "step": 4508 }, { "epoch": 0.19708029197080293, "grad_norm": 2.6875, "learning_rate": 9.075069918496625e-05, "loss": 2.1764, "step": 4509 }, { "epoch": 0.1971240001748328, "grad_norm": 2.328125, "learning_rate": 9.074671880900636e-05, "loss": 1.9645, "step": 4510 }, { "epoch": 0.19716770837886272, "grad_norm": 2.28125, "learning_rate": 9.074273766409657e-05, "loss": 1.7994, "step": 4511 }, { "epoch": 0.1972114165828926, "grad_norm": 2.640625, "learning_rate": 9.073875575031203e-05, "loss": 2.1177, "step": 4512 }, { "epoch": 0.1972551247869225, "grad_norm": 2.375, "learning_rate": 9.073477306772789e-05, "loss": 1.6883, "step": 4513 }, { "epoch": 0.1972988329909524, "grad_norm": 2.359375, "learning_rate": 9.073078961641928e-05, "loss": 2.5953, "step": 4514 }, { "epoch": 0.1973425411949823, "grad_norm": 2.421875, "learning_rate": 9.07268053964614e-05, "loss": 2.3189, "step": 4515 }, { "epoch": 0.1973862493990122, "grad_norm": 2.609375, "learning_rate": 9.072282040792939e-05, "loss": 2.5728, "step": 4516 }, { "epoch": 0.19742995760304208, "grad_norm": 2.75, "learning_rate": 9.071883465089852e-05, "loss": 2.2928, "step": 4517 }, { "epoch": 0.19747366580707199, "grad_norm": 5.96875, "learning_rate": 9.071484812544398e-05, "loss": 1.8055, "step": 4518 }, { "epoch": 0.1975173740111019, "grad_norm": 3.359375, "learning_rate": 9.071086083164099e-05, "loss": 3.7166, "step": 4519 }, { "epoch": 0.19756108221513177, "grad_norm": 2.09375, "learning_rate": 9.07068727695648e-05, "loss": 1.6634, "step": 4520 }, { "epoch": 0.19760479041916168, "grad_norm": 2.390625, "learning_rate": 9.07028839392907e-05, "loss": 2.0254, "step": 4521 }, { "epoch": 0.19764849862319156, "grad_norm": 2.1875, "learning_rate": 9.069889434089391e-05, "loss": 1.9842, "step": 4522 }, { "epoch": 0.19769220682722147, "grad_norm": 2.125, "learning_rate": 9.069490397444975e-05, "loss": 1.9428, "step": 4523 }, { "epoch": 0.19773591503125137, "grad_norm": 2.578125, "learning_rate": 9.069091284003354e-05, "loss": 1.9117, "step": 4524 }, { "epoch": 0.19777962323528125, "grad_norm": 2.9375, "learning_rate": 9.068692093772058e-05, "loss": 2.1338, "step": 4525 }, { "epoch": 0.19782333143931116, "grad_norm": 2.015625, "learning_rate": 9.06829282675862e-05, "loss": 2.0547, "step": 4526 }, { "epoch": 0.19786703964334104, "grad_norm": 2.171875, "learning_rate": 9.067893482970575e-05, "loss": 1.9351, "step": 4527 }, { "epoch": 0.19791074784737095, "grad_norm": 2.046875, "learning_rate": 9.06749406241546e-05, "loss": 2.3361, "step": 4528 }, { "epoch": 0.19795445605140086, "grad_norm": 2.15625, "learning_rate": 9.067094565100811e-05, "loss": 1.8335, "step": 4529 }, { "epoch": 0.19799816425543074, "grad_norm": 2.59375, "learning_rate": 9.066694991034169e-05, "loss": 2.0857, "step": 4530 }, { "epoch": 0.19804187245946064, "grad_norm": 2.46875, "learning_rate": 9.066295340223073e-05, "loss": 2.4841, "step": 4531 }, { "epoch": 0.19808558066349052, "grad_norm": 3.109375, "learning_rate": 9.065895612675066e-05, "loss": 2.5671, "step": 4532 }, { "epoch": 0.19812928886752043, "grad_norm": 2.078125, "learning_rate": 9.065495808397693e-05, "loss": 1.6286, "step": 4533 }, { "epoch": 0.19817299707155034, "grad_norm": 2.09375, "learning_rate": 9.065095927398495e-05, "loss": 1.6137, "step": 4534 }, { "epoch": 0.19821670527558022, "grad_norm": 2.1875, "learning_rate": 9.064695969685021e-05, "loss": 1.7371, "step": 4535 }, { "epoch": 0.19826041347961013, "grad_norm": 2.5, "learning_rate": 9.064295935264818e-05, "loss": 1.8502, "step": 4536 }, { "epoch": 0.19830412168364, "grad_norm": 2.484375, "learning_rate": 9.063895824145435e-05, "loss": 2.0257, "step": 4537 }, { "epoch": 0.1983478298876699, "grad_norm": 2.171875, "learning_rate": 9.063495636334423e-05, "loss": 1.7634, "step": 4538 }, { "epoch": 0.19839153809169982, "grad_norm": 2.703125, "learning_rate": 9.063095371839337e-05, "loss": 1.9927, "step": 4539 }, { "epoch": 0.1984352462957297, "grad_norm": 2.546875, "learning_rate": 9.062695030667724e-05, "loss": 2.03, "step": 4540 }, { "epoch": 0.1984789544997596, "grad_norm": 2.640625, "learning_rate": 9.062294612827145e-05, "loss": 1.8626, "step": 4541 }, { "epoch": 0.1985226627037895, "grad_norm": 2.421875, "learning_rate": 9.061894118325154e-05, "loss": 2.0393, "step": 4542 }, { "epoch": 0.1985663709078194, "grad_norm": 1.984375, "learning_rate": 9.061493547169308e-05, "loss": 1.5743, "step": 4543 }, { "epoch": 0.1986100791118493, "grad_norm": 2.125, "learning_rate": 9.061092899367169e-05, "loss": 2.0798, "step": 4544 }, { "epoch": 0.19865378731587918, "grad_norm": 8.375, "learning_rate": 9.060692174926296e-05, "loss": 2.4696, "step": 4545 }, { "epoch": 0.1986974955199091, "grad_norm": 2.109375, "learning_rate": 9.060291373854251e-05, "loss": 1.7727, "step": 4546 }, { "epoch": 0.19874120372393897, "grad_norm": 2.3125, "learning_rate": 9.059890496158599e-05, "loss": 2.1714, "step": 4547 }, { "epoch": 0.19878491192796888, "grad_norm": 2.9375, "learning_rate": 9.059489541846903e-05, "loss": 1.4298, "step": 4548 }, { "epoch": 0.19882862013199878, "grad_norm": 2.359375, "learning_rate": 9.059088510926732e-05, "loss": 2.2674, "step": 4549 }, { "epoch": 0.19887232833602866, "grad_norm": 25.75, "learning_rate": 9.058687403405653e-05, "loss": 2.4088, "step": 4550 }, { "epoch": 0.19891603654005857, "grad_norm": 1.90625, "learning_rate": 9.058286219291237e-05, "loss": 1.5939, "step": 4551 }, { "epoch": 0.19895974474408848, "grad_norm": 2.515625, "learning_rate": 9.057884958591052e-05, "loss": 2.0231, "step": 4552 }, { "epoch": 0.19900345294811836, "grad_norm": 2.34375, "learning_rate": 9.057483621312671e-05, "loss": 1.5278, "step": 4553 }, { "epoch": 0.19904716115214827, "grad_norm": 2.203125, "learning_rate": 9.05708220746367e-05, "loss": 2.2683, "step": 4554 }, { "epoch": 0.19909086935617815, "grad_norm": 2.171875, "learning_rate": 9.056680717051622e-05, "loss": 1.9358, "step": 4555 }, { "epoch": 0.19913457756020805, "grad_norm": 2.984375, "learning_rate": 9.056279150084106e-05, "loss": 2.4731, "step": 4556 }, { "epoch": 0.19917828576423796, "grad_norm": 2.0625, "learning_rate": 9.055877506568699e-05, "loss": 1.8777, "step": 4557 }, { "epoch": 0.19922199396826784, "grad_norm": 1.984375, "learning_rate": 9.05547578651298e-05, "loss": 1.6332, "step": 4558 }, { "epoch": 0.19926570217229775, "grad_norm": 2.421875, "learning_rate": 9.05507398992453e-05, "loss": 2.3121, "step": 4559 }, { "epoch": 0.19930941037632763, "grad_norm": 2.625, "learning_rate": 9.054672116810932e-05, "loss": 2.1761, "step": 4560 }, { "epoch": 0.19935311858035754, "grad_norm": 7.90625, "learning_rate": 9.054270167179768e-05, "loss": 2.2086, "step": 4561 }, { "epoch": 0.19939682678438744, "grad_norm": 2.125, "learning_rate": 9.053868141038628e-05, "loss": 1.5267, "step": 4562 }, { "epoch": 0.19944053498841732, "grad_norm": 2.25, "learning_rate": 9.053466038395096e-05, "loss": 1.4548, "step": 4563 }, { "epoch": 0.19948424319244723, "grad_norm": 2.609375, "learning_rate": 9.053063859256758e-05, "loss": 1.987, "step": 4564 }, { "epoch": 0.1995279513964771, "grad_norm": 6.9375, "learning_rate": 9.052661603631207e-05, "loss": 1.8233, "step": 4565 }, { "epoch": 0.19957165960050702, "grad_norm": 6.25, "learning_rate": 9.052259271526033e-05, "loss": 3.0517, "step": 4566 }, { "epoch": 0.19961536780453693, "grad_norm": 2.578125, "learning_rate": 9.05185686294883e-05, "loss": 1.7139, "step": 4567 }, { "epoch": 0.1996590760085668, "grad_norm": 2.90625, "learning_rate": 9.051454377907191e-05, "loss": 2.6329, "step": 4568 }, { "epoch": 0.1997027842125967, "grad_norm": 2.546875, "learning_rate": 9.05105181640871e-05, "loss": 2.0316, "step": 4569 }, { "epoch": 0.1997464924166266, "grad_norm": 2.515625, "learning_rate": 9.050649178460987e-05, "loss": 2.1056, "step": 4570 }, { "epoch": 0.1997902006206565, "grad_norm": 2.5625, "learning_rate": 9.050246464071616e-05, "loss": 2.4229, "step": 4571 }, { "epoch": 0.1998339088246864, "grad_norm": 2.890625, "learning_rate": 9.049843673248199e-05, "loss": 1.7415, "step": 4572 }, { "epoch": 0.1998776170287163, "grad_norm": 2.21875, "learning_rate": 9.04944080599834e-05, "loss": 2.1538, "step": 4573 }, { "epoch": 0.1999213252327462, "grad_norm": 2.34375, "learning_rate": 9.04903786232964e-05, "loss": 1.956, "step": 4574 }, { "epoch": 0.19996503343677607, "grad_norm": 3.03125, "learning_rate": 9.048634842249699e-05, "loss": 2.8857, "step": 4575 }, { "epoch": 0.20000874164080598, "grad_norm": 2.109375, "learning_rate": 9.048231745766129e-05, "loss": 1.7488, "step": 4576 }, { "epoch": 0.2000524498448359, "grad_norm": 2.484375, "learning_rate": 9.047828572886532e-05, "loss": 2.6818, "step": 4577 }, { "epoch": 0.20009615804886577, "grad_norm": 1.84375, "learning_rate": 9.04742532361852e-05, "loss": 1.7045, "step": 4578 }, { "epoch": 0.20013986625289568, "grad_norm": 2.625, "learning_rate": 9.047021997969701e-05, "loss": 1.9284, "step": 4579 }, { "epoch": 0.20018357445692556, "grad_norm": 2.609375, "learning_rate": 9.046618595947687e-05, "loss": 2.3077, "step": 4580 }, { "epoch": 0.20022728266095546, "grad_norm": 3.109375, "learning_rate": 9.04621511756009e-05, "loss": 1.7883, "step": 4581 }, { "epoch": 0.20027099086498537, "grad_norm": 2.171875, "learning_rate": 9.045811562814525e-05, "loss": 1.8529, "step": 4582 }, { "epoch": 0.20031469906901525, "grad_norm": 2.125, "learning_rate": 9.045407931718608e-05, "loss": 1.7938, "step": 4583 }, { "epoch": 0.20035840727304516, "grad_norm": 2.546875, "learning_rate": 9.045004224279954e-05, "loss": 1.9549, "step": 4584 }, { "epoch": 0.20040211547707504, "grad_norm": 2.703125, "learning_rate": 9.044600440506184e-05, "loss": 1.5256, "step": 4585 }, { "epoch": 0.20044582368110495, "grad_norm": 2.921875, "learning_rate": 9.044196580404917e-05, "loss": 2.6624, "step": 4586 }, { "epoch": 0.20048953188513485, "grad_norm": 3.03125, "learning_rate": 9.043792643983774e-05, "loss": 3.1266, "step": 4587 }, { "epoch": 0.20053324008916473, "grad_norm": 2.015625, "learning_rate": 9.04338863125038e-05, "loss": 1.8653, "step": 4588 }, { "epoch": 0.20057694829319464, "grad_norm": 2.1875, "learning_rate": 9.042984542212355e-05, "loss": 2.1311, "step": 4589 }, { "epoch": 0.20062065649722452, "grad_norm": 2.03125, "learning_rate": 9.042580376877329e-05, "loss": 1.9296, "step": 4590 }, { "epoch": 0.20066436470125443, "grad_norm": 2.421875, "learning_rate": 9.042176135252926e-05, "loss": 2.8345, "step": 4591 }, { "epoch": 0.20070807290528433, "grad_norm": 1.9375, "learning_rate": 9.041771817346778e-05, "loss": 1.6019, "step": 4592 }, { "epoch": 0.20075178110931421, "grad_norm": 2.15625, "learning_rate": 9.041367423166513e-05, "loss": 1.8668, "step": 4593 }, { "epoch": 0.20079548931334412, "grad_norm": 2.21875, "learning_rate": 9.040962952719763e-05, "loss": 2.3199, "step": 4594 }, { "epoch": 0.200839197517374, "grad_norm": 3.125, "learning_rate": 9.040558406014161e-05, "loss": 1.8228, "step": 4595 }, { "epoch": 0.2008829057214039, "grad_norm": 2.21875, "learning_rate": 9.040153783057342e-05, "loss": 2.0666, "step": 4596 }, { "epoch": 0.20092661392543382, "grad_norm": 3.203125, "learning_rate": 9.039749083856938e-05, "loss": 2.0252, "step": 4597 }, { "epoch": 0.2009703221294637, "grad_norm": 2.515625, "learning_rate": 9.039344308420591e-05, "loss": 2.374, "step": 4598 }, { "epoch": 0.2010140303334936, "grad_norm": 3.265625, "learning_rate": 9.038939456755938e-05, "loss": 2.3647, "step": 4599 }, { "epoch": 0.20105773853752348, "grad_norm": 2.375, "learning_rate": 9.038534528870618e-05, "loss": 1.8677, "step": 4600 }, { "epoch": 0.2011014467415534, "grad_norm": 2.40625, "learning_rate": 9.038129524772274e-05, "loss": 1.8486, "step": 4601 }, { "epoch": 0.2011451549455833, "grad_norm": 1.9921875, "learning_rate": 9.03772444446855e-05, "loss": 1.3506, "step": 4602 }, { "epoch": 0.20118886314961318, "grad_norm": 1.875, "learning_rate": 9.037319287967088e-05, "loss": 1.6946, "step": 4603 }, { "epoch": 0.20123257135364309, "grad_norm": 1.8828125, "learning_rate": 9.036914055275534e-05, "loss": 1.5738, "step": 4604 }, { "epoch": 0.20127627955767297, "grad_norm": 2.328125, "learning_rate": 9.036508746401538e-05, "loss": 1.9645, "step": 4605 }, { "epoch": 0.20131998776170287, "grad_norm": 2.375, "learning_rate": 9.036103361352746e-05, "loss": 1.9012, "step": 4606 }, { "epoch": 0.20136369596573278, "grad_norm": 3.3125, "learning_rate": 9.03569790013681e-05, "loss": 2.5034, "step": 4607 }, { "epoch": 0.20140740416976266, "grad_norm": 3.109375, "learning_rate": 9.035292362761381e-05, "loss": 1.9332, "step": 4608 }, { "epoch": 0.20145111237379257, "grad_norm": 2.1875, "learning_rate": 9.034886749234111e-05, "loss": 1.7966, "step": 4609 }, { "epoch": 0.20149482057782245, "grad_norm": 2.921875, "learning_rate": 9.034481059562657e-05, "loss": 2.015, "step": 4610 }, { "epoch": 0.20153852878185236, "grad_norm": 2.25, "learning_rate": 9.034075293754672e-05, "loss": 2.0806, "step": 4611 }, { "epoch": 0.20158223698588226, "grad_norm": 2.375, "learning_rate": 9.033669451817817e-05, "loss": 2.1522, "step": 4612 }, { "epoch": 0.20162594518991214, "grad_norm": 2.078125, "learning_rate": 9.033263533759748e-05, "loss": 1.8821, "step": 4613 }, { "epoch": 0.20166965339394205, "grad_norm": 2.40625, "learning_rate": 9.032857539588124e-05, "loss": 2.1767, "step": 4614 }, { "epoch": 0.20171336159797193, "grad_norm": 2.125, "learning_rate": 9.03245146931061e-05, "loss": 2.1203, "step": 4615 }, { "epoch": 0.20175706980200184, "grad_norm": 2.53125, "learning_rate": 9.032045322934868e-05, "loss": 2.2441, "step": 4616 }, { "epoch": 0.20180077800603174, "grad_norm": 4.71875, "learning_rate": 9.031639100468563e-05, "loss": 1.45, "step": 4617 }, { "epoch": 0.20184448621006162, "grad_norm": 2.40625, "learning_rate": 9.031232801919359e-05, "loss": 1.8462, "step": 4618 }, { "epoch": 0.20188819441409153, "grad_norm": 2.84375, "learning_rate": 9.030826427294924e-05, "loss": 2.1721, "step": 4619 }, { "epoch": 0.2019319026181214, "grad_norm": 2.15625, "learning_rate": 9.03041997660293e-05, "loss": 1.9742, "step": 4620 }, { "epoch": 0.20197561082215132, "grad_norm": 2.71875, "learning_rate": 9.030013449851045e-05, "loss": 1.8034, "step": 4621 }, { "epoch": 0.20201931902618123, "grad_norm": 2.375, "learning_rate": 9.029606847046941e-05, "loss": 2.3671, "step": 4622 }, { "epoch": 0.2020630272302111, "grad_norm": 2.59375, "learning_rate": 9.029200168198289e-05, "loss": 2.5159, "step": 4623 }, { "epoch": 0.202106735434241, "grad_norm": 2.90625, "learning_rate": 9.028793413312767e-05, "loss": 2.3314, "step": 4624 }, { "epoch": 0.2021504436382709, "grad_norm": 2.265625, "learning_rate": 9.02838658239805e-05, "loss": 1.9898, "step": 4625 }, { "epoch": 0.2021941518423008, "grad_norm": 2.5, "learning_rate": 9.027979675461814e-05, "loss": 2.2, "step": 4626 }, { "epoch": 0.2022378600463307, "grad_norm": 2.125, "learning_rate": 9.027572692511739e-05, "loss": 2.3884, "step": 4627 }, { "epoch": 0.2022815682503606, "grad_norm": 2.59375, "learning_rate": 9.027165633555507e-05, "loss": 2.0405, "step": 4628 }, { "epoch": 0.2023252764543905, "grad_norm": 2.453125, "learning_rate": 9.026758498600797e-05, "loss": 2.6007, "step": 4629 }, { "epoch": 0.20236898465842038, "grad_norm": 2.5625, "learning_rate": 9.026351287655294e-05, "loss": 2.4496, "step": 4630 }, { "epoch": 0.20241269286245028, "grad_norm": 2.921875, "learning_rate": 9.02594400072668e-05, "loss": 2.2956, "step": 4631 }, { "epoch": 0.2024564010664802, "grad_norm": 2.265625, "learning_rate": 9.025536637822647e-05, "loss": 1.6348, "step": 4632 }, { "epoch": 0.20250010927051007, "grad_norm": 2.234375, "learning_rate": 9.025129198950877e-05, "loss": 2.1001, "step": 4633 }, { "epoch": 0.20254381747453998, "grad_norm": 2.53125, "learning_rate": 9.02472168411906e-05, "loss": 1.516, "step": 4634 }, { "epoch": 0.20258752567856986, "grad_norm": 2.484375, "learning_rate": 9.024314093334886e-05, "loss": 2.2612, "step": 4635 }, { "epoch": 0.20263123388259976, "grad_norm": 2.171875, "learning_rate": 9.02390642660605e-05, "loss": 1.5756, "step": 4636 }, { "epoch": 0.20267494208662967, "grad_norm": 2.453125, "learning_rate": 9.023498683940243e-05, "loss": 1.7007, "step": 4637 }, { "epoch": 0.20271865029065955, "grad_norm": 2.34375, "learning_rate": 9.02309086534516e-05, "loss": 1.9744, "step": 4638 }, { "epoch": 0.20276235849468946, "grad_norm": 2.53125, "learning_rate": 9.022682970828497e-05, "loss": 2.396, "step": 4639 }, { "epoch": 0.20280606669871934, "grad_norm": 1.953125, "learning_rate": 9.022275000397951e-05, "loss": 1.851, "step": 4640 }, { "epoch": 0.20284977490274925, "grad_norm": 2.390625, "learning_rate": 9.021866954061223e-05, "loss": 1.8836, "step": 4641 }, { "epoch": 0.20289348310677915, "grad_norm": 2.625, "learning_rate": 9.02145883182601e-05, "loss": 2.2614, "step": 4642 }, { "epoch": 0.20293719131080903, "grad_norm": 3.046875, "learning_rate": 9.021050633700018e-05, "loss": 2.3415, "step": 4643 }, { "epoch": 0.20298089951483894, "grad_norm": 2.75, "learning_rate": 9.020642359690947e-05, "loss": 1.7504, "step": 4644 }, { "epoch": 0.20302460771886882, "grad_norm": 2.484375, "learning_rate": 9.020234009806503e-05, "loss": 1.7237, "step": 4645 }, { "epoch": 0.20306831592289873, "grad_norm": 2.453125, "learning_rate": 9.01982558405439e-05, "loss": 2.115, "step": 4646 }, { "epoch": 0.20311202412692864, "grad_norm": 2.875, "learning_rate": 9.019417082442321e-05, "loss": 2.3209, "step": 4647 }, { "epoch": 0.20315573233095852, "grad_norm": 2.421875, "learning_rate": 9.019008504978e-05, "loss": 2.3012, "step": 4648 }, { "epoch": 0.20319944053498842, "grad_norm": 2.484375, "learning_rate": 9.01859985166914e-05, "loss": 1.8486, "step": 4649 }, { "epoch": 0.2032431487390183, "grad_norm": 2.171875, "learning_rate": 9.018191122523452e-05, "loss": 1.7963, "step": 4650 }, { "epoch": 0.2032868569430482, "grad_norm": 2.34375, "learning_rate": 9.017782317548649e-05, "loss": 1.8977, "step": 4651 }, { "epoch": 0.20333056514707812, "grad_norm": 2.234375, "learning_rate": 9.017373436752445e-05, "loss": 2.0669, "step": 4652 }, { "epoch": 0.203374273351108, "grad_norm": 2.21875, "learning_rate": 9.016964480142557e-05, "loss": 2.3295, "step": 4653 }, { "epoch": 0.2034179815551379, "grad_norm": 2.140625, "learning_rate": 9.016555447726704e-05, "loss": 1.9676, "step": 4654 }, { "epoch": 0.20346168975916779, "grad_norm": 3.921875, "learning_rate": 9.016146339512606e-05, "loss": 1.961, "step": 4655 }, { "epoch": 0.2035053979631977, "grad_norm": 2.5, "learning_rate": 9.015737155507978e-05, "loss": 2.3247, "step": 4656 }, { "epoch": 0.2035491061672276, "grad_norm": 2.046875, "learning_rate": 9.015327895720547e-05, "loss": 1.9737, "step": 4657 }, { "epoch": 0.20359281437125748, "grad_norm": 2.125, "learning_rate": 9.014918560158035e-05, "loss": 2.0781, "step": 4658 }, { "epoch": 0.2036365225752874, "grad_norm": 2.53125, "learning_rate": 9.014509148828164e-05, "loss": 2.504, "step": 4659 }, { "epoch": 0.20368023077931727, "grad_norm": 2.171875, "learning_rate": 9.014099661738664e-05, "loss": 1.7876, "step": 4660 }, { "epoch": 0.20372393898334717, "grad_norm": 2.421875, "learning_rate": 9.01369009889726e-05, "loss": 1.7947, "step": 4661 }, { "epoch": 0.20376764718737708, "grad_norm": 3.546875, "learning_rate": 9.013280460311684e-05, "loss": 3.0699, "step": 4662 }, { "epoch": 0.20381135539140696, "grad_norm": 2.671875, "learning_rate": 9.012870745989663e-05, "loss": 2.0769, "step": 4663 }, { "epoch": 0.20385506359543687, "grad_norm": 3.78125, "learning_rate": 9.012460955938933e-05, "loss": 1.6031, "step": 4664 }, { "epoch": 0.20389877179946675, "grad_norm": 2.296875, "learning_rate": 9.012051090167222e-05, "loss": 2.1118, "step": 4665 }, { "epoch": 0.20394248000349666, "grad_norm": 4.28125, "learning_rate": 9.01164114868227e-05, "loss": 1.6598, "step": 4666 }, { "epoch": 0.20398618820752656, "grad_norm": 2.25, "learning_rate": 9.011231131491808e-05, "loss": 2.263, "step": 4667 }, { "epoch": 0.20402989641155644, "grad_norm": 2.59375, "learning_rate": 9.01082103860358e-05, "loss": 1.7258, "step": 4668 }, { "epoch": 0.20407360461558635, "grad_norm": 2.625, "learning_rate": 9.010410870025319e-05, "loss": 2.1402, "step": 4669 }, { "epoch": 0.20411731281961623, "grad_norm": 1.9140625, "learning_rate": 9.01000062576477e-05, "loss": 1.9602, "step": 4670 }, { "epoch": 0.20416102102364614, "grad_norm": 3.6875, "learning_rate": 9.009590305829672e-05, "loss": 2.8867, "step": 4671 }, { "epoch": 0.20420472922767605, "grad_norm": 2.296875, "learning_rate": 9.009179910227768e-05, "loss": 1.8176, "step": 4672 }, { "epoch": 0.20424843743170593, "grad_norm": 2.53125, "learning_rate": 9.008769438966805e-05, "loss": 2.1523, "step": 4673 }, { "epoch": 0.20429214563573583, "grad_norm": 1.953125, "learning_rate": 9.008358892054528e-05, "loss": 1.8026, "step": 4674 }, { "epoch": 0.2043358538397657, "grad_norm": 2.203125, "learning_rate": 9.007948269498685e-05, "loss": 1.8456, "step": 4675 }, { "epoch": 0.20437956204379562, "grad_norm": 2.609375, "learning_rate": 9.007537571307025e-05, "loss": 1.8779, "step": 4676 }, { "epoch": 0.20442327024782553, "grad_norm": 2.78125, "learning_rate": 9.007126797487298e-05, "loss": 2.1508, "step": 4677 }, { "epoch": 0.2044669784518554, "grad_norm": 2.46875, "learning_rate": 9.006715948047257e-05, "loss": 2.1961, "step": 4678 }, { "epoch": 0.20451068665588532, "grad_norm": 2.15625, "learning_rate": 9.006305022994654e-05, "loss": 1.837, "step": 4679 }, { "epoch": 0.2045543948599152, "grad_norm": 2.984375, "learning_rate": 9.005894022337245e-05, "loss": 2.0313, "step": 4680 }, { "epoch": 0.2045981030639451, "grad_norm": 6.875, "learning_rate": 9.005482946082784e-05, "loss": 1.9768, "step": 4681 }, { "epoch": 0.204641811267975, "grad_norm": 2.09375, "learning_rate": 9.00507179423903e-05, "loss": 1.8533, "step": 4682 }, { "epoch": 0.2046855194720049, "grad_norm": 2.234375, "learning_rate": 9.004660566813744e-05, "loss": 1.7265, "step": 4683 }, { "epoch": 0.2047292276760348, "grad_norm": 2.328125, "learning_rate": 9.004249263814683e-05, "loss": 1.9114, "step": 4684 }, { "epoch": 0.20477293588006468, "grad_norm": 2.03125, "learning_rate": 9.003837885249612e-05, "loss": 1.8118, "step": 4685 }, { "epoch": 0.20481664408409458, "grad_norm": 4.09375, "learning_rate": 9.003426431126291e-05, "loss": 1.7946, "step": 4686 }, { "epoch": 0.2048603522881245, "grad_norm": 3.328125, "learning_rate": 9.003014901452488e-05, "loss": 2.2468, "step": 4687 }, { "epoch": 0.20490406049215437, "grad_norm": 2.34375, "learning_rate": 9.002603296235967e-05, "loss": 1.8963, "step": 4688 }, { "epoch": 0.20494776869618428, "grad_norm": 2.203125, "learning_rate": 9.002191615484496e-05, "loss": 2.3479, "step": 4689 }, { "epoch": 0.20499147690021416, "grad_norm": 1.8828125, "learning_rate": 9.001779859205845e-05, "loss": 1.614, "step": 4690 }, { "epoch": 0.20503518510424407, "grad_norm": 1.9296875, "learning_rate": 9.001368027407784e-05, "loss": 1.6517, "step": 4691 }, { "epoch": 0.20507889330827397, "grad_norm": 2.4375, "learning_rate": 9.000956120098084e-05, "loss": 2.1384, "step": 4692 }, { "epoch": 0.20512260151230385, "grad_norm": 1.9609375, "learning_rate": 9.000544137284519e-05, "loss": 1.7031, "step": 4693 }, { "epoch": 0.20516630971633376, "grad_norm": 2.015625, "learning_rate": 9.000132078974863e-05, "loss": 1.6414, "step": 4694 }, { "epoch": 0.20521001792036364, "grad_norm": 2.265625, "learning_rate": 8.999719945176894e-05, "loss": 2.316, "step": 4695 }, { "epoch": 0.20525372612439355, "grad_norm": 2.8125, "learning_rate": 8.999307735898387e-05, "loss": 1.9569, "step": 4696 }, { "epoch": 0.20529743432842346, "grad_norm": 2.296875, "learning_rate": 8.998895451147125e-05, "loss": 1.6369, "step": 4697 }, { "epoch": 0.20534114253245334, "grad_norm": 2.25, "learning_rate": 8.998483090930883e-05, "loss": 1.8258, "step": 4698 }, { "epoch": 0.20538485073648324, "grad_norm": 2.375, "learning_rate": 8.998070655257447e-05, "loss": 2.0003, "step": 4699 }, { "epoch": 0.20542855894051312, "grad_norm": 1.9765625, "learning_rate": 8.997658144134598e-05, "loss": 1.6348, "step": 4700 }, { "epoch": 0.20547226714454303, "grad_norm": 2.15625, "learning_rate": 8.997245557570121e-05, "loss": 1.7787, "step": 4701 }, { "epoch": 0.20551597534857294, "grad_norm": 2.15625, "learning_rate": 8.996832895571803e-05, "loss": 1.9775, "step": 4702 }, { "epoch": 0.20555968355260282, "grad_norm": 2.296875, "learning_rate": 8.996420158147431e-05, "loss": 1.8802, "step": 4703 }, { "epoch": 0.20560339175663273, "grad_norm": 29.75, "learning_rate": 8.996007345304795e-05, "loss": 3.9084, "step": 4704 }, { "epoch": 0.2056470999606626, "grad_norm": 18.125, "learning_rate": 8.995594457051684e-05, "loss": 1.2003, "step": 4705 }, { "epoch": 0.2056908081646925, "grad_norm": 2.75, "learning_rate": 8.99518149339589e-05, "loss": 2.1783, "step": 4706 }, { "epoch": 0.20573451636872242, "grad_norm": 2.75, "learning_rate": 8.994768454345206e-05, "loss": 1.7874, "step": 4707 }, { "epoch": 0.2057782245727523, "grad_norm": 2.109375, "learning_rate": 8.994355339907429e-05, "loss": 2.057, "step": 4708 }, { "epoch": 0.2058219327767822, "grad_norm": 2.734375, "learning_rate": 8.993942150090352e-05, "loss": 2.2157, "step": 4709 }, { "epoch": 0.2058656409808121, "grad_norm": 2.703125, "learning_rate": 8.993528884901774e-05, "loss": 2.5268, "step": 4710 }, { "epoch": 0.205909349184842, "grad_norm": 2.796875, "learning_rate": 8.993115544349493e-05, "loss": 2.3586, "step": 4711 }, { "epoch": 0.2059530573888719, "grad_norm": 2.28125, "learning_rate": 8.992702128441311e-05, "loss": 1.8512, "step": 4712 }, { "epoch": 0.20599676559290178, "grad_norm": 2.140625, "learning_rate": 8.992288637185028e-05, "loss": 1.7177, "step": 4713 }, { "epoch": 0.2060404737969317, "grad_norm": 2.21875, "learning_rate": 8.991875070588447e-05, "loss": 1.7863, "step": 4714 }, { "epoch": 0.20608418200096157, "grad_norm": 2.140625, "learning_rate": 8.991461428659375e-05, "loss": 1.9259, "step": 4715 }, { "epoch": 0.20612789020499148, "grad_norm": 2.515625, "learning_rate": 8.991047711405617e-05, "loss": 1.8682, "step": 4716 }, { "epoch": 0.20617159840902138, "grad_norm": 2.75, "learning_rate": 8.990633918834979e-05, "loss": 1.8183, "step": 4717 }, { "epoch": 0.20621530661305126, "grad_norm": 3.34375, "learning_rate": 8.990220050955271e-05, "loss": 2.2547, "step": 4718 }, { "epoch": 0.20625901481708117, "grad_norm": 2.734375, "learning_rate": 8.989806107774304e-05, "loss": 1.9872, "step": 4719 }, { "epoch": 0.20630272302111105, "grad_norm": 2.765625, "learning_rate": 8.989392089299888e-05, "loss": 1.8414, "step": 4720 }, { "epoch": 0.20634643122514096, "grad_norm": 2.5625, "learning_rate": 8.988977995539837e-05, "loss": 2.3367, "step": 4721 }, { "epoch": 0.20639013942917087, "grad_norm": 2.78125, "learning_rate": 8.988563826501965e-05, "loss": 2.3742, "step": 4722 }, { "epoch": 0.20643384763320075, "grad_norm": 2.59375, "learning_rate": 8.98814958219409e-05, "loss": 2.4695, "step": 4723 }, { "epoch": 0.20647755583723065, "grad_norm": 2.53125, "learning_rate": 8.987735262624027e-05, "loss": 2.1642, "step": 4724 }, { "epoch": 0.20652126404126053, "grad_norm": 1.84375, "learning_rate": 8.987320867799594e-05, "loss": 1.6123, "step": 4725 }, { "epoch": 0.20656497224529044, "grad_norm": 2.84375, "learning_rate": 8.986906397728616e-05, "loss": 1.9332, "step": 4726 }, { "epoch": 0.20660868044932035, "grad_norm": 4.75, "learning_rate": 8.98649185241891e-05, "loss": 1.7614, "step": 4727 }, { "epoch": 0.20665238865335023, "grad_norm": 3.109375, "learning_rate": 8.9860772318783e-05, "loss": 2.1411, "step": 4728 }, { "epoch": 0.20669609685738013, "grad_norm": 2.265625, "learning_rate": 8.985662536114613e-05, "loss": 1.8783, "step": 4729 }, { "epoch": 0.20673980506141001, "grad_norm": 2.609375, "learning_rate": 8.985247765135672e-05, "loss": 1.7308, "step": 4730 }, { "epoch": 0.20678351326543992, "grad_norm": 2.515625, "learning_rate": 8.984832918949306e-05, "loss": 2.3303, "step": 4731 }, { "epoch": 0.20682722146946983, "grad_norm": 1.8515625, "learning_rate": 8.984417997563342e-05, "loss": 1.6741, "step": 4732 }, { "epoch": 0.2068709296734997, "grad_norm": 2.484375, "learning_rate": 8.984003000985613e-05, "loss": 1.9943, "step": 4733 }, { "epoch": 0.20691463787752962, "grad_norm": 2.40625, "learning_rate": 8.983587929223948e-05, "loss": 2.332, "step": 4734 }, { "epoch": 0.2069583460815595, "grad_norm": 2.53125, "learning_rate": 8.98317278228618e-05, "loss": 2.201, "step": 4735 }, { "epoch": 0.2070020542855894, "grad_norm": 2.28125, "learning_rate": 8.982757560180146e-05, "loss": 1.8238, "step": 4736 }, { "epoch": 0.2070457624896193, "grad_norm": 2.546875, "learning_rate": 8.982342262913679e-05, "loss": 1.7964, "step": 4737 }, { "epoch": 0.2070894706936492, "grad_norm": 2.21875, "learning_rate": 8.981926890494619e-05, "loss": 1.4018, "step": 4738 }, { "epoch": 0.2071331788976791, "grad_norm": 2.75, "learning_rate": 8.981511442930802e-05, "loss": 1.6008, "step": 4739 }, { "epoch": 0.20717688710170898, "grad_norm": 2.375, "learning_rate": 8.98109592023007e-05, "loss": 2.0941, "step": 4740 }, { "epoch": 0.20722059530573889, "grad_norm": 2.234375, "learning_rate": 8.980680322400264e-05, "loss": 1.9683, "step": 4741 }, { "epoch": 0.2072643035097688, "grad_norm": 2.0, "learning_rate": 8.980264649449225e-05, "loss": 1.7002, "step": 4742 }, { "epoch": 0.20730801171379867, "grad_norm": 6.375, "learning_rate": 8.9798489013848e-05, "loss": 1.7523, "step": 4743 }, { "epoch": 0.20735171991782858, "grad_norm": 2.5625, "learning_rate": 8.979433078214834e-05, "loss": 3.0292, "step": 4744 }, { "epoch": 0.20739542812185846, "grad_norm": 2.078125, "learning_rate": 8.979017179947174e-05, "loss": 2.0299, "step": 4745 }, { "epoch": 0.20743913632588837, "grad_norm": 2.890625, "learning_rate": 8.978601206589669e-05, "loss": 2.5303, "step": 4746 }, { "epoch": 0.20748284452991828, "grad_norm": 2.84375, "learning_rate": 8.978185158150167e-05, "loss": 1.5541, "step": 4747 }, { "epoch": 0.20752655273394816, "grad_norm": 2.4375, "learning_rate": 8.977769034636523e-05, "loss": 1.9323, "step": 4748 }, { "epoch": 0.20757026093797806, "grad_norm": 2.265625, "learning_rate": 8.977352836056587e-05, "loss": 1.5497, "step": 4749 }, { "epoch": 0.20761396914200794, "grad_norm": 2.25, "learning_rate": 8.976936562418215e-05, "loss": 2.4203, "step": 4750 }, { "epoch": 0.20765767734603785, "grad_norm": 2.078125, "learning_rate": 8.97652021372926e-05, "loss": 1.8565, "step": 4751 }, { "epoch": 0.20770138555006776, "grad_norm": 2.015625, "learning_rate": 8.976103789997582e-05, "loss": 1.4862, "step": 4752 }, { "epoch": 0.20774509375409764, "grad_norm": 5.6875, "learning_rate": 8.975687291231041e-05, "loss": 2.5187, "step": 4753 }, { "epoch": 0.20778880195812754, "grad_norm": 4.03125, "learning_rate": 8.975270717437492e-05, "loss": 2.9507, "step": 4754 }, { "epoch": 0.20783251016215742, "grad_norm": 3.1875, "learning_rate": 8.974854068624798e-05, "loss": 1.4123, "step": 4755 }, { "epoch": 0.20787621836618733, "grad_norm": 2.640625, "learning_rate": 8.974437344800825e-05, "loss": 2.8158, "step": 4756 }, { "epoch": 0.20791992657021724, "grad_norm": 2.4375, "learning_rate": 8.974020545973433e-05, "loss": 1.6974, "step": 4757 }, { "epoch": 0.20796363477424712, "grad_norm": 2.609375, "learning_rate": 8.97360367215049e-05, "loss": 1.9125, "step": 4758 }, { "epoch": 0.20800734297827703, "grad_norm": 2.3125, "learning_rate": 8.973186723339863e-05, "loss": 2.0247, "step": 4759 }, { "epoch": 0.2080510511823069, "grad_norm": 2.796875, "learning_rate": 8.972769699549419e-05, "loss": 1.7753, "step": 4760 }, { "epoch": 0.20809475938633681, "grad_norm": 2.6875, "learning_rate": 8.972352600787028e-05, "loss": 2.3908, "step": 4761 }, { "epoch": 0.20813846759036672, "grad_norm": 2.4375, "learning_rate": 8.971935427060562e-05, "loss": 2.2623, "step": 4762 }, { "epoch": 0.2081821757943966, "grad_norm": 2.703125, "learning_rate": 8.971518178377895e-05, "loss": 1.909, "step": 4763 }, { "epoch": 0.2082258839984265, "grad_norm": 3.125, "learning_rate": 8.9711008547469e-05, "loss": 1.8331, "step": 4764 }, { "epoch": 0.2082695922024564, "grad_norm": 3.140625, "learning_rate": 8.970683456175451e-05, "loss": 1.4373, "step": 4765 }, { "epoch": 0.2083133004064863, "grad_norm": 2.09375, "learning_rate": 8.970265982671427e-05, "loss": 2.1941, "step": 4766 }, { "epoch": 0.2083570086105162, "grad_norm": 3.3125, "learning_rate": 8.969848434242705e-05, "loss": 2.371, "step": 4767 }, { "epoch": 0.20840071681454608, "grad_norm": 2.421875, "learning_rate": 8.969430810897166e-05, "loss": 2.0571, "step": 4768 }, { "epoch": 0.208444425018576, "grad_norm": 2.15625, "learning_rate": 8.969013112642689e-05, "loss": 1.9345, "step": 4769 }, { "epoch": 0.20848813322260587, "grad_norm": 2.3125, "learning_rate": 8.968595339487157e-05, "loss": 1.3695, "step": 4770 }, { "epoch": 0.20853184142663578, "grad_norm": 2.21875, "learning_rate": 8.968177491438457e-05, "loss": 1.992, "step": 4771 }, { "epoch": 0.20857554963066569, "grad_norm": 2.28125, "learning_rate": 8.967759568504472e-05, "loss": 2.2224, "step": 4772 }, { "epoch": 0.20861925783469557, "grad_norm": 2.359375, "learning_rate": 8.967341570693088e-05, "loss": 1.8914, "step": 4773 }, { "epoch": 0.20866296603872547, "grad_norm": 1.90625, "learning_rate": 8.966923498012196e-05, "loss": 1.6996, "step": 4774 }, { "epoch": 0.20870667424275535, "grad_norm": 3.234375, "learning_rate": 8.966505350469682e-05, "loss": 2.8157, "step": 4775 }, { "epoch": 0.20875038244678526, "grad_norm": 2.4375, "learning_rate": 8.966087128073441e-05, "loss": 1.5552, "step": 4776 }, { "epoch": 0.20879409065081517, "grad_norm": 1.953125, "learning_rate": 8.965668830831364e-05, "loss": 1.7169, "step": 4777 }, { "epoch": 0.20883779885484505, "grad_norm": 2.546875, "learning_rate": 8.965250458751343e-05, "loss": 2.0599, "step": 4778 }, { "epoch": 0.20888150705887495, "grad_norm": 3.125, "learning_rate": 8.964832011841275e-05, "loss": 2.4304, "step": 4779 }, { "epoch": 0.20892521526290483, "grad_norm": 2.234375, "learning_rate": 8.964413490109055e-05, "loss": 1.6481, "step": 4780 }, { "epoch": 0.20896892346693474, "grad_norm": 2.203125, "learning_rate": 8.963994893562585e-05, "loss": 2.0656, "step": 4781 }, { "epoch": 0.20901263167096465, "grad_norm": 2.25, "learning_rate": 8.96357622220976e-05, "loss": 1.93, "step": 4782 }, { "epoch": 0.20905633987499453, "grad_norm": 2.78125, "learning_rate": 8.963157476058485e-05, "loss": 2.4348, "step": 4783 }, { "epoch": 0.20910004807902444, "grad_norm": 2.453125, "learning_rate": 8.962738655116658e-05, "loss": 2.1908, "step": 4784 }, { "epoch": 0.20914375628305432, "grad_norm": 2.0625, "learning_rate": 8.962319759392188e-05, "loss": 1.6403, "step": 4785 }, { "epoch": 0.20918746448708422, "grad_norm": 20.5, "learning_rate": 8.961900788892974e-05, "loss": 1.6297, "step": 4786 }, { "epoch": 0.20923117269111413, "grad_norm": 2.59375, "learning_rate": 8.961481743626928e-05, "loss": 2.1056, "step": 4787 }, { "epoch": 0.209274880895144, "grad_norm": 2.15625, "learning_rate": 8.961062623601955e-05, "loss": 1.9578, "step": 4788 }, { "epoch": 0.20931858909917392, "grad_norm": 1.9375, "learning_rate": 8.960643428825966e-05, "loss": 1.7978, "step": 4789 }, { "epoch": 0.2093622973032038, "grad_norm": 2.265625, "learning_rate": 8.96022415930687e-05, "loss": 1.7017, "step": 4790 }, { "epoch": 0.2094060055072337, "grad_norm": 2.0, "learning_rate": 8.959804815052582e-05, "loss": 1.9484, "step": 4791 }, { "epoch": 0.2094497137112636, "grad_norm": 2.5625, "learning_rate": 8.959385396071012e-05, "loss": 1.9809, "step": 4792 }, { "epoch": 0.2094934219152935, "grad_norm": 2.03125, "learning_rate": 8.958965902370078e-05, "loss": 1.4298, "step": 4793 }, { "epoch": 0.2095371301193234, "grad_norm": 2.375, "learning_rate": 8.958546333957694e-05, "loss": 1.766, "step": 4794 }, { "epoch": 0.20958083832335328, "grad_norm": 2.265625, "learning_rate": 8.958126690841781e-05, "loss": 2.1259, "step": 4795 }, { "epoch": 0.2096245465273832, "grad_norm": 2.1875, "learning_rate": 8.957706973030256e-05, "loss": 1.6469, "step": 4796 }, { "epoch": 0.2096682547314131, "grad_norm": 2.3125, "learning_rate": 8.95728718053104e-05, "loss": 1.6772, "step": 4797 }, { "epoch": 0.20971196293544297, "grad_norm": 3.015625, "learning_rate": 8.956867313352056e-05, "loss": 2.1811, "step": 4798 }, { "epoch": 0.20975567113947288, "grad_norm": 2.609375, "learning_rate": 8.956447371501227e-05, "loss": 2.0043, "step": 4799 }, { "epoch": 0.20979937934350276, "grad_norm": 2.9375, "learning_rate": 8.956027354986476e-05, "loss": 2.1774, "step": 4800 }, { "epoch": 0.20984308754753267, "grad_norm": 2.15625, "learning_rate": 8.955607263815732e-05, "loss": 1.8232, "step": 4801 }, { "epoch": 0.20988679575156258, "grad_norm": 2.578125, "learning_rate": 8.955187097996922e-05, "loss": 2.3588, "step": 4802 }, { "epoch": 0.20993050395559246, "grad_norm": 2.296875, "learning_rate": 8.954766857537975e-05, "loss": 1.8239, "step": 4803 }, { "epoch": 0.20997421215962236, "grad_norm": 12.1875, "learning_rate": 8.95434654244682e-05, "loss": 1.6482, "step": 4804 }, { "epoch": 0.21001792036365224, "grad_norm": 2.25, "learning_rate": 8.953926152731394e-05, "loss": 2.0697, "step": 4805 }, { "epoch": 0.21006162856768215, "grad_norm": 2.3125, "learning_rate": 8.953505688399624e-05, "loss": 2.2319, "step": 4806 }, { "epoch": 0.21010533677171206, "grad_norm": 2.1875, "learning_rate": 8.95308514945945e-05, "loss": 1.8125, "step": 4807 }, { "epoch": 0.21014904497574194, "grad_norm": 5.875, "learning_rate": 8.952664535918803e-05, "loss": 1.6745, "step": 4808 }, { "epoch": 0.21019275317977185, "grad_norm": 2.359375, "learning_rate": 8.952243847785624e-05, "loss": 2.3218, "step": 4809 }, { "epoch": 0.21023646138380173, "grad_norm": 2.6875, "learning_rate": 8.951823085067852e-05, "loss": 2.0469, "step": 4810 }, { "epoch": 0.21028016958783163, "grad_norm": 2.484375, "learning_rate": 8.951402247773428e-05, "loss": 2.3366, "step": 4811 }, { "epoch": 0.21032387779186154, "grad_norm": 2.234375, "learning_rate": 8.950981335910291e-05, "loss": 1.7435, "step": 4812 }, { "epoch": 0.21036758599589142, "grad_norm": 2.328125, "learning_rate": 8.950560349486386e-05, "loss": 1.5361, "step": 4813 }, { "epoch": 0.21041129419992133, "grad_norm": 2.875, "learning_rate": 8.950139288509658e-05, "loss": 2.4291, "step": 4814 }, { "epoch": 0.2104550024039512, "grad_norm": 2.203125, "learning_rate": 8.949718152988051e-05, "loss": 2.2256, "step": 4815 }, { "epoch": 0.21049871060798112, "grad_norm": 2.25, "learning_rate": 8.949296942929514e-05, "loss": 1.9177, "step": 4816 }, { "epoch": 0.21054241881201102, "grad_norm": 3.703125, "learning_rate": 8.948875658341997e-05, "loss": 2.6135, "step": 4817 }, { "epoch": 0.2105861270160409, "grad_norm": 2.46875, "learning_rate": 8.94845429923345e-05, "loss": 2.557, "step": 4818 }, { "epoch": 0.2106298352200708, "grad_norm": 2.625, "learning_rate": 8.948032865611822e-05, "loss": 2.2347, "step": 4819 }, { "epoch": 0.2106735434241007, "grad_norm": 2.671875, "learning_rate": 8.947611357485068e-05, "loss": 2.5274, "step": 4820 }, { "epoch": 0.2107172516281306, "grad_norm": 2.8125, "learning_rate": 8.947189774861142e-05, "loss": 1.4707, "step": 4821 }, { "epoch": 0.2107609598321605, "grad_norm": 5.3125, "learning_rate": 8.946768117748001e-05, "loss": 1.3507, "step": 4822 }, { "epoch": 0.21080466803619038, "grad_norm": 2.40625, "learning_rate": 8.946346386153601e-05, "loss": 2.4313, "step": 4823 }, { "epoch": 0.2108483762402203, "grad_norm": 1.9921875, "learning_rate": 8.945924580085901e-05, "loss": 1.7597, "step": 4824 }, { "epoch": 0.2108920844442502, "grad_norm": 2.71875, "learning_rate": 8.945502699552862e-05, "loss": 1.9471, "step": 4825 }, { "epoch": 0.21093579264828008, "grad_norm": 2.390625, "learning_rate": 8.945080744562442e-05, "loss": 2.3347, "step": 4826 }, { "epoch": 0.21097950085231, "grad_norm": 2.5625, "learning_rate": 8.944658715122609e-05, "loss": 2.9457, "step": 4827 }, { "epoch": 0.21102320905633987, "grad_norm": 2.234375, "learning_rate": 8.944236611241323e-05, "loss": 1.7127, "step": 4828 }, { "epoch": 0.21106691726036977, "grad_norm": 2.265625, "learning_rate": 8.943814432926553e-05, "loss": 2.4287, "step": 4829 }, { "epoch": 0.21111062546439968, "grad_norm": 2.09375, "learning_rate": 8.943392180186265e-05, "loss": 2.2223, "step": 4830 }, { "epoch": 0.21115433366842956, "grad_norm": 1.9765625, "learning_rate": 8.942969853028426e-05, "loss": 1.7299, "step": 4831 }, { "epoch": 0.21119804187245947, "grad_norm": 3.71875, "learning_rate": 8.942547451461008e-05, "loss": 1.8223, "step": 4832 }, { "epoch": 0.21124175007648935, "grad_norm": 2.265625, "learning_rate": 8.942124975491981e-05, "loss": 2.1694, "step": 4833 }, { "epoch": 0.21128545828051926, "grad_norm": 2.65625, "learning_rate": 8.94170242512932e-05, "loss": 1.9498, "step": 4834 }, { "epoch": 0.21132916648454916, "grad_norm": 2.734375, "learning_rate": 8.941279800380995e-05, "loss": 1.9834, "step": 4835 }, { "epoch": 0.21137287468857904, "grad_norm": 2.28125, "learning_rate": 8.940857101254985e-05, "loss": 2.5328, "step": 4836 }, { "epoch": 0.21141658289260895, "grad_norm": 2.59375, "learning_rate": 8.940434327759264e-05, "loss": 2.3951, "step": 4837 }, { "epoch": 0.21146029109663883, "grad_norm": 2.09375, "learning_rate": 8.940011479901816e-05, "loss": 2.0517, "step": 4838 }, { "epoch": 0.21150399930066874, "grad_norm": 2.25, "learning_rate": 8.939588557690614e-05, "loss": 2.1932, "step": 4839 }, { "epoch": 0.21154770750469865, "grad_norm": 3.671875, "learning_rate": 8.939165561133642e-05, "loss": 2.1056, "step": 4840 }, { "epoch": 0.21159141570872853, "grad_norm": 2.65625, "learning_rate": 8.938742490238884e-05, "loss": 2.1849, "step": 4841 }, { "epoch": 0.21163512391275843, "grad_norm": 2.78125, "learning_rate": 8.938319345014321e-05, "loss": 2.4694, "step": 4842 }, { "epoch": 0.2116788321167883, "grad_norm": 2.109375, "learning_rate": 8.937896125467942e-05, "loss": 1.8354, "step": 4843 }, { "epoch": 0.21172254032081822, "grad_norm": 3.0625, "learning_rate": 8.937472831607732e-05, "loss": 2.216, "step": 4844 }, { "epoch": 0.21176624852484813, "grad_norm": 2.171875, "learning_rate": 8.937049463441678e-05, "loss": 1.7439, "step": 4845 }, { "epoch": 0.211809956728878, "grad_norm": 2.203125, "learning_rate": 8.93662602097777e-05, "loss": 2.1265, "step": 4846 }, { "epoch": 0.21185366493290791, "grad_norm": 2.140625, "learning_rate": 8.936202504224e-05, "loss": 1.944, "step": 4847 }, { "epoch": 0.2118973731369378, "grad_norm": 2.15625, "learning_rate": 8.93577891318836e-05, "loss": 2.1815, "step": 4848 }, { "epoch": 0.2119410813409677, "grad_norm": 2.171875, "learning_rate": 8.935355247878842e-05, "loss": 2.2721, "step": 4849 }, { "epoch": 0.2119847895449976, "grad_norm": 2.546875, "learning_rate": 8.934931508303445e-05, "loss": 2.313, "step": 4850 }, { "epoch": 0.2120284977490275, "grad_norm": 2.328125, "learning_rate": 8.934507694470162e-05, "loss": 2.0388, "step": 4851 }, { "epoch": 0.2120722059530574, "grad_norm": 2.171875, "learning_rate": 8.934083806386995e-05, "loss": 1.6507, "step": 4852 }, { "epoch": 0.21211591415708728, "grad_norm": 1.96875, "learning_rate": 8.933659844061938e-05, "loss": 1.8782, "step": 4853 }, { "epoch": 0.21215962236111718, "grad_norm": 2.59375, "learning_rate": 8.933235807502996e-05, "loss": 1.8664, "step": 4854 }, { "epoch": 0.2122033305651471, "grad_norm": 2.40625, "learning_rate": 8.932811696718169e-05, "loss": 1.7721, "step": 4855 }, { "epoch": 0.21224703876917697, "grad_norm": 3.09375, "learning_rate": 8.93238751171546e-05, "loss": 1.5299, "step": 4856 }, { "epoch": 0.21229074697320688, "grad_norm": 1.9609375, "learning_rate": 8.931963252502878e-05, "loss": 1.7576, "step": 4857 }, { "epoch": 0.21233445517723676, "grad_norm": 2.28125, "learning_rate": 8.931538919088425e-05, "loss": 2.125, "step": 4858 }, { "epoch": 0.21237816338126667, "grad_norm": 2.203125, "learning_rate": 8.931114511480112e-05, "loss": 1.7627, "step": 4859 }, { "epoch": 0.21242187158529657, "grad_norm": 2.25, "learning_rate": 8.930690029685946e-05, "loss": 1.9135, "step": 4860 }, { "epoch": 0.21246557978932645, "grad_norm": 3.453125, "learning_rate": 8.930265473713938e-05, "loss": 2.209, "step": 4861 }, { "epoch": 0.21250928799335636, "grad_norm": 3.078125, "learning_rate": 8.9298408435721e-05, "loss": 1.8138, "step": 4862 }, { "epoch": 0.21255299619738624, "grad_norm": 2.59375, "learning_rate": 8.929416139268446e-05, "loss": 2.499, "step": 4863 }, { "epoch": 0.21259670440141615, "grad_norm": 2.546875, "learning_rate": 8.92899136081099e-05, "loss": 2.1922, "step": 4864 }, { "epoch": 0.21264041260544606, "grad_norm": 2.75, "learning_rate": 8.92856650820775e-05, "loss": 1.584, "step": 4865 }, { "epoch": 0.21268412080947594, "grad_norm": 2.375, "learning_rate": 8.928141581466742e-05, "loss": 1.9576, "step": 4866 }, { "epoch": 0.21272782901350584, "grad_norm": 2.09375, "learning_rate": 8.927716580595984e-05, "loss": 1.8619, "step": 4867 }, { "epoch": 0.21277153721753572, "grad_norm": 2.390625, "learning_rate": 8.9272915056035e-05, "loss": 2.3122, "step": 4868 }, { "epoch": 0.21281524542156563, "grad_norm": 2.65625, "learning_rate": 8.926866356497307e-05, "loss": 1.3723, "step": 4869 }, { "epoch": 0.21285895362559554, "grad_norm": 2.421875, "learning_rate": 8.92644113328543e-05, "loss": 1.8232, "step": 4870 }, { "epoch": 0.21290266182962542, "grad_norm": 2.171875, "learning_rate": 8.926015835975896e-05, "loss": 2.2606, "step": 4871 }, { "epoch": 0.21294637003365532, "grad_norm": 2.734375, "learning_rate": 8.925590464576727e-05, "loss": 1.2309, "step": 4872 }, { "epoch": 0.2129900782376852, "grad_norm": 2.09375, "learning_rate": 8.925165019095956e-05, "loss": 1.7613, "step": 4873 }, { "epoch": 0.2130337864417151, "grad_norm": 2.15625, "learning_rate": 8.924739499541606e-05, "loss": 1.8071, "step": 4874 }, { "epoch": 0.21307749464574502, "grad_norm": 5.375, "learning_rate": 8.924313905921709e-05, "loss": 2.6352, "step": 4875 }, { "epoch": 0.2131212028497749, "grad_norm": 3.25, "learning_rate": 8.923888238244298e-05, "loss": 2.1339, "step": 4876 }, { "epoch": 0.2131649110538048, "grad_norm": 3.484375, "learning_rate": 8.923462496517405e-05, "loss": 2.0866, "step": 4877 }, { "epoch": 0.2132086192578347, "grad_norm": 2.1875, "learning_rate": 8.923036680749064e-05, "loss": 1.9792, "step": 4878 }, { "epoch": 0.2132523274618646, "grad_norm": 3.359375, "learning_rate": 8.922610790947311e-05, "loss": 2.6828, "step": 4879 }, { "epoch": 0.2132960356658945, "grad_norm": 2.875, "learning_rate": 8.922184827120183e-05, "loss": 1.9287, "step": 4880 }, { "epoch": 0.21333974386992438, "grad_norm": 2.40625, "learning_rate": 8.92175878927572e-05, "loss": 2.0443, "step": 4881 }, { "epoch": 0.2133834520739543, "grad_norm": 3.375, "learning_rate": 8.921332677421961e-05, "loss": 3.1954, "step": 4882 }, { "epoch": 0.21342716027798417, "grad_norm": 2.453125, "learning_rate": 8.920906491566944e-05, "loss": 2.149, "step": 4883 }, { "epoch": 0.21347086848201408, "grad_norm": 2.28125, "learning_rate": 8.92048023171872e-05, "loss": 2.1222, "step": 4884 }, { "epoch": 0.21351457668604398, "grad_norm": 4.125, "learning_rate": 8.920053897885325e-05, "loss": 1.8552, "step": 4885 }, { "epoch": 0.21355828489007386, "grad_norm": 2.34375, "learning_rate": 8.919627490074807e-05, "loss": 1.9297, "step": 4886 }, { "epoch": 0.21360199309410377, "grad_norm": 2.203125, "learning_rate": 8.919201008295214e-05, "loss": 2.3196, "step": 4887 }, { "epoch": 0.21364570129813365, "grad_norm": 3.0, "learning_rate": 8.918774452554595e-05, "loss": 2.5424, "step": 4888 }, { "epoch": 0.21368940950216356, "grad_norm": 2.578125, "learning_rate": 8.918347822860997e-05, "loss": 3.2891, "step": 4889 }, { "epoch": 0.21373311770619347, "grad_norm": 2.375, "learning_rate": 8.917921119222474e-05, "loss": 1.8642, "step": 4890 }, { "epoch": 0.21377682591022334, "grad_norm": 2.46875, "learning_rate": 8.917494341647077e-05, "loss": 2.4109, "step": 4891 }, { "epoch": 0.21382053411425325, "grad_norm": 2.234375, "learning_rate": 8.917067490142858e-05, "loss": 1.8142, "step": 4892 }, { "epoch": 0.21386424231828313, "grad_norm": 1.8125, "learning_rate": 8.916640564717878e-05, "loss": 1.7431, "step": 4893 }, { "epoch": 0.21390795052231304, "grad_norm": 2.296875, "learning_rate": 8.916213565380188e-05, "loss": 1.7604, "step": 4894 }, { "epoch": 0.21395165872634295, "grad_norm": 2.1875, "learning_rate": 8.915786492137848e-05, "loss": 2.2038, "step": 4895 }, { "epoch": 0.21399536693037283, "grad_norm": 1.9609375, "learning_rate": 8.915359344998919e-05, "loss": 1.6342, "step": 4896 }, { "epoch": 0.21403907513440273, "grad_norm": 2.03125, "learning_rate": 8.91493212397146e-05, "loss": 1.4095, "step": 4897 }, { "epoch": 0.21408278333843261, "grad_norm": 2.140625, "learning_rate": 8.914504829063535e-05, "loss": 1.7478, "step": 4898 }, { "epoch": 0.21412649154246252, "grad_norm": 2.0625, "learning_rate": 8.914077460283205e-05, "loss": 1.538, "step": 4899 }, { "epoch": 0.21417019974649243, "grad_norm": 2.484375, "learning_rate": 8.913650017638537e-05, "loss": 2.1669, "step": 4900 }, { "epoch": 0.2142139079505223, "grad_norm": 2.4375, "learning_rate": 8.913222501137597e-05, "loss": 1.8726, "step": 4901 }, { "epoch": 0.21425761615455222, "grad_norm": 2.296875, "learning_rate": 8.912794910788453e-05, "loss": 2.4547, "step": 4902 }, { "epoch": 0.2143013243585821, "grad_norm": 2.4375, "learning_rate": 8.912367246599175e-05, "loss": 1.9188, "step": 4903 }, { "epoch": 0.214345032562612, "grad_norm": 2.203125, "learning_rate": 8.911939508577833e-05, "loss": 2.116, "step": 4904 }, { "epoch": 0.2143887407666419, "grad_norm": 2.359375, "learning_rate": 8.911511696732498e-05, "loss": 2.0171, "step": 4905 }, { "epoch": 0.2144324489706718, "grad_norm": 2.078125, "learning_rate": 8.911083811071244e-05, "loss": 2.2515, "step": 4906 }, { "epoch": 0.2144761571747017, "grad_norm": 2.21875, "learning_rate": 8.910655851602146e-05, "loss": 2.4468, "step": 4907 }, { "epoch": 0.21451986537873158, "grad_norm": 2.640625, "learning_rate": 8.910227818333282e-05, "loss": 2.6137, "step": 4908 }, { "epoch": 0.21456357358276149, "grad_norm": 2.453125, "learning_rate": 8.909799711272727e-05, "loss": 2.7769, "step": 4909 }, { "epoch": 0.2146072817867914, "grad_norm": 2.0625, "learning_rate": 8.909371530428561e-05, "loss": 1.7773, "step": 4910 }, { "epoch": 0.21465098999082127, "grad_norm": 2.34375, "learning_rate": 8.908943275808866e-05, "loss": 1.6918, "step": 4911 }, { "epoch": 0.21469469819485118, "grad_norm": 2.25, "learning_rate": 8.90851494742172e-05, "loss": 2.4438, "step": 4912 }, { "epoch": 0.21473840639888106, "grad_norm": 2.28125, "learning_rate": 8.908086545275209e-05, "loss": 2.461, "step": 4913 }, { "epoch": 0.21478211460291097, "grad_norm": 3.015625, "learning_rate": 8.907658069377418e-05, "loss": 1.8086, "step": 4914 }, { "epoch": 0.21482582280694087, "grad_norm": 3.59375, "learning_rate": 8.90722951973643e-05, "loss": 1.9816, "step": 4915 }, { "epoch": 0.21486953101097075, "grad_norm": 2.4375, "learning_rate": 8.906800896360336e-05, "loss": 2.106, "step": 4916 }, { "epoch": 0.21491323921500066, "grad_norm": 3.65625, "learning_rate": 8.906372199257223e-05, "loss": 1.485, "step": 4917 }, { "epoch": 0.21495694741903054, "grad_norm": 2.921875, "learning_rate": 8.90594342843518e-05, "loss": 3.2665, "step": 4918 }, { "epoch": 0.21500065562306045, "grad_norm": 2.34375, "learning_rate": 8.9055145839023e-05, "loss": 1.7803, "step": 4919 }, { "epoch": 0.21504436382709036, "grad_norm": 2.4375, "learning_rate": 8.905085665666674e-05, "loss": 2.6265, "step": 4920 }, { "epoch": 0.21508807203112024, "grad_norm": 2.28125, "learning_rate": 8.9046566737364e-05, "loss": 2.2594, "step": 4921 }, { "epoch": 0.21513178023515014, "grad_norm": 2.625, "learning_rate": 8.904227608119571e-05, "loss": 1.9987, "step": 4922 }, { "epoch": 0.21517548843918002, "grad_norm": 2.296875, "learning_rate": 8.903798468824286e-05, "loss": 2.3512, "step": 4923 }, { "epoch": 0.21521919664320993, "grad_norm": 2.53125, "learning_rate": 8.90336925585864e-05, "loss": 2.123, "step": 4924 }, { "epoch": 0.21526290484723984, "grad_norm": 2.515625, "learning_rate": 8.902939969230737e-05, "loss": 1.8981, "step": 4925 }, { "epoch": 0.21530661305126972, "grad_norm": 2.1875, "learning_rate": 8.902510608948676e-05, "loss": 2.0691, "step": 4926 }, { "epoch": 0.21535032125529963, "grad_norm": 2.4375, "learning_rate": 8.902081175020559e-05, "loss": 2.0388, "step": 4927 }, { "epoch": 0.2153940294593295, "grad_norm": 2.484375, "learning_rate": 8.901651667454492e-05, "loss": 1.8893, "step": 4928 }, { "epoch": 0.2154377376633594, "grad_norm": 1.984375, "learning_rate": 8.901222086258578e-05, "loss": 1.5117, "step": 4929 }, { "epoch": 0.21548144586738932, "grad_norm": 2.375, "learning_rate": 8.900792431440927e-05, "loss": 2.1315, "step": 4930 }, { "epoch": 0.2155251540714192, "grad_norm": 2.796875, "learning_rate": 8.900362703009644e-05, "loss": 1.8519, "step": 4931 }, { "epoch": 0.2155688622754491, "grad_norm": 2.125, "learning_rate": 8.89993290097284e-05, "loss": 2.1304, "step": 4932 }, { "epoch": 0.215612570479479, "grad_norm": 3.265625, "learning_rate": 8.899503025338627e-05, "loss": 1.9898, "step": 4933 }, { "epoch": 0.2156562786835089, "grad_norm": 2.09375, "learning_rate": 8.899073076115116e-05, "loss": 1.602, "step": 4934 }, { "epoch": 0.2156999868875388, "grad_norm": 2.125, "learning_rate": 8.898643053310422e-05, "loss": 1.7423, "step": 4935 }, { "epoch": 0.21574369509156868, "grad_norm": 2.09375, "learning_rate": 8.898212956932659e-05, "loss": 1.9523, "step": 4936 }, { "epoch": 0.2157874032955986, "grad_norm": 2.15625, "learning_rate": 8.897782786989944e-05, "loss": 1.8911, "step": 4937 }, { "epoch": 0.21583111149962847, "grad_norm": 2.3125, "learning_rate": 8.897352543490395e-05, "loss": 1.5745, "step": 4938 }, { "epoch": 0.21587481970365838, "grad_norm": 2.671875, "learning_rate": 8.89692222644213e-05, "loss": 2.2996, "step": 4939 }, { "epoch": 0.21591852790768828, "grad_norm": 2.609375, "learning_rate": 8.896491835853272e-05, "loss": 2.591, "step": 4940 }, { "epoch": 0.21596223611171816, "grad_norm": 2.921875, "learning_rate": 8.896061371731943e-05, "loss": 2.5338, "step": 4941 }, { "epoch": 0.21600594431574807, "grad_norm": 2.15625, "learning_rate": 8.895630834086264e-05, "loss": 1.6313, "step": 4942 }, { "epoch": 0.21604965251977795, "grad_norm": 2.640625, "learning_rate": 8.89520022292436e-05, "loss": 1.9688, "step": 4943 }, { "epoch": 0.21609336072380786, "grad_norm": 2.296875, "learning_rate": 8.894769538254362e-05, "loss": 2.121, "step": 4944 }, { "epoch": 0.21613706892783777, "grad_norm": 2.828125, "learning_rate": 8.894338780084392e-05, "loss": 1.9814, "step": 4945 }, { "epoch": 0.21618077713186765, "grad_norm": 2.09375, "learning_rate": 8.893907948422581e-05, "loss": 1.8047, "step": 4946 }, { "epoch": 0.21622448533589755, "grad_norm": 2.0625, "learning_rate": 8.893477043277061e-05, "loss": 2.0301, "step": 4947 }, { "epoch": 0.21626819353992743, "grad_norm": 2.015625, "learning_rate": 8.893046064655961e-05, "loss": 1.6069, "step": 4948 }, { "epoch": 0.21631190174395734, "grad_norm": 2.75, "learning_rate": 8.892615012567416e-05, "loss": 1.8555, "step": 4949 }, { "epoch": 0.21635560994798725, "grad_norm": 3.109375, "learning_rate": 8.892183887019562e-05, "loss": 2.1679, "step": 4950 }, { "epoch": 0.21639931815201713, "grad_norm": 2.796875, "learning_rate": 8.891752688020532e-05, "loss": 2.19, "step": 4951 }, { "epoch": 0.21644302635604704, "grad_norm": 2.609375, "learning_rate": 8.891321415578464e-05, "loss": 2.3977, "step": 4952 }, { "epoch": 0.21648673456007692, "grad_norm": 2.4375, "learning_rate": 8.890890069701499e-05, "loss": 1.8516, "step": 4953 }, { "epoch": 0.21653044276410682, "grad_norm": 2.328125, "learning_rate": 8.890458650397774e-05, "loss": 1.684, "step": 4954 }, { "epoch": 0.21657415096813673, "grad_norm": 2.421875, "learning_rate": 8.890027157675432e-05, "loss": 2.0473, "step": 4955 }, { "epoch": 0.2166178591721666, "grad_norm": 2.265625, "learning_rate": 8.889595591542617e-05, "loss": 1.8999, "step": 4956 }, { "epoch": 0.21666156737619652, "grad_norm": 2.234375, "learning_rate": 8.889163952007471e-05, "loss": 1.7533, "step": 4957 }, { "epoch": 0.2167052755802264, "grad_norm": 2.078125, "learning_rate": 8.888732239078141e-05, "loss": 1.7618, "step": 4958 }, { "epoch": 0.2167489837842563, "grad_norm": 3.234375, "learning_rate": 8.888300452762774e-05, "loss": 2.773, "step": 4959 }, { "epoch": 0.2167926919882862, "grad_norm": 2.0, "learning_rate": 8.887868593069519e-05, "loss": 1.9146, "step": 4960 }, { "epoch": 0.2168364001923161, "grad_norm": 3.125, "learning_rate": 8.887436660006525e-05, "loss": 1.0823, "step": 4961 }, { "epoch": 0.216880108396346, "grad_norm": 2.578125, "learning_rate": 8.887004653581942e-05, "loss": 1.4368, "step": 4962 }, { "epoch": 0.21692381660037588, "grad_norm": 2.3125, "learning_rate": 8.886572573803926e-05, "loss": 2.3, "step": 4963 }, { "epoch": 0.2169675248044058, "grad_norm": 2.125, "learning_rate": 8.886140420680627e-05, "loss": 1.8728, "step": 4964 }, { "epoch": 0.2170112330084357, "grad_norm": 2.8125, "learning_rate": 8.885708194220204e-05, "loss": 2.7391, "step": 4965 }, { "epoch": 0.21705494121246557, "grad_norm": 2.359375, "learning_rate": 8.88527589443081e-05, "loss": 2.1869, "step": 4966 }, { "epoch": 0.21709864941649548, "grad_norm": 1.8515625, "learning_rate": 8.884843521320606e-05, "loss": 1.664, "step": 4967 }, { "epoch": 0.21714235762052536, "grad_norm": 2.1875, "learning_rate": 8.884411074897751e-05, "loss": 1.9771, "step": 4968 }, { "epoch": 0.21718606582455527, "grad_norm": 3.765625, "learning_rate": 8.883978555170404e-05, "loss": 1.2271, "step": 4969 }, { "epoch": 0.21722977402858518, "grad_norm": 2.765625, "learning_rate": 8.883545962146731e-05, "loss": 2.1806, "step": 4970 }, { "epoch": 0.21727348223261506, "grad_norm": 3.4375, "learning_rate": 8.883113295834892e-05, "loss": 2.4585, "step": 4971 }, { "epoch": 0.21731719043664496, "grad_norm": 2.96875, "learning_rate": 8.882680556243054e-05, "loss": 1.8355, "step": 4972 }, { "epoch": 0.21736089864067484, "grad_norm": 17.5, "learning_rate": 8.882247743379383e-05, "loss": 4.6282, "step": 4973 }, { "epoch": 0.21740460684470475, "grad_norm": 1.9609375, "learning_rate": 8.881814857252046e-05, "loss": 1.9054, "step": 4974 }, { "epoch": 0.21744831504873466, "grad_norm": 2.796875, "learning_rate": 8.881381897869215e-05, "loss": 2.76, "step": 4975 }, { "epoch": 0.21749202325276454, "grad_norm": 2.21875, "learning_rate": 8.880948865239057e-05, "loss": 1.9151, "step": 4976 }, { "epoch": 0.21753573145679445, "grad_norm": 2.09375, "learning_rate": 8.880515759369745e-05, "loss": 1.7111, "step": 4977 }, { "epoch": 0.21757943966082433, "grad_norm": 3.03125, "learning_rate": 8.880082580269453e-05, "loss": 2.0375, "step": 4978 }, { "epoch": 0.21762314786485423, "grad_norm": 3.546875, "learning_rate": 8.879649327946356e-05, "loss": 2.5731, "step": 4979 }, { "epoch": 0.21766685606888414, "grad_norm": 2.15625, "learning_rate": 8.879216002408631e-05, "loss": 1.8861, "step": 4980 }, { "epoch": 0.21771056427291402, "grad_norm": 2.484375, "learning_rate": 8.878782603664452e-05, "loss": 1.8373, "step": 4981 }, { "epoch": 0.21775427247694393, "grad_norm": 2.328125, "learning_rate": 8.878349131722e-05, "loss": 1.8506, "step": 4982 }, { "epoch": 0.2177979806809738, "grad_norm": 2.125, "learning_rate": 8.877915586589456e-05, "loss": 1.7731, "step": 4983 }, { "epoch": 0.21784168888500371, "grad_norm": 2.59375, "learning_rate": 8.877481968275001e-05, "loss": 2.25, "step": 4984 }, { "epoch": 0.21788539708903362, "grad_norm": 2.21875, "learning_rate": 8.877048276786817e-05, "loss": 1.6804, "step": 4985 }, { "epoch": 0.2179291052930635, "grad_norm": 2.375, "learning_rate": 8.87661451213309e-05, "loss": 2.3068, "step": 4986 }, { "epoch": 0.2179728134970934, "grad_norm": 2.78125, "learning_rate": 8.876180674322005e-05, "loss": 2.9461, "step": 4987 }, { "epoch": 0.2180165217011233, "grad_norm": 2.46875, "learning_rate": 8.87574676336175e-05, "loss": 1.837, "step": 4988 }, { "epoch": 0.2180602299051532, "grad_norm": 11.1875, "learning_rate": 8.87531277926051e-05, "loss": 1.5664, "step": 4989 }, { "epoch": 0.2181039381091831, "grad_norm": 2.34375, "learning_rate": 8.874878722026479e-05, "loss": 1.7533, "step": 4990 }, { "epoch": 0.21814764631321298, "grad_norm": 2.140625, "learning_rate": 8.874444591667848e-05, "loss": 1.566, "step": 4991 }, { "epoch": 0.2181913545172429, "grad_norm": 1.984375, "learning_rate": 8.874010388192808e-05, "loss": 1.5187, "step": 4992 }, { "epoch": 0.21823506272127277, "grad_norm": 2.609375, "learning_rate": 8.873576111609553e-05, "loss": 2.2533, "step": 4993 }, { "epoch": 0.21827877092530268, "grad_norm": 2.3125, "learning_rate": 8.87314176192628e-05, "loss": 2.0866, "step": 4994 }, { "epoch": 0.2183224791293326, "grad_norm": 2.171875, "learning_rate": 8.872707339151183e-05, "loss": 2.0056, "step": 4995 }, { "epoch": 0.21836618733336247, "grad_norm": 1.9921875, "learning_rate": 8.872272843292464e-05, "loss": 1.6985, "step": 4996 }, { "epoch": 0.21840989553739237, "grad_norm": 2.328125, "learning_rate": 8.871838274358319e-05, "loss": 1.3936, "step": 4997 }, { "epoch": 0.21845360374142225, "grad_norm": 2.5, "learning_rate": 8.871403632356951e-05, "loss": 1.8284, "step": 4998 }, { "epoch": 0.21849731194545216, "grad_norm": 3.125, "learning_rate": 8.870968917296562e-05, "loss": 2.3903, "step": 4999 }, { "epoch": 0.21854102014948207, "grad_norm": 2.171875, "learning_rate": 8.870534129185357e-05, "loss": 1.67, "step": 5000 }, { "epoch": 0.21858472835351195, "grad_norm": 2.0625, "learning_rate": 8.87009926803154e-05, "loss": 1.8593, "step": 5001 }, { "epoch": 0.21862843655754186, "grad_norm": 3.09375, "learning_rate": 8.869664333843315e-05, "loss": 2.3788, "step": 5002 }, { "epoch": 0.21867214476157174, "grad_norm": 3.4375, "learning_rate": 8.869229326628892e-05, "loss": 1.2086, "step": 5003 }, { "epoch": 0.21871585296560164, "grad_norm": 2.5625, "learning_rate": 8.868794246396481e-05, "loss": 2.5474, "step": 5004 }, { "epoch": 0.21875956116963155, "grad_norm": 2.59375, "learning_rate": 8.868359093154292e-05, "loss": 2.0143, "step": 5005 }, { "epoch": 0.21880326937366143, "grad_norm": 2.84375, "learning_rate": 8.867923866910536e-05, "loss": 2.6357, "step": 5006 }, { "epoch": 0.21884697757769134, "grad_norm": 2.65625, "learning_rate": 8.867488567673429e-05, "loss": 1.5781, "step": 5007 }, { "epoch": 0.21889068578172122, "grad_norm": 2.359375, "learning_rate": 8.867053195451183e-05, "loss": 1.9196, "step": 5008 }, { "epoch": 0.21893439398575112, "grad_norm": 2.0625, "learning_rate": 8.866617750252014e-05, "loss": 1.9003, "step": 5009 }, { "epoch": 0.21897810218978103, "grad_norm": 2.28125, "learning_rate": 8.866182232084141e-05, "loss": 2.0773, "step": 5010 }, { "epoch": 0.2190218103938109, "grad_norm": 2.078125, "learning_rate": 8.865746640955783e-05, "loss": 2.4597, "step": 5011 }, { "epoch": 0.21906551859784082, "grad_norm": 2.140625, "learning_rate": 8.865310976875159e-05, "loss": 1.7359, "step": 5012 }, { "epoch": 0.2191092268018707, "grad_norm": 2.03125, "learning_rate": 8.864875239850489e-05, "loss": 1.7675, "step": 5013 }, { "epoch": 0.2191529350059006, "grad_norm": 2.328125, "learning_rate": 8.864439429890001e-05, "loss": 1.7439, "step": 5014 }, { "epoch": 0.21919664320993051, "grad_norm": 2.34375, "learning_rate": 8.864003547001915e-05, "loss": 1.7313, "step": 5015 }, { "epoch": 0.2192403514139604, "grad_norm": 2.046875, "learning_rate": 8.86356759119446e-05, "loss": 1.7512, "step": 5016 }, { "epoch": 0.2192840596179903, "grad_norm": 2.140625, "learning_rate": 8.86313156247586e-05, "loss": 1.7813, "step": 5017 }, { "epoch": 0.21932776782202018, "grad_norm": 2.0, "learning_rate": 8.862695460854347e-05, "loss": 1.8943, "step": 5018 }, { "epoch": 0.2193714760260501, "grad_norm": 2.09375, "learning_rate": 8.862259286338145e-05, "loss": 1.486, "step": 5019 }, { "epoch": 0.21941518423008, "grad_norm": 2.109375, "learning_rate": 8.861823038935493e-05, "loss": 1.7871, "step": 5020 }, { "epoch": 0.21945889243410988, "grad_norm": 2.0, "learning_rate": 8.861386718654618e-05, "loss": 1.8623, "step": 5021 }, { "epoch": 0.21950260063813978, "grad_norm": 2.046875, "learning_rate": 8.860950325503754e-05, "loss": 2.0073, "step": 5022 }, { "epoch": 0.21954630884216966, "grad_norm": 2.421875, "learning_rate": 8.86051385949114e-05, "loss": 1.7694, "step": 5023 }, { "epoch": 0.21959001704619957, "grad_norm": 2.859375, "learning_rate": 8.86007732062501e-05, "loss": 2.0825, "step": 5024 }, { "epoch": 0.21963372525022948, "grad_norm": 2.421875, "learning_rate": 8.859640708913603e-05, "loss": 2.2887, "step": 5025 }, { "epoch": 0.21967743345425936, "grad_norm": 2.484375, "learning_rate": 8.859204024365159e-05, "loss": 1.7923, "step": 5026 }, { "epoch": 0.21972114165828927, "grad_norm": 3.109375, "learning_rate": 8.858767266987917e-05, "loss": 2.2799, "step": 5027 }, { "epoch": 0.21976484986231914, "grad_norm": 2.046875, "learning_rate": 8.85833043679012e-05, "loss": 2.2855, "step": 5028 }, { "epoch": 0.21980855806634905, "grad_norm": 2.203125, "learning_rate": 8.857893533780015e-05, "loss": 2.3333, "step": 5029 }, { "epoch": 0.21985226627037896, "grad_norm": 2.21875, "learning_rate": 8.857456557965842e-05, "loss": 1.4897, "step": 5030 }, { "epoch": 0.21989597447440884, "grad_norm": 3.078125, "learning_rate": 8.857019509355851e-05, "loss": 2.6878, "step": 5031 }, { "epoch": 0.21993968267843875, "grad_norm": 6.40625, "learning_rate": 8.856582387958286e-05, "loss": 1.858, "step": 5032 }, { "epoch": 0.21998339088246863, "grad_norm": 2.8125, "learning_rate": 8.8561451937814e-05, "loss": 2.4977, "step": 5033 }, { "epoch": 0.22002709908649853, "grad_norm": 2.0, "learning_rate": 8.855707926833441e-05, "loss": 1.8504, "step": 5034 }, { "epoch": 0.22007080729052844, "grad_norm": 2.25, "learning_rate": 8.855270587122661e-05, "loss": 2.276, "step": 5035 }, { "epoch": 0.22011451549455832, "grad_norm": 2.5, "learning_rate": 8.854833174657317e-05, "loss": 1.9943, "step": 5036 }, { "epoch": 0.22015822369858823, "grad_norm": 2.46875, "learning_rate": 8.854395689445658e-05, "loss": 2.0906, "step": 5037 }, { "epoch": 0.2202019319026181, "grad_norm": 2.28125, "learning_rate": 8.853958131495943e-05, "loss": 1.8033, "step": 5038 }, { "epoch": 0.22024564010664802, "grad_norm": 1.9765625, "learning_rate": 8.853520500816429e-05, "loss": 1.8322, "step": 5039 }, { "epoch": 0.22028934831067792, "grad_norm": 2.40625, "learning_rate": 8.853082797415374e-05, "loss": 1.8526, "step": 5040 }, { "epoch": 0.2203330565147078, "grad_norm": 2.4375, "learning_rate": 8.85264502130104e-05, "loss": 2.4182, "step": 5041 }, { "epoch": 0.2203767647187377, "grad_norm": 2.71875, "learning_rate": 8.852207172481686e-05, "loss": 1.7747, "step": 5042 }, { "epoch": 0.2204204729227676, "grad_norm": 2.28125, "learning_rate": 8.851769250965577e-05, "loss": 2.3158, "step": 5043 }, { "epoch": 0.2204641811267975, "grad_norm": 2.46875, "learning_rate": 8.851331256760975e-05, "loss": 1.5608, "step": 5044 }, { "epoch": 0.2205078893308274, "grad_norm": 2.390625, "learning_rate": 8.850893189876149e-05, "loss": 2.1841, "step": 5045 }, { "epoch": 0.22055159753485729, "grad_norm": 1.9609375, "learning_rate": 8.850455050319361e-05, "loss": 1.779, "step": 5046 }, { "epoch": 0.2205953057388872, "grad_norm": 2.171875, "learning_rate": 8.850016838098885e-05, "loss": 1.5091, "step": 5047 }, { "epoch": 0.22063901394291707, "grad_norm": 1.953125, "learning_rate": 8.849578553222985e-05, "loss": 2.0306, "step": 5048 }, { "epoch": 0.22068272214694698, "grad_norm": 1.859375, "learning_rate": 8.849140195699936e-05, "loss": 1.5791, "step": 5049 }, { "epoch": 0.2207264303509769, "grad_norm": 2.171875, "learning_rate": 8.84870176553801e-05, "loss": 1.8366, "step": 5050 }, { "epoch": 0.22077013855500677, "grad_norm": 2.40625, "learning_rate": 8.84826326274548e-05, "loss": 2.0644, "step": 5051 }, { "epoch": 0.22081384675903667, "grad_norm": 2.390625, "learning_rate": 8.847824687330621e-05, "loss": 1.9944, "step": 5052 }, { "epoch": 0.22085755496306655, "grad_norm": 2.75, "learning_rate": 8.84738603930171e-05, "loss": 2.3579, "step": 5053 }, { "epoch": 0.22090126316709646, "grad_norm": 1.9609375, "learning_rate": 8.846947318667025e-05, "loss": 1.8134, "step": 5054 }, { "epoch": 0.22094497137112637, "grad_norm": 2.21875, "learning_rate": 8.846508525434845e-05, "loss": 2.1187, "step": 5055 }, { "epoch": 0.22098867957515625, "grad_norm": 2.484375, "learning_rate": 8.846069659613451e-05, "loss": 2.0825, "step": 5056 }, { "epoch": 0.22103238777918616, "grad_norm": 2.609375, "learning_rate": 8.845630721211124e-05, "loss": 2.2968, "step": 5057 }, { "epoch": 0.22107609598321604, "grad_norm": 2.078125, "learning_rate": 8.84519171023615e-05, "loss": 1.684, "step": 5058 }, { "epoch": 0.22111980418724594, "grad_norm": 2.875, "learning_rate": 8.84475262669681e-05, "loss": 1.9728, "step": 5059 }, { "epoch": 0.22116351239127585, "grad_norm": 2.265625, "learning_rate": 8.844313470601393e-05, "loss": 2.5474, "step": 5060 }, { "epoch": 0.22120722059530573, "grad_norm": 4.28125, "learning_rate": 8.843874241958186e-05, "loss": 1.8955, "step": 5061 }, { "epoch": 0.22125092879933564, "grad_norm": 5.65625, "learning_rate": 8.843434940775476e-05, "loss": 1.4155, "step": 5062 }, { "epoch": 0.22129463700336552, "grad_norm": 2.765625, "learning_rate": 8.842995567061558e-05, "loss": 2.2181, "step": 5063 }, { "epoch": 0.22133834520739543, "grad_norm": 2.375, "learning_rate": 8.842556120824719e-05, "loss": 1.9322, "step": 5064 }, { "epoch": 0.22138205341142533, "grad_norm": 2.078125, "learning_rate": 8.842116602073252e-05, "loss": 1.7316, "step": 5065 }, { "epoch": 0.2214257616154552, "grad_norm": 2.328125, "learning_rate": 8.841677010815455e-05, "loss": 2.1623, "step": 5066 }, { "epoch": 0.22146946981948512, "grad_norm": 2.6875, "learning_rate": 8.841237347059618e-05, "loss": 1.8282, "step": 5067 }, { "epoch": 0.221513178023515, "grad_norm": 2.546875, "learning_rate": 8.840797610814045e-05, "loss": 2.1581, "step": 5068 }, { "epoch": 0.2215568862275449, "grad_norm": 2.765625, "learning_rate": 8.84035780208703e-05, "loss": 2.4111, "step": 5069 }, { "epoch": 0.22160059443157482, "grad_norm": 2.078125, "learning_rate": 8.839917920886874e-05, "loss": 2.1053, "step": 5070 }, { "epoch": 0.2216443026356047, "grad_norm": 2.640625, "learning_rate": 8.839477967221879e-05, "loss": 1.8067, "step": 5071 }, { "epoch": 0.2216880108396346, "grad_norm": 2.125, "learning_rate": 8.839037941100344e-05, "loss": 2.0825, "step": 5072 }, { "epoch": 0.22173171904366448, "grad_norm": 2.59375, "learning_rate": 8.838597842530578e-05, "loss": 2.5282, "step": 5073 }, { "epoch": 0.2217754272476944, "grad_norm": 2.28125, "learning_rate": 8.838157671520884e-05, "loss": 1.9524, "step": 5074 }, { "epoch": 0.2218191354517243, "grad_norm": 2.125, "learning_rate": 8.837717428079566e-05, "loss": 2.0709, "step": 5075 }, { "epoch": 0.22186284365575418, "grad_norm": 2.65625, "learning_rate": 8.837277112214937e-05, "loss": 1.9269, "step": 5076 }, { "epoch": 0.22190655185978408, "grad_norm": 2.84375, "learning_rate": 8.836836723935303e-05, "loss": 1.8649, "step": 5077 }, { "epoch": 0.22195026006381396, "grad_norm": 3.453125, "learning_rate": 8.836396263248976e-05, "loss": 3.6734, "step": 5078 }, { "epoch": 0.22199396826784387, "grad_norm": 2.046875, "learning_rate": 8.835955730164269e-05, "loss": 2.067, "step": 5079 }, { "epoch": 0.22203767647187378, "grad_norm": 2.140625, "learning_rate": 8.835515124689494e-05, "loss": 2.0929, "step": 5080 }, { "epoch": 0.22208138467590366, "grad_norm": 2.0625, "learning_rate": 8.835074446832965e-05, "loss": 1.9263, "step": 5081 }, { "epoch": 0.22212509287993357, "grad_norm": 1.9609375, "learning_rate": 8.834633696603e-05, "loss": 1.5586, "step": 5082 }, { "epoch": 0.22216880108396345, "grad_norm": 2.03125, "learning_rate": 8.834192874007916e-05, "loss": 2.14, "step": 5083 }, { "epoch": 0.22221250928799335, "grad_norm": 2.171875, "learning_rate": 8.833751979056032e-05, "loss": 2.221, "step": 5084 }, { "epoch": 0.22225621749202326, "grad_norm": 2.15625, "learning_rate": 8.833311011755668e-05, "loss": 1.5307, "step": 5085 }, { "epoch": 0.22229992569605314, "grad_norm": 2.140625, "learning_rate": 8.832869972115148e-05, "loss": 1.4644, "step": 5086 }, { "epoch": 0.22234363390008305, "grad_norm": 1.9921875, "learning_rate": 8.832428860142792e-05, "loss": 1.6832, "step": 5087 }, { "epoch": 0.22238734210411293, "grad_norm": 2.3125, "learning_rate": 8.831987675846924e-05, "loss": 2.2758, "step": 5088 }, { "epoch": 0.22243105030814284, "grad_norm": 2.390625, "learning_rate": 8.831546419235873e-05, "loss": 1.977, "step": 5089 }, { "epoch": 0.22247475851217274, "grad_norm": 2.609375, "learning_rate": 8.831105090317965e-05, "loss": 1.7981, "step": 5090 }, { "epoch": 0.22251846671620262, "grad_norm": 2.0625, "learning_rate": 8.830663689101529e-05, "loss": 1.8568, "step": 5091 }, { "epoch": 0.22256217492023253, "grad_norm": 2.234375, "learning_rate": 8.83022221559489e-05, "loss": 2.2899, "step": 5092 }, { "epoch": 0.2226058831242624, "grad_norm": 2.109375, "learning_rate": 8.829780669806387e-05, "loss": 1.7343, "step": 5093 }, { "epoch": 0.22264959132829232, "grad_norm": 2.46875, "learning_rate": 8.829339051744346e-05, "loss": 2.4551, "step": 5094 }, { "epoch": 0.22269329953232223, "grad_norm": 2.15625, "learning_rate": 8.828897361417106e-05, "loss": 1.7362, "step": 5095 }, { "epoch": 0.2227370077363521, "grad_norm": 2.53125, "learning_rate": 8.828455598832998e-05, "loss": 2.0821, "step": 5096 }, { "epoch": 0.222780715940382, "grad_norm": 2.0, "learning_rate": 8.828013764000362e-05, "loss": 1.9559, "step": 5097 }, { "epoch": 0.2228244241444119, "grad_norm": 2.25, "learning_rate": 8.827571856927535e-05, "loss": 1.6318, "step": 5098 }, { "epoch": 0.2228681323484418, "grad_norm": 2.0625, "learning_rate": 8.827129877622857e-05, "loss": 1.8371, "step": 5099 }, { "epoch": 0.2229118405524717, "grad_norm": 1.8671875, "learning_rate": 8.826687826094666e-05, "loss": 1.8739, "step": 5100 }, { "epoch": 0.2229555487565016, "grad_norm": 2.21875, "learning_rate": 8.826245702351309e-05, "loss": 2.28, "step": 5101 }, { "epoch": 0.2229992569605315, "grad_norm": 2.40625, "learning_rate": 8.825803506401125e-05, "loss": 1.8682, "step": 5102 }, { "epoch": 0.2230429651645614, "grad_norm": 3.5625, "learning_rate": 8.82536123825246e-05, "loss": 1.5349, "step": 5103 }, { "epoch": 0.22308667336859128, "grad_norm": 1.9765625, "learning_rate": 8.824918897913661e-05, "loss": 1.752, "step": 5104 }, { "epoch": 0.2231303815726212, "grad_norm": 1.9921875, "learning_rate": 8.824476485393076e-05, "loss": 1.6159, "step": 5105 }, { "epoch": 0.22317408977665107, "grad_norm": 2.21875, "learning_rate": 8.824034000699055e-05, "loss": 1.7539, "step": 5106 }, { "epoch": 0.22321779798068098, "grad_norm": 2.0, "learning_rate": 8.823591443839944e-05, "loss": 1.666, "step": 5107 }, { "epoch": 0.22326150618471088, "grad_norm": 2.609375, "learning_rate": 8.8231488148241e-05, "loss": 2.2305, "step": 5108 }, { "epoch": 0.22330521438874076, "grad_norm": 2.328125, "learning_rate": 8.822706113659872e-05, "loss": 2.1343, "step": 5109 }, { "epoch": 0.22334892259277067, "grad_norm": 2.234375, "learning_rate": 8.822263340355616e-05, "loss": 1.7487, "step": 5110 }, { "epoch": 0.22339263079680055, "grad_norm": 2.796875, "learning_rate": 8.82182049491969e-05, "loss": 2.2869, "step": 5111 }, { "epoch": 0.22343633900083046, "grad_norm": 2.0, "learning_rate": 8.821377577360446e-05, "loss": 2.0224, "step": 5112 }, { "epoch": 0.22348004720486037, "grad_norm": 2.234375, "learning_rate": 8.820934587686247e-05, "loss": 2.3331, "step": 5113 }, { "epoch": 0.22352375540889025, "grad_norm": 2.109375, "learning_rate": 8.82049152590545e-05, "loss": 1.472, "step": 5114 }, { "epoch": 0.22356746361292015, "grad_norm": 1.9921875, "learning_rate": 8.820048392026417e-05, "loss": 1.8046, "step": 5115 }, { "epoch": 0.22361117181695003, "grad_norm": 2.203125, "learning_rate": 8.819605186057514e-05, "loss": 1.7677, "step": 5116 }, { "epoch": 0.22365488002097994, "grad_norm": 3.03125, "learning_rate": 8.819161908007099e-05, "loss": 1.8259, "step": 5117 }, { "epoch": 0.22369858822500985, "grad_norm": 2.3125, "learning_rate": 8.818718557883541e-05, "loss": 1.3694, "step": 5118 }, { "epoch": 0.22374229642903973, "grad_norm": 2.0, "learning_rate": 8.818275135695207e-05, "loss": 1.6956, "step": 5119 }, { "epoch": 0.22378600463306964, "grad_norm": 2.578125, "learning_rate": 8.817831641450462e-05, "loss": 1.988, "step": 5120 }, { "epoch": 0.22382971283709951, "grad_norm": 2.3125, "learning_rate": 8.81738807515768e-05, "loss": 1.4667, "step": 5121 }, { "epoch": 0.22387342104112942, "grad_norm": 2.296875, "learning_rate": 8.816944436825228e-05, "loss": 1.899, "step": 5122 }, { "epoch": 0.22391712924515933, "grad_norm": 2.296875, "learning_rate": 8.816500726461478e-05, "loss": 2.2279, "step": 5123 }, { "epoch": 0.2239608374491892, "grad_norm": 2.265625, "learning_rate": 8.816056944074805e-05, "loss": 2.3851, "step": 5124 }, { "epoch": 0.22400454565321912, "grad_norm": 2.375, "learning_rate": 8.815613089673583e-05, "loss": 1.4785, "step": 5125 }, { "epoch": 0.224048253857249, "grad_norm": 2.03125, "learning_rate": 8.81516916326619e-05, "loss": 2.0775, "step": 5126 }, { "epoch": 0.2240919620612789, "grad_norm": 2.640625, "learning_rate": 8.814725164861001e-05, "loss": 2.5111, "step": 5127 }, { "epoch": 0.2241356702653088, "grad_norm": 2.125, "learning_rate": 8.814281094466398e-05, "loss": 1.7167, "step": 5128 }, { "epoch": 0.2241793784693387, "grad_norm": 2.375, "learning_rate": 8.813836952090758e-05, "loss": 2.0692, "step": 5129 }, { "epoch": 0.2242230866733686, "grad_norm": 2.109375, "learning_rate": 8.813392737742463e-05, "loss": 1.7218, "step": 5130 }, { "epoch": 0.22426679487739848, "grad_norm": 2.40625, "learning_rate": 8.812948451429898e-05, "loss": 2.4234, "step": 5131 }, { "epoch": 0.2243105030814284, "grad_norm": 2.59375, "learning_rate": 8.812504093161446e-05, "loss": 1.8171, "step": 5132 }, { "epoch": 0.2243542112854583, "grad_norm": 2.9375, "learning_rate": 8.812059662945494e-05, "loss": 1.8073, "step": 5133 }, { "epoch": 0.22439791948948817, "grad_norm": 2.046875, "learning_rate": 8.811615160790427e-05, "loss": 1.7733, "step": 5134 }, { "epoch": 0.22444162769351808, "grad_norm": 2.234375, "learning_rate": 8.811170586704633e-05, "loss": 2.5059, "step": 5135 }, { "epoch": 0.22448533589754796, "grad_norm": 2.421875, "learning_rate": 8.810725940696505e-05, "loss": 1.9448, "step": 5136 }, { "epoch": 0.22452904410157787, "grad_norm": 3.625, "learning_rate": 8.810281222774432e-05, "loss": 2.2897, "step": 5137 }, { "epoch": 0.22457275230560778, "grad_norm": 3.921875, "learning_rate": 8.809836432946808e-05, "loss": 1.8743, "step": 5138 }, { "epoch": 0.22461646050963766, "grad_norm": 2.390625, "learning_rate": 8.809391571222023e-05, "loss": 1.9626, "step": 5139 }, { "epoch": 0.22466016871366756, "grad_norm": 2.859375, "learning_rate": 8.808946637608477e-05, "loss": 1.7792, "step": 5140 }, { "epoch": 0.22470387691769744, "grad_norm": 2.359375, "learning_rate": 8.808501632114563e-05, "loss": 1.9027, "step": 5141 }, { "epoch": 0.22474758512172735, "grad_norm": 2.859375, "learning_rate": 8.808056554748681e-05, "loss": 2.5154, "step": 5142 }, { "epoch": 0.22479129332575726, "grad_norm": 2.09375, "learning_rate": 8.807611405519227e-05, "loss": 1.9519, "step": 5143 }, { "epoch": 0.22483500152978714, "grad_norm": 2.640625, "learning_rate": 8.807166184434607e-05, "loss": 2.3366, "step": 5144 }, { "epoch": 0.22487870973381704, "grad_norm": 2.296875, "learning_rate": 8.806720891503219e-05, "loss": 1.8174, "step": 5145 }, { "epoch": 0.22492241793784692, "grad_norm": 2.640625, "learning_rate": 8.806275526733466e-05, "loss": 1.7592, "step": 5146 }, { "epoch": 0.22496612614187683, "grad_norm": 2.46875, "learning_rate": 8.805830090133755e-05, "loss": 2.2325, "step": 5147 }, { "epoch": 0.22500983434590674, "grad_norm": 1.984375, "learning_rate": 8.805384581712492e-05, "loss": 1.9405, "step": 5148 }, { "epoch": 0.22505354254993662, "grad_norm": 2.125, "learning_rate": 8.804939001478083e-05, "loss": 1.8371, "step": 5149 }, { "epoch": 0.22509725075396653, "grad_norm": 1.8671875, "learning_rate": 8.804493349438937e-05, "loss": 1.6809, "step": 5150 }, { "epoch": 0.2251409589579964, "grad_norm": 2.328125, "learning_rate": 8.804047625603464e-05, "loss": 2.0279, "step": 5151 }, { "epoch": 0.22518466716202631, "grad_norm": 2.265625, "learning_rate": 8.803601829980076e-05, "loss": 2.1587, "step": 5152 }, { "epoch": 0.22522837536605622, "grad_norm": 2.125, "learning_rate": 8.803155962577186e-05, "loss": 1.6492, "step": 5153 }, { "epoch": 0.2252720835700861, "grad_norm": 2.140625, "learning_rate": 8.802710023403208e-05, "loss": 2.2723, "step": 5154 }, { "epoch": 0.225315791774116, "grad_norm": 2.1875, "learning_rate": 8.802264012466557e-05, "loss": 2.0675, "step": 5155 }, { "epoch": 0.2253594999781459, "grad_norm": 1.9453125, "learning_rate": 8.801817929775649e-05, "loss": 1.8635, "step": 5156 }, { "epoch": 0.2254032081821758, "grad_norm": 2.09375, "learning_rate": 8.801371775338904e-05, "loss": 2.1729, "step": 5157 }, { "epoch": 0.2254469163862057, "grad_norm": 2.25, "learning_rate": 8.800925549164741e-05, "loss": 1.7483, "step": 5158 }, { "epoch": 0.22549062459023558, "grad_norm": 2.328125, "learning_rate": 8.800479251261581e-05, "loss": 1.9852, "step": 5159 }, { "epoch": 0.2255343327942655, "grad_norm": 1.90625, "learning_rate": 8.800032881637847e-05, "loss": 1.364, "step": 5160 }, { "epoch": 0.22557804099829537, "grad_norm": 2.265625, "learning_rate": 8.799586440301961e-05, "loss": 2.4219, "step": 5161 }, { "epoch": 0.22562174920232528, "grad_norm": 2.078125, "learning_rate": 8.79913992726235e-05, "loss": 2.3594, "step": 5162 }, { "epoch": 0.22566545740635519, "grad_norm": 2.484375, "learning_rate": 8.798693342527438e-05, "loss": 1.6779, "step": 5163 }, { "epoch": 0.22570916561038507, "grad_norm": 2.828125, "learning_rate": 8.798246686105654e-05, "loss": 3.4992, "step": 5164 }, { "epoch": 0.22575287381441497, "grad_norm": 2.5625, "learning_rate": 8.797799958005426e-05, "loss": 1.3351, "step": 5165 }, { "epoch": 0.22579658201844485, "grad_norm": 2.0625, "learning_rate": 8.797353158235186e-05, "loss": 1.9769, "step": 5166 }, { "epoch": 0.22584029022247476, "grad_norm": 2.578125, "learning_rate": 8.796906286803365e-05, "loss": 1.8765, "step": 5167 }, { "epoch": 0.22588399842650467, "grad_norm": 2.21875, "learning_rate": 8.796459343718397e-05, "loss": 2.3319, "step": 5168 }, { "epoch": 0.22592770663053455, "grad_norm": 2.78125, "learning_rate": 8.796012328988716e-05, "loss": 1.7823, "step": 5169 }, { "epoch": 0.22597141483456445, "grad_norm": 2.4375, "learning_rate": 8.795565242622758e-05, "loss": 1.6866, "step": 5170 }, { "epoch": 0.22601512303859433, "grad_norm": 2.65625, "learning_rate": 8.795118084628959e-05, "loss": 1.9945, "step": 5171 }, { "epoch": 0.22605883124262424, "grad_norm": 2.5625, "learning_rate": 8.794670855015757e-05, "loss": 1.8617, "step": 5172 }, { "epoch": 0.22610253944665415, "grad_norm": 2.734375, "learning_rate": 8.794223553791595e-05, "loss": 2.4788, "step": 5173 }, { "epoch": 0.22614624765068403, "grad_norm": 2.390625, "learning_rate": 8.79377618096491e-05, "loss": 2.0072, "step": 5174 }, { "epoch": 0.22618995585471394, "grad_norm": 2.265625, "learning_rate": 8.793328736544149e-05, "loss": 2.0756, "step": 5175 }, { "epoch": 0.22623366405874382, "grad_norm": 2.09375, "learning_rate": 8.792881220537751e-05, "loss": 2.3657, "step": 5176 }, { "epoch": 0.22627737226277372, "grad_norm": 2.140625, "learning_rate": 8.792433632954167e-05, "loss": 2.5119, "step": 5177 }, { "epoch": 0.22632108046680363, "grad_norm": 2.46875, "learning_rate": 8.791985973801839e-05, "loss": 2.3355, "step": 5178 }, { "epoch": 0.2263647886708335, "grad_norm": 4.0, "learning_rate": 8.791538243089219e-05, "loss": 2.1918, "step": 5179 }, { "epoch": 0.22640849687486342, "grad_norm": 2.03125, "learning_rate": 8.791090440824752e-05, "loss": 1.9486, "step": 5180 }, { "epoch": 0.2264522050788933, "grad_norm": 2.0, "learning_rate": 8.790642567016891e-05, "loss": 2.2691, "step": 5181 }, { "epoch": 0.2264959132829232, "grad_norm": 2.203125, "learning_rate": 8.790194621674088e-05, "loss": 2.2397, "step": 5182 }, { "epoch": 0.2265396214869531, "grad_norm": 2.46875, "learning_rate": 8.789746604804796e-05, "loss": 1.9892, "step": 5183 }, { "epoch": 0.226583329690983, "grad_norm": 2.1875, "learning_rate": 8.78929851641747e-05, "loss": 1.6987, "step": 5184 }, { "epoch": 0.2266270378950129, "grad_norm": 2.0625, "learning_rate": 8.788850356520566e-05, "loss": 1.6268, "step": 5185 }, { "epoch": 0.22667074609904278, "grad_norm": 2.515625, "learning_rate": 8.788402125122542e-05, "loss": 2.4155, "step": 5186 }, { "epoch": 0.2267144543030727, "grad_norm": 3.078125, "learning_rate": 8.787953822231855e-05, "loss": 2.1763, "step": 5187 }, { "epoch": 0.2267581625071026, "grad_norm": 2.265625, "learning_rate": 8.787505447856967e-05, "loss": 1.7471, "step": 5188 }, { "epoch": 0.22680187071113247, "grad_norm": 3.171875, "learning_rate": 8.787057002006337e-05, "loss": 2.0579, "step": 5189 }, { "epoch": 0.22684557891516238, "grad_norm": 2.4375, "learning_rate": 8.786608484688432e-05, "loss": 1.7905, "step": 5190 }, { "epoch": 0.22688928711919226, "grad_norm": 13.9375, "learning_rate": 8.786159895911712e-05, "loss": 2.8267, "step": 5191 }, { "epoch": 0.22693299532322217, "grad_norm": 2.421875, "learning_rate": 8.785711235684647e-05, "loss": 2.1309, "step": 5192 }, { "epoch": 0.22697670352725208, "grad_norm": 1.8671875, "learning_rate": 8.785262504015696e-05, "loss": 1.6625, "step": 5193 }, { "epoch": 0.22702041173128196, "grad_norm": 1.8984375, "learning_rate": 8.784813700913337e-05, "loss": 1.8822, "step": 5194 }, { "epoch": 0.22706411993531186, "grad_norm": 2.28125, "learning_rate": 8.784364826386034e-05, "loss": 2.059, "step": 5195 }, { "epoch": 0.22710782813934174, "grad_norm": 2.203125, "learning_rate": 8.783915880442257e-05, "loss": 2.0635, "step": 5196 }, { "epoch": 0.22715153634337165, "grad_norm": 2.015625, "learning_rate": 8.783466863090482e-05, "loss": 2.2928, "step": 5197 }, { "epoch": 0.22719524454740156, "grad_norm": 2.40625, "learning_rate": 8.78301777433918e-05, "loss": 2.1231, "step": 5198 }, { "epoch": 0.22723895275143144, "grad_norm": 3.9375, "learning_rate": 8.782568614196827e-05, "loss": 3.9269, "step": 5199 }, { "epoch": 0.22728266095546135, "grad_norm": 2.828125, "learning_rate": 8.782119382671898e-05, "loss": 1.8587, "step": 5200 }, { "epoch": 0.22732636915949123, "grad_norm": 2.03125, "learning_rate": 8.781670079772873e-05, "loss": 1.5783, "step": 5201 }, { "epoch": 0.22737007736352113, "grad_norm": 2.09375, "learning_rate": 8.781220705508229e-05, "loss": 1.6649, "step": 5202 }, { "epoch": 0.22741378556755104, "grad_norm": 2.078125, "learning_rate": 8.780771259886447e-05, "loss": 2.1052, "step": 5203 }, { "epoch": 0.22745749377158092, "grad_norm": 2.75, "learning_rate": 8.780321742916008e-05, "loss": 2.1247, "step": 5204 }, { "epoch": 0.22750120197561083, "grad_norm": 1.9140625, "learning_rate": 8.779872154605397e-05, "loss": 1.8571, "step": 5205 }, { "epoch": 0.2275449101796407, "grad_norm": 2.078125, "learning_rate": 8.779422494963096e-05, "loss": 2.2196, "step": 5206 }, { "epoch": 0.22758861838367062, "grad_norm": 2.21875, "learning_rate": 8.778972763997592e-05, "loss": 2.0445, "step": 5207 }, { "epoch": 0.22763232658770052, "grad_norm": 3.484375, "learning_rate": 8.778522961717373e-05, "loss": 2.1614, "step": 5208 }, { "epoch": 0.2276760347917304, "grad_norm": 2.5, "learning_rate": 8.778073088130925e-05, "loss": 2.4693, "step": 5209 }, { "epoch": 0.2277197429957603, "grad_norm": 2.640625, "learning_rate": 8.777623143246741e-05, "loss": 2.0724, "step": 5210 }, { "epoch": 0.2277634511997902, "grad_norm": 1.9921875, "learning_rate": 8.777173127073308e-05, "loss": 1.5359, "step": 5211 }, { "epoch": 0.2278071594038201, "grad_norm": 2.15625, "learning_rate": 8.776723039619121e-05, "loss": 1.9064, "step": 5212 }, { "epoch": 0.22785086760785, "grad_norm": 2.078125, "learning_rate": 8.776272880892675e-05, "loss": 1.9944, "step": 5213 }, { "epoch": 0.22789457581187988, "grad_norm": 2.5, "learning_rate": 8.775822650902463e-05, "loss": 2.0785, "step": 5214 }, { "epoch": 0.2279382840159098, "grad_norm": 4.84375, "learning_rate": 8.775372349656981e-05, "loss": 2.0266, "step": 5215 }, { "epoch": 0.22798199221993967, "grad_norm": 3.796875, "learning_rate": 8.77492197716473e-05, "loss": 2.3301, "step": 5216 }, { "epoch": 0.22802570042396958, "grad_norm": 2.140625, "learning_rate": 8.774471533434206e-05, "loss": 1.6443, "step": 5217 }, { "epoch": 0.2280694086279995, "grad_norm": 1.828125, "learning_rate": 8.77402101847391e-05, "loss": 1.6125, "step": 5218 }, { "epoch": 0.22811311683202937, "grad_norm": 1.90625, "learning_rate": 8.773570432292344e-05, "loss": 1.7902, "step": 5219 }, { "epoch": 0.22815682503605927, "grad_norm": 2.234375, "learning_rate": 8.773119774898013e-05, "loss": 2.0626, "step": 5220 }, { "epoch": 0.22820053324008915, "grad_norm": 2.234375, "learning_rate": 8.77266904629942e-05, "loss": 2.1337, "step": 5221 }, { "epoch": 0.22824424144411906, "grad_norm": 2.5625, "learning_rate": 8.772218246505073e-05, "loss": 2.8284, "step": 5222 }, { "epoch": 0.22828794964814897, "grad_norm": 1.9375, "learning_rate": 8.771767375523475e-05, "loss": 1.8774, "step": 5223 }, { "epoch": 0.22833165785217885, "grad_norm": 2.15625, "learning_rate": 8.771316433363138e-05, "loss": 1.9789, "step": 5224 }, { "epoch": 0.22837536605620876, "grad_norm": 2.34375, "learning_rate": 8.770865420032571e-05, "loss": 2.3812, "step": 5225 }, { "epoch": 0.22841907426023864, "grad_norm": 2.40625, "learning_rate": 8.770414335540285e-05, "loss": 2.1789, "step": 5226 }, { "epoch": 0.22846278246426854, "grad_norm": 2.125, "learning_rate": 8.769963179894793e-05, "loss": 1.7856, "step": 5227 }, { "epoch": 0.22850649066829845, "grad_norm": 1.9296875, "learning_rate": 8.76951195310461e-05, "loss": 2.0435, "step": 5228 }, { "epoch": 0.22855019887232833, "grad_norm": 3.78125, "learning_rate": 8.76906065517825e-05, "loss": 2.341, "step": 5229 }, { "epoch": 0.22859390707635824, "grad_norm": 2.125, "learning_rate": 8.768609286124228e-05, "loss": 1.9607, "step": 5230 }, { "epoch": 0.22863761528038812, "grad_norm": 2.203125, "learning_rate": 8.768157845951065e-05, "loss": 2.5474, "step": 5231 }, { "epoch": 0.22868132348441803, "grad_norm": 2.25, "learning_rate": 8.767706334667279e-05, "loss": 1.9932, "step": 5232 }, { "epoch": 0.22872503168844793, "grad_norm": 2.34375, "learning_rate": 8.767254752281392e-05, "loss": 1.9982, "step": 5233 }, { "epoch": 0.2287687398924778, "grad_norm": 2.140625, "learning_rate": 8.766803098801924e-05, "loss": 1.7068, "step": 5234 }, { "epoch": 0.22881244809650772, "grad_norm": 2.375, "learning_rate": 8.766351374237398e-05, "loss": 2.1922, "step": 5235 }, { "epoch": 0.2288561563005376, "grad_norm": 3.328125, "learning_rate": 8.765899578596342e-05, "loss": 2.0894, "step": 5236 }, { "epoch": 0.2288998645045675, "grad_norm": 2.8125, "learning_rate": 8.765447711887278e-05, "loss": 2.5275, "step": 5237 }, { "epoch": 0.22894357270859741, "grad_norm": 2.46875, "learning_rate": 8.764995774118736e-05, "loss": 2.0998, "step": 5238 }, { "epoch": 0.2289872809126273, "grad_norm": 2.140625, "learning_rate": 8.764543765299245e-05, "loss": 1.943, "step": 5239 }, { "epoch": 0.2290309891166572, "grad_norm": 2.1875, "learning_rate": 8.764091685437333e-05, "loss": 1.6759, "step": 5240 }, { "epoch": 0.22907469732068708, "grad_norm": 2.59375, "learning_rate": 8.763639534541533e-05, "loss": 2.0194, "step": 5241 }, { "epoch": 0.229118405524717, "grad_norm": 2.234375, "learning_rate": 8.763187312620377e-05, "loss": 1.8292, "step": 5242 }, { "epoch": 0.2291621137287469, "grad_norm": 2.21875, "learning_rate": 8.762735019682399e-05, "loss": 1.873, "step": 5243 }, { "epoch": 0.22920582193277678, "grad_norm": 2.0625, "learning_rate": 8.762282655736136e-05, "loss": 1.5856, "step": 5244 }, { "epoch": 0.22924953013680668, "grad_norm": 2.578125, "learning_rate": 8.761830220790123e-05, "loss": 2.082, "step": 5245 }, { "epoch": 0.22929323834083656, "grad_norm": 2.890625, "learning_rate": 8.761377714852899e-05, "loss": 2.1958, "step": 5246 }, { "epoch": 0.22933694654486647, "grad_norm": 2.25, "learning_rate": 8.760925137933001e-05, "loss": 2.0435, "step": 5247 }, { "epoch": 0.22938065474889638, "grad_norm": 2.296875, "learning_rate": 8.760472490038974e-05, "loss": 2.3615, "step": 5248 }, { "epoch": 0.22942436295292626, "grad_norm": 1.875, "learning_rate": 8.760019771179358e-05, "loss": 1.7286, "step": 5249 }, { "epoch": 0.22946807115695617, "grad_norm": 2.78125, "learning_rate": 8.759566981362694e-05, "loss": 1.8911, "step": 5250 }, { "epoch": 0.22951177936098605, "grad_norm": 9.9375, "learning_rate": 8.759114120597532e-05, "loss": 1.535, "step": 5251 }, { "epoch": 0.22955548756501595, "grad_norm": 2.484375, "learning_rate": 8.758661188892414e-05, "loss": 2.203, "step": 5252 }, { "epoch": 0.22959919576904586, "grad_norm": 3.296875, "learning_rate": 8.75820818625589e-05, "loss": 2.2292, "step": 5253 }, { "epoch": 0.22964290397307574, "grad_norm": 2.578125, "learning_rate": 8.757755112696509e-05, "loss": 2.431, "step": 5254 }, { "epoch": 0.22968661217710565, "grad_norm": 3.640625, "learning_rate": 8.757301968222817e-05, "loss": 3.2358, "step": 5255 }, { "epoch": 0.22973032038113553, "grad_norm": 1.9296875, "learning_rate": 8.756848752843369e-05, "loss": 1.7219, "step": 5256 }, { "epoch": 0.22977402858516544, "grad_norm": 2.34375, "learning_rate": 8.756395466566719e-05, "loss": 1.6914, "step": 5257 }, { "epoch": 0.22981773678919534, "grad_norm": 2.46875, "learning_rate": 8.755942109401417e-05, "loss": 2.3572, "step": 5258 }, { "epoch": 0.22986144499322522, "grad_norm": 2.59375, "learning_rate": 8.755488681356022e-05, "loss": 3.0781, "step": 5259 }, { "epoch": 0.22990515319725513, "grad_norm": 1.9296875, "learning_rate": 8.755035182439088e-05, "loss": 1.7704, "step": 5260 }, { "epoch": 0.229948861401285, "grad_norm": 1.9140625, "learning_rate": 8.754581612659178e-05, "loss": 1.8192, "step": 5261 }, { "epoch": 0.22999256960531492, "grad_norm": 2.21875, "learning_rate": 8.754127972024846e-05, "loss": 2.4582, "step": 5262 }, { "epoch": 0.23003627780934482, "grad_norm": 2.578125, "learning_rate": 8.753674260544657e-05, "loss": 2.6562, "step": 5263 }, { "epoch": 0.2300799860133747, "grad_norm": 3.640625, "learning_rate": 8.753220478227171e-05, "loss": 1.5956, "step": 5264 }, { "epoch": 0.2301236942174046, "grad_norm": 2.71875, "learning_rate": 8.752766625080952e-05, "loss": 1.8347, "step": 5265 }, { "epoch": 0.2301674024214345, "grad_norm": 2.71875, "learning_rate": 8.752312701114564e-05, "loss": 1.8427, "step": 5266 }, { "epoch": 0.2302111106254644, "grad_norm": 3.390625, "learning_rate": 8.751858706336576e-05, "loss": 3.1018, "step": 5267 }, { "epoch": 0.2302548188294943, "grad_norm": 1.953125, "learning_rate": 8.751404640755552e-05, "loss": 1.7006, "step": 5268 }, { "epoch": 0.2302985270335242, "grad_norm": 2.1875, "learning_rate": 8.750950504380062e-05, "loss": 1.9282, "step": 5269 }, { "epoch": 0.2303422352375541, "grad_norm": 2.140625, "learning_rate": 8.750496297218678e-05, "loss": 1.5637, "step": 5270 }, { "epoch": 0.23038594344158397, "grad_norm": 3.40625, "learning_rate": 8.75004201927997e-05, "loss": 1.9573, "step": 5271 }, { "epoch": 0.23042965164561388, "grad_norm": 2.3125, "learning_rate": 8.749587670572511e-05, "loss": 2.4692, "step": 5272 }, { "epoch": 0.2304733598496438, "grad_norm": 1.984375, "learning_rate": 8.749133251104876e-05, "loss": 1.6436, "step": 5273 }, { "epoch": 0.23051706805367367, "grad_norm": 2.59375, "learning_rate": 8.748678760885638e-05, "loss": 2.428, "step": 5274 }, { "epoch": 0.23056077625770358, "grad_norm": 2.078125, "learning_rate": 8.748224199923378e-05, "loss": 1.7959, "step": 5275 }, { "epoch": 0.23060448446173346, "grad_norm": 1.9765625, "learning_rate": 8.747769568226672e-05, "loss": 1.7832, "step": 5276 }, { "epoch": 0.23064819266576336, "grad_norm": 1.9921875, "learning_rate": 8.747314865804098e-05, "loss": 1.8904, "step": 5277 }, { "epoch": 0.23069190086979327, "grad_norm": 1.8515625, "learning_rate": 8.74686009266424e-05, "loss": 1.6946, "step": 5278 }, { "epoch": 0.23073560907382315, "grad_norm": 2.546875, "learning_rate": 8.74640524881568e-05, "loss": 2.447, "step": 5279 }, { "epoch": 0.23077931727785306, "grad_norm": 2.15625, "learning_rate": 8.745950334266997e-05, "loss": 1.8461, "step": 5280 }, { "epoch": 0.23082302548188294, "grad_norm": 2.25, "learning_rate": 8.745495349026781e-05, "loss": 1.5948, "step": 5281 }, { "epoch": 0.23086673368591284, "grad_norm": 2.09375, "learning_rate": 8.745040293103616e-05, "loss": 1.9506, "step": 5282 }, { "epoch": 0.23091044188994275, "grad_norm": 3.640625, "learning_rate": 8.744585166506089e-05, "loss": 1.9788, "step": 5283 }, { "epoch": 0.23095415009397263, "grad_norm": 3.46875, "learning_rate": 8.744129969242793e-05, "loss": 2.6072, "step": 5284 }, { "epoch": 0.23099785829800254, "grad_norm": 2.328125, "learning_rate": 8.743674701322312e-05, "loss": 1.6111, "step": 5285 }, { "epoch": 0.23104156650203242, "grad_norm": 2.359375, "learning_rate": 8.743219362753244e-05, "loss": 1.7419, "step": 5286 }, { "epoch": 0.23108527470606233, "grad_norm": 2.671875, "learning_rate": 8.742763953544175e-05, "loss": 2.2248, "step": 5287 }, { "epoch": 0.23112898291009223, "grad_norm": 2.15625, "learning_rate": 8.742308473703706e-05, "loss": 1.6965, "step": 5288 }, { "epoch": 0.23117269111412211, "grad_norm": 2.203125, "learning_rate": 8.741852923240427e-05, "loss": 1.9022, "step": 5289 }, { "epoch": 0.23121639931815202, "grad_norm": 4.21875, "learning_rate": 8.741397302162939e-05, "loss": 1.2419, "step": 5290 }, { "epoch": 0.2312601075221819, "grad_norm": 1.9375, "learning_rate": 8.740941610479838e-05, "loss": 1.4853, "step": 5291 }, { "epoch": 0.2313038157262118, "grad_norm": 2.84375, "learning_rate": 8.740485848199723e-05, "loss": 2.8512, "step": 5292 }, { "epoch": 0.23134752393024172, "grad_norm": 2.125, "learning_rate": 8.740030015331198e-05, "loss": 1.8565, "step": 5293 }, { "epoch": 0.2313912321342716, "grad_norm": 2.4375, "learning_rate": 8.739574111882862e-05, "loss": 2.0053, "step": 5294 }, { "epoch": 0.2314349403383015, "grad_norm": 2.046875, "learning_rate": 8.73911813786332e-05, "loss": 1.7054, "step": 5295 }, { "epoch": 0.23147864854233138, "grad_norm": 2.03125, "learning_rate": 8.738662093281177e-05, "loss": 1.8426, "step": 5296 }, { "epoch": 0.2315223567463613, "grad_norm": 2.234375, "learning_rate": 8.738205978145038e-05, "loss": 2.0349, "step": 5297 }, { "epoch": 0.2315660649503912, "grad_norm": 1.984375, "learning_rate": 8.737749792463512e-05, "loss": 1.8219, "step": 5298 }, { "epoch": 0.23160977315442108, "grad_norm": 2.5, "learning_rate": 8.737293536245207e-05, "loss": 3.0215, "step": 5299 }, { "epoch": 0.23165348135845099, "grad_norm": 1.9140625, "learning_rate": 8.736837209498736e-05, "loss": 1.6017, "step": 5300 }, { "epoch": 0.23169718956248087, "grad_norm": 2.125, "learning_rate": 8.736380812232706e-05, "loss": 2.0057, "step": 5301 }, { "epoch": 0.23174089776651077, "grad_norm": 2.734375, "learning_rate": 8.735924344455732e-05, "loss": 2.6932, "step": 5302 }, { "epoch": 0.23178460597054068, "grad_norm": 2.453125, "learning_rate": 8.735467806176429e-05, "loss": 2.12, "step": 5303 }, { "epoch": 0.23182831417457056, "grad_norm": 3.03125, "learning_rate": 8.735011197403411e-05, "loss": 2.4922, "step": 5304 }, { "epoch": 0.23187202237860047, "grad_norm": 3.546875, "learning_rate": 8.734554518145297e-05, "loss": 2.1247, "step": 5305 }, { "epoch": 0.23191573058263035, "grad_norm": 2.046875, "learning_rate": 8.734097768410703e-05, "loss": 2.5513, "step": 5306 }, { "epoch": 0.23195943878666025, "grad_norm": 2.671875, "learning_rate": 8.733640948208248e-05, "loss": 2.6246, "step": 5307 }, { "epoch": 0.23200314699069016, "grad_norm": 2.65625, "learning_rate": 8.733184057546558e-05, "loss": 2.9116, "step": 5308 }, { "epoch": 0.23204685519472004, "grad_norm": 2.40625, "learning_rate": 8.732727096434247e-05, "loss": 2.7499, "step": 5309 }, { "epoch": 0.23209056339874995, "grad_norm": 2.046875, "learning_rate": 8.732270064879947e-05, "loss": 1.8118, "step": 5310 }, { "epoch": 0.23213427160277983, "grad_norm": 2.71875, "learning_rate": 8.731812962892277e-05, "loss": 1.5574, "step": 5311 }, { "epoch": 0.23217797980680974, "grad_norm": 2.875, "learning_rate": 8.731355790479862e-05, "loss": 2.3718, "step": 5312 }, { "epoch": 0.23222168801083964, "grad_norm": 2.890625, "learning_rate": 8.730898547651337e-05, "loss": 1.4868, "step": 5313 }, { "epoch": 0.23226539621486952, "grad_norm": 2.40625, "learning_rate": 8.730441234415324e-05, "loss": 2.3289, "step": 5314 }, { "epoch": 0.23230910441889943, "grad_norm": 2.109375, "learning_rate": 8.729983850780456e-05, "loss": 1.4998, "step": 5315 }, { "epoch": 0.2323528126229293, "grad_norm": 2.53125, "learning_rate": 8.729526396755365e-05, "loss": 2.767, "step": 5316 }, { "epoch": 0.23239652082695922, "grad_norm": 2.078125, "learning_rate": 8.729068872348681e-05, "loss": 1.6264, "step": 5317 }, { "epoch": 0.23244022903098913, "grad_norm": 2.265625, "learning_rate": 8.728611277569042e-05, "loss": 1.6083, "step": 5318 }, { "epoch": 0.232483937235019, "grad_norm": 3.1875, "learning_rate": 8.72815361242508e-05, "loss": 1.8636, "step": 5319 }, { "epoch": 0.2325276454390489, "grad_norm": 2.015625, "learning_rate": 8.727695876925433e-05, "loss": 1.5848, "step": 5320 }, { "epoch": 0.2325713536430788, "grad_norm": 2.484375, "learning_rate": 8.72723807107874e-05, "loss": 2.8408, "step": 5321 }, { "epoch": 0.2326150618471087, "grad_norm": 2.234375, "learning_rate": 8.72678019489364e-05, "loss": 1.5735, "step": 5322 }, { "epoch": 0.2326587700511386, "grad_norm": 2.28125, "learning_rate": 8.726322248378775e-05, "loss": 1.8741, "step": 5323 }, { "epoch": 0.2327024782551685, "grad_norm": 2.765625, "learning_rate": 8.725864231542784e-05, "loss": 1.8679, "step": 5324 }, { "epoch": 0.2327461864591984, "grad_norm": 2.234375, "learning_rate": 8.725406144394313e-05, "loss": 2.4191, "step": 5325 }, { "epoch": 0.23278989466322828, "grad_norm": 2.078125, "learning_rate": 8.724947986942004e-05, "loss": 1.9748, "step": 5326 }, { "epoch": 0.23283360286725818, "grad_norm": 1.9453125, "learning_rate": 8.724489759194507e-05, "loss": 1.6349, "step": 5327 }, { "epoch": 0.2328773110712881, "grad_norm": 2.4375, "learning_rate": 8.724031461160467e-05, "loss": 2.3706, "step": 5328 }, { "epoch": 0.23292101927531797, "grad_norm": 1.9609375, "learning_rate": 8.723573092848534e-05, "loss": 1.7051, "step": 5329 }, { "epoch": 0.23296472747934788, "grad_norm": 1.953125, "learning_rate": 8.723114654267356e-05, "loss": 1.9907, "step": 5330 }, { "epoch": 0.23300843568337776, "grad_norm": 2.09375, "learning_rate": 8.722656145425587e-05, "loss": 1.867, "step": 5331 }, { "epoch": 0.23305214388740766, "grad_norm": 2.15625, "learning_rate": 8.722197566331878e-05, "loss": 2.5761, "step": 5332 }, { "epoch": 0.23309585209143757, "grad_norm": 2.09375, "learning_rate": 8.721738916994883e-05, "loss": 1.9586, "step": 5333 }, { "epoch": 0.23313956029546745, "grad_norm": 2.453125, "learning_rate": 8.721280197423258e-05, "loss": 1.9132, "step": 5334 }, { "epoch": 0.23318326849949736, "grad_norm": 2.3125, "learning_rate": 8.720821407625661e-05, "loss": 2.1847, "step": 5335 }, { "epoch": 0.23322697670352724, "grad_norm": 4.625, "learning_rate": 8.720362547610747e-05, "loss": 2.6595, "step": 5336 }, { "epoch": 0.23327068490755715, "grad_norm": 2.71875, "learning_rate": 8.719903617387178e-05, "loss": 2.1804, "step": 5337 }, { "epoch": 0.23331439311158705, "grad_norm": 2.375, "learning_rate": 8.719444616963613e-05, "loss": 2.4745, "step": 5338 }, { "epoch": 0.23335810131561693, "grad_norm": 1.8359375, "learning_rate": 8.718985546348715e-05, "loss": 1.5742, "step": 5339 }, { "epoch": 0.23340180951964684, "grad_norm": 2.296875, "learning_rate": 8.718526405551146e-05, "loss": 2.6613, "step": 5340 }, { "epoch": 0.23344551772367672, "grad_norm": 2.25, "learning_rate": 8.718067194579573e-05, "loss": 2.6931, "step": 5341 }, { "epoch": 0.23348922592770663, "grad_norm": 1.8828125, "learning_rate": 8.71760791344266e-05, "loss": 1.6553, "step": 5342 }, { "epoch": 0.23353293413173654, "grad_norm": 2.125, "learning_rate": 8.717148562149076e-05, "loss": 1.8587, "step": 5343 }, { "epoch": 0.23357664233576642, "grad_norm": 2.296875, "learning_rate": 8.716689140707488e-05, "loss": 2.2146, "step": 5344 }, { "epoch": 0.23362035053979632, "grad_norm": 2.125, "learning_rate": 8.716229649126566e-05, "loss": 1.5433, "step": 5345 }, { "epoch": 0.2336640587438262, "grad_norm": 2.34375, "learning_rate": 8.715770087414983e-05, "loss": 1.7082, "step": 5346 }, { "epoch": 0.2337077669478561, "grad_norm": 2.53125, "learning_rate": 8.715310455581409e-05, "loss": 1.8389, "step": 5347 }, { "epoch": 0.23375147515188602, "grad_norm": 2.15625, "learning_rate": 8.71485075363452e-05, "loss": 1.7461, "step": 5348 }, { "epoch": 0.2337951833559159, "grad_norm": 2.34375, "learning_rate": 8.714390981582991e-05, "loss": 1.8022, "step": 5349 }, { "epoch": 0.2338388915599458, "grad_norm": 2.0625, "learning_rate": 8.713931139435497e-05, "loss": 1.8482, "step": 5350 }, { "epoch": 0.23388259976397568, "grad_norm": 1.96875, "learning_rate": 8.713471227200719e-05, "loss": 1.8543, "step": 5351 }, { "epoch": 0.2339263079680056, "grad_norm": 1.9453125, "learning_rate": 8.713011244887331e-05, "loss": 1.3542, "step": 5352 }, { "epoch": 0.2339700161720355, "grad_norm": 2.890625, "learning_rate": 8.712551192504019e-05, "loss": 1.8759, "step": 5353 }, { "epoch": 0.23401372437606538, "grad_norm": 2.78125, "learning_rate": 8.712091070059463e-05, "loss": 2.6063, "step": 5354 }, { "epoch": 0.2340574325800953, "grad_norm": 4.8125, "learning_rate": 8.711630877562345e-05, "loss": 2.0466, "step": 5355 }, { "epoch": 0.23410114078412517, "grad_norm": 3.859375, "learning_rate": 8.71117061502135e-05, "loss": 2.1461, "step": 5356 }, { "epoch": 0.23414484898815507, "grad_norm": 2.75, "learning_rate": 8.710710282445165e-05, "loss": 2.7146, "step": 5357 }, { "epoch": 0.23418855719218498, "grad_norm": 2.09375, "learning_rate": 8.710249879842476e-05, "loss": 1.7998, "step": 5358 }, { "epoch": 0.23423226539621486, "grad_norm": 7.21875, "learning_rate": 8.709789407221971e-05, "loss": 1.5454, "step": 5359 }, { "epoch": 0.23427597360024477, "grad_norm": 2.71875, "learning_rate": 8.709328864592341e-05, "loss": 2.0275, "step": 5360 }, { "epoch": 0.23431968180427465, "grad_norm": 2.4375, "learning_rate": 8.708868251962277e-05, "loss": 2.6636, "step": 5361 }, { "epoch": 0.23436339000830456, "grad_norm": 1.8984375, "learning_rate": 8.70840756934047e-05, "loss": 1.9007, "step": 5362 }, { "epoch": 0.23440709821233446, "grad_norm": 3.53125, "learning_rate": 8.707946816735617e-05, "loss": 2.0047, "step": 5363 }, { "epoch": 0.23445080641636434, "grad_norm": 2.703125, "learning_rate": 8.70748599415641e-05, "loss": 1.9084, "step": 5364 }, { "epoch": 0.23449451462039425, "grad_norm": 2.28125, "learning_rate": 8.707025101611545e-05, "loss": 2.4486, "step": 5365 }, { "epoch": 0.23453822282442413, "grad_norm": 2.546875, "learning_rate": 8.706564139109722e-05, "loss": 1.7819, "step": 5366 }, { "epoch": 0.23458193102845404, "grad_norm": 2.1875, "learning_rate": 8.706103106659638e-05, "loss": 1.785, "step": 5367 }, { "epoch": 0.23462563923248395, "grad_norm": 2.0625, "learning_rate": 8.705642004269996e-05, "loss": 2.4596, "step": 5368 }, { "epoch": 0.23466934743651383, "grad_norm": 2.34375, "learning_rate": 8.705180831949496e-05, "loss": 2.7992, "step": 5369 }, { "epoch": 0.23471305564054373, "grad_norm": 2.34375, "learning_rate": 8.70471958970684e-05, "loss": 1.4907, "step": 5370 }, { "epoch": 0.2347567638445736, "grad_norm": 3.0625, "learning_rate": 8.704258277550735e-05, "loss": 1.842, "step": 5371 }, { "epoch": 0.23480047204860352, "grad_norm": 2.484375, "learning_rate": 8.703796895489883e-05, "loss": 1.6824, "step": 5372 }, { "epoch": 0.23484418025263343, "grad_norm": 2.75, "learning_rate": 8.703335443532994e-05, "loss": 2.8193, "step": 5373 }, { "epoch": 0.2348878884566633, "grad_norm": 2.609375, "learning_rate": 8.702873921688776e-05, "loss": 2.1645, "step": 5374 }, { "epoch": 0.23493159666069321, "grad_norm": 2.078125, "learning_rate": 8.702412329965937e-05, "loss": 2.1561, "step": 5375 }, { "epoch": 0.23497530486472312, "grad_norm": 1.875, "learning_rate": 8.701950668373189e-05, "loss": 1.7574, "step": 5376 }, { "epoch": 0.235019013068753, "grad_norm": 2.140625, "learning_rate": 8.701488936919242e-05, "loss": 2.1245, "step": 5377 }, { "epoch": 0.2350627212727829, "grad_norm": 2.140625, "learning_rate": 8.701027135612814e-05, "loss": 1.968, "step": 5378 }, { "epoch": 0.2351064294768128, "grad_norm": 2.375, "learning_rate": 8.700565264462617e-05, "loss": 1.6649, "step": 5379 }, { "epoch": 0.2351501376808427, "grad_norm": 1.953125, "learning_rate": 8.700103323477366e-05, "loss": 1.9941, "step": 5380 }, { "epoch": 0.2351938458848726, "grad_norm": 2.203125, "learning_rate": 8.699641312665782e-05, "loss": 2.0117, "step": 5381 }, { "epoch": 0.23523755408890248, "grad_norm": 2.4375, "learning_rate": 8.69917923203658e-05, "loss": 1.7337, "step": 5382 }, { "epoch": 0.2352812622929324, "grad_norm": 2.90625, "learning_rate": 8.698717081598484e-05, "loss": 1.8028, "step": 5383 }, { "epoch": 0.23532497049696227, "grad_norm": 2.265625, "learning_rate": 8.698254861360211e-05, "loss": 1.703, "step": 5384 }, { "epoch": 0.23536867870099218, "grad_norm": 2.734375, "learning_rate": 8.697792571330487e-05, "loss": 2.4212, "step": 5385 }, { "epoch": 0.2354123869050221, "grad_norm": 2.5, "learning_rate": 8.697330211518038e-05, "loss": 2.0185, "step": 5386 }, { "epoch": 0.23545609510905197, "grad_norm": 2.875, "learning_rate": 8.696867781931584e-05, "loss": 3.0804, "step": 5387 }, { "epoch": 0.23549980331308187, "grad_norm": 2.375, "learning_rate": 8.696405282579855e-05, "loss": 1.683, "step": 5388 }, { "epoch": 0.23554351151711175, "grad_norm": 2.015625, "learning_rate": 8.695942713471577e-05, "loss": 2.1921, "step": 5389 }, { "epoch": 0.23558721972114166, "grad_norm": 3.125, "learning_rate": 8.695480074615483e-05, "loss": 1.9542, "step": 5390 }, { "epoch": 0.23563092792517157, "grad_norm": 2.5, "learning_rate": 8.695017366020301e-05, "loss": 1.7134, "step": 5391 }, { "epoch": 0.23567463612920145, "grad_norm": 2.203125, "learning_rate": 8.694554587694764e-05, "loss": 1.7987, "step": 5392 }, { "epoch": 0.23571834433323136, "grad_norm": 3.03125, "learning_rate": 8.694091739647602e-05, "loss": 2.0596, "step": 5393 }, { "epoch": 0.23576205253726124, "grad_norm": 2.3125, "learning_rate": 8.693628821887556e-05, "loss": 2.1124, "step": 5394 }, { "epoch": 0.23580576074129114, "grad_norm": 2.515625, "learning_rate": 8.693165834423355e-05, "loss": 2.8182, "step": 5395 }, { "epoch": 0.23584946894532105, "grad_norm": 2.265625, "learning_rate": 8.692702777263742e-05, "loss": 1.3823, "step": 5396 }, { "epoch": 0.23589317714935093, "grad_norm": 2.546875, "learning_rate": 8.692239650417452e-05, "loss": 1.7818, "step": 5397 }, { "epoch": 0.23593688535338084, "grad_norm": 2.734375, "learning_rate": 8.691776453893227e-05, "loss": 2.2617, "step": 5398 }, { "epoch": 0.23598059355741072, "grad_norm": 2.25, "learning_rate": 8.691313187699805e-05, "loss": 2.4695, "step": 5399 }, { "epoch": 0.23602430176144062, "grad_norm": 2.09375, "learning_rate": 8.690849851845933e-05, "loss": 1.8407, "step": 5400 }, { "epoch": 0.23606800996547053, "grad_norm": 2.015625, "learning_rate": 8.690386446340353e-05, "loss": 1.6898, "step": 5401 }, { "epoch": 0.2361117181695004, "grad_norm": 2.265625, "learning_rate": 8.689922971191809e-05, "loss": 1.4087, "step": 5402 }, { "epoch": 0.23615542637353032, "grad_norm": 3.109375, "learning_rate": 8.689459426409045e-05, "loss": 2.578, "step": 5403 }, { "epoch": 0.2361991345775602, "grad_norm": 2.296875, "learning_rate": 8.688995812000815e-05, "loss": 1.958, "step": 5404 }, { "epoch": 0.2362428427815901, "grad_norm": 2.515625, "learning_rate": 8.688532127975865e-05, "loss": 2.3085, "step": 5405 }, { "epoch": 0.23628655098562001, "grad_norm": 2.96875, "learning_rate": 8.688068374342944e-05, "loss": 1.7854, "step": 5406 }, { "epoch": 0.2363302591896499, "grad_norm": 2.546875, "learning_rate": 8.687604551110807e-05, "loss": 2.3475, "step": 5407 }, { "epoch": 0.2363739673936798, "grad_norm": 4.3125, "learning_rate": 8.687140658288202e-05, "loss": 1.7633, "step": 5408 }, { "epoch": 0.23641767559770968, "grad_norm": 2.953125, "learning_rate": 8.686676695883889e-05, "loss": 2.5959, "step": 5409 }, { "epoch": 0.2364613838017396, "grad_norm": 1.9765625, "learning_rate": 8.68621266390662e-05, "loss": 1.6729, "step": 5410 }, { "epoch": 0.2365050920057695, "grad_norm": 2.421875, "learning_rate": 8.685748562365153e-05, "loss": 1.8097, "step": 5411 }, { "epoch": 0.23654880020979938, "grad_norm": 4.3125, "learning_rate": 8.685284391268247e-05, "loss": 1.8138, "step": 5412 }, { "epoch": 0.23659250841382928, "grad_norm": 2.03125, "learning_rate": 8.684820150624659e-05, "loss": 1.8495, "step": 5413 }, { "epoch": 0.23663621661785916, "grad_norm": 2.453125, "learning_rate": 8.684355840443155e-05, "loss": 2.0667, "step": 5414 }, { "epoch": 0.23667992482188907, "grad_norm": 1.828125, "learning_rate": 8.683891460732492e-05, "loss": 1.48, "step": 5415 }, { "epoch": 0.23672363302591898, "grad_norm": 2.328125, "learning_rate": 8.683427011501434e-05, "loss": 1.7694, "step": 5416 }, { "epoch": 0.23676734122994886, "grad_norm": 1.9765625, "learning_rate": 8.68296249275875e-05, "loss": 1.4667, "step": 5417 }, { "epoch": 0.23681104943397877, "grad_norm": 2.140625, "learning_rate": 8.682497904513203e-05, "loss": 1.8178, "step": 5418 }, { "epoch": 0.23685475763800865, "grad_norm": 2.203125, "learning_rate": 8.68203324677356e-05, "loss": 1.4938, "step": 5419 }, { "epoch": 0.23689846584203855, "grad_norm": 2.515625, "learning_rate": 8.681568519548591e-05, "loss": 2.7004, "step": 5420 }, { "epoch": 0.23694217404606846, "grad_norm": 2.5625, "learning_rate": 8.681103722847065e-05, "loss": 2.2187, "step": 5421 }, { "epoch": 0.23698588225009834, "grad_norm": 2.359375, "learning_rate": 8.680638856677754e-05, "loss": 2.0843, "step": 5422 }, { "epoch": 0.23702959045412825, "grad_norm": 1.9453125, "learning_rate": 8.680173921049433e-05, "loss": 1.6284, "step": 5423 }, { "epoch": 0.23707329865815813, "grad_norm": 1.9609375, "learning_rate": 8.679708915970873e-05, "loss": 1.8602, "step": 5424 }, { "epoch": 0.23711700686218803, "grad_norm": 2.609375, "learning_rate": 8.679243841450848e-05, "loss": 2.6321, "step": 5425 }, { "epoch": 0.23716071506621794, "grad_norm": 2.671875, "learning_rate": 8.67877869749814e-05, "loss": 2.7493, "step": 5426 }, { "epoch": 0.23720442327024782, "grad_norm": 2.03125, "learning_rate": 8.678313484121522e-05, "loss": 1.7742, "step": 5427 }, { "epoch": 0.23724813147427773, "grad_norm": 3.03125, "learning_rate": 8.677848201329774e-05, "loss": 2.1923, "step": 5428 }, { "epoch": 0.2372918396783076, "grad_norm": 2.0625, "learning_rate": 8.677382849131682e-05, "loss": 1.8479, "step": 5429 }, { "epoch": 0.23733554788233752, "grad_norm": 2.546875, "learning_rate": 8.676917427536017e-05, "loss": 2.7202, "step": 5430 }, { "epoch": 0.23737925608636742, "grad_norm": 2.921875, "learning_rate": 8.676451936551573e-05, "loss": 1.9664, "step": 5431 }, { "epoch": 0.2374229642903973, "grad_norm": 1.8671875, "learning_rate": 8.67598637618713e-05, "loss": 1.6204, "step": 5432 }, { "epoch": 0.2374666724944272, "grad_norm": 2.28125, "learning_rate": 8.675520746451473e-05, "loss": 1.4899, "step": 5433 }, { "epoch": 0.2375103806984571, "grad_norm": 2.09375, "learning_rate": 8.675055047353391e-05, "loss": 1.8132, "step": 5434 }, { "epoch": 0.237554088902487, "grad_norm": 3.484375, "learning_rate": 8.67458927890167e-05, "loss": 1.4917, "step": 5435 }, { "epoch": 0.2375977971065169, "grad_norm": 2.890625, "learning_rate": 8.674123441105102e-05, "loss": 1.318, "step": 5436 }, { "epoch": 0.23764150531054679, "grad_norm": 2.28125, "learning_rate": 8.673657533972477e-05, "loss": 1.9401, "step": 5437 }, { "epoch": 0.2376852135145767, "grad_norm": 2.640625, "learning_rate": 8.673191557512588e-05, "loss": 2.696, "step": 5438 }, { "epoch": 0.23772892171860657, "grad_norm": 2.03125, "learning_rate": 8.672725511734228e-05, "loss": 2.0011, "step": 5439 }, { "epoch": 0.23777262992263648, "grad_norm": 2.25, "learning_rate": 8.672259396646191e-05, "loss": 1.6778, "step": 5440 }, { "epoch": 0.2378163381266664, "grad_norm": 2.015625, "learning_rate": 8.671793212257276e-05, "loss": 2.2002, "step": 5441 }, { "epoch": 0.23786004633069627, "grad_norm": 1.984375, "learning_rate": 8.671326958576279e-05, "loss": 1.8127, "step": 5442 }, { "epoch": 0.23790375453472618, "grad_norm": 2.546875, "learning_rate": 8.670860635611997e-05, "loss": 2.4345, "step": 5443 }, { "epoch": 0.23794746273875605, "grad_norm": 2.359375, "learning_rate": 8.670394243373236e-05, "loss": 2.1367, "step": 5444 }, { "epoch": 0.23799117094278596, "grad_norm": 2.265625, "learning_rate": 8.66992778186879e-05, "loss": 1.5326, "step": 5445 }, { "epoch": 0.23803487914681587, "grad_norm": 2.0, "learning_rate": 8.669461251107466e-05, "loss": 2.1858, "step": 5446 }, { "epoch": 0.23807858735084575, "grad_norm": 2.625, "learning_rate": 8.66899465109807e-05, "loss": 2.0247, "step": 5447 }, { "epoch": 0.23812229555487566, "grad_norm": 2.734375, "learning_rate": 8.668527981849402e-05, "loss": 2.1743, "step": 5448 }, { "epoch": 0.23816600375890554, "grad_norm": 2.328125, "learning_rate": 8.668061243370274e-05, "loss": 1.9896, "step": 5449 }, { "epoch": 0.23820971196293544, "grad_norm": 3.28125, "learning_rate": 8.66759443566949e-05, "loss": 2.7154, "step": 5450 }, { "epoch": 0.23825342016696535, "grad_norm": 2.78125, "learning_rate": 8.667127558755862e-05, "loss": 2.4444, "step": 5451 }, { "epoch": 0.23829712837099523, "grad_norm": 2.203125, "learning_rate": 8.6666606126382e-05, "loss": 2.4881, "step": 5452 }, { "epoch": 0.23834083657502514, "grad_norm": 2.8125, "learning_rate": 8.666193597325314e-05, "loss": 1.6791, "step": 5453 }, { "epoch": 0.23838454477905502, "grad_norm": 2.046875, "learning_rate": 8.66572651282602e-05, "loss": 2.1325, "step": 5454 }, { "epoch": 0.23842825298308493, "grad_norm": 2.609375, "learning_rate": 8.665259359149132e-05, "loss": 2.0048, "step": 5455 }, { "epoch": 0.23847196118711483, "grad_norm": 2.328125, "learning_rate": 8.664792136303465e-05, "loss": 2.0438, "step": 5456 }, { "epoch": 0.2385156693911447, "grad_norm": 2.03125, "learning_rate": 8.664324844297837e-05, "loss": 1.7064, "step": 5457 }, { "epoch": 0.23855937759517462, "grad_norm": 2.5625, "learning_rate": 8.663857483141064e-05, "loss": 2.4179, "step": 5458 }, { "epoch": 0.2386030857992045, "grad_norm": 2.03125, "learning_rate": 8.66339005284197e-05, "loss": 1.6632, "step": 5459 }, { "epoch": 0.2386467940032344, "grad_norm": 3.5, "learning_rate": 8.662922553409373e-05, "loss": 1.39, "step": 5460 }, { "epoch": 0.23869050220726432, "grad_norm": 2.28125, "learning_rate": 8.662454984852098e-05, "loss": 1.7344, "step": 5461 }, { "epoch": 0.2387342104112942, "grad_norm": 2.15625, "learning_rate": 8.661987347178964e-05, "loss": 1.8208, "step": 5462 }, { "epoch": 0.2387779186153241, "grad_norm": 2.0625, "learning_rate": 8.661519640398801e-05, "loss": 1.9327, "step": 5463 }, { "epoch": 0.23882162681935398, "grad_norm": 2.5, "learning_rate": 8.661051864520432e-05, "loss": 2.1962, "step": 5464 }, { "epoch": 0.2388653350233839, "grad_norm": 2.03125, "learning_rate": 8.660584019552687e-05, "loss": 1.6932, "step": 5465 }, { "epoch": 0.2389090432274138, "grad_norm": 2.21875, "learning_rate": 8.660116105504393e-05, "loss": 1.9655, "step": 5466 }, { "epoch": 0.23895275143144368, "grad_norm": 2.21875, "learning_rate": 8.659648122384382e-05, "loss": 2.2509, "step": 5467 }, { "epoch": 0.23899645963547358, "grad_norm": 2.1875, "learning_rate": 8.659180070201484e-05, "loss": 2.0024, "step": 5468 }, { "epoch": 0.23904016783950346, "grad_norm": 2.34375, "learning_rate": 8.658711948964533e-05, "loss": 2.3215, "step": 5469 }, { "epoch": 0.23908387604353337, "grad_norm": 2.0625, "learning_rate": 8.658243758682361e-05, "loss": 1.5055, "step": 5470 }, { "epoch": 0.23912758424756328, "grad_norm": 2.53125, "learning_rate": 8.657775499363806e-05, "loss": 1.6403, "step": 5471 }, { "epoch": 0.23917129245159316, "grad_norm": 1.9609375, "learning_rate": 8.657307171017703e-05, "loss": 1.699, "step": 5472 }, { "epoch": 0.23921500065562307, "grad_norm": 1.875, "learning_rate": 8.656838773652891e-05, "loss": 2.1911, "step": 5473 }, { "epoch": 0.23925870885965295, "grad_norm": 2.453125, "learning_rate": 8.65637030727821e-05, "loss": 2.0156, "step": 5474 }, { "epoch": 0.23930241706368285, "grad_norm": 2.609375, "learning_rate": 8.655901771902499e-05, "loss": 1.7335, "step": 5475 }, { "epoch": 0.23934612526771276, "grad_norm": 2.84375, "learning_rate": 8.655433167534601e-05, "loss": 2.5339, "step": 5476 }, { "epoch": 0.23938983347174264, "grad_norm": 1.84375, "learning_rate": 8.654964494183358e-05, "loss": 1.634, "step": 5477 }, { "epoch": 0.23943354167577255, "grad_norm": 2.546875, "learning_rate": 8.654495751857616e-05, "loss": 1.8699, "step": 5478 }, { "epoch": 0.23947724987980243, "grad_norm": 2.328125, "learning_rate": 8.654026940566222e-05, "loss": 2.0441, "step": 5479 }, { "epoch": 0.23952095808383234, "grad_norm": 2.28125, "learning_rate": 8.653558060318018e-05, "loss": 2.1452, "step": 5480 }, { "epoch": 0.23956466628786224, "grad_norm": 2.546875, "learning_rate": 8.653089111121859e-05, "loss": 2.3463, "step": 5481 }, { "epoch": 0.23960837449189212, "grad_norm": 2.578125, "learning_rate": 8.65262009298659e-05, "loss": 1.9638, "step": 5482 }, { "epoch": 0.23965208269592203, "grad_norm": 2.203125, "learning_rate": 8.652151005921064e-05, "loss": 2.3107, "step": 5483 }, { "epoch": 0.2396957908999519, "grad_norm": 2.734375, "learning_rate": 8.651681849934134e-05, "loss": 1.8512, "step": 5484 }, { "epoch": 0.23973949910398182, "grad_norm": 2.09375, "learning_rate": 8.651212625034651e-05, "loss": 1.7393, "step": 5485 }, { "epoch": 0.23978320730801173, "grad_norm": 1.8984375, "learning_rate": 8.650743331231472e-05, "loss": 1.7654, "step": 5486 }, { "epoch": 0.2398269155120416, "grad_norm": 1.96875, "learning_rate": 8.650273968533454e-05, "loss": 2.1793, "step": 5487 }, { "epoch": 0.2398706237160715, "grad_norm": 2.078125, "learning_rate": 8.649804536949454e-05, "loss": 1.9098, "step": 5488 }, { "epoch": 0.2399143319201014, "grad_norm": 2.328125, "learning_rate": 8.649335036488329e-05, "loss": 2.5594, "step": 5489 }, { "epoch": 0.2399580401241313, "grad_norm": 2.3125, "learning_rate": 8.64886546715894e-05, "loss": 2.2916, "step": 5490 }, { "epoch": 0.2400017483281612, "grad_norm": 2.625, "learning_rate": 8.64839582897015e-05, "loss": 2.9065, "step": 5491 }, { "epoch": 0.2400454565321911, "grad_norm": 2.171875, "learning_rate": 8.647926121930821e-05, "loss": 2.2818, "step": 5492 }, { "epoch": 0.240089164736221, "grad_norm": 3.203125, "learning_rate": 8.647456346049816e-05, "loss": 1.6659, "step": 5493 }, { "epoch": 0.24013287294025087, "grad_norm": 2.609375, "learning_rate": 8.646986501336001e-05, "loss": 2.4522, "step": 5494 }, { "epoch": 0.24017658114428078, "grad_norm": 3.203125, "learning_rate": 8.646516587798243e-05, "loss": 2.2336, "step": 5495 }, { "epoch": 0.2402202893483107, "grad_norm": 2.40625, "learning_rate": 8.64604660544541e-05, "loss": 1.8014, "step": 5496 }, { "epoch": 0.24026399755234057, "grad_norm": 2.21875, "learning_rate": 8.645576554286372e-05, "loss": 1.5453, "step": 5497 }, { "epoch": 0.24030770575637048, "grad_norm": 2.609375, "learning_rate": 8.645106434329996e-05, "loss": 2.1022, "step": 5498 }, { "epoch": 0.24035141396040036, "grad_norm": 2.03125, "learning_rate": 8.644636245585159e-05, "loss": 1.9023, "step": 5499 }, { "epoch": 0.24039512216443026, "grad_norm": 2.078125, "learning_rate": 8.64416598806073e-05, "loss": 1.5248, "step": 5500 }, { "epoch": 0.24043883036846017, "grad_norm": 2.09375, "learning_rate": 8.643695661765585e-05, "loss": 1.7943, "step": 5501 }, { "epoch": 0.24048253857249005, "grad_norm": 2.34375, "learning_rate": 8.643225266708601e-05, "loss": 2.9711, "step": 5502 }, { "epoch": 0.24052624677651996, "grad_norm": 2.40625, "learning_rate": 8.642754802898652e-05, "loss": 1.6168, "step": 5503 }, { "epoch": 0.24056995498054984, "grad_norm": 2.046875, "learning_rate": 8.64228427034462e-05, "loss": 1.6666, "step": 5504 }, { "epoch": 0.24061366318457975, "grad_norm": 2.796875, "learning_rate": 8.641813669055381e-05, "loss": 2.1982, "step": 5505 }, { "epoch": 0.24065737138860965, "grad_norm": 2.109375, "learning_rate": 8.641342999039819e-05, "loss": 1.7612, "step": 5506 }, { "epoch": 0.24070107959263953, "grad_norm": 2.234375, "learning_rate": 8.640872260306814e-05, "loss": 1.2477, "step": 5507 }, { "epoch": 0.24074478779666944, "grad_norm": 1.8359375, "learning_rate": 8.64040145286525e-05, "loss": 1.8345, "step": 5508 }, { "epoch": 0.24078849600069932, "grad_norm": 2.015625, "learning_rate": 8.639930576724014e-05, "loss": 1.8334, "step": 5509 }, { "epoch": 0.24083220420472923, "grad_norm": 2.0, "learning_rate": 8.639459631891989e-05, "loss": 2.0725, "step": 5510 }, { "epoch": 0.24087591240875914, "grad_norm": 2.0, "learning_rate": 8.638988618378063e-05, "loss": 2.0752, "step": 5511 }, { "epoch": 0.24091962061278901, "grad_norm": 2.625, "learning_rate": 8.638517536191127e-05, "loss": 2.3952, "step": 5512 }, { "epoch": 0.24096332881681892, "grad_norm": 2.078125, "learning_rate": 8.638046385340069e-05, "loss": 1.9946, "step": 5513 }, { "epoch": 0.2410070370208488, "grad_norm": 2.96875, "learning_rate": 8.63757516583378e-05, "loss": 1.7343, "step": 5514 }, { "epoch": 0.2410507452248787, "grad_norm": 2.59375, "learning_rate": 8.637103877681155e-05, "loss": 2.0783, "step": 5515 }, { "epoch": 0.24109445342890862, "grad_norm": 5.4375, "learning_rate": 8.636632520891085e-05, "loss": 2.0024, "step": 5516 }, { "epoch": 0.2411381616329385, "grad_norm": 2.046875, "learning_rate": 8.636161095472466e-05, "loss": 1.9169, "step": 5517 }, { "epoch": 0.2411818698369684, "grad_norm": 2.296875, "learning_rate": 8.635689601434196e-05, "loss": 2.361, "step": 5518 }, { "epoch": 0.24122557804099828, "grad_norm": 2.03125, "learning_rate": 8.635218038785171e-05, "loss": 2.0534, "step": 5519 }, { "epoch": 0.2412692862450282, "grad_norm": 2.265625, "learning_rate": 8.634746407534292e-05, "loss": 1.9767, "step": 5520 }, { "epoch": 0.2413129944490581, "grad_norm": 2.0625, "learning_rate": 8.634274707690458e-05, "loss": 1.6981, "step": 5521 }, { "epoch": 0.24135670265308798, "grad_norm": 2.234375, "learning_rate": 8.63380293926257e-05, "loss": 1.6489, "step": 5522 }, { "epoch": 0.2414004108571179, "grad_norm": 2.234375, "learning_rate": 8.633331102259532e-05, "loss": 1.6282, "step": 5523 }, { "epoch": 0.24144411906114777, "grad_norm": 1.921875, "learning_rate": 8.632859196690249e-05, "loss": 1.904, "step": 5524 }, { "epoch": 0.24148782726517767, "grad_norm": 2.609375, "learning_rate": 8.632387222563622e-05, "loss": 2.4687, "step": 5525 }, { "epoch": 0.24153153546920758, "grad_norm": 2.859375, "learning_rate": 8.631915179888566e-05, "loss": 2.9449, "step": 5526 }, { "epoch": 0.24157524367323746, "grad_norm": 2.28125, "learning_rate": 8.631443068673983e-05, "loss": 1.87, "step": 5527 }, { "epoch": 0.24161895187726737, "grad_norm": 1.9921875, "learning_rate": 8.630970888928784e-05, "loss": 1.7336, "step": 5528 }, { "epoch": 0.24166266008129725, "grad_norm": 2.34375, "learning_rate": 8.630498640661879e-05, "loss": 2.7522, "step": 5529 }, { "epoch": 0.24170636828532716, "grad_norm": 2.671875, "learning_rate": 8.630026323882181e-05, "loss": 1.2049, "step": 5530 }, { "epoch": 0.24175007648935706, "grad_norm": 2.25, "learning_rate": 8.629553938598603e-05, "loss": 2.6773, "step": 5531 }, { "epoch": 0.24179378469338694, "grad_norm": 2.203125, "learning_rate": 8.629081484820058e-05, "loss": 1.9445, "step": 5532 }, { "epoch": 0.24183749289741685, "grad_norm": 2.09375, "learning_rate": 8.628608962555467e-05, "loss": 1.8289, "step": 5533 }, { "epoch": 0.24188120110144673, "grad_norm": 1.8359375, "learning_rate": 8.62813637181374e-05, "loss": 1.6484, "step": 5534 }, { "epoch": 0.24192490930547664, "grad_norm": 3.359375, "learning_rate": 8.627663712603802e-05, "loss": 1.7932, "step": 5535 }, { "epoch": 0.24196861750950655, "grad_norm": 2.015625, "learning_rate": 8.627190984934569e-05, "loss": 2.0733, "step": 5536 }, { "epoch": 0.24201232571353642, "grad_norm": 2.65625, "learning_rate": 8.626718188814964e-05, "loss": 2.6501, "step": 5537 }, { "epoch": 0.24205603391756633, "grad_norm": 2.203125, "learning_rate": 8.626245324253906e-05, "loss": 1.7905, "step": 5538 }, { "epoch": 0.2420997421215962, "grad_norm": 2.234375, "learning_rate": 8.625772391260323e-05, "loss": 1.7842, "step": 5539 }, { "epoch": 0.24214345032562612, "grad_norm": 3.640625, "learning_rate": 8.625299389843137e-05, "loss": 3.0661, "step": 5540 }, { "epoch": 0.24218715852965603, "grad_norm": 2.25, "learning_rate": 8.624826320011275e-05, "loss": 2.2763, "step": 5541 }, { "epoch": 0.2422308667336859, "grad_norm": 2.734375, "learning_rate": 8.624353181773664e-05, "loss": 1.3571, "step": 5542 }, { "epoch": 0.24227457493771581, "grad_norm": 2.953125, "learning_rate": 8.623879975139235e-05, "loss": 1.9099, "step": 5543 }, { "epoch": 0.2423182831417457, "grad_norm": 3.15625, "learning_rate": 8.623406700116917e-05, "loss": 2.2006, "step": 5544 }, { "epoch": 0.2423619913457756, "grad_norm": 2.328125, "learning_rate": 8.622933356715639e-05, "loss": 1.6884, "step": 5545 }, { "epoch": 0.2424056995498055, "grad_norm": 2.390625, "learning_rate": 8.622459944944336e-05, "loss": 2.199, "step": 5546 }, { "epoch": 0.2424494077538354, "grad_norm": 2.46875, "learning_rate": 8.621986464811943e-05, "loss": 1.8362, "step": 5547 }, { "epoch": 0.2424931159578653, "grad_norm": 2.421875, "learning_rate": 8.621512916327394e-05, "loss": 2.5015, "step": 5548 }, { "epoch": 0.24253682416189518, "grad_norm": 2.4375, "learning_rate": 8.621039299499624e-05, "loss": 2.1437, "step": 5549 }, { "epoch": 0.24258053236592508, "grad_norm": 2.078125, "learning_rate": 8.620565614337572e-05, "loss": 1.5535, "step": 5550 }, { "epoch": 0.242624240569955, "grad_norm": 2.015625, "learning_rate": 8.620091860850177e-05, "loss": 1.5493, "step": 5551 }, { "epoch": 0.24266794877398487, "grad_norm": 1.9453125, "learning_rate": 8.619618039046382e-05, "loss": 1.6928, "step": 5552 }, { "epoch": 0.24271165697801478, "grad_norm": 2.03125, "learning_rate": 8.619144148935125e-05, "loss": 2.0614, "step": 5553 }, { "epoch": 0.24275536518204466, "grad_norm": 2.5625, "learning_rate": 8.618670190525352e-05, "loss": 1.4767, "step": 5554 }, { "epoch": 0.24279907338607457, "grad_norm": 3.296875, "learning_rate": 8.618196163826005e-05, "loss": 3.169, "step": 5555 }, { "epoch": 0.24284278159010447, "grad_norm": 2.203125, "learning_rate": 8.617722068846028e-05, "loss": 1.8736, "step": 5556 }, { "epoch": 0.24288648979413435, "grad_norm": 2.3125, "learning_rate": 8.617247905594372e-05, "loss": 2.5414, "step": 5557 }, { "epoch": 0.24293019799816426, "grad_norm": 2.671875, "learning_rate": 8.616773674079985e-05, "loss": 2.1125, "step": 5558 }, { "epoch": 0.24297390620219414, "grad_norm": 3.171875, "learning_rate": 8.616299374311814e-05, "loss": 2.6338, "step": 5559 }, { "epoch": 0.24301761440622405, "grad_norm": 2.125, "learning_rate": 8.615825006298808e-05, "loss": 1.9468, "step": 5560 }, { "epoch": 0.24306132261025395, "grad_norm": 2.28125, "learning_rate": 8.615350570049924e-05, "loss": 1.7916, "step": 5561 }, { "epoch": 0.24310503081428383, "grad_norm": 2.171875, "learning_rate": 8.614876065574113e-05, "loss": 1.6609, "step": 5562 }, { "epoch": 0.24314873901831374, "grad_norm": 2.0625, "learning_rate": 8.614401492880329e-05, "loss": 2.25, "step": 5563 }, { "epoch": 0.24319244722234362, "grad_norm": 2.1875, "learning_rate": 8.613926851977527e-05, "loss": 1.9206, "step": 5564 }, { "epoch": 0.24323615542637353, "grad_norm": 2.625, "learning_rate": 8.613452142874667e-05, "loss": 2.1226, "step": 5565 }, { "epoch": 0.24327986363040344, "grad_norm": 2.265625, "learning_rate": 8.612977365580705e-05, "loss": 1.3526, "step": 5566 }, { "epoch": 0.24332357183443332, "grad_norm": 2.671875, "learning_rate": 8.612502520104602e-05, "loss": 2.4662, "step": 5567 }, { "epoch": 0.24336728003846322, "grad_norm": 2.140625, "learning_rate": 8.61202760645532e-05, "loss": 1.9605, "step": 5568 }, { "epoch": 0.2434109882424931, "grad_norm": 2.296875, "learning_rate": 8.611552624641818e-05, "loss": 1.9366, "step": 5569 }, { "epoch": 0.243454696446523, "grad_norm": 2.46875, "learning_rate": 8.611077574673063e-05, "loss": 2.3338, "step": 5570 }, { "epoch": 0.24349840465055292, "grad_norm": 2.796875, "learning_rate": 8.610602456558018e-05, "loss": 2.3373, "step": 5571 }, { "epoch": 0.2435421128545828, "grad_norm": 2.203125, "learning_rate": 8.61012727030565e-05, "loss": 2.1172, "step": 5572 }, { "epoch": 0.2435858210586127, "grad_norm": 2.390625, "learning_rate": 8.609652015924925e-05, "loss": 1.8434, "step": 5573 }, { "epoch": 0.24362952926264259, "grad_norm": 2.15625, "learning_rate": 8.609176693424814e-05, "loss": 2.0251, "step": 5574 }, { "epoch": 0.2436732374666725, "grad_norm": 2.28125, "learning_rate": 8.608701302814286e-05, "loss": 1.6486, "step": 5575 }, { "epoch": 0.2437169456707024, "grad_norm": 3.765625, "learning_rate": 8.60822584410231e-05, "loss": 1.7621, "step": 5576 }, { "epoch": 0.24376065387473228, "grad_norm": 2.65625, "learning_rate": 8.607750317297863e-05, "loss": 1.6235, "step": 5577 }, { "epoch": 0.2438043620787622, "grad_norm": 2.140625, "learning_rate": 8.607274722409918e-05, "loss": 2.2202, "step": 5578 }, { "epoch": 0.24384807028279207, "grad_norm": 1.9765625, "learning_rate": 8.606799059447445e-05, "loss": 1.8622, "step": 5579 }, { "epoch": 0.24389177848682198, "grad_norm": 1.9296875, "learning_rate": 8.606323328419428e-05, "loss": 1.903, "step": 5580 }, { "epoch": 0.24393548669085188, "grad_norm": 2.234375, "learning_rate": 8.605847529334839e-05, "loss": 2.0046, "step": 5581 }, { "epoch": 0.24397919489488176, "grad_norm": 3.609375, "learning_rate": 8.60537166220266e-05, "loss": 1.0194, "step": 5582 }, { "epoch": 0.24402290309891167, "grad_norm": 3.671875, "learning_rate": 8.604895727031869e-05, "loss": 2.1291, "step": 5583 }, { "epoch": 0.24406661130294155, "grad_norm": 1.9765625, "learning_rate": 8.60441972383145e-05, "loss": 1.747, "step": 5584 }, { "epoch": 0.24411031950697146, "grad_norm": 2.46875, "learning_rate": 8.603943652610385e-05, "loss": 2.0577, "step": 5585 }, { "epoch": 0.24415402771100136, "grad_norm": 3.71875, "learning_rate": 8.603467513377657e-05, "loss": 1.9055, "step": 5586 }, { "epoch": 0.24419773591503124, "grad_norm": 2.796875, "learning_rate": 8.602991306142252e-05, "loss": 2.5746, "step": 5587 }, { "epoch": 0.24424144411906115, "grad_norm": 2.53125, "learning_rate": 8.602515030913159e-05, "loss": 2.2653, "step": 5588 }, { "epoch": 0.24428515232309103, "grad_norm": 2.53125, "learning_rate": 8.602038687699364e-05, "loss": 2.5107, "step": 5589 }, { "epoch": 0.24432886052712094, "grad_norm": 2.21875, "learning_rate": 8.601562276509855e-05, "loss": 1.8985, "step": 5590 }, { "epoch": 0.24437256873115085, "grad_norm": 2.34375, "learning_rate": 8.601085797353625e-05, "loss": 1.976, "step": 5591 }, { "epoch": 0.24441627693518073, "grad_norm": 2.328125, "learning_rate": 8.600609250239664e-05, "loss": 1.7231, "step": 5592 }, { "epoch": 0.24445998513921063, "grad_norm": 2.15625, "learning_rate": 8.600132635176967e-05, "loss": 1.9487, "step": 5593 }, { "epoch": 0.2445036933432405, "grad_norm": 1.9609375, "learning_rate": 8.599655952174527e-05, "loss": 2.0027, "step": 5594 }, { "epoch": 0.24454740154727042, "grad_norm": 2.078125, "learning_rate": 8.59917920124134e-05, "loss": 1.7015, "step": 5595 }, { "epoch": 0.24459110975130033, "grad_norm": 5.15625, "learning_rate": 8.598702382386403e-05, "loss": 2.4159, "step": 5596 }, { "epoch": 0.2446348179553302, "grad_norm": 1.8359375, "learning_rate": 8.598225495618715e-05, "loss": 1.7125, "step": 5597 }, { "epoch": 0.24467852615936012, "grad_norm": 2.265625, "learning_rate": 8.597748540947274e-05, "loss": 1.8233, "step": 5598 }, { "epoch": 0.24472223436339, "grad_norm": 2.265625, "learning_rate": 8.597271518381082e-05, "loss": 1.7276, "step": 5599 }, { "epoch": 0.2447659425674199, "grad_norm": 2.421875, "learning_rate": 8.596794427929142e-05, "loss": 2.5536, "step": 5600 }, { "epoch": 0.2448096507714498, "grad_norm": 3.015625, "learning_rate": 8.596317269600455e-05, "loss": 2.0302, "step": 5601 }, { "epoch": 0.2448533589754797, "grad_norm": 2.09375, "learning_rate": 8.595840043404028e-05, "loss": 1.6446, "step": 5602 }, { "epoch": 0.2448970671795096, "grad_norm": 2.1875, "learning_rate": 8.595362749348866e-05, "loss": 2.3099, "step": 5603 }, { "epoch": 0.24494077538353948, "grad_norm": 2.25, "learning_rate": 8.594885387443974e-05, "loss": 2.1294, "step": 5604 }, { "epoch": 0.24498448358756938, "grad_norm": 2.1875, "learning_rate": 8.594407957698365e-05, "loss": 1.8185, "step": 5605 }, { "epoch": 0.2450281917915993, "grad_norm": 2.0, "learning_rate": 8.593930460121044e-05, "loss": 2.2778, "step": 5606 }, { "epoch": 0.24507189999562917, "grad_norm": 2.5625, "learning_rate": 8.593452894721027e-05, "loss": 1.0981, "step": 5607 }, { "epoch": 0.24511560819965908, "grad_norm": 1.984375, "learning_rate": 8.592975261507323e-05, "loss": 1.7008, "step": 5608 }, { "epoch": 0.24515931640368896, "grad_norm": 2.03125, "learning_rate": 8.592497560488946e-05, "loss": 1.9112, "step": 5609 }, { "epoch": 0.24520302460771887, "grad_norm": 3.09375, "learning_rate": 8.592019791674913e-05, "loss": 1.4231, "step": 5610 }, { "epoch": 0.24524673281174877, "grad_norm": 2.171875, "learning_rate": 8.591541955074235e-05, "loss": 2.1189, "step": 5611 }, { "epoch": 0.24529044101577865, "grad_norm": 2.328125, "learning_rate": 8.591064050695936e-05, "loss": 2.0184, "step": 5612 }, { "epoch": 0.24533414921980856, "grad_norm": 2.71875, "learning_rate": 8.590586078549032e-05, "loss": 2.7024, "step": 5613 }, { "epoch": 0.24537785742383844, "grad_norm": 1.9921875, "learning_rate": 8.590108038642541e-05, "loss": 1.9898, "step": 5614 }, { "epoch": 0.24542156562786835, "grad_norm": 2.09375, "learning_rate": 8.589629930985487e-05, "loss": 1.8514, "step": 5615 }, { "epoch": 0.24546527383189826, "grad_norm": 2.046875, "learning_rate": 8.589151755586892e-05, "loss": 1.9361, "step": 5616 }, { "epoch": 0.24550898203592814, "grad_norm": 2.0, "learning_rate": 8.588673512455781e-05, "loss": 1.7724, "step": 5617 }, { "epoch": 0.24555269023995804, "grad_norm": 1.984375, "learning_rate": 8.588195201601177e-05, "loss": 1.6943, "step": 5618 }, { "epoch": 0.24559639844398792, "grad_norm": 2.5625, "learning_rate": 8.587716823032106e-05, "loss": 1.6803, "step": 5619 }, { "epoch": 0.24564010664801783, "grad_norm": 2.0, "learning_rate": 8.587238376757597e-05, "loss": 2.5287, "step": 5620 }, { "epoch": 0.24568381485204774, "grad_norm": 1.9765625, "learning_rate": 8.58675986278668e-05, "loss": 2.0384, "step": 5621 }, { "epoch": 0.24572752305607762, "grad_norm": 2.375, "learning_rate": 8.586281281128383e-05, "loss": 1.9584, "step": 5622 }, { "epoch": 0.24577123126010753, "grad_norm": 2.390625, "learning_rate": 8.585802631791739e-05, "loss": 2.0015, "step": 5623 }, { "epoch": 0.2458149394641374, "grad_norm": 1.9921875, "learning_rate": 8.58532391478578e-05, "loss": 2.0059, "step": 5624 }, { "epoch": 0.2458586476681673, "grad_norm": 2.203125, "learning_rate": 8.584845130119541e-05, "loss": 1.6853, "step": 5625 }, { "epoch": 0.24590235587219722, "grad_norm": 2.109375, "learning_rate": 8.584366277802057e-05, "loss": 1.7735, "step": 5626 }, { "epoch": 0.2459460640762271, "grad_norm": 2.34375, "learning_rate": 8.583887357842364e-05, "loss": 2.4116, "step": 5627 }, { "epoch": 0.245989772280257, "grad_norm": 2.765625, "learning_rate": 8.583408370249501e-05, "loss": 1.9734, "step": 5628 }, { "epoch": 0.2460334804842869, "grad_norm": 1.9453125, "learning_rate": 8.582929315032507e-05, "loss": 1.5228, "step": 5629 }, { "epoch": 0.2460771886883168, "grad_norm": 1.8984375, "learning_rate": 8.582450192200421e-05, "loss": 1.5607, "step": 5630 }, { "epoch": 0.2461208968923467, "grad_norm": 2.796875, "learning_rate": 8.581971001762286e-05, "loss": 2.2077, "step": 5631 }, { "epoch": 0.24616460509637658, "grad_norm": 2.203125, "learning_rate": 8.581491743727146e-05, "loss": 1.8515, "step": 5632 }, { "epoch": 0.2462083133004065, "grad_norm": 2.015625, "learning_rate": 8.581012418104044e-05, "loss": 1.7113, "step": 5633 }, { "epoch": 0.24625202150443637, "grad_norm": 2.59375, "learning_rate": 8.580533024902024e-05, "loss": 1.9744, "step": 5634 }, { "epoch": 0.24629572970846628, "grad_norm": 2.375, "learning_rate": 8.580053564130137e-05, "loss": 2.5002, "step": 5635 }, { "epoch": 0.24633943791249618, "grad_norm": 2.09375, "learning_rate": 8.579574035797427e-05, "loss": 2.1414, "step": 5636 }, { "epoch": 0.24638314611652606, "grad_norm": 2.40625, "learning_rate": 8.579094439912946e-05, "loss": 1.9267, "step": 5637 }, { "epoch": 0.24642685432055597, "grad_norm": 2.25, "learning_rate": 8.578614776485743e-05, "loss": 1.9097, "step": 5638 }, { "epoch": 0.24647056252458585, "grad_norm": 2.078125, "learning_rate": 8.578135045524872e-05, "loss": 2.1637, "step": 5639 }, { "epoch": 0.24651427072861576, "grad_norm": 2.046875, "learning_rate": 8.577655247039384e-05, "loss": 2.1543, "step": 5640 }, { "epoch": 0.24655797893264567, "grad_norm": 2.390625, "learning_rate": 8.577175381038337e-05, "loss": 2.2151, "step": 5641 }, { "epoch": 0.24660168713667555, "grad_norm": 2.015625, "learning_rate": 8.576695447530781e-05, "loss": 1.4585, "step": 5642 }, { "epoch": 0.24664539534070545, "grad_norm": 1.9921875, "learning_rate": 8.576215446525776e-05, "loss": 2.1276, "step": 5643 }, { "epoch": 0.24668910354473533, "grad_norm": 2.734375, "learning_rate": 8.575735378032382e-05, "loss": 1.7072, "step": 5644 }, { "epoch": 0.24673281174876524, "grad_norm": 1.8671875, "learning_rate": 8.575255242059656e-05, "loss": 1.6291, "step": 5645 }, { "epoch": 0.24677651995279515, "grad_norm": 2.515625, "learning_rate": 8.574775038616662e-05, "loss": 2.2993, "step": 5646 }, { "epoch": 0.24682022815682503, "grad_norm": 2.0625, "learning_rate": 8.574294767712458e-05, "loss": 1.3124, "step": 5647 }, { "epoch": 0.24686393636085494, "grad_norm": 2.296875, "learning_rate": 8.573814429356113e-05, "loss": 1.6663, "step": 5648 }, { "epoch": 0.24690764456488482, "grad_norm": 2.125, "learning_rate": 8.573334023556685e-05, "loss": 1.9524, "step": 5649 }, { "epoch": 0.24695135276891472, "grad_norm": 1.8125, "learning_rate": 8.572853550323245e-05, "loss": 1.5047, "step": 5650 }, { "epoch": 0.24699506097294463, "grad_norm": 2.1875, "learning_rate": 8.572373009664858e-05, "loss": 2.7538, "step": 5651 }, { "epoch": 0.2470387691769745, "grad_norm": 2.203125, "learning_rate": 8.571892401590591e-05, "loss": 1.896, "step": 5652 }, { "epoch": 0.24708247738100442, "grad_norm": 2.015625, "learning_rate": 8.571411726109519e-05, "loss": 1.6372, "step": 5653 }, { "epoch": 0.24712618558503432, "grad_norm": 2.015625, "learning_rate": 8.570930983230707e-05, "loss": 1.9993, "step": 5654 }, { "epoch": 0.2471698937890642, "grad_norm": 2.0625, "learning_rate": 8.570450172963232e-05, "loss": 1.7876, "step": 5655 }, { "epoch": 0.2472136019930941, "grad_norm": 2.40625, "learning_rate": 8.569969295316164e-05, "loss": 1.8467, "step": 5656 }, { "epoch": 0.247257310197124, "grad_norm": 2.28125, "learning_rate": 8.569488350298583e-05, "loss": 2.269, "step": 5657 }, { "epoch": 0.2473010184011539, "grad_norm": 2.640625, "learning_rate": 8.569007337919558e-05, "loss": 1.9019, "step": 5658 }, { "epoch": 0.2473447266051838, "grad_norm": 2.171875, "learning_rate": 8.568526258188172e-05, "loss": 1.8272, "step": 5659 }, { "epoch": 0.2473884348092137, "grad_norm": 2.734375, "learning_rate": 8.568045111113501e-05, "loss": 1.8297, "step": 5660 }, { "epoch": 0.2474321430132436, "grad_norm": 2.34375, "learning_rate": 8.567563896704626e-05, "loss": 2.0034, "step": 5661 }, { "epoch": 0.24747585121727347, "grad_norm": 2.1875, "learning_rate": 8.567082614970627e-05, "loss": 1.7031, "step": 5662 }, { "epoch": 0.24751955942130338, "grad_norm": 2.546875, "learning_rate": 8.566601265920588e-05, "loss": 1.8154, "step": 5663 }, { "epoch": 0.2475632676253333, "grad_norm": 2.328125, "learning_rate": 8.566119849563594e-05, "loss": 1.7957, "step": 5664 }, { "epoch": 0.24760697582936317, "grad_norm": 2.90625, "learning_rate": 8.565638365908726e-05, "loss": 2.5451, "step": 5665 }, { "epoch": 0.24765068403339308, "grad_norm": 2.171875, "learning_rate": 8.565156814965074e-05, "loss": 1.937, "step": 5666 }, { "epoch": 0.24769439223742296, "grad_norm": 2.140625, "learning_rate": 8.564675196741722e-05, "loss": 1.8272, "step": 5667 }, { "epoch": 0.24773810044145286, "grad_norm": 2.09375, "learning_rate": 8.564193511247762e-05, "loss": 1.7934, "step": 5668 }, { "epoch": 0.24778180864548277, "grad_norm": 1.9921875, "learning_rate": 8.563711758492283e-05, "loss": 2.6983, "step": 5669 }, { "epoch": 0.24782551684951265, "grad_norm": 2.09375, "learning_rate": 8.563229938484376e-05, "loss": 1.7599, "step": 5670 }, { "epoch": 0.24786922505354256, "grad_norm": 2.03125, "learning_rate": 8.562748051233135e-05, "loss": 1.7215, "step": 5671 }, { "epoch": 0.24791293325757244, "grad_norm": 2.3125, "learning_rate": 8.562266096747653e-05, "loss": 2.0875, "step": 5672 }, { "epoch": 0.24795664146160235, "grad_norm": 2.625, "learning_rate": 8.561784075037023e-05, "loss": 2.0816, "step": 5673 }, { "epoch": 0.24800034966563225, "grad_norm": 2.234375, "learning_rate": 8.561301986110347e-05, "loss": 1.7168, "step": 5674 }, { "epoch": 0.24804405786966213, "grad_norm": 1.890625, "learning_rate": 8.560819829976716e-05, "loss": 1.9503, "step": 5675 }, { "epoch": 0.24808776607369204, "grad_norm": 2.34375, "learning_rate": 8.560337606645234e-05, "loss": 2.4694, "step": 5676 }, { "epoch": 0.24813147427772192, "grad_norm": 1.8515625, "learning_rate": 8.559855316124999e-05, "loss": 2.046, "step": 5677 }, { "epoch": 0.24817518248175183, "grad_norm": 1.8515625, "learning_rate": 8.559372958425113e-05, "loss": 2.6653, "step": 5678 }, { "epoch": 0.24821889068578173, "grad_norm": 2.1875, "learning_rate": 8.55889053355468e-05, "loss": 1.7663, "step": 5679 }, { "epoch": 0.24826259888981161, "grad_norm": 2.578125, "learning_rate": 8.558408041522801e-05, "loss": 2.7306, "step": 5680 }, { "epoch": 0.24830630709384152, "grad_norm": 2.40625, "learning_rate": 8.557925482338585e-05, "loss": 1.6388, "step": 5681 }, { "epoch": 0.2483500152978714, "grad_norm": 1.9921875, "learning_rate": 8.557442856011135e-05, "loss": 1.9075, "step": 5682 }, { "epoch": 0.2483937235019013, "grad_norm": 2.0, "learning_rate": 8.556960162549563e-05, "loss": 1.7638, "step": 5683 }, { "epoch": 0.24843743170593122, "grad_norm": 2.390625, "learning_rate": 8.556477401962974e-05, "loss": 1.8392, "step": 5684 }, { "epoch": 0.2484811399099611, "grad_norm": 2.15625, "learning_rate": 8.55599457426048e-05, "loss": 1.5737, "step": 5685 }, { "epoch": 0.248524848113991, "grad_norm": 2.3125, "learning_rate": 8.555511679451196e-05, "loss": 2.7735, "step": 5686 }, { "epoch": 0.24856855631802088, "grad_norm": 1.9765625, "learning_rate": 8.555028717544227e-05, "loss": 1.7439, "step": 5687 }, { "epoch": 0.2486122645220508, "grad_norm": 3.015625, "learning_rate": 8.554545688548696e-05, "loss": 2.0171, "step": 5688 }, { "epoch": 0.2486559727260807, "grad_norm": 2.203125, "learning_rate": 8.554062592473713e-05, "loss": 2.0584, "step": 5689 }, { "epoch": 0.24869968093011058, "grad_norm": 2.640625, "learning_rate": 8.553579429328395e-05, "loss": 2.2466, "step": 5690 }, { "epoch": 0.24874338913414049, "grad_norm": 2.4375, "learning_rate": 8.553096199121863e-05, "loss": 2.3407, "step": 5691 }, { "epoch": 0.24878709733817037, "grad_norm": 2.453125, "learning_rate": 8.552612901863233e-05, "loss": 2.0368, "step": 5692 }, { "epoch": 0.24883080554220027, "grad_norm": 1.96875, "learning_rate": 8.552129537561628e-05, "loss": 1.7908, "step": 5693 }, { "epoch": 0.24887451374623018, "grad_norm": 3.171875, "learning_rate": 8.551646106226169e-05, "loss": 1.6307, "step": 5694 }, { "epoch": 0.24891822195026006, "grad_norm": 2.140625, "learning_rate": 8.55116260786598e-05, "loss": 1.781, "step": 5695 }, { "epoch": 0.24896193015428997, "grad_norm": 3.65625, "learning_rate": 8.550679042490182e-05, "loss": 2.1163, "step": 5696 }, { "epoch": 0.24900563835831985, "grad_norm": 2.03125, "learning_rate": 8.550195410107902e-05, "loss": 1.4579, "step": 5697 }, { "epoch": 0.24904934656234975, "grad_norm": 2.0, "learning_rate": 8.549711710728269e-05, "loss": 1.4266, "step": 5698 }, { "epoch": 0.24909305476637966, "grad_norm": 2.078125, "learning_rate": 8.54922794436041e-05, "loss": 1.961, "step": 5699 }, { "epoch": 0.24913676297040954, "grad_norm": 2.515625, "learning_rate": 8.548744111013452e-05, "loss": 2.7949, "step": 5700 }, { "epoch": 0.24918047117443945, "grad_norm": 2.34375, "learning_rate": 8.54826021069653e-05, "loss": 2.0042, "step": 5701 }, { "epoch": 0.24922417937846933, "grad_norm": 2.140625, "learning_rate": 8.547776243418772e-05, "loss": 1.7539, "step": 5702 }, { "epoch": 0.24926788758249924, "grad_norm": 2.4375, "learning_rate": 8.547292209189314e-05, "loss": 2.0249, "step": 5703 }, { "epoch": 0.24931159578652914, "grad_norm": 2.3125, "learning_rate": 8.546808108017288e-05, "loss": 2.0008, "step": 5704 }, { "epoch": 0.24935530399055902, "grad_norm": 2.5, "learning_rate": 8.54632393991183e-05, "loss": 2.4508, "step": 5705 }, { "epoch": 0.24939901219458893, "grad_norm": 2.25, "learning_rate": 8.545839704882082e-05, "loss": 2.152, "step": 5706 }, { "epoch": 0.2494427203986188, "grad_norm": 2.3125, "learning_rate": 8.545355402937175e-05, "loss": 2.0039, "step": 5707 }, { "epoch": 0.24948642860264872, "grad_norm": 3.46875, "learning_rate": 8.54487103408625e-05, "loss": 2.6012, "step": 5708 }, { "epoch": 0.24953013680667863, "grad_norm": 2.34375, "learning_rate": 8.544386598338452e-05, "loss": 1.8128, "step": 5709 }, { "epoch": 0.2495738450107085, "grad_norm": 2.171875, "learning_rate": 8.54390209570292e-05, "loss": 2.1566, "step": 5710 }, { "epoch": 0.2496175532147384, "grad_norm": 2.125, "learning_rate": 8.543417526188797e-05, "loss": 1.9152, "step": 5711 }, { "epoch": 0.2496612614187683, "grad_norm": 1.9921875, "learning_rate": 8.542932889805226e-05, "loss": 1.7658, "step": 5712 }, { "epoch": 0.2497049696227982, "grad_norm": 2.46875, "learning_rate": 8.542448186561359e-05, "loss": 2.0908, "step": 5713 }, { "epoch": 0.2497486778268281, "grad_norm": 2.421875, "learning_rate": 8.541963416466336e-05, "loss": 1.8004, "step": 5714 }, { "epoch": 0.249792386030858, "grad_norm": 1.8359375, "learning_rate": 8.541478579529308e-05, "loss": 1.5534, "step": 5715 }, { "epoch": 0.2498360942348879, "grad_norm": 2.296875, "learning_rate": 8.540993675759427e-05, "loss": 1.7388, "step": 5716 }, { "epoch": 0.24987980243891778, "grad_norm": 1.9609375, "learning_rate": 8.540508705165839e-05, "loss": 1.371, "step": 5717 }, { "epoch": 0.24992351064294768, "grad_norm": 1.7734375, "learning_rate": 8.540023667757702e-05, "loss": 1.7817, "step": 5718 }, { "epoch": 0.2499672188469776, "grad_norm": 3.34375, "learning_rate": 8.539538563544163e-05, "loss": 1.8297, "step": 5719 }, { "epoch": 0.25001092705100747, "grad_norm": 2.484375, "learning_rate": 8.539053392534382e-05, "loss": 2.3024, "step": 5720 }, { "epoch": 0.25005463525503735, "grad_norm": 2.015625, "learning_rate": 8.538568154737512e-05, "loss": 1.5528, "step": 5721 }, { "epoch": 0.2500983434590673, "grad_norm": 1.8984375, "learning_rate": 8.53808285016271e-05, "loss": 1.8575, "step": 5722 }, { "epoch": 0.25014205166309716, "grad_norm": 2.140625, "learning_rate": 8.537597478819136e-05, "loss": 1.7515, "step": 5723 }, { "epoch": 0.25018575986712704, "grad_norm": 2.734375, "learning_rate": 8.537112040715948e-05, "loss": 1.4989, "step": 5724 }, { "epoch": 0.250229468071157, "grad_norm": 16.25, "learning_rate": 8.536626535862309e-05, "loss": 5.8871, "step": 5725 }, { "epoch": 0.25027317627518686, "grad_norm": 2.09375, "learning_rate": 8.53614096426738e-05, "loss": 1.7411, "step": 5726 }, { "epoch": 0.25031688447921674, "grad_norm": 2.140625, "learning_rate": 8.535655325940324e-05, "loss": 2.1619, "step": 5727 }, { "epoch": 0.2503605926832466, "grad_norm": 1.8828125, "learning_rate": 8.535169620890306e-05, "loss": 1.8591, "step": 5728 }, { "epoch": 0.25040430088727655, "grad_norm": 2.3125, "learning_rate": 8.53468384912649e-05, "loss": 1.9312, "step": 5729 }, { "epoch": 0.25044800909130643, "grad_norm": 2.921875, "learning_rate": 8.534198010658047e-05, "loss": 2.3886, "step": 5730 }, { "epoch": 0.2504917172953363, "grad_norm": 1.984375, "learning_rate": 8.533712105494145e-05, "loss": 1.7222, "step": 5731 }, { "epoch": 0.25053542549936625, "grad_norm": 2.359375, "learning_rate": 8.533226133643951e-05, "loss": 1.8809, "step": 5732 }, { "epoch": 0.25057913370339613, "grad_norm": 2.640625, "learning_rate": 8.532740095116638e-05, "loss": 1.8495, "step": 5733 }, { "epoch": 0.250622841907426, "grad_norm": 2.796875, "learning_rate": 8.532253989921378e-05, "loss": 1.7284, "step": 5734 }, { "epoch": 0.25066655011145594, "grad_norm": 2.25, "learning_rate": 8.531767818067343e-05, "loss": 1.6024, "step": 5735 }, { "epoch": 0.2507102583154858, "grad_norm": 2.546875, "learning_rate": 8.53128157956371e-05, "loss": 1.7684, "step": 5736 }, { "epoch": 0.2507539665195157, "grad_norm": 2.9375, "learning_rate": 8.530795274419654e-05, "loss": 2.55, "step": 5737 }, { "epoch": 0.2507976747235456, "grad_norm": 2.625, "learning_rate": 8.530308902644353e-05, "loss": 2.0154, "step": 5738 }, { "epoch": 0.2508413829275755, "grad_norm": 1.953125, "learning_rate": 8.529822464246984e-05, "loss": 1.9612, "step": 5739 }, { "epoch": 0.2508850911316054, "grad_norm": 2.3125, "learning_rate": 8.529335959236729e-05, "loss": 1.4551, "step": 5740 }, { "epoch": 0.2509287993356353, "grad_norm": 2.84375, "learning_rate": 8.528849387622766e-05, "loss": 1.9448, "step": 5741 }, { "epoch": 0.2509725075396652, "grad_norm": 3.078125, "learning_rate": 8.528362749414279e-05, "loss": 2.8076, "step": 5742 }, { "epoch": 0.2510162157436951, "grad_norm": 2.453125, "learning_rate": 8.527876044620453e-05, "loss": 2.5786, "step": 5743 }, { "epoch": 0.25105992394772497, "grad_norm": 2.203125, "learning_rate": 8.52738927325047e-05, "loss": 1.8251, "step": 5744 }, { "epoch": 0.2511036321517549, "grad_norm": 2.6875, "learning_rate": 8.526902435313519e-05, "loss": 2.0876, "step": 5745 }, { "epoch": 0.2511473403557848, "grad_norm": 2.875, "learning_rate": 8.526415530818785e-05, "loss": 1.8256, "step": 5746 }, { "epoch": 0.25119104855981467, "grad_norm": 2.875, "learning_rate": 8.525928559775458e-05, "loss": 1.682, "step": 5747 }, { "epoch": 0.25123475676384455, "grad_norm": 2.78125, "learning_rate": 8.525441522192727e-05, "loss": 1.7598, "step": 5748 }, { "epoch": 0.2512784649678745, "grad_norm": 2.90625, "learning_rate": 8.524954418079782e-05, "loss": 1.87, "step": 5749 }, { "epoch": 0.25132217317190436, "grad_norm": 2.515625, "learning_rate": 8.52446724744582e-05, "loss": 2.6512, "step": 5750 }, { "epoch": 0.25136588137593424, "grad_norm": 2.859375, "learning_rate": 8.52398001030003e-05, "loss": 2.5546, "step": 5751 }, { "epoch": 0.2514095895799642, "grad_norm": 1.8828125, "learning_rate": 8.523492706651607e-05, "loss": 1.8665, "step": 5752 }, { "epoch": 0.25145329778399406, "grad_norm": 2.140625, "learning_rate": 8.52300533650975e-05, "loss": 1.7692, "step": 5753 }, { "epoch": 0.25149700598802394, "grad_norm": 2.0, "learning_rate": 8.522517899883654e-05, "loss": 1.8136, "step": 5754 }, { "epoch": 0.25154071419205387, "grad_norm": 2.28125, "learning_rate": 8.522030396782518e-05, "loss": 1.8562, "step": 5755 }, { "epoch": 0.25158442239608375, "grad_norm": 4.71875, "learning_rate": 8.521542827215544e-05, "loss": 1.8953, "step": 5756 }, { "epoch": 0.25162813060011363, "grad_norm": 2.765625, "learning_rate": 8.52105519119193e-05, "loss": 1.6913, "step": 5757 }, { "epoch": 0.2516718388041435, "grad_norm": 1.9140625, "learning_rate": 8.52056748872088e-05, "loss": 1.3204, "step": 5758 }, { "epoch": 0.25171554700817345, "grad_norm": 2.171875, "learning_rate": 8.520079719811598e-05, "loss": 2.2793, "step": 5759 }, { "epoch": 0.2517592552122033, "grad_norm": 2.015625, "learning_rate": 8.519591884473288e-05, "loss": 1.7225, "step": 5760 }, { "epoch": 0.2518029634162332, "grad_norm": 2.390625, "learning_rate": 8.519103982715158e-05, "loss": 2.0289, "step": 5761 }, { "epoch": 0.25184667162026314, "grad_norm": 2.15625, "learning_rate": 8.518616014546413e-05, "loss": 1.7984, "step": 5762 }, { "epoch": 0.251890379824293, "grad_norm": 2.28125, "learning_rate": 8.518127979976262e-05, "loss": 1.9239, "step": 5763 }, { "epoch": 0.2519340880283229, "grad_norm": 6.21875, "learning_rate": 8.517639879013916e-05, "loss": 1.6648, "step": 5764 }, { "epoch": 0.25197779623235284, "grad_norm": 2.1875, "learning_rate": 8.517151711668587e-05, "loss": 1.3681, "step": 5765 }, { "epoch": 0.2520215044363827, "grad_norm": 2.234375, "learning_rate": 8.516663477949485e-05, "loss": 1.8568, "step": 5766 }, { "epoch": 0.2520652126404126, "grad_norm": 2.8125, "learning_rate": 8.516175177865827e-05, "loss": 1.7729, "step": 5767 }, { "epoch": 0.2521089208444425, "grad_norm": 3.5, "learning_rate": 8.515686811426824e-05, "loss": 1.5158, "step": 5768 }, { "epoch": 0.2521526290484724, "grad_norm": 2.796875, "learning_rate": 8.515198378641694e-05, "loss": 2.2034, "step": 5769 }, { "epoch": 0.2521963372525023, "grad_norm": 2.234375, "learning_rate": 8.514709879519653e-05, "loss": 1.7099, "step": 5770 }, { "epoch": 0.25224004545653217, "grad_norm": 1.9609375, "learning_rate": 8.514221314069923e-05, "loss": 2.1182, "step": 5771 }, { "epoch": 0.2522837536605621, "grad_norm": 2.625, "learning_rate": 8.513732682301723e-05, "loss": 2.1552, "step": 5772 }, { "epoch": 0.252327461864592, "grad_norm": 2.5, "learning_rate": 8.513243984224273e-05, "loss": 2.3363, "step": 5773 }, { "epoch": 0.25237117006862186, "grad_norm": 2.125, "learning_rate": 8.512755219846793e-05, "loss": 1.8566, "step": 5774 }, { "epoch": 0.2524148782726518, "grad_norm": 1.9375, "learning_rate": 8.512266389178511e-05, "loss": 1.7914, "step": 5775 }, { "epoch": 0.2524585864766817, "grad_norm": 1.9765625, "learning_rate": 8.511777492228651e-05, "loss": 1.6455, "step": 5776 }, { "epoch": 0.25250229468071156, "grad_norm": 2.125, "learning_rate": 8.51128852900644e-05, "loss": 1.6525, "step": 5777 }, { "epoch": 0.25254600288474144, "grad_norm": 3.078125, "learning_rate": 8.510799499521103e-05, "loss": 2.3558, "step": 5778 }, { "epoch": 0.2525897110887714, "grad_norm": 2.375, "learning_rate": 8.510310403781867e-05, "loss": 1.8693, "step": 5779 }, { "epoch": 0.25263341929280125, "grad_norm": 2.390625, "learning_rate": 8.509821241797967e-05, "loss": 1.9012, "step": 5780 }, { "epoch": 0.25267712749683113, "grad_norm": 1.984375, "learning_rate": 8.509332013578632e-05, "loss": 1.8172, "step": 5781 }, { "epoch": 0.25272083570086107, "grad_norm": 2.71875, "learning_rate": 8.508842719133094e-05, "loss": 1.3929, "step": 5782 }, { "epoch": 0.25276454390489095, "grad_norm": 2.78125, "learning_rate": 8.508353358470586e-05, "loss": 2.3934, "step": 5783 }, { "epoch": 0.25280825210892083, "grad_norm": 2.203125, "learning_rate": 8.507863931600346e-05, "loss": 1.8745, "step": 5784 }, { "epoch": 0.25285196031295076, "grad_norm": 1.859375, "learning_rate": 8.507374438531607e-05, "loss": 1.8801, "step": 5785 }, { "epoch": 0.25289566851698064, "grad_norm": 1.984375, "learning_rate": 8.506884879273608e-05, "loss": 1.8671, "step": 5786 }, { "epoch": 0.2529393767210105, "grad_norm": 2.234375, "learning_rate": 8.506395253835586e-05, "loss": 1.6834, "step": 5787 }, { "epoch": 0.2529830849250404, "grad_norm": 3.6875, "learning_rate": 8.505905562226783e-05, "loss": 2.4975, "step": 5788 }, { "epoch": 0.25302679312907034, "grad_norm": 3.125, "learning_rate": 8.50541580445644e-05, "loss": 1.4975, "step": 5789 }, { "epoch": 0.2530705013331002, "grad_norm": 2.25, "learning_rate": 8.504925980533797e-05, "loss": 2.0253, "step": 5790 }, { "epoch": 0.2531142095371301, "grad_norm": 2.375, "learning_rate": 8.504436090468103e-05, "loss": 1.482, "step": 5791 }, { "epoch": 0.25315791774116003, "grad_norm": 4.6875, "learning_rate": 8.503946134268596e-05, "loss": 2.5413, "step": 5792 }, { "epoch": 0.2532016259451899, "grad_norm": 2.75, "learning_rate": 8.503456111944529e-05, "loss": 1.794, "step": 5793 }, { "epoch": 0.2532453341492198, "grad_norm": 2.578125, "learning_rate": 8.502966023505143e-05, "loss": 1.9426, "step": 5794 }, { "epoch": 0.2532890423532497, "grad_norm": 2.15625, "learning_rate": 8.502475868959692e-05, "loss": 1.6883, "step": 5795 }, { "epoch": 0.2533327505572796, "grad_norm": 2.34375, "learning_rate": 8.501985648317423e-05, "loss": 1.847, "step": 5796 }, { "epoch": 0.2533764587613095, "grad_norm": 2.078125, "learning_rate": 8.501495361587588e-05, "loss": 1.9255, "step": 5797 }, { "epoch": 0.2534201669653394, "grad_norm": 2.171875, "learning_rate": 8.501005008779439e-05, "loss": 2.6771, "step": 5798 }, { "epoch": 0.2534638751693693, "grad_norm": 2.078125, "learning_rate": 8.50051458990223e-05, "loss": 1.9616, "step": 5799 }, { "epoch": 0.2535075833733992, "grad_norm": 2.15625, "learning_rate": 8.500024104965217e-05, "loss": 1.777, "step": 5800 }, { "epoch": 0.25355129157742906, "grad_norm": 4.46875, "learning_rate": 8.499533553977654e-05, "loss": 1.363, "step": 5801 }, { "epoch": 0.253594999781459, "grad_norm": 2.078125, "learning_rate": 8.4990429369488e-05, "loss": 1.8625, "step": 5802 }, { "epoch": 0.2536387079854889, "grad_norm": 3.53125, "learning_rate": 8.498552253887912e-05, "loss": 1.6072, "step": 5803 }, { "epoch": 0.25368241618951876, "grad_norm": 2.203125, "learning_rate": 8.498061504804253e-05, "loss": 1.8609, "step": 5804 }, { "epoch": 0.2537261243935487, "grad_norm": 2.09375, "learning_rate": 8.49757068970708e-05, "loss": 1.9611, "step": 5805 }, { "epoch": 0.25376983259757857, "grad_norm": 1.953125, "learning_rate": 8.497079808605659e-05, "loss": 1.9539, "step": 5806 }, { "epoch": 0.25381354080160845, "grad_norm": 2.296875, "learning_rate": 8.496588861509253e-05, "loss": 2.2939, "step": 5807 }, { "epoch": 0.2538572490056384, "grad_norm": 2.375, "learning_rate": 8.496097848427124e-05, "loss": 2.2952, "step": 5808 }, { "epoch": 0.25390095720966827, "grad_norm": 1.9921875, "learning_rate": 8.495606769368543e-05, "loss": 2.087, "step": 5809 }, { "epoch": 0.25394466541369815, "grad_norm": 2.265625, "learning_rate": 8.495115624342772e-05, "loss": 1.9989, "step": 5810 }, { "epoch": 0.253988373617728, "grad_norm": 2.421875, "learning_rate": 8.494624413359083e-05, "loss": 2.0572, "step": 5811 }, { "epoch": 0.25403208182175796, "grad_norm": 18.25, "learning_rate": 8.494133136426746e-05, "loss": 3.0295, "step": 5812 }, { "epoch": 0.25407579002578784, "grad_norm": 2.1875, "learning_rate": 8.493641793555032e-05, "loss": 1.7027, "step": 5813 }, { "epoch": 0.2541194982298177, "grad_norm": 2.265625, "learning_rate": 8.49315038475321e-05, "loss": 2.3696, "step": 5814 }, { "epoch": 0.25416320643384765, "grad_norm": 2.328125, "learning_rate": 8.492658910030557e-05, "loss": 1.951, "step": 5815 }, { "epoch": 0.25420691463787753, "grad_norm": 2.359375, "learning_rate": 8.492167369396349e-05, "loss": 1.4611, "step": 5816 }, { "epoch": 0.2542506228419074, "grad_norm": 2.578125, "learning_rate": 8.491675762859858e-05, "loss": 2.0407, "step": 5817 }, { "epoch": 0.25429433104593735, "grad_norm": 2.171875, "learning_rate": 8.491184090430364e-05, "loss": 2.1088, "step": 5818 }, { "epoch": 0.25433803924996723, "grad_norm": 2.109375, "learning_rate": 8.490692352117147e-05, "loss": 1.9155, "step": 5819 }, { "epoch": 0.2543817474539971, "grad_norm": 1.9609375, "learning_rate": 8.490200547929481e-05, "loss": 1.7446, "step": 5820 }, { "epoch": 0.254425455658027, "grad_norm": 2.171875, "learning_rate": 8.489708677876653e-05, "loss": 1.8726, "step": 5821 }, { "epoch": 0.2544691638620569, "grad_norm": 2.03125, "learning_rate": 8.489216741967945e-05, "loss": 2.0559, "step": 5822 }, { "epoch": 0.2545128720660868, "grad_norm": 1.953125, "learning_rate": 8.488724740212636e-05, "loss": 1.9535, "step": 5823 }, { "epoch": 0.2545565802701167, "grad_norm": 2.578125, "learning_rate": 8.488232672620015e-05, "loss": 2.1477, "step": 5824 }, { "epoch": 0.2546002884741466, "grad_norm": 2.6875, "learning_rate": 8.487740539199366e-05, "loss": 1.7686, "step": 5825 }, { "epoch": 0.2546439966781765, "grad_norm": 2.40625, "learning_rate": 8.487248339959976e-05, "loss": 1.6782, "step": 5826 }, { "epoch": 0.2546877048822064, "grad_norm": 2.03125, "learning_rate": 8.486756074911137e-05, "loss": 1.6152, "step": 5827 }, { "epoch": 0.2547314130862363, "grad_norm": 2.421875, "learning_rate": 8.486263744062134e-05, "loss": 1.6807, "step": 5828 }, { "epoch": 0.2547751212902662, "grad_norm": 2.34375, "learning_rate": 8.48577134742226e-05, "loss": 1.3611, "step": 5829 }, { "epoch": 0.2548188294942961, "grad_norm": 2.1875, "learning_rate": 8.485278885000808e-05, "loss": 2.2076, "step": 5830 }, { "epoch": 0.25486253769832595, "grad_norm": 2.375, "learning_rate": 8.484786356807071e-05, "loss": 1.8441, "step": 5831 }, { "epoch": 0.2549062459023559, "grad_norm": 2.78125, "learning_rate": 8.484293762850344e-05, "loss": 2.0972, "step": 5832 }, { "epoch": 0.25494995410638577, "grad_norm": 2.09375, "learning_rate": 8.483801103139923e-05, "loss": 1.763, "step": 5833 }, { "epoch": 0.25499366231041565, "grad_norm": 2.1875, "learning_rate": 8.483308377685104e-05, "loss": 1.9297, "step": 5834 }, { "epoch": 0.2550373705144456, "grad_norm": 3.09375, "learning_rate": 8.482815586495184e-05, "loss": 1.6853, "step": 5835 }, { "epoch": 0.25508107871847546, "grad_norm": 2.75, "learning_rate": 8.482322729579468e-05, "loss": 2.3892, "step": 5836 }, { "epoch": 0.25512478692250534, "grad_norm": 3.328125, "learning_rate": 8.481829806947252e-05, "loss": 1.6897, "step": 5837 }, { "epoch": 0.2551684951265353, "grad_norm": 3.65625, "learning_rate": 8.481336818607842e-05, "loss": 1.6136, "step": 5838 }, { "epoch": 0.25521220333056516, "grad_norm": 2.234375, "learning_rate": 8.480843764570537e-05, "loss": 2.6346, "step": 5839 }, { "epoch": 0.25525591153459504, "grad_norm": 2.609375, "learning_rate": 8.480350644844645e-05, "loss": 1.8048, "step": 5840 }, { "epoch": 0.2552996197386249, "grad_norm": 2.4375, "learning_rate": 8.479857459439471e-05, "loss": 1.9963, "step": 5841 }, { "epoch": 0.25534332794265485, "grad_norm": 1.9765625, "learning_rate": 8.479364208364323e-05, "loss": 1.7453, "step": 5842 }, { "epoch": 0.25538703614668473, "grad_norm": 2.421875, "learning_rate": 8.478870891628507e-05, "loss": 1.8611, "step": 5843 }, { "epoch": 0.2554307443507146, "grad_norm": 3.984375, "learning_rate": 8.478377509241334e-05, "loss": 1.5761, "step": 5844 }, { "epoch": 0.25547445255474455, "grad_norm": 3.359375, "learning_rate": 8.477884061212115e-05, "loss": 3.9752, "step": 5845 }, { "epoch": 0.2555181607587744, "grad_norm": 2.40625, "learning_rate": 8.477390547550162e-05, "loss": 3.0617, "step": 5846 }, { "epoch": 0.2555618689628043, "grad_norm": 2.109375, "learning_rate": 8.47689696826479e-05, "loss": 1.7095, "step": 5847 }, { "epoch": 0.25560557716683424, "grad_norm": 2.40625, "learning_rate": 8.47640332336531e-05, "loss": 2.1366, "step": 5848 }, { "epoch": 0.2556492853708641, "grad_norm": 2.71875, "learning_rate": 8.47590961286104e-05, "loss": 2.36, "step": 5849 }, { "epoch": 0.255692993574894, "grad_norm": 4.09375, "learning_rate": 8.475415836761295e-05, "loss": 1.0162, "step": 5850 }, { "epoch": 0.2557367017789239, "grad_norm": 4.25, "learning_rate": 8.474921995075398e-05, "loss": 1.045, "step": 5851 }, { "epoch": 0.2557804099829538, "grad_norm": 2.203125, "learning_rate": 8.474428087812664e-05, "loss": 2.409, "step": 5852 }, { "epoch": 0.2558241181869837, "grad_norm": 2.21875, "learning_rate": 8.473934114982416e-05, "loss": 2.0687, "step": 5853 }, { "epoch": 0.2558678263910136, "grad_norm": 2.59375, "learning_rate": 8.473440076593973e-05, "loss": 2.1305, "step": 5854 }, { "epoch": 0.2559115345950435, "grad_norm": 1.8671875, "learning_rate": 8.472945972656662e-05, "loss": 1.7336, "step": 5855 }, { "epoch": 0.2559552427990734, "grad_norm": 2.046875, "learning_rate": 8.472451803179807e-05, "loss": 1.7277, "step": 5856 }, { "epoch": 0.25599895100310327, "grad_norm": 2.3125, "learning_rate": 8.47195756817273e-05, "loss": 1.8274, "step": 5857 }, { "epoch": 0.2560426592071332, "grad_norm": 2.390625, "learning_rate": 8.471463267644761e-05, "loss": 2.0623, "step": 5858 }, { "epoch": 0.2560863674111631, "grad_norm": 2.4375, "learning_rate": 8.47096890160523e-05, "loss": 1.7609, "step": 5859 }, { "epoch": 0.25613007561519296, "grad_norm": 1.8515625, "learning_rate": 8.470474470063461e-05, "loss": 1.7962, "step": 5860 }, { "epoch": 0.25617378381922284, "grad_norm": 2.296875, "learning_rate": 8.46997997302879e-05, "loss": 2.4369, "step": 5861 }, { "epoch": 0.2562174920232528, "grad_norm": 2.625, "learning_rate": 8.469485410510545e-05, "loss": 2.3581, "step": 5862 }, { "epoch": 0.25626120022728266, "grad_norm": 2.265625, "learning_rate": 8.468990782518063e-05, "loss": 2.239, "step": 5863 }, { "epoch": 0.25630490843131254, "grad_norm": 2.5, "learning_rate": 8.468496089060674e-05, "loss": 1.8326, "step": 5864 }, { "epoch": 0.2563486166353425, "grad_norm": 2.203125, "learning_rate": 8.468001330147714e-05, "loss": 2.0595, "step": 5865 }, { "epoch": 0.25639232483937235, "grad_norm": 2.359375, "learning_rate": 8.467506505788525e-05, "loss": 2.3059, "step": 5866 }, { "epoch": 0.25643603304340223, "grad_norm": 2.25, "learning_rate": 8.46701161599244e-05, "loss": 1.7416, "step": 5867 }, { "epoch": 0.25647974124743217, "grad_norm": 2.15625, "learning_rate": 8.4665166607688e-05, "loss": 1.5773, "step": 5868 }, { "epoch": 0.25652344945146205, "grad_norm": 2.078125, "learning_rate": 8.466021640126945e-05, "loss": 2.0622, "step": 5869 }, { "epoch": 0.25656715765549193, "grad_norm": 2.15625, "learning_rate": 8.465526554076217e-05, "loss": 1.6875, "step": 5870 }, { "epoch": 0.2566108658595218, "grad_norm": 2.15625, "learning_rate": 8.46503140262596e-05, "loss": 1.9292, "step": 5871 }, { "epoch": 0.25665457406355174, "grad_norm": 2.703125, "learning_rate": 8.464536185785516e-05, "loss": 1.3767, "step": 5872 }, { "epoch": 0.2566982822675816, "grad_norm": 2.8125, "learning_rate": 8.464040903564234e-05, "loss": 2.645, "step": 5873 }, { "epoch": 0.2567419904716115, "grad_norm": 2.375, "learning_rate": 8.463545555971458e-05, "loss": 1.9756, "step": 5874 }, { "epoch": 0.25678569867564144, "grad_norm": 2.234375, "learning_rate": 8.463050143016533e-05, "loss": 1.9329, "step": 5875 }, { "epoch": 0.2568294068796713, "grad_norm": 2.75, "learning_rate": 8.462554664708815e-05, "loss": 2.5044, "step": 5876 }, { "epoch": 0.2568731150837012, "grad_norm": 1.9296875, "learning_rate": 8.462059121057651e-05, "loss": 2.1592, "step": 5877 }, { "epoch": 0.25691682328773113, "grad_norm": 1.8828125, "learning_rate": 8.461563512072392e-05, "loss": 1.6989, "step": 5878 }, { "epoch": 0.256960531491761, "grad_norm": 2.125, "learning_rate": 8.461067837762391e-05, "loss": 1.9492, "step": 5879 }, { "epoch": 0.2570042396957909, "grad_norm": 1.9375, "learning_rate": 8.460572098137e-05, "loss": 1.9093, "step": 5880 }, { "epoch": 0.2570479478998208, "grad_norm": 3.578125, "learning_rate": 8.460076293205581e-05, "loss": 1.6803, "step": 5881 }, { "epoch": 0.2570916561038507, "grad_norm": 2.171875, "learning_rate": 8.459580422977484e-05, "loss": 2.7429, "step": 5882 }, { "epoch": 0.2571353643078806, "grad_norm": 1.8515625, "learning_rate": 8.459084487462072e-05, "loss": 1.4708, "step": 5883 }, { "epoch": 0.25717907251191047, "grad_norm": 2.0625, "learning_rate": 8.4585884866687e-05, "loss": 2.23, "step": 5884 }, { "epoch": 0.2572227807159404, "grad_norm": 2.75, "learning_rate": 8.458092420606727e-05, "loss": 1.7679, "step": 5885 }, { "epoch": 0.2572664889199703, "grad_norm": 2.0, "learning_rate": 8.45759628928552e-05, "loss": 1.1209, "step": 5886 }, { "epoch": 0.25731019712400016, "grad_norm": 2.359375, "learning_rate": 8.457100092714438e-05, "loss": 2.1544, "step": 5887 }, { "epoch": 0.2573539053280301, "grad_norm": 2.1875, "learning_rate": 8.456603830902845e-05, "loss": 1.6905, "step": 5888 }, { "epoch": 0.25739761353206, "grad_norm": 3.125, "learning_rate": 8.456107503860107e-05, "loss": 1.5867, "step": 5889 }, { "epoch": 0.25744132173608986, "grad_norm": 2.125, "learning_rate": 8.455611111595591e-05, "loss": 2.0275, "step": 5890 }, { "epoch": 0.25748502994011974, "grad_norm": 2.765625, "learning_rate": 8.455114654118663e-05, "loss": 1.8815, "step": 5891 }, { "epoch": 0.25752873814414967, "grad_norm": 2.21875, "learning_rate": 8.454618131438693e-05, "loss": 1.4841, "step": 5892 }, { "epoch": 0.25757244634817955, "grad_norm": 1.9375, "learning_rate": 8.454121543565052e-05, "loss": 1.9968, "step": 5893 }, { "epoch": 0.25761615455220943, "grad_norm": 4.6875, "learning_rate": 8.453624890507109e-05, "loss": 1.7452, "step": 5894 }, { "epoch": 0.25765986275623937, "grad_norm": 3.15625, "learning_rate": 8.453128172274238e-05, "loss": 2.609, "step": 5895 }, { "epoch": 0.25770357096026925, "grad_norm": 2.0, "learning_rate": 8.452631388875814e-05, "loss": 1.4952, "step": 5896 }, { "epoch": 0.2577472791642991, "grad_norm": 2.9375, "learning_rate": 8.452134540321208e-05, "loss": 1.5478, "step": 5897 }, { "epoch": 0.25779098736832906, "grad_norm": 2.671875, "learning_rate": 8.451637626619801e-05, "loss": 2.7189, "step": 5898 }, { "epoch": 0.25783469557235894, "grad_norm": 2.265625, "learning_rate": 8.451140647780967e-05, "loss": 2.0582, "step": 5899 }, { "epoch": 0.2578784037763888, "grad_norm": 1.9453125, "learning_rate": 8.450643603814086e-05, "loss": 1.5491, "step": 5900 }, { "epoch": 0.2579221119804187, "grad_norm": 2.109375, "learning_rate": 8.45014649472854e-05, "loss": 1.8259, "step": 5901 }, { "epoch": 0.25796582018444864, "grad_norm": 2.6875, "learning_rate": 8.449649320533706e-05, "loss": 2.0901, "step": 5902 }, { "epoch": 0.2580095283884785, "grad_norm": 2.5, "learning_rate": 8.44915208123897e-05, "loss": 1.1867, "step": 5903 }, { "epoch": 0.2580532365925084, "grad_norm": 2.125, "learning_rate": 8.448654776853714e-05, "loss": 2.0911, "step": 5904 }, { "epoch": 0.25809694479653833, "grad_norm": 2.171875, "learning_rate": 8.448157407387323e-05, "loss": 1.4067, "step": 5905 }, { "epoch": 0.2581406530005682, "grad_norm": 2.4375, "learning_rate": 8.447659972849183e-05, "loss": 1.938, "step": 5906 }, { "epoch": 0.2581843612045981, "grad_norm": 2.25, "learning_rate": 8.447162473248682e-05, "loss": 1.7084, "step": 5907 }, { "epoch": 0.258228069408628, "grad_norm": 2.578125, "learning_rate": 8.446664908595207e-05, "loss": 2.4166, "step": 5908 }, { "epoch": 0.2582717776126579, "grad_norm": 1.9453125, "learning_rate": 8.44616727889815e-05, "loss": 2.0318, "step": 5909 }, { "epoch": 0.2583154858166878, "grad_norm": 2.1875, "learning_rate": 8.4456695841669e-05, "loss": 1.4643, "step": 5910 }, { "epoch": 0.25835919402071766, "grad_norm": 2.015625, "learning_rate": 8.445171824410848e-05, "loss": 1.8736, "step": 5911 }, { "epoch": 0.2584029022247476, "grad_norm": 2.453125, "learning_rate": 8.444673999639393e-05, "loss": 2.0095, "step": 5912 }, { "epoch": 0.2584466104287775, "grad_norm": 3.875, "learning_rate": 8.444176109861926e-05, "loss": 1.1565, "step": 5913 }, { "epoch": 0.25849031863280736, "grad_norm": 2.265625, "learning_rate": 8.44367815508784e-05, "loss": 2.3151, "step": 5914 }, { "epoch": 0.2585340268368373, "grad_norm": 3.296875, "learning_rate": 8.443180135326538e-05, "loss": 2.2622, "step": 5915 }, { "epoch": 0.2585777350408672, "grad_norm": 2.6875, "learning_rate": 8.442682050587414e-05, "loss": 1.0598, "step": 5916 }, { "epoch": 0.25862144324489705, "grad_norm": 2.53125, "learning_rate": 8.44218390087987e-05, "loss": 1.5134, "step": 5917 }, { "epoch": 0.258665151448927, "grad_norm": 3.015625, "learning_rate": 8.441685686213306e-05, "loss": 1.896, "step": 5918 }, { "epoch": 0.25870885965295687, "grad_norm": 2.25, "learning_rate": 8.441187406597123e-05, "loss": 3.0446, "step": 5919 }, { "epoch": 0.25875256785698675, "grad_norm": 2.28125, "learning_rate": 8.440689062040725e-05, "loss": 1.9769, "step": 5920 }, { "epoch": 0.25879627606101663, "grad_norm": 2.734375, "learning_rate": 8.440190652553517e-05, "loss": 2.0879, "step": 5921 }, { "epoch": 0.25883998426504656, "grad_norm": 2.40625, "learning_rate": 8.439692178144906e-05, "loss": 2.3422, "step": 5922 }, { "epoch": 0.25888369246907644, "grad_norm": 3.859375, "learning_rate": 8.439193638824296e-05, "loss": 2.7385, "step": 5923 }, { "epoch": 0.2589274006731063, "grad_norm": 2.34375, "learning_rate": 8.438695034601097e-05, "loss": 1.7118, "step": 5924 }, { "epoch": 0.25897110887713626, "grad_norm": 2.9375, "learning_rate": 8.438196365484718e-05, "loss": 2.2815, "step": 5925 }, { "epoch": 0.25901481708116614, "grad_norm": 2.265625, "learning_rate": 8.43769763148457e-05, "loss": 1.369, "step": 5926 }, { "epoch": 0.259058525285196, "grad_norm": 2.984375, "learning_rate": 8.437198832610063e-05, "loss": 2.1702, "step": 5927 }, { "epoch": 0.25910223348922595, "grad_norm": 2.28125, "learning_rate": 8.436699968870612e-05, "loss": 1.6627, "step": 5928 }, { "epoch": 0.25914594169325583, "grad_norm": 2.1875, "learning_rate": 8.43620104027563e-05, "loss": 1.8602, "step": 5929 }, { "epoch": 0.2591896498972857, "grad_norm": 2.015625, "learning_rate": 8.435702046834535e-05, "loss": 1.3718, "step": 5930 }, { "epoch": 0.2592333581013156, "grad_norm": 5.0625, "learning_rate": 8.435202988556739e-05, "loss": 2.0221, "step": 5931 }, { "epoch": 0.2592770663053455, "grad_norm": 2.828125, "learning_rate": 8.434703865451665e-05, "loss": 2.0702, "step": 5932 }, { "epoch": 0.2593207745093754, "grad_norm": 2.3125, "learning_rate": 8.43420467752873e-05, "loss": 1.816, "step": 5933 }, { "epoch": 0.2593644827134053, "grad_norm": 2.5, "learning_rate": 8.433705424797355e-05, "loss": 2.138, "step": 5934 }, { "epoch": 0.2594081909174352, "grad_norm": 2.609375, "learning_rate": 8.43320610726696e-05, "loss": 2.6959, "step": 5935 }, { "epoch": 0.2594518991214651, "grad_norm": 1.9296875, "learning_rate": 8.43270672494697e-05, "loss": 1.9112, "step": 5936 }, { "epoch": 0.259495607325495, "grad_norm": 1.9765625, "learning_rate": 8.432207277846806e-05, "loss": 1.7615, "step": 5937 }, { "epoch": 0.2595393155295249, "grad_norm": 2.375, "learning_rate": 8.431707765975898e-05, "loss": 2.2894, "step": 5938 }, { "epoch": 0.2595830237335548, "grad_norm": 1.90625, "learning_rate": 8.43120818934367e-05, "loss": 1.6917, "step": 5939 }, { "epoch": 0.2596267319375847, "grad_norm": 2.1875, "learning_rate": 8.430708547959547e-05, "loss": 1.6675, "step": 5940 }, { "epoch": 0.25967044014161456, "grad_norm": 2.46875, "learning_rate": 8.43020884183296e-05, "loss": 2.4967, "step": 5941 }, { "epoch": 0.2597141483456445, "grad_norm": 2.34375, "learning_rate": 8.429709070973342e-05, "loss": 2.0623, "step": 5942 }, { "epoch": 0.25975785654967437, "grad_norm": 2.609375, "learning_rate": 8.429209235390123e-05, "loss": 1.6179, "step": 5943 }, { "epoch": 0.25980156475370425, "grad_norm": 2.0625, "learning_rate": 8.428709335092733e-05, "loss": 1.2945, "step": 5944 }, { "epoch": 0.2598452729577342, "grad_norm": 2.484375, "learning_rate": 8.428209370090607e-05, "loss": 2.2274, "step": 5945 }, { "epoch": 0.25988898116176407, "grad_norm": 3.125, "learning_rate": 8.427709340393181e-05, "loss": 1.5624, "step": 5946 }, { "epoch": 0.25993268936579395, "grad_norm": 2.625, "learning_rate": 8.427209246009893e-05, "loss": 1.8742, "step": 5947 }, { "epoch": 0.2599763975698239, "grad_norm": 2.21875, "learning_rate": 8.426709086950178e-05, "loss": 2.013, "step": 5948 }, { "epoch": 0.26002010577385376, "grad_norm": 2.203125, "learning_rate": 8.426208863223473e-05, "loss": 2.0844, "step": 5949 }, { "epoch": 0.26006381397788364, "grad_norm": 2.25, "learning_rate": 8.425708574839222e-05, "loss": 1.6669, "step": 5950 }, { "epoch": 0.2601075221819135, "grad_norm": 2.359375, "learning_rate": 8.425208221806862e-05, "loss": 1.0572, "step": 5951 }, { "epoch": 0.26015123038594345, "grad_norm": 2.21875, "learning_rate": 8.42470780413584e-05, "loss": 1.8001, "step": 5952 }, { "epoch": 0.26019493858997333, "grad_norm": 1.9140625, "learning_rate": 8.424207321835598e-05, "loss": 1.5684, "step": 5953 }, { "epoch": 0.2602386467940032, "grad_norm": 2.03125, "learning_rate": 8.423706774915579e-05, "loss": 1.7722, "step": 5954 }, { "epoch": 0.26028235499803315, "grad_norm": 2.703125, "learning_rate": 8.42320616338523e-05, "loss": 1.5773, "step": 5955 }, { "epoch": 0.26032606320206303, "grad_norm": 1.921875, "learning_rate": 8.422705487253996e-05, "loss": 1.4819, "step": 5956 }, { "epoch": 0.2603697714060929, "grad_norm": 2.328125, "learning_rate": 8.422204746531332e-05, "loss": 2.35, "step": 5957 }, { "epoch": 0.26041347961012284, "grad_norm": 2.78125, "learning_rate": 8.421703941226682e-05, "loss": 1.896, "step": 5958 }, { "epoch": 0.2604571878141527, "grad_norm": 2.0625, "learning_rate": 8.421203071349498e-05, "loss": 1.5529, "step": 5959 }, { "epoch": 0.2605008960181826, "grad_norm": 2.046875, "learning_rate": 8.420702136909234e-05, "loss": 1.3801, "step": 5960 }, { "epoch": 0.2605446042222125, "grad_norm": 2.0625, "learning_rate": 8.420201137915342e-05, "loss": 1.6023, "step": 5961 }, { "epoch": 0.2605883124262424, "grad_norm": 2.03125, "learning_rate": 8.419700074377275e-05, "loss": 1.5803, "step": 5962 }, { "epoch": 0.2606320206302723, "grad_norm": 2.734375, "learning_rate": 8.419198946304491e-05, "loss": 1.7009, "step": 5963 }, { "epoch": 0.2606757288343022, "grad_norm": 2.328125, "learning_rate": 8.418697753706449e-05, "loss": 2.4918, "step": 5964 }, { "epoch": 0.2607194370383321, "grad_norm": 2.28125, "learning_rate": 8.418196496592603e-05, "loss": 2.4145, "step": 5965 }, { "epoch": 0.260763145242362, "grad_norm": 3.609375, "learning_rate": 8.417695174972413e-05, "loss": 2.5977, "step": 5966 }, { "epoch": 0.2608068534463919, "grad_norm": 2.59375, "learning_rate": 8.41719378885534e-05, "loss": 2.1141, "step": 5967 }, { "epoch": 0.2608505616504218, "grad_norm": 2.125, "learning_rate": 8.416692338250848e-05, "loss": 2.1758, "step": 5968 }, { "epoch": 0.2608942698544517, "grad_norm": 2.046875, "learning_rate": 8.416190823168402e-05, "loss": 1.8151, "step": 5969 }, { "epoch": 0.26093797805848157, "grad_norm": 1.953125, "learning_rate": 8.41568924361746e-05, "loss": 1.7852, "step": 5970 }, { "epoch": 0.26098168626251145, "grad_norm": 2.71875, "learning_rate": 8.41518759960749e-05, "loss": 1.7464, "step": 5971 }, { "epoch": 0.2610253944665414, "grad_norm": 2.90625, "learning_rate": 8.41468589114796e-05, "loss": 2.5288, "step": 5972 }, { "epoch": 0.26106910267057126, "grad_norm": 4.34375, "learning_rate": 8.414184118248339e-05, "loss": 1.4174, "step": 5973 }, { "epoch": 0.26111281087460114, "grad_norm": 2.75, "learning_rate": 8.413682280918093e-05, "loss": 2.227, "step": 5974 }, { "epoch": 0.2611565190786311, "grad_norm": 2.234375, "learning_rate": 8.413180379166694e-05, "loss": 2.214, "step": 5975 }, { "epoch": 0.26120022728266096, "grad_norm": 2.390625, "learning_rate": 8.412678413003614e-05, "loss": 1.6059, "step": 5976 }, { "epoch": 0.26124393548669084, "grad_norm": 3.140625, "learning_rate": 8.412176382438326e-05, "loss": 1.8957, "step": 5977 }, { "epoch": 0.26128764369072077, "grad_norm": 2.03125, "learning_rate": 8.411674287480303e-05, "loss": 1.9365, "step": 5978 }, { "epoch": 0.26133135189475065, "grad_norm": 2.4375, "learning_rate": 8.411172128139021e-05, "loss": 2.0909, "step": 5979 }, { "epoch": 0.26137506009878053, "grad_norm": 2.484375, "learning_rate": 8.410669904423955e-05, "loss": 1.919, "step": 5980 }, { "epoch": 0.2614187683028104, "grad_norm": 2.53125, "learning_rate": 8.410167616344586e-05, "loss": 2.6475, "step": 5981 }, { "epoch": 0.26146247650684035, "grad_norm": 3.046875, "learning_rate": 8.40966526391039e-05, "loss": 1.7789, "step": 5982 }, { "epoch": 0.2615061847108702, "grad_norm": 2.6875, "learning_rate": 8.409162847130847e-05, "loss": 1.9418, "step": 5983 }, { "epoch": 0.2615498929149001, "grad_norm": 2.140625, "learning_rate": 8.40866036601544e-05, "loss": 1.7559, "step": 5984 }, { "epoch": 0.26159360111893004, "grad_norm": 2.1875, "learning_rate": 8.40815782057365e-05, "loss": 2.1114, "step": 5985 }, { "epoch": 0.2616373093229599, "grad_norm": 1.875, "learning_rate": 8.407655210814962e-05, "loss": 1.7708, "step": 5986 }, { "epoch": 0.2616810175269898, "grad_norm": 2.0625, "learning_rate": 8.407152536748861e-05, "loss": 1.7084, "step": 5987 }, { "epoch": 0.26172472573101974, "grad_norm": 2.3125, "learning_rate": 8.406649798384834e-05, "loss": 1.8533, "step": 5988 }, { "epoch": 0.2617684339350496, "grad_norm": 2.21875, "learning_rate": 8.406146995732365e-05, "loss": 1.8596, "step": 5989 }, { "epoch": 0.2618121421390795, "grad_norm": 1.828125, "learning_rate": 8.405644128800945e-05, "loss": 1.7747, "step": 5990 }, { "epoch": 0.2618558503431094, "grad_norm": 2.71875, "learning_rate": 8.405141197600064e-05, "loss": 1.559, "step": 5991 }, { "epoch": 0.2618995585471393, "grad_norm": 2.375, "learning_rate": 8.404638202139213e-05, "loss": 1.9718, "step": 5992 }, { "epoch": 0.2619432667511692, "grad_norm": 2.734375, "learning_rate": 8.404135142427884e-05, "loss": 1.9266, "step": 5993 }, { "epoch": 0.26198697495519907, "grad_norm": 2.9375, "learning_rate": 8.40363201847557e-05, "loss": 1.9553, "step": 5994 }, { "epoch": 0.262030683159229, "grad_norm": 2.71875, "learning_rate": 8.403128830291767e-05, "loss": 2.5839, "step": 5995 }, { "epoch": 0.2620743913632589, "grad_norm": 2.140625, "learning_rate": 8.402625577885968e-05, "loss": 2.0484, "step": 5996 }, { "epoch": 0.26211809956728876, "grad_norm": 2.3125, "learning_rate": 8.402122261267673e-05, "loss": 1.694, "step": 5997 }, { "epoch": 0.2621618077713187, "grad_norm": 2.625, "learning_rate": 8.40161888044638e-05, "loss": 2.2003, "step": 5998 }, { "epoch": 0.2622055159753486, "grad_norm": 2.546875, "learning_rate": 8.401115435431587e-05, "loss": 2.1018, "step": 5999 }, { "epoch": 0.26224922417937846, "grad_norm": 2.203125, "learning_rate": 8.400611926232797e-05, "loss": 2.1926, "step": 6000 }, { "epoch": 0.26229293238340834, "grad_norm": 2.109375, "learning_rate": 8.400108352859508e-05, "loss": 2.101, "step": 6001 }, { "epoch": 0.2623366405874383, "grad_norm": 2.078125, "learning_rate": 8.399604715321227e-05, "loss": 1.8448, "step": 6002 }, { "epoch": 0.26238034879146815, "grad_norm": 2.234375, "learning_rate": 8.399101013627458e-05, "loss": 2.1145, "step": 6003 }, { "epoch": 0.26242405699549803, "grad_norm": 2.125, "learning_rate": 8.398597247787705e-05, "loss": 1.993, "step": 6004 }, { "epoch": 0.26246776519952797, "grad_norm": 1.984375, "learning_rate": 8.398093417811477e-05, "loss": 1.7932, "step": 6005 }, { "epoch": 0.26251147340355785, "grad_norm": 1.703125, "learning_rate": 8.397589523708278e-05, "loss": 1.4415, "step": 6006 }, { "epoch": 0.26255518160758773, "grad_norm": 2.0625, "learning_rate": 8.397085565487621e-05, "loss": 2.1253, "step": 6007 }, { "epoch": 0.26259888981161766, "grad_norm": 2.28125, "learning_rate": 8.396581543159017e-05, "loss": 2.4571, "step": 6008 }, { "epoch": 0.26264259801564754, "grad_norm": 2.953125, "learning_rate": 8.396077456731974e-05, "loss": 1.5223, "step": 6009 }, { "epoch": 0.2626863062196774, "grad_norm": 1.96875, "learning_rate": 8.395573306216005e-05, "loss": 1.9146, "step": 6010 }, { "epoch": 0.2627300144237073, "grad_norm": 2.15625, "learning_rate": 8.395069091620628e-05, "loss": 2.2909, "step": 6011 }, { "epoch": 0.26277372262773724, "grad_norm": 2.078125, "learning_rate": 8.394564812955355e-05, "loss": 1.7254, "step": 6012 }, { "epoch": 0.2628174308317671, "grad_norm": 1.875, "learning_rate": 8.394060470229704e-05, "loss": 2.082, "step": 6013 }, { "epoch": 0.262861139035797, "grad_norm": 1.984375, "learning_rate": 8.393556063453192e-05, "loss": 1.4722, "step": 6014 }, { "epoch": 0.26290484723982693, "grad_norm": 2.046875, "learning_rate": 8.393051592635337e-05, "loss": 1.8483, "step": 6015 }, { "epoch": 0.2629485554438568, "grad_norm": 2.015625, "learning_rate": 8.392547057785661e-05, "loss": 1.9085, "step": 6016 }, { "epoch": 0.2629922636478867, "grad_norm": 2.5, "learning_rate": 8.392042458913685e-05, "loss": 2.4344, "step": 6017 }, { "epoch": 0.26303597185191663, "grad_norm": 2.46875, "learning_rate": 8.39153779602893e-05, "loss": 2.4289, "step": 6018 }, { "epoch": 0.2630796800559465, "grad_norm": 2.28125, "learning_rate": 8.391033069140921e-05, "loss": 2.3144, "step": 6019 }, { "epoch": 0.2631233882599764, "grad_norm": 1.9140625, "learning_rate": 8.390528278259182e-05, "loss": 1.7527, "step": 6020 }, { "epoch": 0.26316709646400627, "grad_norm": 2.28125, "learning_rate": 8.39002342339324e-05, "loss": 2.7173, "step": 6021 }, { "epoch": 0.2632108046680362, "grad_norm": 2.03125, "learning_rate": 8.389518504552622e-05, "loss": 1.7396, "step": 6022 }, { "epoch": 0.2632545128720661, "grad_norm": 2.625, "learning_rate": 8.389013521746857e-05, "loss": 2.6851, "step": 6023 }, { "epoch": 0.26329822107609596, "grad_norm": 2.453125, "learning_rate": 8.388508474985474e-05, "loss": 1.8855, "step": 6024 }, { "epoch": 0.2633419292801259, "grad_norm": 2.8125, "learning_rate": 8.388003364278005e-05, "loss": 1.9291, "step": 6025 }, { "epoch": 0.2633856374841558, "grad_norm": 2.359375, "learning_rate": 8.387498189633979e-05, "loss": 1.743, "step": 6026 }, { "epoch": 0.26342934568818566, "grad_norm": 3.40625, "learning_rate": 8.386992951062935e-05, "loss": 2.0112, "step": 6027 }, { "epoch": 0.2634730538922156, "grad_norm": 1.9765625, "learning_rate": 8.386487648574403e-05, "loss": 1.7297, "step": 6028 }, { "epoch": 0.26351676209624547, "grad_norm": 1.9140625, "learning_rate": 8.385982282177922e-05, "loss": 1.628, "step": 6029 }, { "epoch": 0.26356047030027535, "grad_norm": 2.265625, "learning_rate": 8.385476851883025e-05, "loss": 1.2578, "step": 6030 }, { "epoch": 0.26360417850430523, "grad_norm": 2.671875, "learning_rate": 8.384971357699254e-05, "loss": 2.0018, "step": 6031 }, { "epoch": 0.26364788670833517, "grad_norm": 3.015625, "learning_rate": 8.384465799636145e-05, "loss": 1.8163, "step": 6032 }, { "epoch": 0.26369159491236505, "grad_norm": 2.25, "learning_rate": 8.383960177703243e-05, "loss": 1.2395, "step": 6033 }, { "epoch": 0.2637353031163949, "grad_norm": 2.125, "learning_rate": 8.383454491910086e-05, "loss": 1.9073, "step": 6034 }, { "epoch": 0.26377901132042486, "grad_norm": 2.84375, "learning_rate": 8.38294874226622e-05, "loss": 1.8201, "step": 6035 }, { "epoch": 0.26382271952445474, "grad_norm": 2.5625, "learning_rate": 8.382442928781184e-05, "loss": 2.0795, "step": 6036 }, { "epoch": 0.2638664277284846, "grad_norm": 2.328125, "learning_rate": 8.38193705146453e-05, "loss": 1.7793, "step": 6037 }, { "epoch": 0.26391013593251456, "grad_norm": 2.0, "learning_rate": 8.3814311103258e-05, "loss": 1.4628, "step": 6038 }, { "epoch": 0.26395384413654444, "grad_norm": 2.0, "learning_rate": 8.380925105374544e-05, "loss": 1.5489, "step": 6039 }, { "epoch": 0.2639975523405743, "grad_norm": 2.21875, "learning_rate": 8.380419036620312e-05, "loss": 1.9486, "step": 6040 }, { "epoch": 0.2640412605446042, "grad_norm": 2.0, "learning_rate": 8.379912904072651e-05, "loss": 1.6631, "step": 6041 }, { "epoch": 0.26408496874863413, "grad_norm": 2.265625, "learning_rate": 8.379406707741115e-05, "loss": 2.0659, "step": 6042 }, { "epoch": 0.264128676952664, "grad_norm": 2.203125, "learning_rate": 8.378900447635256e-05, "loss": 1.9999, "step": 6043 }, { "epoch": 0.2641723851566939, "grad_norm": 2.234375, "learning_rate": 8.378394123764628e-05, "loss": 2.2885, "step": 6044 }, { "epoch": 0.2642160933607238, "grad_norm": 2.015625, "learning_rate": 8.377887736138786e-05, "loss": 1.6849, "step": 6045 }, { "epoch": 0.2642598015647537, "grad_norm": 2.03125, "learning_rate": 8.377381284767285e-05, "loss": 1.595, "step": 6046 }, { "epoch": 0.2643035097687836, "grad_norm": 2.3125, "learning_rate": 8.376874769659684e-05, "loss": 1.8376, "step": 6047 }, { "epoch": 0.2643472179728135, "grad_norm": 1.8828125, "learning_rate": 8.376368190825541e-05, "loss": 1.3498, "step": 6048 }, { "epoch": 0.2643909261768434, "grad_norm": 1.921875, "learning_rate": 8.375861548274418e-05, "loss": 1.5076, "step": 6049 }, { "epoch": 0.2644346343808733, "grad_norm": 2.296875, "learning_rate": 8.375354842015873e-05, "loss": 1.6232, "step": 6050 }, { "epoch": 0.26447834258490316, "grad_norm": 2.421875, "learning_rate": 8.374848072059469e-05, "loss": 1.8066, "step": 6051 }, { "epoch": 0.2645220507889331, "grad_norm": 3.21875, "learning_rate": 8.374341238414769e-05, "loss": 1.2586, "step": 6052 }, { "epoch": 0.264565758992963, "grad_norm": 3.609375, "learning_rate": 8.37383434109134e-05, "loss": 1.9279, "step": 6053 }, { "epoch": 0.26460946719699285, "grad_norm": 4.375, "learning_rate": 8.373327380098748e-05, "loss": 1.6612, "step": 6054 }, { "epoch": 0.2646531754010228, "grad_norm": 2.234375, "learning_rate": 8.372820355446558e-05, "loss": 1.921, "step": 6055 }, { "epoch": 0.26469688360505267, "grad_norm": 2.25, "learning_rate": 8.372313267144338e-05, "loss": 2.0424, "step": 6056 }, { "epoch": 0.26474059180908255, "grad_norm": 2.234375, "learning_rate": 8.37180611520166e-05, "loss": 2.2897, "step": 6057 }, { "epoch": 0.2647843000131125, "grad_norm": 2.15625, "learning_rate": 8.371298899628091e-05, "loss": 1.8176, "step": 6058 }, { "epoch": 0.26482800821714236, "grad_norm": 2.34375, "learning_rate": 8.370791620433206e-05, "loss": 1.5037, "step": 6059 }, { "epoch": 0.26487171642117224, "grad_norm": 2.359375, "learning_rate": 8.370284277626577e-05, "loss": 2.4598, "step": 6060 }, { "epoch": 0.2649154246252021, "grad_norm": 2.78125, "learning_rate": 8.369776871217781e-05, "loss": 1.8672, "step": 6061 }, { "epoch": 0.26495913282923206, "grad_norm": 2.1875, "learning_rate": 8.369269401216387e-05, "loss": 2.3062, "step": 6062 }, { "epoch": 0.26500284103326194, "grad_norm": 3.109375, "learning_rate": 8.368761867631978e-05, "loss": 2.6771, "step": 6063 }, { "epoch": 0.2650465492372918, "grad_norm": 2.484375, "learning_rate": 8.368254270474128e-05, "loss": 2.3499, "step": 6064 }, { "epoch": 0.26509025744132175, "grad_norm": 2.3125, "learning_rate": 8.367746609752419e-05, "loss": 2.033, "step": 6065 }, { "epoch": 0.26513396564535163, "grad_norm": 2.515625, "learning_rate": 8.36723888547643e-05, "loss": 1.5885, "step": 6066 }, { "epoch": 0.2651776738493815, "grad_norm": 3.09375, "learning_rate": 8.366731097655742e-05, "loss": 2.0933, "step": 6067 }, { "epoch": 0.26522138205341145, "grad_norm": 2.03125, "learning_rate": 8.366223246299938e-05, "loss": 1.838, "step": 6068 }, { "epoch": 0.2652650902574413, "grad_norm": 2.46875, "learning_rate": 8.365715331418602e-05, "loss": 2.1014, "step": 6069 }, { "epoch": 0.2653087984614712, "grad_norm": 2.203125, "learning_rate": 8.36520735302132e-05, "loss": 1.5907, "step": 6070 }, { "epoch": 0.26535250666550114, "grad_norm": 2.015625, "learning_rate": 8.364699311117677e-05, "loss": 1.9376, "step": 6071 }, { "epoch": 0.265396214869531, "grad_norm": 1.890625, "learning_rate": 8.36419120571726e-05, "loss": 1.7767, "step": 6072 }, { "epoch": 0.2654399230735609, "grad_norm": 2.6875, "learning_rate": 8.36368303682966e-05, "loss": 1.4563, "step": 6073 }, { "epoch": 0.2654836312775908, "grad_norm": 12.125, "learning_rate": 8.363174804464465e-05, "loss": 2.3386, "step": 6074 }, { "epoch": 0.2655273394816207, "grad_norm": 2.703125, "learning_rate": 8.362666508631264e-05, "loss": 1.813, "step": 6075 }, { "epoch": 0.2655710476856506, "grad_norm": 2.59375, "learning_rate": 8.362158149339656e-05, "loss": 1.9771, "step": 6076 }, { "epoch": 0.2656147558896805, "grad_norm": 2.640625, "learning_rate": 8.361649726599228e-05, "loss": 2.1627, "step": 6077 }, { "epoch": 0.2656584640937104, "grad_norm": 2.125, "learning_rate": 8.361141240419578e-05, "loss": 1.6052, "step": 6078 }, { "epoch": 0.2657021722977403, "grad_norm": 2.140625, "learning_rate": 8.3606326908103e-05, "loss": 2.0792, "step": 6079 }, { "epoch": 0.26574588050177017, "grad_norm": 1.9921875, "learning_rate": 8.360124077780994e-05, "loss": 1.6047, "step": 6080 }, { "epoch": 0.2657895887058001, "grad_norm": 2.25, "learning_rate": 8.359615401341254e-05, "loss": 2.0099, "step": 6081 }, { "epoch": 0.26583329690983, "grad_norm": 2.109375, "learning_rate": 8.359106661500683e-05, "loss": 1.9349, "step": 6082 }, { "epoch": 0.26587700511385987, "grad_norm": 3.09375, "learning_rate": 8.358597858268878e-05, "loss": 1.8877, "step": 6083 }, { "epoch": 0.26592071331788975, "grad_norm": 2.0, "learning_rate": 8.358088991655447e-05, "loss": 1.7101, "step": 6084 }, { "epoch": 0.2659644215219197, "grad_norm": 2.359375, "learning_rate": 8.357580061669985e-05, "loss": 1.8284, "step": 6085 }, { "epoch": 0.26600812972594956, "grad_norm": 2.5, "learning_rate": 8.357071068322104e-05, "loss": 2.4833, "step": 6086 }, { "epoch": 0.26605183792997944, "grad_norm": 2.125, "learning_rate": 8.356562011621404e-05, "loss": 1.9072, "step": 6087 }, { "epoch": 0.2660955461340094, "grad_norm": 2.09375, "learning_rate": 8.356052891577494e-05, "loss": 1.7125, "step": 6088 }, { "epoch": 0.26613925433803926, "grad_norm": 1.8515625, "learning_rate": 8.355543708199982e-05, "loss": 1.396, "step": 6089 }, { "epoch": 0.26618296254206913, "grad_norm": 1.9140625, "learning_rate": 8.355034461498477e-05, "loss": 1.6723, "step": 6090 }, { "epoch": 0.26622667074609907, "grad_norm": 2.28125, "learning_rate": 8.354525151482587e-05, "loss": 1.8009, "step": 6091 }, { "epoch": 0.26627037895012895, "grad_norm": 2.453125, "learning_rate": 8.354015778161925e-05, "loss": 2.198, "step": 6092 }, { "epoch": 0.26631408715415883, "grad_norm": 2.03125, "learning_rate": 8.353506341546104e-05, "loss": 1.7902, "step": 6093 }, { "epoch": 0.2663577953581887, "grad_norm": 2.28125, "learning_rate": 8.352996841644741e-05, "loss": 1.8954, "step": 6094 }, { "epoch": 0.26640150356221864, "grad_norm": 2.015625, "learning_rate": 8.352487278467443e-05, "loss": 1.6802, "step": 6095 }, { "epoch": 0.2664452117662485, "grad_norm": 2.34375, "learning_rate": 8.351977652023833e-05, "loss": 2.098, "step": 6096 }, { "epoch": 0.2664889199702784, "grad_norm": 1.9140625, "learning_rate": 8.351467962323525e-05, "loss": 1.4421, "step": 6097 }, { "epoch": 0.26653262817430834, "grad_norm": 2.3125, "learning_rate": 8.350958209376138e-05, "loss": 1.5341, "step": 6098 }, { "epoch": 0.2665763363783382, "grad_norm": 2.109375, "learning_rate": 8.350448393191295e-05, "loss": 1.7144, "step": 6099 }, { "epoch": 0.2666200445823681, "grad_norm": 3.640625, "learning_rate": 8.349938513778613e-05, "loss": 1.6064, "step": 6100 }, { "epoch": 0.26666375278639803, "grad_norm": 2.1875, "learning_rate": 8.349428571147717e-05, "loss": 1.8313, "step": 6101 }, { "epoch": 0.2667074609904279, "grad_norm": 2.90625, "learning_rate": 8.348918565308226e-05, "loss": 3.0004, "step": 6102 }, { "epoch": 0.2667511691944578, "grad_norm": 2.15625, "learning_rate": 8.34840849626977e-05, "loss": 1.8625, "step": 6103 }, { "epoch": 0.2667948773984877, "grad_norm": 3.140625, "learning_rate": 8.347898364041973e-05, "loss": 2.0085, "step": 6104 }, { "epoch": 0.2668385856025176, "grad_norm": 2.296875, "learning_rate": 8.34738816863446e-05, "loss": 2.2929, "step": 6105 }, { "epoch": 0.2668822938065475, "grad_norm": 2.390625, "learning_rate": 8.34687791005686e-05, "loss": 2.3305, "step": 6106 }, { "epoch": 0.26692600201057737, "grad_norm": 2.0625, "learning_rate": 8.346367588318804e-05, "loss": 1.8152, "step": 6107 }, { "epoch": 0.2669697102146073, "grad_norm": 2.34375, "learning_rate": 8.345857203429919e-05, "loss": 2.0315, "step": 6108 }, { "epoch": 0.2670134184186372, "grad_norm": 2.546875, "learning_rate": 8.345346755399841e-05, "loss": 2.0062, "step": 6109 }, { "epoch": 0.26705712662266706, "grad_norm": 2.265625, "learning_rate": 8.344836244238199e-05, "loss": 2.1459, "step": 6110 }, { "epoch": 0.267100834826697, "grad_norm": 2.359375, "learning_rate": 8.344325669954631e-05, "loss": 1.3544, "step": 6111 }, { "epoch": 0.2671445430307269, "grad_norm": 2.15625, "learning_rate": 8.343815032558768e-05, "loss": 2.2826, "step": 6112 }, { "epoch": 0.26718825123475676, "grad_norm": 1.8828125, "learning_rate": 8.34330433206025e-05, "loss": 1.2367, "step": 6113 }, { "epoch": 0.26723195943878664, "grad_norm": 2.546875, "learning_rate": 8.342793568468713e-05, "loss": 1.9825, "step": 6114 }, { "epoch": 0.2672756676428166, "grad_norm": 2.15625, "learning_rate": 8.342282741793796e-05, "loss": 1.8248, "step": 6115 }, { "epoch": 0.26731937584684645, "grad_norm": 2.015625, "learning_rate": 8.34177185204514e-05, "loss": 1.5443, "step": 6116 }, { "epoch": 0.26736308405087633, "grad_norm": 3.0, "learning_rate": 8.341260899232383e-05, "loss": 2.6857, "step": 6117 }, { "epoch": 0.26740679225490627, "grad_norm": 1.8515625, "learning_rate": 8.340749883365174e-05, "loss": 1.6376, "step": 6118 }, { "epoch": 0.26745050045893615, "grad_norm": 2.265625, "learning_rate": 8.340238804453148e-05, "loss": 2.387, "step": 6119 }, { "epoch": 0.267494208662966, "grad_norm": 1.9609375, "learning_rate": 8.339727662505957e-05, "loss": 1.637, "step": 6120 }, { "epoch": 0.26753791686699596, "grad_norm": 2.625, "learning_rate": 8.339216457533244e-05, "loss": 1.8343, "step": 6121 }, { "epoch": 0.26758162507102584, "grad_norm": 2.375, "learning_rate": 8.338705189544655e-05, "loss": 2.1124, "step": 6122 }, { "epoch": 0.2676253332750557, "grad_norm": 2.328125, "learning_rate": 8.338193858549842e-05, "loss": 1.2943, "step": 6123 }, { "epoch": 0.2676690414790856, "grad_norm": 1.9609375, "learning_rate": 8.337682464558452e-05, "loss": 1.8715, "step": 6124 }, { "epoch": 0.26771274968311554, "grad_norm": 2.390625, "learning_rate": 8.337171007580135e-05, "loss": 1.9677, "step": 6125 }, { "epoch": 0.2677564578871454, "grad_norm": 2.4375, "learning_rate": 8.336659487624545e-05, "loss": 2.3621, "step": 6126 }, { "epoch": 0.2678001660911753, "grad_norm": 2.046875, "learning_rate": 8.336147904701332e-05, "loss": 1.705, "step": 6127 }, { "epoch": 0.26784387429520523, "grad_norm": 1.984375, "learning_rate": 8.335636258820155e-05, "loss": 1.5326, "step": 6128 }, { "epoch": 0.2678875824992351, "grad_norm": 2.734375, "learning_rate": 8.335124549990667e-05, "loss": 2.4204, "step": 6129 }, { "epoch": 0.267931290703265, "grad_norm": 1.9375, "learning_rate": 8.334612778222524e-05, "loss": 1.5847, "step": 6130 }, { "epoch": 0.2679749989072949, "grad_norm": 2.125, "learning_rate": 8.334100943525385e-05, "loss": 1.3181, "step": 6131 }, { "epoch": 0.2680187071113248, "grad_norm": 2.046875, "learning_rate": 8.333589045908907e-05, "loss": 2.1265, "step": 6132 }, { "epoch": 0.2680624153153547, "grad_norm": 2.046875, "learning_rate": 8.333077085382752e-05, "loss": 1.4887, "step": 6133 }, { "epoch": 0.26810612351938456, "grad_norm": 2.4375, "learning_rate": 8.332565061956584e-05, "loss": 1.9686, "step": 6134 }, { "epoch": 0.2681498317234145, "grad_norm": 2.640625, "learning_rate": 8.332052975640061e-05, "loss": 1.8269, "step": 6135 }, { "epoch": 0.2681935399274444, "grad_norm": 2.0625, "learning_rate": 8.33154082644285e-05, "loss": 1.8044, "step": 6136 }, { "epoch": 0.26823724813147426, "grad_norm": 2.109375, "learning_rate": 8.331028614374612e-05, "loss": 1.8124, "step": 6137 }, { "epoch": 0.2682809563355042, "grad_norm": 2.484375, "learning_rate": 8.330516339445018e-05, "loss": 2.2855, "step": 6138 }, { "epoch": 0.2683246645395341, "grad_norm": 2.25, "learning_rate": 8.330004001663733e-05, "loss": 1.9794, "step": 6139 }, { "epoch": 0.26836837274356395, "grad_norm": 2.21875, "learning_rate": 8.329491601040427e-05, "loss": 2.4759, "step": 6140 }, { "epoch": 0.2684120809475939, "grad_norm": 2.484375, "learning_rate": 8.328979137584767e-05, "loss": 1.6447, "step": 6141 }, { "epoch": 0.26845578915162377, "grad_norm": 2.34375, "learning_rate": 8.328466611306427e-05, "loss": 2.2443, "step": 6142 }, { "epoch": 0.26849949735565365, "grad_norm": 2.359375, "learning_rate": 8.327954022215076e-05, "loss": 1.942, "step": 6143 }, { "epoch": 0.26854320555968353, "grad_norm": 2.078125, "learning_rate": 8.32744137032039e-05, "loss": 1.8281, "step": 6144 }, { "epoch": 0.26858691376371346, "grad_norm": 3.6875, "learning_rate": 8.326928655632043e-05, "loss": 1.2905, "step": 6145 }, { "epoch": 0.26863062196774334, "grad_norm": 2.0625, "learning_rate": 8.32641587815971e-05, "loss": 1.9108, "step": 6146 }, { "epoch": 0.2686743301717732, "grad_norm": 2.34375, "learning_rate": 8.325903037913067e-05, "loss": 2.4243, "step": 6147 }, { "epoch": 0.26871803837580316, "grad_norm": 2.171875, "learning_rate": 8.325390134901794e-05, "loss": 1.801, "step": 6148 }, { "epoch": 0.26876174657983304, "grad_norm": 2.125, "learning_rate": 8.324877169135569e-05, "loss": 1.8057, "step": 6149 }, { "epoch": 0.2688054547838629, "grad_norm": 2.078125, "learning_rate": 8.324364140624073e-05, "loss": 2.0626, "step": 6150 }, { "epoch": 0.26884916298789285, "grad_norm": 2.09375, "learning_rate": 8.323851049376987e-05, "loss": 1.8675, "step": 6151 }, { "epoch": 0.26889287119192273, "grad_norm": 1.9453125, "learning_rate": 8.323337895403994e-05, "loss": 1.7426, "step": 6152 }, { "epoch": 0.2689365793959526, "grad_norm": 2.21875, "learning_rate": 8.322824678714776e-05, "loss": 1.7979, "step": 6153 }, { "epoch": 0.2689802875999825, "grad_norm": 2.265625, "learning_rate": 8.322311399319023e-05, "loss": 1.97, "step": 6154 }, { "epoch": 0.26902399580401243, "grad_norm": 1.921875, "learning_rate": 8.321798057226417e-05, "loss": 1.6476, "step": 6155 }, { "epoch": 0.2690677040080423, "grad_norm": 2.0, "learning_rate": 8.32128465244665e-05, "loss": 1.8328, "step": 6156 }, { "epoch": 0.2691114122120722, "grad_norm": 2.4375, "learning_rate": 8.320771184989404e-05, "loss": 1.6689, "step": 6157 }, { "epoch": 0.2691551204161021, "grad_norm": 2.75, "learning_rate": 8.320257654864374e-05, "loss": 1.8429, "step": 6158 }, { "epoch": 0.269198828620132, "grad_norm": 2.453125, "learning_rate": 8.31974406208125e-05, "loss": 1.3874, "step": 6159 }, { "epoch": 0.2692425368241619, "grad_norm": 3.5625, "learning_rate": 8.319230406649723e-05, "loss": 2.851, "step": 6160 }, { "epoch": 0.2692862450281918, "grad_norm": 1.984375, "learning_rate": 8.318716688579488e-05, "loss": 1.8165, "step": 6161 }, { "epoch": 0.2693299532322217, "grad_norm": 2.4375, "learning_rate": 8.31820290788024e-05, "loss": 1.682, "step": 6162 }, { "epoch": 0.2693736614362516, "grad_norm": 1.8828125, "learning_rate": 8.317689064561671e-05, "loss": 1.5811, "step": 6163 }, { "epoch": 0.26941736964028146, "grad_norm": 2.078125, "learning_rate": 8.317175158633483e-05, "loss": 1.8027, "step": 6164 }, { "epoch": 0.2694610778443114, "grad_norm": 2.015625, "learning_rate": 8.316661190105371e-05, "loss": 1.8132, "step": 6165 }, { "epoch": 0.26950478604834127, "grad_norm": 1.9140625, "learning_rate": 8.316147158987036e-05, "loss": 1.6619, "step": 6166 }, { "epoch": 0.26954849425237115, "grad_norm": 2.484375, "learning_rate": 8.315633065288176e-05, "loss": 1.9372, "step": 6167 }, { "epoch": 0.2695922024564011, "grad_norm": 2.578125, "learning_rate": 8.315118909018495e-05, "loss": 2.3095, "step": 6168 }, { "epoch": 0.26963591066043097, "grad_norm": 1.9453125, "learning_rate": 8.314604690187695e-05, "loss": 1.9157, "step": 6169 }, { "epoch": 0.26967961886446085, "grad_norm": 2.234375, "learning_rate": 8.314090408805482e-05, "loss": 2.1959, "step": 6170 }, { "epoch": 0.2697233270684908, "grad_norm": 1.9296875, "learning_rate": 8.313576064881558e-05, "loss": 1.7974, "step": 6171 }, { "epoch": 0.26976703527252066, "grad_norm": 1.9765625, "learning_rate": 8.313061658425632e-05, "loss": 2.098, "step": 6172 }, { "epoch": 0.26981074347655054, "grad_norm": 2.140625, "learning_rate": 8.312547189447409e-05, "loss": 1.4758, "step": 6173 }, { "epoch": 0.2698544516805804, "grad_norm": 2.421875, "learning_rate": 8.312032657956599e-05, "loss": 2.3724, "step": 6174 }, { "epoch": 0.26989815988461036, "grad_norm": 2.875, "learning_rate": 8.311518063962914e-05, "loss": 2.2493, "step": 6175 }, { "epoch": 0.26994186808864024, "grad_norm": 2.8125, "learning_rate": 8.311003407476064e-05, "loss": 2.4625, "step": 6176 }, { "epoch": 0.2699855762926701, "grad_norm": 1.90625, "learning_rate": 8.31048868850576e-05, "loss": 1.5971, "step": 6177 }, { "epoch": 0.27002928449670005, "grad_norm": 2.328125, "learning_rate": 8.309973907061715e-05, "loss": 2.2358, "step": 6178 }, { "epoch": 0.27007299270072993, "grad_norm": 2.25, "learning_rate": 8.309459063153646e-05, "loss": 1.8094, "step": 6179 }, { "epoch": 0.2701167009047598, "grad_norm": 2.09375, "learning_rate": 8.308944156791268e-05, "loss": 1.8803, "step": 6180 }, { "epoch": 0.27016040910878975, "grad_norm": 2.09375, "learning_rate": 8.308429187984297e-05, "loss": 1.9824, "step": 6181 }, { "epoch": 0.2702041173128196, "grad_norm": 2.78125, "learning_rate": 8.307914156742454e-05, "loss": 1.7535, "step": 6182 }, { "epoch": 0.2702478255168495, "grad_norm": 4.3125, "learning_rate": 8.307399063075453e-05, "loss": 1.4322, "step": 6183 }, { "epoch": 0.2702915337208794, "grad_norm": 2.265625, "learning_rate": 8.306883906993022e-05, "loss": 1.9071, "step": 6184 }, { "epoch": 0.2703352419249093, "grad_norm": 2.578125, "learning_rate": 8.306368688504876e-05, "loss": 2.7365, "step": 6185 }, { "epoch": 0.2703789501289392, "grad_norm": 1.921875, "learning_rate": 8.305853407620742e-05, "loss": 1.8138, "step": 6186 }, { "epoch": 0.2704226583329691, "grad_norm": 2.1875, "learning_rate": 8.305338064350342e-05, "loss": 2.2551, "step": 6187 }, { "epoch": 0.270466366536999, "grad_norm": 2.03125, "learning_rate": 8.304822658703402e-05, "loss": 1.7694, "step": 6188 }, { "epoch": 0.2705100747410289, "grad_norm": 2.34375, "learning_rate": 8.304307190689649e-05, "loss": 1.9035, "step": 6189 }, { "epoch": 0.2705537829450588, "grad_norm": 2.796875, "learning_rate": 8.30379166031881e-05, "loss": 2.906, "step": 6190 }, { "epoch": 0.2705974911490887, "grad_norm": 2.5, "learning_rate": 8.303276067600614e-05, "loss": 1.8274, "step": 6191 }, { "epoch": 0.2706411993531186, "grad_norm": 4.8125, "learning_rate": 8.30276041254479e-05, "loss": 1.8758, "step": 6192 }, { "epoch": 0.27068490755714847, "grad_norm": 1.9609375, "learning_rate": 8.30224469516107e-05, "loss": 1.784, "step": 6193 }, { "epoch": 0.27072861576117835, "grad_norm": 2.109375, "learning_rate": 8.301728915459188e-05, "loss": 1.81, "step": 6194 }, { "epoch": 0.2707723239652083, "grad_norm": 2.328125, "learning_rate": 8.301213073448874e-05, "loss": 1.76, "step": 6195 }, { "epoch": 0.27081603216923816, "grad_norm": 2.3125, "learning_rate": 8.300697169139867e-05, "loss": 1.9981, "step": 6196 }, { "epoch": 0.27085974037326804, "grad_norm": 2.203125, "learning_rate": 8.300181202541899e-05, "loss": 2.1087, "step": 6197 }, { "epoch": 0.270903448577298, "grad_norm": 2.6875, "learning_rate": 8.299665173664708e-05, "loss": 2.4448, "step": 6198 }, { "epoch": 0.27094715678132786, "grad_norm": 2.546875, "learning_rate": 8.299149082518034e-05, "loss": 2.4269, "step": 6199 }, { "epoch": 0.27099086498535774, "grad_norm": 2.484375, "learning_rate": 8.298632929111614e-05, "loss": 2.4778, "step": 6200 }, { "epoch": 0.2710345731893877, "grad_norm": 2.328125, "learning_rate": 8.298116713455191e-05, "loss": 1.7866, "step": 6201 }, { "epoch": 0.27107828139341755, "grad_norm": 2.65625, "learning_rate": 8.297600435558505e-05, "loss": 1.9275, "step": 6202 }, { "epoch": 0.27112198959744743, "grad_norm": 2.703125, "learning_rate": 8.297084095431298e-05, "loss": 2.4905, "step": 6203 }, { "epoch": 0.2711656978014773, "grad_norm": 2.625, "learning_rate": 8.296567693083317e-05, "loss": 2.0117, "step": 6204 }, { "epoch": 0.27120940600550725, "grad_norm": 1.859375, "learning_rate": 8.296051228524305e-05, "loss": 1.6825, "step": 6205 }, { "epoch": 0.2712531142095371, "grad_norm": 2.5625, "learning_rate": 8.29553470176401e-05, "loss": 2.1916, "step": 6206 }, { "epoch": 0.271296822413567, "grad_norm": 1.859375, "learning_rate": 8.295018112812179e-05, "loss": 1.6755, "step": 6207 }, { "epoch": 0.27134053061759694, "grad_norm": 2.03125, "learning_rate": 8.294501461678559e-05, "loss": 1.9042, "step": 6208 }, { "epoch": 0.2713842388216268, "grad_norm": 4.15625, "learning_rate": 8.293984748372901e-05, "loss": 2.5244, "step": 6209 }, { "epoch": 0.2714279470256567, "grad_norm": 2.546875, "learning_rate": 8.293467972904958e-05, "loss": 2.1528, "step": 6210 }, { "epoch": 0.27147165522968664, "grad_norm": 2.125, "learning_rate": 8.292951135284481e-05, "loss": 1.8469, "step": 6211 }, { "epoch": 0.2715153634337165, "grad_norm": 2.15625, "learning_rate": 8.292434235521222e-05, "loss": 1.7206, "step": 6212 }, { "epoch": 0.2715590716377464, "grad_norm": 2.46875, "learning_rate": 8.291917273624939e-05, "loss": 1.7422, "step": 6213 }, { "epoch": 0.2716027798417763, "grad_norm": 2.3125, "learning_rate": 8.291400249605386e-05, "loss": 2.3142, "step": 6214 }, { "epoch": 0.2716464880458062, "grad_norm": 2.296875, "learning_rate": 8.290883163472318e-05, "loss": 2.354, "step": 6215 }, { "epoch": 0.2716901962498361, "grad_norm": 1.765625, "learning_rate": 8.290366015235497e-05, "loss": 1.3597, "step": 6216 }, { "epoch": 0.27173390445386597, "grad_norm": 2.84375, "learning_rate": 8.289848804904678e-05, "loss": 2.4125, "step": 6217 }, { "epoch": 0.2717776126578959, "grad_norm": 2.21875, "learning_rate": 8.289331532489626e-05, "loss": 1.9229, "step": 6218 }, { "epoch": 0.2718213208619258, "grad_norm": 1.9609375, "learning_rate": 8.2888141980001e-05, "loss": 1.9399, "step": 6219 }, { "epoch": 0.27186502906595567, "grad_norm": 2.65625, "learning_rate": 8.288296801445863e-05, "loss": 2.0839, "step": 6220 }, { "epoch": 0.2719087372699856, "grad_norm": 2.953125, "learning_rate": 8.287779342836682e-05, "loss": 1.7229, "step": 6221 }, { "epoch": 0.2719524454740155, "grad_norm": 2.359375, "learning_rate": 8.287261822182316e-05, "loss": 1.7666, "step": 6222 }, { "epoch": 0.27199615367804536, "grad_norm": 2.234375, "learning_rate": 8.286744239492537e-05, "loss": 1.9596, "step": 6223 }, { "epoch": 0.27203986188207524, "grad_norm": 2.265625, "learning_rate": 8.28622659477711e-05, "loss": 1.8435, "step": 6224 }, { "epoch": 0.2720835700861052, "grad_norm": 2.25, "learning_rate": 8.285708888045803e-05, "loss": 1.8474, "step": 6225 }, { "epoch": 0.27212727829013506, "grad_norm": 2.3125, "learning_rate": 8.285191119308389e-05, "loss": 2.607, "step": 6226 }, { "epoch": 0.27217098649416493, "grad_norm": 2.296875, "learning_rate": 8.284673288574637e-05, "loss": 1.9533, "step": 6227 }, { "epoch": 0.27221469469819487, "grad_norm": 2.6875, "learning_rate": 8.284155395854318e-05, "loss": 1.8539, "step": 6228 }, { "epoch": 0.27225840290222475, "grad_norm": 2.203125, "learning_rate": 8.283637441157209e-05, "loss": 1.8384, "step": 6229 }, { "epoch": 0.27230211110625463, "grad_norm": 1.984375, "learning_rate": 8.283119424493082e-05, "loss": 1.7905, "step": 6230 }, { "epoch": 0.27234581931028456, "grad_norm": 2.0, "learning_rate": 8.282601345871713e-05, "loss": 2.0853, "step": 6231 }, { "epoch": 0.27238952751431444, "grad_norm": 2.171875, "learning_rate": 8.282083205302877e-05, "loss": 2.1163, "step": 6232 }, { "epoch": 0.2724332357183443, "grad_norm": 1.9375, "learning_rate": 8.281565002796356e-05, "loss": 1.4279, "step": 6233 }, { "epoch": 0.2724769439223742, "grad_norm": 2.125, "learning_rate": 8.281046738361926e-05, "loss": 2.0867, "step": 6234 }, { "epoch": 0.27252065212640414, "grad_norm": 2.671875, "learning_rate": 8.28052841200937e-05, "loss": 2.2017, "step": 6235 }, { "epoch": 0.272564360330434, "grad_norm": 2.328125, "learning_rate": 8.280010023748467e-05, "loss": 2.464, "step": 6236 }, { "epoch": 0.2726080685344639, "grad_norm": 1.8984375, "learning_rate": 8.279491573589004e-05, "loss": 1.7016, "step": 6237 }, { "epoch": 0.27265177673849383, "grad_norm": 2.125, "learning_rate": 8.278973061540758e-05, "loss": 1.849, "step": 6238 }, { "epoch": 0.2726954849425237, "grad_norm": 2.265625, "learning_rate": 8.27845448761352e-05, "loss": 2.3198, "step": 6239 }, { "epoch": 0.2727391931465536, "grad_norm": 1.984375, "learning_rate": 8.277935851817075e-05, "loss": 2.0965, "step": 6240 }, { "epoch": 0.27278290135058353, "grad_norm": 1.984375, "learning_rate": 8.277417154161208e-05, "loss": 1.7996, "step": 6241 }, { "epoch": 0.2728266095546134, "grad_norm": 2.109375, "learning_rate": 8.27689839465571e-05, "loss": 1.7935, "step": 6242 }, { "epoch": 0.2728703177586433, "grad_norm": 2.5, "learning_rate": 8.27637957331037e-05, "loss": 2.6535, "step": 6243 }, { "epoch": 0.27291402596267317, "grad_norm": 2.5, "learning_rate": 8.275860690134978e-05, "loss": 1.6348, "step": 6244 }, { "epoch": 0.2729577341667031, "grad_norm": 2.546875, "learning_rate": 8.275341745139329e-05, "loss": 2.5961, "step": 6245 }, { "epoch": 0.273001442370733, "grad_norm": 2.25, "learning_rate": 8.274822738333214e-05, "loss": 1.988, "step": 6246 }, { "epoch": 0.27304515057476286, "grad_norm": 1.984375, "learning_rate": 8.274303669726426e-05, "loss": 1.7639, "step": 6247 }, { "epoch": 0.2730888587787928, "grad_norm": 1.890625, "learning_rate": 8.273784539328763e-05, "loss": 1.5711, "step": 6248 }, { "epoch": 0.2731325669828227, "grad_norm": 2.171875, "learning_rate": 8.273265347150021e-05, "loss": 1.4728, "step": 6249 }, { "epoch": 0.27317627518685256, "grad_norm": 2.09375, "learning_rate": 8.272746093199996e-05, "loss": 1.6939, "step": 6250 }, { "epoch": 0.2732199833908825, "grad_norm": 2.3125, "learning_rate": 8.272226777488492e-05, "loss": 1.7364, "step": 6251 }, { "epoch": 0.2732636915949124, "grad_norm": 3.109375, "learning_rate": 8.271707400025306e-05, "loss": 1.8905, "step": 6252 }, { "epoch": 0.27330739979894225, "grad_norm": 2.34375, "learning_rate": 8.271187960820237e-05, "loss": 2.2283, "step": 6253 }, { "epoch": 0.27335110800297213, "grad_norm": 2.09375, "learning_rate": 8.270668459883093e-05, "loss": 2.0876, "step": 6254 }, { "epoch": 0.27339481620700207, "grad_norm": 2.0, "learning_rate": 8.270148897223673e-05, "loss": 1.9225, "step": 6255 }, { "epoch": 0.27343852441103195, "grad_norm": 2.0, "learning_rate": 8.269629272851785e-05, "loss": 1.451, "step": 6256 }, { "epoch": 0.2734822326150618, "grad_norm": 1.9453125, "learning_rate": 8.269109586777234e-05, "loss": 1.8739, "step": 6257 }, { "epoch": 0.27352594081909176, "grad_norm": 1.9609375, "learning_rate": 8.268589839009826e-05, "loss": 1.9135, "step": 6258 }, { "epoch": 0.27356964902312164, "grad_norm": 2.453125, "learning_rate": 8.268070029559372e-05, "loss": 1.818, "step": 6259 }, { "epoch": 0.2736133572271515, "grad_norm": 2.265625, "learning_rate": 8.267550158435679e-05, "loss": 2.6027, "step": 6260 }, { "epoch": 0.27365706543118146, "grad_norm": 2.578125, "learning_rate": 8.26703022564856e-05, "loss": 2.0903, "step": 6261 }, { "epoch": 0.27370077363521134, "grad_norm": 2.734375, "learning_rate": 8.266510231207824e-05, "loss": 2.4899, "step": 6262 }, { "epoch": 0.2737444818392412, "grad_norm": 2.265625, "learning_rate": 8.265990175123286e-05, "loss": 1.5422, "step": 6263 }, { "epoch": 0.2737881900432711, "grad_norm": 2.1875, "learning_rate": 8.265470057404761e-05, "loss": 2.1211, "step": 6264 }, { "epoch": 0.27383189824730103, "grad_norm": 1.921875, "learning_rate": 8.264949878062062e-05, "loss": 2.0299, "step": 6265 }, { "epoch": 0.2738756064513309, "grad_norm": 2.0625, "learning_rate": 8.264429637105009e-05, "loss": 1.7817, "step": 6266 }, { "epoch": 0.2739193146553608, "grad_norm": 2.0625, "learning_rate": 8.263909334543416e-05, "loss": 1.8252, "step": 6267 }, { "epoch": 0.2739630228593907, "grad_norm": 2.375, "learning_rate": 8.263388970387102e-05, "loss": 2.439, "step": 6268 }, { "epoch": 0.2740067310634206, "grad_norm": 2.453125, "learning_rate": 8.262868544645892e-05, "loss": 1.74, "step": 6269 }, { "epoch": 0.2740504392674505, "grad_norm": 2.390625, "learning_rate": 8.262348057329601e-05, "loss": 1.8924, "step": 6270 }, { "epoch": 0.2740941474714804, "grad_norm": 2.5625, "learning_rate": 8.261827508448056e-05, "loss": 3.097, "step": 6271 }, { "epoch": 0.2741378556755103, "grad_norm": 2.46875, "learning_rate": 8.261306898011077e-05, "loss": 1.4289, "step": 6272 }, { "epoch": 0.2741815638795402, "grad_norm": 2.578125, "learning_rate": 8.26078622602849e-05, "loss": 2.0246, "step": 6273 }, { "epoch": 0.27422527208357006, "grad_norm": 2.375, "learning_rate": 8.260265492510122e-05, "loss": 2.4173, "step": 6274 }, { "epoch": 0.2742689802876, "grad_norm": 2.46875, "learning_rate": 8.259744697465799e-05, "loss": 1.5699, "step": 6275 }, { "epoch": 0.2743126884916299, "grad_norm": 3.5625, "learning_rate": 8.25922384090535e-05, "loss": 1.3334, "step": 6276 }, { "epoch": 0.27435639669565975, "grad_norm": 2.25, "learning_rate": 8.258702922838603e-05, "loss": 1.9971, "step": 6277 }, { "epoch": 0.2744001048996897, "grad_norm": 2.234375, "learning_rate": 8.258181943275387e-05, "loss": 1.7544, "step": 6278 }, { "epoch": 0.27444381310371957, "grad_norm": 2.28125, "learning_rate": 8.257660902225539e-05, "loss": 2.1136, "step": 6279 }, { "epoch": 0.27448752130774945, "grad_norm": 2.25, "learning_rate": 8.257139799698886e-05, "loss": 1.9613, "step": 6280 }, { "epoch": 0.2745312295117794, "grad_norm": 2.953125, "learning_rate": 8.256618635705267e-05, "loss": 3.2578, "step": 6281 }, { "epoch": 0.27457493771580926, "grad_norm": 6.0, "learning_rate": 8.256097410254512e-05, "loss": 1.0849, "step": 6282 }, { "epoch": 0.27461864591983914, "grad_norm": 2.421875, "learning_rate": 8.25557612335646e-05, "loss": 2.0473, "step": 6283 }, { "epoch": 0.274662354123869, "grad_norm": 3.125, "learning_rate": 8.255054775020949e-05, "loss": 1.511, "step": 6284 }, { "epoch": 0.27470606232789896, "grad_norm": 2.203125, "learning_rate": 8.254533365257817e-05, "loss": 1.7947, "step": 6285 }, { "epoch": 0.27474977053192884, "grad_norm": 2.8125, "learning_rate": 8.254011894076904e-05, "loss": 2.7416, "step": 6286 }, { "epoch": 0.2747934787359587, "grad_norm": 2.359375, "learning_rate": 8.25349036148805e-05, "loss": 2.099, "step": 6287 }, { "epoch": 0.27483718693998865, "grad_norm": 2.46875, "learning_rate": 8.252968767501098e-05, "loss": 2.0382, "step": 6288 }, { "epoch": 0.27488089514401853, "grad_norm": 2.28125, "learning_rate": 8.252447112125889e-05, "loss": 1.3828, "step": 6289 }, { "epoch": 0.2749246033480484, "grad_norm": 2.296875, "learning_rate": 8.251925395372272e-05, "loss": 1.9086, "step": 6290 }, { "epoch": 0.27496831155207835, "grad_norm": 1.96875, "learning_rate": 8.251403617250088e-05, "loss": 1.6746, "step": 6291 }, { "epoch": 0.27501201975610823, "grad_norm": 2.40625, "learning_rate": 8.250881777769187e-05, "loss": 1.8238, "step": 6292 }, { "epoch": 0.2750557279601381, "grad_norm": 2.3125, "learning_rate": 8.250359876939415e-05, "loss": 2.2168, "step": 6293 }, { "epoch": 0.275099436164168, "grad_norm": 1.9453125, "learning_rate": 8.249837914770621e-05, "loss": 1.8462, "step": 6294 }, { "epoch": 0.2751431443681979, "grad_norm": 1.9453125, "learning_rate": 8.249315891272655e-05, "loss": 1.9032, "step": 6295 }, { "epoch": 0.2751868525722278, "grad_norm": 2.1875, "learning_rate": 8.24879380645537e-05, "loss": 1.4899, "step": 6296 }, { "epoch": 0.2752305607762577, "grad_norm": 2.078125, "learning_rate": 8.248271660328618e-05, "loss": 2.1009, "step": 6297 }, { "epoch": 0.2752742689802876, "grad_norm": 2.171875, "learning_rate": 8.247749452902251e-05, "loss": 1.9969, "step": 6298 }, { "epoch": 0.2753179771843175, "grad_norm": 2.0, "learning_rate": 8.247227184186126e-05, "loss": 2.2424, "step": 6299 }, { "epoch": 0.2753616853883474, "grad_norm": 2.734375, "learning_rate": 8.246704854190098e-05, "loss": 1.373, "step": 6300 }, { "epoch": 0.2754053935923773, "grad_norm": 2.328125, "learning_rate": 8.246182462924023e-05, "loss": 2.7404, "step": 6301 }, { "epoch": 0.2754491017964072, "grad_norm": 2.078125, "learning_rate": 8.24566001039776e-05, "loss": 1.9392, "step": 6302 }, { "epoch": 0.27549281000043707, "grad_norm": 2.046875, "learning_rate": 8.245137496621169e-05, "loss": 1.6657, "step": 6303 }, { "epoch": 0.27553651820446695, "grad_norm": 2.1875, "learning_rate": 8.244614921604111e-05, "loss": 1.6642, "step": 6304 }, { "epoch": 0.2755802264084969, "grad_norm": 2.71875, "learning_rate": 8.244092285356446e-05, "loss": 1.851, "step": 6305 }, { "epoch": 0.27562393461252677, "grad_norm": 2.875, "learning_rate": 8.24356958788804e-05, "loss": 1.5946, "step": 6306 }, { "epoch": 0.27566764281655665, "grad_norm": 2.453125, "learning_rate": 8.243046829208754e-05, "loss": 1.6248, "step": 6307 }, { "epoch": 0.2757113510205866, "grad_norm": 2.25, "learning_rate": 8.242524009328454e-05, "loss": 2.4631, "step": 6308 }, { "epoch": 0.27575505922461646, "grad_norm": 2.390625, "learning_rate": 8.242001128257007e-05, "loss": 1.7864, "step": 6309 }, { "epoch": 0.27579876742864634, "grad_norm": 3.53125, "learning_rate": 8.24147818600428e-05, "loss": 1.8237, "step": 6310 }, { "epoch": 0.2758424756326763, "grad_norm": 2.125, "learning_rate": 8.240955182580143e-05, "loss": 1.5502, "step": 6311 }, { "epoch": 0.27588618383670616, "grad_norm": 2.078125, "learning_rate": 8.240432117994464e-05, "loss": 2.0371, "step": 6312 }, { "epoch": 0.27592989204073604, "grad_norm": 2.09375, "learning_rate": 8.239908992257115e-05, "loss": 1.7174, "step": 6313 }, { "epoch": 0.2759736002447659, "grad_norm": 2.40625, "learning_rate": 8.239385805377966e-05, "loss": 1.4633, "step": 6314 }, { "epoch": 0.27601730844879585, "grad_norm": 3.046875, "learning_rate": 8.238862557366893e-05, "loss": 1.6023, "step": 6315 }, { "epoch": 0.27606101665282573, "grad_norm": 3.65625, "learning_rate": 8.23833924823377e-05, "loss": 3.2681, "step": 6316 }, { "epoch": 0.2761047248568556, "grad_norm": 1.8828125, "learning_rate": 8.237815877988472e-05, "loss": 1.6448, "step": 6317 }, { "epoch": 0.27614843306088555, "grad_norm": 1.9296875, "learning_rate": 8.237292446640877e-05, "loss": 1.7115, "step": 6318 }, { "epoch": 0.2761921412649154, "grad_norm": 2.125, "learning_rate": 8.236768954200862e-05, "loss": 1.8672, "step": 6319 }, { "epoch": 0.2762358494689453, "grad_norm": 2.390625, "learning_rate": 8.236245400678304e-05, "loss": 2.6833, "step": 6320 }, { "epoch": 0.27627955767297524, "grad_norm": 2.15625, "learning_rate": 8.235721786083087e-05, "loss": 2.5507, "step": 6321 }, { "epoch": 0.2763232658770051, "grad_norm": 2.125, "learning_rate": 8.23519811042509e-05, "loss": 1.4608, "step": 6322 }, { "epoch": 0.276366974081035, "grad_norm": 1.828125, "learning_rate": 8.234674373714196e-05, "loss": 1.4405, "step": 6323 }, { "epoch": 0.2764106822850649, "grad_norm": 2.640625, "learning_rate": 8.234150575960288e-05, "loss": 1.7559, "step": 6324 }, { "epoch": 0.2764543904890948, "grad_norm": 2.140625, "learning_rate": 8.233626717173251e-05, "loss": 2.0372, "step": 6325 }, { "epoch": 0.2764980986931247, "grad_norm": 2.78125, "learning_rate": 8.233102797362974e-05, "loss": 2.3649, "step": 6326 }, { "epoch": 0.2765418068971546, "grad_norm": 2.84375, "learning_rate": 8.23257881653934e-05, "loss": 1.9945, "step": 6327 }, { "epoch": 0.2765855151011845, "grad_norm": 2.1875, "learning_rate": 8.232054774712238e-05, "loss": 1.8691, "step": 6328 }, { "epoch": 0.2766292233052144, "grad_norm": 2.234375, "learning_rate": 8.23153067189156e-05, "loss": 1.6846, "step": 6329 }, { "epoch": 0.27667293150924427, "grad_norm": 2.328125, "learning_rate": 8.231006508087196e-05, "loss": 1.7247, "step": 6330 }, { "epoch": 0.2767166397132742, "grad_norm": 2.46875, "learning_rate": 8.230482283309035e-05, "loss": 2.3358, "step": 6331 }, { "epoch": 0.2767603479173041, "grad_norm": 2.21875, "learning_rate": 8.229957997566971e-05, "loss": 1.9732, "step": 6332 }, { "epoch": 0.27680405612133396, "grad_norm": 2.046875, "learning_rate": 8.2294336508709e-05, "loss": 1.8837, "step": 6333 }, { "epoch": 0.27684776432536384, "grad_norm": 1.8515625, "learning_rate": 8.228909243230714e-05, "loss": 1.7376, "step": 6334 }, { "epoch": 0.2768914725293938, "grad_norm": 2.25, "learning_rate": 8.228384774656312e-05, "loss": 1.9227, "step": 6335 }, { "epoch": 0.27693518073342366, "grad_norm": 2.171875, "learning_rate": 8.227860245157593e-05, "loss": 1.8045, "step": 6336 }, { "epoch": 0.27697888893745354, "grad_norm": 2.90625, "learning_rate": 8.22733565474445e-05, "loss": 2.3304, "step": 6337 }, { "epoch": 0.2770225971414835, "grad_norm": 1.9296875, "learning_rate": 8.226811003426788e-05, "loss": 1.5063, "step": 6338 }, { "epoch": 0.27706630534551335, "grad_norm": 2.4375, "learning_rate": 8.226286291214504e-05, "loss": 1.6672, "step": 6339 }, { "epoch": 0.27711001354954323, "grad_norm": 1.9765625, "learning_rate": 8.225761518117503e-05, "loss": 1.7877, "step": 6340 }, { "epoch": 0.27715372175357317, "grad_norm": 1.8671875, "learning_rate": 8.225236684145688e-05, "loss": 1.9904, "step": 6341 }, { "epoch": 0.27719742995760305, "grad_norm": 2.03125, "learning_rate": 8.224711789308963e-05, "loss": 1.7372, "step": 6342 }, { "epoch": 0.2772411381616329, "grad_norm": 3.390625, "learning_rate": 8.224186833617231e-05, "loss": 2.7278, "step": 6343 }, { "epoch": 0.27728484636566286, "grad_norm": 3.53125, "learning_rate": 8.223661817080403e-05, "loss": 2.5373, "step": 6344 }, { "epoch": 0.27732855456969274, "grad_norm": 2.125, "learning_rate": 8.223136739708383e-05, "loss": 1.7289, "step": 6345 }, { "epoch": 0.2773722627737226, "grad_norm": 2.046875, "learning_rate": 8.222611601511083e-05, "loss": 1.7691, "step": 6346 }, { "epoch": 0.2774159709777525, "grad_norm": 2.703125, "learning_rate": 8.222086402498412e-05, "loss": 2.8211, "step": 6347 }, { "epoch": 0.27745967918178244, "grad_norm": 2.328125, "learning_rate": 8.221561142680281e-05, "loss": 1.4104, "step": 6348 }, { "epoch": 0.2775033873858123, "grad_norm": 2.296875, "learning_rate": 8.221035822066601e-05, "loss": 2.357, "step": 6349 }, { "epoch": 0.2775470955898422, "grad_norm": 2.125, "learning_rate": 8.220510440667289e-05, "loss": 1.7921, "step": 6350 }, { "epoch": 0.27759080379387213, "grad_norm": 2.15625, "learning_rate": 8.219984998492256e-05, "loss": 1.8645, "step": 6351 }, { "epoch": 0.277634511997902, "grad_norm": 1.9921875, "learning_rate": 8.219459495551421e-05, "loss": 1.8275, "step": 6352 }, { "epoch": 0.2776782202019319, "grad_norm": 2.0, "learning_rate": 8.218933931854697e-05, "loss": 1.5563, "step": 6353 }, { "epoch": 0.2777219284059618, "grad_norm": 2.046875, "learning_rate": 8.218408307412006e-05, "loss": 1.6998, "step": 6354 }, { "epoch": 0.2777656366099917, "grad_norm": 2.125, "learning_rate": 8.217882622233268e-05, "loss": 1.4991, "step": 6355 }, { "epoch": 0.2778093448140216, "grad_norm": 2.4375, "learning_rate": 8.217356876328402e-05, "loss": 2.0588, "step": 6356 }, { "epoch": 0.27785305301805147, "grad_norm": 2.078125, "learning_rate": 8.216831069707326e-05, "loss": 2.2262, "step": 6357 }, { "epoch": 0.2778967612220814, "grad_norm": 2.296875, "learning_rate": 8.216305202379968e-05, "loss": 1.8249, "step": 6358 }, { "epoch": 0.2779404694261113, "grad_norm": 2.15625, "learning_rate": 8.215779274356248e-05, "loss": 1.7044, "step": 6359 }, { "epoch": 0.27798417763014116, "grad_norm": 2.21875, "learning_rate": 8.215253285646094e-05, "loss": 1.7547, "step": 6360 }, { "epoch": 0.2780278858341711, "grad_norm": 2.21875, "learning_rate": 8.214727236259431e-05, "loss": 1.8101, "step": 6361 }, { "epoch": 0.278071594038201, "grad_norm": 2.21875, "learning_rate": 8.214201126206184e-05, "loss": 2.1843, "step": 6362 }, { "epoch": 0.27811530224223086, "grad_norm": 2.140625, "learning_rate": 8.213674955496287e-05, "loss": 2.5314, "step": 6363 }, { "epoch": 0.2781590104462608, "grad_norm": 1.9921875, "learning_rate": 8.213148724139665e-05, "loss": 1.6071, "step": 6364 }, { "epoch": 0.27820271865029067, "grad_norm": 2.046875, "learning_rate": 8.212622432146248e-05, "loss": 1.8472, "step": 6365 }, { "epoch": 0.27824642685432055, "grad_norm": 2.09375, "learning_rate": 8.212096079525974e-05, "loss": 1.2409, "step": 6366 }, { "epoch": 0.27829013505835043, "grad_norm": 2.015625, "learning_rate": 8.211569666288769e-05, "loss": 1.8642, "step": 6367 }, { "epoch": 0.27833384326238036, "grad_norm": 2.484375, "learning_rate": 8.211043192444572e-05, "loss": 2.009, "step": 6368 }, { "epoch": 0.27837755146641024, "grad_norm": 3.328125, "learning_rate": 8.210516658003316e-05, "loss": 1.6596, "step": 6369 }, { "epoch": 0.2784212596704401, "grad_norm": 2.234375, "learning_rate": 8.209990062974936e-05, "loss": 1.956, "step": 6370 }, { "epoch": 0.27846496787447006, "grad_norm": 2.40625, "learning_rate": 8.209463407369373e-05, "loss": 2.223, "step": 6371 }, { "epoch": 0.27850867607849994, "grad_norm": 2.140625, "learning_rate": 8.208936691196565e-05, "loss": 1.4611, "step": 6372 }, { "epoch": 0.2785523842825298, "grad_norm": 2.03125, "learning_rate": 8.20840991446645e-05, "loss": 1.7562, "step": 6373 }, { "epoch": 0.27859609248655975, "grad_norm": 2.203125, "learning_rate": 8.207883077188971e-05, "loss": 1.8233, "step": 6374 }, { "epoch": 0.27863980069058963, "grad_norm": 2.359375, "learning_rate": 8.20735617937407e-05, "loss": 2.423, "step": 6375 }, { "epoch": 0.2786835088946195, "grad_norm": 1.8671875, "learning_rate": 8.20682922103169e-05, "loss": 1.5783, "step": 6376 }, { "epoch": 0.2787272170986494, "grad_norm": 2.25, "learning_rate": 8.206302202171775e-05, "loss": 1.7583, "step": 6377 }, { "epoch": 0.27877092530267933, "grad_norm": 2.21875, "learning_rate": 8.20577512280427e-05, "loss": 1.9529, "step": 6378 }, { "epoch": 0.2788146335067092, "grad_norm": 2.109375, "learning_rate": 8.205247982939123e-05, "loss": 2.1829, "step": 6379 }, { "epoch": 0.2788583417107391, "grad_norm": 1.8125, "learning_rate": 8.204720782586281e-05, "loss": 1.5143, "step": 6380 }, { "epoch": 0.278902049914769, "grad_norm": 2.328125, "learning_rate": 8.204193521755694e-05, "loss": 1.2953, "step": 6381 }, { "epoch": 0.2789457581187989, "grad_norm": 1.9765625, "learning_rate": 8.203666200457313e-05, "loss": 2.1489, "step": 6382 }, { "epoch": 0.2789894663228288, "grad_norm": 2.28125, "learning_rate": 8.203138818701087e-05, "loss": 2.1649, "step": 6383 }, { "epoch": 0.2790331745268587, "grad_norm": 2.078125, "learning_rate": 8.20261137649697e-05, "loss": 1.788, "step": 6384 }, { "epoch": 0.2790768827308886, "grad_norm": 1.859375, "learning_rate": 8.202083873854914e-05, "loss": 1.6652, "step": 6385 }, { "epoch": 0.2791205909349185, "grad_norm": 2.703125, "learning_rate": 8.201556310784877e-05, "loss": 2.2077, "step": 6386 }, { "epoch": 0.27916429913894836, "grad_norm": 2.71875, "learning_rate": 8.20102868729681e-05, "loss": 2.984, "step": 6387 }, { "epoch": 0.2792080073429783, "grad_norm": 2.21875, "learning_rate": 8.200501003400676e-05, "loss": 2.2747, "step": 6388 }, { "epoch": 0.2792517155470082, "grad_norm": 2.0, "learning_rate": 8.199973259106427e-05, "loss": 1.7683, "step": 6389 }, { "epoch": 0.27929542375103805, "grad_norm": 1.90625, "learning_rate": 8.199445454424026e-05, "loss": 1.855, "step": 6390 }, { "epoch": 0.279339131955068, "grad_norm": 2.0, "learning_rate": 8.198917589363432e-05, "loss": 1.5856, "step": 6391 }, { "epoch": 0.27938284015909787, "grad_norm": 1.8125, "learning_rate": 8.198389663934611e-05, "loss": 1.5465, "step": 6392 }, { "epoch": 0.27942654836312775, "grad_norm": 2.25, "learning_rate": 8.197861678147518e-05, "loss": 1.8482, "step": 6393 }, { "epoch": 0.2794702565671577, "grad_norm": 2.265625, "learning_rate": 8.197333632012123e-05, "loss": 2.3386, "step": 6394 }, { "epoch": 0.27951396477118756, "grad_norm": 2.3125, "learning_rate": 8.196805525538389e-05, "loss": 2.0954, "step": 6395 }, { "epoch": 0.27955767297521744, "grad_norm": 1.9375, "learning_rate": 8.19627735873628e-05, "loss": 1.5945, "step": 6396 }, { "epoch": 0.2796013811792473, "grad_norm": 2.140625, "learning_rate": 8.195749131615767e-05, "loss": 1.8132, "step": 6397 }, { "epoch": 0.27964508938327726, "grad_norm": 2.171875, "learning_rate": 8.195220844186817e-05, "loss": 1.6905, "step": 6398 }, { "epoch": 0.27968879758730714, "grad_norm": 2.25, "learning_rate": 8.194692496459398e-05, "loss": 2.4612, "step": 6399 }, { "epoch": 0.279732505791337, "grad_norm": 2.0625, "learning_rate": 8.194164088443482e-05, "loss": 1.7586, "step": 6400 }, { "epoch": 0.27977621399536695, "grad_norm": 5.59375, "learning_rate": 8.19363562014904e-05, "loss": 2.1648, "step": 6401 }, { "epoch": 0.27981992219939683, "grad_norm": 2.078125, "learning_rate": 8.193107091586048e-05, "loss": 1.8121, "step": 6402 }, { "epoch": 0.2798636304034267, "grad_norm": 3.125, "learning_rate": 8.192578502764476e-05, "loss": 2.687, "step": 6403 }, { "epoch": 0.27990733860745665, "grad_norm": 2.015625, "learning_rate": 8.192049853694301e-05, "loss": 1.8532, "step": 6404 }, { "epoch": 0.2799510468114865, "grad_norm": 1.90625, "learning_rate": 8.191521144385501e-05, "loss": 1.6634, "step": 6405 }, { "epoch": 0.2799947550155164, "grad_norm": 1.953125, "learning_rate": 8.190992374848052e-05, "loss": 1.8239, "step": 6406 }, { "epoch": 0.2800384632195463, "grad_norm": 1.9921875, "learning_rate": 8.190463545091931e-05, "loss": 1.9444, "step": 6407 }, { "epoch": 0.2800821714235762, "grad_norm": 1.8203125, "learning_rate": 8.189934655127121e-05, "loss": 1.6436, "step": 6408 }, { "epoch": 0.2801258796276061, "grad_norm": 1.9609375, "learning_rate": 8.1894057049636e-05, "loss": 1.5293, "step": 6409 }, { "epoch": 0.280169587831636, "grad_norm": 2.25, "learning_rate": 8.188876694611351e-05, "loss": 2.2065, "step": 6410 }, { "epoch": 0.2802132960356659, "grad_norm": 2.546875, "learning_rate": 8.188347624080359e-05, "loss": 1.4536, "step": 6411 }, { "epoch": 0.2802570042396958, "grad_norm": 2.03125, "learning_rate": 8.187818493380607e-05, "loss": 1.959, "step": 6412 }, { "epoch": 0.2803007124437257, "grad_norm": 2.359375, "learning_rate": 8.18728930252208e-05, "loss": 1.874, "step": 6413 }, { "epoch": 0.2803444206477556, "grad_norm": 12.0625, "learning_rate": 8.186760051514765e-05, "loss": 1.8555, "step": 6414 }, { "epoch": 0.2803881288517855, "grad_norm": 1.7421875, "learning_rate": 8.186230740368649e-05, "loss": 1.4012, "step": 6415 }, { "epoch": 0.28043183705581537, "grad_norm": 2.234375, "learning_rate": 8.185701369093722e-05, "loss": 1.9157, "step": 6416 }, { "epoch": 0.28047554525984525, "grad_norm": 1.90625, "learning_rate": 8.185171937699974e-05, "loss": 2.0665, "step": 6417 }, { "epoch": 0.2805192534638752, "grad_norm": 2.5, "learning_rate": 8.184642446197395e-05, "loss": 2.6483, "step": 6418 }, { "epoch": 0.28056296166790506, "grad_norm": 1.8984375, "learning_rate": 8.184112894595979e-05, "loss": 1.6593, "step": 6419 }, { "epoch": 0.28060666987193494, "grad_norm": 2.15625, "learning_rate": 8.183583282905717e-05, "loss": 2.048, "step": 6420 }, { "epoch": 0.2806503780759649, "grad_norm": 2.0625, "learning_rate": 8.183053611136607e-05, "loss": 1.9933, "step": 6421 }, { "epoch": 0.28069408627999476, "grad_norm": 1.9140625, "learning_rate": 8.18252387929864e-05, "loss": 1.734, "step": 6422 }, { "epoch": 0.28073779448402464, "grad_norm": 2.546875, "learning_rate": 8.181994087401819e-05, "loss": 2.8626, "step": 6423 }, { "epoch": 0.2807815026880546, "grad_norm": 3.078125, "learning_rate": 8.181464235456136e-05, "loss": 1.482, "step": 6424 }, { "epoch": 0.28082521089208445, "grad_norm": 2.109375, "learning_rate": 8.180934323471592e-05, "loss": 1.6419, "step": 6425 }, { "epoch": 0.28086891909611433, "grad_norm": 2.609375, "learning_rate": 8.180404351458189e-05, "loss": 1.9609, "step": 6426 }, { "epoch": 0.2809126273001442, "grad_norm": 1.8515625, "learning_rate": 8.179874319425926e-05, "loss": 1.8158, "step": 6427 }, { "epoch": 0.28095633550417415, "grad_norm": 2.515625, "learning_rate": 8.179344227384807e-05, "loss": 2.282, "step": 6428 }, { "epoch": 0.28100004370820403, "grad_norm": 1.96875, "learning_rate": 8.178814075344836e-05, "loss": 1.9564, "step": 6429 }, { "epoch": 0.2810437519122339, "grad_norm": 2.046875, "learning_rate": 8.178283863316015e-05, "loss": 2.1462, "step": 6430 }, { "epoch": 0.28108746011626384, "grad_norm": 2.421875, "learning_rate": 8.177753591308352e-05, "loss": 2.547, "step": 6431 }, { "epoch": 0.2811311683202937, "grad_norm": 2.109375, "learning_rate": 8.177223259331855e-05, "loss": 1.6894, "step": 6432 }, { "epoch": 0.2811748765243236, "grad_norm": 2.109375, "learning_rate": 8.176692867396531e-05, "loss": 1.7359, "step": 6433 }, { "epoch": 0.28121858472835354, "grad_norm": 1.984375, "learning_rate": 8.176162415512388e-05, "loss": 1.8399, "step": 6434 }, { "epoch": 0.2812622929323834, "grad_norm": 1.9609375, "learning_rate": 8.175631903689438e-05, "loss": 1.7448, "step": 6435 }, { "epoch": 0.2813060011364133, "grad_norm": 3.109375, "learning_rate": 8.175101331937693e-05, "loss": 2.505, "step": 6436 }, { "epoch": 0.2813497093404432, "grad_norm": 2.484375, "learning_rate": 8.174570700267163e-05, "loss": 2.1827, "step": 6437 }, { "epoch": 0.2813934175444731, "grad_norm": 3.203125, "learning_rate": 8.174040008687864e-05, "loss": 1.9409, "step": 6438 }, { "epoch": 0.281437125748503, "grad_norm": 2.015625, "learning_rate": 8.17350925720981e-05, "loss": 1.7053, "step": 6439 }, { "epoch": 0.28148083395253287, "grad_norm": 2.328125, "learning_rate": 8.17297844584302e-05, "loss": 1.9401, "step": 6440 }, { "epoch": 0.2815245421565628, "grad_norm": 2.75, "learning_rate": 8.172447574597506e-05, "loss": 2.2327, "step": 6441 }, { "epoch": 0.2815682503605927, "grad_norm": 2.40625, "learning_rate": 8.171916643483291e-05, "loss": 1.9431, "step": 6442 }, { "epoch": 0.28161195856462257, "grad_norm": 2.171875, "learning_rate": 8.17138565251039e-05, "loss": 2.0892, "step": 6443 }, { "epoch": 0.2816556667686525, "grad_norm": 2.625, "learning_rate": 8.170854601688828e-05, "loss": 1.9925, "step": 6444 }, { "epoch": 0.2816993749726824, "grad_norm": 2.5625, "learning_rate": 8.170323491028624e-05, "loss": 1.9526, "step": 6445 }, { "epoch": 0.28174308317671226, "grad_norm": 2.140625, "learning_rate": 8.169792320539802e-05, "loss": 1.7848, "step": 6446 }, { "epoch": 0.28178679138074214, "grad_norm": 2.21875, "learning_rate": 8.169261090232385e-05, "loss": 1.9374, "step": 6447 }, { "epoch": 0.2818304995847721, "grad_norm": 2.125, "learning_rate": 8.168729800116398e-05, "loss": 2.2083, "step": 6448 }, { "epoch": 0.28187420778880196, "grad_norm": 2.21875, "learning_rate": 8.168198450201869e-05, "loss": 1.8002, "step": 6449 }, { "epoch": 0.28191791599283184, "grad_norm": 2.015625, "learning_rate": 8.167667040498823e-05, "loss": 1.5561, "step": 6450 }, { "epoch": 0.28196162419686177, "grad_norm": 2.234375, "learning_rate": 8.16713557101729e-05, "loss": 2.1003, "step": 6451 }, { "epoch": 0.28200533240089165, "grad_norm": 2.234375, "learning_rate": 8.166604041767299e-05, "loss": 2.0644, "step": 6452 }, { "epoch": 0.28204904060492153, "grad_norm": 2.0, "learning_rate": 8.166072452758883e-05, "loss": 1.5807, "step": 6453 }, { "epoch": 0.28209274880895147, "grad_norm": 2.140625, "learning_rate": 8.16554080400207e-05, "loss": 2.0718, "step": 6454 }, { "epoch": 0.28213645701298135, "grad_norm": 2.09375, "learning_rate": 8.165009095506895e-05, "loss": 1.9122, "step": 6455 }, { "epoch": 0.2821801652170112, "grad_norm": 2.515625, "learning_rate": 8.164477327283391e-05, "loss": 1.8248, "step": 6456 }, { "epoch": 0.2822238734210411, "grad_norm": 2.65625, "learning_rate": 8.163945499341596e-05, "loss": 2.3692, "step": 6457 }, { "epoch": 0.28226758162507104, "grad_norm": 2.171875, "learning_rate": 8.163413611691544e-05, "loss": 1.3578, "step": 6458 }, { "epoch": 0.2823112898291009, "grad_norm": 2.109375, "learning_rate": 8.162881664343271e-05, "loss": 1.7039, "step": 6459 }, { "epoch": 0.2823549980331308, "grad_norm": 2.0, "learning_rate": 8.16234965730682e-05, "loss": 2.7822, "step": 6460 }, { "epoch": 0.28239870623716073, "grad_norm": 2.203125, "learning_rate": 8.161817590592228e-05, "loss": 1.3216, "step": 6461 }, { "epoch": 0.2824424144411906, "grad_norm": 1.8046875, "learning_rate": 8.161285464209537e-05, "loss": 1.5041, "step": 6462 }, { "epoch": 0.2824861226452205, "grad_norm": 2.25, "learning_rate": 8.160753278168787e-05, "loss": 1.7797, "step": 6463 }, { "epoch": 0.28252983084925043, "grad_norm": 1.8984375, "learning_rate": 8.160221032480021e-05, "loss": 1.8841, "step": 6464 }, { "epoch": 0.2825735390532803, "grad_norm": 2.28125, "learning_rate": 8.159688727153287e-05, "loss": 1.8538, "step": 6465 }, { "epoch": 0.2826172472573102, "grad_norm": 1.953125, "learning_rate": 8.159156362198628e-05, "loss": 1.6499, "step": 6466 }, { "epoch": 0.28266095546134007, "grad_norm": 2.234375, "learning_rate": 8.15862393762609e-05, "loss": 1.8777, "step": 6467 }, { "epoch": 0.28270466366537, "grad_norm": 2.390625, "learning_rate": 8.15809145344572e-05, "loss": 1.7798, "step": 6468 }, { "epoch": 0.2827483718693999, "grad_norm": 2.03125, "learning_rate": 8.157558909667569e-05, "loss": 1.5544, "step": 6469 }, { "epoch": 0.28279208007342976, "grad_norm": 2.296875, "learning_rate": 8.157026306301685e-05, "loss": 1.9482, "step": 6470 }, { "epoch": 0.2828357882774597, "grad_norm": 3.453125, "learning_rate": 8.156493643358121e-05, "loss": 1.892, "step": 6471 }, { "epoch": 0.2828794964814896, "grad_norm": 1.90625, "learning_rate": 8.155960920846926e-05, "loss": 1.6708, "step": 6472 }, { "epoch": 0.28292320468551946, "grad_norm": 2.25, "learning_rate": 8.155428138778158e-05, "loss": 1.9615, "step": 6473 }, { "epoch": 0.2829669128895494, "grad_norm": 1.796875, "learning_rate": 8.154895297161866e-05, "loss": 1.6599, "step": 6474 }, { "epoch": 0.2830106210935793, "grad_norm": 2.28125, "learning_rate": 8.154362396008109e-05, "loss": 2.2279, "step": 6475 }, { "epoch": 0.28305432929760915, "grad_norm": 2.265625, "learning_rate": 8.153829435326942e-05, "loss": 1.5279, "step": 6476 }, { "epoch": 0.28309803750163903, "grad_norm": 2.140625, "learning_rate": 8.153296415128425e-05, "loss": 1.9643, "step": 6477 }, { "epoch": 0.28314174570566897, "grad_norm": 2.046875, "learning_rate": 8.152763335422613e-05, "loss": 1.7157, "step": 6478 }, { "epoch": 0.28318545390969885, "grad_norm": 2.125, "learning_rate": 8.15223019621957e-05, "loss": 1.7339, "step": 6479 }, { "epoch": 0.2832291621137287, "grad_norm": 2.84375, "learning_rate": 8.151696997529354e-05, "loss": 1.2496, "step": 6480 }, { "epoch": 0.28327287031775866, "grad_norm": 1.8515625, "learning_rate": 8.151163739362029e-05, "loss": 1.8114, "step": 6481 }, { "epoch": 0.28331657852178854, "grad_norm": 2.375, "learning_rate": 8.150630421727659e-05, "loss": 1.8059, "step": 6482 }, { "epoch": 0.2833602867258184, "grad_norm": 2.296875, "learning_rate": 8.150097044636307e-05, "loss": 1.8667, "step": 6483 }, { "epoch": 0.28340399492984836, "grad_norm": 1.8671875, "learning_rate": 8.149563608098037e-05, "loss": 1.6471, "step": 6484 }, { "epoch": 0.28344770313387824, "grad_norm": 1.921875, "learning_rate": 8.14903011212292e-05, "loss": 1.4635, "step": 6485 }, { "epoch": 0.2834914113379081, "grad_norm": 2.203125, "learning_rate": 8.148496556721022e-05, "loss": 1.7774, "step": 6486 }, { "epoch": 0.283535119541938, "grad_norm": 2.09375, "learning_rate": 8.14796294190241e-05, "loss": 1.8177, "step": 6487 }, { "epoch": 0.28357882774596793, "grad_norm": 1.90625, "learning_rate": 8.147429267677156e-05, "loss": 1.8345, "step": 6488 }, { "epoch": 0.2836225359499978, "grad_norm": 2.265625, "learning_rate": 8.146895534055332e-05, "loss": 2.1024, "step": 6489 }, { "epoch": 0.2836662441540277, "grad_norm": 2.53125, "learning_rate": 8.146361741047006e-05, "loss": 2.1015, "step": 6490 }, { "epoch": 0.2837099523580576, "grad_norm": 2.046875, "learning_rate": 8.145827888662257e-05, "loss": 1.6696, "step": 6491 }, { "epoch": 0.2837536605620875, "grad_norm": 1.8515625, "learning_rate": 8.145293976911158e-05, "loss": 1.7858, "step": 6492 }, { "epoch": 0.2837973687661174, "grad_norm": 2.28125, "learning_rate": 8.144760005803783e-05, "loss": 1.94, "step": 6493 }, { "epoch": 0.2838410769701473, "grad_norm": 2.21875, "learning_rate": 8.14422597535021e-05, "loss": 1.595, "step": 6494 }, { "epoch": 0.2838847851741772, "grad_norm": 2.1875, "learning_rate": 8.143691885560515e-05, "loss": 2.0082, "step": 6495 }, { "epoch": 0.2839284933782071, "grad_norm": 2.0625, "learning_rate": 8.14315773644478e-05, "loss": 1.6122, "step": 6496 }, { "epoch": 0.28397220158223696, "grad_norm": 2.15625, "learning_rate": 8.142623528013084e-05, "loss": 1.9811, "step": 6497 }, { "epoch": 0.2840159097862669, "grad_norm": 2.703125, "learning_rate": 8.142089260275509e-05, "loss": 1.9579, "step": 6498 }, { "epoch": 0.2840596179902968, "grad_norm": 2.453125, "learning_rate": 8.141554933242135e-05, "loss": 2.491, "step": 6499 }, { "epoch": 0.28410332619432666, "grad_norm": 1.9609375, "learning_rate": 8.141020546923048e-05, "loss": 1.4218, "step": 6500 }, { "epoch": 0.2841470343983566, "grad_norm": 2.140625, "learning_rate": 8.14048610132833e-05, "loss": 1.2199, "step": 6501 }, { "epoch": 0.28419074260238647, "grad_norm": 2.953125, "learning_rate": 8.13995159646807e-05, "loss": 1.6209, "step": 6502 }, { "epoch": 0.28423445080641635, "grad_norm": 4.34375, "learning_rate": 8.139417032352354e-05, "loss": 3.7028, "step": 6503 }, { "epoch": 0.2842781590104463, "grad_norm": 2.375, "learning_rate": 8.138882408991268e-05, "loss": 1.2836, "step": 6504 }, { "epoch": 0.28432186721447616, "grad_norm": 2.109375, "learning_rate": 8.138347726394904e-05, "loss": 1.7913, "step": 6505 }, { "epoch": 0.28436557541850604, "grad_norm": 2.96875, "learning_rate": 8.13781298457335e-05, "loss": 1.7721, "step": 6506 }, { "epoch": 0.2844092836225359, "grad_norm": 2.890625, "learning_rate": 8.137278183536699e-05, "loss": 2.4412, "step": 6507 }, { "epoch": 0.28445299182656586, "grad_norm": 2.609375, "learning_rate": 8.13674332329504e-05, "loss": 2.2241, "step": 6508 }, { "epoch": 0.28449670003059574, "grad_norm": 3.359375, "learning_rate": 8.136208403858472e-05, "loss": 2.3641, "step": 6509 }, { "epoch": 0.2845404082346256, "grad_norm": 2.34375, "learning_rate": 8.135673425237084e-05, "loss": 1.7108, "step": 6510 }, { "epoch": 0.28458411643865555, "grad_norm": 1.984375, "learning_rate": 8.135138387440977e-05, "loss": 1.385, "step": 6511 }, { "epoch": 0.28462782464268543, "grad_norm": 2.09375, "learning_rate": 8.134603290480245e-05, "loss": 2.3765, "step": 6512 }, { "epoch": 0.2846715328467153, "grad_norm": 2.4375, "learning_rate": 8.134068134364987e-05, "loss": 2.4618, "step": 6513 }, { "epoch": 0.28471524105074525, "grad_norm": 1.9375, "learning_rate": 8.133532919105302e-05, "loss": 1.9383, "step": 6514 }, { "epoch": 0.28475894925477513, "grad_norm": 1.984375, "learning_rate": 8.13299764471129e-05, "loss": 1.7269, "step": 6515 }, { "epoch": 0.284802657458805, "grad_norm": 2.0, "learning_rate": 8.132462311193053e-05, "loss": 1.7784, "step": 6516 }, { "epoch": 0.2848463656628349, "grad_norm": 2.015625, "learning_rate": 8.131926918560692e-05, "loss": 1.8252, "step": 6517 }, { "epoch": 0.2848900738668648, "grad_norm": 2.0625, "learning_rate": 8.131391466824312e-05, "loss": 1.5615, "step": 6518 }, { "epoch": 0.2849337820708947, "grad_norm": 2.390625, "learning_rate": 8.130855955994019e-05, "loss": 2.0726, "step": 6519 }, { "epoch": 0.2849774902749246, "grad_norm": 2.21875, "learning_rate": 8.130320386079915e-05, "loss": 1.6181, "step": 6520 }, { "epoch": 0.2850211984789545, "grad_norm": 2.1875, "learning_rate": 8.129784757092111e-05, "loss": 1.9783, "step": 6521 }, { "epoch": 0.2850649066829844, "grad_norm": 2.859375, "learning_rate": 8.129249069040712e-05, "loss": 3.0229, "step": 6522 }, { "epoch": 0.2851086148870143, "grad_norm": 2.1875, "learning_rate": 8.12871332193583e-05, "loss": 1.6179, "step": 6523 }, { "epoch": 0.2851523230910442, "grad_norm": 2.59375, "learning_rate": 8.128177515787574e-05, "loss": 2.1017, "step": 6524 }, { "epoch": 0.2851960312950741, "grad_norm": 2.15625, "learning_rate": 8.127641650606054e-05, "loss": 1.5302, "step": 6525 }, { "epoch": 0.285239739499104, "grad_norm": 2.15625, "learning_rate": 8.127105726401386e-05, "loss": 1.546, "step": 6526 }, { "epoch": 0.28528344770313385, "grad_norm": 2.171875, "learning_rate": 8.126569743183681e-05, "loss": 1.6359, "step": 6527 }, { "epoch": 0.2853271559071638, "grad_norm": 2.015625, "learning_rate": 8.126033700963053e-05, "loss": 1.6768, "step": 6528 }, { "epoch": 0.28537086411119367, "grad_norm": 2.046875, "learning_rate": 8.12549759974962e-05, "loss": 1.6905, "step": 6529 }, { "epoch": 0.28541457231522355, "grad_norm": 2.46875, "learning_rate": 8.1249614395535e-05, "loss": 1.8334, "step": 6530 }, { "epoch": 0.2854582805192535, "grad_norm": 2.203125, "learning_rate": 8.124425220384808e-05, "loss": 2.195, "step": 6531 }, { "epoch": 0.28550198872328336, "grad_norm": 2.453125, "learning_rate": 8.123888942253666e-05, "loss": 2.0734, "step": 6532 }, { "epoch": 0.28554569692731324, "grad_norm": 2.078125, "learning_rate": 8.123352605170191e-05, "loss": 1.9242, "step": 6533 }, { "epoch": 0.2855894051313432, "grad_norm": 1.9296875, "learning_rate": 8.122816209144509e-05, "loss": 2.0874, "step": 6534 }, { "epoch": 0.28563311333537306, "grad_norm": 2.59375, "learning_rate": 8.122279754186736e-05, "loss": 1.4013, "step": 6535 }, { "epoch": 0.28567682153940294, "grad_norm": 3.1875, "learning_rate": 8.121743240307003e-05, "loss": 1.9932, "step": 6536 }, { "epoch": 0.2857205297434328, "grad_norm": 2.046875, "learning_rate": 8.121206667515431e-05, "loss": 1.5798, "step": 6537 }, { "epoch": 0.28576423794746275, "grad_norm": 2.03125, "learning_rate": 8.120670035822147e-05, "loss": 2.2536, "step": 6538 }, { "epoch": 0.28580794615149263, "grad_norm": 1.8515625, "learning_rate": 8.120133345237278e-05, "loss": 1.8946, "step": 6539 }, { "epoch": 0.2858516543555225, "grad_norm": 2.3125, "learning_rate": 8.11959659577095e-05, "loss": 1.5591, "step": 6540 }, { "epoch": 0.28589536255955245, "grad_norm": 1.9453125, "learning_rate": 8.119059787433294e-05, "loss": 1.8271, "step": 6541 }, { "epoch": 0.2859390707635823, "grad_norm": 2.078125, "learning_rate": 8.118522920234442e-05, "loss": 1.7642, "step": 6542 }, { "epoch": 0.2859827789676122, "grad_norm": 1.953125, "learning_rate": 8.117985994184522e-05, "loss": 1.6098, "step": 6543 }, { "epoch": 0.28602648717164214, "grad_norm": 2.109375, "learning_rate": 8.117449009293668e-05, "loss": 2.3942, "step": 6544 }, { "epoch": 0.286070195375672, "grad_norm": 2.734375, "learning_rate": 8.116911965572016e-05, "loss": 2.513, "step": 6545 }, { "epoch": 0.2861139035797019, "grad_norm": 2.59375, "learning_rate": 8.116374863029696e-05, "loss": 2.7693, "step": 6546 }, { "epoch": 0.2861576117837318, "grad_norm": 2.0, "learning_rate": 8.115837701676848e-05, "loss": 2.2292, "step": 6547 }, { "epoch": 0.2862013199877617, "grad_norm": 2.390625, "learning_rate": 8.115300481523609e-05, "loss": 1.8436, "step": 6548 }, { "epoch": 0.2862450281917916, "grad_norm": 2.640625, "learning_rate": 8.114763202580113e-05, "loss": 1.6627, "step": 6549 }, { "epoch": 0.2862887363958215, "grad_norm": 6.625, "learning_rate": 8.114225864856504e-05, "loss": 2.6498, "step": 6550 }, { "epoch": 0.2863324445998514, "grad_norm": 1.734375, "learning_rate": 8.11368846836292e-05, "loss": 1.5364, "step": 6551 }, { "epoch": 0.2863761528038813, "grad_norm": 2.375, "learning_rate": 8.113151013109503e-05, "loss": 2.3289, "step": 6552 }, { "epoch": 0.28641986100791117, "grad_norm": 2.328125, "learning_rate": 8.112613499106396e-05, "loss": 1.7235, "step": 6553 }, { "epoch": 0.2864635692119411, "grad_norm": 2.140625, "learning_rate": 8.11207592636374e-05, "loss": 2.1643, "step": 6554 }, { "epoch": 0.286507277415971, "grad_norm": 2.875, "learning_rate": 8.111538294891684e-05, "loss": 1.8422, "step": 6555 }, { "epoch": 0.28655098562000086, "grad_norm": 2.0, "learning_rate": 8.11100060470037e-05, "loss": 1.6211, "step": 6556 }, { "epoch": 0.28659469382403074, "grad_norm": 2.265625, "learning_rate": 8.110462855799949e-05, "loss": 1.5814, "step": 6557 }, { "epoch": 0.2866384020280607, "grad_norm": 2.546875, "learning_rate": 8.109925048200565e-05, "loss": 1.3296, "step": 6558 }, { "epoch": 0.28668211023209056, "grad_norm": 2.140625, "learning_rate": 8.109387181912369e-05, "loss": 2.0455, "step": 6559 }, { "epoch": 0.28672581843612044, "grad_norm": 1.9921875, "learning_rate": 8.108849256945513e-05, "loss": 1.9988, "step": 6560 }, { "epoch": 0.2867695266401504, "grad_norm": 2.1875, "learning_rate": 8.108311273310146e-05, "loss": 1.8771, "step": 6561 }, { "epoch": 0.28681323484418025, "grad_norm": 2.46875, "learning_rate": 8.10777323101642e-05, "loss": 2.4142, "step": 6562 }, { "epoch": 0.28685694304821013, "grad_norm": 2.078125, "learning_rate": 8.107235130074492e-05, "loss": 1.7797, "step": 6563 }, { "epoch": 0.28690065125224007, "grad_norm": 2.28125, "learning_rate": 8.106696970494514e-05, "loss": 1.9409, "step": 6564 }, { "epoch": 0.28694435945626995, "grad_norm": 1.953125, "learning_rate": 8.106158752286642e-05, "loss": 1.4793, "step": 6565 }, { "epoch": 0.28698806766029983, "grad_norm": 1.90625, "learning_rate": 8.105620475461033e-05, "loss": 1.6698, "step": 6566 }, { "epoch": 0.2870317758643297, "grad_norm": 2.09375, "learning_rate": 8.105082140027846e-05, "loss": 2.1729, "step": 6567 }, { "epoch": 0.28707548406835964, "grad_norm": 2.3125, "learning_rate": 8.104543745997242e-05, "loss": 1.7732, "step": 6568 }, { "epoch": 0.2871191922723895, "grad_norm": 2.109375, "learning_rate": 8.104005293379378e-05, "loss": 1.779, "step": 6569 }, { "epoch": 0.2871629004764194, "grad_norm": 2.53125, "learning_rate": 8.103466782184415e-05, "loss": 1.9623, "step": 6570 }, { "epoch": 0.28720660868044934, "grad_norm": 2.328125, "learning_rate": 8.102928212422519e-05, "loss": 2.3689, "step": 6571 }, { "epoch": 0.2872503168844792, "grad_norm": 2.25, "learning_rate": 8.102389584103849e-05, "loss": 1.5722, "step": 6572 }, { "epoch": 0.2872940250885091, "grad_norm": 2.203125, "learning_rate": 8.101850897238574e-05, "loss": 2.7088, "step": 6573 }, { "epoch": 0.28733773329253903, "grad_norm": 2.671875, "learning_rate": 8.101312151836857e-05, "loss": 2.5903, "step": 6574 }, { "epoch": 0.2873814414965689, "grad_norm": 2.265625, "learning_rate": 8.100773347908868e-05, "loss": 2.3079, "step": 6575 }, { "epoch": 0.2874251497005988, "grad_norm": 2.171875, "learning_rate": 8.100234485464771e-05, "loss": 2.0066, "step": 6576 }, { "epoch": 0.28746885790462867, "grad_norm": 2.328125, "learning_rate": 8.099695564514737e-05, "loss": 2.1999, "step": 6577 }, { "epoch": 0.2875125661086586, "grad_norm": 2.046875, "learning_rate": 8.099156585068937e-05, "loss": 1.8645, "step": 6578 }, { "epoch": 0.2875562743126885, "grad_norm": 3.046875, "learning_rate": 8.098617547137541e-05, "loss": 1.7175, "step": 6579 }, { "epoch": 0.28759998251671837, "grad_norm": 2.109375, "learning_rate": 8.098078450730723e-05, "loss": 1.8844, "step": 6580 }, { "epoch": 0.2876436907207483, "grad_norm": 2.078125, "learning_rate": 8.097539295858656e-05, "loss": 1.7426, "step": 6581 }, { "epoch": 0.2876873989247782, "grad_norm": 2.3125, "learning_rate": 8.097000082531512e-05, "loss": 2.1152, "step": 6582 }, { "epoch": 0.28773110712880806, "grad_norm": 1.9765625, "learning_rate": 8.096460810759472e-05, "loss": 1.8683, "step": 6583 }, { "epoch": 0.287774815332838, "grad_norm": 2.0, "learning_rate": 8.095921480552707e-05, "loss": 1.4677, "step": 6584 }, { "epoch": 0.2878185235368679, "grad_norm": 2.0625, "learning_rate": 8.095382091921399e-05, "loss": 1.6928, "step": 6585 }, { "epoch": 0.28786223174089776, "grad_norm": 2.109375, "learning_rate": 8.094842644875726e-05, "loss": 1.963, "step": 6586 }, { "epoch": 0.28790593994492764, "grad_norm": 1.8828125, "learning_rate": 8.094303139425867e-05, "loss": 1.5391, "step": 6587 }, { "epoch": 0.28794964814895757, "grad_norm": 2.328125, "learning_rate": 8.093763575582006e-05, "loss": 2.0163, "step": 6588 }, { "epoch": 0.28799335635298745, "grad_norm": 2.203125, "learning_rate": 8.093223953354323e-05, "loss": 1.5258, "step": 6589 }, { "epoch": 0.28803706455701733, "grad_norm": 2.359375, "learning_rate": 8.092684272753002e-05, "loss": 2.2255, "step": 6590 }, { "epoch": 0.28808077276104727, "grad_norm": 2.5, "learning_rate": 8.092144533788226e-05, "loss": 1.9112, "step": 6591 }, { "epoch": 0.28812448096507715, "grad_norm": 2.4375, "learning_rate": 8.091604736470184e-05, "loss": 2.0743, "step": 6592 }, { "epoch": 0.288168189169107, "grad_norm": 2.1875, "learning_rate": 8.091064880809061e-05, "loss": 1.6284, "step": 6593 }, { "epoch": 0.28821189737313696, "grad_norm": 2.296875, "learning_rate": 8.090524966815042e-05, "loss": 2.0639, "step": 6594 }, { "epoch": 0.28825560557716684, "grad_norm": 2.15625, "learning_rate": 8.089984994498324e-05, "loss": 1.5172, "step": 6595 }, { "epoch": 0.2882993137811967, "grad_norm": 2.890625, "learning_rate": 8.089444963869088e-05, "loss": 1.8543, "step": 6596 }, { "epoch": 0.2883430219852266, "grad_norm": 2.46875, "learning_rate": 8.088904874937528e-05, "loss": 2.9156, "step": 6597 }, { "epoch": 0.28838673018925653, "grad_norm": 2.359375, "learning_rate": 8.088364727713841e-05, "loss": 1.9629, "step": 6598 }, { "epoch": 0.2884304383932864, "grad_norm": 2.15625, "learning_rate": 8.087824522208215e-05, "loss": 2.4238, "step": 6599 }, { "epoch": 0.2884741465973163, "grad_norm": 2.296875, "learning_rate": 8.087284258430847e-05, "loss": 2.4762, "step": 6600 }, { "epoch": 0.28851785480134623, "grad_norm": 2.171875, "learning_rate": 8.08674393639193e-05, "loss": 1.8678, "step": 6601 }, { "epoch": 0.2885615630053761, "grad_norm": 2.96875, "learning_rate": 8.086203556101664e-05, "loss": 3.2367, "step": 6602 }, { "epoch": 0.288605271209406, "grad_norm": 2.59375, "learning_rate": 8.085663117570243e-05, "loss": 2.9074, "step": 6603 }, { "epoch": 0.2886489794134359, "grad_norm": 3.1875, "learning_rate": 8.08512262080787e-05, "loss": 2.416, "step": 6604 }, { "epoch": 0.2886926876174658, "grad_norm": 2.296875, "learning_rate": 8.084582065824743e-05, "loss": 1.755, "step": 6605 }, { "epoch": 0.2887363958214957, "grad_norm": 1.8203125, "learning_rate": 8.084041452631062e-05, "loss": 1.7571, "step": 6606 }, { "epoch": 0.28878010402552556, "grad_norm": 2.390625, "learning_rate": 8.08350078123703e-05, "loss": 2.1158, "step": 6607 }, { "epoch": 0.2888238122295555, "grad_norm": 1.984375, "learning_rate": 8.082960051652852e-05, "loss": 1.8783, "step": 6608 }, { "epoch": 0.2888675204335854, "grad_norm": 2.515625, "learning_rate": 8.08241926388873e-05, "loss": 2.3248, "step": 6609 }, { "epoch": 0.28891122863761526, "grad_norm": 2.40625, "learning_rate": 8.08187841795487e-05, "loss": 1.7596, "step": 6610 }, { "epoch": 0.2889549368416452, "grad_norm": 1.9140625, "learning_rate": 8.081337513861478e-05, "loss": 1.851, "step": 6611 }, { "epoch": 0.2889986450456751, "grad_norm": 2.09375, "learning_rate": 8.080796551618763e-05, "loss": 1.7286, "step": 6612 }, { "epoch": 0.28904235324970495, "grad_norm": 2.265625, "learning_rate": 8.080255531236935e-05, "loss": 1.975, "step": 6613 }, { "epoch": 0.2890860614537349, "grad_norm": 2.140625, "learning_rate": 8.079714452726199e-05, "loss": 1.9174, "step": 6614 }, { "epoch": 0.28912976965776477, "grad_norm": 2.953125, "learning_rate": 8.079173316096772e-05, "loss": 2.6944, "step": 6615 }, { "epoch": 0.28917347786179465, "grad_norm": 2.0625, "learning_rate": 8.07863212135886e-05, "loss": 2.5183, "step": 6616 }, { "epoch": 0.2892171860658246, "grad_norm": 2.5, "learning_rate": 8.07809086852268e-05, "loss": 2.2211, "step": 6617 }, { "epoch": 0.28926089426985446, "grad_norm": 2.28125, "learning_rate": 8.077549557598448e-05, "loss": 1.8159, "step": 6618 }, { "epoch": 0.28930460247388434, "grad_norm": 2.25, "learning_rate": 8.077008188596375e-05, "loss": 1.7229, "step": 6619 }, { "epoch": 0.2893483106779142, "grad_norm": 2.34375, "learning_rate": 8.076466761526678e-05, "loss": 1.8865, "step": 6620 }, { "epoch": 0.28939201888194416, "grad_norm": 1.890625, "learning_rate": 8.075925276399576e-05, "loss": 1.7476, "step": 6621 }, { "epoch": 0.28943572708597404, "grad_norm": 2.015625, "learning_rate": 8.075383733225288e-05, "loss": 1.7712, "step": 6622 }, { "epoch": 0.2894794352900039, "grad_norm": 2.4375, "learning_rate": 8.074842132014034e-05, "loss": 1.715, "step": 6623 }, { "epoch": 0.28952314349403385, "grad_norm": 1.9453125, "learning_rate": 8.074300472776031e-05, "loss": 1.8801, "step": 6624 }, { "epoch": 0.28956685169806373, "grad_norm": 2.703125, "learning_rate": 8.073758755521505e-05, "loss": 1.5784, "step": 6625 }, { "epoch": 0.2896105599020936, "grad_norm": 2.765625, "learning_rate": 8.073216980260678e-05, "loss": 1.6752, "step": 6626 }, { "epoch": 0.28965426810612355, "grad_norm": 3.84375, "learning_rate": 8.072675147003773e-05, "loss": 1.2782, "step": 6627 }, { "epoch": 0.2896979763101534, "grad_norm": 6.90625, "learning_rate": 8.072133255761017e-05, "loss": 2.8148, "step": 6628 }, { "epoch": 0.2897416845141833, "grad_norm": 1.9765625, "learning_rate": 8.071591306542634e-05, "loss": 1.8114, "step": 6629 }, { "epoch": 0.2897853927182132, "grad_norm": 1.9453125, "learning_rate": 8.071049299358853e-05, "loss": 1.2275, "step": 6630 }, { "epoch": 0.2898291009222431, "grad_norm": 2.34375, "learning_rate": 8.070507234219901e-05, "loss": 2.4481, "step": 6631 }, { "epoch": 0.289872809126273, "grad_norm": 2.71875, "learning_rate": 8.06996511113601e-05, "loss": 2.0297, "step": 6632 }, { "epoch": 0.2899165173303029, "grad_norm": 2.296875, "learning_rate": 8.06942293011741e-05, "loss": 1.6154, "step": 6633 }, { "epoch": 0.2899602255343328, "grad_norm": 2.375, "learning_rate": 8.06888069117433e-05, "loss": 2.2716, "step": 6634 }, { "epoch": 0.2900039337383627, "grad_norm": 2.03125, "learning_rate": 8.068338394317005e-05, "loss": 2.1387, "step": 6635 }, { "epoch": 0.2900476419423926, "grad_norm": 1.78125, "learning_rate": 8.06779603955567e-05, "loss": 1.6537, "step": 6636 }, { "epoch": 0.2900913501464225, "grad_norm": 2.21875, "learning_rate": 8.067253626900558e-05, "loss": 2.2958, "step": 6637 }, { "epoch": 0.2901350583504524, "grad_norm": 2.734375, "learning_rate": 8.066711156361905e-05, "loss": 2.7007, "step": 6638 }, { "epoch": 0.29017876655448227, "grad_norm": 3.03125, "learning_rate": 8.066168627949952e-05, "loss": 2.0117, "step": 6639 }, { "epoch": 0.29022247475851215, "grad_norm": 2.140625, "learning_rate": 8.065626041674932e-05, "loss": 2.6418, "step": 6640 }, { "epoch": 0.2902661829625421, "grad_norm": 1.984375, "learning_rate": 8.065083397547086e-05, "loss": 1.6984, "step": 6641 }, { "epoch": 0.29030989116657197, "grad_norm": 2.0625, "learning_rate": 8.064540695576657e-05, "loss": 2.5482, "step": 6642 }, { "epoch": 0.29035359937060184, "grad_norm": 2.578125, "learning_rate": 8.063997935773885e-05, "loss": 1.8375, "step": 6643 }, { "epoch": 0.2903973075746318, "grad_norm": 2.15625, "learning_rate": 8.063455118149013e-05, "loss": 2.1303, "step": 6644 }, { "epoch": 0.29044101577866166, "grad_norm": 1.7890625, "learning_rate": 8.062912242712282e-05, "loss": 1.3989, "step": 6645 }, { "epoch": 0.29048472398269154, "grad_norm": 1.90625, "learning_rate": 8.06236930947394e-05, "loss": 1.541, "step": 6646 }, { "epoch": 0.2905284321867215, "grad_norm": 2.0625, "learning_rate": 8.061826318444232e-05, "loss": 1.709, "step": 6647 }, { "epoch": 0.29057214039075135, "grad_norm": 2.5625, "learning_rate": 8.061283269633407e-05, "loss": 1.6461, "step": 6648 }, { "epoch": 0.29061584859478123, "grad_norm": 1.9921875, "learning_rate": 8.06074016305171e-05, "loss": 1.5956, "step": 6649 }, { "epoch": 0.2906595567988111, "grad_norm": 2.078125, "learning_rate": 8.060196998709391e-05, "loss": 1.9987, "step": 6650 }, { "epoch": 0.29070326500284105, "grad_norm": 2.265625, "learning_rate": 8.0596537766167e-05, "loss": 2.2364, "step": 6651 }, { "epoch": 0.29074697320687093, "grad_norm": 6.1875, "learning_rate": 8.05911049678389e-05, "loss": 1.6748, "step": 6652 }, { "epoch": 0.2907906814109008, "grad_norm": 14.9375, "learning_rate": 8.058567159221213e-05, "loss": 3.4629, "step": 6653 }, { "epoch": 0.29083438961493074, "grad_norm": 2.1875, "learning_rate": 8.058023763938922e-05, "loss": 1.752, "step": 6654 }, { "epoch": 0.2908780978189606, "grad_norm": 2.609375, "learning_rate": 8.057480310947271e-05, "loss": 2.5652, "step": 6655 }, { "epoch": 0.2909218060229905, "grad_norm": 2.78125, "learning_rate": 8.056936800256517e-05, "loss": 2.0146, "step": 6656 }, { "epoch": 0.29096551422702044, "grad_norm": 2.328125, "learning_rate": 8.056393231876918e-05, "loss": 1.7524, "step": 6657 }, { "epoch": 0.2910092224310503, "grad_norm": 2.6875, "learning_rate": 8.055849605818728e-05, "loss": 1.9087, "step": 6658 }, { "epoch": 0.2910529306350802, "grad_norm": 2.5625, "learning_rate": 8.055305922092208e-05, "loss": 2.1884, "step": 6659 }, { "epoch": 0.2910966388391101, "grad_norm": 1.8515625, "learning_rate": 8.05476218070762e-05, "loss": 1.6765, "step": 6660 }, { "epoch": 0.29114034704314, "grad_norm": 2.390625, "learning_rate": 8.054218381675225e-05, "loss": 1.5884, "step": 6661 }, { "epoch": 0.2911840552471699, "grad_norm": 2.125, "learning_rate": 8.053674525005282e-05, "loss": 1.8013, "step": 6662 }, { "epoch": 0.2912277634511998, "grad_norm": 2.28125, "learning_rate": 8.053130610708057e-05, "loss": 1.8213, "step": 6663 }, { "epoch": 0.2912714716552297, "grad_norm": 2.25, "learning_rate": 8.052586638793814e-05, "loss": 2.4342, "step": 6664 }, { "epoch": 0.2913151798592596, "grad_norm": 1.9375, "learning_rate": 8.052042609272817e-05, "loss": 1.9834, "step": 6665 }, { "epoch": 0.29135888806328947, "grad_norm": 2.78125, "learning_rate": 8.051498522155334e-05, "loss": 1.7118, "step": 6666 }, { "epoch": 0.2914025962673194, "grad_norm": 1.96875, "learning_rate": 8.050954377451634e-05, "loss": 1.9769, "step": 6667 }, { "epoch": 0.2914463044713493, "grad_norm": 2.046875, "learning_rate": 8.050410175171983e-05, "loss": 1.5139, "step": 6668 }, { "epoch": 0.29149001267537916, "grad_norm": 2.703125, "learning_rate": 8.049865915326653e-05, "loss": 2.0808, "step": 6669 }, { "epoch": 0.29153372087940904, "grad_norm": 3.015625, "learning_rate": 8.049321597925914e-05, "loss": 1.6965, "step": 6670 }, { "epoch": 0.291577429083439, "grad_norm": 2.015625, "learning_rate": 8.04877722298004e-05, "loss": 2.0674, "step": 6671 }, { "epoch": 0.29162113728746886, "grad_norm": 1.96875, "learning_rate": 8.0482327904993e-05, "loss": 1.6745, "step": 6672 }, { "epoch": 0.29166484549149874, "grad_norm": 2.234375, "learning_rate": 8.047688300493972e-05, "loss": 1.9184, "step": 6673 }, { "epoch": 0.29170855369552867, "grad_norm": 2.796875, "learning_rate": 8.047143752974331e-05, "loss": 1.4173, "step": 6674 }, { "epoch": 0.29175226189955855, "grad_norm": 3.96875, "learning_rate": 8.046599147950651e-05, "loss": 2.3369, "step": 6675 }, { "epoch": 0.29179597010358843, "grad_norm": 1.890625, "learning_rate": 8.046054485433211e-05, "loss": 1.6808, "step": 6676 }, { "epoch": 0.29183967830761837, "grad_norm": 3.25, "learning_rate": 8.04550976543229e-05, "loss": 1.5491, "step": 6677 }, { "epoch": 0.29188338651164825, "grad_norm": 2.53125, "learning_rate": 8.044964987958168e-05, "loss": 1.711, "step": 6678 }, { "epoch": 0.2919270947156781, "grad_norm": 2.390625, "learning_rate": 8.044420153021124e-05, "loss": 2.976, "step": 6679 }, { "epoch": 0.291970802919708, "grad_norm": 2.03125, "learning_rate": 8.043875260631442e-05, "loss": 1.2881, "step": 6680 }, { "epoch": 0.29201451112373794, "grad_norm": 2.0, "learning_rate": 8.043330310799402e-05, "loss": 1.7619, "step": 6681 }, { "epoch": 0.2920582193277678, "grad_norm": 2.984375, "learning_rate": 8.042785303535289e-05, "loss": 2.2613, "step": 6682 }, { "epoch": 0.2921019275317977, "grad_norm": 1.9921875, "learning_rate": 8.04224023884939e-05, "loss": 1.8949, "step": 6683 }, { "epoch": 0.29214563573582764, "grad_norm": 1.8125, "learning_rate": 8.041695116751991e-05, "loss": 1.5188, "step": 6684 }, { "epoch": 0.2921893439398575, "grad_norm": 2.40625, "learning_rate": 8.041149937253378e-05, "loss": 1.3007, "step": 6685 }, { "epoch": 0.2922330521438874, "grad_norm": 2.484375, "learning_rate": 8.040604700363838e-05, "loss": 2.9892, "step": 6686 }, { "epoch": 0.29227676034791733, "grad_norm": 2.15625, "learning_rate": 8.040059406093662e-05, "loss": 2.1024, "step": 6687 }, { "epoch": 0.2923204685519472, "grad_norm": 1.9296875, "learning_rate": 8.039514054453141e-05, "loss": 1.5767, "step": 6688 }, { "epoch": 0.2923641767559771, "grad_norm": 2.03125, "learning_rate": 8.038968645452567e-05, "loss": 1.3219, "step": 6689 }, { "epoch": 0.29240788496000697, "grad_norm": 2.078125, "learning_rate": 8.03842317910223e-05, "loss": 1.9124, "step": 6690 }, { "epoch": 0.2924515931640369, "grad_norm": 2.21875, "learning_rate": 8.037877655412426e-05, "loss": 1.6807, "step": 6691 }, { "epoch": 0.2924953013680668, "grad_norm": 2.3125, "learning_rate": 8.037332074393449e-05, "loss": 2.4909, "step": 6692 }, { "epoch": 0.29253900957209666, "grad_norm": 2.359375, "learning_rate": 8.036786436055595e-05, "loss": 1.5007, "step": 6693 }, { "epoch": 0.2925827177761266, "grad_norm": 2.671875, "learning_rate": 8.036240740409162e-05, "loss": 2.127, "step": 6694 }, { "epoch": 0.2926264259801565, "grad_norm": 2.21875, "learning_rate": 8.035694987464446e-05, "loss": 2.2236, "step": 6695 }, { "epoch": 0.29267013418418636, "grad_norm": 2.21875, "learning_rate": 8.035149177231749e-05, "loss": 1.577, "step": 6696 }, { "epoch": 0.2927138423882163, "grad_norm": 1.8515625, "learning_rate": 8.034603309721368e-05, "loss": 1.6475, "step": 6697 }, { "epoch": 0.2927575505922462, "grad_norm": 2.671875, "learning_rate": 8.034057384943606e-05, "loss": 1.9149, "step": 6698 }, { "epoch": 0.29280125879627605, "grad_norm": 2.078125, "learning_rate": 8.033511402908767e-05, "loss": 1.9467, "step": 6699 }, { "epoch": 0.29284496700030593, "grad_norm": 2.84375, "learning_rate": 8.03296536362715e-05, "loss": 3.2761, "step": 6700 }, { "epoch": 0.29288867520433587, "grad_norm": 2.109375, "learning_rate": 8.032419267109066e-05, "loss": 1.9798, "step": 6701 }, { "epoch": 0.29293238340836575, "grad_norm": 2.578125, "learning_rate": 8.031873113364814e-05, "loss": 2.787, "step": 6702 }, { "epoch": 0.29297609161239563, "grad_norm": 2.640625, "learning_rate": 8.031326902404703e-05, "loss": 2.9573, "step": 6703 }, { "epoch": 0.29301979981642556, "grad_norm": 1.9453125, "learning_rate": 8.030780634239043e-05, "loss": 1.6533, "step": 6704 }, { "epoch": 0.29306350802045544, "grad_norm": 2.25, "learning_rate": 8.030234308878142e-05, "loss": 2.3842, "step": 6705 }, { "epoch": 0.2931072162244853, "grad_norm": 17.625, "learning_rate": 8.02968792633231e-05, "loss": 2.1034, "step": 6706 }, { "epoch": 0.29315092442851526, "grad_norm": 2.4375, "learning_rate": 8.029141486611856e-05, "loss": 1.9293, "step": 6707 }, { "epoch": 0.29319463263254514, "grad_norm": 2.171875, "learning_rate": 8.028594989727092e-05, "loss": 1.9474, "step": 6708 }, { "epoch": 0.293238340836575, "grad_norm": 2.3125, "learning_rate": 8.028048435688333e-05, "loss": 1.9978, "step": 6709 }, { "epoch": 0.2932820490406049, "grad_norm": 1.8828125, "learning_rate": 8.027501824505895e-05, "loss": 1.4904, "step": 6710 }, { "epoch": 0.29332575724463483, "grad_norm": 2.140625, "learning_rate": 8.02695515619009e-05, "loss": 2.2193, "step": 6711 }, { "epoch": 0.2933694654486647, "grad_norm": 2.203125, "learning_rate": 8.026408430751235e-05, "loss": 2.0577, "step": 6712 }, { "epoch": 0.2934131736526946, "grad_norm": 1.9296875, "learning_rate": 8.025861648199649e-05, "loss": 1.9362, "step": 6713 }, { "epoch": 0.2934568818567245, "grad_norm": 2.828125, "learning_rate": 8.02531480854565e-05, "loss": 1.6689, "step": 6714 }, { "epoch": 0.2935005900607544, "grad_norm": 2.5625, "learning_rate": 8.024767911799558e-05, "loss": 2.1455, "step": 6715 }, { "epoch": 0.2935442982647843, "grad_norm": 1.9921875, "learning_rate": 8.024220957971693e-05, "loss": 2.7358, "step": 6716 }, { "epoch": 0.2935880064688142, "grad_norm": 2.09375, "learning_rate": 8.023673947072376e-05, "loss": 2.3428, "step": 6717 }, { "epoch": 0.2936317146728441, "grad_norm": 1.921875, "learning_rate": 8.023126879111931e-05, "loss": 2.0114, "step": 6718 }, { "epoch": 0.293675422876874, "grad_norm": 1.828125, "learning_rate": 8.022579754100681e-05, "loss": 1.714, "step": 6719 }, { "epoch": 0.29371913108090386, "grad_norm": 1.9609375, "learning_rate": 8.022032572048954e-05, "loss": 1.6105, "step": 6720 }, { "epoch": 0.2937628392849338, "grad_norm": 3.328125, "learning_rate": 8.021485332967072e-05, "loss": 2.8173, "step": 6721 }, { "epoch": 0.2938065474889637, "grad_norm": 2.328125, "learning_rate": 8.020938036865365e-05, "loss": 2.0372, "step": 6722 }, { "epoch": 0.29385025569299356, "grad_norm": 2.640625, "learning_rate": 8.020390683754161e-05, "loss": 1.6673, "step": 6723 }, { "epoch": 0.2938939638970235, "grad_norm": 2.328125, "learning_rate": 8.019843273643788e-05, "loss": 1.8072, "step": 6724 }, { "epoch": 0.29393767210105337, "grad_norm": 1.984375, "learning_rate": 8.019295806544578e-05, "loss": 1.9004, "step": 6725 }, { "epoch": 0.29398138030508325, "grad_norm": 2.015625, "learning_rate": 8.018748282466862e-05, "loss": 1.6449, "step": 6726 }, { "epoch": 0.2940250885091132, "grad_norm": 2.265625, "learning_rate": 8.018200701420971e-05, "loss": 2.4514, "step": 6727 }, { "epoch": 0.29406879671314307, "grad_norm": 4.28125, "learning_rate": 8.017653063417241e-05, "loss": 1.6911, "step": 6728 }, { "epoch": 0.29411250491717295, "grad_norm": 2.234375, "learning_rate": 8.017105368466006e-05, "loss": 1.3129, "step": 6729 }, { "epoch": 0.2941562131212028, "grad_norm": 2.5, "learning_rate": 8.016557616577601e-05, "loss": 2.1801, "step": 6730 }, { "epoch": 0.29419992132523276, "grad_norm": 2.078125, "learning_rate": 8.016009807762364e-05, "loss": 1.9602, "step": 6731 }, { "epoch": 0.29424362952926264, "grad_norm": 2.796875, "learning_rate": 8.015461942030631e-05, "loss": 2.5422, "step": 6732 }, { "epoch": 0.2942873377332925, "grad_norm": 2.421875, "learning_rate": 8.014914019392743e-05, "loss": 1.9976, "step": 6733 }, { "epoch": 0.29433104593732246, "grad_norm": 2.078125, "learning_rate": 8.01436603985904e-05, "loss": 1.7281, "step": 6734 }, { "epoch": 0.29437475414135233, "grad_norm": 2.265625, "learning_rate": 8.013818003439861e-05, "loss": 1.6319, "step": 6735 }, { "epoch": 0.2944184623453822, "grad_norm": 2.359375, "learning_rate": 8.013269910145552e-05, "loss": 1.9, "step": 6736 }, { "epoch": 0.29446217054941215, "grad_norm": 3.109375, "learning_rate": 8.012721759986452e-05, "loss": 2.5719, "step": 6737 }, { "epoch": 0.29450587875344203, "grad_norm": 2.734375, "learning_rate": 8.01217355297291e-05, "loss": 2.2032, "step": 6738 }, { "epoch": 0.2945495869574719, "grad_norm": 4.0, "learning_rate": 8.011625289115267e-05, "loss": 2.4784, "step": 6739 }, { "epoch": 0.2945932951615018, "grad_norm": 1.875, "learning_rate": 8.011076968423872e-05, "loss": 1.816, "step": 6740 }, { "epoch": 0.2946370033655317, "grad_norm": 2.15625, "learning_rate": 8.010528590909073e-05, "loss": 2.3117, "step": 6741 }, { "epoch": 0.2946807115695616, "grad_norm": 2.8125, "learning_rate": 8.009980156581217e-05, "loss": 1.533, "step": 6742 }, { "epoch": 0.2947244197735915, "grad_norm": 2.359375, "learning_rate": 8.009431665450655e-05, "loss": 2.4178, "step": 6743 }, { "epoch": 0.2947681279776214, "grad_norm": 2.140625, "learning_rate": 8.008883117527738e-05, "loss": 1.5071, "step": 6744 }, { "epoch": 0.2948118361816513, "grad_norm": 2.03125, "learning_rate": 8.008334512822817e-05, "loss": 1.9468, "step": 6745 }, { "epoch": 0.2948555443856812, "grad_norm": 2.3125, "learning_rate": 8.007785851346245e-05, "loss": 1.5961, "step": 6746 }, { "epoch": 0.2948992525897111, "grad_norm": 3.5625, "learning_rate": 8.007237133108376e-05, "loss": 2.2648, "step": 6747 }, { "epoch": 0.294942960793741, "grad_norm": 2.1875, "learning_rate": 8.006688358119568e-05, "loss": 1.8828, "step": 6748 }, { "epoch": 0.2949866689977709, "grad_norm": 2.046875, "learning_rate": 8.006139526390172e-05, "loss": 2.2068, "step": 6749 }, { "epoch": 0.29503037720180075, "grad_norm": 2.203125, "learning_rate": 8.005590637930548e-05, "loss": 2.3664, "step": 6750 }, { "epoch": 0.2950740854058307, "grad_norm": 2.296875, "learning_rate": 8.005041692751055e-05, "loss": 1.5957, "step": 6751 }, { "epoch": 0.29511779360986057, "grad_norm": 2.84375, "learning_rate": 8.00449269086205e-05, "loss": 1.9969, "step": 6752 }, { "epoch": 0.29516150181389045, "grad_norm": 3.109375, "learning_rate": 8.003943632273898e-05, "loss": 2.201, "step": 6753 }, { "epoch": 0.2952052100179204, "grad_norm": 2.171875, "learning_rate": 8.003394516996956e-05, "loss": 2.0344, "step": 6754 }, { "epoch": 0.29524891822195026, "grad_norm": 2.46875, "learning_rate": 8.002845345041589e-05, "loss": 2.9183, "step": 6755 }, { "epoch": 0.29529262642598014, "grad_norm": 2.0625, "learning_rate": 8.00229611641816e-05, "loss": 1.9433, "step": 6756 }, { "epoch": 0.2953363346300101, "grad_norm": 2.421875, "learning_rate": 8.001746831137032e-05, "loss": 1.548, "step": 6757 }, { "epoch": 0.29538004283403996, "grad_norm": 2.125, "learning_rate": 8.001197489208572e-05, "loss": 2.0139, "step": 6758 }, { "epoch": 0.29542375103806984, "grad_norm": 2.28125, "learning_rate": 8.00064809064315e-05, "loss": 1.7309, "step": 6759 }, { "epoch": 0.2954674592420997, "grad_norm": 2.5, "learning_rate": 8.00009863545113e-05, "loss": 1.7731, "step": 6760 }, { "epoch": 0.29551116744612965, "grad_norm": 1.8828125, "learning_rate": 7.999549123642882e-05, "loss": 1.9047, "step": 6761 }, { "epoch": 0.29555487565015953, "grad_norm": 2.53125, "learning_rate": 7.998999555228777e-05, "loss": 1.9589, "step": 6762 }, { "epoch": 0.2955985838541894, "grad_norm": 2.96875, "learning_rate": 7.998449930219185e-05, "loss": 3.0505, "step": 6763 }, { "epoch": 0.29564229205821935, "grad_norm": 2.21875, "learning_rate": 7.997900248624479e-05, "loss": 1.55, "step": 6764 }, { "epoch": 0.2956860002622492, "grad_norm": 2.1875, "learning_rate": 7.997350510455032e-05, "loss": 2.1873, "step": 6765 }, { "epoch": 0.2957297084662791, "grad_norm": 2.140625, "learning_rate": 7.99680071572122e-05, "loss": 1.4288, "step": 6766 }, { "epoch": 0.29577341667030904, "grad_norm": 2.15625, "learning_rate": 7.996250864433415e-05, "loss": 1.6355, "step": 6767 }, { "epoch": 0.2958171248743389, "grad_norm": 2.265625, "learning_rate": 7.995700956601995e-05, "loss": 1.9442, "step": 6768 }, { "epoch": 0.2958608330783688, "grad_norm": 2.46875, "learning_rate": 7.995150992237339e-05, "loss": 1.9044, "step": 6769 }, { "epoch": 0.2959045412823987, "grad_norm": 2.1875, "learning_rate": 7.994600971349825e-05, "loss": 2.1128, "step": 6770 }, { "epoch": 0.2959482494864286, "grad_norm": 2.1875, "learning_rate": 7.994050893949832e-05, "loss": 2.3428, "step": 6771 }, { "epoch": 0.2959919576904585, "grad_norm": 2.109375, "learning_rate": 7.993500760047739e-05, "loss": 2.0457, "step": 6772 }, { "epoch": 0.2960356658944884, "grad_norm": 2.796875, "learning_rate": 7.992950569653932e-05, "loss": 1.9142, "step": 6773 }, { "epoch": 0.2960793740985183, "grad_norm": 1.921875, "learning_rate": 7.992400322778791e-05, "loss": 1.6232, "step": 6774 }, { "epoch": 0.2961230823025482, "grad_norm": 2.015625, "learning_rate": 7.9918500194327e-05, "loss": 2.474, "step": 6775 }, { "epoch": 0.29616679050657807, "grad_norm": 2.6875, "learning_rate": 7.991299659626046e-05, "loss": 1.8029, "step": 6776 }, { "epoch": 0.296210498710608, "grad_norm": 2.15625, "learning_rate": 7.990749243369214e-05, "loss": 2.3675, "step": 6777 }, { "epoch": 0.2962542069146379, "grad_norm": 2.203125, "learning_rate": 7.99019877067259e-05, "loss": 1.9141, "step": 6778 }, { "epoch": 0.29629791511866777, "grad_norm": 2.171875, "learning_rate": 7.989648241546563e-05, "loss": 1.9746, "step": 6779 }, { "epoch": 0.29634162332269764, "grad_norm": 2.421875, "learning_rate": 7.989097656001524e-05, "loss": 2.2656, "step": 6780 }, { "epoch": 0.2963853315267276, "grad_norm": 1.875, "learning_rate": 7.98854701404786e-05, "loss": 1.8483, "step": 6781 }, { "epoch": 0.29642903973075746, "grad_norm": 2.203125, "learning_rate": 7.987996315695965e-05, "loss": 2.1934, "step": 6782 }, { "epoch": 0.29647274793478734, "grad_norm": 2.09375, "learning_rate": 7.98744556095623e-05, "loss": 1.8816, "step": 6783 }, { "epoch": 0.2965164561388173, "grad_norm": 2.546875, "learning_rate": 7.986894749839049e-05, "loss": 1.7686, "step": 6784 }, { "epoch": 0.29656016434284715, "grad_norm": 2.984375, "learning_rate": 7.986343882354818e-05, "loss": 2.356, "step": 6785 }, { "epoch": 0.29660387254687703, "grad_norm": 3.21875, "learning_rate": 7.985792958513931e-05, "loss": 1.9254, "step": 6786 }, { "epoch": 0.29664758075090697, "grad_norm": 2.921875, "learning_rate": 7.985241978326786e-05, "loss": 2.4049, "step": 6787 }, { "epoch": 0.29669128895493685, "grad_norm": 2.84375, "learning_rate": 7.984690941803779e-05, "loss": 1.249, "step": 6788 }, { "epoch": 0.29673499715896673, "grad_norm": 1.9453125, "learning_rate": 7.984139848955309e-05, "loss": 2.1914, "step": 6789 }, { "epoch": 0.2967787053629966, "grad_norm": 1.9296875, "learning_rate": 7.98358869979178e-05, "loss": 1.8875, "step": 6790 }, { "epoch": 0.29682241356702654, "grad_norm": 1.8046875, "learning_rate": 7.983037494323588e-05, "loss": 1.6059, "step": 6791 }, { "epoch": 0.2968661217710564, "grad_norm": 1.953125, "learning_rate": 7.982486232561138e-05, "loss": 1.8311, "step": 6792 }, { "epoch": 0.2969098299750863, "grad_norm": 2.140625, "learning_rate": 7.981934914514829e-05, "loss": 2.5336, "step": 6793 }, { "epoch": 0.29695353817911624, "grad_norm": 2.890625, "learning_rate": 7.98138354019507e-05, "loss": 1.7158, "step": 6794 }, { "epoch": 0.2969972463831461, "grad_norm": 1.9375, "learning_rate": 7.980832109612265e-05, "loss": 1.6328, "step": 6795 }, { "epoch": 0.297040954587176, "grad_norm": 2.359375, "learning_rate": 7.980280622776819e-05, "loss": 1.5029, "step": 6796 }, { "epoch": 0.29708466279120593, "grad_norm": 3.09375, "learning_rate": 7.979729079699141e-05, "loss": 1.9137, "step": 6797 }, { "epoch": 0.2971283709952358, "grad_norm": 2.28125, "learning_rate": 7.979177480389637e-05, "loss": 1.9737, "step": 6798 }, { "epoch": 0.2971720791992657, "grad_norm": 2.96875, "learning_rate": 7.978625824858719e-05, "loss": 2.503, "step": 6799 }, { "epoch": 0.2972157874032956, "grad_norm": 2.515625, "learning_rate": 7.978074113116796e-05, "loss": 1.8997, "step": 6800 }, { "epoch": 0.2972594956073255, "grad_norm": 2.828125, "learning_rate": 7.977522345174281e-05, "loss": 1.8808, "step": 6801 }, { "epoch": 0.2973032038113554, "grad_norm": 2.515625, "learning_rate": 7.976970521041585e-05, "loss": 2.4508, "step": 6802 }, { "epoch": 0.29734691201538527, "grad_norm": 2.484375, "learning_rate": 7.976418640729122e-05, "loss": 1.4971, "step": 6803 }, { "epoch": 0.2973906202194152, "grad_norm": 2.0625, "learning_rate": 7.975866704247307e-05, "loss": 1.9584, "step": 6804 }, { "epoch": 0.2974343284234451, "grad_norm": 2.25, "learning_rate": 7.975314711606558e-05, "loss": 2.1464, "step": 6805 }, { "epoch": 0.29747803662747496, "grad_norm": 1.9453125, "learning_rate": 7.974762662817289e-05, "loss": 1.552, "step": 6806 }, { "epoch": 0.2975217448315049, "grad_norm": 2.984375, "learning_rate": 7.974210557889919e-05, "loss": 2.6815, "step": 6807 }, { "epoch": 0.2975654530355348, "grad_norm": 2.34375, "learning_rate": 7.973658396834866e-05, "loss": 2.0794, "step": 6808 }, { "epoch": 0.29760916123956466, "grad_norm": 2.296875, "learning_rate": 7.973106179662553e-05, "loss": 2.1026, "step": 6809 }, { "epoch": 0.29765286944359454, "grad_norm": 1.9453125, "learning_rate": 7.972553906383398e-05, "loss": 1.8778, "step": 6810 }, { "epoch": 0.29769657764762447, "grad_norm": 2.375, "learning_rate": 7.972001577007825e-05, "loss": 2.0974, "step": 6811 }, { "epoch": 0.29774028585165435, "grad_norm": 2.46875, "learning_rate": 7.971449191546256e-05, "loss": 2.1475, "step": 6812 }, { "epoch": 0.29778399405568423, "grad_norm": 1.96875, "learning_rate": 7.970896750009115e-05, "loss": 1.5038, "step": 6813 }, { "epoch": 0.29782770225971417, "grad_norm": 1.953125, "learning_rate": 7.970344252406831e-05, "loss": 2.0435, "step": 6814 }, { "epoch": 0.29787141046374405, "grad_norm": 2.953125, "learning_rate": 7.969791698749827e-05, "loss": 1.9473, "step": 6815 }, { "epoch": 0.2979151186677739, "grad_norm": 2.40625, "learning_rate": 7.969239089048531e-05, "loss": 2.4137, "step": 6816 }, { "epoch": 0.29795882687180386, "grad_norm": 2.125, "learning_rate": 7.968686423313372e-05, "loss": 2.2156, "step": 6817 }, { "epoch": 0.29800253507583374, "grad_norm": 2.125, "learning_rate": 7.968133701554779e-05, "loss": 1.4572, "step": 6818 }, { "epoch": 0.2980462432798636, "grad_norm": 2.4375, "learning_rate": 7.967580923783184e-05, "loss": 2.623, "step": 6819 }, { "epoch": 0.2980899514838935, "grad_norm": 1.890625, "learning_rate": 7.967028090009016e-05, "loss": 1.7034, "step": 6820 }, { "epoch": 0.29813365968792344, "grad_norm": 2.21875, "learning_rate": 7.966475200242713e-05, "loss": 1.4808, "step": 6821 }, { "epoch": 0.2981773678919533, "grad_norm": 4.71875, "learning_rate": 7.965922254494702e-05, "loss": 1.6554, "step": 6822 }, { "epoch": 0.2982210760959832, "grad_norm": 2.265625, "learning_rate": 7.965369252775424e-05, "loss": 1.5935, "step": 6823 }, { "epoch": 0.29826478430001313, "grad_norm": 1.8671875, "learning_rate": 7.96481619509531e-05, "loss": 1.3847, "step": 6824 }, { "epoch": 0.298308492504043, "grad_norm": 2.109375, "learning_rate": 7.9642630814648e-05, "loss": 2.2384, "step": 6825 }, { "epoch": 0.2983522007080729, "grad_norm": 2.09375, "learning_rate": 7.963709911894333e-05, "loss": 1.6818, "step": 6826 }, { "epoch": 0.2983959089121028, "grad_norm": 1.8984375, "learning_rate": 7.963156686394345e-05, "loss": 1.734, "step": 6827 }, { "epoch": 0.2984396171161327, "grad_norm": 2.375, "learning_rate": 7.962603404975278e-05, "loss": 2.9876, "step": 6828 }, { "epoch": 0.2984833253201626, "grad_norm": 2.28125, "learning_rate": 7.962050067647573e-05, "loss": 2.0571, "step": 6829 }, { "epoch": 0.29852703352419246, "grad_norm": 2.171875, "learning_rate": 7.961496674421672e-05, "loss": 1.4679, "step": 6830 }, { "epoch": 0.2985707417282224, "grad_norm": 2.046875, "learning_rate": 7.960943225308019e-05, "loss": 1.9087, "step": 6831 }, { "epoch": 0.2986144499322523, "grad_norm": 2.109375, "learning_rate": 7.960389720317057e-05, "loss": 1.6903, "step": 6832 }, { "epoch": 0.29865815813628216, "grad_norm": 1.96875, "learning_rate": 7.959836159459231e-05, "loss": 1.7672, "step": 6833 }, { "epoch": 0.2987018663403121, "grad_norm": 1.8984375, "learning_rate": 7.959282542744992e-05, "loss": 1.6609, "step": 6834 }, { "epoch": 0.298745574544342, "grad_norm": 1.9296875, "learning_rate": 7.958728870184782e-05, "loss": 1.8973, "step": 6835 }, { "epoch": 0.29878928274837185, "grad_norm": 2.359375, "learning_rate": 7.958175141789054e-05, "loss": 1.6439, "step": 6836 }, { "epoch": 0.2988329909524018, "grad_norm": 2.0625, "learning_rate": 7.957621357568254e-05, "loss": 1.9569, "step": 6837 }, { "epoch": 0.29887669915643167, "grad_norm": 2.96875, "learning_rate": 7.957067517532835e-05, "loss": 2.3333, "step": 6838 }, { "epoch": 0.29892040736046155, "grad_norm": 2.53125, "learning_rate": 7.956513621693248e-05, "loss": 1.7887, "step": 6839 }, { "epoch": 0.29896411556449143, "grad_norm": 2.21875, "learning_rate": 7.955959670059947e-05, "loss": 1.6348, "step": 6840 }, { "epoch": 0.29900782376852136, "grad_norm": 2.390625, "learning_rate": 7.955405662643384e-05, "loss": 2.1351, "step": 6841 }, { "epoch": 0.29905153197255124, "grad_norm": 1.984375, "learning_rate": 7.954851599454014e-05, "loss": 2.4259, "step": 6842 }, { "epoch": 0.2990952401765811, "grad_norm": 1.921875, "learning_rate": 7.954297480502293e-05, "loss": 1.6529, "step": 6843 }, { "epoch": 0.29913894838061106, "grad_norm": 1.953125, "learning_rate": 7.953743305798682e-05, "loss": 1.8452, "step": 6844 }, { "epoch": 0.29918265658464094, "grad_norm": 1.9609375, "learning_rate": 7.953189075353633e-05, "loss": 1.8695, "step": 6845 }, { "epoch": 0.2992263647886708, "grad_norm": 2.3125, "learning_rate": 7.95263478917761e-05, "loss": 2.2973, "step": 6846 }, { "epoch": 0.29927007299270075, "grad_norm": 2.015625, "learning_rate": 7.95208044728107e-05, "loss": 1.5313, "step": 6847 }, { "epoch": 0.29931378119673063, "grad_norm": 2.140625, "learning_rate": 7.951526049674475e-05, "loss": 1.9208, "step": 6848 }, { "epoch": 0.2993574894007605, "grad_norm": 2.359375, "learning_rate": 7.950971596368289e-05, "loss": 1.8555, "step": 6849 }, { "epoch": 0.2994011976047904, "grad_norm": 2.75, "learning_rate": 7.950417087372972e-05, "loss": 1.3852, "step": 6850 }, { "epoch": 0.2994449058088203, "grad_norm": 2.765625, "learning_rate": 7.949862522698992e-05, "loss": 2.5552, "step": 6851 }, { "epoch": 0.2994886140128502, "grad_norm": 2.0, "learning_rate": 7.949307902356813e-05, "loss": 1.6658, "step": 6852 }, { "epoch": 0.2995323222168801, "grad_norm": 2.140625, "learning_rate": 7.9487532263569e-05, "loss": 2.0919, "step": 6853 }, { "epoch": 0.29957603042091, "grad_norm": 2.203125, "learning_rate": 7.948198494709724e-05, "loss": 1.8746, "step": 6854 }, { "epoch": 0.2996197386249399, "grad_norm": 2.125, "learning_rate": 7.947643707425749e-05, "loss": 1.9121, "step": 6855 }, { "epoch": 0.2996634468289698, "grad_norm": 2.875, "learning_rate": 7.94708886451545e-05, "loss": 2.0367, "step": 6856 }, { "epoch": 0.2997071550329997, "grad_norm": 2.390625, "learning_rate": 7.946533965989293e-05, "loss": 1.6858, "step": 6857 }, { "epoch": 0.2997508632370296, "grad_norm": 2.1875, "learning_rate": 7.945979011857751e-05, "loss": 2.0082, "step": 6858 }, { "epoch": 0.2997945714410595, "grad_norm": 1.984375, "learning_rate": 7.945424002131298e-05, "loss": 1.92, "step": 6859 }, { "epoch": 0.29983827964508936, "grad_norm": 1.8828125, "learning_rate": 7.944868936820408e-05, "loss": 1.7948, "step": 6860 }, { "epoch": 0.2998819878491193, "grad_norm": 1.9453125, "learning_rate": 7.944313815935556e-05, "loss": 1.986, "step": 6861 }, { "epoch": 0.29992569605314917, "grad_norm": 2.015625, "learning_rate": 7.943758639487216e-05, "loss": 1.5364, "step": 6862 }, { "epoch": 0.29996940425717905, "grad_norm": 2.0625, "learning_rate": 7.943203407485864e-05, "loss": 2.095, "step": 6863 }, { "epoch": 0.300013112461209, "grad_norm": 3.328125, "learning_rate": 7.942648119941982e-05, "loss": 1.8194, "step": 6864 }, { "epoch": 0.30005682066523887, "grad_norm": 2.03125, "learning_rate": 7.942092776866048e-05, "loss": 1.7115, "step": 6865 }, { "epoch": 0.30010052886926875, "grad_norm": 2.5, "learning_rate": 7.94153737826854e-05, "loss": 1.4567, "step": 6866 }, { "epoch": 0.3001442370732987, "grad_norm": 2.140625, "learning_rate": 7.94098192415994e-05, "loss": 2.0076, "step": 6867 }, { "epoch": 0.30018794527732856, "grad_norm": 1.96875, "learning_rate": 7.940426414550732e-05, "loss": 1.7571, "step": 6868 }, { "epoch": 0.30023165348135844, "grad_norm": 2.421875, "learning_rate": 7.939870849451398e-05, "loss": 1.8405, "step": 6869 }, { "epoch": 0.3002753616853883, "grad_norm": 3.3125, "learning_rate": 7.939315228872421e-05, "loss": 2.0807, "step": 6870 }, { "epoch": 0.30031906988941826, "grad_norm": 3.53125, "learning_rate": 7.938759552824288e-05, "loss": 2.4804, "step": 6871 }, { "epoch": 0.30036277809344814, "grad_norm": 2.265625, "learning_rate": 7.938203821317487e-05, "loss": 2.3102, "step": 6872 }, { "epoch": 0.300406486297478, "grad_norm": 1.8515625, "learning_rate": 7.937648034362502e-05, "loss": 1.4459, "step": 6873 }, { "epoch": 0.30045019450150795, "grad_norm": 1.796875, "learning_rate": 7.937092191969821e-05, "loss": 1.3184, "step": 6874 }, { "epoch": 0.30049390270553783, "grad_norm": 2.140625, "learning_rate": 7.936536294149939e-05, "loss": 1.9091, "step": 6875 }, { "epoch": 0.3005376109095677, "grad_norm": 1.8515625, "learning_rate": 7.935980340913342e-05, "loss": 1.5597, "step": 6876 }, { "epoch": 0.30058131911359764, "grad_norm": 1.8671875, "learning_rate": 7.935424332270522e-05, "loss": 1.9584, "step": 6877 }, { "epoch": 0.3006250273176275, "grad_norm": 2.171875, "learning_rate": 7.934868268231973e-05, "loss": 1.6071, "step": 6878 }, { "epoch": 0.3006687355216574, "grad_norm": 2.234375, "learning_rate": 7.93431214880819e-05, "loss": 2.0378, "step": 6879 }, { "epoch": 0.3007124437256873, "grad_norm": 2.515625, "learning_rate": 7.933755974009663e-05, "loss": 2.7808, "step": 6880 }, { "epoch": 0.3007561519297172, "grad_norm": 2.015625, "learning_rate": 7.933199743846893e-05, "loss": 1.7805, "step": 6881 }, { "epoch": 0.3007998601337471, "grad_norm": 3.109375, "learning_rate": 7.932643458330374e-05, "loss": 2.1663, "step": 6882 }, { "epoch": 0.300843568337777, "grad_norm": 2.25, "learning_rate": 7.932087117470606e-05, "loss": 1.845, "step": 6883 }, { "epoch": 0.3008872765418069, "grad_norm": 1.984375, "learning_rate": 7.931530721278084e-05, "loss": 1.5432, "step": 6884 }, { "epoch": 0.3009309847458368, "grad_norm": 2.40625, "learning_rate": 7.930974269763313e-05, "loss": 2.3063, "step": 6885 }, { "epoch": 0.3009746929498667, "grad_norm": 2.171875, "learning_rate": 7.93041776293679e-05, "loss": 2.1164, "step": 6886 }, { "epoch": 0.3010184011538966, "grad_norm": 2.53125, "learning_rate": 7.929861200809021e-05, "loss": 2.5617, "step": 6887 }, { "epoch": 0.3010621093579265, "grad_norm": 2.078125, "learning_rate": 7.929304583390505e-05, "loss": 2.2743, "step": 6888 }, { "epoch": 0.30110581756195637, "grad_norm": 2.203125, "learning_rate": 7.92874791069175e-05, "loss": 2.0906, "step": 6889 }, { "epoch": 0.30114952576598625, "grad_norm": 2.0, "learning_rate": 7.928191182723256e-05, "loss": 1.8454, "step": 6890 }, { "epoch": 0.3011932339700162, "grad_norm": 2.046875, "learning_rate": 7.927634399495536e-05, "loss": 1.8599, "step": 6891 }, { "epoch": 0.30123694217404606, "grad_norm": 1.875, "learning_rate": 7.927077561019092e-05, "loss": 1.3247, "step": 6892 }, { "epoch": 0.30128065037807594, "grad_norm": 1.8984375, "learning_rate": 7.926520667304434e-05, "loss": 1.5736, "step": 6893 }, { "epoch": 0.3013243585821059, "grad_norm": 1.8984375, "learning_rate": 7.925963718362073e-05, "loss": 1.8178, "step": 6894 }, { "epoch": 0.30136806678613576, "grad_norm": 2.0625, "learning_rate": 7.925406714202517e-05, "loss": 1.571, "step": 6895 }, { "epoch": 0.30141177499016564, "grad_norm": 2.4375, "learning_rate": 7.924849654836281e-05, "loss": 2.1494, "step": 6896 }, { "epoch": 0.3014554831941956, "grad_norm": 1.953125, "learning_rate": 7.924292540273872e-05, "loss": 1.5882, "step": 6897 }, { "epoch": 0.30149919139822545, "grad_norm": 2.40625, "learning_rate": 7.923735370525809e-05, "loss": 1.4472, "step": 6898 }, { "epoch": 0.30154289960225533, "grad_norm": 2.421875, "learning_rate": 7.923178145602603e-05, "loss": 2.0544, "step": 6899 }, { "epoch": 0.30158660780628527, "grad_norm": 2.203125, "learning_rate": 7.922620865514772e-05, "loss": 2.23, "step": 6900 }, { "epoch": 0.30163031601031515, "grad_norm": 2.34375, "learning_rate": 7.92206353027283e-05, "loss": 1.5023, "step": 6901 }, { "epoch": 0.301674024214345, "grad_norm": 1.9375, "learning_rate": 7.921506139887297e-05, "loss": 1.8787, "step": 6902 }, { "epoch": 0.3017177324183749, "grad_norm": 2.34375, "learning_rate": 7.92094869436869e-05, "loss": 1.8779, "step": 6903 }, { "epoch": 0.30176144062240484, "grad_norm": 3.078125, "learning_rate": 7.920391193727532e-05, "loss": 2.8937, "step": 6904 }, { "epoch": 0.3018051488264347, "grad_norm": 2.328125, "learning_rate": 7.91983363797434e-05, "loss": 2.0161, "step": 6905 }, { "epoch": 0.3018488570304646, "grad_norm": 2.171875, "learning_rate": 7.91927602711964e-05, "loss": 1.6358, "step": 6906 }, { "epoch": 0.30189256523449454, "grad_norm": 2.734375, "learning_rate": 7.91871836117395e-05, "loss": 1.476, "step": 6907 }, { "epoch": 0.3019362734385244, "grad_norm": 2.21875, "learning_rate": 7.918160640147798e-05, "loss": 2.321, "step": 6908 }, { "epoch": 0.3019799816425543, "grad_norm": 1.9453125, "learning_rate": 7.917602864051706e-05, "loss": 2.1206, "step": 6909 }, { "epoch": 0.30202368984658423, "grad_norm": 2.046875, "learning_rate": 7.917045032896202e-05, "loss": 2.1465, "step": 6910 }, { "epoch": 0.3020673980506141, "grad_norm": 2.0, "learning_rate": 7.916487146691815e-05, "loss": 1.9398, "step": 6911 }, { "epoch": 0.302111106254644, "grad_norm": 2.328125, "learning_rate": 7.915929205449069e-05, "loss": 1.5202, "step": 6912 }, { "epoch": 0.30215481445867387, "grad_norm": 1.75, "learning_rate": 7.915371209178494e-05, "loss": 1.6357, "step": 6913 }, { "epoch": 0.3021985226627038, "grad_norm": 2.359375, "learning_rate": 7.914813157890623e-05, "loss": 1.6771, "step": 6914 }, { "epoch": 0.3022422308667337, "grad_norm": 2.171875, "learning_rate": 7.914255051595984e-05, "loss": 2.1166, "step": 6915 }, { "epoch": 0.30228593907076357, "grad_norm": 2.125, "learning_rate": 7.913696890305112e-05, "loss": 1.9647, "step": 6916 }, { "epoch": 0.3023296472747935, "grad_norm": 1.8203125, "learning_rate": 7.913138674028537e-05, "loss": 1.7212, "step": 6917 }, { "epoch": 0.3023733554788234, "grad_norm": 2.21875, "learning_rate": 7.912580402776797e-05, "loss": 1.934, "step": 6918 }, { "epoch": 0.30241706368285326, "grad_norm": 2.046875, "learning_rate": 7.912022076560426e-05, "loss": 2.1029, "step": 6919 }, { "epoch": 0.3024607718868832, "grad_norm": 1.96875, "learning_rate": 7.911463695389959e-05, "loss": 2.3138, "step": 6920 }, { "epoch": 0.3025044800909131, "grad_norm": 3.640625, "learning_rate": 7.910905259275936e-05, "loss": 1.9484, "step": 6921 }, { "epoch": 0.30254818829494295, "grad_norm": 2.6875, "learning_rate": 7.910346768228894e-05, "loss": 2.4706, "step": 6922 }, { "epoch": 0.30259189649897283, "grad_norm": 2.25, "learning_rate": 7.909788222259372e-05, "loss": 2.3833, "step": 6923 }, { "epoch": 0.30263560470300277, "grad_norm": 2.296875, "learning_rate": 7.90922962137791e-05, "loss": 1.9278, "step": 6924 }, { "epoch": 0.30267931290703265, "grad_norm": 2.671875, "learning_rate": 7.908670965595052e-05, "loss": 2.091, "step": 6925 }, { "epoch": 0.30272302111106253, "grad_norm": 1.8984375, "learning_rate": 7.908112254921341e-05, "loss": 1.9959, "step": 6926 }, { "epoch": 0.30276672931509246, "grad_norm": 1.953125, "learning_rate": 7.907553489367316e-05, "loss": 1.8171, "step": 6927 }, { "epoch": 0.30281043751912234, "grad_norm": 2.5, "learning_rate": 7.906994668943528e-05, "loss": 1.744, "step": 6928 }, { "epoch": 0.3028541457231522, "grad_norm": 3.0, "learning_rate": 7.906435793660519e-05, "loss": 2.5186, "step": 6929 }, { "epoch": 0.30289785392718216, "grad_norm": 2.4375, "learning_rate": 7.905876863528834e-05, "loss": 2.2941, "step": 6930 }, { "epoch": 0.30294156213121204, "grad_norm": 2.484375, "learning_rate": 7.905317878559026e-05, "loss": 1.5411, "step": 6931 }, { "epoch": 0.3029852703352419, "grad_norm": 2.3125, "learning_rate": 7.90475883876164e-05, "loss": 2.3315, "step": 6932 }, { "epoch": 0.3030289785392718, "grad_norm": 3.828125, "learning_rate": 7.904199744147228e-05, "loss": 2.2387, "step": 6933 }, { "epoch": 0.30307268674330173, "grad_norm": 2.828125, "learning_rate": 7.903640594726339e-05, "loss": 2.0719, "step": 6934 }, { "epoch": 0.3031163949473316, "grad_norm": 2.015625, "learning_rate": 7.903081390509525e-05, "loss": 1.5947, "step": 6935 }, { "epoch": 0.3031601031513615, "grad_norm": 2.03125, "learning_rate": 7.902522131507341e-05, "loss": 1.4249, "step": 6936 }, { "epoch": 0.30320381135539143, "grad_norm": 2.109375, "learning_rate": 7.901962817730341e-05, "loss": 1.5904, "step": 6937 }, { "epoch": 0.3032475195594213, "grad_norm": 1.921875, "learning_rate": 7.901403449189077e-05, "loss": 1.9359, "step": 6938 }, { "epoch": 0.3032912277634512, "grad_norm": 2.140625, "learning_rate": 7.900844025894109e-05, "loss": 2.1124, "step": 6939 }, { "epoch": 0.3033349359674811, "grad_norm": 3.015625, "learning_rate": 7.900284547855991e-05, "loss": 2.2528, "step": 6940 }, { "epoch": 0.303378644171511, "grad_norm": 2.28125, "learning_rate": 7.899725015085285e-05, "loss": 2.0718, "step": 6941 }, { "epoch": 0.3034223523755409, "grad_norm": 2.546875, "learning_rate": 7.899165427592543e-05, "loss": 2.1333, "step": 6942 }, { "epoch": 0.30346606057957076, "grad_norm": 2.953125, "learning_rate": 7.898605785388334e-05, "loss": 1.8171, "step": 6943 }, { "epoch": 0.3035097687836007, "grad_norm": 2.515625, "learning_rate": 7.898046088483214e-05, "loss": 2.5504, "step": 6944 }, { "epoch": 0.3035534769876306, "grad_norm": 2.5, "learning_rate": 7.897486336887746e-05, "loss": 2.1916, "step": 6945 }, { "epoch": 0.30359718519166046, "grad_norm": 1.8671875, "learning_rate": 7.896926530612492e-05, "loss": 1.616, "step": 6946 }, { "epoch": 0.3036408933956904, "grad_norm": 2.28125, "learning_rate": 7.89636666966802e-05, "loss": 1.3841, "step": 6947 }, { "epoch": 0.30368460159972027, "grad_norm": 2.890625, "learning_rate": 7.895806754064893e-05, "loss": 1.758, "step": 6948 }, { "epoch": 0.30372830980375015, "grad_norm": 2.421875, "learning_rate": 7.895246783813677e-05, "loss": 2.1847, "step": 6949 }, { "epoch": 0.3037720180077801, "grad_norm": 3.515625, "learning_rate": 7.894686758924942e-05, "loss": 1.5626, "step": 6950 }, { "epoch": 0.30381572621180997, "grad_norm": 2.078125, "learning_rate": 7.894126679409254e-05, "loss": 1.8753, "step": 6951 }, { "epoch": 0.30385943441583985, "grad_norm": 2.234375, "learning_rate": 7.893566545277184e-05, "loss": 1.9157, "step": 6952 }, { "epoch": 0.3039031426198697, "grad_norm": 1.984375, "learning_rate": 7.893006356539303e-05, "loss": 1.4586, "step": 6953 }, { "epoch": 0.30394685082389966, "grad_norm": 2.59375, "learning_rate": 7.89244611320618e-05, "loss": 2.8266, "step": 6954 }, { "epoch": 0.30399055902792954, "grad_norm": 2.53125, "learning_rate": 7.891885815288388e-05, "loss": 2.0882, "step": 6955 }, { "epoch": 0.3040342672319594, "grad_norm": 3.0, "learning_rate": 7.891325462796503e-05, "loss": 3.2572, "step": 6956 }, { "epoch": 0.30407797543598936, "grad_norm": 2.328125, "learning_rate": 7.890765055741098e-05, "loss": 2.1291, "step": 6957 }, { "epoch": 0.30412168364001924, "grad_norm": 12.75, "learning_rate": 7.89020459413275e-05, "loss": 2.1205, "step": 6958 }, { "epoch": 0.3041653918440491, "grad_norm": 2.0625, "learning_rate": 7.889644077982033e-05, "loss": 1.6894, "step": 6959 }, { "epoch": 0.30420910004807905, "grad_norm": 1.9921875, "learning_rate": 7.889083507299529e-05, "loss": 1.7101, "step": 6960 }, { "epoch": 0.30425280825210893, "grad_norm": 2.421875, "learning_rate": 7.888522882095813e-05, "loss": 1.3514, "step": 6961 }, { "epoch": 0.3042965164561388, "grad_norm": 2.421875, "learning_rate": 7.887962202381465e-05, "loss": 1.7874, "step": 6962 }, { "epoch": 0.3043402246601687, "grad_norm": 2.09375, "learning_rate": 7.887401468167068e-05, "loss": 1.7584, "step": 6963 }, { "epoch": 0.3043839328641986, "grad_norm": 2.234375, "learning_rate": 7.886840679463203e-05, "loss": 1.8778, "step": 6964 }, { "epoch": 0.3044276410682285, "grad_norm": 2.71875, "learning_rate": 7.886279836280454e-05, "loss": 1.6752, "step": 6965 }, { "epoch": 0.3044713492722584, "grad_norm": 2.484375, "learning_rate": 7.885718938629402e-05, "loss": 1.838, "step": 6966 }, { "epoch": 0.3045150574762883, "grad_norm": 3.5625, "learning_rate": 7.885157986520634e-05, "loss": 2.1676, "step": 6967 }, { "epoch": 0.3045587656803182, "grad_norm": 2.234375, "learning_rate": 7.884596979964736e-05, "loss": 1.8915, "step": 6968 }, { "epoch": 0.3046024738843481, "grad_norm": 1.875, "learning_rate": 7.884035918972295e-05, "loss": 1.6817, "step": 6969 }, { "epoch": 0.304646182088378, "grad_norm": 2.015625, "learning_rate": 7.883474803553899e-05, "loss": 1.9612, "step": 6970 }, { "epoch": 0.3046898902924079, "grad_norm": 1.8984375, "learning_rate": 7.882913633720135e-05, "loss": 1.4549, "step": 6971 }, { "epoch": 0.3047335984964378, "grad_norm": 2.234375, "learning_rate": 7.882352409481597e-05, "loss": 1.7752, "step": 6972 }, { "epoch": 0.30477730670046765, "grad_norm": 3.9375, "learning_rate": 7.881791130848873e-05, "loss": 1.3633, "step": 6973 }, { "epoch": 0.3048210149044976, "grad_norm": 2.1875, "learning_rate": 7.881229797832554e-05, "loss": 1.5878, "step": 6974 }, { "epoch": 0.30486472310852747, "grad_norm": 2.15625, "learning_rate": 7.880668410443238e-05, "loss": 2.0573, "step": 6975 }, { "epoch": 0.30490843131255735, "grad_norm": 2.015625, "learning_rate": 7.880106968691517e-05, "loss": 1.8925, "step": 6976 }, { "epoch": 0.3049521395165873, "grad_norm": 2.078125, "learning_rate": 7.879545472587984e-05, "loss": 1.6229, "step": 6977 }, { "epoch": 0.30499584772061716, "grad_norm": 2.40625, "learning_rate": 7.878983922143237e-05, "loss": 1.9097, "step": 6978 }, { "epoch": 0.30503955592464704, "grad_norm": 2.515625, "learning_rate": 7.878422317367873e-05, "loss": 1.9204, "step": 6979 }, { "epoch": 0.305083264128677, "grad_norm": 2.234375, "learning_rate": 7.877860658272491e-05, "loss": 2.0385, "step": 6980 }, { "epoch": 0.30512697233270686, "grad_norm": 2.359375, "learning_rate": 7.87729894486769e-05, "loss": 1.7698, "step": 6981 }, { "epoch": 0.30517068053673674, "grad_norm": 2.140625, "learning_rate": 7.876737177164071e-05, "loss": 2.0247, "step": 6982 }, { "epoch": 0.3052143887407666, "grad_norm": 2.515625, "learning_rate": 7.876175355172234e-05, "loss": 2.0221, "step": 6983 }, { "epoch": 0.30525809694479655, "grad_norm": 2.0, "learning_rate": 7.875613478902782e-05, "loss": 1.7424, "step": 6984 }, { "epoch": 0.30530180514882643, "grad_norm": 2.21875, "learning_rate": 7.875051548366316e-05, "loss": 1.5619, "step": 6985 }, { "epoch": 0.3053455133528563, "grad_norm": 2.078125, "learning_rate": 7.874489563573446e-05, "loss": 1.806, "step": 6986 }, { "epoch": 0.30538922155688625, "grad_norm": 2.1875, "learning_rate": 7.873927524534775e-05, "loss": 2.3417, "step": 6987 }, { "epoch": 0.3054329297609161, "grad_norm": 1.9296875, "learning_rate": 7.873365431260906e-05, "loss": 1.8671, "step": 6988 }, { "epoch": 0.305476637964946, "grad_norm": 1.875, "learning_rate": 7.87280328376245e-05, "loss": 1.7924, "step": 6989 }, { "epoch": 0.30552034616897594, "grad_norm": 1.8984375, "learning_rate": 7.872241082050016e-05, "loss": 1.9036, "step": 6990 }, { "epoch": 0.3055640543730058, "grad_norm": 1.875, "learning_rate": 7.871678826134211e-05, "loss": 1.6127, "step": 6991 }, { "epoch": 0.3056077625770357, "grad_norm": 2.5, "learning_rate": 7.871116516025647e-05, "loss": 1.8068, "step": 6992 }, { "epoch": 0.3056514707810656, "grad_norm": 2.421875, "learning_rate": 7.870554151734937e-05, "loss": 2.2059, "step": 6993 }, { "epoch": 0.3056951789850955, "grad_norm": 2.484375, "learning_rate": 7.869991733272692e-05, "loss": 1.8673, "step": 6994 }, { "epoch": 0.3057388871891254, "grad_norm": 1.953125, "learning_rate": 7.869429260649526e-05, "loss": 1.5146, "step": 6995 }, { "epoch": 0.3057825953931553, "grad_norm": 3.640625, "learning_rate": 7.868866733876052e-05, "loss": 1.1441, "step": 6996 }, { "epoch": 0.3058263035971852, "grad_norm": 2.109375, "learning_rate": 7.868304152962889e-05, "loss": 1.926, "step": 6997 }, { "epoch": 0.3058700118012151, "grad_norm": 2.59375, "learning_rate": 7.867741517920653e-05, "loss": 1.4671, "step": 6998 }, { "epoch": 0.30591372000524497, "grad_norm": 2.40625, "learning_rate": 7.867178828759958e-05, "loss": 2.3758, "step": 6999 }, { "epoch": 0.3059574282092749, "grad_norm": 1.9296875, "learning_rate": 7.866616085491426e-05, "loss": 1.7675, "step": 7000 }, { "epoch": 0.3060011364133048, "grad_norm": 1.7578125, "learning_rate": 7.866053288125678e-05, "loss": 1.6345, "step": 7001 }, { "epoch": 0.30604484461733467, "grad_norm": 2.328125, "learning_rate": 7.865490436673331e-05, "loss": 1.8859, "step": 7002 }, { "epoch": 0.30608855282136455, "grad_norm": 2.421875, "learning_rate": 7.864927531145011e-05, "loss": 1.9555, "step": 7003 }, { "epoch": 0.3061322610253945, "grad_norm": 2.09375, "learning_rate": 7.864364571551337e-05, "loss": 1.4031, "step": 7004 }, { "epoch": 0.30617596922942436, "grad_norm": 2.46875, "learning_rate": 7.863801557902936e-05, "loss": 1.9749, "step": 7005 }, { "epoch": 0.30621967743345424, "grad_norm": 2.46875, "learning_rate": 7.863238490210432e-05, "loss": 1.5911, "step": 7006 }, { "epoch": 0.3062633856374842, "grad_norm": 2.984375, "learning_rate": 7.862675368484449e-05, "loss": 2.1096, "step": 7007 }, { "epoch": 0.30630709384151406, "grad_norm": 2.265625, "learning_rate": 7.862112192735616e-05, "loss": 1.6069, "step": 7008 }, { "epoch": 0.30635080204554394, "grad_norm": 2.65625, "learning_rate": 7.86154896297456e-05, "loss": 2.3773, "step": 7009 }, { "epoch": 0.30639451024957387, "grad_norm": 2.125, "learning_rate": 7.86098567921191e-05, "loss": 1.9284, "step": 7010 }, { "epoch": 0.30643821845360375, "grad_norm": 2.171875, "learning_rate": 7.860422341458298e-05, "loss": 1.9852, "step": 7011 }, { "epoch": 0.30648192665763363, "grad_norm": 2.1875, "learning_rate": 7.859858949724351e-05, "loss": 1.8512, "step": 7012 }, { "epoch": 0.3065256348616635, "grad_norm": 2.171875, "learning_rate": 7.859295504020706e-05, "loss": 2.174, "step": 7013 }, { "epoch": 0.30656934306569344, "grad_norm": 2.140625, "learning_rate": 7.858732004357989e-05, "loss": 1.7676, "step": 7014 }, { "epoch": 0.3066130512697233, "grad_norm": 2.6875, "learning_rate": 7.85816845074684e-05, "loss": 1.7165, "step": 7015 }, { "epoch": 0.3066567594737532, "grad_norm": 3.40625, "learning_rate": 7.857604843197896e-05, "loss": 2.595, "step": 7016 }, { "epoch": 0.30670046767778314, "grad_norm": 2.65625, "learning_rate": 7.857041181721787e-05, "loss": 3.0552, "step": 7017 }, { "epoch": 0.306744175881813, "grad_norm": 1.953125, "learning_rate": 7.856477466329152e-05, "loss": 1.7754, "step": 7018 }, { "epoch": 0.3067878840858429, "grad_norm": 2.359375, "learning_rate": 7.85591369703063e-05, "loss": 1.7161, "step": 7019 }, { "epoch": 0.30683159228987283, "grad_norm": 2.09375, "learning_rate": 7.855349873836862e-05, "loss": 2.4942, "step": 7020 }, { "epoch": 0.3068753004939027, "grad_norm": 1.875, "learning_rate": 7.854785996758485e-05, "loss": 1.9154, "step": 7021 }, { "epoch": 0.3069190086979326, "grad_norm": 1.8359375, "learning_rate": 7.854222065806141e-05, "loss": 1.6528, "step": 7022 }, { "epoch": 0.3069627169019625, "grad_norm": 2.234375, "learning_rate": 7.853658080990471e-05, "loss": 1.9866, "step": 7023 }, { "epoch": 0.3070064251059924, "grad_norm": 1.640625, "learning_rate": 7.853094042322121e-05, "loss": 1.3854, "step": 7024 }, { "epoch": 0.3070501333100223, "grad_norm": 2.34375, "learning_rate": 7.852529949811734e-05, "loss": 2.4263, "step": 7025 }, { "epoch": 0.30709384151405217, "grad_norm": 2.359375, "learning_rate": 7.851965803469956e-05, "loss": 1.3436, "step": 7026 }, { "epoch": 0.3071375497180821, "grad_norm": 10.875, "learning_rate": 7.851401603307431e-05, "loss": 5.592, "step": 7027 }, { "epoch": 0.307181257922112, "grad_norm": 2.296875, "learning_rate": 7.85083734933481e-05, "loss": 1.9178, "step": 7028 }, { "epoch": 0.30722496612614186, "grad_norm": 2.453125, "learning_rate": 7.850273041562737e-05, "loss": 2.4896, "step": 7029 }, { "epoch": 0.3072686743301718, "grad_norm": 2.140625, "learning_rate": 7.849708680001862e-05, "loss": 1.8103, "step": 7030 }, { "epoch": 0.3073123825342017, "grad_norm": 2.28125, "learning_rate": 7.84914426466284e-05, "loss": 2.4037, "step": 7031 }, { "epoch": 0.30735609073823156, "grad_norm": 2.34375, "learning_rate": 7.848579795556316e-05, "loss": 1.6429, "step": 7032 }, { "epoch": 0.30739979894226144, "grad_norm": 1.84375, "learning_rate": 7.848015272692947e-05, "loss": 1.5081, "step": 7033 }, { "epoch": 0.3074435071462914, "grad_norm": 3.5625, "learning_rate": 7.847450696083385e-05, "loss": 2.2542, "step": 7034 }, { "epoch": 0.30748721535032125, "grad_norm": 2.5, "learning_rate": 7.846886065738284e-05, "loss": 1.5479, "step": 7035 }, { "epoch": 0.30753092355435113, "grad_norm": 2.46875, "learning_rate": 7.846321381668298e-05, "loss": 2.4093, "step": 7036 }, { "epoch": 0.30757463175838107, "grad_norm": 2.046875, "learning_rate": 7.845756643884087e-05, "loss": 1.6931, "step": 7037 }, { "epoch": 0.30761833996241095, "grad_norm": 2.34375, "learning_rate": 7.845191852396305e-05, "loss": 2.3507, "step": 7038 }, { "epoch": 0.3076620481664408, "grad_norm": 1.8203125, "learning_rate": 7.844627007215613e-05, "loss": 1.7684, "step": 7039 }, { "epoch": 0.30770575637047076, "grad_norm": 2.25, "learning_rate": 7.844062108352668e-05, "loss": 1.9573, "step": 7040 }, { "epoch": 0.30774946457450064, "grad_norm": 3.28125, "learning_rate": 7.843497155818132e-05, "loss": 2.1727, "step": 7041 }, { "epoch": 0.3077931727785305, "grad_norm": 2.046875, "learning_rate": 7.842932149622666e-05, "loss": 1.7293, "step": 7042 }, { "epoch": 0.3078368809825604, "grad_norm": 2.328125, "learning_rate": 7.842367089776932e-05, "loss": 1.8516, "step": 7043 }, { "epoch": 0.30788058918659034, "grad_norm": 2.15625, "learning_rate": 7.841801976291595e-05, "loss": 1.437, "step": 7044 }, { "epoch": 0.3079242973906202, "grad_norm": 2.015625, "learning_rate": 7.841236809177317e-05, "loss": 1.8, "step": 7045 }, { "epoch": 0.3079680055946501, "grad_norm": 2.09375, "learning_rate": 7.840671588444768e-05, "loss": 1.8159, "step": 7046 }, { "epoch": 0.30801171379868003, "grad_norm": 2.953125, "learning_rate": 7.84010631410461e-05, "loss": 2.0276, "step": 7047 }, { "epoch": 0.3080554220027099, "grad_norm": 2.25, "learning_rate": 7.839540986167514e-05, "loss": 1.6176, "step": 7048 }, { "epoch": 0.3080991302067398, "grad_norm": 2.15625, "learning_rate": 7.838975604644146e-05, "loss": 2.1571, "step": 7049 }, { "epoch": 0.3081428384107697, "grad_norm": 2.296875, "learning_rate": 7.838410169545176e-05, "loss": 1.8845, "step": 7050 }, { "epoch": 0.3081865466147996, "grad_norm": 2.046875, "learning_rate": 7.837844680881274e-05, "loss": 1.7899, "step": 7051 }, { "epoch": 0.3082302548188295, "grad_norm": 3.390625, "learning_rate": 7.837279138663114e-05, "loss": 2.2053, "step": 7052 }, { "epoch": 0.30827396302285937, "grad_norm": 2.3125, "learning_rate": 7.836713542901366e-05, "loss": 2.0172, "step": 7053 }, { "epoch": 0.3083176712268893, "grad_norm": 2.515625, "learning_rate": 7.836147893606707e-05, "loss": 2.0153, "step": 7054 }, { "epoch": 0.3083613794309192, "grad_norm": 3.25, "learning_rate": 7.835582190789807e-05, "loss": 2.5625, "step": 7055 }, { "epoch": 0.30840508763494906, "grad_norm": 10.0625, "learning_rate": 7.835016434461345e-05, "loss": 1.7711, "step": 7056 }, { "epoch": 0.308448795838979, "grad_norm": 2.078125, "learning_rate": 7.834450624631996e-05, "loss": 1.8224, "step": 7057 }, { "epoch": 0.3084925040430089, "grad_norm": 2.09375, "learning_rate": 7.83388476131244e-05, "loss": 1.7466, "step": 7058 }, { "epoch": 0.30853621224703875, "grad_norm": 2.328125, "learning_rate": 7.833318844513353e-05, "loss": 2.2057, "step": 7059 }, { "epoch": 0.3085799204510687, "grad_norm": 2.140625, "learning_rate": 7.832752874245415e-05, "loss": 1.783, "step": 7060 }, { "epoch": 0.30862362865509857, "grad_norm": 1.890625, "learning_rate": 7.83218685051931e-05, "loss": 1.6596, "step": 7061 }, { "epoch": 0.30866733685912845, "grad_norm": 2.09375, "learning_rate": 7.831620773345715e-05, "loss": 1.66, "step": 7062 }, { "epoch": 0.30871104506315833, "grad_norm": 2.15625, "learning_rate": 7.831054642735315e-05, "loss": 1.8626, "step": 7063 }, { "epoch": 0.30875475326718826, "grad_norm": 2.609375, "learning_rate": 7.830488458698794e-05, "loss": 1.6357, "step": 7064 }, { "epoch": 0.30879846147121814, "grad_norm": 2.046875, "learning_rate": 7.829922221246835e-05, "loss": 1.7975, "step": 7065 }, { "epoch": 0.308842169675248, "grad_norm": 2.4375, "learning_rate": 7.829355930390125e-05, "loss": 1.8929, "step": 7066 }, { "epoch": 0.30888587787927796, "grad_norm": 1.7265625, "learning_rate": 7.828789586139352e-05, "loss": 1.6512, "step": 7067 }, { "epoch": 0.30892958608330784, "grad_norm": 1.890625, "learning_rate": 7.828223188505202e-05, "loss": 2.0205, "step": 7068 }, { "epoch": 0.3089732942873377, "grad_norm": 1.984375, "learning_rate": 7.827656737498365e-05, "loss": 1.6325, "step": 7069 }, { "epoch": 0.30901700249136765, "grad_norm": 1.9921875, "learning_rate": 7.827090233129528e-05, "loss": 1.7217, "step": 7070 }, { "epoch": 0.30906071069539753, "grad_norm": 1.8828125, "learning_rate": 7.826523675409385e-05, "loss": 1.4903, "step": 7071 }, { "epoch": 0.3091044188994274, "grad_norm": 1.953125, "learning_rate": 7.825957064348625e-05, "loss": 1.8773, "step": 7072 }, { "epoch": 0.3091481271034573, "grad_norm": 2.6875, "learning_rate": 7.825390399957944e-05, "loss": 2.1115, "step": 7073 }, { "epoch": 0.30919183530748723, "grad_norm": 1.8203125, "learning_rate": 7.824823682248033e-05, "loss": 1.6687, "step": 7074 }, { "epoch": 0.3092355435115171, "grad_norm": 2.109375, "learning_rate": 7.824256911229588e-05, "loss": 1.6351, "step": 7075 }, { "epoch": 0.309279251715547, "grad_norm": 2.359375, "learning_rate": 7.823690086913305e-05, "loss": 1.7782, "step": 7076 }, { "epoch": 0.3093229599195769, "grad_norm": 2.265625, "learning_rate": 7.82312320930988e-05, "loss": 1.6454, "step": 7077 }, { "epoch": 0.3093666681236068, "grad_norm": 2.1875, "learning_rate": 7.822556278430011e-05, "loss": 1.8831, "step": 7078 }, { "epoch": 0.3094103763276367, "grad_norm": 1.984375, "learning_rate": 7.821989294284397e-05, "loss": 2.0986, "step": 7079 }, { "epoch": 0.3094540845316666, "grad_norm": 1.9765625, "learning_rate": 7.821422256883736e-05, "loss": 1.6004, "step": 7080 }, { "epoch": 0.3094977927356965, "grad_norm": 1.796875, "learning_rate": 7.820855166238734e-05, "loss": 1.667, "step": 7081 }, { "epoch": 0.3095415009397264, "grad_norm": 2.390625, "learning_rate": 7.820288022360087e-05, "loss": 1.7269, "step": 7082 }, { "epoch": 0.30958520914375626, "grad_norm": 2.28125, "learning_rate": 7.819720825258501e-05, "loss": 2.1683, "step": 7083 }, { "epoch": 0.3096289173477862, "grad_norm": 1.7890625, "learning_rate": 7.81915357494468e-05, "loss": 1.5796, "step": 7084 }, { "epoch": 0.30967262555181607, "grad_norm": 2.015625, "learning_rate": 7.818586271429327e-05, "loss": 1.6388, "step": 7085 }, { "epoch": 0.30971633375584595, "grad_norm": 2.234375, "learning_rate": 7.818018914723149e-05, "loss": 1.7634, "step": 7086 }, { "epoch": 0.3097600419598759, "grad_norm": 5.40625, "learning_rate": 7.817451504836852e-05, "loss": 1.5701, "step": 7087 }, { "epoch": 0.30980375016390577, "grad_norm": 2.21875, "learning_rate": 7.816884041781148e-05, "loss": 1.5761, "step": 7088 }, { "epoch": 0.30984745836793565, "grad_norm": 2.203125, "learning_rate": 7.816316525566738e-05, "loss": 1.9034, "step": 7089 }, { "epoch": 0.3098911665719656, "grad_norm": 1.9296875, "learning_rate": 7.815748956204337e-05, "loss": 1.9308, "step": 7090 }, { "epoch": 0.30993487477599546, "grad_norm": 2.0, "learning_rate": 7.815181333704656e-05, "loss": 1.768, "step": 7091 }, { "epoch": 0.30997858298002534, "grad_norm": 2.359375, "learning_rate": 7.814613658078407e-05, "loss": 2.7116, "step": 7092 }, { "epoch": 0.3100222911840552, "grad_norm": 2.28125, "learning_rate": 7.814045929336299e-05, "loss": 1.9427, "step": 7093 }, { "epoch": 0.31006599938808516, "grad_norm": 2.578125, "learning_rate": 7.813478147489052e-05, "loss": 2.0114, "step": 7094 }, { "epoch": 0.31010970759211504, "grad_norm": 3.1875, "learning_rate": 7.812910312547375e-05, "loss": 2.1606, "step": 7095 }, { "epoch": 0.3101534157961449, "grad_norm": 2.078125, "learning_rate": 7.812342424521988e-05, "loss": 2.1392, "step": 7096 }, { "epoch": 0.31019712400017485, "grad_norm": 2.03125, "learning_rate": 7.811774483423605e-05, "loss": 1.721, "step": 7097 }, { "epoch": 0.31024083220420473, "grad_norm": 1.9140625, "learning_rate": 7.811206489262945e-05, "loss": 1.7087, "step": 7098 }, { "epoch": 0.3102845404082346, "grad_norm": 2.515625, "learning_rate": 7.810638442050728e-05, "loss": 1.376, "step": 7099 }, { "epoch": 0.31032824861226455, "grad_norm": 2.171875, "learning_rate": 7.810070341797673e-05, "loss": 1.9291, "step": 7100 }, { "epoch": 0.3103719568162944, "grad_norm": 2.140625, "learning_rate": 7.8095021885145e-05, "loss": 2.0947, "step": 7101 }, { "epoch": 0.3104156650203243, "grad_norm": 1.9765625, "learning_rate": 7.808933982211933e-05, "loss": 1.6669, "step": 7102 }, { "epoch": 0.3104593732243542, "grad_norm": 1.890625, "learning_rate": 7.808365722900693e-05, "loss": 1.8462, "step": 7103 }, { "epoch": 0.3105030814283841, "grad_norm": 2.109375, "learning_rate": 7.807797410591504e-05, "loss": 1.6853, "step": 7104 }, { "epoch": 0.310546789632414, "grad_norm": 2.546875, "learning_rate": 7.80722904529509e-05, "loss": 1.6934, "step": 7105 }, { "epoch": 0.3105904978364439, "grad_norm": 2.28125, "learning_rate": 7.80666062702218e-05, "loss": 1.8148, "step": 7106 }, { "epoch": 0.3106342060404738, "grad_norm": 2.1875, "learning_rate": 7.806092155783497e-05, "loss": 2.3154, "step": 7107 }, { "epoch": 0.3106779142445037, "grad_norm": 2.203125, "learning_rate": 7.805523631589774e-05, "loss": 2.4815, "step": 7108 }, { "epoch": 0.3107216224485336, "grad_norm": 2.578125, "learning_rate": 7.804955054451735e-05, "loss": 2.6464, "step": 7109 }, { "epoch": 0.3107653306525635, "grad_norm": 1.96875, "learning_rate": 7.804386424380113e-05, "loss": 1.8508, "step": 7110 }, { "epoch": 0.3108090388565934, "grad_norm": 1.9140625, "learning_rate": 7.803817741385635e-05, "loss": 1.4178, "step": 7111 }, { "epoch": 0.31085274706062327, "grad_norm": 1.9765625, "learning_rate": 7.803249005479037e-05, "loss": 1.8201, "step": 7112 }, { "epoch": 0.31089645526465315, "grad_norm": 2.140625, "learning_rate": 7.802680216671053e-05, "loss": 2.3145, "step": 7113 }, { "epoch": 0.3109401634686831, "grad_norm": 3.390625, "learning_rate": 7.80211137497241e-05, "loss": 1.7484, "step": 7114 }, { "epoch": 0.31098387167271296, "grad_norm": 2.171875, "learning_rate": 7.801542480393849e-05, "loss": 2.0886, "step": 7115 }, { "epoch": 0.31102757987674284, "grad_norm": 1.9921875, "learning_rate": 7.800973532946104e-05, "loss": 1.5976, "step": 7116 }, { "epoch": 0.3110712880807728, "grad_norm": 2.359375, "learning_rate": 7.800404532639911e-05, "loss": 2.2827, "step": 7117 }, { "epoch": 0.31111499628480266, "grad_norm": 2.0, "learning_rate": 7.799835479486008e-05, "loss": 1.6772, "step": 7118 }, { "epoch": 0.31115870448883254, "grad_norm": 2.4375, "learning_rate": 7.799266373495137e-05, "loss": 1.6108, "step": 7119 }, { "epoch": 0.3112024126928625, "grad_norm": 2.484375, "learning_rate": 7.798697214678032e-05, "loss": 2.0943, "step": 7120 }, { "epoch": 0.31124612089689235, "grad_norm": 2.0, "learning_rate": 7.79812800304544e-05, "loss": 1.4641, "step": 7121 }, { "epoch": 0.31128982910092223, "grad_norm": 2.0, "learning_rate": 7.797558738608099e-05, "loss": 1.6022, "step": 7122 }, { "epoch": 0.3113335373049521, "grad_norm": 2.34375, "learning_rate": 7.796989421376755e-05, "loss": 1.8272, "step": 7123 }, { "epoch": 0.31137724550898205, "grad_norm": 2.34375, "learning_rate": 7.796420051362148e-05, "loss": 2.2746, "step": 7124 }, { "epoch": 0.3114209537130119, "grad_norm": 2.5, "learning_rate": 7.795850628575024e-05, "loss": 1.6013, "step": 7125 }, { "epoch": 0.3114646619170418, "grad_norm": 2.578125, "learning_rate": 7.79528115302613e-05, "loss": 2.7004, "step": 7126 }, { "epoch": 0.31150837012107174, "grad_norm": 2.3125, "learning_rate": 7.794711624726213e-05, "loss": 1.7298, "step": 7127 }, { "epoch": 0.3115520783251016, "grad_norm": 2.71875, "learning_rate": 7.79414204368602e-05, "loss": 1.7595, "step": 7128 }, { "epoch": 0.3115957865291315, "grad_norm": 1.9140625, "learning_rate": 7.7935724099163e-05, "loss": 1.7804, "step": 7129 }, { "epoch": 0.31163949473316144, "grad_norm": 1.9765625, "learning_rate": 7.793002723427802e-05, "loss": 1.7247, "step": 7130 }, { "epoch": 0.3116832029371913, "grad_norm": 1.9140625, "learning_rate": 7.792432984231277e-05, "loss": 1.3776, "step": 7131 }, { "epoch": 0.3117269111412212, "grad_norm": 2.328125, "learning_rate": 7.791863192337479e-05, "loss": 2.9035, "step": 7132 }, { "epoch": 0.3117706193452511, "grad_norm": 2.234375, "learning_rate": 7.791293347757159e-05, "loss": 2.0015, "step": 7133 }, { "epoch": 0.311814327549281, "grad_norm": 2.25, "learning_rate": 7.79072345050107e-05, "loss": 1.801, "step": 7134 }, { "epoch": 0.3118580357533109, "grad_norm": 1.9921875, "learning_rate": 7.790153500579968e-05, "loss": 1.9849, "step": 7135 }, { "epoch": 0.31190174395734077, "grad_norm": 2.5625, "learning_rate": 7.78958349800461e-05, "loss": 1.977, "step": 7136 }, { "epoch": 0.3119454521613707, "grad_norm": 1.96875, "learning_rate": 7.789013442785749e-05, "loss": 1.6839, "step": 7137 }, { "epoch": 0.3119891603654006, "grad_norm": 2.3125, "learning_rate": 7.788443334934148e-05, "loss": 1.6571, "step": 7138 }, { "epoch": 0.31203286856943047, "grad_norm": 2.140625, "learning_rate": 7.78787317446056e-05, "loss": 2.0375, "step": 7139 }, { "epoch": 0.3120765767734604, "grad_norm": 1.90625, "learning_rate": 7.787302961375748e-05, "loss": 1.9974, "step": 7140 }, { "epoch": 0.3121202849774903, "grad_norm": 2.140625, "learning_rate": 7.786732695690475e-05, "loss": 2.2131, "step": 7141 }, { "epoch": 0.31216399318152016, "grad_norm": 2.484375, "learning_rate": 7.786162377415497e-05, "loss": 2.4709, "step": 7142 }, { "epoch": 0.31220770138555004, "grad_norm": 1.953125, "learning_rate": 7.785592006561582e-05, "loss": 1.3958, "step": 7143 }, { "epoch": 0.31225140958958, "grad_norm": 1.9765625, "learning_rate": 7.78502158313949e-05, "loss": 1.7984, "step": 7144 }, { "epoch": 0.31229511779360986, "grad_norm": 2.421875, "learning_rate": 7.784451107159988e-05, "loss": 1.6409, "step": 7145 }, { "epoch": 0.31233882599763974, "grad_norm": 2.078125, "learning_rate": 7.783880578633843e-05, "loss": 2.3524, "step": 7146 }, { "epoch": 0.31238253420166967, "grad_norm": 2.71875, "learning_rate": 7.783309997571819e-05, "loss": 1.5622, "step": 7147 }, { "epoch": 0.31242624240569955, "grad_norm": 1.9921875, "learning_rate": 7.782739363984683e-05, "loss": 1.5084, "step": 7148 }, { "epoch": 0.31246995060972943, "grad_norm": 2.046875, "learning_rate": 7.782168677883206e-05, "loss": 1.8476, "step": 7149 }, { "epoch": 0.31251365881375937, "grad_norm": 2.296875, "learning_rate": 7.781597939278156e-05, "loss": 2.3668, "step": 7150 }, { "epoch": 0.31255736701778924, "grad_norm": 2.359375, "learning_rate": 7.781027148180304e-05, "loss": 2.4507, "step": 7151 }, { "epoch": 0.3126010752218191, "grad_norm": 2.015625, "learning_rate": 7.780456304600423e-05, "loss": 2.6835, "step": 7152 }, { "epoch": 0.312644783425849, "grad_norm": 1.953125, "learning_rate": 7.779885408549286e-05, "loss": 1.8557, "step": 7153 }, { "epoch": 0.31268849162987894, "grad_norm": 1.7890625, "learning_rate": 7.779314460037663e-05, "loss": 1.6768, "step": 7154 }, { "epoch": 0.3127321998339088, "grad_norm": 2.390625, "learning_rate": 7.778743459076333e-05, "loss": 2.9159, "step": 7155 }, { "epoch": 0.3127759080379387, "grad_norm": 3.375, "learning_rate": 7.778172405676068e-05, "loss": 1.871, "step": 7156 }, { "epoch": 0.31281961624196863, "grad_norm": 2.15625, "learning_rate": 7.777601299847648e-05, "loss": 1.6686, "step": 7157 }, { "epoch": 0.3128633244459985, "grad_norm": 2.28125, "learning_rate": 7.777030141601848e-05, "loss": 2.1794, "step": 7158 }, { "epoch": 0.3129070326500284, "grad_norm": 1.96875, "learning_rate": 7.776458930949446e-05, "loss": 1.9874, "step": 7159 }, { "epoch": 0.31295074085405833, "grad_norm": 1.984375, "learning_rate": 7.775887667901225e-05, "loss": 2.0633, "step": 7160 }, { "epoch": 0.3129944490580882, "grad_norm": 2.078125, "learning_rate": 7.775316352467962e-05, "loss": 1.8188, "step": 7161 }, { "epoch": 0.3130381572621181, "grad_norm": 2.09375, "learning_rate": 7.774744984660442e-05, "loss": 1.6672, "step": 7162 }, { "epoch": 0.31308186546614797, "grad_norm": 2.171875, "learning_rate": 7.774173564489445e-05, "loss": 2.5734, "step": 7163 }, { "epoch": 0.3131255736701779, "grad_norm": 2.21875, "learning_rate": 7.773602091965754e-05, "loss": 2.3279, "step": 7164 }, { "epoch": 0.3131692818742078, "grad_norm": 2.1875, "learning_rate": 7.773030567100157e-05, "loss": 1.6927, "step": 7165 }, { "epoch": 0.31321299007823766, "grad_norm": 2.859375, "learning_rate": 7.772458989903437e-05, "loss": 2.3808, "step": 7166 }, { "epoch": 0.3132566982822676, "grad_norm": 2.75, "learning_rate": 7.771887360386379e-05, "loss": 2.2934, "step": 7167 }, { "epoch": 0.3133004064862975, "grad_norm": 2.515625, "learning_rate": 7.771315678559774e-05, "loss": 2.1141, "step": 7168 }, { "epoch": 0.31334411469032736, "grad_norm": 3.046875, "learning_rate": 7.770743944434407e-05, "loss": 1.8697, "step": 7169 }, { "epoch": 0.3133878228943573, "grad_norm": 2.0, "learning_rate": 7.77017215802107e-05, "loss": 2.1161, "step": 7170 }, { "epoch": 0.3134315310983872, "grad_norm": 2.109375, "learning_rate": 7.769600319330552e-05, "loss": 2.0299, "step": 7171 }, { "epoch": 0.31347523930241705, "grad_norm": 2.5, "learning_rate": 7.769028428373645e-05, "loss": 2.1765, "step": 7172 }, { "epoch": 0.313518947506447, "grad_norm": 1.9140625, "learning_rate": 7.768456485161142e-05, "loss": 1.7941, "step": 7173 }, { "epoch": 0.31356265571047687, "grad_norm": 2.28125, "learning_rate": 7.767884489703836e-05, "loss": 2.2785, "step": 7174 }, { "epoch": 0.31360636391450675, "grad_norm": 1.875, "learning_rate": 7.76731244201252e-05, "loss": 1.3323, "step": 7175 }, { "epoch": 0.3136500721185366, "grad_norm": 2.0625, "learning_rate": 7.766740342097992e-05, "loss": 1.9032, "step": 7176 }, { "epoch": 0.31369378032256656, "grad_norm": 1.734375, "learning_rate": 7.766168189971046e-05, "loss": 1.7193, "step": 7177 }, { "epoch": 0.31373748852659644, "grad_norm": 2.203125, "learning_rate": 7.765595985642483e-05, "loss": 2.1914, "step": 7178 }, { "epoch": 0.3137811967306263, "grad_norm": 2.09375, "learning_rate": 7.765023729123095e-05, "loss": 1.8275, "step": 7179 }, { "epoch": 0.31382490493465626, "grad_norm": 2.34375, "learning_rate": 7.764451420423687e-05, "loss": 2.1466, "step": 7180 }, { "epoch": 0.31386861313868614, "grad_norm": 2.25, "learning_rate": 7.763879059555055e-05, "loss": 2.0121, "step": 7181 }, { "epoch": 0.313912321342716, "grad_norm": 2.21875, "learning_rate": 7.763306646528004e-05, "loss": 2.1565, "step": 7182 }, { "epoch": 0.31395602954674595, "grad_norm": 2.4375, "learning_rate": 7.762734181353335e-05, "loss": 2.0406, "step": 7183 }, { "epoch": 0.31399973775077583, "grad_norm": 1.921875, "learning_rate": 7.762161664041852e-05, "loss": 1.7566, "step": 7184 }, { "epoch": 0.3140434459548057, "grad_norm": 2.546875, "learning_rate": 7.761589094604357e-05, "loss": 1.4868, "step": 7185 }, { "epoch": 0.3140871541588356, "grad_norm": 2.265625, "learning_rate": 7.761016473051655e-05, "loss": 1.4814, "step": 7186 }, { "epoch": 0.3141308623628655, "grad_norm": 2.390625, "learning_rate": 7.760443799394557e-05, "loss": 1.6667, "step": 7187 }, { "epoch": 0.3141745705668954, "grad_norm": 2.578125, "learning_rate": 7.759871073643865e-05, "loss": 2.9772, "step": 7188 }, { "epoch": 0.3142182787709253, "grad_norm": 2.25, "learning_rate": 7.75929829581039e-05, "loss": 1.7509, "step": 7189 }, { "epoch": 0.3142619869749552, "grad_norm": 2.21875, "learning_rate": 7.758725465904938e-05, "loss": 1.8446, "step": 7190 }, { "epoch": 0.3143056951789851, "grad_norm": 2.078125, "learning_rate": 7.758152583938323e-05, "loss": 2.0265, "step": 7191 }, { "epoch": 0.314349403383015, "grad_norm": 1.96875, "learning_rate": 7.757579649921354e-05, "loss": 1.84, "step": 7192 }, { "epoch": 0.3143931115870449, "grad_norm": 1.7890625, "learning_rate": 7.757006663864843e-05, "loss": 1.6102, "step": 7193 }, { "epoch": 0.3144368197910748, "grad_norm": 2.640625, "learning_rate": 7.756433625779604e-05, "loss": 2.7256, "step": 7194 }, { "epoch": 0.3144805279951047, "grad_norm": 2.25, "learning_rate": 7.755860535676452e-05, "loss": 2.0873, "step": 7195 }, { "epoch": 0.31452423619913455, "grad_norm": 2.453125, "learning_rate": 7.755287393566199e-05, "loss": 2.017, "step": 7196 }, { "epoch": 0.3145679444031645, "grad_norm": 2.125, "learning_rate": 7.754714199459663e-05, "loss": 1.8984, "step": 7197 }, { "epoch": 0.31461165260719437, "grad_norm": 2.390625, "learning_rate": 7.75414095336766e-05, "loss": 1.4577, "step": 7198 }, { "epoch": 0.31465536081122425, "grad_norm": 2.296875, "learning_rate": 7.753567655301012e-05, "loss": 1.9013, "step": 7199 }, { "epoch": 0.3146990690152542, "grad_norm": 2.15625, "learning_rate": 7.752994305270534e-05, "loss": 1.9996, "step": 7200 }, { "epoch": 0.31474277721928406, "grad_norm": 2.75, "learning_rate": 7.752420903287044e-05, "loss": 2.3113, "step": 7201 }, { "epoch": 0.31478648542331394, "grad_norm": 2.40625, "learning_rate": 7.751847449361367e-05, "loss": 1.899, "step": 7202 }, { "epoch": 0.3148301936273439, "grad_norm": 2.546875, "learning_rate": 7.751273943504322e-05, "loss": 3.2488, "step": 7203 }, { "epoch": 0.31487390183137376, "grad_norm": 1.859375, "learning_rate": 7.750700385726736e-05, "loss": 1.6907, "step": 7204 }, { "epoch": 0.31491761003540364, "grad_norm": 1.875, "learning_rate": 7.75012677603943e-05, "loss": 1.556, "step": 7205 }, { "epoch": 0.3149613182394335, "grad_norm": 2.765625, "learning_rate": 7.749553114453228e-05, "loss": 2.0231, "step": 7206 }, { "epoch": 0.31500502644346345, "grad_norm": 2.421875, "learning_rate": 7.748979400978956e-05, "loss": 2.0842, "step": 7207 }, { "epoch": 0.31504873464749333, "grad_norm": 2.453125, "learning_rate": 7.748405635627444e-05, "loss": 1.6753, "step": 7208 }, { "epoch": 0.3150924428515232, "grad_norm": 2.203125, "learning_rate": 7.747831818409517e-05, "loss": 1.8329, "step": 7209 }, { "epoch": 0.31513615105555315, "grad_norm": 2.171875, "learning_rate": 7.747257949336003e-05, "loss": 1.687, "step": 7210 }, { "epoch": 0.31517985925958303, "grad_norm": 2.609375, "learning_rate": 7.746684028417733e-05, "loss": 1.944, "step": 7211 }, { "epoch": 0.3152235674636129, "grad_norm": 2.265625, "learning_rate": 7.746110055665539e-05, "loss": 1.9002, "step": 7212 }, { "epoch": 0.31526727566764284, "grad_norm": 2.3125, "learning_rate": 7.745536031090252e-05, "loss": 1.7212, "step": 7213 }, { "epoch": 0.3153109838716727, "grad_norm": 2.265625, "learning_rate": 7.744961954702703e-05, "loss": 1.6796, "step": 7214 }, { "epoch": 0.3153546920757026, "grad_norm": 2.4375, "learning_rate": 7.744387826513726e-05, "loss": 2.0101, "step": 7215 }, { "epoch": 0.3153984002797325, "grad_norm": 2.0625, "learning_rate": 7.743813646534158e-05, "loss": 1.7057, "step": 7216 }, { "epoch": 0.3154421084837624, "grad_norm": 1.9765625, "learning_rate": 7.743239414774832e-05, "loss": 1.9942, "step": 7217 }, { "epoch": 0.3154858166877923, "grad_norm": 1.90625, "learning_rate": 7.742665131246587e-05, "loss": 1.5873, "step": 7218 }, { "epoch": 0.3155295248918222, "grad_norm": 2.234375, "learning_rate": 7.742090795960259e-05, "loss": 2.4747, "step": 7219 }, { "epoch": 0.3155732330958521, "grad_norm": 1.9140625, "learning_rate": 7.741516408926686e-05, "loss": 1.9626, "step": 7220 }, { "epoch": 0.315616941299882, "grad_norm": 2.203125, "learning_rate": 7.740941970156707e-05, "loss": 1.7351, "step": 7221 }, { "epoch": 0.31566064950391187, "grad_norm": 1.9609375, "learning_rate": 7.740367479661166e-05, "loss": 1.2704, "step": 7222 }, { "epoch": 0.3157043577079418, "grad_norm": 2.234375, "learning_rate": 7.739792937450901e-05, "loss": 1.7378, "step": 7223 }, { "epoch": 0.3157480659119717, "grad_norm": 2.421875, "learning_rate": 7.739218343536757e-05, "loss": 2.3897, "step": 7224 }, { "epoch": 0.31579177411600157, "grad_norm": 2.28125, "learning_rate": 7.738643697929575e-05, "loss": 2.4272, "step": 7225 }, { "epoch": 0.31583548232003145, "grad_norm": 2.828125, "learning_rate": 7.7380690006402e-05, "loss": 2.2493, "step": 7226 }, { "epoch": 0.3158791905240614, "grad_norm": 2.40625, "learning_rate": 7.737494251679479e-05, "loss": 2.0007, "step": 7227 }, { "epoch": 0.31592289872809126, "grad_norm": 2.0625, "learning_rate": 7.736919451058258e-05, "loss": 1.9244, "step": 7228 }, { "epoch": 0.31596660693212114, "grad_norm": 2.953125, "learning_rate": 7.736344598787381e-05, "loss": 1.9202, "step": 7229 }, { "epoch": 0.3160103151361511, "grad_norm": 2.15625, "learning_rate": 7.735769694877701e-05, "loss": 1.7687, "step": 7230 }, { "epoch": 0.31605402334018096, "grad_norm": 1.9765625, "learning_rate": 7.735194739340064e-05, "loss": 1.5402, "step": 7231 }, { "epoch": 0.31609773154421084, "grad_norm": 2.0625, "learning_rate": 7.734619732185322e-05, "loss": 1.9211, "step": 7232 }, { "epoch": 0.31614143974824077, "grad_norm": 2.328125, "learning_rate": 7.734044673424325e-05, "loss": 2.1806, "step": 7233 }, { "epoch": 0.31618514795227065, "grad_norm": 2.046875, "learning_rate": 7.733469563067928e-05, "loss": 1.5808, "step": 7234 }, { "epoch": 0.31622885615630053, "grad_norm": 2.5, "learning_rate": 7.73289440112698e-05, "loss": 2.3715, "step": 7235 }, { "epoch": 0.3162725643603304, "grad_norm": 2.328125, "learning_rate": 7.732319187612335e-05, "loss": 2.3467, "step": 7236 }, { "epoch": 0.31631627256436035, "grad_norm": 2.1875, "learning_rate": 7.731743922534853e-05, "loss": 1.9838, "step": 7237 }, { "epoch": 0.3163599807683902, "grad_norm": 2.234375, "learning_rate": 7.731168605905388e-05, "loss": 2.6055, "step": 7238 }, { "epoch": 0.3164036889724201, "grad_norm": 2.890625, "learning_rate": 7.730593237734796e-05, "loss": 3.3343, "step": 7239 }, { "epoch": 0.31644739717645004, "grad_norm": 1.9921875, "learning_rate": 7.730017818033935e-05, "loss": 2.0111, "step": 7240 }, { "epoch": 0.3164911053804799, "grad_norm": 2.390625, "learning_rate": 7.729442346813662e-05, "loss": 2.0082, "step": 7241 }, { "epoch": 0.3165348135845098, "grad_norm": 1.9921875, "learning_rate": 7.728866824084842e-05, "loss": 2.1414, "step": 7242 }, { "epoch": 0.31657852178853974, "grad_norm": 2.15625, "learning_rate": 7.728291249858332e-05, "loss": 1.7971, "step": 7243 }, { "epoch": 0.3166222299925696, "grad_norm": 2.03125, "learning_rate": 7.727715624144998e-05, "loss": 2.4157, "step": 7244 }, { "epoch": 0.3166659381965995, "grad_norm": 2.046875, "learning_rate": 7.727139946955697e-05, "loss": 1.6075, "step": 7245 }, { "epoch": 0.3167096464006294, "grad_norm": 2.078125, "learning_rate": 7.726564218301297e-05, "loss": 2.1659, "step": 7246 }, { "epoch": 0.3167533546046593, "grad_norm": 2.703125, "learning_rate": 7.725988438192662e-05, "loss": 1.6702, "step": 7247 }, { "epoch": 0.3167970628086892, "grad_norm": 2.28125, "learning_rate": 7.725412606640658e-05, "loss": 1.9162, "step": 7248 }, { "epoch": 0.31684077101271907, "grad_norm": 1.984375, "learning_rate": 7.724836723656153e-05, "loss": 1.8255, "step": 7249 }, { "epoch": 0.316884479216749, "grad_norm": 2.15625, "learning_rate": 7.724260789250011e-05, "loss": 1.9185, "step": 7250 }, { "epoch": 0.3169281874207789, "grad_norm": 2.03125, "learning_rate": 7.723684803433102e-05, "loss": 1.7484, "step": 7251 }, { "epoch": 0.31697189562480876, "grad_norm": 1.9921875, "learning_rate": 7.723108766216298e-05, "loss": 1.7841, "step": 7252 }, { "epoch": 0.3170156038288387, "grad_norm": 2.328125, "learning_rate": 7.72253267761047e-05, "loss": 3.1336, "step": 7253 }, { "epoch": 0.3170593120328686, "grad_norm": 2.40625, "learning_rate": 7.721956537626487e-05, "loss": 1.9293, "step": 7254 }, { "epoch": 0.31710302023689846, "grad_norm": 2.09375, "learning_rate": 7.721380346275222e-05, "loss": 1.7513, "step": 7255 }, { "epoch": 0.31714672844092834, "grad_norm": 2.765625, "learning_rate": 7.720804103567546e-05, "loss": 2.3873, "step": 7256 }, { "epoch": 0.3171904366449583, "grad_norm": 2.1875, "learning_rate": 7.720227809514343e-05, "loss": 1.8492, "step": 7257 }, { "epoch": 0.31723414484898815, "grad_norm": 3.34375, "learning_rate": 7.719651464126475e-05, "loss": 2.1024, "step": 7258 }, { "epoch": 0.31727785305301803, "grad_norm": 1.859375, "learning_rate": 7.719075067414831e-05, "loss": 1.4269, "step": 7259 }, { "epoch": 0.31732156125704797, "grad_norm": 2.015625, "learning_rate": 7.718498619390283e-05, "loss": 2.1015, "step": 7260 }, { "epoch": 0.31736526946107785, "grad_norm": 2.0625, "learning_rate": 7.717922120063706e-05, "loss": 1.6376, "step": 7261 }, { "epoch": 0.3174089776651077, "grad_norm": 2.484375, "learning_rate": 7.717345569445986e-05, "loss": 1.0115, "step": 7262 }, { "epoch": 0.31745268586913766, "grad_norm": 2.203125, "learning_rate": 7.716768967547998e-05, "loss": 2.3164, "step": 7263 }, { "epoch": 0.31749639407316754, "grad_norm": 2.21875, "learning_rate": 7.716192314380626e-05, "loss": 1.6065, "step": 7264 }, { "epoch": 0.3175401022771974, "grad_norm": 2.140625, "learning_rate": 7.715615609954752e-05, "loss": 2.0752, "step": 7265 }, { "epoch": 0.3175838104812273, "grad_norm": 2.671875, "learning_rate": 7.71503885428126e-05, "loss": 1.8567, "step": 7266 }, { "epoch": 0.31762751868525724, "grad_norm": 2.390625, "learning_rate": 7.714462047371031e-05, "loss": 2.532, "step": 7267 }, { "epoch": 0.3176712268892871, "grad_norm": 2.125, "learning_rate": 7.713885189234956e-05, "loss": 1.9574, "step": 7268 }, { "epoch": 0.317714935093317, "grad_norm": 2.453125, "learning_rate": 7.713308279883915e-05, "loss": 2.0208, "step": 7269 }, { "epoch": 0.31775864329734693, "grad_norm": 2.53125, "learning_rate": 7.712731319328798e-05, "loss": 1.8329, "step": 7270 }, { "epoch": 0.3178023515013768, "grad_norm": 2.40625, "learning_rate": 7.712154307580493e-05, "loss": 2.0871, "step": 7271 }, { "epoch": 0.3178460597054067, "grad_norm": 2.09375, "learning_rate": 7.711577244649888e-05, "loss": 2.0626, "step": 7272 }, { "epoch": 0.3178897679094366, "grad_norm": 2.546875, "learning_rate": 7.711000130547875e-05, "loss": 2.0854, "step": 7273 }, { "epoch": 0.3179334761134665, "grad_norm": 1.953125, "learning_rate": 7.710422965285344e-05, "loss": 1.7566, "step": 7274 }, { "epoch": 0.3179771843174964, "grad_norm": 1.8203125, "learning_rate": 7.709845748873187e-05, "loss": 1.6372, "step": 7275 }, { "epoch": 0.31802089252152627, "grad_norm": 2.859375, "learning_rate": 7.709268481322296e-05, "loss": 1.9832, "step": 7276 }, { "epoch": 0.3180646007255562, "grad_norm": 2.515625, "learning_rate": 7.708691162643565e-05, "loss": 1.6387, "step": 7277 }, { "epoch": 0.3181083089295861, "grad_norm": 2.1875, "learning_rate": 7.70811379284789e-05, "loss": 2.0553, "step": 7278 }, { "epoch": 0.31815201713361596, "grad_norm": 1.90625, "learning_rate": 7.707536371946167e-05, "loss": 1.6551, "step": 7279 }, { "epoch": 0.3181957253376459, "grad_norm": 2.046875, "learning_rate": 7.706958899949293e-05, "loss": 1.7686, "step": 7280 }, { "epoch": 0.3182394335416758, "grad_norm": 2.1875, "learning_rate": 7.706381376868162e-05, "loss": 1.6572, "step": 7281 }, { "epoch": 0.31828314174570566, "grad_norm": 3.921875, "learning_rate": 7.705803802713677e-05, "loss": 2.0216, "step": 7282 }, { "epoch": 0.3183268499497356, "grad_norm": 3.296875, "learning_rate": 7.705226177496736e-05, "loss": 2.2726, "step": 7283 }, { "epoch": 0.31837055815376547, "grad_norm": 2.875, "learning_rate": 7.70464850122824e-05, "loss": 2.1551, "step": 7284 }, { "epoch": 0.31841426635779535, "grad_norm": 1.8515625, "learning_rate": 7.70407077391909e-05, "loss": 1.7063, "step": 7285 }, { "epoch": 0.31845797456182523, "grad_norm": 1.7890625, "learning_rate": 7.703492995580188e-05, "loss": 1.7317, "step": 7286 }, { "epoch": 0.31850168276585517, "grad_norm": 2.125, "learning_rate": 7.70291516622244e-05, "loss": 1.7335, "step": 7287 }, { "epoch": 0.31854539096988504, "grad_norm": 2.09375, "learning_rate": 7.702337285856748e-05, "loss": 1.8964, "step": 7288 }, { "epoch": 0.3185890991739149, "grad_norm": 2.703125, "learning_rate": 7.701759354494018e-05, "loss": 1.7829, "step": 7289 }, { "epoch": 0.31863280737794486, "grad_norm": 2.0625, "learning_rate": 7.701181372145159e-05, "loss": 1.8129, "step": 7290 }, { "epoch": 0.31867651558197474, "grad_norm": 3.6875, "learning_rate": 7.700603338821074e-05, "loss": 2.084, "step": 7291 }, { "epoch": 0.3187202237860046, "grad_norm": 2.203125, "learning_rate": 7.700025254532673e-05, "loss": 2.0998, "step": 7292 }, { "epoch": 0.31876393199003455, "grad_norm": 2.140625, "learning_rate": 7.699447119290867e-05, "loss": 2.103, "step": 7293 }, { "epoch": 0.31880764019406443, "grad_norm": 2.71875, "learning_rate": 7.698868933106565e-05, "loss": 2.3017, "step": 7294 }, { "epoch": 0.3188513483980943, "grad_norm": 2.296875, "learning_rate": 7.698290695990677e-05, "loss": 2.3595, "step": 7295 }, { "epoch": 0.3188950566021242, "grad_norm": 2.453125, "learning_rate": 7.697712407954119e-05, "loss": 2.0153, "step": 7296 }, { "epoch": 0.31893876480615413, "grad_norm": 2.71875, "learning_rate": 7.697134069007799e-05, "loss": 2.3144, "step": 7297 }, { "epoch": 0.318982473010184, "grad_norm": 2.5, "learning_rate": 7.696555679162635e-05, "loss": 2.4251, "step": 7298 }, { "epoch": 0.3190261812142139, "grad_norm": 1.984375, "learning_rate": 7.695977238429539e-05, "loss": 2.1105, "step": 7299 }, { "epoch": 0.3190698894182438, "grad_norm": 2.0625, "learning_rate": 7.695398746819431e-05, "loss": 2.094, "step": 7300 }, { "epoch": 0.3191135976222737, "grad_norm": 2.078125, "learning_rate": 7.694820204343223e-05, "loss": 1.9124, "step": 7301 }, { "epoch": 0.3191573058263036, "grad_norm": 2.046875, "learning_rate": 7.694241611011838e-05, "loss": 1.5197, "step": 7302 }, { "epoch": 0.3192010140303335, "grad_norm": 2.734375, "learning_rate": 7.693662966836191e-05, "loss": 1.8983, "step": 7303 }, { "epoch": 0.3192447222343634, "grad_norm": 2.421875, "learning_rate": 7.693084271827205e-05, "loss": 1.5317, "step": 7304 }, { "epoch": 0.3192884304383933, "grad_norm": 2.296875, "learning_rate": 7.692505525995799e-05, "loss": 2.3226, "step": 7305 }, { "epoch": 0.31933213864242316, "grad_norm": 2.046875, "learning_rate": 7.691926729352894e-05, "loss": 1.7196, "step": 7306 }, { "epoch": 0.3193758468464531, "grad_norm": 2.703125, "learning_rate": 7.691347881909412e-05, "loss": 2.3532, "step": 7307 }, { "epoch": 0.319419555050483, "grad_norm": 1.9921875, "learning_rate": 7.690768983676281e-05, "loss": 1.8094, "step": 7308 }, { "epoch": 0.31946326325451285, "grad_norm": 1.90625, "learning_rate": 7.690190034664423e-05, "loss": 2.125, "step": 7309 }, { "epoch": 0.3195069714585428, "grad_norm": 10.0, "learning_rate": 7.689611034884763e-05, "loss": 2.7774, "step": 7310 }, { "epoch": 0.31955067966257267, "grad_norm": 2.09375, "learning_rate": 7.689031984348227e-05, "loss": 1.3217, "step": 7311 }, { "epoch": 0.31959438786660255, "grad_norm": 2.546875, "learning_rate": 7.688452883065745e-05, "loss": 1.6019, "step": 7312 }, { "epoch": 0.3196380960706325, "grad_norm": 2.34375, "learning_rate": 7.687873731048245e-05, "loss": 1.9432, "step": 7313 }, { "epoch": 0.31968180427466236, "grad_norm": 2.390625, "learning_rate": 7.687294528306655e-05, "loss": 2.1253, "step": 7314 }, { "epoch": 0.31972551247869224, "grad_norm": 2.109375, "learning_rate": 7.686715274851906e-05, "loss": 1.5275, "step": 7315 }, { "epoch": 0.3197692206827221, "grad_norm": 2.09375, "learning_rate": 7.68613597069493e-05, "loss": 2.0593, "step": 7316 }, { "epoch": 0.31981292888675206, "grad_norm": 1.9921875, "learning_rate": 7.685556615846657e-05, "loss": 1.4496, "step": 7317 }, { "epoch": 0.31985663709078194, "grad_norm": 3.28125, "learning_rate": 7.684977210318024e-05, "loss": 2.216, "step": 7318 }, { "epoch": 0.3199003452948118, "grad_norm": 1.8359375, "learning_rate": 7.684397754119964e-05, "loss": 1.4906, "step": 7319 }, { "epoch": 0.31994405349884175, "grad_norm": 2.296875, "learning_rate": 7.683818247263407e-05, "loss": 1.724, "step": 7320 }, { "epoch": 0.31998776170287163, "grad_norm": 2.734375, "learning_rate": 7.683238689759298e-05, "loss": 1.9679, "step": 7321 }, { "epoch": 0.3200314699069015, "grad_norm": 2.140625, "learning_rate": 7.682659081618567e-05, "loss": 1.907, "step": 7322 }, { "epoch": 0.32007517811093145, "grad_norm": 2.1875, "learning_rate": 7.682079422852156e-05, "loss": 2.0567, "step": 7323 }, { "epoch": 0.3201188863149613, "grad_norm": 2.546875, "learning_rate": 7.681499713471002e-05, "loss": 1.9915, "step": 7324 }, { "epoch": 0.3201625945189912, "grad_norm": 4.25, "learning_rate": 7.680919953486048e-05, "loss": 2.7251, "step": 7325 }, { "epoch": 0.3202063027230211, "grad_norm": 5.28125, "learning_rate": 7.680340142908231e-05, "loss": 2.0837, "step": 7326 }, { "epoch": 0.320250010927051, "grad_norm": 1.9921875, "learning_rate": 7.679760281748491e-05, "loss": 1.6771, "step": 7327 }, { "epoch": 0.3202937191310809, "grad_norm": 2.328125, "learning_rate": 7.67918037001778e-05, "loss": 2.085, "step": 7328 }, { "epoch": 0.3203374273351108, "grad_norm": 2.796875, "learning_rate": 7.678600407727032e-05, "loss": 1.6378, "step": 7329 }, { "epoch": 0.3203811355391407, "grad_norm": 2.25, "learning_rate": 7.678020394887197e-05, "loss": 2.031, "step": 7330 }, { "epoch": 0.3204248437431706, "grad_norm": 2.1875, "learning_rate": 7.67744033150922e-05, "loss": 1.9841, "step": 7331 }, { "epoch": 0.3204685519472005, "grad_norm": 2.390625, "learning_rate": 7.676860217604047e-05, "loss": 2.5098, "step": 7332 }, { "epoch": 0.3205122601512304, "grad_norm": 1.9921875, "learning_rate": 7.676280053182626e-05, "loss": 1.766, "step": 7333 }, { "epoch": 0.3205559683552603, "grad_norm": 2.140625, "learning_rate": 7.675699838255905e-05, "loss": 2.0977, "step": 7334 }, { "epoch": 0.32059967655929017, "grad_norm": 2.59375, "learning_rate": 7.675119572834835e-05, "loss": 2.1568, "step": 7335 }, { "epoch": 0.32064338476332005, "grad_norm": 2.421875, "learning_rate": 7.674539256930363e-05, "loss": 1.7092, "step": 7336 }, { "epoch": 0.32068709296735, "grad_norm": 1.8359375, "learning_rate": 7.673958890553443e-05, "loss": 1.755, "step": 7337 }, { "epoch": 0.32073080117137986, "grad_norm": 1.7734375, "learning_rate": 7.673378473715027e-05, "loss": 1.5277, "step": 7338 }, { "epoch": 0.32077450937540974, "grad_norm": 8.5, "learning_rate": 7.672798006426069e-05, "loss": 2.4689, "step": 7339 }, { "epoch": 0.3208182175794397, "grad_norm": 2.171875, "learning_rate": 7.672217488697522e-05, "loss": 1.7042, "step": 7340 }, { "epoch": 0.32086192578346956, "grad_norm": 2.125, "learning_rate": 7.671636920540342e-05, "loss": 2.0158, "step": 7341 }, { "epoch": 0.32090563398749944, "grad_norm": 1.9140625, "learning_rate": 7.671056301965484e-05, "loss": 1.4584, "step": 7342 }, { "epoch": 0.3209493421915294, "grad_norm": 11.25, "learning_rate": 7.670475632983909e-05, "loss": 1.9689, "step": 7343 }, { "epoch": 0.32099305039555925, "grad_norm": 2.1875, "learning_rate": 7.669894913606568e-05, "loss": 1.9686, "step": 7344 }, { "epoch": 0.32103675859958913, "grad_norm": 1.984375, "learning_rate": 7.669314143844428e-05, "loss": 1.7838, "step": 7345 }, { "epoch": 0.321080466803619, "grad_norm": 3.078125, "learning_rate": 7.668733323708443e-05, "loss": 1.416, "step": 7346 }, { "epoch": 0.32112417500764895, "grad_norm": 2.34375, "learning_rate": 7.668152453209576e-05, "loss": 2.0266, "step": 7347 }, { "epoch": 0.32116788321167883, "grad_norm": 1.9765625, "learning_rate": 7.66757153235879e-05, "loss": 1.6594, "step": 7348 }, { "epoch": 0.3212115914157087, "grad_norm": 1.9296875, "learning_rate": 7.666990561167046e-05, "loss": 1.8492, "step": 7349 }, { "epoch": 0.32125529961973864, "grad_norm": 2.09375, "learning_rate": 7.666409539645308e-05, "loss": 1.8732, "step": 7350 }, { "epoch": 0.3212990078237685, "grad_norm": 2.5, "learning_rate": 7.665828467804542e-05, "loss": 2.4108, "step": 7351 }, { "epoch": 0.3213427160277984, "grad_norm": 2.09375, "learning_rate": 7.665247345655713e-05, "loss": 1.5882, "step": 7352 }, { "epoch": 0.32138642423182834, "grad_norm": 2.578125, "learning_rate": 7.664666173209787e-05, "loss": 1.9882, "step": 7353 }, { "epoch": 0.3214301324358582, "grad_norm": 2.203125, "learning_rate": 7.664084950477731e-05, "loss": 1.8138, "step": 7354 }, { "epoch": 0.3214738406398881, "grad_norm": 2.640625, "learning_rate": 7.663503677470516e-05, "loss": 2.143, "step": 7355 }, { "epoch": 0.321517548843918, "grad_norm": 2.203125, "learning_rate": 7.66292235419911e-05, "loss": 1.7407, "step": 7356 }, { "epoch": 0.3215612570479479, "grad_norm": 1.9609375, "learning_rate": 7.662340980674483e-05, "loss": 1.5849, "step": 7357 }, { "epoch": 0.3216049652519778, "grad_norm": 3.03125, "learning_rate": 7.661759556907607e-05, "loss": 2.7416, "step": 7358 }, { "epoch": 0.32164867345600767, "grad_norm": 2.1875, "learning_rate": 7.661178082909455e-05, "loss": 1.7122, "step": 7359 }, { "epoch": 0.3216923816600376, "grad_norm": 2.21875, "learning_rate": 7.660596558690998e-05, "loss": 2.0861, "step": 7360 }, { "epoch": 0.3217360898640675, "grad_norm": 2.6875, "learning_rate": 7.660014984263214e-05, "loss": 2.1306, "step": 7361 }, { "epoch": 0.32177979806809737, "grad_norm": 2.03125, "learning_rate": 7.659433359637072e-05, "loss": 1.573, "step": 7362 }, { "epoch": 0.3218235062721273, "grad_norm": 2.53125, "learning_rate": 7.658851684823553e-05, "loss": 1.3277, "step": 7363 }, { "epoch": 0.3218672144761572, "grad_norm": 2.03125, "learning_rate": 7.658269959833635e-05, "loss": 1.7538, "step": 7364 }, { "epoch": 0.32191092268018706, "grad_norm": 2.4375, "learning_rate": 7.657688184678293e-05, "loss": 2.4755, "step": 7365 }, { "epoch": 0.32195463088421694, "grad_norm": 2.21875, "learning_rate": 7.657106359368507e-05, "loss": 1.7868, "step": 7366 }, { "epoch": 0.3219983390882469, "grad_norm": 1.90625, "learning_rate": 7.656524483915256e-05, "loss": 1.634, "step": 7367 }, { "epoch": 0.32204204729227676, "grad_norm": 1.765625, "learning_rate": 7.655942558329523e-05, "loss": 1.6302, "step": 7368 }, { "epoch": 0.32208575549630664, "grad_norm": 2.109375, "learning_rate": 7.655360582622286e-05, "loss": 1.8055, "step": 7369 }, { "epoch": 0.32212946370033657, "grad_norm": 2.390625, "learning_rate": 7.654778556804533e-05, "loss": 2.1191, "step": 7370 }, { "epoch": 0.32217317190436645, "grad_norm": 1.9296875, "learning_rate": 7.654196480887244e-05, "loss": 1.8245, "step": 7371 }, { "epoch": 0.32221688010839633, "grad_norm": 2.1875, "learning_rate": 7.653614354881402e-05, "loss": 1.6235, "step": 7372 }, { "epoch": 0.32226058831242627, "grad_norm": 2.578125, "learning_rate": 7.653032178797996e-05, "loss": 2.6314, "step": 7373 }, { "epoch": 0.32230429651645615, "grad_norm": 3.3125, "learning_rate": 7.652449952648013e-05, "loss": 3.2089, "step": 7374 }, { "epoch": 0.322348004720486, "grad_norm": 2.015625, "learning_rate": 7.65186767644244e-05, "loss": 1.5702, "step": 7375 }, { "epoch": 0.3223917129245159, "grad_norm": 2.234375, "learning_rate": 7.651285350192261e-05, "loss": 1.951, "step": 7376 }, { "epoch": 0.32243542112854584, "grad_norm": 2.421875, "learning_rate": 7.650702973908471e-05, "loss": 1.9708, "step": 7377 }, { "epoch": 0.3224791293325757, "grad_norm": 2.046875, "learning_rate": 7.650120547602056e-05, "loss": 1.8858, "step": 7378 }, { "epoch": 0.3225228375366056, "grad_norm": 1.8515625, "learning_rate": 7.64953807128401e-05, "loss": 1.9008, "step": 7379 }, { "epoch": 0.32256654574063554, "grad_norm": 1.8203125, "learning_rate": 7.648955544965326e-05, "loss": 1.4871, "step": 7380 }, { "epoch": 0.3226102539446654, "grad_norm": 2.0625, "learning_rate": 7.648372968656993e-05, "loss": 1.5375, "step": 7381 }, { "epoch": 0.3226539621486953, "grad_norm": 2.484375, "learning_rate": 7.647790342370009e-05, "loss": 3.0121, "step": 7382 }, { "epoch": 0.32269767035272523, "grad_norm": 3.15625, "learning_rate": 7.647207666115368e-05, "loss": 1.7063, "step": 7383 }, { "epoch": 0.3227413785567551, "grad_norm": 2.3125, "learning_rate": 7.646624939904064e-05, "loss": 2.0754, "step": 7384 }, { "epoch": 0.322785086760785, "grad_norm": 2.1875, "learning_rate": 7.646042163747097e-05, "loss": 1.5163, "step": 7385 }, { "epoch": 0.32282879496481487, "grad_norm": 2.5625, "learning_rate": 7.645459337655463e-05, "loss": 2.555, "step": 7386 }, { "epoch": 0.3228725031688448, "grad_norm": 2.625, "learning_rate": 7.644876461640158e-05, "loss": 2.0501, "step": 7387 }, { "epoch": 0.3229162113728747, "grad_norm": 2.109375, "learning_rate": 7.644293535712189e-05, "loss": 1.6365, "step": 7388 }, { "epoch": 0.32295991957690456, "grad_norm": 3.796875, "learning_rate": 7.643710559882551e-05, "loss": 2.3091, "step": 7389 }, { "epoch": 0.3230036277809345, "grad_norm": 2.28125, "learning_rate": 7.643127534162247e-05, "loss": 2.0301, "step": 7390 }, { "epoch": 0.3230473359849644, "grad_norm": 2.96875, "learning_rate": 7.642544458562278e-05, "loss": 2.1748, "step": 7391 }, { "epoch": 0.32309104418899426, "grad_norm": 1.8828125, "learning_rate": 7.64196133309365e-05, "loss": 1.9963, "step": 7392 }, { "epoch": 0.3231347523930242, "grad_norm": 2.109375, "learning_rate": 7.641378157767368e-05, "loss": 1.8105, "step": 7393 }, { "epoch": 0.3231784605970541, "grad_norm": 2.59375, "learning_rate": 7.640794932594433e-05, "loss": 1.945, "step": 7394 }, { "epoch": 0.32322216880108395, "grad_norm": 2.046875, "learning_rate": 7.640211657585856e-05, "loss": 1.8537, "step": 7395 }, { "epoch": 0.32326587700511383, "grad_norm": 1.8984375, "learning_rate": 7.639628332752642e-05, "loss": 2.0916, "step": 7396 }, { "epoch": 0.32330958520914377, "grad_norm": 1.984375, "learning_rate": 7.639044958105799e-05, "loss": 1.7224, "step": 7397 }, { "epoch": 0.32335329341317365, "grad_norm": 2.390625, "learning_rate": 7.638461533656338e-05, "loss": 1.7498, "step": 7398 }, { "epoch": 0.32339700161720353, "grad_norm": 2.109375, "learning_rate": 7.637878059415266e-05, "loss": 1.8257, "step": 7399 }, { "epoch": 0.32344070982123346, "grad_norm": 2.328125, "learning_rate": 7.637294535393598e-05, "loss": 1.925, "step": 7400 }, { "epoch": 0.32348441802526334, "grad_norm": 2.625, "learning_rate": 7.636710961602341e-05, "loss": 2.1079, "step": 7401 }, { "epoch": 0.3235281262292932, "grad_norm": 1.78125, "learning_rate": 7.636127338052512e-05, "loss": 1.7199, "step": 7402 }, { "epoch": 0.32357183443332316, "grad_norm": 2.8125, "learning_rate": 7.635543664755124e-05, "loss": 1.651, "step": 7403 }, { "epoch": 0.32361554263735304, "grad_norm": 2.484375, "learning_rate": 7.634959941721191e-05, "loss": 2.3902, "step": 7404 }, { "epoch": 0.3236592508413829, "grad_norm": 2.203125, "learning_rate": 7.634376168961729e-05, "loss": 2.8142, "step": 7405 }, { "epoch": 0.3237029590454128, "grad_norm": 1.9921875, "learning_rate": 7.633792346487754e-05, "loss": 1.6277, "step": 7406 }, { "epoch": 0.32374666724944273, "grad_norm": 2.265625, "learning_rate": 7.633208474310283e-05, "loss": 1.9859, "step": 7407 }, { "epoch": 0.3237903754534726, "grad_norm": 2.203125, "learning_rate": 7.632624552440337e-05, "loss": 1.6204, "step": 7408 }, { "epoch": 0.3238340836575025, "grad_norm": 1.75, "learning_rate": 7.632040580888936e-05, "loss": 1.6116, "step": 7409 }, { "epoch": 0.3238777918615324, "grad_norm": 2.296875, "learning_rate": 7.631456559667095e-05, "loss": 1.6195, "step": 7410 }, { "epoch": 0.3239215000655623, "grad_norm": 1.9140625, "learning_rate": 7.630872488785841e-05, "loss": 2.1594, "step": 7411 }, { "epoch": 0.3239652082695922, "grad_norm": 1.9921875, "learning_rate": 7.630288368256193e-05, "loss": 1.6741, "step": 7412 }, { "epoch": 0.3240089164736221, "grad_norm": 1.9609375, "learning_rate": 7.629704198089175e-05, "loss": 1.9449, "step": 7413 }, { "epoch": 0.324052624677652, "grad_norm": 1.8671875, "learning_rate": 7.629119978295811e-05, "loss": 1.716, "step": 7414 }, { "epoch": 0.3240963328816819, "grad_norm": 2.5, "learning_rate": 7.62853570888713e-05, "loss": 1.6255, "step": 7415 }, { "epoch": 0.32414004108571176, "grad_norm": 1.8515625, "learning_rate": 7.62795138987415e-05, "loss": 1.7113, "step": 7416 }, { "epoch": 0.3241837492897417, "grad_norm": 1.890625, "learning_rate": 7.627367021267906e-05, "loss": 1.3831, "step": 7417 }, { "epoch": 0.3242274574937716, "grad_norm": 2.03125, "learning_rate": 7.626782603079421e-05, "loss": 1.4712, "step": 7418 }, { "epoch": 0.32427116569780146, "grad_norm": 1.8046875, "learning_rate": 7.626198135319724e-05, "loss": 1.3958, "step": 7419 }, { "epoch": 0.3243148739018314, "grad_norm": 2.125, "learning_rate": 7.625613617999847e-05, "loss": 1.7865, "step": 7420 }, { "epoch": 0.32435858210586127, "grad_norm": 5.65625, "learning_rate": 7.62502905113082e-05, "loss": 1.3227, "step": 7421 }, { "epoch": 0.32440229030989115, "grad_norm": 1.8125, "learning_rate": 7.624444434723674e-05, "loss": 1.6127, "step": 7422 }, { "epoch": 0.3244459985139211, "grad_norm": 2.046875, "learning_rate": 7.623859768789441e-05, "loss": 1.8083, "step": 7423 }, { "epoch": 0.32448970671795097, "grad_norm": 1.8515625, "learning_rate": 7.623275053339156e-05, "loss": 1.8015, "step": 7424 }, { "epoch": 0.32453341492198085, "grad_norm": 3.734375, "learning_rate": 7.622690288383853e-05, "loss": 1.9107, "step": 7425 }, { "epoch": 0.3245771231260107, "grad_norm": 2.234375, "learning_rate": 7.62210547393457e-05, "loss": 2.0738, "step": 7426 }, { "epoch": 0.32462083133004066, "grad_norm": 2.109375, "learning_rate": 7.621520610002335e-05, "loss": 1.9741, "step": 7427 }, { "epoch": 0.32466453953407054, "grad_norm": 2.21875, "learning_rate": 7.620935696598192e-05, "loss": 1.5479, "step": 7428 }, { "epoch": 0.3247082477381004, "grad_norm": 2.15625, "learning_rate": 7.620350733733179e-05, "loss": 1.9824, "step": 7429 }, { "epoch": 0.32475195594213035, "grad_norm": 2.078125, "learning_rate": 7.619765721418335e-05, "loss": 1.6375, "step": 7430 }, { "epoch": 0.32479566414616023, "grad_norm": 2.15625, "learning_rate": 7.619180659664698e-05, "loss": 2.0936, "step": 7431 }, { "epoch": 0.3248393723501901, "grad_norm": 1.984375, "learning_rate": 7.618595548483309e-05, "loss": 2.2806, "step": 7432 }, { "epoch": 0.32488308055422005, "grad_norm": 2.21875, "learning_rate": 7.61801038788521e-05, "loss": 1.6695, "step": 7433 }, { "epoch": 0.32492678875824993, "grad_norm": 2.09375, "learning_rate": 7.617425177881446e-05, "loss": 1.7873, "step": 7434 }, { "epoch": 0.3249704969622798, "grad_norm": 2.46875, "learning_rate": 7.616839918483061e-05, "loss": 1.407, "step": 7435 }, { "epoch": 0.3250142051663097, "grad_norm": 3.46875, "learning_rate": 7.616254609701096e-05, "loss": 2.6228, "step": 7436 }, { "epoch": 0.3250579133703396, "grad_norm": 3.46875, "learning_rate": 7.6156692515466e-05, "loss": 1.7178, "step": 7437 }, { "epoch": 0.3251016215743695, "grad_norm": 2.15625, "learning_rate": 7.615083844030618e-05, "loss": 2.5461, "step": 7438 }, { "epoch": 0.3251453297783994, "grad_norm": 1.984375, "learning_rate": 7.614498387164198e-05, "loss": 1.7624, "step": 7439 }, { "epoch": 0.3251890379824293, "grad_norm": 2.4375, "learning_rate": 7.613912880958386e-05, "loss": 1.7311, "step": 7440 }, { "epoch": 0.3252327461864592, "grad_norm": 2.171875, "learning_rate": 7.613327325424235e-05, "loss": 1.9983, "step": 7441 }, { "epoch": 0.3252764543904891, "grad_norm": 2.359375, "learning_rate": 7.612741720572794e-05, "loss": 1.957, "step": 7442 }, { "epoch": 0.325320162594519, "grad_norm": 2.375, "learning_rate": 7.612156066415113e-05, "loss": 1.513, "step": 7443 }, { "epoch": 0.3253638707985489, "grad_norm": 2.28125, "learning_rate": 7.611570362962248e-05, "loss": 2.1919, "step": 7444 }, { "epoch": 0.3254075790025788, "grad_norm": 1.8125, "learning_rate": 7.610984610225247e-05, "loss": 1.4103, "step": 7445 }, { "epoch": 0.3254512872066087, "grad_norm": 2.265625, "learning_rate": 7.610398808215166e-05, "loss": 2.1165, "step": 7446 }, { "epoch": 0.3254949954106386, "grad_norm": 1.9296875, "learning_rate": 7.609812956943063e-05, "loss": 1.7011, "step": 7447 }, { "epoch": 0.32553870361466847, "grad_norm": 2.203125, "learning_rate": 7.609227056419989e-05, "loss": 1.6094, "step": 7448 }, { "epoch": 0.32558241181869835, "grad_norm": 2.65625, "learning_rate": 7.608641106657001e-05, "loss": 2.8938, "step": 7449 }, { "epoch": 0.3256261200227283, "grad_norm": 1.9765625, "learning_rate": 7.608055107665161e-05, "loss": 1.9487, "step": 7450 }, { "epoch": 0.32566982822675816, "grad_norm": 2.46875, "learning_rate": 7.607469059455526e-05, "loss": 1.7912, "step": 7451 }, { "epoch": 0.32571353643078804, "grad_norm": 2.21875, "learning_rate": 7.606882962039154e-05, "loss": 1.9667, "step": 7452 }, { "epoch": 0.325757244634818, "grad_norm": 1.8046875, "learning_rate": 7.606296815427106e-05, "loss": 1.8214, "step": 7453 }, { "epoch": 0.32580095283884786, "grad_norm": 1.90625, "learning_rate": 7.605710619630444e-05, "loss": 2.1281, "step": 7454 }, { "epoch": 0.32584466104287774, "grad_norm": 2.5, "learning_rate": 7.605124374660231e-05, "loss": 1.7559, "step": 7455 }, { "epoch": 0.32588836924690767, "grad_norm": 2.40625, "learning_rate": 7.604538080527527e-05, "loss": 1.7102, "step": 7456 }, { "epoch": 0.32593207745093755, "grad_norm": 2.09375, "learning_rate": 7.603951737243402e-05, "loss": 1.8114, "step": 7457 }, { "epoch": 0.32597578565496743, "grad_norm": 1.8203125, "learning_rate": 7.603365344818916e-05, "loss": 0.9383, "step": 7458 }, { "epoch": 0.3260194938589973, "grad_norm": 1.9765625, "learning_rate": 7.602778903265137e-05, "loss": 2.0806, "step": 7459 }, { "epoch": 0.32606320206302725, "grad_norm": 2.046875, "learning_rate": 7.602192412593132e-05, "loss": 2.2013, "step": 7460 }, { "epoch": 0.3261069102670571, "grad_norm": 2.078125, "learning_rate": 7.601605872813969e-05, "loss": 2.2884, "step": 7461 }, { "epoch": 0.326150618471087, "grad_norm": 1.96875, "learning_rate": 7.601019283938717e-05, "loss": 2.3615, "step": 7462 }, { "epoch": 0.32619432667511694, "grad_norm": 2.125, "learning_rate": 7.600432645978444e-05, "loss": 1.6707, "step": 7463 }, { "epoch": 0.3262380348791468, "grad_norm": 2.359375, "learning_rate": 7.599845958944224e-05, "loss": 1.7099, "step": 7464 }, { "epoch": 0.3262817430831767, "grad_norm": 2.296875, "learning_rate": 7.599259222847127e-05, "loss": 1.9748, "step": 7465 }, { "epoch": 0.32632545128720664, "grad_norm": 1.9765625, "learning_rate": 7.598672437698224e-05, "loss": 1.7586, "step": 7466 }, { "epoch": 0.3263691594912365, "grad_norm": 2.359375, "learning_rate": 7.598085603508592e-05, "loss": 2.448, "step": 7467 }, { "epoch": 0.3264128676952664, "grad_norm": 2.15625, "learning_rate": 7.597498720289302e-05, "loss": 1.597, "step": 7468 }, { "epoch": 0.3264565758992963, "grad_norm": 1.953125, "learning_rate": 7.59691178805143e-05, "loss": 1.6784, "step": 7469 }, { "epoch": 0.3265002841033262, "grad_norm": 2.078125, "learning_rate": 7.596324806806052e-05, "loss": 1.8834, "step": 7470 }, { "epoch": 0.3265439923073561, "grad_norm": 2.34375, "learning_rate": 7.595737776564249e-05, "loss": 1.5806, "step": 7471 }, { "epoch": 0.32658770051138597, "grad_norm": 1.90625, "learning_rate": 7.595150697337095e-05, "loss": 1.6759, "step": 7472 }, { "epoch": 0.3266314087154159, "grad_norm": 2.390625, "learning_rate": 7.594563569135668e-05, "loss": 1.974, "step": 7473 }, { "epoch": 0.3266751169194458, "grad_norm": 2.625, "learning_rate": 7.593976391971054e-05, "loss": 1.97, "step": 7474 }, { "epoch": 0.32671882512347566, "grad_norm": 2.328125, "learning_rate": 7.593389165854329e-05, "loss": 2.3795, "step": 7475 }, { "epoch": 0.3267625333275056, "grad_norm": 2.171875, "learning_rate": 7.592801890796575e-05, "loss": 2.0789, "step": 7476 }, { "epoch": 0.3268062415315355, "grad_norm": 2.015625, "learning_rate": 7.592214566808877e-05, "loss": 1.7114, "step": 7477 }, { "epoch": 0.32684994973556536, "grad_norm": 2.296875, "learning_rate": 7.591627193902315e-05, "loss": 2.268, "step": 7478 }, { "epoch": 0.32689365793959524, "grad_norm": 2.09375, "learning_rate": 7.591039772087977e-05, "loss": 1.96, "step": 7479 }, { "epoch": 0.3269373661436252, "grad_norm": 3.4375, "learning_rate": 7.59045230137695e-05, "loss": 2.1119, "step": 7480 }, { "epoch": 0.32698107434765505, "grad_norm": 2.25, "learning_rate": 7.589864781780314e-05, "loss": 1.6881, "step": 7481 }, { "epoch": 0.32702478255168493, "grad_norm": 1.890625, "learning_rate": 7.589277213309163e-05, "loss": 1.9424, "step": 7482 }, { "epoch": 0.32706849075571487, "grad_norm": 1.8203125, "learning_rate": 7.58868959597458e-05, "loss": 1.2206, "step": 7483 }, { "epoch": 0.32711219895974475, "grad_norm": 2.203125, "learning_rate": 7.588101929787658e-05, "loss": 1.8753, "step": 7484 }, { "epoch": 0.32715590716377463, "grad_norm": 3.640625, "learning_rate": 7.587514214759487e-05, "loss": 1.7069, "step": 7485 }, { "epoch": 0.32719961536780456, "grad_norm": 3.1875, "learning_rate": 7.586926450901155e-05, "loss": 1.8258, "step": 7486 }, { "epoch": 0.32724332357183444, "grad_norm": 2.375, "learning_rate": 7.586338638223757e-05, "loss": 1.7908, "step": 7487 }, { "epoch": 0.3272870317758643, "grad_norm": 2.09375, "learning_rate": 7.585750776738383e-05, "loss": 2.2592, "step": 7488 }, { "epoch": 0.3273307399798942, "grad_norm": 1.875, "learning_rate": 7.58516286645613e-05, "loss": 1.6792, "step": 7489 }, { "epoch": 0.32737444818392414, "grad_norm": 2.609375, "learning_rate": 7.584574907388092e-05, "loss": 2.2359, "step": 7490 }, { "epoch": 0.327418156387954, "grad_norm": 2.09375, "learning_rate": 7.583986899545362e-05, "loss": 1.7483, "step": 7491 }, { "epoch": 0.3274618645919839, "grad_norm": 2.21875, "learning_rate": 7.58339884293904e-05, "loss": 1.6944, "step": 7492 }, { "epoch": 0.32750557279601383, "grad_norm": 2.78125, "learning_rate": 7.58281073758022e-05, "loss": 2.4001, "step": 7493 }, { "epoch": 0.3275492810000437, "grad_norm": 2.53125, "learning_rate": 7.58222258348e-05, "loss": 1.9724, "step": 7494 }, { "epoch": 0.3275929892040736, "grad_norm": 2.34375, "learning_rate": 7.581634380649488e-05, "loss": 1.8521, "step": 7495 }, { "epoch": 0.3276366974081035, "grad_norm": 2.21875, "learning_rate": 7.581046129099773e-05, "loss": 2.0944, "step": 7496 }, { "epoch": 0.3276804056121334, "grad_norm": 1.9296875, "learning_rate": 7.580457828841963e-05, "loss": 1.59, "step": 7497 }, { "epoch": 0.3277241138161633, "grad_norm": 2.5625, "learning_rate": 7.579869479887158e-05, "loss": 2.4358, "step": 7498 }, { "epoch": 0.32776782202019317, "grad_norm": 1.9765625, "learning_rate": 7.57928108224646e-05, "loss": 1.5962, "step": 7499 }, { "epoch": 0.3278115302242231, "grad_norm": 2.390625, "learning_rate": 7.578692635930975e-05, "loss": 1.5579, "step": 7500 }, { "epoch": 0.327855238428253, "grad_norm": 15.0, "learning_rate": 7.578104140951807e-05, "loss": 5.9767, "step": 7501 }, { "epoch": 0.32789894663228286, "grad_norm": 2.015625, "learning_rate": 7.577515597320062e-05, "loss": 2.0851, "step": 7502 }, { "epoch": 0.3279426548363128, "grad_norm": 2.15625, "learning_rate": 7.576927005046844e-05, "loss": 2.4982, "step": 7503 }, { "epoch": 0.3279863630403427, "grad_norm": 2.25, "learning_rate": 7.576338364143264e-05, "loss": 1.5677, "step": 7504 }, { "epoch": 0.32803007124437256, "grad_norm": 1.96875, "learning_rate": 7.575749674620431e-05, "loss": 1.7917, "step": 7505 }, { "epoch": 0.3280737794484025, "grad_norm": 1.9296875, "learning_rate": 7.575160936489452e-05, "loss": 1.7448, "step": 7506 }, { "epoch": 0.32811748765243237, "grad_norm": 2.703125, "learning_rate": 7.574572149761437e-05, "loss": 1.9557, "step": 7507 }, { "epoch": 0.32816119585646225, "grad_norm": 2.234375, "learning_rate": 7.573983314447499e-05, "loss": 1.9883, "step": 7508 }, { "epoch": 0.32820490406049213, "grad_norm": 2.203125, "learning_rate": 7.573394430558749e-05, "loss": 1.3002, "step": 7509 }, { "epoch": 0.32824861226452207, "grad_norm": 2.046875, "learning_rate": 7.572805498106301e-05, "loss": 2.2083, "step": 7510 }, { "epoch": 0.32829232046855195, "grad_norm": 1.9453125, "learning_rate": 7.57221651710127e-05, "loss": 1.7499, "step": 7511 }, { "epoch": 0.3283360286725818, "grad_norm": 1.921875, "learning_rate": 7.571627487554769e-05, "loss": 1.98, "step": 7512 }, { "epoch": 0.32837973687661176, "grad_norm": 2.171875, "learning_rate": 7.571038409477913e-05, "loss": 2.192, "step": 7513 }, { "epoch": 0.32842344508064164, "grad_norm": 2.75, "learning_rate": 7.570449282881822e-05, "loss": 1.4564, "step": 7514 }, { "epoch": 0.3284671532846715, "grad_norm": 2.21875, "learning_rate": 7.569860107777613e-05, "loss": 1.8013, "step": 7515 }, { "epoch": 0.32851086148870146, "grad_norm": 2.078125, "learning_rate": 7.5692708841764e-05, "loss": 2.2148, "step": 7516 }, { "epoch": 0.32855456969273134, "grad_norm": 2.0, "learning_rate": 7.56868161208931e-05, "loss": 1.547, "step": 7517 }, { "epoch": 0.3285982778967612, "grad_norm": 2.25, "learning_rate": 7.568092291527455e-05, "loss": 2.1837, "step": 7518 }, { "epoch": 0.3286419861007911, "grad_norm": 2.015625, "learning_rate": 7.567502922501963e-05, "loss": 2.1615, "step": 7519 }, { "epoch": 0.32868569430482103, "grad_norm": 1.8203125, "learning_rate": 7.566913505023956e-05, "loss": 1.499, "step": 7520 }, { "epoch": 0.3287294025088509, "grad_norm": 2.171875, "learning_rate": 7.566324039104553e-05, "loss": 2.5199, "step": 7521 }, { "epoch": 0.3287731107128808, "grad_norm": 1.8046875, "learning_rate": 7.565734524754882e-05, "loss": 1.5872, "step": 7522 }, { "epoch": 0.3288168189169107, "grad_norm": 2.140625, "learning_rate": 7.565144961986064e-05, "loss": 1.7103, "step": 7523 }, { "epoch": 0.3288605271209406, "grad_norm": 2.109375, "learning_rate": 7.564555350809226e-05, "loss": 2.1791, "step": 7524 }, { "epoch": 0.3289042353249705, "grad_norm": 2.515625, "learning_rate": 7.5639656912355e-05, "loss": 1.6546, "step": 7525 }, { "epoch": 0.3289479435290004, "grad_norm": 1.765625, "learning_rate": 7.563375983276008e-05, "loss": 1.3833, "step": 7526 }, { "epoch": 0.3289916517330303, "grad_norm": 2.171875, "learning_rate": 7.56278622694188e-05, "loss": 2.4949, "step": 7527 }, { "epoch": 0.3290353599370602, "grad_norm": 1.953125, "learning_rate": 7.562196422244245e-05, "loss": 1.7161, "step": 7528 }, { "epoch": 0.32907906814109006, "grad_norm": 1.9375, "learning_rate": 7.561606569194237e-05, "loss": 1.6621, "step": 7529 }, { "epoch": 0.32912277634512, "grad_norm": 2.171875, "learning_rate": 7.561016667802982e-05, "loss": 1.9472, "step": 7530 }, { "epoch": 0.3291664845491499, "grad_norm": 2.265625, "learning_rate": 7.560426718081617e-05, "loss": 2.2656, "step": 7531 }, { "epoch": 0.32921019275317975, "grad_norm": 2.375, "learning_rate": 7.559836720041274e-05, "loss": 1.8794, "step": 7532 }, { "epoch": 0.3292539009572097, "grad_norm": 2.375, "learning_rate": 7.559246673693085e-05, "loss": 2.2259, "step": 7533 }, { "epoch": 0.32929760916123957, "grad_norm": 2.234375, "learning_rate": 7.558656579048185e-05, "loss": 1.6449, "step": 7534 }, { "epoch": 0.32934131736526945, "grad_norm": 2.21875, "learning_rate": 7.558066436117715e-05, "loss": 2.2579, "step": 7535 }, { "epoch": 0.3293850255692994, "grad_norm": 4.875, "learning_rate": 7.557476244912805e-05, "loss": 2.4239, "step": 7536 }, { "epoch": 0.32942873377332926, "grad_norm": 1.8984375, "learning_rate": 7.556886005444597e-05, "loss": 1.8257, "step": 7537 }, { "epoch": 0.32947244197735914, "grad_norm": 2.765625, "learning_rate": 7.55629571772423e-05, "loss": 1.2687, "step": 7538 }, { "epoch": 0.329516150181389, "grad_norm": 2.21875, "learning_rate": 7.555705381762841e-05, "loss": 2.1488, "step": 7539 }, { "epoch": 0.32955985838541896, "grad_norm": 1.953125, "learning_rate": 7.555114997571572e-05, "loss": 1.7148, "step": 7540 }, { "epoch": 0.32960356658944884, "grad_norm": 1.9296875, "learning_rate": 7.554524565161565e-05, "loss": 1.781, "step": 7541 }, { "epoch": 0.3296472747934787, "grad_norm": 1.8984375, "learning_rate": 7.553934084543961e-05, "loss": 1.5344, "step": 7542 }, { "epoch": 0.32969098299750865, "grad_norm": 1.796875, "learning_rate": 7.553343555729903e-05, "loss": 1.5419, "step": 7543 }, { "epoch": 0.32973469120153853, "grad_norm": 3.828125, "learning_rate": 7.552752978730536e-05, "loss": 1.7144, "step": 7544 }, { "epoch": 0.3297783994055684, "grad_norm": 1.9296875, "learning_rate": 7.552162353557006e-05, "loss": 2.0587, "step": 7545 }, { "epoch": 0.32982210760959835, "grad_norm": 2.15625, "learning_rate": 7.551571680220457e-05, "loss": 1.6627, "step": 7546 }, { "epoch": 0.3298658158136282, "grad_norm": 1.75, "learning_rate": 7.550980958732037e-05, "loss": 1.9505, "step": 7547 }, { "epoch": 0.3299095240176581, "grad_norm": 2.421875, "learning_rate": 7.550390189102894e-05, "loss": 1.7785, "step": 7548 }, { "epoch": 0.329953232221688, "grad_norm": 1.8828125, "learning_rate": 7.549799371344175e-05, "loss": 1.792, "step": 7549 }, { "epoch": 0.3299969404257179, "grad_norm": 2.09375, "learning_rate": 7.549208505467033e-05, "loss": 1.8559, "step": 7550 }, { "epoch": 0.3300406486297478, "grad_norm": 2.0, "learning_rate": 7.548617591482614e-05, "loss": 1.8498, "step": 7551 }, { "epoch": 0.3300843568337777, "grad_norm": 2.0625, "learning_rate": 7.548026629402075e-05, "loss": 2.5397, "step": 7552 }, { "epoch": 0.3301280650378076, "grad_norm": 2.28125, "learning_rate": 7.547435619236562e-05, "loss": 1.9982, "step": 7553 }, { "epoch": 0.3301717732418375, "grad_norm": 2.734375, "learning_rate": 7.54684456099723e-05, "loss": 1.7053, "step": 7554 }, { "epoch": 0.3302154814458674, "grad_norm": 2.015625, "learning_rate": 7.546253454695237e-05, "loss": 1.6925, "step": 7555 }, { "epoch": 0.3302591896498973, "grad_norm": 1.9296875, "learning_rate": 7.545662300341736e-05, "loss": 1.8186, "step": 7556 }, { "epoch": 0.3303028978539272, "grad_norm": 2.6875, "learning_rate": 7.54507109794788e-05, "loss": 2.1328, "step": 7557 }, { "epoch": 0.33034660605795707, "grad_norm": 3.15625, "learning_rate": 7.544479847524829e-05, "loss": 2.549, "step": 7558 }, { "epoch": 0.33039031426198695, "grad_norm": 2.0625, "learning_rate": 7.54388854908374e-05, "loss": 2.1843, "step": 7559 }, { "epoch": 0.3304340224660169, "grad_norm": 2.0, "learning_rate": 7.543297202635772e-05, "loss": 2.3384, "step": 7560 }, { "epoch": 0.33047773067004677, "grad_norm": 2.578125, "learning_rate": 7.542705808192085e-05, "loss": 1.8247, "step": 7561 }, { "epoch": 0.33052143887407665, "grad_norm": 2.640625, "learning_rate": 7.542114365763837e-05, "loss": 2.4422, "step": 7562 }, { "epoch": 0.3305651470781066, "grad_norm": 3.734375, "learning_rate": 7.541522875362193e-05, "loss": 2.1137, "step": 7563 }, { "epoch": 0.33060885528213646, "grad_norm": 1.9453125, "learning_rate": 7.540931336998312e-05, "loss": 1.5262, "step": 7564 }, { "epoch": 0.33065256348616634, "grad_norm": 2.546875, "learning_rate": 7.540339750683358e-05, "loss": 1.9562, "step": 7565 }, { "epoch": 0.3306962716901963, "grad_norm": 2.09375, "learning_rate": 7.539748116428495e-05, "loss": 2.2275, "step": 7566 }, { "epoch": 0.33073997989422615, "grad_norm": 2.140625, "learning_rate": 7.539156434244892e-05, "loss": 1.2463, "step": 7567 }, { "epoch": 0.33078368809825603, "grad_norm": 2.671875, "learning_rate": 7.53856470414371e-05, "loss": 1.8308, "step": 7568 }, { "epoch": 0.3308273963022859, "grad_norm": 2.078125, "learning_rate": 7.537972926136115e-05, "loss": 1.3167, "step": 7569 }, { "epoch": 0.33087110450631585, "grad_norm": 2.046875, "learning_rate": 7.537381100233278e-05, "loss": 1.7028, "step": 7570 }, { "epoch": 0.33091481271034573, "grad_norm": 1.8671875, "learning_rate": 7.536789226446367e-05, "loss": 1.7325, "step": 7571 }, { "epoch": 0.3309585209143756, "grad_norm": 2.09375, "learning_rate": 7.536197304786555e-05, "loss": 1.6386, "step": 7572 }, { "epoch": 0.33100222911840554, "grad_norm": 1.8984375, "learning_rate": 7.535605335265003e-05, "loss": 1.6623, "step": 7573 }, { "epoch": 0.3310459373224354, "grad_norm": 2.25, "learning_rate": 7.535013317892889e-05, "loss": 1.4363, "step": 7574 }, { "epoch": 0.3310896455264653, "grad_norm": 2.28125, "learning_rate": 7.534421252681387e-05, "loss": 2.1629, "step": 7575 }, { "epoch": 0.33113335373049524, "grad_norm": 1.9296875, "learning_rate": 7.533829139641664e-05, "loss": 1.831, "step": 7576 }, { "epoch": 0.3311770619345251, "grad_norm": 2.1875, "learning_rate": 7.5332369787849e-05, "loss": 1.9585, "step": 7577 }, { "epoch": 0.331220770138555, "grad_norm": 2.578125, "learning_rate": 7.532644770122266e-05, "loss": 2.5307, "step": 7578 }, { "epoch": 0.3312644783425849, "grad_norm": 1.9375, "learning_rate": 7.532052513664939e-05, "loss": 1.8876, "step": 7579 }, { "epoch": 0.3313081865466148, "grad_norm": 2.0625, "learning_rate": 7.531460209424096e-05, "loss": 1.8074, "step": 7580 }, { "epoch": 0.3313518947506447, "grad_norm": 1.9453125, "learning_rate": 7.530867857410915e-05, "loss": 1.6012, "step": 7581 }, { "epoch": 0.3313956029546746, "grad_norm": 1.671875, "learning_rate": 7.530275457636574e-05, "loss": 1.5269, "step": 7582 }, { "epoch": 0.3314393111587045, "grad_norm": 2.21875, "learning_rate": 7.529683010112252e-05, "loss": 1.6858, "step": 7583 }, { "epoch": 0.3314830193627344, "grad_norm": 2.609375, "learning_rate": 7.529090514849128e-05, "loss": 2.4866, "step": 7584 }, { "epoch": 0.33152672756676427, "grad_norm": 1.8203125, "learning_rate": 7.528497971858388e-05, "loss": 1.7441, "step": 7585 }, { "epoch": 0.3315704357707942, "grad_norm": 2.09375, "learning_rate": 7.52790538115121e-05, "loss": 1.7589, "step": 7586 }, { "epoch": 0.3316141439748241, "grad_norm": 2.234375, "learning_rate": 7.52731274273878e-05, "loss": 1.9113, "step": 7587 }, { "epoch": 0.33165785217885396, "grad_norm": 2.4375, "learning_rate": 7.526720056632277e-05, "loss": 1.9697, "step": 7588 }, { "epoch": 0.33170156038288384, "grad_norm": 2.421875, "learning_rate": 7.52612732284289e-05, "loss": 2.1639, "step": 7589 }, { "epoch": 0.3317452685869138, "grad_norm": 1.9296875, "learning_rate": 7.525534541381806e-05, "loss": 1.6989, "step": 7590 }, { "epoch": 0.33178897679094366, "grad_norm": 2.671875, "learning_rate": 7.524941712260207e-05, "loss": 1.7631, "step": 7591 }, { "epoch": 0.33183268499497354, "grad_norm": 2.34375, "learning_rate": 7.524348835489286e-05, "loss": 1.7935, "step": 7592 }, { "epoch": 0.33187639319900347, "grad_norm": 1.9453125, "learning_rate": 7.523755911080226e-05, "loss": 1.505, "step": 7593 }, { "epoch": 0.33192010140303335, "grad_norm": 2.578125, "learning_rate": 7.523162939044219e-05, "loss": 2.3742, "step": 7594 }, { "epoch": 0.33196380960706323, "grad_norm": 2.390625, "learning_rate": 7.522569919392455e-05, "loss": 2.1225, "step": 7595 }, { "epoch": 0.33200751781109317, "grad_norm": 1.9296875, "learning_rate": 7.521976852136125e-05, "loss": 1.9097, "step": 7596 }, { "epoch": 0.33205122601512305, "grad_norm": 2.046875, "learning_rate": 7.521383737286423e-05, "loss": 1.6101, "step": 7597 }, { "epoch": 0.3320949342191529, "grad_norm": 2.1875, "learning_rate": 7.520790574854538e-05, "loss": 1.6832, "step": 7598 }, { "epoch": 0.3321386424231828, "grad_norm": 2.03125, "learning_rate": 7.520197364851667e-05, "loss": 1.3584, "step": 7599 }, { "epoch": 0.33218235062721274, "grad_norm": 1.7578125, "learning_rate": 7.519604107289003e-05, "loss": 1.5448, "step": 7600 }, { "epoch": 0.3322260588312426, "grad_norm": 3.875, "learning_rate": 7.519010802177744e-05, "loss": 2.2089, "step": 7601 }, { "epoch": 0.3322697670352725, "grad_norm": 1.890625, "learning_rate": 7.518417449529085e-05, "loss": 1.6202, "step": 7602 }, { "epoch": 0.33231347523930244, "grad_norm": 2.25, "learning_rate": 7.517824049354221e-05, "loss": 1.6155, "step": 7603 }, { "epoch": 0.3323571834433323, "grad_norm": 2.234375, "learning_rate": 7.517230601664354e-05, "loss": 2.1604, "step": 7604 }, { "epoch": 0.3324008916473622, "grad_norm": 3.109375, "learning_rate": 7.516637106470683e-05, "loss": 2.687, "step": 7605 }, { "epoch": 0.33244459985139213, "grad_norm": 2.296875, "learning_rate": 7.516043563784405e-05, "loss": 2.0484, "step": 7606 }, { "epoch": 0.332488308055422, "grad_norm": 2.21875, "learning_rate": 7.515449973616723e-05, "loss": 1.984, "step": 7607 }, { "epoch": 0.3325320162594519, "grad_norm": 2.3125, "learning_rate": 7.514856335978842e-05, "loss": 2.0978, "step": 7608 }, { "epoch": 0.33257572446348177, "grad_norm": 2.28125, "learning_rate": 7.514262650881958e-05, "loss": 1.5738, "step": 7609 }, { "epoch": 0.3326194326675117, "grad_norm": 2.203125, "learning_rate": 7.51366891833728e-05, "loss": 2.2538, "step": 7610 }, { "epoch": 0.3326631408715416, "grad_norm": 2.4375, "learning_rate": 7.513075138356012e-05, "loss": 2.524, "step": 7611 }, { "epoch": 0.33270684907557146, "grad_norm": 2.140625, "learning_rate": 7.512481310949358e-05, "loss": 1.6862, "step": 7612 }, { "epoch": 0.3327505572796014, "grad_norm": 2.34375, "learning_rate": 7.511887436128525e-05, "loss": 1.6454, "step": 7613 }, { "epoch": 0.3327942654836313, "grad_norm": 2.046875, "learning_rate": 7.511293513904718e-05, "loss": 2.314, "step": 7614 }, { "epoch": 0.33283797368766116, "grad_norm": 1.8984375, "learning_rate": 7.510699544289151e-05, "loss": 1.6821, "step": 7615 }, { "epoch": 0.3328816818916911, "grad_norm": 2.53125, "learning_rate": 7.510105527293026e-05, "loss": 1.4252, "step": 7616 }, { "epoch": 0.332925390095721, "grad_norm": 2.28125, "learning_rate": 7.509511462927559e-05, "loss": 2.2525, "step": 7617 }, { "epoch": 0.33296909829975085, "grad_norm": 2.09375, "learning_rate": 7.508917351203957e-05, "loss": 1.7308, "step": 7618 }, { "epoch": 0.33301280650378073, "grad_norm": 2.46875, "learning_rate": 7.508323192133432e-05, "loss": 1.7358, "step": 7619 }, { "epoch": 0.33305651470781067, "grad_norm": 2.375, "learning_rate": 7.507728985727199e-05, "loss": 2.3117, "step": 7620 }, { "epoch": 0.33310022291184055, "grad_norm": 1.9453125, "learning_rate": 7.50713473199647e-05, "loss": 1.3512, "step": 7621 }, { "epoch": 0.33314393111587043, "grad_norm": 2.65625, "learning_rate": 7.506540430952461e-05, "loss": 1.5071, "step": 7622 }, { "epoch": 0.33318763931990036, "grad_norm": 2.53125, "learning_rate": 7.505946082606386e-05, "loss": 1.7641, "step": 7623 }, { "epoch": 0.33323134752393024, "grad_norm": 2.171875, "learning_rate": 7.505351686969457e-05, "loss": 2.113, "step": 7624 }, { "epoch": 0.3332750557279601, "grad_norm": 2.21875, "learning_rate": 7.504757244052901e-05, "loss": 1.863, "step": 7625 }, { "epoch": 0.33331876393199006, "grad_norm": 3.75, "learning_rate": 7.504162753867927e-05, "loss": 2.0576, "step": 7626 }, { "epoch": 0.33336247213601994, "grad_norm": 1.8984375, "learning_rate": 7.503568216425757e-05, "loss": 1.4004, "step": 7627 }, { "epoch": 0.3334061803400498, "grad_norm": 2.078125, "learning_rate": 7.502973631737612e-05, "loss": 2.2958, "step": 7628 }, { "epoch": 0.3334498885440797, "grad_norm": 1.828125, "learning_rate": 7.50237899981471e-05, "loss": 1.5195, "step": 7629 }, { "epoch": 0.33349359674810963, "grad_norm": 3.46875, "learning_rate": 7.501784320668277e-05, "loss": 2.7067, "step": 7630 }, { "epoch": 0.3335373049521395, "grad_norm": 2.484375, "learning_rate": 7.501189594309531e-05, "loss": 0.8452, "step": 7631 }, { "epoch": 0.3335810131561694, "grad_norm": 2.3125, "learning_rate": 7.500594820749698e-05, "loss": 2.0745, "step": 7632 }, { "epoch": 0.33362472136019933, "grad_norm": 1.921875, "learning_rate": 7.500000000000001e-05, "loss": 1.715, "step": 7633 }, { "epoch": 0.3336684295642292, "grad_norm": 2.71875, "learning_rate": 7.499405132071665e-05, "loss": 1.7119, "step": 7634 }, { "epoch": 0.3337121377682591, "grad_norm": 2.078125, "learning_rate": 7.498810216975917e-05, "loss": 2.5392, "step": 7635 }, { "epoch": 0.333755845972289, "grad_norm": 1.953125, "learning_rate": 7.498215254723982e-05, "loss": 1.5535, "step": 7636 }, { "epoch": 0.3337995541763189, "grad_norm": 2.203125, "learning_rate": 7.49762024532709e-05, "loss": 2.2684, "step": 7637 }, { "epoch": 0.3338432623803488, "grad_norm": 2.5625, "learning_rate": 7.497025188796469e-05, "loss": 1.5744, "step": 7638 }, { "epoch": 0.33388697058437866, "grad_norm": 1.828125, "learning_rate": 7.496430085143348e-05, "loss": 1.4891, "step": 7639 }, { "epoch": 0.3339306787884086, "grad_norm": 2.0, "learning_rate": 7.495834934378958e-05, "loss": 1.5024, "step": 7640 }, { "epoch": 0.3339743869924385, "grad_norm": 2.234375, "learning_rate": 7.495239736514531e-05, "loss": 1.609, "step": 7641 }, { "epoch": 0.33401809519646836, "grad_norm": 1.9765625, "learning_rate": 7.494644491561299e-05, "loss": 2.082, "step": 7642 }, { "epoch": 0.3340618034004983, "grad_norm": 1.7890625, "learning_rate": 7.494049199530494e-05, "loss": 1.2633, "step": 7643 }, { "epoch": 0.33410551160452817, "grad_norm": 1.90625, "learning_rate": 7.49345386043335e-05, "loss": 1.6481, "step": 7644 }, { "epoch": 0.33414921980855805, "grad_norm": 1.9375, "learning_rate": 7.492858474281103e-05, "loss": 1.5163, "step": 7645 }, { "epoch": 0.334192928012588, "grad_norm": 4.1875, "learning_rate": 7.492263041084988e-05, "loss": 1.8762, "step": 7646 }, { "epoch": 0.33423663621661787, "grad_norm": 4.84375, "learning_rate": 7.491667560856242e-05, "loss": 1.7709, "step": 7647 }, { "epoch": 0.33428034442064775, "grad_norm": 2.140625, "learning_rate": 7.491072033606104e-05, "loss": 1.7079, "step": 7648 }, { "epoch": 0.3343240526246776, "grad_norm": 2.140625, "learning_rate": 7.49047645934581e-05, "loss": 1.9067, "step": 7649 }, { "epoch": 0.33436776082870756, "grad_norm": 2.640625, "learning_rate": 7.4898808380866e-05, "loss": 2.0671, "step": 7650 }, { "epoch": 0.33441146903273744, "grad_norm": 2.15625, "learning_rate": 7.489285169839717e-05, "loss": 1.4923, "step": 7651 }, { "epoch": 0.3344551772367673, "grad_norm": 2.546875, "learning_rate": 7.488689454616399e-05, "loss": 2.8122, "step": 7652 }, { "epoch": 0.33449888544079726, "grad_norm": 1.9453125, "learning_rate": 7.488093692427887e-05, "loss": 2.0358, "step": 7653 }, { "epoch": 0.33454259364482714, "grad_norm": 2.015625, "learning_rate": 7.487497883285428e-05, "loss": 2.0688, "step": 7654 }, { "epoch": 0.334586301848857, "grad_norm": 2.140625, "learning_rate": 7.486902027200263e-05, "loss": 1.5415, "step": 7655 }, { "epoch": 0.33463001005288695, "grad_norm": 1.9609375, "learning_rate": 7.486306124183637e-05, "loss": 1.4973, "step": 7656 }, { "epoch": 0.33467371825691683, "grad_norm": 1.9609375, "learning_rate": 7.485710174246794e-05, "loss": 1.6159, "step": 7657 }, { "epoch": 0.3347174264609467, "grad_norm": 2.109375, "learning_rate": 7.485114177400984e-05, "loss": 1.4921, "step": 7658 }, { "epoch": 0.3347611346649766, "grad_norm": 2.359375, "learning_rate": 7.484518133657455e-05, "loss": 1.5661, "step": 7659 }, { "epoch": 0.3348048428690065, "grad_norm": 4.96875, "learning_rate": 7.483922043027448e-05, "loss": 1.9808, "step": 7660 }, { "epoch": 0.3348485510730364, "grad_norm": 2.046875, "learning_rate": 7.48332590552222e-05, "loss": 2.1961, "step": 7661 }, { "epoch": 0.3348922592770663, "grad_norm": 2.265625, "learning_rate": 7.482729721153016e-05, "loss": 1.6836, "step": 7662 }, { "epoch": 0.3349359674810962, "grad_norm": 1.828125, "learning_rate": 7.482133489931091e-05, "loss": 1.5001, "step": 7663 }, { "epoch": 0.3349796756851261, "grad_norm": 1.765625, "learning_rate": 7.481537211867693e-05, "loss": 1.6789, "step": 7664 }, { "epoch": 0.335023383889156, "grad_norm": 1.9921875, "learning_rate": 7.480940886974077e-05, "loss": 1.4361, "step": 7665 }, { "epoch": 0.3350670920931859, "grad_norm": 2.328125, "learning_rate": 7.480344515261495e-05, "loss": 2.1453, "step": 7666 }, { "epoch": 0.3351108002972158, "grad_norm": 2.171875, "learning_rate": 7.479748096741201e-05, "loss": 1.6566, "step": 7667 }, { "epoch": 0.3351545085012457, "grad_norm": 2.1875, "learning_rate": 7.479151631424453e-05, "loss": 1.9521, "step": 7668 }, { "epoch": 0.33519821670527555, "grad_norm": 2.828125, "learning_rate": 7.478555119322505e-05, "loss": 2.6565, "step": 7669 }, { "epoch": 0.3352419249093055, "grad_norm": 2.296875, "learning_rate": 7.477958560446613e-05, "loss": 2.3781, "step": 7670 }, { "epoch": 0.33528563311333537, "grad_norm": 2.15625, "learning_rate": 7.477361954808037e-05, "loss": 1.7341, "step": 7671 }, { "epoch": 0.33532934131736525, "grad_norm": 2.265625, "learning_rate": 7.476765302418037e-05, "loss": 1.6521, "step": 7672 }, { "epoch": 0.3353730495213952, "grad_norm": 2.265625, "learning_rate": 7.47616860328787e-05, "loss": 2.4899, "step": 7673 }, { "epoch": 0.33541675772542506, "grad_norm": 2.328125, "learning_rate": 7.475571857428797e-05, "loss": 1.8374, "step": 7674 }, { "epoch": 0.33546046592945494, "grad_norm": 2.03125, "learning_rate": 7.474975064852081e-05, "loss": 1.6771, "step": 7675 }, { "epoch": 0.3355041741334849, "grad_norm": 2.109375, "learning_rate": 7.474378225568983e-05, "loss": 2.0606, "step": 7676 }, { "epoch": 0.33554788233751476, "grad_norm": 2.25, "learning_rate": 7.473781339590766e-05, "loss": 1.8691, "step": 7677 }, { "epoch": 0.33559159054154464, "grad_norm": 3.3125, "learning_rate": 7.473184406928696e-05, "loss": 2.2992, "step": 7678 }, { "epoch": 0.3356352987455745, "grad_norm": 2.140625, "learning_rate": 7.472587427594037e-05, "loss": 1.6181, "step": 7679 }, { "epoch": 0.33567900694960445, "grad_norm": 2.078125, "learning_rate": 7.471990401598052e-05, "loss": 1.5764, "step": 7680 }, { "epoch": 0.33572271515363433, "grad_norm": 2.0, "learning_rate": 7.471393328952012e-05, "loss": 1.7561, "step": 7681 }, { "epoch": 0.3357664233576642, "grad_norm": 2.0, "learning_rate": 7.470796209667184e-05, "loss": 1.7098, "step": 7682 }, { "epoch": 0.33581013156169415, "grad_norm": 1.7734375, "learning_rate": 7.470199043754833e-05, "loss": 1.8006, "step": 7683 }, { "epoch": 0.335853839765724, "grad_norm": 2.875, "learning_rate": 7.469601831226233e-05, "loss": 1.7551, "step": 7684 }, { "epoch": 0.3358975479697539, "grad_norm": 1.8828125, "learning_rate": 7.469004572092651e-05, "loss": 1.9204, "step": 7685 }, { "epoch": 0.33594125617378384, "grad_norm": 1.78125, "learning_rate": 7.46840726636536e-05, "loss": 1.3732, "step": 7686 }, { "epoch": 0.3359849643778137, "grad_norm": 2.21875, "learning_rate": 7.46780991405563e-05, "loss": 1.8284, "step": 7687 }, { "epoch": 0.3360286725818436, "grad_norm": 2.4375, "learning_rate": 7.467212515174736e-05, "loss": 1.9712, "step": 7688 }, { "epoch": 0.3360723807858735, "grad_norm": 1.859375, "learning_rate": 7.466615069733951e-05, "loss": 1.4069, "step": 7689 }, { "epoch": 0.3361160889899034, "grad_norm": 2.328125, "learning_rate": 7.466017577744549e-05, "loss": 2.1121, "step": 7690 }, { "epoch": 0.3361597971939333, "grad_norm": 2.09375, "learning_rate": 7.465420039217806e-05, "loss": 1.5186, "step": 7691 }, { "epoch": 0.3362035053979632, "grad_norm": 2.125, "learning_rate": 7.464822454165e-05, "loss": 1.6417, "step": 7692 }, { "epoch": 0.3362472136019931, "grad_norm": 4.90625, "learning_rate": 7.464224822597407e-05, "loss": 1.5733, "step": 7693 }, { "epoch": 0.336290921806023, "grad_norm": 3.0625, "learning_rate": 7.463627144526304e-05, "loss": 2.7127, "step": 7694 }, { "epoch": 0.33633463001005287, "grad_norm": 1.8828125, "learning_rate": 7.463029419962971e-05, "loss": 1.4256, "step": 7695 }, { "epoch": 0.3363783382140828, "grad_norm": 2.265625, "learning_rate": 7.462431648918689e-05, "loss": 1.8544, "step": 7696 }, { "epoch": 0.3364220464181127, "grad_norm": 2.296875, "learning_rate": 7.461833831404737e-05, "loss": 2.4548, "step": 7697 }, { "epoch": 0.33646575462214257, "grad_norm": 4.78125, "learning_rate": 7.461235967432398e-05, "loss": 2.2272, "step": 7698 }, { "epoch": 0.33650946282617245, "grad_norm": 5.53125, "learning_rate": 7.460638057012955e-05, "loss": 2.3309, "step": 7699 }, { "epoch": 0.3365531710302024, "grad_norm": 2.296875, "learning_rate": 7.46004010015769e-05, "loss": 2.1454, "step": 7700 }, { "epoch": 0.33659687923423226, "grad_norm": 2.671875, "learning_rate": 7.459442096877886e-05, "loss": 1.3449, "step": 7701 }, { "epoch": 0.33664058743826214, "grad_norm": 2.390625, "learning_rate": 7.458844047184832e-05, "loss": 2.5731, "step": 7702 }, { "epoch": 0.3366842956422921, "grad_norm": 2.359375, "learning_rate": 7.458245951089813e-05, "loss": 1.9036, "step": 7703 }, { "epoch": 0.33672800384632195, "grad_norm": 2.203125, "learning_rate": 7.457647808604113e-05, "loss": 2.333, "step": 7704 }, { "epoch": 0.33677171205035183, "grad_norm": 1.984375, "learning_rate": 7.457049619739022e-05, "loss": 1.6744, "step": 7705 }, { "epoch": 0.33681542025438177, "grad_norm": 2.28125, "learning_rate": 7.45645138450583e-05, "loss": 1.5279, "step": 7706 }, { "epoch": 0.33685912845841165, "grad_norm": 1.8203125, "learning_rate": 7.455853102915825e-05, "loss": 1.5336, "step": 7707 }, { "epoch": 0.33690283666244153, "grad_norm": 2.21875, "learning_rate": 7.455254774980297e-05, "loss": 1.969, "step": 7708 }, { "epoch": 0.3369465448664714, "grad_norm": 2.109375, "learning_rate": 7.45465640071054e-05, "loss": 2.3044, "step": 7709 }, { "epoch": 0.33699025307050134, "grad_norm": 2.671875, "learning_rate": 7.454057980117841e-05, "loss": 1.9823, "step": 7710 }, { "epoch": 0.3370339612745312, "grad_norm": 1.875, "learning_rate": 7.453459513213498e-05, "loss": 1.6091, "step": 7711 }, { "epoch": 0.3370776694785611, "grad_norm": 2.3125, "learning_rate": 7.452861000008803e-05, "loss": 2.207, "step": 7712 }, { "epoch": 0.33712137768259104, "grad_norm": 1.859375, "learning_rate": 7.45226244051505e-05, "loss": 1.6165, "step": 7713 }, { "epoch": 0.3371650858866209, "grad_norm": 2.171875, "learning_rate": 7.451663834743537e-05, "loss": 1.8244, "step": 7714 }, { "epoch": 0.3372087940906508, "grad_norm": 1.9609375, "learning_rate": 7.451065182705558e-05, "loss": 1.6859, "step": 7715 }, { "epoch": 0.33725250229468073, "grad_norm": 2.265625, "learning_rate": 7.450466484412413e-05, "loss": 2.2727, "step": 7716 }, { "epoch": 0.3372962104987106, "grad_norm": 2.140625, "learning_rate": 7.449867739875397e-05, "loss": 1.7578, "step": 7717 }, { "epoch": 0.3373399187027405, "grad_norm": 3.15625, "learning_rate": 7.449268949105812e-05, "loss": 1.8431, "step": 7718 }, { "epoch": 0.33738362690677043, "grad_norm": 2.40625, "learning_rate": 7.448670112114959e-05, "loss": 2.004, "step": 7719 }, { "epoch": 0.3374273351108003, "grad_norm": 2.125, "learning_rate": 7.448071228914134e-05, "loss": 1.9159, "step": 7720 }, { "epoch": 0.3374710433148302, "grad_norm": 2.015625, "learning_rate": 7.447472299514644e-05, "loss": 1.911, "step": 7721 }, { "epoch": 0.33751475151886007, "grad_norm": 2.03125, "learning_rate": 7.44687332392779e-05, "loss": 1.8569, "step": 7722 }, { "epoch": 0.33755845972289, "grad_norm": 2.15625, "learning_rate": 7.446274302164873e-05, "loss": 1.5678, "step": 7723 }, { "epoch": 0.3376021679269199, "grad_norm": 2.0, "learning_rate": 7.445675234237202e-05, "loss": 1.7527, "step": 7724 }, { "epoch": 0.33764587613094976, "grad_norm": 2.046875, "learning_rate": 7.445076120156078e-05, "loss": 2.0955, "step": 7725 }, { "epoch": 0.3376895843349797, "grad_norm": 2.203125, "learning_rate": 7.44447695993281e-05, "loss": 1.7765, "step": 7726 }, { "epoch": 0.3377332925390096, "grad_norm": 1.671875, "learning_rate": 7.443877753578702e-05, "loss": 1.4968, "step": 7727 }, { "epoch": 0.33777700074303946, "grad_norm": 3.109375, "learning_rate": 7.443278501105065e-05, "loss": 2.7519, "step": 7728 }, { "epoch": 0.3378207089470694, "grad_norm": 2.03125, "learning_rate": 7.442679202523208e-05, "loss": 2.2485, "step": 7729 }, { "epoch": 0.33786441715109927, "grad_norm": 2.25, "learning_rate": 7.442079857844438e-05, "loss": 2.299, "step": 7730 }, { "epoch": 0.33790812535512915, "grad_norm": 2.21875, "learning_rate": 7.441480467080066e-05, "loss": 1.8709, "step": 7731 }, { "epoch": 0.33795183355915903, "grad_norm": 2.296875, "learning_rate": 7.440881030241407e-05, "loss": 2.7728, "step": 7732 }, { "epoch": 0.33799554176318897, "grad_norm": 2.078125, "learning_rate": 7.44028154733977e-05, "loss": 1.9273, "step": 7733 }, { "epoch": 0.33803924996721885, "grad_norm": 1.953125, "learning_rate": 7.439682018386467e-05, "loss": 1.672, "step": 7734 }, { "epoch": 0.3380829581712487, "grad_norm": 2.5, "learning_rate": 7.439082443392813e-05, "loss": 2.3186, "step": 7735 }, { "epoch": 0.33812666637527866, "grad_norm": 1.9296875, "learning_rate": 7.438482822370124e-05, "loss": 1.9351, "step": 7736 }, { "epoch": 0.33817037457930854, "grad_norm": 1.9140625, "learning_rate": 7.437883155329715e-05, "loss": 1.9687, "step": 7737 }, { "epoch": 0.3382140827833384, "grad_norm": 1.921875, "learning_rate": 7.437283442282904e-05, "loss": 1.5673, "step": 7738 }, { "epoch": 0.33825779098736836, "grad_norm": 2.421875, "learning_rate": 7.436683683241006e-05, "loss": 2.2424, "step": 7739 }, { "epoch": 0.33830149919139824, "grad_norm": 2.09375, "learning_rate": 7.43608387821534e-05, "loss": 2.1129, "step": 7740 }, { "epoch": 0.3383452073954281, "grad_norm": 1.8203125, "learning_rate": 7.435484027217225e-05, "loss": 1.741, "step": 7741 }, { "epoch": 0.338388915599458, "grad_norm": 1.7734375, "learning_rate": 7.434884130257985e-05, "loss": 1.89, "step": 7742 }, { "epoch": 0.33843262380348793, "grad_norm": 2.4375, "learning_rate": 7.434284187348935e-05, "loss": 1.9559, "step": 7743 }, { "epoch": 0.3384763320075178, "grad_norm": 2.0, "learning_rate": 7.4336841985014e-05, "loss": 1.6128, "step": 7744 }, { "epoch": 0.3385200402115477, "grad_norm": 2.078125, "learning_rate": 7.433084163726703e-05, "loss": 1.7827, "step": 7745 }, { "epoch": 0.3385637484155776, "grad_norm": 2.203125, "learning_rate": 7.432484083036165e-05, "loss": 1.7955, "step": 7746 }, { "epoch": 0.3386074566196075, "grad_norm": 2.21875, "learning_rate": 7.431883956441112e-05, "loss": 2.5427, "step": 7747 }, { "epoch": 0.3386511648236374, "grad_norm": 2.296875, "learning_rate": 7.431283783952872e-05, "loss": 1.9095, "step": 7748 }, { "epoch": 0.3386948730276673, "grad_norm": 2.609375, "learning_rate": 7.430683565582766e-05, "loss": 1.592, "step": 7749 }, { "epoch": 0.3387385812316972, "grad_norm": 2.71875, "learning_rate": 7.430083301342124e-05, "loss": 2.0556, "step": 7750 }, { "epoch": 0.3387822894357271, "grad_norm": 1.9765625, "learning_rate": 7.429482991242274e-05, "loss": 1.5385, "step": 7751 }, { "epoch": 0.33882599763975696, "grad_norm": 1.5390625, "learning_rate": 7.428882635294543e-05, "loss": 1.3219, "step": 7752 }, { "epoch": 0.3388697058437869, "grad_norm": 1.8828125, "learning_rate": 7.428282233510262e-05, "loss": 1.8182, "step": 7753 }, { "epoch": 0.3389134140478168, "grad_norm": 2.546875, "learning_rate": 7.427681785900761e-05, "loss": 2.379, "step": 7754 }, { "epoch": 0.33895712225184665, "grad_norm": 2.21875, "learning_rate": 7.427081292477371e-05, "loss": 2.1487, "step": 7755 }, { "epoch": 0.3390008304558766, "grad_norm": 2.390625, "learning_rate": 7.426480753251425e-05, "loss": 2.2427, "step": 7756 }, { "epoch": 0.33904453865990647, "grad_norm": 2.03125, "learning_rate": 7.425880168234256e-05, "loss": 2.2215, "step": 7757 }, { "epoch": 0.33908824686393635, "grad_norm": 2.953125, "learning_rate": 7.425279537437198e-05, "loss": 1.9807, "step": 7758 }, { "epoch": 0.3391319550679663, "grad_norm": 6.96875, "learning_rate": 7.424678860871584e-05, "loss": 3.9342, "step": 7759 }, { "epoch": 0.33917566327199616, "grad_norm": 2.28125, "learning_rate": 7.42407813854875e-05, "loss": 2.0916, "step": 7760 }, { "epoch": 0.33921937147602604, "grad_norm": 2.1875, "learning_rate": 7.423477370480035e-05, "loss": 1.8062, "step": 7761 }, { "epoch": 0.3392630796800559, "grad_norm": 2.078125, "learning_rate": 7.422876556676776e-05, "loss": 1.7046, "step": 7762 }, { "epoch": 0.33930678788408586, "grad_norm": 1.8203125, "learning_rate": 7.422275697150308e-05, "loss": 1.4491, "step": 7763 }, { "epoch": 0.33935049608811574, "grad_norm": 2.15625, "learning_rate": 7.421674791911973e-05, "loss": 1.9007, "step": 7764 }, { "epoch": 0.3393942042921456, "grad_norm": 1.765625, "learning_rate": 7.42107384097311e-05, "loss": 1.5789, "step": 7765 }, { "epoch": 0.33943791249617555, "grad_norm": 1.875, "learning_rate": 7.420472844345059e-05, "loss": 1.7002, "step": 7766 }, { "epoch": 0.33948162070020543, "grad_norm": 1.921875, "learning_rate": 7.419871802039163e-05, "loss": 1.5454, "step": 7767 }, { "epoch": 0.3395253289042353, "grad_norm": 1.765625, "learning_rate": 7.419270714066765e-05, "loss": 1.8283, "step": 7768 }, { "epoch": 0.33956903710826525, "grad_norm": 1.859375, "learning_rate": 7.418669580439209e-05, "loss": 1.5417, "step": 7769 }, { "epoch": 0.33961274531229513, "grad_norm": 2.328125, "learning_rate": 7.418068401167834e-05, "loss": 1.8136, "step": 7770 }, { "epoch": 0.339656453516325, "grad_norm": 2.1875, "learning_rate": 7.41746717626399e-05, "loss": 1.7942, "step": 7771 }, { "epoch": 0.3397001617203549, "grad_norm": 2.765625, "learning_rate": 7.416865905739024e-05, "loss": 2.0847, "step": 7772 }, { "epoch": 0.3397438699243848, "grad_norm": 1.7734375, "learning_rate": 7.41626458960428e-05, "loss": 1.6278, "step": 7773 }, { "epoch": 0.3397875781284147, "grad_norm": 2.375, "learning_rate": 7.415663227871106e-05, "loss": 2.0389, "step": 7774 }, { "epoch": 0.3398312863324446, "grad_norm": 1.8515625, "learning_rate": 7.41506182055085e-05, "loss": 1.7392, "step": 7775 }, { "epoch": 0.3398749945364745, "grad_norm": 2.65625, "learning_rate": 7.414460367654864e-05, "loss": 2.5617, "step": 7776 }, { "epoch": 0.3399187027405044, "grad_norm": 2.109375, "learning_rate": 7.413858869194496e-05, "loss": 1.6048, "step": 7777 }, { "epoch": 0.3399624109445343, "grad_norm": 2.109375, "learning_rate": 7.413257325181098e-05, "loss": 1.5458, "step": 7778 }, { "epoch": 0.3400061191485642, "grad_norm": 2.53125, "learning_rate": 7.412655735626024e-05, "loss": 2.3651, "step": 7779 }, { "epoch": 0.3400498273525941, "grad_norm": 2.5625, "learning_rate": 7.412054100540623e-05, "loss": 2.3673, "step": 7780 }, { "epoch": 0.34009353555662397, "grad_norm": 2.515625, "learning_rate": 7.41145241993625e-05, "loss": 2.4358, "step": 7781 }, { "epoch": 0.34013724376065385, "grad_norm": 2.546875, "learning_rate": 7.410850693824261e-05, "loss": 1.8383, "step": 7782 }, { "epoch": 0.3401809519646838, "grad_norm": 2.1875, "learning_rate": 7.41024892221601e-05, "loss": 2.3828, "step": 7783 }, { "epoch": 0.34022466016871367, "grad_norm": 1.796875, "learning_rate": 7.409647105122854e-05, "loss": 1.704, "step": 7784 }, { "epoch": 0.34026836837274355, "grad_norm": 1.8828125, "learning_rate": 7.409045242556151e-05, "loss": 1.6124, "step": 7785 }, { "epoch": 0.3403120765767735, "grad_norm": 2.53125, "learning_rate": 7.408443334527257e-05, "loss": 1.0824, "step": 7786 }, { "epoch": 0.34035578478080336, "grad_norm": 28.25, "learning_rate": 7.407841381047532e-05, "loss": 2.9776, "step": 7787 }, { "epoch": 0.34039949298483324, "grad_norm": 2.328125, "learning_rate": 7.407239382128336e-05, "loss": 1.7465, "step": 7788 }, { "epoch": 0.3404432011888632, "grad_norm": 2.25, "learning_rate": 7.406637337781031e-05, "loss": 2.5876, "step": 7789 }, { "epoch": 0.34048690939289306, "grad_norm": 2.984375, "learning_rate": 7.406035248016973e-05, "loss": 1.9449, "step": 7790 }, { "epoch": 0.34053061759692294, "grad_norm": 1.953125, "learning_rate": 7.40543311284753e-05, "loss": 1.52, "step": 7791 }, { "epoch": 0.3405743258009528, "grad_norm": 2.03125, "learning_rate": 7.404830932284064e-05, "loss": 1.8827, "step": 7792 }, { "epoch": 0.34061803400498275, "grad_norm": 2.359375, "learning_rate": 7.404228706337937e-05, "loss": 2.3619, "step": 7793 }, { "epoch": 0.34066174220901263, "grad_norm": 2.515625, "learning_rate": 7.403626435020516e-05, "loss": 1.6467, "step": 7794 }, { "epoch": 0.3407054504130425, "grad_norm": 2.34375, "learning_rate": 7.403024118343167e-05, "loss": 2.2609, "step": 7795 }, { "epoch": 0.34074915861707245, "grad_norm": 1.8671875, "learning_rate": 7.402421756317252e-05, "loss": 1.1447, "step": 7796 }, { "epoch": 0.3407928668211023, "grad_norm": 2.15625, "learning_rate": 7.401819348954144e-05, "loss": 1.6363, "step": 7797 }, { "epoch": 0.3408365750251322, "grad_norm": 2.171875, "learning_rate": 7.401216896265208e-05, "loss": 1.7098, "step": 7798 }, { "epoch": 0.34088028322916214, "grad_norm": 1.9453125, "learning_rate": 7.400614398261817e-05, "loss": 2.1047, "step": 7799 }, { "epoch": 0.340923991433192, "grad_norm": 1.765625, "learning_rate": 7.400011854955336e-05, "loss": 1.68, "step": 7800 }, { "epoch": 0.3409676996372219, "grad_norm": 1.8203125, "learning_rate": 7.399409266357139e-05, "loss": 1.8467, "step": 7801 }, { "epoch": 0.3410114078412518, "grad_norm": 3.0, "learning_rate": 7.398806632478598e-05, "loss": 2.5555, "step": 7802 }, { "epoch": 0.3410551160452817, "grad_norm": 2.46875, "learning_rate": 7.398203953331083e-05, "loss": 1.5894, "step": 7803 }, { "epoch": 0.3410988242493116, "grad_norm": 2.15625, "learning_rate": 7.39760122892597e-05, "loss": 2.3018, "step": 7804 }, { "epoch": 0.3411425324533415, "grad_norm": 2.546875, "learning_rate": 7.396998459274632e-05, "loss": 1.5333, "step": 7805 }, { "epoch": 0.3411862406573714, "grad_norm": 2.625, "learning_rate": 7.396395644388443e-05, "loss": 2.7594, "step": 7806 }, { "epoch": 0.3412299488614013, "grad_norm": 2.078125, "learning_rate": 7.395792784278783e-05, "loss": 1.6819, "step": 7807 }, { "epoch": 0.34127365706543117, "grad_norm": 2.359375, "learning_rate": 7.395189878957025e-05, "loss": 2.0002, "step": 7808 }, { "epoch": 0.3413173652694611, "grad_norm": 1.953125, "learning_rate": 7.394586928434549e-05, "loss": 1.7576, "step": 7809 }, { "epoch": 0.341361073473491, "grad_norm": 2.015625, "learning_rate": 7.39398393272273e-05, "loss": 1.5846, "step": 7810 }, { "epoch": 0.34140478167752086, "grad_norm": 2.328125, "learning_rate": 7.393380891832951e-05, "loss": 1.8085, "step": 7811 }, { "epoch": 0.34144848988155074, "grad_norm": 3.328125, "learning_rate": 7.392777805776592e-05, "loss": 2.6784, "step": 7812 }, { "epoch": 0.3414921980855807, "grad_norm": 1.8828125, "learning_rate": 7.392174674565031e-05, "loss": 1.4711, "step": 7813 }, { "epoch": 0.34153590628961056, "grad_norm": 1.984375, "learning_rate": 7.391571498209654e-05, "loss": 1.9854, "step": 7814 }, { "epoch": 0.34157961449364044, "grad_norm": 1.96875, "learning_rate": 7.390968276721844e-05, "loss": 1.5118, "step": 7815 }, { "epoch": 0.3416233226976704, "grad_norm": 2.34375, "learning_rate": 7.390365010112979e-05, "loss": 2.5905, "step": 7816 }, { "epoch": 0.34166703090170025, "grad_norm": 2.078125, "learning_rate": 7.389761698394449e-05, "loss": 1.9055, "step": 7817 }, { "epoch": 0.34171073910573013, "grad_norm": 2.828125, "learning_rate": 7.389158341577638e-05, "loss": 1.9238, "step": 7818 }, { "epoch": 0.34175444730976007, "grad_norm": 2.03125, "learning_rate": 7.388554939673931e-05, "loss": 1.9666, "step": 7819 }, { "epoch": 0.34179815551378995, "grad_norm": 1.8515625, "learning_rate": 7.387951492694717e-05, "loss": 1.648, "step": 7820 }, { "epoch": 0.3418418637178198, "grad_norm": 2.5, "learning_rate": 7.387348000651381e-05, "loss": 2.3185, "step": 7821 }, { "epoch": 0.3418855719218497, "grad_norm": 2.390625, "learning_rate": 7.386744463555316e-05, "loss": 1.9617, "step": 7822 }, { "epoch": 0.34192928012587964, "grad_norm": 2.4375, "learning_rate": 7.386140881417907e-05, "loss": 1.8481, "step": 7823 }, { "epoch": 0.3419729883299095, "grad_norm": 2.03125, "learning_rate": 7.385537254250549e-05, "loss": 1.8291, "step": 7824 }, { "epoch": 0.3420166965339394, "grad_norm": 3.734375, "learning_rate": 7.38493358206463e-05, "loss": 1.8099, "step": 7825 }, { "epoch": 0.34206040473796934, "grad_norm": 2.75, "learning_rate": 7.384329864871542e-05, "loss": 2.5644, "step": 7826 }, { "epoch": 0.3421041129419992, "grad_norm": 2.296875, "learning_rate": 7.38372610268268e-05, "loss": 1.8474, "step": 7827 }, { "epoch": 0.3421478211460291, "grad_norm": 3.046875, "learning_rate": 7.383122295509437e-05, "loss": 2.8546, "step": 7828 }, { "epoch": 0.34219152935005903, "grad_norm": 2.1875, "learning_rate": 7.382518443363208e-05, "loss": 2.1391, "step": 7829 }, { "epoch": 0.3422352375540889, "grad_norm": 2.15625, "learning_rate": 7.38191454625539e-05, "loss": 2.2937, "step": 7830 }, { "epoch": 0.3422789457581188, "grad_norm": 2.0625, "learning_rate": 7.381310604197375e-05, "loss": 1.79, "step": 7831 }, { "epoch": 0.34232265396214867, "grad_norm": 2.65625, "learning_rate": 7.380706617200564e-05, "loss": 2.197, "step": 7832 }, { "epoch": 0.3423663621661786, "grad_norm": 2.0, "learning_rate": 7.380102585276355e-05, "loss": 2.2665, "step": 7833 }, { "epoch": 0.3424100703702085, "grad_norm": 1.9921875, "learning_rate": 7.379498508436146e-05, "loss": 1.7406, "step": 7834 }, { "epoch": 0.34245377857423837, "grad_norm": 2.15625, "learning_rate": 7.378894386691337e-05, "loss": 2.1328, "step": 7835 }, { "epoch": 0.3424974867782683, "grad_norm": 2.03125, "learning_rate": 7.378290220053328e-05, "loss": 1.7703, "step": 7836 }, { "epoch": 0.3425411949822982, "grad_norm": 2.359375, "learning_rate": 7.377686008533521e-05, "loss": 2.2732, "step": 7837 }, { "epoch": 0.34258490318632806, "grad_norm": 2.953125, "learning_rate": 7.377081752143319e-05, "loss": 1.8219, "step": 7838 }, { "epoch": 0.342628611390358, "grad_norm": 1.921875, "learning_rate": 7.376477450894124e-05, "loss": 1.7202, "step": 7839 }, { "epoch": 0.3426723195943879, "grad_norm": 1.9609375, "learning_rate": 7.375873104797341e-05, "loss": 1.6177, "step": 7840 }, { "epoch": 0.34271602779841775, "grad_norm": 2.515625, "learning_rate": 7.375268713864374e-05, "loss": 1.2808, "step": 7841 }, { "epoch": 0.34275973600244763, "grad_norm": 1.9765625, "learning_rate": 7.374664278106631e-05, "loss": 2.2372, "step": 7842 }, { "epoch": 0.34280344420647757, "grad_norm": 1.8359375, "learning_rate": 7.374059797535517e-05, "loss": 2.0804, "step": 7843 }, { "epoch": 0.34284715241050745, "grad_norm": 1.796875, "learning_rate": 7.373455272162438e-05, "loss": 1.7002, "step": 7844 }, { "epoch": 0.34289086061453733, "grad_norm": 2.75, "learning_rate": 7.372850701998803e-05, "loss": 1.9006, "step": 7845 }, { "epoch": 0.34293456881856726, "grad_norm": 2.15625, "learning_rate": 7.372246087056023e-05, "loss": 2.5089, "step": 7846 }, { "epoch": 0.34297827702259714, "grad_norm": 1.9921875, "learning_rate": 7.371641427345506e-05, "loss": 1.567, "step": 7847 }, { "epoch": 0.343021985226627, "grad_norm": 2.21875, "learning_rate": 7.371036722878664e-05, "loss": 2.1754, "step": 7848 }, { "epoch": 0.34306569343065696, "grad_norm": 3.265625, "learning_rate": 7.370431973666909e-05, "loss": 2.6514, "step": 7849 }, { "epoch": 0.34310940163468684, "grad_norm": 2.25, "learning_rate": 7.369827179721651e-05, "loss": 1.7452, "step": 7850 }, { "epoch": 0.3431531098387167, "grad_norm": 2.078125, "learning_rate": 7.369222341054305e-05, "loss": 1.6974, "step": 7851 }, { "epoch": 0.3431968180427466, "grad_norm": 2.4375, "learning_rate": 7.368617457676286e-05, "loss": 2.2866, "step": 7852 }, { "epoch": 0.34324052624677653, "grad_norm": 3.578125, "learning_rate": 7.36801252959901e-05, "loss": 2.6568, "step": 7853 }, { "epoch": 0.3432842344508064, "grad_norm": 3.03125, "learning_rate": 7.367407556833887e-05, "loss": 1.8624, "step": 7854 }, { "epoch": 0.3433279426548363, "grad_norm": 2.234375, "learning_rate": 7.366802539392341e-05, "loss": 2.464, "step": 7855 }, { "epoch": 0.34337165085886623, "grad_norm": 2.421875, "learning_rate": 7.366197477285785e-05, "loss": 2.198, "step": 7856 }, { "epoch": 0.3434153590628961, "grad_norm": 1.9140625, "learning_rate": 7.365592370525639e-05, "loss": 2.274, "step": 7857 }, { "epoch": 0.343459067266926, "grad_norm": 2.203125, "learning_rate": 7.364987219123323e-05, "loss": 1.6779, "step": 7858 }, { "epoch": 0.3435027754709559, "grad_norm": 2.03125, "learning_rate": 7.364382023090255e-05, "loss": 2.1957, "step": 7859 }, { "epoch": 0.3435464836749858, "grad_norm": 2.296875, "learning_rate": 7.363776782437857e-05, "loss": 1.9011, "step": 7860 }, { "epoch": 0.3435901918790157, "grad_norm": 2.703125, "learning_rate": 7.36317149717755e-05, "loss": 2.4219, "step": 7861 }, { "epoch": 0.34363390008304556, "grad_norm": 2.203125, "learning_rate": 7.362566167320759e-05, "loss": 1.9962, "step": 7862 }, { "epoch": 0.3436776082870755, "grad_norm": 1.8359375, "learning_rate": 7.361960792878906e-05, "loss": 1.5491, "step": 7863 }, { "epoch": 0.3437213164911054, "grad_norm": 1.6953125, "learning_rate": 7.361355373863414e-05, "loss": 1.6535, "step": 7864 }, { "epoch": 0.34376502469513526, "grad_norm": 2.546875, "learning_rate": 7.360749910285711e-05, "loss": 1.7978, "step": 7865 }, { "epoch": 0.3438087328991652, "grad_norm": 2.09375, "learning_rate": 7.360144402157218e-05, "loss": 1.3264, "step": 7866 }, { "epoch": 0.3438524411031951, "grad_norm": 1.96875, "learning_rate": 7.359538849489367e-05, "loss": 1.3655, "step": 7867 }, { "epoch": 0.34389614930722495, "grad_norm": 2.09375, "learning_rate": 7.358933252293585e-05, "loss": 1.5282, "step": 7868 }, { "epoch": 0.3439398575112549, "grad_norm": 2.65625, "learning_rate": 7.3583276105813e-05, "loss": 2.3046, "step": 7869 }, { "epoch": 0.34398356571528477, "grad_norm": 1.8203125, "learning_rate": 7.357721924363937e-05, "loss": 1.8875, "step": 7870 }, { "epoch": 0.34402727391931465, "grad_norm": 1.859375, "learning_rate": 7.357116193652931e-05, "loss": 1.6185, "step": 7871 }, { "epoch": 0.3440709821233445, "grad_norm": 2.546875, "learning_rate": 7.356510418459714e-05, "loss": 1.548, "step": 7872 }, { "epoch": 0.34411469032737446, "grad_norm": 2.109375, "learning_rate": 7.355904598795713e-05, "loss": 1.9057, "step": 7873 }, { "epoch": 0.34415839853140434, "grad_norm": 2.078125, "learning_rate": 7.355298734672364e-05, "loss": 1.7803, "step": 7874 }, { "epoch": 0.3442021067354342, "grad_norm": 1.9921875, "learning_rate": 7.354692826101102e-05, "loss": 2.1805, "step": 7875 }, { "epoch": 0.34424581493946416, "grad_norm": 3.1875, "learning_rate": 7.354086873093356e-05, "loss": 1.8715, "step": 7876 }, { "epoch": 0.34428952314349404, "grad_norm": 2.0625, "learning_rate": 7.353480875660566e-05, "loss": 1.8297, "step": 7877 }, { "epoch": 0.3443332313475239, "grad_norm": 2.75, "learning_rate": 7.352874833814168e-05, "loss": 2.1321, "step": 7878 }, { "epoch": 0.34437693955155385, "grad_norm": 2.3125, "learning_rate": 7.352268747565596e-05, "loss": 2.0992, "step": 7879 }, { "epoch": 0.34442064775558373, "grad_norm": 1.890625, "learning_rate": 7.351662616926289e-05, "loss": 1.477, "step": 7880 }, { "epoch": 0.3444643559596136, "grad_norm": 2.03125, "learning_rate": 7.351056441907687e-05, "loss": 1.5112, "step": 7881 }, { "epoch": 0.3445080641636435, "grad_norm": 2.0625, "learning_rate": 7.350450222521226e-05, "loss": 1.8943, "step": 7882 }, { "epoch": 0.3445517723676734, "grad_norm": 2.40625, "learning_rate": 7.34984395877835e-05, "loss": 1.7028, "step": 7883 }, { "epoch": 0.3445954805717033, "grad_norm": 2.0, "learning_rate": 7.349237650690497e-05, "loss": 1.7619, "step": 7884 }, { "epoch": 0.3446391887757332, "grad_norm": 2.140625, "learning_rate": 7.348631298269114e-05, "loss": 1.6977, "step": 7885 }, { "epoch": 0.3446828969797631, "grad_norm": 2.609375, "learning_rate": 7.348024901525635e-05, "loss": 2.221, "step": 7886 }, { "epoch": 0.344726605183793, "grad_norm": 2.640625, "learning_rate": 7.347418460471511e-05, "loss": 1.3263, "step": 7887 }, { "epoch": 0.3447703133878229, "grad_norm": 2.21875, "learning_rate": 7.346811975118185e-05, "loss": 1.9857, "step": 7888 }, { "epoch": 0.3448140215918528, "grad_norm": 2.296875, "learning_rate": 7.346205445477101e-05, "loss": 1.9187, "step": 7889 }, { "epoch": 0.3448577297958827, "grad_norm": 2.140625, "learning_rate": 7.345598871559706e-05, "loss": 2.4341, "step": 7890 }, { "epoch": 0.3449014379999126, "grad_norm": 2.5625, "learning_rate": 7.344992253377445e-05, "loss": 1.9575, "step": 7891 }, { "epoch": 0.34494514620394245, "grad_norm": 2.125, "learning_rate": 7.344385590941768e-05, "loss": 1.8408, "step": 7892 }, { "epoch": 0.3449888544079724, "grad_norm": 2.578125, "learning_rate": 7.343778884264123e-05, "loss": 1.9551, "step": 7893 }, { "epoch": 0.34503256261200227, "grad_norm": 2.078125, "learning_rate": 7.343172133355958e-05, "loss": 1.682, "step": 7894 }, { "epoch": 0.34507627081603215, "grad_norm": 2.171875, "learning_rate": 7.342565338228726e-05, "loss": 1.4362, "step": 7895 }, { "epoch": 0.3451199790200621, "grad_norm": 2.078125, "learning_rate": 7.341958498893876e-05, "loss": 1.7903, "step": 7896 }, { "epoch": 0.34516368722409196, "grad_norm": 2.25, "learning_rate": 7.34135161536286e-05, "loss": 1.8678, "step": 7897 }, { "epoch": 0.34520739542812184, "grad_norm": 1.7890625, "learning_rate": 7.340744687647133e-05, "loss": 1.4317, "step": 7898 }, { "epoch": 0.3452511036321518, "grad_norm": 2.09375, "learning_rate": 7.340137715758146e-05, "loss": 1.769, "step": 7899 }, { "epoch": 0.34529481183618166, "grad_norm": 2.6875, "learning_rate": 7.339530699707354e-05, "loss": 1.5794, "step": 7900 }, { "epoch": 0.34533852004021154, "grad_norm": 3.25, "learning_rate": 7.338923639506213e-05, "loss": 2.1146, "step": 7901 }, { "epoch": 0.3453822282442414, "grad_norm": 2.265625, "learning_rate": 7.338316535166179e-05, "loss": 2.3914, "step": 7902 }, { "epoch": 0.34542593644827135, "grad_norm": 2.03125, "learning_rate": 7.337709386698709e-05, "loss": 1.5035, "step": 7903 }, { "epoch": 0.34546964465230123, "grad_norm": 2.9375, "learning_rate": 7.33710219411526e-05, "loss": 2.1352, "step": 7904 }, { "epoch": 0.3455133528563311, "grad_norm": 3.015625, "learning_rate": 7.336494957427292e-05, "loss": 1.7889, "step": 7905 }, { "epoch": 0.34555706106036105, "grad_norm": 1.9296875, "learning_rate": 7.335887676646263e-05, "loss": 2.0182, "step": 7906 }, { "epoch": 0.34560076926439093, "grad_norm": 2.140625, "learning_rate": 7.335280351783632e-05, "loss": 1.5878, "step": 7907 }, { "epoch": 0.3456444774684208, "grad_norm": 2.375, "learning_rate": 7.334672982850865e-05, "loss": 2.4043, "step": 7908 }, { "epoch": 0.34568818567245074, "grad_norm": 2.5, "learning_rate": 7.334065569859419e-05, "loss": 2.0702, "step": 7909 }, { "epoch": 0.3457318938764806, "grad_norm": 2.09375, "learning_rate": 7.333458112820758e-05, "loss": 1.916, "step": 7910 }, { "epoch": 0.3457756020805105, "grad_norm": 2.953125, "learning_rate": 7.332850611746346e-05, "loss": 2.2221, "step": 7911 }, { "epoch": 0.3458193102845404, "grad_norm": 2.171875, "learning_rate": 7.332243066647651e-05, "loss": 2.2696, "step": 7912 }, { "epoch": 0.3458630184885703, "grad_norm": 2.359375, "learning_rate": 7.331635477536131e-05, "loss": 1.8497, "step": 7913 }, { "epoch": 0.3459067266926002, "grad_norm": 2.21875, "learning_rate": 7.331027844423258e-05, "loss": 1.73, "step": 7914 }, { "epoch": 0.3459504348966301, "grad_norm": 2.46875, "learning_rate": 7.330420167320498e-05, "loss": 2.7669, "step": 7915 }, { "epoch": 0.34599414310066, "grad_norm": 6.78125, "learning_rate": 7.329812446239315e-05, "loss": 2.766, "step": 7916 }, { "epoch": 0.3460378513046899, "grad_norm": 1.90625, "learning_rate": 7.329204681191183e-05, "loss": 1.4046, "step": 7917 }, { "epoch": 0.34608155950871977, "grad_norm": 3.59375, "learning_rate": 7.328596872187567e-05, "loss": 2.738, "step": 7918 }, { "epoch": 0.3461252677127497, "grad_norm": 2.359375, "learning_rate": 7.327989019239938e-05, "loss": 2.2535, "step": 7919 }, { "epoch": 0.3461689759167796, "grad_norm": 2.75, "learning_rate": 7.32738112235977e-05, "loss": 2.1258, "step": 7920 }, { "epoch": 0.34621268412080947, "grad_norm": 2.390625, "learning_rate": 7.326773181558532e-05, "loss": 1.9796, "step": 7921 }, { "epoch": 0.34625639232483935, "grad_norm": 2.203125, "learning_rate": 7.326165196847697e-05, "loss": 1.7916, "step": 7922 }, { "epoch": 0.3463001005288693, "grad_norm": 1.84375, "learning_rate": 7.32555716823874e-05, "loss": 1.8068, "step": 7923 }, { "epoch": 0.34634380873289916, "grad_norm": 3.53125, "learning_rate": 7.324949095743134e-05, "loss": 2.206, "step": 7924 }, { "epoch": 0.34638751693692904, "grad_norm": 2.578125, "learning_rate": 7.324340979372356e-05, "loss": 2.2156, "step": 7925 }, { "epoch": 0.346431225140959, "grad_norm": 2.015625, "learning_rate": 7.32373281913788e-05, "loss": 1.6678, "step": 7926 }, { "epoch": 0.34647493334498886, "grad_norm": 2.25, "learning_rate": 7.323124615051183e-05, "loss": 1.9071, "step": 7927 }, { "epoch": 0.34651864154901874, "grad_norm": 3.640625, "learning_rate": 7.322516367123744e-05, "loss": 2.493, "step": 7928 }, { "epoch": 0.34656234975304867, "grad_norm": 3.515625, "learning_rate": 7.321908075367041e-05, "loss": 1.8614, "step": 7929 }, { "epoch": 0.34660605795707855, "grad_norm": 2.046875, "learning_rate": 7.321299739792552e-05, "loss": 1.7816, "step": 7930 }, { "epoch": 0.34664976616110843, "grad_norm": 2.21875, "learning_rate": 7.32069136041176e-05, "loss": 1.5655, "step": 7931 }, { "epoch": 0.3466934743651383, "grad_norm": 3.453125, "learning_rate": 7.320082937236144e-05, "loss": 1.8418, "step": 7932 }, { "epoch": 0.34673718256916825, "grad_norm": 3.59375, "learning_rate": 7.319474470277187e-05, "loss": 2.4229, "step": 7933 }, { "epoch": 0.3467808907731981, "grad_norm": 1.9609375, "learning_rate": 7.318865959546369e-05, "loss": 1.6238, "step": 7934 }, { "epoch": 0.346824598977228, "grad_norm": 2.21875, "learning_rate": 7.318257405055178e-05, "loss": 1.6683, "step": 7935 }, { "epoch": 0.34686830718125794, "grad_norm": 2.328125, "learning_rate": 7.317648806815094e-05, "loss": 2.9313, "step": 7936 }, { "epoch": 0.3469120153852878, "grad_norm": 1.9296875, "learning_rate": 7.317040164837604e-05, "loss": 1.7694, "step": 7937 }, { "epoch": 0.3469557235893177, "grad_norm": 1.9375, "learning_rate": 7.316431479134194e-05, "loss": 1.7356, "step": 7938 }, { "epoch": 0.34699943179334763, "grad_norm": 1.7578125, "learning_rate": 7.31582274971635e-05, "loss": 1.583, "step": 7939 }, { "epoch": 0.3470431399973775, "grad_norm": 2.140625, "learning_rate": 7.315213976595561e-05, "loss": 1.7361, "step": 7940 }, { "epoch": 0.3470868482014074, "grad_norm": 2.078125, "learning_rate": 7.314605159783314e-05, "loss": 1.5803, "step": 7941 }, { "epoch": 0.3471305564054373, "grad_norm": 1.8984375, "learning_rate": 7.313996299291098e-05, "loss": 1.6847, "step": 7942 }, { "epoch": 0.3471742646094672, "grad_norm": 2.28125, "learning_rate": 7.313387395130406e-05, "loss": 1.7097, "step": 7943 }, { "epoch": 0.3472179728134971, "grad_norm": 2.171875, "learning_rate": 7.312778447312725e-05, "loss": 2.1191, "step": 7944 }, { "epoch": 0.34726168101752697, "grad_norm": 2.28125, "learning_rate": 7.312169455849551e-05, "loss": 2.0175, "step": 7945 }, { "epoch": 0.3473053892215569, "grad_norm": 4.1875, "learning_rate": 7.311560420752373e-05, "loss": 2.3162, "step": 7946 }, { "epoch": 0.3473490974255868, "grad_norm": 2.296875, "learning_rate": 7.310951342032684e-05, "loss": 1.7003, "step": 7947 }, { "epoch": 0.34739280562961666, "grad_norm": 2.578125, "learning_rate": 7.310342219701981e-05, "loss": 2.1897, "step": 7948 }, { "epoch": 0.3474365138336466, "grad_norm": 2.421875, "learning_rate": 7.309733053771758e-05, "loss": 2.1538, "step": 7949 }, { "epoch": 0.3474802220376765, "grad_norm": 2.390625, "learning_rate": 7.309123844253511e-05, "loss": 1.8306, "step": 7950 }, { "epoch": 0.34752393024170636, "grad_norm": 1.8046875, "learning_rate": 7.308514591158735e-05, "loss": 1.3273, "step": 7951 }, { "epoch": 0.34756763844573624, "grad_norm": 1.9453125, "learning_rate": 7.307905294498929e-05, "loss": 1.599, "step": 7952 }, { "epoch": 0.3476113466497662, "grad_norm": 3.015625, "learning_rate": 7.30729595428559e-05, "loss": 2.3818, "step": 7953 }, { "epoch": 0.34765505485379605, "grad_norm": 2.234375, "learning_rate": 7.306686570530221e-05, "loss": 2.0856, "step": 7954 }, { "epoch": 0.34769876305782593, "grad_norm": 2.140625, "learning_rate": 7.30607714324432e-05, "loss": 2.4284, "step": 7955 }, { "epoch": 0.34774247126185587, "grad_norm": 2.21875, "learning_rate": 7.305467672439384e-05, "loss": 1.9622, "step": 7956 }, { "epoch": 0.34778617946588575, "grad_norm": 1.9609375, "learning_rate": 7.304858158126917e-05, "loss": 1.7486, "step": 7957 }, { "epoch": 0.3478298876699156, "grad_norm": 2.0625, "learning_rate": 7.304248600318425e-05, "loss": 1.6015, "step": 7958 }, { "epoch": 0.34787359587394556, "grad_norm": 2.171875, "learning_rate": 7.303638999025406e-05, "loss": 1.9826, "step": 7959 }, { "epoch": 0.34791730407797544, "grad_norm": 3.171875, "learning_rate": 7.303029354259367e-05, "loss": 1.6595, "step": 7960 }, { "epoch": 0.3479610122820053, "grad_norm": 3.46875, "learning_rate": 7.302419666031813e-05, "loss": 1.8291, "step": 7961 }, { "epoch": 0.3480047204860352, "grad_norm": 2.203125, "learning_rate": 7.301809934354248e-05, "loss": 1.7561, "step": 7962 }, { "epoch": 0.34804842869006514, "grad_norm": 2.359375, "learning_rate": 7.30120015923818e-05, "loss": 2.1202, "step": 7963 }, { "epoch": 0.348092136894095, "grad_norm": 1.859375, "learning_rate": 7.300590340695115e-05, "loss": 1.761, "step": 7964 }, { "epoch": 0.3481358450981249, "grad_norm": 2.03125, "learning_rate": 7.299980478736564e-05, "loss": 1.82, "step": 7965 }, { "epoch": 0.34817955330215483, "grad_norm": 2.15625, "learning_rate": 7.299370573374031e-05, "loss": 1.8937, "step": 7966 }, { "epoch": 0.3482232615061847, "grad_norm": 2.453125, "learning_rate": 7.298760624619029e-05, "loss": 2.0898, "step": 7967 }, { "epoch": 0.3482669697102146, "grad_norm": 2.453125, "learning_rate": 7.29815063248307e-05, "loss": 2.0204, "step": 7968 }, { "epoch": 0.3483106779142445, "grad_norm": 1.8359375, "learning_rate": 7.297540596977662e-05, "loss": 1.4455, "step": 7969 }, { "epoch": 0.3483543861182744, "grad_norm": 2.09375, "learning_rate": 7.29693051811432e-05, "loss": 1.4206, "step": 7970 }, { "epoch": 0.3483980943223043, "grad_norm": 2.21875, "learning_rate": 7.296320395904556e-05, "loss": 2.2885, "step": 7971 }, { "epoch": 0.34844180252633417, "grad_norm": 2.4375, "learning_rate": 7.295710230359885e-05, "loss": 2.0993, "step": 7972 }, { "epoch": 0.3484855107303641, "grad_norm": 2.078125, "learning_rate": 7.295100021491818e-05, "loss": 1.7229, "step": 7973 }, { "epoch": 0.348529218934394, "grad_norm": 2.203125, "learning_rate": 7.294489769311876e-05, "loss": 1.7387, "step": 7974 }, { "epoch": 0.34857292713842386, "grad_norm": 1.9140625, "learning_rate": 7.293879473831572e-05, "loss": 1.8183, "step": 7975 }, { "epoch": 0.3486166353424538, "grad_norm": 2.09375, "learning_rate": 7.293269135062424e-05, "loss": 1.9519, "step": 7976 }, { "epoch": 0.3486603435464837, "grad_norm": 1.953125, "learning_rate": 7.292658753015948e-05, "loss": 1.6782, "step": 7977 }, { "epoch": 0.34870405175051356, "grad_norm": 2.53125, "learning_rate": 7.292048327703666e-05, "loss": 2.2993, "step": 7978 }, { "epoch": 0.3487477599545435, "grad_norm": 1.9140625, "learning_rate": 7.291437859137095e-05, "loss": 1.7695, "step": 7979 }, { "epoch": 0.34879146815857337, "grad_norm": 1.8984375, "learning_rate": 7.290827347327758e-05, "loss": 1.7113, "step": 7980 }, { "epoch": 0.34883517636260325, "grad_norm": 1.7890625, "learning_rate": 7.290216792287175e-05, "loss": 1.5739, "step": 7981 }, { "epoch": 0.34887888456663313, "grad_norm": 1.9140625, "learning_rate": 7.289606194026866e-05, "loss": 1.8195, "step": 7982 }, { "epoch": 0.34892259277066306, "grad_norm": 1.9765625, "learning_rate": 7.288995552558357e-05, "loss": 1.7554, "step": 7983 }, { "epoch": 0.34896630097469294, "grad_norm": 2.09375, "learning_rate": 7.28838486789317e-05, "loss": 1.7297, "step": 7984 }, { "epoch": 0.3490100091787228, "grad_norm": 1.921875, "learning_rate": 7.28777414004283e-05, "loss": 2.0312, "step": 7985 }, { "epoch": 0.34905371738275276, "grad_norm": 2.125, "learning_rate": 7.287163369018863e-05, "loss": 2.0484, "step": 7986 }, { "epoch": 0.34909742558678264, "grad_norm": 2.234375, "learning_rate": 7.286552554832793e-05, "loss": 2.0505, "step": 7987 }, { "epoch": 0.3491411337908125, "grad_norm": 2.125, "learning_rate": 7.28594169749615e-05, "loss": 1.859, "step": 7988 }, { "epoch": 0.34918484199484245, "grad_norm": 2.03125, "learning_rate": 7.285330797020458e-05, "loss": 2.0386, "step": 7989 }, { "epoch": 0.34922855019887233, "grad_norm": 1.6796875, "learning_rate": 7.28471985341725e-05, "loss": 1.5225, "step": 7990 }, { "epoch": 0.3492722584029022, "grad_norm": 1.8671875, "learning_rate": 7.284108866698051e-05, "loss": 1.7554, "step": 7991 }, { "epoch": 0.3493159666069321, "grad_norm": 2.765625, "learning_rate": 7.283497836874396e-05, "loss": 1.5883, "step": 7992 }, { "epoch": 0.34935967481096203, "grad_norm": 1.9375, "learning_rate": 7.282886763957812e-05, "loss": 1.3351, "step": 7993 }, { "epoch": 0.3494033830149919, "grad_norm": 1.9140625, "learning_rate": 7.282275647959831e-05, "loss": 1.6072, "step": 7994 }, { "epoch": 0.3494470912190218, "grad_norm": 3.0, "learning_rate": 7.281664488891988e-05, "loss": 2.1401, "step": 7995 }, { "epoch": 0.3494907994230517, "grad_norm": 1.9921875, "learning_rate": 7.281053286765815e-05, "loss": 1.2633, "step": 7996 }, { "epoch": 0.3495345076270816, "grad_norm": 2.21875, "learning_rate": 7.280442041592846e-05, "loss": 2.2913, "step": 7997 }, { "epoch": 0.3495782158311115, "grad_norm": 2.375, "learning_rate": 7.279830753384618e-05, "loss": 2.0087, "step": 7998 }, { "epoch": 0.3496219240351414, "grad_norm": 2.0625, "learning_rate": 7.279219422152666e-05, "loss": 2.0633, "step": 7999 }, { "epoch": 0.3496656322391713, "grad_norm": 2.09375, "learning_rate": 7.278608047908523e-05, "loss": 1.8298, "step": 8000 }, { "epoch": 0.3497093404432012, "grad_norm": 2.703125, "learning_rate": 7.277996630663734e-05, "loss": 1.0935, "step": 8001 }, { "epoch": 0.3497530486472311, "grad_norm": 8.4375, "learning_rate": 7.27738517042983e-05, "loss": 1.3579, "step": 8002 }, { "epoch": 0.349796756851261, "grad_norm": 2.015625, "learning_rate": 7.276773667218354e-05, "loss": 1.889, "step": 8003 }, { "epoch": 0.3498404650552909, "grad_norm": 1.9921875, "learning_rate": 7.276162121040846e-05, "loss": 1.8152, "step": 8004 }, { "epoch": 0.34988417325932075, "grad_norm": 1.7890625, "learning_rate": 7.275550531908846e-05, "loss": 1.5692, "step": 8005 }, { "epoch": 0.3499278814633507, "grad_norm": 3.8125, "learning_rate": 7.274938899833896e-05, "loss": 2.0927, "step": 8006 }, { "epoch": 0.34997158966738057, "grad_norm": 3.59375, "learning_rate": 7.274327224827535e-05, "loss": 1.6025, "step": 8007 }, { "epoch": 0.35001529787141045, "grad_norm": 2.0625, "learning_rate": 7.273715506901312e-05, "loss": 2.1043, "step": 8008 }, { "epoch": 0.3500590060754404, "grad_norm": 2.265625, "learning_rate": 7.273103746066767e-05, "loss": 2.2297, "step": 8009 }, { "epoch": 0.35010271427947026, "grad_norm": 2.328125, "learning_rate": 7.272491942335447e-05, "loss": 1.5105, "step": 8010 }, { "epoch": 0.35014642248350014, "grad_norm": 2.453125, "learning_rate": 7.271880095718895e-05, "loss": 1.8723, "step": 8011 }, { "epoch": 0.3501901306875301, "grad_norm": 2.09375, "learning_rate": 7.27126820622866e-05, "loss": 1.9394, "step": 8012 }, { "epoch": 0.35023383889155996, "grad_norm": 1.9921875, "learning_rate": 7.270656273876289e-05, "loss": 1.9664, "step": 8013 }, { "epoch": 0.35027754709558984, "grad_norm": 1.921875, "learning_rate": 7.270044298673328e-05, "loss": 1.7432, "step": 8014 }, { "epoch": 0.3503212552996197, "grad_norm": 1.8671875, "learning_rate": 7.269432280631327e-05, "loss": 1.5492, "step": 8015 } ], "logging_steps": 1, "max_steps": 22879, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 229, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.2123448025219072e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }