{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9928021841648051, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 4.415610232722863, "learning_rate": 4.132231404958678e-08, "loss": 0.7865, "step": 1 }, { "epoch": 0.0, "grad_norm": 4.7507651462908065, "learning_rate": 8.264462809917357e-08, "loss": 0.8337, "step": 2 }, { "epoch": 0.0, "grad_norm": 5.697050383152881, "learning_rate": 1.2396694214876034e-07, "loss": 0.7606, "step": 3 }, { "epoch": 0.0, "grad_norm": 4.833288022383022, "learning_rate": 1.6528925619834713e-07, "loss": 0.7577, "step": 4 }, { "epoch": 0.0, "grad_norm": 4.6610300424792745, "learning_rate": 2.066115702479339e-07, "loss": 0.8072, "step": 5 }, { "epoch": 0.0, "grad_norm": 4.361957249992969, "learning_rate": 2.4793388429752067e-07, "loss": 0.787, "step": 6 }, { "epoch": 0.0, "grad_norm": 4.178361424391466, "learning_rate": 2.892561983471075e-07, "loss": 0.8219, "step": 7 }, { "epoch": 0.0, "grad_norm": 0.7176108393289844, "learning_rate": 3.3057851239669426e-07, "loss": 0.4465, "step": 8 }, { "epoch": 0.0, "grad_norm": 0.6557419046963692, "learning_rate": 3.7190082644628103e-07, "loss": 0.4455, "step": 9 }, { "epoch": 0.0, "grad_norm": 7.388562834977926, "learning_rate": 4.132231404958678e-07, "loss": 0.7729, "step": 10 }, { "epoch": 0.0, "grad_norm": 4.305265496796701, "learning_rate": 4.5454545454545457e-07, "loss": 0.8012, "step": 11 }, { "epoch": 0.0, "grad_norm": 4.2907850336775555, "learning_rate": 4.958677685950413e-07, "loss": 0.8273, "step": 12 }, { "epoch": 0.0, "grad_norm": 6.601527102678475, "learning_rate": 5.371900826446281e-07, "loss": 0.793, "step": 13 }, { "epoch": 0.0, "grad_norm": 3.968476594018755, "learning_rate": 5.78512396694215e-07, "loss": 0.788, "step": 14 }, { "epoch": 0.0, "grad_norm": 4.524793672412605, "learning_rate": 6.198347107438018e-07, "loss": 0.7818, "step": 15 }, { "epoch": 0.0, "grad_norm": 4.443521791810294, "learning_rate": 6.611570247933885e-07, "loss": 0.7434, "step": 16 }, { "epoch": 0.0, "grad_norm": 3.773132453512853, "learning_rate": 7.024793388429753e-07, "loss": 0.7548, "step": 17 }, { "epoch": 0.0, "grad_norm": 3.7482263437220364, "learning_rate": 7.438016528925621e-07, "loss": 0.7902, "step": 18 }, { "epoch": 0.0, "grad_norm": 3.836441534543556, "learning_rate": 7.851239669421488e-07, "loss": 0.7889, "step": 19 }, { "epoch": 0.0, "grad_norm": 3.9739204476729437, "learning_rate": 8.264462809917356e-07, "loss": 0.7762, "step": 20 }, { "epoch": 0.0, "grad_norm": 4.4474365552708, "learning_rate": 8.677685950413224e-07, "loss": 0.7679, "step": 21 }, { "epoch": 0.0, "grad_norm": 3.4580858839397326, "learning_rate": 9.090909090909091e-07, "loss": 0.8085, "step": 22 }, { "epoch": 0.0, "grad_norm": 3.413169939123294, "learning_rate": 9.50413223140496e-07, "loss": 0.6965, "step": 23 }, { "epoch": 0.0, "grad_norm": 2.8731977460065075, "learning_rate": 9.917355371900827e-07, "loss": 0.7589, "step": 24 }, { "epoch": 0.0, "grad_norm": 2.7087501271234804, "learning_rate": 1.0330578512396695e-06, "loss": 0.6862, "step": 25 }, { "epoch": 0.0, "grad_norm": 3.0679975335981493, "learning_rate": 1.0743801652892562e-06, "loss": 0.6426, "step": 26 }, { "epoch": 0.0, "grad_norm": 2.616920787492988, "learning_rate": 1.115702479338843e-06, "loss": 0.7291, "step": 27 }, { "epoch": 0.0, "grad_norm": 2.431583405470523, "learning_rate": 1.15702479338843e-06, "loss": 0.6693, "step": 28 }, { "epoch": 0.0, "grad_norm": 2.759569588989731, "learning_rate": 1.1983471074380167e-06, "loss": 0.6582, "step": 29 }, { "epoch": 0.0, "grad_norm": 2.4037335332718492, "learning_rate": 1.2396694214876035e-06, "loss": 0.6811, "step": 30 }, { "epoch": 0.0, "grad_norm": 2.4900842129537524, "learning_rate": 1.28099173553719e-06, "loss": 0.7467, "step": 31 }, { "epoch": 0.0, "grad_norm": 2.028361911401686, "learning_rate": 1.322314049586777e-06, "loss": 0.6178, "step": 32 }, { "epoch": 0.0, "grad_norm": 2.571997398821945, "learning_rate": 1.3636363636363636e-06, "loss": 0.6036, "step": 33 }, { "epoch": 0.0, "grad_norm": 2.276076194538056, "learning_rate": 1.4049586776859506e-06, "loss": 0.6242, "step": 34 }, { "epoch": 0.0, "grad_norm": 3.2059372728177826, "learning_rate": 1.4462809917355372e-06, "loss": 0.6995, "step": 35 }, { "epoch": 0.0, "grad_norm": 2.5259678899939773, "learning_rate": 1.4876033057851241e-06, "loss": 0.641, "step": 36 }, { "epoch": 0.0, "grad_norm": 2.099201309749444, "learning_rate": 1.5289256198347107e-06, "loss": 0.6355, "step": 37 }, { "epoch": 0.0, "grad_norm": 2.0199781281044813, "learning_rate": 1.5702479338842977e-06, "loss": 0.6039, "step": 38 }, { "epoch": 0.0, "grad_norm": 5.172474850706783, "learning_rate": 1.6115702479338842e-06, "loss": 0.6257, "step": 39 }, { "epoch": 0.0, "grad_norm": 2.051256221405437, "learning_rate": 1.6528925619834712e-06, "loss": 0.6485, "step": 40 }, { "epoch": 0.01, "grad_norm": 1.8784132131735476, "learning_rate": 1.694214876033058e-06, "loss": 0.6131, "step": 41 }, { "epoch": 0.01, "grad_norm": 2.3302814905160405, "learning_rate": 1.7355371900826448e-06, "loss": 0.6148, "step": 42 }, { "epoch": 0.01, "grad_norm": 2.675021092939445, "learning_rate": 1.7768595041322315e-06, "loss": 0.618, "step": 43 }, { "epoch": 0.01, "grad_norm": 1.9396764176262677, "learning_rate": 1.8181818181818183e-06, "loss": 0.607, "step": 44 }, { "epoch": 0.01, "grad_norm": 3.162903769359593, "learning_rate": 1.859504132231405e-06, "loss": 0.6073, "step": 45 }, { "epoch": 0.01, "grad_norm": 1.8725730728879908, "learning_rate": 1.900826446280992e-06, "loss": 0.6039, "step": 46 }, { "epoch": 0.01, "grad_norm": 2.1190143535626333, "learning_rate": 1.9421487603305786e-06, "loss": 0.5924, "step": 47 }, { "epoch": 0.01, "grad_norm": 1.831058773283454, "learning_rate": 1.9834710743801654e-06, "loss": 0.6027, "step": 48 }, { "epoch": 0.01, "grad_norm": 0.7497304742654913, "learning_rate": 2.024793388429752e-06, "loss": 0.5308, "step": 49 }, { "epoch": 0.01, "grad_norm": 2.031101871069113, "learning_rate": 2.066115702479339e-06, "loss": 0.5773, "step": 50 }, { "epoch": 0.01, "grad_norm": 2.0924155453957884, "learning_rate": 2.1074380165289257e-06, "loss": 0.5808, "step": 51 }, { "epoch": 0.01, "grad_norm": 2.1410226694005132, "learning_rate": 2.1487603305785124e-06, "loss": 0.6618, "step": 52 }, { "epoch": 0.01, "grad_norm": 1.9601583251234524, "learning_rate": 2.1900826446280992e-06, "loss": 0.5147, "step": 53 }, { "epoch": 0.01, "grad_norm": 2.217739964833312, "learning_rate": 2.231404958677686e-06, "loss": 0.6055, "step": 54 }, { "epoch": 0.01, "grad_norm": 3.4871845999980895, "learning_rate": 2.2727272727272728e-06, "loss": 0.5695, "step": 55 }, { "epoch": 0.01, "grad_norm": 9.728315202799985, "learning_rate": 2.31404958677686e-06, "loss": 0.5878, "step": 56 }, { "epoch": 0.01, "grad_norm": 2.1838581200045892, "learning_rate": 2.3553719008264463e-06, "loss": 0.5239, "step": 57 }, { "epoch": 0.01, "grad_norm": 2.2711658012889107, "learning_rate": 2.3966942148760335e-06, "loss": 0.595, "step": 58 }, { "epoch": 0.01, "grad_norm": 1.9191083300460372, "learning_rate": 2.43801652892562e-06, "loss": 0.597, "step": 59 }, { "epoch": 0.01, "grad_norm": 3.232113453296245, "learning_rate": 2.479338842975207e-06, "loss": 0.5439, "step": 60 }, { "epoch": 0.01, "grad_norm": 2.247069820214734, "learning_rate": 2.5206611570247934e-06, "loss": 0.6307, "step": 61 }, { "epoch": 0.01, "grad_norm": 2.1081482509346166, "learning_rate": 2.56198347107438e-06, "loss": 0.6352, "step": 62 }, { "epoch": 0.01, "grad_norm": 2.038799318139236, "learning_rate": 2.6033057851239673e-06, "loss": 0.6116, "step": 63 }, { "epoch": 0.01, "grad_norm": 2.2767344300217505, "learning_rate": 2.644628099173554e-06, "loss": 0.5667, "step": 64 }, { "epoch": 0.01, "grad_norm": 2.1918624104732034, "learning_rate": 2.6859504132231405e-06, "loss": 0.6243, "step": 65 }, { "epoch": 0.01, "grad_norm": 1.9263213735038611, "learning_rate": 2.7272727272727272e-06, "loss": 0.5455, "step": 66 }, { "epoch": 0.01, "grad_norm": 4.079115837709929, "learning_rate": 2.7685950413223144e-06, "loss": 0.5963, "step": 67 }, { "epoch": 0.01, "grad_norm": 2.422611576288611, "learning_rate": 2.809917355371901e-06, "loss": 0.5641, "step": 68 }, { "epoch": 0.01, "grad_norm": 1.6977361968463944, "learning_rate": 2.851239669421488e-06, "loss": 0.5915, "step": 69 }, { "epoch": 0.01, "grad_norm": 1.6310705010720266, "learning_rate": 2.8925619834710743e-06, "loss": 0.5116, "step": 70 }, { "epoch": 0.01, "grad_norm": 1.9813718105958915, "learning_rate": 2.9338842975206615e-06, "loss": 0.5831, "step": 71 }, { "epoch": 0.01, "grad_norm": 4.533914016058211, "learning_rate": 2.9752066115702483e-06, "loss": 0.5544, "step": 72 }, { "epoch": 0.01, "grad_norm": 2.749797864900883, "learning_rate": 3.016528925619835e-06, "loss": 0.6231, "step": 73 }, { "epoch": 0.01, "grad_norm": 2.2739710963750737, "learning_rate": 3.0578512396694214e-06, "loss": 0.5658, "step": 74 }, { "epoch": 0.01, "grad_norm": 1.6293489910001722, "learning_rate": 3.0991735537190086e-06, "loss": 0.5888, "step": 75 }, { "epoch": 0.01, "grad_norm": 6.27649736387961, "learning_rate": 3.1404958677685953e-06, "loss": 0.566, "step": 76 }, { "epoch": 0.01, "grad_norm": 1.971009882984369, "learning_rate": 3.181818181818182e-06, "loss": 0.5731, "step": 77 }, { "epoch": 0.01, "grad_norm": 3.7269504201622956, "learning_rate": 3.2231404958677685e-06, "loss": 0.5792, "step": 78 }, { "epoch": 0.01, "grad_norm": 2.398280082914536, "learning_rate": 3.264462809917356e-06, "loss": 0.5617, "step": 79 }, { "epoch": 0.01, "grad_norm": 2.239264649971885, "learning_rate": 3.3057851239669424e-06, "loss": 0.6041, "step": 80 }, { "epoch": 0.01, "grad_norm": 2.1614348411341076, "learning_rate": 3.347107438016529e-06, "loss": 0.6224, "step": 81 }, { "epoch": 0.01, "grad_norm": 2.5115529692047422, "learning_rate": 3.388429752066116e-06, "loss": 0.5348, "step": 82 }, { "epoch": 0.01, "grad_norm": 8.669544917763194, "learning_rate": 3.429752066115703e-06, "loss": 0.5508, "step": 83 }, { "epoch": 0.01, "grad_norm": 1.9449410268347895, "learning_rate": 3.4710743801652895e-06, "loss": 0.6149, "step": 84 }, { "epoch": 0.01, "grad_norm": 2.2476415793258164, "learning_rate": 3.5123966942148763e-06, "loss": 0.566, "step": 85 }, { "epoch": 0.01, "grad_norm": 2.284326985907656, "learning_rate": 3.553719008264463e-06, "loss": 0.5919, "step": 86 }, { "epoch": 0.01, "grad_norm": 2.329487557124964, "learning_rate": 3.5950413223140502e-06, "loss": 0.5753, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.6915372745929467, "learning_rate": 3.6363636363636366e-06, "loss": 0.5402, "step": 88 }, { "epoch": 0.01, "grad_norm": 1.840436747417264, "learning_rate": 3.6776859504132234e-06, "loss": 0.5935, "step": 89 }, { "epoch": 0.01, "grad_norm": 2.0556986278371054, "learning_rate": 3.71900826446281e-06, "loss": 0.5469, "step": 90 }, { "epoch": 0.01, "grad_norm": 3.216467905440008, "learning_rate": 3.7603305785123973e-06, "loss": 0.62, "step": 91 }, { "epoch": 0.01, "grad_norm": 2.7956144664342064, "learning_rate": 3.801652892561984e-06, "loss": 0.5953, "step": 92 }, { "epoch": 0.01, "grad_norm": 2.052474595357274, "learning_rate": 3.842975206611571e-06, "loss": 0.5889, "step": 93 }, { "epoch": 0.01, "grad_norm": 2.6229806435376872, "learning_rate": 3.884297520661157e-06, "loss": 0.5417, "step": 94 }, { "epoch": 0.01, "grad_norm": 2.529529144898349, "learning_rate": 3.925619834710744e-06, "loss": 0.462, "step": 95 }, { "epoch": 0.01, "grad_norm": 2.104085886441274, "learning_rate": 3.966942148760331e-06, "loss": 0.5942, "step": 96 }, { "epoch": 0.01, "grad_norm": 1.7522377211377986, "learning_rate": 4.008264462809918e-06, "loss": 0.5685, "step": 97 }, { "epoch": 0.01, "grad_norm": 2.433678798953759, "learning_rate": 4.049586776859504e-06, "loss": 0.5783, "step": 98 }, { "epoch": 0.01, "grad_norm": 2.2162277574010516, "learning_rate": 4.0909090909090915e-06, "loss": 0.5774, "step": 99 }, { "epoch": 0.01, "grad_norm": 5.1328075855667015, "learning_rate": 4.132231404958678e-06, "loss": 0.4919, "step": 100 }, { "epoch": 0.01, "grad_norm": 2.1548005092718774, "learning_rate": 4.173553719008265e-06, "loss": 0.5601, "step": 101 }, { "epoch": 0.01, "grad_norm": 2.275791310124109, "learning_rate": 4.214876033057851e-06, "loss": 0.5364, "step": 102 }, { "epoch": 0.01, "grad_norm": 2.729405960454034, "learning_rate": 4.2561983471074386e-06, "loss": 0.5553, "step": 103 }, { "epoch": 0.01, "grad_norm": 1.8846153766860623, "learning_rate": 4.297520661157025e-06, "loss": 0.5103, "step": 104 }, { "epoch": 0.01, "grad_norm": 2.0246132215342603, "learning_rate": 4.338842975206612e-06, "loss": 0.5289, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.7353792905572192, "learning_rate": 4.3801652892561984e-06, "loss": 0.5145, "step": 106 }, { "epoch": 0.01, "grad_norm": 2.167202823008346, "learning_rate": 4.421487603305786e-06, "loss": 0.5981, "step": 107 }, { "epoch": 0.01, "grad_norm": 3.2486808689824627, "learning_rate": 4.462809917355372e-06, "loss": 0.584, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.7923020421622617, "learning_rate": 4.504132231404959e-06, "loss": 0.5168, "step": 109 }, { "epoch": 0.01, "grad_norm": 1.998942559674158, "learning_rate": 4.5454545454545455e-06, "loss": 0.5632, "step": 110 }, { "epoch": 0.01, "grad_norm": 2.1861973024624484, "learning_rate": 4.586776859504133e-06, "loss": 0.5556, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.902236458891177, "learning_rate": 4.62809917355372e-06, "loss": 0.5957, "step": 112 }, { "epoch": 0.01, "grad_norm": 2.3472123997091527, "learning_rate": 4.669421487603306e-06, "loss": 0.5574, "step": 113 }, { "epoch": 0.01, "grad_norm": 1.826111048367796, "learning_rate": 4.710743801652893e-06, "loss": 0.5195, "step": 114 }, { "epoch": 0.01, "grad_norm": 1.8812962425133646, "learning_rate": 4.75206611570248e-06, "loss": 0.5703, "step": 115 }, { "epoch": 0.01, "grad_norm": 1.8624556818394709, "learning_rate": 4.793388429752067e-06, "loss": 0.5656, "step": 116 }, { "epoch": 0.01, "grad_norm": 3.766670998630178, "learning_rate": 4.834710743801653e-06, "loss": 0.5282, "step": 117 }, { "epoch": 0.01, "grad_norm": 2.039610136010939, "learning_rate": 4.87603305785124e-06, "loss": 0.5132, "step": 118 }, { "epoch": 0.01, "grad_norm": 2.5079262458353866, "learning_rate": 4.917355371900827e-06, "loss": 0.5289, "step": 119 }, { "epoch": 0.01, "grad_norm": 2.4038638893489335, "learning_rate": 4.958677685950414e-06, "loss": 0.5659, "step": 120 }, { "epoch": 0.02, "grad_norm": 1.6960722697241173, "learning_rate": 5e-06, "loss": 0.5051, "step": 121 }, { "epoch": 0.02, "grad_norm": 2.2161861436132058, "learning_rate": 5.041322314049587e-06, "loss": 0.5622, "step": 122 }, { "epoch": 0.02, "grad_norm": 2.0070358384779747, "learning_rate": 5.082644628099174e-06, "loss": 0.569, "step": 123 }, { "epoch": 0.02, "grad_norm": 2.072153444096047, "learning_rate": 5.12396694214876e-06, "loss": 0.5898, "step": 124 }, { "epoch": 0.02, "grad_norm": 1.961455431362149, "learning_rate": 5.165289256198347e-06, "loss": 0.5278, "step": 125 }, { "epoch": 0.02, "grad_norm": 2.7520151908460355, "learning_rate": 5.206611570247935e-06, "loss": 0.5762, "step": 126 }, { "epoch": 0.02, "grad_norm": 0.6835662297556955, "learning_rate": 5.247933884297521e-06, "loss": 0.5071, "step": 127 }, { "epoch": 0.02, "grad_norm": 2.702178064947002, "learning_rate": 5.289256198347108e-06, "loss": 0.5928, "step": 128 }, { "epoch": 0.02, "grad_norm": 2.2345051265285494, "learning_rate": 5.3305785123966946e-06, "loss": 0.5699, "step": 129 }, { "epoch": 0.02, "grad_norm": 2.0642219152001933, "learning_rate": 5.371900826446281e-06, "loss": 0.5713, "step": 130 }, { "epoch": 0.02, "grad_norm": 2.0108826296235645, "learning_rate": 5.413223140495868e-06, "loss": 0.5459, "step": 131 }, { "epoch": 0.02, "grad_norm": 2.1627936472454423, "learning_rate": 5.4545454545454545e-06, "loss": 0.516, "step": 132 }, { "epoch": 0.02, "grad_norm": 1.6647761146847948, "learning_rate": 5.495867768595042e-06, "loss": 0.4862, "step": 133 }, { "epoch": 0.02, "grad_norm": 2.7119580663489593, "learning_rate": 5.537190082644629e-06, "loss": 0.5691, "step": 134 }, { "epoch": 0.02, "grad_norm": 2.5852970473911814, "learning_rate": 5.578512396694216e-06, "loss": 0.5285, "step": 135 }, { "epoch": 0.02, "grad_norm": 2.4735294501089675, "learning_rate": 5.619834710743802e-06, "loss": 0.5869, "step": 136 }, { "epoch": 0.02, "grad_norm": 2.04141538102587, "learning_rate": 5.661157024793389e-06, "loss": 0.5159, "step": 137 }, { "epoch": 0.02, "grad_norm": 1.724866138626163, "learning_rate": 5.702479338842976e-06, "loss": 0.5488, "step": 138 }, { "epoch": 0.02, "grad_norm": 1.7139656285038183, "learning_rate": 5.743801652892562e-06, "loss": 0.5026, "step": 139 }, { "epoch": 0.02, "grad_norm": 1.847574100805026, "learning_rate": 5.785123966942149e-06, "loss": 0.5027, "step": 140 }, { "epoch": 0.02, "grad_norm": 3.230329154971066, "learning_rate": 5.826446280991736e-06, "loss": 0.5595, "step": 141 }, { "epoch": 0.02, "grad_norm": 1.795342282430791, "learning_rate": 5.867768595041323e-06, "loss": 0.5263, "step": 142 }, { "epoch": 0.02, "grad_norm": 1.8716269720617247, "learning_rate": 5.90909090909091e-06, "loss": 0.5301, "step": 143 }, { "epoch": 0.02, "grad_norm": 1.9299922283974924, "learning_rate": 5.9504132231404965e-06, "loss": 0.5687, "step": 144 }, { "epoch": 0.02, "grad_norm": 0.7136333319419209, "learning_rate": 5.991735537190083e-06, "loss": 0.5037, "step": 145 }, { "epoch": 0.02, "grad_norm": 1.7337829913060052, "learning_rate": 6.03305785123967e-06, "loss": 0.4986, "step": 146 }, { "epoch": 0.02, "grad_norm": 0.6919406868411878, "learning_rate": 6.074380165289256e-06, "loss": 0.4855, "step": 147 }, { "epoch": 0.02, "grad_norm": 2.328246418923426, "learning_rate": 6.115702479338843e-06, "loss": 0.5565, "step": 148 }, { "epoch": 0.02, "grad_norm": 5.441722885984954, "learning_rate": 6.15702479338843e-06, "loss": 0.5145, "step": 149 }, { "epoch": 0.02, "grad_norm": 2.1257138893693006, "learning_rate": 6.198347107438017e-06, "loss": 0.5299, "step": 150 }, { "epoch": 0.02, "grad_norm": 2.1770823656371974, "learning_rate": 6.239669421487604e-06, "loss": 0.5518, "step": 151 }, { "epoch": 0.02, "grad_norm": 2.1037313109160927, "learning_rate": 6.280991735537191e-06, "loss": 0.5186, "step": 152 }, { "epoch": 0.02, "grad_norm": 2.095149553016966, "learning_rate": 6.322314049586777e-06, "loss": 0.5334, "step": 153 }, { "epoch": 0.02, "grad_norm": 0.6095163348330389, "learning_rate": 6.363636363636364e-06, "loss": 0.4232, "step": 154 }, { "epoch": 0.02, "grad_norm": 2.260362966455831, "learning_rate": 6.404958677685951e-06, "loss": 0.5246, "step": 155 }, { "epoch": 0.02, "grad_norm": 2.18109831500874, "learning_rate": 6.446280991735537e-06, "loss": 0.5689, "step": 156 }, { "epoch": 0.02, "grad_norm": 1.6992471311150057, "learning_rate": 6.487603305785124e-06, "loss": 0.5432, "step": 157 }, { "epoch": 0.02, "grad_norm": 2.1996411301077297, "learning_rate": 6.528925619834712e-06, "loss": 0.5471, "step": 158 }, { "epoch": 0.02, "grad_norm": 6.683198647229002, "learning_rate": 6.5702479338842985e-06, "loss": 0.4844, "step": 159 }, { "epoch": 0.02, "grad_norm": 1.9315335319997355, "learning_rate": 6.611570247933885e-06, "loss": 0.5114, "step": 160 }, { "epoch": 0.02, "grad_norm": 4.034007691224993, "learning_rate": 6.652892561983472e-06, "loss": 0.5095, "step": 161 }, { "epoch": 0.02, "grad_norm": 1.933206220453076, "learning_rate": 6.694214876033058e-06, "loss": 0.5584, "step": 162 }, { "epoch": 0.02, "grad_norm": 1.7594153622700035, "learning_rate": 6.735537190082645e-06, "loss": 0.5267, "step": 163 }, { "epoch": 0.02, "grad_norm": 2.128978895295904, "learning_rate": 6.776859504132232e-06, "loss": 0.4973, "step": 164 }, { "epoch": 0.02, "grad_norm": 1.8752257215193229, "learning_rate": 6.818181818181818e-06, "loss": 0.5569, "step": 165 }, { "epoch": 0.02, "grad_norm": 1.7552075283979744, "learning_rate": 6.859504132231406e-06, "loss": 0.5137, "step": 166 }, { "epoch": 0.02, "grad_norm": 1.7116607536756927, "learning_rate": 6.900826446280993e-06, "loss": 0.4852, "step": 167 }, { "epoch": 0.02, "grad_norm": 1.8310325583717713, "learning_rate": 6.942148760330579e-06, "loss": 0.512, "step": 168 }, { "epoch": 0.02, "grad_norm": 2.051351087670528, "learning_rate": 6.983471074380166e-06, "loss": 0.5072, "step": 169 }, { "epoch": 0.02, "grad_norm": 2.6691184220184736, "learning_rate": 7.0247933884297525e-06, "loss": 0.553, "step": 170 }, { "epoch": 0.02, "grad_norm": 2.472591362280194, "learning_rate": 7.066115702479339e-06, "loss": 0.5742, "step": 171 }, { "epoch": 0.02, "grad_norm": 1.97874128709382, "learning_rate": 7.107438016528926e-06, "loss": 0.5504, "step": 172 }, { "epoch": 0.02, "grad_norm": 1.968673756150883, "learning_rate": 7.1487603305785124e-06, "loss": 0.5672, "step": 173 }, { "epoch": 0.02, "grad_norm": 2.58437763050975, "learning_rate": 7.1900826446281005e-06, "loss": 0.5689, "step": 174 }, { "epoch": 0.02, "grad_norm": 2.1221625942102254, "learning_rate": 7.231404958677687e-06, "loss": 0.551, "step": 175 }, { "epoch": 0.02, "grad_norm": 1.7421455025898023, "learning_rate": 7.272727272727273e-06, "loss": 0.5774, "step": 176 }, { "epoch": 0.02, "grad_norm": 1.8920356174568798, "learning_rate": 7.31404958677686e-06, "loss": 0.5269, "step": 177 }, { "epoch": 0.02, "grad_norm": 1.9144789767737893, "learning_rate": 7.355371900826447e-06, "loss": 0.5161, "step": 178 }, { "epoch": 0.02, "grad_norm": 5.34326962708711, "learning_rate": 7.396694214876033e-06, "loss": 0.5565, "step": 179 }, { "epoch": 0.02, "grad_norm": 3.702683434570155, "learning_rate": 7.43801652892562e-06, "loss": 0.5342, "step": 180 }, { "epoch": 0.02, "grad_norm": 2.4440904350216632, "learning_rate": 7.479338842975207e-06, "loss": 0.4631, "step": 181 }, { "epoch": 0.02, "grad_norm": 2.2025744403847805, "learning_rate": 7.520661157024795e-06, "loss": 0.5517, "step": 182 }, { "epoch": 0.02, "grad_norm": 2.3152746565304083, "learning_rate": 7.561983471074381e-06, "loss": 0.5385, "step": 183 }, { "epoch": 0.02, "grad_norm": 1.8138121988036877, "learning_rate": 7.603305785123968e-06, "loss": 0.5312, "step": 184 }, { "epoch": 0.02, "grad_norm": 1.9696695332798093, "learning_rate": 7.644628099173555e-06, "loss": 0.4975, "step": 185 }, { "epoch": 0.02, "grad_norm": 2.1638549313150577, "learning_rate": 7.685950413223142e-06, "loss": 0.4598, "step": 186 }, { "epoch": 0.02, "grad_norm": 3.679763180841708, "learning_rate": 7.727272727272727e-06, "loss": 0.5828, "step": 187 }, { "epoch": 0.02, "grad_norm": 15.755753338347303, "learning_rate": 7.768595041322314e-06, "loss": 0.6424, "step": 188 }, { "epoch": 0.02, "grad_norm": 2.0607035699738367, "learning_rate": 7.809917355371902e-06, "loss": 0.6084, "step": 189 }, { "epoch": 0.02, "grad_norm": 4.563843230786939, "learning_rate": 7.851239669421489e-06, "loss": 0.5493, "step": 190 }, { "epoch": 0.02, "grad_norm": 2.7007150791601604, "learning_rate": 7.892561983471076e-06, "loss": 0.5373, "step": 191 }, { "epoch": 0.02, "grad_norm": 1.6296232588728798, "learning_rate": 7.933884297520661e-06, "loss": 0.5412, "step": 192 }, { "epoch": 0.02, "grad_norm": 2.642761568125003, "learning_rate": 7.975206611570249e-06, "loss": 0.5568, "step": 193 }, { "epoch": 0.02, "grad_norm": 2.062493720572721, "learning_rate": 8.016528925619836e-06, "loss": 0.4958, "step": 194 }, { "epoch": 0.02, "grad_norm": 1.840113636335198, "learning_rate": 8.057851239669421e-06, "loss": 0.5443, "step": 195 }, { "epoch": 0.02, "grad_norm": 2.2357413647918993, "learning_rate": 8.099173553719009e-06, "loss": 0.5351, "step": 196 }, { "epoch": 0.02, "grad_norm": 2.220715219963027, "learning_rate": 8.140495867768596e-06, "loss": 0.5326, "step": 197 }, { "epoch": 0.02, "grad_norm": 1.9436370507926042, "learning_rate": 8.181818181818183e-06, "loss": 0.4906, "step": 198 }, { "epoch": 0.02, "grad_norm": 4.233288939881527, "learning_rate": 8.22314049586777e-06, "loss": 0.4961, "step": 199 }, { "epoch": 0.02, "grad_norm": 2.064536487822645, "learning_rate": 8.264462809917356e-06, "loss": 0.5768, "step": 200 }, { "epoch": 0.02, "grad_norm": 2.7690467508481755, "learning_rate": 8.305785123966943e-06, "loss": 0.55, "step": 201 }, { "epoch": 0.03, "grad_norm": 2.137594915236406, "learning_rate": 8.34710743801653e-06, "loss": 0.4988, "step": 202 }, { "epoch": 0.03, "grad_norm": 1.9534259558411042, "learning_rate": 8.388429752066116e-06, "loss": 0.5566, "step": 203 }, { "epoch": 0.03, "grad_norm": 2.5020717347328643, "learning_rate": 8.429752066115703e-06, "loss": 0.5513, "step": 204 }, { "epoch": 0.03, "grad_norm": 2.660014453532568, "learning_rate": 8.47107438016529e-06, "loss": 0.4613, "step": 205 }, { "epoch": 0.03, "grad_norm": 0.6983360001377713, "learning_rate": 8.512396694214877e-06, "loss": 0.4163, "step": 206 }, { "epoch": 0.03, "grad_norm": 4.38401204497113, "learning_rate": 8.553719008264464e-06, "loss": 0.5223, "step": 207 }, { "epoch": 0.03, "grad_norm": 4.657695910352119, "learning_rate": 8.59504132231405e-06, "loss": 0.6005, "step": 208 }, { "epoch": 0.03, "grad_norm": 2.1393074553357745, "learning_rate": 8.636363636363637e-06, "loss": 0.5133, "step": 209 }, { "epoch": 0.03, "grad_norm": 3.524402023924675, "learning_rate": 8.677685950413224e-06, "loss": 0.5466, "step": 210 }, { "epoch": 0.03, "grad_norm": 2.064634815950332, "learning_rate": 8.71900826446281e-06, "loss": 0.4988, "step": 211 }, { "epoch": 0.03, "grad_norm": 1.96453185900645, "learning_rate": 8.760330578512397e-06, "loss": 0.5043, "step": 212 }, { "epoch": 0.03, "grad_norm": 2.3011311824481058, "learning_rate": 8.801652892561984e-06, "loss": 0.5443, "step": 213 }, { "epoch": 0.03, "grad_norm": 1.9545398206419267, "learning_rate": 8.842975206611571e-06, "loss": 0.5573, "step": 214 }, { "epoch": 0.03, "grad_norm": 2.2270890144831594, "learning_rate": 8.884297520661158e-06, "loss": 0.5475, "step": 215 }, { "epoch": 0.03, "grad_norm": 2.367048313677594, "learning_rate": 8.925619834710744e-06, "loss": 0.5026, "step": 216 }, { "epoch": 0.03, "grad_norm": 2.3731067718776466, "learning_rate": 8.966942148760331e-06, "loss": 0.5645, "step": 217 }, { "epoch": 0.03, "grad_norm": 1.7771313439516687, "learning_rate": 9.008264462809918e-06, "loss": 0.5453, "step": 218 }, { "epoch": 0.03, "grad_norm": 1.8002080571572299, "learning_rate": 9.049586776859506e-06, "loss": 0.495, "step": 219 }, { "epoch": 0.03, "grad_norm": 2.302513893194134, "learning_rate": 9.090909090909091e-06, "loss": 0.5282, "step": 220 }, { "epoch": 0.03, "grad_norm": 4.695747565735532, "learning_rate": 9.132231404958678e-06, "loss": 0.5554, "step": 221 }, { "epoch": 0.03, "grad_norm": 4.083286425298984, "learning_rate": 9.173553719008265e-06, "loss": 0.5131, "step": 222 }, { "epoch": 0.03, "grad_norm": 2.330380712840821, "learning_rate": 9.214876033057853e-06, "loss": 0.5469, "step": 223 }, { "epoch": 0.03, "grad_norm": 1.8673689557327797, "learning_rate": 9.25619834710744e-06, "loss": 0.5133, "step": 224 }, { "epoch": 0.03, "grad_norm": 3.7630221758042226, "learning_rate": 9.297520661157025e-06, "loss": 0.5031, "step": 225 }, { "epoch": 0.03, "grad_norm": 2.421818000908611, "learning_rate": 9.338842975206613e-06, "loss": 0.4737, "step": 226 }, { "epoch": 0.03, "grad_norm": 2.1159305242439594, "learning_rate": 9.3801652892562e-06, "loss": 0.5586, "step": 227 }, { "epoch": 0.03, "grad_norm": 3.542872602570653, "learning_rate": 9.421487603305785e-06, "loss": 0.5837, "step": 228 }, { "epoch": 0.03, "grad_norm": 3.8866498424994997, "learning_rate": 9.462809917355372e-06, "loss": 0.5472, "step": 229 }, { "epoch": 0.03, "grad_norm": 1.8632223477667107, "learning_rate": 9.50413223140496e-06, "loss": 0.4915, "step": 230 }, { "epoch": 0.03, "grad_norm": 2.769130478817958, "learning_rate": 9.545454545454547e-06, "loss": 0.5311, "step": 231 }, { "epoch": 0.03, "grad_norm": 1.5927783003609426, "learning_rate": 9.586776859504134e-06, "loss": 0.4975, "step": 232 }, { "epoch": 0.03, "grad_norm": 2.346701530645649, "learning_rate": 9.62809917355372e-06, "loss": 0.5696, "step": 233 }, { "epoch": 0.03, "grad_norm": 1.8552511574832422, "learning_rate": 9.669421487603307e-06, "loss": 0.5047, "step": 234 }, { "epoch": 0.03, "grad_norm": 1.8612115784851542, "learning_rate": 9.710743801652894e-06, "loss": 0.5683, "step": 235 }, { "epoch": 0.03, "grad_norm": 1.7957162557860462, "learning_rate": 9.75206611570248e-06, "loss": 0.5231, "step": 236 }, { "epoch": 0.03, "grad_norm": 1.6128091928743173, "learning_rate": 9.793388429752067e-06, "loss": 0.5223, "step": 237 }, { "epoch": 0.03, "grad_norm": 1.7441349517290226, "learning_rate": 9.834710743801654e-06, "loss": 0.5417, "step": 238 }, { "epoch": 0.03, "grad_norm": 1.7518601110115872, "learning_rate": 9.876033057851241e-06, "loss": 0.5866, "step": 239 }, { "epoch": 0.03, "grad_norm": 2.1033851691625745, "learning_rate": 9.917355371900828e-06, "loss": 0.5297, "step": 240 }, { "epoch": 0.03, "grad_norm": 1.9910098722645073, "learning_rate": 9.958677685950414e-06, "loss": 0.4461, "step": 241 }, { "epoch": 0.03, "grad_norm": 2.8174366976581458, "learning_rate": 1e-05, "loss": 0.5063, "step": 242 }, { "epoch": 0.03, "grad_norm": 1.674372365245035, "learning_rate": 9.999999596102982e-06, "loss": 0.5256, "step": 243 }, { "epoch": 0.03, "grad_norm": 1.7487857604376191, "learning_rate": 9.999998384411993e-06, "loss": 0.5951, "step": 244 }, { "epoch": 0.03, "grad_norm": 1.6303611245059892, "learning_rate": 9.99999636492723e-06, "loss": 0.6027, "step": 245 }, { "epoch": 0.03, "grad_norm": 2.0701765426010597, "learning_rate": 9.999993537649018e-06, "loss": 0.5304, "step": 246 }, { "epoch": 0.03, "grad_norm": 2.1090228708257226, "learning_rate": 9.999989902577813e-06, "loss": 0.5487, "step": 247 }, { "epoch": 0.03, "grad_norm": 2.2097391973231413, "learning_rate": 9.999985459714203e-06, "loss": 0.5548, "step": 248 }, { "epoch": 0.03, "grad_norm": 0.6819193332040362, "learning_rate": 9.999980209058907e-06, "loss": 0.4655, "step": 249 }, { "epoch": 0.03, "grad_norm": 1.7160171625750975, "learning_rate": 9.999974150612773e-06, "loss": 0.5536, "step": 250 }, { "epoch": 0.03, "grad_norm": 2.044131904537645, "learning_rate": 9.999967284376777e-06, "loss": 0.5704, "step": 251 }, { "epoch": 0.03, "grad_norm": 1.8803170341600512, "learning_rate": 9.999959610352032e-06, "loss": 0.5053, "step": 252 }, { "epoch": 0.03, "grad_norm": 2.1350599019041843, "learning_rate": 9.999951128539776e-06, "loss": 0.5145, "step": 253 }, { "epoch": 0.03, "grad_norm": 3.959337484545383, "learning_rate": 9.999941838941378e-06, "loss": 0.5465, "step": 254 }, { "epoch": 0.03, "grad_norm": 2.263615164770816, "learning_rate": 9.999931741558342e-06, "loss": 0.5284, "step": 255 }, { "epoch": 0.03, "grad_norm": 1.9392426887684562, "learning_rate": 9.999920836392298e-06, "loss": 0.5087, "step": 256 }, { "epoch": 0.03, "grad_norm": 2.0601038827538445, "learning_rate": 9.999909123445006e-06, "loss": 0.5072, "step": 257 }, { "epoch": 0.03, "grad_norm": 1.9443319471352343, "learning_rate": 9.99989660271836e-06, "loss": 0.5114, "step": 258 }, { "epoch": 0.03, "grad_norm": 1.9823465986216011, "learning_rate": 9.999883274214383e-06, "loss": 0.5244, "step": 259 }, { "epoch": 0.03, "grad_norm": 1.5894097797953115, "learning_rate": 9.99986913793523e-06, "loss": 0.5607, "step": 260 }, { "epoch": 0.03, "grad_norm": 2.1034456786503797, "learning_rate": 9.99985419388318e-06, "loss": 0.5246, "step": 261 }, { "epoch": 0.03, "grad_norm": 13.925708553269999, "learning_rate": 9.999838442060652e-06, "loss": 0.5578, "step": 262 }, { "epoch": 0.03, "grad_norm": 2.031790416570794, "learning_rate": 9.999821882470188e-06, "loss": 0.5533, "step": 263 }, { "epoch": 0.03, "grad_norm": 2.299536220707557, "learning_rate": 9.999804515114465e-06, "loss": 0.4861, "step": 264 }, { "epoch": 0.03, "grad_norm": 3.339327687472147, "learning_rate": 9.999786339996288e-06, "loss": 0.5479, "step": 265 }, { "epoch": 0.03, "grad_norm": 0.7242515927078733, "learning_rate": 9.999767357118594e-06, "loss": 0.4831, "step": 266 }, { "epoch": 0.03, "grad_norm": 2.910605648741558, "learning_rate": 9.99974756648445e-06, "loss": 0.5574, "step": 267 }, { "epoch": 0.03, "grad_norm": 2.0505930996455395, "learning_rate": 9.99972696809705e-06, "loss": 0.5484, "step": 268 }, { "epoch": 0.03, "grad_norm": 2.3422061192597923, "learning_rate": 9.999705561959727e-06, "loss": 0.5685, "step": 269 }, { "epoch": 0.03, "grad_norm": 0.685857192457201, "learning_rate": 9.999683348075937e-06, "loss": 0.4722, "step": 270 }, { "epoch": 0.03, "grad_norm": 1.9628329354004277, "learning_rate": 9.999660326449267e-06, "loss": 0.5987, "step": 271 }, { "epoch": 0.03, "grad_norm": 2.2789243043528704, "learning_rate": 9.99963649708344e-06, "loss": 0.5422, "step": 272 }, { "epoch": 0.03, "grad_norm": 3.421312306756197, "learning_rate": 9.999611859982304e-06, "loss": 0.5501, "step": 273 }, { "epoch": 0.03, "grad_norm": 2.0296521584425005, "learning_rate": 9.99958641514984e-06, "loss": 0.5765, "step": 274 }, { "epoch": 0.03, "grad_norm": 1.5470558706281141, "learning_rate": 9.999560162590157e-06, "loss": 0.5157, "step": 275 }, { "epoch": 0.03, "grad_norm": 1.4700139418770868, "learning_rate": 9.999533102307497e-06, "loss": 0.506, "step": 276 }, { "epoch": 0.03, "grad_norm": 1.6813756337253796, "learning_rate": 9.999505234306232e-06, "loss": 0.513, "step": 277 }, { "epoch": 0.03, "grad_norm": 1.6349444581628045, "learning_rate": 9.999476558590865e-06, "loss": 0.5735, "step": 278 }, { "epoch": 0.03, "grad_norm": 1.9746160718526162, "learning_rate": 9.99944707516603e-06, "loss": 0.5774, "step": 279 }, { "epoch": 0.03, "grad_norm": 4.4811608488552475, "learning_rate": 9.999416784036488e-06, "loss": 0.4944, "step": 280 }, { "epoch": 0.03, "grad_norm": 1.5957764732299413, "learning_rate": 9.999385685207135e-06, "loss": 0.5006, "step": 281 }, { "epoch": 0.03, "grad_norm": 1.9472233687602192, "learning_rate": 9.999353778682992e-06, "loss": 0.5112, "step": 282 }, { "epoch": 0.04, "grad_norm": 1.9484798031195745, "learning_rate": 9.999321064469216e-06, "loss": 0.4938, "step": 283 }, { "epoch": 0.04, "grad_norm": 2.920263709004281, "learning_rate": 9.999287542571092e-06, "loss": 0.4956, "step": 284 }, { "epoch": 0.04, "grad_norm": 0.6468350417563766, "learning_rate": 9.999253212994035e-06, "loss": 0.4424, "step": 285 }, { "epoch": 0.04, "grad_norm": 1.844929706278709, "learning_rate": 9.999218075743594e-06, "loss": 0.5706, "step": 286 }, { "epoch": 0.04, "grad_norm": 4.482273640778281, "learning_rate": 9.999182130825443e-06, "loss": 0.5308, "step": 287 }, { "epoch": 0.04, "grad_norm": 1.9083396990379513, "learning_rate": 9.99914537824539e-06, "loss": 0.5654, "step": 288 }, { "epoch": 0.04, "grad_norm": 1.7555024837914865, "learning_rate": 9.99910781800937e-06, "loss": 0.5698, "step": 289 }, { "epoch": 0.04, "grad_norm": 2.563462775984113, "learning_rate": 9.999069450123458e-06, "loss": 0.5425, "step": 290 }, { "epoch": 0.04, "grad_norm": 2.537480565574436, "learning_rate": 9.999030274593845e-06, "loss": 0.5425, "step": 291 }, { "epoch": 0.04, "grad_norm": 5.201318330504299, "learning_rate": 9.998990291426864e-06, "loss": 0.5718, "step": 292 }, { "epoch": 0.04, "grad_norm": 2.0509238907273457, "learning_rate": 9.998949500628977e-06, "loss": 0.4952, "step": 293 }, { "epoch": 0.04, "grad_norm": 1.6658988414947071, "learning_rate": 9.998907902206769e-06, "loss": 0.5382, "step": 294 }, { "epoch": 0.04, "grad_norm": 2.076364793574101, "learning_rate": 9.998865496166963e-06, "loss": 0.5792, "step": 295 }, { "epoch": 0.04, "grad_norm": 1.6504825769137341, "learning_rate": 9.99882228251641e-06, "loss": 0.5384, "step": 296 }, { "epoch": 0.04, "grad_norm": 4.195943188299471, "learning_rate": 9.998778261262093e-06, "loss": 0.4845, "step": 297 }, { "epoch": 0.04, "grad_norm": 3.1865330279591957, "learning_rate": 9.99873343241112e-06, "loss": 0.5822, "step": 298 }, { "epoch": 0.04, "grad_norm": 1.583521959332929, "learning_rate": 9.998687795970739e-06, "loss": 0.5497, "step": 299 }, { "epoch": 0.04, "grad_norm": 1.867600307176823, "learning_rate": 9.998641351948319e-06, "loss": 0.5321, "step": 300 }, { "epoch": 0.04, "grad_norm": 1.9599824750180688, "learning_rate": 9.998594100351362e-06, "loss": 0.5741, "step": 301 }, { "epoch": 0.04, "grad_norm": 0.6829182091305455, "learning_rate": 9.998546041187507e-06, "loss": 0.5211, "step": 302 }, { "epoch": 0.04, "grad_norm": 3.0686970869720076, "learning_rate": 9.998497174464517e-06, "loss": 0.525, "step": 303 }, { "epoch": 0.04, "grad_norm": 1.7658227546806848, "learning_rate": 9.998447500190283e-06, "loss": 0.5234, "step": 304 }, { "epoch": 0.04, "grad_norm": 2.1642748578874906, "learning_rate": 9.998397018372833e-06, "loss": 0.5805, "step": 305 }, { "epoch": 0.04, "grad_norm": 1.8098658826161074, "learning_rate": 9.998345729020324e-06, "loss": 0.5115, "step": 306 }, { "epoch": 0.04, "grad_norm": 2.0808928845396357, "learning_rate": 9.998293632141042e-06, "loss": 0.5643, "step": 307 }, { "epoch": 0.04, "grad_norm": 4.062037007760145, "learning_rate": 9.998240727743401e-06, "loss": 0.5168, "step": 308 }, { "epoch": 0.04, "grad_norm": 2.1539325790326656, "learning_rate": 9.99818701583595e-06, "loss": 0.4943, "step": 309 }, { "epoch": 0.04, "grad_norm": 7.820375964588249, "learning_rate": 9.998132496427366e-06, "loss": 0.5088, "step": 310 }, { "epoch": 0.04, "grad_norm": 1.954096590969464, "learning_rate": 9.998077169526459e-06, "loss": 0.5659, "step": 311 }, { "epoch": 0.04, "grad_norm": 2.285658810614788, "learning_rate": 9.998021035142166e-06, "loss": 0.5142, "step": 312 }, { "epoch": 0.04, "grad_norm": 1.9304285977081468, "learning_rate": 9.997964093283555e-06, "loss": 0.5098, "step": 313 }, { "epoch": 0.04, "grad_norm": 2.4164241091216825, "learning_rate": 9.997906343959827e-06, "loss": 0.5105, "step": 314 }, { "epoch": 0.04, "grad_norm": 2.6374326074383707, "learning_rate": 9.997847787180313e-06, "loss": 0.5503, "step": 315 }, { "epoch": 0.04, "grad_norm": 1.9372827013095961, "learning_rate": 9.99778842295447e-06, "loss": 0.5462, "step": 316 }, { "epoch": 0.04, "grad_norm": 1.5169237820059158, "learning_rate": 9.997728251291891e-06, "loss": 0.5151, "step": 317 }, { "epoch": 0.04, "grad_norm": 1.56778689015433, "learning_rate": 9.997667272202297e-06, "loss": 0.4994, "step": 318 }, { "epoch": 0.04, "grad_norm": 0.7363415337809248, "learning_rate": 9.99760548569554e-06, "loss": 0.4856, "step": 319 }, { "epoch": 0.04, "grad_norm": 1.6500642022053966, "learning_rate": 9.997542891781602e-06, "loss": 0.541, "step": 320 }, { "epoch": 0.04, "grad_norm": 1.646446903693105, "learning_rate": 9.997479490470594e-06, "loss": 0.5681, "step": 321 }, { "epoch": 0.04, "grad_norm": 1.6120661264680305, "learning_rate": 9.997415281772762e-06, "loss": 0.5224, "step": 322 }, { "epoch": 0.04, "grad_norm": 1.6969882678190478, "learning_rate": 9.997350265698477e-06, "loss": 0.563, "step": 323 }, { "epoch": 0.04, "grad_norm": 2.702569911701666, "learning_rate": 9.997284442258244e-06, "loss": 0.5087, "step": 324 }, { "epoch": 0.04, "grad_norm": 1.5080891548286657, "learning_rate": 9.997217811462698e-06, "loss": 0.5377, "step": 325 }, { "epoch": 0.04, "grad_norm": 4.292261492859581, "learning_rate": 9.997150373322602e-06, "loss": 0.5686, "step": 326 }, { "epoch": 0.04, "grad_norm": 2.150320693707667, "learning_rate": 9.997082127848853e-06, "loss": 0.5537, "step": 327 }, { "epoch": 0.04, "grad_norm": 1.8924546749988123, "learning_rate": 9.997013075052476e-06, "loss": 0.5463, "step": 328 }, { "epoch": 0.04, "grad_norm": 1.654314133243649, "learning_rate": 9.996943214944626e-06, "loss": 0.5565, "step": 329 }, { "epoch": 0.04, "grad_norm": 7.923442603059371, "learning_rate": 9.99687254753659e-06, "loss": 0.5881, "step": 330 }, { "epoch": 0.04, "grad_norm": 1.8293811891706615, "learning_rate": 9.996801072839789e-06, "loss": 0.5718, "step": 331 }, { "epoch": 0.04, "grad_norm": 1.480332829339136, "learning_rate": 9.996728790865763e-06, "loss": 0.503, "step": 332 }, { "epoch": 0.04, "grad_norm": 1.6201449901428477, "learning_rate": 9.996655701626195e-06, "loss": 0.4973, "step": 333 }, { "epoch": 0.04, "grad_norm": 2.2742138495119044, "learning_rate": 9.99658180513289e-06, "loss": 0.4997, "step": 334 }, { "epoch": 0.04, "grad_norm": 9.120557910370504, "learning_rate": 9.99650710139779e-06, "loss": 0.5397, "step": 335 }, { "epoch": 0.04, "grad_norm": 1.5861768219312984, "learning_rate": 9.996431590432962e-06, "loss": 0.543, "step": 336 }, { "epoch": 0.04, "grad_norm": 1.614805525586774, "learning_rate": 9.996355272250607e-06, "loss": 0.55, "step": 337 }, { "epoch": 0.04, "grad_norm": 2.6053554102899765, "learning_rate": 9.996278146863054e-06, "loss": 0.5516, "step": 338 }, { "epoch": 0.04, "grad_norm": 10.796024780854161, "learning_rate": 9.996200214282762e-06, "loss": 0.5283, "step": 339 }, { "epoch": 0.04, "grad_norm": 1.6948255023691408, "learning_rate": 9.996121474522323e-06, "loss": 0.5753, "step": 340 }, { "epoch": 0.04, "grad_norm": 1.6740506396070718, "learning_rate": 9.996041927594457e-06, "loss": 0.5164, "step": 341 }, { "epoch": 0.04, "grad_norm": 1.6315151778674875, "learning_rate": 9.995961573512018e-06, "loss": 0.5759, "step": 342 }, { "epoch": 0.04, "grad_norm": 1.701863204355339, "learning_rate": 9.995880412287985e-06, "loss": 0.5348, "step": 343 }, { "epoch": 0.04, "grad_norm": 2.103113802438708, "learning_rate": 9.995798443935473e-06, "loss": 0.54, "step": 344 }, { "epoch": 0.04, "grad_norm": 1.8491565354564938, "learning_rate": 9.995715668467722e-06, "loss": 0.5071, "step": 345 }, { "epoch": 0.04, "grad_norm": 2.0318284949244294, "learning_rate": 9.995632085898106e-06, "loss": 0.5683, "step": 346 }, { "epoch": 0.04, "grad_norm": 2.1338428368044196, "learning_rate": 9.995547696240132e-06, "loss": 0.531, "step": 347 }, { "epoch": 0.04, "grad_norm": 1.9904163800757648, "learning_rate": 9.995462499507427e-06, "loss": 0.5595, "step": 348 }, { "epoch": 0.04, "grad_norm": 1.5269130775268465, "learning_rate": 9.99537649571376e-06, "loss": 0.517, "step": 349 }, { "epoch": 0.04, "grad_norm": 1.6240532368533307, "learning_rate": 9.995289684873027e-06, "loss": 0.5192, "step": 350 }, { "epoch": 0.04, "grad_norm": 1.3651319032052907, "learning_rate": 9.99520206699925e-06, "loss": 0.4983, "step": 351 }, { "epoch": 0.04, "grad_norm": 2.342350856871222, "learning_rate": 9.995113642106584e-06, "loss": 0.5554, "step": 352 }, { "epoch": 0.04, "grad_norm": 2.6168109185854025, "learning_rate": 9.995024410209316e-06, "loss": 0.5243, "step": 353 }, { "epoch": 0.04, "grad_norm": 0.6988720397199879, "learning_rate": 9.994934371321862e-06, "loss": 0.5005, "step": 354 }, { "epoch": 0.04, "grad_norm": 4.159118467801112, "learning_rate": 9.994843525458771e-06, "loss": 0.5256, "step": 355 }, { "epoch": 0.04, "grad_norm": 1.7501348381594886, "learning_rate": 9.994751872634717e-06, "loss": 0.5444, "step": 356 }, { "epoch": 0.04, "grad_norm": 2.558673168866606, "learning_rate": 9.994659412864508e-06, "loss": 0.5795, "step": 357 }, { "epoch": 0.04, "grad_norm": 1.3712882222338771, "learning_rate": 9.994566146163082e-06, "loss": 0.4981, "step": 358 }, { "epoch": 0.04, "grad_norm": 3.917448635976479, "learning_rate": 9.994472072545506e-06, "loss": 0.5797, "step": 359 }, { "epoch": 0.04, "grad_norm": 2.5699481449625465, "learning_rate": 9.994377192026981e-06, "loss": 0.5329, "step": 360 }, { "epoch": 0.04, "grad_norm": 2.349005454005592, "learning_rate": 9.994281504622831e-06, "loss": 0.5051, "step": 361 }, { "epoch": 0.04, "grad_norm": 2.434754572817806, "learning_rate": 9.99418501034852e-06, "loss": 0.4765, "step": 362 }, { "epoch": 0.05, "grad_norm": 1.7721504794383782, "learning_rate": 9.994087709219637e-06, "loss": 0.603, "step": 363 }, { "epoch": 0.05, "grad_norm": 2.2223558544616, "learning_rate": 9.993989601251899e-06, "loss": 0.5589, "step": 364 }, { "epoch": 0.05, "grad_norm": 2.1127193527497323, "learning_rate": 9.993890686461159e-06, "loss": 0.5474, "step": 365 }, { "epoch": 0.05, "grad_norm": 2.1347486367959934, "learning_rate": 9.993790964863394e-06, "loss": 0.5455, "step": 366 }, { "epoch": 0.05, "grad_norm": 3.7495898246789374, "learning_rate": 9.993690436474719e-06, "loss": 0.4887, "step": 367 }, { "epoch": 0.05, "grad_norm": 1.783308092584701, "learning_rate": 9.993589101311373e-06, "loss": 0.5348, "step": 368 }, { "epoch": 0.05, "grad_norm": 1.5856712387472398, "learning_rate": 9.993486959389728e-06, "loss": 0.5714, "step": 369 }, { "epoch": 0.05, "grad_norm": 1.6477109895625353, "learning_rate": 9.993384010726288e-06, "loss": 0.5417, "step": 370 }, { "epoch": 0.05, "grad_norm": 5.309065931402148, "learning_rate": 9.99328025533768e-06, "loss": 0.5701, "step": 371 }, { "epoch": 0.05, "grad_norm": 0.6912380894441247, "learning_rate": 9.993175693240673e-06, "loss": 0.5116, "step": 372 }, { "epoch": 0.05, "grad_norm": 3.1409674989933656, "learning_rate": 9.993070324452154e-06, "loss": 0.5192, "step": 373 }, { "epoch": 0.05, "grad_norm": 1.778627892390217, "learning_rate": 9.99296414898915e-06, "loss": 0.5501, "step": 374 }, { "epoch": 0.05, "grad_norm": 2.570838939085222, "learning_rate": 9.992857166868814e-06, "loss": 0.4959, "step": 375 }, { "epoch": 0.05, "grad_norm": 2.0505514157390987, "learning_rate": 9.99274937810843e-06, "loss": 0.5285, "step": 376 }, { "epoch": 0.05, "grad_norm": 1.6209806009721568, "learning_rate": 9.99264078272541e-06, "loss": 0.5205, "step": 377 }, { "epoch": 0.05, "grad_norm": 1.561951117823356, "learning_rate": 9.992531380737303e-06, "loss": 0.5162, "step": 378 }, { "epoch": 0.05, "grad_norm": 1.6842346047481311, "learning_rate": 9.99242117216178e-06, "loss": 0.5459, "step": 379 }, { "epoch": 0.05, "grad_norm": 1.5206441075953183, "learning_rate": 9.992310157016647e-06, "loss": 0.5212, "step": 380 }, { "epoch": 0.05, "grad_norm": 1.613170143739462, "learning_rate": 9.992198335319839e-06, "loss": 0.5296, "step": 381 }, { "epoch": 0.05, "grad_norm": 2.7097419358184256, "learning_rate": 9.992085707089424e-06, "loss": 0.5358, "step": 382 }, { "epoch": 0.05, "grad_norm": 1.8302659825726482, "learning_rate": 9.991972272343595e-06, "loss": 0.4922, "step": 383 }, { "epoch": 0.05, "grad_norm": 2.3884596685854786, "learning_rate": 9.991858031100682e-06, "loss": 0.5421, "step": 384 }, { "epoch": 0.05, "grad_norm": 1.8222840733689352, "learning_rate": 9.991742983379137e-06, "loss": 0.5477, "step": 385 }, { "epoch": 0.05, "grad_norm": 1.8910852787106245, "learning_rate": 9.991627129197552e-06, "loss": 0.5638, "step": 386 }, { "epoch": 0.05, "grad_norm": 1.5426420623594577, "learning_rate": 9.991510468574642e-06, "loss": 0.5331, "step": 387 }, { "epoch": 0.05, "grad_norm": 1.6218897462067299, "learning_rate": 9.991393001529255e-06, "loss": 0.4998, "step": 388 }, { "epoch": 0.05, "grad_norm": 3.4803448874576803, "learning_rate": 9.991274728080368e-06, "loss": 0.5687, "step": 389 }, { "epoch": 0.05, "grad_norm": 1.817078050217724, "learning_rate": 9.991155648247086e-06, "loss": 0.5589, "step": 390 }, { "epoch": 0.05, "grad_norm": 1.56274673389566, "learning_rate": 9.991035762048655e-06, "loss": 0.5402, "step": 391 }, { "epoch": 0.05, "grad_norm": 0.6700379174795714, "learning_rate": 9.990915069504438e-06, "loss": 0.4653, "step": 392 }, { "epoch": 0.05, "grad_norm": 0.6793263712894418, "learning_rate": 9.990793570633935e-06, "loss": 0.4721, "step": 393 }, { "epoch": 0.05, "grad_norm": 1.6887319698367116, "learning_rate": 9.990671265456778e-06, "loss": 0.5377, "step": 394 }, { "epoch": 0.05, "grad_norm": 1.4642447699000192, "learning_rate": 9.990548153992721e-06, "loss": 0.552, "step": 395 }, { "epoch": 0.05, "grad_norm": 1.8643006750012217, "learning_rate": 9.990424236261657e-06, "loss": 0.5422, "step": 396 }, { "epoch": 0.05, "grad_norm": 0.7130101747622972, "learning_rate": 9.990299512283608e-06, "loss": 0.4625, "step": 397 }, { "epoch": 0.05, "grad_norm": 1.509124542655514, "learning_rate": 9.990173982078721e-06, "loss": 0.5449, "step": 398 }, { "epoch": 0.05, "grad_norm": 1.7287882922623734, "learning_rate": 9.990047645667277e-06, "loss": 0.525, "step": 399 }, { "epoch": 0.05, "grad_norm": 1.597759493266317, "learning_rate": 9.98992050306969e-06, "loss": 0.5109, "step": 400 }, { "epoch": 0.05, "grad_norm": 1.7164045972462163, "learning_rate": 9.989792554306496e-06, "loss": 0.4961, "step": 401 }, { "epoch": 0.05, "grad_norm": 1.7717464858573455, "learning_rate": 9.98966379939837e-06, "loss": 0.4837, "step": 402 }, { "epoch": 0.05, "grad_norm": 2.9481512152040525, "learning_rate": 9.989534238366113e-06, "loss": 0.5405, "step": 403 }, { "epoch": 0.05, "grad_norm": 1.6987543219058931, "learning_rate": 9.989403871230654e-06, "loss": 0.5504, "step": 404 }, { "epoch": 0.05, "grad_norm": 1.4308349414603916, "learning_rate": 9.989272698013058e-06, "loss": 0.5327, "step": 405 }, { "epoch": 0.05, "grad_norm": 1.6876272721147005, "learning_rate": 9.989140718734515e-06, "loss": 0.5607, "step": 406 }, { "epoch": 0.05, "grad_norm": 0.729149900507196, "learning_rate": 9.989007933416348e-06, "loss": 0.4808, "step": 407 }, { "epoch": 0.05, "grad_norm": 1.7700639883038296, "learning_rate": 9.988874342080011e-06, "loss": 0.5669, "step": 408 }, { "epoch": 0.05, "grad_norm": 1.5781092289283492, "learning_rate": 9.988739944747086e-06, "loss": 0.505, "step": 409 }, { "epoch": 0.05, "grad_norm": 1.557526972913807, "learning_rate": 9.988604741439288e-06, "loss": 0.5129, "step": 410 }, { "epoch": 0.05, "grad_norm": 1.8408561874260345, "learning_rate": 9.988468732178456e-06, "loss": 0.5595, "step": 411 }, { "epoch": 0.05, "grad_norm": 2.0413512443099053, "learning_rate": 9.988331916986565e-06, "loss": 0.5793, "step": 412 }, { "epoch": 0.05, "grad_norm": 1.6768995403140314, "learning_rate": 9.988194295885721e-06, "loss": 0.5092, "step": 413 }, { "epoch": 0.05, "grad_norm": 1.4892709861156581, "learning_rate": 9.988055868898156e-06, "loss": 0.5342, "step": 414 }, { "epoch": 0.05, "grad_norm": 1.6539830627005252, "learning_rate": 9.987916636046234e-06, "loss": 0.5679, "step": 415 }, { "epoch": 0.05, "grad_norm": 1.6412699630465084, "learning_rate": 9.987776597352451e-06, "loss": 0.531, "step": 416 }, { "epoch": 0.05, "grad_norm": 1.5955019956242151, "learning_rate": 9.987635752839429e-06, "loss": 0.4732, "step": 417 }, { "epoch": 0.05, "grad_norm": 1.4359477688178175, "learning_rate": 9.987494102529924e-06, "loss": 0.4767, "step": 418 }, { "epoch": 0.05, "grad_norm": 1.6153040652848387, "learning_rate": 9.987351646446824e-06, "loss": 0.4845, "step": 419 }, { "epoch": 0.05, "grad_norm": 1.592003559782074, "learning_rate": 9.987208384613136e-06, "loss": 0.5956, "step": 420 }, { "epoch": 0.05, "grad_norm": 1.7946167267388238, "learning_rate": 9.987064317052013e-06, "loss": 0.6168, "step": 421 }, { "epoch": 0.05, "grad_norm": 0.7008916654016195, "learning_rate": 9.986919443786727e-06, "loss": 0.5003, "step": 422 }, { "epoch": 0.05, "grad_norm": 5.2870924806038975, "learning_rate": 9.986773764840684e-06, "loss": 0.5361, "step": 423 }, { "epoch": 0.05, "grad_norm": 4.484401416940637, "learning_rate": 9.98662728023742e-06, "loss": 0.5721, "step": 424 }, { "epoch": 0.05, "grad_norm": 1.547533213628365, "learning_rate": 9.986479990000598e-06, "loss": 0.5358, "step": 425 }, { "epoch": 0.05, "grad_norm": 2.673741921515085, "learning_rate": 9.98633189415402e-06, "loss": 0.504, "step": 426 }, { "epoch": 0.05, "grad_norm": 1.4089500017727525, "learning_rate": 9.986182992721606e-06, "loss": 0.5495, "step": 427 }, { "epoch": 0.05, "grad_norm": 2.2003603241293996, "learning_rate": 9.986033285727418e-06, "loss": 0.5278, "step": 428 }, { "epoch": 0.05, "grad_norm": 3.784641006031379, "learning_rate": 9.985882773195638e-06, "loss": 0.5357, "step": 429 }, { "epoch": 0.05, "grad_norm": 1.7317785476187197, "learning_rate": 9.985731455150584e-06, "loss": 0.5567, "step": 430 }, { "epoch": 0.05, "grad_norm": 2.648485799879626, "learning_rate": 9.985579331616705e-06, "loss": 0.4999, "step": 431 }, { "epoch": 0.05, "grad_norm": 1.9685222126772437, "learning_rate": 9.985426402618574e-06, "loss": 0.5034, "step": 432 }, { "epoch": 0.05, "grad_norm": 1.4689771404993912, "learning_rate": 9.985272668180901e-06, "loss": 0.537, "step": 433 }, { "epoch": 0.05, "grad_norm": 1.688785913533373, "learning_rate": 9.985118128328522e-06, "loss": 0.5454, "step": 434 }, { "epoch": 0.05, "grad_norm": 4.48562468774087, "learning_rate": 9.984962783086403e-06, "loss": 0.4896, "step": 435 }, { "epoch": 0.05, "grad_norm": 1.459814886971303, "learning_rate": 9.984806632479643e-06, "loss": 0.4912, "step": 436 }, { "epoch": 0.05, "grad_norm": 1.5872647895748622, "learning_rate": 9.98464967653347e-06, "loss": 0.5714, "step": 437 }, { "epoch": 0.05, "grad_norm": 1.9004557876468335, "learning_rate": 9.98449191527324e-06, "loss": 0.494, "step": 438 }, { "epoch": 0.05, "grad_norm": 1.985375325252717, "learning_rate": 9.984333348724442e-06, "loss": 0.5637, "step": 439 }, { "epoch": 0.05, "grad_norm": 1.8027928544018976, "learning_rate": 9.984173976912695e-06, "loss": 0.5001, "step": 440 }, { "epoch": 0.05, "grad_norm": 1.5841099007415989, "learning_rate": 9.984013799863744e-06, "loss": 0.5864, "step": 441 }, { "epoch": 0.05, "grad_norm": 1.5646937500034126, "learning_rate": 9.983852817603468e-06, "loss": 0.574, "step": 442 }, { "epoch": 0.05, "grad_norm": 1.97296027361365, "learning_rate": 9.983691030157876e-06, "loss": 0.4592, "step": 443 }, { "epoch": 0.06, "grad_norm": 0.7128940493354311, "learning_rate": 9.983528437553106e-06, "loss": 0.5014, "step": 444 }, { "epoch": 0.06, "grad_norm": 1.3553160387069387, "learning_rate": 9.983365039815425e-06, "loss": 0.5138, "step": 445 }, { "epoch": 0.06, "grad_norm": 1.7551202165825732, "learning_rate": 9.983200836971234e-06, "loss": 0.4924, "step": 446 }, { "epoch": 0.06, "grad_norm": 1.783065671146444, "learning_rate": 9.983035829047058e-06, "loss": 0.4881, "step": 447 }, { "epoch": 0.06, "grad_norm": 1.6346481923047431, "learning_rate": 9.982870016069557e-06, "loss": 0.5086, "step": 448 }, { "epoch": 0.06, "grad_norm": 2.269760431343647, "learning_rate": 9.98270339806552e-06, "loss": 0.4659, "step": 449 }, { "epoch": 0.06, "grad_norm": 1.9208644340466066, "learning_rate": 9.982535975061866e-06, "loss": 0.4691, "step": 450 }, { "epoch": 0.06, "grad_norm": 0.700303977995299, "learning_rate": 9.982367747085642e-06, "loss": 0.4813, "step": 451 }, { "epoch": 0.06, "grad_norm": 1.4580808287839546, "learning_rate": 9.982198714164029e-06, "loss": 0.5482, "step": 452 }, { "epoch": 0.06, "grad_norm": 1.6395785682570598, "learning_rate": 9.982028876324334e-06, "loss": 0.5674, "step": 453 }, { "epoch": 0.06, "grad_norm": 3.089402865370069, "learning_rate": 9.981858233593996e-06, "loss": 0.5959, "step": 454 }, { "epoch": 0.06, "grad_norm": 1.7647424329197148, "learning_rate": 9.981686786000584e-06, "loss": 0.5567, "step": 455 }, { "epoch": 0.06, "grad_norm": 2.6426304996228964, "learning_rate": 9.981514533571797e-06, "loss": 0.5418, "step": 456 }, { "epoch": 0.06, "grad_norm": 2.124650738441062, "learning_rate": 9.981341476335464e-06, "loss": 0.602, "step": 457 }, { "epoch": 0.06, "grad_norm": 4.379438226743473, "learning_rate": 9.981167614319542e-06, "loss": 0.4779, "step": 458 }, { "epoch": 0.06, "grad_norm": 3.0204003882969204, "learning_rate": 9.980992947552124e-06, "loss": 0.5301, "step": 459 }, { "epoch": 0.06, "grad_norm": 1.6188537242497516, "learning_rate": 9.980817476061426e-06, "loss": 0.4723, "step": 460 }, { "epoch": 0.06, "grad_norm": 1.636311850641546, "learning_rate": 9.980641199875797e-06, "loss": 0.5494, "step": 461 }, { "epoch": 0.06, "grad_norm": 1.5196244609813645, "learning_rate": 9.980464119023718e-06, "loss": 0.5092, "step": 462 }, { "epoch": 0.06, "grad_norm": 1.59037993007294, "learning_rate": 9.980286233533795e-06, "loss": 0.5209, "step": 463 }, { "epoch": 0.06, "grad_norm": 1.4369151490083365, "learning_rate": 9.980107543434769e-06, "loss": 0.5809, "step": 464 }, { "epoch": 0.06, "grad_norm": 3.0160569481227903, "learning_rate": 9.97992804875551e-06, "loss": 0.5392, "step": 465 }, { "epoch": 0.06, "grad_norm": 0.7213586451768685, "learning_rate": 9.979747749525014e-06, "loss": 0.5294, "step": 466 }, { "epoch": 0.06, "grad_norm": 1.5676191987425458, "learning_rate": 9.979566645772412e-06, "loss": 0.4995, "step": 467 }, { "epoch": 0.06, "grad_norm": 2.2941976901857233, "learning_rate": 9.97938473752696e-06, "loss": 0.5625, "step": 468 }, { "epoch": 0.06, "grad_norm": 2.3231610487610865, "learning_rate": 9.979202024818052e-06, "loss": 0.561, "step": 469 }, { "epoch": 0.06, "grad_norm": 2.0242057322321005, "learning_rate": 9.979018507675202e-06, "loss": 0.5524, "step": 470 }, { "epoch": 0.06, "grad_norm": 1.8226219382548836, "learning_rate": 9.978834186128063e-06, "loss": 0.512, "step": 471 }, { "epoch": 0.06, "grad_norm": 1.4955935257507191, "learning_rate": 9.97864906020641e-06, "loss": 0.5431, "step": 472 }, { "epoch": 0.06, "grad_norm": 1.680011950409537, "learning_rate": 9.978463129940153e-06, "loss": 0.5267, "step": 473 }, { "epoch": 0.06, "grad_norm": 1.565469276971012, "learning_rate": 9.978276395359332e-06, "loss": 0.5174, "step": 474 }, { "epoch": 0.06, "grad_norm": 0.6714958132903396, "learning_rate": 9.978088856494115e-06, "loss": 0.5051, "step": 475 }, { "epoch": 0.06, "grad_norm": 1.8389831857433339, "learning_rate": 9.977900513374799e-06, "loss": 0.5401, "step": 476 }, { "epoch": 0.06, "grad_norm": 1.5499532118888697, "learning_rate": 9.977711366031813e-06, "loss": 0.544, "step": 477 }, { "epoch": 0.06, "grad_norm": 1.727431677360426, "learning_rate": 9.977521414495716e-06, "loss": 0.518, "step": 478 }, { "epoch": 0.06, "grad_norm": 2.157775699970845, "learning_rate": 9.977330658797199e-06, "loss": 0.5556, "step": 479 }, { "epoch": 0.06, "grad_norm": 1.5612974363773897, "learning_rate": 9.977139098967075e-06, "loss": 0.5161, "step": 480 }, { "epoch": 0.06, "grad_norm": 1.7175998885815322, "learning_rate": 9.976946735036296e-06, "loss": 0.5357, "step": 481 }, { "epoch": 0.06, "grad_norm": 1.406772227901931, "learning_rate": 9.97675356703594e-06, "loss": 0.5333, "step": 482 }, { "epoch": 0.06, "grad_norm": 2.1451551555311283, "learning_rate": 9.976559594997211e-06, "loss": 0.5144, "step": 483 }, { "epoch": 0.06, "grad_norm": 1.8879155418167197, "learning_rate": 9.976364818951452e-06, "loss": 0.5342, "step": 484 }, { "epoch": 0.06, "grad_norm": 1.4034381097267934, "learning_rate": 9.976169238930128e-06, "loss": 0.5143, "step": 485 }, { "epoch": 0.06, "grad_norm": 1.6253361682506393, "learning_rate": 9.975972854964837e-06, "loss": 0.5532, "step": 486 }, { "epoch": 0.06, "grad_norm": 2.0486162168827273, "learning_rate": 9.975775667087308e-06, "loss": 0.5694, "step": 487 }, { "epoch": 0.06, "grad_norm": 1.5977042020706673, "learning_rate": 9.975577675329396e-06, "loss": 0.6012, "step": 488 }, { "epoch": 0.06, "grad_norm": 2.6581212962368017, "learning_rate": 9.97537887972309e-06, "loss": 0.5014, "step": 489 }, { "epoch": 0.06, "grad_norm": 1.450762817262192, "learning_rate": 9.975179280300507e-06, "loss": 0.5355, "step": 490 }, { "epoch": 0.06, "grad_norm": 1.496453897715201, "learning_rate": 9.974978877093892e-06, "loss": 0.5749, "step": 491 }, { "epoch": 0.06, "grad_norm": 1.5955663116958974, "learning_rate": 9.974777670135627e-06, "loss": 0.5485, "step": 492 }, { "epoch": 0.06, "grad_norm": 1.6511586849526274, "learning_rate": 9.974575659458214e-06, "loss": 0.528, "step": 493 }, { "epoch": 0.06, "grad_norm": 1.8173706390670754, "learning_rate": 9.97437284509429e-06, "loss": 0.5043, "step": 494 }, { "epoch": 0.06, "grad_norm": 1.450248038097281, "learning_rate": 9.974169227076623e-06, "loss": 0.56, "step": 495 }, { "epoch": 0.06, "grad_norm": 2.3759328275504474, "learning_rate": 9.97396480543811e-06, "loss": 0.519, "step": 496 }, { "epoch": 0.06, "grad_norm": 1.6118263272522013, "learning_rate": 9.973759580211776e-06, "loss": 0.4886, "step": 497 }, { "epoch": 0.06, "grad_norm": 1.4398888457780274, "learning_rate": 9.973553551430775e-06, "loss": 0.5334, "step": 498 }, { "epoch": 0.06, "grad_norm": 1.469914373917854, "learning_rate": 9.973346719128395e-06, "loss": 0.4947, "step": 499 }, { "epoch": 0.06, "grad_norm": 1.570025029324263, "learning_rate": 9.973139083338052e-06, "loss": 0.5887, "step": 500 }, { "epoch": 0.06, "grad_norm": 1.7122866183202354, "learning_rate": 9.97293064409329e-06, "loss": 0.5287, "step": 501 }, { "epoch": 0.06, "grad_norm": 1.4599374691955285, "learning_rate": 9.972721401427785e-06, "loss": 0.5127, "step": 502 }, { "epoch": 0.06, "grad_norm": 1.4856231479658881, "learning_rate": 9.972511355375341e-06, "loss": 0.5805, "step": 503 }, { "epoch": 0.06, "grad_norm": 2.0164015385758502, "learning_rate": 9.972300505969896e-06, "loss": 0.5619, "step": 504 }, { "epoch": 0.06, "grad_norm": 2.2997701887534436, "learning_rate": 9.97208885324551e-06, "loss": 0.4729, "step": 505 }, { "epoch": 0.06, "grad_norm": 1.756468060556157, "learning_rate": 9.97187639723638e-06, "loss": 0.5284, "step": 506 }, { "epoch": 0.06, "grad_norm": 1.7198355693077105, "learning_rate": 9.971663137976829e-06, "loss": 0.5327, "step": 507 }, { "epoch": 0.06, "grad_norm": 1.7937945953770489, "learning_rate": 9.971449075501313e-06, "loss": 0.5882, "step": 508 }, { "epoch": 0.06, "grad_norm": 1.4428157798894228, "learning_rate": 9.971234209844413e-06, "loss": 0.516, "step": 509 }, { "epoch": 0.06, "grad_norm": 0.6343343346489136, "learning_rate": 9.971018541040844e-06, "loss": 0.4583, "step": 510 }, { "epoch": 0.06, "grad_norm": 1.6346268222138216, "learning_rate": 9.970802069125449e-06, "loss": 0.5788, "step": 511 }, { "epoch": 0.06, "grad_norm": 2.236311818205272, "learning_rate": 9.9705847941332e-06, "loss": 0.5473, "step": 512 }, { "epoch": 0.06, "grad_norm": 4.186404431476861, "learning_rate": 9.970366716099203e-06, "loss": 0.5021, "step": 513 }, { "epoch": 0.06, "grad_norm": 1.448273391917764, "learning_rate": 9.970147835058686e-06, "loss": 0.4891, "step": 514 }, { "epoch": 0.06, "grad_norm": 1.6707135102073185, "learning_rate": 9.969928151047012e-06, "loss": 0.5128, "step": 515 }, { "epoch": 0.06, "grad_norm": 1.9157604861759812, "learning_rate": 9.969707664099677e-06, "loss": 0.5314, "step": 516 }, { "epoch": 0.06, "grad_norm": 1.4641731434697254, "learning_rate": 9.969486374252297e-06, "loss": 0.5648, "step": 517 }, { "epoch": 0.06, "grad_norm": 0.6951811673512422, "learning_rate": 9.969264281540627e-06, "loss": 0.5429, "step": 518 }, { "epoch": 0.06, "grad_norm": 1.5351361206003573, "learning_rate": 9.969041386000547e-06, "loss": 0.5203, "step": 519 }, { "epoch": 0.06, "grad_norm": 1.8922924188464132, "learning_rate": 9.968817687668067e-06, "loss": 0.5381, "step": 520 }, { "epoch": 0.06, "grad_norm": 1.5799451731489043, "learning_rate": 9.96859318657933e-06, "loss": 0.5668, "step": 521 }, { "epoch": 0.06, "grad_norm": 2.0926399677961864, "learning_rate": 9.968367882770601e-06, "loss": 0.5635, "step": 522 }, { "epoch": 0.06, "grad_norm": 1.9474775210488486, "learning_rate": 9.968141776278287e-06, "loss": 0.571, "step": 523 }, { "epoch": 0.07, "grad_norm": 1.7431408889119921, "learning_rate": 9.96791486713891e-06, "loss": 0.5826, "step": 524 }, { "epoch": 0.07, "grad_norm": 1.8072112193176124, "learning_rate": 9.967687155389135e-06, "loss": 0.6058, "step": 525 }, { "epoch": 0.07, "grad_norm": 1.5487646831257245, "learning_rate": 9.967458641065748e-06, "loss": 0.5161, "step": 526 }, { "epoch": 0.07, "grad_norm": 1.9345132321574223, "learning_rate": 9.967229324205666e-06, "loss": 0.5713, "step": 527 }, { "epoch": 0.07, "grad_norm": 1.9435970449322106, "learning_rate": 9.966999204845942e-06, "loss": 0.5563, "step": 528 }, { "epoch": 0.07, "grad_norm": 1.43796673259639, "learning_rate": 9.966768283023752e-06, "loss": 0.5661, "step": 529 }, { "epoch": 0.07, "grad_norm": 1.6557336189733225, "learning_rate": 9.966536558776399e-06, "loss": 0.5597, "step": 530 }, { "epoch": 0.07, "grad_norm": 1.9099136381706796, "learning_rate": 9.966304032141325e-06, "loss": 0.4683, "step": 531 }, { "epoch": 0.07, "grad_norm": 1.5887496350127177, "learning_rate": 9.966070703156096e-06, "loss": 0.476, "step": 532 }, { "epoch": 0.07, "grad_norm": 2.2832288568597026, "learning_rate": 9.965836571858408e-06, "loss": 0.4918, "step": 533 }, { "epoch": 0.07, "grad_norm": 1.8915667838943808, "learning_rate": 9.965601638286085e-06, "loss": 0.4534, "step": 534 }, { "epoch": 0.07, "grad_norm": 1.5838608224606279, "learning_rate": 9.965365902477085e-06, "loss": 0.5287, "step": 535 }, { "epoch": 0.07, "grad_norm": 1.5069296219030366, "learning_rate": 9.965129364469492e-06, "loss": 0.522, "step": 536 }, { "epoch": 0.07, "grad_norm": 1.575781289038582, "learning_rate": 9.96489202430152e-06, "loss": 0.5527, "step": 537 }, { "epoch": 0.07, "grad_norm": 1.495415835304909, "learning_rate": 9.964653882011516e-06, "loss": 0.5092, "step": 538 }, { "epoch": 0.07, "grad_norm": 1.6262876945226945, "learning_rate": 9.964414937637953e-06, "loss": 0.4981, "step": 539 }, { "epoch": 0.07, "grad_norm": 0.6387936310785975, "learning_rate": 9.964175191219436e-06, "loss": 0.4891, "step": 540 }, { "epoch": 0.07, "grad_norm": 1.6217650667702996, "learning_rate": 9.963934642794694e-06, "loss": 0.5808, "step": 541 }, { "epoch": 0.07, "grad_norm": 1.4841632250821106, "learning_rate": 9.963693292402592e-06, "loss": 0.552, "step": 542 }, { "epoch": 0.07, "grad_norm": 2.2404882976865395, "learning_rate": 9.963451140082124e-06, "loss": 0.5235, "step": 543 }, { "epoch": 0.07, "grad_norm": 1.8579527183857778, "learning_rate": 9.963208185872407e-06, "loss": 0.5543, "step": 544 }, { "epoch": 0.07, "grad_norm": 1.9117719830923814, "learning_rate": 9.962964429812697e-06, "loss": 0.5189, "step": 545 }, { "epoch": 0.07, "grad_norm": 1.5305379419100715, "learning_rate": 9.962719871942374e-06, "loss": 0.5631, "step": 546 }, { "epoch": 0.07, "grad_norm": 1.882149654586994, "learning_rate": 9.962474512300948e-06, "loss": 0.5624, "step": 547 }, { "epoch": 0.07, "grad_norm": 1.6646759237474957, "learning_rate": 9.962228350928058e-06, "loss": 0.5551, "step": 548 }, { "epoch": 0.07, "grad_norm": 1.5649208125553558, "learning_rate": 9.961981387863476e-06, "loss": 0.5388, "step": 549 }, { "epoch": 0.07, "grad_norm": 1.4445542695684628, "learning_rate": 9.961733623147098e-06, "loss": 0.5359, "step": 550 }, { "epoch": 0.07, "grad_norm": 1.621089252780402, "learning_rate": 9.961485056818957e-06, "loss": 0.5501, "step": 551 }, { "epoch": 0.07, "grad_norm": 1.3600616481489842, "learning_rate": 9.961235688919204e-06, "loss": 0.5164, "step": 552 }, { "epoch": 0.07, "grad_norm": 1.5291096429900197, "learning_rate": 9.960985519488133e-06, "loss": 0.5464, "step": 553 }, { "epoch": 0.07, "grad_norm": 1.8444597766221988, "learning_rate": 9.96073454856616e-06, "loss": 0.5621, "step": 554 }, { "epoch": 0.07, "grad_norm": 1.6732011691950741, "learning_rate": 9.96048277619383e-06, "loss": 0.5049, "step": 555 }, { "epoch": 0.07, "grad_norm": 1.438419778694226, "learning_rate": 9.960230202411818e-06, "loss": 0.5538, "step": 556 }, { "epoch": 0.07, "grad_norm": 1.6603929165553797, "learning_rate": 9.95997682726093e-06, "loss": 0.5469, "step": 557 }, { "epoch": 0.07, "grad_norm": 1.7668487791259488, "learning_rate": 9.959722650782104e-06, "loss": 0.555, "step": 558 }, { "epoch": 0.07, "grad_norm": 1.6218233851859314, "learning_rate": 9.959467673016403e-06, "loss": 0.5416, "step": 559 }, { "epoch": 0.07, "grad_norm": 2.0098143485676965, "learning_rate": 9.959211894005018e-06, "loss": 0.588, "step": 560 }, { "epoch": 0.07, "grad_norm": 1.5346689636597521, "learning_rate": 9.958955313789277e-06, "loss": 0.6114, "step": 561 }, { "epoch": 0.07, "grad_norm": 1.3691963998562475, "learning_rate": 9.958697932410631e-06, "loss": 0.5647, "step": 562 }, { "epoch": 0.07, "grad_norm": 1.5902891681020572, "learning_rate": 9.958439749910658e-06, "loss": 0.5241, "step": 563 }, { "epoch": 0.07, "grad_norm": 1.4317090557464656, "learning_rate": 9.958180766331076e-06, "loss": 0.5344, "step": 564 }, { "epoch": 0.07, "grad_norm": 1.6223451298186553, "learning_rate": 9.957920981713723e-06, "loss": 0.5354, "step": 565 }, { "epoch": 0.07, "grad_norm": 1.4911420362299912, "learning_rate": 9.95766039610057e-06, "loss": 0.5261, "step": 566 }, { "epoch": 0.07, "grad_norm": 1.459595289704798, "learning_rate": 9.957399009533716e-06, "loss": 0.5441, "step": 567 }, { "epoch": 0.07, "grad_norm": 1.50536016248566, "learning_rate": 9.957136822055392e-06, "loss": 0.5742, "step": 568 }, { "epoch": 0.07, "grad_norm": 3.099600133776981, "learning_rate": 9.956873833707958e-06, "loss": 0.5347, "step": 569 }, { "epoch": 0.07, "grad_norm": 1.823400575954244, "learning_rate": 9.956610044533897e-06, "loss": 0.5679, "step": 570 }, { "epoch": 0.07, "grad_norm": 2.211244433828808, "learning_rate": 9.956345454575831e-06, "loss": 0.5144, "step": 571 }, { "epoch": 0.07, "grad_norm": 1.3813580483388566, "learning_rate": 9.956080063876503e-06, "loss": 0.5213, "step": 572 }, { "epoch": 0.07, "grad_norm": 1.547609620866616, "learning_rate": 9.955813872478794e-06, "loss": 0.5107, "step": 573 }, { "epoch": 0.07, "grad_norm": 1.9698736531779897, "learning_rate": 9.955546880425706e-06, "loss": 0.5011, "step": 574 }, { "epoch": 0.07, "grad_norm": 0.7132662201669262, "learning_rate": 9.955279087760374e-06, "loss": 0.4852, "step": 575 }, { "epoch": 0.07, "grad_norm": 2.1289820789250253, "learning_rate": 9.955010494526062e-06, "loss": 0.6084, "step": 576 }, { "epoch": 0.07, "grad_norm": 0.6894981066518925, "learning_rate": 9.954741100766167e-06, "loss": 0.5068, "step": 577 }, { "epoch": 0.07, "grad_norm": 1.993185449507924, "learning_rate": 9.95447090652421e-06, "loss": 0.5744, "step": 578 }, { "epoch": 0.07, "grad_norm": 1.9175405662571439, "learning_rate": 9.954199911843842e-06, "loss": 0.5143, "step": 579 }, { "epoch": 0.07, "grad_norm": 1.707912930910467, "learning_rate": 9.953928116768848e-06, "loss": 0.5874, "step": 580 }, { "epoch": 0.07, "grad_norm": 2.1196759791921496, "learning_rate": 9.953655521343132e-06, "loss": 0.5544, "step": 581 }, { "epoch": 0.07, "grad_norm": 1.6578798781305177, "learning_rate": 9.953382125610742e-06, "loss": 0.5006, "step": 582 }, { "epoch": 0.07, "grad_norm": 1.7117964965327253, "learning_rate": 9.953107929615843e-06, "loss": 0.5286, "step": 583 }, { "epoch": 0.07, "grad_norm": 1.718532476961154, "learning_rate": 9.952832933402733e-06, "loss": 0.5907, "step": 584 }, { "epoch": 0.07, "grad_norm": 4.874584366704575, "learning_rate": 9.952557137015846e-06, "loss": 0.5456, "step": 585 }, { "epoch": 0.07, "grad_norm": 2.091952706607619, "learning_rate": 9.952280540499733e-06, "loss": 0.5807, "step": 586 }, { "epoch": 0.07, "grad_norm": 1.8340961057465826, "learning_rate": 9.952003143899082e-06, "loss": 0.5044, "step": 587 }, { "epoch": 0.07, "grad_norm": 1.4142321636334534, "learning_rate": 9.951724947258713e-06, "loss": 0.4952, "step": 588 }, { "epoch": 0.07, "grad_norm": 1.5498823391862544, "learning_rate": 9.951445950623565e-06, "loss": 0.5229, "step": 589 }, { "epoch": 0.07, "grad_norm": 7.976631116773797, "learning_rate": 9.951166154038716e-06, "loss": 0.5212, "step": 590 }, { "epoch": 0.07, "grad_norm": 1.4043970725348118, "learning_rate": 9.950885557549369e-06, "loss": 0.5213, "step": 591 }, { "epoch": 0.07, "grad_norm": 1.6411492750911545, "learning_rate": 9.950604161200855e-06, "loss": 0.508, "step": 592 }, { "epoch": 0.07, "grad_norm": 1.6155280198944444, "learning_rate": 9.950321965038638e-06, "loss": 0.5176, "step": 593 }, { "epoch": 0.07, "grad_norm": 1.6329573178476742, "learning_rate": 9.950038969108312e-06, "loss": 0.5381, "step": 594 }, { "epoch": 0.07, "grad_norm": 1.5322368189387603, "learning_rate": 9.949755173455591e-06, "loss": 0.4737, "step": 595 }, { "epoch": 0.07, "grad_norm": 1.8966104227734712, "learning_rate": 9.94947057812633e-06, "loss": 0.5447, "step": 596 }, { "epoch": 0.07, "grad_norm": 2.3255920186297554, "learning_rate": 9.949185183166506e-06, "loss": 0.4983, "step": 597 }, { "epoch": 0.07, "grad_norm": 1.514982661457202, "learning_rate": 9.948898988622226e-06, "loss": 0.4703, "step": 598 }, { "epoch": 0.07, "grad_norm": 1.5578472451579808, "learning_rate": 9.94861199453973e-06, "loss": 0.5355, "step": 599 }, { "epoch": 0.07, "grad_norm": 1.8462377204745257, "learning_rate": 9.948324200965382e-06, "loss": 0.5217, "step": 600 }, { "epoch": 0.07, "grad_norm": 2.0501987500355052, "learning_rate": 9.948035607945678e-06, "loss": 0.5216, "step": 601 }, { "epoch": 0.07, "grad_norm": 0.7196396011246718, "learning_rate": 9.947746215527246e-06, "loss": 0.4568, "step": 602 }, { "epoch": 0.07, "grad_norm": 2.2029295205716912, "learning_rate": 9.947456023756834e-06, "loss": 0.5113, "step": 603 }, { "epoch": 0.07, "grad_norm": 2.4106620683922255, "learning_rate": 9.94716503268133e-06, "loss": 0.5382, "step": 604 }, { "epoch": 0.08, "grad_norm": 1.4704451672909378, "learning_rate": 9.946873242347741e-06, "loss": 0.552, "step": 605 }, { "epoch": 0.08, "grad_norm": 1.5073803415658742, "learning_rate": 9.946580652803215e-06, "loss": 0.4871, "step": 606 }, { "epoch": 0.08, "grad_norm": 1.9073111646355374, "learning_rate": 9.946287264095016e-06, "loss": 0.5451, "step": 607 }, { "epoch": 0.08, "grad_norm": 1.4481931142536788, "learning_rate": 9.945993076270548e-06, "loss": 0.4984, "step": 608 }, { "epoch": 0.08, "grad_norm": 1.5698280416999764, "learning_rate": 9.945698089377338e-06, "loss": 0.5163, "step": 609 }, { "epoch": 0.08, "grad_norm": 1.5623181354974682, "learning_rate": 9.945402303463045e-06, "loss": 0.4943, "step": 610 }, { "epoch": 0.08, "grad_norm": 1.545427331377954, "learning_rate": 9.945105718575452e-06, "loss": 0.5753, "step": 611 }, { "epoch": 0.08, "grad_norm": 2.519017117437064, "learning_rate": 9.94480833476248e-06, "loss": 0.4687, "step": 612 }, { "epoch": 0.08, "grad_norm": 1.320764969254525, "learning_rate": 9.94451015207217e-06, "loss": 0.5192, "step": 613 }, { "epoch": 0.08, "grad_norm": 1.585198566620101, "learning_rate": 9.944211170552698e-06, "loss": 0.5299, "step": 614 }, { "epoch": 0.08, "grad_norm": 2.065016309964868, "learning_rate": 9.943911390252368e-06, "loss": 0.5043, "step": 615 }, { "epoch": 0.08, "grad_norm": 1.717156629974031, "learning_rate": 9.943610811219608e-06, "loss": 0.5314, "step": 616 }, { "epoch": 0.08, "grad_norm": 1.3992808758110693, "learning_rate": 9.943309433502985e-06, "loss": 0.4816, "step": 617 }, { "epoch": 0.08, "grad_norm": 1.4741157871118, "learning_rate": 9.943007257151185e-06, "loss": 0.5328, "step": 618 }, { "epoch": 0.08, "grad_norm": 1.6625878117192396, "learning_rate": 9.942704282213028e-06, "loss": 0.5106, "step": 619 }, { "epoch": 0.08, "grad_norm": 1.5814144236989829, "learning_rate": 9.942400508737465e-06, "loss": 0.5308, "step": 620 }, { "epoch": 0.08, "grad_norm": 1.5003023210361535, "learning_rate": 9.942095936773568e-06, "loss": 0.5584, "step": 621 }, { "epoch": 0.08, "grad_norm": 1.5907340486009833, "learning_rate": 9.941790566370547e-06, "loss": 0.5498, "step": 622 }, { "epoch": 0.08, "grad_norm": 1.2882804193460367, "learning_rate": 9.941484397577737e-06, "loss": 0.5108, "step": 623 }, { "epoch": 0.08, "grad_norm": 1.679829152208987, "learning_rate": 9.941177430444601e-06, "loss": 0.5687, "step": 624 }, { "epoch": 0.08, "grad_norm": 1.4950337923655184, "learning_rate": 9.940869665020736e-06, "loss": 0.583, "step": 625 }, { "epoch": 0.08, "grad_norm": 0.692963812544074, "learning_rate": 9.940561101355858e-06, "loss": 0.507, "step": 626 }, { "epoch": 0.08, "grad_norm": 1.5440986461401733, "learning_rate": 9.940251739499824e-06, "loss": 0.5577, "step": 627 }, { "epoch": 0.08, "grad_norm": 1.6306303651228953, "learning_rate": 9.93994157950261e-06, "loss": 0.4947, "step": 628 }, { "epoch": 0.08, "grad_norm": 0.6663936494364988, "learning_rate": 9.939630621414328e-06, "loss": 0.467, "step": 629 }, { "epoch": 0.08, "grad_norm": 1.8972238621140487, "learning_rate": 9.939318865285214e-06, "loss": 0.5485, "step": 630 }, { "epoch": 0.08, "grad_norm": 1.4247705518456582, "learning_rate": 9.939006311165636e-06, "loss": 0.5473, "step": 631 }, { "epoch": 0.08, "grad_norm": 1.718273329018105, "learning_rate": 9.938692959106089e-06, "loss": 0.5431, "step": 632 }, { "epoch": 0.08, "grad_norm": 0.6943576952650891, "learning_rate": 9.9383788091572e-06, "loss": 0.5074, "step": 633 }, { "epoch": 0.08, "grad_norm": 0.6876591007539876, "learning_rate": 9.938063861369718e-06, "loss": 0.5159, "step": 634 }, { "epoch": 0.08, "grad_norm": 1.7548691737757782, "learning_rate": 9.93774811579453e-06, "loss": 0.5381, "step": 635 }, { "epoch": 0.08, "grad_norm": 1.5154806990192187, "learning_rate": 9.937431572482646e-06, "loss": 0.5053, "step": 636 }, { "epoch": 0.08, "grad_norm": 1.5611688276113909, "learning_rate": 9.937114231485207e-06, "loss": 0.506, "step": 637 }, { "epoch": 0.08, "grad_norm": 1.9093304638448643, "learning_rate": 9.936796092853482e-06, "loss": 0.482, "step": 638 }, { "epoch": 0.08, "grad_norm": 2.097493189282801, "learning_rate": 9.936477156638868e-06, "loss": 0.5737, "step": 639 }, { "epoch": 0.08, "grad_norm": 1.430398311443158, "learning_rate": 9.936157422892892e-06, "loss": 0.5013, "step": 640 }, { "epoch": 0.08, "grad_norm": 1.8149506986026083, "learning_rate": 9.935836891667212e-06, "loss": 0.5898, "step": 641 }, { "epoch": 0.08, "grad_norm": 1.6180185699234901, "learning_rate": 9.93551556301361e-06, "loss": 0.5698, "step": 642 }, { "epoch": 0.08, "grad_norm": 0.7222498111719983, "learning_rate": 9.935193436984001e-06, "loss": 0.5386, "step": 643 }, { "epoch": 0.08, "grad_norm": 0.7724001895597116, "learning_rate": 9.934870513630428e-06, "loss": 0.5139, "step": 644 }, { "epoch": 0.08, "grad_norm": 1.5072040003212988, "learning_rate": 9.93454679300506e-06, "loss": 0.5524, "step": 645 }, { "epoch": 0.08, "grad_norm": 1.5356579540974762, "learning_rate": 9.934222275160199e-06, "loss": 0.5048, "step": 646 }, { "epoch": 0.08, "grad_norm": 2.108613917254826, "learning_rate": 9.933896960148272e-06, "loss": 0.5056, "step": 647 }, { "epoch": 0.08, "grad_norm": 0.6624497135490821, "learning_rate": 9.933570848021837e-06, "loss": 0.4759, "step": 648 }, { "epoch": 0.08, "grad_norm": 1.8727451645721778, "learning_rate": 9.933243938833581e-06, "loss": 0.5638, "step": 649 }, { "epoch": 0.08, "grad_norm": 1.4408274780775827, "learning_rate": 9.932916232636318e-06, "loss": 0.5385, "step": 650 }, { "epoch": 0.08, "grad_norm": 1.488341298958083, "learning_rate": 9.932587729482992e-06, "loss": 0.5178, "step": 651 }, { "epoch": 0.08, "grad_norm": 1.5293715713962739, "learning_rate": 9.932258429426678e-06, "loss": 0.4827, "step": 652 }, { "epoch": 0.08, "grad_norm": 1.739811601628948, "learning_rate": 9.931928332520573e-06, "loss": 0.5144, "step": 653 }, { "epoch": 0.08, "grad_norm": 1.7864609906352191, "learning_rate": 9.93159743881801e-06, "loss": 0.5343, "step": 654 }, { "epoch": 0.08, "grad_norm": 1.5659000901067446, "learning_rate": 9.931265748372447e-06, "loss": 0.4829, "step": 655 }, { "epoch": 0.08, "grad_norm": 1.3856751502290265, "learning_rate": 9.930933261237472e-06, "loss": 0.5254, "step": 656 }, { "epoch": 0.08, "grad_norm": 1.6639494622011914, "learning_rate": 9.930599977466802e-06, "loss": 0.5489, "step": 657 }, { "epoch": 0.08, "grad_norm": 1.5503942485192046, "learning_rate": 9.93026589711428e-06, "loss": 0.4999, "step": 658 }, { "epoch": 0.08, "grad_norm": 1.6374283584517775, "learning_rate": 9.929931020233878e-06, "loss": 0.5678, "step": 659 }, { "epoch": 0.08, "grad_norm": 1.4451768173598474, "learning_rate": 9.929595346879704e-06, "loss": 0.5326, "step": 660 }, { "epoch": 0.08, "grad_norm": 3.771283449714104, "learning_rate": 9.929258877105985e-06, "loss": 0.5726, "step": 661 }, { "epoch": 0.08, "grad_norm": 1.5995943876705156, "learning_rate": 9.928921610967079e-06, "loss": 0.5317, "step": 662 }, { "epoch": 0.08, "grad_norm": 2.0357535708739407, "learning_rate": 9.92858354851748e-06, "loss": 0.5355, "step": 663 }, { "epoch": 0.08, "grad_norm": 1.6543400868553626, "learning_rate": 9.928244689811799e-06, "loss": 0.5311, "step": 664 }, { "epoch": 0.08, "grad_norm": 3.5531763061855233, "learning_rate": 9.927905034904785e-06, "loss": 0.4885, "step": 665 }, { "epoch": 0.08, "grad_norm": 1.6727414973141812, "learning_rate": 9.927564583851313e-06, "loss": 0.5728, "step": 666 }, { "epoch": 0.08, "grad_norm": 1.5057542539112305, "learning_rate": 9.927223336706384e-06, "loss": 0.5613, "step": 667 }, { "epoch": 0.08, "grad_norm": 1.3621577823437236, "learning_rate": 9.926881293525129e-06, "loss": 0.5133, "step": 668 }, { "epoch": 0.08, "grad_norm": 1.3917135906447404, "learning_rate": 9.92653845436281e-06, "loss": 0.582, "step": 669 }, { "epoch": 0.08, "grad_norm": 1.1837662859106874, "learning_rate": 9.926194819274812e-06, "loss": 0.5057, "step": 670 }, { "epoch": 0.08, "grad_norm": 1.5927160077016809, "learning_rate": 9.925850388316657e-06, "loss": 0.44, "step": 671 }, { "epoch": 0.08, "grad_norm": 1.3077815833136432, "learning_rate": 9.925505161543988e-06, "loss": 0.5325, "step": 672 }, { "epoch": 0.08, "grad_norm": 1.3547793282803282, "learning_rate": 9.925159139012581e-06, "loss": 0.4944, "step": 673 }, { "epoch": 0.08, "grad_norm": 3.8707164450695393, "learning_rate": 9.924812320778338e-06, "loss": 0.497, "step": 674 }, { "epoch": 0.08, "grad_norm": 1.5811520204557328, "learning_rate": 9.92446470689729e-06, "loss": 0.5824, "step": 675 }, { "epoch": 0.08, "grad_norm": 1.6237920019107122, "learning_rate": 9.924116297425598e-06, "loss": 0.546, "step": 676 }, { "epoch": 0.08, "grad_norm": 1.300885812955191, "learning_rate": 9.923767092419551e-06, "loss": 0.543, "step": 677 }, { "epoch": 0.08, "grad_norm": 1.4815799886567405, "learning_rate": 9.923417091935564e-06, "loss": 0.5462, "step": 678 }, { "epoch": 0.08, "grad_norm": 0.7256114801909598, "learning_rate": 9.923066296030185e-06, "loss": 0.5211, "step": 679 }, { "epoch": 0.08, "grad_norm": 1.5039129063851944, "learning_rate": 9.922714704760088e-06, "loss": 0.5257, "step": 680 }, { "epoch": 0.08, "grad_norm": 1.4942655542398324, "learning_rate": 9.922362318182075e-06, "loss": 0.534, "step": 681 }, { "epoch": 0.08, "grad_norm": 1.6854065304986234, "learning_rate": 9.922009136353077e-06, "loss": 0.5436, "step": 682 }, { "epoch": 0.08, "grad_norm": 1.6207158877626267, "learning_rate": 9.921655159330154e-06, "loss": 0.5232, "step": 683 }, { "epoch": 0.08, "grad_norm": 1.9176833918479952, "learning_rate": 9.921300387170494e-06, "loss": 0.5185, "step": 684 }, { "epoch": 0.09, "grad_norm": 2.222262958976972, "learning_rate": 9.920944819931412e-06, "loss": 0.5406, "step": 685 }, { "epoch": 0.09, "grad_norm": 1.5443955907847353, "learning_rate": 9.920588457670357e-06, "loss": 0.5517, "step": 686 }, { "epoch": 0.09, "grad_norm": 1.4342050400083974, "learning_rate": 9.920231300444897e-06, "loss": 0.5497, "step": 687 }, { "epoch": 0.09, "grad_norm": 1.311067650265911, "learning_rate": 9.91987334831274e-06, "loss": 0.5226, "step": 688 }, { "epoch": 0.09, "grad_norm": 1.471479637322531, "learning_rate": 9.919514601331712e-06, "loss": 0.5122, "step": 689 }, { "epoch": 0.09, "grad_norm": 1.7926523501870986, "learning_rate": 9.919155059559772e-06, "loss": 0.55, "step": 690 }, { "epoch": 0.09, "grad_norm": 1.5675007462680313, "learning_rate": 9.918794723055009e-06, "loss": 0.5821, "step": 691 }, { "epoch": 0.09, "grad_norm": 1.4604240106900952, "learning_rate": 9.918433591875637e-06, "loss": 0.564, "step": 692 }, { "epoch": 0.09, "grad_norm": 1.4861143775734027, "learning_rate": 9.918071666080003e-06, "loss": 0.5813, "step": 693 }, { "epoch": 0.09, "grad_norm": 2.143330932505597, "learning_rate": 9.917708945726574e-06, "loss": 0.5196, "step": 694 }, { "epoch": 0.09, "grad_norm": 3.44234958841204, "learning_rate": 9.917345430873955e-06, "loss": 0.5551, "step": 695 }, { "epoch": 0.09, "grad_norm": 1.48817999565854, "learning_rate": 9.916981121580873e-06, "loss": 0.521, "step": 696 }, { "epoch": 0.09, "grad_norm": 1.774381530446406, "learning_rate": 9.916616017906185e-06, "loss": 0.5086, "step": 697 }, { "epoch": 0.09, "grad_norm": 0.6229915734516575, "learning_rate": 9.91625011990888e-06, "loss": 0.4771, "step": 698 }, { "epoch": 0.09, "grad_norm": 2.448594905519429, "learning_rate": 9.915883427648066e-06, "loss": 0.5662, "step": 699 }, { "epoch": 0.09, "grad_norm": 1.9591531503663824, "learning_rate": 9.915515941182992e-06, "loss": 0.4842, "step": 700 }, { "epoch": 0.09, "grad_norm": 1.36950835978183, "learning_rate": 9.915147660573024e-06, "loss": 0.4944, "step": 701 }, { "epoch": 0.09, "grad_norm": 1.8648299110331485, "learning_rate": 9.914778585877665e-06, "loss": 0.5539, "step": 702 }, { "epoch": 0.09, "grad_norm": 1.7320586877113233, "learning_rate": 9.914408717156538e-06, "loss": 0.5153, "step": 703 }, { "epoch": 0.09, "grad_norm": 1.4658542485684234, "learning_rate": 9.9140380544694e-06, "loss": 0.572, "step": 704 }, { "epoch": 0.09, "grad_norm": 1.4362278910057131, "learning_rate": 9.913666597876137e-06, "loss": 0.5114, "step": 705 }, { "epoch": 0.09, "grad_norm": 2.0480313933332335, "learning_rate": 9.913294347436758e-06, "loss": 0.4573, "step": 706 }, { "epoch": 0.09, "grad_norm": 1.3863115172008518, "learning_rate": 9.912921303211405e-06, "loss": 0.5416, "step": 707 }, { "epoch": 0.09, "grad_norm": 1.756096947460397, "learning_rate": 9.912547465260347e-06, "loss": 0.4997, "step": 708 }, { "epoch": 0.09, "grad_norm": 1.681996815479112, "learning_rate": 9.91217283364398e-06, "loss": 0.5189, "step": 709 }, { "epoch": 0.09, "grad_norm": 1.8491451039304299, "learning_rate": 9.91179740842283e-06, "loss": 0.5712, "step": 710 }, { "epoch": 0.09, "grad_norm": 0.6846675664657432, "learning_rate": 9.911421189657548e-06, "loss": 0.52, "step": 711 }, { "epoch": 0.09, "grad_norm": 1.6256288323393315, "learning_rate": 9.911044177408917e-06, "loss": 0.5461, "step": 712 }, { "epoch": 0.09, "grad_norm": 1.4504352891549575, "learning_rate": 9.910666371737848e-06, "loss": 0.5386, "step": 713 }, { "epoch": 0.09, "grad_norm": 0.6943756408365623, "learning_rate": 9.910287772705374e-06, "loss": 0.51, "step": 714 }, { "epoch": 0.09, "grad_norm": 1.6456825439428284, "learning_rate": 9.909908380372667e-06, "loss": 0.5576, "step": 715 }, { "epoch": 0.09, "grad_norm": 1.3685240161884205, "learning_rate": 9.909528194801019e-06, "loss": 0.5561, "step": 716 }, { "epoch": 0.09, "grad_norm": 2.6513433815250442, "learning_rate": 9.90914721605185e-06, "loss": 0.4893, "step": 717 }, { "epoch": 0.09, "grad_norm": 1.2340929468815107, "learning_rate": 9.908765444186713e-06, "loss": 0.5007, "step": 718 }, { "epoch": 0.09, "grad_norm": 1.3786299592454312, "learning_rate": 9.908382879267286e-06, "loss": 0.5752, "step": 719 }, { "epoch": 0.09, "grad_norm": 1.4800630744877326, "learning_rate": 9.907999521355377e-06, "loss": 0.5015, "step": 720 }, { "epoch": 0.09, "grad_norm": 2.2232871580378, "learning_rate": 9.907615370512919e-06, "loss": 0.497, "step": 721 }, { "epoch": 0.09, "grad_norm": 1.2928878365401588, "learning_rate": 9.907230426801975e-06, "loss": 0.5447, "step": 722 }, { "epoch": 0.09, "grad_norm": 1.4967207824812627, "learning_rate": 9.906844690284737e-06, "loss": 0.5291, "step": 723 }, { "epoch": 0.09, "grad_norm": 1.816746657284892, "learning_rate": 9.906458161023522e-06, "loss": 0.5342, "step": 724 }, { "epoch": 0.09, "grad_norm": 2.0861253042637484, "learning_rate": 9.90607083908078e-06, "loss": 0.5903, "step": 725 }, { "epoch": 0.09, "grad_norm": 1.5926144364257493, "learning_rate": 9.905682724519086e-06, "loss": 0.5445, "step": 726 }, { "epoch": 0.09, "grad_norm": 1.3870599530402037, "learning_rate": 9.90529381740114e-06, "loss": 0.5318, "step": 727 }, { "epoch": 0.09, "grad_norm": 1.6319587413630097, "learning_rate": 9.904904117789777e-06, "loss": 0.5497, "step": 728 }, { "epoch": 0.09, "grad_norm": 1.9513204246105857, "learning_rate": 9.904513625747957e-06, "loss": 0.5404, "step": 729 }, { "epoch": 0.09, "grad_norm": 1.6492222551928712, "learning_rate": 9.904122341338764e-06, "loss": 0.5099, "step": 730 }, { "epoch": 0.09, "grad_norm": 1.7330336994895605, "learning_rate": 9.903730264625415e-06, "loss": 0.5237, "step": 731 }, { "epoch": 0.09, "grad_norm": 1.335780098202449, "learning_rate": 9.903337395671255e-06, "loss": 0.5068, "step": 732 }, { "epoch": 0.09, "grad_norm": 0.6746985228306138, "learning_rate": 9.902943734539752e-06, "loss": 0.479, "step": 733 }, { "epoch": 0.09, "grad_norm": 1.5679095009354174, "learning_rate": 9.902549281294508e-06, "loss": 0.5457, "step": 734 }, { "epoch": 0.09, "grad_norm": 1.5978083443392859, "learning_rate": 9.90215403599925e-06, "loss": 0.5094, "step": 735 }, { "epoch": 0.09, "grad_norm": 1.3502793900824692, "learning_rate": 9.901757998717833e-06, "loss": 0.5287, "step": 736 }, { "epoch": 0.09, "grad_norm": 1.3767717373956716, "learning_rate": 9.90136116951424e-06, "loss": 0.5638, "step": 737 }, { "epoch": 0.09, "grad_norm": 1.4100309801230537, "learning_rate": 9.900963548452584e-06, "loss": 0.5088, "step": 738 }, { "epoch": 0.09, "grad_norm": 1.6723873034933143, "learning_rate": 9.900565135597104e-06, "loss": 0.5254, "step": 739 }, { "epoch": 0.09, "grad_norm": 1.4968579496803647, "learning_rate": 9.900165931012164e-06, "loss": 0.5401, "step": 740 }, { "epoch": 0.09, "grad_norm": 1.512318360157006, "learning_rate": 9.899765934762263e-06, "loss": 0.4433, "step": 741 }, { "epoch": 0.09, "grad_norm": 2.5025123815418007, "learning_rate": 9.899365146912018e-06, "loss": 0.511, "step": 742 }, { "epoch": 0.09, "grad_norm": 1.6683442789870047, "learning_rate": 9.898963567526188e-06, "loss": 0.5479, "step": 743 }, { "epoch": 0.09, "grad_norm": 1.6151733284076952, "learning_rate": 9.898561196669645e-06, "loss": 0.5266, "step": 744 }, { "epoch": 0.09, "grad_norm": 1.4827781091080732, "learning_rate": 9.8981580344074e-06, "loss": 0.5435, "step": 745 }, { "epoch": 0.09, "grad_norm": 1.3361012140606874, "learning_rate": 9.897754080804583e-06, "loss": 0.5011, "step": 746 }, { "epoch": 0.09, "grad_norm": 1.570149978353058, "learning_rate": 9.89734933592646e-06, "loss": 0.5391, "step": 747 }, { "epoch": 0.09, "grad_norm": 1.4440488628055852, "learning_rate": 9.896943799838419e-06, "loss": 0.563, "step": 748 }, { "epoch": 0.09, "grad_norm": 1.4722485313647773, "learning_rate": 9.896537472605979e-06, "loss": 0.5872, "step": 749 }, { "epoch": 0.09, "grad_norm": 1.6458885186334251, "learning_rate": 9.896130354294787e-06, "loss": 0.4825, "step": 750 }, { "epoch": 0.09, "grad_norm": 2.0870975798160076, "learning_rate": 9.895722444970613e-06, "loss": 0.5378, "step": 751 }, { "epoch": 0.09, "grad_norm": 0.6713600482428013, "learning_rate": 9.89531374469936e-06, "loss": 0.5243, "step": 752 }, { "epoch": 0.09, "grad_norm": 1.3666798769307291, "learning_rate": 9.89490425354706e-06, "loss": 0.537, "step": 753 }, { "epoch": 0.09, "grad_norm": 1.6965904249779475, "learning_rate": 9.894493971579864e-06, "loss": 0.5512, "step": 754 }, { "epoch": 0.09, "grad_norm": 1.3900037721001617, "learning_rate": 9.894082898864062e-06, "loss": 0.5467, "step": 755 }, { "epoch": 0.09, "grad_norm": 1.524197186398035, "learning_rate": 9.893671035466065e-06, "loss": 0.4735, "step": 756 }, { "epoch": 0.09, "grad_norm": 0.7339854586343567, "learning_rate": 9.893258381452412e-06, "loss": 0.5089, "step": 757 }, { "epoch": 0.09, "grad_norm": 1.3923524219523842, "learning_rate": 9.892844936889772e-06, "loss": 0.4909, "step": 758 }, { "epoch": 0.09, "grad_norm": 1.5124594114302838, "learning_rate": 9.89243070184494e-06, "loss": 0.5394, "step": 759 }, { "epoch": 0.09, "grad_norm": 1.8034130536722808, "learning_rate": 9.892015676384838e-06, "loss": 0.512, "step": 760 }, { "epoch": 0.09, "grad_norm": 1.5762898788709665, "learning_rate": 9.891599860576519e-06, "loss": 0.5248, "step": 761 }, { "epoch": 0.09, "grad_norm": 1.4714297837827943, "learning_rate": 9.891183254487162e-06, "loss": 0.5465, "step": 762 }, { "epoch": 0.09, "grad_norm": 1.3214866314049687, "learning_rate": 9.890765858184073e-06, "loss": 0.516, "step": 763 }, { "epoch": 0.09, "grad_norm": 1.9633591420432888, "learning_rate": 9.890347671734684e-06, "loss": 0.5366, "step": 764 }, { "epoch": 0.09, "grad_norm": 1.3580228817655544, "learning_rate": 9.889928695206559e-06, "loss": 0.5315, "step": 765 }, { "epoch": 0.1, "grad_norm": 1.5198904764841936, "learning_rate": 9.889508928667387e-06, "loss": 0.5439, "step": 766 }, { "epoch": 0.1, "grad_norm": 2.239507833632809, "learning_rate": 9.889088372184983e-06, "loss": 0.549, "step": 767 }, { "epoch": 0.1, "grad_norm": 2.0090601913369452, "learning_rate": 9.888667025827295e-06, "loss": 0.5388, "step": 768 }, { "epoch": 0.1, "grad_norm": 1.7600992496202628, "learning_rate": 9.888244889662394e-06, "loss": 0.5286, "step": 769 }, { "epoch": 0.1, "grad_norm": 1.363528192366846, "learning_rate": 9.887821963758478e-06, "loss": 0.5206, "step": 770 }, { "epoch": 0.1, "grad_norm": 1.3125021258357175, "learning_rate": 9.887398248183876e-06, "loss": 0.5342, "step": 771 }, { "epoch": 0.1, "grad_norm": 2.1185739451091274, "learning_rate": 9.886973743007042e-06, "loss": 0.559, "step": 772 }, { "epoch": 0.1, "grad_norm": 1.3878073704129925, "learning_rate": 9.88654844829656e-06, "loss": 0.5069, "step": 773 }, { "epoch": 0.1, "grad_norm": 1.603486854404389, "learning_rate": 9.88612236412114e-06, "loss": 0.5484, "step": 774 }, { "epoch": 0.1, "grad_norm": 1.5023961328735826, "learning_rate": 9.885695490549616e-06, "loss": 0.5537, "step": 775 }, { "epoch": 0.1, "grad_norm": 1.345851732601472, "learning_rate": 9.88526782765096e-06, "loss": 0.5059, "step": 776 }, { "epoch": 0.1, "grad_norm": 1.3519885963470128, "learning_rate": 9.884839375494258e-06, "loss": 0.4886, "step": 777 }, { "epoch": 0.1, "grad_norm": 1.48289100247411, "learning_rate": 9.884410134148733e-06, "loss": 0.5144, "step": 778 }, { "epoch": 0.1, "grad_norm": 1.4008779870643109, "learning_rate": 9.883980103683735e-06, "loss": 0.5803, "step": 779 }, { "epoch": 0.1, "grad_norm": 1.4562131509620257, "learning_rate": 9.883549284168736e-06, "loss": 0.5574, "step": 780 }, { "epoch": 0.1, "grad_norm": 1.4444072614610364, "learning_rate": 9.883117675673339e-06, "loss": 0.4858, "step": 781 }, { "epoch": 0.1, "grad_norm": 1.5080506086390568, "learning_rate": 9.882685278267277e-06, "loss": 0.5444, "step": 782 }, { "epoch": 0.1, "grad_norm": 1.7045840262615868, "learning_rate": 9.882252092020403e-06, "loss": 0.5505, "step": 783 }, { "epoch": 0.1, "grad_norm": 1.7370813343274576, "learning_rate": 9.881818117002706e-06, "loss": 0.5415, "step": 784 }, { "epoch": 0.1, "grad_norm": 2.018153134156803, "learning_rate": 9.881383353284295e-06, "loss": 0.4996, "step": 785 }, { "epoch": 0.1, "grad_norm": 1.3253195899746142, "learning_rate": 9.880947800935414e-06, "loss": 0.5106, "step": 786 }, { "epoch": 0.1, "grad_norm": 1.7024567402620956, "learning_rate": 9.880511460026428e-06, "loss": 0.5396, "step": 787 }, { "epoch": 0.1, "grad_norm": 1.4091988414010796, "learning_rate": 9.88007433062783e-06, "loss": 0.555, "step": 788 }, { "epoch": 0.1, "grad_norm": 0.7087771391854086, "learning_rate": 9.879636412810244e-06, "loss": 0.4847, "step": 789 }, { "epoch": 0.1, "grad_norm": 2.7231897900894366, "learning_rate": 9.879197706644423e-06, "loss": 0.5219, "step": 790 }, { "epoch": 0.1, "grad_norm": 1.7027320981466387, "learning_rate": 9.878758212201238e-06, "loss": 0.5014, "step": 791 }, { "epoch": 0.1, "grad_norm": 1.3482912883508638, "learning_rate": 9.878317929551695e-06, "loss": 0.4895, "step": 792 }, { "epoch": 0.1, "grad_norm": 1.3152332995824816, "learning_rate": 9.877876858766927e-06, "loss": 0.5465, "step": 793 }, { "epoch": 0.1, "grad_norm": 1.4997609418658844, "learning_rate": 9.877434999918192e-06, "loss": 0.5262, "step": 794 }, { "epoch": 0.1, "grad_norm": 0.6539265342268819, "learning_rate": 9.876992353076875e-06, "loss": 0.4821, "step": 795 }, { "epoch": 0.1, "grad_norm": 1.6249056812162253, "learning_rate": 9.876548918314492e-06, "loss": 0.534, "step": 796 }, { "epoch": 0.1, "grad_norm": 1.5004396199830836, "learning_rate": 9.876104695702682e-06, "loss": 0.5699, "step": 797 }, { "epoch": 0.1, "grad_norm": 1.2826945141311428, "learning_rate": 9.875659685313214e-06, "loss": 0.4789, "step": 798 }, { "epoch": 0.1, "grad_norm": 1.5469724214523863, "learning_rate": 9.875213887217983e-06, "loss": 0.5576, "step": 799 }, { "epoch": 0.1, "grad_norm": 1.5415041213137937, "learning_rate": 9.87476730148901e-06, "loss": 0.5988, "step": 800 }, { "epoch": 0.1, "grad_norm": 0.7021497983380178, "learning_rate": 9.874319928198448e-06, "loss": 0.5403, "step": 801 }, { "epoch": 0.1, "grad_norm": 1.3549275899238842, "learning_rate": 9.873871767418572e-06, "loss": 0.455, "step": 802 }, { "epoch": 0.1, "grad_norm": 1.2532327534233543, "learning_rate": 9.873422819221785e-06, "loss": 0.5064, "step": 803 }, { "epoch": 0.1, "grad_norm": 1.7360535633059955, "learning_rate": 9.872973083680622e-06, "loss": 0.5882, "step": 804 }, { "epoch": 0.1, "grad_norm": 1.4059071965483354, "learning_rate": 9.87252256086774e-06, "loss": 0.5658, "step": 805 }, { "epoch": 0.1, "grad_norm": 2.530355778895807, "learning_rate": 9.872071250855925e-06, "loss": 0.5147, "step": 806 }, { "epoch": 0.1, "grad_norm": 1.3847353474560062, "learning_rate": 9.87161915371809e-06, "loss": 0.5407, "step": 807 }, { "epoch": 0.1, "grad_norm": 1.2893581665896208, "learning_rate": 9.871166269527275e-06, "loss": 0.5172, "step": 808 }, { "epoch": 0.1, "grad_norm": 1.4749327657449687, "learning_rate": 9.870712598356647e-06, "loss": 0.5339, "step": 809 }, { "epoch": 0.1, "grad_norm": 1.4358631901359513, "learning_rate": 9.870258140279503e-06, "loss": 0.5119, "step": 810 }, { "epoch": 0.1, "grad_norm": 2.1722531501187503, "learning_rate": 9.869802895369262e-06, "loss": 0.5623, "step": 811 }, { "epoch": 0.1, "grad_norm": 1.3154410118141262, "learning_rate": 9.869346863699474e-06, "loss": 0.5222, "step": 812 }, { "epoch": 0.1, "grad_norm": 1.5619568170354916, "learning_rate": 9.868890045343814e-06, "loss": 0.5189, "step": 813 }, { "epoch": 0.1, "grad_norm": 1.5104678506036293, "learning_rate": 9.868432440376086e-06, "loss": 0.5747, "step": 814 }, { "epoch": 0.1, "grad_norm": 1.3470148837454545, "learning_rate": 9.867974048870221e-06, "loss": 0.5123, "step": 815 }, { "epoch": 0.1, "grad_norm": 1.3544857302522781, "learning_rate": 9.867514870900274e-06, "loss": 0.5256, "step": 816 }, { "epoch": 0.1, "grad_norm": 2.1417064240265287, "learning_rate": 9.867054906540432e-06, "loss": 0.5228, "step": 817 }, { "epoch": 0.1, "grad_norm": 2.069195455278062, "learning_rate": 9.866594155865004e-06, "loss": 0.5605, "step": 818 }, { "epoch": 0.1, "grad_norm": 0.6880607864614212, "learning_rate": 9.866132618948428e-06, "loss": 0.49, "step": 819 }, { "epoch": 0.1, "grad_norm": 1.9067634496940928, "learning_rate": 9.86567029586527e-06, "loss": 0.5758, "step": 820 }, { "epoch": 0.1, "grad_norm": 1.3809878109560794, "learning_rate": 9.865207186690225e-06, "loss": 0.4819, "step": 821 }, { "epoch": 0.1, "grad_norm": 1.4377570369203871, "learning_rate": 9.86474329149811e-06, "loss": 0.5409, "step": 822 }, { "epoch": 0.1, "grad_norm": 1.2837566075142783, "learning_rate": 9.864278610363869e-06, "loss": 0.5082, "step": 823 }, { "epoch": 0.1, "grad_norm": 1.376197397673897, "learning_rate": 9.863813143362579e-06, "loss": 0.468, "step": 824 }, { "epoch": 0.1, "grad_norm": 3.5091655795057206, "learning_rate": 9.863346890569438e-06, "loss": 0.5511, "step": 825 }, { "epoch": 0.1, "grad_norm": 1.2654935539583412, "learning_rate": 9.862879852059775e-06, "loss": 0.5588, "step": 826 }, { "epoch": 0.1, "grad_norm": 1.480429796301672, "learning_rate": 9.862412027909045e-06, "loss": 0.5077, "step": 827 }, { "epoch": 0.1, "grad_norm": 1.4231689337630336, "learning_rate": 9.861943418192825e-06, "loss": 0.5572, "step": 828 }, { "epoch": 0.1, "grad_norm": 1.6537058377816523, "learning_rate": 9.861474022986828e-06, "loss": 0.5611, "step": 829 }, { "epoch": 0.1, "grad_norm": 1.4257922752948444, "learning_rate": 9.861003842366886e-06, "loss": 0.5383, "step": 830 }, { "epoch": 0.1, "grad_norm": 1.5238556011282056, "learning_rate": 9.86053287640896e-06, "loss": 0.5396, "step": 831 }, { "epoch": 0.1, "grad_norm": 1.5593477658026424, "learning_rate": 9.860061125189143e-06, "loss": 0.5556, "step": 832 }, { "epoch": 0.1, "grad_norm": 1.6877101213463102, "learning_rate": 9.859588588783644e-06, "loss": 0.5304, "step": 833 }, { "epoch": 0.1, "grad_norm": 1.52836473336756, "learning_rate": 9.85911526726881e-06, "loss": 0.5365, "step": 834 }, { "epoch": 0.1, "grad_norm": 1.4292053696202944, "learning_rate": 9.85864116072111e-06, "loss": 0.524, "step": 835 }, { "epoch": 0.1, "grad_norm": 1.3567394691546826, "learning_rate": 9.85816626921714e-06, "loss": 0.5385, "step": 836 }, { "epoch": 0.1, "grad_norm": 1.813389252993085, "learning_rate": 9.857690592833621e-06, "loss": 0.5389, "step": 837 }, { "epoch": 0.1, "grad_norm": 1.6373671449371334, "learning_rate": 9.857214131647405e-06, "loss": 0.517, "step": 838 }, { "epoch": 0.1, "grad_norm": 1.550185315855915, "learning_rate": 9.856736885735467e-06, "loss": 0.5129, "step": 839 }, { "epoch": 0.1, "grad_norm": 1.2396005024409704, "learning_rate": 9.856258855174912e-06, "loss": 0.5535, "step": 840 }, { "epoch": 0.1, "grad_norm": 1.4116583979770747, "learning_rate": 9.855780040042966e-06, "loss": 0.4832, "step": 841 }, { "epoch": 0.1, "grad_norm": 1.6187709239417065, "learning_rate": 9.85530044041699e-06, "loss": 0.5649, "step": 842 }, { "epoch": 0.1, "grad_norm": 1.3743636030552622, "learning_rate": 9.854820056374468e-06, "loss": 0.548, "step": 843 }, { "epoch": 0.1, "grad_norm": 1.4003641447914825, "learning_rate": 9.854338887993006e-06, "loss": 0.5033, "step": 844 }, { "epoch": 0.1, "grad_norm": 1.6185345234958257, "learning_rate": 9.853856935350345e-06, "loss": 0.593, "step": 845 }, { "epoch": 0.1, "grad_norm": 1.8383993635732563, "learning_rate": 9.853374198524347e-06, "loss": 0.5019, "step": 846 }, { "epoch": 0.11, "grad_norm": 1.4429440730187004, "learning_rate": 9.852890677593003e-06, "loss": 0.553, "step": 847 }, { "epoch": 0.11, "grad_norm": 1.502777317388333, "learning_rate": 9.852406372634427e-06, "loss": 0.5163, "step": 848 }, { "epoch": 0.11, "grad_norm": 1.7042353791138922, "learning_rate": 9.85192128372687e-06, "loss": 0.5671, "step": 849 }, { "epoch": 0.11, "grad_norm": 1.5670146983134932, "learning_rate": 9.851435410948693e-06, "loss": 0.5254, "step": 850 }, { "epoch": 0.11, "grad_norm": 1.6943773775700972, "learning_rate": 9.8509487543784e-06, "loss": 0.5858, "step": 851 }, { "epoch": 0.11, "grad_norm": 1.7259489878374763, "learning_rate": 9.850461314094611e-06, "loss": 0.5266, "step": 852 }, { "epoch": 0.11, "grad_norm": 1.4839090775806405, "learning_rate": 9.84997309017608e-06, "loss": 0.4945, "step": 853 }, { "epoch": 0.11, "grad_norm": 1.6430132038958682, "learning_rate": 9.849484082701679e-06, "loss": 0.5514, "step": 854 }, { "epoch": 0.11, "grad_norm": 1.330689307629748, "learning_rate": 9.848994291750416e-06, "loss": 0.5068, "step": 855 }, { "epoch": 0.11, "grad_norm": 1.4367448150793294, "learning_rate": 9.848503717401416e-06, "loss": 0.4849, "step": 856 }, { "epoch": 0.11, "grad_norm": 1.5435415775983266, "learning_rate": 9.848012359733941e-06, "loss": 0.5299, "step": 857 }, { "epoch": 0.11, "grad_norm": 2.0779784325734822, "learning_rate": 9.847520218827372e-06, "loss": 0.5144, "step": 858 }, { "epoch": 0.11, "grad_norm": 0.7886067252096047, "learning_rate": 9.847027294761218e-06, "loss": 0.5121, "step": 859 }, { "epoch": 0.11, "grad_norm": 0.6874506009278324, "learning_rate": 9.846533587615117e-06, "loss": 0.5396, "step": 860 }, { "epoch": 0.11, "grad_norm": 1.6245535898960135, "learning_rate": 9.846039097468829e-06, "loss": 0.5138, "step": 861 }, { "epoch": 0.11, "grad_norm": 1.5610689448816104, "learning_rate": 9.845543824402245e-06, "loss": 0.5456, "step": 862 }, { "epoch": 0.11, "grad_norm": 3.867756358837439, "learning_rate": 9.84504776849538e-06, "loss": 0.5296, "step": 863 }, { "epoch": 0.11, "grad_norm": 0.6572342482634592, "learning_rate": 9.844550929828375e-06, "loss": 0.4763, "step": 864 }, { "epoch": 0.11, "grad_norm": 1.5801874916299146, "learning_rate": 9.844053308481504e-06, "loss": 0.5699, "step": 865 }, { "epoch": 0.11, "grad_norm": 1.4583504122348614, "learning_rate": 9.843554904535157e-06, "loss": 0.5041, "step": 866 }, { "epoch": 0.11, "grad_norm": 1.611938008913365, "learning_rate": 9.843055718069856e-06, "loss": 0.5419, "step": 867 }, { "epoch": 0.11, "grad_norm": 1.43075395951784, "learning_rate": 9.842555749166252e-06, "loss": 0.5617, "step": 868 }, { "epoch": 0.11, "grad_norm": 5.380501740350102, "learning_rate": 9.842054997905115e-06, "loss": 0.4923, "step": 869 }, { "epoch": 0.11, "grad_norm": 1.4650493645359763, "learning_rate": 9.841553464367349e-06, "loss": 0.5058, "step": 870 }, { "epoch": 0.11, "grad_norm": 2.314374886920701, "learning_rate": 9.84105114863398e-06, "loss": 0.5574, "step": 871 }, { "epoch": 0.11, "grad_norm": 1.335405641847203, "learning_rate": 9.840548050786162e-06, "loss": 0.5267, "step": 872 }, { "epoch": 0.11, "grad_norm": 2.882923799877262, "learning_rate": 9.840044170905175e-06, "loss": 0.5018, "step": 873 }, { "epoch": 0.11, "grad_norm": 1.606183086211936, "learning_rate": 9.839539509072425e-06, "loss": 0.5532, "step": 874 }, { "epoch": 0.11, "grad_norm": 1.4106290303040485, "learning_rate": 9.839034065369444e-06, "loss": 0.4926, "step": 875 }, { "epoch": 0.11, "grad_norm": 1.8398370549040346, "learning_rate": 9.83852783987789e-06, "loss": 0.5319, "step": 876 }, { "epoch": 0.11, "grad_norm": 1.8145118768900863, "learning_rate": 9.838020832679552e-06, "loss": 0.5236, "step": 877 }, { "epoch": 0.11, "grad_norm": 1.1562995225815056, "learning_rate": 9.837513043856338e-06, "loss": 0.513, "step": 878 }, { "epoch": 0.11, "grad_norm": 1.5446941041707583, "learning_rate": 9.837004473490286e-06, "loss": 0.5432, "step": 879 }, { "epoch": 0.11, "grad_norm": 1.7330545831926203, "learning_rate": 9.83649512166356e-06, "loss": 0.5111, "step": 880 }, { "epoch": 0.11, "grad_norm": 1.822826636212995, "learning_rate": 9.835984988458454e-06, "loss": 0.5, "step": 881 }, { "epoch": 0.11, "grad_norm": 1.483699967216602, "learning_rate": 9.835474073957379e-06, "loss": 0.5181, "step": 882 }, { "epoch": 0.11, "grad_norm": 1.8228914204587476, "learning_rate": 9.834962378242882e-06, "loss": 0.5745, "step": 883 }, { "epoch": 0.11, "grad_norm": 1.7397105597518532, "learning_rate": 9.834449901397628e-06, "loss": 0.5306, "step": 884 }, { "epoch": 0.11, "grad_norm": 1.3840024922502834, "learning_rate": 9.833936643504417e-06, "loss": 0.4915, "step": 885 }, { "epoch": 0.11, "grad_norm": 1.6649130295487007, "learning_rate": 9.833422604646165e-06, "loss": 0.5362, "step": 886 }, { "epoch": 0.11, "grad_norm": 1.7078837515289695, "learning_rate": 9.832907784905924e-06, "loss": 0.4886, "step": 887 }, { "epoch": 0.11, "grad_norm": 1.7511691661944424, "learning_rate": 9.832392184366867e-06, "loss": 0.4758, "step": 888 }, { "epoch": 0.11, "grad_norm": 1.3751504432892512, "learning_rate": 9.831875803112291e-06, "loss": 0.5, "step": 889 }, { "epoch": 0.11, "grad_norm": 1.4513862307154417, "learning_rate": 9.831358641225624e-06, "loss": 0.5377, "step": 890 }, { "epoch": 0.11, "grad_norm": 2.5931908846008063, "learning_rate": 9.830840698790418e-06, "loss": 0.5329, "step": 891 }, { "epoch": 0.11, "grad_norm": 1.3425686954116223, "learning_rate": 9.83032197589035e-06, "loss": 0.4917, "step": 892 }, { "epoch": 0.11, "grad_norm": 1.2865043128692821, "learning_rate": 9.829802472609227e-06, "loss": 0.4823, "step": 893 }, { "epoch": 0.11, "grad_norm": 1.4653845961751846, "learning_rate": 9.829282189030976e-06, "loss": 0.5754, "step": 894 }, { "epoch": 0.11, "grad_norm": 1.5380876117092384, "learning_rate": 9.828761125239655e-06, "loss": 0.5642, "step": 895 }, { "epoch": 0.11, "grad_norm": 1.6425053205832336, "learning_rate": 9.828239281319447e-06, "loss": 0.5546, "step": 896 }, { "epoch": 0.11, "grad_norm": 1.256609403421453, "learning_rate": 9.827716657354658e-06, "loss": 0.4601, "step": 897 }, { "epoch": 0.11, "grad_norm": 6.390734805023612, "learning_rate": 9.827193253429726e-06, "loss": 0.5261, "step": 898 }, { "epoch": 0.11, "grad_norm": 2.7799958313656696, "learning_rate": 9.826669069629209e-06, "loss": 0.5457, "step": 899 }, { "epoch": 0.11, "grad_norm": 1.4733773601073992, "learning_rate": 9.826144106037794e-06, "loss": 0.5611, "step": 900 }, { "epoch": 0.11, "grad_norm": 2.9129884749417165, "learning_rate": 9.825618362740295e-06, "loss": 0.5339, "step": 901 }, { "epoch": 0.11, "grad_norm": 2.243230069846606, "learning_rate": 9.825091839821648e-06, "loss": 0.5816, "step": 902 }, { "epoch": 0.11, "grad_norm": 2.0242936924161343, "learning_rate": 9.82456453736692e-06, "loss": 0.5167, "step": 903 }, { "epoch": 0.11, "grad_norm": 1.434184689682166, "learning_rate": 9.8240364554613e-06, "loss": 0.4836, "step": 904 }, { "epoch": 0.11, "grad_norm": 0.6775533855530116, "learning_rate": 9.823507594190103e-06, "loss": 0.4603, "step": 905 }, { "epoch": 0.11, "grad_norm": 1.6827627319527105, "learning_rate": 9.822977953638772e-06, "loss": 0.5396, "step": 906 }, { "epoch": 0.11, "grad_norm": 1.3676057637250456, "learning_rate": 9.822447533892877e-06, "loss": 0.5059, "step": 907 }, { "epoch": 0.11, "grad_norm": 1.3258876354387583, "learning_rate": 9.821916335038111e-06, "loss": 0.5057, "step": 908 }, { "epoch": 0.11, "grad_norm": 134.66857492128472, "learning_rate": 9.821384357160292e-06, "loss": 0.5451, "step": 909 }, { "epoch": 0.11, "grad_norm": 1.2585093599662378, "learning_rate": 9.820851600345368e-06, "loss": 0.5558, "step": 910 }, { "epoch": 0.11, "grad_norm": 1.5964531920502647, "learning_rate": 9.820318064679408e-06, "loss": 0.505, "step": 911 }, { "epoch": 0.11, "grad_norm": 1.969482196985344, "learning_rate": 9.819783750248612e-06, "loss": 0.5582, "step": 912 }, { "epoch": 0.11, "grad_norm": 1.5323419980885993, "learning_rate": 9.819248657139304e-06, "loss": 0.5278, "step": 913 }, { "epoch": 0.11, "grad_norm": 7.449348072753027, "learning_rate": 9.818712785437929e-06, "loss": 0.5646, "step": 914 }, { "epoch": 0.11, "grad_norm": 5.113064908635709, "learning_rate": 9.818176135231066e-06, "loss": 0.5043, "step": 915 }, { "epoch": 0.11, "grad_norm": 1.46539588571356, "learning_rate": 9.817638706605413e-06, "loss": 0.5927, "step": 916 }, { "epoch": 0.11, "grad_norm": 1.9591832216721343, "learning_rate": 9.817100499647796e-06, "loss": 0.4954, "step": 917 }, { "epoch": 0.11, "grad_norm": 1.8904706076981188, "learning_rate": 9.81656151444517e-06, "loss": 0.5074, "step": 918 }, { "epoch": 0.11, "grad_norm": 1.603397229383552, "learning_rate": 9.81602175108461e-06, "loss": 0.5213, "step": 919 }, { "epoch": 0.11, "grad_norm": 1.4407858273805725, "learning_rate": 9.815481209653318e-06, "loss": 0.5073, "step": 920 }, { "epoch": 0.11, "grad_norm": 1.6780133192986095, "learning_rate": 9.814939890238629e-06, "loss": 0.5749, "step": 921 }, { "epoch": 0.11, "grad_norm": 1.3890417822540029, "learning_rate": 9.814397792927993e-06, "loss": 0.5103, "step": 922 }, { "epoch": 0.11, "grad_norm": 1.4123428104356073, "learning_rate": 9.813854917808993e-06, "loss": 0.511, "step": 923 }, { "epoch": 0.11, "grad_norm": 2.3239410306747974, "learning_rate": 9.813311264969333e-06, "loss": 0.5895, "step": 924 }, { "epoch": 0.11, "grad_norm": 1.6481482416750444, "learning_rate": 9.812766834496847e-06, "loss": 0.5619, "step": 925 }, { "epoch": 0.11, "grad_norm": 1.5682167709528458, "learning_rate": 9.812221626479493e-06, "loss": 0.5106, "step": 926 }, { "epoch": 0.12, "grad_norm": 2.108483178988802, "learning_rate": 9.811675641005352e-06, "loss": 0.4574, "step": 927 }, { "epoch": 0.12, "grad_norm": 1.272404284404087, "learning_rate": 9.811128878162633e-06, "loss": 0.5017, "step": 928 }, { "epoch": 0.12, "grad_norm": 2.229753751249011, "learning_rate": 9.81058133803967e-06, "loss": 0.4652, "step": 929 }, { "epoch": 0.12, "grad_norm": 3.8807442456609524, "learning_rate": 9.810033020724927e-06, "loss": 0.5045, "step": 930 }, { "epoch": 0.12, "grad_norm": 1.5325223466990254, "learning_rate": 9.809483926306984e-06, "loss": 0.5153, "step": 931 }, { "epoch": 0.12, "grad_norm": 1.293134868842404, "learning_rate": 9.808934054874557e-06, "loss": 0.4899, "step": 932 }, { "epoch": 0.12, "grad_norm": 1.398989709606525, "learning_rate": 9.808383406516478e-06, "loss": 0.5187, "step": 933 }, { "epoch": 0.12, "grad_norm": 2.071140988213683, "learning_rate": 9.807831981321711e-06, "loss": 0.5403, "step": 934 }, { "epoch": 0.12, "grad_norm": 0.6607542531288317, "learning_rate": 9.807279779379346e-06, "loss": 0.4914, "step": 935 }, { "epoch": 0.12, "grad_norm": 1.3850092138390073, "learning_rate": 9.806726800778592e-06, "loss": 0.4681, "step": 936 }, { "epoch": 0.12, "grad_norm": 1.5181764228685495, "learning_rate": 9.806173045608791e-06, "loss": 0.4813, "step": 937 }, { "epoch": 0.12, "grad_norm": 1.71479008444105, "learning_rate": 9.805618513959405e-06, "loss": 0.5298, "step": 938 }, { "epoch": 0.12, "grad_norm": 1.7241969898445435, "learning_rate": 9.805063205920024e-06, "loss": 0.5251, "step": 939 }, { "epoch": 0.12, "grad_norm": 1.4856505135312046, "learning_rate": 9.804507121580363e-06, "loss": 0.5439, "step": 940 }, { "epoch": 0.12, "grad_norm": 1.5218457627879594, "learning_rate": 9.80395026103026e-06, "loss": 0.538, "step": 941 }, { "epoch": 0.12, "grad_norm": 1.9835057866029318, "learning_rate": 9.803392624359686e-06, "loss": 0.5694, "step": 942 }, { "epoch": 0.12, "grad_norm": 1.7890943425296235, "learning_rate": 9.802834211658727e-06, "loss": 0.556, "step": 943 }, { "epoch": 0.12, "grad_norm": 1.823356520693511, "learning_rate": 9.802275023017603e-06, "loss": 0.4954, "step": 944 }, { "epoch": 0.12, "grad_norm": 2.778959746033984, "learning_rate": 9.801715058526654e-06, "loss": 0.5047, "step": 945 }, { "epoch": 0.12, "grad_norm": 1.5026611567465495, "learning_rate": 9.801154318276346e-06, "loss": 0.5611, "step": 946 }, { "epoch": 0.12, "grad_norm": 1.3920378820144184, "learning_rate": 9.800592802357274e-06, "loss": 0.5048, "step": 947 }, { "epoch": 0.12, "grad_norm": 1.5561386982115961, "learning_rate": 9.800030510860154e-06, "loss": 0.5821, "step": 948 }, { "epoch": 0.12, "grad_norm": 5.0569047792119814, "learning_rate": 9.799467443875832e-06, "loss": 0.4784, "step": 949 }, { "epoch": 0.12, "grad_norm": 1.5279500400553616, "learning_rate": 9.798903601495274e-06, "loss": 0.5358, "step": 950 }, { "epoch": 0.12, "grad_norm": 1.4977870060684855, "learning_rate": 9.798338983809574e-06, "loss": 0.5229, "step": 951 }, { "epoch": 0.12, "grad_norm": 1.7866661445205454, "learning_rate": 9.797773590909951e-06, "loss": 0.5753, "step": 952 }, { "epoch": 0.12, "grad_norm": 1.6653644954339666, "learning_rate": 9.797207422887749e-06, "loss": 0.5222, "step": 953 }, { "epoch": 0.12, "grad_norm": 1.4942644071101057, "learning_rate": 9.796640479834439e-06, "loss": 0.5648, "step": 954 }, { "epoch": 0.12, "grad_norm": 2.3212009783951033, "learning_rate": 9.796072761841614e-06, "loss": 0.4975, "step": 955 }, { "epoch": 0.12, "grad_norm": 2.3157046474966476, "learning_rate": 9.795504269000993e-06, "loss": 0.5508, "step": 956 }, { "epoch": 0.12, "grad_norm": 1.5525174218441946, "learning_rate": 9.794935001404424e-06, "loss": 0.5046, "step": 957 }, { "epoch": 0.12, "grad_norm": 1.5409578454278048, "learning_rate": 9.794364959143876e-06, "loss": 0.5008, "step": 958 }, { "epoch": 0.12, "grad_norm": 2.0286529491143623, "learning_rate": 9.793794142311442e-06, "loss": 0.5295, "step": 959 }, { "epoch": 0.12, "grad_norm": 1.735399312413163, "learning_rate": 9.793222550999343e-06, "loss": 0.4717, "step": 960 }, { "epoch": 0.12, "grad_norm": 3.364301097630389, "learning_rate": 9.79265018529993e-06, "loss": 0.5434, "step": 961 }, { "epoch": 0.12, "grad_norm": 0.6307910155590302, "learning_rate": 9.792077045305667e-06, "loss": 0.5009, "step": 962 }, { "epoch": 0.12, "grad_norm": 2.0261251017328084, "learning_rate": 9.791503131109153e-06, "loss": 0.5054, "step": 963 }, { "epoch": 0.12, "grad_norm": 1.8934827285210059, "learning_rate": 9.790928442803109e-06, "loss": 0.5408, "step": 964 }, { "epoch": 0.12, "grad_norm": 1.7390284854415334, "learning_rate": 9.79035298048038e-06, "loss": 0.5017, "step": 965 }, { "epoch": 0.12, "grad_norm": 1.5967519421160552, "learning_rate": 9.789776744233937e-06, "loss": 0.5105, "step": 966 }, { "epoch": 0.12, "grad_norm": 1.4326649427114473, "learning_rate": 9.789199734156876e-06, "loss": 0.5143, "step": 967 }, { "epoch": 0.12, "grad_norm": 2.0047895387472607, "learning_rate": 9.788621950342419e-06, "loss": 0.5886, "step": 968 }, { "epoch": 0.12, "grad_norm": 2.059058151775754, "learning_rate": 9.788043392883913e-06, "loss": 0.5324, "step": 969 }, { "epoch": 0.12, "grad_norm": 1.7462739610027802, "learning_rate": 9.787464061874826e-06, "loss": 0.5006, "step": 970 }, { "epoch": 0.12, "grad_norm": 1.440421328798765, "learning_rate": 9.786883957408755e-06, "loss": 0.5404, "step": 971 }, { "epoch": 0.12, "grad_norm": 1.4410596054166127, "learning_rate": 9.786303079579423e-06, "loss": 0.562, "step": 972 }, { "epoch": 0.12, "grad_norm": 1.3845526423586314, "learning_rate": 9.785721428480672e-06, "loss": 0.5482, "step": 973 }, { "epoch": 0.12, "grad_norm": 1.6455669709278569, "learning_rate": 9.785139004206478e-06, "loss": 0.5413, "step": 974 }, { "epoch": 0.12, "grad_norm": 2.965987983372974, "learning_rate": 9.784555806850932e-06, "loss": 0.546, "step": 975 }, { "epoch": 0.12, "grad_norm": 1.3477974890308504, "learning_rate": 9.783971836508258e-06, "loss": 0.5016, "step": 976 }, { "epoch": 0.12, "grad_norm": 1.4996179118803765, "learning_rate": 9.783387093272801e-06, "loss": 0.603, "step": 977 }, { "epoch": 0.12, "grad_norm": 1.6326168420110307, "learning_rate": 9.78280157723903e-06, "loss": 0.4822, "step": 978 }, { "epoch": 0.12, "grad_norm": 2.1691513988958606, "learning_rate": 9.782215288501541e-06, "loss": 0.5877, "step": 979 }, { "epoch": 0.12, "grad_norm": 1.3980271196527394, "learning_rate": 9.781628227155056e-06, "loss": 0.4766, "step": 980 }, { "epoch": 0.12, "grad_norm": 1.5816788788667602, "learning_rate": 9.781040393294415e-06, "loss": 0.5204, "step": 981 }, { "epoch": 0.12, "grad_norm": 1.4866512555478568, "learning_rate": 9.780451787014593e-06, "loss": 0.5519, "step": 982 }, { "epoch": 0.12, "grad_norm": 1.500045693403318, "learning_rate": 9.779862408410682e-06, "loss": 0.5559, "step": 983 }, { "epoch": 0.12, "grad_norm": 1.539271350449334, "learning_rate": 9.779272257577901e-06, "loss": 0.5247, "step": 984 }, { "epoch": 0.12, "grad_norm": 1.452860261604936, "learning_rate": 9.778681334611595e-06, "loss": 0.5641, "step": 985 }, { "epoch": 0.12, "grad_norm": 1.598342601973268, "learning_rate": 9.778089639607232e-06, "loss": 0.5046, "step": 986 }, { "epoch": 0.12, "grad_norm": 1.5798505955932483, "learning_rate": 9.777497172660406e-06, "loss": 0.4981, "step": 987 }, { "epoch": 0.12, "grad_norm": 2.4824775493039155, "learning_rate": 9.776903933866838e-06, "loss": 0.5306, "step": 988 }, { "epoch": 0.12, "grad_norm": 1.7456312083003678, "learning_rate": 9.776309923322365e-06, "loss": 0.5612, "step": 989 }, { "epoch": 0.12, "grad_norm": 2.0909503716589932, "learning_rate": 9.77571514112296e-06, "loss": 0.5093, "step": 990 }, { "epoch": 0.12, "grad_norm": 2.043487520816119, "learning_rate": 9.77511958736471e-06, "loss": 0.5899, "step": 991 }, { "epoch": 0.12, "grad_norm": 1.5009302169192755, "learning_rate": 9.774523262143838e-06, "loss": 0.5234, "step": 992 }, { "epoch": 0.12, "grad_norm": 1.693164828745711, "learning_rate": 9.773926165556682e-06, "loss": 0.5293, "step": 993 }, { "epoch": 0.12, "grad_norm": 1.320473825716359, "learning_rate": 9.773328297699709e-06, "loss": 0.4707, "step": 994 }, { "epoch": 0.12, "grad_norm": 1.493561778782454, "learning_rate": 9.772729658669511e-06, "loss": 0.5299, "step": 995 }, { "epoch": 0.12, "grad_norm": 1.3194409305794026, "learning_rate": 9.7721302485628e-06, "loss": 0.5391, "step": 996 }, { "epoch": 0.12, "grad_norm": 1.5548800287869815, "learning_rate": 9.77153006747642e-06, "loss": 0.5221, "step": 997 }, { "epoch": 0.12, "grad_norm": 1.3753921622169898, "learning_rate": 9.770929115507333e-06, "loss": 0.4613, "step": 998 }, { "epoch": 0.12, "grad_norm": 1.4793123657960134, "learning_rate": 9.770327392752627e-06, "loss": 0.5219, "step": 999 }, { "epoch": 0.12, "grad_norm": 1.6888355213538224, "learning_rate": 9.76972489930952e-06, "loss": 0.5256, "step": 1000 }, { "epoch": 0.12, "grad_norm": 3.510342478308107, "learning_rate": 9.769121635275348e-06, "loss": 0.506, "step": 1001 }, { "epoch": 0.12, "grad_norm": 1.3832178583611607, "learning_rate": 9.768517600747572e-06, "loss": 0.5308, "step": 1002 }, { "epoch": 0.12, "grad_norm": 1.3804901712551811, "learning_rate": 9.76791279582378e-06, "loss": 0.5085, "step": 1003 }, { "epoch": 0.12, "grad_norm": 1.5874172918864036, "learning_rate": 9.767307220601682e-06, "loss": 0.564, "step": 1004 }, { "epoch": 0.12, "grad_norm": 1.709225786964121, "learning_rate": 9.766700875179118e-06, "loss": 0.5527, "step": 1005 }, { "epoch": 0.12, "grad_norm": 1.6674120712422085, "learning_rate": 9.766093759654047e-06, "loss": 0.549, "step": 1006 }, { "epoch": 0.12, "grad_norm": 1.5771344976496824, "learning_rate": 9.765485874124551e-06, "loss": 0.541, "step": 1007 }, { "epoch": 0.13, "grad_norm": 2.0461275407031114, "learning_rate": 9.764877218688844e-06, "loss": 0.5979, "step": 1008 }, { "epoch": 0.13, "grad_norm": 1.5508383621666664, "learning_rate": 9.764267793445254e-06, "loss": 0.5072, "step": 1009 }, { "epoch": 0.13, "grad_norm": 1.5389952214975169, "learning_rate": 9.763657598492244e-06, "loss": 0.5267, "step": 1010 }, { "epoch": 0.13, "grad_norm": 2.19076555655587, "learning_rate": 9.763046633928394e-06, "loss": 0.5695, "step": 1011 }, { "epoch": 0.13, "grad_norm": 2.1357995975740542, "learning_rate": 9.762434899852412e-06, "loss": 0.5243, "step": 1012 }, { "epoch": 0.13, "grad_norm": 10.30912398608652, "learning_rate": 9.761822396363127e-06, "loss": 0.5061, "step": 1013 }, { "epoch": 0.13, "grad_norm": 1.7211057851844471, "learning_rate": 9.761209123559497e-06, "loss": 0.5614, "step": 1014 }, { "epoch": 0.13, "grad_norm": 1.6430505979593055, "learning_rate": 9.760595081540598e-06, "loss": 0.5224, "step": 1015 }, { "epoch": 0.13, "grad_norm": 1.7293605186403183, "learning_rate": 9.759980270405636e-06, "loss": 0.5426, "step": 1016 }, { "epoch": 0.13, "grad_norm": 0.6819165374186842, "learning_rate": 9.75936469025394e-06, "loss": 0.5263, "step": 1017 }, { "epoch": 0.13, "grad_norm": 3.0238097603809133, "learning_rate": 9.758748341184963e-06, "loss": 0.5028, "step": 1018 }, { "epoch": 0.13, "grad_norm": 10.498056742579276, "learning_rate": 9.758131223298277e-06, "loss": 0.4839, "step": 1019 }, { "epoch": 0.13, "grad_norm": 2.9442501356371493, "learning_rate": 9.757513336693589e-06, "loss": 0.4814, "step": 1020 }, { "epoch": 0.13, "grad_norm": 1.5512458163562854, "learning_rate": 9.756894681470718e-06, "loss": 0.4852, "step": 1021 }, { "epoch": 0.13, "grad_norm": 2.1008113097679795, "learning_rate": 9.756275257729619e-06, "loss": 0.5667, "step": 1022 }, { "epoch": 0.13, "grad_norm": 1.8076424691587238, "learning_rate": 9.755655065570363e-06, "loss": 0.5121, "step": 1023 }, { "epoch": 0.13, "grad_norm": 1.799319617461757, "learning_rate": 9.755034105093143e-06, "loss": 0.5469, "step": 1024 }, { "epoch": 0.13, "grad_norm": 2.364799714460202, "learning_rate": 9.754412376398288e-06, "loss": 0.5173, "step": 1025 }, { "epoch": 0.13, "grad_norm": 1.5452171924103493, "learning_rate": 9.75378987958624e-06, "loss": 0.5226, "step": 1026 }, { "epoch": 0.13, "grad_norm": 4.548293795499255, "learning_rate": 9.75316661475757e-06, "loss": 0.5025, "step": 1027 }, { "epoch": 0.13, "grad_norm": 1.5622088483551202, "learning_rate": 9.752542582012969e-06, "loss": 0.5667, "step": 1028 }, { "epoch": 0.13, "grad_norm": 3.2172447624890994, "learning_rate": 9.75191778145326e-06, "loss": 0.5161, "step": 1029 }, { "epoch": 0.13, "grad_norm": 1.5164628494052323, "learning_rate": 9.75129221317938e-06, "loss": 0.5504, "step": 1030 }, { "epoch": 0.13, "grad_norm": 1.803976492652808, "learning_rate": 9.750665877292399e-06, "loss": 0.5444, "step": 1031 }, { "epoch": 0.13, "grad_norm": 3.6078363814371985, "learning_rate": 9.750038773893504e-06, "loss": 0.4997, "step": 1032 }, { "epoch": 0.13, "grad_norm": 1.5506457096195898, "learning_rate": 9.749410903084012e-06, "loss": 0.5513, "step": 1033 }, { "epoch": 0.13, "grad_norm": 1.9688749331319362, "learning_rate": 9.748782264965358e-06, "loss": 0.5313, "step": 1034 }, { "epoch": 0.13, "grad_norm": 3.383066967658014, "learning_rate": 9.748152859639106e-06, "loss": 0.5261, "step": 1035 }, { "epoch": 0.13, "grad_norm": 1.39514071147194, "learning_rate": 9.74752268720694e-06, "loss": 0.4936, "step": 1036 }, { "epoch": 0.13, "grad_norm": 3.8765304882173792, "learning_rate": 9.746891747770674e-06, "loss": 0.5348, "step": 1037 }, { "epoch": 0.13, "grad_norm": 1.6546071258012456, "learning_rate": 9.746260041432238e-06, "loss": 0.5669, "step": 1038 }, { "epoch": 0.13, "grad_norm": 1.8137595354780747, "learning_rate": 9.745627568293692e-06, "loss": 0.5026, "step": 1039 }, { "epoch": 0.13, "grad_norm": 1.7045762249053962, "learning_rate": 9.744994328457216e-06, "loss": 0.5531, "step": 1040 }, { "epoch": 0.13, "grad_norm": 2.227903087362228, "learning_rate": 9.744360322025116e-06, "loss": 0.5391, "step": 1041 }, { "epoch": 0.13, "grad_norm": 1.685844631800516, "learning_rate": 9.743725549099821e-06, "loss": 0.4648, "step": 1042 }, { "epoch": 0.13, "grad_norm": 1.569940061174605, "learning_rate": 9.743090009783884e-06, "loss": 0.4944, "step": 1043 }, { "epoch": 0.13, "grad_norm": 6.723142959449064, "learning_rate": 9.742453704179984e-06, "loss": 0.5125, "step": 1044 }, { "epoch": 0.13, "grad_norm": 1.4912047218885114, "learning_rate": 9.74181663239092e-06, "loss": 0.4813, "step": 1045 }, { "epoch": 0.13, "grad_norm": 2.5235008521313596, "learning_rate": 9.741178794519617e-06, "loss": 0.4845, "step": 1046 }, { "epoch": 0.13, "grad_norm": 2.138918688090799, "learning_rate": 9.740540190669123e-06, "loss": 0.5212, "step": 1047 }, { "epoch": 0.13, "grad_norm": 1.6402131120648562, "learning_rate": 9.73990082094261e-06, "loss": 0.5325, "step": 1048 }, { "epoch": 0.13, "grad_norm": 2.3317651053778317, "learning_rate": 9.739260685443373e-06, "loss": 0.6036, "step": 1049 }, { "epoch": 0.13, "grad_norm": 1.870170924854555, "learning_rate": 9.738619784274833e-06, "loss": 0.547, "step": 1050 }, { "epoch": 0.13, "grad_norm": 1.645815534813316, "learning_rate": 9.737978117540535e-06, "loss": 0.5063, "step": 1051 }, { "epoch": 0.13, "grad_norm": 4.392607147245962, "learning_rate": 9.73733568534414e-06, "loss": 0.5087, "step": 1052 }, { "epoch": 0.13, "grad_norm": 3.5143165627174358, "learning_rate": 9.736692487789445e-06, "loss": 0.5406, "step": 1053 }, { "epoch": 0.13, "grad_norm": 2.8633855334581346, "learning_rate": 9.736048524980361e-06, "loss": 0.5339, "step": 1054 }, { "epoch": 0.13, "grad_norm": 2.4131426572227155, "learning_rate": 9.735403797020927e-06, "loss": 0.5163, "step": 1055 }, { "epoch": 0.13, "grad_norm": 1.7726441057919677, "learning_rate": 9.734758304015304e-06, "loss": 0.5178, "step": 1056 }, { "epoch": 0.13, "grad_norm": 3.4664637428741596, "learning_rate": 9.734112046067776e-06, "loss": 0.5636, "step": 1057 }, { "epoch": 0.13, "grad_norm": 2.250829141697953, "learning_rate": 9.733465023282752e-06, "loss": 0.5415, "step": 1058 }, { "epoch": 0.13, "grad_norm": 4.034243227211706, "learning_rate": 9.732817235764766e-06, "loss": 0.5273, "step": 1059 }, { "epoch": 0.13, "grad_norm": 2.4069863342768443, "learning_rate": 9.732168683618473e-06, "loss": 0.5037, "step": 1060 }, { "epoch": 0.13, "grad_norm": 1.9783336033315735, "learning_rate": 9.731519366948649e-06, "loss": 0.4826, "step": 1061 }, { "epoch": 0.13, "grad_norm": 2.083472842664245, "learning_rate": 9.730869285860203e-06, "loss": 0.539, "step": 1062 }, { "epoch": 0.13, "grad_norm": 3.1097580935012274, "learning_rate": 9.730218440458157e-06, "loss": 0.5468, "step": 1063 }, { "epoch": 0.13, "grad_norm": 3.532998375527931, "learning_rate": 9.729566830847662e-06, "loss": 0.5339, "step": 1064 }, { "epoch": 0.13, "grad_norm": 4.236358533233571, "learning_rate": 9.72891445713399e-06, "loss": 0.526, "step": 1065 }, { "epoch": 0.13, "grad_norm": 0.6568477345833922, "learning_rate": 9.728261319422541e-06, "loss": 0.5168, "step": 1066 }, { "epoch": 0.13, "grad_norm": 3.524033186735707, "learning_rate": 9.727607417818831e-06, "loss": 0.5217, "step": 1067 }, { "epoch": 0.13, "grad_norm": 5.71213752658569, "learning_rate": 9.726952752428509e-06, "loss": 0.5378, "step": 1068 }, { "epoch": 0.13, "grad_norm": 1.898260673307282, "learning_rate": 9.726297323357335e-06, "loss": 0.5476, "step": 1069 }, { "epoch": 0.13, "grad_norm": 1.4246224608948932, "learning_rate": 9.725641130711205e-06, "loss": 0.4839, "step": 1070 }, { "epoch": 0.13, "grad_norm": 1.7794987519402379, "learning_rate": 9.724984174596129e-06, "loss": 0.5076, "step": 1071 }, { "epoch": 0.13, "grad_norm": 2.9061616591589945, "learning_rate": 9.724326455118247e-06, "loss": 0.4956, "step": 1072 }, { "epoch": 0.13, "grad_norm": 2.173745750118815, "learning_rate": 9.723667972383816e-06, "loss": 0.6017, "step": 1073 }, { "epoch": 0.13, "grad_norm": 1.9698620736533579, "learning_rate": 9.723008726499224e-06, "loss": 0.5057, "step": 1074 }, { "epoch": 0.13, "grad_norm": 0.6466460434953547, "learning_rate": 9.722348717570974e-06, "loss": 0.4908, "step": 1075 }, { "epoch": 0.13, "grad_norm": 1.8438028852879214, "learning_rate": 9.7216879457057e-06, "loss": 0.5895, "step": 1076 }, { "epoch": 0.13, "grad_norm": 1.7790940990355575, "learning_rate": 9.721026411010152e-06, "loss": 0.5293, "step": 1077 }, { "epoch": 0.13, "grad_norm": 1.5031660420904411, "learning_rate": 9.720364113591209e-06, "loss": 0.5265, "step": 1078 }, { "epoch": 0.13, "grad_norm": 5.536355883265839, "learning_rate": 9.71970105355587e-06, "loss": 0.5356, "step": 1079 }, { "epoch": 0.13, "grad_norm": 0.7276800472528657, "learning_rate": 9.719037231011258e-06, "loss": 0.5216, "step": 1080 }, { "epoch": 0.13, "grad_norm": 1.5881462730538876, "learning_rate": 9.71837264606462e-06, "loss": 0.5356, "step": 1081 }, { "epoch": 0.13, "grad_norm": 1.6318467478851473, "learning_rate": 9.717707298823325e-06, "loss": 0.5273, "step": 1082 }, { "epoch": 0.13, "grad_norm": 2.9848151538178227, "learning_rate": 9.717041189394865e-06, "loss": 0.5267, "step": 1083 }, { "epoch": 0.13, "grad_norm": 0.6819293422314283, "learning_rate": 9.716374317886858e-06, "loss": 0.5276, "step": 1084 }, { "epoch": 0.13, "grad_norm": 1.8559724047291661, "learning_rate": 9.715706684407042e-06, "loss": 0.5191, "step": 1085 }, { "epoch": 0.13, "grad_norm": 1.5339688193120449, "learning_rate": 9.715038289063278e-06, "loss": 0.5334, "step": 1086 }, { "epoch": 0.13, "grad_norm": 2.5166992172797564, "learning_rate": 9.714369131963554e-06, "loss": 0.4737, "step": 1087 }, { "epoch": 0.14, "grad_norm": 2.134209639993174, "learning_rate": 9.713699213215974e-06, "loss": 0.5355, "step": 1088 }, { "epoch": 0.14, "grad_norm": 1.96269880054436, "learning_rate": 9.713028532928771e-06, "loss": 0.5655, "step": 1089 }, { "epoch": 0.14, "grad_norm": 1.8661433241028522, "learning_rate": 9.712357091210303e-06, "loss": 0.5408, "step": 1090 }, { "epoch": 0.14, "grad_norm": 1.6968495452565269, "learning_rate": 9.711684888169043e-06, "loss": 0.5566, "step": 1091 }, { "epoch": 0.14, "grad_norm": 1.7839624583219784, "learning_rate": 9.711011923913592e-06, "loss": 0.4723, "step": 1092 }, { "epoch": 0.14, "grad_norm": 1.9629701859367892, "learning_rate": 9.710338198552673e-06, "loss": 0.5142, "step": 1093 }, { "epoch": 0.14, "grad_norm": 2.614758849948944, "learning_rate": 9.709663712195134e-06, "loss": 0.5048, "step": 1094 }, { "epoch": 0.14, "grad_norm": 1.667464215660333, "learning_rate": 9.708988464949944e-06, "loss": 0.5172, "step": 1095 }, { "epoch": 0.14, "grad_norm": 3.7918874492859533, "learning_rate": 9.708312456926195e-06, "loss": 0.5401, "step": 1096 }, { "epoch": 0.14, "grad_norm": 1.7004882321052495, "learning_rate": 9.707635688233098e-06, "loss": 0.5092, "step": 1097 }, { "epoch": 0.14, "grad_norm": 1.701452526614193, "learning_rate": 9.706958158979997e-06, "loss": 0.5389, "step": 1098 }, { "epoch": 0.14, "grad_norm": 1.7636567128885436, "learning_rate": 9.70627986927635e-06, "loss": 0.6421, "step": 1099 }, { "epoch": 0.14, "grad_norm": 1.8276796485736944, "learning_rate": 9.705600819231743e-06, "loss": 0.5333, "step": 1100 }, { "epoch": 0.14, "grad_norm": 2.246268565950378, "learning_rate": 9.704921008955876e-06, "loss": 0.5561, "step": 1101 }, { "epoch": 0.14, "grad_norm": 1.438082199230945, "learning_rate": 9.704240438558585e-06, "loss": 0.569, "step": 1102 }, { "epoch": 0.14, "grad_norm": 1.9144248893253415, "learning_rate": 9.70355910814982e-06, "loss": 0.4746, "step": 1103 }, { "epoch": 0.14, "grad_norm": 1.9469849927167315, "learning_rate": 9.702877017839656e-06, "loss": 0.5467, "step": 1104 }, { "epoch": 0.14, "grad_norm": 1.645061673320711, "learning_rate": 9.70219416773829e-06, "loss": 0.5401, "step": 1105 }, { "epoch": 0.14, "grad_norm": 1.7672589402175505, "learning_rate": 9.701510557956041e-06, "loss": 0.5233, "step": 1106 }, { "epoch": 0.14, "grad_norm": 1.6562526037884509, "learning_rate": 9.700826188603358e-06, "loss": 0.573, "step": 1107 }, { "epoch": 0.14, "grad_norm": 2.1079545454951174, "learning_rate": 9.700141059790801e-06, "loss": 0.5971, "step": 1108 }, { "epoch": 0.14, "grad_norm": 1.484603231624597, "learning_rate": 9.699455171629063e-06, "loss": 0.5241, "step": 1109 }, { "epoch": 0.14, "grad_norm": 1.5334514844206923, "learning_rate": 9.69876852422895e-06, "loss": 0.4631, "step": 1110 }, { "epoch": 0.14, "grad_norm": 1.456284075939431, "learning_rate": 9.698081117701399e-06, "loss": 0.5309, "step": 1111 }, { "epoch": 0.14, "grad_norm": 3.1560014571082715, "learning_rate": 9.697392952157467e-06, "loss": 0.4962, "step": 1112 }, { "epoch": 0.14, "grad_norm": 1.6442324739481327, "learning_rate": 9.696704027708332e-06, "loss": 0.5049, "step": 1113 }, { "epoch": 0.14, "grad_norm": 1.7439283088142261, "learning_rate": 9.6960143444653e-06, "loss": 0.5787, "step": 1114 }, { "epoch": 0.14, "grad_norm": 1.8398447633635922, "learning_rate": 9.695323902539787e-06, "loss": 0.5191, "step": 1115 }, { "epoch": 0.14, "grad_norm": 1.6251227622842839, "learning_rate": 9.694632702043347e-06, "loss": 0.5029, "step": 1116 }, { "epoch": 0.14, "grad_norm": 5.79714755581544, "learning_rate": 9.693940743087647e-06, "loss": 0.5115, "step": 1117 }, { "epoch": 0.14, "grad_norm": 2.064944948011489, "learning_rate": 9.693248025784481e-06, "loss": 0.5948, "step": 1118 }, { "epoch": 0.14, "grad_norm": 2.179435891042551, "learning_rate": 9.692554550245759e-06, "loss": 0.55, "step": 1119 }, { "epoch": 0.14, "grad_norm": 4.815622421678713, "learning_rate": 9.691860316583523e-06, "loss": 0.4634, "step": 1120 }, { "epoch": 0.14, "grad_norm": 1.8473286275242924, "learning_rate": 9.69116532490993e-06, "loss": 0.4928, "step": 1121 }, { "epoch": 0.14, "grad_norm": 1.6666329256151406, "learning_rate": 9.690469575337265e-06, "loss": 0.5003, "step": 1122 }, { "epoch": 0.14, "grad_norm": 2.5464761651711085, "learning_rate": 9.689773067977927e-06, "loss": 0.4924, "step": 1123 }, { "epoch": 0.14, "grad_norm": 3.378718901137373, "learning_rate": 9.689075802944447e-06, "loss": 0.5337, "step": 1124 }, { "epoch": 0.14, "grad_norm": 2.8700837581036818, "learning_rate": 9.688377780349475e-06, "loss": 0.5231, "step": 1125 }, { "epoch": 0.14, "grad_norm": 1.7010701035393139, "learning_rate": 9.687679000305779e-06, "loss": 0.5754, "step": 1126 }, { "epoch": 0.14, "grad_norm": 2.6688092521075686, "learning_rate": 9.686979462926255e-06, "loss": 0.5792, "step": 1127 }, { "epoch": 0.14, "grad_norm": 1.8813021532141243, "learning_rate": 9.68627916832392e-06, "loss": 0.6095, "step": 1128 }, { "epoch": 0.14, "grad_norm": 1.6581996476541911, "learning_rate": 9.685578116611913e-06, "loss": 0.5042, "step": 1129 }, { "epoch": 0.14, "grad_norm": 1.6582970311756564, "learning_rate": 9.684876307903495e-06, "loss": 0.4988, "step": 1130 }, { "epoch": 0.14, "grad_norm": 1.683884028197148, "learning_rate": 9.684173742312047e-06, "loss": 0.4801, "step": 1131 }, { "epoch": 0.14, "grad_norm": 3.588948092366968, "learning_rate": 9.683470419951076e-06, "loss": 0.5665, "step": 1132 }, { "epoch": 0.14, "grad_norm": 2.310523817864352, "learning_rate": 9.682766340934212e-06, "loss": 0.5613, "step": 1133 }, { "epoch": 0.14, "grad_norm": 4.195517805689883, "learning_rate": 9.682061505375203e-06, "loss": 0.5476, "step": 1134 }, { "epoch": 0.14, "grad_norm": 1.6244317571837932, "learning_rate": 9.681355913387921e-06, "loss": 0.5026, "step": 1135 }, { "epoch": 0.14, "grad_norm": 1.8925554979075394, "learning_rate": 9.680649565086363e-06, "loss": 0.5229, "step": 1136 }, { "epoch": 0.14, "grad_norm": 1.632376881931433, "learning_rate": 9.679942460584643e-06, "loss": 0.5724, "step": 1137 }, { "epoch": 0.14, "grad_norm": 0.7020674525959631, "learning_rate": 9.679234599997003e-06, "loss": 0.4889, "step": 1138 }, { "epoch": 0.14, "grad_norm": 1.5508086290219265, "learning_rate": 9.6785259834378e-06, "loss": 0.5535, "step": 1139 }, { "epoch": 0.14, "grad_norm": 2.3720437102762606, "learning_rate": 9.677816611021522e-06, "loss": 0.5221, "step": 1140 }, { "epoch": 0.14, "grad_norm": 3.6902927516019997, "learning_rate": 9.67710648286277e-06, "loss": 0.5167, "step": 1141 }, { "epoch": 0.14, "grad_norm": 2.430557957008761, "learning_rate": 9.676395599076274e-06, "loss": 0.4993, "step": 1142 }, { "epoch": 0.14, "grad_norm": 1.8064802832707425, "learning_rate": 9.675683959776883e-06, "loss": 0.4976, "step": 1143 }, { "epoch": 0.14, "grad_norm": 1.7692889961512195, "learning_rate": 9.67497156507957e-06, "loss": 0.6078, "step": 1144 }, { "epoch": 0.14, "grad_norm": 0.7279284571335646, "learning_rate": 9.674258415099424e-06, "loss": 0.4917, "step": 1145 }, { "epoch": 0.14, "grad_norm": 1.5671149456296096, "learning_rate": 9.673544509951666e-06, "loss": 0.5279, "step": 1146 }, { "epoch": 0.14, "grad_norm": 2.3614047258344826, "learning_rate": 9.672829849751633e-06, "loss": 0.5778, "step": 1147 }, { "epoch": 0.14, "grad_norm": 1.6605177543197422, "learning_rate": 9.67211443461478e-06, "loss": 0.5539, "step": 1148 }, { "epoch": 0.14, "grad_norm": 1.745652249143334, "learning_rate": 9.671398264656693e-06, "loss": 0.5044, "step": 1149 }, { "epoch": 0.14, "grad_norm": 1.6403940468245404, "learning_rate": 9.670681339993076e-06, "loss": 0.5688, "step": 1150 }, { "epoch": 0.14, "grad_norm": 1.3705504539528528, "learning_rate": 9.669963660739749e-06, "loss": 0.4915, "step": 1151 }, { "epoch": 0.14, "grad_norm": 2.0052403405897046, "learning_rate": 9.669245227012667e-06, "loss": 0.5709, "step": 1152 }, { "epoch": 0.14, "grad_norm": 1.555499863113405, "learning_rate": 9.668526038927895e-06, "loss": 0.4766, "step": 1153 }, { "epoch": 0.14, "grad_norm": 1.531025831393635, "learning_rate": 9.667806096601621e-06, "loss": 0.4946, "step": 1154 }, { "epoch": 0.14, "grad_norm": 2.0443285220787972, "learning_rate": 9.667085400150167e-06, "loss": 0.4962, "step": 1155 }, { "epoch": 0.14, "grad_norm": 1.4424983450814588, "learning_rate": 9.666363949689959e-06, "loss": 0.4927, "step": 1156 }, { "epoch": 0.14, "grad_norm": 2.059446725914697, "learning_rate": 9.665641745337558e-06, "loss": 0.5343, "step": 1157 }, { "epoch": 0.14, "grad_norm": 2.0802914443470293, "learning_rate": 9.664918787209643e-06, "loss": 0.532, "step": 1158 }, { "epoch": 0.14, "grad_norm": 0.6472887207129542, "learning_rate": 9.664195075423011e-06, "loss": 0.4588, "step": 1159 }, { "epoch": 0.14, "grad_norm": 1.7292441767378381, "learning_rate": 9.663470610094587e-06, "loss": 0.5606, "step": 1160 }, { "epoch": 0.14, "grad_norm": 1.788898862305559, "learning_rate": 9.662745391341415e-06, "loss": 0.513, "step": 1161 }, { "epoch": 0.14, "grad_norm": 1.5309024684748662, "learning_rate": 9.662019419280659e-06, "loss": 0.5694, "step": 1162 }, { "epoch": 0.14, "grad_norm": 1.4995683508223077, "learning_rate": 9.661292694029605e-06, "loss": 0.5268, "step": 1163 }, { "epoch": 0.14, "grad_norm": 1.7653072066574216, "learning_rate": 9.660565215705664e-06, "loss": 0.5659, "step": 1164 }, { "epoch": 0.14, "grad_norm": 1.963524493317451, "learning_rate": 9.659836984426366e-06, "loss": 0.4975, "step": 1165 }, { "epoch": 0.14, "grad_norm": 1.6558752769707101, "learning_rate": 9.65910800030936e-06, "loss": 0.4612, "step": 1166 }, { "epoch": 0.14, "grad_norm": 1.569688065552612, "learning_rate": 9.658378263472428e-06, "loss": 0.5306, "step": 1167 }, { "epoch": 0.14, "grad_norm": 1.600377545926572, "learning_rate": 9.657647774033456e-06, "loss": 0.5281, "step": 1168 }, { "epoch": 0.15, "grad_norm": 2.094413769681627, "learning_rate": 9.656916532110468e-06, "loss": 0.4827, "step": 1169 }, { "epoch": 0.15, "grad_norm": 2.4195357712238534, "learning_rate": 9.656184537821598e-06, "loss": 0.4979, "step": 1170 }, { "epoch": 0.15, "grad_norm": 1.5100123439164768, "learning_rate": 9.655451791285108e-06, "loss": 0.5261, "step": 1171 }, { "epoch": 0.15, "grad_norm": 1.688940664076167, "learning_rate": 9.65471829261938e-06, "loss": 0.5062, "step": 1172 }, { "epoch": 0.15, "grad_norm": 1.5548426778604, "learning_rate": 9.653984041942917e-06, "loss": 0.5516, "step": 1173 }, { "epoch": 0.15, "grad_norm": 1.7100786513962827, "learning_rate": 9.653249039374344e-06, "loss": 0.5542, "step": 1174 }, { "epoch": 0.15, "grad_norm": 1.6241572009144818, "learning_rate": 9.652513285032406e-06, "loss": 0.4834, "step": 1175 }, { "epoch": 0.15, "grad_norm": 1.6386928401709666, "learning_rate": 9.65177677903597e-06, "loss": 0.5102, "step": 1176 }, { "epoch": 0.15, "grad_norm": 1.6867664454182212, "learning_rate": 9.651039521504026e-06, "loss": 0.5255, "step": 1177 }, { "epoch": 0.15, "grad_norm": 3.4139399172397975, "learning_rate": 9.650301512555687e-06, "loss": 0.5622, "step": 1178 }, { "epoch": 0.15, "grad_norm": 4.094308688561519, "learning_rate": 9.64956275231018e-06, "loss": 0.4882, "step": 1179 }, { "epoch": 0.15, "grad_norm": 1.865048965284112, "learning_rate": 9.648823240886862e-06, "loss": 0.5096, "step": 1180 }, { "epoch": 0.15, "grad_norm": 2.494701863132983, "learning_rate": 9.648082978405207e-06, "loss": 0.5809, "step": 1181 }, { "epoch": 0.15, "grad_norm": 0.6793676392905069, "learning_rate": 9.647341964984808e-06, "loss": 0.5012, "step": 1182 }, { "epoch": 0.15, "grad_norm": 5.213005790580897, "learning_rate": 9.646600200745386e-06, "loss": 0.5641, "step": 1183 }, { "epoch": 0.15, "grad_norm": 0.695252467817391, "learning_rate": 9.645857685806776e-06, "loss": 0.4946, "step": 1184 }, { "epoch": 0.15, "grad_norm": 1.6872888667684667, "learning_rate": 9.645114420288943e-06, "loss": 0.5504, "step": 1185 }, { "epoch": 0.15, "grad_norm": 2.1474607546107864, "learning_rate": 9.644370404311962e-06, "loss": 0.5554, "step": 1186 }, { "epoch": 0.15, "grad_norm": 2.65613332157712, "learning_rate": 9.64362563799604e-06, "loss": 0.5397, "step": 1187 }, { "epoch": 0.15, "grad_norm": 2.1984130027521513, "learning_rate": 9.642880121461498e-06, "loss": 0.5274, "step": 1188 }, { "epoch": 0.15, "grad_norm": 1.9276657533703743, "learning_rate": 9.642133854828782e-06, "loss": 0.5502, "step": 1189 }, { "epoch": 0.15, "grad_norm": 0.6981372675873604, "learning_rate": 9.641386838218457e-06, "loss": 0.471, "step": 1190 }, { "epoch": 0.15, "grad_norm": 2.117793484644697, "learning_rate": 9.640639071751211e-06, "loss": 0.5068, "step": 1191 }, { "epoch": 0.15, "grad_norm": 2.737088935356651, "learning_rate": 9.639890555547851e-06, "loss": 0.5434, "step": 1192 }, { "epoch": 0.15, "grad_norm": 2.215032646390322, "learning_rate": 9.639141289729308e-06, "loss": 0.5475, "step": 1193 }, { "epoch": 0.15, "grad_norm": 8.586242568006778, "learning_rate": 9.638391274416631e-06, "loss": 0.5123, "step": 1194 }, { "epoch": 0.15, "grad_norm": 0.7004319954959887, "learning_rate": 9.637640509730994e-06, "loss": 0.5152, "step": 1195 }, { "epoch": 0.15, "grad_norm": 5.300052946091436, "learning_rate": 9.636888995793688e-06, "loss": 0.516, "step": 1196 }, { "epoch": 0.15, "grad_norm": 3.3913417939583823, "learning_rate": 9.636136732726125e-06, "loss": 0.5321, "step": 1197 }, { "epoch": 0.15, "grad_norm": 2.6905813420549474, "learning_rate": 9.635383720649842e-06, "loss": 0.5162, "step": 1198 }, { "epoch": 0.15, "grad_norm": 2.1986911451055486, "learning_rate": 9.634629959686495e-06, "loss": 0.5013, "step": 1199 }, { "epoch": 0.15, "grad_norm": 4.310535302411778, "learning_rate": 9.633875449957858e-06, "loss": 0.535, "step": 1200 }, { "epoch": 0.15, "grad_norm": 1.8547606195531325, "learning_rate": 9.633120191585831e-06, "loss": 0.51, "step": 1201 }, { "epoch": 0.15, "grad_norm": 2.613164494224369, "learning_rate": 9.632364184692433e-06, "loss": 0.5094, "step": 1202 }, { "epoch": 0.15, "grad_norm": 2.85274118986579, "learning_rate": 9.631607429399804e-06, "loss": 0.5153, "step": 1203 }, { "epoch": 0.15, "grad_norm": 2.5773498083449433, "learning_rate": 9.6308499258302e-06, "loss": 0.5393, "step": 1204 }, { "epoch": 0.15, "grad_norm": 3.010815599427557, "learning_rate": 9.630091674106007e-06, "loss": 0.5093, "step": 1205 }, { "epoch": 0.15, "grad_norm": 2.9770650666755865, "learning_rate": 9.629332674349726e-06, "loss": 0.4783, "step": 1206 }, { "epoch": 0.15, "grad_norm": 3.437068958047755, "learning_rate": 9.62857292668398e-06, "loss": 0.5322, "step": 1207 }, { "epoch": 0.15, "grad_norm": 4.74042908492775, "learning_rate": 9.627812431231513e-06, "loss": 0.5359, "step": 1208 }, { "epoch": 0.15, "grad_norm": 2.01476278074651, "learning_rate": 9.627051188115188e-06, "loss": 0.5211, "step": 1209 }, { "epoch": 0.15, "grad_norm": 4.8731844210119135, "learning_rate": 9.626289197457994e-06, "loss": 0.5703, "step": 1210 }, { "epoch": 0.15, "grad_norm": 2.3127041969062683, "learning_rate": 9.625526459383036e-06, "loss": 0.5378, "step": 1211 }, { "epoch": 0.15, "grad_norm": 2.2867488219449634, "learning_rate": 9.62476297401354e-06, "loss": 0.5546, "step": 1212 }, { "epoch": 0.15, "grad_norm": 2.5360675967832114, "learning_rate": 9.623998741472853e-06, "loss": 0.4797, "step": 1213 }, { "epoch": 0.15, "grad_norm": 2.0113735199718814, "learning_rate": 9.623233761884445e-06, "loss": 0.5743, "step": 1214 }, { "epoch": 0.15, "grad_norm": 2.367006214996671, "learning_rate": 9.622468035371905e-06, "loss": 0.5069, "step": 1215 }, { "epoch": 0.15, "grad_norm": 3.009616179978305, "learning_rate": 9.621701562058945e-06, "loss": 0.5268, "step": 1216 }, { "epoch": 0.15, "grad_norm": 2.7213470140986056, "learning_rate": 9.620934342069391e-06, "loss": 0.5008, "step": 1217 }, { "epoch": 0.15, "grad_norm": 1.8416928579180722, "learning_rate": 9.620166375527199e-06, "loss": 0.5389, "step": 1218 }, { "epoch": 0.15, "grad_norm": 3.5360768675242755, "learning_rate": 9.619397662556434e-06, "loss": 0.5154, "step": 1219 }, { "epoch": 0.15, "grad_norm": 2.666612843631181, "learning_rate": 9.618628203281295e-06, "loss": 0.5828, "step": 1220 }, { "epoch": 0.15, "grad_norm": 2.2125291127077475, "learning_rate": 9.617857997826093e-06, "loss": 0.5387, "step": 1221 }, { "epoch": 0.15, "grad_norm": 4.7546061712011785, "learning_rate": 9.617087046315261e-06, "loss": 0.4793, "step": 1222 }, { "epoch": 0.15, "grad_norm": 2.464835676400265, "learning_rate": 9.616315348873351e-06, "loss": 0.5424, "step": 1223 }, { "epoch": 0.15, "grad_norm": 8.164795916642207, "learning_rate": 9.615542905625041e-06, "loss": 0.5119, "step": 1224 }, { "epoch": 0.15, "grad_norm": 2.6484271639304557, "learning_rate": 9.614769716695124e-06, "loss": 0.5667, "step": 1225 }, { "epoch": 0.15, "grad_norm": 5.150313277541799, "learning_rate": 9.613995782208519e-06, "loss": 0.5262, "step": 1226 }, { "epoch": 0.15, "grad_norm": 2.321639794941608, "learning_rate": 9.613221102290256e-06, "loss": 0.4829, "step": 1227 }, { "epoch": 0.15, "grad_norm": 2.6141589182441907, "learning_rate": 9.612445677065494e-06, "loss": 0.5041, "step": 1228 }, { "epoch": 0.15, "grad_norm": 1.9570658209637812, "learning_rate": 9.611669506659512e-06, "loss": 0.5886, "step": 1229 }, { "epoch": 0.15, "grad_norm": 2.8923861340904082, "learning_rate": 9.610892591197702e-06, "loss": 0.5248, "step": 1230 }, { "epoch": 0.15, "grad_norm": 2.8984705127022097, "learning_rate": 9.610114930805588e-06, "loss": 0.5049, "step": 1231 }, { "epoch": 0.15, "grad_norm": 2.4040263275304556, "learning_rate": 9.609336525608804e-06, "loss": 0.5476, "step": 1232 }, { "epoch": 0.15, "grad_norm": 2.5329760895843587, "learning_rate": 9.608557375733108e-06, "loss": 0.5361, "step": 1233 }, { "epoch": 0.15, "grad_norm": 2.4091340353572606, "learning_rate": 9.607777481304378e-06, "loss": 0.5241, "step": 1234 }, { "epoch": 0.15, "grad_norm": 4.083298586043196, "learning_rate": 9.606996842448617e-06, "loss": 0.5274, "step": 1235 }, { "epoch": 0.15, "grad_norm": 4.131902615128241, "learning_rate": 9.60621545929194e-06, "loss": 0.5581, "step": 1236 }, { "epoch": 0.15, "grad_norm": 0.7455567595837368, "learning_rate": 9.605433331960589e-06, "loss": 0.4963, "step": 1237 }, { "epoch": 0.15, "grad_norm": 3.2914875837784163, "learning_rate": 9.60465046058092e-06, "loss": 0.4893, "step": 1238 }, { "epoch": 0.15, "grad_norm": 3.2680055610154803, "learning_rate": 9.603866845279416e-06, "loss": 0.547, "step": 1239 }, { "epoch": 0.15, "grad_norm": 2.3527127987284717, "learning_rate": 9.603082486182677e-06, "loss": 0.5201, "step": 1240 }, { "epoch": 0.15, "grad_norm": 3.8932230576608493, "learning_rate": 9.60229738341742e-06, "loss": 0.5067, "step": 1241 }, { "epoch": 0.15, "grad_norm": 4.521453010399267, "learning_rate": 9.601511537110488e-06, "loss": 0.5039, "step": 1242 }, { "epoch": 0.15, "grad_norm": 2.902197545583407, "learning_rate": 9.600724947388842e-06, "loss": 0.5102, "step": 1243 }, { "epoch": 0.15, "grad_norm": 3.8936990939314446, "learning_rate": 9.59993761437956e-06, "loss": 0.4766, "step": 1244 }, { "epoch": 0.15, "grad_norm": 1.818935057937169, "learning_rate": 9.599149538209844e-06, "loss": 0.4479, "step": 1245 }, { "epoch": 0.15, "grad_norm": 2.4841266281922247, "learning_rate": 9.598360719007014e-06, "loss": 0.5502, "step": 1246 }, { "epoch": 0.15, "grad_norm": 0.6573299803407252, "learning_rate": 9.597571156898512e-06, "loss": 0.4948, "step": 1247 }, { "epoch": 0.15, "grad_norm": 1.9694935567001643, "learning_rate": 9.596780852011898e-06, "loss": 0.5235, "step": 1248 }, { "epoch": 0.16, "grad_norm": 1.7258316103612048, "learning_rate": 9.59598980447485e-06, "loss": 0.5048, "step": 1249 }, { "epoch": 0.16, "grad_norm": 2.4025233679193496, "learning_rate": 9.595198014415175e-06, "loss": 0.4647, "step": 1250 }, { "epoch": 0.16, "grad_norm": 1.9544947136772115, "learning_rate": 9.594405481960788e-06, "loss": 0.5649, "step": 1251 }, { "epoch": 0.16, "grad_norm": 1.9777545383227084, "learning_rate": 9.593612207239731e-06, "loss": 0.5665, "step": 1252 }, { "epoch": 0.16, "grad_norm": 2.117499822077094, "learning_rate": 9.592818190380164e-06, "loss": 0.5135, "step": 1253 }, { "epoch": 0.16, "grad_norm": 7.076146244025416, "learning_rate": 9.59202343151037e-06, "loss": 0.5269, "step": 1254 }, { "epoch": 0.16, "grad_norm": 3.223126647867816, "learning_rate": 9.591227930758747e-06, "loss": 0.5216, "step": 1255 }, { "epoch": 0.16, "grad_norm": 2.0328596241189216, "learning_rate": 9.590431688253816e-06, "loss": 0.5427, "step": 1256 }, { "epoch": 0.16, "grad_norm": 2.6556957352883486, "learning_rate": 9.589634704124218e-06, "loss": 0.5067, "step": 1257 }, { "epoch": 0.16, "grad_norm": 2.804364284260751, "learning_rate": 9.58883697849871e-06, "loss": 0.5287, "step": 1258 }, { "epoch": 0.16, "grad_norm": 1.5480925200101965, "learning_rate": 9.588038511506174e-06, "loss": 0.45, "step": 1259 }, { "epoch": 0.16, "grad_norm": 2.730298843291153, "learning_rate": 9.587239303275609e-06, "loss": 0.5133, "step": 1260 }, { "epoch": 0.16, "grad_norm": 4.908091031732794, "learning_rate": 9.586439353936134e-06, "loss": 0.4956, "step": 1261 }, { "epoch": 0.16, "grad_norm": 2.708475563781002, "learning_rate": 9.585638663616988e-06, "loss": 0.5518, "step": 1262 }, { "epoch": 0.16, "grad_norm": 1.9115489553355347, "learning_rate": 9.584837232447528e-06, "loss": 0.484, "step": 1263 }, { "epoch": 0.16, "grad_norm": 2.6690112469223255, "learning_rate": 9.584035060557232e-06, "loss": 0.558, "step": 1264 }, { "epoch": 0.16, "grad_norm": 1.8645618150965557, "learning_rate": 9.583232148075704e-06, "loss": 0.5433, "step": 1265 }, { "epoch": 0.16, "grad_norm": 1.6006910031055772, "learning_rate": 9.582428495132652e-06, "loss": 0.5044, "step": 1266 }, { "epoch": 0.16, "grad_norm": 1.643665291170957, "learning_rate": 9.58162410185792e-06, "loss": 0.5421, "step": 1267 }, { "epoch": 0.16, "grad_norm": 2.0755796639475856, "learning_rate": 9.580818968381465e-06, "loss": 0.5329, "step": 1268 }, { "epoch": 0.16, "grad_norm": 1.4024091543968857, "learning_rate": 9.580013094833358e-06, "loss": 0.5128, "step": 1269 }, { "epoch": 0.16, "grad_norm": 1.4212575894723896, "learning_rate": 9.579206481343802e-06, "loss": 0.4955, "step": 1270 }, { "epoch": 0.16, "grad_norm": 1.5678792635077088, "learning_rate": 9.578399128043106e-06, "loss": 0.4909, "step": 1271 }, { "epoch": 0.16, "grad_norm": 2.2412182086972967, "learning_rate": 9.577591035061709e-06, "loss": 0.4752, "step": 1272 }, { "epoch": 0.16, "grad_norm": 2.0582740833905353, "learning_rate": 9.576782202530164e-06, "loss": 0.5094, "step": 1273 }, { "epoch": 0.16, "grad_norm": 2.1595951744380097, "learning_rate": 9.575972630579147e-06, "loss": 0.4988, "step": 1274 }, { "epoch": 0.16, "grad_norm": 1.7575760712452722, "learning_rate": 9.575162319339448e-06, "loss": 0.5597, "step": 1275 }, { "epoch": 0.16, "grad_norm": 2.1055777822631323, "learning_rate": 9.574351268941982e-06, "loss": 0.5209, "step": 1276 }, { "epoch": 0.16, "grad_norm": 2.8901163453768444, "learning_rate": 9.573539479517782e-06, "loss": 0.5642, "step": 1277 }, { "epoch": 0.16, "grad_norm": 1.781553074512771, "learning_rate": 9.572726951198e-06, "loss": 0.5262, "step": 1278 }, { "epoch": 0.16, "grad_norm": 1.9014220500004342, "learning_rate": 9.571913684113905e-06, "loss": 0.5311, "step": 1279 }, { "epoch": 0.16, "grad_norm": 1.5394281191849384, "learning_rate": 9.571099678396886e-06, "loss": 0.5251, "step": 1280 }, { "epoch": 0.16, "grad_norm": 2.749150285162678, "learning_rate": 9.57028493417846e-06, "loss": 0.558, "step": 1281 }, { "epoch": 0.16, "grad_norm": 2.194358414181551, "learning_rate": 9.569469451590248e-06, "loss": 0.5159, "step": 1282 }, { "epoch": 0.16, "grad_norm": 1.3419010510041824, "learning_rate": 9.568653230764003e-06, "loss": 0.4754, "step": 1283 }, { "epoch": 0.16, "grad_norm": 1.60353026025585, "learning_rate": 9.567836271831592e-06, "loss": 0.5184, "step": 1284 }, { "epoch": 0.16, "grad_norm": 2.08991136852093, "learning_rate": 9.567018574925e-06, "loss": 0.5461, "step": 1285 }, { "epoch": 0.16, "grad_norm": 3.3027449836301668, "learning_rate": 9.566200140176336e-06, "loss": 0.5126, "step": 1286 }, { "epoch": 0.16, "grad_norm": 1.675273115727022, "learning_rate": 9.565380967717824e-06, "loss": 0.5403, "step": 1287 }, { "epoch": 0.16, "grad_norm": 1.5043081964292935, "learning_rate": 9.564561057681805e-06, "loss": 0.4648, "step": 1288 }, { "epoch": 0.16, "grad_norm": 3.3349823760180684, "learning_rate": 9.56374041020075e-06, "loss": 0.5427, "step": 1289 }, { "epoch": 0.16, "grad_norm": 2.211619642804284, "learning_rate": 9.562919025407236e-06, "loss": 0.5283, "step": 1290 }, { "epoch": 0.16, "grad_norm": 1.45557876575767, "learning_rate": 9.562096903433968e-06, "loss": 0.5412, "step": 1291 }, { "epoch": 0.16, "grad_norm": 2.9953449011412303, "learning_rate": 9.561274044413764e-06, "loss": 0.5083, "step": 1292 }, { "epoch": 0.16, "grad_norm": 11.668342208345377, "learning_rate": 9.560450448479567e-06, "loss": 0.5149, "step": 1293 }, { "epoch": 0.16, "grad_norm": 2.170669371423883, "learning_rate": 9.559626115764437e-06, "loss": 0.5424, "step": 1294 }, { "epoch": 0.16, "grad_norm": 1.794732304031168, "learning_rate": 9.558801046401547e-06, "loss": 0.5584, "step": 1295 }, { "epoch": 0.16, "grad_norm": 1.8864192744126171, "learning_rate": 9.5579752405242e-06, "loss": 0.5109, "step": 1296 }, { "epoch": 0.16, "grad_norm": 1.9524520373710526, "learning_rate": 9.55714869826581e-06, "loss": 0.5622, "step": 1297 }, { "epoch": 0.16, "grad_norm": 1.6924629447195305, "learning_rate": 9.55632141975991e-06, "loss": 0.5265, "step": 1298 }, { "epoch": 0.16, "grad_norm": 1.5101536120787722, "learning_rate": 9.555493405140158e-06, "loss": 0.5426, "step": 1299 }, { "epoch": 0.16, "grad_norm": 1.4822593254375265, "learning_rate": 9.554664654540324e-06, "loss": 0.5498, "step": 1300 }, { "epoch": 0.16, "grad_norm": 2.1051335057769935, "learning_rate": 9.553835168094302e-06, "loss": 0.5347, "step": 1301 }, { "epoch": 0.16, "grad_norm": 1.7821949833507682, "learning_rate": 9.553004945936101e-06, "loss": 0.5628, "step": 1302 }, { "epoch": 0.16, "grad_norm": 0.7230312544660243, "learning_rate": 9.552173988199854e-06, "loss": 0.5047, "step": 1303 }, { "epoch": 0.16, "grad_norm": 4.525803056649358, "learning_rate": 9.551342295019805e-06, "loss": 0.5135, "step": 1304 }, { "epoch": 0.16, "grad_norm": 0.6930037398549997, "learning_rate": 9.550509866530323e-06, "loss": 0.4996, "step": 1305 }, { "epoch": 0.16, "grad_norm": 1.5104572978125144, "learning_rate": 9.549676702865897e-06, "loss": 0.5371, "step": 1306 }, { "epoch": 0.16, "grad_norm": 1.8562563426485768, "learning_rate": 9.54884280416113e-06, "loss": 0.5612, "step": 1307 }, { "epoch": 0.16, "grad_norm": 1.926794889658595, "learning_rate": 9.548008170550744e-06, "loss": 0.4897, "step": 1308 }, { "epoch": 0.16, "grad_norm": 1.639247567223645, "learning_rate": 9.547172802169582e-06, "loss": 0.5497, "step": 1309 }, { "epoch": 0.16, "grad_norm": 2.8091300839987694, "learning_rate": 9.546336699152608e-06, "loss": 0.5176, "step": 1310 }, { "epoch": 0.16, "grad_norm": 1.9440447348284993, "learning_rate": 9.545499861634897e-06, "loss": 0.5783, "step": 1311 }, { "epoch": 0.16, "grad_norm": 1.8076948729636986, "learning_rate": 9.544662289751651e-06, "loss": 0.5092, "step": 1312 }, { "epoch": 0.16, "grad_norm": 6.715210485185207, "learning_rate": 9.543823983638187e-06, "loss": 0.4853, "step": 1313 }, { "epoch": 0.16, "grad_norm": 3.513563901986961, "learning_rate": 9.54298494342994e-06, "loss": 0.5144, "step": 1314 }, { "epoch": 0.16, "grad_norm": 2.3014768871227727, "learning_rate": 9.542145169262465e-06, "loss": 0.5047, "step": 1315 }, { "epoch": 0.16, "grad_norm": 2.8557169997552467, "learning_rate": 9.541304661271433e-06, "loss": 0.5442, "step": 1316 }, { "epoch": 0.16, "grad_norm": 2.638193691189023, "learning_rate": 9.540463419592638e-06, "loss": 0.5737, "step": 1317 }, { "epoch": 0.16, "grad_norm": 1.4331612514067824, "learning_rate": 9.539621444361988e-06, "loss": 0.4952, "step": 1318 }, { "epoch": 0.16, "grad_norm": 1.814975821494965, "learning_rate": 9.538778735715512e-06, "loss": 0.5214, "step": 1319 }, { "epoch": 0.16, "grad_norm": 1.6358206258929424, "learning_rate": 9.537935293789357e-06, "loss": 0.5852, "step": 1320 }, { "epoch": 0.16, "grad_norm": 11.785035433589641, "learning_rate": 9.53709111871979e-06, "loss": 0.4918, "step": 1321 }, { "epoch": 0.16, "grad_norm": 2.7894871619403805, "learning_rate": 9.536246210643192e-06, "loss": 0.4917, "step": 1322 }, { "epoch": 0.16, "grad_norm": 0.6960296087976123, "learning_rate": 9.535400569696068e-06, "loss": 0.4808, "step": 1323 }, { "epoch": 0.16, "grad_norm": 1.6707368064014259, "learning_rate": 9.534554196015038e-06, "loss": 0.5208, "step": 1324 }, { "epoch": 0.16, "grad_norm": 4.418989617577566, "learning_rate": 9.53370708973684e-06, "loss": 0.5419, "step": 1325 }, { "epoch": 0.16, "grad_norm": 2.9542552706872764, "learning_rate": 9.532859250998332e-06, "loss": 0.5268, "step": 1326 }, { "epoch": 0.16, "grad_norm": 2.109108500431418, "learning_rate": 9.532010679936491e-06, "loss": 0.5321, "step": 1327 }, { "epoch": 0.16, "grad_norm": 2.40541389320721, "learning_rate": 9.53116137668841e-06, "loss": 0.5123, "step": 1328 }, { "epoch": 0.16, "grad_norm": 4.120180365123593, "learning_rate": 9.530311341391303e-06, "loss": 0.5523, "step": 1329 }, { "epoch": 0.17, "grad_norm": 1.837416018100702, "learning_rate": 9.529460574182498e-06, "loss": 0.5293, "step": 1330 }, { "epoch": 0.17, "grad_norm": 1.8130170198172144, "learning_rate": 9.528609075199445e-06, "loss": 0.5078, "step": 1331 }, { "epoch": 0.17, "grad_norm": 3.5671538001377203, "learning_rate": 9.527756844579711e-06, "loss": 0.5376, "step": 1332 }, { "epoch": 0.17, "grad_norm": 1.7851923640009364, "learning_rate": 9.526903882460983e-06, "loss": 0.5495, "step": 1333 }, { "epoch": 0.17, "grad_norm": 2.058190778408725, "learning_rate": 9.526050188981064e-06, "loss": 0.488, "step": 1334 }, { "epoch": 0.17, "grad_norm": 1.9634306303556182, "learning_rate": 9.525195764277874e-06, "loss": 0.5185, "step": 1335 }, { "epoch": 0.17, "grad_norm": 2.032739483474685, "learning_rate": 9.524340608489454e-06, "loss": 0.5238, "step": 1336 }, { "epoch": 0.17, "grad_norm": 3.412786853743459, "learning_rate": 9.523484721753961e-06, "loss": 0.4834, "step": 1337 }, { "epoch": 0.17, "grad_norm": 0.7243055388004805, "learning_rate": 9.522628104209675e-06, "loss": 0.5075, "step": 1338 }, { "epoch": 0.17, "grad_norm": 2.4597665228729926, "learning_rate": 9.521770755994983e-06, "loss": 0.5515, "step": 1339 }, { "epoch": 0.17, "grad_norm": 1.616644720048126, "learning_rate": 9.520912677248403e-06, "loss": 0.5475, "step": 1340 }, { "epoch": 0.17, "grad_norm": 1.5366952523712116, "learning_rate": 9.520053868108566e-06, "loss": 0.4744, "step": 1341 }, { "epoch": 0.17, "grad_norm": 3.09729427720111, "learning_rate": 9.519194328714214e-06, "loss": 0.5455, "step": 1342 }, { "epoch": 0.17, "grad_norm": 3.556155394623956, "learning_rate": 9.518334059204218e-06, "loss": 0.5228, "step": 1343 }, { "epoch": 0.17, "grad_norm": 3.2820341211515673, "learning_rate": 9.517473059717559e-06, "loss": 0.5088, "step": 1344 }, { "epoch": 0.17, "grad_norm": 1.7841649432175868, "learning_rate": 9.516611330393343e-06, "loss": 0.499, "step": 1345 }, { "epoch": 0.17, "grad_norm": 2.3170294309282338, "learning_rate": 9.515748871370786e-06, "loss": 0.5223, "step": 1346 }, { "epoch": 0.17, "grad_norm": 1.7675365545954447, "learning_rate": 9.51488568278923e-06, "loss": 0.5589, "step": 1347 }, { "epoch": 0.17, "grad_norm": 2.876530940821112, "learning_rate": 9.514021764788127e-06, "loss": 0.5056, "step": 1348 }, { "epoch": 0.17, "grad_norm": 1.8763300559518927, "learning_rate": 9.513157117507053e-06, "loss": 0.5009, "step": 1349 }, { "epoch": 0.17, "grad_norm": 2.299660676483901, "learning_rate": 9.512291741085696e-06, "loss": 0.4943, "step": 1350 }, { "epoch": 0.17, "grad_norm": 1.8836219482182772, "learning_rate": 9.51142563566387e-06, "loss": 0.5369, "step": 1351 }, { "epoch": 0.17, "grad_norm": 1.7009943354735515, "learning_rate": 9.510558801381497e-06, "loss": 0.5177, "step": 1352 }, { "epoch": 0.17, "grad_norm": 2.007229156877643, "learning_rate": 9.509691238378626e-06, "loss": 0.5089, "step": 1353 }, { "epoch": 0.17, "grad_norm": 1.4387259770576408, "learning_rate": 9.508822946795417e-06, "loss": 0.4878, "step": 1354 }, { "epoch": 0.17, "grad_norm": 3.1516983884115723, "learning_rate": 9.507953926772152e-06, "loss": 0.5281, "step": 1355 }, { "epoch": 0.17, "grad_norm": 1.8865060786578052, "learning_rate": 9.507084178449226e-06, "loss": 0.5245, "step": 1356 }, { "epoch": 0.17, "grad_norm": 1.8621271424152859, "learning_rate": 9.506213701967157e-06, "loss": 0.5745, "step": 1357 }, { "epoch": 0.17, "grad_norm": 1.6162835398610804, "learning_rate": 9.505342497466577e-06, "loss": 0.5083, "step": 1358 }, { "epoch": 0.17, "grad_norm": 1.9539800242472292, "learning_rate": 9.504470565088237e-06, "loss": 0.5211, "step": 1359 }, { "epoch": 0.17, "grad_norm": 1.8163097041032972, "learning_rate": 9.503597904973006e-06, "loss": 0.5399, "step": 1360 }, { "epoch": 0.17, "grad_norm": 0.6481196635818798, "learning_rate": 9.502724517261867e-06, "loss": 0.4951, "step": 1361 }, { "epoch": 0.17, "grad_norm": 1.7434840971002281, "learning_rate": 9.50185040209593e-06, "loss": 0.4897, "step": 1362 }, { "epoch": 0.17, "grad_norm": 1.938999967638018, "learning_rate": 9.500975559616407e-06, "loss": 0.5322, "step": 1363 }, { "epoch": 0.17, "grad_norm": 1.5166896975682522, "learning_rate": 9.500099989964644e-06, "loss": 0.4906, "step": 1364 }, { "epoch": 0.17, "grad_norm": 0.6493103354228393, "learning_rate": 9.499223693282095e-06, "loss": 0.4824, "step": 1365 }, { "epoch": 0.17, "grad_norm": 1.5580997421435645, "learning_rate": 9.498346669710331e-06, "loss": 0.5484, "step": 1366 }, { "epoch": 0.17, "grad_norm": 1.5722533449608065, "learning_rate": 9.497468919391046e-06, "loss": 0.5286, "step": 1367 }, { "epoch": 0.17, "grad_norm": 2.1379581735023523, "learning_rate": 9.496590442466045e-06, "loss": 0.5285, "step": 1368 }, { "epoch": 0.17, "grad_norm": 6.004542983588922, "learning_rate": 9.495711239077256e-06, "loss": 0.5383, "step": 1369 }, { "epoch": 0.17, "grad_norm": 1.723423923699892, "learning_rate": 9.494831309366723e-06, "loss": 0.5023, "step": 1370 }, { "epoch": 0.17, "grad_norm": 7.294621546390402, "learning_rate": 9.493950653476604e-06, "loss": 0.4739, "step": 1371 }, { "epoch": 0.17, "grad_norm": 1.8962630752434209, "learning_rate": 9.493069271549179e-06, "loss": 0.5278, "step": 1372 }, { "epoch": 0.17, "grad_norm": 1.2888590584106818, "learning_rate": 9.49218716372684e-06, "loss": 0.4836, "step": 1373 }, { "epoch": 0.17, "grad_norm": 1.7787949561950795, "learning_rate": 9.491304330152102e-06, "loss": 0.5112, "step": 1374 }, { "epoch": 0.17, "grad_norm": 2.1784795200831315, "learning_rate": 9.490420770967594e-06, "loss": 0.5267, "step": 1375 }, { "epoch": 0.17, "grad_norm": 1.6375490466954763, "learning_rate": 9.489536486316062e-06, "loss": 0.5242, "step": 1376 }, { "epoch": 0.17, "grad_norm": 1.477956447346823, "learning_rate": 9.48865147634037e-06, "loss": 0.5442, "step": 1377 }, { "epoch": 0.17, "grad_norm": 1.409881186281856, "learning_rate": 9.487765741183499e-06, "loss": 0.4991, "step": 1378 }, { "epoch": 0.17, "grad_norm": 3.6800854486941126, "learning_rate": 9.486879280988548e-06, "loss": 0.5077, "step": 1379 }, { "epoch": 0.17, "grad_norm": 1.9714553284132872, "learning_rate": 9.485992095898734e-06, "loss": 0.4876, "step": 1380 }, { "epoch": 0.17, "grad_norm": 1.5844507509035346, "learning_rate": 9.485104186057386e-06, "loss": 0.4705, "step": 1381 }, { "epoch": 0.17, "grad_norm": 1.4796161279871252, "learning_rate": 9.484215551607956e-06, "loss": 0.4945, "step": 1382 }, { "epoch": 0.17, "grad_norm": 1.3891117518032612, "learning_rate": 9.48332619269401e-06, "loss": 0.5054, "step": 1383 }, { "epoch": 0.17, "grad_norm": 1.7929379067706905, "learning_rate": 9.482436109459231e-06, "loss": 0.5061, "step": 1384 }, { "epoch": 0.17, "grad_norm": 1.6640724080747038, "learning_rate": 9.481545302047423e-06, "loss": 0.502, "step": 1385 }, { "epoch": 0.17, "grad_norm": 1.5469445671131699, "learning_rate": 9.480653770602502e-06, "loss": 0.5031, "step": 1386 }, { "epoch": 0.17, "grad_norm": 1.356349412795799, "learning_rate": 9.479761515268499e-06, "loss": 0.5271, "step": 1387 }, { "epoch": 0.17, "grad_norm": 1.44417765309099, "learning_rate": 9.478868536189571e-06, "loss": 0.4856, "step": 1388 }, { "epoch": 0.17, "grad_norm": 3.332946182241321, "learning_rate": 9.477974833509984e-06, "loss": 0.5198, "step": 1389 }, { "epoch": 0.17, "grad_norm": 1.7290837012935454, "learning_rate": 9.477080407374124e-06, "loss": 0.4916, "step": 1390 }, { "epoch": 0.17, "grad_norm": 4.4563147780501335, "learning_rate": 9.476185257926496e-06, "loss": 0.5489, "step": 1391 }, { "epoch": 0.17, "grad_norm": 1.5845936119723913, "learning_rate": 9.475289385311714e-06, "loss": 0.5404, "step": 1392 }, { "epoch": 0.17, "grad_norm": 0.6851537462201612, "learning_rate": 9.474392789674517e-06, "loss": 0.4969, "step": 1393 }, { "epoch": 0.17, "grad_norm": 1.2924924777343387, "learning_rate": 9.473495471159759e-06, "loss": 0.4973, "step": 1394 }, { "epoch": 0.17, "grad_norm": 1.8189207932754312, "learning_rate": 9.472597429912409e-06, "loss": 0.5301, "step": 1395 }, { "epoch": 0.17, "grad_norm": 3.9522121006208017, "learning_rate": 9.471698666077554e-06, "loss": 0.5599, "step": 1396 }, { "epoch": 0.17, "grad_norm": 3.654001619422945, "learning_rate": 9.470799179800393e-06, "loss": 0.513, "step": 1397 }, { "epoch": 0.17, "grad_norm": 1.4693946073677817, "learning_rate": 9.469898971226251e-06, "loss": 0.4943, "step": 1398 }, { "epoch": 0.17, "grad_norm": 1.352280469067633, "learning_rate": 9.468998040500563e-06, "loss": 0.4851, "step": 1399 }, { "epoch": 0.17, "grad_norm": 1.713410517258502, "learning_rate": 9.468096387768882e-06, "loss": 0.4895, "step": 1400 }, { "epoch": 0.17, "grad_norm": 0.6359837268312648, "learning_rate": 9.467194013176878e-06, "loss": 0.4622, "step": 1401 }, { "epoch": 0.17, "grad_norm": 1.9106064409196937, "learning_rate": 9.466290916870338e-06, "loss": 0.5316, "step": 1402 }, { "epoch": 0.17, "grad_norm": 1.5652596983399456, "learning_rate": 9.465387098995165e-06, "loss": 0.524, "step": 1403 }, { "epoch": 0.17, "grad_norm": 1.4641608430759605, "learning_rate": 9.464482559697377e-06, "loss": 0.5306, "step": 1404 }, { "epoch": 0.17, "grad_norm": 2.365277486999446, "learning_rate": 9.463577299123113e-06, "loss": 0.4959, "step": 1405 }, { "epoch": 0.17, "grad_norm": 1.3531053244732796, "learning_rate": 9.462671317418625e-06, "loss": 0.4777, "step": 1406 }, { "epoch": 0.17, "grad_norm": 1.529796907855802, "learning_rate": 9.461764614730282e-06, "loss": 0.5073, "step": 1407 }, { "epoch": 0.17, "grad_norm": 1.6037050306107454, "learning_rate": 9.46085719120457e-06, "loss": 0.5345, "step": 1408 }, { "epoch": 0.17, "grad_norm": 4.184384273671617, "learning_rate": 9.459949046988089e-06, "loss": 0.5283, "step": 1409 }, { "epoch": 0.17, "grad_norm": 1.971143226559445, "learning_rate": 9.459040182227561e-06, "loss": 0.4918, "step": 1410 }, { "epoch": 0.18, "grad_norm": 1.6162454601784557, "learning_rate": 9.458130597069818e-06, "loss": 0.5123, "step": 1411 }, { "epoch": 0.18, "grad_norm": 2.187548515299508, "learning_rate": 9.457220291661817e-06, "loss": 0.6018, "step": 1412 }, { "epoch": 0.18, "grad_norm": 1.417545035108629, "learning_rate": 9.456309266150621e-06, "loss": 0.5043, "step": 1413 }, { "epoch": 0.18, "grad_norm": 1.6683483655573892, "learning_rate": 9.455397520683414e-06, "loss": 0.5066, "step": 1414 }, { "epoch": 0.18, "grad_norm": 1.8267071607296352, "learning_rate": 9.454485055407498e-06, "loss": 0.5395, "step": 1415 }, { "epoch": 0.18, "grad_norm": 2.2769052311188753, "learning_rate": 9.45357187047029e-06, "loss": 0.5016, "step": 1416 }, { "epoch": 0.18, "grad_norm": 1.5160384864380638, "learning_rate": 9.452657966019324e-06, "loss": 0.5127, "step": 1417 }, { "epoch": 0.18, "grad_norm": 1.5904869300889308, "learning_rate": 9.451743342202248e-06, "loss": 0.5364, "step": 1418 }, { "epoch": 0.18, "grad_norm": 1.6504050121743716, "learning_rate": 9.450827999166825e-06, "loss": 0.5016, "step": 1419 }, { "epoch": 0.18, "grad_norm": 1.5705800991617183, "learning_rate": 9.449911937060943e-06, "loss": 0.5539, "step": 1420 }, { "epoch": 0.18, "grad_norm": 1.4645945472912039, "learning_rate": 9.448995156032595e-06, "loss": 0.5034, "step": 1421 }, { "epoch": 0.18, "grad_norm": 1.4074577541628757, "learning_rate": 9.448077656229895e-06, "loss": 0.5257, "step": 1422 }, { "epoch": 0.18, "grad_norm": 1.6932730921411916, "learning_rate": 9.447159437801074e-06, "loss": 0.525, "step": 1423 }, { "epoch": 0.18, "grad_norm": 1.508238112135693, "learning_rate": 9.44624050089448e-06, "loss": 0.5297, "step": 1424 }, { "epoch": 0.18, "grad_norm": 1.386080118480711, "learning_rate": 9.445320845658574e-06, "loss": 0.4918, "step": 1425 }, { "epoch": 0.18, "grad_norm": 1.7120372086794187, "learning_rate": 9.444400472241934e-06, "loss": 0.4962, "step": 1426 }, { "epoch": 0.18, "grad_norm": 1.2643237638504257, "learning_rate": 9.443479380793256e-06, "loss": 0.5, "step": 1427 }, { "epoch": 0.18, "grad_norm": 0.6849918060542648, "learning_rate": 9.44255757146135e-06, "loss": 0.5596, "step": 1428 }, { "epoch": 0.18, "grad_norm": 1.7220988937001047, "learning_rate": 9.44163504439514e-06, "loss": 0.5398, "step": 1429 }, { "epoch": 0.18, "grad_norm": 1.7402435053337213, "learning_rate": 9.44071179974367e-06, "loss": 0.5265, "step": 1430 }, { "epoch": 0.18, "grad_norm": 1.7330985961158227, "learning_rate": 9.4397878376561e-06, "loss": 0.4891, "step": 1431 }, { "epoch": 0.18, "grad_norm": 1.9559886571410015, "learning_rate": 9.438863158281702e-06, "loss": 0.5233, "step": 1432 }, { "epoch": 0.18, "grad_norm": 1.6463205429913006, "learning_rate": 9.437937761769867e-06, "loss": 0.5349, "step": 1433 }, { "epoch": 0.18, "grad_norm": 1.6663696669907717, "learning_rate": 9.4370116482701e-06, "loss": 0.5131, "step": 1434 }, { "epoch": 0.18, "grad_norm": 0.7039126134585667, "learning_rate": 9.436084817932023e-06, "loss": 0.537, "step": 1435 }, { "epoch": 0.18, "grad_norm": 1.5876792100322659, "learning_rate": 9.435157270905375e-06, "loss": 0.5268, "step": 1436 }, { "epoch": 0.18, "grad_norm": 1.4258461786668768, "learning_rate": 9.434229007340008e-06, "loss": 0.4963, "step": 1437 }, { "epoch": 0.18, "grad_norm": 1.899112215769487, "learning_rate": 9.433300027385891e-06, "loss": 0.5093, "step": 1438 }, { "epoch": 0.18, "grad_norm": 0.5966334451115977, "learning_rate": 9.432370331193112e-06, "loss": 0.506, "step": 1439 }, { "epoch": 0.18, "grad_norm": 2.2001297668171955, "learning_rate": 9.43143991891187e-06, "loss": 0.5542, "step": 1440 }, { "epoch": 0.18, "grad_norm": 1.5979829933620182, "learning_rate": 9.430508790692477e-06, "loss": 0.5353, "step": 1441 }, { "epoch": 0.18, "grad_norm": 1.3938257090441009, "learning_rate": 9.429576946685369e-06, "loss": 0.5508, "step": 1442 }, { "epoch": 0.18, "grad_norm": 1.887500531029625, "learning_rate": 9.428644387041094e-06, "loss": 0.5216, "step": 1443 }, { "epoch": 0.18, "grad_norm": 3.830904789433637, "learning_rate": 9.427711111910314e-06, "loss": 0.559, "step": 1444 }, { "epoch": 0.18, "grad_norm": 1.4963688236773471, "learning_rate": 9.42677712144381e-06, "loss": 0.5282, "step": 1445 }, { "epoch": 0.18, "grad_norm": 1.4798735024063236, "learning_rate": 9.42584241579247e-06, "loss": 0.5549, "step": 1446 }, { "epoch": 0.18, "grad_norm": 1.2502644942892267, "learning_rate": 9.424906995107312e-06, "loss": 0.541, "step": 1447 }, { "epoch": 0.18, "grad_norm": 1.3223247871109725, "learning_rate": 9.423970859539456e-06, "loss": 0.4921, "step": 1448 }, { "epoch": 0.18, "grad_norm": 1.891800694340005, "learning_rate": 9.423034009240146e-06, "loss": 0.4867, "step": 1449 }, { "epoch": 0.18, "grad_norm": 1.5131892594414695, "learning_rate": 9.422096444360736e-06, "loss": 0.5101, "step": 1450 }, { "epoch": 0.18, "grad_norm": 1.4027176765940803, "learning_rate": 9.4211581650527e-06, "loss": 0.4721, "step": 1451 }, { "epoch": 0.18, "grad_norm": 2.05923787696074, "learning_rate": 9.420219171467624e-06, "loss": 0.5484, "step": 1452 }, { "epoch": 0.18, "grad_norm": 1.6191945144015785, "learning_rate": 9.419279463757212e-06, "loss": 0.5314, "step": 1453 }, { "epoch": 0.18, "grad_norm": 1.5430874657099605, "learning_rate": 9.41833904207328e-06, "loss": 0.5428, "step": 1454 }, { "epoch": 0.18, "grad_norm": 1.544150626032831, "learning_rate": 9.417397906567762e-06, "loss": 0.5192, "step": 1455 }, { "epoch": 0.18, "grad_norm": 1.828887008965527, "learning_rate": 9.41645605739271e-06, "loss": 0.5674, "step": 1456 }, { "epoch": 0.18, "grad_norm": 1.446962550807306, "learning_rate": 9.415513494700281e-06, "loss": 0.5534, "step": 1457 }, { "epoch": 0.18, "grad_norm": 1.409505263960533, "learning_rate": 9.414570218642762e-06, "loss": 0.5118, "step": 1458 }, { "epoch": 0.18, "grad_norm": 1.9473100463811759, "learning_rate": 9.413626229372543e-06, "loss": 0.5302, "step": 1459 }, { "epoch": 0.18, "grad_norm": 2.0115230441559584, "learning_rate": 9.412681527042135e-06, "loss": 0.5832, "step": 1460 }, { "epoch": 0.18, "grad_norm": 2.504650850157876, "learning_rate": 9.411736111804161e-06, "loss": 0.5084, "step": 1461 }, { "epoch": 0.18, "grad_norm": 1.5976807919316702, "learning_rate": 9.410789983811366e-06, "loss": 0.5124, "step": 1462 }, { "epoch": 0.18, "grad_norm": 1.7259305630841915, "learning_rate": 9.409843143216602e-06, "loss": 0.5163, "step": 1463 }, { "epoch": 0.18, "grad_norm": 1.3109812844968418, "learning_rate": 9.408895590172837e-06, "loss": 0.5135, "step": 1464 }, { "epoch": 0.18, "grad_norm": 1.501449859949129, "learning_rate": 9.407947324833161e-06, "loss": 0.5308, "step": 1465 }, { "epoch": 0.18, "grad_norm": 1.8953543710540945, "learning_rate": 9.406998347350774e-06, "loss": 0.4843, "step": 1466 }, { "epoch": 0.18, "grad_norm": 0.7027642277487309, "learning_rate": 9.40604865787899e-06, "loss": 0.4771, "step": 1467 }, { "epoch": 0.18, "grad_norm": 1.7233924413310757, "learning_rate": 9.40509825657124e-06, "loss": 0.5098, "step": 1468 }, { "epoch": 0.18, "grad_norm": 0.7107652884518568, "learning_rate": 9.40414714358107e-06, "loss": 0.5001, "step": 1469 }, { "epoch": 0.18, "grad_norm": 1.582631591668211, "learning_rate": 9.403195319062142e-06, "loss": 0.5248, "step": 1470 }, { "epoch": 0.18, "grad_norm": 2.387342543665346, "learning_rate": 9.402242783168228e-06, "loss": 0.5538, "step": 1471 }, { "epoch": 0.18, "grad_norm": 0.672928018604899, "learning_rate": 9.401289536053223e-06, "loss": 0.5351, "step": 1472 }, { "epoch": 0.18, "grad_norm": 1.9915864472791334, "learning_rate": 9.400335577871128e-06, "loss": 0.5635, "step": 1473 }, { "epoch": 0.18, "grad_norm": 1.6520622595751824, "learning_rate": 9.399380908776068e-06, "loss": 0.5298, "step": 1474 }, { "epoch": 0.18, "grad_norm": 1.931210566761977, "learning_rate": 9.398425528922275e-06, "loss": 0.558, "step": 1475 }, { "epoch": 0.18, "grad_norm": 2.7686227437021595, "learning_rate": 9.3974694384641e-06, "loss": 0.5278, "step": 1476 }, { "epoch": 0.18, "grad_norm": 1.5142014550507987, "learning_rate": 9.396512637556007e-06, "loss": 0.4852, "step": 1477 }, { "epoch": 0.18, "grad_norm": 2.121450635344311, "learning_rate": 9.395555126352576e-06, "loss": 0.5849, "step": 1478 }, { "epoch": 0.18, "grad_norm": 1.9456554463436584, "learning_rate": 9.394596905008504e-06, "loss": 0.5376, "step": 1479 }, { "epoch": 0.18, "grad_norm": 1.697792582575253, "learning_rate": 9.393637973678595e-06, "loss": 0.5283, "step": 1480 }, { "epoch": 0.18, "grad_norm": 3.311532952026007, "learning_rate": 9.392678332517778e-06, "loss": 0.5285, "step": 1481 }, { "epoch": 0.18, "grad_norm": 1.6887015599370656, "learning_rate": 9.391717981681089e-06, "loss": 0.5606, "step": 1482 }, { "epoch": 0.18, "grad_norm": 1.8749079141161675, "learning_rate": 9.39075692132368e-06, "loss": 0.5518, "step": 1483 }, { "epoch": 0.18, "grad_norm": 2.5186622420467013, "learning_rate": 9.38979515160082e-06, "loss": 0.5646, "step": 1484 }, { "epoch": 0.18, "grad_norm": 1.5302094559262753, "learning_rate": 9.388832672667893e-06, "loss": 0.5038, "step": 1485 }, { "epoch": 0.18, "grad_norm": 1.7591373713642573, "learning_rate": 9.387869484680395e-06, "loss": 0.5247, "step": 1486 }, { "epoch": 0.18, "grad_norm": 1.9377967760499746, "learning_rate": 9.386905587793933e-06, "loss": 0.5463, "step": 1487 }, { "epoch": 0.18, "grad_norm": 1.3403669028777074, "learning_rate": 9.385940982164239e-06, "loss": 0.5146, "step": 1488 }, { "epoch": 0.18, "grad_norm": 1.811749023747539, "learning_rate": 9.384975667947152e-06, "loss": 0.5511, "step": 1489 }, { "epoch": 0.18, "grad_norm": 4.577574886181875, "learning_rate": 9.384009645298627e-06, "loss": 0.5508, "step": 1490 }, { "epoch": 0.19, "grad_norm": 5.301454623132674, "learning_rate": 9.383042914374731e-06, "loss": 0.5493, "step": 1491 }, { "epoch": 0.19, "grad_norm": 2.4533170252188423, "learning_rate": 9.382075475331652e-06, "loss": 0.5466, "step": 1492 }, { "epoch": 0.19, "grad_norm": 2.5421032500320355, "learning_rate": 9.381107328325683e-06, "loss": 0.52, "step": 1493 }, { "epoch": 0.19, "grad_norm": 4.352474522527251, "learning_rate": 9.380138473513241e-06, "loss": 0.4819, "step": 1494 }, { "epoch": 0.19, "grad_norm": 2.3336804024823565, "learning_rate": 9.379168911050853e-06, "loss": 0.5604, "step": 1495 }, { "epoch": 0.19, "grad_norm": 2.450935676102443, "learning_rate": 9.378198641095159e-06, "loss": 0.5465, "step": 1496 }, { "epoch": 0.19, "grad_norm": 1.8670887608442013, "learning_rate": 9.377227663802913e-06, "loss": 0.5355, "step": 1497 }, { "epoch": 0.19, "grad_norm": 0.8171098999218793, "learning_rate": 9.376255979330988e-06, "loss": 0.5302, "step": 1498 }, { "epoch": 0.19, "grad_norm": 2.2358050289181253, "learning_rate": 9.375283587836368e-06, "loss": 0.5323, "step": 1499 }, { "epoch": 0.19, "grad_norm": 3.586997073057225, "learning_rate": 9.374310489476149e-06, "loss": 0.5512, "step": 1500 }, { "epoch": 0.19, "grad_norm": 4.55579447130064, "learning_rate": 9.373336684407545e-06, "loss": 0.5447, "step": 1501 }, { "epoch": 0.19, "grad_norm": 1.6129422186439102, "learning_rate": 9.372362172787882e-06, "loss": 0.4549, "step": 1502 }, { "epoch": 0.19, "grad_norm": 6.055117530737087, "learning_rate": 9.371386954774603e-06, "loss": 0.5521, "step": 1503 }, { "epoch": 0.19, "grad_norm": 1.6137472317864316, "learning_rate": 9.370411030525261e-06, "loss": 0.4941, "step": 1504 }, { "epoch": 0.19, "grad_norm": 0.6825693446935227, "learning_rate": 9.369434400197526e-06, "loss": 0.4765, "step": 1505 }, { "epoch": 0.19, "grad_norm": 3.0047785918869723, "learning_rate": 9.36845706394918e-06, "loss": 0.4718, "step": 1506 }, { "epoch": 0.19, "grad_norm": 2.184404353395833, "learning_rate": 9.367479021938123e-06, "loss": 0.5639, "step": 1507 }, { "epoch": 0.19, "grad_norm": 3.128493573070691, "learning_rate": 9.366500274322365e-06, "loss": 0.5322, "step": 1508 }, { "epoch": 0.19, "grad_norm": 2.2077003304423544, "learning_rate": 9.36552082126003e-06, "loss": 0.5479, "step": 1509 }, { "epoch": 0.19, "grad_norm": 2.5001203755032226, "learning_rate": 9.364540662909358e-06, "loss": 0.5705, "step": 1510 }, { "epoch": 0.19, "grad_norm": 1.706994629835939, "learning_rate": 9.363559799428704e-06, "loss": 0.5147, "step": 1511 }, { "epoch": 0.19, "grad_norm": 1.9071940375807923, "learning_rate": 9.362578230976532e-06, "loss": 0.5358, "step": 1512 }, { "epoch": 0.19, "grad_norm": 2.199072784364784, "learning_rate": 9.361595957711425e-06, "loss": 0.5274, "step": 1513 }, { "epoch": 0.19, "grad_norm": 3.0735830354933347, "learning_rate": 9.360612979792078e-06, "loss": 0.5311, "step": 1514 }, { "epoch": 0.19, "grad_norm": 2.2066166573545036, "learning_rate": 9.3596292973773e-06, "loss": 0.4923, "step": 1515 }, { "epoch": 0.19, "grad_norm": 2.4159872038811874, "learning_rate": 9.358644910626012e-06, "loss": 0.54, "step": 1516 }, { "epoch": 0.19, "grad_norm": 2.479018280513656, "learning_rate": 9.35765981969725e-06, "loss": 0.5287, "step": 1517 }, { "epoch": 0.19, "grad_norm": 3.089308304624241, "learning_rate": 9.356674024750166e-06, "loss": 0.5309, "step": 1518 }, { "epoch": 0.19, "grad_norm": 2.2037636350371397, "learning_rate": 9.355687525944025e-06, "loss": 0.5754, "step": 1519 }, { "epoch": 0.19, "grad_norm": 2.3408652508775556, "learning_rate": 9.3547003234382e-06, "loss": 0.4907, "step": 1520 }, { "epoch": 0.19, "grad_norm": 1.913428552260077, "learning_rate": 9.353712417392186e-06, "loss": 0.5455, "step": 1521 }, { "epoch": 0.19, "grad_norm": 3.6113559679228633, "learning_rate": 9.352723807965586e-06, "loss": 0.5563, "step": 1522 }, { "epoch": 0.19, "grad_norm": 1.8314547370094012, "learning_rate": 9.35173449531812e-06, "loss": 0.4734, "step": 1523 }, { "epoch": 0.19, "grad_norm": 2.0034022703098637, "learning_rate": 9.350744479609622e-06, "loss": 0.4954, "step": 1524 }, { "epoch": 0.19, "grad_norm": 1.9027907885598594, "learning_rate": 9.349753761000034e-06, "loss": 0.5279, "step": 1525 }, { "epoch": 0.19, "grad_norm": 1.928394262689405, "learning_rate": 9.348762339649416e-06, "loss": 0.5482, "step": 1526 }, { "epoch": 0.19, "grad_norm": 2.638848020410885, "learning_rate": 9.347770215717941e-06, "loss": 0.6168, "step": 1527 }, { "epoch": 0.19, "grad_norm": 1.9437404774526785, "learning_rate": 9.346777389365896e-06, "loss": 0.5453, "step": 1528 }, { "epoch": 0.19, "grad_norm": 5.581529142975484, "learning_rate": 9.345783860753681e-06, "loss": 0.5647, "step": 1529 }, { "epoch": 0.19, "grad_norm": 32.45158123886623, "learning_rate": 9.344789630041811e-06, "loss": 0.5425, "step": 1530 }, { "epoch": 0.19, "grad_norm": 2.4210339557850866, "learning_rate": 9.343794697390908e-06, "loss": 0.5446, "step": 1531 }, { "epoch": 0.19, "grad_norm": 2.0194607726636917, "learning_rate": 9.342799062961716e-06, "loss": 0.5179, "step": 1532 }, { "epoch": 0.19, "grad_norm": 3.666606781187195, "learning_rate": 9.341802726915088e-06, "loss": 0.5297, "step": 1533 }, { "epoch": 0.19, "grad_norm": 2.596514276782259, "learning_rate": 9.340805689411989e-06, "loss": 0.4748, "step": 1534 }, { "epoch": 0.19, "grad_norm": 4.127376711598774, "learning_rate": 9.339807950613502e-06, "loss": 0.5213, "step": 1535 }, { "epoch": 0.19, "grad_norm": 4.213578919065974, "learning_rate": 9.338809510680818e-06, "loss": 0.4784, "step": 1536 }, { "epoch": 0.19, "grad_norm": 2.6406784786202318, "learning_rate": 9.337810369775245e-06, "loss": 0.5167, "step": 1537 }, { "epoch": 0.19, "grad_norm": 4.710331055507111, "learning_rate": 9.336810528058202e-06, "loss": 0.513, "step": 1538 }, { "epoch": 0.19, "grad_norm": 2.1709666221921617, "learning_rate": 9.335809985691224e-06, "loss": 0.5563, "step": 1539 }, { "epoch": 0.19, "grad_norm": 2.128357963800714, "learning_rate": 9.334808742835956e-06, "loss": 0.5162, "step": 1540 }, { "epoch": 0.19, "grad_norm": 2.18402549553364, "learning_rate": 9.33380679965416e-06, "loss": 0.5271, "step": 1541 }, { "epoch": 0.19, "grad_norm": 3.855067283290758, "learning_rate": 9.332804156307705e-06, "loss": 0.4588, "step": 1542 }, { "epoch": 0.19, "grad_norm": 1.7396959013303646, "learning_rate": 9.331800812958576e-06, "loss": 0.4747, "step": 1543 }, { "epoch": 0.19, "grad_norm": 2.2383869045867417, "learning_rate": 9.330796769768876e-06, "loss": 0.4842, "step": 1544 }, { "epoch": 0.19, "grad_norm": 5.501740378958404, "learning_rate": 9.32979202690082e-06, "loss": 0.5378, "step": 1545 }, { "epoch": 0.19, "grad_norm": 2.5750775887331003, "learning_rate": 9.328786584516725e-06, "loss": 0.5284, "step": 1546 }, { "epoch": 0.19, "grad_norm": 1.8052053950979472, "learning_rate": 9.327780442779032e-06, "loss": 0.4851, "step": 1547 }, { "epoch": 0.19, "grad_norm": 2.419443490992145, "learning_rate": 9.326773601850294e-06, "loss": 0.4742, "step": 1548 }, { "epoch": 0.19, "grad_norm": 1.79993293703367, "learning_rate": 9.325766061893174e-06, "loss": 0.5099, "step": 1549 }, { "epoch": 0.19, "grad_norm": 1.9208065957405682, "learning_rate": 9.324757823070448e-06, "loss": 0.4399, "step": 1550 }, { "epoch": 0.19, "grad_norm": 2.868678691846867, "learning_rate": 9.323748885545006e-06, "loss": 0.5128, "step": 1551 }, { "epoch": 0.19, "grad_norm": 2.714265342211101, "learning_rate": 9.322739249479853e-06, "loss": 0.5015, "step": 1552 }, { "epoch": 0.19, "grad_norm": 1.9555294131012604, "learning_rate": 9.321728915038101e-06, "loss": 0.4642, "step": 1553 }, { "epoch": 0.19, "grad_norm": 1.956602369828446, "learning_rate": 9.320717882382983e-06, "loss": 0.5373, "step": 1554 }, { "epoch": 0.19, "grad_norm": 2.0660493606510353, "learning_rate": 9.319706151677837e-06, "loss": 0.5311, "step": 1555 }, { "epoch": 0.19, "grad_norm": 1.6164649314832973, "learning_rate": 9.318693723086117e-06, "loss": 0.5194, "step": 1556 }, { "epoch": 0.19, "grad_norm": 2.8465365990890934, "learning_rate": 9.317680596771389e-06, "loss": 0.5243, "step": 1557 }, { "epoch": 0.19, "grad_norm": 2.587440187024195, "learning_rate": 9.316666772897336e-06, "loss": 0.5472, "step": 1558 }, { "epoch": 0.19, "grad_norm": 1.9684250433411536, "learning_rate": 9.315652251627747e-06, "loss": 0.5507, "step": 1559 }, { "epoch": 0.19, "grad_norm": 2.7791357535604546, "learning_rate": 9.314637033126529e-06, "loss": 0.4917, "step": 1560 }, { "epoch": 0.19, "grad_norm": 2.948195593969024, "learning_rate": 9.313621117557696e-06, "loss": 0.5052, "step": 1561 }, { "epoch": 0.19, "grad_norm": 1.9947405314980446, "learning_rate": 9.312604505085383e-06, "loss": 0.4566, "step": 1562 }, { "epoch": 0.19, "grad_norm": 2.2847089946324814, "learning_rate": 9.311587195873828e-06, "loss": 0.5639, "step": 1563 }, { "epoch": 0.19, "grad_norm": 1.6519078478731801, "learning_rate": 9.310569190087389e-06, "loss": 0.5206, "step": 1564 }, { "epoch": 0.19, "grad_norm": 5.990246449649651, "learning_rate": 9.309550487890533e-06, "loss": 0.5136, "step": 1565 }, { "epoch": 0.19, "grad_norm": 1.839093204640025, "learning_rate": 9.308531089447842e-06, "loss": 0.5123, "step": 1566 }, { "epoch": 0.19, "grad_norm": 8.90895040912295, "learning_rate": 9.307510994924008e-06, "loss": 0.5208, "step": 1567 }, { "epoch": 0.19, "grad_norm": 0.6585239573059181, "learning_rate": 9.306490204483834e-06, "loss": 0.4448, "step": 1568 }, { "epoch": 0.19, "grad_norm": 2.499030323961249, "learning_rate": 9.305468718292239e-06, "loss": 0.5165, "step": 1569 }, { "epoch": 0.19, "grad_norm": 2.685184213028577, "learning_rate": 9.304446536514253e-06, "loss": 0.5815, "step": 1570 }, { "epoch": 0.19, "grad_norm": 4.653346665510227, "learning_rate": 9.303423659315021e-06, "loss": 0.5341, "step": 1571 }, { "epoch": 0.2, "grad_norm": 4.487347787640413, "learning_rate": 9.302400086859792e-06, "loss": 0.4511, "step": 1572 }, { "epoch": 0.2, "grad_norm": 11.835863837095015, "learning_rate": 9.30137581931394e-06, "loss": 0.5304, "step": 1573 }, { "epoch": 0.2, "grad_norm": 2.2039324524357453, "learning_rate": 9.30035085684294e-06, "loss": 0.4979, "step": 1574 }, { "epoch": 0.2, "grad_norm": 1.9724914467420795, "learning_rate": 9.299325199612387e-06, "loss": 0.5541, "step": 1575 }, { "epoch": 0.2, "grad_norm": 0.7016534015009618, "learning_rate": 9.29829884778798e-06, "loss": 0.501, "step": 1576 }, { "epoch": 0.2, "grad_norm": 0.6458296648826666, "learning_rate": 9.29727180153554e-06, "loss": 0.4692, "step": 1577 }, { "epoch": 0.2, "grad_norm": 2.2079147099479624, "learning_rate": 9.296244061020993e-06, "loss": 0.5151, "step": 1578 }, { "epoch": 0.2, "grad_norm": 1.636960136634182, "learning_rate": 9.295215626410382e-06, "loss": 0.5485, "step": 1579 }, { "epoch": 0.2, "grad_norm": 4.709804496192575, "learning_rate": 9.294186497869854e-06, "loss": 0.4918, "step": 1580 }, { "epoch": 0.2, "grad_norm": 2.4096354356444096, "learning_rate": 9.29315667556568e-06, "loss": 0.4803, "step": 1581 }, { "epoch": 0.2, "grad_norm": 3.0354798485566814, "learning_rate": 9.292126159664231e-06, "loss": 0.509, "step": 1582 }, { "epoch": 0.2, "grad_norm": 2.2221754554853494, "learning_rate": 9.291094950332002e-06, "loss": 0.5475, "step": 1583 }, { "epoch": 0.2, "grad_norm": 1.9423280254716395, "learning_rate": 9.290063047735592e-06, "loss": 0.5052, "step": 1584 }, { "epoch": 0.2, "grad_norm": 2.1675876477187406, "learning_rate": 9.289030452041712e-06, "loss": 0.5406, "step": 1585 }, { "epoch": 0.2, "grad_norm": 2.0501627862867737, "learning_rate": 9.287997163417189e-06, "loss": 0.5008, "step": 1586 }, { "epoch": 0.2, "grad_norm": 1.9004854423947835, "learning_rate": 9.286963182028956e-06, "loss": 0.4894, "step": 1587 }, { "epoch": 0.2, "grad_norm": 2.1099537386796596, "learning_rate": 9.285928508044067e-06, "loss": 0.5329, "step": 1588 }, { "epoch": 0.2, "grad_norm": 2.8551813231815677, "learning_rate": 9.284893141629681e-06, "loss": 0.5531, "step": 1589 }, { "epoch": 0.2, "grad_norm": 1.793130285133734, "learning_rate": 9.283857082953069e-06, "loss": 0.4937, "step": 1590 }, { "epoch": 0.2, "grad_norm": 2.0523223005721443, "learning_rate": 9.282820332181617e-06, "loss": 0.5133, "step": 1591 }, { "epoch": 0.2, "grad_norm": 3.8939020501115116, "learning_rate": 9.281782889482819e-06, "loss": 0.4925, "step": 1592 }, { "epoch": 0.2, "grad_norm": 2.0010390620354106, "learning_rate": 9.280744755024286e-06, "loss": 0.5246, "step": 1593 }, { "epoch": 0.2, "grad_norm": 2.0795143229091786, "learning_rate": 9.279705928973736e-06, "loss": 0.5206, "step": 1594 }, { "epoch": 0.2, "grad_norm": 8.924256171448095, "learning_rate": 9.278666411499e-06, "loss": 0.507, "step": 1595 }, { "epoch": 0.2, "grad_norm": 2.186584015434262, "learning_rate": 9.277626202768024e-06, "loss": 0.5503, "step": 1596 }, { "epoch": 0.2, "grad_norm": 6.910497080223889, "learning_rate": 9.276585302948861e-06, "loss": 0.4957, "step": 1597 }, { "epoch": 0.2, "grad_norm": 3.177423463231272, "learning_rate": 9.275543712209675e-06, "loss": 0.546, "step": 1598 }, { "epoch": 0.2, "grad_norm": 0.7135803240589209, "learning_rate": 9.27450143071875e-06, "loss": 0.4871, "step": 1599 }, { "epoch": 0.2, "grad_norm": 7.6993256970327915, "learning_rate": 9.27345845864447e-06, "loss": 0.5413, "step": 1600 }, { "epoch": 0.2, "grad_norm": 2.760557263160197, "learning_rate": 9.27241479615534e-06, "loss": 0.5103, "step": 1601 }, { "epoch": 0.2, "grad_norm": 1.91286078054797, "learning_rate": 9.271370443419971e-06, "loss": 0.5083, "step": 1602 }, { "epoch": 0.2, "grad_norm": 2.2604935256819743, "learning_rate": 9.270325400607087e-06, "loss": 0.5342, "step": 1603 }, { "epoch": 0.2, "grad_norm": 1.506226488554768, "learning_rate": 9.269279667885527e-06, "loss": 0.5538, "step": 1604 }, { "epoch": 0.2, "grad_norm": 2.2697591097698377, "learning_rate": 9.268233245424235e-06, "loss": 0.5124, "step": 1605 }, { "epoch": 0.2, "grad_norm": 1.5951640188619323, "learning_rate": 9.267186133392272e-06, "loss": 0.5352, "step": 1606 }, { "epoch": 0.2, "grad_norm": 2.932511232716658, "learning_rate": 9.266138331958805e-06, "loss": 0.4822, "step": 1607 }, { "epoch": 0.2, "grad_norm": 2.9386140715583693, "learning_rate": 9.265089841293119e-06, "loss": 0.4821, "step": 1608 }, { "epoch": 0.2, "grad_norm": 0.6560129362849357, "learning_rate": 9.264040661564606e-06, "loss": 0.4677, "step": 1609 }, { "epoch": 0.2, "grad_norm": 1.8727163082095566, "learning_rate": 9.262990792942769e-06, "loss": 0.5077, "step": 1610 }, { "epoch": 0.2, "grad_norm": 0.6686106337025884, "learning_rate": 9.261940235597225e-06, "loss": 0.4837, "step": 1611 }, { "epoch": 0.2, "grad_norm": 1.7054558852668906, "learning_rate": 9.260888989697699e-06, "loss": 0.5034, "step": 1612 }, { "epoch": 0.2, "grad_norm": 1.5885063961864796, "learning_rate": 9.259837055414032e-06, "loss": 0.5449, "step": 1613 }, { "epoch": 0.2, "grad_norm": 1.5075623137868153, "learning_rate": 9.258784432916169e-06, "loss": 0.5099, "step": 1614 }, { "epoch": 0.2, "grad_norm": 2.4840673530181774, "learning_rate": 9.257731122374175e-06, "loss": 0.5326, "step": 1615 }, { "epoch": 0.2, "grad_norm": 1.5251406015657, "learning_rate": 9.256677123958218e-06, "loss": 0.4976, "step": 1616 }, { "epoch": 0.2, "grad_norm": 1.6575980627911382, "learning_rate": 9.255622437838583e-06, "loss": 0.4913, "step": 1617 }, { "epoch": 0.2, "grad_norm": 1.5594753292416925, "learning_rate": 9.254567064185662e-06, "loss": 0.5068, "step": 1618 }, { "epoch": 0.2, "grad_norm": 3.8252392196648803, "learning_rate": 9.253511003169962e-06, "loss": 0.5559, "step": 1619 }, { "epoch": 0.2, "grad_norm": 6.602915905442299, "learning_rate": 9.252454254962098e-06, "loss": 0.5077, "step": 1620 }, { "epoch": 0.2, "grad_norm": 2.3477427111270988, "learning_rate": 9.251396819732796e-06, "loss": 0.5271, "step": 1621 }, { "epoch": 0.2, "grad_norm": 2.08775411330249, "learning_rate": 9.250338697652894e-06, "loss": 0.5081, "step": 1622 }, { "epoch": 0.2, "grad_norm": 1.6607209226617103, "learning_rate": 9.249279888893343e-06, "loss": 0.5438, "step": 1623 }, { "epoch": 0.2, "grad_norm": 1.6352177119328937, "learning_rate": 9.248220393625203e-06, "loss": 0.5325, "step": 1624 }, { "epoch": 0.2, "grad_norm": 2.1768861690048777, "learning_rate": 9.247160212019642e-06, "loss": 0.5183, "step": 1625 }, { "epoch": 0.2, "grad_norm": 2.385573126299658, "learning_rate": 9.246099344247942e-06, "loss": 0.5403, "step": 1626 }, { "epoch": 0.2, "grad_norm": 3.489702228564718, "learning_rate": 9.245037790481497e-06, "loss": 0.5016, "step": 1627 }, { "epoch": 0.2, "grad_norm": 3.341294944395603, "learning_rate": 9.243975550891811e-06, "loss": 0.4733, "step": 1628 }, { "epoch": 0.2, "grad_norm": 3.3586603475734966, "learning_rate": 9.242912625650497e-06, "loss": 0.499, "step": 1629 }, { "epoch": 0.2, "grad_norm": 2.2481656074495446, "learning_rate": 9.24184901492928e-06, "loss": 0.4805, "step": 1630 }, { "epoch": 0.2, "grad_norm": 2.3536741952704943, "learning_rate": 9.240784718899996e-06, "loss": 0.5595, "step": 1631 }, { "epoch": 0.2, "grad_norm": 2.099961793335952, "learning_rate": 9.23971973773459e-06, "loss": 0.463, "step": 1632 }, { "epoch": 0.2, "grad_norm": 2.036367273374173, "learning_rate": 9.238654071605122e-06, "loss": 0.5815, "step": 1633 }, { "epoch": 0.2, "grad_norm": 1.6846706618374232, "learning_rate": 9.237587720683757e-06, "loss": 0.5025, "step": 1634 }, { "epoch": 0.2, "grad_norm": 1.7727570659719454, "learning_rate": 9.236520685142774e-06, "loss": 0.4953, "step": 1635 }, { "epoch": 0.2, "grad_norm": 2.125186372058996, "learning_rate": 9.235452965154563e-06, "loss": 0.4817, "step": 1636 }, { "epoch": 0.2, "grad_norm": 2.119531132765183, "learning_rate": 9.234384560891623e-06, "loss": 0.5453, "step": 1637 }, { "epoch": 0.2, "grad_norm": 1.7431348160011935, "learning_rate": 9.233315472526564e-06, "loss": 0.5565, "step": 1638 }, { "epoch": 0.2, "grad_norm": 2.0585035517080037, "learning_rate": 9.232245700232106e-06, "loss": 0.5027, "step": 1639 }, { "epoch": 0.2, "grad_norm": 1.890438150417577, "learning_rate": 9.231175244181081e-06, "loss": 0.5089, "step": 1640 }, { "epoch": 0.2, "grad_norm": 1.7057878522990162, "learning_rate": 9.230104104546432e-06, "loss": 0.4724, "step": 1641 }, { "epoch": 0.2, "grad_norm": 2.412450440165766, "learning_rate": 9.229032281501209e-06, "loss": 0.4912, "step": 1642 }, { "epoch": 0.2, "grad_norm": 1.886096384576177, "learning_rate": 9.227959775218573e-06, "loss": 0.5774, "step": 1643 }, { "epoch": 0.2, "grad_norm": 46.04729752018606, "learning_rate": 9.2268865858718e-06, "loss": 0.5116, "step": 1644 }, { "epoch": 0.2, "grad_norm": 1.643269640799219, "learning_rate": 9.225812713634272e-06, "loss": 0.5138, "step": 1645 }, { "epoch": 0.2, "grad_norm": 2.3246059542850226, "learning_rate": 9.224738158679482e-06, "loss": 0.4874, "step": 1646 }, { "epoch": 0.2, "grad_norm": 1.9035809681676414, "learning_rate": 9.223662921181036e-06, "loss": 0.5225, "step": 1647 }, { "epoch": 0.2, "grad_norm": 4.151399192600683, "learning_rate": 9.222587001312643e-06, "loss": 0.532, "step": 1648 }, { "epoch": 0.2, "grad_norm": 6.844265666331204, "learning_rate": 9.221510399248135e-06, "loss": 0.5036, "step": 1649 }, { "epoch": 0.2, "grad_norm": 1.4196112703467962, "learning_rate": 9.220433115161438e-06, "loss": 0.4798, "step": 1650 }, { "epoch": 0.2, "grad_norm": 2.252588897696059, "learning_rate": 9.219355149226604e-06, "loss": 0.5083, "step": 1651 }, { "epoch": 0.21, "grad_norm": 0.6791610636974595, "learning_rate": 9.218276501617784e-06, "loss": 0.5234, "step": 1652 }, { "epoch": 0.21, "grad_norm": 1.6705201817878617, "learning_rate": 9.217197172509245e-06, "loss": 0.4772, "step": 1653 }, { "epoch": 0.21, "grad_norm": 1.5807569647746058, "learning_rate": 9.216117162075358e-06, "loss": 0.537, "step": 1654 }, { "epoch": 0.21, "grad_norm": 1.9277385315437, "learning_rate": 9.215036470490614e-06, "loss": 0.4648, "step": 1655 }, { "epoch": 0.21, "grad_norm": 5.4803467893276085, "learning_rate": 9.213955097929605e-06, "loss": 0.5672, "step": 1656 }, { "epoch": 0.21, "grad_norm": 6.188819558803046, "learning_rate": 9.212873044567037e-06, "loss": 0.5753, "step": 1657 }, { "epoch": 0.21, "grad_norm": 1.865195647290685, "learning_rate": 9.211790310577723e-06, "loss": 0.5022, "step": 1658 }, { "epoch": 0.21, "grad_norm": 1.4354281045268755, "learning_rate": 9.210706896136592e-06, "loss": 0.4858, "step": 1659 }, { "epoch": 0.21, "grad_norm": 3.2891107036246554, "learning_rate": 9.209622801418676e-06, "loss": 0.4664, "step": 1660 }, { "epoch": 0.21, "grad_norm": 2.4323787584802106, "learning_rate": 9.208538026599124e-06, "loss": 0.4623, "step": 1661 }, { "epoch": 0.21, "grad_norm": 6.08512998477563, "learning_rate": 9.207452571853184e-06, "loss": 0.5376, "step": 1662 }, { "epoch": 0.21, "grad_norm": 1.5491397801481632, "learning_rate": 9.206366437356229e-06, "loss": 0.4774, "step": 1663 }, { "epoch": 0.21, "grad_norm": 2.6062166151520283, "learning_rate": 9.205279623283726e-06, "loss": 0.5563, "step": 1664 }, { "epoch": 0.21, "grad_norm": 4.764291375497078, "learning_rate": 9.204192129811267e-06, "loss": 0.5121, "step": 1665 }, { "epoch": 0.21, "grad_norm": 1.7158970499491477, "learning_rate": 9.203103957114542e-06, "loss": 0.5255, "step": 1666 }, { "epoch": 0.21, "grad_norm": 1.4593279660480194, "learning_rate": 9.202015105369352e-06, "loss": 0.4987, "step": 1667 }, { "epoch": 0.21, "grad_norm": 1.8706544392705933, "learning_rate": 9.200925574751616e-06, "loss": 0.5272, "step": 1668 }, { "epoch": 0.21, "grad_norm": 3.5665826039814967, "learning_rate": 9.199835365437353e-06, "loss": 0.4803, "step": 1669 }, { "epoch": 0.21, "grad_norm": 1.5876414430314243, "learning_rate": 9.1987444776027e-06, "loss": 0.4969, "step": 1670 }, { "epoch": 0.21, "grad_norm": 1.8052599557337923, "learning_rate": 9.197652911423896e-06, "loss": 0.4989, "step": 1671 }, { "epoch": 0.21, "grad_norm": 2.1687653891029224, "learning_rate": 9.196560667077294e-06, "loss": 0.5182, "step": 1672 }, { "epoch": 0.21, "grad_norm": 1.9959404100536227, "learning_rate": 9.19546774473936e-06, "loss": 0.5439, "step": 1673 }, { "epoch": 0.21, "grad_norm": 1.6202007061973254, "learning_rate": 9.194374144586657e-06, "loss": 0.5115, "step": 1674 }, { "epoch": 0.21, "grad_norm": 1.9857764472674335, "learning_rate": 9.193279866795872e-06, "loss": 0.5182, "step": 1675 }, { "epoch": 0.21, "grad_norm": 1.9178192223929929, "learning_rate": 9.192184911543794e-06, "loss": 0.4937, "step": 1676 }, { "epoch": 0.21, "grad_norm": 3.602441531524967, "learning_rate": 9.19108927900732e-06, "loss": 0.5622, "step": 1677 }, { "epoch": 0.21, "grad_norm": 1.7741676838270075, "learning_rate": 9.189992969363463e-06, "loss": 0.4866, "step": 1678 }, { "epoch": 0.21, "grad_norm": 1.48544166609113, "learning_rate": 9.18889598278934e-06, "loss": 0.5614, "step": 1679 }, { "epoch": 0.21, "grad_norm": 1.533303252704799, "learning_rate": 9.187798319462177e-06, "loss": 0.5184, "step": 1680 }, { "epoch": 0.21, "grad_norm": 1.6051159343776416, "learning_rate": 9.186699979559314e-06, "loss": 0.5394, "step": 1681 }, { "epoch": 0.21, "grad_norm": 2.432811234549796, "learning_rate": 9.185600963258194e-06, "loss": 0.4813, "step": 1682 }, { "epoch": 0.21, "grad_norm": 1.408143189466315, "learning_rate": 9.184501270736378e-06, "loss": 0.471, "step": 1683 }, { "epoch": 0.21, "grad_norm": 0.659302765312688, "learning_rate": 9.183400902171527e-06, "loss": 0.5021, "step": 1684 }, { "epoch": 0.21, "grad_norm": 1.3610849090522068, "learning_rate": 9.182299857741415e-06, "loss": 0.4949, "step": 1685 }, { "epoch": 0.21, "grad_norm": 1.7850082372585498, "learning_rate": 9.181198137623925e-06, "loss": 0.5339, "step": 1686 }, { "epoch": 0.21, "grad_norm": 1.7702658610128066, "learning_rate": 9.180095741997055e-06, "loss": 0.4974, "step": 1687 }, { "epoch": 0.21, "grad_norm": 1.6362295093390105, "learning_rate": 9.1789926710389e-06, "loss": 0.6164, "step": 1688 }, { "epoch": 0.21, "grad_norm": 1.651506207170075, "learning_rate": 9.177888924927675e-06, "loss": 0.5517, "step": 1689 }, { "epoch": 0.21, "grad_norm": 1.7460872196879362, "learning_rate": 9.176784503841699e-06, "loss": 0.5577, "step": 1690 }, { "epoch": 0.21, "grad_norm": 1.4073768477156903, "learning_rate": 9.175679407959399e-06, "loss": 0.5033, "step": 1691 }, { "epoch": 0.21, "grad_norm": 1.9985932651418432, "learning_rate": 9.174573637459317e-06, "loss": 0.5495, "step": 1692 }, { "epoch": 0.21, "grad_norm": 2.634411342635036, "learning_rate": 9.173467192520095e-06, "loss": 0.5501, "step": 1693 }, { "epoch": 0.21, "grad_norm": 1.768504397733744, "learning_rate": 9.172360073320493e-06, "loss": 0.496, "step": 1694 }, { "epoch": 0.21, "grad_norm": 1.5573104075423723, "learning_rate": 9.171252280039374e-06, "loss": 0.5204, "step": 1695 }, { "epoch": 0.21, "grad_norm": 1.3393545628964552, "learning_rate": 9.170143812855711e-06, "loss": 0.5045, "step": 1696 }, { "epoch": 0.21, "grad_norm": 1.975789021549798, "learning_rate": 9.169034671948589e-06, "loss": 0.5396, "step": 1697 }, { "epoch": 0.21, "grad_norm": 1.6801972267122265, "learning_rate": 9.167924857497197e-06, "loss": 0.5435, "step": 1698 }, { "epoch": 0.21, "grad_norm": 0.7246085283194033, "learning_rate": 9.166814369680837e-06, "loss": 0.5395, "step": 1699 }, { "epoch": 0.21, "grad_norm": 1.7107966860149284, "learning_rate": 9.165703208678917e-06, "loss": 0.5077, "step": 1700 }, { "epoch": 0.21, "grad_norm": 1.9115099585645674, "learning_rate": 9.164591374670957e-06, "loss": 0.5343, "step": 1701 }, { "epoch": 0.21, "grad_norm": 1.588858967214631, "learning_rate": 9.163478867836582e-06, "loss": 0.58, "step": 1702 }, { "epoch": 0.21, "grad_norm": 7.554126927044856, "learning_rate": 9.162365688355526e-06, "loss": 0.4872, "step": 1703 }, { "epoch": 0.21, "grad_norm": 1.3298965104985374, "learning_rate": 9.161251836407635e-06, "loss": 0.5012, "step": 1704 }, { "epoch": 0.21, "grad_norm": 1.4103330693756335, "learning_rate": 9.16013731217286e-06, "loss": 0.5279, "step": 1705 }, { "epoch": 0.21, "grad_norm": 1.2852666812092637, "learning_rate": 9.159022115831263e-06, "loss": 0.5313, "step": 1706 }, { "epoch": 0.21, "grad_norm": 1.3941519860589664, "learning_rate": 9.157906247563013e-06, "loss": 0.5133, "step": 1707 }, { "epoch": 0.21, "grad_norm": 1.413329364028389, "learning_rate": 9.15678970754839e-06, "loss": 0.5126, "step": 1708 }, { "epoch": 0.21, "grad_norm": 1.42572581665144, "learning_rate": 9.15567249596778e-06, "loss": 0.5391, "step": 1709 }, { "epoch": 0.21, "grad_norm": 1.739416463421691, "learning_rate": 9.154554613001679e-06, "loss": 0.5283, "step": 1710 }, { "epoch": 0.21, "grad_norm": 2.014337410080953, "learning_rate": 9.15343605883069e-06, "loss": 0.5153, "step": 1711 }, { "epoch": 0.21, "grad_norm": 1.9909225909292738, "learning_rate": 9.152316833635526e-06, "loss": 0.5594, "step": 1712 }, { "epoch": 0.21, "grad_norm": 2.7323915263313325, "learning_rate": 9.151196937597006e-06, "loss": 0.5514, "step": 1713 }, { "epoch": 0.21, "grad_norm": 1.5930929939562986, "learning_rate": 9.150076370896061e-06, "loss": 0.5265, "step": 1714 }, { "epoch": 0.21, "grad_norm": 1.561376133480682, "learning_rate": 9.148955133713728e-06, "loss": 0.4775, "step": 1715 }, { "epoch": 0.21, "grad_norm": 1.357765216075096, "learning_rate": 9.147833226231151e-06, "loss": 0.4957, "step": 1716 }, { "epoch": 0.21, "grad_norm": 15.316501717867558, "learning_rate": 9.146710648629587e-06, "loss": 0.5084, "step": 1717 }, { "epoch": 0.21, "grad_norm": 1.622111052086209, "learning_rate": 9.145587401090394e-06, "loss": 0.5024, "step": 1718 }, { "epoch": 0.21, "grad_norm": 0.6842905276591803, "learning_rate": 9.144463483795049e-06, "loss": 0.5024, "step": 1719 }, { "epoch": 0.21, "grad_norm": 2.7523349635715433, "learning_rate": 9.143338896925124e-06, "loss": 0.5035, "step": 1720 }, { "epoch": 0.21, "grad_norm": 1.9129601709051165, "learning_rate": 9.142213640662312e-06, "loss": 0.6002, "step": 1721 }, { "epoch": 0.21, "grad_norm": 1.7903992740470418, "learning_rate": 9.141087715188402e-06, "loss": 0.5401, "step": 1722 }, { "epoch": 0.21, "grad_norm": 1.9102915119427542, "learning_rate": 9.1399611206853e-06, "loss": 0.5402, "step": 1723 }, { "epoch": 0.21, "grad_norm": 1.5467085531898992, "learning_rate": 9.138833857335021e-06, "loss": 0.5115, "step": 1724 }, { "epoch": 0.21, "grad_norm": 3.1841285847142387, "learning_rate": 9.137705925319677e-06, "loss": 0.5619, "step": 1725 }, { "epoch": 0.21, "grad_norm": 0.6965895219325391, "learning_rate": 9.136577324821501e-06, "loss": 0.4995, "step": 1726 }, { "epoch": 0.21, "grad_norm": 1.6859448097443563, "learning_rate": 9.135448056022827e-06, "loss": 0.4778, "step": 1727 }, { "epoch": 0.21, "grad_norm": 1.2927993289518513, "learning_rate": 9.134318119106098e-06, "loss": 0.4931, "step": 1728 }, { "epoch": 0.21, "grad_norm": 1.2036250081564543, "learning_rate": 9.133187514253862e-06, "loss": 0.4606, "step": 1729 }, { "epoch": 0.21, "grad_norm": 1.4265361266647725, "learning_rate": 9.132056241648784e-06, "loss": 0.5319, "step": 1730 }, { "epoch": 0.21, "grad_norm": 1.3683153281725227, "learning_rate": 9.130924301473627e-06, "loss": 0.512, "step": 1731 }, { "epoch": 0.21, "grad_norm": 3.3247604798264128, "learning_rate": 9.129791693911268e-06, "loss": 0.5108, "step": 1732 }, { "epoch": 0.22, "grad_norm": 1.446197135116374, "learning_rate": 9.128658419144689e-06, "loss": 0.5134, "step": 1733 }, { "epoch": 0.22, "grad_norm": 1.4740690486820653, "learning_rate": 9.127524477356979e-06, "loss": 0.4845, "step": 1734 }, { "epoch": 0.22, "grad_norm": 1.5251618719712636, "learning_rate": 9.12638986873134e-06, "loss": 0.5262, "step": 1735 }, { "epoch": 0.22, "grad_norm": 1.3988676213731237, "learning_rate": 9.125254593451074e-06, "loss": 0.5271, "step": 1736 }, { "epoch": 0.22, "grad_norm": 1.4890342420503115, "learning_rate": 9.124118651699596e-06, "loss": 0.4984, "step": 1737 }, { "epoch": 0.22, "grad_norm": 0.7355545059416543, "learning_rate": 9.122982043660428e-06, "loss": 0.5047, "step": 1738 }, { "epoch": 0.22, "grad_norm": 1.4378053013014622, "learning_rate": 9.121844769517201e-06, "loss": 0.5143, "step": 1739 }, { "epoch": 0.22, "grad_norm": 2.0339115280934568, "learning_rate": 9.120706829453648e-06, "loss": 0.5517, "step": 1740 }, { "epoch": 0.22, "grad_norm": 3.1660031363051604, "learning_rate": 9.119568223653614e-06, "loss": 0.5358, "step": 1741 }, { "epoch": 0.22, "grad_norm": 0.646667354488079, "learning_rate": 9.118428952301052e-06, "loss": 0.5022, "step": 1742 }, { "epoch": 0.22, "grad_norm": 2.6285272562157975, "learning_rate": 9.117289015580022e-06, "loss": 0.5378, "step": 1743 }, { "epoch": 0.22, "grad_norm": 1.3783401691815536, "learning_rate": 9.116148413674688e-06, "loss": 0.5544, "step": 1744 }, { "epoch": 0.22, "grad_norm": 1.8822723672322517, "learning_rate": 9.115007146769326e-06, "loss": 0.5124, "step": 1745 }, { "epoch": 0.22, "grad_norm": 1.791138047925575, "learning_rate": 9.11386521504832e-06, "loss": 0.5198, "step": 1746 }, { "epoch": 0.22, "grad_norm": 1.7874423277192169, "learning_rate": 9.112722618696155e-06, "loss": 0.5609, "step": 1747 }, { "epoch": 0.22, "grad_norm": 1.5767870564083402, "learning_rate": 9.11157935789743e-06, "loss": 0.5131, "step": 1748 }, { "epoch": 0.22, "grad_norm": 1.7726451129948841, "learning_rate": 9.110435432836847e-06, "loss": 0.6044, "step": 1749 }, { "epoch": 0.22, "grad_norm": 1.6653491459083822, "learning_rate": 9.109290843699219e-06, "loss": 0.531, "step": 1750 }, { "epoch": 0.22, "grad_norm": 1.3072485709613986, "learning_rate": 9.108145590669464e-06, "loss": 0.5101, "step": 1751 }, { "epoch": 0.22, "grad_norm": 2.094158973402322, "learning_rate": 9.106999673932608e-06, "loss": 0.5323, "step": 1752 }, { "epoch": 0.22, "grad_norm": 1.620502440367058, "learning_rate": 9.105853093673782e-06, "loss": 0.5151, "step": 1753 }, { "epoch": 0.22, "grad_norm": 2.028921567090642, "learning_rate": 9.104705850078229e-06, "loss": 0.5269, "step": 1754 }, { "epoch": 0.22, "grad_norm": 1.8064643452958555, "learning_rate": 9.103557943331294e-06, "loss": 0.5825, "step": 1755 }, { "epoch": 0.22, "grad_norm": 1.6589087033035828, "learning_rate": 9.102409373618433e-06, "loss": 0.5637, "step": 1756 }, { "epoch": 0.22, "grad_norm": 1.5052128611635847, "learning_rate": 9.101260141125207e-06, "loss": 0.5489, "step": 1757 }, { "epoch": 0.22, "grad_norm": 2.444200630168782, "learning_rate": 9.100110246037284e-06, "loss": 0.523, "step": 1758 }, { "epoch": 0.22, "grad_norm": 1.3240725750282878, "learning_rate": 9.09895968854044e-06, "loss": 0.5259, "step": 1759 }, { "epoch": 0.22, "grad_norm": 1.4390570656456079, "learning_rate": 9.097808468820559e-06, "loss": 0.5009, "step": 1760 }, { "epoch": 0.22, "grad_norm": 0.7482633852588438, "learning_rate": 9.096656587063628e-06, "loss": 0.5182, "step": 1761 }, { "epoch": 0.22, "grad_norm": 1.4406976180426263, "learning_rate": 9.095504043455747e-06, "loss": 0.5115, "step": 1762 }, { "epoch": 0.22, "grad_norm": 1.3518685475810814, "learning_rate": 9.094350838183117e-06, "loss": 0.5111, "step": 1763 }, { "epoch": 0.22, "grad_norm": 2.2916236319235064, "learning_rate": 9.093196971432048e-06, "loss": 0.5317, "step": 1764 }, { "epoch": 0.22, "grad_norm": 1.8835108269892396, "learning_rate": 9.09204244338896e-06, "loss": 0.549, "step": 1765 }, { "epoch": 0.22, "grad_norm": 1.6435708630601518, "learning_rate": 9.090887254240375e-06, "loss": 0.5201, "step": 1766 }, { "epoch": 0.22, "grad_norm": 2.8321184478689867, "learning_rate": 9.089731404172926e-06, "loss": 0.5411, "step": 1767 }, { "epoch": 0.22, "grad_norm": 1.2943460426664266, "learning_rate": 9.08857489337335e-06, "loss": 0.5587, "step": 1768 }, { "epoch": 0.22, "grad_norm": 1.6125378503682504, "learning_rate": 9.08741772202849e-06, "loss": 0.4937, "step": 1769 }, { "epoch": 0.22, "grad_norm": 2.2238112852588428, "learning_rate": 9.086259890325297e-06, "loss": 0.5269, "step": 1770 }, { "epoch": 0.22, "grad_norm": 1.6489029783453386, "learning_rate": 9.08510139845083e-06, "loss": 0.5192, "step": 1771 }, { "epoch": 0.22, "grad_norm": 1.552669094441354, "learning_rate": 9.083942246592256e-06, "loss": 0.5442, "step": 1772 }, { "epoch": 0.22, "grad_norm": 1.3550158439915052, "learning_rate": 9.082782434936844e-06, "loss": 0.4781, "step": 1773 }, { "epoch": 0.22, "grad_norm": 1.3058746893566382, "learning_rate": 9.08162196367197e-06, "loss": 0.5051, "step": 1774 }, { "epoch": 0.22, "grad_norm": 1.5481492836508022, "learning_rate": 9.08046083298512e-06, "loss": 0.5488, "step": 1775 }, { "epoch": 0.22, "grad_norm": 1.4310769766264753, "learning_rate": 9.079299043063885e-06, "loss": 0.5832, "step": 1776 }, { "epoch": 0.22, "grad_norm": 1.6531261378265354, "learning_rate": 9.078136594095964e-06, "loss": 0.5043, "step": 1777 }, { "epoch": 0.22, "grad_norm": 2.0159329090312497, "learning_rate": 9.076973486269158e-06, "loss": 0.517, "step": 1778 }, { "epoch": 0.22, "grad_norm": 1.9511807427420154, "learning_rate": 9.075809719771378e-06, "loss": 0.5206, "step": 1779 }, { "epoch": 0.22, "grad_norm": 1.5978232229037173, "learning_rate": 9.074645294790643e-06, "loss": 0.5324, "step": 1780 }, { "epoch": 0.22, "grad_norm": 1.4798428851101637, "learning_rate": 9.073480211515071e-06, "loss": 0.5067, "step": 1781 }, { "epoch": 0.22, "grad_norm": 2.0306639014145014, "learning_rate": 9.0723144701329e-06, "loss": 0.5573, "step": 1782 }, { "epoch": 0.22, "grad_norm": 1.4728696179541005, "learning_rate": 9.071148070832456e-06, "loss": 0.5273, "step": 1783 }, { "epoch": 0.22, "grad_norm": 1.627559526949772, "learning_rate": 9.069981013802188e-06, "loss": 0.5133, "step": 1784 }, { "epoch": 0.22, "grad_norm": 1.4834810512434706, "learning_rate": 9.06881329923064e-06, "loss": 0.4869, "step": 1785 }, { "epoch": 0.22, "grad_norm": 1.372365155180784, "learning_rate": 9.067644927306471e-06, "loss": 0.5169, "step": 1786 }, { "epoch": 0.22, "grad_norm": 0.6951708523849913, "learning_rate": 9.066475898218439e-06, "loss": 0.4823, "step": 1787 }, { "epoch": 0.22, "grad_norm": 1.3878873806746732, "learning_rate": 9.06530621215541e-06, "loss": 0.55, "step": 1788 }, { "epoch": 0.22, "grad_norm": 1.4695670506341068, "learning_rate": 9.064135869306359e-06, "loss": 0.4979, "step": 1789 }, { "epoch": 0.22, "grad_norm": 1.3469285321385347, "learning_rate": 9.062964869860364e-06, "loss": 0.5153, "step": 1790 }, { "epoch": 0.22, "grad_norm": 1.3684711900202686, "learning_rate": 9.06179321400661e-06, "loss": 0.5329, "step": 1791 }, { "epoch": 0.22, "grad_norm": 1.4754969731161858, "learning_rate": 9.060620901934393e-06, "loss": 0.5619, "step": 1792 }, { "epoch": 0.22, "grad_norm": 1.3439809939316543, "learning_rate": 9.059447933833103e-06, "loss": 0.4887, "step": 1793 }, { "epoch": 0.22, "grad_norm": 1.4620094045772167, "learning_rate": 9.058274309892248e-06, "loss": 0.5352, "step": 1794 }, { "epoch": 0.22, "grad_norm": 1.336998172657835, "learning_rate": 9.057100030301438e-06, "loss": 0.5195, "step": 1795 }, { "epoch": 0.22, "grad_norm": 7.740380989435937, "learning_rate": 9.055925095250384e-06, "loss": 0.5416, "step": 1796 }, { "epoch": 0.22, "grad_norm": 2.654630803377701, "learning_rate": 9.05474950492891e-06, "loss": 0.4519, "step": 1797 }, { "epoch": 0.22, "grad_norm": 1.9937748629540164, "learning_rate": 9.053573259526941e-06, "loss": 0.5766, "step": 1798 }, { "epoch": 0.22, "grad_norm": 1.7901930378208728, "learning_rate": 9.052396359234514e-06, "loss": 0.4942, "step": 1799 }, { "epoch": 0.22, "grad_norm": 0.6924941301747856, "learning_rate": 9.051218804241764e-06, "loss": 0.5251, "step": 1800 }, { "epoch": 0.22, "grad_norm": 1.5309637058525534, "learning_rate": 9.050040594738937e-06, "loss": 0.5792, "step": 1801 }, { "epoch": 0.22, "grad_norm": 1.355722641980439, "learning_rate": 9.048861730916381e-06, "loss": 0.4687, "step": 1802 }, { "epoch": 0.22, "grad_norm": 1.845814720364885, "learning_rate": 9.047682212964553e-06, "loss": 0.5346, "step": 1803 }, { "epoch": 0.22, "grad_norm": 1.3425542240340513, "learning_rate": 9.046502041074014e-06, "loss": 0.481, "step": 1804 }, { "epoch": 0.22, "grad_norm": 1.617695978622734, "learning_rate": 9.045321215435433e-06, "loss": 0.4759, "step": 1805 }, { "epoch": 0.22, "grad_norm": 1.5334485516373337, "learning_rate": 9.044139736239581e-06, "loss": 0.562, "step": 1806 }, { "epoch": 0.22, "grad_norm": 1.5226781612458151, "learning_rate": 9.042957603677338e-06, "loss": 0.4891, "step": 1807 }, { "epoch": 0.22, "grad_norm": 1.3686279475665037, "learning_rate": 9.041774817939686e-06, "loss": 0.5542, "step": 1808 }, { "epoch": 0.22, "grad_norm": 1.5436064750306893, "learning_rate": 9.040591379217718e-06, "loss": 0.4889, "step": 1809 }, { "epoch": 0.22, "grad_norm": 1.501701685513641, "learning_rate": 9.039407287702622e-06, "loss": 0.5308, "step": 1810 }, { "epoch": 0.22, "grad_norm": 1.8016377081845487, "learning_rate": 9.038222543585706e-06, "loss": 0.5534, "step": 1811 }, { "epoch": 0.22, "grad_norm": 1.4173818191469483, "learning_rate": 9.037037147058372e-06, "loss": 0.499, "step": 1812 }, { "epoch": 0.22, "grad_norm": 1.4346008752805965, "learning_rate": 9.035851098312131e-06, "loss": 0.4631, "step": 1813 }, { "epoch": 0.23, "grad_norm": 1.4053591169104527, "learning_rate": 9.0346643975386e-06, "loss": 0.5564, "step": 1814 }, { "epoch": 0.23, "grad_norm": 3.275060750971617, "learning_rate": 9.033477044929504e-06, "loss": 0.5159, "step": 1815 }, { "epoch": 0.23, "grad_norm": 4.77859597684764, "learning_rate": 9.032289040676665e-06, "loss": 0.5705, "step": 1816 }, { "epoch": 0.23, "grad_norm": 1.7519356417482121, "learning_rate": 9.03110038497202e-06, "loss": 0.5405, "step": 1817 }, { "epoch": 0.23, "grad_norm": 1.4251065097842237, "learning_rate": 9.029911078007604e-06, "loss": 0.5315, "step": 1818 }, { "epoch": 0.23, "grad_norm": 1.2492462894322915, "learning_rate": 9.02872111997556e-06, "loss": 0.5089, "step": 1819 }, { "epoch": 0.23, "grad_norm": 1.4463132073460823, "learning_rate": 9.027530511068139e-06, "loss": 0.4827, "step": 1820 }, { "epoch": 0.23, "grad_norm": 3.3032911990407365, "learning_rate": 9.026339251477692e-06, "loss": 0.5078, "step": 1821 }, { "epoch": 0.23, "grad_norm": 1.626706774190053, "learning_rate": 9.025147341396678e-06, "loss": 0.5205, "step": 1822 }, { "epoch": 0.23, "grad_norm": 1.3509087885099418, "learning_rate": 9.023954781017662e-06, "loss": 0.4761, "step": 1823 }, { "epoch": 0.23, "grad_norm": 1.8946587915817568, "learning_rate": 9.02276157053331e-06, "loss": 0.5425, "step": 1824 }, { "epoch": 0.23, "grad_norm": 1.4134687843412066, "learning_rate": 9.021567710136397e-06, "loss": 0.4827, "step": 1825 }, { "epoch": 0.23, "grad_norm": 1.5361484675395476, "learning_rate": 9.020373200019802e-06, "loss": 0.5378, "step": 1826 }, { "epoch": 0.23, "grad_norm": 1.513940693690181, "learning_rate": 9.019178040376509e-06, "loss": 0.5161, "step": 1827 }, { "epoch": 0.23, "grad_norm": 1.507508231829813, "learning_rate": 9.017982231399604e-06, "loss": 0.5442, "step": 1828 }, { "epoch": 0.23, "grad_norm": 1.5256370685141893, "learning_rate": 9.016785773282284e-06, "loss": 0.5371, "step": 1829 }, { "epoch": 0.23, "grad_norm": 1.497173577544496, "learning_rate": 9.015588666217845e-06, "loss": 0.5134, "step": 1830 }, { "epoch": 0.23, "grad_norm": 4.0854993405691715, "learning_rate": 9.014390910399691e-06, "loss": 0.5161, "step": 1831 }, { "epoch": 0.23, "grad_norm": 4.518221248548298, "learning_rate": 9.013192506021328e-06, "loss": 0.5122, "step": 1832 }, { "epoch": 0.23, "grad_norm": 1.6377551021684842, "learning_rate": 9.011993453276373e-06, "loss": 0.5408, "step": 1833 }, { "epoch": 0.23, "grad_norm": 1.5068880542901153, "learning_rate": 9.01079375235854e-06, "loss": 0.4908, "step": 1834 }, { "epoch": 0.23, "grad_norm": 1.4292090316250703, "learning_rate": 9.009593403461652e-06, "loss": 0.5954, "step": 1835 }, { "epoch": 0.23, "grad_norm": 10.733599321570736, "learning_rate": 9.008392406779638e-06, "loss": 0.5509, "step": 1836 }, { "epoch": 0.23, "grad_norm": 3.8927517677665815, "learning_rate": 9.007190762506527e-06, "loss": 0.5052, "step": 1837 }, { "epoch": 0.23, "grad_norm": 1.541189839125486, "learning_rate": 9.005988470836456e-06, "loss": 0.5135, "step": 1838 }, { "epoch": 0.23, "grad_norm": 1.9180698533113243, "learning_rate": 9.004785531963665e-06, "loss": 0.5047, "step": 1839 }, { "epoch": 0.23, "grad_norm": 1.3942542286939754, "learning_rate": 9.003581946082503e-06, "loss": 0.5786, "step": 1840 }, { "epoch": 0.23, "grad_norm": 2.0924217443966975, "learning_rate": 9.002377713387415e-06, "loss": 0.4938, "step": 1841 }, { "epoch": 0.23, "grad_norm": 1.8068366112672747, "learning_rate": 9.001172834072958e-06, "loss": 0.4915, "step": 1842 }, { "epoch": 0.23, "grad_norm": 1.4742349524709262, "learning_rate": 8.999967308333791e-06, "loss": 0.4864, "step": 1843 }, { "epoch": 0.23, "grad_norm": 1.9117290532257252, "learning_rate": 8.998761136364675e-06, "loss": 0.5857, "step": 1844 }, { "epoch": 0.23, "grad_norm": 1.8116074153470343, "learning_rate": 8.997554318360482e-06, "loss": 0.489, "step": 1845 }, { "epoch": 0.23, "grad_norm": 2.0987590232716706, "learning_rate": 8.99634685451618e-06, "loss": 0.5531, "step": 1846 }, { "epoch": 0.23, "grad_norm": 1.5604499423857239, "learning_rate": 8.995138745026847e-06, "loss": 0.4864, "step": 1847 }, { "epoch": 0.23, "grad_norm": 1.5388505102375079, "learning_rate": 8.993929990087664e-06, "loss": 0.5363, "step": 1848 }, { "epoch": 0.23, "grad_norm": 1.29222554323978, "learning_rate": 8.992720589893915e-06, "loss": 0.5159, "step": 1849 }, { "epoch": 0.23, "grad_norm": 1.9916577839753073, "learning_rate": 8.991510544640992e-06, "loss": 0.5397, "step": 1850 }, { "epoch": 0.23, "grad_norm": 0.6948489030363554, "learning_rate": 8.990299854524384e-06, "loss": 0.5693, "step": 1851 }, { "epoch": 0.23, "grad_norm": 1.9513734930911082, "learning_rate": 8.989088519739693e-06, "loss": 0.5161, "step": 1852 }, { "epoch": 0.23, "grad_norm": 1.4587444301512456, "learning_rate": 8.987876540482618e-06, "loss": 0.4824, "step": 1853 }, { "epoch": 0.23, "grad_norm": 1.6710499041249678, "learning_rate": 8.986663916948965e-06, "loss": 0.5454, "step": 1854 }, { "epoch": 0.23, "grad_norm": 1.9152211306936873, "learning_rate": 8.985450649334646e-06, "loss": 0.5987, "step": 1855 }, { "epoch": 0.23, "grad_norm": 2.6560317393676094, "learning_rate": 8.984236737835673e-06, "loss": 0.5399, "step": 1856 }, { "epoch": 0.23, "grad_norm": 1.5195714881992344, "learning_rate": 8.983022182648166e-06, "loss": 0.597, "step": 1857 }, { "epoch": 0.23, "grad_norm": 1.355222695904536, "learning_rate": 8.981806983968346e-06, "loss": 0.5864, "step": 1858 }, { "epoch": 0.23, "grad_norm": 1.2761619323097473, "learning_rate": 8.980591141992538e-06, "loss": 0.5001, "step": 1859 }, { "epoch": 0.23, "grad_norm": 1.4872401648051745, "learning_rate": 8.979374656917174e-06, "loss": 0.4997, "step": 1860 }, { "epoch": 0.23, "grad_norm": 1.588819005755008, "learning_rate": 8.978157528938786e-06, "loss": 0.5829, "step": 1861 }, { "epoch": 0.23, "grad_norm": 1.6542912440317863, "learning_rate": 8.976939758254015e-06, "loss": 0.5478, "step": 1862 }, { "epoch": 0.23, "grad_norm": 26.682459025536943, "learning_rate": 8.975721345059598e-06, "loss": 0.5039, "step": 1863 }, { "epoch": 0.23, "grad_norm": 2.0430820852157336, "learning_rate": 8.974502289552384e-06, "loss": 0.5205, "step": 1864 }, { "epoch": 0.23, "grad_norm": 1.43661731754798, "learning_rate": 8.973282591929319e-06, "loss": 0.5473, "step": 1865 }, { "epoch": 0.23, "grad_norm": 6.688736566388985, "learning_rate": 8.97206225238746e-06, "loss": 0.5611, "step": 1866 }, { "epoch": 0.23, "grad_norm": 1.4425314258470237, "learning_rate": 8.970841271123962e-06, "loss": 0.5586, "step": 1867 }, { "epoch": 0.23, "grad_norm": 1.8776702201654907, "learning_rate": 8.969619648336082e-06, "loss": 0.5739, "step": 1868 }, { "epoch": 0.23, "grad_norm": 0.7012564447126629, "learning_rate": 8.968397384221188e-06, "loss": 0.5036, "step": 1869 }, { "epoch": 0.23, "grad_norm": 1.3566032268734702, "learning_rate": 8.967174478976745e-06, "loss": 0.5345, "step": 1870 }, { "epoch": 0.23, "grad_norm": 1.8068390422598124, "learning_rate": 8.965950932800326e-06, "loss": 0.5652, "step": 1871 }, { "epoch": 0.23, "grad_norm": 1.435850750817673, "learning_rate": 8.964726745889606e-06, "loss": 0.5618, "step": 1872 }, { "epoch": 0.23, "grad_norm": 1.2816893239440885, "learning_rate": 8.963501918442359e-06, "loss": 0.4831, "step": 1873 }, { "epoch": 0.23, "grad_norm": 1.3104746693874945, "learning_rate": 8.962276450656471e-06, "loss": 0.4523, "step": 1874 }, { "epoch": 0.23, "grad_norm": 1.4357443614868366, "learning_rate": 8.961050342729927e-06, "loss": 0.4957, "step": 1875 }, { "epoch": 0.23, "grad_norm": 1.568259871694868, "learning_rate": 8.959823594860813e-06, "loss": 0.4929, "step": 1876 }, { "epoch": 0.23, "grad_norm": 1.372551882835372, "learning_rate": 8.958596207247322e-06, "loss": 0.585, "step": 1877 }, { "epoch": 0.23, "grad_norm": 1.3909216826293513, "learning_rate": 8.95736818008775e-06, "loss": 0.4942, "step": 1878 }, { "epoch": 0.23, "grad_norm": 1.6154288710912335, "learning_rate": 8.956139513580495e-06, "loss": 0.4675, "step": 1879 }, { "epoch": 0.23, "grad_norm": 1.4455950323321096, "learning_rate": 8.95491020792406e-06, "loss": 0.5411, "step": 1880 }, { "epoch": 0.23, "grad_norm": 1.259868701492697, "learning_rate": 8.953680263317048e-06, "loss": 0.4533, "step": 1881 }, { "epoch": 0.23, "grad_norm": 3.522655048402381, "learning_rate": 8.952449679958168e-06, "loss": 0.5309, "step": 1882 }, { "epoch": 0.23, "grad_norm": 1.6655362560273492, "learning_rate": 8.951218458046233e-06, "loss": 0.5407, "step": 1883 }, { "epoch": 0.23, "grad_norm": 1.3048223977489455, "learning_rate": 8.949986597780157e-06, "loss": 0.5039, "step": 1884 }, { "epoch": 0.23, "grad_norm": 1.5562776861902379, "learning_rate": 8.94875409935896e-06, "loss": 0.5763, "step": 1885 }, { "epoch": 0.23, "grad_norm": 1.2745792479455942, "learning_rate": 8.947520962981758e-06, "loss": 0.4908, "step": 1886 }, { "epoch": 0.23, "grad_norm": 1.6672757070096937, "learning_rate": 8.946287188847778e-06, "loss": 0.5517, "step": 1887 }, { "epoch": 0.23, "grad_norm": 1.3056030322769647, "learning_rate": 8.945052777156346e-06, "loss": 0.5022, "step": 1888 }, { "epoch": 0.23, "grad_norm": 1.8962485527766033, "learning_rate": 8.943817728106894e-06, "loss": 0.506, "step": 1889 }, { "epoch": 0.23, "grad_norm": 1.4549494573125639, "learning_rate": 8.942582041898954e-06, "loss": 0.4836, "step": 1890 }, { "epoch": 0.23, "grad_norm": 1.7727010742859703, "learning_rate": 8.941345718732162e-06, "loss": 0.5042, "step": 1891 }, { "epoch": 0.23, "grad_norm": 2.383471940931024, "learning_rate": 8.940108758806258e-06, "loss": 0.5198, "step": 1892 }, { "epoch": 0.23, "grad_norm": 1.6303903494517051, "learning_rate": 8.938871162321082e-06, "loss": 0.5107, "step": 1893 }, { "epoch": 0.24, "grad_norm": 1.783742959636969, "learning_rate": 8.937632929476578e-06, "loss": 0.5629, "step": 1894 }, { "epoch": 0.24, "grad_norm": 1.4634858088918852, "learning_rate": 8.936394060472796e-06, "loss": 0.4866, "step": 1895 }, { "epoch": 0.24, "grad_norm": 1.339140151238824, "learning_rate": 8.935154555509883e-06, "loss": 0.5651, "step": 1896 }, { "epoch": 0.24, "grad_norm": 1.4343314700039496, "learning_rate": 8.933914414788095e-06, "loss": 0.5304, "step": 1897 }, { "epoch": 0.24, "grad_norm": 1.447195399814874, "learning_rate": 8.932673638507787e-06, "loss": 0.5352, "step": 1898 }, { "epoch": 0.24, "grad_norm": 1.5450789392126534, "learning_rate": 8.931432226869416e-06, "loss": 0.4946, "step": 1899 }, { "epoch": 0.24, "grad_norm": 2.1174940010910475, "learning_rate": 8.930190180073544e-06, "loss": 0.5236, "step": 1900 }, { "epoch": 0.24, "grad_norm": 1.6545234531448152, "learning_rate": 8.928947498320835e-06, "loss": 0.5477, "step": 1901 }, { "epoch": 0.24, "grad_norm": 1.3292775696499728, "learning_rate": 8.927704181812053e-06, "loss": 0.4817, "step": 1902 }, { "epoch": 0.24, "grad_norm": 1.5071985105494015, "learning_rate": 8.92646023074807e-06, "loss": 0.5697, "step": 1903 }, { "epoch": 0.24, "grad_norm": 1.9818677569842058, "learning_rate": 8.925215645329854e-06, "loss": 0.4559, "step": 1904 }, { "epoch": 0.24, "grad_norm": 1.580179304566661, "learning_rate": 8.923970425758481e-06, "loss": 0.5159, "step": 1905 }, { "epoch": 0.24, "grad_norm": 2.458276371266938, "learning_rate": 8.922724572235128e-06, "loss": 0.5959, "step": 1906 }, { "epoch": 0.24, "grad_norm": 1.7012565794755887, "learning_rate": 8.921478084961071e-06, "loss": 0.5144, "step": 1907 }, { "epoch": 0.24, "grad_norm": 1.4290181998565596, "learning_rate": 8.92023096413769e-06, "loss": 0.5189, "step": 1908 }, { "epoch": 0.24, "grad_norm": 1.4312359811126811, "learning_rate": 8.918983209966475e-06, "loss": 0.5151, "step": 1909 }, { "epoch": 0.24, "grad_norm": 7.336001691916545, "learning_rate": 8.917734822649002e-06, "loss": 0.4681, "step": 1910 }, { "epoch": 0.24, "grad_norm": 1.9200723965237108, "learning_rate": 8.916485802386968e-06, "loss": 0.5125, "step": 1911 }, { "epoch": 0.24, "grad_norm": 1.5797716719857873, "learning_rate": 8.915236149382155e-06, "loss": 0.5416, "step": 1912 }, { "epoch": 0.24, "grad_norm": 1.783287209569103, "learning_rate": 8.913985863836465e-06, "loss": 0.5443, "step": 1913 }, { "epoch": 0.24, "grad_norm": 1.8272401889966197, "learning_rate": 8.912734945951884e-06, "loss": 0.5295, "step": 1914 }, { "epoch": 0.24, "grad_norm": 1.6138889353965085, "learning_rate": 8.911483395930514e-06, "loss": 0.5037, "step": 1915 }, { "epoch": 0.24, "grad_norm": 1.495452942442373, "learning_rate": 8.910231213974549e-06, "loss": 0.5173, "step": 1916 }, { "epoch": 0.24, "grad_norm": 2.146077073992904, "learning_rate": 8.908978400286297e-06, "loss": 0.5296, "step": 1917 }, { "epoch": 0.24, "grad_norm": 1.7738406140487761, "learning_rate": 8.907724955068156e-06, "loss": 0.5089, "step": 1918 }, { "epoch": 0.24, "grad_norm": 1.4445001960204054, "learning_rate": 8.90647087852263e-06, "loss": 0.4729, "step": 1919 }, { "epoch": 0.24, "grad_norm": 3.064579524062797, "learning_rate": 8.905216170852332e-06, "loss": 0.4936, "step": 1920 }, { "epoch": 0.24, "grad_norm": 1.5744305050856235, "learning_rate": 8.903960832259966e-06, "loss": 0.4603, "step": 1921 }, { "epoch": 0.24, "grad_norm": 1.6956951304877073, "learning_rate": 8.902704862948344e-06, "loss": 0.496, "step": 1922 }, { "epoch": 0.24, "grad_norm": 2.1043226523378875, "learning_rate": 8.901448263120379e-06, "loss": 0.5216, "step": 1923 }, { "epoch": 0.24, "grad_norm": 1.2288602774625417, "learning_rate": 8.900191032979088e-06, "loss": 0.4907, "step": 1924 }, { "epoch": 0.24, "grad_norm": 1.6974651681887711, "learning_rate": 8.898933172727584e-06, "loss": 0.5437, "step": 1925 }, { "epoch": 0.24, "grad_norm": 2.1277108296392484, "learning_rate": 8.897674682569088e-06, "loss": 0.4111, "step": 1926 }, { "epoch": 0.24, "grad_norm": 1.3434009357471774, "learning_rate": 8.896415562706919e-06, "loss": 0.5192, "step": 1927 }, { "epoch": 0.24, "grad_norm": 1.5168120489469041, "learning_rate": 8.8951558133445e-06, "loss": 0.5363, "step": 1928 }, { "epoch": 0.24, "grad_norm": 1.372964230231597, "learning_rate": 8.893895434685353e-06, "loss": 0.5477, "step": 1929 }, { "epoch": 0.24, "grad_norm": 1.3341145832969576, "learning_rate": 8.892634426933106e-06, "loss": 0.5334, "step": 1930 }, { "epoch": 0.24, "grad_norm": 1.381671572338022, "learning_rate": 8.891372790291482e-06, "loss": 0.4387, "step": 1931 }, { "epoch": 0.24, "grad_norm": 1.3889571726074057, "learning_rate": 8.890110524964313e-06, "loss": 0.5352, "step": 1932 }, { "epoch": 0.24, "grad_norm": 1.6346808498868517, "learning_rate": 8.888847631155525e-06, "loss": 0.5435, "step": 1933 }, { "epoch": 0.24, "grad_norm": 3.389766354005252, "learning_rate": 8.887584109069157e-06, "loss": 0.4923, "step": 1934 }, { "epoch": 0.24, "grad_norm": 1.506276037797442, "learning_rate": 8.886319958909334e-06, "loss": 0.5358, "step": 1935 }, { "epoch": 0.24, "grad_norm": 1.8617829100438767, "learning_rate": 8.885055180880294e-06, "loss": 0.5455, "step": 1936 }, { "epoch": 0.24, "grad_norm": 1.4403503924568222, "learning_rate": 8.883789775186374e-06, "loss": 0.4609, "step": 1937 }, { "epoch": 0.24, "grad_norm": 1.5025770716659845, "learning_rate": 8.88252374203201e-06, "loss": 0.5037, "step": 1938 }, { "epoch": 0.24, "grad_norm": 1.713090061036998, "learning_rate": 8.881257081621741e-06, "loss": 0.4901, "step": 1939 }, { "epoch": 0.24, "grad_norm": 0.7078369186879294, "learning_rate": 8.879989794160208e-06, "loss": 0.4839, "step": 1940 }, { "epoch": 0.24, "grad_norm": 1.5524521922610566, "learning_rate": 8.878721879852153e-06, "loss": 0.5419, "step": 1941 }, { "epoch": 0.24, "grad_norm": 1.7800261176459817, "learning_rate": 8.877453338902415e-06, "loss": 0.5274, "step": 1942 }, { "epoch": 0.24, "grad_norm": 1.438442558784284, "learning_rate": 8.876184171515943e-06, "loss": 0.5588, "step": 1943 }, { "epoch": 0.24, "grad_norm": 1.579247057716277, "learning_rate": 8.874914377897778e-06, "loss": 0.4872, "step": 1944 }, { "epoch": 0.24, "grad_norm": 2.7752495014824574, "learning_rate": 8.87364395825307e-06, "loss": 0.5349, "step": 1945 }, { "epoch": 0.24, "grad_norm": 1.5952965241983228, "learning_rate": 8.872372912787061e-06, "loss": 0.5209, "step": 1946 }, { "epoch": 0.24, "grad_norm": 1.4540745806687716, "learning_rate": 8.871101241705105e-06, "loss": 0.5252, "step": 1947 }, { "epoch": 0.24, "grad_norm": 1.5604738622505752, "learning_rate": 8.86982894521265e-06, "loss": 0.4982, "step": 1948 }, { "epoch": 0.24, "grad_norm": 1.7187200851975826, "learning_rate": 8.868556023515247e-06, "loss": 0.5191, "step": 1949 }, { "epoch": 0.24, "grad_norm": 1.3690982491354926, "learning_rate": 8.867282476818546e-06, "loss": 0.5194, "step": 1950 }, { "epoch": 0.24, "grad_norm": 1.872490573096125, "learning_rate": 8.866008305328303e-06, "loss": 0.4693, "step": 1951 }, { "epoch": 0.24, "grad_norm": 1.653900492109943, "learning_rate": 8.864733509250367e-06, "loss": 0.4991, "step": 1952 }, { "epoch": 0.24, "grad_norm": 1.8198036332691097, "learning_rate": 8.863458088790695e-06, "loss": 0.5657, "step": 1953 }, { "epoch": 0.24, "grad_norm": 0.7071267036238204, "learning_rate": 8.862182044155345e-06, "loss": 0.5261, "step": 1954 }, { "epoch": 0.24, "grad_norm": 1.4544658968334625, "learning_rate": 8.860905375550469e-06, "loss": 0.5482, "step": 1955 }, { "epoch": 0.24, "grad_norm": 1.5844352202023428, "learning_rate": 8.859628083182326e-06, "loss": 0.5331, "step": 1956 }, { "epoch": 0.24, "grad_norm": 1.8270985290546589, "learning_rate": 8.858350167257275e-06, "loss": 0.5229, "step": 1957 }, { "epoch": 0.24, "grad_norm": 2.2358524365219887, "learning_rate": 8.85707162798177e-06, "loss": 0.5315, "step": 1958 }, { "epoch": 0.24, "grad_norm": 1.6698059723203507, "learning_rate": 8.855792465562377e-06, "loss": 0.5325, "step": 1959 }, { "epoch": 0.24, "grad_norm": 0.7400217029843602, "learning_rate": 8.854512680205748e-06, "loss": 0.5117, "step": 1960 }, { "epoch": 0.24, "grad_norm": 1.4293314877485304, "learning_rate": 8.853232272118653e-06, "loss": 0.5194, "step": 1961 }, { "epoch": 0.24, "grad_norm": 1.3617601801455612, "learning_rate": 8.851951241507945e-06, "loss": 0.48, "step": 1962 }, { "epoch": 0.24, "grad_norm": 2.5593848472649214, "learning_rate": 8.850669588580591e-06, "loss": 0.5879, "step": 1963 }, { "epoch": 0.24, "grad_norm": 1.329342422082542, "learning_rate": 8.84938731354365e-06, "loss": 0.5234, "step": 1964 }, { "epoch": 0.24, "grad_norm": 1.6158031021826076, "learning_rate": 8.848104416604287e-06, "loss": 0.5032, "step": 1965 }, { "epoch": 0.24, "grad_norm": 1.4835217258908457, "learning_rate": 8.846820897969763e-06, "loss": 0.53, "step": 1966 }, { "epoch": 0.24, "grad_norm": 0.6760264757911146, "learning_rate": 8.845536757847444e-06, "loss": 0.502, "step": 1967 }, { "epoch": 0.24, "grad_norm": 1.8477646844223596, "learning_rate": 8.844251996444792e-06, "loss": 0.5484, "step": 1968 }, { "epoch": 0.24, "grad_norm": 1.7376392792536866, "learning_rate": 8.842966613969376e-06, "loss": 0.5252, "step": 1969 }, { "epoch": 0.24, "grad_norm": 2.165308432040499, "learning_rate": 8.841680610628853e-06, "loss": 0.4646, "step": 1970 }, { "epoch": 0.24, "grad_norm": 1.4974071983421144, "learning_rate": 8.840393986630996e-06, "loss": 0.4945, "step": 1971 }, { "epoch": 0.24, "grad_norm": 1.845611195832243, "learning_rate": 8.839106742183668e-06, "loss": 0.5535, "step": 1972 }, { "epoch": 0.24, "grad_norm": 1.5394864096122054, "learning_rate": 8.837818877494833e-06, "loss": 0.5139, "step": 1973 }, { "epoch": 0.24, "grad_norm": 1.4206296295423873, "learning_rate": 8.836530392772555e-06, "loss": 0.5274, "step": 1974 }, { "epoch": 0.25, "grad_norm": 0.6776243792818417, "learning_rate": 8.835241288225007e-06, "loss": 0.5352, "step": 1975 }, { "epoch": 0.25, "grad_norm": 1.3565471277974326, "learning_rate": 8.83395156406045e-06, "loss": 0.5327, "step": 1976 }, { "epoch": 0.25, "grad_norm": 1.4313562691928976, "learning_rate": 8.832661220487251e-06, "loss": 0.5132, "step": 1977 }, { "epoch": 0.25, "grad_norm": 1.7967161826065228, "learning_rate": 8.831370257713877e-06, "loss": 0.4947, "step": 1978 }, { "epoch": 0.25, "grad_norm": 1.467448215573434, "learning_rate": 8.830078675948894e-06, "loss": 0.5552, "step": 1979 }, { "epoch": 0.25, "grad_norm": 1.5835629523108656, "learning_rate": 8.828786475400968e-06, "loss": 0.5141, "step": 1980 }, { "epoch": 0.25, "grad_norm": 2.0099353012114616, "learning_rate": 8.827493656278867e-06, "loss": 0.4699, "step": 1981 }, { "epoch": 0.25, "grad_norm": 1.5473553695010045, "learning_rate": 8.826200218791455e-06, "loss": 0.5123, "step": 1982 }, { "epoch": 0.25, "grad_norm": 1.5795956022466797, "learning_rate": 8.8249061631477e-06, "loss": 0.522, "step": 1983 }, { "epoch": 0.25, "grad_norm": 2.222064880909354, "learning_rate": 8.823611489556668e-06, "loss": 0.5207, "step": 1984 }, { "epoch": 0.25, "grad_norm": 1.7684840911887174, "learning_rate": 8.822316198227525e-06, "loss": 0.5149, "step": 1985 }, { "epoch": 0.25, "grad_norm": 1.476508776650612, "learning_rate": 8.821020289369535e-06, "loss": 0.5619, "step": 1986 }, { "epoch": 0.25, "grad_norm": 1.3676529888008777, "learning_rate": 8.819723763192065e-06, "loss": 0.4907, "step": 1987 }, { "epoch": 0.25, "grad_norm": 2.8100143598963343, "learning_rate": 8.81842661990458e-06, "loss": 0.4757, "step": 1988 }, { "epoch": 0.25, "grad_norm": 0.6704306315147124, "learning_rate": 8.817128859716646e-06, "loss": 0.5198, "step": 1989 }, { "epoch": 0.25, "grad_norm": 1.6174741992271353, "learning_rate": 8.815830482837925e-06, "loss": 0.4942, "step": 1990 }, { "epoch": 0.25, "grad_norm": 2.1235866635095344, "learning_rate": 8.814531489478183e-06, "loss": 0.5388, "step": 1991 }, { "epoch": 0.25, "grad_norm": 1.749572638929686, "learning_rate": 8.813231879847284e-06, "loss": 0.4947, "step": 1992 }, { "epoch": 0.25, "grad_norm": 1.8765404375963373, "learning_rate": 8.811931654155191e-06, "loss": 0.4857, "step": 1993 }, { "epoch": 0.25, "grad_norm": 0.7127206859801521, "learning_rate": 8.810630812611965e-06, "loss": 0.5374, "step": 1994 }, { "epoch": 0.25, "grad_norm": 1.3765373181685499, "learning_rate": 8.80932935542777e-06, "loss": 0.5574, "step": 1995 }, { "epoch": 0.25, "grad_norm": 1.4220313237505227, "learning_rate": 8.808027282812871e-06, "loss": 0.551, "step": 1996 }, { "epoch": 0.25, "grad_norm": 1.390487340074321, "learning_rate": 8.806724594977625e-06, "loss": 0.5951, "step": 1997 }, { "epoch": 0.25, "grad_norm": 1.6304586844315077, "learning_rate": 8.805421292132495e-06, "loss": 0.4819, "step": 1998 }, { "epoch": 0.25, "grad_norm": 1.548784626397272, "learning_rate": 8.804117374488037e-06, "loss": 0.4743, "step": 1999 }, { "epoch": 0.25, "grad_norm": 2.7079415571467975, "learning_rate": 8.802812842254917e-06, "loss": 0.5411, "step": 2000 }, { "epoch": 0.25, "grad_norm": 1.7494791245756824, "learning_rate": 8.801507695643886e-06, "loss": 0.552, "step": 2001 }, { "epoch": 0.25, "grad_norm": 1.5625148747215771, "learning_rate": 8.80020193486581e-06, "loss": 0.5551, "step": 2002 }, { "epoch": 0.25, "grad_norm": 1.5943239427434999, "learning_rate": 8.798895560131642e-06, "loss": 0.46, "step": 2003 }, { "epoch": 0.25, "grad_norm": 1.8847274473426747, "learning_rate": 8.797588571652439e-06, "loss": 0.5404, "step": 2004 }, { "epoch": 0.25, "grad_norm": 1.7756131902868117, "learning_rate": 8.796280969639353e-06, "loss": 0.4842, "step": 2005 }, { "epoch": 0.25, "grad_norm": 1.852938704264008, "learning_rate": 8.794972754303644e-06, "loss": 0.5657, "step": 2006 }, { "epoch": 0.25, "grad_norm": 4.412632757606416, "learning_rate": 8.793663925856662e-06, "loss": 0.5468, "step": 2007 }, { "epoch": 0.25, "grad_norm": 1.6268459566919793, "learning_rate": 8.792354484509863e-06, "loss": 0.5665, "step": 2008 }, { "epoch": 0.25, "grad_norm": 1.318500406755192, "learning_rate": 8.791044430474795e-06, "loss": 0.5019, "step": 2009 }, { "epoch": 0.25, "grad_norm": 1.4252737383395815, "learning_rate": 8.789733763963112e-06, "loss": 0.5352, "step": 2010 }, { "epoch": 0.25, "grad_norm": 0.7028911675339917, "learning_rate": 8.788422485186561e-06, "loss": 0.4947, "step": 2011 }, { "epoch": 0.25, "grad_norm": 1.295162872112619, "learning_rate": 8.787110594356993e-06, "loss": 0.5005, "step": 2012 }, { "epoch": 0.25, "grad_norm": 1.2920138918598154, "learning_rate": 8.785798091686356e-06, "loss": 0.4892, "step": 2013 }, { "epoch": 0.25, "grad_norm": 1.5507368136823545, "learning_rate": 8.784484977386691e-06, "loss": 0.5525, "step": 2014 }, { "epoch": 0.25, "grad_norm": 1.4858272518440248, "learning_rate": 8.783171251670151e-06, "loss": 0.5463, "step": 2015 }, { "epoch": 0.25, "grad_norm": 0.6740133113942713, "learning_rate": 8.781856914748974e-06, "loss": 0.5366, "step": 2016 }, { "epoch": 0.25, "grad_norm": 1.6130432936138699, "learning_rate": 8.780541966835506e-06, "loss": 0.4703, "step": 2017 }, { "epoch": 0.25, "grad_norm": 1.4865849447276784, "learning_rate": 8.779226408142187e-06, "loss": 0.5175, "step": 2018 }, { "epoch": 0.25, "grad_norm": 2.477496458338817, "learning_rate": 8.777910238881557e-06, "loss": 0.5737, "step": 2019 }, { "epoch": 0.25, "grad_norm": 1.590972661806151, "learning_rate": 8.776593459266256e-06, "loss": 0.5884, "step": 2020 }, { "epoch": 0.25, "grad_norm": 1.5083726906854342, "learning_rate": 8.77527606950902e-06, "loss": 0.5236, "step": 2021 }, { "epoch": 0.25, "grad_norm": 1.6928660682592702, "learning_rate": 8.773958069822684e-06, "loss": 0.5336, "step": 2022 }, { "epoch": 0.25, "grad_norm": 1.8285529561145588, "learning_rate": 8.772639460420184e-06, "loss": 0.5104, "step": 2023 }, { "epoch": 0.25, "grad_norm": 1.4625362649817097, "learning_rate": 8.771320241514553e-06, "loss": 0.5071, "step": 2024 }, { "epoch": 0.25, "grad_norm": 1.209422273835458, "learning_rate": 8.770000413318923e-06, "loss": 0.5072, "step": 2025 }, { "epoch": 0.25, "grad_norm": 1.3313275590933291, "learning_rate": 8.768679976046523e-06, "loss": 0.4998, "step": 2026 }, { "epoch": 0.25, "grad_norm": 1.2482005079526335, "learning_rate": 8.767358929910681e-06, "loss": 0.469, "step": 2027 }, { "epoch": 0.25, "grad_norm": 1.4071157891730426, "learning_rate": 8.766037275124824e-06, "loss": 0.5088, "step": 2028 }, { "epoch": 0.25, "grad_norm": 1.6073781895786337, "learning_rate": 8.764715011902477e-06, "loss": 0.5158, "step": 2029 }, { "epoch": 0.25, "grad_norm": 2.084224486603882, "learning_rate": 8.763392140457261e-06, "loss": 0.526, "step": 2030 }, { "epoch": 0.25, "grad_norm": 1.7510253233417739, "learning_rate": 8.762068661002902e-06, "loss": 0.4855, "step": 2031 }, { "epoch": 0.25, "grad_norm": 2.3038810606284432, "learning_rate": 8.760744573753218e-06, "loss": 0.5514, "step": 2032 }, { "epoch": 0.25, "grad_norm": 1.6022421653238001, "learning_rate": 8.759419878922125e-06, "loss": 0.5673, "step": 2033 }, { "epoch": 0.25, "grad_norm": 1.20301441399954, "learning_rate": 8.758094576723641e-06, "loss": 0.539, "step": 2034 }, { "epoch": 0.25, "grad_norm": 1.6577453684894792, "learning_rate": 8.75676866737188e-06, "loss": 0.5521, "step": 2035 }, { "epoch": 0.25, "grad_norm": 0.6839437161456582, "learning_rate": 8.755442151081054e-06, "loss": 0.4958, "step": 2036 }, { "epoch": 0.25, "grad_norm": 1.5834842804236429, "learning_rate": 8.754115028065474e-06, "loss": 0.4872, "step": 2037 }, { "epoch": 0.25, "grad_norm": 2.2421241088167467, "learning_rate": 8.752787298539547e-06, "loss": 0.5161, "step": 2038 }, { "epoch": 0.25, "grad_norm": 2.161941749490066, "learning_rate": 8.75145896271778e-06, "loss": 0.4987, "step": 2039 }, { "epoch": 0.25, "grad_norm": 1.4927151108479508, "learning_rate": 8.750130020814779e-06, "loss": 0.4894, "step": 2040 }, { "epoch": 0.25, "grad_norm": 1.502074237777443, "learning_rate": 8.748800473045245e-06, "loss": 0.5471, "step": 2041 }, { "epoch": 0.25, "grad_norm": 1.5446193105812245, "learning_rate": 8.747470319623976e-06, "loss": 0.5269, "step": 2042 }, { "epoch": 0.25, "grad_norm": 1.8144058798330323, "learning_rate": 8.746139560765873e-06, "loss": 0.5298, "step": 2043 }, { "epoch": 0.25, "grad_norm": 1.3392301561466349, "learning_rate": 8.744808196685933e-06, "loss": 0.5376, "step": 2044 }, { "epoch": 0.25, "grad_norm": 1.3212574679321898, "learning_rate": 8.743476227599245e-06, "loss": 0.5288, "step": 2045 }, { "epoch": 0.25, "grad_norm": 1.953631424817136, "learning_rate": 8.742143653721004e-06, "loss": 0.5045, "step": 2046 }, { "epoch": 0.25, "grad_norm": 1.3254459527990614, "learning_rate": 8.740810475266497e-06, "loss": 0.5295, "step": 2047 }, { "epoch": 0.25, "grad_norm": 1.2471258040754365, "learning_rate": 8.739476692451112e-06, "loss": 0.5237, "step": 2048 }, { "epoch": 0.25, "grad_norm": 2.2406500377543024, "learning_rate": 8.738142305490335e-06, "loss": 0.5653, "step": 2049 }, { "epoch": 0.25, "grad_norm": 1.3120908521833168, "learning_rate": 8.736807314599744e-06, "loss": 0.4988, "step": 2050 }, { "epoch": 0.25, "grad_norm": 1.56240581885028, "learning_rate": 8.73547171999502e-06, "loss": 0.5052, "step": 2051 }, { "epoch": 0.25, "grad_norm": 1.3269004649671916, "learning_rate": 8.734135521891941e-06, "loss": 0.5342, "step": 2052 }, { "epoch": 0.25, "grad_norm": 1.472749561684422, "learning_rate": 8.732798720506381e-06, "loss": 0.4932, "step": 2053 }, { "epoch": 0.25, "grad_norm": 1.5456774928408388, "learning_rate": 8.731461316054313e-06, "loss": 0.5342, "step": 2054 }, { "epoch": 0.26, "grad_norm": 1.2554204562914544, "learning_rate": 8.730123308751806e-06, "loss": 0.4592, "step": 2055 }, { "epoch": 0.26, "grad_norm": 1.3112293500770817, "learning_rate": 8.728784698815026e-06, "loss": 0.5272, "step": 2056 }, { "epoch": 0.26, "grad_norm": 1.1667461673415906, "learning_rate": 8.727445486460236e-06, "loss": 0.4595, "step": 2057 }, { "epoch": 0.26, "grad_norm": 3.6367370511644874, "learning_rate": 8.7261056719038e-06, "loss": 0.5088, "step": 2058 }, { "epoch": 0.26, "grad_norm": 1.50527731481556, "learning_rate": 8.724765255362176e-06, "loss": 0.5197, "step": 2059 }, { "epoch": 0.26, "grad_norm": 1.5322878593937856, "learning_rate": 8.72342423705192e-06, "loss": 0.5218, "step": 2060 }, { "epoch": 0.26, "grad_norm": 1.4793378737196778, "learning_rate": 8.722082617189688e-06, "loss": 0.5401, "step": 2061 }, { "epoch": 0.26, "grad_norm": 1.4610037837615666, "learning_rate": 8.720740395992225e-06, "loss": 0.5336, "step": 2062 }, { "epoch": 0.26, "grad_norm": 1.5622701747115364, "learning_rate": 8.719397573676384e-06, "loss": 0.5043, "step": 2063 }, { "epoch": 0.26, "grad_norm": 1.4367898452648369, "learning_rate": 8.718054150459106e-06, "loss": 0.5192, "step": 2064 }, { "epoch": 0.26, "grad_norm": 1.5175871163956165, "learning_rate": 8.716710126557435e-06, "loss": 0.5194, "step": 2065 }, { "epoch": 0.26, "grad_norm": 1.3458435739714716, "learning_rate": 8.715365502188508e-06, "loss": 0.5238, "step": 2066 }, { "epoch": 0.26, "grad_norm": 1.7803905820852695, "learning_rate": 8.714020277569564e-06, "loss": 0.5646, "step": 2067 }, { "epoch": 0.26, "grad_norm": 1.7385985543427787, "learning_rate": 8.712674452917934e-06, "loss": 0.5495, "step": 2068 }, { "epoch": 0.26, "grad_norm": 1.4759265210333785, "learning_rate": 8.711328028451045e-06, "loss": 0.4545, "step": 2069 }, { "epoch": 0.26, "grad_norm": 1.3288442314806546, "learning_rate": 8.709981004386429e-06, "loss": 0.5034, "step": 2070 }, { "epoch": 0.26, "grad_norm": 1.4758448460788367, "learning_rate": 8.708633380941706e-06, "loss": 0.5045, "step": 2071 }, { "epoch": 0.26, "grad_norm": 1.3920330580189069, "learning_rate": 8.707285158334598e-06, "loss": 0.4962, "step": 2072 }, { "epoch": 0.26, "grad_norm": 1.2930994944296241, "learning_rate": 8.705936336782921e-06, "loss": 0.5308, "step": 2073 }, { "epoch": 0.26, "grad_norm": 1.8773196012080102, "learning_rate": 8.704586916504592e-06, "loss": 0.4816, "step": 2074 }, { "epoch": 0.26, "grad_norm": 1.3165810492564216, "learning_rate": 8.703236897717617e-06, "loss": 0.4903, "step": 2075 }, { "epoch": 0.26, "grad_norm": 1.5163855156336135, "learning_rate": 8.701886280640109e-06, "loss": 0.4882, "step": 2076 }, { "epoch": 0.26, "grad_norm": 1.6838193810705397, "learning_rate": 8.700535065490266e-06, "loss": 0.5349, "step": 2077 }, { "epoch": 0.26, "grad_norm": 1.3303115176381106, "learning_rate": 8.699183252486395e-06, "loss": 0.5408, "step": 2078 }, { "epoch": 0.26, "grad_norm": 1.3360266668930008, "learning_rate": 8.697830841846887e-06, "loss": 0.525, "step": 2079 }, { "epoch": 0.26, "grad_norm": 1.439059769559954, "learning_rate": 8.69647783379024e-06, "loss": 0.5354, "step": 2080 }, { "epoch": 0.26, "grad_norm": 1.3942462512315814, "learning_rate": 8.695124228535044e-06, "loss": 0.5391, "step": 2081 }, { "epoch": 0.26, "grad_norm": 0.626043540508294, "learning_rate": 8.693770026299984e-06, "loss": 0.475, "step": 2082 }, { "epoch": 0.26, "grad_norm": 1.3662165330675327, "learning_rate": 8.692415227303844e-06, "loss": 0.5206, "step": 2083 }, { "epoch": 0.26, "grad_norm": 1.4217799982549042, "learning_rate": 8.691059831765505e-06, "loss": 0.5648, "step": 2084 }, { "epoch": 0.26, "grad_norm": 1.7365946002808297, "learning_rate": 8.689703839903943e-06, "loss": 0.5146, "step": 2085 }, { "epoch": 0.26, "grad_norm": 1.6943207518116408, "learning_rate": 8.688347251938229e-06, "loss": 0.5379, "step": 2086 }, { "epoch": 0.26, "grad_norm": 1.5989235306650624, "learning_rate": 8.686990068087532e-06, "loss": 0.5212, "step": 2087 }, { "epoch": 0.26, "grad_norm": 1.5245090919991986, "learning_rate": 8.685632288571118e-06, "loss": 0.5261, "step": 2088 }, { "epoch": 0.26, "grad_norm": 1.3285545188358303, "learning_rate": 8.684273913608346e-06, "loss": 0.4977, "step": 2089 }, { "epoch": 0.26, "grad_norm": 2.488096499314304, "learning_rate": 8.682914943418677e-06, "loss": 0.5072, "step": 2090 }, { "epoch": 0.26, "grad_norm": 1.4838765109056207, "learning_rate": 8.681555378221661e-06, "loss": 0.5111, "step": 2091 }, { "epoch": 0.26, "grad_norm": 1.6550785967751374, "learning_rate": 8.680195218236951e-06, "loss": 0.4896, "step": 2092 }, { "epoch": 0.26, "grad_norm": 1.4018756217200874, "learning_rate": 8.67883446368429e-06, "loss": 0.547, "step": 2093 }, { "epoch": 0.26, "grad_norm": 1.3465600335196564, "learning_rate": 8.677473114783524e-06, "loss": 0.5501, "step": 2094 }, { "epoch": 0.26, "grad_norm": 1.296107978657512, "learning_rate": 8.676111171754585e-06, "loss": 0.5285, "step": 2095 }, { "epoch": 0.26, "grad_norm": 1.3967902124449163, "learning_rate": 8.67474863481751e-06, "loss": 0.5699, "step": 2096 }, { "epoch": 0.26, "grad_norm": 1.5257350142040416, "learning_rate": 8.673385504192428e-06, "loss": 0.5171, "step": 2097 }, { "epoch": 0.26, "grad_norm": 1.3449457010675756, "learning_rate": 8.672021780099569e-06, "loss": 0.524, "step": 2098 }, { "epoch": 0.26, "grad_norm": 1.519683542426732, "learning_rate": 8.670657462759248e-06, "loss": 0.5065, "step": 2099 }, { "epoch": 0.26, "grad_norm": 1.4787471182390501, "learning_rate": 8.669292552391888e-06, "loss": 0.5024, "step": 2100 }, { "epoch": 0.26, "grad_norm": 1.6463547942760857, "learning_rate": 8.667927049217997e-06, "loss": 0.5639, "step": 2101 }, { "epoch": 0.26, "grad_norm": 1.8358536422213843, "learning_rate": 8.66656095345819e-06, "loss": 0.4962, "step": 2102 }, { "epoch": 0.26, "grad_norm": 1.7067416368535604, "learning_rate": 8.665194265333167e-06, "loss": 0.5317, "step": 2103 }, { "epoch": 0.26, "grad_norm": 1.3435245189908687, "learning_rate": 8.66382698506373e-06, "loss": 0.5432, "step": 2104 }, { "epoch": 0.26, "grad_norm": 1.5260755140956461, "learning_rate": 8.662459112870777e-06, "loss": 0.4877, "step": 2105 }, { "epoch": 0.26, "grad_norm": 1.8155532123302065, "learning_rate": 8.661090648975297e-06, "loss": 0.5655, "step": 2106 }, { "epoch": 0.26, "grad_norm": 1.4194970138329932, "learning_rate": 8.659721593598379e-06, "loss": 0.5591, "step": 2107 }, { "epoch": 0.26, "grad_norm": 1.2344258030946587, "learning_rate": 8.658351946961206e-06, "loss": 0.4829, "step": 2108 }, { "epoch": 0.26, "grad_norm": 1.5723751549266327, "learning_rate": 8.656981709285054e-06, "loss": 0.5254, "step": 2109 }, { "epoch": 0.26, "grad_norm": 6.204612264343951, "learning_rate": 8.655610880791303e-06, "loss": 0.4789, "step": 2110 }, { "epoch": 0.26, "grad_norm": 1.3796945954800486, "learning_rate": 8.654239461701414e-06, "loss": 0.587, "step": 2111 }, { "epoch": 0.26, "grad_norm": 1.526936464451939, "learning_rate": 8.65286745223696e-06, "loss": 0.5601, "step": 2112 }, { "epoch": 0.26, "grad_norm": 1.5491201410612896, "learning_rate": 8.651494852619596e-06, "loss": 0.5303, "step": 2113 }, { "epoch": 0.26, "grad_norm": 1.3635548901742522, "learning_rate": 8.650121663071079e-06, "loss": 0.4809, "step": 2114 }, { "epoch": 0.26, "grad_norm": 0.6497764251763823, "learning_rate": 8.64874788381326e-06, "loss": 0.4995, "step": 2115 }, { "epoch": 0.26, "grad_norm": 1.4728552317424948, "learning_rate": 8.647373515068085e-06, "loss": 0.4796, "step": 2116 }, { "epoch": 0.26, "grad_norm": 1.3439837298838473, "learning_rate": 8.645998557057595e-06, "loss": 0.5647, "step": 2117 }, { "epoch": 0.26, "grad_norm": 0.6876273724296037, "learning_rate": 8.644623010003928e-06, "loss": 0.5131, "step": 2118 }, { "epoch": 0.26, "grad_norm": 1.5868033275463422, "learning_rate": 8.643246874129316e-06, "loss": 0.5644, "step": 2119 }, { "epoch": 0.26, "grad_norm": 1.511917733457017, "learning_rate": 8.641870149656082e-06, "loss": 0.5396, "step": 2120 }, { "epoch": 0.26, "grad_norm": 1.3439071166591285, "learning_rate": 8.64049283680665e-06, "loss": 0.5074, "step": 2121 }, { "epoch": 0.26, "grad_norm": 1.2768752235566887, "learning_rate": 8.639114935803542e-06, "loss": 0.5101, "step": 2122 }, { "epoch": 0.26, "grad_norm": 1.3351149989408924, "learning_rate": 8.637736446869362e-06, "loss": 0.5379, "step": 2123 }, { "epoch": 0.26, "grad_norm": 1.7684832724906134, "learning_rate": 8.63635737022682e-06, "loss": 0.506, "step": 2124 }, { "epoch": 0.26, "grad_norm": 1.5063907422372096, "learning_rate": 8.634977706098721e-06, "loss": 0.5006, "step": 2125 }, { "epoch": 0.26, "grad_norm": 1.3146592975273879, "learning_rate": 8.633597454707959e-06, "loss": 0.5306, "step": 2126 }, { "epoch": 0.26, "grad_norm": 1.3187236273362868, "learning_rate": 8.632216616277527e-06, "loss": 0.4807, "step": 2127 }, { "epoch": 0.26, "grad_norm": 2.077537638693262, "learning_rate": 8.630835191030508e-06, "loss": 0.5125, "step": 2128 }, { "epoch": 0.26, "grad_norm": 1.4717538690131045, "learning_rate": 8.62945317919009e-06, "loss": 0.5009, "step": 2129 }, { "epoch": 0.26, "grad_norm": 1.5072010277133576, "learning_rate": 8.628070580979544e-06, "loss": 0.5108, "step": 2130 }, { "epoch": 0.26, "grad_norm": 1.7909802743135568, "learning_rate": 8.626687396622242e-06, "loss": 0.4817, "step": 2131 }, { "epoch": 0.26, "grad_norm": 1.522209883270738, "learning_rate": 8.62530362634165e-06, "loss": 0.5689, "step": 2132 }, { "epoch": 0.26, "grad_norm": 1.6072768318168984, "learning_rate": 8.623919270361329e-06, "loss": 0.4947, "step": 2133 }, { "epoch": 0.26, "grad_norm": 1.479690164963758, "learning_rate": 8.622534328904932e-06, "loss": 0.5179, "step": 2134 }, { "epoch": 0.26, "grad_norm": 1.2871577638299292, "learning_rate": 8.621148802196211e-06, "loss": 0.4856, "step": 2135 }, { "epoch": 0.27, "grad_norm": 1.6382205250968358, "learning_rate": 8.619762690459008e-06, "loss": 0.5136, "step": 2136 }, { "epoch": 0.27, "grad_norm": 0.6979597886359754, "learning_rate": 8.618375993917263e-06, "loss": 0.4986, "step": 2137 }, { "epoch": 0.27, "grad_norm": 1.3555980447677438, "learning_rate": 8.616988712795008e-06, "loss": 0.4785, "step": 2138 }, { "epoch": 0.27, "grad_norm": 4.748291939308693, "learning_rate": 8.615600847316372e-06, "loss": 0.5331, "step": 2139 }, { "epoch": 0.27, "grad_norm": 2.00088888375592, "learning_rate": 8.614212397705575e-06, "loss": 0.5701, "step": 2140 }, { "epoch": 0.27, "grad_norm": 1.573457827272612, "learning_rate": 8.612823364186933e-06, "loss": 0.4721, "step": 2141 }, { "epoch": 0.27, "grad_norm": 1.551083989047537, "learning_rate": 8.611433746984858e-06, "loss": 0.5552, "step": 2142 }, { "epoch": 0.27, "grad_norm": 1.4912064054984209, "learning_rate": 8.610043546323855e-06, "loss": 0.5488, "step": 2143 }, { "epoch": 0.27, "grad_norm": 1.2760824885449171, "learning_rate": 8.608652762428521e-06, "loss": 0.5275, "step": 2144 }, { "epoch": 0.27, "grad_norm": 1.5981452550912965, "learning_rate": 8.607261395523554e-06, "loss": 0.4953, "step": 2145 }, { "epoch": 0.27, "grad_norm": 1.4329804648625786, "learning_rate": 8.605869445833737e-06, "loss": 0.5042, "step": 2146 }, { "epoch": 0.27, "grad_norm": 1.3602541838086386, "learning_rate": 8.604476913583955e-06, "loss": 0.5388, "step": 2147 }, { "epoch": 0.27, "grad_norm": 2.7925939709059873, "learning_rate": 8.60308379899918e-06, "loss": 0.5228, "step": 2148 }, { "epoch": 0.27, "grad_norm": 1.2888107219051665, "learning_rate": 8.601690102304486e-06, "loss": 0.5206, "step": 2149 }, { "epoch": 0.27, "grad_norm": 1.3557563294974224, "learning_rate": 8.600295823725033e-06, "loss": 0.5455, "step": 2150 }, { "epoch": 0.27, "grad_norm": 1.3427580417032645, "learning_rate": 8.598900963486083e-06, "loss": 0.5269, "step": 2151 }, { "epoch": 0.27, "grad_norm": 1.4120668579056157, "learning_rate": 8.597505521812984e-06, "loss": 0.5046, "step": 2152 }, { "epoch": 0.27, "grad_norm": 1.3108192996952082, "learning_rate": 8.596109498931185e-06, "loss": 0.5103, "step": 2153 }, { "epoch": 0.27, "grad_norm": 1.5495240706755715, "learning_rate": 8.594712895066226e-06, "loss": 0.4984, "step": 2154 }, { "epoch": 0.27, "grad_norm": 1.3005502095759358, "learning_rate": 8.593315710443739e-06, "loss": 0.549, "step": 2155 }, { "epoch": 0.27, "grad_norm": 1.4965928633369276, "learning_rate": 8.59191794528945e-06, "loss": 0.4768, "step": 2156 }, { "epoch": 0.27, "grad_norm": 1.3650726161121465, "learning_rate": 8.590519599829184e-06, "loss": 0.5329, "step": 2157 }, { "epoch": 0.27, "grad_norm": 1.4564801871656392, "learning_rate": 8.589120674288853e-06, "loss": 0.4863, "step": 2158 }, { "epoch": 0.27, "grad_norm": 1.632300836612375, "learning_rate": 8.587721168894466e-06, "loss": 0.5334, "step": 2159 }, { "epoch": 0.27, "grad_norm": 1.1623408678050393, "learning_rate": 8.58632108387213e-06, "loss": 0.4864, "step": 2160 }, { "epoch": 0.27, "grad_norm": 2.1603521892398447, "learning_rate": 8.584920419448035e-06, "loss": 0.4918, "step": 2161 }, { "epoch": 0.27, "grad_norm": 1.2579644869776925, "learning_rate": 8.583519175848474e-06, "loss": 0.4942, "step": 2162 }, { "epoch": 0.27, "grad_norm": 2.129015625363322, "learning_rate": 8.582117353299828e-06, "loss": 0.469, "step": 2163 }, { "epoch": 0.27, "grad_norm": 1.924953776540199, "learning_rate": 8.580714952028575e-06, "loss": 0.5748, "step": 2164 }, { "epoch": 0.27, "grad_norm": 1.507743860015434, "learning_rate": 8.579311972261286e-06, "loss": 0.5841, "step": 2165 }, { "epoch": 0.27, "grad_norm": 1.480253101467622, "learning_rate": 8.577908414224625e-06, "loss": 0.5915, "step": 2166 }, { "epoch": 0.27, "grad_norm": 1.3203271426351713, "learning_rate": 8.576504278145348e-06, "loss": 0.4924, "step": 2167 }, { "epoch": 0.27, "grad_norm": 1.2872361086433888, "learning_rate": 8.575099564250304e-06, "loss": 0.5593, "step": 2168 }, { "epoch": 0.27, "grad_norm": 1.4261526727740197, "learning_rate": 8.57369427276644e-06, "loss": 0.5528, "step": 2169 }, { "epoch": 0.27, "grad_norm": 1.435250926712347, "learning_rate": 8.572288403920791e-06, "loss": 0.5494, "step": 2170 }, { "epoch": 0.27, "grad_norm": 1.3595312099940866, "learning_rate": 8.570881957940491e-06, "loss": 0.5357, "step": 2171 }, { "epoch": 0.27, "grad_norm": 1.7425534944356988, "learning_rate": 8.56947493505276e-06, "loss": 0.5608, "step": 2172 }, { "epoch": 0.27, "grad_norm": 1.5923240614789778, "learning_rate": 8.568067335484915e-06, "loss": 0.5255, "step": 2173 }, { "epoch": 0.27, "grad_norm": 1.3081107286282319, "learning_rate": 8.566659159464367e-06, "loss": 0.5069, "step": 2174 }, { "epoch": 0.27, "grad_norm": 1.2351708937483399, "learning_rate": 8.565250407218622e-06, "loss": 0.506, "step": 2175 }, { "epoch": 0.27, "grad_norm": 1.5097611716714756, "learning_rate": 8.563841078975273e-06, "loss": 0.5172, "step": 2176 }, { "epoch": 0.27, "grad_norm": 5.71048705718627, "learning_rate": 8.562431174962009e-06, "loss": 0.5111, "step": 2177 }, { "epoch": 0.27, "grad_norm": 1.5677375671906304, "learning_rate": 8.561020695406614e-06, "loss": 0.5155, "step": 2178 }, { "epoch": 0.27, "grad_norm": 1.5195255872079538, "learning_rate": 8.559609640536962e-06, "loss": 0.5085, "step": 2179 }, { "epoch": 0.27, "grad_norm": 1.541472808973938, "learning_rate": 8.558198010581022e-06, "loss": 0.5067, "step": 2180 }, { "epoch": 0.27, "grad_norm": 1.5948220379252283, "learning_rate": 8.556785805766859e-06, "loss": 0.4828, "step": 2181 }, { "epoch": 0.27, "grad_norm": 1.3828488243455312, "learning_rate": 8.55537302632262e-06, "loss": 0.5429, "step": 2182 }, { "epoch": 0.27, "grad_norm": 1.6313382434068164, "learning_rate": 8.553959672476558e-06, "loss": 0.4702, "step": 2183 }, { "epoch": 0.27, "grad_norm": 1.9515593617267215, "learning_rate": 8.55254574445701e-06, "loss": 0.5574, "step": 2184 }, { "epoch": 0.27, "grad_norm": 1.6291863643827857, "learning_rate": 8.551131242492407e-06, "loss": 0.5196, "step": 2185 }, { "epoch": 0.27, "grad_norm": 1.4610097753460443, "learning_rate": 8.54971616681128e-06, "loss": 0.486, "step": 2186 }, { "epoch": 0.27, "grad_norm": 1.243411626461023, "learning_rate": 8.54830051764224e-06, "loss": 0.4869, "step": 2187 }, { "epoch": 0.27, "grad_norm": 1.5254988279151824, "learning_rate": 8.546884295214002e-06, "loss": 0.5065, "step": 2188 }, { "epoch": 0.27, "grad_norm": 1.348325328290899, "learning_rate": 8.545467499755366e-06, "loss": 0.5287, "step": 2189 }, { "epoch": 0.27, "grad_norm": 1.4484265724296916, "learning_rate": 8.544050131495233e-06, "loss": 0.4964, "step": 2190 }, { "epoch": 0.27, "grad_norm": 1.3966660469184176, "learning_rate": 8.542632190662586e-06, "loss": 0.5599, "step": 2191 }, { "epoch": 0.27, "grad_norm": 3.7947891690402593, "learning_rate": 8.541213677486509e-06, "loss": 0.5735, "step": 2192 }, { "epoch": 0.27, "grad_norm": 1.306427889420237, "learning_rate": 8.539794592196173e-06, "loss": 0.5341, "step": 2193 }, { "epoch": 0.27, "grad_norm": 2.0480279716104333, "learning_rate": 8.538374935020846e-06, "loss": 0.5067, "step": 2194 }, { "epoch": 0.27, "grad_norm": 1.3681137444792715, "learning_rate": 8.536954706189883e-06, "loss": 0.5132, "step": 2195 }, { "epoch": 0.27, "grad_norm": 1.7656680141879872, "learning_rate": 8.535533905932739e-06, "loss": 0.5815, "step": 2196 }, { "epoch": 0.27, "grad_norm": 1.2767603247891768, "learning_rate": 8.534112534478953e-06, "loss": 0.4554, "step": 2197 }, { "epoch": 0.27, "grad_norm": 1.5866330370451214, "learning_rate": 8.532690592058161e-06, "loss": 0.5033, "step": 2198 }, { "epoch": 0.27, "grad_norm": 1.9089032305015174, "learning_rate": 8.53126807890009e-06, "loss": 0.5001, "step": 2199 }, { "epoch": 0.27, "grad_norm": 1.474155149864476, "learning_rate": 8.529844995234563e-06, "loss": 0.5069, "step": 2200 }, { "epoch": 0.27, "grad_norm": 1.4499060008784328, "learning_rate": 8.528421341291488e-06, "loss": 0.5084, "step": 2201 }, { "epoch": 0.27, "grad_norm": 1.3897034097569219, "learning_rate": 8.526997117300868e-06, "loss": 0.5091, "step": 2202 }, { "epoch": 0.27, "grad_norm": 1.3914446118557149, "learning_rate": 8.525572323492803e-06, "loss": 0.5164, "step": 2203 }, { "epoch": 0.27, "grad_norm": 3.0644901750892553, "learning_rate": 8.524146960097476e-06, "loss": 0.4661, "step": 2204 }, { "epoch": 0.27, "grad_norm": 1.5115707604600233, "learning_rate": 8.522721027345173e-06, "loss": 0.5254, "step": 2205 }, { "epoch": 0.27, "grad_norm": 1.529006318561144, "learning_rate": 8.52129452546626e-06, "loss": 0.5786, "step": 2206 }, { "epoch": 0.27, "grad_norm": 1.6774863769516049, "learning_rate": 8.519867454691204e-06, "loss": 0.505, "step": 2207 }, { "epoch": 0.27, "grad_norm": 1.7550570033418513, "learning_rate": 8.518439815250561e-06, "loss": 0.5145, "step": 2208 }, { "epoch": 0.27, "grad_norm": 1.8311153441470829, "learning_rate": 8.517011607374978e-06, "loss": 0.494, "step": 2209 }, { "epoch": 0.27, "grad_norm": 1.508065531312494, "learning_rate": 8.515582831295195e-06, "loss": 0.5514, "step": 2210 }, { "epoch": 0.27, "grad_norm": 3.8391102633980023, "learning_rate": 8.514153487242042e-06, "loss": 0.5094, "step": 2211 }, { "epoch": 0.27, "grad_norm": 1.6333778269326167, "learning_rate": 8.512723575446446e-06, "loss": 0.5497, "step": 2212 }, { "epoch": 0.27, "grad_norm": 1.5528173792460196, "learning_rate": 8.511293096139417e-06, "loss": 0.551, "step": 2213 }, { "epoch": 0.27, "grad_norm": 1.5720104471169536, "learning_rate": 8.509862049552065e-06, "loss": 0.4421, "step": 2214 }, { "epoch": 0.27, "grad_norm": 1.3400011066040898, "learning_rate": 8.508430435915584e-06, "loss": 0.4903, "step": 2215 }, { "epoch": 0.28, "grad_norm": 1.5794600856104057, "learning_rate": 8.506998255461269e-06, "loss": 0.494, "step": 2216 }, { "epoch": 0.28, "grad_norm": 1.5033180990212154, "learning_rate": 8.505565508420498e-06, "loss": 0.5592, "step": 2217 }, { "epoch": 0.28, "grad_norm": 1.359279658821798, "learning_rate": 8.504132195024747e-06, "loss": 0.546, "step": 2218 }, { "epoch": 0.28, "grad_norm": 1.4952159845406447, "learning_rate": 8.502698315505573e-06, "loss": 0.5359, "step": 2219 }, { "epoch": 0.28, "grad_norm": 1.5247955931103605, "learning_rate": 8.501263870094642e-06, "loss": 0.5179, "step": 2220 }, { "epoch": 0.28, "grad_norm": 1.7782806600137773, "learning_rate": 8.499828859023696e-06, "loss": 0.5474, "step": 2221 }, { "epoch": 0.28, "grad_norm": 1.5995731206263903, "learning_rate": 8.498393282524572e-06, "loss": 0.4935, "step": 2222 }, { "epoch": 0.28, "grad_norm": 1.2088287114101004, "learning_rate": 8.496957140829203e-06, "loss": 0.4073, "step": 2223 }, { "epoch": 0.28, "grad_norm": 2.0638784444279548, "learning_rate": 8.495520434169609e-06, "loss": 0.5159, "step": 2224 }, { "epoch": 0.28, "grad_norm": 1.497881780594768, "learning_rate": 8.494083162777903e-06, "loss": 0.5605, "step": 2225 }, { "epoch": 0.28, "grad_norm": 1.413788779749229, "learning_rate": 8.492645326886291e-06, "loss": 0.5486, "step": 2226 }, { "epoch": 0.28, "grad_norm": 2.654873395286464, "learning_rate": 8.491206926727064e-06, "loss": 0.5209, "step": 2227 }, { "epoch": 0.28, "grad_norm": 0.6926087836331076, "learning_rate": 8.489767962532611e-06, "loss": 0.5036, "step": 2228 }, { "epoch": 0.28, "grad_norm": 1.422679228906343, "learning_rate": 8.488328434535408e-06, "loss": 0.5435, "step": 2229 }, { "epoch": 0.28, "grad_norm": 2.1664800474007877, "learning_rate": 8.486888342968023e-06, "loss": 0.4997, "step": 2230 }, { "epoch": 0.28, "grad_norm": 1.4461426110457904, "learning_rate": 8.485447688063117e-06, "loss": 0.522, "step": 2231 }, { "epoch": 0.28, "grad_norm": 2.1007331833215424, "learning_rate": 8.484006470053441e-06, "loss": 0.498, "step": 2232 }, { "epoch": 0.28, "grad_norm": 1.3641751224103698, "learning_rate": 8.482564689171834e-06, "loss": 0.4979, "step": 2233 }, { "epoch": 0.28, "grad_norm": 1.5884561346617245, "learning_rate": 8.481122345651233e-06, "loss": 0.5074, "step": 2234 }, { "epoch": 0.28, "grad_norm": 1.4192325095037206, "learning_rate": 8.479679439724654e-06, "loss": 0.5471, "step": 2235 }, { "epoch": 0.28, "grad_norm": 1.5989179484159823, "learning_rate": 8.478235971625218e-06, "loss": 0.5239, "step": 2236 }, { "epoch": 0.28, "grad_norm": 1.8668321080947397, "learning_rate": 8.476791941586126e-06, "loss": 0.5037, "step": 2237 }, { "epoch": 0.28, "grad_norm": 1.5158066815377742, "learning_rate": 8.475347349840674e-06, "loss": 0.5369, "step": 2238 }, { "epoch": 0.28, "grad_norm": 1.3645982218539816, "learning_rate": 8.473902196622252e-06, "loss": 0.5108, "step": 2239 }, { "epoch": 0.28, "grad_norm": 1.265074006713083, "learning_rate": 8.472456482164332e-06, "loss": 0.498, "step": 2240 }, { "epoch": 0.28, "grad_norm": 1.2999038326538637, "learning_rate": 8.471010206700488e-06, "loss": 0.5254, "step": 2241 }, { "epoch": 0.28, "grad_norm": 1.6060369052945243, "learning_rate": 8.469563370464372e-06, "loss": 0.5791, "step": 2242 }, { "epoch": 0.28, "grad_norm": 1.426987421889657, "learning_rate": 8.468115973689739e-06, "loss": 0.5261, "step": 2243 }, { "epoch": 0.28, "grad_norm": 1.3635787901340615, "learning_rate": 8.466668016610423e-06, "loss": 0.5393, "step": 2244 }, { "epoch": 0.28, "grad_norm": 2.369209352696687, "learning_rate": 8.46521949946036e-06, "loss": 0.508, "step": 2245 }, { "epoch": 0.28, "grad_norm": 3.8226736763186273, "learning_rate": 8.463770422473566e-06, "loss": 0.5892, "step": 2246 }, { "epoch": 0.28, "grad_norm": 1.5394075544765462, "learning_rate": 8.462320785884155e-06, "loss": 0.5151, "step": 2247 }, { "epoch": 0.28, "grad_norm": 1.815742457072043, "learning_rate": 8.460870589926327e-06, "loss": 0.5484, "step": 2248 }, { "epoch": 0.28, "grad_norm": 1.4875209217946046, "learning_rate": 8.459419834834374e-06, "loss": 0.5414, "step": 2249 }, { "epoch": 0.28, "grad_norm": 1.274126755614334, "learning_rate": 8.45796852084268e-06, "loss": 0.4801, "step": 2250 }, { "epoch": 0.28, "grad_norm": 1.4106176029586979, "learning_rate": 8.456516648185717e-06, "loss": 0.4854, "step": 2251 }, { "epoch": 0.28, "grad_norm": 1.621250166058811, "learning_rate": 8.455064217098046e-06, "loss": 0.5174, "step": 2252 }, { "epoch": 0.28, "grad_norm": 0.6601874583519945, "learning_rate": 8.453611227814322e-06, "loss": 0.496, "step": 2253 }, { "epoch": 0.28, "grad_norm": 1.1827510625394257, "learning_rate": 8.452157680569287e-06, "loss": 0.5282, "step": 2254 }, { "epoch": 0.28, "grad_norm": 1.4471288796504076, "learning_rate": 8.450703575597775e-06, "loss": 0.4927, "step": 2255 }, { "epoch": 0.28, "grad_norm": 1.569274387714447, "learning_rate": 8.449248913134709e-06, "loss": 0.5473, "step": 2256 }, { "epoch": 0.28, "grad_norm": 1.221832571991313, "learning_rate": 8.447793693415103e-06, "loss": 0.5164, "step": 2257 }, { "epoch": 0.28, "grad_norm": 1.4929632157250627, "learning_rate": 8.446337916674062e-06, "loss": 0.5397, "step": 2258 }, { "epoch": 0.28, "grad_norm": 1.4075589123438812, "learning_rate": 8.444881583146776e-06, "loss": 0.5151, "step": 2259 }, { "epoch": 0.28, "grad_norm": 2.2567410524008205, "learning_rate": 8.44342469306853e-06, "loss": 0.5171, "step": 2260 }, { "epoch": 0.28, "grad_norm": 2.27510092229832, "learning_rate": 8.441967246674698e-06, "loss": 0.5706, "step": 2261 }, { "epoch": 0.28, "grad_norm": 1.7994978265127097, "learning_rate": 8.440509244200743e-06, "loss": 0.5459, "step": 2262 }, { "epoch": 0.28, "grad_norm": 3.6112199363869086, "learning_rate": 8.43905068588222e-06, "loss": 0.5131, "step": 2263 }, { "epoch": 0.28, "grad_norm": 1.9579492063060595, "learning_rate": 8.437591571954768e-06, "loss": 0.4655, "step": 2264 }, { "epoch": 0.28, "grad_norm": 1.6073308539208877, "learning_rate": 8.436131902654123e-06, "loss": 0.5265, "step": 2265 }, { "epoch": 0.28, "grad_norm": 1.7586176288861322, "learning_rate": 8.43467167821611e-06, "loss": 0.5683, "step": 2266 }, { "epoch": 0.28, "grad_norm": 1.3915881330613307, "learning_rate": 8.433210898876632e-06, "loss": 0.4483, "step": 2267 }, { "epoch": 0.28, "grad_norm": 1.8019170267996496, "learning_rate": 8.431749564871698e-06, "loss": 0.5564, "step": 2268 }, { "epoch": 0.28, "grad_norm": 3.1320048897516775, "learning_rate": 8.430287676437399e-06, "loss": 0.5397, "step": 2269 }, { "epoch": 0.28, "grad_norm": 8.967697613735462, "learning_rate": 8.428825233809914e-06, "loss": 0.5517, "step": 2270 }, { "epoch": 0.28, "grad_norm": 1.446804864822726, "learning_rate": 8.427362237225513e-06, "loss": 0.5354, "step": 2271 }, { "epoch": 0.28, "grad_norm": 1.5128368460256831, "learning_rate": 8.425898686920557e-06, "loss": 0.5594, "step": 2272 }, { "epoch": 0.28, "grad_norm": 1.3255339105369666, "learning_rate": 8.424434583131496e-06, "loss": 0.4988, "step": 2273 }, { "epoch": 0.28, "grad_norm": 2.1360665417876894, "learning_rate": 8.42296992609487e-06, "loss": 0.5444, "step": 2274 }, { "epoch": 0.28, "grad_norm": 1.4927309323485651, "learning_rate": 8.421504716047305e-06, "loss": 0.5291, "step": 2275 }, { "epoch": 0.28, "grad_norm": 1.4284228996787336, "learning_rate": 8.420038953225518e-06, "loss": 0.4888, "step": 2276 }, { "epoch": 0.28, "grad_norm": 2.98282046193224, "learning_rate": 8.418572637866316e-06, "loss": 0.4727, "step": 2277 }, { "epoch": 0.28, "grad_norm": 1.3807713542293747, "learning_rate": 8.417105770206598e-06, "loss": 0.523, "step": 2278 }, { "epoch": 0.28, "grad_norm": 1.8528045536909103, "learning_rate": 8.415638350483348e-06, "loss": 0.5091, "step": 2279 }, { "epoch": 0.28, "grad_norm": 1.632035367705425, "learning_rate": 8.41417037893364e-06, "loss": 0.5403, "step": 2280 }, { "epoch": 0.28, "grad_norm": 0.7208618427001102, "learning_rate": 8.412701855794637e-06, "loss": 0.5022, "step": 2281 }, { "epoch": 0.28, "grad_norm": 1.4717548446224873, "learning_rate": 8.411232781303593e-06, "loss": 0.4975, "step": 2282 }, { "epoch": 0.28, "grad_norm": 1.4415037838550437, "learning_rate": 8.409763155697852e-06, "loss": 0.553, "step": 2283 }, { "epoch": 0.28, "grad_norm": 1.6165367478786659, "learning_rate": 8.40829297921484e-06, "loss": 0.4942, "step": 2284 }, { "epoch": 0.28, "grad_norm": 1.4188139758484897, "learning_rate": 8.40682225209208e-06, "loss": 0.4914, "step": 2285 }, { "epoch": 0.28, "grad_norm": 2.0225226153497595, "learning_rate": 8.405350974567182e-06, "loss": 0.4985, "step": 2286 }, { "epoch": 0.28, "grad_norm": 1.6167432111333073, "learning_rate": 8.403879146877841e-06, "loss": 0.4977, "step": 2287 }, { "epoch": 0.28, "grad_norm": 1.5411985035248754, "learning_rate": 8.402406769261846e-06, "loss": 0.563, "step": 2288 }, { "epoch": 0.28, "grad_norm": 1.4103723311543415, "learning_rate": 8.400933841957072e-06, "loss": 0.4985, "step": 2289 }, { "epoch": 0.28, "grad_norm": 1.4687767595342092, "learning_rate": 8.399460365201481e-06, "loss": 0.4976, "step": 2290 }, { "epoch": 0.28, "grad_norm": 1.9856889826237913, "learning_rate": 8.397986339233128e-06, "loss": 0.5085, "step": 2291 }, { "epoch": 0.28, "grad_norm": 1.7220986166950227, "learning_rate": 8.396511764290158e-06, "loss": 0.5138, "step": 2292 }, { "epoch": 0.28, "grad_norm": 1.421132299453895, "learning_rate": 8.395036640610796e-06, "loss": 0.5148, "step": 2293 }, { "epoch": 0.28, "grad_norm": 1.8056847578745472, "learning_rate": 8.393560968433366e-06, "loss": 0.5342, "step": 2294 }, { "epoch": 0.28, "grad_norm": 1.5731443883103524, "learning_rate": 8.392084747996275e-06, "loss": 0.4951, "step": 2295 }, { "epoch": 0.28, "grad_norm": 1.265804124392798, "learning_rate": 8.390607979538014e-06, "loss": 0.4872, "step": 2296 }, { "epoch": 0.29, "grad_norm": 2.297878688851731, "learning_rate": 8.389130663297175e-06, "loss": 0.5321, "step": 2297 }, { "epoch": 0.29, "grad_norm": 1.8342068119319483, "learning_rate": 8.387652799512427e-06, "loss": 0.5113, "step": 2298 }, { "epoch": 0.29, "grad_norm": 1.34842173900626, "learning_rate": 8.386174388422535e-06, "loss": 0.531, "step": 2299 }, { "epoch": 0.29, "grad_norm": 1.59218032004979, "learning_rate": 8.384695430266348e-06, "loss": 0.5145, "step": 2300 }, { "epoch": 0.29, "grad_norm": 1.5341020416331983, "learning_rate": 8.383215925282802e-06, "loss": 0.5395, "step": 2301 }, { "epoch": 0.29, "grad_norm": 1.7895960441730228, "learning_rate": 8.381735873710928e-06, "loss": 0.4936, "step": 2302 }, { "epoch": 0.29, "grad_norm": 1.5468634833190629, "learning_rate": 8.38025527578984e-06, "loss": 0.4864, "step": 2303 }, { "epoch": 0.29, "grad_norm": 0.7169385720661268, "learning_rate": 8.378774131758742e-06, "loss": 0.5151, "step": 2304 }, { "epoch": 0.29, "grad_norm": 1.7764810966446116, "learning_rate": 8.377292441856926e-06, "loss": 0.5585, "step": 2305 }, { "epoch": 0.29, "grad_norm": 1.4247792641176793, "learning_rate": 8.37581020632377e-06, "loss": 0.5499, "step": 2306 }, { "epoch": 0.29, "grad_norm": 3.24826140127667, "learning_rate": 8.374327425398744e-06, "loss": 0.5943, "step": 2307 }, { "epoch": 0.29, "grad_norm": 1.5190074639788063, "learning_rate": 8.372844099321404e-06, "loss": 0.4983, "step": 2308 }, { "epoch": 0.29, "grad_norm": 1.3870284274978932, "learning_rate": 8.371360228331393e-06, "loss": 0.4672, "step": 2309 }, { "epoch": 0.29, "grad_norm": 1.8711754548899875, "learning_rate": 8.369875812668449e-06, "loss": 0.5546, "step": 2310 }, { "epoch": 0.29, "grad_norm": 1.444759030629122, "learning_rate": 8.368390852572384e-06, "loss": 0.5414, "step": 2311 }, { "epoch": 0.29, "grad_norm": 1.2826081853724405, "learning_rate": 8.366905348283114e-06, "loss": 0.4739, "step": 2312 }, { "epoch": 0.29, "grad_norm": 1.6215351080572908, "learning_rate": 8.365419300040628e-06, "loss": 0.4838, "step": 2313 }, { "epoch": 0.29, "grad_norm": 1.368664685893942, "learning_rate": 8.363932708085016e-06, "loss": 0.4851, "step": 2314 }, { "epoch": 0.29, "grad_norm": 1.268709114460342, "learning_rate": 8.362445572656451e-06, "loss": 0.4537, "step": 2315 }, { "epoch": 0.29, "grad_norm": 1.2595580792264183, "learning_rate": 8.360957893995187e-06, "loss": 0.5065, "step": 2316 }, { "epoch": 0.29, "grad_norm": 1.3180728188145547, "learning_rate": 8.359469672341574e-06, "loss": 0.449, "step": 2317 }, { "epoch": 0.29, "grad_norm": 1.3748707275140444, "learning_rate": 8.357980907936048e-06, "loss": 0.5071, "step": 2318 }, { "epoch": 0.29, "grad_norm": 2.2182849061073715, "learning_rate": 8.356491601019135e-06, "loss": 0.5262, "step": 2319 }, { "epoch": 0.29, "grad_norm": 1.8497839329843278, "learning_rate": 8.35500175183144e-06, "loss": 0.5182, "step": 2320 }, { "epoch": 0.29, "grad_norm": 3.7064279805028324, "learning_rate": 8.353511360613665e-06, "loss": 0.488, "step": 2321 }, { "epoch": 0.29, "grad_norm": 1.696234532399231, "learning_rate": 8.352020427606591e-06, "loss": 0.5753, "step": 2322 }, { "epoch": 0.29, "grad_norm": 1.6188147545472062, "learning_rate": 8.350528953051098e-06, "loss": 0.5407, "step": 2323 }, { "epoch": 0.29, "grad_norm": 1.3692002533058387, "learning_rate": 8.349036937188143e-06, "loss": 0.5252, "step": 2324 }, { "epoch": 0.29, "grad_norm": 1.443875711526957, "learning_rate": 8.347544380258777e-06, "loss": 0.5334, "step": 2325 }, { "epoch": 0.29, "grad_norm": 1.4178971172449506, "learning_rate": 8.34605128250413e-06, "loss": 0.5319, "step": 2326 }, { "epoch": 0.29, "grad_norm": 1.2259432902634666, "learning_rate": 8.344557644165431e-06, "loss": 0.5138, "step": 2327 }, { "epoch": 0.29, "grad_norm": 0.6778873298316646, "learning_rate": 8.34306346548399e-06, "loss": 0.5238, "step": 2328 }, { "epoch": 0.29, "grad_norm": 1.4288498248716177, "learning_rate": 8.341568746701202e-06, "loss": 0.5337, "step": 2329 }, { "epoch": 0.29, "grad_norm": 1.5684048901647027, "learning_rate": 8.340073488058552e-06, "loss": 0.4903, "step": 2330 }, { "epoch": 0.29, "grad_norm": 1.2599485203250966, "learning_rate": 8.338577689797615e-06, "loss": 0.5134, "step": 2331 }, { "epoch": 0.29, "grad_norm": 1.4253859816398988, "learning_rate": 8.337081352160048e-06, "loss": 0.505, "step": 2332 }, { "epoch": 0.29, "grad_norm": 2.1339071401428553, "learning_rate": 8.335584475387597e-06, "loss": 0.5273, "step": 2333 }, { "epoch": 0.29, "grad_norm": 1.4778341026992288, "learning_rate": 8.334087059722097e-06, "loss": 0.4823, "step": 2334 }, { "epoch": 0.29, "grad_norm": 1.63766176270152, "learning_rate": 8.33258910540547e-06, "loss": 0.4946, "step": 2335 }, { "epoch": 0.29, "grad_norm": 1.888945724040837, "learning_rate": 8.33109061267972e-06, "loss": 0.5359, "step": 2336 }, { "epoch": 0.29, "grad_norm": 0.6516161391520482, "learning_rate": 8.329591581786946e-06, "loss": 0.4986, "step": 2337 }, { "epoch": 0.29, "grad_norm": 1.5261816214983497, "learning_rate": 8.328092012969327e-06, "loss": 0.5083, "step": 2338 }, { "epoch": 0.29, "grad_norm": 1.4821802013711955, "learning_rate": 8.326591906469132e-06, "loss": 0.5237, "step": 2339 }, { "epoch": 0.29, "grad_norm": 1.6683955615857764, "learning_rate": 8.325091262528715e-06, "loss": 0.5683, "step": 2340 }, { "epoch": 0.29, "grad_norm": 1.357707619827874, "learning_rate": 8.323590081390522e-06, "loss": 0.5236, "step": 2341 }, { "epoch": 0.29, "grad_norm": 1.457783527864208, "learning_rate": 8.322088363297078e-06, "loss": 0.4967, "step": 2342 }, { "epoch": 0.29, "grad_norm": 1.7460405208068128, "learning_rate": 8.320586108491002e-06, "loss": 0.5097, "step": 2343 }, { "epoch": 0.29, "grad_norm": 1.7744582128639608, "learning_rate": 8.319083317214996e-06, "loss": 0.5355, "step": 2344 }, { "epoch": 0.29, "grad_norm": 1.5390306253936152, "learning_rate": 8.317579989711846e-06, "loss": 0.5013, "step": 2345 }, { "epoch": 0.29, "grad_norm": 0.7215048565142089, "learning_rate": 8.31607612622443e-06, "loss": 0.4965, "step": 2346 }, { "epoch": 0.29, "grad_norm": 1.4736527077899817, "learning_rate": 8.314571726995711e-06, "loss": 0.5626, "step": 2347 }, { "epoch": 0.29, "grad_norm": 1.8277857445033219, "learning_rate": 8.313066792268737e-06, "loss": 0.5266, "step": 2348 }, { "epoch": 0.29, "grad_norm": 1.6391679556371148, "learning_rate": 8.311561322286645e-06, "loss": 0.5405, "step": 2349 }, { "epoch": 0.29, "grad_norm": 1.4251390271552782, "learning_rate": 8.310055317292656e-06, "loss": 0.4482, "step": 2350 }, { "epoch": 0.29, "grad_norm": 1.4518548043395432, "learning_rate": 8.308548777530077e-06, "loss": 0.5224, "step": 2351 }, { "epoch": 0.29, "grad_norm": 1.529537909676569, "learning_rate": 8.307041703242305e-06, "loss": 0.4757, "step": 2352 }, { "epoch": 0.29, "grad_norm": 1.4641076631316765, "learning_rate": 8.305534094672818e-06, "loss": 0.5606, "step": 2353 }, { "epoch": 0.29, "grad_norm": 0.6459680091281955, "learning_rate": 8.304025952065187e-06, "loss": 0.4939, "step": 2354 }, { "epoch": 0.29, "grad_norm": 1.5502872604777798, "learning_rate": 8.302517275663063e-06, "loss": 0.5186, "step": 2355 }, { "epoch": 0.29, "grad_norm": 1.4767675987142823, "learning_rate": 8.301008065710188e-06, "loss": 0.5366, "step": 2356 }, { "epoch": 0.29, "grad_norm": 1.4597094379451114, "learning_rate": 8.299498322450388e-06, "loss": 0.5207, "step": 2357 }, { "epoch": 0.29, "grad_norm": 1.5145188084629189, "learning_rate": 8.297988046127574e-06, "loss": 0.468, "step": 2358 }, { "epoch": 0.29, "grad_norm": 1.5879209005951993, "learning_rate": 8.296477236985744e-06, "loss": 0.468, "step": 2359 }, { "epoch": 0.29, "grad_norm": 1.389298414107315, "learning_rate": 8.294965895268985e-06, "loss": 0.5107, "step": 2360 }, { "epoch": 0.29, "grad_norm": 2.367116410018487, "learning_rate": 8.293454021221466e-06, "loss": 0.4894, "step": 2361 }, { "epoch": 0.29, "grad_norm": 1.4671157811272946, "learning_rate": 8.291941615087442e-06, "loss": 0.5283, "step": 2362 }, { "epoch": 0.29, "grad_norm": 1.500644862850445, "learning_rate": 8.290428677111258e-06, "loss": 0.4954, "step": 2363 }, { "epoch": 0.29, "grad_norm": 1.4648161938444693, "learning_rate": 8.288915207537343e-06, "loss": 0.4996, "step": 2364 }, { "epoch": 0.29, "grad_norm": 1.4305139064341361, "learning_rate": 8.28740120661021e-06, "loss": 0.4865, "step": 2365 }, { "epoch": 0.29, "grad_norm": 1.5603416367884653, "learning_rate": 8.285886674574459e-06, "loss": 0.4875, "step": 2366 }, { "epoch": 0.29, "grad_norm": 1.2213046850748692, "learning_rate": 8.284371611674776e-06, "loss": 0.4688, "step": 2367 }, { "epoch": 0.29, "grad_norm": 1.212734451608669, "learning_rate": 8.282856018155932e-06, "loss": 0.4468, "step": 2368 }, { "epoch": 0.29, "grad_norm": 1.4988856306143041, "learning_rate": 8.281339894262786e-06, "loss": 0.5517, "step": 2369 }, { "epoch": 0.29, "grad_norm": 1.9316901312270092, "learning_rate": 8.279823240240282e-06, "loss": 0.4647, "step": 2370 }, { "epoch": 0.29, "grad_norm": 0.6550193444792216, "learning_rate": 8.278306056333445e-06, "loss": 0.5398, "step": 2371 }, { "epoch": 0.29, "grad_norm": 1.6404194777907943, "learning_rate": 8.276788342787394e-06, "loss": 0.5123, "step": 2372 }, { "epoch": 0.29, "grad_norm": 1.315161270026293, "learning_rate": 8.275270099847325e-06, "loss": 0.5449, "step": 2373 }, { "epoch": 0.29, "grad_norm": 1.517240516199405, "learning_rate": 8.273751327758526e-06, "loss": 0.5153, "step": 2374 }, { "epoch": 0.29, "grad_norm": 2.2174133228862587, "learning_rate": 8.272232026766368e-06, "loss": 0.491, "step": 2375 }, { "epoch": 0.29, "grad_norm": 1.4053448725891333, "learning_rate": 8.270712197116306e-06, "loss": 0.5205, "step": 2376 }, { "epoch": 0.29, "grad_norm": 1.9740569155576038, "learning_rate": 8.269191839053884e-06, "loss": 0.4661, "step": 2377 }, { "epoch": 0.3, "grad_norm": 2.672953358964489, "learning_rate": 8.267670952824726e-06, "loss": 0.5081, "step": 2378 }, { "epoch": 0.3, "grad_norm": 2.077306245332074, "learning_rate": 8.266149538674548e-06, "loss": 0.4973, "step": 2379 }, { "epoch": 0.3, "grad_norm": 1.409380624306681, "learning_rate": 8.264627596849146e-06, "loss": 0.5062, "step": 2380 }, { "epoch": 0.3, "grad_norm": 1.8358787110603083, "learning_rate": 8.263105127594405e-06, "loss": 0.5327, "step": 2381 }, { "epoch": 0.3, "grad_norm": 1.4579310672276335, "learning_rate": 8.261582131156289e-06, "loss": 0.4562, "step": 2382 }, { "epoch": 0.3, "grad_norm": 1.4387234091141146, "learning_rate": 8.260058607780857e-06, "loss": 0.5368, "step": 2383 }, { "epoch": 0.3, "grad_norm": 1.285121771570786, "learning_rate": 8.258534557714242e-06, "loss": 0.5482, "step": 2384 }, { "epoch": 0.3, "grad_norm": 1.4535365064196426, "learning_rate": 8.257009981202673e-06, "loss": 0.539, "step": 2385 }, { "epoch": 0.3, "grad_norm": 1.4290733161456042, "learning_rate": 8.255484878492454e-06, "loss": 0.5029, "step": 2386 }, { "epoch": 0.3, "grad_norm": 1.3993424374411836, "learning_rate": 8.253959249829983e-06, "loss": 0.5006, "step": 2387 }, { "epoch": 0.3, "grad_norm": 0.6216777802783738, "learning_rate": 8.252433095461736e-06, "loss": 0.5374, "step": 2388 }, { "epoch": 0.3, "grad_norm": 1.8538060165988877, "learning_rate": 8.250906415634279e-06, "loss": 0.493, "step": 2389 }, { "epoch": 0.3, "grad_norm": 1.4053489404747652, "learning_rate": 8.249379210594258e-06, "loss": 0.5127, "step": 2390 }, { "epoch": 0.3, "grad_norm": 15.824381664090078, "learning_rate": 8.247851480588407e-06, "loss": 0.4867, "step": 2391 }, { "epoch": 0.3, "grad_norm": 1.4129574000855118, "learning_rate": 8.246323225863545e-06, "loss": 0.5446, "step": 2392 }, { "epoch": 0.3, "grad_norm": 1.4106399327103274, "learning_rate": 8.244794446666575e-06, "loss": 0.4639, "step": 2393 }, { "epoch": 0.3, "grad_norm": 1.449898253618827, "learning_rate": 8.243265143244485e-06, "loss": 0.5184, "step": 2394 }, { "epoch": 0.3, "grad_norm": 1.4255742042291377, "learning_rate": 8.241735315844348e-06, "loss": 0.5316, "step": 2395 }, { "epoch": 0.3, "grad_norm": 2.495255075502373, "learning_rate": 8.240204964713317e-06, "loss": 0.4794, "step": 2396 }, { "epoch": 0.3, "grad_norm": 1.19925092790902, "learning_rate": 8.238674090098639e-06, "loss": 0.4861, "step": 2397 }, { "epoch": 0.3, "grad_norm": 1.2584502352522227, "learning_rate": 8.237142692247637e-06, "loss": 0.5453, "step": 2398 }, { "epoch": 0.3, "grad_norm": 1.7706569851686855, "learning_rate": 8.235610771407725e-06, "loss": 0.5128, "step": 2399 }, { "epoch": 0.3, "grad_norm": 1.393286411135955, "learning_rate": 8.234078327826394e-06, "loss": 0.5132, "step": 2400 }, { "epoch": 0.3, "grad_norm": 1.5911983819549134, "learning_rate": 8.232545361751227e-06, "loss": 0.5423, "step": 2401 }, { "epoch": 0.3, "grad_norm": 1.2768352893732755, "learning_rate": 8.231011873429887e-06, "loss": 0.5027, "step": 2402 }, { "epoch": 0.3, "grad_norm": 2.6464670337009326, "learning_rate": 8.22947786311012e-06, "loss": 0.5297, "step": 2403 }, { "epoch": 0.3, "grad_norm": 2.277057973772232, "learning_rate": 8.227943331039765e-06, "loss": 0.53, "step": 2404 }, { "epoch": 0.3, "grad_norm": 1.5165937159294598, "learning_rate": 8.226408277466735e-06, "loss": 0.5029, "step": 2405 }, { "epoch": 0.3, "grad_norm": 1.6307182302388195, "learning_rate": 8.22487270263903e-06, "loss": 0.5076, "step": 2406 }, { "epoch": 0.3, "grad_norm": 1.4016704206015314, "learning_rate": 8.22333660680474e-06, "loss": 0.4607, "step": 2407 }, { "epoch": 0.3, "grad_norm": 1.5733249006556254, "learning_rate": 8.221799990212031e-06, "loss": 0.5086, "step": 2408 }, { "epoch": 0.3, "grad_norm": 1.428256429738577, "learning_rate": 8.22026285310916e-06, "loss": 0.515, "step": 2409 }, { "epoch": 0.3, "grad_norm": 1.974071549092424, "learning_rate": 8.218725195744464e-06, "loss": 0.5051, "step": 2410 }, { "epoch": 0.3, "grad_norm": 1.3210469873226283, "learning_rate": 8.217187018366364e-06, "loss": 0.5219, "step": 2411 }, { "epoch": 0.3, "grad_norm": 1.7354476740515565, "learning_rate": 8.215648321223363e-06, "loss": 0.5185, "step": 2412 }, { "epoch": 0.3, "grad_norm": 1.3385225264897804, "learning_rate": 8.21410910456406e-06, "loss": 0.5344, "step": 2413 }, { "epoch": 0.3, "grad_norm": 1.5655282839619544, "learning_rate": 8.212569368637123e-06, "loss": 0.5764, "step": 2414 }, { "epoch": 0.3, "grad_norm": 1.838377707828001, "learning_rate": 8.21102911369131e-06, "loss": 0.5015, "step": 2415 }, { "epoch": 0.3, "grad_norm": 1.3334971659877586, "learning_rate": 8.209488339975461e-06, "loss": 0.5422, "step": 2416 }, { "epoch": 0.3, "grad_norm": 1.601034277173312, "learning_rate": 8.207947047738508e-06, "loss": 0.5079, "step": 2417 }, { "epoch": 0.3, "grad_norm": 1.732019949409373, "learning_rate": 8.206405237229453e-06, "loss": 0.5537, "step": 2418 }, { "epoch": 0.3, "grad_norm": 1.600774736304845, "learning_rate": 8.204862908697396e-06, "loss": 0.5462, "step": 2419 }, { "epoch": 0.3, "grad_norm": 1.2915947660919527, "learning_rate": 8.203320062391506e-06, "loss": 0.4483, "step": 2420 }, { "epoch": 0.3, "grad_norm": 1.482367313516658, "learning_rate": 8.201776698561049e-06, "loss": 0.5089, "step": 2421 }, { "epoch": 0.3, "grad_norm": 1.379824868329647, "learning_rate": 8.200232817455369e-06, "loss": 0.522, "step": 2422 }, { "epoch": 0.3, "grad_norm": 1.6160975125022232, "learning_rate": 8.198688419323893e-06, "loss": 0.5134, "step": 2423 }, { "epoch": 0.3, "grad_norm": 1.4497050493829207, "learning_rate": 8.197143504416127e-06, "loss": 0.5213, "step": 2424 }, { "epoch": 0.3, "grad_norm": 1.370280107513277, "learning_rate": 8.195598072981674e-06, "loss": 0.4872, "step": 2425 }, { "epoch": 0.3, "grad_norm": 1.5170579867779488, "learning_rate": 8.194052125270207e-06, "loss": 0.5569, "step": 2426 }, { "epoch": 0.3, "grad_norm": 2.0868880854014726, "learning_rate": 8.192505661531489e-06, "loss": 0.5531, "step": 2427 }, { "epoch": 0.3, "grad_norm": 1.596934387560802, "learning_rate": 8.190958682015362e-06, "loss": 0.4936, "step": 2428 }, { "epoch": 0.3, "grad_norm": 1.6234727453741495, "learning_rate": 8.189411186971759e-06, "loss": 0.4954, "step": 2429 }, { "epoch": 0.3, "grad_norm": 1.3541932266704004, "learning_rate": 8.187863176650688e-06, "loss": 0.5169, "step": 2430 }, { "epoch": 0.3, "grad_norm": 1.7951463915061205, "learning_rate": 8.186314651302242e-06, "loss": 0.4556, "step": 2431 }, { "epoch": 0.3, "grad_norm": 1.3940159809171142, "learning_rate": 8.184765611176605e-06, "loss": 0.5454, "step": 2432 }, { "epoch": 0.3, "grad_norm": 1.3019380098469693, "learning_rate": 8.183216056524035e-06, "loss": 0.522, "step": 2433 }, { "epoch": 0.3, "grad_norm": 1.233015420492335, "learning_rate": 8.181665987594874e-06, "loss": 0.478, "step": 2434 }, { "epoch": 0.3, "grad_norm": 1.347521272975239, "learning_rate": 8.18011540463955e-06, "loss": 0.5021, "step": 2435 }, { "epoch": 0.3, "grad_norm": 1.4394524554290282, "learning_rate": 8.178564307908577e-06, "loss": 0.4458, "step": 2436 }, { "epoch": 0.3, "grad_norm": 0.705192311131923, "learning_rate": 8.177012697652544e-06, "loss": 0.4793, "step": 2437 }, { "epoch": 0.3, "grad_norm": 1.5623035789488708, "learning_rate": 8.17546057412213e-06, "loss": 0.5614, "step": 2438 }, { "epoch": 0.3, "grad_norm": 1.3216329542885294, "learning_rate": 8.173907937568093e-06, "loss": 0.4732, "step": 2439 }, { "epoch": 0.3, "grad_norm": 1.4405423395533385, "learning_rate": 8.172354788241277e-06, "loss": 0.5659, "step": 2440 }, { "epoch": 0.3, "grad_norm": 1.4266787372028973, "learning_rate": 8.170801126392602e-06, "loss": 0.533, "step": 2441 }, { "epoch": 0.3, "grad_norm": 4.544729767161987, "learning_rate": 8.169246952273081e-06, "loss": 0.4868, "step": 2442 }, { "epoch": 0.3, "grad_norm": 0.7137674103325354, "learning_rate": 8.167692266133804e-06, "loss": 0.4834, "step": 2443 }, { "epoch": 0.3, "grad_norm": 1.5556203466575353, "learning_rate": 8.166137068225942e-06, "loss": 0.5492, "step": 2444 }, { "epoch": 0.3, "grad_norm": 1.7711514179158319, "learning_rate": 8.164581358800749e-06, "loss": 0.4767, "step": 2445 }, { "epoch": 0.3, "grad_norm": 1.5301362267020098, "learning_rate": 8.16302513810957e-06, "loss": 0.5439, "step": 2446 }, { "epoch": 0.3, "grad_norm": 1.6901022197817894, "learning_rate": 8.16146840640382e-06, "loss": 0.5311, "step": 2447 }, { "epoch": 0.3, "grad_norm": 1.4387929530835506, "learning_rate": 8.159911163935007e-06, "loss": 0.5052, "step": 2448 }, { "epoch": 0.3, "grad_norm": 1.4881162518562419, "learning_rate": 8.158353410954715e-06, "loss": 0.4841, "step": 2449 }, { "epoch": 0.3, "grad_norm": 2.3232025249317436, "learning_rate": 8.156795147714612e-06, "loss": 0.5394, "step": 2450 }, { "epoch": 0.3, "grad_norm": 2.1502311983887634, "learning_rate": 8.155236374466452e-06, "loss": 0.5267, "step": 2451 }, { "epoch": 0.3, "grad_norm": 1.4184547623606276, "learning_rate": 8.153677091462067e-06, "loss": 0.4806, "step": 2452 }, { "epoch": 0.3, "grad_norm": 1.3045598237461515, "learning_rate": 8.15211729895337e-06, "loss": 0.5313, "step": 2453 }, { "epoch": 0.3, "grad_norm": 1.4262062024035553, "learning_rate": 8.150556997192366e-06, "loss": 0.5778, "step": 2454 }, { "epoch": 0.3, "grad_norm": 1.9812787992571088, "learning_rate": 8.148996186431129e-06, "loss": 0.5107, "step": 2455 }, { "epoch": 0.3, "grad_norm": 1.49217716763415, "learning_rate": 8.147434866921824e-06, "loss": 0.5229, "step": 2456 }, { "epoch": 0.3, "grad_norm": 1.5400517482926255, "learning_rate": 8.145873038916696e-06, "loss": 0.5363, "step": 2457 }, { "epoch": 0.31, "grad_norm": 1.7288336436232086, "learning_rate": 8.144310702668072e-06, "loss": 0.4677, "step": 2458 }, { "epoch": 0.31, "grad_norm": 1.397832321796737, "learning_rate": 8.142747858428364e-06, "loss": 0.4786, "step": 2459 }, { "epoch": 0.31, "grad_norm": 1.6129287143413962, "learning_rate": 8.141184506450058e-06, "loss": 0.5479, "step": 2460 }, { "epoch": 0.31, "grad_norm": 1.4428208255741053, "learning_rate": 8.13962064698573e-06, "loss": 0.5064, "step": 2461 }, { "epoch": 0.31, "grad_norm": 1.6383815783802438, "learning_rate": 8.138056280288036e-06, "loss": 0.5306, "step": 2462 }, { "epoch": 0.31, "grad_norm": 1.7647730587391064, "learning_rate": 8.136491406609712e-06, "loss": 0.4727, "step": 2463 }, { "epoch": 0.31, "grad_norm": 1.424228033452532, "learning_rate": 8.134926026203578e-06, "loss": 0.5626, "step": 2464 }, { "epoch": 0.31, "grad_norm": 2.176560989777953, "learning_rate": 8.133360139322533e-06, "loss": 0.4763, "step": 2465 }, { "epoch": 0.31, "grad_norm": 1.4747933472510153, "learning_rate": 8.131793746219563e-06, "loss": 0.5606, "step": 2466 }, { "epoch": 0.31, "grad_norm": 1.5579184573495903, "learning_rate": 8.13022684714773e-06, "loss": 0.5483, "step": 2467 }, { "epoch": 0.31, "grad_norm": 1.3212477231943376, "learning_rate": 8.128659442360182e-06, "loss": 0.5219, "step": 2468 }, { "epoch": 0.31, "grad_norm": 1.4716718393118688, "learning_rate": 8.127091532110147e-06, "loss": 0.5485, "step": 2469 }, { "epoch": 0.31, "grad_norm": 2.1543171145452837, "learning_rate": 8.125523116650933e-06, "loss": 0.5366, "step": 2470 }, { "epoch": 0.31, "grad_norm": 1.9532575568968857, "learning_rate": 8.123954196235932e-06, "loss": 0.4726, "step": 2471 }, { "epoch": 0.31, "grad_norm": 1.3300282366699865, "learning_rate": 8.122384771118619e-06, "loss": 0.45, "step": 2472 }, { "epoch": 0.31, "grad_norm": 1.6658818267429538, "learning_rate": 8.120814841552544e-06, "loss": 0.5226, "step": 2473 }, { "epoch": 0.31, "grad_norm": 2.015297622544853, "learning_rate": 8.119244407791346e-06, "loss": 0.5539, "step": 2474 }, { "epoch": 0.31, "grad_norm": 1.9033582376604703, "learning_rate": 8.117673470088745e-06, "loss": 0.5468, "step": 2475 }, { "epoch": 0.31, "grad_norm": 1.8411435527307176, "learning_rate": 8.116102028698536e-06, "loss": 0.5181, "step": 2476 }, { "epoch": 0.31, "grad_norm": 1.4750661354518269, "learning_rate": 8.114530083874599e-06, "loss": 0.5282, "step": 2477 }, { "epoch": 0.31, "grad_norm": 1.5342175409546561, "learning_rate": 8.112957635870895e-06, "loss": 0.5107, "step": 2478 }, { "epoch": 0.31, "grad_norm": 1.4270612045971265, "learning_rate": 8.111384684941471e-06, "loss": 0.4622, "step": 2479 }, { "epoch": 0.31, "grad_norm": 1.731355664199108, "learning_rate": 8.109811231340448e-06, "loss": 0.5026, "step": 2480 }, { "epoch": 0.31, "grad_norm": 2.4987372239820895, "learning_rate": 8.108237275322031e-06, "loss": 0.5075, "step": 2481 }, { "epoch": 0.31, "grad_norm": 2.195280425107586, "learning_rate": 8.106662817140508e-06, "loss": 0.5285, "step": 2482 }, { "epoch": 0.31, "grad_norm": 8.236829502099297, "learning_rate": 8.105087857050246e-06, "loss": 0.4448, "step": 2483 }, { "epoch": 0.31, "grad_norm": 1.552047281830834, "learning_rate": 8.103512395305693e-06, "loss": 0.5142, "step": 2484 }, { "epoch": 0.31, "grad_norm": 1.6745640264545936, "learning_rate": 8.10193643216138e-06, "loss": 0.505, "step": 2485 }, { "epoch": 0.31, "grad_norm": 1.3051593774586854, "learning_rate": 8.100359967871915e-06, "loss": 0.542, "step": 2486 }, { "epoch": 0.31, "grad_norm": 1.6495030342810197, "learning_rate": 8.098783002691994e-06, "loss": 0.5699, "step": 2487 }, { "epoch": 0.31, "grad_norm": 2.166135329822737, "learning_rate": 8.097205536876387e-06, "loss": 0.5654, "step": 2488 }, { "epoch": 0.31, "grad_norm": 1.7492440243837697, "learning_rate": 8.095627570679947e-06, "loss": 0.5052, "step": 2489 }, { "epoch": 0.31, "grad_norm": 1.550936281764954, "learning_rate": 8.094049104357608e-06, "loss": 0.553, "step": 2490 }, { "epoch": 0.31, "grad_norm": 1.4772954354123726, "learning_rate": 8.092470138164388e-06, "loss": 0.5395, "step": 2491 }, { "epoch": 0.31, "grad_norm": 2.0886376026135105, "learning_rate": 8.09089067235538e-06, "loss": 0.5575, "step": 2492 }, { "epoch": 0.31, "grad_norm": 1.977419426615732, "learning_rate": 8.089310707185763e-06, "loss": 0.508, "step": 2493 }, { "epoch": 0.31, "grad_norm": 1.8598515635774202, "learning_rate": 8.087730242910792e-06, "loss": 0.4959, "step": 2494 }, { "epoch": 0.31, "grad_norm": 1.6130983505295127, "learning_rate": 8.086149279785807e-06, "loss": 0.5438, "step": 2495 }, { "epoch": 0.31, "grad_norm": 1.6412643748883935, "learning_rate": 8.084567818066225e-06, "loss": 0.524, "step": 2496 }, { "epoch": 0.31, "grad_norm": 2.796679205539921, "learning_rate": 8.082985858007544e-06, "loss": 0.512, "step": 2497 }, { "epoch": 0.31, "grad_norm": 1.493724261833098, "learning_rate": 8.081403399865347e-06, "loss": 0.526, "step": 2498 }, { "epoch": 0.31, "grad_norm": 1.2693373006893207, "learning_rate": 8.079820443895292e-06, "loss": 0.5074, "step": 2499 }, { "epoch": 0.31, "grad_norm": 8.523194363659194, "learning_rate": 8.07823699035312e-06, "loss": 0.4962, "step": 2500 }, { "epoch": 0.31, "grad_norm": 1.4165837605180032, "learning_rate": 8.076653039494649e-06, "loss": 0.4806, "step": 2501 }, { "epoch": 0.31, "grad_norm": 3.5154178932893485, "learning_rate": 8.075068591575783e-06, "loss": 0.4951, "step": 2502 }, { "epoch": 0.31, "grad_norm": 1.4374752979267316, "learning_rate": 8.073483646852507e-06, "loss": 0.4523, "step": 2503 }, { "epoch": 0.31, "grad_norm": 1.4793590132895706, "learning_rate": 8.071898205580877e-06, "loss": 0.5124, "step": 2504 }, { "epoch": 0.31, "grad_norm": 2.1797717404084764, "learning_rate": 8.070312268017036e-06, "loss": 0.5688, "step": 2505 }, { "epoch": 0.31, "grad_norm": 1.3568393334677344, "learning_rate": 8.068725834417208e-06, "loss": 0.5011, "step": 2506 }, { "epoch": 0.31, "grad_norm": 1.3726472919474497, "learning_rate": 8.067138905037694e-06, "loss": 0.549, "step": 2507 }, { "epoch": 0.31, "grad_norm": 1.4125513596310788, "learning_rate": 8.065551480134879e-06, "loss": 0.4578, "step": 2508 }, { "epoch": 0.31, "grad_norm": 1.3339617497863514, "learning_rate": 8.063963559965221e-06, "loss": 0.4802, "step": 2509 }, { "epoch": 0.31, "grad_norm": 1.5081473397590144, "learning_rate": 8.062375144785265e-06, "loss": 0.4897, "step": 2510 }, { "epoch": 0.31, "grad_norm": 13.303668471143709, "learning_rate": 8.060786234851634e-06, "loss": 0.4694, "step": 2511 }, { "epoch": 0.31, "grad_norm": 1.573520854460718, "learning_rate": 8.059196830421032e-06, "loss": 0.5268, "step": 2512 }, { "epoch": 0.31, "grad_norm": 1.3482665581028472, "learning_rate": 8.057606931750235e-06, "loss": 0.4916, "step": 2513 }, { "epoch": 0.31, "grad_norm": 1.8330345165028457, "learning_rate": 8.056016539096112e-06, "loss": 0.4659, "step": 2514 }, { "epoch": 0.31, "grad_norm": 1.509864150941314, "learning_rate": 8.0544256527156e-06, "loss": 0.5633, "step": 2515 }, { "epoch": 0.31, "grad_norm": 1.4224595451530193, "learning_rate": 8.052834272865724e-06, "loss": 0.5587, "step": 2516 }, { "epoch": 0.31, "grad_norm": 1.9882292371596657, "learning_rate": 8.051242399803586e-06, "loss": 0.5115, "step": 2517 }, { "epoch": 0.31, "grad_norm": 1.443219180257739, "learning_rate": 8.049650033786364e-06, "loss": 0.506, "step": 2518 }, { "epoch": 0.31, "grad_norm": 1.4703440636482108, "learning_rate": 8.04805717507132e-06, "loss": 0.5088, "step": 2519 }, { "epoch": 0.31, "grad_norm": 1.938646696914128, "learning_rate": 8.046463823915794e-06, "loss": 0.4723, "step": 2520 }, { "epoch": 0.31, "grad_norm": 1.8214933656484336, "learning_rate": 8.044869980577205e-06, "loss": 0.4937, "step": 2521 }, { "epoch": 0.31, "grad_norm": 2.5537002581659554, "learning_rate": 8.043275645313058e-06, "loss": 0.5428, "step": 2522 }, { "epoch": 0.31, "grad_norm": 1.8339999808752838, "learning_rate": 8.041680818380924e-06, "loss": 0.5098, "step": 2523 }, { "epoch": 0.31, "grad_norm": 1.3599642946469352, "learning_rate": 8.040085500038465e-06, "loss": 0.5299, "step": 2524 }, { "epoch": 0.31, "grad_norm": 1.2462733428982753, "learning_rate": 8.038489690543421e-06, "loss": 0.4943, "step": 2525 }, { "epoch": 0.31, "grad_norm": 2.4496657853471375, "learning_rate": 8.036893390153606e-06, "loss": 0.4942, "step": 2526 }, { "epoch": 0.31, "grad_norm": 1.6281553227437193, "learning_rate": 8.035296599126917e-06, "loss": 0.5068, "step": 2527 }, { "epoch": 0.31, "grad_norm": 1.3263402475379906, "learning_rate": 8.033699317721331e-06, "loss": 0.4942, "step": 2528 }, { "epoch": 0.31, "grad_norm": 2.5396019747741265, "learning_rate": 8.0321015461949e-06, "loss": 0.4934, "step": 2529 }, { "epoch": 0.31, "grad_norm": 2.2515443414601215, "learning_rate": 8.030503284805762e-06, "loss": 0.5595, "step": 2530 }, { "epoch": 0.31, "grad_norm": 0.7028155212480076, "learning_rate": 8.028904533812125e-06, "loss": 0.5103, "step": 2531 }, { "epoch": 0.31, "grad_norm": 1.3470718084717475, "learning_rate": 8.027305293472287e-06, "loss": 0.5419, "step": 2532 }, { "epoch": 0.31, "grad_norm": 1.3517717148809503, "learning_rate": 8.025705564044615e-06, "loss": 0.542, "step": 2533 }, { "epoch": 0.31, "grad_norm": 1.6400748484848002, "learning_rate": 8.024105345787562e-06, "loss": 0.5487, "step": 2534 }, { "epoch": 0.31, "grad_norm": 1.4080101276322876, "learning_rate": 8.022504638959657e-06, "loss": 0.5375, "step": 2535 }, { "epoch": 0.31, "grad_norm": 1.4608135823018693, "learning_rate": 8.020903443819507e-06, "loss": 0.5419, "step": 2536 }, { "epoch": 0.31, "grad_norm": 1.2070914576507898, "learning_rate": 8.0193017606258e-06, "loss": 0.5425, "step": 2537 }, { "epoch": 0.31, "grad_norm": 1.2988911897831374, "learning_rate": 8.017699589637302e-06, "loss": 0.4764, "step": 2538 }, { "epoch": 0.32, "grad_norm": 1.3755261690769904, "learning_rate": 8.016096931112858e-06, "loss": 0.5326, "step": 2539 }, { "epoch": 0.32, "grad_norm": 1.7204618851485292, "learning_rate": 8.014493785311391e-06, "loss": 0.4886, "step": 2540 }, { "epoch": 0.32, "grad_norm": 1.3003456270794864, "learning_rate": 8.012890152491904e-06, "loss": 0.4928, "step": 2541 }, { "epoch": 0.32, "grad_norm": 1.3400916153759963, "learning_rate": 8.011286032913478e-06, "loss": 0.5215, "step": 2542 }, { "epoch": 0.32, "grad_norm": 1.4896133467098513, "learning_rate": 8.009681426835273e-06, "loss": 0.5422, "step": 2543 }, { "epoch": 0.32, "grad_norm": 1.8446002305350264, "learning_rate": 8.008076334516523e-06, "loss": 0.4814, "step": 2544 }, { "epoch": 0.32, "grad_norm": 1.4655503832221062, "learning_rate": 8.006470756216551e-06, "loss": 0.5761, "step": 2545 }, { "epoch": 0.32, "grad_norm": 1.332714497007832, "learning_rate": 8.00486469219475e-06, "loss": 0.4785, "step": 2546 }, { "epoch": 0.32, "grad_norm": 1.772899784431288, "learning_rate": 8.003258142710593e-06, "loss": 0.5127, "step": 2547 }, { "epoch": 0.32, "grad_norm": 2.18496507520994, "learning_rate": 8.001651108023632e-06, "loss": 0.5681, "step": 2548 }, { "epoch": 0.32, "grad_norm": 1.5808393491809347, "learning_rate": 8.0000435883935e-06, "loss": 0.4923, "step": 2549 }, { "epoch": 0.32, "grad_norm": 1.9652897069753408, "learning_rate": 7.998435584079904e-06, "loss": 0.5546, "step": 2550 }, { "epoch": 0.32, "grad_norm": 1.4685113192092831, "learning_rate": 7.99682709534263e-06, "loss": 0.5046, "step": 2551 }, { "epoch": 0.32, "grad_norm": 1.3755259188398998, "learning_rate": 7.995218122441545e-06, "loss": 0.5402, "step": 2552 }, { "epoch": 0.32, "grad_norm": 1.6402565725216716, "learning_rate": 7.993608665636594e-06, "loss": 0.4868, "step": 2553 }, { "epoch": 0.32, "grad_norm": 1.340748457030908, "learning_rate": 7.991998725187797e-06, "loss": 0.4743, "step": 2554 }, { "epoch": 0.32, "grad_norm": 1.509932753295099, "learning_rate": 7.990388301355257e-06, "loss": 0.5071, "step": 2555 }, { "epoch": 0.32, "grad_norm": 1.4331341944162324, "learning_rate": 7.988777394399146e-06, "loss": 0.5645, "step": 2556 }, { "epoch": 0.32, "grad_norm": 1.7525247336155012, "learning_rate": 7.987166004579727e-06, "loss": 0.4617, "step": 2557 }, { "epoch": 0.32, "grad_norm": 1.4084454111962381, "learning_rate": 7.98555413215733e-06, "loss": 0.5474, "step": 2558 }, { "epoch": 0.32, "grad_norm": 1.303093075197306, "learning_rate": 7.98394177739237e-06, "loss": 0.4948, "step": 2559 }, { "epoch": 0.32, "grad_norm": 1.2288203173720957, "learning_rate": 7.982328940545334e-06, "loss": 0.4921, "step": 2560 }, { "epoch": 0.32, "grad_norm": 1.2101474981944487, "learning_rate": 7.980715621876793e-06, "loss": 0.4831, "step": 2561 }, { "epoch": 0.32, "grad_norm": 1.2970614349320075, "learning_rate": 7.97910182164739e-06, "loss": 0.495, "step": 2562 }, { "epoch": 0.32, "grad_norm": 0.6276571869904369, "learning_rate": 7.977487540117852e-06, "loss": 0.4888, "step": 2563 }, { "epoch": 0.32, "grad_norm": 1.31307825281209, "learning_rate": 7.975872777548977e-06, "loss": 0.5072, "step": 2564 }, { "epoch": 0.32, "grad_norm": 1.8758618093049964, "learning_rate": 7.974257534201647e-06, "loss": 0.5052, "step": 2565 }, { "epoch": 0.32, "grad_norm": 1.4751921344767631, "learning_rate": 7.972641810336816e-06, "loss": 0.5237, "step": 2566 }, { "epoch": 0.32, "grad_norm": 5.354956242965793, "learning_rate": 7.971025606215521e-06, "loss": 0.4831, "step": 2567 }, { "epoch": 0.32, "grad_norm": 1.8797787633285996, "learning_rate": 7.969408922098871e-06, "loss": 0.4967, "step": 2568 }, { "epoch": 0.32, "grad_norm": 1.9374064384200749, "learning_rate": 7.96779175824806e-06, "loss": 0.5014, "step": 2569 }, { "epoch": 0.32, "grad_norm": 1.572450726443971, "learning_rate": 7.966174114924352e-06, "loss": 0.5579, "step": 2570 }, { "epoch": 0.32, "grad_norm": 1.424117716618892, "learning_rate": 7.964555992389092e-06, "loss": 0.5364, "step": 2571 }, { "epoch": 0.32, "grad_norm": 1.4695100357496795, "learning_rate": 7.9629373909037e-06, "loss": 0.5565, "step": 2572 }, { "epoch": 0.32, "grad_norm": 1.5546732225028876, "learning_rate": 7.961318310729678e-06, "loss": 0.5246, "step": 2573 }, { "epoch": 0.32, "grad_norm": 1.4686814158564432, "learning_rate": 7.959698752128602e-06, "loss": 0.5406, "step": 2574 }, { "epoch": 0.32, "grad_norm": 1.5002993149698707, "learning_rate": 7.958078715362127e-06, "loss": 0.5398, "step": 2575 }, { "epoch": 0.32, "grad_norm": 1.3502921398391599, "learning_rate": 7.956458200691981e-06, "loss": 0.529, "step": 2576 }, { "epoch": 0.32, "grad_norm": 2.138050239994702, "learning_rate": 7.954837208379978e-06, "loss": 0.5314, "step": 2577 }, { "epoch": 0.32, "grad_norm": 1.2974781099214527, "learning_rate": 7.953215738687997e-06, "loss": 0.4856, "step": 2578 }, { "epoch": 0.32, "grad_norm": 1.4912136233611968, "learning_rate": 7.951593791878005e-06, "loss": 0.5309, "step": 2579 }, { "epoch": 0.32, "grad_norm": 1.519571706645829, "learning_rate": 7.94997136821204e-06, "loss": 0.5126, "step": 2580 }, { "epoch": 0.32, "grad_norm": 1.5345595769482312, "learning_rate": 7.948348467952221e-06, "loss": 0.4793, "step": 2581 }, { "epoch": 0.32, "grad_norm": 1.5034221535478678, "learning_rate": 7.946725091360738e-06, "loss": 0.5109, "step": 2582 }, { "epoch": 0.32, "grad_norm": 0.6353221949559826, "learning_rate": 7.945101238699865e-06, "loss": 0.5091, "step": 2583 }, { "epoch": 0.32, "grad_norm": 1.6441115764867356, "learning_rate": 7.943476910231948e-06, "loss": 0.5287, "step": 2584 }, { "epoch": 0.32, "grad_norm": 1.548879078222247, "learning_rate": 7.941852106219414e-06, "loss": 0.4787, "step": 2585 }, { "epoch": 0.32, "grad_norm": 1.5134010020200608, "learning_rate": 7.940226826924761e-06, "loss": 0.464, "step": 2586 }, { "epoch": 0.32, "grad_norm": 1.3246410761168317, "learning_rate": 7.938601072610573e-06, "loss": 0.5143, "step": 2587 }, { "epoch": 0.32, "grad_norm": 0.6072185208166537, "learning_rate": 7.936974843539496e-06, "loss": 0.4914, "step": 2588 }, { "epoch": 0.32, "grad_norm": 1.5873833753998925, "learning_rate": 7.935348139974268e-06, "loss": 0.487, "step": 2589 }, { "epoch": 0.32, "grad_norm": 1.3803477410016485, "learning_rate": 7.933720962177696e-06, "loss": 0.5445, "step": 2590 }, { "epoch": 0.32, "grad_norm": 1.479728555323523, "learning_rate": 7.932093310412665e-06, "loss": 0.5432, "step": 2591 }, { "epoch": 0.32, "grad_norm": 1.461786336220308, "learning_rate": 7.930465184942135e-06, "loss": 0.4729, "step": 2592 }, { "epoch": 0.32, "grad_norm": 1.372907725327091, "learning_rate": 7.928836586029146e-06, "loss": 0.4982, "step": 2593 }, { "epoch": 0.32, "grad_norm": 1.3760889483532308, "learning_rate": 7.927207513936812e-06, "loss": 0.509, "step": 2594 }, { "epoch": 0.32, "grad_norm": 1.3209344770538574, "learning_rate": 7.925577968928323e-06, "loss": 0.5233, "step": 2595 }, { "epoch": 0.32, "grad_norm": 2.263176280923, "learning_rate": 7.923947951266947e-06, "loss": 0.5248, "step": 2596 }, { "epoch": 0.32, "grad_norm": 2.869151617512003, "learning_rate": 7.922317461216027e-06, "loss": 0.556, "step": 2597 }, { "epoch": 0.32, "grad_norm": 1.2799391376527303, "learning_rate": 7.920686499038985e-06, "loss": 0.4808, "step": 2598 }, { "epoch": 0.32, "grad_norm": 1.3579113165318353, "learning_rate": 7.919055064999315e-06, "loss": 0.4971, "step": 2599 }, { "epoch": 0.32, "grad_norm": 2.6106478321846343, "learning_rate": 7.917423159360592e-06, "loss": 0.5491, "step": 2600 }, { "epoch": 0.32, "grad_norm": 5.060188368021533, "learning_rate": 7.915790782386462e-06, "loss": 0.4894, "step": 2601 }, { "epoch": 0.32, "grad_norm": 1.4593670616968968, "learning_rate": 7.91415793434065e-06, "loss": 0.5285, "step": 2602 }, { "epoch": 0.32, "grad_norm": 1.8946627434610617, "learning_rate": 7.91252461548696e-06, "loss": 0.5221, "step": 2603 }, { "epoch": 0.32, "grad_norm": 1.486158593598876, "learning_rate": 7.910890826089267e-06, "loss": 0.5064, "step": 2604 }, { "epoch": 0.32, "grad_norm": 1.5781546113023983, "learning_rate": 7.909256566411522e-06, "loss": 0.5123, "step": 2605 }, { "epoch": 0.32, "grad_norm": 1.2732298072495625, "learning_rate": 7.907621836717757e-06, "loss": 0.4986, "step": 2606 }, { "epoch": 0.32, "grad_norm": 1.469675681281866, "learning_rate": 7.905986637272079e-06, "loss": 0.5008, "step": 2607 }, { "epoch": 0.32, "grad_norm": 1.6806249896449872, "learning_rate": 7.904350968338663e-06, "loss": 0.5207, "step": 2608 }, { "epoch": 0.32, "grad_norm": 1.3999951993913762, "learning_rate": 7.90271483018177e-06, "loss": 0.5482, "step": 2609 }, { "epoch": 0.32, "grad_norm": 2.3996070047305675, "learning_rate": 7.901078223065731e-06, "loss": 0.528, "step": 2610 }, { "epoch": 0.32, "grad_norm": 1.3957940027695925, "learning_rate": 7.899441147254956e-06, "loss": 0.5362, "step": 2611 }, { "epoch": 0.32, "grad_norm": 1.4092902102297067, "learning_rate": 7.897803603013927e-06, "loss": 0.5231, "step": 2612 }, { "epoch": 0.32, "grad_norm": 1.313813586992601, "learning_rate": 7.896165590607204e-06, "loss": 0.4838, "step": 2613 }, { "epoch": 0.32, "grad_norm": 1.829202367501445, "learning_rate": 7.894527110299422e-06, "loss": 0.5226, "step": 2614 }, { "epoch": 0.32, "grad_norm": 1.4582804197443877, "learning_rate": 7.892888162355293e-06, "loss": 0.5125, "step": 2615 }, { "epoch": 0.32, "grad_norm": 1.4644426641480262, "learning_rate": 7.891248747039605e-06, "loss": 0.5256, "step": 2616 }, { "epoch": 0.32, "grad_norm": 1.5832117310103972, "learning_rate": 7.889608864617216e-06, "loss": 0.569, "step": 2617 }, { "epoch": 0.32, "grad_norm": 1.349664804555716, "learning_rate": 7.887968515353065e-06, "loss": 0.508, "step": 2618 }, { "epoch": 0.33, "grad_norm": 1.5097419964276189, "learning_rate": 7.886327699512166e-06, "loss": 0.5373, "step": 2619 }, { "epoch": 0.33, "grad_norm": 1.314238734393822, "learning_rate": 7.884686417359609e-06, "loss": 0.5263, "step": 2620 }, { "epoch": 0.33, "grad_norm": 1.3193574606716998, "learning_rate": 7.88304466916055e-06, "loss": 0.4542, "step": 2621 }, { "epoch": 0.33, "grad_norm": 1.3564255854315617, "learning_rate": 7.881402455180238e-06, "loss": 0.4647, "step": 2622 }, { "epoch": 0.33, "grad_norm": 1.3643754065286928, "learning_rate": 7.87975977568398e-06, "loss": 0.4483, "step": 2623 }, { "epoch": 0.33, "grad_norm": 0.6474917452755293, "learning_rate": 7.878116630937169e-06, "loss": 0.5312, "step": 2624 }, { "epoch": 0.33, "grad_norm": 1.3898488341013995, "learning_rate": 7.876473021205266e-06, "loss": 0.4786, "step": 2625 }, { "epoch": 0.33, "grad_norm": 1.4444599189015728, "learning_rate": 7.874828946753814e-06, "loss": 0.5075, "step": 2626 }, { "epoch": 0.33, "grad_norm": 1.4016382588893839, "learning_rate": 7.873184407848428e-06, "loss": 0.4868, "step": 2627 }, { "epoch": 0.33, "grad_norm": 1.3055135483450153, "learning_rate": 7.871539404754793e-06, "loss": 0.4448, "step": 2628 }, { "epoch": 0.33, "grad_norm": 1.4260820129750083, "learning_rate": 7.86989393773868e-06, "loss": 0.5093, "step": 2629 }, { "epoch": 0.33, "grad_norm": 1.4595683756814524, "learning_rate": 7.868248007065923e-06, "loss": 0.528, "step": 2630 }, { "epoch": 0.33, "grad_norm": 1.383304259086062, "learning_rate": 7.86660161300244e-06, "loss": 0.537, "step": 2631 }, { "epoch": 0.33, "grad_norm": 0.7295511857203106, "learning_rate": 7.86495475581422e-06, "loss": 0.4833, "step": 2632 }, { "epoch": 0.33, "grad_norm": 1.4553464257059818, "learning_rate": 7.863307435767329e-06, "loss": 0.5198, "step": 2633 }, { "epoch": 0.33, "grad_norm": 1.2005384216193022, "learning_rate": 7.861659653127899e-06, "loss": 0.4692, "step": 2634 }, { "epoch": 0.33, "grad_norm": 1.300754657681368, "learning_rate": 7.860011408162153e-06, "loss": 0.5397, "step": 2635 }, { "epoch": 0.33, "grad_norm": 1.3431649243225865, "learning_rate": 7.858362701136374e-06, "loss": 0.5268, "step": 2636 }, { "epoch": 0.33, "grad_norm": 1.4874194965826724, "learning_rate": 7.856713532316927e-06, "loss": 0.5276, "step": 2637 }, { "epoch": 0.33, "grad_norm": 1.6575599415205786, "learning_rate": 7.855063901970248e-06, "loss": 0.5225, "step": 2638 }, { "epoch": 0.33, "grad_norm": 1.41297561898523, "learning_rate": 7.85341381036285e-06, "loss": 0.5139, "step": 2639 }, { "epoch": 0.33, "grad_norm": 1.1716924968520417, "learning_rate": 7.851763257761322e-06, "loss": 0.5113, "step": 2640 }, { "epoch": 0.33, "grad_norm": 1.536281272372059, "learning_rate": 7.850112244432322e-06, "loss": 0.5021, "step": 2641 }, { "epoch": 0.33, "grad_norm": 2.5685613187358625, "learning_rate": 7.848460770642588e-06, "loss": 0.5094, "step": 2642 }, { "epoch": 0.33, "grad_norm": 1.5203119298235317, "learning_rate": 7.846808836658931e-06, "loss": 0.48, "step": 2643 }, { "epoch": 0.33, "grad_norm": 1.5732793968340726, "learning_rate": 7.845156442748232e-06, "loss": 0.5238, "step": 2644 }, { "epoch": 0.33, "grad_norm": 1.6797119267555378, "learning_rate": 7.843503589177453e-06, "loss": 0.5117, "step": 2645 }, { "epoch": 0.33, "grad_norm": 1.3572741565536737, "learning_rate": 7.841850276213626e-06, "loss": 0.4669, "step": 2646 }, { "epoch": 0.33, "grad_norm": 0.7178449432637385, "learning_rate": 7.840196504123856e-06, "loss": 0.5076, "step": 2647 }, { "epoch": 0.33, "grad_norm": 1.6289373215406235, "learning_rate": 7.838542273175328e-06, "loss": 0.5302, "step": 2648 }, { "epoch": 0.33, "grad_norm": 1.306086112198206, "learning_rate": 7.836887583635297e-06, "loss": 0.5639, "step": 2649 }, { "epoch": 0.33, "grad_norm": 1.4157064488153976, "learning_rate": 7.835232435771089e-06, "loss": 0.524, "step": 2650 }, { "epoch": 0.33, "grad_norm": 2.4036284659590974, "learning_rate": 7.833576829850113e-06, "loss": 0.5574, "step": 2651 }, { "epoch": 0.33, "grad_norm": 1.5176353764519612, "learning_rate": 7.831920766139844e-06, "loss": 0.5141, "step": 2652 }, { "epoch": 0.33, "grad_norm": 1.6075456494439913, "learning_rate": 7.830264244907834e-06, "loss": 0.5325, "step": 2653 }, { "epoch": 0.33, "grad_norm": 0.6382628147633554, "learning_rate": 7.828607266421705e-06, "loss": 0.4846, "step": 2654 }, { "epoch": 0.33, "grad_norm": 1.6454288930497467, "learning_rate": 7.826949830949164e-06, "loss": 0.5323, "step": 2655 }, { "epoch": 0.33, "grad_norm": 1.994581273390472, "learning_rate": 7.82529193875798e-06, "loss": 0.4747, "step": 2656 }, { "epoch": 0.33, "grad_norm": 2.349328328927657, "learning_rate": 7.823633590116e-06, "loss": 0.5051, "step": 2657 }, { "epoch": 0.33, "grad_norm": 1.8911900323622857, "learning_rate": 7.821974785291145e-06, "loss": 0.5159, "step": 2658 }, { "epoch": 0.33, "grad_norm": 1.6299950870453979, "learning_rate": 7.82031552455141e-06, "loss": 0.5159, "step": 2659 }, { "epoch": 0.33, "grad_norm": 0.7036980394919018, "learning_rate": 7.81865580816486e-06, "loss": 0.5079, "step": 2660 }, { "epoch": 0.33, "grad_norm": 1.2437606573047195, "learning_rate": 7.816995636399644e-06, "loss": 0.5237, "step": 2661 }, { "epoch": 0.33, "grad_norm": 1.3132373861797184, "learning_rate": 7.81533500952397e-06, "loss": 0.5083, "step": 2662 }, { "epoch": 0.33, "grad_norm": 1.38307268654614, "learning_rate": 7.813673927806132e-06, "loss": 0.5469, "step": 2663 }, { "epoch": 0.33, "grad_norm": 1.649046553253519, "learning_rate": 7.812012391514488e-06, "loss": 0.528, "step": 2664 }, { "epoch": 0.33, "grad_norm": 1.381471144918611, "learning_rate": 7.81035040091748e-06, "loss": 0.5184, "step": 2665 }, { "epoch": 0.33, "grad_norm": 1.4352863734471388, "learning_rate": 7.808687956283609e-06, "loss": 0.5141, "step": 2666 }, { "epoch": 0.33, "grad_norm": 1.3719121111184451, "learning_rate": 7.807025057881463e-06, "loss": 0.5035, "step": 2667 }, { "epoch": 0.33, "grad_norm": 1.3920475091905369, "learning_rate": 7.805361705979698e-06, "loss": 0.5194, "step": 2668 }, { "epoch": 0.33, "grad_norm": 1.5848959505401254, "learning_rate": 7.803697900847042e-06, "loss": 0.5866, "step": 2669 }, { "epoch": 0.33, "grad_norm": 1.8520042447777643, "learning_rate": 7.802033642752298e-06, "loss": 0.5271, "step": 2670 }, { "epoch": 0.33, "grad_norm": 1.4521535881547414, "learning_rate": 7.80036893196434e-06, "loss": 0.5194, "step": 2671 }, { "epoch": 0.33, "grad_norm": 1.5181509592818552, "learning_rate": 7.798703768752116e-06, "loss": 0.5228, "step": 2672 }, { "epoch": 0.33, "grad_norm": 1.5555764528978064, "learning_rate": 7.79703815338465e-06, "loss": 0.5209, "step": 2673 }, { "epoch": 0.33, "grad_norm": 3.8939780601693137, "learning_rate": 7.795372086131038e-06, "loss": 0.5103, "step": 2674 }, { "epoch": 0.33, "grad_norm": 1.5966979452355454, "learning_rate": 7.793705567260445e-06, "loss": 0.4822, "step": 2675 }, { "epoch": 0.33, "grad_norm": 4.574764738764695, "learning_rate": 7.792038597042113e-06, "loss": 0.5219, "step": 2676 }, { "epoch": 0.33, "grad_norm": 1.3781605626328648, "learning_rate": 7.790371175745355e-06, "loss": 0.5177, "step": 2677 }, { "epoch": 0.33, "grad_norm": 1.31760703133334, "learning_rate": 7.78870330363956e-06, "loss": 0.5038, "step": 2678 }, { "epoch": 0.33, "grad_norm": 1.4686543270678922, "learning_rate": 7.787034980994184e-06, "loss": 0.4935, "step": 2679 }, { "epoch": 0.33, "grad_norm": 1.898415093232563, "learning_rate": 7.78536620807876e-06, "loss": 0.5388, "step": 2680 }, { "epoch": 0.33, "grad_norm": 1.8871341002492472, "learning_rate": 7.783696985162896e-06, "loss": 0.5185, "step": 2681 }, { "epoch": 0.33, "grad_norm": 1.756160964701136, "learning_rate": 7.782027312516267e-06, "loss": 0.4977, "step": 2682 }, { "epoch": 0.33, "grad_norm": 1.6106092870879871, "learning_rate": 7.780357190408622e-06, "loss": 0.5323, "step": 2683 }, { "epoch": 0.33, "grad_norm": 1.2645675766050717, "learning_rate": 7.778686619109787e-06, "loss": 0.4845, "step": 2684 }, { "epoch": 0.33, "grad_norm": 1.7084608637158094, "learning_rate": 7.777015598889656e-06, "loss": 0.5252, "step": 2685 }, { "epoch": 0.33, "grad_norm": 0.714675656281538, "learning_rate": 7.775344130018196e-06, "loss": 0.5275, "step": 2686 }, { "epoch": 0.33, "grad_norm": 1.4409858609141166, "learning_rate": 7.77367221276545e-06, "loss": 0.4673, "step": 2687 }, { "epoch": 0.33, "grad_norm": 1.4382057653735294, "learning_rate": 7.77199984740153e-06, "loss": 0.4813, "step": 2688 }, { "epoch": 0.33, "grad_norm": 1.6903980772123022, "learning_rate": 7.77032703419662e-06, "loss": 0.4768, "step": 2689 }, { "epoch": 0.33, "grad_norm": 2.7131248191772204, "learning_rate": 7.76865377342098e-06, "loss": 0.537, "step": 2690 }, { "epoch": 0.33, "grad_norm": 1.5944381852953722, "learning_rate": 7.766980065344938e-06, "loss": 0.4751, "step": 2691 }, { "epoch": 0.33, "grad_norm": 1.4666012126559613, "learning_rate": 7.765305910238898e-06, "loss": 0.5269, "step": 2692 }, { "epoch": 0.33, "grad_norm": 3.471755000511237, "learning_rate": 7.763631308373333e-06, "loss": 0.5487, "step": 2693 }, { "epoch": 0.33, "grad_norm": 1.3624088593151427, "learning_rate": 7.76195626001879e-06, "loss": 0.5085, "step": 2694 }, { "epoch": 0.33, "grad_norm": 2.710395640324599, "learning_rate": 7.760280765445888e-06, "loss": 0.4905, "step": 2695 }, { "epoch": 0.33, "grad_norm": 1.4268137254771691, "learning_rate": 7.758604824925318e-06, "loss": 0.5352, "step": 2696 }, { "epoch": 0.33, "grad_norm": 2.3876286121831773, "learning_rate": 7.756928438727844e-06, "loss": 0.5045, "step": 2697 }, { "epoch": 0.33, "grad_norm": 1.580257529292837, "learning_rate": 7.755251607124298e-06, "loss": 0.5337, "step": 2698 }, { "epoch": 0.33, "grad_norm": 1.9168052951765553, "learning_rate": 7.75357433038559e-06, "loss": 0.5272, "step": 2699 }, { "epoch": 0.34, "grad_norm": 1.6677726778026305, "learning_rate": 7.751896608782696e-06, "loss": 0.4915, "step": 2700 }, { "epoch": 0.34, "grad_norm": 1.5021980264138404, "learning_rate": 7.75021844258667e-06, "loss": 0.5036, "step": 2701 }, { "epoch": 0.34, "grad_norm": 1.6929177369758843, "learning_rate": 7.748539832068633e-06, "loss": 0.5464, "step": 2702 }, { "epoch": 0.34, "grad_norm": 1.5228255867041702, "learning_rate": 7.746860777499778e-06, "loss": 0.5678, "step": 2703 }, { "epoch": 0.34, "grad_norm": 1.8989886501816657, "learning_rate": 7.745181279151373e-06, "loss": 0.5263, "step": 2704 }, { "epoch": 0.34, "grad_norm": 1.2883835482583696, "learning_rate": 7.743501337294754e-06, "loss": 0.5216, "step": 2705 }, { "epoch": 0.34, "grad_norm": 3.960176675059009, "learning_rate": 7.741820952201333e-06, "loss": 0.5232, "step": 2706 }, { "epoch": 0.34, "grad_norm": 1.7759698679308338, "learning_rate": 7.740140124142587e-06, "loss": 0.5104, "step": 2707 }, { "epoch": 0.34, "grad_norm": 3.4527436454312945, "learning_rate": 7.738458853390072e-06, "loss": 0.5327, "step": 2708 }, { "epoch": 0.34, "grad_norm": 1.3174291504094895, "learning_rate": 7.736777140215412e-06, "loss": 0.4753, "step": 2709 }, { "epoch": 0.34, "grad_norm": 1.7018137791242018, "learning_rate": 7.735094984890302e-06, "loss": 0.5085, "step": 2710 }, { "epoch": 0.34, "grad_norm": 1.5993123487899685, "learning_rate": 7.733412387686508e-06, "loss": 0.4816, "step": 2711 }, { "epoch": 0.34, "grad_norm": 1.904796254721156, "learning_rate": 7.731729348875868e-06, "loss": 0.5451, "step": 2712 }, { "epoch": 0.34, "grad_norm": 1.604283221653655, "learning_rate": 7.730045868730294e-06, "loss": 0.5185, "step": 2713 }, { "epoch": 0.34, "grad_norm": 1.93420487671135, "learning_rate": 7.728361947521765e-06, "loss": 0.5288, "step": 2714 }, { "epoch": 0.34, "grad_norm": 1.6354612809092355, "learning_rate": 7.726677585522335e-06, "loss": 0.466, "step": 2715 }, { "epoch": 0.34, "grad_norm": 1.5483837104096023, "learning_rate": 7.724992783004125e-06, "loss": 0.545, "step": 2716 }, { "epoch": 0.34, "grad_norm": 1.5651476019378647, "learning_rate": 7.723307540239332e-06, "loss": 0.5436, "step": 2717 }, { "epoch": 0.34, "grad_norm": 2.886905512469594, "learning_rate": 7.721621857500221e-06, "loss": 0.4745, "step": 2718 }, { "epoch": 0.34, "grad_norm": 2.1861427286963115, "learning_rate": 7.719935735059131e-06, "loss": 0.4465, "step": 2719 }, { "epoch": 0.34, "grad_norm": 1.866549648988967, "learning_rate": 7.718249173188465e-06, "loss": 0.5152, "step": 2720 }, { "epoch": 0.34, "grad_norm": 1.586552465840777, "learning_rate": 7.716562172160706e-06, "loss": 0.5207, "step": 2721 }, { "epoch": 0.34, "grad_norm": 1.8513938439300908, "learning_rate": 7.714874732248404e-06, "loss": 0.4664, "step": 2722 }, { "epoch": 0.34, "grad_norm": 1.4103660610251199, "learning_rate": 7.713186853724176e-06, "loss": 0.5427, "step": 2723 }, { "epoch": 0.34, "grad_norm": 1.2258139433120145, "learning_rate": 7.711498536860719e-06, "loss": 0.5043, "step": 2724 }, { "epoch": 0.34, "grad_norm": 1.3097524148833894, "learning_rate": 7.709809781930791e-06, "loss": 0.4709, "step": 2725 }, { "epoch": 0.34, "grad_norm": 3.0590687856687837, "learning_rate": 7.708120589207227e-06, "loss": 0.5472, "step": 2726 }, { "epoch": 0.34, "grad_norm": 2.0120462476542746, "learning_rate": 7.706430958962932e-06, "loss": 0.5511, "step": 2727 }, { "epoch": 0.34, "grad_norm": 1.6327764346512121, "learning_rate": 7.704740891470878e-06, "loss": 0.4788, "step": 2728 }, { "epoch": 0.34, "grad_norm": 1.4071899449611387, "learning_rate": 7.703050387004111e-06, "loss": 0.528, "step": 2729 }, { "epoch": 0.34, "grad_norm": 1.4315602215673797, "learning_rate": 7.70135944583575e-06, "loss": 0.5531, "step": 2730 }, { "epoch": 0.34, "grad_norm": 1.755214304596285, "learning_rate": 7.69966806823898e-06, "loss": 0.4996, "step": 2731 }, { "epoch": 0.34, "grad_norm": 1.2982408118906106, "learning_rate": 7.697976254487054e-06, "loss": 0.5133, "step": 2732 }, { "epoch": 0.34, "grad_norm": 1.3617359856408846, "learning_rate": 7.696284004853303e-06, "loss": 0.4639, "step": 2733 }, { "epoch": 0.34, "grad_norm": 1.277470567398012, "learning_rate": 7.694591319611124e-06, "loss": 0.5406, "step": 2734 }, { "epoch": 0.34, "grad_norm": 1.2375158989213473, "learning_rate": 7.692898199033988e-06, "loss": 0.4945, "step": 2735 }, { "epoch": 0.34, "grad_norm": 1.6525320292638912, "learning_rate": 7.691204643395426e-06, "loss": 0.4814, "step": 2736 }, { "epoch": 0.34, "grad_norm": 1.2916993899549458, "learning_rate": 7.689510652969055e-06, "loss": 0.4816, "step": 2737 }, { "epoch": 0.34, "grad_norm": 3.391677601694305, "learning_rate": 7.687816228028552e-06, "loss": 0.4606, "step": 2738 }, { "epoch": 0.34, "grad_norm": 1.4519576798931901, "learning_rate": 7.686121368847666e-06, "loss": 0.5472, "step": 2739 }, { "epoch": 0.34, "grad_norm": 1.5009721768272837, "learning_rate": 7.68442607570021e-06, "loss": 0.5325, "step": 2740 }, { "epoch": 0.34, "grad_norm": 1.3792196069659568, "learning_rate": 7.682730348860085e-06, "loss": 0.5191, "step": 2741 }, { "epoch": 0.34, "grad_norm": 1.5022991314730445, "learning_rate": 7.681034188601242e-06, "loss": 0.517, "step": 2742 }, { "epoch": 0.34, "grad_norm": 1.5302237971630834, "learning_rate": 7.679337595197715e-06, "loss": 0.4969, "step": 2743 }, { "epoch": 0.34, "grad_norm": 1.4982130605022177, "learning_rate": 7.677640568923601e-06, "loss": 0.5161, "step": 2744 }, { "epoch": 0.34, "grad_norm": 1.3912316865151857, "learning_rate": 7.67594311005307e-06, "loss": 0.4482, "step": 2745 }, { "epoch": 0.34, "grad_norm": 1.3729878686082877, "learning_rate": 7.674245218860362e-06, "loss": 0.4852, "step": 2746 }, { "epoch": 0.34, "grad_norm": 1.419155005976854, "learning_rate": 7.672546895619786e-06, "loss": 0.517, "step": 2747 }, { "epoch": 0.34, "grad_norm": 1.4587619792042252, "learning_rate": 7.670848140605723e-06, "loss": 0.5279, "step": 2748 }, { "epoch": 0.34, "grad_norm": 1.5269943732151012, "learning_rate": 7.66914895409262e-06, "loss": 0.5329, "step": 2749 }, { "epoch": 0.34, "grad_norm": 1.975878240194581, "learning_rate": 7.667449336354996e-06, "loss": 0.4831, "step": 2750 }, { "epoch": 0.34, "grad_norm": 1.4672990700567374, "learning_rate": 7.665749287667436e-06, "loss": 0.4792, "step": 2751 }, { "epoch": 0.34, "grad_norm": 1.39519067576427, "learning_rate": 7.664048808304603e-06, "loss": 0.5704, "step": 2752 }, { "epoch": 0.34, "grad_norm": 1.81220504073997, "learning_rate": 7.662347898541222e-06, "loss": 0.474, "step": 2753 }, { "epoch": 0.34, "grad_norm": 1.454364363303493, "learning_rate": 7.66064655865209e-06, "loss": 0.4896, "step": 2754 }, { "epoch": 0.34, "grad_norm": 2.345457097405705, "learning_rate": 7.658944788912073e-06, "loss": 0.4907, "step": 2755 }, { "epoch": 0.34, "grad_norm": 1.6197799716830366, "learning_rate": 7.657242589596107e-06, "loss": 0.5136, "step": 2756 }, { "epoch": 0.34, "grad_norm": 1.4100260413117522, "learning_rate": 7.655539960979199e-06, "loss": 0.4799, "step": 2757 }, { "epoch": 0.34, "grad_norm": 1.4355816404214223, "learning_rate": 7.653836903336423e-06, "loss": 0.5246, "step": 2758 }, { "epoch": 0.34, "grad_norm": 1.4093389459141474, "learning_rate": 7.652133416942921e-06, "loss": 0.4405, "step": 2759 }, { "epoch": 0.34, "grad_norm": 2.1045817688671162, "learning_rate": 7.650429502073909e-06, "loss": 0.484, "step": 2760 }, { "epoch": 0.34, "grad_norm": 1.4695892914070552, "learning_rate": 7.648725159004666e-06, "loss": 0.542, "step": 2761 }, { "epoch": 0.34, "grad_norm": 1.3991047999982942, "learning_rate": 7.647020388010546e-06, "loss": 0.5196, "step": 2762 }, { "epoch": 0.34, "grad_norm": 1.4581867933362196, "learning_rate": 7.64531518936697e-06, "loss": 0.5035, "step": 2763 }, { "epoch": 0.34, "grad_norm": 1.505294682141353, "learning_rate": 7.643609563349428e-06, "loss": 0.5147, "step": 2764 }, { "epoch": 0.34, "grad_norm": 1.6133177462796544, "learning_rate": 7.641903510233478e-06, "loss": 0.5426, "step": 2765 }, { "epoch": 0.34, "grad_norm": 1.4806029734791937, "learning_rate": 7.640197030294749e-06, "loss": 0.5115, "step": 2766 }, { "epoch": 0.34, "grad_norm": 1.616248266794655, "learning_rate": 7.638490123808935e-06, "loss": 0.4804, "step": 2767 }, { "epoch": 0.34, "grad_norm": 1.3394062449465627, "learning_rate": 7.636782791051805e-06, "loss": 0.516, "step": 2768 }, { "epoch": 0.34, "grad_norm": 4.177620453219728, "learning_rate": 7.63507503229919e-06, "loss": 0.5205, "step": 2769 }, { "epoch": 0.34, "grad_norm": 2.5007327575946827, "learning_rate": 7.633366847826999e-06, "loss": 0.5107, "step": 2770 }, { "epoch": 0.34, "grad_norm": 1.3918381642554443, "learning_rate": 7.6316582379112e-06, "loss": 0.5833, "step": 2771 }, { "epoch": 0.34, "grad_norm": 1.8013497027412864, "learning_rate": 7.629949202827835e-06, "loss": 0.5097, "step": 2772 }, { "epoch": 0.34, "grad_norm": 1.5598225631411284, "learning_rate": 7.6282397428530135e-06, "loss": 0.4993, "step": 2773 }, { "epoch": 0.34, "grad_norm": 1.4219298971027583, "learning_rate": 7.626529858262914e-06, "loss": 0.5187, "step": 2774 }, { "epoch": 0.34, "grad_norm": 1.601086179767575, "learning_rate": 7.624819549333784e-06, "loss": 0.4492, "step": 2775 }, { "epoch": 0.34, "grad_norm": 1.5134415486399717, "learning_rate": 7.62310881634194e-06, "loss": 0.5014, "step": 2776 }, { "epoch": 0.34, "grad_norm": 1.3155320012758154, "learning_rate": 7.621397659563761e-06, "loss": 0.5282, "step": 2777 }, { "epoch": 0.34, "grad_norm": 4.026077379891773, "learning_rate": 7.619686079275705e-06, "loss": 0.516, "step": 2778 }, { "epoch": 0.34, "grad_norm": 1.2985785718733407, "learning_rate": 7.617974075754291e-06, "loss": 0.487, "step": 2779 }, { "epoch": 0.34, "grad_norm": 1.487272549933582, "learning_rate": 7.616261649276107e-06, "loss": 0.5503, "step": 2780 }, { "epoch": 0.35, "grad_norm": 1.4482877030737968, "learning_rate": 7.614548800117812e-06, "loss": 0.5212, "step": 2781 }, { "epoch": 0.35, "grad_norm": 1.264435244527073, "learning_rate": 7.612835528556131e-06, "loss": 0.4865, "step": 2782 }, { "epoch": 0.35, "grad_norm": 1.428380630654264, "learning_rate": 7.611121834867858e-06, "loss": 0.4424, "step": 2783 }, { "epoch": 0.35, "grad_norm": 1.4593193495946697, "learning_rate": 7.609407719329858e-06, "loss": 0.5258, "step": 2784 }, { "epoch": 0.35, "grad_norm": 2.595849152307415, "learning_rate": 7.607693182219058e-06, "loss": 0.5197, "step": 2785 }, { "epoch": 0.35, "grad_norm": 2.1367861478862946, "learning_rate": 7.605978223812458e-06, "loss": 0.5408, "step": 2786 }, { "epoch": 0.35, "grad_norm": 1.237539684863762, "learning_rate": 7.604262844387125e-06, "loss": 0.5326, "step": 2787 }, { "epoch": 0.35, "grad_norm": 1.4710863852700993, "learning_rate": 7.602547044220192e-06, "loss": 0.481, "step": 2788 }, { "epoch": 0.35, "grad_norm": 1.2748156376890598, "learning_rate": 7.600830823588864e-06, "loss": 0.465, "step": 2789 }, { "epoch": 0.35, "grad_norm": 1.2474025526575772, "learning_rate": 7.5991141827704084e-06, "loss": 0.4957, "step": 2790 }, { "epoch": 0.35, "grad_norm": 1.361004388110375, "learning_rate": 7.5973971220421685e-06, "loss": 0.4817, "step": 2791 }, { "epoch": 0.35, "grad_norm": 1.727555590942853, "learning_rate": 7.595679641681546e-06, "loss": 0.5122, "step": 2792 }, { "epoch": 0.35, "grad_norm": 1.463675582477899, "learning_rate": 7.593961741966019e-06, "loss": 0.4723, "step": 2793 }, { "epoch": 0.35, "grad_norm": 1.6756022980618925, "learning_rate": 7.592243423173124e-06, "loss": 0.5637, "step": 2794 }, { "epoch": 0.35, "grad_norm": 1.339013495875156, "learning_rate": 7.5905246855804735e-06, "loss": 0.4775, "step": 2795 }, { "epoch": 0.35, "grad_norm": 1.9885912649008708, "learning_rate": 7.588805529465747e-06, "loss": 0.5408, "step": 2796 }, { "epoch": 0.35, "grad_norm": 1.3473130209888207, "learning_rate": 7.587085955106685e-06, "loss": 0.4883, "step": 2797 }, { "epoch": 0.35, "grad_norm": 1.721228409362882, "learning_rate": 7.585365962781103e-06, "loss": 0.4898, "step": 2798 }, { "epoch": 0.35, "grad_norm": 0.6689030328737668, "learning_rate": 7.58364555276688e-06, "loss": 0.5007, "step": 2799 }, { "epoch": 0.35, "grad_norm": 1.312323699854146, "learning_rate": 7.5819247253419624e-06, "loss": 0.4944, "step": 2800 }, { "epoch": 0.35, "grad_norm": 1.2682572784572763, "learning_rate": 7.5802034807843675e-06, "loss": 0.4607, "step": 2801 }, { "epoch": 0.35, "grad_norm": 1.536315066547835, "learning_rate": 7.578481819372174e-06, "loss": 0.5442, "step": 2802 }, { "epoch": 0.35, "grad_norm": 1.6095888676927919, "learning_rate": 7.576759741383534e-06, "loss": 0.5528, "step": 2803 }, { "epoch": 0.35, "grad_norm": 1.3160002037099694, "learning_rate": 7.575037247096664e-06, "loss": 0.5164, "step": 2804 }, { "epoch": 0.35, "grad_norm": 1.8566511781629418, "learning_rate": 7.5733143367898475e-06, "loss": 0.4993, "step": 2805 }, { "epoch": 0.35, "grad_norm": 1.5097008377535144, "learning_rate": 7.571591010741436e-06, "loss": 0.5012, "step": 2806 }, { "epoch": 0.35, "grad_norm": 1.3973734343760345, "learning_rate": 7.569867269229849e-06, "loss": 0.5597, "step": 2807 }, { "epoch": 0.35, "grad_norm": 1.3440934662874864, "learning_rate": 7.56814311253357e-06, "loss": 0.5049, "step": 2808 }, { "epoch": 0.35, "grad_norm": 1.7619676156979018, "learning_rate": 7.566418540931154e-06, "loss": 0.5333, "step": 2809 }, { "epoch": 0.35, "grad_norm": 2.4386815776036004, "learning_rate": 7.56469355470122e-06, "loss": 0.4954, "step": 2810 }, { "epoch": 0.35, "grad_norm": 1.347905562414754, "learning_rate": 7.5629681541224544e-06, "loss": 0.4376, "step": 2811 }, { "epoch": 0.35, "grad_norm": 1.7566813074923715, "learning_rate": 7.56124233947361e-06, "loss": 0.5428, "step": 2812 }, { "epoch": 0.35, "grad_norm": 1.4255239120541885, "learning_rate": 7.559516111033509e-06, "loss": 0.5485, "step": 2813 }, { "epoch": 0.35, "grad_norm": 7.169394255097597, "learning_rate": 7.557789469081036e-06, "loss": 0.4957, "step": 2814 }, { "epoch": 0.35, "grad_norm": 5.599542317523516, "learning_rate": 7.55606241389515e-06, "loss": 0.5238, "step": 2815 }, { "epoch": 0.35, "grad_norm": 1.7719411350941612, "learning_rate": 7.554334945754869e-06, "loss": 0.5163, "step": 2816 }, { "epoch": 0.35, "grad_norm": 2.781268122054056, "learning_rate": 7.552607064939279e-06, "loss": 0.5377, "step": 2817 }, { "epoch": 0.35, "grad_norm": 1.4987431909303914, "learning_rate": 7.550878771727537e-06, "loss": 0.5219, "step": 2818 }, { "epoch": 0.35, "grad_norm": 1.2623810509286617, "learning_rate": 7.549150066398865e-06, "loss": 0.5444, "step": 2819 }, { "epoch": 0.35, "grad_norm": 1.5077713293075448, "learning_rate": 7.5474209492325466e-06, "loss": 0.5356, "step": 2820 }, { "epoch": 0.35, "grad_norm": 1.4323981568838295, "learning_rate": 7.5456914205079405e-06, "loss": 0.5173, "step": 2821 }, { "epoch": 0.35, "grad_norm": 1.4514281182699293, "learning_rate": 7.543961480504463e-06, "loss": 0.4911, "step": 2822 }, { "epoch": 0.35, "grad_norm": 1.3481986067972602, "learning_rate": 7.542231129501603e-06, "loss": 0.5285, "step": 2823 }, { "epoch": 0.35, "grad_norm": 3.539752312980218, "learning_rate": 7.540500367778916e-06, "loss": 0.4925, "step": 2824 }, { "epoch": 0.35, "grad_norm": 1.3970147800199832, "learning_rate": 7.538769195616018e-06, "loss": 0.5636, "step": 2825 }, { "epoch": 0.35, "grad_norm": 2.2279673122276384, "learning_rate": 7.537037613292597e-06, "loss": 0.4888, "step": 2826 }, { "epoch": 0.35, "grad_norm": 1.7812718415678297, "learning_rate": 7.535305621088407e-06, "loss": 0.5011, "step": 2827 }, { "epoch": 0.35, "grad_norm": 1.5110838443802594, "learning_rate": 7.533573219283264e-06, "loss": 0.5166, "step": 2828 }, { "epoch": 0.35, "grad_norm": 1.3924394435460279, "learning_rate": 7.531840408157054e-06, "loss": 0.5189, "step": 2829 }, { "epoch": 0.35, "grad_norm": 1.5926740455071986, "learning_rate": 7.530107187989727e-06, "loss": 0.5627, "step": 2830 }, { "epoch": 0.35, "grad_norm": 1.679223687426762, "learning_rate": 7.528373559061299e-06, "loss": 0.5035, "step": 2831 }, { "epoch": 0.35, "grad_norm": 1.4755516250039995, "learning_rate": 7.526639521651858e-06, "loss": 0.4423, "step": 2832 }, { "epoch": 0.35, "grad_norm": 1.452773889809484, "learning_rate": 7.524905076041548e-06, "loss": 0.5392, "step": 2833 }, { "epoch": 0.35, "grad_norm": 1.3163601597702044, "learning_rate": 7.5231702225105855e-06, "loss": 0.4859, "step": 2834 }, { "epoch": 0.35, "grad_norm": 1.4811247611517149, "learning_rate": 7.521434961339251e-06, "loss": 0.5279, "step": 2835 }, { "epoch": 0.35, "grad_norm": 1.3828663889486925, "learning_rate": 7.519699292807891e-06, "loss": 0.5661, "step": 2836 }, { "epoch": 0.35, "grad_norm": 1.4661354083477, "learning_rate": 7.517963217196922e-06, "loss": 0.5295, "step": 2837 }, { "epoch": 0.35, "grad_norm": 1.2459607024844084, "learning_rate": 7.516226734786818e-06, "loss": 0.4879, "step": 2838 }, { "epoch": 0.35, "grad_norm": 0.6301408152941137, "learning_rate": 7.514489845858122e-06, "loss": 0.509, "step": 2839 }, { "epoch": 0.35, "grad_norm": 3.89784173623586, "learning_rate": 7.512752550691447e-06, "loss": 0.51, "step": 2840 }, { "epoch": 0.35, "grad_norm": 1.5483161180179505, "learning_rate": 7.511014849567466e-06, "loss": 0.4395, "step": 2841 }, { "epoch": 0.35, "grad_norm": 1.509431583014947, "learning_rate": 7.509276742766922e-06, "loss": 0.5203, "step": 2842 }, { "epoch": 0.35, "grad_norm": 1.3967413235682298, "learning_rate": 7.50753823057062e-06, "loss": 0.5108, "step": 2843 }, { "epoch": 0.35, "grad_norm": 2.454115123765699, "learning_rate": 7.505799313259433e-06, "loss": 0.4783, "step": 2844 }, { "epoch": 0.35, "grad_norm": 1.6608105137667641, "learning_rate": 7.504059991114298e-06, "loss": 0.5376, "step": 2845 }, { "epoch": 0.35, "grad_norm": 1.3770781664755012, "learning_rate": 7.502320264416217e-06, "loss": 0.5124, "step": 2846 }, { "epoch": 0.35, "grad_norm": 1.2886223833941657, "learning_rate": 7.500580133446259e-06, "loss": 0.4993, "step": 2847 }, { "epoch": 0.35, "grad_norm": 1.3551384829891164, "learning_rate": 7.498839598485557e-06, "loss": 0.5414, "step": 2848 }, { "epoch": 0.35, "grad_norm": 1.4519349854873853, "learning_rate": 7.497098659815312e-06, "loss": 0.4865, "step": 2849 }, { "epoch": 0.35, "grad_norm": 1.5215147079444222, "learning_rate": 7.495357317716784e-06, "loss": 0.4933, "step": 2850 }, { "epoch": 0.35, "grad_norm": 1.6889627685059738, "learning_rate": 7.493615572471303e-06, "loss": 0.5437, "step": 2851 }, { "epoch": 0.35, "grad_norm": 1.5426468176935286, "learning_rate": 7.491873424360267e-06, "loss": 0.5536, "step": 2852 }, { "epoch": 0.35, "grad_norm": 1.2934185127919198, "learning_rate": 7.490130873665131e-06, "loss": 0.555, "step": 2853 }, { "epoch": 0.35, "grad_norm": 3.8929268075082235, "learning_rate": 7.488387920667423e-06, "loss": 0.5058, "step": 2854 }, { "epoch": 0.35, "grad_norm": 1.5327500386068296, "learning_rate": 7.486644565648731e-06, "loss": 0.4923, "step": 2855 }, { "epoch": 0.35, "grad_norm": 1.7011582348896546, "learning_rate": 7.484900808890707e-06, "loss": 0.5602, "step": 2856 }, { "epoch": 0.35, "grad_norm": 1.5780262628699275, "learning_rate": 7.483156650675073e-06, "loss": 0.5853, "step": 2857 }, { "epoch": 0.35, "grad_norm": 1.9623520716665723, "learning_rate": 7.481412091283613e-06, "loss": 0.5624, "step": 2858 }, { "epoch": 0.35, "grad_norm": 1.4578953352943917, "learning_rate": 7.479667130998174e-06, "loss": 0.5114, "step": 2859 }, { "epoch": 0.35, "grad_norm": 1.5016057014942639, "learning_rate": 7.477921770100672e-06, "loss": 0.5319, "step": 2860 }, { "epoch": 0.36, "grad_norm": 0.6416102122722422, "learning_rate": 7.476176008873084e-06, "loss": 0.5037, "step": 2861 }, { "epoch": 0.36, "grad_norm": 1.179152985299466, "learning_rate": 7.474429847597454e-06, "loss": 0.5166, "step": 2862 }, { "epoch": 0.36, "grad_norm": 1.2704451553872496, "learning_rate": 7.472683286555889e-06, "loss": 0.4983, "step": 2863 }, { "epoch": 0.36, "grad_norm": 1.4942884809928834, "learning_rate": 7.470936326030562e-06, "loss": 0.5133, "step": 2864 }, { "epoch": 0.36, "grad_norm": 1.553793602713353, "learning_rate": 7.46918896630371e-06, "loss": 0.4992, "step": 2865 }, { "epoch": 0.36, "grad_norm": 1.4876867607076067, "learning_rate": 7.467441207657633e-06, "loss": 0.5729, "step": 2866 }, { "epoch": 0.36, "grad_norm": 1.3095012089466964, "learning_rate": 7.465693050374698e-06, "loss": 0.4689, "step": 2867 }, { "epoch": 0.36, "grad_norm": 1.2044094333174193, "learning_rate": 7.463944494737334e-06, "loss": 0.5217, "step": 2868 }, { "epoch": 0.36, "grad_norm": 1.8193312473869578, "learning_rate": 7.462195541028037e-06, "loss": 0.5285, "step": 2869 }, { "epoch": 0.36, "grad_norm": 1.4909392607182927, "learning_rate": 7.460446189529365e-06, "loss": 0.5196, "step": 2870 }, { "epoch": 0.36, "grad_norm": 1.24481673162198, "learning_rate": 7.458696440523942e-06, "loss": 0.4666, "step": 2871 }, { "epoch": 0.36, "grad_norm": 1.5376375614629765, "learning_rate": 7.4569462942944525e-06, "loss": 0.5228, "step": 2872 }, { "epoch": 0.36, "grad_norm": 1.4790336815690879, "learning_rate": 7.455195751123654e-06, "loss": 0.539, "step": 2873 }, { "epoch": 0.36, "grad_norm": 1.3890204848477787, "learning_rate": 7.453444811294357e-06, "loss": 0.493, "step": 2874 }, { "epoch": 0.36, "grad_norm": 1.9199696185437813, "learning_rate": 7.451693475089442e-06, "loss": 0.5167, "step": 2875 }, { "epoch": 0.36, "grad_norm": 1.4822184521520818, "learning_rate": 7.449941742791853e-06, "loss": 0.5066, "step": 2876 }, { "epoch": 0.36, "grad_norm": 1.3316945278591281, "learning_rate": 7.448189614684599e-06, "loss": 0.5055, "step": 2877 }, { "epoch": 0.36, "grad_norm": 5.5407533800955155, "learning_rate": 7.446437091050751e-06, "loss": 0.4829, "step": 2878 }, { "epoch": 0.36, "grad_norm": 1.7652822301013407, "learning_rate": 7.444684172173445e-06, "loss": 0.4929, "step": 2879 }, { "epoch": 0.36, "grad_norm": 1.4786722254819273, "learning_rate": 7.442930858335879e-06, "loss": 0.4984, "step": 2880 }, { "epoch": 0.36, "grad_norm": 1.5148769815243324, "learning_rate": 7.44117714982132e-06, "loss": 0.4862, "step": 2881 }, { "epoch": 0.36, "grad_norm": 1.2571723121367273, "learning_rate": 7.43942304691309e-06, "loss": 0.5446, "step": 2882 }, { "epoch": 0.36, "grad_norm": 1.4647084158513137, "learning_rate": 7.437668549894583e-06, "loss": 0.5334, "step": 2883 }, { "epoch": 0.36, "grad_norm": 1.4714820556689951, "learning_rate": 7.435913659049253e-06, "loss": 0.4864, "step": 2884 }, { "epoch": 0.36, "grad_norm": 1.2478116026485935, "learning_rate": 7.434158374660617e-06, "loss": 0.5445, "step": 2885 }, { "epoch": 0.36, "grad_norm": 1.616471999591579, "learning_rate": 7.432402697012258e-06, "loss": 0.5104, "step": 2886 }, { "epoch": 0.36, "grad_norm": 1.5126140439909073, "learning_rate": 7.430646626387821e-06, "loss": 0.5154, "step": 2887 }, { "epoch": 0.36, "grad_norm": 1.4132347024921716, "learning_rate": 7.428890163071013e-06, "loss": 0.5495, "step": 2888 }, { "epoch": 0.36, "grad_norm": 1.301685970559841, "learning_rate": 7.427133307345608e-06, "loss": 0.4776, "step": 2889 }, { "epoch": 0.36, "grad_norm": 1.221108781930627, "learning_rate": 7.425376059495442e-06, "loss": 0.4889, "step": 2890 }, { "epoch": 0.36, "grad_norm": 2.177713222796027, "learning_rate": 7.4236184198044115e-06, "loss": 0.4286, "step": 2891 }, { "epoch": 0.36, "grad_norm": 1.631555248333458, "learning_rate": 7.421860388556481e-06, "loss": 0.5464, "step": 2892 }, { "epoch": 0.36, "grad_norm": 1.4870772812914097, "learning_rate": 7.4201019660356745e-06, "loss": 0.4909, "step": 2893 }, { "epoch": 0.36, "grad_norm": 0.7172788153908546, "learning_rate": 7.418343152526081e-06, "loss": 0.5042, "step": 2894 }, { "epoch": 0.36, "grad_norm": 2.8282438880838727, "learning_rate": 7.416583948311852e-06, "loss": 0.5513, "step": 2895 }, { "epoch": 0.36, "grad_norm": 1.7214347174465112, "learning_rate": 7.414824353677202e-06, "loss": 0.5221, "step": 2896 }, { "epoch": 0.36, "grad_norm": 2.3148725718413026, "learning_rate": 7.4130643689064105e-06, "loss": 0.5182, "step": 2897 }, { "epoch": 0.36, "grad_norm": 1.2618207897069937, "learning_rate": 7.411303994283818e-06, "loss": 0.4973, "step": 2898 }, { "epoch": 0.36, "grad_norm": 1.5852620561225959, "learning_rate": 7.4095432300938295e-06, "loss": 0.5121, "step": 2899 }, { "epoch": 0.36, "grad_norm": 1.3827972981621872, "learning_rate": 7.407782076620909e-06, "loss": 0.4795, "step": 2900 }, { "epoch": 0.36, "grad_norm": 1.3328404352084577, "learning_rate": 7.4060205341495895e-06, "loss": 0.5338, "step": 2901 }, { "epoch": 0.36, "grad_norm": 1.5203616571569938, "learning_rate": 7.404258602964462e-06, "loss": 0.5179, "step": 2902 }, { "epoch": 0.36, "grad_norm": 1.4862472301200262, "learning_rate": 7.402496283350182e-06, "loss": 0.582, "step": 2903 }, { "epoch": 0.36, "grad_norm": 1.5447324937281806, "learning_rate": 7.400733575591469e-06, "loss": 0.5216, "step": 2904 }, { "epoch": 0.36, "grad_norm": 1.353338309003225, "learning_rate": 7.398970479973101e-06, "loss": 0.5196, "step": 2905 }, { "epoch": 0.36, "grad_norm": 1.390823770991522, "learning_rate": 7.3972069967799255e-06, "loss": 0.5827, "step": 2906 }, { "epoch": 0.36, "grad_norm": 1.3209525144108847, "learning_rate": 7.395443126296846e-06, "loss": 0.5036, "step": 2907 }, { "epoch": 0.36, "grad_norm": 1.5146042399906572, "learning_rate": 7.3936788688088335e-06, "loss": 0.5087, "step": 2908 }, { "epoch": 0.36, "grad_norm": 1.7655408507349146, "learning_rate": 7.391914224600918e-06, "loss": 0.5265, "step": 2909 }, { "epoch": 0.36, "grad_norm": 1.3637002400842049, "learning_rate": 7.390149193958192e-06, "loss": 0.5321, "step": 2910 }, { "epoch": 0.36, "grad_norm": 1.559416043795649, "learning_rate": 7.388383777165815e-06, "loss": 0.5393, "step": 2911 }, { "epoch": 0.36, "grad_norm": 1.2247524083410941, "learning_rate": 7.386617974509002e-06, "loss": 0.4754, "step": 2912 }, { "epoch": 0.36, "grad_norm": 1.4028519626964275, "learning_rate": 7.3848517862730364e-06, "loss": 0.5671, "step": 2913 }, { "epoch": 0.36, "grad_norm": 1.6481816227825374, "learning_rate": 7.38308521274326e-06, "loss": 0.4992, "step": 2914 }, { "epoch": 0.36, "grad_norm": 1.5708813531444694, "learning_rate": 7.381318254205081e-06, "loss": 0.5116, "step": 2915 }, { "epoch": 0.36, "grad_norm": 1.6203098339157995, "learning_rate": 7.3795509109439645e-06, "loss": 0.5183, "step": 2916 }, { "epoch": 0.36, "grad_norm": 1.2587625270337148, "learning_rate": 7.377783183245442e-06, "loss": 0.4795, "step": 2917 }, { "epoch": 0.36, "grad_norm": 1.4369800726343376, "learning_rate": 7.376015071395103e-06, "loss": 0.5057, "step": 2918 }, { "epoch": 0.36, "grad_norm": 1.3720643090525502, "learning_rate": 7.374246575678604e-06, "loss": 0.4933, "step": 2919 }, { "epoch": 0.36, "grad_norm": 1.4278576722593987, "learning_rate": 7.372477696381659e-06, "loss": 0.5417, "step": 2920 }, { "epoch": 0.36, "grad_norm": 2.8221467932962376, "learning_rate": 7.370708433790048e-06, "loss": 0.5501, "step": 2921 }, { "epoch": 0.36, "grad_norm": 1.6611923403031745, "learning_rate": 7.36893878818961e-06, "loss": 0.545, "step": 2922 }, { "epoch": 0.36, "grad_norm": 1.3367746208219013, "learning_rate": 7.367168759866248e-06, "loss": 0.5271, "step": 2923 }, { "epoch": 0.36, "grad_norm": 1.3873043665769509, "learning_rate": 7.3653983491059245e-06, "loss": 0.5342, "step": 2924 }, { "epoch": 0.36, "grad_norm": 1.686022468418169, "learning_rate": 7.363627556194663e-06, "loss": 0.5073, "step": 2925 }, { "epoch": 0.36, "grad_norm": 3.864577609095847, "learning_rate": 7.361856381418555e-06, "loss": 0.5025, "step": 2926 }, { "epoch": 0.36, "grad_norm": 1.5126834615642972, "learning_rate": 7.360084825063748e-06, "loss": 0.5628, "step": 2927 }, { "epoch": 0.36, "grad_norm": 1.4677059654643878, "learning_rate": 7.35831288741645e-06, "loss": 0.493, "step": 2928 }, { "epoch": 0.36, "grad_norm": 1.5816508458999734, "learning_rate": 7.356540568762936e-06, "loss": 0.5537, "step": 2929 }, { "epoch": 0.36, "grad_norm": 1.5373700847214247, "learning_rate": 7.354767869389537e-06, "loss": 0.526, "step": 2930 }, { "epoch": 0.36, "grad_norm": 1.407280590400603, "learning_rate": 7.3529947895826505e-06, "loss": 0.5235, "step": 2931 }, { "epoch": 0.36, "grad_norm": 1.3570233205482056, "learning_rate": 7.351221329628733e-06, "loss": 0.485, "step": 2932 }, { "epoch": 0.36, "grad_norm": 1.3560438717504448, "learning_rate": 7.349447489814301e-06, "loss": 0.539, "step": 2933 }, { "epoch": 0.36, "grad_norm": 1.5107167346554402, "learning_rate": 7.347673270425935e-06, "loss": 0.5465, "step": 2934 }, { "epoch": 0.36, "grad_norm": 1.5350279692878122, "learning_rate": 7.345898671750277e-06, "loss": 0.5205, "step": 2935 }, { "epoch": 0.36, "grad_norm": 1.9087051914119872, "learning_rate": 7.344123694074028e-06, "loss": 0.46, "step": 2936 }, { "epoch": 0.36, "grad_norm": 1.316312234164915, "learning_rate": 7.342348337683949e-06, "loss": 0.5125, "step": 2937 }, { "epoch": 0.36, "grad_norm": 1.237503490731533, "learning_rate": 7.340572602866868e-06, "loss": 0.4504, "step": 2938 }, { "epoch": 0.36, "grad_norm": 1.4873898920144497, "learning_rate": 7.338796489909668e-06, "loss": 0.5169, "step": 2939 }, { "epoch": 0.36, "grad_norm": 1.4055193178497787, "learning_rate": 7.337019999099297e-06, "loss": 0.5279, "step": 2940 }, { "epoch": 0.36, "grad_norm": 1.3350012468586496, "learning_rate": 7.335243130722763e-06, "loss": 0.5018, "step": 2941 }, { "epoch": 0.37, "grad_norm": 1.3696508667171363, "learning_rate": 7.333465885067133e-06, "loss": 0.4933, "step": 2942 }, { "epoch": 0.37, "grad_norm": 1.2081108750109073, "learning_rate": 7.331688262419539e-06, "loss": 0.4823, "step": 2943 }, { "epoch": 0.37, "grad_norm": 2.126683979625058, "learning_rate": 7.329910263067172e-06, "loss": 0.5157, "step": 2944 }, { "epoch": 0.37, "grad_norm": 1.6644563292123145, "learning_rate": 7.328131887297281e-06, "loss": 0.4806, "step": 2945 }, { "epoch": 0.37, "grad_norm": 2.006276596512924, "learning_rate": 7.326353135397177e-06, "loss": 0.4804, "step": 2946 }, { "epoch": 0.37, "grad_norm": 1.5446131874657179, "learning_rate": 7.3245740076542385e-06, "loss": 0.4882, "step": 2947 }, { "epoch": 0.37, "grad_norm": 1.6923510062890936, "learning_rate": 7.322794504355894e-06, "loss": 0.5492, "step": 2948 }, { "epoch": 0.37, "grad_norm": 1.3394289668212493, "learning_rate": 7.321014625789641e-06, "loss": 0.4962, "step": 2949 }, { "epoch": 0.37, "grad_norm": 2.4749240895876494, "learning_rate": 7.319234372243032e-06, "loss": 0.5458, "step": 2950 }, { "epoch": 0.37, "grad_norm": 1.6144924649110135, "learning_rate": 7.317453744003686e-06, "loss": 0.5033, "step": 2951 }, { "epoch": 0.37, "grad_norm": 1.4906919511209462, "learning_rate": 7.315672741359277e-06, "loss": 0.5148, "step": 2952 }, { "epoch": 0.37, "grad_norm": 1.4532325048454164, "learning_rate": 7.313891364597541e-06, "loss": 0.5407, "step": 2953 }, { "epoch": 0.37, "grad_norm": 2.1811973839585446, "learning_rate": 7.3121096140062776e-06, "loss": 0.4752, "step": 2954 }, { "epoch": 0.37, "grad_norm": 4.461913376198192, "learning_rate": 7.310327489873341e-06, "loss": 0.5184, "step": 2955 }, { "epoch": 0.37, "grad_norm": 1.4023156824094034, "learning_rate": 7.308544992486653e-06, "loss": 0.5481, "step": 2956 }, { "epoch": 0.37, "grad_norm": 1.3419673390743536, "learning_rate": 7.3067621221341875e-06, "loss": 0.4797, "step": 2957 }, { "epoch": 0.37, "grad_norm": 2.821401888509523, "learning_rate": 7.304978879103986e-06, "loss": 0.5255, "step": 2958 }, { "epoch": 0.37, "grad_norm": 1.7809055463536292, "learning_rate": 7.303195263684146e-06, "loss": 0.4903, "step": 2959 }, { "epoch": 0.37, "grad_norm": 1.5198332285659275, "learning_rate": 7.301411276162827e-06, "loss": 0.4952, "step": 2960 }, { "epoch": 0.37, "grad_norm": 1.4624158529664613, "learning_rate": 7.299626916828246e-06, "loss": 0.5132, "step": 2961 }, { "epoch": 0.37, "grad_norm": 1.4693717947778802, "learning_rate": 7.2978421859686845e-06, "loss": 0.4981, "step": 2962 }, { "epoch": 0.37, "grad_norm": 1.3892004234553528, "learning_rate": 7.296057083872481e-06, "loss": 0.5037, "step": 2963 }, { "epoch": 0.37, "grad_norm": 1.6086533258064626, "learning_rate": 7.294271610828032e-06, "loss": 0.4843, "step": 2964 }, { "epoch": 0.37, "grad_norm": 1.438061788483176, "learning_rate": 7.2924857671237996e-06, "loss": 0.4706, "step": 2965 }, { "epoch": 0.37, "grad_norm": 1.3813683589454293, "learning_rate": 7.2906995530482986e-06, "loss": 0.5731, "step": 2966 }, { "epoch": 0.37, "grad_norm": 1.4969900098101476, "learning_rate": 7.288912968890112e-06, "loss": 0.4994, "step": 2967 }, { "epoch": 0.37, "grad_norm": 1.483277435038951, "learning_rate": 7.287126014937876e-06, "loss": 0.5449, "step": 2968 }, { "epoch": 0.37, "grad_norm": 1.5714496524913182, "learning_rate": 7.285338691480289e-06, "loss": 0.5268, "step": 2969 }, { "epoch": 0.37, "grad_norm": 1.396912755938003, "learning_rate": 7.283550998806108e-06, "loss": 0.5263, "step": 2970 }, { "epoch": 0.37, "grad_norm": 1.3184850804940975, "learning_rate": 7.2817629372041544e-06, "loss": 0.4417, "step": 2971 }, { "epoch": 0.37, "grad_norm": 1.5471097162982606, "learning_rate": 7.279974506963301e-06, "loss": 0.5393, "step": 2972 }, { "epoch": 0.37, "grad_norm": 1.501741983849576, "learning_rate": 7.278185708372485e-06, "loss": 0.5418, "step": 2973 }, { "epoch": 0.37, "grad_norm": 1.5656687536865674, "learning_rate": 7.276396541720703e-06, "loss": 0.5272, "step": 2974 }, { "epoch": 0.37, "grad_norm": 1.3627469786622979, "learning_rate": 7.274607007297011e-06, "loss": 0.5223, "step": 2975 }, { "epoch": 0.37, "grad_norm": 1.6736889928051844, "learning_rate": 7.272817105390525e-06, "loss": 0.5768, "step": 2976 }, { "epoch": 0.37, "grad_norm": 1.284650601665037, "learning_rate": 7.271026836290418e-06, "loss": 0.4861, "step": 2977 }, { "epoch": 0.37, "grad_norm": 1.1384482986339133, "learning_rate": 7.269236200285925e-06, "loss": 0.4921, "step": 2978 }, { "epoch": 0.37, "grad_norm": 1.3967900521778172, "learning_rate": 7.267445197666336e-06, "loss": 0.446, "step": 2979 }, { "epoch": 0.37, "grad_norm": 1.4677001082213201, "learning_rate": 7.265653828721007e-06, "loss": 0.4801, "step": 2980 }, { "epoch": 0.37, "grad_norm": 1.4026183591203205, "learning_rate": 7.263862093739349e-06, "loss": 0.5119, "step": 2981 }, { "epoch": 0.37, "grad_norm": 1.495942156017364, "learning_rate": 7.2620699930108295e-06, "loss": 0.5288, "step": 2982 }, { "epoch": 0.37, "grad_norm": 1.5489605948938818, "learning_rate": 7.26027752682498e-06, "loss": 0.5008, "step": 2983 }, { "epoch": 0.37, "grad_norm": 1.70728934022771, "learning_rate": 7.258484695471391e-06, "loss": 0.4936, "step": 2984 }, { "epoch": 0.37, "grad_norm": 1.4794145127355325, "learning_rate": 7.256691499239708e-06, "loss": 0.5802, "step": 2985 }, { "epoch": 0.37, "grad_norm": 1.2814675673826028, "learning_rate": 7.254897938419637e-06, "loss": 0.5676, "step": 2986 }, { "epoch": 0.37, "grad_norm": 1.828637831299985, "learning_rate": 7.253104013300944e-06, "loss": 0.4818, "step": 2987 }, { "epoch": 0.37, "grad_norm": 1.4575986554723903, "learning_rate": 7.251309724173457e-06, "loss": 0.5136, "step": 2988 }, { "epoch": 0.37, "grad_norm": 1.4005442517007833, "learning_rate": 7.249515071327054e-06, "loss": 0.5347, "step": 2989 }, { "epoch": 0.37, "grad_norm": 1.3491908553203378, "learning_rate": 7.2477200550516805e-06, "loss": 0.5472, "step": 2990 }, { "epoch": 0.37, "grad_norm": 2.1381894261443333, "learning_rate": 7.245924675637335e-06, "loss": 0.4848, "step": 2991 }, { "epoch": 0.37, "grad_norm": 1.3732018389100948, "learning_rate": 7.244128933374078e-06, "loss": 0.5319, "step": 2992 }, { "epoch": 0.37, "grad_norm": 1.7890526708319623, "learning_rate": 7.242332828552028e-06, "loss": 0.4988, "step": 2993 }, { "epoch": 0.37, "grad_norm": 3.0272958037206954, "learning_rate": 7.240536361461361e-06, "loss": 0.5384, "step": 2994 }, { "epoch": 0.37, "grad_norm": 1.5727488158742704, "learning_rate": 7.238739532392311e-06, "loss": 0.5257, "step": 2995 }, { "epoch": 0.37, "grad_norm": 1.6202822584631098, "learning_rate": 7.236942341635172e-06, "loss": 0.5044, "step": 2996 }, { "epoch": 0.37, "grad_norm": 1.3387512396302006, "learning_rate": 7.2351447894802975e-06, "loss": 0.5279, "step": 2997 }, { "epoch": 0.37, "grad_norm": 1.4411390442007235, "learning_rate": 7.233346876218097e-06, "loss": 0.5236, "step": 2998 }, { "epoch": 0.37, "grad_norm": 1.5184640188144154, "learning_rate": 7.231548602139038e-06, "loss": 0.4996, "step": 2999 }, { "epoch": 0.37, "grad_norm": 2.94169479448034, "learning_rate": 7.22974996753365e-06, "loss": 0.5685, "step": 3000 }, { "epoch": 0.37, "grad_norm": 0.6834086594487581, "learning_rate": 7.227950972692517e-06, "loss": 0.4798, "step": 3001 }, { "epoch": 0.37, "grad_norm": 0.6671153007124514, "learning_rate": 7.226151617906281e-06, "loss": 0.5056, "step": 3002 }, { "epoch": 0.37, "grad_norm": 1.703830372372508, "learning_rate": 7.224351903465644e-06, "loss": 0.5304, "step": 3003 }, { "epoch": 0.37, "grad_norm": 1.4897957844867715, "learning_rate": 7.222551829661368e-06, "loss": 0.5406, "step": 3004 }, { "epoch": 0.37, "grad_norm": 1.2547913791026493, "learning_rate": 7.22075139678427e-06, "loss": 0.5353, "step": 3005 }, { "epoch": 0.37, "grad_norm": 1.3197654635000653, "learning_rate": 7.2189506051252255e-06, "loss": 0.479, "step": 3006 }, { "epoch": 0.37, "grad_norm": 1.7469509824577085, "learning_rate": 7.217149454975168e-06, "loss": 0.5134, "step": 3007 }, { "epoch": 0.37, "grad_norm": 1.6336829981917924, "learning_rate": 7.215347946625088e-06, "loss": 0.5164, "step": 3008 }, { "epoch": 0.37, "grad_norm": 1.3129226881612475, "learning_rate": 7.213546080366036e-06, "loss": 0.4935, "step": 3009 }, { "epoch": 0.37, "grad_norm": 1.476449022913655, "learning_rate": 7.21174385648912e-06, "loss": 0.5313, "step": 3010 }, { "epoch": 0.37, "grad_norm": 1.457575248183513, "learning_rate": 7.209941275285504e-06, "loss": 0.554, "step": 3011 }, { "epoch": 0.37, "grad_norm": 1.4815888327448383, "learning_rate": 7.208138337046413e-06, "loss": 0.5028, "step": 3012 }, { "epoch": 0.37, "grad_norm": 1.7516509103197164, "learning_rate": 7.206335042063125e-06, "loss": 0.5096, "step": 3013 }, { "epoch": 0.37, "grad_norm": 1.391886475393212, "learning_rate": 7.20453139062698e-06, "loss": 0.5031, "step": 3014 }, { "epoch": 0.37, "grad_norm": 1.5367984607946585, "learning_rate": 7.202727383029372e-06, "loss": 0.5289, "step": 3015 }, { "epoch": 0.37, "grad_norm": 1.7267274430184791, "learning_rate": 7.200923019561756e-06, "loss": 0.5325, "step": 3016 }, { "epoch": 0.37, "grad_norm": 1.538263403773552, "learning_rate": 7.199118300515644e-06, "loss": 0.4914, "step": 3017 }, { "epoch": 0.37, "grad_norm": 1.5598289001903407, "learning_rate": 7.197313226182601e-06, "loss": 0.4706, "step": 3018 }, { "epoch": 0.37, "grad_norm": 1.39709071983512, "learning_rate": 7.195507796854253e-06, "loss": 0.519, "step": 3019 }, { "epoch": 0.37, "grad_norm": 1.97273352499986, "learning_rate": 7.193702012822285e-06, "loss": 0.521, "step": 3020 }, { "epoch": 0.37, "grad_norm": 2.484045494576766, "learning_rate": 7.191895874378436e-06, "loss": 0.5457, "step": 3021 }, { "epoch": 0.38, "grad_norm": 1.4923881307280848, "learning_rate": 7.190089381814505e-06, "loss": 0.4928, "step": 3022 }, { "epoch": 0.38, "grad_norm": 8.044763977393002, "learning_rate": 7.188282535422345e-06, "loss": 0.5349, "step": 3023 }, { "epoch": 0.38, "grad_norm": 1.6422923664857394, "learning_rate": 7.186475335493867e-06, "loss": 0.5185, "step": 3024 }, { "epoch": 0.38, "grad_norm": 2.1337642672469794, "learning_rate": 7.184667782321044e-06, "loss": 0.568, "step": 3025 }, { "epoch": 0.38, "grad_norm": 1.285695554350569, "learning_rate": 7.182859876195903e-06, "loss": 0.5264, "step": 3026 }, { "epoch": 0.38, "grad_norm": 1.7352072076106895, "learning_rate": 7.1810516174105195e-06, "loss": 0.5556, "step": 3027 }, { "epoch": 0.38, "grad_norm": 1.631523989032772, "learning_rate": 7.179243006257038e-06, "loss": 0.5464, "step": 3028 }, { "epoch": 0.38, "grad_norm": 1.5491726163007693, "learning_rate": 7.177434043027658e-06, "loss": 0.4847, "step": 3029 }, { "epoch": 0.38, "grad_norm": 0.7115960087940415, "learning_rate": 7.175624728014631e-06, "loss": 0.5297, "step": 3030 }, { "epoch": 0.38, "grad_norm": 1.3545527937682438, "learning_rate": 7.173815061510267e-06, "loss": 0.45, "step": 3031 }, { "epoch": 0.38, "grad_norm": 1.2777021474778372, "learning_rate": 7.172005043806934e-06, "loss": 0.5129, "step": 3032 }, { "epoch": 0.38, "grad_norm": 1.5843455375733593, "learning_rate": 7.170194675197059e-06, "loss": 0.4926, "step": 3033 }, { "epoch": 0.38, "grad_norm": 1.5214693147833276, "learning_rate": 7.168383955973119e-06, "loss": 0.5229, "step": 3034 }, { "epoch": 0.38, "grad_norm": 1.261454930205327, "learning_rate": 7.166572886427655e-06, "loss": 0.465, "step": 3035 }, { "epoch": 0.38, "grad_norm": 1.419593791340515, "learning_rate": 7.164761466853258e-06, "loss": 0.548, "step": 3036 }, { "epoch": 0.38, "grad_norm": 1.364144842925871, "learning_rate": 7.162949697542583e-06, "loss": 0.4924, "step": 3037 }, { "epoch": 0.38, "grad_norm": 1.417175570424057, "learning_rate": 7.161137578788333e-06, "loss": 0.5086, "step": 3038 }, { "epoch": 0.38, "grad_norm": 1.6138840172605053, "learning_rate": 7.159325110883274e-06, "loss": 0.5614, "step": 3039 }, { "epoch": 0.38, "grad_norm": 1.4548766722256878, "learning_rate": 7.157512294120225e-06, "loss": 0.5243, "step": 3040 }, { "epoch": 0.38, "grad_norm": 1.6327820741598216, "learning_rate": 7.155699128792063e-06, "loss": 0.537, "step": 3041 }, { "epoch": 0.38, "grad_norm": 1.6640787478353725, "learning_rate": 7.153885615191723e-06, "loss": 0.55, "step": 3042 }, { "epoch": 0.38, "grad_norm": 1.526062849121019, "learning_rate": 7.15207175361219e-06, "loss": 0.5163, "step": 3043 }, { "epoch": 0.38, "grad_norm": 1.488004814390481, "learning_rate": 7.150257544346513e-06, "loss": 0.5014, "step": 3044 }, { "epoch": 0.38, "grad_norm": 1.458619976622426, "learning_rate": 7.148442987687792e-06, "loss": 0.5324, "step": 3045 }, { "epoch": 0.38, "grad_norm": 0.6743278152473341, "learning_rate": 7.146628083929183e-06, "loss": 0.5597, "step": 3046 }, { "epoch": 0.38, "grad_norm": 1.4734133235019455, "learning_rate": 7.144812833363902e-06, "loss": 0.4929, "step": 3047 }, { "epoch": 0.38, "grad_norm": 1.5452831563979532, "learning_rate": 7.142997236285217e-06, "loss": 0.5187, "step": 3048 }, { "epoch": 0.38, "grad_norm": 1.2664513770058123, "learning_rate": 7.141181292986457e-06, "loss": 0.5586, "step": 3049 }, { "epoch": 0.38, "grad_norm": 1.3615095997700022, "learning_rate": 7.139365003760998e-06, "loss": 0.5051, "step": 3050 }, { "epoch": 0.38, "grad_norm": 1.843213610546877, "learning_rate": 7.137548368902284e-06, "loss": 0.4999, "step": 3051 }, { "epoch": 0.38, "grad_norm": 1.4640428774254095, "learning_rate": 7.135731388703804e-06, "loss": 0.4822, "step": 3052 }, { "epoch": 0.38, "grad_norm": 1.5575020507385404, "learning_rate": 7.133914063459108e-06, "loss": 0.5176, "step": 3053 }, { "epoch": 0.38, "grad_norm": 1.3698924554546537, "learning_rate": 7.132096393461801e-06, "loss": 0.5603, "step": 3054 }, { "epoch": 0.38, "grad_norm": 1.3405839230187293, "learning_rate": 7.130278379005545e-06, "loss": 0.5195, "step": 3055 }, { "epoch": 0.38, "grad_norm": 1.5140107590051919, "learning_rate": 7.128460020384055e-06, "loss": 0.4992, "step": 3056 }, { "epoch": 0.38, "grad_norm": 1.1915114977633574, "learning_rate": 7.126641317891101e-06, "loss": 0.4718, "step": 3057 }, { "epoch": 0.38, "grad_norm": 1.4336714376199327, "learning_rate": 7.124822271820513e-06, "loss": 0.4776, "step": 3058 }, { "epoch": 0.38, "grad_norm": 1.388417058377369, "learning_rate": 7.123002882466174e-06, "loss": 0.4925, "step": 3059 }, { "epoch": 0.38, "grad_norm": 1.538238246586762, "learning_rate": 7.121183150122022e-06, "loss": 0.5452, "step": 3060 }, { "epoch": 0.38, "grad_norm": 0.7098794479468666, "learning_rate": 7.1193630750820495e-06, "loss": 0.5372, "step": 3061 }, { "epoch": 0.38, "grad_norm": 1.6669472641299299, "learning_rate": 7.117542657640307e-06, "loss": 0.5566, "step": 3062 }, { "epoch": 0.38, "grad_norm": 1.6543545849103167, "learning_rate": 7.1157218980908984e-06, "loss": 0.5145, "step": 3063 }, { "epoch": 0.38, "grad_norm": 1.9544024341278428, "learning_rate": 7.113900796727984e-06, "loss": 0.5462, "step": 3064 }, { "epoch": 0.38, "grad_norm": 1.933868451967738, "learning_rate": 7.112079353845775e-06, "loss": 0.5273, "step": 3065 }, { "epoch": 0.38, "grad_norm": 1.4942206033292202, "learning_rate": 7.110257569738549e-06, "loss": 0.4997, "step": 3066 }, { "epoch": 0.38, "grad_norm": 1.9699941736244426, "learning_rate": 7.108435444700626e-06, "loss": 0.4467, "step": 3067 }, { "epoch": 0.38, "grad_norm": 1.5284924845003787, "learning_rate": 7.106612979026387e-06, "loss": 0.4825, "step": 3068 }, { "epoch": 0.38, "grad_norm": 0.6737801292785671, "learning_rate": 7.104790173010268e-06, "loss": 0.5245, "step": 3069 }, { "epoch": 0.38, "grad_norm": 1.4078834972046943, "learning_rate": 7.102967026946758e-06, "loss": 0.5132, "step": 3070 }, { "epoch": 0.38, "grad_norm": 1.3875591801648743, "learning_rate": 7.101143541130407e-06, "loss": 0.4935, "step": 3071 }, { "epoch": 0.38, "grad_norm": 1.2158183608208777, "learning_rate": 7.0993197158558095e-06, "loss": 0.491, "step": 3072 }, { "epoch": 0.38, "grad_norm": 1.7437431691806793, "learning_rate": 7.097495551417621e-06, "loss": 0.5167, "step": 3073 }, { "epoch": 0.38, "grad_norm": 1.429828259302202, "learning_rate": 7.095671048110555e-06, "loss": 0.5072, "step": 3074 }, { "epoch": 0.38, "grad_norm": 1.4081079213848775, "learning_rate": 7.093846206229373e-06, "loss": 0.521, "step": 3075 }, { "epoch": 0.38, "grad_norm": 1.512805807502615, "learning_rate": 7.092021026068897e-06, "loss": 0.5068, "step": 3076 }, { "epoch": 0.38, "grad_norm": 1.6821786251772541, "learning_rate": 7.090195507923998e-06, "loss": 0.5147, "step": 3077 }, { "epoch": 0.38, "grad_norm": 1.5016684049091409, "learning_rate": 7.088369652089607e-06, "loss": 0.4811, "step": 3078 }, { "epoch": 0.38, "grad_norm": 1.3965871276845612, "learning_rate": 7.086543458860706e-06, "loss": 0.517, "step": 3079 }, { "epoch": 0.38, "grad_norm": 1.9053127055549555, "learning_rate": 7.084716928532334e-06, "loss": 0.4807, "step": 3080 }, { "epoch": 0.38, "grad_norm": 1.5364628357558445, "learning_rate": 7.0828900613995775e-06, "loss": 0.5673, "step": 3081 }, { "epoch": 0.38, "grad_norm": 1.5726149366761402, "learning_rate": 7.08106285775759e-06, "loss": 0.5595, "step": 3082 }, { "epoch": 0.38, "grad_norm": 1.4407485616166227, "learning_rate": 7.079235317901569e-06, "loss": 0.5062, "step": 3083 }, { "epoch": 0.38, "grad_norm": 1.9085919826528777, "learning_rate": 7.07740744212677e-06, "loss": 0.5158, "step": 3084 }, { "epoch": 0.38, "grad_norm": 1.486163869615989, "learning_rate": 7.0755792307285024e-06, "loss": 0.4841, "step": 3085 }, { "epoch": 0.38, "grad_norm": 1.3083113514428966, "learning_rate": 7.07375068400213e-06, "loss": 0.4705, "step": 3086 }, { "epoch": 0.38, "grad_norm": 1.831123081797392, "learning_rate": 7.0719218022430715e-06, "loss": 0.5052, "step": 3087 }, { "epoch": 0.38, "grad_norm": 1.6126664823754961, "learning_rate": 7.070092585746798e-06, "loss": 0.5042, "step": 3088 }, { "epoch": 0.38, "grad_norm": 1.4079867138889446, "learning_rate": 7.0682630348088336e-06, "loss": 0.5271, "step": 3089 }, { "epoch": 0.38, "grad_norm": 1.4242873896765287, "learning_rate": 7.066433149724762e-06, "loss": 0.4858, "step": 3090 }, { "epoch": 0.38, "grad_norm": 1.4017570824767323, "learning_rate": 7.064602930790215e-06, "loss": 0.5615, "step": 3091 }, { "epoch": 0.38, "grad_norm": 0.6840962622500598, "learning_rate": 7.062772378300882e-06, "loss": 0.4723, "step": 3092 }, { "epoch": 0.38, "grad_norm": 1.3857989267211437, "learning_rate": 7.060941492552502e-06, "loss": 0.5404, "step": 3093 }, { "epoch": 0.38, "grad_norm": 5.490729207593928, "learning_rate": 7.0591102738408735e-06, "loss": 0.543, "step": 3094 }, { "epoch": 0.38, "grad_norm": 2.5869525897166517, "learning_rate": 7.057278722461845e-06, "loss": 0.5254, "step": 3095 }, { "epoch": 0.38, "grad_norm": 1.6003221119375428, "learning_rate": 7.0554468387113214e-06, "loss": 0.524, "step": 3096 }, { "epoch": 0.38, "grad_norm": 1.557506866511841, "learning_rate": 7.053614622885258e-06, "loss": 0.4996, "step": 3097 }, { "epoch": 0.38, "grad_norm": 1.973734265611302, "learning_rate": 7.051782075279665e-06, "loss": 0.4846, "step": 3098 }, { "epoch": 0.38, "grad_norm": 1.393020111046219, "learning_rate": 7.049949196190607e-06, "loss": 0.5014, "step": 3099 }, { "epoch": 0.38, "grad_norm": 1.6516241639640925, "learning_rate": 7.048115985914204e-06, "loss": 0.4566, "step": 3100 }, { "epoch": 0.38, "grad_norm": 1.314792255216263, "learning_rate": 7.046282444746624e-06, "loss": 0.4513, "step": 3101 }, { "epoch": 0.38, "grad_norm": 1.5707224724922517, "learning_rate": 7.044448572984091e-06, "loss": 0.5088, "step": 3102 }, { "epoch": 0.39, "grad_norm": 1.4978671506491446, "learning_rate": 7.042614370922887e-06, "loss": 0.527, "step": 3103 }, { "epoch": 0.39, "grad_norm": 1.7301185957841059, "learning_rate": 7.0407798388593415e-06, "loss": 0.4978, "step": 3104 }, { "epoch": 0.39, "grad_norm": 1.2063768657721639, "learning_rate": 7.03894497708984e-06, "loss": 0.5352, "step": 3105 }, { "epoch": 0.39, "grad_norm": 1.5425265184295172, "learning_rate": 7.03710978591082e-06, "loss": 0.5108, "step": 3106 }, { "epoch": 0.39, "grad_norm": 1.646854735973331, "learning_rate": 7.035274265618772e-06, "loss": 0.5169, "step": 3107 }, { "epoch": 0.39, "grad_norm": 1.5097235016060884, "learning_rate": 7.033438416510241e-06, "loss": 0.4808, "step": 3108 }, { "epoch": 0.39, "grad_norm": 1.6812662874754638, "learning_rate": 7.031602238881826e-06, "loss": 0.5268, "step": 3109 }, { "epoch": 0.39, "grad_norm": 1.479023250144563, "learning_rate": 7.029765733030175e-06, "loss": 0.523, "step": 3110 }, { "epoch": 0.39, "grad_norm": 1.3145790277291225, "learning_rate": 7.027928899251995e-06, "loss": 0.5166, "step": 3111 }, { "epoch": 0.39, "grad_norm": 1.4619991861740071, "learning_rate": 7.02609173784404e-06, "loss": 0.5387, "step": 3112 }, { "epoch": 0.39, "grad_norm": 1.3539576115167475, "learning_rate": 7.0242542491031205e-06, "loss": 0.4792, "step": 3113 }, { "epoch": 0.39, "grad_norm": 1.859711344496118, "learning_rate": 7.022416433326099e-06, "loss": 0.5065, "step": 3114 }, { "epoch": 0.39, "grad_norm": 0.6856514624428732, "learning_rate": 7.020578290809892e-06, "loss": 0.4905, "step": 3115 }, { "epoch": 0.39, "grad_norm": 2.294158902627508, "learning_rate": 7.018739821851466e-06, "loss": 0.5295, "step": 3116 }, { "epoch": 0.39, "grad_norm": 1.5880413052145466, "learning_rate": 7.016901026747842e-06, "loss": 0.5467, "step": 3117 }, { "epoch": 0.39, "grad_norm": 1.3439530838922578, "learning_rate": 7.0150619057960926e-06, "loss": 0.4675, "step": 3118 }, { "epoch": 0.39, "grad_norm": 1.6216617436812264, "learning_rate": 7.0132224592933464e-06, "loss": 0.5298, "step": 3119 }, { "epoch": 0.39, "grad_norm": 1.5317615759809857, "learning_rate": 7.011382687536781e-06, "loss": 0.5099, "step": 3120 }, { "epoch": 0.39, "grad_norm": 1.8957008492341494, "learning_rate": 7.009542590823628e-06, "loss": 0.5106, "step": 3121 }, { "epoch": 0.39, "grad_norm": 1.3386803487504768, "learning_rate": 7.007702169451169e-06, "loss": 0.4911, "step": 3122 }, { "epoch": 0.39, "grad_norm": 1.600767614933352, "learning_rate": 7.0058614237167445e-06, "loss": 0.4981, "step": 3123 }, { "epoch": 0.39, "grad_norm": 1.6010427135166088, "learning_rate": 7.004020353917742e-06, "loss": 0.4986, "step": 3124 }, { "epoch": 0.39, "grad_norm": 1.6392739027265255, "learning_rate": 7.0021789603515995e-06, "loss": 0.5125, "step": 3125 }, { "epoch": 0.39, "grad_norm": 1.7549088594860496, "learning_rate": 7.000337243315812e-06, "loss": 0.4807, "step": 3126 }, { "epoch": 0.39, "grad_norm": 1.7399474805452981, "learning_rate": 6.998495203107927e-06, "loss": 0.4826, "step": 3127 }, { "epoch": 0.39, "grad_norm": 2.0452652233489848, "learning_rate": 6.996652840025539e-06, "loss": 0.5083, "step": 3128 }, { "epoch": 0.39, "grad_norm": 1.4516835604294887, "learning_rate": 6.994810154366302e-06, "loss": 0.481, "step": 3129 }, { "epoch": 0.39, "grad_norm": 2.229369461643623, "learning_rate": 6.992967146427913e-06, "loss": 0.5527, "step": 3130 }, { "epoch": 0.39, "grad_norm": 1.2383374837601475, "learning_rate": 6.991123816508131e-06, "loss": 0.4843, "step": 3131 }, { "epoch": 0.39, "grad_norm": 1.4156075245677167, "learning_rate": 6.989280164904759e-06, "loss": 0.4891, "step": 3132 }, { "epoch": 0.39, "grad_norm": 1.711299286904344, "learning_rate": 6.987436191915658e-06, "loss": 0.5811, "step": 3133 }, { "epoch": 0.39, "grad_norm": 2.4285050158864885, "learning_rate": 6.985591897838736e-06, "loss": 0.4793, "step": 3134 }, { "epoch": 0.39, "grad_norm": 1.6946023949726925, "learning_rate": 6.983747282971954e-06, "loss": 0.5321, "step": 3135 }, { "epoch": 0.39, "grad_norm": 1.468337455984439, "learning_rate": 6.981902347613328e-06, "loss": 0.4785, "step": 3136 }, { "epoch": 0.39, "grad_norm": 1.5886861702690127, "learning_rate": 6.980057092060924e-06, "loss": 0.4854, "step": 3137 }, { "epoch": 0.39, "grad_norm": 1.2264486795838963, "learning_rate": 6.9782115166128565e-06, "loss": 0.5179, "step": 3138 }, { "epoch": 0.39, "grad_norm": 1.5917890050453414, "learning_rate": 6.976365621567295e-06, "loss": 0.5331, "step": 3139 }, { "epoch": 0.39, "grad_norm": 1.478122933457729, "learning_rate": 6.974519407222462e-06, "loss": 0.4715, "step": 3140 }, { "epoch": 0.39, "grad_norm": 1.7984096108555954, "learning_rate": 6.9726728738766295e-06, "loss": 0.5443, "step": 3141 }, { "epoch": 0.39, "grad_norm": 1.6560769479857425, "learning_rate": 6.97082602182812e-06, "loss": 0.4792, "step": 3142 }, { "epoch": 0.39, "grad_norm": 1.7874171722080707, "learning_rate": 6.9689788513753094e-06, "loss": 0.5356, "step": 3143 }, { "epoch": 0.39, "grad_norm": 0.6950227531979957, "learning_rate": 6.967131362816623e-06, "loss": 0.4974, "step": 3144 }, { "epoch": 0.39, "grad_norm": 1.539464285764076, "learning_rate": 6.965283556450542e-06, "loss": 0.5479, "step": 3145 }, { "epoch": 0.39, "grad_norm": 1.4146763723631428, "learning_rate": 6.963435432575593e-06, "loss": 0.5373, "step": 3146 }, { "epoch": 0.39, "grad_norm": 1.4136312113724916, "learning_rate": 6.961586991490357e-06, "loss": 0.529, "step": 3147 }, { "epoch": 0.39, "grad_norm": 1.9189851726240688, "learning_rate": 6.959738233493466e-06, "loss": 0.5202, "step": 3148 }, { "epoch": 0.39, "grad_norm": 1.399927805798971, "learning_rate": 6.957889158883604e-06, "loss": 0.5258, "step": 3149 }, { "epoch": 0.39, "grad_norm": 1.6344488410503175, "learning_rate": 6.9560397679595044e-06, "loss": 0.5367, "step": 3150 }, { "epoch": 0.39, "grad_norm": 1.441472487643674, "learning_rate": 6.954190061019954e-06, "loss": 0.5707, "step": 3151 }, { "epoch": 0.39, "grad_norm": 1.4277228917494604, "learning_rate": 6.952340038363788e-06, "loss": 0.4729, "step": 3152 }, { "epoch": 0.39, "grad_norm": 1.3571806069444707, "learning_rate": 6.950489700289894e-06, "loss": 0.4925, "step": 3153 }, { "epoch": 0.39, "grad_norm": 1.3716644997004555, "learning_rate": 6.948639047097211e-06, "loss": 0.433, "step": 3154 }, { "epoch": 0.39, "grad_norm": 0.6052244929492939, "learning_rate": 6.946788079084727e-06, "loss": 0.4785, "step": 3155 }, { "epoch": 0.39, "grad_norm": 1.4155172324818357, "learning_rate": 6.944936796551482e-06, "loss": 0.4885, "step": 3156 }, { "epoch": 0.39, "grad_norm": 1.707369994295181, "learning_rate": 6.943085199796571e-06, "loss": 0.4817, "step": 3157 }, { "epoch": 0.39, "grad_norm": 1.4534014173887673, "learning_rate": 6.9412332891191315e-06, "loss": 0.5137, "step": 3158 }, { "epoch": 0.39, "grad_norm": 2.103367237771373, "learning_rate": 6.9393810648183566e-06, "loss": 0.5128, "step": 3159 }, { "epoch": 0.39, "grad_norm": 1.6222332634783083, "learning_rate": 6.937528527193491e-06, "loss": 0.469, "step": 3160 }, { "epoch": 0.39, "grad_norm": 3.3360922385144436, "learning_rate": 6.935675676543827e-06, "loss": 0.5262, "step": 3161 }, { "epoch": 0.39, "grad_norm": 0.6389463231703143, "learning_rate": 6.93382251316871e-06, "loss": 0.5106, "step": 3162 }, { "epoch": 0.39, "grad_norm": 1.4765630400181307, "learning_rate": 6.931969037367533e-06, "loss": 0.5152, "step": 3163 }, { "epoch": 0.39, "grad_norm": 1.4176110067019618, "learning_rate": 6.930115249439744e-06, "loss": 0.4844, "step": 3164 }, { "epoch": 0.39, "grad_norm": 1.3708693622727413, "learning_rate": 6.928261149684837e-06, "loss": 0.5306, "step": 3165 }, { "epoch": 0.39, "grad_norm": 1.4359433735125964, "learning_rate": 6.926406738402359e-06, "loss": 0.4372, "step": 3166 }, { "epoch": 0.39, "grad_norm": 1.567047899929472, "learning_rate": 6.924552015891905e-06, "loss": 0.5303, "step": 3167 }, { "epoch": 0.39, "grad_norm": 1.490193781690818, "learning_rate": 6.9226969824531254e-06, "loss": 0.5776, "step": 3168 }, { "epoch": 0.39, "grad_norm": 2.002612215630255, "learning_rate": 6.920841638385715e-06, "loss": 0.4715, "step": 3169 }, { "epoch": 0.39, "grad_norm": 1.8963193464549652, "learning_rate": 6.918985983989418e-06, "loss": 0.5875, "step": 3170 }, { "epoch": 0.39, "grad_norm": 2.213321963170932, "learning_rate": 6.917130019564034e-06, "loss": 0.5225, "step": 3171 }, { "epoch": 0.39, "grad_norm": 1.4544021040788373, "learning_rate": 6.915273745409413e-06, "loss": 0.5195, "step": 3172 }, { "epoch": 0.39, "grad_norm": 1.9278156233429231, "learning_rate": 6.913417161825449e-06, "loss": 0.5123, "step": 3173 }, { "epoch": 0.39, "grad_norm": 1.6524752099528455, "learning_rate": 6.911560269112092e-06, "loss": 0.5542, "step": 3174 }, { "epoch": 0.39, "grad_norm": 3.541481926746516, "learning_rate": 6.909703067569337e-06, "loss": 0.5246, "step": 3175 }, { "epoch": 0.39, "grad_norm": 1.4279355948714822, "learning_rate": 6.907845557497231e-06, "loss": 0.4483, "step": 3176 }, { "epoch": 0.39, "grad_norm": 1.743431193415441, "learning_rate": 6.905987739195874e-06, "loss": 0.5881, "step": 3177 }, { "epoch": 0.39, "grad_norm": 2.61440211095754, "learning_rate": 6.9041296129654125e-06, "loss": 0.5301, "step": 3178 }, { "epoch": 0.39, "grad_norm": 1.6702754112326066, "learning_rate": 6.902271179106041e-06, "loss": 0.5233, "step": 3179 }, { "epoch": 0.39, "grad_norm": 3.920713679990864, "learning_rate": 6.900412437918005e-06, "loss": 0.4744, "step": 3180 }, { "epoch": 0.39, "grad_norm": 1.3697518409491982, "learning_rate": 6.898553389701603e-06, "loss": 0.5108, "step": 3181 }, { "epoch": 0.39, "grad_norm": 2.1212805977850646, "learning_rate": 6.896694034757181e-06, "loss": 0.5294, "step": 3182 }, { "epoch": 0.4, "grad_norm": 1.5411551878710228, "learning_rate": 6.894834373385132e-06, "loss": 0.5106, "step": 3183 }, { "epoch": 0.4, "grad_norm": 1.4368851055143177, "learning_rate": 6.892974405885902e-06, "loss": 0.4735, "step": 3184 }, { "epoch": 0.4, "grad_norm": 1.9299020300383583, "learning_rate": 6.891114132559985e-06, "loss": 0.5244, "step": 3185 }, { "epoch": 0.4, "grad_norm": 1.473232038624168, "learning_rate": 6.8892535537079245e-06, "loss": 0.5071, "step": 3186 }, { "epoch": 0.4, "grad_norm": 1.588323591332897, "learning_rate": 6.8873926696303135e-06, "loss": 0.4919, "step": 3187 }, { "epoch": 0.4, "grad_norm": 0.7334748667952768, "learning_rate": 6.885531480627794e-06, "loss": 0.5087, "step": 3188 }, { "epoch": 0.4, "grad_norm": 1.4829244717106247, "learning_rate": 6.883669987001058e-06, "loss": 0.5095, "step": 3189 }, { "epoch": 0.4, "grad_norm": 1.6092064733528466, "learning_rate": 6.8818081890508456e-06, "loss": 0.5232, "step": 3190 }, { "epoch": 0.4, "grad_norm": 1.341271386639204, "learning_rate": 6.8799460870779465e-06, "loss": 0.5029, "step": 3191 }, { "epoch": 0.4, "grad_norm": 1.4481930839888286, "learning_rate": 6.878083681383198e-06, "loss": 0.5057, "step": 3192 }, { "epoch": 0.4, "grad_norm": 1.3083656791753513, "learning_rate": 6.876220972267494e-06, "loss": 0.4886, "step": 3193 }, { "epoch": 0.4, "grad_norm": 11.39296038386585, "learning_rate": 6.874357960031765e-06, "loss": 0.5084, "step": 3194 }, { "epoch": 0.4, "grad_norm": 1.900143200573506, "learning_rate": 6.872494644977e-06, "loss": 0.5262, "step": 3195 }, { "epoch": 0.4, "grad_norm": 1.8050499601037346, "learning_rate": 6.8706310274042345e-06, "loss": 0.5068, "step": 3196 }, { "epoch": 0.4, "grad_norm": 1.352345388329052, "learning_rate": 6.868767107614552e-06, "loss": 0.4604, "step": 3197 }, { "epoch": 0.4, "grad_norm": 1.6545175377347725, "learning_rate": 6.866902885909083e-06, "loss": 0.5316, "step": 3198 }, { "epoch": 0.4, "grad_norm": 1.2856771180257691, "learning_rate": 6.865038362589012e-06, "loss": 0.5097, "step": 3199 }, { "epoch": 0.4, "grad_norm": 1.6801585779785584, "learning_rate": 6.863173537955566e-06, "loss": 0.5037, "step": 3200 }, { "epoch": 0.4, "grad_norm": 1.6786852023536945, "learning_rate": 6.861308412310026e-06, "loss": 0.5134, "step": 3201 }, { "epoch": 0.4, "grad_norm": 1.3999297059160014, "learning_rate": 6.85944298595372e-06, "loss": 0.5327, "step": 3202 }, { "epoch": 0.4, "grad_norm": 1.3839457585180088, "learning_rate": 6.857577259188022e-06, "loss": 0.547, "step": 3203 }, { "epoch": 0.4, "grad_norm": 1.4739330800383263, "learning_rate": 6.855711232314358e-06, "loss": 0.5638, "step": 3204 }, { "epoch": 0.4, "grad_norm": 1.569513748632115, "learning_rate": 6.853844905634202e-06, "loss": 0.4931, "step": 3205 }, { "epoch": 0.4, "grad_norm": 1.8421076393489355, "learning_rate": 6.851978279449073e-06, "loss": 0.5661, "step": 3206 }, { "epoch": 0.4, "grad_norm": 1.956359431715477, "learning_rate": 6.850111354060543e-06, "loss": 0.4918, "step": 3207 }, { "epoch": 0.4, "grad_norm": 1.577412127294876, "learning_rate": 6.848244129770228e-06, "loss": 0.5145, "step": 3208 }, { "epoch": 0.4, "grad_norm": 1.4958117513380969, "learning_rate": 6.8463766068797964e-06, "loss": 0.5455, "step": 3209 }, { "epoch": 0.4, "grad_norm": 1.4911133031146588, "learning_rate": 6.844508785690964e-06, "loss": 0.489, "step": 3210 }, { "epoch": 0.4, "grad_norm": 1.4088742719955414, "learning_rate": 6.842640666505491e-06, "loss": 0.5215, "step": 3211 }, { "epoch": 0.4, "grad_norm": 1.6629950094869201, "learning_rate": 6.840772249625189e-06, "loss": 0.5135, "step": 3212 }, { "epoch": 0.4, "grad_norm": 1.2384801027680652, "learning_rate": 6.838903535351921e-06, "loss": 0.5416, "step": 3213 }, { "epoch": 0.4, "grad_norm": 1.460819899140803, "learning_rate": 6.837034523987589e-06, "loss": 0.5163, "step": 3214 }, { "epoch": 0.4, "grad_norm": 1.4184315724520054, "learning_rate": 6.835165215834151e-06, "loss": 0.4849, "step": 3215 }, { "epoch": 0.4, "grad_norm": 1.3412792084883807, "learning_rate": 6.83329561119361e-06, "loss": 0.5011, "step": 3216 }, { "epoch": 0.4, "grad_norm": 1.877516849937572, "learning_rate": 6.831425710368016e-06, "loss": 0.5177, "step": 3217 }, { "epoch": 0.4, "grad_norm": 1.4397412895230428, "learning_rate": 6.829555513659468e-06, "loss": 0.4972, "step": 3218 }, { "epoch": 0.4, "grad_norm": 1.6384697735109914, "learning_rate": 6.827685021370115e-06, "loss": 0.5218, "step": 3219 }, { "epoch": 0.4, "grad_norm": 1.8141569051259778, "learning_rate": 6.825814233802151e-06, "loss": 0.4911, "step": 3220 }, { "epoch": 0.4, "grad_norm": 1.306899734027489, "learning_rate": 6.8239431512578135e-06, "loss": 0.5356, "step": 3221 }, { "epoch": 0.4, "grad_norm": 1.3572460136657636, "learning_rate": 6.822071774039399e-06, "loss": 0.5072, "step": 3222 }, { "epoch": 0.4, "grad_norm": 1.5945829296918865, "learning_rate": 6.820200102449243e-06, "loss": 0.5119, "step": 3223 }, { "epoch": 0.4, "grad_norm": 1.2301820912603123, "learning_rate": 6.818328136789727e-06, "loss": 0.4695, "step": 3224 }, { "epoch": 0.4, "grad_norm": 1.379457106022792, "learning_rate": 6.816455877363286e-06, "loss": 0.4784, "step": 3225 }, { "epoch": 0.4, "grad_norm": 1.3225665325357554, "learning_rate": 6.814583324472401e-06, "loss": 0.5207, "step": 3226 }, { "epoch": 0.4, "grad_norm": 1.2700246527823775, "learning_rate": 6.8127104784196e-06, "loss": 0.461, "step": 3227 }, { "epoch": 0.4, "grad_norm": 1.8472757730911826, "learning_rate": 6.810837339507454e-06, "loss": 0.5445, "step": 3228 }, { "epoch": 0.4, "grad_norm": 0.6707847338140217, "learning_rate": 6.808963908038589e-06, "loss": 0.4855, "step": 3229 }, { "epoch": 0.4, "grad_norm": 2.691319968280161, "learning_rate": 6.807090184315671e-06, "loss": 0.5171, "step": 3230 }, { "epoch": 0.4, "grad_norm": 1.2302438266834328, "learning_rate": 6.80521616864142e-06, "loss": 0.4727, "step": 3231 }, { "epoch": 0.4, "grad_norm": 1.3207802784267022, "learning_rate": 6.803341861318598e-06, "loss": 0.5208, "step": 3232 }, { "epoch": 0.4, "grad_norm": 1.4476132703215128, "learning_rate": 6.801467262650015e-06, "loss": 0.476, "step": 3233 }, { "epoch": 0.4, "grad_norm": 1.6336738979678858, "learning_rate": 6.799592372938529e-06, "loss": 0.5643, "step": 3234 }, { "epoch": 0.4, "grad_norm": 1.835620022589171, "learning_rate": 6.797717192487046e-06, "loss": 0.5108, "step": 3235 }, { "epoch": 0.4, "grad_norm": 1.5619037774003635, "learning_rate": 6.79584172159852e-06, "loss": 0.4783, "step": 3236 }, { "epoch": 0.4, "grad_norm": 1.7819998664226975, "learning_rate": 6.793965960575944e-06, "loss": 0.5288, "step": 3237 }, { "epoch": 0.4, "grad_norm": 1.4359135475085967, "learning_rate": 6.79208990972237e-06, "loss": 0.5227, "step": 3238 }, { "epoch": 0.4, "grad_norm": 1.6807354645349077, "learning_rate": 6.790213569340887e-06, "loss": 0.5521, "step": 3239 }, { "epoch": 0.4, "grad_norm": 1.3640185458322238, "learning_rate": 6.788336939734634e-06, "loss": 0.5437, "step": 3240 }, { "epoch": 0.4, "grad_norm": 1.5519889606097421, "learning_rate": 6.7864600212068e-06, "loss": 0.4967, "step": 3241 }, { "epoch": 0.4, "grad_norm": 1.3749868972314432, "learning_rate": 6.784582814060615e-06, "loss": 0.4629, "step": 3242 }, { "epoch": 0.4, "grad_norm": 1.7267913886036812, "learning_rate": 6.78270531859936e-06, "loss": 0.5539, "step": 3243 }, { "epoch": 0.4, "grad_norm": 1.609565352487597, "learning_rate": 6.7808275351263595e-06, "loss": 0.4702, "step": 3244 }, { "epoch": 0.4, "grad_norm": 1.4380316093444492, "learning_rate": 6.778949463944985e-06, "loss": 0.4981, "step": 3245 }, { "epoch": 0.4, "grad_norm": 1.515498940838077, "learning_rate": 6.777071105358659e-06, "loss": 0.5276, "step": 3246 }, { "epoch": 0.4, "grad_norm": 2.2043397648266816, "learning_rate": 6.775192459670844e-06, "loss": 0.5164, "step": 3247 }, { "epoch": 0.4, "grad_norm": 1.364429422355196, "learning_rate": 6.773313527185053e-06, "loss": 0.5203, "step": 3248 }, { "epoch": 0.4, "grad_norm": 1.2666796513747796, "learning_rate": 6.771434308204844e-06, "loss": 0.48, "step": 3249 }, { "epoch": 0.4, "grad_norm": 1.6534994415436592, "learning_rate": 6.769554803033821e-06, "loss": 0.4969, "step": 3250 }, { "epoch": 0.4, "grad_norm": 1.2553049497037567, "learning_rate": 6.767675011975634e-06, "loss": 0.531, "step": 3251 }, { "epoch": 0.4, "grad_norm": 1.7674560829884376, "learning_rate": 6.765794935333981e-06, "loss": 0.5026, "step": 3252 }, { "epoch": 0.4, "grad_norm": 1.829940354533642, "learning_rate": 6.763914573412604e-06, "loss": 0.5008, "step": 3253 }, { "epoch": 0.4, "grad_norm": 10.78692529089743, "learning_rate": 6.762033926515293e-06, "loss": 0.4988, "step": 3254 }, { "epoch": 0.4, "grad_norm": 1.6657136686058656, "learning_rate": 6.760152994945882e-06, "loss": 0.4766, "step": 3255 }, { "epoch": 0.4, "grad_norm": 1.3284019099181725, "learning_rate": 6.758271779008254e-06, "loss": 0.5209, "step": 3256 }, { "epoch": 0.4, "grad_norm": 1.4389937674757496, "learning_rate": 6.756390279006333e-06, "loss": 0.5446, "step": 3257 }, { "epoch": 0.4, "grad_norm": 1.4769530562233115, "learning_rate": 6.754508495244096e-06, "loss": 0.4907, "step": 3258 }, { "epoch": 0.4, "grad_norm": 1.562785102305474, "learning_rate": 6.752626428025557e-06, "loss": 0.4999, "step": 3259 }, { "epoch": 0.4, "grad_norm": 1.5520415303055872, "learning_rate": 6.750744077654783e-06, "loss": 0.547, "step": 3260 }, { "epoch": 0.4, "grad_norm": 1.2761812793650475, "learning_rate": 6.748861444435885e-06, "loss": 0.4909, "step": 3261 }, { "epoch": 0.4, "grad_norm": 1.5786024125723916, "learning_rate": 6.746978528673016e-06, "loss": 0.5128, "step": 3262 }, { "epoch": 0.4, "grad_norm": 1.5004031067813843, "learning_rate": 6.7450953306703815e-06, "loss": 0.4872, "step": 3263 }, { "epoch": 0.41, "grad_norm": 1.7776122389127431, "learning_rate": 6.743211850732227e-06, "loss": 0.5313, "step": 3264 }, { "epoch": 0.41, "grad_norm": 1.5788272163994252, "learning_rate": 6.7413280891628445e-06, "loss": 0.5826, "step": 3265 }, { "epoch": 0.41, "grad_norm": 1.4900859967676385, "learning_rate": 6.739444046266572e-06, "loss": 0.5607, "step": 3266 }, { "epoch": 0.41, "grad_norm": 1.5222895795374038, "learning_rate": 6.7375597223477975e-06, "loss": 0.4946, "step": 3267 }, { "epoch": 0.41, "grad_norm": 1.3541859908879346, "learning_rate": 6.7356751177109435e-06, "loss": 0.5121, "step": 3268 }, { "epoch": 0.41, "grad_norm": 1.3777186829538355, "learning_rate": 6.73379023266049e-06, "loss": 0.56, "step": 3269 }, { "epoch": 0.41, "grad_norm": 1.4314582012171, "learning_rate": 6.731905067500952e-06, "loss": 0.5078, "step": 3270 }, { "epoch": 0.41, "grad_norm": 1.639823130130427, "learning_rate": 6.730019622536899e-06, "loss": 0.5102, "step": 3271 }, { "epoch": 0.41, "grad_norm": 1.4987241384585417, "learning_rate": 6.72813389807294e-06, "loss": 0.5499, "step": 3272 }, { "epoch": 0.41, "grad_norm": 0.6573727062661225, "learning_rate": 6.726247894413728e-06, "loss": 0.5103, "step": 3273 }, { "epoch": 0.41, "grad_norm": 3.261235589777226, "learning_rate": 6.724361611863964e-06, "loss": 0.4597, "step": 3274 }, { "epoch": 0.41, "grad_norm": 2.3445659210242793, "learning_rate": 6.722475050728396e-06, "loss": 0.55, "step": 3275 }, { "epoch": 0.41, "grad_norm": 1.5693155050154757, "learning_rate": 6.720588211311815e-06, "loss": 0.5569, "step": 3276 }, { "epoch": 0.41, "grad_norm": 0.6456907454871615, "learning_rate": 6.7187010939190555e-06, "loss": 0.5133, "step": 3277 }, { "epoch": 0.41, "grad_norm": 1.6245573463874465, "learning_rate": 6.7168136988549935e-06, "loss": 0.4931, "step": 3278 }, { "epoch": 0.41, "grad_norm": 1.8694239394978942, "learning_rate": 6.714926026424561e-06, "loss": 0.5084, "step": 3279 }, { "epoch": 0.41, "grad_norm": 1.6244660101509918, "learning_rate": 6.713038076932725e-06, "loss": 0.5219, "step": 3280 }, { "epoch": 0.41, "grad_norm": 1.5859703131172727, "learning_rate": 6.711149850684499e-06, "loss": 0.511, "step": 3281 }, { "epoch": 0.41, "grad_norm": 1.6905258567661081, "learning_rate": 6.709261347984946e-06, "loss": 0.4687, "step": 3282 }, { "epoch": 0.41, "grad_norm": 1.3208341183094228, "learning_rate": 6.707372569139167e-06, "loss": 0.4472, "step": 3283 }, { "epoch": 0.41, "grad_norm": 1.7078454269081773, "learning_rate": 6.705483514452314e-06, "loss": 0.518, "step": 3284 }, { "epoch": 0.41, "grad_norm": 2.776538177003102, "learning_rate": 6.703594184229576e-06, "loss": 0.5311, "step": 3285 }, { "epoch": 0.41, "grad_norm": 1.429757999962165, "learning_rate": 6.701704578776196e-06, "loss": 0.4716, "step": 3286 }, { "epoch": 0.41, "grad_norm": 1.2293398881643591, "learning_rate": 6.699814698397454e-06, "loss": 0.4841, "step": 3287 }, { "epoch": 0.41, "grad_norm": 1.295501405232231, "learning_rate": 6.697924543398675e-06, "loss": 0.4988, "step": 3288 }, { "epoch": 0.41, "grad_norm": 1.4495497732771263, "learning_rate": 6.696034114085233e-06, "loss": 0.5466, "step": 3289 }, { "epoch": 0.41, "grad_norm": 1.684673687979301, "learning_rate": 6.694143410762543e-06, "loss": 0.5172, "step": 3290 }, { "epoch": 0.41, "grad_norm": 1.4588467248806118, "learning_rate": 6.692252433736063e-06, "loss": 0.486, "step": 3291 }, { "epoch": 0.41, "grad_norm": 1.454877955929744, "learning_rate": 6.690361183311299e-06, "loss": 0.5379, "step": 3292 }, { "epoch": 0.41, "grad_norm": 1.4608802875863345, "learning_rate": 6.688469659793799e-06, "loss": 0.5089, "step": 3293 }, { "epoch": 0.41, "grad_norm": 1.2498258415718937, "learning_rate": 6.686577863489154e-06, "loss": 0.5039, "step": 3294 }, { "epoch": 0.41, "grad_norm": 1.4267227972809884, "learning_rate": 6.684685794703003e-06, "loss": 0.5738, "step": 3295 }, { "epoch": 0.41, "grad_norm": 2.4259808443137425, "learning_rate": 6.682793453741022e-06, "loss": 0.5103, "step": 3296 }, { "epoch": 0.41, "grad_norm": 1.506083836138939, "learning_rate": 6.6809008409089396e-06, "loss": 0.565, "step": 3297 }, { "epoch": 0.41, "grad_norm": 1.5930280829423225, "learning_rate": 6.679007956512522e-06, "loss": 0.5352, "step": 3298 }, { "epoch": 0.41, "grad_norm": 1.4478525083131581, "learning_rate": 6.6771148008575805e-06, "loss": 0.4984, "step": 3299 }, { "epoch": 0.41, "grad_norm": 1.562424146635213, "learning_rate": 6.675221374249972e-06, "loss": 0.5472, "step": 3300 }, { "epoch": 0.41, "grad_norm": 1.4015293961648203, "learning_rate": 6.673327676995598e-06, "loss": 0.492, "step": 3301 }, { "epoch": 0.41, "grad_norm": 1.5461992961632107, "learning_rate": 6.671433709400399e-06, "loss": 0.5391, "step": 3302 }, { "epoch": 0.41, "grad_norm": 2.0638014465505985, "learning_rate": 6.6695394717703654e-06, "loss": 0.5239, "step": 3303 }, { "epoch": 0.41, "grad_norm": 1.4292878955601038, "learning_rate": 6.6676449644115246e-06, "loss": 0.5406, "step": 3304 }, { "epoch": 0.41, "grad_norm": 1.8768562923306327, "learning_rate": 6.665750187629953e-06, "loss": 0.5434, "step": 3305 }, { "epoch": 0.41, "grad_norm": 1.62994328696491, "learning_rate": 6.6638551417317675e-06, "loss": 0.547, "step": 3306 }, { "epoch": 0.41, "grad_norm": 1.5736630956087028, "learning_rate": 6.66195982702313e-06, "loss": 0.5195, "step": 3307 }, { "epoch": 0.41, "grad_norm": 1.4803308672254103, "learning_rate": 6.6600642438102454e-06, "loss": 0.527, "step": 3308 }, { "epoch": 0.41, "grad_norm": 1.4342128458844394, "learning_rate": 6.658168392399362e-06, "loss": 0.4953, "step": 3309 }, { "epoch": 0.41, "grad_norm": 1.412313512785876, "learning_rate": 6.656272273096771e-06, "loss": 0.5431, "step": 3310 }, { "epoch": 0.41, "grad_norm": 1.6077253118836052, "learning_rate": 6.654375886208806e-06, "loss": 0.5168, "step": 3311 }, { "epoch": 0.41, "grad_norm": 1.7743537280984463, "learning_rate": 6.652479232041849e-06, "loss": 0.4673, "step": 3312 }, { "epoch": 0.41, "grad_norm": 1.3382429044577637, "learning_rate": 6.650582310902316e-06, "loss": 0.5023, "step": 3313 }, { "epoch": 0.41, "grad_norm": 1.336504964303114, "learning_rate": 6.648685123096674e-06, "loss": 0.4931, "step": 3314 }, { "epoch": 0.41, "grad_norm": 1.468379926370309, "learning_rate": 6.646787668931429e-06, "loss": 0.5196, "step": 3315 }, { "epoch": 0.41, "grad_norm": 1.4020497395917302, "learning_rate": 6.644889948713135e-06, "loss": 0.4779, "step": 3316 }, { "epoch": 0.41, "grad_norm": 1.416022357706084, "learning_rate": 6.642991962748381e-06, "loss": 0.5308, "step": 3317 }, { "epoch": 0.41, "grad_norm": 1.4729653949046748, "learning_rate": 6.641093711343806e-06, "loss": 0.4772, "step": 3318 }, { "epoch": 0.41, "grad_norm": 3.288982109275439, "learning_rate": 6.639195194806087e-06, "loss": 0.5182, "step": 3319 }, { "epoch": 0.41, "grad_norm": 1.2569075774740808, "learning_rate": 6.637296413441949e-06, "loss": 0.4978, "step": 3320 }, { "epoch": 0.41, "grad_norm": 1.8210600894090125, "learning_rate": 6.635397367558156e-06, "loss": 0.5356, "step": 3321 }, { "epoch": 0.41, "grad_norm": 1.4408394135319371, "learning_rate": 6.633498057461514e-06, "loss": 0.4808, "step": 3322 }, { "epoch": 0.41, "grad_norm": 1.3604884909269044, "learning_rate": 6.631598483458874e-06, "loss": 0.4612, "step": 3323 }, { "epoch": 0.41, "grad_norm": 1.6252149096540622, "learning_rate": 6.629698645857129e-06, "loss": 0.5598, "step": 3324 }, { "epoch": 0.41, "grad_norm": 1.1562231472595759, "learning_rate": 6.6277985449632155e-06, "loss": 0.4699, "step": 3325 }, { "epoch": 0.41, "grad_norm": 1.9768297893227216, "learning_rate": 6.625898181084111e-06, "loss": 0.5483, "step": 3326 }, { "epoch": 0.41, "grad_norm": 1.5422501208070072, "learning_rate": 6.623997554526833e-06, "loss": 0.4897, "step": 3327 }, { "epoch": 0.41, "grad_norm": 3.361915446466029, "learning_rate": 6.62209666559845e-06, "loss": 0.5565, "step": 3328 }, { "epoch": 0.41, "grad_norm": 1.4386159169197326, "learning_rate": 6.620195514606063e-06, "loss": 0.5225, "step": 3329 }, { "epoch": 0.41, "grad_norm": 1.4925820107560432, "learning_rate": 6.6182941018568224e-06, "loss": 0.5247, "step": 3330 }, { "epoch": 0.41, "grad_norm": 1.414937074964594, "learning_rate": 6.616392427657918e-06, "loss": 0.5447, "step": 3331 }, { "epoch": 0.41, "grad_norm": 1.419668460668176, "learning_rate": 6.614490492316578e-06, "loss": 0.5357, "step": 3332 }, { "epoch": 0.41, "grad_norm": 1.781817186987209, "learning_rate": 6.612588296140082e-06, "loss": 0.5121, "step": 3333 }, { "epoch": 0.41, "grad_norm": 3.48145844537759, "learning_rate": 6.610685839435744e-06, "loss": 0.5703, "step": 3334 }, { "epoch": 0.41, "grad_norm": 1.3152571504495545, "learning_rate": 6.608783122510922e-06, "loss": 0.515, "step": 3335 }, { "epoch": 0.41, "grad_norm": 1.410781395131533, "learning_rate": 6.606880145673018e-06, "loss": 0.5163, "step": 3336 }, { "epoch": 0.41, "grad_norm": 1.5398437941017569, "learning_rate": 6.604976909229475e-06, "loss": 0.5226, "step": 3337 }, { "epoch": 0.41, "grad_norm": 1.8230133487038807, "learning_rate": 6.603073413487777e-06, "loss": 0.5104, "step": 3338 }, { "epoch": 0.41, "grad_norm": 1.319234468105146, "learning_rate": 6.6011696587554495e-06, "loss": 0.5276, "step": 3339 }, { "epoch": 0.41, "grad_norm": 1.8885195195601567, "learning_rate": 6.599265645340063e-06, "loss": 0.5423, "step": 3340 }, { "epoch": 0.41, "grad_norm": 7.341219235782096, "learning_rate": 6.597361373549226e-06, "loss": 0.4838, "step": 3341 }, { "epoch": 0.41, "grad_norm": 1.9725768133216375, "learning_rate": 6.595456843690591e-06, "loss": 0.5362, "step": 3342 }, { "epoch": 0.41, "grad_norm": 1.7764264535883638, "learning_rate": 6.59355205607185e-06, "loss": 0.4959, "step": 3343 }, { "epoch": 0.41, "grad_norm": 0.657915233528162, "learning_rate": 6.59164701100074e-06, "loss": 0.5096, "step": 3344 }, { "epoch": 0.42, "grad_norm": 1.365255514796697, "learning_rate": 6.589741708785038e-06, "loss": 0.4849, "step": 3345 }, { "epoch": 0.42, "grad_norm": 1.385547860326123, "learning_rate": 6.587836149732562e-06, "loss": 0.5173, "step": 3346 }, { "epoch": 0.42, "grad_norm": 1.6822192304164865, "learning_rate": 6.585930334151172e-06, "loss": 0.4999, "step": 3347 }, { "epoch": 0.42, "grad_norm": 1.401093568895059, "learning_rate": 6.584024262348767e-06, "loss": 0.5137, "step": 3348 }, { "epoch": 0.42, "grad_norm": 1.5128507982800243, "learning_rate": 6.582117934633293e-06, "loss": 0.5186, "step": 3349 }, { "epoch": 0.42, "grad_norm": 1.3496302725839122, "learning_rate": 6.580211351312733e-06, "loss": 0.5126, "step": 3350 }, { "epoch": 0.42, "grad_norm": 1.5700861571843783, "learning_rate": 6.5783045126951104e-06, "loss": 0.5038, "step": 3351 }, { "epoch": 0.42, "grad_norm": 1.332489990796265, "learning_rate": 6.576397419088494e-06, "loss": 0.5392, "step": 3352 }, { "epoch": 0.42, "grad_norm": 3.3029563614078903, "learning_rate": 6.574490070800991e-06, "loss": 0.508, "step": 3353 }, { "epoch": 0.42, "grad_norm": 3.434447606032874, "learning_rate": 6.5725824681407505e-06, "loss": 0.5029, "step": 3354 }, { "epoch": 0.42, "grad_norm": 1.4351387854652031, "learning_rate": 6.570674611415962e-06, "loss": 0.5237, "step": 3355 }, { "epoch": 0.42, "grad_norm": 0.7332743393085002, "learning_rate": 6.5687665009348564e-06, "loss": 0.5281, "step": 3356 }, { "epoch": 0.42, "grad_norm": 1.3254583143473562, "learning_rate": 6.566858137005707e-06, "loss": 0.5138, "step": 3357 }, { "epoch": 0.42, "grad_norm": 1.6459229735641585, "learning_rate": 6.564949519936825e-06, "loss": 0.5463, "step": 3358 }, { "epoch": 0.42, "grad_norm": 1.798749927990085, "learning_rate": 6.563040650036566e-06, "loss": 0.5203, "step": 3359 }, { "epoch": 0.42, "grad_norm": 1.8899694122457764, "learning_rate": 6.5611315276133224e-06, "loss": 0.488, "step": 3360 }, { "epoch": 0.42, "grad_norm": 1.2222412116863846, "learning_rate": 6.559222152975533e-06, "loss": 0.4919, "step": 3361 }, { "epoch": 0.42, "grad_norm": 1.6101329963661517, "learning_rate": 6.5573125264316715e-06, "loss": 0.5285, "step": 3362 }, { "epoch": 0.42, "grad_norm": 1.3733533959449282, "learning_rate": 6.555402648290256e-06, "loss": 0.4836, "step": 3363 }, { "epoch": 0.42, "grad_norm": 1.3305592653903977, "learning_rate": 6.553492518859843e-06, "loss": 0.4739, "step": 3364 }, { "epoch": 0.42, "grad_norm": 1.5819335222378026, "learning_rate": 6.551582138449033e-06, "loss": 0.5243, "step": 3365 }, { "epoch": 0.42, "grad_norm": 1.68120273290888, "learning_rate": 6.549671507366464e-06, "loss": 0.5258, "step": 3366 }, { "epoch": 0.42, "grad_norm": 1.6916725965006718, "learning_rate": 6.547760625920814e-06, "loss": 0.5715, "step": 3367 }, { "epoch": 0.42, "grad_norm": 1.5603014124207755, "learning_rate": 6.545849494420802e-06, "loss": 0.5392, "step": 3368 }, { "epoch": 0.42, "grad_norm": 2.110977366460912, "learning_rate": 6.543938113175191e-06, "loss": 0.5224, "step": 3369 }, { "epoch": 0.42, "grad_norm": 1.3684723813682567, "learning_rate": 6.5420264824927796e-06, "loss": 0.4657, "step": 3370 }, { "epoch": 0.42, "grad_norm": 1.4282419027256408, "learning_rate": 6.540114602682409e-06, "loss": 0.527, "step": 3371 }, { "epoch": 0.42, "grad_norm": 1.4251475318763287, "learning_rate": 6.5382024740529605e-06, "loss": 0.4968, "step": 3372 }, { "epoch": 0.42, "grad_norm": 1.3420448115949397, "learning_rate": 6.536290096913354e-06, "loss": 0.5144, "step": 3373 }, { "epoch": 0.42, "grad_norm": 1.295778067774979, "learning_rate": 6.5343774715725525e-06, "loss": 0.4714, "step": 3374 }, { "epoch": 0.42, "grad_norm": 1.3489738242574505, "learning_rate": 6.532464598339557e-06, "loss": 0.5089, "step": 3375 }, { "epoch": 0.42, "grad_norm": 1.7563087403545115, "learning_rate": 6.530551477523411e-06, "loss": 0.535, "step": 3376 }, { "epoch": 0.42, "grad_norm": 1.4205894446580483, "learning_rate": 6.528638109433191e-06, "loss": 0.5118, "step": 3377 }, { "epoch": 0.42, "grad_norm": 1.5426548496710313, "learning_rate": 6.526724494378023e-06, "loss": 0.4733, "step": 3378 }, { "epoch": 0.42, "grad_norm": 1.4987232712876852, "learning_rate": 6.524810632667066e-06, "loss": 0.4909, "step": 3379 }, { "epoch": 0.42, "grad_norm": 1.790263490504134, "learning_rate": 6.522896524609521e-06, "loss": 0.51, "step": 3380 }, { "epoch": 0.42, "grad_norm": 1.8194563671942883, "learning_rate": 6.520982170514631e-06, "loss": 0.4846, "step": 3381 }, { "epoch": 0.42, "grad_norm": 1.2429209424748784, "learning_rate": 6.519067570691675e-06, "loss": 0.4647, "step": 3382 }, { "epoch": 0.42, "grad_norm": 1.342898948876468, "learning_rate": 6.517152725449976e-06, "loss": 0.5154, "step": 3383 }, { "epoch": 0.42, "grad_norm": 1.3705724281814533, "learning_rate": 6.515237635098891e-06, "loss": 0.5706, "step": 3384 }, { "epoch": 0.42, "grad_norm": 1.5642337602627236, "learning_rate": 6.513322299947822e-06, "loss": 0.5745, "step": 3385 }, { "epoch": 0.42, "grad_norm": 1.4367398118514414, "learning_rate": 6.511406720306206e-06, "loss": 0.527, "step": 3386 }, { "epoch": 0.42, "grad_norm": 1.2768734239191017, "learning_rate": 6.509490896483524e-06, "loss": 0.4542, "step": 3387 }, { "epoch": 0.42, "grad_norm": 1.4721952171363482, "learning_rate": 6.507574828789292e-06, "loss": 0.4273, "step": 3388 }, { "epoch": 0.42, "grad_norm": 1.8717370896892942, "learning_rate": 6.50565851753307e-06, "loss": 0.5026, "step": 3389 }, { "epoch": 0.42, "grad_norm": 1.4527504054398452, "learning_rate": 6.503741963024454e-06, "loss": 0.5421, "step": 3390 }, { "epoch": 0.42, "grad_norm": 6.178249397843699, "learning_rate": 6.5018251655730795e-06, "loss": 0.5536, "step": 3391 }, { "epoch": 0.42, "grad_norm": 1.2495856813403767, "learning_rate": 6.499908125488623e-06, "loss": 0.5353, "step": 3392 }, { "epoch": 0.42, "grad_norm": 2.096512316760758, "learning_rate": 6.4979908430807995e-06, "loss": 0.5541, "step": 3393 }, { "epoch": 0.42, "grad_norm": 1.408989150160295, "learning_rate": 6.4960733186593604e-06, "loss": 0.5071, "step": 3394 }, { "epoch": 0.42, "grad_norm": 1.6106819885568957, "learning_rate": 6.494155552534102e-06, "loss": 0.4794, "step": 3395 }, { "epoch": 0.42, "grad_norm": 1.8647778316019838, "learning_rate": 6.492237545014853e-06, "loss": 0.5133, "step": 3396 }, { "epoch": 0.42, "grad_norm": 1.4265607978785886, "learning_rate": 6.490319296411487e-06, "loss": 0.5425, "step": 3397 }, { "epoch": 0.42, "grad_norm": 1.239661238066202, "learning_rate": 6.488400807033913e-06, "loss": 0.468, "step": 3398 }, { "epoch": 0.42, "grad_norm": 1.5414791454347683, "learning_rate": 6.486482077192081e-06, "loss": 0.5226, "step": 3399 }, { "epoch": 0.42, "grad_norm": 1.610310909755451, "learning_rate": 6.484563107195977e-06, "loss": 0.5389, "step": 3400 }, { "epoch": 0.42, "grad_norm": 1.2558984430146658, "learning_rate": 6.482643897355628e-06, "loss": 0.5248, "step": 3401 }, { "epoch": 0.42, "grad_norm": 0.6212989357041312, "learning_rate": 6.4807244479810995e-06, "loss": 0.5324, "step": 3402 }, { "epoch": 0.42, "grad_norm": 2.1973737707139973, "learning_rate": 6.478804759382495e-06, "loss": 0.4898, "step": 3403 }, { "epoch": 0.42, "grad_norm": 1.3683074620200943, "learning_rate": 6.476884831869958e-06, "loss": 0.4739, "step": 3404 }, { "epoch": 0.42, "grad_norm": 1.4320958020667869, "learning_rate": 6.4749646657536695e-06, "loss": 0.4828, "step": 3405 }, { "epoch": 0.42, "grad_norm": 4.118003030237448, "learning_rate": 6.473044261343848e-06, "loss": 0.5994, "step": 3406 }, { "epoch": 0.42, "grad_norm": 1.5132374141721459, "learning_rate": 6.4711236189507535e-06, "loss": 0.5209, "step": 3407 }, { "epoch": 0.42, "grad_norm": 2.9169913340489075, "learning_rate": 6.469202738884681e-06, "loss": 0.4791, "step": 3408 }, { "epoch": 0.42, "grad_norm": 1.5541877617357418, "learning_rate": 6.467281621455967e-06, "loss": 0.5716, "step": 3409 }, { "epoch": 0.42, "grad_norm": 1.3159596193980914, "learning_rate": 6.465360266974984e-06, "loss": 0.5009, "step": 3410 }, { "epoch": 0.42, "grad_norm": 2.5372368754643895, "learning_rate": 6.463438675752145e-06, "loss": 0.5137, "step": 3411 }, { "epoch": 0.42, "grad_norm": 1.8482767490950647, "learning_rate": 6.461516848097899e-06, "loss": 0.5033, "step": 3412 }, { "epoch": 0.42, "grad_norm": 1.3063262071097612, "learning_rate": 6.459594784322734e-06, "loss": 0.5729, "step": 3413 }, { "epoch": 0.42, "grad_norm": 1.4749651475169578, "learning_rate": 6.457672484737177e-06, "loss": 0.5183, "step": 3414 }, { "epoch": 0.42, "grad_norm": 1.371908921907626, "learning_rate": 6.455749949651791e-06, "loss": 0.5636, "step": 3415 }, { "epoch": 0.42, "grad_norm": 1.6074630823942877, "learning_rate": 6.45382717937718e-06, "loss": 0.5369, "step": 3416 }, { "epoch": 0.42, "grad_norm": 1.4537404369016569, "learning_rate": 6.4519041742239844e-06, "loss": 0.4837, "step": 3417 }, { "epoch": 0.42, "grad_norm": 2.470038846548514, "learning_rate": 6.449980934502881e-06, "loss": 0.5673, "step": 3418 }, { "epoch": 0.42, "grad_norm": 1.411105238613095, "learning_rate": 6.448057460524588e-06, "loss": 0.5568, "step": 3419 }, { "epoch": 0.42, "grad_norm": 1.672003949426101, "learning_rate": 6.44613375259986e-06, "loss": 0.4877, "step": 3420 }, { "epoch": 0.42, "grad_norm": 2.7186118198150053, "learning_rate": 6.444209811039488e-06, "loss": 0.5366, "step": 3421 }, { "epoch": 0.42, "grad_norm": 0.6707392924215835, "learning_rate": 6.4422856361543e-06, "loss": 0.549, "step": 3422 }, { "epoch": 0.42, "grad_norm": 1.479892625489135, "learning_rate": 6.440361228255165e-06, "loss": 0.5229, "step": 3423 }, { "epoch": 0.42, "grad_norm": 1.6666535300936052, "learning_rate": 6.438436587652989e-06, "loss": 0.4785, "step": 3424 }, { "epoch": 0.43, "grad_norm": 1.6953266703542345, "learning_rate": 6.436511714658713e-06, "loss": 0.5738, "step": 3425 }, { "epoch": 0.43, "grad_norm": 1.6245088591621328, "learning_rate": 6.434586609583316e-06, "loss": 0.4554, "step": 3426 }, { "epoch": 0.43, "grad_norm": 1.5209450118584893, "learning_rate": 6.43266127273782e-06, "loss": 0.4935, "step": 3427 }, { "epoch": 0.43, "grad_norm": 1.3368355711738351, "learning_rate": 6.430735704433278e-06, "loss": 0.5396, "step": 3428 }, { "epoch": 0.43, "grad_norm": 9.899006884541238, "learning_rate": 6.428809904980782e-06, "loss": 0.4872, "step": 3429 }, { "epoch": 0.43, "grad_norm": 1.5256633510601914, "learning_rate": 6.426883874691461e-06, "loss": 0.4879, "step": 3430 }, { "epoch": 0.43, "grad_norm": 1.7959945788719616, "learning_rate": 6.424957613876483e-06, "loss": 0.4867, "step": 3431 }, { "epoch": 0.43, "grad_norm": 2.035467778275846, "learning_rate": 6.4230311228470535e-06, "loss": 0.5156, "step": 3432 }, { "epoch": 0.43, "grad_norm": 1.6409432369288743, "learning_rate": 6.421104401914413e-06, "loss": 0.4887, "step": 3433 }, { "epoch": 0.43, "grad_norm": 1.316556873161966, "learning_rate": 6.41917745138984e-06, "loss": 0.4915, "step": 3434 }, { "epoch": 0.43, "grad_norm": 1.4845717808016468, "learning_rate": 6.417250271584649e-06, "loss": 0.482, "step": 3435 }, { "epoch": 0.43, "grad_norm": 1.3906828490733454, "learning_rate": 6.415322862810198e-06, "loss": 0.506, "step": 3436 }, { "epoch": 0.43, "grad_norm": 1.4789087457973646, "learning_rate": 6.413395225377872e-06, "loss": 0.5158, "step": 3437 }, { "epoch": 0.43, "grad_norm": 1.4001127768438166, "learning_rate": 6.4114673595991e-06, "loss": 0.5042, "step": 3438 }, { "epoch": 0.43, "grad_norm": 0.6352396847654843, "learning_rate": 6.409539265785344e-06, "loss": 0.4848, "step": 3439 }, { "epoch": 0.43, "grad_norm": 0.6458804502407448, "learning_rate": 6.407610944248106e-06, "loss": 0.4742, "step": 3440 }, { "epoch": 0.43, "grad_norm": 0.6893783720661152, "learning_rate": 6.405682395298922e-06, "loss": 0.5247, "step": 3441 }, { "epoch": 0.43, "grad_norm": 1.3762211902781725, "learning_rate": 6.4037536192493665e-06, "loss": 0.5144, "step": 3442 }, { "epoch": 0.43, "grad_norm": 1.8440755133843545, "learning_rate": 6.401824616411052e-06, "loss": 0.5605, "step": 3443 }, { "epoch": 0.43, "grad_norm": 1.502330810149086, "learning_rate": 6.399895387095624e-06, "loss": 0.5145, "step": 3444 }, { "epoch": 0.43, "grad_norm": 1.310208911902816, "learning_rate": 6.397965931614767e-06, "loss": 0.5021, "step": 3445 }, { "epoch": 0.43, "grad_norm": 1.4878071758351954, "learning_rate": 6.396036250280202e-06, "loss": 0.5111, "step": 3446 }, { "epoch": 0.43, "grad_norm": 1.3499148482849967, "learning_rate": 6.394106343403685e-06, "loss": 0.4843, "step": 3447 }, { "epoch": 0.43, "grad_norm": 1.6672272401015171, "learning_rate": 6.392176211297011e-06, "loss": 0.4892, "step": 3448 }, { "epoch": 0.43, "grad_norm": 2.1963725876544813, "learning_rate": 6.3902458542720085e-06, "loss": 0.5252, "step": 3449 }, { "epoch": 0.43, "grad_norm": 1.8342464367231288, "learning_rate": 6.388315272640544e-06, "loss": 0.4821, "step": 3450 }, { "epoch": 0.43, "grad_norm": 1.4035352327594603, "learning_rate": 6.386384466714518e-06, "loss": 0.4818, "step": 3451 }, { "epoch": 0.43, "grad_norm": 1.342198015827931, "learning_rate": 6.384453436805873e-06, "loss": 0.4663, "step": 3452 }, { "epoch": 0.43, "grad_norm": 1.704755890875126, "learning_rate": 6.382522183226583e-06, "loss": 0.5597, "step": 3453 }, { "epoch": 0.43, "grad_norm": 1.6219798429195122, "learning_rate": 6.3805907062886564e-06, "loss": 0.476, "step": 3454 }, { "epoch": 0.43, "grad_norm": 1.7164666803756226, "learning_rate": 6.3786590063041434e-06, "loss": 0.5154, "step": 3455 }, { "epoch": 0.43, "grad_norm": 1.3471849166172916, "learning_rate": 6.376727083585126e-06, "loss": 0.5078, "step": 3456 }, { "epoch": 0.43, "grad_norm": 1.6146640259005276, "learning_rate": 6.374794938443722e-06, "loss": 0.5718, "step": 3457 }, { "epoch": 0.43, "grad_norm": 1.6818007005864406, "learning_rate": 6.372862571192088e-06, "loss": 0.5091, "step": 3458 }, { "epoch": 0.43, "grad_norm": 1.366665236720085, "learning_rate": 6.370929982142413e-06, "loss": 0.4912, "step": 3459 }, { "epoch": 0.43, "grad_norm": 2.0380886234440747, "learning_rate": 6.368997171606927e-06, "loss": 0.5472, "step": 3460 }, { "epoch": 0.43, "grad_norm": 1.6612318777400736, "learning_rate": 6.367064139897891e-06, "loss": 0.4455, "step": 3461 }, { "epoch": 0.43, "grad_norm": 1.378939506184432, "learning_rate": 6.365130887327603e-06, "loss": 0.5667, "step": 3462 }, { "epoch": 0.43, "grad_norm": 1.3967605747616427, "learning_rate": 6.363197414208396e-06, "loss": 0.4876, "step": 3463 }, { "epoch": 0.43, "grad_norm": 1.8320960845441028, "learning_rate": 6.361263720852642e-06, "loss": 0.4931, "step": 3464 }, { "epoch": 0.43, "grad_norm": 0.6450256223949539, "learning_rate": 6.359329807572746e-06, "loss": 0.4911, "step": 3465 }, { "epoch": 0.43, "grad_norm": 1.4463229538998164, "learning_rate": 6.357395674681146e-06, "loss": 0.455, "step": 3466 }, { "epoch": 0.43, "grad_norm": 1.3119855418983852, "learning_rate": 6.355461322490319e-06, "loss": 0.5339, "step": 3467 }, { "epoch": 0.43, "grad_norm": 2.0993905938665463, "learning_rate": 6.35352675131278e-06, "loss": 0.476, "step": 3468 }, { "epoch": 0.43, "grad_norm": 2.0789495941589324, "learning_rate": 6.3515919614610725e-06, "loss": 0.5042, "step": 3469 }, { "epoch": 0.43, "grad_norm": 1.505001124357574, "learning_rate": 6.3496569532477796e-06, "loss": 0.5039, "step": 3470 }, { "epoch": 0.43, "grad_norm": 1.3543726155896296, "learning_rate": 6.347721726985518e-06, "loss": 0.4937, "step": 3471 }, { "epoch": 0.43, "grad_norm": 1.410733061613502, "learning_rate": 6.345786282986944e-06, "loss": 0.5036, "step": 3472 }, { "epoch": 0.43, "grad_norm": 2.288134741913164, "learning_rate": 6.343850621564742e-06, "loss": 0.5071, "step": 3473 }, { "epoch": 0.43, "grad_norm": 1.3889345099741446, "learning_rate": 6.3419147430316375e-06, "loss": 0.5137, "step": 3474 }, { "epoch": 0.43, "grad_norm": 1.408902839713213, "learning_rate": 6.3399786477003866e-06, "loss": 0.5138, "step": 3475 }, { "epoch": 0.43, "grad_norm": 1.4541724062815307, "learning_rate": 6.338042335883784e-06, "loss": 0.4752, "step": 3476 }, { "epoch": 0.43, "grad_norm": 3.0315020298988067, "learning_rate": 6.336105807894658e-06, "loss": 0.5071, "step": 3477 }, { "epoch": 0.43, "grad_norm": 1.479139923284239, "learning_rate": 6.334169064045871e-06, "loss": 0.5207, "step": 3478 }, { "epoch": 0.43, "grad_norm": 0.6708097406058517, "learning_rate": 6.332232104650321e-06, "loss": 0.4893, "step": 3479 }, { "epoch": 0.43, "grad_norm": 1.7443151935211285, "learning_rate": 6.330294930020941e-06, "loss": 0.5204, "step": 3480 }, { "epoch": 0.43, "grad_norm": 1.4373772526834405, "learning_rate": 6.3283575404706996e-06, "loss": 0.5344, "step": 3481 }, { "epoch": 0.43, "grad_norm": 4.2391986402563235, "learning_rate": 6.326419936312599e-06, "loss": 0.5516, "step": 3482 }, { "epoch": 0.43, "grad_norm": 1.4624592865974275, "learning_rate": 6.324482117859676e-06, "loss": 0.5251, "step": 3483 }, { "epoch": 0.43, "grad_norm": 1.525858161534442, "learning_rate": 6.322544085425001e-06, "loss": 0.5243, "step": 3484 }, { "epoch": 0.43, "grad_norm": 3.6321469993075612, "learning_rate": 6.320605839321681e-06, "loss": 0.5121, "step": 3485 }, { "epoch": 0.43, "grad_norm": 1.6245468809709254, "learning_rate": 6.318667379862856e-06, "loss": 0.5002, "step": 3486 }, { "epoch": 0.43, "grad_norm": 1.3950302507184524, "learning_rate": 6.3167287073617035e-06, "loss": 0.4825, "step": 3487 }, { "epoch": 0.43, "grad_norm": 1.4184155641850023, "learning_rate": 6.31478982213143e-06, "loss": 0.5087, "step": 3488 }, { "epoch": 0.43, "grad_norm": 1.5052726070482894, "learning_rate": 6.312850724485282e-06, "loss": 0.4351, "step": 3489 }, { "epoch": 0.43, "grad_norm": 1.2968373517440517, "learning_rate": 6.310911414736537e-06, "loss": 0.5013, "step": 3490 }, { "epoch": 0.43, "grad_norm": 1.6496514532699211, "learning_rate": 6.308971893198508e-06, "loss": 0.5353, "step": 3491 }, { "epoch": 0.43, "grad_norm": 1.5839632150330647, "learning_rate": 6.307032160184541e-06, "loss": 0.5673, "step": 3492 }, { "epoch": 0.43, "grad_norm": 1.3248369734797494, "learning_rate": 6.305092216008016e-06, "loss": 0.5356, "step": 3493 }, { "epoch": 0.43, "grad_norm": 1.4246486211657718, "learning_rate": 6.30315206098235e-06, "loss": 0.5004, "step": 3494 }, { "epoch": 0.43, "grad_norm": 1.829505588718067, "learning_rate": 6.301211695420992e-06, "loss": 0.4931, "step": 3495 }, { "epoch": 0.43, "grad_norm": 1.5619325285771204, "learning_rate": 6.2992711196374236e-06, "loss": 0.5172, "step": 3496 }, { "epoch": 0.43, "grad_norm": 1.3228374138272259, "learning_rate": 6.297330333945164e-06, "loss": 0.4938, "step": 3497 }, { "epoch": 0.43, "grad_norm": 1.3977534860361773, "learning_rate": 6.2953893386577626e-06, "loss": 0.4958, "step": 3498 }, { "epoch": 0.43, "grad_norm": 1.2944479809162501, "learning_rate": 6.293448134088805e-06, "loss": 0.5331, "step": 3499 }, { "epoch": 0.43, "grad_norm": 1.7182384782561086, "learning_rate": 6.2915067205519085e-06, "loss": 0.4769, "step": 3500 }, { "epoch": 0.43, "grad_norm": 1.2931322581828373, "learning_rate": 6.289565098360728e-06, "loss": 0.4586, "step": 3501 }, { "epoch": 0.43, "grad_norm": 2.522939179168897, "learning_rate": 6.287623267828948e-06, "loss": 0.4692, "step": 3502 }, { "epoch": 0.43, "grad_norm": 2.328118856112479, "learning_rate": 6.2856812292702884e-06, "loss": 0.4894, "step": 3503 }, { "epoch": 0.43, "grad_norm": 1.9947553241739746, "learning_rate": 6.283738982998502e-06, "loss": 0.4862, "step": 3504 }, { "epoch": 0.43, "grad_norm": 0.6311486849974827, "learning_rate": 6.281796529327378e-06, "loss": 0.5108, "step": 3505 }, { "epoch": 0.44, "grad_norm": 0.6625257516752647, "learning_rate": 6.279853868570736e-06, "loss": 0.5143, "step": 3506 }, { "epoch": 0.44, "grad_norm": 1.9066907850009018, "learning_rate": 6.27791100104243e-06, "loss": 0.5267, "step": 3507 }, { "epoch": 0.44, "grad_norm": 1.4781973315845938, "learning_rate": 6.2759679270563446e-06, "loss": 0.5299, "step": 3508 }, { "epoch": 0.44, "grad_norm": 1.308884538474327, "learning_rate": 6.274024646926405e-06, "loss": 0.4653, "step": 3509 }, { "epoch": 0.44, "grad_norm": 1.278907688874593, "learning_rate": 6.272081160966564e-06, "loss": 0.5022, "step": 3510 }, { "epoch": 0.44, "grad_norm": 1.6621416069496444, "learning_rate": 6.2701374694908045e-06, "loss": 0.4967, "step": 3511 }, { "epoch": 0.44, "grad_norm": 2.3440202170943447, "learning_rate": 6.268193572813151e-06, "loss": 0.4782, "step": 3512 }, { "epoch": 0.44, "grad_norm": 0.7500464171050284, "learning_rate": 6.266249471247659e-06, "loss": 0.4943, "step": 3513 }, { "epoch": 0.44, "grad_norm": 1.411055795264252, "learning_rate": 6.264305165108412e-06, "loss": 0.4958, "step": 3514 }, { "epoch": 0.44, "grad_norm": 1.2671764977020974, "learning_rate": 6.262360654709529e-06, "loss": 0.4663, "step": 3515 }, { "epoch": 0.44, "grad_norm": 1.1726670323695136, "learning_rate": 6.260415940365165e-06, "loss": 0.4504, "step": 3516 }, { "epoch": 0.44, "grad_norm": 1.5489519189048802, "learning_rate": 6.258471022389506e-06, "loss": 0.5345, "step": 3517 }, { "epoch": 0.44, "grad_norm": 2.9116402906622896, "learning_rate": 6.256525901096769e-06, "loss": 0.4731, "step": 3518 }, { "epoch": 0.44, "grad_norm": 1.4590621142943716, "learning_rate": 6.254580576801208e-06, "loss": 0.5038, "step": 3519 }, { "epoch": 0.44, "grad_norm": 1.4887237061309944, "learning_rate": 6.252635049817104e-06, "loss": 0.5682, "step": 3520 }, { "epoch": 0.44, "grad_norm": 1.554091148712146, "learning_rate": 6.250689320458775e-06, "loss": 0.4634, "step": 3521 }, { "epoch": 0.44, "grad_norm": 1.5543738127421725, "learning_rate": 6.248743389040573e-06, "loss": 0.4984, "step": 3522 }, { "epoch": 0.44, "grad_norm": 1.7629146754869744, "learning_rate": 6.246797255876876e-06, "loss": 0.5209, "step": 3523 }, { "epoch": 0.44, "grad_norm": 2.9998417318041923, "learning_rate": 6.244850921282102e-06, "loss": 0.5541, "step": 3524 }, { "epoch": 0.44, "grad_norm": 1.2551611920725345, "learning_rate": 6.242904385570699e-06, "loss": 0.5279, "step": 3525 }, { "epoch": 0.44, "grad_norm": 1.5336429069292903, "learning_rate": 6.240957649057145e-06, "loss": 0.5148, "step": 3526 }, { "epoch": 0.44, "grad_norm": 2.044244210210011, "learning_rate": 6.239010712055955e-06, "loss": 0.5184, "step": 3527 }, { "epoch": 0.44, "grad_norm": 1.4181219458895138, "learning_rate": 6.2370635748816725e-06, "loss": 0.4707, "step": 3528 }, { "epoch": 0.44, "grad_norm": 1.6032922870244413, "learning_rate": 6.235116237848872e-06, "loss": 0.4305, "step": 3529 }, { "epoch": 0.44, "grad_norm": 0.7304352877367216, "learning_rate": 6.233168701272167e-06, "loss": 0.5232, "step": 3530 }, { "epoch": 0.44, "grad_norm": 1.3911196910792476, "learning_rate": 6.231220965466197e-06, "loss": 0.5021, "step": 3531 }, { "epoch": 0.44, "grad_norm": 1.3165358719004459, "learning_rate": 6.229273030745638e-06, "loss": 0.5183, "step": 3532 }, { "epoch": 0.44, "grad_norm": 1.7453145753076977, "learning_rate": 6.227324897425191e-06, "loss": 0.5097, "step": 3533 }, { "epoch": 0.44, "grad_norm": 5.5380079581301915, "learning_rate": 6.2253765658195986e-06, "loss": 0.5064, "step": 3534 }, { "epoch": 0.44, "grad_norm": 2.0266057416813785, "learning_rate": 6.223428036243631e-06, "loss": 0.5195, "step": 3535 }, { "epoch": 0.44, "grad_norm": 1.3156969788819382, "learning_rate": 6.2214793090120896e-06, "loss": 0.4469, "step": 3536 }, { "epoch": 0.44, "grad_norm": 1.5487418691974917, "learning_rate": 6.219530384439807e-06, "loss": 0.502, "step": 3537 }, { "epoch": 0.44, "grad_norm": 1.4488340230656243, "learning_rate": 6.217581262841651e-06, "loss": 0.5008, "step": 3538 }, { "epoch": 0.44, "grad_norm": 2.0032013299670752, "learning_rate": 6.215631944532518e-06, "loss": 0.5196, "step": 3539 }, { "epoch": 0.44, "grad_norm": 1.6068169770942995, "learning_rate": 6.213682429827338e-06, "loss": 0.5189, "step": 3540 }, { "epoch": 0.44, "grad_norm": 2.1926932157629393, "learning_rate": 6.21173271904107e-06, "loss": 0.484, "step": 3541 }, { "epoch": 0.44, "grad_norm": 0.6671431070107134, "learning_rate": 6.209782812488713e-06, "loss": 0.511, "step": 3542 }, { "epoch": 0.44, "grad_norm": 1.6212237617207985, "learning_rate": 6.207832710485285e-06, "loss": 0.528, "step": 3543 }, { "epoch": 0.44, "grad_norm": 1.6305336046241468, "learning_rate": 6.2058824133458476e-06, "loss": 0.478, "step": 3544 }, { "epoch": 0.44, "grad_norm": 1.822234471136306, "learning_rate": 6.203931921385484e-06, "loss": 0.5406, "step": 3545 }, { "epoch": 0.44, "grad_norm": 0.6679266912093794, "learning_rate": 6.201981234919317e-06, "loss": 0.4911, "step": 3546 }, { "epoch": 0.44, "grad_norm": 1.439282471232996, "learning_rate": 6.200030354262493e-06, "loss": 0.5135, "step": 3547 }, { "epoch": 0.44, "grad_norm": 1.3884166014461918, "learning_rate": 6.198079279730198e-06, "loss": 0.4351, "step": 3548 }, { "epoch": 0.44, "grad_norm": 1.4047748956301707, "learning_rate": 6.196128011637642e-06, "loss": 0.5472, "step": 3549 }, { "epoch": 0.44, "grad_norm": 1.4808855445369755, "learning_rate": 6.194176550300071e-06, "loss": 0.4965, "step": 3550 }, { "epoch": 0.44, "grad_norm": 2.0614228950515447, "learning_rate": 6.1922248960327635e-06, "loss": 0.5206, "step": 3551 }, { "epoch": 0.44, "grad_norm": 1.7331701568695543, "learning_rate": 6.190273049151022e-06, "loss": 0.5328, "step": 3552 }, { "epoch": 0.44, "grad_norm": 1.8941941076976965, "learning_rate": 6.1883210099701864e-06, "loss": 0.5372, "step": 3553 }, { "epoch": 0.44, "grad_norm": 1.83335774832791, "learning_rate": 6.186368778805628e-06, "loss": 0.4962, "step": 3554 }, { "epoch": 0.44, "grad_norm": 1.5119225106231424, "learning_rate": 6.184416355972743e-06, "loss": 0.5088, "step": 3555 }, { "epoch": 0.44, "grad_norm": 1.2784469665914808, "learning_rate": 6.182463741786965e-06, "loss": 0.5033, "step": 3556 }, { "epoch": 0.44, "grad_norm": 1.416495701529496, "learning_rate": 6.180510936563754e-06, "loss": 0.5359, "step": 3557 }, { "epoch": 0.44, "grad_norm": 1.4934566071982733, "learning_rate": 6.178557940618605e-06, "loss": 0.486, "step": 3558 }, { "epoch": 0.44, "grad_norm": 2.258013858046018, "learning_rate": 6.17660475426704e-06, "loss": 0.5424, "step": 3559 }, { "epoch": 0.44, "grad_norm": 1.4267716289763825, "learning_rate": 6.174651377824615e-06, "loss": 0.4984, "step": 3560 }, { "epoch": 0.44, "grad_norm": 1.5973749407245927, "learning_rate": 6.172697811606914e-06, "loss": 0.4614, "step": 3561 }, { "epoch": 0.44, "grad_norm": 1.4895046489791153, "learning_rate": 6.1707440559295554e-06, "loss": 0.5066, "step": 3562 }, { "epoch": 0.44, "grad_norm": 1.4827775512387729, "learning_rate": 6.1687901111081826e-06, "loss": 0.5524, "step": 3563 }, { "epoch": 0.44, "grad_norm": 1.715697107372859, "learning_rate": 6.166835977458473e-06, "loss": 0.5476, "step": 3564 }, { "epoch": 0.44, "grad_norm": 1.4663246145687245, "learning_rate": 6.1648816552961355e-06, "loss": 0.5031, "step": 3565 }, { "epoch": 0.44, "grad_norm": 1.5067132761428363, "learning_rate": 6.162927144936906e-06, "loss": 0.465, "step": 3566 }, { "epoch": 0.44, "grad_norm": 1.7021011233090093, "learning_rate": 6.1609724466965535e-06, "loss": 0.5044, "step": 3567 }, { "epoch": 0.44, "grad_norm": 1.3699920347424173, "learning_rate": 6.159017560890879e-06, "loss": 0.4359, "step": 3568 }, { "epoch": 0.44, "grad_norm": 1.40510218511984, "learning_rate": 6.15706248783571e-06, "loss": 0.4913, "step": 3569 }, { "epoch": 0.44, "grad_norm": 0.6211969399829059, "learning_rate": 6.155107227846904e-06, "loss": 0.4779, "step": 3570 }, { "epoch": 0.44, "grad_norm": 1.2953802588952739, "learning_rate": 6.153151781240352e-06, "loss": 0.4684, "step": 3571 }, { "epoch": 0.44, "grad_norm": 1.3483986543487585, "learning_rate": 6.151196148331975e-06, "loss": 0.4908, "step": 3572 }, { "epoch": 0.44, "grad_norm": 1.5473054117537224, "learning_rate": 6.1492403294377225e-06, "loss": 0.4678, "step": 3573 }, { "epoch": 0.44, "grad_norm": 1.2787212957409102, "learning_rate": 6.14728432487357e-06, "loss": 0.517, "step": 3574 }, { "epoch": 0.44, "grad_norm": 1.3290235549835527, "learning_rate": 6.145328134955533e-06, "loss": 0.504, "step": 3575 }, { "epoch": 0.44, "grad_norm": 1.4494007619059526, "learning_rate": 6.143371759999648e-06, "loss": 0.5333, "step": 3576 }, { "epoch": 0.44, "grad_norm": 1.5308204499267646, "learning_rate": 6.1414152003219854e-06, "loss": 0.5299, "step": 3577 }, { "epoch": 0.44, "grad_norm": 1.481114838188324, "learning_rate": 6.1394584562386425e-06, "loss": 0.5139, "step": 3578 }, { "epoch": 0.44, "grad_norm": 2.219964866020278, "learning_rate": 6.137501528065752e-06, "loss": 0.553, "step": 3579 }, { "epoch": 0.44, "grad_norm": 1.3323902271568275, "learning_rate": 6.1355444161194724e-06, "loss": 0.4944, "step": 3580 }, { "epoch": 0.44, "grad_norm": 2.275208807005506, "learning_rate": 6.13358712071599e-06, "loss": 0.5012, "step": 3581 }, { "epoch": 0.44, "grad_norm": 0.7100296280459837, "learning_rate": 6.131629642171526e-06, "loss": 0.4876, "step": 3582 }, { "epoch": 0.44, "grad_norm": 1.3075143321738223, "learning_rate": 6.129671980802325e-06, "loss": 0.4824, "step": 3583 }, { "epoch": 0.44, "grad_norm": 1.8781822785139812, "learning_rate": 6.127714136924667e-06, "loss": 0.4955, "step": 3584 }, { "epoch": 0.44, "grad_norm": 1.8573339867952048, "learning_rate": 6.125756110854859e-06, "loss": 0.5335, "step": 3585 }, { "epoch": 0.45, "grad_norm": 1.3234182294746977, "learning_rate": 6.123797902909236e-06, "loss": 0.5012, "step": 3586 }, { "epoch": 0.45, "grad_norm": 1.5037302731280628, "learning_rate": 6.121839513404163e-06, "loss": 0.5497, "step": 3587 }, { "epoch": 0.45, "grad_norm": 1.2704736827188403, "learning_rate": 6.119880942656038e-06, "loss": 0.5592, "step": 3588 }, { "epoch": 0.45, "grad_norm": 1.751140929108546, "learning_rate": 6.117922190981282e-06, "loss": 0.513, "step": 3589 }, { "epoch": 0.45, "grad_norm": 1.552318521249153, "learning_rate": 6.1159632586963524e-06, "loss": 0.5248, "step": 3590 }, { "epoch": 0.45, "grad_norm": 1.4367054279952804, "learning_rate": 6.114004146117729e-06, "loss": 0.5626, "step": 3591 }, { "epoch": 0.45, "grad_norm": 1.2107766082355556, "learning_rate": 6.112044853561925e-06, "loss": 0.4458, "step": 3592 }, { "epoch": 0.45, "grad_norm": 1.4829866654121693, "learning_rate": 6.11008538134548e-06, "loss": 0.5067, "step": 3593 }, { "epoch": 0.45, "grad_norm": 1.5846189280659801, "learning_rate": 6.108125729784964e-06, "loss": 0.5742, "step": 3594 }, { "epoch": 0.45, "grad_norm": 1.5089263131720982, "learning_rate": 6.106165899196978e-06, "loss": 0.5726, "step": 3595 }, { "epoch": 0.45, "grad_norm": 1.4150877058823383, "learning_rate": 6.1042058898981484e-06, "loss": 0.5297, "step": 3596 }, { "epoch": 0.45, "grad_norm": 1.4547063878965534, "learning_rate": 6.1022457022051325e-06, "loss": 0.5352, "step": 3597 }, { "epoch": 0.45, "grad_norm": 1.3973769343405766, "learning_rate": 6.100285336434616e-06, "loss": 0.5057, "step": 3598 }, { "epoch": 0.45, "grad_norm": 1.403887011962654, "learning_rate": 6.098324792903313e-06, "loss": 0.4837, "step": 3599 }, { "epoch": 0.45, "grad_norm": 1.2463330225672213, "learning_rate": 6.096364071927966e-06, "loss": 0.4729, "step": 3600 }, { "epoch": 0.45, "grad_norm": 2.050882947847235, "learning_rate": 6.094403173825348e-06, "loss": 0.4663, "step": 3601 }, { "epoch": 0.45, "grad_norm": 1.611584775857751, "learning_rate": 6.0924420989122565e-06, "loss": 0.5078, "step": 3602 }, { "epoch": 0.45, "grad_norm": 1.5437249180480355, "learning_rate": 6.0904808475055246e-06, "loss": 0.5367, "step": 3603 }, { "epoch": 0.45, "grad_norm": 2.138459892149861, "learning_rate": 6.088519419922008e-06, "loss": 0.4932, "step": 3604 }, { "epoch": 0.45, "grad_norm": 0.6913666855531088, "learning_rate": 6.086557816478591e-06, "loss": 0.5108, "step": 3605 }, { "epoch": 0.45, "grad_norm": 1.5783740456870101, "learning_rate": 6.08459603749219e-06, "loss": 0.5018, "step": 3606 }, { "epoch": 0.45, "grad_norm": 1.2359527048762666, "learning_rate": 6.082634083279746e-06, "loss": 0.4702, "step": 3607 }, { "epoch": 0.45, "grad_norm": 1.4654275615242967, "learning_rate": 6.080671954158232e-06, "loss": 0.5437, "step": 3608 }, { "epoch": 0.45, "grad_norm": 2.272816051175789, "learning_rate": 6.0787096504446465e-06, "loss": 0.4884, "step": 3609 }, { "epoch": 0.45, "grad_norm": 1.5154753911364525, "learning_rate": 6.076747172456016e-06, "loss": 0.5131, "step": 3610 }, { "epoch": 0.45, "grad_norm": 1.491605697034516, "learning_rate": 6.074784520509395e-06, "loss": 0.5344, "step": 3611 }, { "epoch": 0.45, "grad_norm": 0.6704317411138851, "learning_rate": 6.07282169492187e-06, "loss": 0.517, "step": 3612 }, { "epoch": 0.45, "grad_norm": 2.2193079275691656, "learning_rate": 6.070858696010552e-06, "loss": 0.5255, "step": 3613 }, { "epoch": 0.45, "grad_norm": 1.442941458805938, "learning_rate": 6.068895524092581e-06, "loss": 0.5055, "step": 3614 }, { "epoch": 0.45, "grad_norm": 1.4500045510329642, "learning_rate": 6.066932179485122e-06, "loss": 0.544, "step": 3615 }, { "epoch": 0.45, "grad_norm": 1.243790000007731, "learning_rate": 6.064968662505374e-06, "loss": 0.501, "step": 3616 }, { "epoch": 0.45, "grad_norm": 1.3412828250391715, "learning_rate": 6.06300497347056e-06, "loss": 0.5653, "step": 3617 }, { "epoch": 0.45, "grad_norm": 1.2171866043917068, "learning_rate": 6.06104111269793e-06, "loss": 0.3964, "step": 3618 }, { "epoch": 0.45, "grad_norm": 1.3164204240625614, "learning_rate": 6.059077080504761e-06, "loss": 0.4738, "step": 3619 }, { "epoch": 0.45, "grad_norm": 1.424702659928036, "learning_rate": 6.057112877208364e-06, "loss": 0.5737, "step": 3620 }, { "epoch": 0.45, "grad_norm": 1.4070668142198544, "learning_rate": 6.0551485031260725e-06, "loss": 0.4949, "step": 3621 }, { "epoch": 0.45, "grad_norm": 1.3408711147549046, "learning_rate": 6.053183958575246e-06, "loss": 0.4988, "step": 3622 }, { "epoch": 0.45, "grad_norm": 3.8979002790542197, "learning_rate": 6.051219243873275e-06, "loss": 0.4505, "step": 3623 }, { "epoch": 0.45, "grad_norm": 1.3990509882706048, "learning_rate": 6.049254359337578e-06, "loss": 0.5486, "step": 3624 }, { "epoch": 0.45, "grad_norm": 1.3622160719714407, "learning_rate": 6.0472893052855985e-06, "loss": 0.4876, "step": 3625 }, { "epoch": 0.45, "grad_norm": 1.3103141054320948, "learning_rate": 6.0453240820348066e-06, "loss": 0.4933, "step": 3626 }, { "epoch": 0.45, "grad_norm": 1.5448586835372746, "learning_rate": 6.043358689902704e-06, "loss": 0.5565, "step": 3627 }, { "epoch": 0.45, "grad_norm": 1.740198154341534, "learning_rate": 6.041393129206816e-06, "loss": 0.5272, "step": 3628 }, { "epoch": 0.45, "grad_norm": 1.4284041583081468, "learning_rate": 6.0394274002646965e-06, "loss": 0.4735, "step": 3629 }, { "epoch": 0.45, "grad_norm": 1.502198490514338, "learning_rate": 6.037461503393925e-06, "loss": 0.5462, "step": 3630 }, { "epoch": 0.45, "grad_norm": 1.2648890363953325, "learning_rate": 6.03549543891211e-06, "loss": 0.5275, "step": 3631 }, { "epoch": 0.45, "grad_norm": 1.4609835118631087, "learning_rate": 6.033529207136888e-06, "loss": 0.5122, "step": 3632 }, { "epoch": 0.45, "grad_norm": 1.5404894439925987, "learning_rate": 6.03156280838592e-06, "loss": 0.5405, "step": 3633 }, { "epoch": 0.45, "grad_norm": 0.681033615151205, "learning_rate": 6.029596242976895e-06, "loss": 0.5145, "step": 3634 }, { "epoch": 0.45, "grad_norm": 1.2585368863622328, "learning_rate": 6.02762951122753e-06, "loss": 0.5044, "step": 3635 }, { "epoch": 0.45, "grad_norm": 1.6716525797681374, "learning_rate": 6.025662613455566e-06, "loss": 0.5353, "step": 3636 }, { "epoch": 0.45, "grad_norm": 2.690724018178695, "learning_rate": 6.023695549978774e-06, "loss": 0.5276, "step": 3637 }, { "epoch": 0.45, "grad_norm": 0.6397336826076158, "learning_rate": 6.021728321114949e-06, "loss": 0.4837, "step": 3638 }, { "epoch": 0.45, "grad_norm": 1.333082952090204, "learning_rate": 6.0197609271819145e-06, "loss": 0.5052, "step": 3639 }, { "epoch": 0.45, "grad_norm": 1.638944018469146, "learning_rate": 6.017793368497523e-06, "loss": 0.548, "step": 3640 }, { "epoch": 0.45, "grad_norm": 1.3767723047356812, "learning_rate": 6.0158256453796485e-06, "loss": 0.492, "step": 3641 }, { "epoch": 0.45, "grad_norm": 1.9370306650773428, "learning_rate": 6.013857758146193e-06, "loss": 0.538, "step": 3642 }, { "epoch": 0.45, "grad_norm": 1.3766171654282706, "learning_rate": 6.01188970711509e-06, "loss": 0.47, "step": 3643 }, { "epoch": 0.45, "grad_norm": 1.555917116487695, "learning_rate": 6.0099214926042905e-06, "loss": 0.5003, "step": 3644 }, { "epoch": 0.45, "grad_norm": 3.9513733336041112, "learning_rate": 6.00795311493178e-06, "loss": 0.5294, "step": 3645 }, { "epoch": 0.45, "grad_norm": 1.4435089295034635, "learning_rate": 6.005984574415568e-06, "loss": 0.5174, "step": 3646 }, { "epoch": 0.45, "grad_norm": 1.3801760935816803, "learning_rate": 6.0040158713736865e-06, "loss": 0.5404, "step": 3647 }, { "epoch": 0.45, "grad_norm": 1.5837482763871387, "learning_rate": 6.002047006124198e-06, "loss": 0.5395, "step": 3648 }, { "epoch": 0.45, "grad_norm": 1.7192576032754558, "learning_rate": 6.000077978985191e-06, "loss": 0.4661, "step": 3649 }, { "epoch": 0.45, "grad_norm": 1.3025945963336203, "learning_rate": 5.9981087902747785e-06, "loss": 0.5132, "step": 3650 }, { "epoch": 0.45, "grad_norm": 1.477148566837164, "learning_rate": 5.996139440311099e-06, "loss": 0.5258, "step": 3651 }, { "epoch": 0.45, "grad_norm": 1.2595738614799532, "learning_rate": 5.994169929412323e-06, "loss": 0.4461, "step": 3652 }, { "epoch": 0.45, "grad_norm": 1.2587053512253195, "learning_rate": 5.9922002578966364e-06, "loss": 0.5135, "step": 3653 }, { "epoch": 0.45, "grad_norm": 1.4674379833778877, "learning_rate": 5.990230426082261e-06, "loss": 0.552, "step": 3654 }, { "epoch": 0.45, "grad_norm": 1.5625476515475012, "learning_rate": 5.988260434287438e-06, "loss": 0.5393, "step": 3655 }, { "epoch": 0.45, "grad_norm": 1.2436803191403425, "learning_rate": 5.986290282830438e-06, "loss": 0.5203, "step": 3656 }, { "epoch": 0.45, "grad_norm": 1.4690916234746247, "learning_rate": 5.984319972029558e-06, "loss": 0.5051, "step": 3657 }, { "epoch": 0.45, "grad_norm": 1.4158183487114746, "learning_rate": 5.982349502203116e-06, "loss": 0.537, "step": 3658 }, { "epoch": 0.45, "grad_norm": 0.6914721822117508, "learning_rate": 5.98037887366946e-06, "loss": 0.5228, "step": 3659 }, { "epoch": 0.45, "grad_norm": 1.3885647159507815, "learning_rate": 5.978408086746962e-06, "loss": 0.4751, "step": 3660 }, { "epoch": 0.45, "grad_norm": 3.0146078697076937, "learning_rate": 5.976437141754021e-06, "loss": 0.4981, "step": 3661 }, { "epoch": 0.45, "grad_norm": 0.613300758549818, "learning_rate": 5.974466039009063e-06, "loss": 0.4567, "step": 3662 }, { "epoch": 0.45, "grad_norm": 2.2454938124351362, "learning_rate": 5.972494778830531e-06, "loss": 0.4523, "step": 3663 }, { "epoch": 0.45, "grad_norm": 1.391952792088452, "learning_rate": 5.970523361536903e-06, "loss": 0.4386, "step": 3664 }, { "epoch": 0.45, "grad_norm": 1.470221657599158, "learning_rate": 5.968551787446678e-06, "loss": 0.5198, "step": 3665 }, { "epoch": 0.45, "grad_norm": 1.5239604080432068, "learning_rate": 5.966580056878382e-06, "loss": 0.5177, "step": 3666 }, { "epoch": 0.46, "grad_norm": 1.3258176562134478, "learning_rate": 5.9646081701505654e-06, "loss": 0.4909, "step": 3667 }, { "epoch": 0.46, "grad_norm": 1.7142755784727002, "learning_rate": 5.962636127581802e-06, "loss": 0.4909, "step": 3668 }, { "epoch": 0.46, "grad_norm": 1.484667936557781, "learning_rate": 5.960663929490696e-06, "loss": 0.5158, "step": 3669 }, { "epoch": 0.46, "grad_norm": 2.0552574632383447, "learning_rate": 5.95869157619587e-06, "loss": 0.4578, "step": 3670 }, { "epoch": 0.46, "grad_norm": 1.592695773175072, "learning_rate": 5.956719068015977e-06, "loss": 0.5533, "step": 3671 }, { "epoch": 0.46, "grad_norm": 2.2148265940903142, "learning_rate": 5.954746405269692e-06, "loss": 0.493, "step": 3672 }, { "epoch": 0.46, "grad_norm": 1.479906503336225, "learning_rate": 5.952773588275718e-06, "loss": 0.5627, "step": 3673 }, { "epoch": 0.46, "grad_norm": 1.4811317212396697, "learning_rate": 5.950800617352778e-06, "loss": 0.5211, "step": 3674 }, { "epoch": 0.46, "grad_norm": 1.4535882719954414, "learning_rate": 5.9488274928196245e-06, "loss": 0.4846, "step": 3675 }, { "epoch": 0.46, "grad_norm": 1.8321506613837863, "learning_rate": 5.946854214995032e-06, "loss": 0.4663, "step": 3676 }, { "epoch": 0.46, "grad_norm": 1.6928171693718244, "learning_rate": 5.9448807841978044e-06, "loss": 0.5501, "step": 3677 }, { "epoch": 0.46, "grad_norm": 1.3754667256719582, "learning_rate": 5.942907200746762e-06, "loss": 0.4814, "step": 3678 }, { "epoch": 0.46, "grad_norm": 1.6116269924251991, "learning_rate": 5.940933464960759e-06, "loss": 0.5573, "step": 3679 }, { "epoch": 0.46, "grad_norm": 1.3579375286635698, "learning_rate": 5.9389595771586675e-06, "loss": 0.4808, "step": 3680 }, { "epoch": 0.46, "grad_norm": 2.3432840262996693, "learning_rate": 5.936985537659385e-06, "loss": 0.49, "step": 3681 }, { "epoch": 0.46, "grad_norm": 1.9449076892590997, "learning_rate": 5.93501134678184e-06, "loss": 0.4858, "step": 3682 }, { "epoch": 0.46, "grad_norm": 1.712462555171365, "learning_rate": 5.9330370048449736e-06, "loss": 0.5005, "step": 3683 }, { "epoch": 0.46, "grad_norm": 1.4118247291797974, "learning_rate": 5.9310625121677636e-06, "loss": 0.5104, "step": 3684 }, { "epoch": 0.46, "grad_norm": 1.324781398126362, "learning_rate": 5.929087869069202e-06, "loss": 0.4799, "step": 3685 }, { "epoch": 0.46, "grad_norm": 1.2617013365478964, "learning_rate": 5.927113075868315e-06, "loss": 0.5406, "step": 3686 }, { "epoch": 0.46, "grad_norm": 1.5523795998609873, "learning_rate": 5.925138132884145e-06, "loss": 0.5128, "step": 3687 }, { "epoch": 0.46, "grad_norm": 1.4891053513027392, "learning_rate": 5.923163040435762e-06, "loss": 0.5259, "step": 3688 }, { "epoch": 0.46, "grad_norm": 1.459970627139374, "learning_rate": 5.921187798842258e-06, "loss": 0.5568, "step": 3689 }, { "epoch": 0.46, "grad_norm": 1.4923420303638777, "learning_rate": 5.9192124084227534e-06, "loss": 0.538, "step": 3690 }, { "epoch": 0.46, "grad_norm": 1.4986361120402343, "learning_rate": 5.917236869496388e-06, "loss": 0.5481, "step": 3691 }, { "epoch": 0.46, "grad_norm": 2.3799188836426888, "learning_rate": 5.915261182382328e-06, "loss": 0.5267, "step": 3692 }, { "epoch": 0.46, "grad_norm": 1.4245940989096637, "learning_rate": 5.913285347399762e-06, "loss": 0.5152, "step": 3693 }, { "epoch": 0.46, "grad_norm": 1.752625008923162, "learning_rate": 5.9113093648679065e-06, "loss": 0.5141, "step": 3694 }, { "epoch": 0.46, "grad_norm": 1.6379456852940069, "learning_rate": 5.909333235105996e-06, "loss": 0.5097, "step": 3695 }, { "epoch": 0.46, "grad_norm": 1.4584429688128662, "learning_rate": 5.907356958433292e-06, "loss": 0.4911, "step": 3696 }, { "epoch": 0.46, "grad_norm": 1.441952787689109, "learning_rate": 5.905380535169082e-06, "loss": 0.5118, "step": 3697 }, { "epoch": 0.46, "grad_norm": 1.5176438222118591, "learning_rate": 5.903403965632672e-06, "loss": 0.4929, "step": 3698 }, { "epoch": 0.46, "grad_norm": 1.2751446770162105, "learning_rate": 5.9014272501433945e-06, "loss": 0.4591, "step": 3699 }, { "epoch": 0.46, "grad_norm": 1.404506261471304, "learning_rate": 5.899450389020605e-06, "loss": 0.516, "step": 3700 }, { "epoch": 0.46, "grad_norm": 1.3889948229171092, "learning_rate": 5.897473382583684e-06, "loss": 0.4677, "step": 3701 }, { "epoch": 0.46, "grad_norm": 2.2631815193287492, "learning_rate": 5.895496231152033e-06, "loss": 0.5102, "step": 3702 }, { "epoch": 0.46, "grad_norm": 1.5626518136918286, "learning_rate": 5.893518935045081e-06, "loss": 0.5141, "step": 3703 }, { "epoch": 0.46, "grad_norm": 1.4104502177968783, "learning_rate": 5.891541494582274e-06, "loss": 0.5233, "step": 3704 }, { "epoch": 0.46, "grad_norm": 1.6254946366913998, "learning_rate": 5.889563910083087e-06, "loss": 0.5165, "step": 3705 }, { "epoch": 0.46, "grad_norm": 1.3610756542467268, "learning_rate": 5.887586181867015e-06, "loss": 0.5031, "step": 3706 }, { "epoch": 0.46, "grad_norm": 1.4098997632065402, "learning_rate": 5.885608310253581e-06, "loss": 0.5532, "step": 3707 }, { "epoch": 0.46, "grad_norm": 1.5357593910677314, "learning_rate": 5.8836302955623225e-06, "loss": 0.5059, "step": 3708 }, { "epoch": 0.46, "grad_norm": 1.822301409849621, "learning_rate": 5.881652138112808e-06, "loss": 0.5389, "step": 3709 }, { "epoch": 0.46, "grad_norm": 1.4528299534532378, "learning_rate": 5.879673838224625e-06, "loss": 0.5259, "step": 3710 }, { "epoch": 0.46, "grad_norm": 0.6502222787439851, "learning_rate": 5.877695396217386e-06, "loss": 0.4961, "step": 3711 }, { "epoch": 0.46, "grad_norm": 1.5887911511047892, "learning_rate": 5.875716812410727e-06, "loss": 0.5187, "step": 3712 }, { "epoch": 0.46, "grad_norm": 1.6127291879447092, "learning_rate": 5.873738087124302e-06, "loss": 0.5522, "step": 3713 }, { "epoch": 0.46, "grad_norm": 2.3110231020898824, "learning_rate": 5.871759220677795e-06, "loss": 0.4711, "step": 3714 }, { "epoch": 0.46, "grad_norm": 1.2923517567560525, "learning_rate": 5.8697802133909085e-06, "loss": 0.494, "step": 3715 }, { "epoch": 0.46, "grad_norm": 2.0171625532417607, "learning_rate": 5.867801065583369e-06, "loss": 0.5045, "step": 3716 }, { "epoch": 0.46, "grad_norm": 1.459700446720111, "learning_rate": 5.865821777574922e-06, "loss": 0.4705, "step": 3717 }, { "epoch": 0.46, "grad_norm": 2.238435685941135, "learning_rate": 5.863842349685344e-06, "loss": 0.5557, "step": 3718 }, { "epoch": 0.46, "grad_norm": 1.3170644542756293, "learning_rate": 5.861862782234425e-06, "loss": 0.5146, "step": 3719 }, { "epoch": 0.46, "grad_norm": 1.77112402196552, "learning_rate": 5.8598830755419835e-06, "loss": 0.5185, "step": 3720 }, { "epoch": 0.46, "grad_norm": 1.4227671148027017, "learning_rate": 5.8579032299278585e-06, "loss": 0.547, "step": 3721 }, { "epoch": 0.46, "grad_norm": 4.8600426263452965, "learning_rate": 5.855923245711909e-06, "loss": 0.4657, "step": 3722 }, { "epoch": 0.46, "grad_norm": 1.8078423840950666, "learning_rate": 5.853943123214022e-06, "loss": 0.4963, "step": 3723 }, { "epoch": 0.46, "grad_norm": 1.3140178472040205, "learning_rate": 5.851962862754103e-06, "loss": 0.4336, "step": 3724 }, { "epoch": 0.46, "grad_norm": 1.3518973598901456, "learning_rate": 5.84998246465208e-06, "loss": 0.4447, "step": 3725 }, { "epoch": 0.46, "grad_norm": 2.6492949452638737, "learning_rate": 5.848001929227902e-06, "loss": 0.5199, "step": 3726 }, { "epoch": 0.46, "grad_norm": 1.7079034101337343, "learning_rate": 5.846021256801546e-06, "loss": 0.4598, "step": 3727 }, { "epoch": 0.46, "grad_norm": 1.3625776176116, "learning_rate": 5.844040447693004e-06, "loss": 0.4952, "step": 3728 }, { "epoch": 0.46, "grad_norm": 1.5736507105134774, "learning_rate": 5.842059502222295e-06, "loss": 0.5335, "step": 3729 }, { "epoch": 0.46, "grad_norm": 1.3900575889574893, "learning_rate": 5.840078420709456e-06, "loss": 0.4571, "step": 3730 }, { "epoch": 0.46, "grad_norm": 1.3081077443249847, "learning_rate": 5.838097203474549e-06, "loss": 0.5325, "step": 3731 }, { "epoch": 0.46, "grad_norm": 1.4149913690397518, "learning_rate": 5.8361158508376584e-06, "loss": 0.5263, "step": 3732 }, { "epoch": 0.46, "grad_norm": 1.4227367199239105, "learning_rate": 5.834134363118889e-06, "loss": 0.554, "step": 3733 }, { "epoch": 0.46, "grad_norm": 1.3659653418941065, "learning_rate": 5.832152740638366e-06, "loss": 0.5594, "step": 3734 }, { "epoch": 0.46, "grad_norm": 1.4498815236131395, "learning_rate": 5.8301709837162375e-06, "loss": 0.4839, "step": 3735 }, { "epoch": 0.46, "grad_norm": 0.6379826138444108, "learning_rate": 5.828189092672677e-06, "loss": 0.4814, "step": 3736 }, { "epoch": 0.46, "grad_norm": 1.1874404120184479, "learning_rate": 5.826207067827874e-06, "loss": 0.4355, "step": 3737 }, { "epoch": 0.46, "grad_norm": 1.1896968083972823, "learning_rate": 5.824224909502042e-06, "loss": 0.5023, "step": 3738 }, { "epoch": 0.46, "grad_norm": 1.28469351928837, "learning_rate": 5.822242618015417e-06, "loss": 0.4747, "step": 3739 }, { "epoch": 0.46, "grad_norm": 1.3405004682609176, "learning_rate": 5.8202601936882565e-06, "loss": 0.4912, "step": 3740 }, { "epoch": 0.46, "grad_norm": 1.521509508415757, "learning_rate": 5.8182776368408365e-06, "loss": 0.5042, "step": 3741 }, { "epoch": 0.46, "grad_norm": 1.4304713086594372, "learning_rate": 5.816294947793457e-06, "loss": 0.4857, "step": 3742 }, { "epoch": 0.46, "grad_norm": 1.407516635409624, "learning_rate": 5.814312126866441e-06, "loss": 0.4815, "step": 3743 }, { "epoch": 0.46, "grad_norm": 3.374491776202101, "learning_rate": 5.812329174380128e-06, "loss": 0.4992, "step": 3744 }, { "epoch": 0.46, "grad_norm": 1.2658042191199907, "learning_rate": 5.810346090654883e-06, "loss": 0.5197, "step": 3745 }, { "epoch": 0.46, "grad_norm": 1.589948736983883, "learning_rate": 5.808362876011089e-06, "loss": 0.5338, "step": 3746 }, { "epoch": 0.47, "grad_norm": 1.5472017582413746, "learning_rate": 5.806379530769154e-06, "loss": 0.5497, "step": 3747 }, { "epoch": 0.47, "grad_norm": 1.4941205729620681, "learning_rate": 5.804396055249504e-06, "loss": 0.4912, "step": 3748 }, { "epoch": 0.47, "grad_norm": 1.2873758547493115, "learning_rate": 5.802412449772585e-06, "loss": 0.4737, "step": 3749 }, { "epoch": 0.47, "grad_norm": 1.9862356925132492, "learning_rate": 5.800428714658869e-06, "loss": 0.5114, "step": 3750 }, { "epoch": 0.47, "grad_norm": 1.2015546976438813, "learning_rate": 5.7984448502288455e-06, "loss": 0.5073, "step": 3751 }, { "epoch": 0.47, "grad_norm": 1.2466761880916173, "learning_rate": 5.796460856803024e-06, "loss": 0.4497, "step": 3752 }, { "epoch": 0.47, "grad_norm": 1.4910622887566962, "learning_rate": 5.794476734701936e-06, "loss": 0.5127, "step": 3753 }, { "epoch": 0.47, "grad_norm": 1.290204703858359, "learning_rate": 5.792492484246134e-06, "loss": 0.4756, "step": 3754 }, { "epoch": 0.47, "grad_norm": 1.4159973156842203, "learning_rate": 5.790508105756192e-06, "loss": 0.4534, "step": 3755 }, { "epoch": 0.47, "grad_norm": 1.4655772184973017, "learning_rate": 5.788523599552704e-06, "loss": 0.4617, "step": 3756 }, { "epoch": 0.47, "grad_norm": 1.63522079548207, "learning_rate": 5.7865389659562835e-06, "loss": 0.5522, "step": 3757 }, { "epoch": 0.47, "grad_norm": 1.342507328459723, "learning_rate": 5.784554205287564e-06, "loss": 0.5328, "step": 3758 }, { "epoch": 0.47, "grad_norm": 1.3710636892525927, "learning_rate": 5.782569317867204e-06, "loss": 0.4799, "step": 3759 }, { "epoch": 0.47, "grad_norm": 1.238963629696016, "learning_rate": 5.780584304015879e-06, "loss": 0.4872, "step": 3760 }, { "epoch": 0.47, "grad_norm": 5.516859888821994, "learning_rate": 5.778599164054286e-06, "loss": 0.5175, "step": 3761 }, { "epoch": 0.47, "grad_norm": 1.5242376957130266, "learning_rate": 5.77661389830314e-06, "loss": 0.5162, "step": 3762 }, { "epoch": 0.47, "grad_norm": 1.5959688209328888, "learning_rate": 5.774628507083178e-06, "loss": 0.478, "step": 3763 }, { "epoch": 0.47, "grad_norm": 1.5390937579591093, "learning_rate": 5.772642990715158e-06, "loss": 0.549, "step": 3764 }, { "epoch": 0.47, "grad_norm": 1.5459986321471937, "learning_rate": 5.7706573495198595e-06, "loss": 0.4824, "step": 3765 }, { "epoch": 0.47, "grad_norm": 1.7485615328310886, "learning_rate": 5.768671583818077e-06, "loss": 0.5419, "step": 3766 }, { "epoch": 0.47, "grad_norm": 1.6617800056061585, "learning_rate": 5.7666856939306316e-06, "loss": 0.5149, "step": 3767 }, { "epoch": 0.47, "grad_norm": 2.035412095338443, "learning_rate": 5.7646996801783595e-06, "loss": 0.5039, "step": 3768 }, { "epoch": 0.47, "grad_norm": 1.2570451162662535, "learning_rate": 5.762713542882119e-06, "loss": 0.514, "step": 3769 }, { "epoch": 0.47, "grad_norm": 1.5096625637139065, "learning_rate": 5.76072728236279e-06, "loss": 0.4711, "step": 3770 }, { "epoch": 0.47, "grad_norm": 1.4769654769939338, "learning_rate": 5.7587408989412666e-06, "loss": 0.4925, "step": 3771 }, { "epoch": 0.47, "grad_norm": 2.7859026735626404, "learning_rate": 5.7567543929384696e-06, "loss": 0.4986, "step": 3772 }, { "epoch": 0.47, "grad_norm": 1.7966181544995699, "learning_rate": 5.754767764675335e-06, "loss": 0.4997, "step": 3773 }, { "epoch": 0.47, "grad_norm": 1.4071480749895844, "learning_rate": 5.752781014472822e-06, "loss": 0.482, "step": 3774 }, { "epoch": 0.47, "grad_norm": 1.3596223913000691, "learning_rate": 5.750794142651904e-06, "loss": 0.5223, "step": 3775 }, { "epoch": 0.47, "grad_norm": 1.4904250217919546, "learning_rate": 5.74880714953358e-06, "loss": 0.517, "step": 3776 }, { "epoch": 0.47, "grad_norm": 1.9235479357932799, "learning_rate": 5.746820035438868e-06, "loss": 0.5683, "step": 3777 }, { "epoch": 0.47, "grad_norm": 1.8101897413589039, "learning_rate": 5.744832800688801e-06, "loss": 0.4943, "step": 3778 }, { "epoch": 0.47, "grad_norm": 2.255858221208562, "learning_rate": 5.742845445604436e-06, "loss": 0.5104, "step": 3779 }, { "epoch": 0.47, "grad_norm": 0.7438680744054342, "learning_rate": 5.740857970506846e-06, "loss": 0.5296, "step": 3780 }, { "epoch": 0.47, "grad_norm": 1.9525014070396836, "learning_rate": 5.738870375717125e-06, "loss": 0.5288, "step": 3781 }, { "epoch": 0.47, "grad_norm": 1.5688073783490026, "learning_rate": 5.736882661556389e-06, "loss": 0.5331, "step": 3782 }, { "epoch": 0.47, "grad_norm": 1.4595580168795548, "learning_rate": 5.7348948283457675e-06, "loss": 0.5149, "step": 3783 }, { "epoch": 0.47, "grad_norm": 2.3998571453059667, "learning_rate": 5.7329068764064155e-06, "loss": 0.4991, "step": 3784 }, { "epoch": 0.47, "grad_norm": 1.538385560945485, "learning_rate": 5.730918806059502e-06, "loss": 0.5069, "step": 3785 }, { "epoch": 0.47, "grad_norm": 1.5397070432129656, "learning_rate": 5.728930617626218e-06, "loss": 0.5278, "step": 3786 }, { "epoch": 0.47, "grad_norm": 0.64968775068539, "learning_rate": 5.7269423114277745e-06, "loss": 0.5205, "step": 3787 }, { "epoch": 0.47, "grad_norm": 1.783297417690423, "learning_rate": 5.724953887785396e-06, "loss": 0.4983, "step": 3788 }, { "epoch": 0.47, "grad_norm": 3.8262445860846808, "learning_rate": 5.722965347020334e-06, "loss": 0.4443, "step": 3789 }, { "epoch": 0.47, "grad_norm": 1.673647777908011, "learning_rate": 5.7209766894538525e-06, "loss": 0.5117, "step": 3790 }, { "epoch": 0.47, "grad_norm": 1.5446791654566836, "learning_rate": 5.718987915407235e-06, "loss": 0.4558, "step": 3791 }, { "epoch": 0.47, "grad_norm": 1.7108875475636238, "learning_rate": 5.716999025201789e-06, "loss": 0.518, "step": 3792 }, { "epoch": 0.47, "grad_norm": 1.830804067213509, "learning_rate": 5.715010019158835e-06, "loss": 0.5068, "step": 3793 }, { "epoch": 0.47, "grad_norm": 1.4892383342831381, "learning_rate": 5.713020897599717e-06, "loss": 0.5711, "step": 3794 }, { "epoch": 0.47, "grad_norm": 1.6181834703601836, "learning_rate": 5.7110316608457905e-06, "loss": 0.492, "step": 3795 }, { "epoch": 0.47, "grad_norm": 10.807414076229394, "learning_rate": 5.70904230921844e-06, "loss": 0.4895, "step": 3796 }, { "epoch": 0.47, "grad_norm": 1.3334993049574357, "learning_rate": 5.7070528430390585e-06, "loss": 0.4797, "step": 3797 }, { "epoch": 0.47, "grad_norm": 1.481416961209663, "learning_rate": 5.705063262629062e-06, "loss": 0.521, "step": 3798 }, { "epoch": 0.47, "grad_norm": 1.5103932214495144, "learning_rate": 5.703073568309888e-06, "loss": 0.4994, "step": 3799 }, { "epoch": 0.47, "grad_norm": 1.697328962339054, "learning_rate": 5.701083760402984e-06, "loss": 0.552, "step": 3800 }, { "epoch": 0.47, "grad_norm": 1.4739255860006288, "learning_rate": 5.699093839229826e-06, "loss": 0.4767, "step": 3801 }, { "epoch": 0.47, "grad_norm": 1.634062539340076, "learning_rate": 5.697103805111901e-06, "loss": 0.4903, "step": 3802 }, { "epoch": 0.47, "grad_norm": 1.3965792045845735, "learning_rate": 5.695113658370717e-06, "loss": 0.5166, "step": 3803 }, { "epoch": 0.47, "grad_norm": 1.3024963936312466, "learning_rate": 5.693123399327798e-06, "loss": 0.5122, "step": 3804 }, { "epoch": 0.47, "grad_norm": 1.2964010800289418, "learning_rate": 5.691133028304691e-06, "loss": 0.4602, "step": 3805 }, { "epoch": 0.47, "grad_norm": 1.4052206110404424, "learning_rate": 5.689142545622955e-06, "loss": 0.5785, "step": 3806 }, { "epoch": 0.47, "grad_norm": 1.5247851003929445, "learning_rate": 5.687151951604173e-06, "loss": 0.5741, "step": 3807 }, { "epoch": 0.47, "grad_norm": 1.3450074532349305, "learning_rate": 5.685161246569939e-06, "loss": 0.4638, "step": 3808 }, { "epoch": 0.47, "grad_norm": 1.5339496052297188, "learning_rate": 5.6831704308418726e-06, "loss": 0.4924, "step": 3809 }, { "epoch": 0.47, "grad_norm": 1.3515665945195043, "learning_rate": 5.681179504741606e-06, "loss": 0.4642, "step": 3810 }, { "epoch": 0.47, "grad_norm": 1.5465504956664637, "learning_rate": 5.679188468590792e-06, "loss": 0.5482, "step": 3811 }, { "epoch": 0.47, "grad_norm": 1.4018583974536207, "learning_rate": 5.6771973227110976e-06, "loss": 0.526, "step": 3812 }, { "epoch": 0.47, "grad_norm": 1.2934414393005258, "learning_rate": 5.675206067424212e-06, "loss": 0.459, "step": 3813 }, { "epoch": 0.47, "grad_norm": 1.315842288116888, "learning_rate": 5.67321470305184e-06, "loss": 0.4843, "step": 3814 }, { "epoch": 0.47, "grad_norm": 1.3248801202211313, "learning_rate": 5.671223229915705e-06, "loss": 0.5124, "step": 3815 }, { "epoch": 0.47, "grad_norm": 1.2614354452804377, "learning_rate": 5.669231648337543e-06, "loss": 0.511, "step": 3816 }, { "epoch": 0.47, "grad_norm": 1.4200889081262933, "learning_rate": 5.667239958639114e-06, "loss": 0.4547, "step": 3817 }, { "epoch": 0.47, "grad_norm": 1.4868397770474286, "learning_rate": 5.665248161142196e-06, "loss": 0.4711, "step": 3818 }, { "epoch": 0.47, "grad_norm": 1.4873342914262366, "learning_rate": 5.663256256168577e-06, "loss": 0.5622, "step": 3819 }, { "epoch": 0.47, "grad_norm": 1.5527944075539648, "learning_rate": 5.661264244040067e-06, "loss": 0.5585, "step": 3820 }, { "epoch": 0.47, "grad_norm": 1.455195628107584, "learning_rate": 5.659272125078495e-06, "loss": 0.5281, "step": 3821 }, { "epoch": 0.47, "grad_norm": 1.4272424919821816, "learning_rate": 5.6572798996057065e-06, "loss": 0.4699, "step": 3822 }, { "epoch": 0.47, "grad_norm": 1.4537711024353266, "learning_rate": 5.65528756794356e-06, "loss": 0.526, "step": 3823 }, { "epoch": 0.47, "grad_norm": 1.7239883379658023, "learning_rate": 5.653295130413937e-06, "loss": 0.501, "step": 3824 }, { "epoch": 0.47, "grad_norm": 1.388762025067527, "learning_rate": 5.651302587338732e-06, "loss": 0.465, "step": 3825 }, { "epoch": 0.47, "grad_norm": 1.605357518243063, "learning_rate": 5.649309939039856e-06, "loss": 0.5448, "step": 3826 }, { "epoch": 0.47, "grad_norm": 1.3004710250305824, "learning_rate": 5.647317185839243e-06, "loss": 0.5147, "step": 3827 }, { "epoch": 0.48, "grad_norm": 1.2453765452625825, "learning_rate": 5.645324328058834e-06, "loss": 0.4939, "step": 3828 }, { "epoch": 0.48, "grad_norm": 1.6630221600508244, "learning_rate": 5.643331366020599e-06, "loss": 0.5313, "step": 3829 }, { "epoch": 0.48, "grad_norm": 2.1403267643695534, "learning_rate": 5.641338300046516e-06, "loss": 0.5347, "step": 3830 }, { "epoch": 0.48, "grad_norm": 1.6645397725384086, "learning_rate": 5.639345130458582e-06, "loss": 0.4903, "step": 3831 }, { "epoch": 0.48, "grad_norm": 1.30182281674491, "learning_rate": 5.637351857578811e-06, "loss": 0.4917, "step": 3832 }, { "epoch": 0.48, "grad_norm": 1.4362963547064533, "learning_rate": 5.635358481729234e-06, "loss": 0.5219, "step": 3833 }, { "epoch": 0.48, "grad_norm": 1.4382650532418015, "learning_rate": 5.6333650032318985e-06, "loss": 0.5203, "step": 3834 }, { "epoch": 0.48, "grad_norm": 1.3442530910752974, "learning_rate": 5.631371422408869e-06, "loss": 0.4943, "step": 3835 }, { "epoch": 0.48, "grad_norm": 1.4265060064893866, "learning_rate": 5.629377739582225e-06, "loss": 0.4498, "step": 3836 }, { "epoch": 0.48, "grad_norm": 4.216269417602553, "learning_rate": 5.627383955074064e-06, "loss": 0.477, "step": 3837 }, { "epoch": 0.48, "grad_norm": 1.8816201018509997, "learning_rate": 5.6253900692065e-06, "loss": 0.5216, "step": 3838 }, { "epoch": 0.48, "grad_norm": 1.5067962273330215, "learning_rate": 5.623396082301662e-06, "loss": 0.4927, "step": 3839 }, { "epoch": 0.48, "grad_norm": 1.553380864316869, "learning_rate": 5.6214019946816975e-06, "loss": 0.5067, "step": 3840 }, { "epoch": 0.48, "grad_norm": 2.2467239685806337, "learning_rate": 5.619407806668768e-06, "loss": 0.5472, "step": 3841 }, { "epoch": 0.48, "grad_norm": 1.3959033963800893, "learning_rate": 5.617413518585051e-06, "loss": 0.5428, "step": 3842 }, { "epoch": 0.48, "grad_norm": 1.833831320835665, "learning_rate": 5.615419130752743e-06, "loss": 0.5274, "step": 3843 }, { "epoch": 0.48, "grad_norm": 1.447765424375853, "learning_rate": 5.613424643494054e-06, "loss": 0.552, "step": 3844 }, { "epoch": 0.48, "grad_norm": 1.423418358339179, "learning_rate": 5.611430057131211e-06, "loss": 0.5416, "step": 3845 }, { "epoch": 0.48, "grad_norm": 1.9508924456607224, "learning_rate": 5.609435371986457e-06, "loss": 0.5314, "step": 3846 }, { "epoch": 0.48, "grad_norm": 1.1955084497773902, "learning_rate": 5.607440588382052e-06, "loss": 0.4636, "step": 3847 }, { "epoch": 0.48, "grad_norm": 1.8439394899829424, "learning_rate": 5.60544570664027e-06, "loss": 0.5107, "step": 3848 }, { "epoch": 0.48, "grad_norm": 1.515882843463731, "learning_rate": 5.6034507270834e-06, "loss": 0.5218, "step": 3849 }, { "epoch": 0.48, "grad_norm": 1.5241593853007185, "learning_rate": 5.601455650033754e-06, "loss": 0.5216, "step": 3850 }, { "epoch": 0.48, "grad_norm": 12.162683050587866, "learning_rate": 5.599460475813648e-06, "loss": 0.4684, "step": 3851 }, { "epoch": 0.48, "grad_norm": 2.2701973747754316, "learning_rate": 5.5974652047454235e-06, "loss": 0.5149, "step": 3852 }, { "epoch": 0.48, "grad_norm": 1.9791516248661194, "learning_rate": 5.595469837151432e-06, "loss": 0.531, "step": 3853 }, { "epoch": 0.48, "grad_norm": 1.2696017553188235, "learning_rate": 5.593474373354045e-06, "loss": 0.5118, "step": 3854 }, { "epoch": 0.48, "grad_norm": 1.3986543128280633, "learning_rate": 5.591478813675646e-06, "loss": 0.53, "step": 3855 }, { "epoch": 0.48, "grad_norm": 1.2703344434000077, "learning_rate": 5.589483158438636e-06, "loss": 0.5266, "step": 3856 }, { "epoch": 0.48, "grad_norm": 1.4011913209638538, "learning_rate": 5.587487407965429e-06, "loss": 0.5057, "step": 3857 }, { "epoch": 0.48, "grad_norm": 1.313243610761376, "learning_rate": 5.585491562578456e-06, "loss": 0.497, "step": 3858 }, { "epoch": 0.48, "grad_norm": 1.4951327213403844, "learning_rate": 5.583495622600168e-06, "loss": 0.5133, "step": 3859 }, { "epoch": 0.48, "grad_norm": 1.7132730355293824, "learning_rate": 5.581499588353021e-06, "loss": 0.4932, "step": 3860 }, { "epoch": 0.48, "grad_norm": 1.479779515901214, "learning_rate": 5.579503460159493e-06, "loss": 0.5218, "step": 3861 }, { "epoch": 0.48, "grad_norm": 1.3103752607086714, "learning_rate": 5.577507238342078e-06, "loss": 0.5094, "step": 3862 }, { "epoch": 0.48, "grad_norm": 1.5486202310935997, "learning_rate": 5.575510923223284e-06, "loss": 0.4926, "step": 3863 }, { "epoch": 0.48, "grad_norm": 8.033733047007289, "learning_rate": 5.57351451512563e-06, "loss": 0.4873, "step": 3864 }, { "epoch": 0.48, "grad_norm": 1.3273892616667802, "learning_rate": 5.5715180143716555e-06, "loss": 0.5261, "step": 3865 }, { "epoch": 0.48, "grad_norm": 1.2925855023656074, "learning_rate": 5.569521421283912e-06, "loss": 0.5083, "step": 3866 }, { "epoch": 0.48, "grad_norm": 1.1539965994659505, "learning_rate": 5.567524736184967e-06, "loss": 0.4574, "step": 3867 }, { "epoch": 0.48, "grad_norm": 1.5021009839892818, "learning_rate": 5.565527959397403e-06, "loss": 0.5179, "step": 3868 }, { "epoch": 0.48, "grad_norm": 1.2228970177505332, "learning_rate": 5.563531091243817e-06, "loss": 0.5128, "step": 3869 }, { "epoch": 0.48, "grad_norm": 1.7249298140042144, "learning_rate": 5.5615341320468195e-06, "loss": 0.5216, "step": 3870 }, { "epoch": 0.48, "grad_norm": 1.342039726006231, "learning_rate": 5.559537082129037e-06, "loss": 0.5276, "step": 3871 }, { "epoch": 0.48, "grad_norm": 1.660683024715625, "learning_rate": 5.5575399418131115e-06, "loss": 0.5323, "step": 3872 }, { "epoch": 0.48, "grad_norm": 1.7599077618364973, "learning_rate": 5.555542711421696e-06, "loss": 0.5335, "step": 3873 }, { "epoch": 0.48, "grad_norm": 1.4276717562059684, "learning_rate": 5.553545391277465e-06, "loss": 0.4936, "step": 3874 }, { "epoch": 0.48, "grad_norm": 3.0651420932368425, "learning_rate": 5.5515479817031005e-06, "loss": 0.5023, "step": 3875 }, { "epoch": 0.48, "grad_norm": 1.2854772402377153, "learning_rate": 5.549550483021302e-06, "loss": 0.4946, "step": 3876 }, { "epoch": 0.48, "grad_norm": 2.999653481852163, "learning_rate": 5.547552895554783e-06, "loss": 0.4761, "step": 3877 }, { "epoch": 0.48, "grad_norm": 2.355792789812382, "learning_rate": 5.545555219626271e-06, "loss": 0.5174, "step": 3878 }, { "epoch": 0.48, "grad_norm": 1.4044057073752125, "learning_rate": 5.543557455558509e-06, "loss": 0.528, "step": 3879 }, { "epoch": 0.48, "grad_norm": 1.4615035364180013, "learning_rate": 5.541559603674252e-06, "loss": 0.5201, "step": 3880 }, { "epoch": 0.48, "grad_norm": 2.052410206473402, "learning_rate": 5.539561664296273e-06, "loss": 0.519, "step": 3881 }, { "epoch": 0.48, "grad_norm": 1.7101247179212373, "learning_rate": 5.537563637747352e-06, "loss": 0.4933, "step": 3882 }, { "epoch": 0.48, "grad_norm": 1.4553625355001882, "learning_rate": 5.535565524350293e-06, "loss": 0.515, "step": 3883 }, { "epoch": 0.48, "grad_norm": 1.5823929048265364, "learning_rate": 5.533567324427906e-06, "loss": 0.5251, "step": 3884 }, { "epoch": 0.48, "grad_norm": 1.5125343261415436, "learning_rate": 5.5315690383030195e-06, "loss": 0.5091, "step": 3885 }, { "epoch": 0.48, "grad_norm": 0.6701063803891216, "learning_rate": 5.529570666298473e-06, "loss": 0.4574, "step": 3886 }, { "epoch": 0.48, "grad_norm": 1.7559956685205522, "learning_rate": 5.5275722087371205e-06, "loss": 0.5381, "step": 3887 }, { "epoch": 0.48, "grad_norm": 1.9654385967049164, "learning_rate": 5.525573665941833e-06, "loss": 0.5253, "step": 3888 }, { "epoch": 0.48, "grad_norm": 1.92878214412045, "learning_rate": 5.523575038235489e-06, "loss": 0.4617, "step": 3889 }, { "epoch": 0.48, "grad_norm": 2.9839273447811436, "learning_rate": 5.521576325940986e-06, "loss": 0.5056, "step": 3890 }, { "epoch": 0.48, "grad_norm": 1.448419304382558, "learning_rate": 5.519577529381235e-06, "loss": 0.509, "step": 3891 }, { "epoch": 0.48, "grad_norm": 1.6441958788761752, "learning_rate": 5.5175786488791575e-06, "loss": 0.5632, "step": 3892 }, { "epoch": 0.48, "grad_norm": 2.1335798112114635, "learning_rate": 5.515579684757691e-06, "loss": 0.5622, "step": 3893 }, { "epoch": 0.48, "grad_norm": 1.362047097744073, "learning_rate": 5.513580637339786e-06, "loss": 0.556, "step": 3894 }, { "epoch": 0.48, "grad_norm": 1.4222148143798927, "learning_rate": 5.511581506948407e-06, "loss": 0.4875, "step": 3895 }, { "epoch": 0.48, "grad_norm": 1.4232338129775202, "learning_rate": 5.509582293906528e-06, "loss": 0.4922, "step": 3896 }, { "epoch": 0.48, "grad_norm": 1.479210921980862, "learning_rate": 5.507582998537142e-06, "loss": 0.5159, "step": 3897 }, { "epoch": 0.48, "grad_norm": 1.3022543152571098, "learning_rate": 5.505583621163252e-06, "loss": 0.5361, "step": 3898 }, { "epoch": 0.48, "grad_norm": 1.3123499942506818, "learning_rate": 5.503584162107876e-06, "loss": 0.5051, "step": 3899 }, { "epoch": 0.48, "grad_norm": 1.434330700741348, "learning_rate": 5.501584621694043e-06, "loss": 0.5379, "step": 3900 }, { "epoch": 0.48, "grad_norm": 1.9198939466196938, "learning_rate": 5.4995850002447955e-06, "loss": 0.5165, "step": 3901 }, { "epoch": 0.48, "grad_norm": 1.5452717097371367, "learning_rate": 5.4975852980831925e-06, "loss": 0.4853, "step": 3902 }, { "epoch": 0.48, "grad_norm": 1.4825630920396455, "learning_rate": 5.495585515532302e-06, "loss": 0.526, "step": 3903 }, { "epoch": 0.48, "grad_norm": 1.3945136410814463, "learning_rate": 5.4935856529152075e-06, "loss": 0.477, "step": 3904 }, { "epoch": 0.48, "grad_norm": 1.8283627173286865, "learning_rate": 5.491585710555004e-06, "loss": 0.5037, "step": 3905 }, { "epoch": 0.48, "grad_norm": 4.863992368418988, "learning_rate": 5.489585688774798e-06, "loss": 0.5524, "step": 3906 }, { "epoch": 0.48, "grad_norm": 1.5047987926389876, "learning_rate": 5.487585587897713e-06, "loss": 0.4737, "step": 3907 }, { "epoch": 0.48, "grad_norm": 2.051631341269757, "learning_rate": 5.4855854082468805e-06, "loss": 0.5492, "step": 3908 }, { "epoch": 0.49, "grad_norm": 1.411897797407055, "learning_rate": 5.483585150145451e-06, "loss": 0.4651, "step": 3909 }, { "epoch": 0.49, "grad_norm": 1.5153953133448943, "learning_rate": 5.481584813916579e-06, "loss": 0.5339, "step": 3910 }, { "epoch": 0.49, "grad_norm": 1.5773017112958216, "learning_rate": 5.47958439988344e-06, "loss": 0.5031, "step": 3911 }, { "epoch": 0.49, "grad_norm": 1.4551035961762118, "learning_rate": 5.477583908369219e-06, "loss": 0.5042, "step": 3912 }, { "epoch": 0.49, "grad_norm": 1.6593828724311739, "learning_rate": 5.47558333969711e-06, "loss": 0.5674, "step": 3913 }, { "epoch": 0.49, "grad_norm": 1.52905879375427, "learning_rate": 5.473582694190323e-06, "loss": 0.4978, "step": 3914 }, { "epoch": 0.49, "grad_norm": 2.068302465264425, "learning_rate": 5.471581972172082e-06, "loss": 0.4969, "step": 3915 }, { "epoch": 0.49, "grad_norm": 1.9660457837978043, "learning_rate": 5.4695811739656204e-06, "loss": 0.5076, "step": 3916 }, { "epoch": 0.49, "grad_norm": 1.45853978070074, "learning_rate": 5.467580299894183e-06, "loss": 0.5606, "step": 3917 }, { "epoch": 0.49, "grad_norm": 0.664872886856364, "learning_rate": 5.465579350281032e-06, "loss": 0.513, "step": 3918 }, { "epoch": 0.49, "grad_norm": 1.6934631719056283, "learning_rate": 5.463578325449434e-06, "loss": 0.5506, "step": 3919 }, { "epoch": 0.49, "grad_norm": 3.0296482449817947, "learning_rate": 5.461577225722676e-06, "loss": 0.5128, "step": 3920 }, { "epoch": 0.49, "grad_norm": 0.6661246497699034, "learning_rate": 5.4595760514240525e-06, "loss": 0.4678, "step": 3921 }, { "epoch": 0.49, "grad_norm": 1.3550930215234538, "learning_rate": 5.45757480287687e-06, "loss": 0.5224, "step": 3922 }, { "epoch": 0.49, "grad_norm": 1.4528187165438429, "learning_rate": 5.455573480404448e-06, "loss": 0.512, "step": 3923 }, { "epoch": 0.49, "grad_norm": 1.4543519260418145, "learning_rate": 5.453572084330117e-06, "loss": 0.4873, "step": 3924 }, { "epoch": 0.49, "grad_norm": 1.4530409679251879, "learning_rate": 5.451570614977223e-06, "loss": 0.5308, "step": 3925 }, { "epoch": 0.49, "grad_norm": 1.4345990352945623, "learning_rate": 5.449569072669119e-06, "loss": 0.4699, "step": 3926 }, { "epoch": 0.49, "grad_norm": 1.3333642766386664, "learning_rate": 5.447567457729169e-06, "loss": 0.4703, "step": 3927 }, { "epoch": 0.49, "grad_norm": 1.9541630497777558, "learning_rate": 5.4455657704807566e-06, "loss": 0.5087, "step": 3928 }, { "epoch": 0.49, "grad_norm": 1.5383476148156443, "learning_rate": 5.44356401124727e-06, "loss": 0.5101, "step": 3929 }, { "epoch": 0.49, "grad_norm": 1.3848178623750136, "learning_rate": 5.44156218035211e-06, "loss": 0.5336, "step": 3930 }, { "epoch": 0.49, "grad_norm": 1.2823093070822174, "learning_rate": 5.439560278118692e-06, "loss": 0.4547, "step": 3931 }, { "epoch": 0.49, "grad_norm": 1.7596416413390763, "learning_rate": 5.437558304870438e-06, "loss": 0.5239, "step": 3932 }, { "epoch": 0.49, "grad_norm": 1.6311320117875125, "learning_rate": 5.435556260930788e-06, "loss": 0.4999, "step": 3933 }, { "epoch": 0.49, "grad_norm": 2.1424810969438024, "learning_rate": 5.433554146623187e-06, "loss": 0.4954, "step": 3934 }, { "epoch": 0.49, "grad_norm": 1.2357186619718303, "learning_rate": 5.431551962271094e-06, "loss": 0.5075, "step": 3935 }, { "epoch": 0.49, "grad_norm": 1.5599698797491244, "learning_rate": 5.429549708197982e-06, "loss": 0.4683, "step": 3936 }, { "epoch": 0.49, "grad_norm": 1.531008438280879, "learning_rate": 5.427547384727332e-06, "loss": 0.5502, "step": 3937 }, { "epoch": 0.49, "grad_norm": 1.4741978722495628, "learning_rate": 5.425544992182636e-06, "loss": 0.5169, "step": 3938 }, { "epoch": 0.49, "grad_norm": 1.8507114164050131, "learning_rate": 5.423542530887399e-06, "loss": 0.4601, "step": 3939 }, { "epoch": 0.49, "grad_norm": 1.3637993319871706, "learning_rate": 5.421540001165135e-06, "loss": 0.5606, "step": 3940 }, { "epoch": 0.49, "grad_norm": 1.7277813484271007, "learning_rate": 5.419537403339372e-06, "loss": 0.4859, "step": 3941 }, { "epoch": 0.49, "grad_norm": 2.2992120001099763, "learning_rate": 5.417534737733648e-06, "loss": 0.5243, "step": 3942 }, { "epoch": 0.49, "grad_norm": 1.3523696755445922, "learning_rate": 5.415532004671506e-06, "loss": 0.5216, "step": 3943 }, { "epoch": 0.49, "grad_norm": 1.4589926049928805, "learning_rate": 5.413529204476512e-06, "loss": 0.4862, "step": 3944 }, { "epoch": 0.49, "grad_norm": 1.334457517749518, "learning_rate": 5.411526337472232e-06, "loss": 0.5187, "step": 3945 }, { "epoch": 0.49, "grad_norm": 1.3357269722684226, "learning_rate": 5.409523403982247e-06, "loss": 0.5084, "step": 3946 }, { "epoch": 0.49, "grad_norm": 1.596655906918947, "learning_rate": 5.40752040433015e-06, "loss": 0.4783, "step": 3947 }, { "epoch": 0.49, "grad_norm": 1.386972195789598, "learning_rate": 5.4055173388395445e-06, "loss": 0.5405, "step": 3948 }, { "epoch": 0.49, "grad_norm": 1.3719441891820827, "learning_rate": 5.40351420783404e-06, "loss": 0.4992, "step": 3949 }, { "epoch": 0.49, "grad_norm": 1.5172485941704512, "learning_rate": 5.40151101163726e-06, "loss": 0.5282, "step": 3950 }, { "epoch": 0.49, "grad_norm": 1.4264798256383389, "learning_rate": 5.399507750572841e-06, "loss": 0.5071, "step": 3951 }, { "epoch": 0.49, "grad_norm": 1.87510756362639, "learning_rate": 5.397504424964426e-06, "loss": 0.5177, "step": 3952 }, { "epoch": 0.49, "grad_norm": 1.3567163748524484, "learning_rate": 5.395501035135671e-06, "loss": 0.4396, "step": 3953 }, { "epoch": 0.49, "grad_norm": 1.3409907238416823, "learning_rate": 5.39349758141024e-06, "loss": 0.4629, "step": 3954 }, { "epoch": 0.49, "grad_norm": 1.982873474793292, "learning_rate": 5.391494064111809e-06, "loss": 0.4955, "step": 3955 }, { "epoch": 0.49, "grad_norm": 1.508085533195063, "learning_rate": 5.389490483564064e-06, "loss": 0.5198, "step": 3956 }, { "epoch": 0.49, "grad_norm": 1.2250701542250673, "learning_rate": 5.387486840090701e-06, "loss": 0.4803, "step": 3957 }, { "epoch": 0.49, "grad_norm": 1.4641385971210494, "learning_rate": 5.3854831340154265e-06, "loss": 0.5253, "step": 3958 }, { "epoch": 0.49, "grad_norm": 1.3998754036190544, "learning_rate": 5.383479365661958e-06, "loss": 0.4961, "step": 3959 }, { "epoch": 0.49, "grad_norm": 1.4065888899634549, "learning_rate": 5.381475535354018e-06, "loss": 0.5066, "step": 3960 }, { "epoch": 0.49, "grad_norm": 1.4691889688853568, "learning_rate": 5.379471643415347e-06, "loss": 0.5421, "step": 3961 }, { "epoch": 0.49, "grad_norm": 1.39716223039857, "learning_rate": 5.37746769016969e-06, "loss": 0.5345, "step": 3962 }, { "epoch": 0.49, "grad_norm": 1.2562632841531238, "learning_rate": 5.375463675940803e-06, "loss": 0.4499, "step": 3963 }, { "epoch": 0.49, "grad_norm": 1.8008475312845498, "learning_rate": 5.373459601052451e-06, "loss": 0.5108, "step": 3964 }, { "epoch": 0.49, "grad_norm": 1.483453578025183, "learning_rate": 5.3714554658284125e-06, "loss": 0.565, "step": 3965 }, { "epoch": 0.49, "grad_norm": 1.4975414533192573, "learning_rate": 5.369451270592472e-06, "loss": 0.5173, "step": 3966 }, { "epoch": 0.49, "grad_norm": 2.123985067004447, "learning_rate": 5.3674470156684255e-06, "loss": 0.522, "step": 3967 }, { "epoch": 0.49, "grad_norm": 0.6500700336932811, "learning_rate": 5.365442701380077e-06, "loss": 0.4349, "step": 3968 }, { "epoch": 0.49, "grad_norm": 1.6029152251625887, "learning_rate": 5.3634383280512416e-06, "loss": 0.5308, "step": 3969 }, { "epoch": 0.49, "grad_norm": 1.3769653475610781, "learning_rate": 5.361433896005743e-06, "loss": 0.5098, "step": 3970 }, { "epoch": 0.49, "grad_norm": 7.020545630737324, "learning_rate": 5.359429405567415e-06, "loss": 0.5218, "step": 3971 }, { "epoch": 0.49, "grad_norm": 1.4426438371728305, "learning_rate": 5.357424857060102e-06, "loss": 0.5149, "step": 3972 }, { "epoch": 0.49, "grad_norm": 1.498625679032598, "learning_rate": 5.355420250807654e-06, "loss": 0.5784, "step": 3973 }, { "epoch": 0.49, "grad_norm": 1.337563009204282, "learning_rate": 5.353415587133936e-06, "loss": 0.5322, "step": 3974 }, { "epoch": 0.49, "grad_norm": 2.1258963333162684, "learning_rate": 5.351410866362816e-06, "loss": 0.5215, "step": 3975 }, { "epoch": 0.49, "grad_norm": 2.1845941268352767, "learning_rate": 5.349406088818176e-06, "loss": 0.5404, "step": 3976 }, { "epoch": 0.49, "grad_norm": 1.5751923241999841, "learning_rate": 5.347401254823906e-06, "loss": 0.5373, "step": 3977 }, { "epoch": 0.49, "grad_norm": 1.646247402251166, "learning_rate": 5.3453963647039035e-06, "loss": 0.5345, "step": 3978 }, { "epoch": 0.49, "grad_norm": 1.58443216877823, "learning_rate": 5.343391418782076e-06, "loss": 0.4974, "step": 3979 }, { "epoch": 0.49, "grad_norm": 4.9367408343514425, "learning_rate": 5.341386417382338e-06, "loss": 0.4804, "step": 3980 }, { "epoch": 0.49, "grad_norm": 1.3965703455304954, "learning_rate": 5.33938136082862e-06, "loss": 0.5394, "step": 3981 }, { "epoch": 0.49, "grad_norm": 1.590559268954731, "learning_rate": 5.3373762494448546e-06, "loss": 0.5371, "step": 3982 }, { "epoch": 0.49, "grad_norm": 1.320429476244967, "learning_rate": 5.335371083554984e-06, "loss": 0.53, "step": 3983 }, { "epoch": 0.49, "grad_norm": 1.421899153361156, "learning_rate": 5.333365863482961e-06, "loss": 0.4955, "step": 3984 }, { "epoch": 0.49, "grad_norm": 1.5542947739027118, "learning_rate": 5.331360589552746e-06, "loss": 0.4968, "step": 3985 }, { "epoch": 0.49, "grad_norm": 0.7457943000103398, "learning_rate": 5.3293552620883115e-06, "loss": 0.4566, "step": 3986 }, { "epoch": 0.49, "grad_norm": 4.2672149801902926, "learning_rate": 5.327349881413632e-06, "loss": 0.5328, "step": 3987 }, { "epoch": 0.49, "grad_norm": 1.6192367900745737, "learning_rate": 5.325344447852696e-06, "loss": 0.508, "step": 3988 }, { "epoch": 0.5, "grad_norm": 1.478967517170313, "learning_rate": 5.323338961729499e-06, "loss": 0.5139, "step": 3989 }, { "epoch": 0.5, "grad_norm": 1.5728731128199156, "learning_rate": 5.321333423368047e-06, "loss": 0.5522, "step": 3990 }, { "epoch": 0.5, "grad_norm": 1.3371364747051662, "learning_rate": 5.319327833092348e-06, "loss": 0.4853, "step": 3991 }, { "epoch": 0.5, "grad_norm": 0.6996233064142633, "learning_rate": 5.317322191226426e-06, "loss": 0.4729, "step": 3992 }, { "epoch": 0.5, "grad_norm": 1.4012552294803955, "learning_rate": 5.3153164980943104e-06, "loss": 0.5021, "step": 3993 }, { "epoch": 0.5, "grad_norm": 1.4430297337479316, "learning_rate": 5.313310754020037e-06, "loss": 0.4965, "step": 3994 }, { "epoch": 0.5, "grad_norm": 1.2621347231021145, "learning_rate": 5.311304959327651e-06, "loss": 0.4459, "step": 3995 }, { "epoch": 0.5, "grad_norm": 1.2817572185185382, "learning_rate": 5.309299114341209e-06, "loss": 0.5423, "step": 3996 }, { "epoch": 0.5, "grad_norm": 1.5992598858188514, "learning_rate": 5.307293219384768e-06, "loss": 0.4952, "step": 3997 }, { "epoch": 0.5, "grad_norm": 1.3738706690239122, "learning_rate": 5.305287274782403e-06, "loss": 0.5252, "step": 3998 }, { "epoch": 0.5, "grad_norm": 1.422877770604774, "learning_rate": 5.303281280858189e-06, "loss": 0.5104, "step": 3999 }, { "epoch": 0.5, "grad_norm": 1.4768927119527948, "learning_rate": 5.301275237936214e-06, "loss": 0.5426, "step": 4000 }, { "epoch": 0.5, "grad_norm": 1.3443790195812775, "learning_rate": 5.29926914634057e-06, "loss": 0.511, "step": 4001 }, { "epoch": 0.5, "grad_norm": 0.7210003573811843, "learning_rate": 5.29726300639536e-06, "loss": 0.5112, "step": 4002 }, { "epoch": 0.5, "grad_norm": 1.4934549647405275, "learning_rate": 5.295256818424695e-06, "loss": 0.5028, "step": 4003 }, { "epoch": 0.5, "grad_norm": 1.5346083778331399, "learning_rate": 5.293250582752689e-06, "loss": 0.5074, "step": 4004 }, { "epoch": 0.5, "grad_norm": 1.51676023911406, "learning_rate": 5.291244299703469e-06, "loss": 0.4786, "step": 4005 }, { "epoch": 0.5, "grad_norm": 1.2407408253081538, "learning_rate": 5.2892379696011665e-06, "loss": 0.549, "step": 4006 }, { "epoch": 0.5, "grad_norm": 1.482395526869878, "learning_rate": 5.2872315927699235e-06, "loss": 0.5327, "step": 4007 }, { "epoch": 0.5, "grad_norm": 1.9826848283440506, "learning_rate": 5.2852251695338865e-06, "loss": 0.5153, "step": 4008 }, { "epoch": 0.5, "grad_norm": 2.1060888524002794, "learning_rate": 5.283218700217211e-06, "loss": 0.5266, "step": 4009 }, { "epoch": 0.5, "grad_norm": 1.4164346549674205, "learning_rate": 5.28121218514406e-06, "loss": 0.4965, "step": 4010 }, { "epoch": 0.5, "grad_norm": 1.4669936305099687, "learning_rate": 5.279205624638605e-06, "loss": 0.4723, "step": 4011 }, { "epoch": 0.5, "grad_norm": 1.4057306748118408, "learning_rate": 5.277199019025022e-06, "loss": 0.4843, "step": 4012 }, { "epoch": 0.5, "grad_norm": 1.4158292097211684, "learning_rate": 5.275192368627495e-06, "loss": 0.5183, "step": 4013 }, { "epoch": 0.5, "grad_norm": 1.347733607048643, "learning_rate": 5.2731856737702195e-06, "loss": 0.5201, "step": 4014 }, { "epoch": 0.5, "grad_norm": 1.4507919841534922, "learning_rate": 5.27117893477739e-06, "loss": 0.4904, "step": 4015 }, { "epoch": 0.5, "grad_norm": 1.351191256523609, "learning_rate": 5.269172151973216e-06, "loss": 0.5062, "step": 4016 }, { "epoch": 0.5, "grad_norm": 1.8170541743140898, "learning_rate": 5.26716532568191e-06, "loss": 0.5185, "step": 4017 }, { "epoch": 0.5, "grad_norm": 1.8948850199277016, "learning_rate": 5.2651584562276935e-06, "loss": 0.5088, "step": 4018 }, { "epoch": 0.5, "grad_norm": 1.4895022409198124, "learning_rate": 5.263151543934792e-06, "loss": 0.4905, "step": 4019 }, { "epoch": 0.5, "grad_norm": 1.2034136078269118, "learning_rate": 5.261144589127441e-06, "loss": 0.4706, "step": 4020 }, { "epoch": 0.5, "grad_norm": 1.5148654943676552, "learning_rate": 5.259137592129883e-06, "loss": 0.5068, "step": 4021 }, { "epoch": 0.5, "grad_norm": 1.7539595585635952, "learning_rate": 5.257130553266364e-06, "loss": 0.4596, "step": 4022 }, { "epoch": 0.5, "grad_norm": 2.1276164530176103, "learning_rate": 5.25512347286114e-06, "loss": 0.4614, "step": 4023 }, { "epoch": 0.5, "grad_norm": 1.293833851355961, "learning_rate": 5.253116351238472e-06, "loss": 0.4858, "step": 4024 }, { "epoch": 0.5, "grad_norm": 1.488875806713043, "learning_rate": 5.251109188722626e-06, "loss": 0.5054, "step": 4025 }, { "epoch": 0.5, "grad_norm": 1.436624795404598, "learning_rate": 5.249101985637881e-06, "loss": 0.5032, "step": 4026 }, { "epoch": 0.5, "grad_norm": 1.621443769321251, "learning_rate": 5.247094742308516e-06, "loss": 0.4775, "step": 4027 }, { "epoch": 0.5, "grad_norm": 1.9501526582620479, "learning_rate": 5.24508745905882e-06, "loss": 0.5201, "step": 4028 }, { "epoch": 0.5, "grad_norm": 2.042610189384628, "learning_rate": 5.243080136213085e-06, "loss": 0.544, "step": 4029 }, { "epoch": 0.5, "grad_norm": 1.5994079979309168, "learning_rate": 5.241072774095615e-06, "loss": 0.5709, "step": 4030 }, { "epoch": 0.5, "grad_norm": 1.251606242450616, "learning_rate": 5.239065373030713e-06, "loss": 0.4817, "step": 4031 }, { "epoch": 0.5, "grad_norm": 1.4523263770339998, "learning_rate": 5.237057933342696e-06, "loss": 0.4666, "step": 4032 }, { "epoch": 0.5, "grad_norm": 1.3870858370349122, "learning_rate": 5.235050455355881e-06, "loss": 0.5148, "step": 4033 }, { "epoch": 0.5, "grad_norm": 1.2956128600540535, "learning_rate": 5.233042939394595e-06, "loss": 0.5353, "step": 4034 }, { "epoch": 0.5, "grad_norm": 1.5130642043818001, "learning_rate": 5.2310353857831695e-06, "loss": 0.5045, "step": 4035 }, { "epoch": 0.5, "grad_norm": 1.4965739798426347, "learning_rate": 5.229027794845944e-06, "loss": 0.5177, "step": 4036 }, { "epoch": 0.5, "grad_norm": 2.3940798964222623, "learning_rate": 5.227020166907259e-06, "loss": 0.5138, "step": 4037 }, { "epoch": 0.5, "grad_norm": 1.216930430982062, "learning_rate": 5.225012502291469e-06, "loss": 0.4939, "step": 4038 }, { "epoch": 0.5, "grad_norm": 1.4608752793636535, "learning_rate": 5.223004801322926e-06, "loss": 0.5409, "step": 4039 }, { "epoch": 0.5, "grad_norm": 1.4213725178723116, "learning_rate": 5.220997064325994e-06, "loss": 0.5562, "step": 4040 }, { "epoch": 0.5, "grad_norm": 1.3870879937572156, "learning_rate": 5.21898929162504e-06, "loss": 0.5173, "step": 4041 }, { "epoch": 0.5, "grad_norm": 2.782760226940039, "learning_rate": 5.2169814835444356e-06, "loss": 0.4981, "step": 4042 }, { "epoch": 0.5, "grad_norm": 0.6988563115846317, "learning_rate": 5.214973640408563e-06, "loss": 0.4882, "step": 4043 }, { "epoch": 0.5, "grad_norm": 1.3902236234519256, "learning_rate": 5.2129657625418055e-06, "loss": 0.458, "step": 4044 }, { "epoch": 0.5, "grad_norm": 1.3850148995382587, "learning_rate": 5.2109578502685534e-06, "loss": 0.5024, "step": 4045 }, { "epoch": 0.5, "grad_norm": 1.3471538316223168, "learning_rate": 5.208949903913201e-06, "loss": 0.4689, "step": 4046 }, { "epoch": 0.5, "grad_norm": 1.6714409613819254, "learning_rate": 5.206941923800154e-06, "loss": 0.5532, "step": 4047 }, { "epoch": 0.5, "grad_norm": 1.2658631922999917, "learning_rate": 5.2049339102538154e-06, "loss": 0.4786, "step": 4048 }, { "epoch": 0.5, "grad_norm": 1.511130438261548, "learning_rate": 5.202925863598599e-06, "loss": 0.5137, "step": 4049 }, { "epoch": 0.5, "grad_norm": 1.3184918589934416, "learning_rate": 5.200917784158921e-06, "loss": 0.4591, "step": 4050 }, { "epoch": 0.5, "grad_norm": 2.237884065088822, "learning_rate": 5.1989096722592055e-06, "loss": 0.4556, "step": 4051 }, { "epoch": 0.5, "grad_norm": 1.2726858470097968, "learning_rate": 5.1969015282238824e-06, "loss": 0.4671, "step": 4052 }, { "epoch": 0.5, "grad_norm": 1.4979136717841783, "learning_rate": 5.1948933523773824e-06, "loss": 0.5146, "step": 4053 }, { "epoch": 0.5, "grad_norm": 1.503751302350897, "learning_rate": 5.192885145044143e-06, "loss": 0.449, "step": 4054 }, { "epoch": 0.5, "grad_norm": 2.3001077377892787, "learning_rate": 5.190876906548612e-06, "loss": 0.4992, "step": 4055 }, { "epoch": 0.5, "grad_norm": 1.35780055003118, "learning_rate": 5.188868637215235e-06, "loss": 0.5042, "step": 4056 }, { "epoch": 0.5, "grad_norm": 1.8096393235457786, "learning_rate": 5.186860337368468e-06, "loss": 0.4997, "step": 4057 }, { "epoch": 0.5, "grad_norm": 1.7976803429353938, "learning_rate": 5.184852007332765e-06, "loss": 0.5438, "step": 4058 }, { "epoch": 0.5, "grad_norm": 1.4295847395483166, "learning_rate": 5.182843647432593e-06, "loss": 0.5195, "step": 4059 }, { "epoch": 0.5, "grad_norm": 1.329781113500144, "learning_rate": 5.180835257992419e-06, "loss": 0.4692, "step": 4060 }, { "epoch": 0.5, "grad_norm": 1.4198247587720287, "learning_rate": 5.178826839336718e-06, "loss": 0.5149, "step": 4061 }, { "epoch": 0.5, "grad_norm": 1.45463475933709, "learning_rate": 5.176818391789964e-06, "loss": 0.4831, "step": 4062 }, { "epoch": 0.5, "grad_norm": 1.405498695171607, "learning_rate": 5.174809915676643e-06, "loss": 0.5151, "step": 4063 }, { "epoch": 0.5, "grad_norm": 1.8636479793019347, "learning_rate": 5.17280141132124e-06, "loss": 0.5218, "step": 4064 }, { "epoch": 0.5, "grad_norm": 1.3187889761676121, "learning_rate": 5.170792879048248e-06, "loss": 0.5031, "step": 4065 }, { "epoch": 0.5, "grad_norm": 1.2851291534308047, "learning_rate": 5.168784319182161e-06, "loss": 0.4685, "step": 4066 }, { "epoch": 0.5, "grad_norm": 3.901671464065162, "learning_rate": 5.166775732047481e-06, "loss": 0.4776, "step": 4067 }, { "epoch": 0.5, "grad_norm": 1.577965546141581, "learning_rate": 5.164767117968713e-06, "loss": 0.504, "step": 4068 }, { "epoch": 0.5, "grad_norm": 2.611583306216855, "learning_rate": 5.162758477270366e-06, "loss": 0.5139, "step": 4069 }, { "epoch": 0.51, "grad_norm": 1.4722771473445617, "learning_rate": 5.160749810276952e-06, "loss": 0.5049, "step": 4070 }, { "epoch": 0.51, "grad_norm": 0.6841236873282883, "learning_rate": 5.158741117312992e-06, "loss": 0.4823, "step": 4071 }, { "epoch": 0.51, "grad_norm": 1.7283913002786766, "learning_rate": 5.156732398703007e-06, "loss": 0.486, "step": 4072 }, { "epoch": 0.51, "grad_norm": 1.3644169274208862, "learning_rate": 5.154723654771522e-06, "loss": 0.5316, "step": 4073 }, { "epoch": 0.51, "grad_norm": 1.412978580938532, "learning_rate": 5.1527148858430675e-06, "loss": 0.5205, "step": 4074 }, { "epoch": 0.51, "grad_norm": 1.304308989910714, "learning_rate": 5.150706092242178e-06, "loss": 0.5112, "step": 4075 }, { "epoch": 0.51, "grad_norm": 1.5096581064631402, "learning_rate": 5.148697274293392e-06, "loss": 0.5137, "step": 4076 }, { "epoch": 0.51, "grad_norm": 1.9507912798218543, "learning_rate": 5.146688432321253e-06, "loss": 0.5665, "step": 4077 }, { "epoch": 0.51, "grad_norm": 1.3974637208424106, "learning_rate": 5.144679566650306e-06, "loss": 0.5266, "step": 4078 }, { "epoch": 0.51, "grad_norm": 1.381186236440111, "learning_rate": 5.1426706776050985e-06, "loss": 0.4926, "step": 4079 }, { "epoch": 0.51, "grad_norm": 1.2908982111875704, "learning_rate": 5.140661765510187e-06, "loss": 0.5299, "step": 4080 }, { "epoch": 0.51, "grad_norm": 1.517278250782335, "learning_rate": 5.138652830690129e-06, "loss": 0.5097, "step": 4081 }, { "epoch": 0.51, "grad_norm": 1.7679776693034879, "learning_rate": 5.136643873469487e-06, "loss": 0.536, "step": 4082 }, { "epoch": 0.51, "grad_norm": 1.3584650382939063, "learning_rate": 5.1346348941728215e-06, "loss": 0.4941, "step": 4083 }, { "epoch": 0.51, "grad_norm": 1.260066066177889, "learning_rate": 5.132625893124704e-06, "loss": 0.5343, "step": 4084 }, { "epoch": 0.51, "grad_norm": 1.2417882558433795, "learning_rate": 5.130616870649705e-06, "loss": 0.5519, "step": 4085 }, { "epoch": 0.51, "grad_norm": 1.4348244112899506, "learning_rate": 5.1286078270724e-06, "loss": 0.5182, "step": 4086 }, { "epoch": 0.51, "grad_norm": 1.3761732902076147, "learning_rate": 5.126598762717367e-06, "loss": 0.5643, "step": 4087 }, { "epoch": 0.51, "grad_norm": 7.093279684038697, "learning_rate": 5.1245896779091905e-06, "loss": 0.544, "step": 4088 }, { "epoch": 0.51, "grad_norm": 1.1953773828090084, "learning_rate": 5.122580572972453e-06, "loss": 0.4644, "step": 4089 }, { "epoch": 0.51, "grad_norm": 1.335699984289526, "learning_rate": 5.120571448231746e-06, "loss": 0.481, "step": 4090 }, { "epoch": 0.51, "grad_norm": 1.539519553974143, "learning_rate": 5.118562304011657e-06, "loss": 0.5513, "step": 4091 }, { "epoch": 0.51, "grad_norm": 1.3014018277586958, "learning_rate": 5.116553140636788e-06, "loss": 0.5649, "step": 4092 }, { "epoch": 0.51, "grad_norm": 1.2963592808820954, "learning_rate": 5.114543958431729e-06, "loss": 0.5241, "step": 4093 }, { "epoch": 0.51, "grad_norm": 1.3751884600265256, "learning_rate": 5.112534757721086e-06, "loss": 0.4718, "step": 4094 }, { "epoch": 0.51, "grad_norm": 0.6805985594689253, "learning_rate": 5.110525538829461e-06, "loss": 0.5219, "step": 4095 }, { "epoch": 0.51, "grad_norm": 0.7427122972421231, "learning_rate": 5.108516302081461e-06, "loss": 0.5227, "step": 4096 }, { "epoch": 0.51, "grad_norm": 1.4777276883977413, "learning_rate": 5.106507047801699e-06, "loss": 0.4717, "step": 4097 }, { "epoch": 0.51, "grad_norm": 1.4404767805534364, "learning_rate": 5.104497776314784e-06, "loss": 0.4985, "step": 4098 }, { "epoch": 0.51, "grad_norm": 1.32514183192607, "learning_rate": 5.102488487945332e-06, "loss": 0.4796, "step": 4099 }, { "epoch": 0.51, "grad_norm": 1.6300526573029417, "learning_rate": 5.100479183017963e-06, "loss": 0.5484, "step": 4100 }, { "epoch": 0.51, "grad_norm": 1.506233040859698, "learning_rate": 5.098469861857299e-06, "loss": 0.4938, "step": 4101 }, { "epoch": 0.51, "grad_norm": 1.5854909157924568, "learning_rate": 5.09646052478796e-06, "loss": 0.5012, "step": 4102 }, { "epoch": 0.51, "grad_norm": 2.7334239667693274, "learning_rate": 5.094451172134573e-06, "loss": 0.5402, "step": 4103 }, { "epoch": 0.51, "grad_norm": 1.421782694911038, "learning_rate": 5.092441804221767e-06, "loss": 0.5402, "step": 4104 }, { "epoch": 0.51, "grad_norm": 1.2380410240615574, "learning_rate": 5.090432421374175e-06, "loss": 0.4782, "step": 4105 }, { "epoch": 0.51, "grad_norm": 1.8233363976081143, "learning_rate": 5.0884230239164274e-06, "loss": 0.5275, "step": 4106 }, { "epoch": 0.51, "grad_norm": 1.2157977078482316, "learning_rate": 5.0864136121731614e-06, "loss": 0.5032, "step": 4107 }, { "epoch": 0.51, "grad_norm": 2.0628077114535106, "learning_rate": 5.084404186469016e-06, "loss": 0.529, "step": 4108 }, { "epoch": 0.51, "grad_norm": 1.4673681464236095, "learning_rate": 5.082394747128632e-06, "loss": 0.5267, "step": 4109 }, { "epoch": 0.51, "grad_norm": 1.405043504400975, "learning_rate": 5.08038529447665e-06, "loss": 0.4953, "step": 4110 }, { "epoch": 0.51, "grad_norm": 1.426360350391599, "learning_rate": 5.078375828837716e-06, "loss": 0.5208, "step": 4111 }, { "epoch": 0.51, "grad_norm": 1.8072437193277209, "learning_rate": 5.0763663505364754e-06, "loss": 0.5057, "step": 4112 }, { "epoch": 0.51, "grad_norm": 1.3660490562593417, "learning_rate": 5.07435685989758e-06, "loss": 0.462, "step": 4113 }, { "epoch": 0.51, "grad_norm": 1.5486600415029512, "learning_rate": 5.072347357245678e-06, "loss": 0.5372, "step": 4114 }, { "epoch": 0.51, "grad_norm": 1.616974280163713, "learning_rate": 5.0703378429054226e-06, "loss": 0.5241, "step": 4115 }, { "epoch": 0.51, "grad_norm": 1.4809366060080675, "learning_rate": 5.06832831720147e-06, "loss": 0.542, "step": 4116 }, { "epoch": 0.51, "grad_norm": 2.8635109413540314, "learning_rate": 5.066318780458476e-06, "loss": 0.5301, "step": 4117 }, { "epoch": 0.51, "grad_norm": 0.6926137434244817, "learning_rate": 5.064309233001099e-06, "loss": 0.4694, "step": 4118 }, { "epoch": 0.51, "grad_norm": 1.1567850084651718, "learning_rate": 5.062299675153999e-06, "loss": 0.4616, "step": 4119 }, { "epoch": 0.51, "grad_norm": 1.600980417226766, "learning_rate": 5.0602901072418375e-06, "loss": 0.4872, "step": 4120 }, { "epoch": 0.51, "grad_norm": 1.6067968795720464, "learning_rate": 5.058280529589279e-06, "loss": 0.4697, "step": 4121 }, { "epoch": 0.51, "grad_norm": 1.3858276131809577, "learning_rate": 5.056270942520986e-06, "loss": 0.5071, "step": 4122 }, { "epoch": 0.51, "grad_norm": 1.5759322132167428, "learning_rate": 5.054261346361628e-06, "loss": 0.5134, "step": 4123 }, { "epoch": 0.51, "grad_norm": 1.7641683615692467, "learning_rate": 5.0522517414358705e-06, "loss": 0.5418, "step": 4124 }, { "epoch": 0.51, "grad_norm": 1.5814241502874156, "learning_rate": 5.050242128068386e-06, "loss": 0.453, "step": 4125 }, { "epoch": 0.51, "grad_norm": 1.2912461088706648, "learning_rate": 5.048232506583841e-06, "loss": 0.4845, "step": 4126 }, { "epoch": 0.51, "grad_norm": 1.2778937846886074, "learning_rate": 5.046222877306911e-06, "loss": 0.4403, "step": 4127 }, { "epoch": 0.51, "grad_norm": 2.1456675490207737, "learning_rate": 5.044213240562268e-06, "loss": 0.5408, "step": 4128 }, { "epoch": 0.51, "grad_norm": 1.6392616148549624, "learning_rate": 5.042203596674586e-06, "loss": 0.4769, "step": 4129 }, { "epoch": 0.51, "grad_norm": 1.4748949968945793, "learning_rate": 5.040193945968542e-06, "loss": 0.507, "step": 4130 }, { "epoch": 0.51, "grad_norm": 1.385296056790979, "learning_rate": 5.038184288768813e-06, "loss": 0.5147, "step": 4131 }, { "epoch": 0.51, "grad_norm": 1.812264408884099, "learning_rate": 5.036174625400073e-06, "loss": 0.5232, "step": 4132 }, { "epoch": 0.51, "grad_norm": 1.2807690983806912, "learning_rate": 5.034164956187006e-06, "loss": 0.53, "step": 4133 }, { "epoch": 0.51, "grad_norm": 1.4308465971303546, "learning_rate": 5.032155281454288e-06, "loss": 0.5092, "step": 4134 }, { "epoch": 0.51, "grad_norm": 1.2571579009281904, "learning_rate": 5.030145601526603e-06, "loss": 0.5141, "step": 4135 }, { "epoch": 0.51, "grad_norm": 3.4038444686476588, "learning_rate": 5.028135916728628e-06, "loss": 0.4968, "step": 4136 }, { "epoch": 0.51, "grad_norm": 1.5252270252095017, "learning_rate": 5.02612622738505e-06, "loss": 0.4903, "step": 4137 }, { "epoch": 0.51, "grad_norm": 1.8263454315055303, "learning_rate": 5.024116533820549e-06, "loss": 0.5004, "step": 4138 }, { "epoch": 0.51, "grad_norm": 2.1238635399425574, "learning_rate": 5.02210683635981e-06, "loss": 0.516, "step": 4139 }, { "epoch": 0.51, "grad_norm": 1.491279320066013, "learning_rate": 5.020097135327515e-06, "loss": 0.4862, "step": 4140 }, { "epoch": 0.51, "grad_norm": 1.3561697321845458, "learning_rate": 5.018087431048353e-06, "loss": 0.4839, "step": 4141 }, { "epoch": 0.51, "grad_norm": 1.375828041356734, "learning_rate": 5.016077723847006e-06, "loss": 0.5209, "step": 4142 }, { "epoch": 0.51, "grad_norm": 1.5082826103778804, "learning_rate": 5.0140680140481625e-06, "loss": 0.546, "step": 4143 }, { "epoch": 0.51, "grad_norm": 1.4036655538213862, "learning_rate": 5.012058301976505e-06, "loss": 0.5226, "step": 4144 }, { "epoch": 0.51, "grad_norm": 1.3933326710765965, "learning_rate": 5.010048587956724e-06, "loss": 0.4945, "step": 4145 }, { "epoch": 0.51, "grad_norm": 1.506118935176337, "learning_rate": 5.008038872313506e-06, "loss": 0.5511, "step": 4146 }, { "epoch": 0.51, "grad_norm": 1.5381169525014873, "learning_rate": 5.006029155371538e-06, "loss": 0.5581, "step": 4147 }, { "epoch": 0.51, "grad_norm": 1.2389131059981193, "learning_rate": 5.004019437455504e-06, "loss": 0.4614, "step": 4148 }, { "epoch": 0.51, "grad_norm": 1.3076621463048856, "learning_rate": 5.0020097188900965e-06, "loss": 0.5189, "step": 4149 }, { "epoch": 0.52, "grad_norm": 1.5725310274284423, "learning_rate": 5e-06, "loss": 0.469, "step": 4150 }, { "epoch": 0.52, "grad_norm": 1.4551892770544779, "learning_rate": 4.997990281109905e-06, "loss": 0.5506, "step": 4151 }, { "epoch": 0.52, "grad_norm": 1.5266131165054042, "learning_rate": 4.995980562544497e-06, "loss": 0.5118, "step": 4152 }, { "epoch": 0.52, "grad_norm": 1.4304922054207752, "learning_rate": 4.993970844628464e-06, "loss": 0.5216, "step": 4153 }, { "epoch": 0.52, "grad_norm": 1.3661929147010907, "learning_rate": 4.9919611276864956e-06, "loss": 0.4755, "step": 4154 }, { "epoch": 0.52, "grad_norm": 1.5063251153366095, "learning_rate": 4.989951412043276e-06, "loss": 0.4918, "step": 4155 }, { "epoch": 0.52, "grad_norm": 1.3266955956864537, "learning_rate": 4.987941698023495e-06, "loss": 0.5007, "step": 4156 }, { "epoch": 0.52, "grad_norm": 1.594738069327744, "learning_rate": 4.985931985951839e-06, "loss": 0.5322, "step": 4157 }, { "epoch": 0.52, "grad_norm": 1.3599941087299574, "learning_rate": 4.983922276152995e-06, "loss": 0.5543, "step": 4158 }, { "epoch": 0.52, "grad_norm": 1.6960652624546686, "learning_rate": 4.981912568951649e-06, "loss": 0.4587, "step": 4159 }, { "epoch": 0.52, "grad_norm": 1.6547482077030211, "learning_rate": 4.979902864672486e-06, "loss": 0.5651, "step": 4160 }, { "epoch": 0.52, "grad_norm": 1.4706748716930576, "learning_rate": 4.977893163640193e-06, "loss": 0.4898, "step": 4161 }, { "epoch": 0.52, "grad_norm": 1.4434440513618652, "learning_rate": 4.975883466179453e-06, "loss": 0.4831, "step": 4162 }, { "epoch": 0.52, "grad_norm": 1.9645279976236119, "learning_rate": 4.973873772614952e-06, "loss": 0.4389, "step": 4163 }, { "epoch": 0.52, "grad_norm": 0.7249626792549597, "learning_rate": 4.9718640832713725e-06, "loss": 0.5114, "step": 4164 }, { "epoch": 0.52, "grad_norm": 1.7902153666586336, "learning_rate": 4.9698543984733995e-06, "loss": 0.5163, "step": 4165 }, { "epoch": 0.52, "grad_norm": 1.784462551091202, "learning_rate": 4.967844718545713e-06, "loss": 0.4856, "step": 4166 }, { "epoch": 0.52, "grad_norm": 5.968567016437116, "learning_rate": 4.965835043812996e-06, "loss": 0.5301, "step": 4167 }, { "epoch": 0.52, "grad_norm": 1.415022168086972, "learning_rate": 4.963825374599929e-06, "loss": 0.495, "step": 4168 }, { "epoch": 0.52, "grad_norm": 1.6551021929136456, "learning_rate": 4.96181571123119e-06, "loss": 0.4626, "step": 4169 }, { "epoch": 0.52, "grad_norm": 1.2770436177440663, "learning_rate": 4.959806054031459e-06, "loss": 0.4402, "step": 4170 }, { "epoch": 0.52, "grad_norm": 1.4890483786874007, "learning_rate": 4.957796403325415e-06, "loss": 0.535, "step": 4171 }, { "epoch": 0.52, "grad_norm": 0.6292700008320747, "learning_rate": 4.955786759437733e-06, "loss": 0.4561, "step": 4172 }, { "epoch": 0.52, "grad_norm": 1.3794138507763596, "learning_rate": 4.9537771226930895e-06, "loss": 0.5358, "step": 4173 }, { "epoch": 0.52, "grad_norm": 1.2242660546252222, "learning_rate": 4.9517674934161595e-06, "loss": 0.4633, "step": 4174 }, { "epoch": 0.52, "grad_norm": 1.644565449493744, "learning_rate": 4.949757871931616e-06, "loss": 0.4738, "step": 4175 }, { "epoch": 0.52, "grad_norm": 1.5253593098849336, "learning_rate": 4.94774825856413e-06, "loss": 0.5339, "step": 4176 }, { "epoch": 0.52, "grad_norm": 1.691103468736044, "learning_rate": 4.945738653638374e-06, "loss": 0.4447, "step": 4177 }, { "epoch": 0.52, "grad_norm": 1.3440088062239994, "learning_rate": 4.943729057479016e-06, "loss": 0.5049, "step": 4178 }, { "epoch": 0.52, "grad_norm": 1.55358748969524, "learning_rate": 4.941719470410722e-06, "loss": 0.5487, "step": 4179 }, { "epoch": 0.52, "grad_norm": 1.6662063178996205, "learning_rate": 4.9397098927581625e-06, "loss": 0.5347, "step": 4180 }, { "epoch": 0.52, "grad_norm": 2.1629719586070864, "learning_rate": 4.937700324846002e-06, "loss": 0.5606, "step": 4181 }, { "epoch": 0.52, "grad_norm": 1.7344846885416572, "learning_rate": 4.935690766998902e-06, "loss": 0.5023, "step": 4182 }, { "epoch": 0.52, "grad_norm": 1.9817220234093216, "learning_rate": 4.9336812195415256e-06, "loss": 0.452, "step": 4183 }, { "epoch": 0.52, "grad_norm": 1.3397533477646433, "learning_rate": 4.931671682798532e-06, "loss": 0.4803, "step": 4184 }, { "epoch": 0.52, "grad_norm": 1.7432222684048106, "learning_rate": 4.929662157094579e-06, "loss": 0.5058, "step": 4185 }, { "epoch": 0.52, "grad_norm": 1.7210622686065686, "learning_rate": 4.9276526427543246e-06, "loss": 0.506, "step": 4186 }, { "epoch": 0.52, "grad_norm": 1.4401480942162428, "learning_rate": 4.925643140102421e-06, "loss": 0.4744, "step": 4187 }, { "epoch": 0.52, "grad_norm": 1.3887530249461608, "learning_rate": 4.9236336494635245e-06, "loss": 0.4719, "step": 4188 }, { "epoch": 0.52, "grad_norm": 1.6669286955765157, "learning_rate": 4.921624171162285e-06, "loss": 0.5595, "step": 4189 }, { "epoch": 0.52, "grad_norm": 1.3289537902911408, "learning_rate": 4.919614705523352e-06, "loss": 0.5222, "step": 4190 }, { "epoch": 0.52, "grad_norm": 0.6561483477939754, "learning_rate": 4.91760525287137e-06, "loss": 0.4604, "step": 4191 }, { "epoch": 0.52, "grad_norm": 1.2549264933188877, "learning_rate": 4.915595813530985e-06, "loss": 0.4916, "step": 4192 }, { "epoch": 0.52, "grad_norm": 1.337661073623808, "learning_rate": 4.913586387826839e-06, "loss": 0.5058, "step": 4193 }, { "epoch": 0.52, "grad_norm": 1.7660930060464695, "learning_rate": 4.911576976083574e-06, "loss": 0.4937, "step": 4194 }, { "epoch": 0.52, "grad_norm": 3.4433537947099384, "learning_rate": 4.909567578625828e-06, "loss": 0.5052, "step": 4195 }, { "epoch": 0.52, "grad_norm": 1.5505560786292858, "learning_rate": 4.907558195778233e-06, "loss": 0.4929, "step": 4196 }, { "epoch": 0.52, "grad_norm": 3.0204960000285306, "learning_rate": 4.905548827865428e-06, "loss": 0.4991, "step": 4197 }, { "epoch": 0.52, "grad_norm": 1.2185001831453082, "learning_rate": 4.903539475212042e-06, "loss": 0.4375, "step": 4198 }, { "epoch": 0.52, "grad_norm": 1.3643582538915, "learning_rate": 4.9015301381427024e-06, "loss": 0.4703, "step": 4199 }, { "epoch": 0.52, "grad_norm": 1.8839576911566032, "learning_rate": 4.899520816982038e-06, "loss": 0.5241, "step": 4200 }, { "epoch": 0.52, "grad_norm": 1.3994381417308601, "learning_rate": 4.8975115120546696e-06, "loss": 0.4646, "step": 4201 }, { "epoch": 0.52, "grad_norm": 1.495212091403681, "learning_rate": 4.895502223685219e-06, "loss": 0.5456, "step": 4202 }, { "epoch": 0.52, "grad_norm": 1.6878448599306082, "learning_rate": 4.8934929521983045e-06, "loss": 0.5805, "step": 4203 }, { "epoch": 0.52, "grad_norm": 1.4437728881272724, "learning_rate": 4.891483697918539e-06, "loss": 0.479, "step": 4204 }, { "epoch": 0.52, "grad_norm": 1.7700415243388306, "learning_rate": 4.88947446117054e-06, "loss": 0.4935, "step": 4205 }, { "epoch": 0.52, "grad_norm": 2.5026598746843285, "learning_rate": 4.887465242278915e-06, "loss": 0.5104, "step": 4206 }, { "epoch": 0.52, "grad_norm": 1.2791913508237092, "learning_rate": 4.885456041568272e-06, "loss": 0.4952, "step": 4207 }, { "epoch": 0.52, "grad_norm": 1.3321160626968813, "learning_rate": 4.883446859363215e-06, "loss": 0.5013, "step": 4208 }, { "epoch": 0.52, "grad_norm": 0.6531483540609091, "learning_rate": 4.881437695988344e-06, "loss": 0.5052, "step": 4209 }, { "epoch": 0.52, "grad_norm": 1.8734086791695874, "learning_rate": 4.8794285517682565e-06, "loss": 0.5431, "step": 4210 }, { "epoch": 0.52, "grad_norm": 1.6352629459090195, "learning_rate": 4.877419427027548e-06, "loss": 0.4828, "step": 4211 }, { "epoch": 0.52, "grad_norm": 1.498449057211703, "learning_rate": 4.87541032209081e-06, "loss": 0.5139, "step": 4212 }, { "epoch": 0.52, "grad_norm": 1.4920133056165215, "learning_rate": 4.873401237282634e-06, "loss": 0.5033, "step": 4213 }, { "epoch": 0.52, "grad_norm": 1.4502112098248143, "learning_rate": 4.8713921729276015e-06, "loss": 0.5103, "step": 4214 }, { "epoch": 0.52, "grad_norm": 0.6614611334638816, "learning_rate": 4.869383129350297e-06, "loss": 0.4647, "step": 4215 }, { "epoch": 0.52, "grad_norm": 0.7022236557980238, "learning_rate": 4.867374106875298e-06, "loss": 0.4935, "step": 4216 }, { "epoch": 0.52, "grad_norm": 1.7900496633745702, "learning_rate": 4.86536510582718e-06, "loss": 0.5193, "step": 4217 }, { "epoch": 0.52, "grad_norm": 1.3757970981568506, "learning_rate": 4.8633561265305156e-06, "loss": 0.4933, "step": 4218 }, { "epoch": 0.52, "grad_norm": 1.868217808906103, "learning_rate": 4.8613471693098724e-06, "loss": 0.5326, "step": 4219 }, { "epoch": 0.52, "grad_norm": 1.2677455892833875, "learning_rate": 4.859338234489813e-06, "loss": 0.5134, "step": 4220 }, { "epoch": 0.52, "grad_norm": 1.3549139939818315, "learning_rate": 4.857329322394902e-06, "loss": 0.5593, "step": 4221 }, { "epoch": 0.52, "grad_norm": 1.7574557695669282, "learning_rate": 4.8553204333496965e-06, "loss": 0.5168, "step": 4222 }, { "epoch": 0.52, "grad_norm": 1.4457527589125292, "learning_rate": 4.853311567678748e-06, "loss": 0.5191, "step": 4223 }, { "epoch": 0.52, "grad_norm": 1.4256473720212115, "learning_rate": 4.8513027257066085e-06, "loss": 0.4653, "step": 4224 }, { "epoch": 0.52, "grad_norm": 1.6287755213141875, "learning_rate": 4.849293907757823e-06, "loss": 0.5237, "step": 4225 }, { "epoch": 0.52, "grad_norm": 1.655830538154988, "learning_rate": 4.847285114156934e-06, "loss": 0.5446, "step": 4226 }, { "epoch": 0.52, "grad_norm": 2.4664096866038583, "learning_rate": 4.84527634522848e-06, "loss": 0.5061, "step": 4227 }, { "epoch": 0.52, "grad_norm": 1.6320403680583409, "learning_rate": 4.843267601296994e-06, "loss": 0.5454, "step": 4228 }, { "epoch": 0.52, "grad_norm": 2.450134674129634, "learning_rate": 4.8412588826870075e-06, "loss": 0.5739, "step": 4229 }, { "epoch": 0.52, "grad_norm": 0.6707231135387525, "learning_rate": 4.839250189723048e-06, "loss": 0.5417, "step": 4230 }, { "epoch": 0.53, "grad_norm": 2.1014948435593066, "learning_rate": 4.8372415227296355e-06, "loss": 0.4824, "step": 4231 }, { "epoch": 0.53, "grad_norm": 1.5850577118025315, "learning_rate": 4.835232882031288e-06, "loss": 0.5486, "step": 4232 }, { "epoch": 0.53, "grad_norm": 1.363920654716586, "learning_rate": 4.83322426795252e-06, "loss": 0.5245, "step": 4233 }, { "epoch": 0.53, "grad_norm": 1.2817202214537684, "learning_rate": 4.8312156808178405e-06, "loss": 0.5268, "step": 4234 }, { "epoch": 0.53, "grad_norm": 1.5467511661175979, "learning_rate": 4.829207120951754e-06, "loss": 0.4896, "step": 4235 }, { "epoch": 0.53, "grad_norm": 1.147514907824803, "learning_rate": 4.827198588678761e-06, "loss": 0.4676, "step": 4236 }, { "epoch": 0.53, "grad_norm": 1.349527074343018, "learning_rate": 4.825190084323358e-06, "loss": 0.5278, "step": 4237 }, { "epoch": 0.53, "grad_norm": 1.2386225759661265, "learning_rate": 4.823181608210036e-06, "loss": 0.4553, "step": 4238 }, { "epoch": 0.53, "grad_norm": 1.3773870018897933, "learning_rate": 4.821173160663284e-06, "loss": 0.4915, "step": 4239 }, { "epoch": 0.53, "grad_norm": 1.337943712069457, "learning_rate": 4.819164742007582e-06, "loss": 0.4422, "step": 4240 }, { "epoch": 0.53, "grad_norm": 2.50843833816639, "learning_rate": 4.817156352567409e-06, "loss": 0.4631, "step": 4241 }, { "epoch": 0.53, "grad_norm": 1.888360839786836, "learning_rate": 4.815147992667237e-06, "loss": 0.4285, "step": 4242 }, { "epoch": 0.53, "grad_norm": 1.4492756322046154, "learning_rate": 4.813139662631535e-06, "loss": 0.5241, "step": 4243 }, { "epoch": 0.53, "grad_norm": 1.597220166897354, "learning_rate": 4.811131362784766e-06, "loss": 0.4924, "step": 4244 }, { "epoch": 0.53, "grad_norm": 1.6341422036164526, "learning_rate": 4.809123093451388e-06, "loss": 0.4793, "step": 4245 }, { "epoch": 0.53, "grad_norm": 1.381380178994785, "learning_rate": 4.807114854955856e-06, "loss": 0.5316, "step": 4246 }, { "epoch": 0.53, "grad_norm": 0.6357430017650092, "learning_rate": 4.805106647622619e-06, "loss": 0.5142, "step": 4247 }, { "epoch": 0.53, "grad_norm": 1.323250740425993, "learning_rate": 4.803098471776119e-06, "loss": 0.5756, "step": 4248 }, { "epoch": 0.53, "grad_norm": 1.2960946149258648, "learning_rate": 4.801090327740795e-06, "loss": 0.517, "step": 4249 }, { "epoch": 0.53, "grad_norm": 1.3783769541175654, "learning_rate": 4.799082215841081e-06, "loss": 0.4731, "step": 4250 }, { "epoch": 0.53, "grad_norm": 1.4238922609555158, "learning_rate": 4.797074136401403e-06, "loss": 0.5305, "step": 4251 }, { "epoch": 0.53, "grad_norm": 2.3187213899984527, "learning_rate": 4.795066089746187e-06, "loss": 0.5133, "step": 4252 }, { "epoch": 0.53, "grad_norm": 1.5319986153504062, "learning_rate": 4.793058076199847e-06, "loss": 0.4949, "step": 4253 }, { "epoch": 0.53, "grad_norm": 1.3313211380770655, "learning_rate": 4.791050096086799e-06, "loss": 0.5175, "step": 4254 }, { "epoch": 0.53, "grad_norm": 1.661121794113625, "learning_rate": 4.789042149731448e-06, "loss": 0.5314, "step": 4255 }, { "epoch": 0.53, "grad_norm": 1.8520098290199518, "learning_rate": 4.787034237458195e-06, "loss": 0.5665, "step": 4256 }, { "epoch": 0.53, "grad_norm": 1.4383615798536171, "learning_rate": 4.785026359591438e-06, "loss": 0.4781, "step": 4257 }, { "epoch": 0.53, "grad_norm": 2.36015549004612, "learning_rate": 4.783018516455565e-06, "loss": 0.5016, "step": 4258 }, { "epoch": 0.53, "grad_norm": 2.8240998590538453, "learning_rate": 4.781010708374963e-06, "loss": 0.5126, "step": 4259 }, { "epoch": 0.53, "grad_norm": 1.4355718796301644, "learning_rate": 4.779002935674008e-06, "loss": 0.5586, "step": 4260 }, { "epoch": 0.53, "grad_norm": 0.7266807431996253, "learning_rate": 4.776995198677075e-06, "loss": 0.5162, "step": 4261 }, { "epoch": 0.53, "grad_norm": 1.2775623472876219, "learning_rate": 4.774987497708533e-06, "loss": 0.4685, "step": 4262 }, { "epoch": 0.53, "grad_norm": 2.307364784532959, "learning_rate": 4.7729798330927415e-06, "loss": 0.4875, "step": 4263 }, { "epoch": 0.53, "grad_norm": 1.393824074495462, "learning_rate": 4.770972205154058e-06, "loss": 0.4782, "step": 4264 }, { "epoch": 0.53, "grad_norm": 1.7554410138205054, "learning_rate": 4.768964614216831e-06, "loss": 0.4668, "step": 4265 }, { "epoch": 0.53, "grad_norm": 2.3652602730204615, "learning_rate": 4.7669570606054066e-06, "loss": 0.5051, "step": 4266 }, { "epoch": 0.53, "grad_norm": 1.4637991891015811, "learning_rate": 4.764949544644121e-06, "loss": 0.5407, "step": 4267 }, { "epoch": 0.53, "grad_norm": 1.2055380948074947, "learning_rate": 4.7629420666573065e-06, "loss": 0.4802, "step": 4268 }, { "epoch": 0.53, "grad_norm": 1.3646970283412756, "learning_rate": 4.760934626969289e-06, "loss": 0.464, "step": 4269 }, { "epoch": 0.53, "grad_norm": 1.2475512160334674, "learning_rate": 4.7589272259043875e-06, "loss": 0.4824, "step": 4270 }, { "epoch": 0.53, "grad_norm": 1.382321088190782, "learning_rate": 4.756919863786916e-06, "loss": 0.4819, "step": 4271 }, { "epoch": 0.53, "grad_norm": 1.2062445168984832, "learning_rate": 4.754912540941182e-06, "loss": 0.5103, "step": 4272 }, { "epoch": 0.53, "grad_norm": 1.3685375269870788, "learning_rate": 4.752905257691485e-06, "loss": 0.4959, "step": 4273 }, { "epoch": 0.53, "grad_norm": 0.6638309922683546, "learning_rate": 4.7508980143621205e-06, "loss": 0.4928, "step": 4274 }, { "epoch": 0.53, "grad_norm": 1.4235012573773838, "learning_rate": 4.7488908112773755e-06, "loss": 0.4925, "step": 4275 }, { "epoch": 0.53, "grad_norm": 1.6268177268925723, "learning_rate": 4.746883648761531e-06, "loss": 0.4982, "step": 4276 }, { "epoch": 0.53, "grad_norm": 1.4566520518954882, "learning_rate": 4.744876527138863e-06, "loss": 0.5318, "step": 4277 }, { "epoch": 0.53, "grad_norm": 1.4686807392611485, "learning_rate": 4.742869446733636e-06, "loss": 0.4866, "step": 4278 }, { "epoch": 0.53, "grad_norm": 1.3052234583127929, "learning_rate": 4.740862407870118e-06, "loss": 0.5233, "step": 4279 }, { "epoch": 0.53, "grad_norm": 1.4928138119168188, "learning_rate": 4.7388554108725594e-06, "loss": 0.4965, "step": 4280 }, { "epoch": 0.53, "grad_norm": 1.3119229364651537, "learning_rate": 4.7368484560652085e-06, "loss": 0.4925, "step": 4281 }, { "epoch": 0.53, "grad_norm": 1.8819516435959767, "learning_rate": 4.734841543772308e-06, "loss": 0.5072, "step": 4282 }, { "epoch": 0.53, "grad_norm": 1.3552684872325136, "learning_rate": 4.732834674318091e-06, "loss": 0.4941, "step": 4283 }, { "epoch": 0.53, "grad_norm": 1.502677241539126, "learning_rate": 4.7308278480267865e-06, "loss": 0.4369, "step": 4284 }, { "epoch": 0.53, "grad_norm": 1.28365550736728, "learning_rate": 4.728821065222612e-06, "loss": 0.4956, "step": 4285 }, { "epoch": 0.53, "grad_norm": 1.4078836243131156, "learning_rate": 4.726814326229781e-06, "loss": 0.4885, "step": 4286 }, { "epoch": 0.53, "grad_norm": 1.3401141295793377, "learning_rate": 4.724807631372505e-06, "loss": 0.4562, "step": 4287 }, { "epoch": 0.53, "grad_norm": 1.6390815902227274, "learning_rate": 4.722800980974979e-06, "loss": 0.4953, "step": 4288 }, { "epoch": 0.53, "grad_norm": 1.6719051916157315, "learning_rate": 4.720794375361397e-06, "loss": 0.4831, "step": 4289 }, { "epoch": 0.53, "grad_norm": 1.3668822128904665, "learning_rate": 4.718787814855942e-06, "loss": 0.509, "step": 4290 }, { "epoch": 0.53, "grad_norm": 1.2587487927128795, "learning_rate": 4.716781299782791e-06, "loss": 0.4483, "step": 4291 }, { "epoch": 0.53, "grad_norm": 1.4839484448291642, "learning_rate": 4.714774830466116e-06, "loss": 0.5056, "step": 4292 }, { "epoch": 0.53, "grad_norm": 0.6584679793549012, "learning_rate": 4.71276840723008e-06, "loss": 0.5071, "step": 4293 }, { "epoch": 0.53, "grad_norm": 1.4562233356371959, "learning_rate": 4.7107620303988335e-06, "loss": 0.5153, "step": 4294 }, { "epoch": 0.53, "grad_norm": 1.2737569846521757, "learning_rate": 4.708755700296532e-06, "loss": 0.4427, "step": 4295 }, { "epoch": 0.53, "grad_norm": 1.3727841747162295, "learning_rate": 4.706749417247312e-06, "loss": 0.4871, "step": 4296 }, { "epoch": 0.53, "grad_norm": 1.5004984388484188, "learning_rate": 4.704743181575306e-06, "loss": 0.4623, "step": 4297 }, { "epoch": 0.53, "grad_norm": 1.4457080763319423, "learning_rate": 4.7027369936046415e-06, "loss": 0.4888, "step": 4298 }, { "epoch": 0.53, "grad_norm": 1.476081348114596, "learning_rate": 4.700730853659432e-06, "loss": 0.4998, "step": 4299 }, { "epoch": 0.53, "grad_norm": 1.3457600490722774, "learning_rate": 4.698724762063789e-06, "loss": 0.4999, "step": 4300 }, { "epoch": 0.53, "grad_norm": 1.5710943313782562, "learning_rate": 4.696718719141813e-06, "loss": 0.5094, "step": 4301 }, { "epoch": 0.53, "grad_norm": 2.023508681986042, "learning_rate": 4.694712725217598e-06, "loss": 0.5668, "step": 4302 }, { "epoch": 0.53, "grad_norm": 1.6785659078411073, "learning_rate": 4.692706780615232e-06, "loss": 0.5215, "step": 4303 }, { "epoch": 0.53, "grad_norm": 1.3301372259571647, "learning_rate": 4.690700885658793e-06, "loss": 0.4892, "step": 4304 }, { "epoch": 0.53, "grad_norm": 1.942794004902333, "learning_rate": 4.68869504067235e-06, "loss": 0.5059, "step": 4305 }, { "epoch": 0.53, "grad_norm": 1.3480045867939938, "learning_rate": 4.686689245979965e-06, "loss": 0.5091, "step": 4306 }, { "epoch": 0.53, "grad_norm": 1.4420165452073062, "learning_rate": 4.68468350190569e-06, "loss": 0.48, "step": 4307 }, { "epoch": 0.53, "grad_norm": 1.4199701993205618, "learning_rate": 4.682677808773576e-06, "loss": 0.5201, "step": 4308 }, { "epoch": 0.53, "grad_norm": 1.2548107192351894, "learning_rate": 4.680672166907654e-06, "loss": 0.4661, "step": 4309 }, { "epoch": 0.53, "grad_norm": 1.5109225918516362, "learning_rate": 4.678666576631956e-06, "loss": 0.5373, "step": 4310 }, { "epoch": 0.53, "grad_norm": 1.2714281680666961, "learning_rate": 4.676661038270501e-06, "loss": 0.5001, "step": 4311 }, { "epoch": 0.54, "grad_norm": 1.3277386522117836, "learning_rate": 4.674655552147305e-06, "loss": 0.5494, "step": 4312 }, { "epoch": 0.54, "grad_norm": 1.4401191668265765, "learning_rate": 4.6726501185863694e-06, "loss": 0.4942, "step": 4313 }, { "epoch": 0.54, "grad_norm": 1.3129932472587458, "learning_rate": 4.67064473791169e-06, "loss": 0.5189, "step": 4314 }, { "epoch": 0.54, "grad_norm": 1.5058725829686856, "learning_rate": 4.668639410447255e-06, "loss": 0.4832, "step": 4315 }, { "epoch": 0.54, "grad_norm": 1.5243590945691707, "learning_rate": 4.666634136517041e-06, "loss": 0.4935, "step": 4316 }, { "epoch": 0.54, "grad_norm": 2.614027731511778, "learning_rate": 4.664628916445018e-06, "loss": 0.547, "step": 4317 }, { "epoch": 0.54, "grad_norm": 1.7247810144381126, "learning_rate": 4.662623750555149e-06, "loss": 0.5399, "step": 4318 }, { "epoch": 0.54, "grad_norm": 1.5402121533125563, "learning_rate": 4.6606186391713805e-06, "loss": 0.4774, "step": 4319 }, { "epoch": 0.54, "grad_norm": 1.282588310149704, "learning_rate": 4.6586135826176625e-06, "loss": 0.533, "step": 4320 }, { "epoch": 0.54, "grad_norm": 2.6803085872164045, "learning_rate": 4.6566085812179265e-06, "loss": 0.5164, "step": 4321 }, { "epoch": 0.54, "grad_norm": 1.3154255919996705, "learning_rate": 4.654603635296098e-06, "loss": 0.5056, "step": 4322 }, { "epoch": 0.54, "grad_norm": 1.266041772416684, "learning_rate": 4.652598745176095e-06, "loss": 0.4811, "step": 4323 }, { "epoch": 0.54, "grad_norm": 1.3636206626837686, "learning_rate": 4.6505939111818246e-06, "loss": 0.4925, "step": 4324 }, { "epoch": 0.54, "grad_norm": 1.478486601519347, "learning_rate": 4.648589133637185e-06, "loss": 0.4737, "step": 4325 }, { "epoch": 0.54, "grad_norm": 2.5515659815698486, "learning_rate": 4.646584412866065e-06, "loss": 0.4896, "step": 4326 }, { "epoch": 0.54, "grad_norm": 1.2139131098187366, "learning_rate": 4.644579749192346e-06, "loss": 0.4533, "step": 4327 }, { "epoch": 0.54, "grad_norm": 1.3647906625218897, "learning_rate": 4.642575142939898e-06, "loss": 0.5035, "step": 4328 }, { "epoch": 0.54, "grad_norm": 1.2970981149774912, "learning_rate": 4.640570594432586e-06, "loss": 0.5153, "step": 4329 }, { "epoch": 0.54, "grad_norm": 1.4040798471253462, "learning_rate": 4.638566103994258e-06, "loss": 0.552, "step": 4330 }, { "epoch": 0.54, "grad_norm": 2.6276214042065345, "learning_rate": 4.63656167194876e-06, "loss": 0.501, "step": 4331 }, { "epoch": 0.54, "grad_norm": 1.5530490248697577, "learning_rate": 4.634557298619924e-06, "loss": 0.5374, "step": 4332 }, { "epoch": 0.54, "grad_norm": 1.3407778756048003, "learning_rate": 4.632552984331576e-06, "loss": 0.4578, "step": 4333 }, { "epoch": 0.54, "grad_norm": 1.4791583074513865, "learning_rate": 4.630548729407529e-06, "loss": 0.5266, "step": 4334 }, { "epoch": 0.54, "grad_norm": 1.4249788214778287, "learning_rate": 4.6285445341715875e-06, "loss": 0.4824, "step": 4335 }, { "epoch": 0.54, "grad_norm": 1.2562567504953168, "learning_rate": 4.626540398947549e-06, "loss": 0.4949, "step": 4336 }, { "epoch": 0.54, "grad_norm": 1.321673506920769, "learning_rate": 4.624536324059199e-06, "loss": 0.51, "step": 4337 }, { "epoch": 0.54, "grad_norm": 1.7009338899251323, "learning_rate": 4.622532309830312e-06, "loss": 0.5161, "step": 4338 }, { "epoch": 0.54, "grad_norm": 1.3804748517386487, "learning_rate": 4.620528356584655e-06, "loss": 0.5386, "step": 4339 }, { "epoch": 0.54, "grad_norm": 1.2710254221528257, "learning_rate": 4.6185244646459835e-06, "loss": 0.4611, "step": 4340 }, { "epoch": 0.54, "grad_norm": 1.8608849766501319, "learning_rate": 4.616520634338045e-06, "loss": 0.471, "step": 4341 }, { "epoch": 0.54, "grad_norm": 0.6798704097529361, "learning_rate": 4.614516865984575e-06, "loss": 0.4846, "step": 4342 }, { "epoch": 0.54, "grad_norm": 2.1922263872791192, "learning_rate": 4.6125131599092995e-06, "loss": 0.4922, "step": 4343 }, { "epoch": 0.54, "grad_norm": 1.409966446941975, "learning_rate": 4.610509516435937e-06, "loss": 0.542, "step": 4344 }, { "epoch": 0.54, "grad_norm": 1.2967390492106146, "learning_rate": 4.608505935888192e-06, "loss": 0.5277, "step": 4345 }, { "epoch": 0.54, "grad_norm": 1.4520281467958223, "learning_rate": 4.606502418589762e-06, "loss": 0.4888, "step": 4346 }, { "epoch": 0.54, "grad_norm": 1.5444683940931594, "learning_rate": 4.604498964864331e-06, "loss": 0.5162, "step": 4347 }, { "epoch": 0.54, "grad_norm": 1.4585220753140886, "learning_rate": 4.6024955750355755e-06, "loss": 0.4734, "step": 4348 }, { "epoch": 0.54, "grad_norm": 2.054117529489168, "learning_rate": 4.600492249427161e-06, "loss": 0.5057, "step": 4349 }, { "epoch": 0.54, "grad_norm": 1.7080318860903294, "learning_rate": 4.598488988362742e-06, "loss": 0.4853, "step": 4350 }, { "epoch": 0.54, "grad_norm": 1.5706453071719417, "learning_rate": 4.5964857921659635e-06, "loss": 0.5186, "step": 4351 }, { "epoch": 0.54, "grad_norm": 1.3140820055921787, "learning_rate": 4.594482661160458e-06, "loss": 0.5169, "step": 4352 }, { "epoch": 0.54, "grad_norm": 1.2680956481661165, "learning_rate": 4.59247959566985e-06, "loss": 0.4936, "step": 4353 }, { "epoch": 0.54, "grad_norm": 1.3409682132801277, "learning_rate": 4.5904765960177535e-06, "loss": 0.5418, "step": 4354 }, { "epoch": 0.54, "grad_norm": 1.310893595658717, "learning_rate": 4.58847366252777e-06, "loss": 0.495, "step": 4355 }, { "epoch": 0.54, "grad_norm": 1.5131214786662126, "learning_rate": 4.58647079552349e-06, "loss": 0.4563, "step": 4356 }, { "epoch": 0.54, "grad_norm": 1.826228307832778, "learning_rate": 4.5844679953284946e-06, "loss": 0.5028, "step": 4357 }, { "epoch": 0.54, "grad_norm": 1.320033672212789, "learning_rate": 4.582465262266355e-06, "loss": 0.4299, "step": 4358 }, { "epoch": 0.54, "grad_norm": 1.5183627776360842, "learning_rate": 4.58046259666063e-06, "loss": 0.4833, "step": 4359 }, { "epoch": 0.54, "grad_norm": 1.4938905628289831, "learning_rate": 4.5784599988348656e-06, "loss": 0.5108, "step": 4360 }, { "epoch": 0.54, "grad_norm": 1.6296818643660147, "learning_rate": 4.576457469112602e-06, "loss": 0.5559, "step": 4361 }, { "epoch": 0.54, "grad_norm": 1.7693617181056263, "learning_rate": 4.574455007817365e-06, "loss": 0.5516, "step": 4362 }, { "epoch": 0.54, "grad_norm": 1.3295911623522392, "learning_rate": 4.57245261527267e-06, "loss": 0.522, "step": 4363 }, { "epoch": 0.54, "grad_norm": 1.5954330240640078, "learning_rate": 4.570450291802019e-06, "loss": 0.5147, "step": 4364 }, { "epoch": 0.54, "grad_norm": 1.7853840557710536, "learning_rate": 4.568448037728907e-06, "loss": 0.5434, "step": 4365 }, { "epoch": 0.54, "grad_norm": 1.4879695205979435, "learning_rate": 4.5664458533768155e-06, "loss": 0.4781, "step": 4366 }, { "epoch": 0.54, "grad_norm": 1.4936270775797755, "learning_rate": 4.564443739069215e-06, "loss": 0.5157, "step": 4367 }, { "epoch": 0.54, "grad_norm": 1.710118172072937, "learning_rate": 4.562441695129563e-06, "loss": 0.4956, "step": 4368 }, { "epoch": 0.54, "grad_norm": 2.479920114691241, "learning_rate": 4.56043972188131e-06, "loss": 0.5305, "step": 4369 }, { "epoch": 0.54, "grad_norm": 1.4216813600328682, "learning_rate": 4.558437819647892e-06, "loss": 0.519, "step": 4370 }, { "epoch": 0.54, "grad_norm": 0.6207893049711012, "learning_rate": 4.556435988752732e-06, "loss": 0.4515, "step": 4371 }, { "epoch": 0.54, "grad_norm": 1.7293017674348017, "learning_rate": 4.554434229519244e-06, "loss": 0.5257, "step": 4372 }, { "epoch": 0.54, "grad_norm": 1.4846186532957242, "learning_rate": 4.552432542270832e-06, "loss": 0.4847, "step": 4373 }, { "epoch": 0.54, "grad_norm": 1.4686377152136705, "learning_rate": 4.550430927330885e-06, "loss": 0.4727, "step": 4374 }, { "epoch": 0.54, "grad_norm": 2.0718575292030823, "learning_rate": 4.54842938502278e-06, "loss": 0.5046, "step": 4375 }, { "epoch": 0.54, "grad_norm": 1.5944676215700275, "learning_rate": 4.546427915669882e-06, "loss": 0.4735, "step": 4376 }, { "epoch": 0.54, "grad_norm": 1.4081743207529693, "learning_rate": 4.5444265195955525e-06, "loss": 0.5041, "step": 4377 }, { "epoch": 0.54, "grad_norm": 1.484577775781484, "learning_rate": 4.542425197123131e-06, "loss": 0.539, "step": 4378 }, { "epoch": 0.54, "grad_norm": 1.6249081818741813, "learning_rate": 4.540423948575949e-06, "loss": 0.4749, "step": 4379 }, { "epoch": 0.54, "grad_norm": 1.2982887830474357, "learning_rate": 4.538422774277325e-06, "loss": 0.5252, "step": 4380 }, { "epoch": 0.54, "grad_norm": 1.3766822741656382, "learning_rate": 4.536421674550567e-06, "loss": 0.5047, "step": 4381 }, { "epoch": 0.54, "grad_norm": 1.406652765077386, "learning_rate": 4.534420649718972e-06, "loss": 0.4652, "step": 4382 }, { "epoch": 0.54, "grad_norm": 0.6791428428602214, "learning_rate": 4.532419700105819e-06, "loss": 0.5047, "step": 4383 }, { "epoch": 0.54, "grad_norm": 1.9436755327984607, "learning_rate": 4.53041882603438e-06, "loss": 0.4829, "step": 4384 }, { "epoch": 0.54, "grad_norm": 1.5698938749165816, "learning_rate": 4.528418027827918e-06, "loss": 0.4655, "step": 4385 }, { "epoch": 0.54, "grad_norm": 1.7356508093213097, "learning_rate": 4.526417305809677e-06, "loss": 0.5052, "step": 4386 }, { "epoch": 0.54, "grad_norm": 1.174600104406179, "learning_rate": 4.5244166603028915e-06, "loss": 0.4722, "step": 4387 }, { "epoch": 0.54, "grad_norm": 1.5507075320836625, "learning_rate": 4.522416091630784e-06, "loss": 0.4298, "step": 4388 }, { "epoch": 0.54, "grad_norm": 1.771951957318884, "learning_rate": 4.520415600116561e-06, "loss": 0.4666, "step": 4389 }, { "epoch": 0.54, "grad_norm": 1.4399449863102074, "learning_rate": 4.518415186083422e-06, "loss": 0.4913, "step": 4390 }, { "epoch": 0.54, "grad_norm": 1.3278433017347138, "learning_rate": 4.516414849854552e-06, "loss": 0.5082, "step": 4391 }, { "epoch": 0.55, "grad_norm": 1.4678291151658778, "learning_rate": 4.514414591753121e-06, "loss": 0.5698, "step": 4392 }, { "epoch": 0.55, "grad_norm": 1.5435085395494517, "learning_rate": 4.512414412102288e-06, "loss": 0.5142, "step": 4393 }, { "epoch": 0.55, "grad_norm": 1.556647391808432, "learning_rate": 4.510414311225203e-06, "loss": 0.4913, "step": 4394 }, { "epoch": 0.55, "grad_norm": 1.6916292073049486, "learning_rate": 4.508414289444998e-06, "loss": 0.4798, "step": 4395 }, { "epoch": 0.55, "grad_norm": 1.6692120312743866, "learning_rate": 4.506414347084793e-06, "loss": 0.5023, "step": 4396 }, { "epoch": 0.55, "grad_norm": 1.604330736759279, "learning_rate": 4.5044144844676995e-06, "loss": 0.4997, "step": 4397 }, { "epoch": 0.55, "grad_norm": 2.118799793257604, "learning_rate": 4.50241470191681e-06, "loss": 0.5001, "step": 4398 }, { "epoch": 0.55, "grad_norm": 1.3528728363724072, "learning_rate": 4.500414999755207e-06, "loss": 0.4631, "step": 4399 }, { "epoch": 0.55, "grad_norm": 1.5128477685762356, "learning_rate": 4.498415378305961e-06, "loss": 0.483, "step": 4400 }, { "epoch": 0.55, "grad_norm": 1.454443965715015, "learning_rate": 4.496415837892125e-06, "loss": 0.4791, "step": 4401 }, { "epoch": 0.55, "grad_norm": 1.3334002112817895, "learning_rate": 4.494416378836749e-06, "loss": 0.4398, "step": 4402 }, { "epoch": 0.55, "grad_norm": 1.7095351293567198, "learning_rate": 4.492417001462859e-06, "loss": 0.5341, "step": 4403 }, { "epoch": 0.55, "grad_norm": 1.562988755978761, "learning_rate": 4.490417706093473e-06, "loss": 0.5465, "step": 4404 }, { "epoch": 0.55, "grad_norm": 1.4290886758069221, "learning_rate": 4.4884184930515955e-06, "loss": 0.4642, "step": 4405 }, { "epoch": 0.55, "grad_norm": 0.65549340328514, "learning_rate": 4.486419362660214e-06, "loss": 0.4921, "step": 4406 }, { "epoch": 0.55, "grad_norm": 1.6983782171234239, "learning_rate": 4.484420315242311e-06, "loss": 0.48, "step": 4407 }, { "epoch": 0.55, "grad_norm": 1.6234618539161976, "learning_rate": 4.482421351120845e-06, "loss": 0.5449, "step": 4408 }, { "epoch": 0.55, "grad_norm": 1.6357978088500176, "learning_rate": 4.480422470618766e-06, "loss": 0.5332, "step": 4409 }, { "epoch": 0.55, "grad_norm": 1.9663873905647158, "learning_rate": 4.478423674059015e-06, "loss": 0.5126, "step": 4410 }, { "epoch": 0.55, "grad_norm": 1.4502008959206971, "learning_rate": 4.476424961764513e-06, "loss": 0.5099, "step": 4411 }, { "epoch": 0.55, "grad_norm": 1.4117212277671094, "learning_rate": 4.47442633405817e-06, "loss": 0.4995, "step": 4412 }, { "epoch": 0.55, "grad_norm": 1.387171017364631, "learning_rate": 4.472427791262881e-06, "loss": 0.4811, "step": 4413 }, { "epoch": 0.55, "grad_norm": 1.722836592836977, "learning_rate": 4.470429333701529e-06, "loss": 0.4993, "step": 4414 }, { "epoch": 0.55, "grad_norm": 2.046809463793669, "learning_rate": 4.468430961696982e-06, "loss": 0.5041, "step": 4415 }, { "epoch": 0.55, "grad_norm": 1.4247780429245809, "learning_rate": 4.466432675572096e-06, "loss": 0.4771, "step": 4416 }, { "epoch": 0.55, "grad_norm": 1.4185856211803107, "learning_rate": 4.464434475649708e-06, "loss": 0.5237, "step": 4417 }, { "epoch": 0.55, "grad_norm": 11.468863603545287, "learning_rate": 4.462436362252648e-06, "loss": 0.5028, "step": 4418 }, { "epoch": 0.55, "grad_norm": 1.5714487313977783, "learning_rate": 4.46043833570373e-06, "loss": 0.5409, "step": 4419 }, { "epoch": 0.55, "grad_norm": 1.5369611189212409, "learning_rate": 4.4584403963257485e-06, "loss": 0.4821, "step": 4420 }, { "epoch": 0.55, "grad_norm": 1.5062598221616934, "learning_rate": 4.456442544441493e-06, "loss": 0.4762, "step": 4421 }, { "epoch": 0.55, "grad_norm": 1.8761573257549888, "learning_rate": 4.45444478037373e-06, "loss": 0.5218, "step": 4422 }, { "epoch": 0.55, "grad_norm": 1.780866183894427, "learning_rate": 4.452447104445218e-06, "loss": 0.4763, "step": 4423 }, { "epoch": 0.55, "grad_norm": 2.0530249281707036, "learning_rate": 4.450449516978699e-06, "loss": 0.507, "step": 4424 }, { "epoch": 0.55, "grad_norm": 1.357116791630275, "learning_rate": 4.4484520182969e-06, "loss": 0.5084, "step": 4425 }, { "epoch": 0.55, "grad_norm": 2.4447708016997076, "learning_rate": 4.4464546087225346e-06, "loss": 0.4825, "step": 4426 }, { "epoch": 0.55, "grad_norm": 1.4769381978965075, "learning_rate": 4.444457288578303e-06, "loss": 0.4954, "step": 4427 }, { "epoch": 0.55, "grad_norm": 1.6761864909530553, "learning_rate": 4.44246005818689e-06, "loss": 0.4857, "step": 4428 }, { "epoch": 0.55, "grad_norm": 1.4748459109991388, "learning_rate": 4.440462917870964e-06, "loss": 0.5239, "step": 4429 }, { "epoch": 0.55, "grad_norm": 1.3350226162503687, "learning_rate": 4.438465867953182e-06, "loss": 0.5043, "step": 4430 }, { "epoch": 0.55, "grad_norm": 1.80295600244093, "learning_rate": 4.4364689087561845e-06, "loss": 0.5169, "step": 4431 }, { "epoch": 0.55, "grad_norm": 1.334114055897355, "learning_rate": 4.434472040602599e-06, "loss": 0.5331, "step": 4432 }, { "epoch": 0.55, "grad_norm": 1.5164473111659773, "learning_rate": 4.432475263815035e-06, "loss": 0.4775, "step": 4433 }, { "epoch": 0.55, "grad_norm": 1.3005993439552876, "learning_rate": 4.430478578716089e-06, "loss": 0.4822, "step": 4434 }, { "epoch": 0.55, "grad_norm": 1.474955584683926, "learning_rate": 4.428481985628345e-06, "loss": 0.5167, "step": 4435 }, { "epoch": 0.55, "grad_norm": 1.4789894800140106, "learning_rate": 4.426485484874371e-06, "loss": 0.4987, "step": 4436 }, { "epoch": 0.55, "grad_norm": 1.3903653813140513, "learning_rate": 4.424489076776718e-06, "loss": 0.5015, "step": 4437 }, { "epoch": 0.55, "grad_norm": 1.3625637879056767, "learning_rate": 4.422492761657923e-06, "loss": 0.4851, "step": 4438 }, { "epoch": 0.55, "grad_norm": 1.4422805656413178, "learning_rate": 4.420496539840509e-06, "loss": 0.4535, "step": 4439 }, { "epoch": 0.55, "grad_norm": 1.505004277895124, "learning_rate": 4.4185004116469824e-06, "loss": 0.4876, "step": 4440 }, { "epoch": 0.55, "grad_norm": 1.293316550344579, "learning_rate": 4.416504377399835e-06, "loss": 0.4517, "step": 4441 }, { "epoch": 0.55, "grad_norm": 1.4825354853263424, "learning_rate": 4.414508437421544e-06, "loss": 0.5174, "step": 4442 }, { "epoch": 0.55, "grad_norm": 1.3531475040207626, "learning_rate": 4.412512592034572e-06, "loss": 0.5488, "step": 4443 }, { "epoch": 0.55, "grad_norm": 1.3345359208586365, "learning_rate": 4.410516841561366e-06, "loss": 0.5341, "step": 4444 }, { "epoch": 0.55, "grad_norm": 1.7683739150605549, "learning_rate": 4.408521186324356e-06, "loss": 0.4505, "step": 4445 }, { "epoch": 0.55, "grad_norm": 1.4109196527564847, "learning_rate": 4.406525626645956e-06, "loss": 0.4472, "step": 4446 }, { "epoch": 0.55, "grad_norm": 1.4761501607577767, "learning_rate": 4.404530162848569e-06, "loss": 0.4795, "step": 4447 }, { "epoch": 0.55, "grad_norm": 1.2703248406744538, "learning_rate": 4.402534795254578e-06, "loss": 0.4719, "step": 4448 }, { "epoch": 0.55, "grad_norm": 2.1843683187084255, "learning_rate": 4.4005395241863535e-06, "loss": 0.502, "step": 4449 }, { "epoch": 0.55, "grad_norm": 2.0136952093233424, "learning_rate": 4.398544349966247e-06, "loss": 0.5151, "step": 4450 }, { "epoch": 0.55, "grad_norm": 3.106552861840512, "learning_rate": 4.3965492729166e-06, "loss": 0.4904, "step": 4451 }, { "epoch": 0.55, "grad_norm": 1.6491909140414045, "learning_rate": 4.394554293359731e-06, "loss": 0.554, "step": 4452 }, { "epoch": 0.55, "grad_norm": 1.4630506011879885, "learning_rate": 4.392559411617949e-06, "loss": 0.5344, "step": 4453 }, { "epoch": 0.55, "grad_norm": 1.55165485319063, "learning_rate": 4.390564628013545e-06, "loss": 0.5136, "step": 4454 }, { "epoch": 0.55, "grad_norm": 3.094936177696235, "learning_rate": 4.388569942868791e-06, "loss": 0.4817, "step": 4455 }, { "epoch": 0.55, "grad_norm": 1.9932684063707253, "learning_rate": 4.3865753565059485e-06, "loss": 0.5695, "step": 4456 }, { "epoch": 0.55, "grad_norm": 1.8901580466559014, "learning_rate": 4.384580869247259e-06, "loss": 0.4978, "step": 4457 }, { "epoch": 0.55, "grad_norm": 1.505734659711322, "learning_rate": 4.38258648141495e-06, "loss": 0.4256, "step": 4458 }, { "epoch": 0.55, "grad_norm": 1.6879853623833825, "learning_rate": 4.380592193331234e-06, "loss": 0.6263, "step": 4459 }, { "epoch": 0.55, "grad_norm": 1.3953623635669, "learning_rate": 4.378598005318304e-06, "loss": 0.4724, "step": 4460 }, { "epoch": 0.55, "grad_norm": 1.7850285873847695, "learning_rate": 4.376603917698339e-06, "loss": 0.4961, "step": 4461 }, { "epoch": 0.55, "grad_norm": 1.5945344825899566, "learning_rate": 4.374609930793501e-06, "loss": 0.4908, "step": 4462 }, { "epoch": 0.55, "grad_norm": 1.5711748763666404, "learning_rate": 4.372616044925938e-06, "loss": 0.4778, "step": 4463 }, { "epoch": 0.55, "grad_norm": 1.362564324612832, "learning_rate": 4.370622260417777e-06, "loss": 0.4729, "step": 4464 }, { "epoch": 0.55, "grad_norm": 1.5883608424991777, "learning_rate": 4.368628577591134e-06, "loss": 0.5014, "step": 4465 }, { "epoch": 0.55, "grad_norm": 1.4573401950173126, "learning_rate": 4.366634996768104e-06, "loss": 0.491, "step": 4466 }, { "epoch": 0.55, "grad_norm": 1.3802325792037728, "learning_rate": 4.364641518270767e-06, "loss": 0.5029, "step": 4467 }, { "epoch": 0.55, "grad_norm": 2.1344719427915315, "learning_rate": 4.362648142421191e-06, "loss": 0.4574, "step": 4468 }, { "epoch": 0.55, "grad_norm": 1.369050190285605, "learning_rate": 4.360654869541419e-06, "loss": 0.5084, "step": 4469 }, { "epoch": 0.55, "grad_norm": 1.909973556369964, "learning_rate": 4.358661699953486e-06, "loss": 0.4853, "step": 4470 }, { "epoch": 0.55, "grad_norm": 1.4003381183520367, "learning_rate": 4.356668633979402e-06, "loss": 0.4647, "step": 4471 }, { "epoch": 0.55, "grad_norm": 1.606974333963992, "learning_rate": 4.354675671941167e-06, "loss": 0.4842, "step": 4472 }, { "epoch": 0.56, "grad_norm": 1.2865352907440146, "learning_rate": 4.3526828141607605e-06, "loss": 0.4676, "step": 4473 }, { "epoch": 0.56, "grad_norm": 1.239797259529583, "learning_rate": 4.350690060960146e-06, "loss": 0.4793, "step": 4474 }, { "epoch": 0.56, "grad_norm": 1.4018255932426165, "learning_rate": 4.348697412661269e-06, "loss": 0.455, "step": 4475 }, { "epoch": 0.56, "grad_norm": 1.2995263446919354, "learning_rate": 4.346704869586064e-06, "loss": 0.4692, "step": 4476 }, { "epoch": 0.56, "grad_norm": 1.7508953414723027, "learning_rate": 4.344712432056441e-06, "loss": 0.5329, "step": 4477 }, { "epoch": 0.56, "grad_norm": 1.4401002791730046, "learning_rate": 4.342720100394295e-06, "loss": 0.5505, "step": 4478 }, { "epoch": 0.56, "grad_norm": 1.581965730475437, "learning_rate": 4.340727874921506e-06, "loss": 0.534, "step": 4479 }, { "epoch": 0.56, "grad_norm": 1.2937840348727583, "learning_rate": 4.338735755959935e-06, "loss": 0.4743, "step": 4480 }, { "epoch": 0.56, "grad_norm": 1.4277893209041128, "learning_rate": 4.336743743831426e-06, "loss": 0.4859, "step": 4481 }, { "epoch": 0.56, "grad_norm": 0.727417677604167, "learning_rate": 4.334751838857807e-06, "loss": 0.4857, "step": 4482 }, { "epoch": 0.56, "grad_norm": 1.3905935748983391, "learning_rate": 4.332760041360885e-06, "loss": 0.5166, "step": 4483 }, { "epoch": 0.56, "grad_norm": 2.4155875374503135, "learning_rate": 4.330768351662458e-06, "loss": 0.505, "step": 4484 }, { "epoch": 0.56, "grad_norm": 1.4320288646888266, "learning_rate": 4.328776770084296e-06, "loss": 0.5258, "step": 4485 }, { "epoch": 0.56, "grad_norm": 1.2614713949066259, "learning_rate": 4.326785296948162e-06, "loss": 0.46, "step": 4486 }, { "epoch": 0.56, "grad_norm": 1.462031081338204, "learning_rate": 4.324793932575789e-06, "loss": 0.5246, "step": 4487 }, { "epoch": 0.56, "grad_norm": 1.4480727488848604, "learning_rate": 4.322802677288904e-06, "loss": 0.5121, "step": 4488 }, { "epoch": 0.56, "grad_norm": 1.5817956181131727, "learning_rate": 4.320811531409211e-06, "loss": 0.5003, "step": 4489 }, { "epoch": 0.56, "grad_norm": 1.362465636960399, "learning_rate": 4.318820495258396e-06, "loss": 0.4872, "step": 4490 }, { "epoch": 0.56, "grad_norm": 1.4532442998980175, "learning_rate": 4.316829569158127e-06, "loss": 0.5363, "step": 4491 }, { "epoch": 0.56, "grad_norm": 0.6472721593024459, "learning_rate": 4.3148387534300615e-06, "loss": 0.5004, "step": 4492 }, { "epoch": 0.56, "grad_norm": 1.560388920149118, "learning_rate": 4.312848048395828e-06, "loss": 0.5634, "step": 4493 }, { "epoch": 0.56, "grad_norm": 1.6795898740165551, "learning_rate": 4.310857454377045e-06, "loss": 0.4687, "step": 4494 }, { "epoch": 0.56, "grad_norm": 2.7042855561623385, "learning_rate": 4.30886697169531e-06, "loss": 0.4863, "step": 4495 }, { "epoch": 0.56, "grad_norm": 0.6835008969281285, "learning_rate": 4.306876600672204e-06, "loss": 0.4789, "step": 4496 }, { "epoch": 0.56, "grad_norm": 1.5439431277188411, "learning_rate": 4.3048863416292866e-06, "loss": 0.5109, "step": 4497 }, { "epoch": 0.56, "grad_norm": 1.2984059818710183, "learning_rate": 4.302896194888102e-06, "loss": 0.4721, "step": 4498 }, { "epoch": 0.56, "grad_norm": 2.9009395166991934, "learning_rate": 4.300906160770174e-06, "loss": 0.5337, "step": 4499 }, { "epoch": 0.56, "grad_norm": 1.5366204918981408, "learning_rate": 4.298916239597016e-06, "loss": 0.5198, "step": 4500 }, { "epoch": 0.56, "grad_norm": 1.4234969050909656, "learning_rate": 4.2969264316901135e-06, "loss": 0.5181, "step": 4501 }, { "epoch": 0.56, "grad_norm": 1.8208805018296175, "learning_rate": 4.2949367373709385e-06, "loss": 0.5468, "step": 4502 }, { "epoch": 0.56, "grad_norm": 1.4042996491593032, "learning_rate": 4.292947156960942e-06, "loss": 0.5222, "step": 4503 }, { "epoch": 0.56, "grad_norm": 1.763208800475825, "learning_rate": 4.290957690781561e-06, "loss": 0.5531, "step": 4504 }, { "epoch": 0.56, "grad_norm": 1.7865389229207818, "learning_rate": 4.28896833915421e-06, "loss": 0.4983, "step": 4505 }, { "epoch": 0.56, "grad_norm": 2.170165864220015, "learning_rate": 4.286979102400286e-06, "loss": 0.4926, "step": 4506 }, { "epoch": 0.56, "grad_norm": 1.4323082470024437, "learning_rate": 4.2849899808411665e-06, "loss": 0.5024, "step": 4507 }, { "epoch": 0.56, "grad_norm": 1.4889225319510406, "learning_rate": 4.2830009747982115e-06, "loss": 0.4891, "step": 4508 }, { "epoch": 0.56, "grad_norm": 1.3382345132730324, "learning_rate": 4.281012084592766e-06, "loss": 0.5222, "step": 4509 }, { "epoch": 0.56, "grad_norm": 0.675762563925517, "learning_rate": 4.27902331054615e-06, "loss": 0.5036, "step": 4510 }, { "epoch": 0.56, "grad_norm": 1.6067386368305003, "learning_rate": 4.277034652979668e-06, "loss": 0.4876, "step": 4511 }, { "epoch": 0.56, "grad_norm": 1.611708514685027, "learning_rate": 4.275046112214604e-06, "loss": 0.5042, "step": 4512 }, { "epoch": 0.56, "grad_norm": 1.494644895380749, "learning_rate": 4.273057688572227e-06, "loss": 0.5389, "step": 4513 }, { "epoch": 0.56, "grad_norm": 1.5774346058796291, "learning_rate": 4.271069382373783e-06, "loss": 0.4572, "step": 4514 }, { "epoch": 0.56, "grad_norm": 1.4256215511273256, "learning_rate": 4.2690811939405e-06, "loss": 0.523, "step": 4515 }, { "epoch": 0.56, "grad_norm": 1.3297159375719316, "learning_rate": 4.267093123593585e-06, "loss": 0.4912, "step": 4516 }, { "epoch": 0.56, "grad_norm": 1.4035373991816045, "learning_rate": 4.265105171654233e-06, "loss": 0.4572, "step": 4517 }, { "epoch": 0.56, "grad_norm": 1.3611163373689483, "learning_rate": 4.263117338443612e-06, "loss": 0.5006, "step": 4518 }, { "epoch": 0.56, "grad_norm": 1.4072504029722148, "learning_rate": 4.261129624282876e-06, "loss": 0.4834, "step": 4519 }, { "epoch": 0.56, "grad_norm": 1.5658032422045698, "learning_rate": 4.2591420294931565e-06, "loss": 0.5035, "step": 4520 }, { "epoch": 0.56, "grad_norm": 1.823794325382853, "learning_rate": 4.257154554395566e-06, "loss": 0.516, "step": 4521 }, { "epoch": 0.56, "grad_norm": 1.6114850530253642, "learning_rate": 4.2551671993112e-06, "loss": 0.483, "step": 4522 }, { "epoch": 0.56, "grad_norm": 1.4416077224034098, "learning_rate": 4.253179964561133e-06, "loss": 0.5327, "step": 4523 }, { "epoch": 0.56, "grad_norm": 2.233556266526752, "learning_rate": 4.25119285046642e-06, "loss": 0.5014, "step": 4524 }, { "epoch": 0.56, "grad_norm": 1.4926199002802785, "learning_rate": 4.249205857348097e-06, "loss": 0.509, "step": 4525 }, { "epoch": 0.56, "grad_norm": 1.5610452174421434, "learning_rate": 4.247218985527179e-06, "loss": 0.5521, "step": 4526 }, { "epoch": 0.56, "grad_norm": 1.7556944477904484, "learning_rate": 4.245232235324666e-06, "loss": 0.4896, "step": 4527 }, { "epoch": 0.56, "grad_norm": 1.4150439393082945, "learning_rate": 4.243245607061531e-06, "loss": 0.4713, "step": 4528 }, { "epoch": 0.56, "grad_norm": 1.3719288884987528, "learning_rate": 4.241259101058734e-06, "loss": 0.4368, "step": 4529 }, { "epoch": 0.56, "grad_norm": 1.4296319411129048, "learning_rate": 4.239272717637212e-06, "loss": 0.532, "step": 4530 }, { "epoch": 0.56, "grad_norm": 1.6677207912248433, "learning_rate": 4.237286457117882e-06, "loss": 0.4865, "step": 4531 }, { "epoch": 0.56, "grad_norm": 1.4600157709673596, "learning_rate": 4.2353003198216405e-06, "loss": 0.5526, "step": 4532 }, { "epoch": 0.56, "grad_norm": 3.1718828387548506, "learning_rate": 4.233314306069369e-06, "loss": 0.4848, "step": 4533 }, { "epoch": 0.56, "grad_norm": 1.2922949432077968, "learning_rate": 4.231328416181923e-06, "loss": 0.4936, "step": 4534 }, { "epoch": 0.56, "grad_norm": 1.6398879189413544, "learning_rate": 4.229342650480143e-06, "loss": 0.5041, "step": 4535 }, { "epoch": 0.56, "grad_norm": 3.175714480238288, "learning_rate": 4.227357009284843e-06, "loss": 0.4687, "step": 4536 }, { "epoch": 0.56, "grad_norm": 1.7546675858146281, "learning_rate": 4.225371492916824e-06, "loss": 0.4849, "step": 4537 }, { "epoch": 0.56, "grad_norm": 1.4694551238357199, "learning_rate": 4.223386101696863e-06, "loss": 0.538, "step": 4538 }, { "epoch": 0.56, "grad_norm": 1.5947169512599413, "learning_rate": 4.221400835945716e-06, "loss": 0.5196, "step": 4539 }, { "epoch": 0.56, "grad_norm": 1.7358708080151768, "learning_rate": 4.2194156959841215e-06, "loss": 0.4918, "step": 4540 }, { "epoch": 0.56, "grad_norm": 1.3877507453717872, "learning_rate": 4.217430682132796e-06, "loss": 0.4993, "step": 4541 }, { "epoch": 0.56, "grad_norm": 1.2591813594630503, "learning_rate": 4.215445794712436e-06, "loss": 0.5215, "step": 4542 }, { "epoch": 0.56, "grad_norm": 1.4205477304663412, "learning_rate": 4.213461034043719e-06, "loss": 0.5416, "step": 4543 }, { "epoch": 0.56, "grad_norm": 3.6272911494152797, "learning_rate": 4.211476400447298e-06, "loss": 0.5314, "step": 4544 }, { "epoch": 0.56, "grad_norm": 1.4961401717993632, "learning_rate": 4.20949189424381e-06, "loss": 0.527, "step": 4545 }, { "epoch": 0.56, "grad_norm": 2.3452851390092455, "learning_rate": 4.207507515753867e-06, "loss": 0.5442, "step": 4546 }, { "epoch": 0.56, "grad_norm": 1.654315036033042, "learning_rate": 4.205523265298066e-06, "loss": 0.5496, "step": 4547 }, { "epoch": 0.56, "grad_norm": 1.346401650183757, "learning_rate": 4.203539143196978e-06, "loss": 0.5013, "step": 4548 }, { "epoch": 0.56, "grad_norm": 1.3989470161936295, "learning_rate": 4.201555149771155e-06, "loss": 0.4853, "step": 4549 }, { "epoch": 0.56, "grad_norm": 1.3946104740281104, "learning_rate": 4.199571285341131e-06, "loss": 0.4594, "step": 4550 }, { "epoch": 0.56, "grad_norm": 1.6578275793558712, "learning_rate": 4.197587550227416e-06, "loss": 0.5044, "step": 4551 }, { "epoch": 0.56, "grad_norm": 1.2876289276965982, "learning_rate": 4.195603944750498e-06, "loss": 0.4791, "step": 4552 }, { "epoch": 0.57, "grad_norm": 1.5370400838336578, "learning_rate": 4.193620469230848e-06, "loss": 0.537, "step": 4553 }, { "epoch": 0.57, "grad_norm": 1.3196531138427288, "learning_rate": 4.191637123988913e-06, "loss": 0.4833, "step": 4554 }, { "epoch": 0.57, "grad_norm": 1.2417498417268902, "learning_rate": 4.18965390934512e-06, "loss": 0.4718, "step": 4555 }, { "epoch": 0.57, "grad_norm": 1.6394348825494676, "learning_rate": 4.187670825619875e-06, "loss": 0.495, "step": 4556 }, { "epoch": 0.57, "grad_norm": 1.3141667309696983, "learning_rate": 4.185687873133561e-06, "loss": 0.5267, "step": 4557 }, { "epoch": 0.57, "grad_norm": 1.690376529517557, "learning_rate": 4.1837050522065434e-06, "loss": 0.5207, "step": 4558 }, { "epoch": 0.57, "grad_norm": 1.3277323468444318, "learning_rate": 4.181722363159165e-06, "loss": 0.5052, "step": 4559 }, { "epoch": 0.57, "grad_norm": 1.4801879039268384, "learning_rate": 4.179739806311746e-06, "loss": 0.4945, "step": 4560 }, { "epoch": 0.57, "grad_norm": 2.042987090195881, "learning_rate": 4.177757381984584e-06, "loss": 0.4719, "step": 4561 }, { "epoch": 0.57, "grad_norm": 1.4214907749637817, "learning_rate": 4.17577509049796e-06, "loss": 0.5609, "step": 4562 }, { "epoch": 0.57, "grad_norm": 1.3860726873778075, "learning_rate": 4.173792932172128e-06, "loss": 0.4199, "step": 4563 }, { "epoch": 0.57, "grad_norm": 1.6571916732434602, "learning_rate": 4.171810907327325e-06, "loss": 0.5922, "step": 4564 }, { "epoch": 0.57, "grad_norm": 1.51516415102186, "learning_rate": 4.169829016283762e-06, "loss": 0.5021, "step": 4565 }, { "epoch": 0.57, "grad_norm": 1.4737232507571463, "learning_rate": 4.167847259361636e-06, "loss": 0.4847, "step": 4566 }, { "epoch": 0.57, "grad_norm": 1.555560655088034, "learning_rate": 4.165865636881113e-06, "loss": 0.5149, "step": 4567 }, { "epoch": 0.57, "grad_norm": 1.6523285718797909, "learning_rate": 4.163884149162342e-06, "loss": 0.4794, "step": 4568 }, { "epoch": 0.57, "grad_norm": 3.2591279740887935, "learning_rate": 4.161902796525452e-06, "loss": 0.4776, "step": 4569 }, { "epoch": 0.57, "grad_norm": 1.627216635272726, "learning_rate": 4.159921579290546e-06, "loss": 0.5305, "step": 4570 }, { "epoch": 0.57, "grad_norm": 1.4736015643294451, "learning_rate": 4.157940497777708e-06, "loss": 0.5281, "step": 4571 }, { "epoch": 0.57, "grad_norm": 2.0493794792054776, "learning_rate": 4.155959552306998e-06, "loss": 0.5319, "step": 4572 }, { "epoch": 0.57, "grad_norm": 2.7710477417592, "learning_rate": 4.153978743198454e-06, "loss": 0.5209, "step": 4573 }, { "epoch": 0.57, "grad_norm": 2.4132708828952816, "learning_rate": 4.151998070772098e-06, "loss": 0.5137, "step": 4574 }, { "epoch": 0.57, "grad_norm": 3.3970788930640707, "learning_rate": 4.150017535347922e-06, "loss": 0.5331, "step": 4575 }, { "epoch": 0.57, "grad_norm": 1.6491027358822592, "learning_rate": 4.148037137245899e-06, "loss": 0.5045, "step": 4576 }, { "epoch": 0.57, "grad_norm": 1.19176300713811, "learning_rate": 4.1460568767859795e-06, "loss": 0.4623, "step": 4577 }, { "epoch": 0.57, "grad_norm": 0.6787985574674565, "learning_rate": 4.144076754288093e-06, "loss": 0.4907, "step": 4578 }, { "epoch": 0.57, "grad_norm": 1.327884011769675, "learning_rate": 4.142096770072144e-06, "loss": 0.525, "step": 4579 }, { "epoch": 0.57, "grad_norm": 1.6676423532750408, "learning_rate": 4.140116924458018e-06, "loss": 0.5443, "step": 4580 }, { "epoch": 0.57, "grad_norm": 1.441471086599204, "learning_rate": 4.138137217765577e-06, "loss": 0.5027, "step": 4581 }, { "epoch": 0.57, "grad_norm": 1.5039303844416383, "learning_rate": 4.1361576503146564e-06, "loss": 0.5751, "step": 4582 }, { "epoch": 0.57, "grad_norm": 1.394640112008406, "learning_rate": 4.134178222425077e-06, "loss": 0.5296, "step": 4583 }, { "epoch": 0.57, "grad_norm": 1.3152535316930878, "learning_rate": 4.1321989344166315e-06, "loss": 0.5119, "step": 4584 }, { "epoch": 0.57, "grad_norm": 2.2997952010293106, "learning_rate": 4.130219786609092e-06, "loss": 0.5026, "step": 4585 }, { "epoch": 0.57, "grad_norm": 1.9590631327681363, "learning_rate": 4.128240779322206e-06, "loss": 0.5002, "step": 4586 }, { "epoch": 0.57, "grad_norm": 1.8870920809940253, "learning_rate": 4.1262619128757e-06, "loss": 0.4644, "step": 4587 }, { "epoch": 0.57, "grad_norm": 1.5428964670246743, "learning_rate": 4.1242831875892755e-06, "loss": 0.5423, "step": 4588 }, { "epoch": 0.57, "grad_norm": 1.3085757589141793, "learning_rate": 4.122304603782616e-06, "loss": 0.4827, "step": 4589 }, { "epoch": 0.57, "grad_norm": 1.4333341177657333, "learning_rate": 4.120326161775375e-06, "loss": 0.4813, "step": 4590 }, { "epoch": 0.57, "grad_norm": 0.6827500733524555, "learning_rate": 4.118347861887193e-06, "loss": 0.4964, "step": 4591 }, { "epoch": 0.57, "grad_norm": 1.6543162660363646, "learning_rate": 4.116369704437678e-06, "loss": 0.5188, "step": 4592 }, { "epoch": 0.57, "grad_norm": 2.6541812074781275, "learning_rate": 4.1143916897464204e-06, "loss": 0.5051, "step": 4593 }, { "epoch": 0.57, "grad_norm": 1.3791053313562698, "learning_rate": 4.112413818132986e-06, "loss": 0.5294, "step": 4594 }, { "epoch": 0.57, "grad_norm": 1.3454682491868881, "learning_rate": 4.110436089916915e-06, "loss": 0.5059, "step": 4595 }, { "epoch": 0.57, "grad_norm": 1.3218337421022976, "learning_rate": 4.108458505417728e-06, "loss": 0.4583, "step": 4596 }, { "epoch": 0.57, "grad_norm": 1.5307112265445209, "learning_rate": 4.1064810649549216e-06, "loss": 0.5283, "step": 4597 }, { "epoch": 0.57, "grad_norm": 1.8299476585591976, "learning_rate": 4.104503768847967e-06, "loss": 0.5274, "step": 4598 }, { "epoch": 0.57, "grad_norm": 1.364327794991199, "learning_rate": 4.102526617416317e-06, "loss": 0.4876, "step": 4599 }, { "epoch": 0.57, "grad_norm": 0.6838317119053259, "learning_rate": 4.100549610979396e-06, "loss": 0.4948, "step": 4600 }, { "epoch": 0.57, "grad_norm": 1.5079001057864923, "learning_rate": 4.098572749856607e-06, "loss": 0.489, "step": 4601 }, { "epoch": 0.57, "grad_norm": 1.440960720058185, "learning_rate": 4.09659603436733e-06, "loss": 0.5237, "step": 4602 }, { "epoch": 0.57, "grad_norm": 1.6452758498878746, "learning_rate": 4.09461946483092e-06, "loss": 0.4759, "step": 4603 }, { "epoch": 0.57, "grad_norm": 1.4792873873986112, "learning_rate": 4.092643041566709e-06, "loss": 0.4775, "step": 4604 }, { "epoch": 0.57, "grad_norm": 2.0103496990842897, "learning_rate": 4.090666764894007e-06, "loss": 0.5322, "step": 4605 }, { "epoch": 0.57, "grad_norm": 1.3916054186675135, "learning_rate": 4.088690635132094e-06, "loss": 0.5386, "step": 4606 }, { "epoch": 0.57, "grad_norm": 1.225499772634962, "learning_rate": 4.0867146526002384e-06, "loss": 0.5163, "step": 4607 }, { "epoch": 0.57, "grad_norm": 1.7005086325318728, "learning_rate": 4.084738817617673e-06, "loss": 0.5397, "step": 4608 }, { "epoch": 0.57, "grad_norm": 1.330519772925582, "learning_rate": 4.082763130503613e-06, "loss": 0.4989, "step": 4609 }, { "epoch": 0.57, "grad_norm": 1.6195287767062285, "learning_rate": 4.080787591577247e-06, "loss": 0.5027, "step": 4610 }, { "epoch": 0.57, "grad_norm": 0.6955980481374432, "learning_rate": 4.078812201157743e-06, "loss": 0.468, "step": 4611 }, { "epoch": 0.57, "grad_norm": 1.6819958162274402, "learning_rate": 4.0768369595642396e-06, "loss": 0.5542, "step": 4612 }, { "epoch": 0.57, "grad_norm": 1.3882961570525039, "learning_rate": 4.074861867115856e-06, "loss": 0.4712, "step": 4613 }, { "epoch": 0.57, "grad_norm": 1.418499308360564, "learning_rate": 4.072886924131685e-06, "loss": 0.5266, "step": 4614 }, { "epoch": 0.57, "grad_norm": 1.3503995058949858, "learning_rate": 4.070912130930798e-06, "loss": 0.4793, "step": 4615 }, { "epoch": 0.57, "grad_norm": 1.6011790859547999, "learning_rate": 4.068937487832239e-06, "loss": 0.4503, "step": 4616 }, { "epoch": 0.57, "grad_norm": 0.7033286082288611, "learning_rate": 4.066962995155028e-06, "loss": 0.5097, "step": 4617 }, { "epoch": 0.57, "grad_norm": 1.6431022028485887, "learning_rate": 4.064988653218163e-06, "loss": 0.52, "step": 4618 }, { "epoch": 0.57, "grad_norm": 1.4719628028092255, "learning_rate": 4.063014462340616e-06, "loss": 0.5023, "step": 4619 }, { "epoch": 0.57, "grad_norm": 1.9883852076554343, "learning_rate": 4.061040422841334e-06, "loss": 0.4708, "step": 4620 }, { "epoch": 0.57, "grad_norm": 1.7468912437684312, "learning_rate": 4.059066535039242e-06, "loss": 0.5219, "step": 4621 }, { "epoch": 0.57, "grad_norm": 1.5013487110806898, "learning_rate": 4.057092799253239e-06, "loss": 0.4259, "step": 4622 }, { "epoch": 0.57, "grad_norm": 1.549834919534906, "learning_rate": 4.055119215802196e-06, "loss": 0.6012, "step": 4623 }, { "epoch": 0.57, "grad_norm": 2.181190433647081, "learning_rate": 4.053145785004968e-06, "loss": 0.5504, "step": 4624 }, { "epoch": 0.57, "grad_norm": 0.6775103648742493, "learning_rate": 4.051172507180376e-06, "loss": 0.4888, "step": 4625 }, { "epoch": 0.57, "grad_norm": 1.1712565831079453, "learning_rate": 4.049199382647224e-06, "loss": 0.4742, "step": 4626 }, { "epoch": 0.57, "grad_norm": 1.3516211442014003, "learning_rate": 4.0472264117242845e-06, "loss": 0.5361, "step": 4627 }, { "epoch": 0.57, "grad_norm": 1.4519730976420815, "learning_rate": 4.045253594730309e-06, "loss": 0.4768, "step": 4628 }, { "epoch": 0.57, "grad_norm": 1.4421565763159003, "learning_rate": 4.043280931984025e-06, "loss": 0.4832, "step": 4629 }, { "epoch": 0.57, "grad_norm": 1.5819635692994547, "learning_rate": 4.041308423804132e-06, "loss": 0.5221, "step": 4630 }, { "epoch": 0.57, "grad_norm": 1.366048129453399, "learning_rate": 4.039336070509305e-06, "loss": 0.4943, "step": 4631 }, { "epoch": 0.57, "grad_norm": 1.4460322616649448, "learning_rate": 4.037363872418199e-06, "loss": 0.4793, "step": 4632 }, { "epoch": 0.57, "grad_norm": 1.9828714102023244, "learning_rate": 4.035391829849436e-06, "loss": 0.5577, "step": 4633 }, { "epoch": 0.58, "grad_norm": 0.7198579219425816, "learning_rate": 4.033419943121619e-06, "loss": 0.5233, "step": 4634 }, { "epoch": 0.58, "grad_norm": 1.427904785615395, "learning_rate": 4.0314482125533235e-06, "loss": 0.5267, "step": 4635 }, { "epoch": 0.58, "grad_norm": 1.4270553507828687, "learning_rate": 4.029476638463099e-06, "loss": 0.5129, "step": 4636 }, { "epoch": 0.58, "grad_norm": 1.3353797022534557, "learning_rate": 4.027505221169471e-06, "loss": 0.4807, "step": 4637 }, { "epoch": 0.58, "grad_norm": 1.9051587551980136, "learning_rate": 4.02553396099094e-06, "loss": 0.4763, "step": 4638 }, { "epoch": 0.58, "grad_norm": 1.3909718159931581, "learning_rate": 4.023562858245979e-06, "loss": 0.5409, "step": 4639 }, { "epoch": 0.58, "grad_norm": 1.4703475909795112, "learning_rate": 4.021591913253039e-06, "loss": 0.4978, "step": 4640 }, { "epoch": 0.58, "grad_norm": 1.4012920248011926, "learning_rate": 4.019621126330541e-06, "loss": 0.4711, "step": 4641 }, { "epoch": 0.58, "grad_norm": 1.6960206540230156, "learning_rate": 4.017650497796886e-06, "loss": 0.5333, "step": 4642 }, { "epoch": 0.58, "grad_norm": 3.567076240933028, "learning_rate": 4.015680027970445e-06, "loss": 0.4917, "step": 4643 }, { "epoch": 0.58, "grad_norm": 1.9646219421988003, "learning_rate": 4.013709717169563e-06, "loss": 0.5112, "step": 4644 }, { "epoch": 0.58, "grad_norm": 1.3626883688448634, "learning_rate": 4.011739565712564e-06, "loss": 0.4741, "step": 4645 }, { "epoch": 0.58, "grad_norm": 1.4192552372878515, "learning_rate": 4.009769573917741e-06, "loss": 0.5132, "step": 4646 }, { "epoch": 0.58, "grad_norm": 1.5294985071234206, "learning_rate": 4.007799742103365e-06, "loss": 0.4852, "step": 4647 }, { "epoch": 0.58, "grad_norm": 3.103961404045815, "learning_rate": 4.005830070587679e-06, "loss": 0.4851, "step": 4648 }, { "epoch": 0.58, "grad_norm": 1.3305211645597752, "learning_rate": 4.003860559688902e-06, "loss": 0.4964, "step": 4649 }, { "epoch": 0.58, "grad_norm": 1.4043067447743847, "learning_rate": 4.001891209725224e-06, "loss": 0.5026, "step": 4650 }, { "epoch": 0.58, "grad_norm": 1.9045569050008961, "learning_rate": 3.999922021014812e-06, "loss": 0.4974, "step": 4651 }, { "epoch": 0.58, "grad_norm": 1.513471288790594, "learning_rate": 3.997952993875805e-06, "loss": 0.4903, "step": 4652 }, { "epoch": 0.58, "grad_norm": 3.3190837653397103, "learning_rate": 3.995984128626317e-06, "loss": 0.5218, "step": 4653 }, { "epoch": 0.58, "grad_norm": 1.2803737183980128, "learning_rate": 3.9940154255844355e-06, "loss": 0.4901, "step": 4654 }, { "epoch": 0.58, "grad_norm": 2.324017478162515, "learning_rate": 3.992046885068221e-06, "loss": 0.5455, "step": 4655 }, { "epoch": 0.58, "grad_norm": 1.405213306853622, "learning_rate": 3.99007850739571e-06, "loss": 0.5239, "step": 4656 }, { "epoch": 0.58, "grad_norm": 1.4332496852812633, "learning_rate": 3.988110292884912e-06, "loss": 0.5049, "step": 4657 }, { "epoch": 0.58, "grad_norm": 1.5801465841808873, "learning_rate": 3.986142241853808e-06, "loss": 0.5177, "step": 4658 }, { "epoch": 0.58, "grad_norm": 1.5017776976421946, "learning_rate": 3.984174354620353e-06, "loss": 0.5474, "step": 4659 }, { "epoch": 0.58, "grad_norm": 1.264765259932856, "learning_rate": 3.982206631502478e-06, "loss": 0.4865, "step": 4660 }, { "epoch": 0.58, "grad_norm": 1.8198470905922306, "learning_rate": 3.980239072818086e-06, "loss": 0.4662, "step": 4661 }, { "epoch": 0.58, "grad_norm": 1.4756018404030682, "learning_rate": 3.9782716788850525e-06, "loss": 0.5083, "step": 4662 }, { "epoch": 0.58, "grad_norm": 1.4214216069678782, "learning_rate": 3.9763044500212285e-06, "loss": 0.5023, "step": 4663 }, { "epoch": 0.58, "grad_norm": 1.4574395662824027, "learning_rate": 3.974337386544436e-06, "loss": 0.4644, "step": 4664 }, { "epoch": 0.58, "grad_norm": 1.4535700532434277, "learning_rate": 3.972370488772472e-06, "loss": 0.4926, "step": 4665 }, { "epoch": 0.58, "grad_norm": 1.5898246354626455, "learning_rate": 3.9704037570231055e-06, "loss": 0.5456, "step": 4666 }, { "epoch": 0.58, "grad_norm": 0.6957198744230172, "learning_rate": 3.968437191614081e-06, "loss": 0.5038, "step": 4667 }, { "epoch": 0.58, "grad_norm": 1.535459803714931, "learning_rate": 3.966470792863113e-06, "loss": 0.5132, "step": 4668 }, { "epoch": 0.58, "grad_norm": 1.6608193469359567, "learning_rate": 3.964504561087891e-06, "loss": 0.4485, "step": 4669 }, { "epoch": 0.58, "grad_norm": 2.349331946559182, "learning_rate": 3.962538496606077e-06, "loss": 0.5023, "step": 4670 }, { "epoch": 0.58, "grad_norm": 1.580207427022666, "learning_rate": 3.960572599735306e-06, "loss": 0.4777, "step": 4671 }, { "epoch": 0.58, "grad_norm": 1.6782527319642873, "learning_rate": 3.958606870793184e-06, "loss": 0.488, "step": 4672 }, { "epoch": 0.58, "grad_norm": 1.3316424558043596, "learning_rate": 3.956641310097296e-06, "loss": 0.5741, "step": 4673 }, { "epoch": 0.58, "grad_norm": 1.3524018346110955, "learning_rate": 3.954675917965194e-06, "loss": 0.5005, "step": 4674 }, { "epoch": 0.58, "grad_norm": 0.684657285155044, "learning_rate": 3.952710694714403e-06, "loss": 0.4864, "step": 4675 }, { "epoch": 0.58, "grad_norm": 1.510587123243163, "learning_rate": 3.9507456406624235e-06, "loss": 0.5308, "step": 4676 }, { "epoch": 0.58, "grad_norm": 1.4383162007434367, "learning_rate": 3.948780756126726e-06, "loss": 0.5088, "step": 4677 }, { "epoch": 0.58, "grad_norm": 1.2909856888220486, "learning_rate": 3.946816041424756e-06, "loss": 0.5403, "step": 4678 }, { "epoch": 0.58, "grad_norm": 1.522311388231281, "learning_rate": 3.94485149687393e-06, "loss": 0.4866, "step": 4679 }, { "epoch": 0.58, "grad_norm": 1.5130355478873698, "learning_rate": 3.942887122791636e-06, "loss": 0.5284, "step": 4680 }, { "epoch": 0.58, "grad_norm": 1.3370427571409305, "learning_rate": 3.940922919495239e-06, "loss": 0.481, "step": 4681 }, { "epoch": 0.58, "grad_norm": 1.2204372209939194, "learning_rate": 3.938958887302072e-06, "loss": 0.5164, "step": 4682 }, { "epoch": 0.58, "grad_norm": 1.8622732708112701, "learning_rate": 3.9369950265294415e-06, "loss": 0.4943, "step": 4683 }, { "epoch": 0.58, "grad_norm": 0.6782135724024395, "learning_rate": 3.9350313374946275e-06, "loss": 0.4423, "step": 4684 }, { "epoch": 0.58, "grad_norm": 2.0674398268960297, "learning_rate": 3.933067820514879e-06, "loss": 0.5081, "step": 4685 }, { "epoch": 0.58, "grad_norm": 1.5423215799903884, "learning_rate": 3.931104475907423e-06, "loss": 0.4722, "step": 4686 }, { "epoch": 0.58, "grad_norm": 1.455909664098324, "learning_rate": 3.92914130398945e-06, "loss": 0.5115, "step": 4687 }, { "epoch": 0.58, "grad_norm": 1.4171266629705266, "learning_rate": 3.92717830507813e-06, "loss": 0.4569, "step": 4688 }, { "epoch": 0.58, "grad_norm": 1.5722967736044233, "learning_rate": 3.925215479490605e-06, "loss": 0.519, "step": 4689 }, { "epoch": 0.58, "grad_norm": 1.8243100670739596, "learning_rate": 3.923252827543986e-06, "loss": 0.4902, "step": 4690 }, { "epoch": 0.58, "grad_norm": 1.4910512694948186, "learning_rate": 3.921290349555355e-06, "loss": 0.5111, "step": 4691 }, { "epoch": 0.58, "grad_norm": 0.6551753415818495, "learning_rate": 3.9193280458417685e-06, "loss": 0.527, "step": 4692 }, { "epoch": 0.58, "grad_norm": 1.660451709064471, "learning_rate": 3.917365916720255e-06, "loss": 0.5305, "step": 4693 }, { "epoch": 0.58, "grad_norm": 1.64326306596274, "learning_rate": 3.915403962507812e-06, "loss": 0.5228, "step": 4694 }, { "epoch": 0.58, "grad_norm": 1.601187680203445, "learning_rate": 3.9134421835214105e-06, "loss": 0.5226, "step": 4695 }, { "epoch": 0.58, "grad_norm": 1.490661965402863, "learning_rate": 3.911480580077992e-06, "loss": 0.5178, "step": 4696 }, { "epoch": 0.58, "grad_norm": 1.683396309148199, "learning_rate": 3.909519152494475e-06, "loss": 0.5278, "step": 4697 }, { "epoch": 0.58, "grad_norm": 1.812167758657147, "learning_rate": 3.907557901087743e-06, "loss": 0.5433, "step": 4698 }, { "epoch": 0.58, "grad_norm": 1.466698533298413, "learning_rate": 3.905596826174654e-06, "loss": 0.518, "step": 4699 }, { "epoch": 0.58, "grad_norm": 1.7310256508734216, "learning_rate": 3.903635928072035e-06, "loss": 0.4678, "step": 4700 }, { "epoch": 0.58, "grad_norm": 1.4647284525075164, "learning_rate": 3.901675207096689e-06, "loss": 0.5054, "step": 4701 }, { "epoch": 0.58, "grad_norm": 1.4310062514790178, "learning_rate": 3.899714663565386e-06, "loss": 0.4812, "step": 4702 }, { "epoch": 0.58, "grad_norm": 1.4360374783010557, "learning_rate": 3.897754297794869e-06, "loss": 0.5054, "step": 4703 }, { "epoch": 0.58, "grad_norm": 1.4011598640375142, "learning_rate": 3.895794110101854e-06, "loss": 0.4953, "step": 4704 }, { "epoch": 0.58, "grad_norm": 1.3910790082629851, "learning_rate": 3.893834100803023e-06, "loss": 0.4697, "step": 4705 }, { "epoch": 0.58, "grad_norm": 1.384821909994724, "learning_rate": 3.891874270215037e-06, "loss": 0.5501, "step": 4706 }, { "epoch": 0.58, "grad_norm": 1.358041590073802, "learning_rate": 3.889914618654522e-06, "loss": 0.5326, "step": 4707 }, { "epoch": 0.58, "grad_norm": 1.7752222465176617, "learning_rate": 3.887955146438077e-06, "loss": 0.5044, "step": 4708 }, { "epoch": 0.58, "grad_norm": 1.5484824637392638, "learning_rate": 3.885995853882273e-06, "loss": 0.4953, "step": 4709 }, { "epoch": 0.58, "grad_norm": 1.1610373215701917, "learning_rate": 3.884036741303649e-06, "loss": 0.45, "step": 4710 }, { "epoch": 0.58, "grad_norm": 1.276864536888107, "learning_rate": 3.8820778090187185e-06, "loss": 0.5035, "step": 4711 }, { "epoch": 0.58, "grad_norm": 1.9254796816869655, "learning_rate": 3.880119057343965e-06, "loss": 0.5279, "step": 4712 }, { "epoch": 0.58, "grad_norm": 1.6475310297625902, "learning_rate": 3.878160486595837e-06, "loss": 0.5642, "step": 4713 }, { "epoch": 0.59, "grad_norm": 1.4280491895781326, "learning_rate": 3.876202097090765e-06, "loss": 0.4728, "step": 4714 }, { "epoch": 0.59, "grad_norm": 2.2032458161147614, "learning_rate": 3.874243889145142e-06, "loss": 0.528, "step": 4715 }, { "epoch": 0.59, "grad_norm": 1.4660037572297808, "learning_rate": 3.872285863075334e-06, "loss": 0.5171, "step": 4716 }, { "epoch": 0.59, "grad_norm": 1.445124971634497, "learning_rate": 3.8703280191976764e-06, "loss": 0.4733, "step": 4717 }, { "epoch": 0.59, "grad_norm": 0.6264348500908541, "learning_rate": 3.868370357828476e-06, "loss": 0.513, "step": 4718 }, { "epoch": 0.59, "grad_norm": 1.7280898543065133, "learning_rate": 3.866412879284011e-06, "loss": 0.5092, "step": 4719 }, { "epoch": 0.59, "grad_norm": 1.7655472224257922, "learning_rate": 3.864455583880529e-06, "loss": 0.5303, "step": 4720 }, { "epoch": 0.59, "grad_norm": 1.3950462536660542, "learning_rate": 3.862498471934248e-06, "loss": 0.454, "step": 4721 }, { "epoch": 0.59, "grad_norm": 1.5446451372213836, "learning_rate": 3.860541543761358e-06, "loss": 0.4389, "step": 4722 }, { "epoch": 0.59, "grad_norm": 2.202339874550893, "learning_rate": 3.858584799678017e-06, "loss": 0.5332, "step": 4723 }, { "epoch": 0.59, "grad_norm": 1.7557477548315092, "learning_rate": 3.8566282400003545e-06, "loss": 0.5418, "step": 4724 }, { "epoch": 0.59, "grad_norm": 1.2698707672176435, "learning_rate": 3.854671865044469e-06, "loss": 0.4851, "step": 4725 }, { "epoch": 0.59, "grad_norm": 1.6644076653329196, "learning_rate": 3.852715675126431e-06, "loss": 0.4325, "step": 4726 }, { "epoch": 0.59, "grad_norm": 1.2646724952132085, "learning_rate": 3.85075967056228e-06, "loss": 0.5054, "step": 4727 }, { "epoch": 0.59, "grad_norm": 1.5026733833240709, "learning_rate": 3.848803851668026e-06, "loss": 0.4743, "step": 4728 }, { "epoch": 0.59, "grad_norm": 1.4012847881227701, "learning_rate": 3.8468482187596475e-06, "loss": 0.4744, "step": 4729 }, { "epoch": 0.59, "grad_norm": 1.5112355270821844, "learning_rate": 3.844892772153097e-06, "loss": 0.5427, "step": 4730 }, { "epoch": 0.59, "grad_norm": 0.7444916837957993, "learning_rate": 3.842937512164292e-06, "loss": 0.5219, "step": 4731 }, { "epoch": 0.59, "grad_norm": 2.3893452549099528, "learning_rate": 3.840982439109122e-06, "loss": 0.4558, "step": 4732 }, { "epoch": 0.59, "grad_norm": 1.7086719823044167, "learning_rate": 3.839027553303447e-06, "loss": 0.5013, "step": 4733 }, { "epoch": 0.59, "grad_norm": 1.4469872927187486, "learning_rate": 3.837072855063097e-06, "loss": 0.5551, "step": 4734 }, { "epoch": 0.59, "grad_norm": 1.2457288599780056, "learning_rate": 3.835118344703868e-06, "loss": 0.4889, "step": 4735 }, { "epoch": 0.59, "grad_norm": 1.2896620308751967, "learning_rate": 3.833164022541529e-06, "loss": 0.4871, "step": 4736 }, { "epoch": 0.59, "grad_norm": 2.03442397818743, "learning_rate": 3.83120988889182e-06, "loss": 0.5277, "step": 4737 }, { "epoch": 0.59, "grad_norm": 1.3622425574642558, "learning_rate": 3.829255944070445e-06, "loss": 0.4914, "step": 4738 }, { "epoch": 0.59, "grad_norm": 2.9296216351758595, "learning_rate": 3.8273021883930865e-06, "loss": 0.4908, "step": 4739 }, { "epoch": 0.59, "grad_norm": 1.5253787207780585, "learning_rate": 3.825348622175386e-06, "loss": 0.4846, "step": 4740 }, { "epoch": 0.59, "grad_norm": 2.187766035709169, "learning_rate": 3.823395245732961e-06, "loss": 0.4719, "step": 4741 }, { "epoch": 0.59, "grad_norm": 1.4975496186557569, "learning_rate": 3.8214420593813975e-06, "loss": 0.5083, "step": 4742 }, { "epoch": 0.59, "grad_norm": 0.7297899522931308, "learning_rate": 3.819489063436248e-06, "loss": 0.5002, "step": 4743 }, { "epoch": 0.59, "grad_norm": 1.484072419508185, "learning_rate": 3.817536258213038e-06, "loss": 0.4853, "step": 4744 }, { "epoch": 0.59, "grad_norm": 1.5021104486952073, "learning_rate": 3.81558364402726e-06, "loss": 0.4509, "step": 4745 }, { "epoch": 0.59, "grad_norm": 1.6685918184482609, "learning_rate": 3.8136312211943736e-06, "loss": 0.5322, "step": 4746 }, { "epoch": 0.59, "grad_norm": 1.6368358729433512, "learning_rate": 3.8116789900298135e-06, "loss": 0.4764, "step": 4747 }, { "epoch": 0.59, "grad_norm": 1.43761500492689, "learning_rate": 3.809726950848979e-06, "loss": 0.4895, "step": 4748 }, { "epoch": 0.59, "grad_norm": 1.5978788025111923, "learning_rate": 3.8077751039672377e-06, "loss": 0.5373, "step": 4749 }, { "epoch": 0.59, "grad_norm": 1.2847629859841037, "learning_rate": 3.805823449699929e-06, "loss": 0.4763, "step": 4750 }, { "epoch": 0.59, "grad_norm": 1.7553604371536287, "learning_rate": 3.80387198836236e-06, "loss": 0.477, "step": 4751 }, { "epoch": 0.59, "grad_norm": 1.8653801195128799, "learning_rate": 3.801920720269805e-06, "loss": 0.4816, "step": 4752 }, { "epoch": 0.59, "grad_norm": 1.6215137768031846, "learning_rate": 3.7999696457375094e-06, "loss": 0.4689, "step": 4753 }, { "epoch": 0.59, "grad_norm": 1.5537199741329855, "learning_rate": 3.7980187650806855e-06, "loss": 0.497, "step": 4754 }, { "epoch": 0.59, "grad_norm": 1.3683582861339472, "learning_rate": 3.7960680786145177e-06, "loss": 0.4956, "step": 4755 }, { "epoch": 0.59, "grad_norm": 1.3744261784431637, "learning_rate": 3.7941175866541545e-06, "loss": 0.4658, "step": 4756 }, { "epoch": 0.59, "grad_norm": 1.834936259971978, "learning_rate": 3.7921672895147154e-06, "loss": 0.4878, "step": 4757 }, { "epoch": 0.59, "grad_norm": 0.6618545833319962, "learning_rate": 3.7902171875112893e-06, "loss": 0.4895, "step": 4758 }, { "epoch": 0.59, "grad_norm": 3.5550754129562883, "learning_rate": 3.7882672809589303e-06, "loss": 0.5166, "step": 4759 }, { "epoch": 0.59, "grad_norm": 1.6752689455613092, "learning_rate": 3.786317570172665e-06, "loss": 0.4803, "step": 4760 }, { "epoch": 0.59, "grad_norm": 1.817067473755128, "learning_rate": 3.784368055467485e-06, "loss": 0.4799, "step": 4761 }, { "epoch": 0.59, "grad_norm": 1.796483415824333, "learning_rate": 3.78241873715835e-06, "loss": 0.4372, "step": 4762 }, { "epoch": 0.59, "grad_norm": 1.4581358695876308, "learning_rate": 3.7804696155601946e-06, "loss": 0.5114, "step": 4763 }, { "epoch": 0.59, "grad_norm": 1.5162840470971275, "learning_rate": 3.7785206909879125e-06, "loss": 0.5593, "step": 4764 }, { "epoch": 0.59, "grad_norm": 1.7687354735721073, "learning_rate": 3.7765719637563704e-06, "loss": 0.4865, "step": 4765 }, { "epoch": 0.59, "grad_norm": 0.6016449273847941, "learning_rate": 3.7746234341804023e-06, "loss": 0.4856, "step": 4766 }, { "epoch": 0.59, "grad_norm": 1.5925792667624954, "learning_rate": 3.772675102574811e-06, "loss": 0.4904, "step": 4767 }, { "epoch": 0.59, "grad_norm": 1.5491375986862115, "learning_rate": 3.7707269692543657e-06, "loss": 0.4987, "step": 4768 }, { "epoch": 0.59, "grad_norm": 4.024481064899184, "learning_rate": 3.7687790345338054e-06, "loss": 0.5366, "step": 4769 }, { "epoch": 0.59, "grad_norm": 1.5008907912251712, "learning_rate": 3.7668312987278333e-06, "loss": 0.4803, "step": 4770 }, { "epoch": 0.59, "grad_norm": 1.4008267317117975, "learning_rate": 3.764883762151128e-06, "loss": 0.4885, "step": 4771 }, { "epoch": 0.59, "grad_norm": 1.3949664117379335, "learning_rate": 3.762936425118329e-06, "loss": 0.5142, "step": 4772 }, { "epoch": 0.59, "grad_norm": 1.3049393678095358, "learning_rate": 3.760989287944047e-06, "loss": 0.5097, "step": 4773 }, { "epoch": 0.59, "grad_norm": 1.5641320129466818, "learning_rate": 3.7590423509428557e-06, "loss": 0.5167, "step": 4774 }, { "epoch": 0.59, "grad_norm": 1.4951590978042526, "learning_rate": 3.7570956144293025e-06, "loss": 0.5374, "step": 4775 }, { "epoch": 0.59, "grad_norm": 4.330005347816297, "learning_rate": 3.7551490787178996e-06, "loss": 0.5422, "step": 4776 }, { "epoch": 0.59, "grad_norm": 1.3316052030847458, "learning_rate": 3.753202744123126e-06, "loss": 0.453, "step": 4777 }, { "epoch": 0.59, "grad_norm": 1.6324208318735371, "learning_rate": 3.7512566109594308e-06, "loss": 0.5317, "step": 4778 }, { "epoch": 0.59, "grad_norm": 1.5633976503408085, "learning_rate": 3.7493106795412254e-06, "loss": 0.4704, "step": 4779 }, { "epoch": 0.59, "grad_norm": 1.4535976872534775, "learning_rate": 3.747364950182897e-06, "loss": 0.5258, "step": 4780 }, { "epoch": 0.59, "grad_norm": 5.231741944089487, "learning_rate": 3.7454194231987927e-06, "loss": 0.4684, "step": 4781 }, { "epoch": 0.59, "grad_norm": 1.2879488975320648, "learning_rate": 3.7434740989032316e-06, "loss": 0.5012, "step": 4782 }, { "epoch": 0.59, "grad_norm": 0.7339082361721024, "learning_rate": 3.7415289776104953e-06, "loss": 0.4991, "step": 4783 }, { "epoch": 0.59, "grad_norm": 1.8207277057420377, "learning_rate": 3.739584059634836e-06, "loss": 0.4733, "step": 4784 }, { "epoch": 0.59, "grad_norm": 1.816680471066737, "learning_rate": 3.7376393452904725e-06, "loss": 0.5114, "step": 4785 }, { "epoch": 0.59, "grad_norm": 1.4300159454723032, "learning_rate": 3.7356948348915913e-06, "loss": 0.4716, "step": 4786 }, { "epoch": 0.59, "grad_norm": 1.466517194302703, "learning_rate": 3.7337505287523413e-06, "loss": 0.5039, "step": 4787 }, { "epoch": 0.59, "grad_norm": 1.6332080984609707, "learning_rate": 3.731806427186848e-06, "loss": 0.4736, "step": 4788 }, { "epoch": 0.59, "grad_norm": 1.521013037464149, "learning_rate": 3.7298625305091963e-06, "loss": 0.4883, "step": 4789 }, { "epoch": 0.59, "grad_norm": 1.5662771829569915, "learning_rate": 3.7279188390334385e-06, "loss": 0.4823, "step": 4790 }, { "epoch": 0.59, "grad_norm": 1.4664761110857254, "learning_rate": 3.7259753530735964e-06, "loss": 0.5037, "step": 4791 }, { "epoch": 0.59, "grad_norm": 1.406015218659607, "learning_rate": 3.7240320729436575e-06, "loss": 0.5087, "step": 4792 }, { "epoch": 0.59, "grad_norm": 1.4644074732784458, "learning_rate": 3.7220889989575737e-06, "loss": 0.4667, "step": 4793 }, { "epoch": 0.59, "grad_norm": 1.6162804324068345, "learning_rate": 3.7201461314292665e-06, "loss": 0.5539, "step": 4794 }, { "epoch": 0.6, "grad_norm": 1.9052845018302897, "learning_rate": 3.7182034706726224e-06, "loss": 0.4905, "step": 4795 }, { "epoch": 0.6, "grad_norm": 1.3987498526053814, "learning_rate": 3.716261017001498e-06, "loss": 0.4913, "step": 4796 }, { "epoch": 0.6, "grad_norm": 1.653132447704072, "learning_rate": 3.714318770729713e-06, "loss": 0.5096, "step": 4797 }, { "epoch": 0.6, "grad_norm": 1.4485827282911432, "learning_rate": 3.7123767321710538e-06, "loss": 0.5258, "step": 4798 }, { "epoch": 0.6, "grad_norm": 1.3441229709454283, "learning_rate": 3.710434901639274e-06, "loss": 0.5023, "step": 4799 }, { "epoch": 0.6, "grad_norm": 1.5096540584449896, "learning_rate": 3.708493279448093e-06, "loss": 0.4974, "step": 4800 }, { "epoch": 0.6, "grad_norm": 1.3348271010756676, "learning_rate": 3.7065518659111982e-06, "loss": 0.5133, "step": 4801 }, { "epoch": 0.6, "grad_norm": 1.3362769274646589, "learning_rate": 3.7046106613422404e-06, "loss": 0.4978, "step": 4802 }, { "epoch": 0.6, "grad_norm": 0.6586552427143136, "learning_rate": 3.7026696660548365e-06, "loss": 0.5204, "step": 4803 }, { "epoch": 0.6, "grad_norm": 1.2970674817252872, "learning_rate": 3.700728880362577e-06, "loss": 0.5126, "step": 4804 }, { "epoch": 0.6, "grad_norm": 1.2692411052824943, "learning_rate": 3.6987883045790093e-06, "loss": 0.4919, "step": 4805 }, { "epoch": 0.6, "grad_norm": 7.544857137361854, "learning_rate": 3.6968479390176503e-06, "loss": 0.4943, "step": 4806 }, { "epoch": 0.6, "grad_norm": 1.6111067019105714, "learning_rate": 3.6949077839919852e-06, "loss": 0.5665, "step": 4807 }, { "epoch": 0.6, "grad_norm": 1.5148276199786361, "learning_rate": 3.692967839815461e-06, "loss": 0.5128, "step": 4808 }, { "epoch": 0.6, "grad_norm": 1.9289750603078077, "learning_rate": 3.6910281068014935e-06, "loss": 0.5046, "step": 4809 }, { "epoch": 0.6, "grad_norm": 1.3571727913310128, "learning_rate": 3.689088585263464e-06, "loss": 0.544, "step": 4810 }, { "epoch": 0.6, "grad_norm": 1.514263873049238, "learning_rate": 3.687149275514718e-06, "loss": 0.4944, "step": 4811 }, { "epoch": 0.6, "grad_norm": 1.320448446471603, "learning_rate": 3.68521017786857e-06, "loss": 0.4634, "step": 4812 }, { "epoch": 0.6, "grad_norm": 1.4253421646515063, "learning_rate": 3.6832712926382978e-06, "loss": 0.4323, "step": 4813 }, { "epoch": 0.6, "grad_norm": 1.6248156405188252, "learning_rate": 3.6813326201371448e-06, "loss": 0.4576, "step": 4814 }, { "epoch": 0.6, "grad_norm": 1.5491421709460802, "learning_rate": 3.679394160678321e-06, "loss": 0.4942, "step": 4815 }, { "epoch": 0.6, "grad_norm": 1.5293882527430553, "learning_rate": 3.677455914575001e-06, "loss": 0.5424, "step": 4816 }, { "epoch": 0.6, "grad_norm": 2.6610870035965135, "learning_rate": 3.675517882140326e-06, "loss": 0.491, "step": 4817 }, { "epoch": 0.6, "grad_norm": 1.5876662699587831, "learning_rate": 3.673580063687402e-06, "loss": 0.5051, "step": 4818 }, { "epoch": 0.6, "grad_norm": 0.750905294107381, "learning_rate": 3.671642459529301e-06, "loss": 0.4937, "step": 4819 }, { "epoch": 0.6, "grad_norm": 1.7565409567351697, "learning_rate": 3.6697050699790586e-06, "loss": 0.5273, "step": 4820 }, { "epoch": 0.6, "grad_norm": 1.5630185373429069, "learning_rate": 3.6677678953496797e-06, "loss": 0.536, "step": 4821 }, { "epoch": 0.6, "grad_norm": 1.5729595543148676, "learning_rate": 3.66583093595413e-06, "loss": 0.5653, "step": 4822 }, { "epoch": 0.6, "grad_norm": 1.3837247868854905, "learning_rate": 3.663894192105344e-06, "loss": 0.5577, "step": 4823 }, { "epoch": 0.6, "grad_norm": 1.7157111912006533, "learning_rate": 3.6619576641162176e-06, "loss": 0.5539, "step": 4824 }, { "epoch": 0.6, "grad_norm": 0.6563532770825058, "learning_rate": 3.660021352299615e-06, "loss": 0.5398, "step": 4825 }, { "epoch": 0.6, "grad_norm": 1.4097431835544845, "learning_rate": 3.658085256968365e-06, "loss": 0.5385, "step": 4826 }, { "epoch": 0.6, "grad_norm": 1.360389593574117, "learning_rate": 3.65614937843526e-06, "loss": 0.5052, "step": 4827 }, { "epoch": 0.6, "grad_norm": 1.353844494224524, "learning_rate": 3.6542137170130576e-06, "loss": 0.4953, "step": 4828 }, { "epoch": 0.6, "grad_norm": 1.422660488634164, "learning_rate": 3.652278273014482e-06, "loss": 0.5504, "step": 4829 }, { "epoch": 0.6, "grad_norm": 1.5049937847673278, "learning_rate": 3.650343046752222e-06, "loss": 0.4411, "step": 4830 }, { "epoch": 0.6, "grad_norm": 2.8728735408056143, "learning_rate": 3.6484080385389296e-06, "loss": 0.4558, "step": 4831 }, { "epoch": 0.6, "grad_norm": 1.3355117575474822, "learning_rate": 3.6464732486872216e-06, "loss": 0.4604, "step": 4832 }, { "epoch": 0.6, "grad_norm": 1.2747248934156539, "learning_rate": 3.6445386775096813e-06, "loss": 0.4732, "step": 4833 }, { "epoch": 0.6, "grad_norm": 1.3518302410791765, "learning_rate": 3.642604325318856e-06, "loss": 0.4815, "step": 4834 }, { "epoch": 0.6, "grad_norm": 1.4051502217298926, "learning_rate": 3.640670192427257e-06, "loss": 0.4661, "step": 4835 }, { "epoch": 0.6, "grad_norm": 1.4809715490321091, "learning_rate": 3.6387362791473583e-06, "loss": 0.5039, "step": 4836 }, { "epoch": 0.6, "grad_norm": 1.396861806595747, "learning_rate": 3.6368025857916044e-06, "loss": 0.4719, "step": 4837 }, { "epoch": 0.6, "grad_norm": 1.4157885612242203, "learning_rate": 3.6348691126723984e-06, "loss": 0.5259, "step": 4838 }, { "epoch": 0.6, "grad_norm": 1.7313886903818687, "learning_rate": 3.6329358601021103e-06, "loss": 0.5609, "step": 4839 }, { "epoch": 0.6, "grad_norm": 1.4253943377103053, "learning_rate": 3.6310028283930743e-06, "loss": 0.5139, "step": 4840 }, { "epoch": 0.6, "grad_norm": 2.849074190948096, "learning_rate": 3.629070017857588e-06, "loss": 0.4811, "step": 4841 }, { "epoch": 0.6, "grad_norm": 1.3268550582958893, "learning_rate": 3.6271374288079142e-06, "loss": 0.4985, "step": 4842 }, { "epoch": 0.6, "grad_norm": 1.3070111115301872, "learning_rate": 3.6252050615562805e-06, "loss": 0.4931, "step": 4843 }, { "epoch": 0.6, "grad_norm": 1.3391127564835532, "learning_rate": 3.623272916414876e-06, "loss": 0.5093, "step": 4844 }, { "epoch": 0.6, "grad_norm": 1.3718861741774446, "learning_rate": 3.621340993695858e-06, "loss": 0.5138, "step": 4845 }, { "epoch": 0.6, "grad_norm": 1.4276519023118137, "learning_rate": 3.6194092937113444e-06, "loss": 0.5519, "step": 4846 }, { "epoch": 0.6, "grad_norm": 1.672717431707128, "learning_rate": 3.617477816773419e-06, "loss": 0.4756, "step": 4847 }, { "epoch": 0.6, "grad_norm": 1.588454212554117, "learning_rate": 3.6155465631941276e-06, "loss": 0.5124, "step": 4848 }, { "epoch": 0.6, "grad_norm": 0.7098411838422147, "learning_rate": 3.6136155332854835e-06, "loss": 0.5085, "step": 4849 }, { "epoch": 0.6, "grad_norm": 1.4768604915329149, "learning_rate": 3.611684727359459e-06, "loss": 0.513, "step": 4850 }, { "epoch": 0.6, "grad_norm": 1.6237177436087704, "learning_rate": 3.6097541457279945e-06, "loss": 0.5036, "step": 4851 }, { "epoch": 0.6, "grad_norm": 2.008599890001384, "learning_rate": 3.607823788702991e-06, "loss": 0.5041, "step": 4852 }, { "epoch": 0.6, "grad_norm": 1.33530351725401, "learning_rate": 3.6058936565963158e-06, "loss": 0.5178, "step": 4853 }, { "epoch": 0.6, "grad_norm": 1.5829084651240015, "learning_rate": 3.6039637497197995e-06, "loss": 0.4938, "step": 4854 }, { "epoch": 0.6, "grad_norm": 3.9056804696592824, "learning_rate": 3.602034068385234e-06, "loss": 0.5461, "step": 4855 }, { "epoch": 0.6, "grad_norm": 1.9163992244200692, "learning_rate": 3.6001046129043778e-06, "loss": 0.4962, "step": 4856 }, { "epoch": 0.6, "grad_norm": 0.7212350091995289, "learning_rate": 3.59817538358895e-06, "loss": 0.5098, "step": 4857 }, { "epoch": 0.6, "grad_norm": 1.9415046450085558, "learning_rate": 3.596246380750635e-06, "loss": 0.5487, "step": 4858 }, { "epoch": 0.6, "grad_norm": 1.5949102498616452, "learning_rate": 3.5943176047010807e-06, "loss": 0.6143, "step": 4859 }, { "epoch": 0.6, "grad_norm": 1.630007389484059, "learning_rate": 3.592389055751897e-06, "loss": 0.5065, "step": 4860 }, { "epoch": 0.6, "grad_norm": 1.476053795816333, "learning_rate": 3.590460734214657e-06, "loss": 0.4923, "step": 4861 }, { "epoch": 0.6, "grad_norm": 1.8988176215501187, "learning_rate": 3.5885326404009022e-06, "loss": 0.5095, "step": 4862 }, { "epoch": 0.6, "grad_norm": 1.7075130802141478, "learning_rate": 3.5866047746221294e-06, "loss": 0.5574, "step": 4863 }, { "epoch": 0.6, "grad_norm": 1.96164473093287, "learning_rate": 3.5846771371898037e-06, "loss": 0.5252, "step": 4864 }, { "epoch": 0.6, "grad_norm": 1.7917743538361852, "learning_rate": 3.582749728415351e-06, "loss": 0.5252, "step": 4865 }, { "epoch": 0.6, "grad_norm": 0.645792885373291, "learning_rate": 3.580822548610162e-06, "loss": 0.4615, "step": 4866 }, { "epoch": 0.6, "grad_norm": 1.4344210864773772, "learning_rate": 3.5788955980855894e-06, "loss": 0.4666, "step": 4867 }, { "epoch": 0.6, "grad_norm": 1.3952139809581494, "learning_rate": 3.5769688771529486e-06, "loss": 0.4653, "step": 4868 }, { "epoch": 0.6, "grad_norm": 0.6897052423443377, "learning_rate": 3.575042386123517e-06, "loss": 0.5299, "step": 4869 }, { "epoch": 0.6, "grad_norm": 1.5339288039723356, "learning_rate": 3.57311612530854e-06, "loss": 0.4769, "step": 4870 }, { "epoch": 0.6, "grad_norm": 3.188281057473945, "learning_rate": 3.5711900950192204e-06, "loss": 0.4984, "step": 4871 }, { "epoch": 0.6, "grad_norm": 1.511980292628914, "learning_rate": 3.5692642955667235e-06, "loss": 0.48, "step": 4872 }, { "epoch": 0.6, "grad_norm": 1.362985578204832, "learning_rate": 3.5673387272621805e-06, "loss": 0.415, "step": 4873 }, { "epoch": 0.6, "grad_norm": 1.7036428859732629, "learning_rate": 3.565413390416684e-06, "loss": 0.5269, "step": 4874 }, { "epoch": 0.6, "grad_norm": 2.012695203425002, "learning_rate": 3.56348828534129e-06, "loss": 0.4847, "step": 4875 }, { "epoch": 0.61, "grad_norm": 1.4546222976760228, "learning_rate": 3.5615634123470143e-06, "loss": 0.4663, "step": 4876 }, { "epoch": 0.61, "grad_norm": 1.5513891298669515, "learning_rate": 3.5596387717448354e-06, "loss": 0.5032, "step": 4877 }, { "epoch": 0.61, "grad_norm": 2.0380830917483737, "learning_rate": 3.5577143638457014e-06, "loss": 0.4912, "step": 4878 }, { "epoch": 0.61, "grad_norm": 1.600213084538027, "learning_rate": 3.555790188960514e-06, "loss": 0.5027, "step": 4879 }, { "epoch": 0.61, "grad_norm": 3.7253184447834027, "learning_rate": 3.5538662474001414e-06, "loss": 0.4657, "step": 4880 }, { "epoch": 0.61, "grad_norm": 1.390200812178653, "learning_rate": 3.551942539475414e-06, "loss": 0.5211, "step": 4881 }, { "epoch": 0.61, "grad_norm": 1.3614362906935606, "learning_rate": 3.550019065497121e-06, "loss": 0.5094, "step": 4882 }, { "epoch": 0.61, "grad_norm": 1.4460947953267989, "learning_rate": 3.5480958257760185e-06, "loss": 0.4452, "step": 4883 }, { "epoch": 0.61, "grad_norm": 1.7139647753932892, "learning_rate": 3.546172820622823e-06, "loss": 0.5304, "step": 4884 }, { "epoch": 0.61, "grad_norm": 1.6558235733830828, "learning_rate": 3.54425005034821e-06, "loss": 0.442, "step": 4885 }, { "epoch": 0.61, "grad_norm": 1.5570124837515966, "learning_rate": 3.5423275152628245e-06, "loss": 0.5011, "step": 4886 }, { "epoch": 0.61, "grad_norm": 1.4332172878692637, "learning_rate": 3.540405215677267e-06, "loss": 0.5, "step": 4887 }, { "epoch": 0.61, "grad_norm": 1.6731989318783418, "learning_rate": 3.5384831519021024e-06, "loss": 0.5592, "step": 4888 }, { "epoch": 0.61, "grad_norm": 1.3510517557984159, "learning_rate": 3.536561324247856e-06, "loss": 0.527, "step": 4889 }, { "epoch": 0.61, "grad_norm": 1.6372062487205699, "learning_rate": 3.5346397330250176e-06, "loss": 0.505, "step": 4890 }, { "epoch": 0.61, "grad_norm": 1.4613237336873037, "learning_rate": 3.532718378544035e-06, "loss": 0.5313, "step": 4891 }, { "epoch": 0.61, "grad_norm": 1.5685237927072886, "learning_rate": 3.530797261115321e-06, "loss": 0.5362, "step": 4892 }, { "epoch": 0.61, "grad_norm": 1.340615457625864, "learning_rate": 3.5288763810492486e-06, "loss": 0.5179, "step": 4893 }, { "epoch": 0.61, "grad_norm": 1.48299723682518, "learning_rate": 3.5269557386561524e-06, "loss": 0.4949, "step": 4894 }, { "epoch": 0.61, "grad_norm": 1.1982134842143222, "learning_rate": 3.5250353342463318e-06, "loss": 0.4733, "step": 4895 }, { "epoch": 0.61, "grad_norm": 1.3323531179160906, "learning_rate": 3.5231151681300426e-06, "loss": 0.4817, "step": 4896 }, { "epoch": 0.61, "grad_norm": 2.1072391564702007, "learning_rate": 3.5211952406175056e-06, "loss": 0.5162, "step": 4897 }, { "epoch": 0.61, "grad_norm": 1.9360651071059258, "learning_rate": 3.5192755520189013e-06, "loss": 0.4815, "step": 4898 }, { "epoch": 0.61, "grad_norm": 1.316168543249597, "learning_rate": 3.5173561026443737e-06, "loss": 0.4946, "step": 4899 }, { "epoch": 0.61, "grad_norm": 1.5539248601419484, "learning_rate": 3.5154368928040255e-06, "loss": 0.5328, "step": 4900 }, { "epoch": 0.61, "grad_norm": 1.452887193496379, "learning_rate": 3.513517922807922e-06, "loss": 0.4714, "step": 4901 }, { "epoch": 0.61, "grad_norm": 1.2772497753613659, "learning_rate": 3.511599192966087e-06, "loss": 0.5088, "step": 4902 }, { "epoch": 0.61, "grad_norm": 1.6841954429457333, "learning_rate": 3.5096807035885134e-06, "loss": 0.5319, "step": 4903 }, { "epoch": 0.61, "grad_norm": 0.6568211084906097, "learning_rate": 3.5077624549851472e-06, "loss": 0.4766, "step": 4904 }, { "epoch": 0.61, "grad_norm": 1.251958650680514, "learning_rate": 3.5058444474659e-06, "loss": 0.4197, "step": 4905 }, { "epoch": 0.61, "grad_norm": 1.3744636066787468, "learning_rate": 3.503926681340641e-06, "loss": 0.4747, "step": 4906 }, { "epoch": 0.61, "grad_norm": 1.7970179104924786, "learning_rate": 3.5020091569192025e-06, "loss": 0.468, "step": 4907 }, { "epoch": 0.61, "grad_norm": 2.1002716500361447, "learning_rate": 3.500091874511379e-06, "loss": 0.4674, "step": 4908 }, { "epoch": 0.61, "grad_norm": 1.7344236020353803, "learning_rate": 3.4981748344269218e-06, "loss": 0.5158, "step": 4909 }, { "epoch": 0.61, "grad_norm": 1.33071499329348, "learning_rate": 3.496258036975547e-06, "loss": 0.5131, "step": 4910 }, { "epoch": 0.61, "grad_norm": 1.5623268444562706, "learning_rate": 3.494341482466931e-06, "loss": 0.5587, "step": 4911 }, { "epoch": 0.61, "grad_norm": 1.2844120808059343, "learning_rate": 3.492425171210708e-06, "loss": 0.5187, "step": 4912 }, { "epoch": 0.61, "grad_norm": 2.0085068307255383, "learning_rate": 3.4905091035164775e-06, "loss": 0.5232, "step": 4913 }, { "epoch": 0.61, "grad_norm": 1.3519355908870683, "learning_rate": 3.4885932796937948e-06, "loss": 0.5681, "step": 4914 }, { "epoch": 0.61, "grad_norm": 1.623680657057162, "learning_rate": 3.48667770005218e-06, "loss": 0.4876, "step": 4915 }, { "epoch": 0.61, "grad_norm": 1.3509431507059002, "learning_rate": 3.4847623649011104e-06, "loss": 0.5221, "step": 4916 }, { "epoch": 0.61, "grad_norm": 1.7033636091227293, "learning_rate": 3.4828472745500256e-06, "loss": 0.5763, "step": 4917 }, { "epoch": 0.61, "grad_norm": 1.4902431059393642, "learning_rate": 3.4809324293083244e-06, "loss": 0.4778, "step": 4918 }, { "epoch": 0.61, "grad_norm": 1.663639956345997, "learning_rate": 3.479017829485369e-06, "loss": 0.4622, "step": 4919 }, { "epoch": 0.61, "grad_norm": 1.2922645060372164, "learning_rate": 3.4771034753904798e-06, "loss": 0.4629, "step": 4920 }, { "epoch": 0.61, "grad_norm": 1.7922928399840536, "learning_rate": 3.4751893673329363e-06, "loss": 0.4866, "step": 4921 }, { "epoch": 0.61, "grad_norm": 1.5676006214673102, "learning_rate": 3.473275505621979e-06, "loss": 0.5136, "step": 4922 }, { "epoch": 0.61, "grad_norm": 1.3092971392638122, "learning_rate": 3.4713618905668103e-06, "loss": 0.5068, "step": 4923 }, { "epoch": 0.61, "grad_norm": 1.4406852634601057, "learning_rate": 3.469448522476592e-06, "loss": 0.5243, "step": 4924 }, { "epoch": 0.61, "grad_norm": 1.5603319740314827, "learning_rate": 3.4675354016604433e-06, "loss": 0.5204, "step": 4925 }, { "epoch": 0.61, "grad_norm": 0.663970478353552, "learning_rate": 3.465622528427447e-06, "loss": 0.4964, "step": 4926 }, { "epoch": 0.61, "grad_norm": 1.3333166367010005, "learning_rate": 3.463709903086646e-06, "loss": 0.5025, "step": 4927 }, { "epoch": 0.61, "grad_norm": 1.271444599665555, "learning_rate": 3.4617975259470403e-06, "loss": 0.4348, "step": 4928 }, { "epoch": 0.61, "grad_norm": 1.3694773642972735, "learning_rate": 3.459885397317592e-06, "loss": 0.4865, "step": 4929 }, { "epoch": 0.61, "grad_norm": 1.4734405859367252, "learning_rate": 3.4579735175072217e-06, "loss": 0.4868, "step": 4930 }, { "epoch": 0.61, "grad_norm": 1.5059492843366027, "learning_rate": 3.456061886824811e-06, "loss": 0.5086, "step": 4931 }, { "epoch": 0.61, "grad_norm": 1.5004990367175046, "learning_rate": 3.4541505055791993e-06, "loss": 0.46, "step": 4932 }, { "epoch": 0.61, "grad_norm": 1.6902157867585337, "learning_rate": 3.4522393740791887e-06, "loss": 0.4743, "step": 4933 }, { "epoch": 0.61, "grad_norm": 1.3336540061732831, "learning_rate": 3.4503284926335385e-06, "loss": 0.4962, "step": 4934 }, { "epoch": 0.61, "grad_norm": 1.4185962631748819, "learning_rate": 3.4484178615509676e-06, "loss": 0.4756, "step": 4935 }, { "epoch": 0.61, "grad_norm": 1.443001312748826, "learning_rate": 3.446507481140157e-06, "loss": 0.4472, "step": 4936 }, { "epoch": 0.61, "grad_norm": 0.6777553610363183, "learning_rate": 3.4445973517097453e-06, "loss": 0.5026, "step": 4937 }, { "epoch": 0.61, "grad_norm": 1.4188816330877883, "learning_rate": 3.44268747356833e-06, "loss": 0.4978, "step": 4938 }, { "epoch": 0.61, "grad_norm": 1.5110459009189268, "learning_rate": 3.440777847024469e-06, "loss": 0.5127, "step": 4939 }, { "epoch": 0.61, "grad_norm": 1.6410841022146765, "learning_rate": 3.438868472386679e-06, "loss": 0.4696, "step": 4940 }, { "epoch": 0.61, "grad_norm": 1.4175618287038916, "learning_rate": 3.436959349963437e-06, "loss": 0.5367, "step": 4941 }, { "epoch": 0.61, "grad_norm": 0.6666608715251021, "learning_rate": 3.4350504800631775e-06, "loss": 0.5037, "step": 4942 }, { "epoch": 0.61, "grad_norm": 1.542889725936689, "learning_rate": 3.433141862994295e-06, "loss": 0.5429, "step": 4943 }, { "epoch": 0.61, "grad_norm": 0.7142075551377389, "learning_rate": 3.431233499065145e-06, "loss": 0.4819, "step": 4944 }, { "epoch": 0.61, "grad_norm": 1.5725999569088291, "learning_rate": 3.4293253885840395e-06, "loss": 0.5112, "step": 4945 }, { "epoch": 0.61, "grad_norm": 1.3023802101493411, "learning_rate": 3.427417531859251e-06, "loss": 0.4196, "step": 4946 }, { "epoch": 0.61, "grad_norm": 0.6768819589739452, "learning_rate": 3.42550992919901e-06, "loss": 0.5163, "step": 4947 }, { "epoch": 0.61, "grad_norm": 0.7029471252807711, "learning_rate": 3.4236025809115076e-06, "loss": 0.4865, "step": 4948 }, { "epoch": 0.61, "grad_norm": 1.3768694181253318, "learning_rate": 3.4216954873048912e-06, "loss": 0.5202, "step": 4949 }, { "epoch": 0.61, "grad_norm": 1.9456364823131251, "learning_rate": 3.41978864868727e-06, "loss": 0.5039, "step": 4950 }, { "epoch": 0.61, "grad_norm": 1.7185901326118083, "learning_rate": 3.4178820653667085e-06, "loss": 0.4309, "step": 4951 }, { "epoch": 0.61, "grad_norm": 1.518680620525008, "learning_rate": 3.415975737651234e-06, "loss": 0.5213, "step": 4952 }, { "epoch": 0.61, "grad_norm": 1.153241301832152, "learning_rate": 3.4140696658488304e-06, "loss": 0.4299, "step": 4953 }, { "epoch": 0.61, "grad_norm": 1.5080971432334338, "learning_rate": 3.4121638502674397e-06, "loss": 0.552, "step": 4954 }, { "epoch": 0.61, "grad_norm": 1.3662961579017965, "learning_rate": 3.4102582912149636e-06, "loss": 0.5008, "step": 4955 }, { "epoch": 0.62, "grad_norm": 1.5083284681008178, "learning_rate": 3.4083529889992617e-06, "loss": 0.4877, "step": 4956 }, { "epoch": 0.62, "grad_norm": 1.6526344945720335, "learning_rate": 3.406447943928152e-06, "loss": 0.4756, "step": 4957 }, { "epoch": 0.62, "grad_norm": 0.6386548943776154, "learning_rate": 3.4045431563094123e-06, "loss": 0.4902, "step": 4958 }, { "epoch": 0.62, "grad_norm": 1.4213765901889195, "learning_rate": 3.402638626450775e-06, "loss": 0.486, "step": 4959 }, { "epoch": 0.62, "grad_norm": 4.848591393086902, "learning_rate": 3.4007343546599384e-06, "loss": 0.4606, "step": 4960 }, { "epoch": 0.62, "grad_norm": 1.4880124777484838, "learning_rate": 3.3988303412445518e-06, "loss": 0.4948, "step": 4961 }, { "epoch": 0.62, "grad_norm": 1.277388551560406, "learning_rate": 3.3969265865122247e-06, "loss": 0.468, "step": 4962 }, { "epoch": 0.62, "grad_norm": 1.2436123249223645, "learning_rate": 3.395023090770526e-06, "loss": 0.4636, "step": 4963 }, { "epoch": 0.62, "grad_norm": 1.4514812397300938, "learning_rate": 3.3931198543269835e-06, "loss": 0.5198, "step": 4964 }, { "epoch": 0.62, "grad_norm": 1.6008351074207805, "learning_rate": 3.3912168774890797e-06, "loss": 0.513, "step": 4965 }, { "epoch": 0.62, "grad_norm": 1.6545897409823502, "learning_rate": 3.3893141605642586e-06, "loss": 0.5582, "step": 4966 }, { "epoch": 0.62, "grad_norm": 1.4886116131312268, "learning_rate": 3.3874117038599186e-06, "loss": 0.516, "step": 4967 }, { "epoch": 0.62, "grad_norm": 1.60481546255909, "learning_rate": 3.3855095076834216e-06, "loss": 0.4751, "step": 4968 }, { "epoch": 0.62, "grad_norm": 1.5747293988329985, "learning_rate": 3.3836075723420836e-06, "loss": 0.544, "step": 4969 }, { "epoch": 0.62, "grad_norm": 1.5765305986762645, "learning_rate": 3.3817058981431784e-06, "loss": 0.5053, "step": 4970 }, { "epoch": 0.62, "grad_norm": 1.5653479677087112, "learning_rate": 3.3798044853939375e-06, "loss": 0.5167, "step": 4971 }, { "epoch": 0.62, "grad_norm": 1.6885810336216072, "learning_rate": 3.3779033344015515e-06, "loss": 0.5074, "step": 4972 }, { "epoch": 0.62, "grad_norm": 1.565508621884468, "learning_rate": 3.3760024454731677e-06, "loss": 0.5037, "step": 4973 }, { "epoch": 0.62, "grad_norm": 1.588896209724684, "learning_rate": 3.374101818915892e-06, "loss": 0.5485, "step": 4974 }, { "epoch": 0.62, "grad_norm": 1.5535713099568533, "learning_rate": 3.372201455036787e-06, "loss": 0.4844, "step": 4975 }, { "epoch": 0.62, "grad_norm": 1.6389578924823094, "learning_rate": 3.370301354142871e-06, "loss": 0.5083, "step": 4976 }, { "epoch": 0.62, "grad_norm": 1.416294243738421, "learning_rate": 3.3684015165411264e-06, "loss": 0.4844, "step": 4977 }, { "epoch": 0.62, "grad_norm": 1.1980295333717508, "learning_rate": 3.366501942538487e-06, "loss": 0.5276, "step": 4978 }, { "epoch": 0.62, "grad_norm": 1.2092080604491835, "learning_rate": 3.3646026324418456e-06, "loss": 0.4384, "step": 4979 }, { "epoch": 0.62, "grad_norm": 2.073980580745277, "learning_rate": 3.3627035865580525e-06, "loss": 0.5405, "step": 4980 }, { "epoch": 0.62, "grad_norm": 3.6017686291826134, "learning_rate": 3.3608048051939147e-06, "loss": 0.5, "step": 4981 }, { "epoch": 0.62, "grad_norm": 1.3082842776433188, "learning_rate": 3.358906288656197e-06, "loss": 0.4998, "step": 4982 }, { "epoch": 0.62, "grad_norm": 1.4383088949447156, "learning_rate": 3.3570080372516213e-06, "loss": 0.5247, "step": 4983 }, { "epoch": 0.62, "grad_norm": 1.3887107654466078, "learning_rate": 3.3551100512868663e-06, "loss": 0.5017, "step": 4984 }, { "epoch": 0.62, "grad_norm": 1.38804570646994, "learning_rate": 3.3532123310685706e-06, "loss": 0.5464, "step": 4985 }, { "epoch": 0.62, "grad_norm": 1.4133582163794047, "learning_rate": 3.3513148769033265e-06, "loss": 0.4914, "step": 4986 }, { "epoch": 0.62, "grad_norm": 1.5795226928379709, "learning_rate": 3.3494176890976847e-06, "loss": 0.4805, "step": 4987 }, { "epoch": 0.62, "grad_norm": 1.415716935901781, "learning_rate": 3.3475207679581524e-06, "loss": 0.5058, "step": 4988 }, { "epoch": 0.62, "grad_norm": 1.7829808944908172, "learning_rate": 3.3456241137911947e-06, "loss": 0.4917, "step": 4989 }, { "epoch": 0.62, "grad_norm": 2.5579286305124613, "learning_rate": 3.343727726903231e-06, "loss": 0.4855, "step": 4990 }, { "epoch": 0.62, "grad_norm": 1.9304590658531278, "learning_rate": 3.3418316076006394e-06, "loss": 0.4925, "step": 4991 }, { "epoch": 0.62, "grad_norm": 1.6227599018995744, "learning_rate": 3.3399357561897545e-06, "loss": 0.5281, "step": 4992 }, { "epoch": 0.62, "grad_norm": 1.677743562278396, "learning_rate": 3.33804017297687e-06, "loss": 0.4952, "step": 4993 }, { "epoch": 0.62, "grad_norm": 1.4807846872178518, "learning_rate": 3.3361448582682333e-06, "loss": 0.4668, "step": 4994 }, { "epoch": 0.62, "grad_norm": 1.526223315842072, "learning_rate": 3.3342498123700484e-06, "loss": 0.4895, "step": 4995 }, { "epoch": 0.62, "grad_norm": 2.0357225736417877, "learning_rate": 3.3323550355884767e-06, "loss": 0.5361, "step": 4996 }, { "epoch": 0.62, "grad_norm": 1.5860862144050736, "learning_rate": 3.3304605282296367e-06, "loss": 0.4871, "step": 4997 }, { "epoch": 0.62, "grad_norm": 1.6424354224213873, "learning_rate": 3.328566290599602e-06, "loss": 0.5181, "step": 4998 }, { "epoch": 0.62, "grad_norm": 1.648090373847227, "learning_rate": 3.326672323004405e-06, "loss": 0.5208, "step": 4999 }, { "epoch": 0.62, "grad_norm": 2.248674939834039, "learning_rate": 3.324778625750028e-06, "loss": 0.5155, "step": 5000 }, { "epoch": 0.62, "grad_norm": 1.2753266896871043, "learning_rate": 3.3228851991424203e-06, "loss": 0.4403, "step": 5001 }, { "epoch": 0.62, "grad_norm": 1.464593760439206, "learning_rate": 3.32099204348748e-06, "loss": 0.4549, "step": 5002 }, { "epoch": 0.62, "grad_norm": 2.000264604751063, "learning_rate": 3.319099159091062e-06, "loss": 0.4919, "step": 5003 }, { "epoch": 0.62, "grad_norm": 2.8468860351733025, "learning_rate": 3.3172065462589786e-06, "loss": 0.5091, "step": 5004 }, { "epoch": 0.62, "grad_norm": 1.5417003575190913, "learning_rate": 3.315314205296999e-06, "loss": 0.5152, "step": 5005 }, { "epoch": 0.62, "grad_norm": 1.4809068825565626, "learning_rate": 3.3134221365108466e-06, "loss": 0.509, "step": 5006 }, { "epoch": 0.62, "grad_norm": 1.3551581168063283, "learning_rate": 3.311530340206202e-06, "loss": 0.5268, "step": 5007 }, { "epoch": 0.62, "grad_norm": 1.4109766464130555, "learning_rate": 3.3096388166887007e-06, "loss": 0.5166, "step": 5008 }, { "epoch": 0.62, "grad_norm": 1.3582712933874206, "learning_rate": 3.307747566263937e-06, "loss": 0.5598, "step": 5009 }, { "epoch": 0.62, "grad_norm": 1.4505331602252791, "learning_rate": 3.3058565892374584e-06, "loss": 0.5152, "step": 5010 }, { "epoch": 0.62, "grad_norm": 1.819511271452722, "learning_rate": 3.3039658859147683e-06, "loss": 0.4617, "step": 5011 }, { "epoch": 0.62, "grad_norm": 1.710710478621651, "learning_rate": 3.3020754566013256e-06, "loss": 0.521, "step": 5012 }, { "epoch": 0.62, "grad_norm": 1.821480222707406, "learning_rate": 3.300185301602549e-06, "loss": 0.5599, "step": 5013 }, { "epoch": 0.62, "grad_norm": 2.206968709126485, "learning_rate": 3.2982954212238056e-06, "loss": 0.5515, "step": 5014 }, { "epoch": 0.62, "grad_norm": 1.397308898229944, "learning_rate": 3.2964058157704247e-06, "loss": 0.4342, "step": 5015 }, { "epoch": 0.62, "grad_norm": 1.333446236835325, "learning_rate": 3.2945164855476885e-06, "loss": 0.4645, "step": 5016 }, { "epoch": 0.62, "grad_norm": 1.8032287899771318, "learning_rate": 3.292627430860833e-06, "loss": 0.4875, "step": 5017 }, { "epoch": 0.62, "grad_norm": 1.5778051329605842, "learning_rate": 3.2907386520150553e-06, "loss": 0.4935, "step": 5018 }, { "epoch": 0.62, "grad_norm": 1.4134204464925468, "learning_rate": 3.288850149315501e-06, "loss": 0.5004, "step": 5019 }, { "epoch": 0.62, "grad_norm": 1.3615187047308002, "learning_rate": 3.2869619230672765e-06, "loss": 0.5062, "step": 5020 }, { "epoch": 0.62, "grad_norm": 1.4682549324756706, "learning_rate": 3.28507397357544e-06, "loss": 0.4583, "step": 5021 }, { "epoch": 0.62, "grad_norm": 1.5559248236760226, "learning_rate": 3.2831863011450073e-06, "loss": 0.4816, "step": 5022 }, { "epoch": 0.62, "grad_norm": 2.475983413954976, "learning_rate": 3.2812989060809474e-06, "loss": 0.5502, "step": 5023 }, { "epoch": 0.62, "grad_norm": 1.3817847332999171, "learning_rate": 3.279411788688186e-06, "loss": 0.4537, "step": 5024 }, { "epoch": 0.62, "grad_norm": 1.4282415662444998, "learning_rate": 3.2775249492716034e-06, "loss": 0.4906, "step": 5025 }, { "epoch": 0.62, "grad_norm": 1.6435460317787725, "learning_rate": 3.275638388136036e-06, "loss": 0.5215, "step": 5026 }, { "epoch": 0.62, "grad_norm": 1.6332556687334305, "learning_rate": 3.2737521055862744e-06, "loss": 0.501, "step": 5027 }, { "epoch": 0.62, "grad_norm": 1.655276725205797, "learning_rate": 3.2718661019270624e-06, "loss": 0.5005, "step": 5028 }, { "epoch": 0.62, "grad_norm": 1.3042666406087373, "learning_rate": 3.269980377463103e-06, "loss": 0.472, "step": 5029 }, { "epoch": 0.62, "grad_norm": 1.918025369784106, "learning_rate": 3.2680949324990497e-06, "loss": 0.5144, "step": 5030 }, { "epoch": 0.62, "grad_norm": 1.4987614591058473, "learning_rate": 3.2662097673395134e-06, "loss": 0.4655, "step": 5031 }, { "epoch": 0.62, "grad_norm": 0.6840112024608185, "learning_rate": 3.264324882289058e-06, "loss": 0.484, "step": 5032 }, { "epoch": 0.62, "grad_norm": 1.361880385408784, "learning_rate": 3.2624402776522046e-06, "loss": 0.5415, "step": 5033 }, { "epoch": 0.62, "grad_norm": 1.2987058792137038, "learning_rate": 3.260555953733428e-06, "loss": 0.4534, "step": 5034 }, { "epoch": 0.62, "grad_norm": 1.6933632604516184, "learning_rate": 3.2586719108371567e-06, "loss": 0.4754, "step": 5035 }, { "epoch": 0.62, "grad_norm": 1.621284804628328, "learning_rate": 3.2567881492677746e-06, "loss": 0.4625, "step": 5036 }, { "epoch": 0.63, "grad_norm": 1.4867385124248171, "learning_rate": 3.2549046693296198e-06, "loss": 0.4758, "step": 5037 }, { "epoch": 0.63, "grad_norm": 1.4309626921575582, "learning_rate": 3.2530214713269853e-06, "loss": 0.463, "step": 5038 }, { "epoch": 0.63, "grad_norm": 1.3982991375504024, "learning_rate": 3.251138555564118e-06, "loss": 0.5258, "step": 5039 }, { "epoch": 0.63, "grad_norm": 2.8473583948807533, "learning_rate": 3.2492559223452192e-06, "loss": 0.5246, "step": 5040 }, { "epoch": 0.63, "grad_norm": 1.6653642314561725, "learning_rate": 3.247373571974445e-06, "loss": 0.4932, "step": 5041 }, { "epoch": 0.63, "grad_norm": 1.430380196755407, "learning_rate": 3.2454915047559064e-06, "loss": 0.5089, "step": 5042 }, { "epoch": 0.63, "grad_norm": 1.314125842643514, "learning_rate": 3.2436097209936678e-06, "loss": 0.4556, "step": 5043 }, { "epoch": 0.63, "grad_norm": 1.8527143155929457, "learning_rate": 3.241728220991748e-06, "loss": 0.4847, "step": 5044 }, { "epoch": 0.63, "grad_norm": 1.7999529352552075, "learning_rate": 3.2398470050541187e-06, "loss": 0.5358, "step": 5045 }, { "epoch": 0.63, "grad_norm": 1.7267699225161, "learning_rate": 3.2379660734847085e-06, "loss": 0.5211, "step": 5046 }, { "epoch": 0.63, "grad_norm": 1.6039494417327065, "learning_rate": 3.2360854265873975e-06, "loss": 0.4832, "step": 5047 }, { "epoch": 0.63, "grad_norm": 1.6272571061404455, "learning_rate": 3.234205064666021e-06, "loss": 0.5201, "step": 5048 }, { "epoch": 0.63, "grad_norm": 1.5485993016422799, "learning_rate": 3.232324988024368e-06, "loss": 0.4914, "step": 5049 }, { "epoch": 0.63, "grad_norm": 1.6654638703390399, "learning_rate": 3.230445196966181e-06, "loss": 0.5096, "step": 5050 }, { "epoch": 0.63, "grad_norm": 0.7206764273073127, "learning_rate": 3.228565691795158e-06, "loss": 0.4857, "step": 5051 }, { "epoch": 0.63, "grad_norm": 1.6704633349447144, "learning_rate": 3.226686472814948e-06, "loss": 0.5206, "step": 5052 }, { "epoch": 0.63, "grad_norm": 1.6874928554487323, "learning_rate": 3.2248075403291573e-06, "loss": 0.4941, "step": 5053 }, { "epoch": 0.63, "grad_norm": 1.448843695999027, "learning_rate": 3.222928894641343e-06, "loss": 0.4932, "step": 5054 }, { "epoch": 0.63, "grad_norm": 1.9136320642533393, "learning_rate": 3.2210505360550157e-06, "loss": 0.5233, "step": 5055 }, { "epoch": 0.63, "grad_norm": 1.4908347517455087, "learning_rate": 3.2191724648736434e-06, "loss": 0.4807, "step": 5056 }, { "epoch": 0.63, "grad_norm": 1.2934989746182866, "learning_rate": 3.217294681400643e-06, "loss": 0.4955, "step": 5057 }, { "epoch": 0.63, "grad_norm": 1.580504193882281, "learning_rate": 3.2154171859393847e-06, "loss": 0.4419, "step": 5058 }, { "epoch": 0.63, "grad_norm": 1.6964366519154959, "learning_rate": 3.213539978793201e-06, "loss": 0.4858, "step": 5059 }, { "epoch": 0.63, "grad_norm": 1.5524515714090368, "learning_rate": 3.2116630602653665e-06, "loss": 0.475, "step": 5060 }, { "epoch": 0.63, "grad_norm": 1.167181040290641, "learning_rate": 3.2097864306591143e-06, "loss": 0.4805, "step": 5061 }, { "epoch": 0.63, "grad_norm": 1.5429939849232062, "learning_rate": 3.2079100902776318e-06, "loss": 0.5109, "step": 5062 }, { "epoch": 0.63, "grad_norm": 1.4068352002330462, "learning_rate": 3.2060340394240567e-06, "loss": 0.4986, "step": 5063 }, { "epoch": 0.63, "grad_norm": 1.4641161058301413, "learning_rate": 3.204158278401483e-06, "loss": 0.4867, "step": 5064 }, { "epoch": 0.63, "grad_norm": 1.5267975713122197, "learning_rate": 3.2022828075129553e-06, "loss": 0.5088, "step": 5065 }, { "epoch": 0.63, "grad_norm": 1.4808147883075506, "learning_rate": 3.2004076270614714e-06, "loss": 0.4332, "step": 5066 }, { "epoch": 0.63, "grad_norm": 1.6800443743499096, "learning_rate": 3.1985327373499864e-06, "loss": 0.4791, "step": 5067 }, { "epoch": 0.63, "grad_norm": 2.136655646383344, "learning_rate": 3.196658138681404e-06, "loss": 0.5061, "step": 5068 }, { "epoch": 0.63, "grad_norm": 1.450993950615321, "learning_rate": 3.1947838313585823e-06, "loss": 0.5267, "step": 5069 }, { "epoch": 0.63, "grad_norm": 1.5025753880323134, "learning_rate": 3.1929098156843307e-06, "loss": 0.5506, "step": 5070 }, { "epoch": 0.63, "grad_norm": 1.525847759768212, "learning_rate": 3.1910360919614135e-06, "loss": 0.4512, "step": 5071 }, { "epoch": 0.63, "grad_norm": 1.4025262095606097, "learning_rate": 3.189162660492548e-06, "loss": 0.4771, "step": 5072 }, { "epoch": 0.63, "grad_norm": 1.5297814691298464, "learning_rate": 3.1872895215804035e-06, "loss": 0.5194, "step": 5073 }, { "epoch": 0.63, "grad_norm": 0.647810789148842, "learning_rate": 3.1854166755275982e-06, "loss": 0.4734, "step": 5074 }, { "epoch": 0.63, "grad_norm": 2.0943659345513552, "learning_rate": 3.1835441226367137e-06, "loss": 0.5102, "step": 5075 }, { "epoch": 0.63, "grad_norm": 1.4940861348841228, "learning_rate": 3.181671863210274e-06, "loss": 0.5145, "step": 5076 }, { "epoch": 0.63, "grad_norm": 1.642438898262156, "learning_rate": 3.1797998975507594e-06, "loss": 0.5052, "step": 5077 }, { "epoch": 0.63, "grad_norm": 0.7155625181979648, "learning_rate": 3.1779282259606026e-06, "loss": 0.5192, "step": 5078 }, { "epoch": 0.63, "grad_norm": 1.4464801924254893, "learning_rate": 3.1760568487421873e-06, "loss": 0.5187, "step": 5079 }, { "epoch": 0.63, "grad_norm": 1.2066580365905597, "learning_rate": 3.1741857661978528e-06, "loss": 0.4903, "step": 5080 }, { "epoch": 0.63, "grad_norm": 1.528292843093238, "learning_rate": 3.1723149786298867e-06, "loss": 0.5126, "step": 5081 }, { "epoch": 0.63, "grad_norm": 1.5730967438515793, "learning_rate": 3.1704444863405314e-06, "loss": 0.4854, "step": 5082 }, { "epoch": 0.63, "grad_norm": 1.4127354692919383, "learning_rate": 3.1685742896319847e-06, "loss": 0.5259, "step": 5083 }, { "epoch": 0.63, "grad_norm": 1.5901344549251768, "learning_rate": 3.1667043888063914e-06, "loss": 0.5187, "step": 5084 }, { "epoch": 0.63, "grad_norm": 1.510968311326373, "learning_rate": 3.16483478416585e-06, "loss": 0.5035, "step": 5085 }, { "epoch": 0.63, "grad_norm": 2.103291137686788, "learning_rate": 3.1629654760124117e-06, "loss": 0.4973, "step": 5086 }, { "epoch": 0.63, "grad_norm": 1.362060770332344, "learning_rate": 3.1610964646480806e-06, "loss": 0.4657, "step": 5087 }, { "epoch": 0.63, "grad_norm": 1.3181780493491968, "learning_rate": 3.159227750374812e-06, "loss": 0.4677, "step": 5088 }, { "epoch": 0.63, "grad_norm": 1.6546426254608904, "learning_rate": 3.157359333494511e-06, "loss": 0.5134, "step": 5089 }, { "epoch": 0.63, "grad_norm": 1.43936413406887, "learning_rate": 3.155491214309039e-06, "loss": 0.4698, "step": 5090 }, { "epoch": 0.63, "grad_norm": 1.2386937886476648, "learning_rate": 3.153623393120203e-06, "loss": 0.462, "step": 5091 }, { "epoch": 0.63, "grad_norm": 1.4513888189237607, "learning_rate": 3.1517558702297724e-06, "loss": 0.4814, "step": 5092 }, { "epoch": 0.63, "grad_norm": 1.3730832063641922, "learning_rate": 3.1498886459394585e-06, "loss": 0.4829, "step": 5093 }, { "epoch": 0.63, "grad_norm": 1.4175167888787246, "learning_rate": 3.1480217205509282e-06, "loss": 0.5328, "step": 5094 }, { "epoch": 0.63, "grad_norm": 1.8484807418961515, "learning_rate": 3.1461550943657996e-06, "loss": 0.4671, "step": 5095 }, { "epoch": 0.63, "grad_norm": 1.7497109505535595, "learning_rate": 3.144288767685643e-06, "loss": 0.5283, "step": 5096 }, { "epoch": 0.63, "grad_norm": 1.3495861137244243, "learning_rate": 3.1424227408119803e-06, "loss": 0.5009, "step": 5097 }, { "epoch": 0.63, "grad_norm": 1.373311925624968, "learning_rate": 3.1405570140462833e-06, "loss": 0.4901, "step": 5098 }, { "epoch": 0.63, "grad_norm": 1.2854736848654358, "learning_rate": 3.138691587689975e-06, "loss": 0.4316, "step": 5099 }, { "epoch": 0.63, "grad_norm": 1.506737083470091, "learning_rate": 3.1368264620444356e-06, "loss": 0.4619, "step": 5100 }, { "epoch": 0.63, "grad_norm": 2.885061084921074, "learning_rate": 3.1349616374109903e-06, "loss": 0.5372, "step": 5101 }, { "epoch": 0.63, "grad_norm": 1.4289873958250099, "learning_rate": 3.1330971140909184e-06, "loss": 0.4549, "step": 5102 }, { "epoch": 0.63, "grad_norm": 3.3060382248016653, "learning_rate": 3.131232892385451e-06, "loss": 0.5197, "step": 5103 }, { "epoch": 0.63, "grad_norm": 2.0688664902976086, "learning_rate": 3.1293689725957667e-06, "loss": 0.5058, "step": 5104 }, { "epoch": 0.63, "grad_norm": 1.829403295529987, "learning_rate": 3.1275053550230005e-06, "loss": 0.4975, "step": 5105 }, { "epoch": 0.63, "grad_norm": 2.430231383079677, "learning_rate": 3.1256420399682365e-06, "loss": 0.4455, "step": 5106 }, { "epoch": 0.63, "grad_norm": 1.9110115547985471, "learning_rate": 3.1237790277325076e-06, "loss": 0.4466, "step": 5107 }, { "epoch": 0.63, "grad_norm": 1.3324536100635709, "learning_rate": 3.121916318616801e-06, "loss": 0.5126, "step": 5108 }, { "epoch": 0.63, "grad_norm": 1.3753593719641783, "learning_rate": 3.1200539129220548e-06, "loss": 0.5434, "step": 5109 }, { "epoch": 0.63, "grad_norm": 1.3814478368048255, "learning_rate": 3.118191810949156e-06, "loss": 0.5164, "step": 5110 }, { "epoch": 0.63, "grad_norm": 2.9334061670531075, "learning_rate": 3.1163300129989434e-06, "loss": 0.5299, "step": 5111 }, { "epoch": 0.63, "grad_norm": 1.300266009222235, "learning_rate": 3.114468519372207e-06, "loss": 0.5049, "step": 5112 }, { "epoch": 0.63, "grad_norm": 1.3880272247312915, "learning_rate": 3.1126073303696873e-06, "loss": 0.4898, "step": 5113 }, { "epoch": 0.63, "grad_norm": 2.3431655578387383, "learning_rate": 3.1107464462920767e-06, "loss": 0.5187, "step": 5114 }, { "epoch": 0.63, "grad_norm": 1.3546211679493592, "learning_rate": 3.108885867440015e-06, "loss": 0.531, "step": 5115 }, { "epoch": 0.63, "grad_norm": 1.371488891276787, "learning_rate": 3.1070255941140987e-06, "loss": 0.4707, "step": 5116 }, { "epoch": 0.64, "grad_norm": 1.3519675577207597, "learning_rate": 3.105165626614869e-06, "loss": 0.5278, "step": 5117 }, { "epoch": 0.64, "grad_norm": 1.4497383499852987, "learning_rate": 3.103305965242821e-06, "loss": 0.461, "step": 5118 }, { "epoch": 0.64, "grad_norm": 1.6053194736104737, "learning_rate": 3.1014466102983986e-06, "loss": 0.4781, "step": 5119 }, { "epoch": 0.64, "grad_norm": 0.649216465541498, "learning_rate": 3.099587562081997e-06, "loss": 0.4484, "step": 5120 }, { "epoch": 0.64, "grad_norm": 1.7489882471719203, "learning_rate": 3.0977288208939627e-06, "loss": 0.4445, "step": 5121 }, { "epoch": 0.64, "grad_norm": 1.7986665377692794, "learning_rate": 3.09587038703459e-06, "loss": 0.4882, "step": 5122 }, { "epoch": 0.64, "grad_norm": 1.3269475764699166, "learning_rate": 3.094012260804127e-06, "loss": 0.4894, "step": 5123 }, { "epoch": 0.64, "grad_norm": 1.3517778780960439, "learning_rate": 3.0921544425027695e-06, "loss": 0.5173, "step": 5124 }, { "epoch": 0.64, "grad_norm": 1.334966564160536, "learning_rate": 3.0902969324306643e-06, "loss": 0.4906, "step": 5125 }, { "epoch": 0.64, "grad_norm": 1.6757784490718937, "learning_rate": 3.0884397308879098e-06, "loss": 0.5075, "step": 5126 }, { "epoch": 0.64, "grad_norm": 1.6295839395714828, "learning_rate": 3.0865828381745515e-06, "loss": 0.4894, "step": 5127 }, { "epoch": 0.64, "grad_norm": 1.5048313436145033, "learning_rate": 3.0847262545905882e-06, "loss": 0.5114, "step": 5128 }, { "epoch": 0.64, "grad_norm": 1.4335249322231058, "learning_rate": 3.0828699804359663e-06, "loss": 0.4923, "step": 5129 }, { "epoch": 0.64, "grad_norm": 1.3866191188244976, "learning_rate": 3.081014016010584e-06, "loss": 0.5375, "step": 5130 }, { "epoch": 0.64, "grad_norm": 1.4240045872604474, "learning_rate": 3.0791583616142883e-06, "loss": 0.4682, "step": 5131 }, { "epoch": 0.64, "grad_norm": 1.161237880252194, "learning_rate": 3.0773030175468754e-06, "loss": 0.4879, "step": 5132 }, { "epoch": 0.64, "grad_norm": 0.7629290611916298, "learning_rate": 3.0754479841080943e-06, "loss": 0.4959, "step": 5133 }, { "epoch": 0.64, "grad_norm": 1.4955674586673207, "learning_rate": 3.0735932615976416e-06, "loss": 0.5465, "step": 5134 }, { "epoch": 0.64, "grad_norm": 1.3894155976672427, "learning_rate": 3.071738850315164e-06, "loss": 0.4957, "step": 5135 }, { "epoch": 0.64, "grad_norm": 1.442652357430899, "learning_rate": 3.0698847505602576e-06, "loss": 0.5341, "step": 5136 }, { "epoch": 0.64, "grad_norm": 1.7858544192469936, "learning_rate": 3.0680309626324685e-06, "loss": 0.5084, "step": 5137 }, { "epoch": 0.64, "grad_norm": 1.2466849264870778, "learning_rate": 3.0661774868312928e-06, "loss": 0.5389, "step": 5138 }, { "epoch": 0.64, "grad_norm": 1.6570292869455046, "learning_rate": 3.064324323456176e-06, "loss": 0.5112, "step": 5139 }, { "epoch": 0.64, "grad_norm": 1.3190113984903318, "learning_rate": 3.0624714728065106e-06, "loss": 0.4613, "step": 5140 }, { "epoch": 0.64, "grad_norm": 1.4251908409537135, "learning_rate": 3.060618935181645e-06, "loss": 0.4501, "step": 5141 }, { "epoch": 0.64, "grad_norm": 1.4261908907765468, "learning_rate": 3.0587667108808706e-06, "loss": 0.4794, "step": 5142 }, { "epoch": 0.64, "grad_norm": 1.536541083783513, "learning_rate": 3.056914800203431e-06, "loss": 0.516, "step": 5143 }, { "epoch": 0.64, "grad_norm": 2.2948676856612353, "learning_rate": 3.0550632034485186e-06, "loss": 0.495, "step": 5144 }, { "epoch": 0.64, "grad_norm": 1.949593745483904, "learning_rate": 3.053211920915275e-06, "loss": 0.4635, "step": 5145 }, { "epoch": 0.64, "grad_norm": 1.4042463519036232, "learning_rate": 3.0513609529027914e-06, "loss": 0.4489, "step": 5146 }, { "epoch": 0.64, "grad_norm": 1.5353044882980715, "learning_rate": 3.049510299710108e-06, "loss": 0.5417, "step": 5147 }, { "epoch": 0.64, "grad_norm": 1.368567437939403, "learning_rate": 3.0476599616362136e-06, "loss": 0.5106, "step": 5148 }, { "epoch": 0.64, "grad_norm": 1.602691605831889, "learning_rate": 3.045809938980047e-06, "loss": 0.4857, "step": 5149 }, { "epoch": 0.64, "grad_norm": 1.4870178731861237, "learning_rate": 3.0439602320404964e-06, "loss": 0.4919, "step": 5150 }, { "epoch": 0.64, "grad_norm": 1.5277486373103906, "learning_rate": 3.0421108411163975e-06, "loss": 0.4446, "step": 5151 }, { "epoch": 0.64, "grad_norm": 1.7414214443891542, "learning_rate": 3.040261766506536e-06, "loss": 0.507, "step": 5152 }, { "epoch": 0.64, "grad_norm": 1.7320551130735706, "learning_rate": 3.038413008509645e-06, "loss": 0.491, "step": 5153 }, { "epoch": 0.64, "grad_norm": 1.5798003564239524, "learning_rate": 3.0365645674244094e-06, "loss": 0.4709, "step": 5154 }, { "epoch": 0.64, "grad_norm": 1.6953870389910313, "learning_rate": 3.03471644354946e-06, "loss": 0.5186, "step": 5155 }, { "epoch": 0.64, "grad_norm": 1.2829814597360416, "learning_rate": 3.0328686371833765e-06, "loss": 0.4842, "step": 5156 }, { "epoch": 0.64, "grad_norm": 1.520157858202863, "learning_rate": 3.031021148624691e-06, "loss": 0.4392, "step": 5157 }, { "epoch": 0.64, "grad_norm": 1.584303013751112, "learning_rate": 3.0291739781718808e-06, "loss": 0.5274, "step": 5158 }, { "epoch": 0.64, "grad_norm": 1.4803637139074282, "learning_rate": 3.0273271261233718e-06, "loss": 0.4807, "step": 5159 }, { "epoch": 0.64, "grad_norm": 1.5198555339306656, "learning_rate": 3.025480592777539e-06, "loss": 0.5198, "step": 5160 }, { "epoch": 0.64, "grad_norm": 0.7245927757697032, "learning_rate": 3.023634378432706e-06, "loss": 0.4865, "step": 5161 }, { "epoch": 0.64, "grad_norm": 1.5536795950544744, "learning_rate": 3.021788483387146e-06, "loss": 0.5101, "step": 5162 }, { "epoch": 0.64, "grad_norm": 5.739620434569134, "learning_rate": 3.019942907939079e-06, "loss": 0.5367, "step": 5163 }, { "epoch": 0.64, "grad_norm": 2.3241117537551723, "learning_rate": 3.0180976523866717e-06, "loss": 0.4565, "step": 5164 }, { "epoch": 0.64, "grad_norm": 1.3465344224268267, "learning_rate": 3.016252717028046e-06, "loss": 0.4703, "step": 5165 }, { "epoch": 0.64, "grad_norm": 1.3999767438476154, "learning_rate": 3.0144081021612648e-06, "loss": 0.4976, "step": 5166 }, { "epoch": 0.64, "grad_norm": 1.3766289113380032, "learning_rate": 3.0125638080843435e-06, "loss": 0.4953, "step": 5167 }, { "epoch": 0.64, "grad_norm": 1.6140554152088646, "learning_rate": 3.0107198350952415e-06, "loss": 0.5032, "step": 5168 }, { "epoch": 0.64, "grad_norm": 0.6590837178206382, "learning_rate": 3.0088761834918706e-06, "loss": 0.5041, "step": 5169 }, { "epoch": 0.64, "grad_norm": 2.092104546557681, "learning_rate": 3.0070328535720884e-06, "loss": 0.5199, "step": 5170 }, { "epoch": 0.64, "grad_norm": 1.4407452582905922, "learning_rate": 3.0051898456337013e-06, "loss": 0.5068, "step": 5171 }, { "epoch": 0.64, "grad_norm": 1.9815869758106484, "learning_rate": 3.003347159974463e-06, "loss": 0.5009, "step": 5172 }, { "epoch": 0.64, "grad_norm": 1.2769554752225565, "learning_rate": 3.001504796892074e-06, "loss": 0.4757, "step": 5173 }, { "epoch": 0.64, "grad_norm": 1.483988926560004, "learning_rate": 2.9996627566841886e-06, "loss": 0.4817, "step": 5174 }, { "epoch": 0.64, "grad_norm": 1.3562102230857813, "learning_rate": 2.9978210396484013e-06, "loss": 0.4965, "step": 5175 }, { "epoch": 0.64, "grad_norm": 1.7509090416933895, "learning_rate": 2.99597964608226e-06, "loss": 0.4859, "step": 5176 }, { "epoch": 0.64, "grad_norm": 1.3193288728949342, "learning_rate": 2.9941385762832563e-06, "loss": 0.4898, "step": 5177 }, { "epoch": 0.64, "grad_norm": 1.4786685504426724, "learning_rate": 2.9922978305488317e-06, "loss": 0.5607, "step": 5178 }, { "epoch": 0.64, "grad_norm": 1.4479667494823674, "learning_rate": 2.990457409176375e-06, "loss": 0.5253, "step": 5179 }, { "epoch": 0.64, "grad_norm": 1.4814821436756966, "learning_rate": 2.9886173124632213e-06, "loss": 0.4418, "step": 5180 }, { "epoch": 0.64, "grad_norm": 0.674004839372321, "learning_rate": 2.986777540706654e-06, "loss": 0.4746, "step": 5181 }, { "epoch": 0.64, "grad_norm": 1.3409942229031568, "learning_rate": 2.984938094203908e-06, "loss": 0.492, "step": 5182 }, { "epoch": 0.64, "grad_norm": 1.3204528480802598, "learning_rate": 2.98309897325216e-06, "loss": 0.482, "step": 5183 }, { "epoch": 0.64, "grad_norm": 2.2290584335361103, "learning_rate": 2.9812601781485356e-06, "loss": 0.5398, "step": 5184 }, { "epoch": 0.64, "grad_norm": 1.4015321794285387, "learning_rate": 2.9794217091901094e-06, "loss": 0.5118, "step": 5185 }, { "epoch": 0.64, "grad_norm": 2.1052379988251193, "learning_rate": 2.9775835666739028e-06, "loss": 0.4802, "step": 5186 }, { "epoch": 0.64, "grad_norm": 1.4781251630529064, "learning_rate": 2.975745750896881e-06, "loss": 0.4244, "step": 5187 }, { "epoch": 0.64, "grad_norm": 1.5160119248805892, "learning_rate": 2.973908262155962e-06, "loss": 0.4722, "step": 5188 }, { "epoch": 0.64, "grad_norm": 2.0726917074692084, "learning_rate": 2.9720711007480056e-06, "loss": 0.5402, "step": 5189 }, { "epoch": 0.64, "grad_norm": 1.8211951470508632, "learning_rate": 2.9702342669698247e-06, "loss": 0.4835, "step": 5190 }, { "epoch": 0.64, "grad_norm": 3.5279731366187965, "learning_rate": 2.968397761118175e-06, "loss": 0.4654, "step": 5191 }, { "epoch": 0.64, "grad_norm": 1.4769202027950719, "learning_rate": 2.9665615834897597e-06, "loss": 0.4981, "step": 5192 }, { "epoch": 0.64, "grad_norm": 1.429049044873586, "learning_rate": 2.9647257343812298e-06, "loss": 0.4863, "step": 5193 }, { "epoch": 0.64, "grad_norm": 1.712926248039558, "learning_rate": 2.9628902140891823e-06, "loss": 0.5002, "step": 5194 }, { "epoch": 0.64, "grad_norm": 1.4039040905005618, "learning_rate": 2.961055022910162e-06, "loss": 0.4909, "step": 5195 }, { "epoch": 0.64, "grad_norm": 1.5767933463615202, "learning_rate": 2.9592201611406606e-06, "loss": 0.5375, "step": 5196 }, { "epoch": 0.64, "grad_norm": 1.450513560065177, "learning_rate": 2.957385629077113e-06, "loss": 0.4729, "step": 5197 }, { "epoch": 0.65, "grad_norm": 1.3789791613221432, "learning_rate": 2.955551427015909e-06, "loss": 0.5128, "step": 5198 }, { "epoch": 0.65, "grad_norm": 2.0296334637657703, "learning_rate": 2.953717555253378e-06, "loss": 0.5196, "step": 5199 }, { "epoch": 0.65, "grad_norm": 1.4936191092661981, "learning_rate": 2.951884014085798e-06, "loss": 0.4448, "step": 5200 }, { "epoch": 0.65, "grad_norm": 1.4903974201120964, "learning_rate": 2.9500508038093932e-06, "loss": 0.5354, "step": 5201 }, { "epoch": 0.65, "grad_norm": 0.674069417016388, "learning_rate": 2.9482179247203357e-06, "loss": 0.5179, "step": 5202 }, { "epoch": 0.65, "grad_norm": 1.3382165010372378, "learning_rate": 2.9463853771147434e-06, "loss": 0.5242, "step": 5203 }, { "epoch": 0.65, "grad_norm": 1.4771001395371512, "learning_rate": 2.94455316128868e-06, "loss": 0.4246, "step": 5204 }, { "epoch": 0.65, "grad_norm": 1.437699263205684, "learning_rate": 2.942721277538154e-06, "loss": 0.4561, "step": 5205 }, { "epoch": 0.65, "grad_norm": 1.5707752606080958, "learning_rate": 2.940889726159127e-06, "loss": 0.5656, "step": 5206 }, { "epoch": 0.65, "grad_norm": 1.3425068014198047, "learning_rate": 2.939058507447499e-06, "loss": 0.5571, "step": 5207 }, { "epoch": 0.65, "grad_norm": 1.6390851225460445, "learning_rate": 2.9372276216991204e-06, "loss": 0.509, "step": 5208 }, { "epoch": 0.65, "grad_norm": 1.4889246574510457, "learning_rate": 2.9353970692097865e-06, "loss": 0.4664, "step": 5209 }, { "epoch": 0.65, "grad_norm": 1.421107639733984, "learning_rate": 2.9335668502752395e-06, "loss": 0.5479, "step": 5210 }, { "epoch": 0.65, "grad_norm": 1.5940583579838103, "learning_rate": 2.9317369651911677e-06, "loss": 0.465, "step": 5211 }, { "epoch": 0.65, "grad_norm": 1.5987081821051372, "learning_rate": 2.9299074142532045e-06, "loss": 0.4642, "step": 5212 }, { "epoch": 0.65, "grad_norm": 2.2117746474994493, "learning_rate": 2.9280781977569306e-06, "loss": 0.4773, "step": 5213 }, { "epoch": 0.65, "grad_norm": 2.8658518759658125, "learning_rate": 2.9262493159978703e-06, "loss": 0.468, "step": 5214 }, { "epoch": 0.65, "grad_norm": 1.3225637881539518, "learning_rate": 2.924420769271499e-06, "loss": 0.469, "step": 5215 }, { "epoch": 0.65, "grad_norm": 1.3203399947443093, "learning_rate": 2.922592557873231e-06, "loss": 0.4908, "step": 5216 }, { "epoch": 0.65, "grad_norm": 2.328462249497007, "learning_rate": 2.9207646820984325e-06, "loss": 0.5081, "step": 5217 }, { "epoch": 0.65, "grad_norm": 1.6711124618967272, "learning_rate": 2.9189371422424123e-06, "loss": 0.5171, "step": 5218 }, { "epoch": 0.65, "grad_norm": 1.6230161561337815, "learning_rate": 2.917109938600423e-06, "loss": 0.4466, "step": 5219 }, { "epoch": 0.65, "grad_norm": 1.3219255341465752, "learning_rate": 2.9152830714676706e-06, "loss": 0.4642, "step": 5220 }, { "epoch": 0.65, "grad_norm": 1.4107900141016096, "learning_rate": 2.9134565411392958e-06, "loss": 0.5056, "step": 5221 }, { "epoch": 0.65, "grad_norm": 1.4295451490426954, "learning_rate": 2.9116303479103934e-06, "loss": 0.5116, "step": 5222 }, { "epoch": 0.65, "grad_norm": 1.856583146682241, "learning_rate": 2.909804492076001e-06, "loss": 0.493, "step": 5223 }, { "epoch": 0.65, "grad_norm": 3.729174337478149, "learning_rate": 2.9079789739311037e-06, "loss": 0.5561, "step": 5224 }, { "epoch": 0.65, "grad_norm": 0.6528515005726849, "learning_rate": 2.906153793770626e-06, "loss": 0.5052, "step": 5225 }, { "epoch": 0.65, "grad_norm": 1.393222031708734, "learning_rate": 2.904328951889447e-06, "loss": 0.4537, "step": 5226 }, { "epoch": 0.65, "grad_norm": 1.3073884655399801, "learning_rate": 2.9025044485823815e-06, "loss": 0.5334, "step": 5227 }, { "epoch": 0.65, "grad_norm": 1.284878231455661, "learning_rate": 2.900680284144194e-06, "loss": 0.5229, "step": 5228 }, { "epoch": 0.65, "grad_norm": 2.0539141190578754, "learning_rate": 2.898856458869597e-06, "loss": 0.4971, "step": 5229 }, { "epoch": 0.65, "grad_norm": 1.4554782786293619, "learning_rate": 2.897032973053241e-06, "loss": 0.487, "step": 5230 }, { "epoch": 0.65, "grad_norm": 1.5009432100606122, "learning_rate": 2.895209826989733e-06, "loss": 0.5017, "step": 5231 }, { "epoch": 0.65, "grad_norm": 1.3348701171976673, "learning_rate": 2.8933870209736136e-06, "loss": 0.4668, "step": 5232 }, { "epoch": 0.65, "grad_norm": 1.4133037167718843, "learning_rate": 2.8915645552993756e-06, "loss": 0.484, "step": 5233 }, { "epoch": 0.65, "grad_norm": 1.2891795152863093, "learning_rate": 2.889742430261452e-06, "loss": 0.4565, "step": 5234 }, { "epoch": 0.65, "grad_norm": 1.5537923383669643, "learning_rate": 2.8879206461542253e-06, "loss": 0.4766, "step": 5235 }, { "epoch": 0.65, "grad_norm": 1.482406989146018, "learning_rate": 2.8860992032720204e-06, "loss": 0.479, "step": 5236 }, { "epoch": 0.65, "grad_norm": 1.3677960555410495, "learning_rate": 2.8842781019091037e-06, "loss": 0.5023, "step": 5237 }, { "epoch": 0.65, "grad_norm": 1.4134634784188358, "learning_rate": 2.8824573423596946e-06, "loss": 0.4854, "step": 5238 }, { "epoch": 0.65, "grad_norm": 1.7260629179279658, "learning_rate": 2.8806369249179513e-06, "loss": 0.5403, "step": 5239 }, { "epoch": 0.65, "grad_norm": 1.5421852186392826, "learning_rate": 2.87881684987798e-06, "loss": 0.5302, "step": 5240 }, { "epoch": 0.65, "grad_norm": 1.7102444314625498, "learning_rate": 2.876997117533826e-06, "loss": 0.508, "step": 5241 }, { "epoch": 0.65, "grad_norm": 1.350860727657689, "learning_rate": 2.875177728179488e-06, "loss": 0.5409, "step": 5242 }, { "epoch": 0.65, "grad_norm": 0.6972621262407982, "learning_rate": 2.8733586821088998e-06, "loss": 0.5073, "step": 5243 }, { "epoch": 0.65, "grad_norm": 1.4389568481054311, "learning_rate": 2.871539979615948e-06, "loss": 0.475, "step": 5244 }, { "epoch": 0.65, "grad_norm": 1.44297013306258, "learning_rate": 2.8697216209944585e-06, "loss": 0.5036, "step": 5245 }, { "epoch": 0.65, "grad_norm": 1.6421364177474709, "learning_rate": 2.8679036065382003e-06, "loss": 0.4985, "step": 5246 }, { "epoch": 0.65, "grad_norm": 1.3111731981965287, "learning_rate": 2.8660859365408934e-06, "loss": 0.5307, "step": 5247 }, { "epoch": 0.65, "grad_norm": 1.370112143441277, "learning_rate": 2.8642686112961964e-06, "loss": 0.5473, "step": 5248 }, { "epoch": 0.65, "grad_norm": 0.6811937781641748, "learning_rate": 2.8624516310977172e-06, "loss": 0.4813, "step": 5249 }, { "epoch": 0.65, "grad_norm": 1.5384833532133326, "learning_rate": 2.860634996239001e-06, "loss": 0.5112, "step": 5250 }, { "epoch": 0.65, "grad_norm": 1.3948737627803693, "learning_rate": 2.858818707013545e-06, "loss": 0.4642, "step": 5251 }, { "epoch": 0.65, "grad_norm": 1.2792735675727973, "learning_rate": 2.8570027637147835e-06, "loss": 0.5108, "step": 5252 }, { "epoch": 0.65, "grad_norm": 1.366292951276149, "learning_rate": 2.8551871666361e-06, "loss": 0.5163, "step": 5253 }, { "epoch": 0.65, "grad_norm": 1.4864720122990032, "learning_rate": 2.8533719160708186e-06, "loss": 0.5041, "step": 5254 }, { "epoch": 0.65, "grad_norm": 1.570123238810715, "learning_rate": 2.8515570123122094e-06, "loss": 0.5027, "step": 5255 }, { "epoch": 0.65, "grad_norm": 4.351478202009039, "learning_rate": 2.8497424556534893e-06, "loss": 0.4334, "step": 5256 }, { "epoch": 0.65, "grad_norm": 1.3568254180690646, "learning_rate": 2.84792824638781e-06, "loss": 0.526, "step": 5257 }, { "epoch": 0.65, "grad_norm": 1.3958890681102574, "learning_rate": 2.8461143848082793e-06, "loss": 0.4824, "step": 5258 }, { "epoch": 0.65, "grad_norm": 1.56032635116221, "learning_rate": 2.844300871207937e-06, "loss": 0.5152, "step": 5259 }, { "epoch": 0.65, "grad_norm": 1.4501231717596466, "learning_rate": 2.842487705879777e-06, "loss": 0.4875, "step": 5260 }, { "epoch": 0.65, "grad_norm": 1.4516955521205268, "learning_rate": 2.840674889116728e-06, "loss": 0.492, "step": 5261 }, { "epoch": 0.65, "grad_norm": 1.1679179628063483, "learning_rate": 2.83886242121167e-06, "loss": 0.4231, "step": 5262 }, { "epoch": 0.65, "grad_norm": 1.391145838108834, "learning_rate": 2.8370503024574192e-06, "loss": 0.5274, "step": 5263 }, { "epoch": 0.65, "grad_norm": 1.432913453073426, "learning_rate": 2.835238533146741e-06, "loss": 0.5189, "step": 5264 }, { "epoch": 0.65, "grad_norm": 1.306301674370133, "learning_rate": 2.8334271135723468e-06, "loss": 0.5035, "step": 5265 }, { "epoch": 0.65, "grad_norm": 1.6885766126397033, "learning_rate": 2.8316160440268813e-06, "loss": 0.5766, "step": 5266 }, { "epoch": 0.65, "grad_norm": 1.407825610945716, "learning_rate": 2.8298053248029434e-06, "loss": 0.4842, "step": 5267 }, { "epoch": 0.65, "grad_norm": 1.5167359860431884, "learning_rate": 2.8279949561930665e-06, "loss": 0.5149, "step": 5268 }, { "epoch": 0.65, "grad_norm": 1.384774347864669, "learning_rate": 2.8261849384897353e-06, "loss": 0.4946, "step": 5269 }, { "epoch": 0.65, "grad_norm": 1.7555210761499582, "learning_rate": 2.8243752719853714e-06, "loss": 0.4819, "step": 5270 }, { "epoch": 0.65, "grad_norm": 1.27797541813699, "learning_rate": 2.822565956972342e-06, "loss": 0.4937, "step": 5271 }, { "epoch": 0.65, "grad_norm": 1.917424442556268, "learning_rate": 2.8207569937429626e-06, "loss": 0.4998, "step": 5272 }, { "epoch": 0.65, "grad_norm": 1.6865994988327913, "learning_rate": 2.8189483825894813e-06, "loss": 0.4762, "step": 5273 }, { "epoch": 0.65, "grad_norm": 1.480227158958637, "learning_rate": 2.8171401238041007e-06, "loss": 0.5159, "step": 5274 }, { "epoch": 0.65, "grad_norm": 1.3659359585215833, "learning_rate": 2.8153322176789556e-06, "loss": 0.4841, "step": 5275 }, { "epoch": 0.65, "grad_norm": 1.8040659390676708, "learning_rate": 2.813524664506133e-06, "loss": 0.4936, "step": 5276 }, { "epoch": 0.65, "grad_norm": 1.2706477624521206, "learning_rate": 2.811717464577657e-06, "loss": 0.4966, "step": 5277 }, { "epoch": 0.66, "grad_norm": 1.493513927680801, "learning_rate": 2.8099106181854974e-06, "loss": 0.5047, "step": 5278 }, { "epoch": 0.66, "grad_norm": 1.8631817433842472, "learning_rate": 2.8081041256215654e-06, "loss": 0.489, "step": 5279 }, { "epoch": 0.66, "grad_norm": 1.1856851549371594, "learning_rate": 2.8062979871777157e-06, "loss": 0.4861, "step": 5280 }, { "epoch": 0.66, "grad_norm": 2.471930383855249, "learning_rate": 2.8044922031457487e-06, "loss": 0.5283, "step": 5281 }, { "epoch": 0.66, "grad_norm": 5.025917538098957, "learning_rate": 2.8026867738174013e-06, "loss": 0.5313, "step": 5282 }, { "epoch": 0.66, "grad_norm": 1.358502126189759, "learning_rate": 2.8008816994843592e-06, "loss": 0.5362, "step": 5283 }, { "epoch": 0.66, "grad_norm": 1.526702975365077, "learning_rate": 2.7990769804382446e-06, "loss": 0.5274, "step": 5284 }, { "epoch": 0.66, "grad_norm": 5.7768441835174045, "learning_rate": 2.79727261697063e-06, "loss": 0.4959, "step": 5285 }, { "epoch": 0.66, "grad_norm": 1.5861308351808623, "learning_rate": 2.7954686093730216e-06, "loss": 0.4983, "step": 5286 }, { "epoch": 0.66, "grad_norm": 1.2829002137593568, "learning_rate": 2.7936649579368776e-06, "loss": 0.4543, "step": 5287 }, { "epoch": 0.66, "grad_norm": 1.3304196406298983, "learning_rate": 2.791861662953589e-06, "loss": 0.4866, "step": 5288 }, { "epoch": 0.66, "grad_norm": 1.5286558611134053, "learning_rate": 2.790058724714496e-06, "loss": 0.4656, "step": 5289 }, { "epoch": 0.66, "grad_norm": 1.3558140277966517, "learning_rate": 2.7882561435108823e-06, "loss": 0.4728, "step": 5290 }, { "epoch": 0.66, "grad_norm": 1.453425915868669, "learning_rate": 2.7864539196339658e-06, "loss": 0.4677, "step": 5291 }, { "epoch": 0.66, "grad_norm": 1.3025199072127507, "learning_rate": 2.784652053374915e-06, "loss": 0.478, "step": 5292 }, { "epoch": 0.66, "grad_norm": 1.35504074531167, "learning_rate": 2.7828505450248343e-06, "loss": 0.4876, "step": 5293 }, { "epoch": 0.66, "grad_norm": 1.4178035232568393, "learning_rate": 2.7810493948747775e-06, "loss": 0.4972, "step": 5294 }, { "epoch": 0.66, "grad_norm": 1.4695984310786603, "learning_rate": 2.779248603215731e-06, "loss": 0.5301, "step": 5295 }, { "epoch": 0.66, "grad_norm": 1.4901468728111962, "learning_rate": 2.777448170338632e-06, "loss": 0.4498, "step": 5296 }, { "epoch": 0.66, "grad_norm": 1.3709843878126562, "learning_rate": 2.775648096534357e-06, "loss": 0.4982, "step": 5297 }, { "epoch": 0.66, "grad_norm": 1.6853526249118413, "learning_rate": 2.7738483820937208e-06, "loss": 0.4892, "step": 5298 }, { "epoch": 0.66, "grad_norm": 1.5074452665631897, "learning_rate": 2.7720490273074865e-06, "loss": 0.5364, "step": 5299 }, { "epoch": 0.66, "grad_norm": 1.555266495159016, "learning_rate": 2.7702500324663518e-06, "loss": 0.54, "step": 5300 }, { "epoch": 0.66, "grad_norm": 7.48752313122983, "learning_rate": 2.768451397860964e-06, "loss": 0.5542, "step": 5301 }, { "epoch": 0.66, "grad_norm": 1.351707167167519, "learning_rate": 2.766653123781905e-06, "loss": 0.502, "step": 5302 }, { "epoch": 0.66, "grad_norm": 2.080333246439908, "learning_rate": 2.7648552105197046e-06, "loss": 0.5655, "step": 5303 }, { "epoch": 0.66, "grad_norm": 1.567104012954525, "learning_rate": 2.763057658364827e-06, "loss": 0.4765, "step": 5304 }, { "epoch": 0.66, "grad_norm": 1.3716735973388143, "learning_rate": 2.7612604676076902e-06, "loss": 0.5053, "step": 5305 }, { "epoch": 0.66, "grad_norm": 1.367634803659491, "learning_rate": 2.759463638538642e-06, "loss": 0.4825, "step": 5306 }, { "epoch": 0.66, "grad_norm": 1.3321793751367244, "learning_rate": 2.757667171447973e-06, "loss": 0.4742, "step": 5307 }, { "epoch": 0.66, "grad_norm": 1.4559183914666705, "learning_rate": 2.7558710666259235e-06, "loss": 0.5147, "step": 5308 }, { "epoch": 0.66, "grad_norm": 1.6637424420725844, "learning_rate": 2.754075324362666e-06, "loss": 0.5307, "step": 5309 }, { "epoch": 0.66, "grad_norm": 1.7309577384722221, "learning_rate": 2.7522799449483224e-06, "loss": 0.5454, "step": 5310 }, { "epoch": 0.66, "grad_norm": 1.7461041363532903, "learning_rate": 2.7504849286729475e-06, "loss": 0.5696, "step": 5311 }, { "epoch": 0.66, "grad_norm": 1.360186930190867, "learning_rate": 2.7486902758265445e-06, "loss": 0.4063, "step": 5312 }, { "epoch": 0.66, "grad_norm": 1.4725073849409913, "learning_rate": 2.7468959866990554e-06, "loss": 0.5528, "step": 5313 }, { "epoch": 0.66, "grad_norm": 1.83191227764115, "learning_rate": 2.745102061580365e-06, "loss": 0.4732, "step": 5314 }, { "epoch": 0.66, "grad_norm": 1.5576463111550587, "learning_rate": 2.7433085007602955e-06, "loss": 0.4874, "step": 5315 }, { "epoch": 0.66, "grad_norm": 2.615574991409325, "learning_rate": 2.7415153045286108e-06, "loss": 0.5217, "step": 5316 }, { "epoch": 0.66, "grad_norm": 1.7472158798068373, "learning_rate": 2.7397224731750215e-06, "loss": 0.5164, "step": 5317 }, { "epoch": 0.66, "grad_norm": 1.564034329369173, "learning_rate": 2.737930006989172e-06, "loss": 0.5332, "step": 5318 }, { "epoch": 0.66, "grad_norm": 1.3528228639339923, "learning_rate": 2.7361379062606545e-06, "loss": 0.4847, "step": 5319 }, { "epoch": 0.66, "grad_norm": 1.3151718626427842, "learning_rate": 2.734346171278992e-06, "loss": 0.5129, "step": 5320 }, { "epoch": 0.66, "grad_norm": 0.6482216627008905, "learning_rate": 2.7325548023336645e-06, "loss": 0.4735, "step": 5321 }, { "epoch": 0.66, "grad_norm": 1.3814757439240166, "learning_rate": 2.7307637997140757e-06, "loss": 0.5166, "step": 5322 }, { "epoch": 0.66, "grad_norm": 1.4763492860259062, "learning_rate": 2.728973163709583e-06, "loss": 0.4862, "step": 5323 }, { "epoch": 0.66, "grad_norm": 2.74270202227617, "learning_rate": 2.7271828946094753e-06, "loss": 0.4746, "step": 5324 }, { "epoch": 0.66, "grad_norm": 1.4641364925937064, "learning_rate": 2.7253929927029897e-06, "loss": 0.4816, "step": 5325 }, { "epoch": 0.66, "grad_norm": 1.5149325381015646, "learning_rate": 2.7236034582793e-06, "loss": 0.448, "step": 5326 }, { "epoch": 0.66, "grad_norm": 1.5913256590745417, "learning_rate": 2.7218142916275174e-06, "loss": 0.5024, "step": 5327 }, { "epoch": 0.66, "grad_norm": 2.0427625447444466, "learning_rate": 2.720025493036703e-06, "loss": 0.5191, "step": 5328 }, { "epoch": 0.66, "grad_norm": 1.3152223721438832, "learning_rate": 2.718237062795846e-06, "loss": 0.5157, "step": 5329 }, { "epoch": 0.66, "grad_norm": 1.5793215264008136, "learning_rate": 2.7164490011938915e-06, "loss": 0.4871, "step": 5330 }, { "epoch": 0.66, "grad_norm": 1.41395011217096, "learning_rate": 2.714661308519711e-06, "loss": 0.5235, "step": 5331 }, { "epoch": 0.66, "grad_norm": 1.5978172239163189, "learning_rate": 2.7128739850621255e-06, "loss": 0.4837, "step": 5332 }, { "epoch": 0.66, "grad_norm": 9.564577961403666, "learning_rate": 2.7110870311098884e-06, "loss": 0.527, "step": 5333 }, { "epoch": 0.66, "grad_norm": 1.6697258115680298, "learning_rate": 2.7093004469517027e-06, "loss": 0.5105, "step": 5334 }, { "epoch": 0.66, "grad_norm": 1.6120972779403429, "learning_rate": 2.707514232876204e-06, "loss": 0.4547, "step": 5335 }, { "epoch": 0.66, "grad_norm": 1.7313556878552248, "learning_rate": 2.7057283891719703e-06, "loss": 0.5047, "step": 5336 }, { "epoch": 0.66, "grad_norm": 1.7503742181212245, "learning_rate": 2.703942916127521e-06, "loss": 0.4938, "step": 5337 }, { "epoch": 0.66, "grad_norm": 1.8591629343416418, "learning_rate": 2.7021578140313155e-06, "loss": 0.4805, "step": 5338 }, { "epoch": 0.66, "grad_norm": 1.280015653078731, "learning_rate": 2.7003730831717545e-06, "loss": 0.4685, "step": 5339 }, { "epoch": 0.66, "grad_norm": 1.3874985784665126, "learning_rate": 2.6985887238371736e-06, "loss": 0.4851, "step": 5340 }, { "epoch": 0.66, "grad_norm": 2.4078872115531422, "learning_rate": 2.6968047363158556e-06, "loss": 0.4947, "step": 5341 }, { "epoch": 0.66, "grad_norm": 1.5915183243210775, "learning_rate": 2.6950211208960147e-06, "loss": 0.5, "step": 5342 }, { "epoch": 0.66, "grad_norm": 1.597352790740755, "learning_rate": 2.693237877865814e-06, "loss": 0.4887, "step": 5343 }, { "epoch": 0.66, "grad_norm": 1.9562463930653387, "learning_rate": 2.6914550075133506e-06, "loss": 0.4693, "step": 5344 }, { "epoch": 0.66, "grad_norm": 2.407055832699335, "learning_rate": 2.6896725101266584e-06, "loss": 0.5051, "step": 5345 }, { "epoch": 0.66, "grad_norm": 1.657836405659061, "learning_rate": 2.6878903859937245e-06, "loss": 0.4861, "step": 5346 }, { "epoch": 0.66, "grad_norm": 1.4575434472330062, "learning_rate": 2.686108635402459e-06, "loss": 0.5044, "step": 5347 }, { "epoch": 0.66, "grad_norm": 1.2441405862275843, "learning_rate": 2.684327258640725e-06, "loss": 0.4904, "step": 5348 }, { "epoch": 0.66, "grad_norm": 1.3436498811887572, "learning_rate": 2.6825462559963144e-06, "loss": 0.4471, "step": 5349 }, { "epoch": 0.66, "grad_norm": 1.5582742686000106, "learning_rate": 2.6807656277569694e-06, "loss": 0.5168, "step": 5350 }, { "epoch": 0.66, "grad_norm": 1.6881226866019212, "learning_rate": 2.67898537421036e-06, "loss": 0.501, "step": 5351 }, { "epoch": 0.66, "grad_norm": 1.2901602306826498, "learning_rate": 2.677205495644108e-06, "loss": 0.4999, "step": 5352 }, { "epoch": 0.66, "grad_norm": 1.4483734768467285, "learning_rate": 2.675425992345763e-06, "loss": 0.4864, "step": 5353 }, { "epoch": 0.66, "grad_norm": 1.358456513957977, "learning_rate": 2.673646864602822e-06, "loss": 0.4564, "step": 5354 }, { "epoch": 0.66, "grad_norm": 1.3999051743052522, "learning_rate": 2.671868112702721e-06, "loss": 0.5713, "step": 5355 }, { "epoch": 0.66, "grad_norm": 1.7831927687168119, "learning_rate": 2.6700897369328286e-06, "loss": 0.5226, "step": 5356 }, { "epoch": 0.66, "grad_norm": 1.4520519300332069, "learning_rate": 2.668311737580461e-06, "loss": 0.5093, "step": 5357 }, { "epoch": 0.66, "grad_norm": 1.5009878577871714, "learning_rate": 2.6665341149328667e-06, "loss": 0.5286, "step": 5358 }, { "epoch": 0.67, "grad_norm": 0.7095770926619317, "learning_rate": 2.6647568692772386e-06, "loss": 0.4946, "step": 5359 }, { "epoch": 0.67, "grad_norm": 1.5466040717092826, "learning_rate": 2.662980000900704e-06, "loss": 0.5143, "step": 5360 }, { "epoch": 0.67, "grad_norm": 1.3673672944178537, "learning_rate": 2.661203510090332e-06, "loss": 0.5227, "step": 5361 }, { "epoch": 0.67, "grad_norm": 1.5160648696253634, "learning_rate": 2.659427397133134e-06, "loss": 0.4959, "step": 5362 }, { "epoch": 0.67, "grad_norm": 1.5195269244707366, "learning_rate": 2.6576516623160515e-06, "loss": 0.4796, "step": 5363 }, { "epoch": 0.67, "grad_norm": 2.0608837465904233, "learning_rate": 2.6558763059259745e-06, "loss": 0.4623, "step": 5364 }, { "epoch": 0.67, "grad_norm": 1.4251657068323844, "learning_rate": 2.6541013282497234e-06, "loss": 0.52, "step": 5365 }, { "epoch": 0.67, "grad_norm": 1.582165778558871, "learning_rate": 2.6523267295740663e-06, "loss": 0.4503, "step": 5366 }, { "epoch": 0.67, "grad_norm": 1.7127631256028564, "learning_rate": 2.6505525101857e-06, "loss": 0.5577, "step": 5367 }, { "epoch": 0.67, "grad_norm": 1.6587222980531775, "learning_rate": 2.6487786703712692e-06, "loss": 0.4739, "step": 5368 }, { "epoch": 0.67, "grad_norm": 1.4720907931346792, "learning_rate": 2.6470052104173504e-06, "loss": 0.5673, "step": 5369 }, { "epoch": 0.67, "grad_norm": 1.7165887666323725, "learning_rate": 2.6452321306104634e-06, "loss": 0.4357, "step": 5370 }, { "epoch": 0.67, "grad_norm": 1.6261364104457807, "learning_rate": 2.6434594312370664e-06, "loss": 0.5145, "step": 5371 }, { "epoch": 0.67, "grad_norm": 1.8610267248885162, "learning_rate": 2.641687112583551e-06, "loss": 0.4855, "step": 5372 }, { "epoch": 0.67, "grad_norm": 1.6204122173348687, "learning_rate": 2.639915174936254e-06, "loss": 0.5469, "step": 5373 }, { "epoch": 0.67, "grad_norm": 1.3873663278041979, "learning_rate": 2.638143618581445e-06, "loss": 0.5024, "step": 5374 }, { "epoch": 0.67, "grad_norm": 1.7635110022929765, "learning_rate": 2.6363724438053377e-06, "loss": 0.4929, "step": 5375 }, { "epoch": 0.67, "grad_norm": 1.5521889396540993, "learning_rate": 2.6346016508940776e-06, "loss": 0.4955, "step": 5376 }, { "epoch": 0.67, "grad_norm": 1.6675794203169498, "learning_rate": 2.632831240133754e-06, "loss": 0.5452, "step": 5377 }, { "epoch": 0.67, "grad_norm": 1.7268314997834946, "learning_rate": 2.631061211810391e-06, "loss": 0.4706, "step": 5378 }, { "epoch": 0.67, "grad_norm": 2.1173737686340006, "learning_rate": 2.629291566209952e-06, "loss": 0.4931, "step": 5379 }, { "epoch": 0.67, "grad_norm": 1.7066487562954822, "learning_rate": 2.627522303618343e-06, "loss": 0.4422, "step": 5380 }, { "epoch": 0.67, "grad_norm": 1.496890984897217, "learning_rate": 2.6257534243213977e-06, "loss": 0.4978, "step": 5381 }, { "epoch": 0.67, "grad_norm": 1.2373340535763007, "learning_rate": 2.623984928604899e-06, "loss": 0.4543, "step": 5382 }, { "epoch": 0.67, "grad_norm": 1.3134546338998108, "learning_rate": 2.6222168167545603e-06, "loss": 0.4877, "step": 5383 }, { "epoch": 0.67, "grad_norm": 1.4630151280361863, "learning_rate": 2.6204490890560376e-06, "loss": 0.5555, "step": 5384 }, { "epoch": 0.67, "grad_norm": 1.451411796522502, "learning_rate": 2.6186817457949203e-06, "loss": 0.5066, "step": 5385 }, { "epoch": 0.67, "grad_norm": 1.3195842991720612, "learning_rate": 2.6169147872567398e-06, "loss": 0.5071, "step": 5386 }, { "epoch": 0.67, "grad_norm": 1.3759847548738064, "learning_rate": 2.6151482137269652e-06, "loss": 0.4938, "step": 5387 }, { "epoch": 0.67, "grad_norm": 1.3174398231895585, "learning_rate": 2.6133820254909993e-06, "loss": 0.5007, "step": 5388 }, { "epoch": 0.67, "grad_norm": 1.9484527254681077, "learning_rate": 2.611616222834188e-06, "loss": 0.4696, "step": 5389 }, { "epoch": 0.67, "grad_norm": 1.3472055337152538, "learning_rate": 2.6098508060418094e-06, "loss": 0.5497, "step": 5390 }, { "epoch": 0.67, "grad_norm": 1.3918179486748368, "learning_rate": 2.6080857753990853e-06, "loss": 0.4979, "step": 5391 }, { "epoch": 0.67, "grad_norm": 1.3937102693000716, "learning_rate": 2.6063211311911677e-06, "loss": 0.5036, "step": 5392 }, { "epoch": 0.67, "grad_norm": 1.479782076219447, "learning_rate": 2.6045568737031557e-06, "loss": 0.4686, "step": 5393 }, { "epoch": 0.67, "grad_norm": 1.3786810743205558, "learning_rate": 2.6027930032200744e-06, "loss": 0.4774, "step": 5394 }, { "epoch": 0.67, "grad_norm": 1.6375943044791605, "learning_rate": 2.6010295200268993e-06, "loss": 0.4739, "step": 5395 }, { "epoch": 0.67, "grad_norm": 1.2752704209922996, "learning_rate": 2.5992664244085337e-06, "loss": 0.4696, "step": 5396 }, { "epoch": 0.67, "grad_norm": 1.6687567800175958, "learning_rate": 2.597503716649819e-06, "loss": 0.5181, "step": 5397 }, { "epoch": 0.67, "grad_norm": 2.0429923657351186, "learning_rate": 2.5957413970355404e-06, "loss": 0.5373, "step": 5398 }, { "epoch": 0.67, "grad_norm": 1.5646069102263396, "learning_rate": 2.5939794658504113e-06, "loss": 0.5409, "step": 5399 }, { "epoch": 0.67, "grad_norm": 1.6190220992841864, "learning_rate": 2.592217923379093e-06, "loss": 0.4874, "step": 5400 }, { "epoch": 0.67, "grad_norm": 1.4753599318825783, "learning_rate": 2.590456769906172e-06, "loss": 0.4838, "step": 5401 }, { "epoch": 0.67, "grad_norm": 2.4759908583800505, "learning_rate": 2.588696005716184e-06, "loss": 0.5297, "step": 5402 }, { "epoch": 0.67, "grad_norm": 1.4459072953716825, "learning_rate": 2.586935631093588e-06, "loss": 0.5382, "step": 5403 }, { "epoch": 0.67, "grad_norm": 1.5083852254491767, "learning_rate": 2.5851756463227985e-06, "loss": 0.5376, "step": 5404 }, { "epoch": 0.67, "grad_norm": 1.4886043444239714, "learning_rate": 2.5834160516881503e-06, "loss": 0.5065, "step": 5405 }, { "epoch": 0.67, "grad_norm": 1.4808882691319507, "learning_rate": 2.5816568474739205e-06, "loss": 0.4836, "step": 5406 }, { "epoch": 0.67, "grad_norm": 1.8579835845587833, "learning_rate": 2.579898033964328e-06, "loss": 0.4827, "step": 5407 }, { "epoch": 0.67, "grad_norm": 1.3889553938886416, "learning_rate": 2.578139611443521e-06, "loss": 0.474, "step": 5408 }, { "epoch": 0.67, "grad_norm": 1.3946539677304366, "learning_rate": 2.5763815801955906e-06, "loss": 0.4721, "step": 5409 }, { "epoch": 0.67, "grad_norm": 1.3500902016545895, "learning_rate": 2.5746239405045592e-06, "loss": 0.4449, "step": 5410 }, { "epoch": 0.67, "grad_norm": 1.1094960897936739, "learning_rate": 2.572866692654392e-06, "loss": 0.4604, "step": 5411 }, { "epoch": 0.67, "grad_norm": 1.7659196268114692, "learning_rate": 2.5711098369289867e-06, "loss": 0.4616, "step": 5412 }, { "epoch": 0.67, "grad_norm": 1.494765942335093, "learning_rate": 2.56935337361218e-06, "loss": 0.5224, "step": 5413 }, { "epoch": 0.67, "grad_norm": 1.4371359529712553, "learning_rate": 2.5675973029877437e-06, "loss": 0.4767, "step": 5414 }, { "epoch": 0.67, "grad_norm": 1.2718173194398794, "learning_rate": 2.565841625339384e-06, "loss": 0.4592, "step": 5415 }, { "epoch": 0.67, "grad_norm": 1.5232337238041327, "learning_rate": 2.5640863409507497e-06, "loss": 0.4844, "step": 5416 }, { "epoch": 0.67, "grad_norm": 1.6899511433971866, "learning_rate": 2.5623314501054187e-06, "loss": 0.5109, "step": 5417 }, { "epoch": 0.67, "grad_norm": 1.3912369568606449, "learning_rate": 2.560576953086913e-06, "loss": 0.4751, "step": 5418 }, { "epoch": 0.67, "grad_norm": 0.6970311944964789, "learning_rate": 2.5588228501786804e-06, "loss": 0.4954, "step": 5419 }, { "epoch": 0.67, "grad_norm": 1.728631154719974, "learning_rate": 2.5570691416641215e-06, "loss": 0.5504, "step": 5420 }, { "epoch": 0.67, "grad_norm": 1.588536882862975, "learning_rate": 2.5553158278265553e-06, "loss": 0.5101, "step": 5421 }, { "epoch": 0.67, "grad_norm": 2.2162522648632335, "learning_rate": 2.5535629089492496e-06, "loss": 0.5346, "step": 5422 }, { "epoch": 0.67, "grad_norm": 1.3282845832866839, "learning_rate": 2.551810385315403e-06, "loss": 0.4933, "step": 5423 }, { "epoch": 0.67, "grad_norm": 1.8115742362225473, "learning_rate": 2.550058257208149e-06, "loss": 0.4784, "step": 5424 }, { "epoch": 0.67, "grad_norm": 1.4077049409848308, "learning_rate": 2.5483065249105614e-06, "loss": 0.4772, "step": 5425 }, { "epoch": 0.67, "grad_norm": 1.3679985824357763, "learning_rate": 2.546555188705646e-06, "loss": 0.5134, "step": 5426 }, { "epoch": 0.67, "grad_norm": 0.6740862704181385, "learning_rate": 2.544804248876348e-06, "loss": 0.4975, "step": 5427 }, { "epoch": 0.67, "grad_norm": 1.3944543755158425, "learning_rate": 2.5430537057055466e-06, "loss": 0.4992, "step": 5428 }, { "epoch": 0.67, "grad_norm": 1.4906121548513718, "learning_rate": 2.54130355947606e-06, "loss": 0.535, "step": 5429 }, { "epoch": 0.67, "grad_norm": 1.3062691441609326, "learning_rate": 2.539553810470636e-06, "loss": 0.4755, "step": 5430 }, { "epoch": 0.67, "grad_norm": 2.2265555064473412, "learning_rate": 2.537804458971965e-06, "loss": 0.4526, "step": 5431 }, { "epoch": 0.67, "grad_norm": 1.6179227016248734, "learning_rate": 2.5360555052626666e-06, "loss": 0.4724, "step": 5432 }, { "epoch": 0.67, "grad_norm": 1.7545604440470863, "learning_rate": 2.534306949625305e-06, "loss": 0.5003, "step": 5433 }, { "epoch": 0.67, "grad_norm": 1.4398909017521693, "learning_rate": 2.53255879234237e-06, "loss": 0.482, "step": 5434 }, { "epoch": 0.67, "grad_norm": 1.5461741379825082, "learning_rate": 2.5308110336962904e-06, "loss": 0.4763, "step": 5435 }, { "epoch": 0.67, "grad_norm": 1.481190626860878, "learning_rate": 2.5290636739694384e-06, "loss": 0.5068, "step": 5436 }, { "epoch": 0.67, "grad_norm": 1.7330072487063952, "learning_rate": 2.5273167134441107e-06, "loss": 0.4936, "step": 5437 }, { "epoch": 0.67, "grad_norm": 1.7135444486315234, "learning_rate": 2.5255701524025466e-06, "loss": 0.5632, "step": 5438 }, { "epoch": 0.67, "grad_norm": 1.3555355930905109, "learning_rate": 2.523823991126916e-06, "loss": 0.4596, "step": 5439 }, { "epoch": 0.68, "grad_norm": 1.8351747571846526, "learning_rate": 2.5220782298993297e-06, "loss": 0.4841, "step": 5440 }, { "epoch": 0.68, "grad_norm": 1.3244901585305506, "learning_rate": 2.5203328690018266e-06, "loss": 0.5499, "step": 5441 }, { "epoch": 0.68, "grad_norm": 1.2873125222445203, "learning_rate": 2.5185879087163896e-06, "loss": 0.522, "step": 5442 }, { "epoch": 0.68, "grad_norm": 1.4651537968091042, "learning_rate": 2.51684334932493e-06, "loss": 0.5391, "step": 5443 }, { "epoch": 0.68, "grad_norm": 1.4053443389505933, "learning_rate": 2.5150991911092935e-06, "loss": 0.4871, "step": 5444 }, { "epoch": 0.68, "grad_norm": 1.492951892417051, "learning_rate": 2.513355434351271e-06, "loss": 0.5062, "step": 5445 }, { "epoch": 0.68, "grad_norm": 1.943793634105126, "learning_rate": 2.511612079332577e-06, "loss": 0.4549, "step": 5446 }, { "epoch": 0.68, "grad_norm": 1.3936898928307528, "learning_rate": 2.5098691263348697e-06, "loss": 0.4575, "step": 5447 }, { "epoch": 0.68, "grad_norm": 2.26250521984885, "learning_rate": 2.508126575639733e-06, "loss": 0.4571, "step": 5448 }, { "epoch": 0.68, "grad_norm": 1.4848240903706404, "learning_rate": 2.5063844275286974e-06, "loss": 0.5088, "step": 5449 }, { "epoch": 0.68, "grad_norm": 1.5001599313859406, "learning_rate": 2.5046426822832175e-06, "loss": 0.463, "step": 5450 }, { "epoch": 0.68, "grad_norm": 1.5160790102059807, "learning_rate": 2.5029013401846913e-06, "loss": 0.4656, "step": 5451 }, { "epoch": 0.68, "grad_norm": 2.123264081371732, "learning_rate": 2.5011604015144435e-06, "loss": 0.4472, "step": 5452 }, { "epoch": 0.68, "grad_norm": 1.3789951391327997, "learning_rate": 2.499419866553741e-06, "loss": 0.5015, "step": 5453 }, { "epoch": 0.68, "grad_norm": 1.4603123809144725, "learning_rate": 2.4976797355837845e-06, "loss": 0.5077, "step": 5454 }, { "epoch": 0.68, "grad_norm": 1.7650029454656442, "learning_rate": 2.495940008885703e-06, "loss": 0.5056, "step": 5455 }, { "epoch": 0.68, "grad_norm": 3.4849664616453735, "learning_rate": 2.4942006867405685e-06, "loss": 0.531, "step": 5456 }, { "epoch": 0.68, "grad_norm": 1.493518897193527, "learning_rate": 2.492461769429381e-06, "loss": 0.5234, "step": 5457 }, { "epoch": 0.68, "grad_norm": 1.3869502079430214, "learning_rate": 2.49072325723308e-06, "loss": 0.5068, "step": 5458 }, { "epoch": 0.68, "grad_norm": 0.681702737575638, "learning_rate": 2.4889851504325348e-06, "loss": 0.5342, "step": 5459 }, { "epoch": 0.68, "grad_norm": 0.6611963051545626, "learning_rate": 2.487247449308554e-06, "loss": 0.5225, "step": 5460 }, { "epoch": 0.68, "grad_norm": 1.4589571409635018, "learning_rate": 2.4855101541418797e-06, "loss": 0.5014, "step": 5461 }, { "epoch": 0.68, "grad_norm": 1.2537043292301941, "learning_rate": 2.483773265213184e-06, "loss": 0.4961, "step": 5462 }, { "epoch": 0.68, "grad_norm": 1.32920342648201, "learning_rate": 2.48203678280308e-06, "loss": 0.4458, "step": 5463 }, { "epoch": 0.68, "grad_norm": 1.3681830900025551, "learning_rate": 2.4803007071921083e-06, "loss": 0.4675, "step": 5464 }, { "epoch": 0.68, "grad_norm": 1.4756488701641188, "learning_rate": 2.478565038660751e-06, "loss": 0.452, "step": 5465 }, { "epoch": 0.68, "grad_norm": 1.6817738940482172, "learning_rate": 2.4768297774894157e-06, "loss": 0.5072, "step": 5466 }, { "epoch": 0.68, "grad_norm": 1.5245969438197655, "learning_rate": 2.4750949239584543e-06, "loss": 0.475, "step": 5467 }, { "epoch": 0.68, "grad_norm": 1.280761509374414, "learning_rate": 2.4733604783481436e-06, "loss": 0.4787, "step": 5468 }, { "epoch": 0.68, "grad_norm": 1.4899473963878174, "learning_rate": 2.4716264409387005e-06, "loss": 0.496, "step": 5469 }, { "epoch": 0.68, "grad_norm": 1.7960664090989007, "learning_rate": 2.469892812010275e-06, "loss": 0.5228, "step": 5470 }, { "epoch": 0.68, "grad_norm": 1.2990665854475318, "learning_rate": 2.4681595918429473e-06, "loss": 0.4516, "step": 5471 }, { "epoch": 0.68, "grad_norm": 1.717509107476117, "learning_rate": 2.466426780716738e-06, "loss": 0.5279, "step": 5472 }, { "epoch": 0.68, "grad_norm": 3.820181283099114, "learning_rate": 2.4646943789115947e-06, "loss": 0.5585, "step": 5473 }, { "epoch": 0.68, "grad_norm": 1.4540625568618581, "learning_rate": 2.4629623867074043e-06, "loss": 0.5351, "step": 5474 }, { "epoch": 0.68, "grad_norm": 1.3742096801051715, "learning_rate": 2.4612308043839835e-06, "loss": 0.5146, "step": 5475 }, { "epoch": 0.68, "grad_norm": 1.5797660549317376, "learning_rate": 2.459499632221085e-06, "loss": 0.5133, "step": 5476 }, { "epoch": 0.68, "grad_norm": 1.776131095900743, "learning_rate": 2.4577688704983984e-06, "loss": 0.4713, "step": 5477 }, { "epoch": 0.68, "grad_norm": 1.483890797314977, "learning_rate": 2.456038519495538e-06, "loss": 0.4944, "step": 5478 }, { "epoch": 0.68, "grad_norm": 1.958313627579842, "learning_rate": 2.4543085794920616e-06, "loss": 0.5002, "step": 5479 }, { "epoch": 0.68, "grad_norm": 1.6271441054448663, "learning_rate": 2.4525790507674543e-06, "loss": 0.5027, "step": 5480 }, { "epoch": 0.68, "grad_norm": 0.6516835480074524, "learning_rate": 2.4508499336011377e-06, "loss": 0.4594, "step": 5481 }, { "epoch": 0.68, "grad_norm": 1.5440243026978204, "learning_rate": 2.4491212282724637e-06, "loss": 0.5465, "step": 5482 }, { "epoch": 0.68, "grad_norm": 1.4122249610320332, "learning_rate": 2.447392935060723e-06, "loss": 0.4976, "step": 5483 }, { "epoch": 0.68, "grad_norm": 1.4013850774891232, "learning_rate": 2.4456650542451333e-06, "loss": 0.4864, "step": 5484 }, { "epoch": 0.68, "grad_norm": 1.77467430479846, "learning_rate": 2.443937586104851e-06, "loss": 0.5007, "step": 5485 }, { "epoch": 0.68, "grad_norm": 1.3686948184301855, "learning_rate": 2.4422105309189646e-06, "loss": 0.4656, "step": 5486 }, { "epoch": 0.68, "grad_norm": 1.6813398955724435, "learning_rate": 2.4404838889664923e-06, "loss": 0.51, "step": 5487 }, { "epoch": 0.68, "grad_norm": 1.3466499318485836, "learning_rate": 2.4387576605263924e-06, "loss": 0.4295, "step": 5488 }, { "epoch": 0.68, "grad_norm": 1.312175726859582, "learning_rate": 2.4370318458775472e-06, "loss": 0.4509, "step": 5489 }, { "epoch": 0.68, "grad_norm": 1.4006040680258878, "learning_rate": 2.435306445298782e-06, "loss": 0.4833, "step": 5490 }, { "epoch": 0.68, "grad_norm": 1.960510313343304, "learning_rate": 2.4335814590688465e-06, "loss": 0.4776, "step": 5491 }, { "epoch": 0.68, "grad_norm": 1.3678018833794279, "learning_rate": 2.4318568874664318e-06, "loss": 0.4486, "step": 5492 }, { "epoch": 0.68, "grad_norm": 1.786463696861167, "learning_rate": 2.4301327307701526e-06, "loss": 0.4443, "step": 5493 }, { "epoch": 0.68, "grad_norm": 1.9480246804073973, "learning_rate": 2.4284089892585642e-06, "loss": 0.4685, "step": 5494 }, { "epoch": 0.68, "grad_norm": 1.3265526795294944, "learning_rate": 2.426685663210154e-06, "loss": 0.4253, "step": 5495 }, { "epoch": 0.68, "grad_norm": 1.341929758596841, "learning_rate": 2.424962752903337e-06, "loss": 0.5203, "step": 5496 }, { "epoch": 0.68, "grad_norm": 1.4182009629873498, "learning_rate": 2.4232402586164677e-06, "loss": 0.4407, "step": 5497 }, { "epoch": 0.68, "grad_norm": 1.742253522033818, "learning_rate": 2.421518180627827e-06, "loss": 0.5481, "step": 5498 }, { "epoch": 0.68, "grad_norm": 0.6532235894146765, "learning_rate": 2.4197965192156354e-06, "loss": 0.4956, "step": 5499 }, { "epoch": 0.68, "grad_norm": 1.3161751128320978, "learning_rate": 2.418075274658039e-06, "loss": 0.4593, "step": 5500 }, { "epoch": 0.68, "grad_norm": 0.6334983346371813, "learning_rate": 2.4163544472331207e-06, "loss": 0.4933, "step": 5501 }, { "epoch": 0.68, "grad_norm": 1.6149656215565829, "learning_rate": 2.4146340372188964e-06, "loss": 0.4689, "step": 5502 }, { "epoch": 0.68, "grad_norm": 1.7919360703700247, "learning_rate": 2.412914044893316e-06, "loss": 0.5363, "step": 5503 }, { "epoch": 0.68, "grad_norm": 2.457696564614905, "learning_rate": 2.4111944705342554e-06, "loss": 0.5046, "step": 5504 }, { "epoch": 0.68, "grad_norm": 1.8186669373877837, "learning_rate": 2.409475314419527e-06, "loss": 0.4993, "step": 5505 }, { "epoch": 0.68, "grad_norm": 1.3221148999267518, "learning_rate": 2.407756576826879e-06, "loss": 0.4626, "step": 5506 }, { "epoch": 0.68, "grad_norm": 1.6052732752272765, "learning_rate": 2.4060382580339842e-06, "loss": 0.4899, "step": 5507 }, { "epoch": 0.68, "grad_norm": 2.5767022145696568, "learning_rate": 2.404320358318456e-06, "loss": 0.5222, "step": 5508 }, { "epoch": 0.68, "grad_norm": 2.5095809579755186, "learning_rate": 2.4026028779578315e-06, "loss": 0.4617, "step": 5509 }, { "epoch": 0.68, "grad_norm": 1.97793047083496, "learning_rate": 2.400885817229591e-06, "loss": 0.4826, "step": 5510 }, { "epoch": 0.68, "grad_norm": 1.3588500491345321, "learning_rate": 2.3991691764111365e-06, "loss": 0.5548, "step": 5511 }, { "epoch": 0.68, "grad_norm": 2.6518198586561694, "learning_rate": 2.3974529557798095e-06, "loss": 0.4769, "step": 5512 }, { "epoch": 0.68, "grad_norm": 1.390464564482471, "learning_rate": 2.395737155612878e-06, "loss": 0.5309, "step": 5513 }, { "epoch": 0.68, "grad_norm": 1.3918943127026036, "learning_rate": 2.3940217761875433e-06, "loss": 0.51, "step": 5514 }, { "epoch": 0.68, "grad_norm": 1.6977411274786531, "learning_rate": 2.392306817780945e-06, "loss": 0.4702, "step": 5515 }, { "epoch": 0.68, "grad_norm": 1.6922389974526388, "learning_rate": 2.390592280670144e-06, "loss": 0.4979, "step": 5516 }, { "epoch": 0.68, "grad_norm": 1.3787793285566037, "learning_rate": 2.388878165132142e-06, "loss": 0.4635, "step": 5517 }, { "epoch": 0.68, "grad_norm": 1.646674192788396, "learning_rate": 2.387164471443869e-06, "loss": 0.4554, "step": 5518 }, { "epoch": 0.68, "grad_norm": 1.3207958483783915, "learning_rate": 2.3854511998821894e-06, "loss": 0.5156, "step": 5519 }, { "epoch": 0.69, "grad_norm": 2.227695167477801, "learning_rate": 2.3837383507238936e-06, "loss": 0.5333, "step": 5520 }, { "epoch": 0.69, "grad_norm": 1.4267150504517874, "learning_rate": 2.382025924245711e-06, "loss": 0.5071, "step": 5521 }, { "epoch": 0.69, "grad_norm": 1.3844833274592145, "learning_rate": 2.3803139207242974e-06, "loss": 0.4785, "step": 5522 }, { "epoch": 0.69, "grad_norm": 1.4789844668096037, "learning_rate": 2.37860234043624e-06, "loss": 0.5884, "step": 5523 }, { "epoch": 0.69, "grad_norm": 1.4224685107437975, "learning_rate": 2.3768911836580645e-06, "loss": 0.5148, "step": 5524 }, { "epoch": 0.69, "grad_norm": 1.9225409466991767, "learning_rate": 2.3751804506662174e-06, "loss": 0.5083, "step": 5525 }, { "epoch": 0.69, "grad_norm": 1.5887749841143064, "learning_rate": 2.3734701417370866e-06, "loss": 0.4917, "step": 5526 }, { "epoch": 0.69, "grad_norm": 1.7569083490679074, "learning_rate": 2.3717602571469865e-06, "loss": 0.4632, "step": 5527 }, { "epoch": 0.69, "grad_norm": 1.4140716874174504, "learning_rate": 2.3700507971721663e-06, "loss": 0.427, "step": 5528 }, { "epoch": 0.69, "grad_norm": 1.609909140872164, "learning_rate": 2.3683417620888003e-06, "loss": 0.5611, "step": 5529 }, { "epoch": 0.69, "grad_norm": 1.5878396327956295, "learning_rate": 2.3666331521730026e-06, "loss": 0.4592, "step": 5530 }, { "epoch": 0.69, "grad_norm": 0.7151379897062956, "learning_rate": 2.3649249677008097e-06, "loss": 0.5009, "step": 5531 }, { "epoch": 0.69, "grad_norm": 1.2996372953750224, "learning_rate": 2.3632172089481973e-06, "loss": 0.546, "step": 5532 }, { "epoch": 0.69, "grad_norm": 2.077678869251373, "learning_rate": 2.361509876191068e-06, "loss": 0.4523, "step": 5533 }, { "epoch": 0.69, "grad_norm": 1.747621552902312, "learning_rate": 2.3598029697052522e-06, "loss": 0.4888, "step": 5534 }, { "epoch": 0.69, "grad_norm": 1.3678679837061298, "learning_rate": 2.358096489766523e-06, "loss": 0.5107, "step": 5535 }, { "epoch": 0.69, "grad_norm": 1.6739888453600988, "learning_rate": 2.3563904366505717e-06, "loss": 0.4862, "step": 5536 }, { "epoch": 0.69, "grad_norm": 1.8955093820392193, "learning_rate": 2.3546848106330308e-06, "loss": 0.5101, "step": 5537 }, { "epoch": 0.69, "grad_norm": 1.6366006897451797, "learning_rate": 2.352979611989454e-06, "loss": 0.5044, "step": 5538 }, { "epoch": 0.69, "grad_norm": 1.5750665629516203, "learning_rate": 2.3512748409953358e-06, "loss": 0.489, "step": 5539 }, { "epoch": 0.69, "grad_norm": 1.4189184004954971, "learning_rate": 2.3495704979260924e-06, "loss": 0.4985, "step": 5540 }, { "epoch": 0.69, "grad_norm": 1.666993666611936, "learning_rate": 2.3478665830570807e-06, "loss": 0.4446, "step": 5541 }, { "epoch": 0.69, "grad_norm": 1.4494929286590599, "learning_rate": 2.3461630966635784e-06, "loss": 0.497, "step": 5542 }, { "epoch": 0.69, "grad_norm": 1.487393842420435, "learning_rate": 2.3444600390208007e-06, "loss": 0.4965, "step": 5543 }, { "epoch": 0.69, "grad_norm": 1.5902444904379578, "learning_rate": 2.3427574104038934e-06, "loss": 0.4581, "step": 5544 }, { "epoch": 0.69, "grad_norm": 1.161066446508982, "learning_rate": 2.3410552110879277e-06, "loss": 0.4993, "step": 5545 }, { "epoch": 0.69, "grad_norm": 1.3929795053155725, "learning_rate": 2.339353441347912e-06, "loss": 0.5134, "step": 5546 }, { "epoch": 0.69, "grad_norm": 1.37935134864808, "learning_rate": 2.3376521014587787e-06, "loss": 0.504, "step": 5547 }, { "epoch": 0.69, "grad_norm": 2.0874526415093935, "learning_rate": 2.335951191695399e-06, "loss": 0.53, "step": 5548 }, { "epoch": 0.69, "grad_norm": 1.7303232882596309, "learning_rate": 2.334250712332565e-06, "loss": 0.4948, "step": 5549 }, { "epoch": 0.69, "grad_norm": 1.8220759392032708, "learning_rate": 2.3325506636450056e-06, "loss": 0.5296, "step": 5550 }, { "epoch": 0.69, "grad_norm": 1.4777829873197916, "learning_rate": 2.3308510459073817e-06, "loss": 0.4775, "step": 5551 }, { "epoch": 0.69, "grad_norm": 1.5027960581625106, "learning_rate": 2.3291518593942774e-06, "loss": 0.5385, "step": 5552 }, { "epoch": 0.69, "grad_norm": 1.5217717603564733, "learning_rate": 2.3274531043802148e-06, "loss": 0.4594, "step": 5553 }, { "epoch": 0.69, "grad_norm": 1.5906557921951225, "learning_rate": 2.325754781139638e-06, "loss": 0.4724, "step": 5554 }, { "epoch": 0.69, "grad_norm": 1.329096145339709, "learning_rate": 2.3240568899469317e-06, "loss": 0.4716, "step": 5555 }, { "epoch": 0.69, "grad_norm": 1.4012838734313477, "learning_rate": 2.322359431076401e-06, "loss": 0.4911, "step": 5556 }, { "epoch": 0.69, "grad_norm": 1.5384021925882139, "learning_rate": 2.3206624048022872e-06, "loss": 0.506, "step": 5557 }, { "epoch": 0.69, "grad_norm": 0.655449448220534, "learning_rate": 2.318965811398759e-06, "loss": 0.5081, "step": 5558 }, { "epoch": 0.69, "grad_norm": 1.5825164260224847, "learning_rate": 2.317269651139916e-06, "loss": 0.5677, "step": 5559 }, { "epoch": 0.69, "grad_norm": 1.6821233375146059, "learning_rate": 2.3155739242997893e-06, "loss": 0.5366, "step": 5560 }, { "epoch": 0.69, "grad_norm": 1.5594929350983364, "learning_rate": 2.3138786311523364e-06, "loss": 0.4455, "step": 5561 }, { "epoch": 0.69, "grad_norm": 1.4382628232628942, "learning_rate": 2.3121837719714496e-06, "loss": 0.4402, "step": 5562 }, { "epoch": 0.69, "grad_norm": 1.4816789194218292, "learning_rate": 2.310489347030945e-06, "loss": 0.4789, "step": 5563 }, { "epoch": 0.69, "grad_norm": 1.5188438068192134, "learning_rate": 2.3087953566045743e-06, "loss": 0.4484, "step": 5564 }, { "epoch": 0.69, "grad_norm": 1.5098238352487316, "learning_rate": 2.307101800966015e-06, "loss": 0.4725, "step": 5565 }, { "epoch": 0.69, "grad_norm": 1.7038045623006983, "learning_rate": 2.305408680388878e-06, "loss": 0.4944, "step": 5566 }, { "epoch": 0.69, "grad_norm": 1.2637120328181233, "learning_rate": 2.303715995146699e-06, "loss": 0.5007, "step": 5567 }, { "epoch": 0.69, "grad_norm": 1.4973349132088374, "learning_rate": 2.3020237455129473e-06, "loss": 0.5026, "step": 5568 }, { "epoch": 0.69, "grad_norm": 2.5771957423178784, "learning_rate": 2.3003319317610232e-06, "loss": 0.441, "step": 5569 }, { "epoch": 0.69, "grad_norm": 1.9968666654182445, "learning_rate": 2.298640554164251e-06, "loss": 0.5056, "step": 5570 }, { "epoch": 0.69, "grad_norm": 1.5088225959683377, "learning_rate": 2.2969496129958896e-06, "loss": 0.4609, "step": 5571 }, { "epoch": 0.69, "grad_norm": 1.5688355879342355, "learning_rate": 2.295259108529123e-06, "loss": 0.5082, "step": 5572 }, { "epoch": 0.69, "grad_norm": 2.1218690144924017, "learning_rate": 2.29356904103707e-06, "loss": 0.477, "step": 5573 }, { "epoch": 0.69, "grad_norm": 1.3317460113932156, "learning_rate": 2.291879410792774e-06, "loss": 0.5049, "step": 5574 }, { "epoch": 0.69, "grad_norm": 1.6361394777722302, "learning_rate": 2.2901902180692094e-06, "loss": 0.4927, "step": 5575 }, { "epoch": 0.69, "grad_norm": 1.2600474098118004, "learning_rate": 2.288501463139282e-06, "loss": 0.4862, "step": 5576 }, { "epoch": 0.69, "grad_norm": 1.8835019069260908, "learning_rate": 2.286813146275823e-06, "loss": 0.465, "step": 5577 }, { "epoch": 0.69, "grad_norm": 0.8127391882820307, "learning_rate": 2.285125267751598e-06, "loss": 0.5114, "step": 5578 }, { "epoch": 0.69, "grad_norm": 1.8926521832120369, "learning_rate": 2.2834378278392937e-06, "loss": 0.5105, "step": 5579 }, { "epoch": 0.69, "grad_norm": 1.4873921582045961, "learning_rate": 2.2817508268115364e-06, "loss": 0.4717, "step": 5580 }, { "epoch": 0.69, "grad_norm": 2.819933821860014, "learning_rate": 2.280064264940871e-06, "loss": 0.5442, "step": 5581 }, { "epoch": 0.69, "grad_norm": 1.683485669972065, "learning_rate": 2.27837814249978e-06, "loss": 0.5305, "step": 5582 }, { "epoch": 0.69, "grad_norm": 0.6440117666442702, "learning_rate": 2.2766924597606686e-06, "loss": 0.499, "step": 5583 }, { "epoch": 0.69, "grad_norm": 1.6879618779299213, "learning_rate": 2.2750072169958754e-06, "loss": 0.5379, "step": 5584 }, { "epoch": 0.69, "grad_norm": 1.9671847158512492, "learning_rate": 2.273322414477668e-06, "loss": 0.4351, "step": 5585 }, { "epoch": 0.69, "grad_norm": 1.466482216219935, "learning_rate": 2.271638052478236e-06, "loss": 0.4478, "step": 5586 }, { "epoch": 0.69, "grad_norm": 1.9616512482384683, "learning_rate": 2.269954131269708e-06, "loss": 0.4975, "step": 5587 }, { "epoch": 0.69, "grad_norm": 1.3639134192599855, "learning_rate": 2.268270651124133e-06, "loss": 0.4843, "step": 5588 }, { "epoch": 0.69, "grad_norm": 1.3553964640402418, "learning_rate": 2.266587612313495e-06, "loss": 0.492, "step": 5589 }, { "epoch": 0.69, "grad_norm": 1.5577828659517323, "learning_rate": 2.2649050151096994e-06, "loss": 0.5129, "step": 5590 }, { "epoch": 0.69, "grad_norm": 1.6522603447358237, "learning_rate": 2.2632228597845878e-06, "loss": 0.4852, "step": 5591 }, { "epoch": 0.69, "grad_norm": 1.6798186942226545, "learning_rate": 2.2615411466099283e-06, "loss": 0.5052, "step": 5592 }, { "epoch": 0.69, "grad_norm": 1.7162936489412086, "learning_rate": 2.259859875857413e-06, "loss": 0.4911, "step": 5593 }, { "epoch": 0.69, "grad_norm": 1.4025456726402974, "learning_rate": 2.2581790477986692e-06, "loss": 0.5039, "step": 5594 }, { "epoch": 0.69, "grad_norm": 1.5845470815346903, "learning_rate": 2.2564986627052463e-06, "loss": 0.5274, "step": 5595 }, { "epoch": 0.69, "grad_norm": 1.6416412378921217, "learning_rate": 2.2548187208486293e-06, "loss": 0.5115, "step": 5596 }, { "epoch": 0.69, "grad_norm": 1.3451856385540713, "learning_rate": 2.2531392225002236e-06, "loss": 0.4623, "step": 5597 }, { "epoch": 0.69, "grad_norm": 1.3698888882150644, "learning_rate": 2.25146016793137e-06, "loss": 0.4535, "step": 5598 }, { "epoch": 0.69, "grad_norm": 0.6976438584873728, "learning_rate": 2.2497815574133313e-06, "loss": 0.5107, "step": 5599 }, { "epoch": 0.69, "grad_norm": 2.1391226535547676, "learning_rate": 2.2481033912173044e-06, "loss": 0.5465, "step": 5600 }, { "epoch": 0.7, "grad_norm": 1.4528519800705884, "learning_rate": 2.2464256696144106e-06, "loss": 0.5174, "step": 5601 }, { "epoch": 0.7, "grad_norm": 1.4471062589320642, "learning_rate": 2.2447483928757034e-06, "loss": 0.5343, "step": 5602 }, { "epoch": 0.7, "grad_norm": 1.7013573683309002, "learning_rate": 2.243071561272159e-06, "loss": 0.51, "step": 5603 }, { "epoch": 0.7, "grad_norm": 1.6929219375700657, "learning_rate": 2.241395175074683e-06, "loss": 0.5223, "step": 5604 }, { "epoch": 0.7, "grad_norm": 1.582600692096603, "learning_rate": 2.2397192345541146e-06, "loss": 0.5067, "step": 5605 }, { "epoch": 0.7, "grad_norm": 1.3480484291333905, "learning_rate": 2.238043739981212e-06, "loss": 0.4497, "step": 5606 }, { "epoch": 0.7, "grad_norm": 1.346904838587814, "learning_rate": 2.2363686916266696e-06, "loss": 0.5279, "step": 5607 }, { "epoch": 0.7, "grad_norm": 1.4764789978750652, "learning_rate": 2.234694089761102e-06, "loss": 0.5216, "step": 5608 }, { "epoch": 0.7, "grad_norm": 0.7128029989775837, "learning_rate": 2.2330199346550624e-06, "loss": 0.5023, "step": 5609 }, { "epoch": 0.7, "grad_norm": 1.4965838514525496, "learning_rate": 2.2313462265790198e-06, "loss": 0.5115, "step": 5610 }, { "epoch": 0.7, "grad_norm": 1.6073821385339875, "learning_rate": 2.22967296580338e-06, "loss": 0.4895, "step": 5611 }, { "epoch": 0.7, "grad_norm": 1.479216078712145, "learning_rate": 2.2280001525984718e-06, "loss": 0.4812, "step": 5612 }, { "epoch": 0.7, "grad_norm": 1.412021542856434, "learning_rate": 2.2263277872345505e-06, "loss": 0.482, "step": 5613 }, { "epoch": 0.7, "grad_norm": 1.4877331023293925, "learning_rate": 2.2246558699818056e-06, "loss": 0.5364, "step": 5614 }, { "epoch": 0.7, "grad_norm": 1.3517184707148822, "learning_rate": 2.222984401110346e-06, "loss": 0.467, "step": 5615 }, { "epoch": 0.7, "grad_norm": 1.4341668244370513, "learning_rate": 2.2213133808902143e-06, "loss": 0.5515, "step": 5616 }, { "epoch": 0.7, "grad_norm": 1.2868126565065974, "learning_rate": 2.219642809591378e-06, "loss": 0.5173, "step": 5617 }, { "epoch": 0.7, "grad_norm": 1.3828616571993415, "learning_rate": 2.2179726874837353e-06, "loss": 0.4831, "step": 5618 }, { "epoch": 0.7, "grad_norm": 1.6489534862379591, "learning_rate": 2.2163030148371044e-06, "loss": 0.4489, "step": 5619 }, { "epoch": 0.7, "grad_norm": 3.1143036312350367, "learning_rate": 2.214633791921241e-06, "loss": 0.4652, "step": 5620 }, { "epoch": 0.7, "grad_norm": 0.6425173069258526, "learning_rate": 2.2129650190058188e-06, "loss": 0.5185, "step": 5621 }, { "epoch": 0.7, "grad_norm": 0.7896536582636093, "learning_rate": 2.211296696360442e-06, "loss": 0.4961, "step": 5622 }, { "epoch": 0.7, "grad_norm": 1.363767251022316, "learning_rate": 2.2096288242546464e-06, "loss": 0.5514, "step": 5623 }, { "epoch": 0.7, "grad_norm": 1.6085365905066162, "learning_rate": 2.2079614029578865e-06, "loss": 0.5116, "step": 5624 }, { "epoch": 0.7, "grad_norm": 1.6616446613161735, "learning_rate": 2.206294432739556e-06, "loss": 0.4866, "step": 5625 }, { "epoch": 0.7, "grad_norm": 2.405353254193536, "learning_rate": 2.2046279138689617e-06, "loss": 0.5026, "step": 5626 }, { "epoch": 0.7, "grad_norm": 1.6128351079408425, "learning_rate": 2.20296184661535e-06, "loss": 0.4926, "step": 5627 }, { "epoch": 0.7, "grad_norm": 1.4587630016659268, "learning_rate": 2.201296231247884e-06, "loss": 0.4461, "step": 5628 }, { "epoch": 0.7, "grad_norm": 1.3835200685248408, "learning_rate": 2.1996310680356623e-06, "loss": 0.4562, "step": 5629 }, { "epoch": 0.7, "grad_norm": 1.3805367109816795, "learning_rate": 2.1979663572477057e-06, "loss": 0.4576, "step": 5630 }, { "epoch": 0.7, "grad_norm": 0.6463950616780922, "learning_rate": 2.19630209915296e-06, "loss": 0.4814, "step": 5631 }, { "epoch": 0.7, "grad_norm": 0.6488872376356858, "learning_rate": 2.1946382940203024e-06, "loss": 0.4789, "step": 5632 }, { "epoch": 0.7, "grad_norm": 1.3765869540183546, "learning_rate": 2.1929749421185363e-06, "loss": 0.4682, "step": 5633 }, { "epoch": 0.7, "grad_norm": 1.7079525292081255, "learning_rate": 2.191312043716392e-06, "loss": 0.5344, "step": 5634 }, { "epoch": 0.7, "grad_norm": 1.5976843490539296, "learning_rate": 2.1896495990825224e-06, "loss": 0.5024, "step": 5635 }, { "epoch": 0.7, "grad_norm": 1.9632934935601438, "learning_rate": 2.187987608485513e-06, "loss": 0.461, "step": 5636 }, { "epoch": 0.7, "grad_norm": 1.2722730035586236, "learning_rate": 2.1863260721938696e-06, "loss": 0.4721, "step": 5637 }, { "epoch": 0.7, "grad_norm": 1.4855676434755076, "learning_rate": 2.1846649904760315e-06, "loss": 0.4987, "step": 5638 }, { "epoch": 0.7, "grad_norm": 1.4833522957894028, "learning_rate": 2.1830043636003574e-06, "loss": 0.4885, "step": 5639 }, { "epoch": 0.7, "grad_norm": 1.3258031890663018, "learning_rate": 2.1813441918351407e-06, "loss": 0.5451, "step": 5640 }, { "epoch": 0.7, "grad_norm": 1.644381239071562, "learning_rate": 2.179684475448592e-06, "loss": 0.5008, "step": 5641 }, { "epoch": 0.7, "grad_norm": 1.4396401967283894, "learning_rate": 2.1780252147088555e-06, "loss": 0.5547, "step": 5642 }, { "epoch": 0.7, "grad_norm": 3.235056697104862, "learning_rate": 2.1763664098840013e-06, "loss": 0.4742, "step": 5643 }, { "epoch": 0.7, "grad_norm": 2.538756193902995, "learning_rate": 2.1747080612420202e-06, "loss": 0.479, "step": 5644 }, { "epoch": 0.7, "grad_norm": 1.489100563897908, "learning_rate": 2.1730501690508363e-06, "loss": 0.4465, "step": 5645 }, { "epoch": 0.7, "grad_norm": 1.4644523867767538, "learning_rate": 2.1713927335782934e-06, "loss": 0.4719, "step": 5646 }, { "epoch": 0.7, "grad_norm": 1.821065940683249, "learning_rate": 2.169735755092168e-06, "loss": 0.5072, "step": 5647 }, { "epoch": 0.7, "grad_norm": 1.5885001554236768, "learning_rate": 2.168079233860157e-06, "loss": 0.5026, "step": 5648 }, { "epoch": 0.7, "grad_norm": 1.7164720858256994, "learning_rate": 2.166423170149887e-06, "loss": 0.5201, "step": 5649 }, { "epoch": 0.7, "grad_norm": 1.7594213840845911, "learning_rate": 2.164767564228911e-06, "loss": 0.5021, "step": 5650 }, { "epoch": 0.7, "grad_norm": 3.028797252998077, "learning_rate": 2.1631124163647043e-06, "loss": 0.4976, "step": 5651 }, { "epoch": 0.7, "grad_norm": 1.5397052115546976, "learning_rate": 2.1614577268246735e-06, "loss": 0.4835, "step": 5652 }, { "epoch": 0.7, "grad_norm": 2.2110349025128313, "learning_rate": 2.1598034958761448e-06, "loss": 0.4917, "step": 5653 }, { "epoch": 0.7, "grad_norm": 1.5698386403094702, "learning_rate": 2.1581497237863767e-06, "loss": 0.4759, "step": 5654 }, { "epoch": 0.7, "grad_norm": 1.4109353451334927, "learning_rate": 2.1564964108225485e-06, "loss": 0.488, "step": 5655 }, { "epoch": 0.7, "grad_norm": 0.6598736568588375, "learning_rate": 2.15484355725177e-06, "loss": 0.5005, "step": 5656 }, { "epoch": 0.7, "grad_norm": 1.4919745602635004, "learning_rate": 2.153191163341071e-06, "loss": 0.486, "step": 5657 }, { "epoch": 0.7, "grad_norm": 1.2253434355089252, "learning_rate": 2.151539229357412e-06, "loss": 0.4571, "step": 5658 }, { "epoch": 0.7, "grad_norm": 1.5071311879513096, "learning_rate": 2.149887755567679e-06, "loss": 0.5155, "step": 5659 }, { "epoch": 0.7, "grad_norm": 1.4334540922292345, "learning_rate": 2.148236742238679e-06, "loss": 0.4797, "step": 5660 }, { "epoch": 0.7, "grad_norm": 1.6044020149090967, "learning_rate": 2.1465861896371514e-06, "loss": 0.5131, "step": 5661 }, { "epoch": 0.7, "grad_norm": 1.3693514507790747, "learning_rate": 2.1449360980297536e-06, "loss": 0.4986, "step": 5662 }, { "epoch": 0.7, "grad_norm": 1.230734930065685, "learning_rate": 2.143286467683076e-06, "loss": 0.4817, "step": 5663 }, { "epoch": 0.7, "grad_norm": 1.3873301908337472, "learning_rate": 2.1416372988636275e-06, "loss": 0.4873, "step": 5664 }, { "epoch": 0.7, "grad_norm": 0.6559315099206287, "learning_rate": 2.1399885918378478e-06, "loss": 0.4808, "step": 5665 }, { "epoch": 0.7, "grad_norm": 2.19002529984588, "learning_rate": 2.1383403468721013e-06, "loss": 0.5296, "step": 5666 }, { "epoch": 0.7, "grad_norm": 1.4688445935033496, "learning_rate": 2.1366925642326735e-06, "loss": 0.5354, "step": 5667 }, { "epoch": 0.7, "grad_norm": 2.079621728241784, "learning_rate": 2.135045244185781e-06, "loss": 0.5183, "step": 5668 }, { "epoch": 0.7, "grad_norm": 1.413368987131197, "learning_rate": 2.13339838699756e-06, "loss": 0.4898, "step": 5669 }, { "epoch": 0.7, "grad_norm": 1.3210859609376642, "learning_rate": 2.1317519929340787e-06, "loss": 0.4783, "step": 5670 }, { "epoch": 0.7, "grad_norm": 1.32159694606616, "learning_rate": 2.130106062261322e-06, "loss": 0.5103, "step": 5671 }, { "epoch": 0.7, "grad_norm": 1.7155454747055365, "learning_rate": 2.128460595245208e-06, "loss": 0.4686, "step": 5672 }, { "epoch": 0.7, "grad_norm": 1.4536064375277995, "learning_rate": 2.126815592151574e-06, "loss": 0.4817, "step": 5673 }, { "epoch": 0.7, "grad_norm": 1.7002556576948114, "learning_rate": 2.1251710532461854e-06, "loss": 0.4708, "step": 5674 }, { "epoch": 0.7, "grad_norm": 1.3823878012680884, "learning_rate": 2.1235269787947345e-06, "loss": 0.505, "step": 5675 }, { "epoch": 0.7, "grad_norm": 1.3071705033544256, "learning_rate": 2.121883369062832e-06, "loss": 0.5239, "step": 5676 }, { "epoch": 0.7, "grad_norm": 1.6916273684756746, "learning_rate": 2.1202402243160215e-06, "loss": 0.4552, "step": 5677 }, { "epoch": 0.7, "grad_norm": 1.8044470154991188, "learning_rate": 2.118597544819763e-06, "loss": 0.4711, "step": 5678 }, { "epoch": 0.7, "grad_norm": 1.4612633988316264, "learning_rate": 2.11695533083945e-06, "loss": 0.4819, "step": 5679 }, { "epoch": 0.7, "grad_norm": 2.4045791920997903, "learning_rate": 2.1153135826403936e-06, "loss": 0.5078, "step": 5680 }, { "epoch": 0.71, "grad_norm": 1.4976793521690002, "learning_rate": 2.1136723004878356e-06, "loss": 0.4956, "step": 5681 }, { "epoch": 0.71, "grad_norm": 1.7046915634044535, "learning_rate": 2.1120314846469364e-06, "loss": 0.5065, "step": 5682 }, { "epoch": 0.71, "grad_norm": 1.4735756528319723, "learning_rate": 2.1103911353827855e-06, "loss": 0.5042, "step": 5683 }, { "epoch": 0.71, "grad_norm": 1.6003853911474244, "learning_rate": 2.1087512529603984e-06, "loss": 0.546, "step": 5684 }, { "epoch": 0.71, "grad_norm": 1.6980308517680083, "learning_rate": 2.1071118376447074e-06, "loss": 0.5027, "step": 5685 }, { "epoch": 0.71, "grad_norm": 1.6285847762124912, "learning_rate": 2.10547288970058e-06, "loss": 0.504, "step": 5686 }, { "epoch": 0.71, "grad_norm": 1.7985510618826166, "learning_rate": 2.1038344093927983e-06, "loss": 0.5049, "step": 5687 }, { "epoch": 0.71, "grad_norm": 1.4584743770881174, "learning_rate": 2.102196396986076e-06, "loss": 0.4688, "step": 5688 }, { "epoch": 0.71, "grad_norm": 2.062882331433079, "learning_rate": 2.100558852745046e-06, "loss": 0.5158, "step": 5689 }, { "epoch": 0.71, "grad_norm": 1.3176991940677245, "learning_rate": 2.098921776934269e-06, "loss": 0.5022, "step": 5690 }, { "epoch": 0.71, "grad_norm": 1.6950672852350026, "learning_rate": 2.097285169818232e-06, "loss": 0.4937, "step": 5691 }, { "epoch": 0.71, "grad_norm": 1.4397600009557168, "learning_rate": 2.0956490316613375e-06, "loss": 0.4694, "step": 5692 }, { "epoch": 0.71, "grad_norm": 1.4476077867770372, "learning_rate": 2.094013362727924e-06, "loss": 0.514, "step": 5693 }, { "epoch": 0.71, "grad_norm": 1.4207692375720573, "learning_rate": 2.0923781632822434e-06, "loss": 0.4678, "step": 5694 }, { "epoch": 0.71, "grad_norm": 2.2564321877149407, "learning_rate": 2.09074343358848e-06, "loss": 0.5346, "step": 5695 }, { "epoch": 0.71, "grad_norm": 1.416804884151017, "learning_rate": 2.0891091739107355e-06, "loss": 0.494, "step": 5696 }, { "epoch": 0.71, "grad_norm": 1.6357332631911095, "learning_rate": 2.087475384513043e-06, "loss": 0.515, "step": 5697 }, { "epoch": 0.71, "grad_norm": 2.5157776993178462, "learning_rate": 2.08584206565935e-06, "loss": 0.4975, "step": 5698 }, { "epoch": 0.71, "grad_norm": 1.6552230500936123, "learning_rate": 2.0842092176135396e-06, "loss": 0.4694, "step": 5699 }, { "epoch": 0.71, "grad_norm": 2.0776540845584193, "learning_rate": 2.082576840639411e-06, "loss": 0.4866, "step": 5700 }, { "epoch": 0.71, "grad_norm": 1.4391172169598452, "learning_rate": 2.080944935000686e-06, "loss": 0.4897, "step": 5701 }, { "epoch": 0.71, "grad_norm": 1.508862587297614, "learning_rate": 2.0793135009610173e-06, "loss": 0.5091, "step": 5702 }, { "epoch": 0.71, "grad_norm": 1.4673804733296882, "learning_rate": 2.077682538783974e-06, "loss": 0.4757, "step": 5703 }, { "epoch": 0.71, "grad_norm": 1.345254379836162, "learning_rate": 2.0760520487330554e-06, "loss": 0.5242, "step": 5704 }, { "epoch": 0.71, "grad_norm": 1.5063600999543156, "learning_rate": 2.074422031071679e-06, "loss": 0.5018, "step": 5705 }, { "epoch": 0.71, "grad_norm": 1.4460384916405886, "learning_rate": 2.0727924860631886e-06, "loss": 0.4711, "step": 5706 }, { "epoch": 0.71, "grad_norm": 1.1349399254738972, "learning_rate": 2.071163413970854e-06, "loss": 0.4204, "step": 5707 }, { "epoch": 0.71, "grad_norm": 2.421480116647311, "learning_rate": 2.0695348150578655e-06, "loss": 0.4944, "step": 5708 }, { "epoch": 0.71, "grad_norm": 3.32601532841179, "learning_rate": 2.0679066895873358e-06, "loss": 0.4766, "step": 5709 }, { "epoch": 0.71, "grad_norm": 1.5325685134493685, "learning_rate": 2.066279037822305e-06, "loss": 0.5301, "step": 5710 }, { "epoch": 0.71, "grad_norm": 1.3833534069309887, "learning_rate": 2.0646518600257343e-06, "loss": 0.495, "step": 5711 }, { "epoch": 0.71, "grad_norm": 1.4562439694652227, "learning_rate": 2.0630251564605053e-06, "loss": 0.4554, "step": 5712 }, { "epoch": 0.71, "grad_norm": 1.8753303528850167, "learning_rate": 2.0613989273894313e-06, "loss": 0.5322, "step": 5713 }, { "epoch": 0.71, "grad_norm": 1.4374425915890892, "learning_rate": 2.059773173075239e-06, "loss": 0.4604, "step": 5714 }, { "epoch": 0.71, "grad_norm": 1.435434545196034, "learning_rate": 2.0581478937805864e-06, "loss": 0.5105, "step": 5715 }, { "epoch": 0.71, "grad_norm": 2.781414285188784, "learning_rate": 2.056523089768051e-06, "loss": 0.5118, "step": 5716 }, { "epoch": 0.71, "grad_norm": 1.4164073183621666, "learning_rate": 2.054898761300136e-06, "loss": 0.4928, "step": 5717 }, { "epoch": 0.71, "grad_norm": 1.8871129395490855, "learning_rate": 2.0532749086392625e-06, "loss": 0.5301, "step": 5718 }, { "epoch": 0.71, "grad_norm": 1.7594277639683007, "learning_rate": 2.0516515320477813e-06, "loss": 0.4697, "step": 5719 }, { "epoch": 0.71, "grad_norm": 26.873164838615025, "learning_rate": 2.0500286317879626e-06, "loss": 0.4826, "step": 5720 }, { "epoch": 0.71, "grad_norm": 1.5551879147629808, "learning_rate": 2.0484062081219967e-06, "loss": 0.5253, "step": 5721 }, { "epoch": 0.71, "grad_norm": 1.5665900419493022, "learning_rate": 2.046784261312006e-06, "loss": 0.5023, "step": 5722 }, { "epoch": 0.71, "grad_norm": 1.5347720498202977, "learning_rate": 2.0451627916200236e-06, "loss": 0.5354, "step": 5723 }, { "epoch": 0.71, "grad_norm": 3.4266227402970557, "learning_rate": 2.0435417993080194e-06, "loss": 0.5164, "step": 5724 }, { "epoch": 0.71, "grad_norm": 1.2459870240656372, "learning_rate": 2.041921284637874e-06, "loss": 0.5072, "step": 5725 }, { "epoch": 0.71, "grad_norm": 1.4351773132797392, "learning_rate": 2.040301247871399e-06, "loss": 0.4542, "step": 5726 }, { "epoch": 0.71, "grad_norm": 1.5153000504442997, "learning_rate": 2.0386816892703225e-06, "loss": 0.4969, "step": 5727 }, { "epoch": 0.71, "grad_norm": 1.994793007552151, "learning_rate": 2.037062609096302e-06, "loss": 0.4812, "step": 5728 }, { "epoch": 0.71, "grad_norm": 1.3214414084860961, "learning_rate": 2.035444007610912e-06, "loss": 0.4687, "step": 5729 }, { "epoch": 0.71, "grad_norm": 1.6187642823487383, "learning_rate": 2.0338258850756507e-06, "loss": 0.4927, "step": 5730 }, { "epoch": 0.71, "grad_norm": 3.4220341032570714, "learning_rate": 2.032208241751941e-06, "loss": 0.4744, "step": 5731 }, { "epoch": 0.71, "grad_norm": 1.3246981718775324, "learning_rate": 2.0305910779011277e-06, "loss": 0.5096, "step": 5732 }, { "epoch": 0.71, "grad_norm": 1.2892581949710045, "learning_rate": 2.02897439378448e-06, "loss": 0.5107, "step": 5733 }, { "epoch": 0.71, "grad_norm": 1.5615295073884667, "learning_rate": 2.0273581896631837e-06, "loss": 0.4812, "step": 5734 }, { "epoch": 0.71, "grad_norm": 1.4455183398778615, "learning_rate": 2.025742465798355e-06, "loss": 0.502, "step": 5735 }, { "epoch": 0.71, "grad_norm": 2.557923520928341, "learning_rate": 2.0241272224510235e-06, "loss": 0.5193, "step": 5736 }, { "epoch": 0.71, "grad_norm": 1.3621564814195775, "learning_rate": 2.0225124598821498e-06, "loss": 0.4869, "step": 5737 }, { "epoch": 0.71, "grad_norm": 2.45337552752045, "learning_rate": 2.02089817835261e-06, "loss": 0.5248, "step": 5738 }, { "epoch": 0.71, "grad_norm": 2.233319918986319, "learning_rate": 2.019284378123207e-06, "loss": 0.4867, "step": 5739 }, { "epoch": 0.71, "grad_norm": 1.3556952200964876, "learning_rate": 2.017671059454667e-06, "loss": 0.471, "step": 5740 }, { "epoch": 0.71, "grad_norm": 1.4902769625106627, "learning_rate": 2.0160582226076304e-06, "loss": 0.4932, "step": 5741 }, { "epoch": 0.71, "grad_norm": 1.7142701712089856, "learning_rate": 2.0144458678426705e-06, "loss": 0.4717, "step": 5742 }, { "epoch": 0.71, "grad_norm": 1.3939041873730047, "learning_rate": 2.0128339954202734e-06, "loss": 0.5065, "step": 5743 }, { "epoch": 0.71, "grad_norm": 2.8410248810440564, "learning_rate": 2.0112226056008547e-06, "loss": 0.4957, "step": 5744 }, { "epoch": 0.71, "grad_norm": 1.6987436213955667, "learning_rate": 2.009611698644745e-06, "loss": 0.4998, "step": 5745 }, { "epoch": 0.71, "grad_norm": 1.3997597838397018, "learning_rate": 2.008001274812204e-06, "loss": 0.4884, "step": 5746 }, { "epoch": 0.71, "grad_norm": 1.8261750841737288, "learning_rate": 2.006391334363407e-06, "loss": 0.5535, "step": 5747 }, { "epoch": 0.71, "grad_norm": 1.6321622352936611, "learning_rate": 2.004781877558455e-06, "loss": 0.4615, "step": 5748 }, { "epoch": 0.71, "grad_norm": 2.914268686757844, "learning_rate": 2.003172904657372e-06, "loss": 0.4293, "step": 5749 }, { "epoch": 0.71, "grad_norm": 1.3570114851918123, "learning_rate": 2.0015644159200974e-06, "loss": 0.5056, "step": 5750 }, { "epoch": 0.71, "grad_norm": 2.0706697522578237, "learning_rate": 1.9999564116065017e-06, "loss": 0.4964, "step": 5751 }, { "epoch": 0.71, "grad_norm": 2.0556824675288645, "learning_rate": 1.998348891976368e-06, "loss": 0.4605, "step": 5752 }, { "epoch": 0.71, "grad_norm": 1.9574956421434813, "learning_rate": 1.9967418572894087e-06, "loss": 0.4795, "step": 5753 }, { "epoch": 0.71, "grad_norm": 0.6449227143739888, "learning_rate": 1.995135307805251e-06, "loss": 0.5001, "step": 5754 }, { "epoch": 0.71, "grad_norm": 1.46664255915646, "learning_rate": 1.9935292437834508e-06, "loss": 0.5121, "step": 5755 }, { "epoch": 0.71, "grad_norm": 1.3813895466955008, "learning_rate": 1.9919236654834776e-06, "loss": 0.5109, "step": 5756 }, { "epoch": 0.71, "grad_norm": 2.0464129272443827, "learning_rate": 1.9903185731647294e-06, "loss": 0.4965, "step": 5757 }, { "epoch": 0.71, "grad_norm": 1.575954407433671, "learning_rate": 1.988713967086524e-06, "loss": 0.5024, "step": 5758 }, { "epoch": 0.71, "grad_norm": 1.5791710315234764, "learning_rate": 1.9871098475080968e-06, "loss": 0.5293, "step": 5759 }, { "epoch": 0.71, "grad_norm": 1.5824864416558846, "learning_rate": 1.9855062146886104e-06, "loss": 0.526, "step": 5760 }, { "epoch": 0.71, "grad_norm": 1.7775269855188194, "learning_rate": 1.9839030688871432e-06, "loss": 0.5091, "step": 5761 }, { "epoch": 0.72, "grad_norm": 1.5349204131009164, "learning_rate": 1.9823004103626996e-06, "loss": 0.478, "step": 5762 }, { "epoch": 0.72, "grad_norm": 1.3719854482227383, "learning_rate": 1.980698239374201e-06, "loss": 0.4685, "step": 5763 }, { "epoch": 0.72, "grad_norm": 1.6852175119165915, "learning_rate": 1.979096556180493e-06, "loss": 0.4881, "step": 5764 }, { "epoch": 0.72, "grad_norm": 1.8625313877636154, "learning_rate": 1.9774953610403443e-06, "loss": 0.5216, "step": 5765 }, { "epoch": 0.72, "grad_norm": 1.450107885202208, "learning_rate": 1.975894654212438e-06, "loss": 0.4513, "step": 5766 }, { "epoch": 0.72, "grad_norm": 1.5230450906221689, "learning_rate": 1.9742944359553855e-06, "loss": 0.5149, "step": 5767 }, { "epoch": 0.72, "grad_norm": 2.873593720362626, "learning_rate": 1.972694706527714e-06, "loss": 0.4503, "step": 5768 }, { "epoch": 0.72, "grad_norm": 3.623455636704811, "learning_rate": 1.971095466187876e-06, "loss": 0.538, "step": 5769 }, { "epoch": 0.72, "grad_norm": 1.696139524516132, "learning_rate": 1.9694967151942403e-06, "loss": 0.471, "step": 5770 }, { "epoch": 0.72, "grad_norm": 1.9568653647405692, "learning_rate": 1.9678984538051015e-06, "loss": 0.5261, "step": 5771 }, { "epoch": 0.72, "grad_norm": 1.5443916493642844, "learning_rate": 1.966300682278671e-06, "loss": 0.4776, "step": 5772 }, { "epoch": 0.72, "grad_norm": 1.6470232629154051, "learning_rate": 1.964703400873083e-06, "loss": 0.465, "step": 5773 }, { "epoch": 0.72, "grad_norm": 1.4860505755950013, "learning_rate": 1.963106609846395e-06, "loss": 0.4593, "step": 5774 }, { "epoch": 0.72, "grad_norm": 0.6271448336615271, "learning_rate": 1.9615103094565798e-06, "loss": 0.5128, "step": 5775 }, { "epoch": 0.72, "grad_norm": 1.6066932286908366, "learning_rate": 1.9599144999615355e-06, "loss": 0.513, "step": 5776 }, { "epoch": 0.72, "grad_norm": 2.1186709048927224, "learning_rate": 1.9583191816190773e-06, "loss": 0.5115, "step": 5777 }, { "epoch": 0.72, "grad_norm": 1.4370821816965451, "learning_rate": 1.9567243546869453e-06, "loss": 0.4406, "step": 5778 }, { "epoch": 0.72, "grad_norm": 1.352541970965906, "learning_rate": 1.955130019422795e-06, "loss": 0.5274, "step": 5779 }, { "epoch": 0.72, "grad_norm": 1.3274741708035742, "learning_rate": 1.953536176084207e-06, "loss": 0.4426, "step": 5780 }, { "epoch": 0.72, "grad_norm": 1.4631876821754177, "learning_rate": 1.9519428249286825e-06, "loss": 0.5083, "step": 5781 }, { "epoch": 0.72, "grad_norm": 1.4116956697135918, "learning_rate": 1.9503499662136378e-06, "loss": 0.5186, "step": 5782 }, { "epoch": 0.72, "grad_norm": 1.684331510368117, "learning_rate": 1.9487576001964166e-06, "loss": 0.4865, "step": 5783 }, { "epoch": 0.72, "grad_norm": 1.4742398214878512, "learning_rate": 1.947165727134276e-06, "loss": 0.5243, "step": 5784 }, { "epoch": 0.72, "grad_norm": 0.6588770030376725, "learning_rate": 1.945574347284401e-06, "loss": 0.5375, "step": 5785 }, { "epoch": 0.72, "grad_norm": 1.4088544461510655, "learning_rate": 1.9439834609038893e-06, "loss": 0.4529, "step": 5786 }, { "epoch": 0.72, "grad_norm": 1.4820327420256838, "learning_rate": 1.9423930682497664e-06, "loss": 0.4909, "step": 5787 }, { "epoch": 0.72, "grad_norm": 1.4534489856721242, "learning_rate": 1.940803169578969e-06, "loss": 0.5388, "step": 5788 }, { "epoch": 0.72, "grad_norm": 1.4831040727804403, "learning_rate": 1.939213765148366e-06, "loss": 0.5078, "step": 5789 }, { "epoch": 0.72, "grad_norm": 1.3836192912332803, "learning_rate": 1.937624855214736e-06, "loss": 0.5005, "step": 5790 }, { "epoch": 0.72, "grad_norm": 1.8974608544636409, "learning_rate": 1.9360364400347803e-06, "loss": 0.5136, "step": 5791 }, { "epoch": 0.72, "grad_norm": 1.59025346888609, "learning_rate": 1.9344485198651243e-06, "loss": 0.5284, "step": 5792 }, { "epoch": 0.72, "grad_norm": 1.767797328488242, "learning_rate": 1.9328610949623068e-06, "loss": 0.5235, "step": 5793 }, { "epoch": 0.72, "grad_norm": 1.7296551897243873, "learning_rate": 1.9312741655827945e-06, "loss": 0.5297, "step": 5794 }, { "epoch": 0.72, "grad_norm": 1.8958100276090986, "learning_rate": 1.9296877319829656e-06, "loss": 0.4773, "step": 5795 }, { "epoch": 0.72, "grad_norm": 1.8361969548943964, "learning_rate": 1.928101794419126e-06, "loss": 0.4559, "step": 5796 }, { "epoch": 0.72, "grad_norm": 3.215608924820692, "learning_rate": 1.9265163531474935e-06, "loss": 0.5468, "step": 5797 }, { "epoch": 0.72, "grad_norm": 1.1295612581503611, "learning_rate": 1.924931408424216e-06, "loss": 0.4339, "step": 5798 }, { "epoch": 0.72, "grad_norm": 1.4019818248835516, "learning_rate": 1.923346960505353e-06, "loss": 0.5145, "step": 5799 }, { "epoch": 0.72, "grad_norm": 1.6831507214857904, "learning_rate": 1.9217630096468824e-06, "loss": 0.5159, "step": 5800 }, { "epoch": 0.72, "grad_norm": 1.4923805376132564, "learning_rate": 1.920179556104711e-06, "loss": 0.5629, "step": 5801 }, { "epoch": 0.72, "grad_norm": 1.2287299430523901, "learning_rate": 1.9185966001346546e-06, "loss": 0.4693, "step": 5802 }, { "epoch": 0.72, "grad_norm": 0.6705904758601706, "learning_rate": 1.9170141419924583e-06, "loss": 0.4705, "step": 5803 }, { "epoch": 0.72, "grad_norm": 1.447868152017363, "learning_rate": 1.915432181933778e-06, "loss": 0.4798, "step": 5804 }, { "epoch": 0.72, "grad_norm": 0.7060735277920716, "learning_rate": 1.9138507202141947e-06, "loss": 0.4894, "step": 5805 }, { "epoch": 0.72, "grad_norm": 1.4861249493666067, "learning_rate": 1.912269757089208e-06, "loss": 0.5246, "step": 5806 }, { "epoch": 0.72, "grad_norm": 1.505205873541527, "learning_rate": 1.9106892928142383e-06, "loss": 0.4855, "step": 5807 }, { "epoch": 0.72, "grad_norm": 2.0736622759742143, "learning_rate": 1.9091093276446197e-06, "loss": 0.4664, "step": 5808 }, { "epoch": 0.72, "grad_norm": 1.5728866068387426, "learning_rate": 1.9075298618356134e-06, "loss": 0.4832, "step": 5809 }, { "epoch": 0.72, "grad_norm": 1.567346546602158, "learning_rate": 1.905950895642394e-06, "loss": 0.523, "step": 5810 }, { "epoch": 0.72, "grad_norm": 2.4303099435053412, "learning_rate": 1.9043724293200556e-06, "loss": 0.5129, "step": 5811 }, { "epoch": 0.72, "grad_norm": 2.0313109936019274, "learning_rate": 1.9027944631236161e-06, "loss": 0.533, "step": 5812 }, { "epoch": 0.72, "grad_norm": 1.6289956972165225, "learning_rate": 1.9012169973080064e-06, "loss": 0.4587, "step": 5813 }, { "epoch": 0.72, "grad_norm": 1.4720872598727337, "learning_rate": 1.899640032128085e-06, "loss": 0.4889, "step": 5814 }, { "epoch": 0.72, "grad_norm": 1.3338535876848339, "learning_rate": 1.8980635678386206e-06, "loss": 0.4974, "step": 5815 }, { "epoch": 0.72, "grad_norm": 1.4441314017374718, "learning_rate": 1.8964876046943081e-06, "loss": 0.484, "step": 5816 }, { "epoch": 0.72, "grad_norm": 1.5841626988615176, "learning_rate": 1.8949121429497546e-06, "loss": 0.5346, "step": 5817 }, { "epoch": 0.72, "grad_norm": 1.51894023016548, "learning_rate": 1.8933371828594932e-06, "loss": 0.4579, "step": 5818 }, { "epoch": 0.72, "grad_norm": 1.3708186159993732, "learning_rate": 1.8917627246779708e-06, "loss": 0.4752, "step": 5819 }, { "epoch": 0.72, "grad_norm": 1.351325739538545, "learning_rate": 1.890188768659554e-06, "loss": 0.5004, "step": 5820 }, { "epoch": 0.72, "grad_norm": 1.4460543738772533, "learning_rate": 1.8886153150585295e-06, "loss": 0.5045, "step": 5821 }, { "epoch": 0.72, "grad_norm": 1.4696479913824958, "learning_rate": 1.8870423641291042e-06, "loss": 0.4601, "step": 5822 }, { "epoch": 0.72, "grad_norm": 1.5593078416590118, "learning_rate": 1.8854699161254031e-06, "loss": 0.4386, "step": 5823 }, { "epoch": 0.72, "grad_norm": 1.6144109272623572, "learning_rate": 1.8838979713014654e-06, "loss": 0.4931, "step": 5824 }, { "epoch": 0.72, "grad_norm": 7.021351446475987, "learning_rate": 1.8823265299112564e-06, "loss": 0.4625, "step": 5825 }, { "epoch": 0.72, "grad_norm": 1.5024912284123797, "learning_rate": 1.880755592208653e-06, "loss": 0.4997, "step": 5826 }, { "epoch": 0.72, "grad_norm": 1.7213862970566716, "learning_rate": 1.879185158447457e-06, "loss": 0.4717, "step": 5827 }, { "epoch": 0.72, "grad_norm": 1.6218774973615702, "learning_rate": 1.8776152288813842e-06, "loss": 0.474, "step": 5828 }, { "epoch": 0.72, "grad_norm": 1.9492154048572368, "learning_rate": 1.8760458037640677e-06, "loss": 0.4841, "step": 5829 }, { "epoch": 0.72, "grad_norm": 2.374140362550844, "learning_rate": 1.874476883349068e-06, "loss": 0.4599, "step": 5830 }, { "epoch": 0.72, "grad_norm": 1.8033689611937722, "learning_rate": 1.8729084678898534e-06, "loss": 0.4844, "step": 5831 }, { "epoch": 0.72, "grad_norm": 1.441908053408819, "learning_rate": 1.8713405576398187e-06, "loss": 0.4918, "step": 5832 }, { "epoch": 0.72, "grad_norm": 1.6312171403721705, "learning_rate": 1.8697731528522694e-06, "loss": 0.5077, "step": 5833 }, { "epoch": 0.72, "grad_norm": 6.715085014776812, "learning_rate": 1.868206253780438e-06, "loss": 0.4836, "step": 5834 }, { "epoch": 0.72, "grad_norm": 1.3763809397355025, "learning_rate": 1.8666398606774667e-06, "loss": 0.4653, "step": 5835 }, { "epoch": 0.72, "grad_norm": 1.4817881047709291, "learning_rate": 1.865073973796424e-06, "loss": 0.5401, "step": 5836 }, { "epoch": 0.72, "grad_norm": 1.8522709107388002, "learning_rate": 1.8635085933902907e-06, "loss": 0.5189, "step": 5837 }, { "epoch": 0.72, "grad_norm": 0.6687465006836861, "learning_rate": 1.8619437197119644e-06, "loss": 0.4893, "step": 5838 }, { "epoch": 0.72, "grad_norm": 1.8708791015591064, "learning_rate": 1.860379353014271e-06, "loss": 0.5368, "step": 5839 }, { "epoch": 0.72, "grad_norm": 1.7487248472707533, "learning_rate": 1.858815493549943e-06, "loss": 0.5026, "step": 5840 }, { "epoch": 0.72, "grad_norm": 17.919683857302203, "learning_rate": 1.8572521415716387e-06, "loss": 0.4393, "step": 5841 }, { "epoch": 0.72, "grad_norm": 1.462150126869578, "learning_rate": 1.8556892973319284e-06, "loss": 0.4983, "step": 5842 }, { "epoch": 0.73, "grad_norm": 2.334792323329871, "learning_rate": 1.8541269610833061e-06, "loss": 0.5266, "step": 5843 }, { "epoch": 0.73, "grad_norm": 2.2295138381557114, "learning_rate": 1.852565133078178e-06, "loss": 0.5002, "step": 5844 }, { "epoch": 0.73, "grad_norm": 1.3606810914725282, "learning_rate": 1.851003813568874e-06, "loss": 0.5448, "step": 5845 }, { "epoch": 0.73, "grad_norm": 1.3410900367471983, "learning_rate": 1.8494430028076372e-06, "loss": 0.4918, "step": 5846 }, { "epoch": 0.73, "grad_norm": 1.353687681132741, "learning_rate": 1.84788270104663e-06, "loss": 0.4591, "step": 5847 }, { "epoch": 0.73, "grad_norm": 2.2867728993341863, "learning_rate": 1.846322908537936e-06, "loss": 0.4626, "step": 5848 }, { "epoch": 0.73, "grad_norm": 1.5021326759171463, "learning_rate": 1.8447636255335488e-06, "loss": 0.5241, "step": 5849 }, { "epoch": 0.73, "grad_norm": 1.5503247553351363, "learning_rate": 1.8432048522853891e-06, "loss": 0.5121, "step": 5850 }, { "epoch": 0.73, "grad_norm": 1.851781475388872, "learning_rate": 1.8416465890452862e-06, "loss": 0.4923, "step": 5851 }, { "epoch": 0.73, "grad_norm": 1.7091706381910228, "learning_rate": 1.8400888360649949e-06, "loss": 0.477, "step": 5852 }, { "epoch": 0.73, "grad_norm": 1.4134026379502893, "learning_rate": 1.8385315935961805e-06, "loss": 0.5358, "step": 5853 }, { "epoch": 0.73, "grad_norm": 2.0054461199669316, "learning_rate": 1.836974861890431e-06, "loss": 0.5161, "step": 5854 }, { "epoch": 0.73, "grad_norm": 1.3766422435819043, "learning_rate": 1.8354186411992514e-06, "loss": 0.4829, "step": 5855 }, { "epoch": 0.73, "grad_norm": 1.3201172847650677, "learning_rate": 1.8338629317740598e-06, "loss": 0.4482, "step": 5856 }, { "epoch": 0.73, "grad_norm": 2.0163699674700495, "learning_rate": 1.8323077338661981e-06, "loss": 0.5215, "step": 5857 }, { "epoch": 0.73, "grad_norm": 1.6190726538042708, "learning_rate": 1.8307530477269192e-06, "loss": 0.4652, "step": 5858 }, { "epoch": 0.73, "grad_norm": 1.4391573430666145, "learning_rate": 1.829198873607399e-06, "loss": 0.5407, "step": 5859 }, { "epoch": 0.73, "grad_norm": 2.5196588134185403, "learning_rate": 1.8276452117587252e-06, "loss": 0.5364, "step": 5860 }, { "epoch": 0.73, "grad_norm": 1.4095757168875038, "learning_rate": 1.8260920624319084e-06, "loss": 0.5028, "step": 5861 }, { "epoch": 0.73, "grad_norm": 1.915322471299682, "learning_rate": 1.824539425877871e-06, "loss": 0.4948, "step": 5862 }, { "epoch": 0.73, "grad_norm": 1.3294204484252052, "learning_rate": 1.822987302347456e-06, "loss": 0.5076, "step": 5863 }, { "epoch": 0.73, "grad_norm": 1.472479235952562, "learning_rate": 1.8214356920914244e-06, "loss": 0.4979, "step": 5864 }, { "epoch": 0.73, "grad_norm": 1.3967686022768886, "learning_rate": 1.8198845953604494e-06, "loss": 0.4981, "step": 5865 }, { "epoch": 0.73, "grad_norm": 2.250885405548494, "learning_rate": 1.818334012405128e-06, "loss": 0.4818, "step": 5866 }, { "epoch": 0.73, "grad_norm": 1.5312106093910633, "learning_rate": 1.8167839434759665e-06, "loss": 0.4943, "step": 5867 }, { "epoch": 0.73, "grad_norm": 1.5033972291446327, "learning_rate": 1.8152343888233965e-06, "loss": 0.5091, "step": 5868 }, { "epoch": 0.73, "grad_norm": 1.4241722148533027, "learning_rate": 1.8136853486977575e-06, "loss": 0.4618, "step": 5869 }, { "epoch": 0.73, "grad_norm": 1.837726102143834, "learning_rate": 1.8121368233493154e-06, "loss": 0.4202, "step": 5870 }, { "epoch": 0.73, "grad_norm": 1.3763583291820833, "learning_rate": 1.8105888130282433e-06, "loss": 0.4719, "step": 5871 }, { "epoch": 0.73, "grad_norm": 1.3135692863116784, "learning_rate": 1.8090413179846383e-06, "loss": 0.4808, "step": 5872 }, { "epoch": 0.73, "grad_norm": 0.6601278979307443, "learning_rate": 1.8074943384685139e-06, "loss": 0.4719, "step": 5873 }, { "epoch": 0.73, "grad_norm": 1.533500102581177, "learning_rate": 1.8059478747297942e-06, "loss": 0.5315, "step": 5874 }, { "epoch": 0.73, "grad_norm": 1.9325656771854214, "learning_rate": 1.8044019270183278e-06, "loss": 0.4732, "step": 5875 }, { "epoch": 0.73, "grad_norm": 1.6384871820813307, "learning_rate": 1.802856495583873e-06, "loss": 0.4881, "step": 5876 }, { "epoch": 0.73, "grad_norm": 1.3817610947695655, "learning_rate": 1.8013115806761105e-06, "loss": 0.5039, "step": 5877 }, { "epoch": 0.73, "grad_norm": 1.9646188909105635, "learning_rate": 1.7997671825446323e-06, "loss": 0.5361, "step": 5878 }, { "epoch": 0.73, "grad_norm": 1.658577168296523, "learning_rate": 1.798223301438951e-06, "loss": 0.5399, "step": 5879 }, { "epoch": 0.73, "grad_norm": 0.6561815290671748, "learning_rate": 1.7966799376084954e-06, "loss": 0.5165, "step": 5880 }, { "epoch": 0.73, "grad_norm": 1.698367245602605, "learning_rate": 1.7951370913026067e-06, "loss": 0.5214, "step": 5881 }, { "epoch": 0.73, "grad_norm": 1.4701773401872964, "learning_rate": 1.7935947627705485e-06, "loss": 0.4431, "step": 5882 }, { "epoch": 0.73, "grad_norm": 1.4422387143040183, "learning_rate": 1.7920529522614943e-06, "loss": 0.5262, "step": 5883 }, { "epoch": 0.73, "grad_norm": 1.3581629909265374, "learning_rate": 1.7905116600245404e-06, "loss": 0.5287, "step": 5884 }, { "epoch": 0.73, "grad_norm": 1.5816641254198855, "learning_rate": 1.788970886308693e-06, "loss": 0.5171, "step": 5885 }, { "epoch": 0.73, "grad_norm": 1.6127891341786535, "learning_rate": 1.7874306313628802e-06, "loss": 0.4907, "step": 5886 }, { "epoch": 0.73, "grad_norm": 1.3761269959792422, "learning_rate": 1.78589089543594e-06, "loss": 0.4984, "step": 5887 }, { "epoch": 0.73, "grad_norm": 1.6734571305492423, "learning_rate": 1.7843516787766357e-06, "loss": 0.5091, "step": 5888 }, { "epoch": 0.73, "grad_norm": 1.4534246916626015, "learning_rate": 1.7828129816336387e-06, "loss": 0.5099, "step": 5889 }, { "epoch": 0.73, "grad_norm": 1.4742219251314783, "learning_rate": 1.7812748042555378e-06, "loss": 0.4914, "step": 5890 }, { "epoch": 0.73, "grad_norm": 1.357326060264499, "learning_rate": 1.7797371468908414e-06, "loss": 0.461, "step": 5891 }, { "epoch": 0.73, "grad_norm": 1.6500132546473334, "learning_rate": 1.7782000097879692e-06, "loss": 0.4542, "step": 5892 }, { "epoch": 0.73, "grad_norm": 1.4501522402373412, "learning_rate": 1.776663393195262e-06, "loss": 0.4509, "step": 5893 }, { "epoch": 0.73, "grad_norm": 1.3054125980508295, "learning_rate": 1.7751272973609707e-06, "loss": 0.4766, "step": 5894 }, { "epoch": 0.73, "grad_norm": 5.982710616249453, "learning_rate": 1.7735917225332666e-06, "loss": 0.5887, "step": 5895 }, { "epoch": 0.73, "grad_norm": 1.888839005725094, "learning_rate": 1.7720566689602354e-06, "loss": 0.4827, "step": 5896 }, { "epoch": 0.73, "grad_norm": 1.4611341133548106, "learning_rate": 1.77052213688988e-06, "loss": 0.4747, "step": 5897 }, { "epoch": 0.73, "grad_norm": 1.894171439291106, "learning_rate": 1.768988126570116e-06, "loss": 0.5112, "step": 5898 }, { "epoch": 0.73, "grad_norm": 2.2186601395328758, "learning_rate": 1.767454638248775e-06, "loss": 0.4769, "step": 5899 }, { "epoch": 0.73, "grad_norm": 1.2891341736086237, "learning_rate": 1.7659216721736082e-06, "loss": 0.4466, "step": 5900 }, { "epoch": 0.73, "grad_norm": 1.4263591816159855, "learning_rate": 1.764389228592277e-06, "loss": 0.4862, "step": 5901 }, { "epoch": 0.73, "grad_norm": 1.997366258609366, "learning_rate": 1.7628573077523647e-06, "loss": 0.4851, "step": 5902 }, { "epoch": 0.73, "grad_norm": 0.6842837922339248, "learning_rate": 1.7613259099013608e-06, "loss": 0.4807, "step": 5903 }, { "epoch": 0.73, "grad_norm": 1.4564574019636156, "learning_rate": 1.7597950352866833e-06, "loss": 0.4421, "step": 5904 }, { "epoch": 0.73, "grad_norm": 1.783210696583975, "learning_rate": 1.7582646841556533e-06, "loss": 0.5436, "step": 5905 }, { "epoch": 0.73, "grad_norm": 1.3190447863888473, "learning_rate": 1.756734856755516e-06, "loss": 0.4655, "step": 5906 }, { "epoch": 0.73, "grad_norm": 1.8206358433489251, "learning_rate": 1.7552055533334268e-06, "loss": 0.4928, "step": 5907 }, { "epoch": 0.73, "grad_norm": 1.4403153492787868, "learning_rate": 1.7536767741364558e-06, "loss": 0.4993, "step": 5908 }, { "epoch": 0.73, "grad_norm": 1.3428632704917827, "learning_rate": 1.752148519411595e-06, "loss": 0.4975, "step": 5909 }, { "epoch": 0.73, "grad_norm": 1.3080469443346712, "learning_rate": 1.7506207894057442e-06, "loss": 0.4692, "step": 5910 }, { "epoch": 0.73, "grad_norm": 1.2920041190174922, "learning_rate": 1.7490935843657242e-06, "loss": 0.5237, "step": 5911 }, { "epoch": 0.73, "grad_norm": 2.4014102888616162, "learning_rate": 1.7475669045382636e-06, "loss": 0.4767, "step": 5912 }, { "epoch": 0.73, "grad_norm": 1.4549220607061533, "learning_rate": 1.7460407501700178e-06, "loss": 0.5206, "step": 5913 }, { "epoch": 0.73, "grad_norm": 2.503657221600372, "learning_rate": 1.7445151215075456e-06, "loss": 0.4472, "step": 5914 }, { "epoch": 0.73, "grad_norm": 0.6978670048145843, "learning_rate": 1.7429900187973287e-06, "loss": 0.496, "step": 5915 }, { "epoch": 0.73, "grad_norm": 1.9133555141324627, "learning_rate": 1.741465442285758e-06, "loss": 0.5304, "step": 5916 }, { "epoch": 0.73, "grad_norm": 2.3521218724701396, "learning_rate": 1.7399413922191455e-06, "loss": 0.4711, "step": 5917 }, { "epoch": 0.73, "grad_norm": 1.2834337970618197, "learning_rate": 1.7384178688437132e-06, "loss": 0.4776, "step": 5918 }, { "epoch": 0.73, "grad_norm": 1.4133634953894676, "learning_rate": 1.7368948724055974e-06, "loss": 0.5234, "step": 5919 }, { "epoch": 0.73, "grad_norm": 1.7352465408962612, "learning_rate": 1.735372403150854e-06, "loss": 0.499, "step": 5920 }, { "epoch": 0.73, "grad_norm": 1.5830249891711863, "learning_rate": 1.7338504613254515e-06, "loss": 0.5055, "step": 5921 }, { "epoch": 0.73, "grad_norm": 1.6650457763157955, "learning_rate": 1.7323290471752741e-06, "loss": 0.4851, "step": 5922 }, { "epoch": 0.74, "grad_norm": 1.181933619004897, "learning_rate": 1.7308081609461163e-06, "loss": 0.4511, "step": 5923 }, { "epoch": 0.74, "grad_norm": 1.7000840553200878, "learning_rate": 1.7292878028836946e-06, "loss": 0.4752, "step": 5924 }, { "epoch": 0.74, "grad_norm": 2.3554970293049347, "learning_rate": 1.7277679732336328e-06, "loss": 0.474, "step": 5925 }, { "epoch": 0.74, "grad_norm": 1.67754838277534, "learning_rate": 1.7262486722414752e-06, "loss": 0.5077, "step": 5926 }, { "epoch": 0.74, "grad_norm": 1.3496912569635924, "learning_rate": 1.7247299001526773e-06, "loss": 0.4886, "step": 5927 }, { "epoch": 0.74, "grad_norm": 5.27870766288199, "learning_rate": 1.7232116572126067e-06, "loss": 0.423, "step": 5928 }, { "epoch": 0.74, "grad_norm": 1.8641894032739894, "learning_rate": 1.7216939436665558e-06, "loss": 0.5036, "step": 5929 }, { "epoch": 0.74, "grad_norm": 1.6300684737499331, "learning_rate": 1.7201767597597197e-06, "loss": 0.4689, "step": 5930 }, { "epoch": 0.74, "grad_norm": 1.2278131681948699, "learning_rate": 1.7186601057372155e-06, "loss": 0.4723, "step": 5931 }, { "epoch": 0.74, "grad_norm": 2.031259017202795, "learning_rate": 1.7171439818440688e-06, "loss": 0.4803, "step": 5932 }, { "epoch": 0.74, "grad_norm": 0.6538675916367489, "learning_rate": 1.7156283883252268e-06, "loss": 0.4797, "step": 5933 }, { "epoch": 0.74, "grad_norm": 1.6663573116080352, "learning_rate": 1.7141133254255426e-06, "loss": 0.4742, "step": 5934 }, { "epoch": 0.74, "grad_norm": 1.3804574862462933, "learning_rate": 1.712598793389792e-06, "loss": 0.4476, "step": 5935 }, { "epoch": 0.74, "grad_norm": 0.7024565514569163, "learning_rate": 1.7110847924626578e-06, "loss": 0.5029, "step": 5936 }, { "epoch": 0.74, "grad_norm": 1.4488627553590303, "learning_rate": 1.7095713228887411e-06, "loss": 0.4519, "step": 5937 }, { "epoch": 0.74, "grad_norm": 1.5775295336664255, "learning_rate": 1.7080583849125588e-06, "loss": 0.511, "step": 5938 }, { "epoch": 0.74, "grad_norm": 1.6119925589353812, "learning_rate": 1.7065459787785355e-06, "loss": 0.4613, "step": 5939 }, { "epoch": 0.74, "grad_norm": 1.6115711077895967, "learning_rate": 1.705034104731017e-06, "loss": 0.5089, "step": 5940 }, { "epoch": 0.74, "grad_norm": 1.7297349527982928, "learning_rate": 1.703522763014257e-06, "loss": 0.5235, "step": 5941 }, { "epoch": 0.74, "grad_norm": 1.353036605970803, "learning_rate": 1.702011953872429e-06, "loss": 0.462, "step": 5942 }, { "epoch": 0.74, "grad_norm": 2.9813637490464453, "learning_rate": 1.7005016775496135e-06, "loss": 0.5045, "step": 5943 }, { "epoch": 0.74, "grad_norm": 1.5793417303571762, "learning_rate": 1.6989919342898127e-06, "loss": 0.4784, "step": 5944 }, { "epoch": 0.74, "grad_norm": 1.50614086925251, "learning_rate": 1.697482724336938e-06, "loss": 0.4773, "step": 5945 }, { "epoch": 0.74, "grad_norm": 1.7080841760436045, "learning_rate": 1.695974047934814e-06, "loss": 0.5019, "step": 5946 }, { "epoch": 0.74, "grad_norm": 3.0029589857718344, "learning_rate": 1.694465905327184e-06, "loss": 0.5011, "step": 5947 }, { "epoch": 0.74, "grad_norm": 4.684124017120539, "learning_rate": 1.6929582967576975e-06, "loss": 0.5296, "step": 5948 }, { "epoch": 0.74, "grad_norm": 2.2969706559048375, "learning_rate": 1.6914512224699253e-06, "loss": 0.514, "step": 5949 }, { "epoch": 0.74, "grad_norm": 1.5848640384886945, "learning_rate": 1.6899446827073458e-06, "loss": 0.5012, "step": 5950 }, { "epoch": 0.74, "grad_norm": 1.9123582479570977, "learning_rate": 1.6884386777133566e-06, "loss": 0.5222, "step": 5951 }, { "epoch": 0.74, "grad_norm": 0.6449177798951945, "learning_rate": 1.6869332077312634e-06, "loss": 0.4658, "step": 5952 }, { "epoch": 0.74, "grad_norm": 1.4782830397628994, "learning_rate": 1.6854282730042893e-06, "loss": 0.4986, "step": 5953 }, { "epoch": 0.74, "grad_norm": 1.667017827478795, "learning_rate": 1.6839238737755715e-06, "loss": 0.4877, "step": 5954 }, { "epoch": 0.74, "grad_norm": 1.203265992788667, "learning_rate": 1.682420010288155e-06, "loss": 0.4829, "step": 5955 }, { "epoch": 0.74, "grad_norm": 1.5407269389663272, "learning_rate": 1.680916682785007e-06, "loss": 0.5296, "step": 5956 }, { "epoch": 0.74, "grad_norm": 8.962889161814955, "learning_rate": 1.6794138915089987e-06, "loss": 0.4829, "step": 5957 }, { "epoch": 0.74, "grad_norm": 1.6374891209770737, "learning_rate": 1.677911636702923e-06, "loss": 0.5128, "step": 5958 }, { "epoch": 0.74, "grad_norm": 1.3436117385055986, "learning_rate": 1.676409918609479e-06, "loss": 0.4986, "step": 5959 }, { "epoch": 0.74, "grad_norm": 1.5132495078968142, "learning_rate": 1.6749087374712858e-06, "loss": 0.5377, "step": 5960 }, { "epoch": 0.74, "grad_norm": 2.465158233052855, "learning_rate": 1.6734080935308694e-06, "loss": 0.512, "step": 5961 }, { "epoch": 0.74, "grad_norm": 1.704589541225872, "learning_rate": 1.6719079870306737e-06, "loss": 0.4977, "step": 5962 }, { "epoch": 0.74, "grad_norm": 1.3285426383237866, "learning_rate": 1.6704084182130552e-06, "loss": 0.4882, "step": 5963 }, { "epoch": 0.74, "grad_norm": 1.767399204910095, "learning_rate": 1.66890938732028e-06, "loss": 0.4877, "step": 5964 }, { "epoch": 0.74, "grad_norm": 5.677426192641951, "learning_rate": 1.6674108945945323e-06, "loss": 0.4759, "step": 5965 }, { "epoch": 0.74, "grad_norm": 1.3255144212986685, "learning_rate": 1.6659129402779034e-06, "loss": 0.4964, "step": 5966 }, { "epoch": 0.74, "grad_norm": 1.4742160373416446, "learning_rate": 1.664415524612405e-06, "loss": 0.4928, "step": 5967 }, { "epoch": 0.74, "grad_norm": 2.1391153981085873, "learning_rate": 1.6629186478399538e-06, "loss": 0.508, "step": 5968 }, { "epoch": 0.74, "grad_norm": 1.3940825832138908, "learning_rate": 1.6614223102023857e-06, "loss": 0.4659, "step": 5969 }, { "epoch": 0.74, "grad_norm": 1.777478443620191, "learning_rate": 1.6599265119414487e-06, "loss": 0.4779, "step": 5970 }, { "epoch": 0.74, "grad_norm": 1.4064111179425418, "learning_rate": 1.658431253298799e-06, "loss": 0.4892, "step": 5971 }, { "epoch": 0.74, "grad_norm": 1.3686681256777833, "learning_rate": 1.6569365345160116e-06, "loss": 0.5192, "step": 5972 }, { "epoch": 0.74, "grad_norm": 1.6184845033964539, "learning_rate": 1.6554423558345683e-06, "loss": 0.4287, "step": 5973 }, { "epoch": 0.74, "grad_norm": 1.9290552096867895, "learning_rate": 1.6539487174958706e-06, "loss": 0.4356, "step": 5974 }, { "epoch": 0.74, "grad_norm": 1.3287246157659554, "learning_rate": 1.652455619741225e-06, "loss": 0.4179, "step": 5975 }, { "epoch": 0.74, "grad_norm": 1.3576869075341544, "learning_rate": 1.6509630628118584e-06, "loss": 0.4915, "step": 5976 }, { "epoch": 0.74, "grad_norm": 1.7120315215609672, "learning_rate": 1.6494710469489033e-06, "loss": 0.5053, "step": 5977 }, { "epoch": 0.74, "grad_norm": 1.845271579003526, "learning_rate": 1.6479795723934088e-06, "loss": 0.5596, "step": 5978 }, { "epoch": 0.74, "grad_norm": 1.5981560815448819, "learning_rate": 1.646488639386339e-06, "loss": 0.49, "step": 5979 }, { "epoch": 0.74, "grad_norm": 1.3663767878155546, "learning_rate": 1.6449982481685616e-06, "loss": 0.4987, "step": 5980 }, { "epoch": 0.74, "grad_norm": 1.3410677975477996, "learning_rate": 1.6435083989808682e-06, "loss": 0.5274, "step": 5981 }, { "epoch": 0.74, "grad_norm": 1.4945763766755613, "learning_rate": 1.6420190920639522e-06, "loss": 0.544, "step": 5982 }, { "epoch": 0.74, "grad_norm": 2.3177394972577257, "learning_rate": 1.6405303276584277e-06, "loss": 0.482, "step": 5983 }, { "epoch": 0.74, "grad_norm": 2.7405380423742085, "learning_rate": 1.6390421060048151e-06, "loss": 0.4612, "step": 5984 }, { "epoch": 0.74, "grad_norm": 1.3509356339041592, "learning_rate": 1.6375544273435506e-06, "loss": 0.5186, "step": 5985 }, { "epoch": 0.74, "grad_norm": 1.3877589841195326, "learning_rate": 1.636067291914983e-06, "loss": 0.4668, "step": 5986 }, { "epoch": 0.74, "grad_norm": 1.9029029071359749, "learning_rate": 1.6345806999593717e-06, "loss": 0.5034, "step": 5987 }, { "epoch": 0.74, "grad_norm": 1.6081825938648133, "learning_rate": 1.633094651716889e-06, "loss": 0.4844, "step": 5988 }, { "epoch": 0.74, "grad_norm": 1.4427022899923019, "learning_rate": 1.6316091474276163e-06, "loss": 0.4864, "step": 5989 }, { "epoch": 0.74, "grad_norm": 2.4622955643128948, "learning_rate": 1.6301241873315544e-06, "loss": 0.5059, "step": 5990 }, { "epoch": 0.74, "grad_norm": 2.2968792017603343, "learning_rate": 1.628639771668607e-06, "loss": 0.5585, "step": 5991 }, { "epoch": 0.74, "grad_norm": 1.873706603490066, "learning_rate": 1.627155900678598e-06, "loss": 0.4722, "step": 5992 }, { "epoch": 0.74, "grad_norm": 7.139979943714902, "learning_rate": 1.625672574601258e-06, "loss": 0.4448, "step": 5993 }, { "epoch": 0.74, "grad_norm": 1.6664026791604412, "learning_rate": 1.6241897936762313e-06, "loss": 0.5281, "step": 5994 }, { "epoch": 0.74, "grad_norm": 1.3131089978810235, "learning_rate": 1.6227075581430751e-06, "loss": 0.5188, "step": 5995 }, { "epoch": 0.74, "grad_norm": 1.287306397039178, "learning_rate": 1.621225868241259e-06, "loss": 0.4818, "step": 5996 }, { "epoch": 0.74, "grad_norm": 1.4776254966103886, "learning_rate": 1.6197447242101615e-06, "loss": 0.4644, "step": 5997 }, { "epoch": 0.74, "grad_norm": 0.6944930673044044, "learning_rate": 1.618264126289073e-06, "loss": 0.5063, "step": 5998 }, { "epoch": 0.74, "grad_norm": 1.554341842980303, "learning_rate": 1.6167840747171997e-06, "loss": 0.4816, "step": 5999 }, { "epoch": 0.74, "grad_norm": 1.4297897583485726, "learning_rate": 1.6153045697336544e-06, "loss": 0.4987, "step": 6000 }, { "epoch": 0.74, "grad_norm": 2.810195614493212, "learning_rate": 1.6138256115774676e-06, "loss": 0.5221, "step": 6001 }, { "epoch": 0.74, "grad_norm": 2.166741697516714, "learning_rate": 1.6123472004875724e-06, "loss": 0.4781, "step": 6002 }, { "epoch": 0.74, "grad_norm": 1.3194032375621023, "learning_rate": 1.610869336702826e-06, "loss": 0.459, "step": 6003 }, { "epoch": 0.75, "grad_norm": 1.3722221268424892, "learning_rate": 1.6093920204619856e-06, "loss": 0.4974, "step": 6004 }, { "epoch": 0.75, "grad_norm": 8.340858799507641, "learning_rate": 1.6079152520037277e-06, "loss": 0.5013, "step": 6005 }, { "epoch": 0.75, "grad_norm": 2.1658509794232037, "learning_rate": 1.6064390315666356e-06, "loss": 0.4842, "step": 6006 }, { "epoch": 0.75, "grad_norm": 4.843094029528777, "learning_rate": 1.6049633593892045e-06, "loss": 0.4917, "step": 6007 }, { "epoch": 0.75, "grad_norm": 1.6668625189375703, "learning_rate": 1.6034882357098447e-06, "loss": 0.5162, "step": 6008 }, { "epoch": 0.75, "grad_norm": 0.6536351282188947, "learning_rate": 1.6020136607668724e-06, "loss": 0.4686, "step": 6009 }, { "epoch": 0.75, "grad_norm": 1.7594181925466228, "learning_rate": 1.6005396347985204e-06, "loss": 0.5251, "step": 6010 }, { "epoch": 0.75, "grad_norm": 1.3645245329180153, "learning_rate": 1.59906615804293e-06, "loss": 0.4973, "step": 6011 }, { "epoch": 0.75, "grad_norm": 13.455150978415842, "learning_rate": 1.5975932307381564e-06, "loss": 0.5131, "step": 6012 }, { "epoch": 0.75, "grad_norm": 1.7661889663932724, "learning_rate": 1.59612085312216e-06, "loss": 0.5674, "step": 6013 }, { "epoch": 0.75, "grad_norm": 1.5823722119837638, "learning_rate": 1.5946490254328207e-06, "loss": 0.517, "step": 6014 }, { "epoch": 0.75, "grad_norm": 1.2825435915322208, "learning_rate": 1.5931777479079224e-06, "loss": 0.4547, "step": 6015 }, { "epoch": 0.75, "grad_norm": 1.4780593108877558, "learning_rate": 1.591707020785162e-06, "loss": 0.4743, "step": 6016 }, { "epoch": 0.75, "grad_norm": 1.5317792626925832, "learning_rate": 1.5902368443021515e-06, "loss": 0.4931, "step": 6017 }, { "epoch": 0.75, "grad_norm": 1.5205937863915808, "learning_rate": 1.5887672186964066e-06, "loss": 0.5418, "step": 6018 }, { "epoch": 0.75, "grad_norm": 1.5859984688186561, "learning_rate": 1.5872981442053643e-06, "loss": 0.4967, "step": 6019 }, { "epoch": 0.75, "grad_norm": 2.2044996149466667, "learning_rate": 1.585829621066361e-06, "loss": 0.5056, "step": 6020 }, { "epoch": 0.75, "grad_norm": 2.523560594149025, "learning_rate": 1.5843616495166536e-06, "loss": 0.4778, "step": 6021 }, { "epoch": 0.75, "grad_norm": 1.3660556794601857, "learning_rate": 1.5828942297934018e-06, "loss": 0.4743, "step": 6022 }, { "epoch": 0.75, "grad_norm": 1.5846245967103545, "learning_rate": 1.581427362133685e-06, "loss": 0.4882, "step": 6023 }, { "epoch": 0.75, "grad_norm": 1.292360003145945, "learning_rate": 1.5799610467744836e-06, "loss": 0.4528, "step": 6024 }, { "epoch": 0.75, "grad_norm": 2.007002426929482, "learning_rate": 1.578495283952698e-06, "loss": 0.4652, "step": 6025 }, { "epoch": 0.75, "grad_norm": 1.7468838114684746, "learning_rate": 1.577030073905133e-06, "loss": 0.4918, "step": 6026 }, { "epoch": 0.75, "grad_norm": 0.7271485836999365, "learning_rate": 1.5755654168685037e-06, "loss": 0.4931, "step": 6027 }, { "epoch": 0.75, "grad_norm": 1.3111986543695473, "learning_rate": 1.5741013130794435e-06, "loss": 0.4633, "step": 6028 }, { "epoch": 0.75, "grad_norm": 0.6461414512441257, "learning_rate": 1.5726377627744877e-06, "loss": 0.4677, "step": 6029 }, { "epoch": 0.75, "grad_norm": 2.3608209292052993, "learning_rate": 1.5711747661900884e-06, "loss": 0.5287, "step": 6030 }, { "epoch": 0.75, "grad_norm": 1.47618381807499, "learning_rate": 1.5697123235626021e-06, "loss": 0.51, "step": 6031 }, { "epoch": 0.75, "grad_norm": 1.524575288440333, "learning_rate": 1.5682504351283034e-06, "loss": 0.5236, "step": 6032 }, { "epoch": 0.75, "grad_norm": 1.9620674437293155, "learning_rate": 1.5667891011233688e-06, "loss": 0.5136, "step": 6033 }, { "epoch": 0.75, "grad_norm": 1.429051606799475, "learning_rate": 1.5653283217838938e-06, "loss": 0.48, "step": 6034 }, { "epoch": 0.75, "grad_norm": 1.9913598932383123, "learning_rate": 1.563868097345877e-06, "loss": 0.4949, "step": 6035 }, { "epoch": 0.75, "grad_norm": 1.724628914180807, "learning_rate": 1.5624084280452313e-06, "loss": 0.5289, "step": 6036 }, { "epoch": 0.75, "grad_norm": 1.3544155289688808, "learning_rate": 1.5609493141177817e-06, "loss": 0.4698, "step": 6037 }, { "epoch": 0.75, "grad_norm": 1.4219588068053217, "learning_rate": 1.5594907557992568e-06, "loss": 0.5146, "step": 6038 }, { "epoch": 0.75, "grad_norm": 1.409410630606562, "learning_rate": 1.5580327533253037e-06, "loss": 0.487, "step": 6039 }, { "epoch": 0.75, "grad_norm": 1.5799718286887938, "learning_rate": 1.5565753069314716e-06, "loss": 0.4632, "step": 6040 }, { "epoch": 0.75, "grad_norm": 1.6423879889232786, "learning_rate": 1.555118416853227e-06, "loss": 0.5132, "step": 6041 }, { "epoch": 0.75, "grad_norm": 1.6570357501497963, "learning_rate": 1.5536620833259409e-06, "loss": 0.5067, "step": 6042 }, { "epoch": 0.75, "grad_norm": 1.6347678528254872, "learning_rate": 1.5522063065848974e-06, "loss": 0.4755, "step": 6043 }, { "epoch": 0.75, "grad_norm": 1.9050361222724341, "learning_rate": 1.5507510868652919e-06, "loss": 0.5277, "step": 6044 }, { "epoch": 0.75, "grad_norm": 1.4023645235573734, "learning_rate": 1.5492964244022258e-06, "loss": 0.5248, "step": 6045 }, { "epoch": 0.75, "grad_norm": 1.5803261516406482, "learning_rate": 1.5478423194307147e-06, "loss": 0.5049, "step": 6046 }, { "epoch": 0.75, "grad_norm": 1.440458581359572, "learning_rate": 1.546388772185679e-06, "loss": 0.4499, "step": 6047 }, { "epoch": 0.75, "grad_norm": 1.5016619536805895, "learning_rate": 1.5449357829019556e-06, "loss": 0.4498, "step": 6048 }, { "epoch": 0.75, "grad_norm": 1.6320705758129939, "learning_rate": 1.5434833518142839e-06, "loss": 0.4782, "step": 6049 }, { "epoch": 0.75, "grad_norm": 5.790727339102416, "learning_rate": 1.542031479157321e-06, "loss": 0.4854, "step": 6050 }, { "epoch": 0.75, "grad_norm": 1.7130602317036618, "learning_rate": 1.5405801651656266e-06, "loss": 0.5126, "step": 6051 }, { "epoch": 0.75, "grad_norm": 1.4593958980420332, "learning_rate": 1.5391294100736736e-06, "loss": 0.4715, "step": 6052 }, { "epoch": 0.75, "grad_norm": 2.6284291543273373, "learning_rate": 1.5376792141158469e-06, "loss": 0.4736, "step": 6053 }, { "epoch": 0.75, "grad_norm": 1.8665726653903791, "learning_rate": 1.5362295775264353e-06, "loss": 0.5013, "step": 6054 }, { "epoch": 0.75, "grad_norm": 1.9314327501677002, "learning_rate": 1.5347805005396427e-06, "loss": 0.5416, "step": 6055 }, { "epoch": 0.75, "grad_norm": 1.5284492197838069, "learning_rate": 1.5333319833895776e-06, "loss": 0.4701, "step": 6056 }, { "epoch": 0.75, "grad_norm": 1.4593419373204979, "learning_rate": 1.5318840263102641e-06, "loss": 0.5075, "step": 6057 }, { "epoch": 0.75, "grad_norm": 1.7455863857710499, "learning_rate": 1.5304366295356287e-06, "loss": 0.5364, "step": 6058 }, { "epoch": 0.75, "grad_norm": 1.464678343219715, "learning_rate": 1.5289897932995134e-06, "loss": 0.4746, "step": 6059 }, { "epoch": 0.75, "grad_norm": 1.8595107075944286, "learning_rate": 1.5275435178356685e-06, "loss": 0.5084, "step": 6060 }, { "epoch": 0.75, "grad_norm": 1.9740787321561957, "learning_rate": 1.526097803377749e-06, "loss": 0.46, "step": 6061 }, { "epoch": 0.75, "grad_norm": 0.6755881265433279, "learning_rate": 1.5246526501593262e-06, "loss": 0.4791, "step": 6062 }, { "epoch": 0.75, "grad_norm": 1.5588452764686183, "learning_rate": 1.5232080584138748e-06, "loss": 0.5, "step": 6063 }, { "epoch": 0.75, "grad_norm": 1.4090921609069993, "learning_rate": 1.5217640283747836e-06, "loss": 0.4794, "step": 6064 }, { "epoch": 0.75, "grad_norm": 1.4383211702238097, "learning_rate": 1.5203205602753462e-06, "loss": 0.4716, "step": 6065 }, { "epoch": 0.75, "grad_norm": 1.6150559905537978, "learning_rate": 1.5188776543487699e-06, "loss": 0.5223, "step": 6066 }, { "epoch": 0.75, "grad_norm": 0.6791885235010544, "learning_rate": 1.5174353108281653e-06, "loss": 0.4273, "step": 6067 }, { "epoch": 0.75, "grad_norm": 0.6686708077913581, "learning_rate": 1.515993529946559e-06, "loss": 0.5173, "step": 6068 }, { "epoch": 0.75, "grad_norm": 1.4998571863692616, "learning_rate": 1.5145523119368832e-06, "loss": 0.4399, "step": 6069 }, { "epoch": 0.75, "grad_norm": 1.3938713925748085, "learning_rate": 1.513111657031977e-06, "loss": 0.477, "step": 6070 }, { "epoch": 0.75, "grad_norm": 1.639811660627296, "learning_rate": 1.5116715654645941e-06, "loss": 0.5163, "step": 6071 }, { "epoch": 0.75, "grad_norm": 1.8654325248173858, "learning_rate": 1.5102320374673907e-06, "loss": 0.4395, "step": 6072 }, { "epoch": 0.75, "grad_norm": 1.502720874368011, "learning_rate": 1.5087930732729378e-06, "loss": 0.5218, "step": 6073 }, { "epoch": 0.75, "grad_norm": 1.716755769956245, "learning_rate": 1.5073546731137105e-06, "loss": 0.5174, "step": 6074 }, { "epoch": 0.75, "grad_norm": 1.2313240862812058, "learning_rate": 1.5059168372220984e-06, "loss": 0.452, "step": 6075 }, { "epoch": 0.75, "grad_norm": 1.4806778543728099, "learning_rate": 1.5044795658303924e-06, "loss": 0.5089, "step": 6076 }, { "epoch": 0.75, "grad_norm": 1.9318546882145895, "learning_rate": 1.503042859170798e-06, "loss": 0.5295, "step": 6077 }, { "epoch": 0.75, "grad_norm": 1.3992136102391561, "learning_rate": 1.5016067174754301e-06, "loss": 0.5428, "step": 6078 }, { "epoch": 0.75, "grad_norm": 1.5946679371637276, "learning_rate": 1.5001711409763065e-06, "loss": 0.5025, "step": 6079 }, { "epoch": 0.75, "grad_norm": 1.3330513773776378, "learning_rate": 1.4987361299053599e-06, "loss": 0.4952, "step": 6080 }, { "epoch": 0.75, "grad_norm": 3.763017399981389, "learning_rate": 1.4973016844944265e-06, "loss": 0.4714, "step": 6081 }, { "epoch": 0.75, "grad_norm": 1.412498599014573, "learning_rate": 1.4958678049752568e-06, "loss": 0.4751, "step": 6082 }, { "epoch": 0.75, "grad_norm": 2.077612354528001, "learning_rate": 1.4944344915795034e-06, "loss": 0.5186, "step": 6083 }, { "epoch": 0.76, "grad_norm": 2.636665581614744, "learning_rate": 1.4930017445387317e-06, "loss": 0.5439, "step": 6084 }, { "epoch": 0.76, "grad_norm": 1.3895732747424572, "learning_rate": 1.4915695640844174e-06, "loss": 0.4998, "step": 6085 }, { "epoch": 0.76, "grad_norm": 1.8684005695710397, "learning_rate": 1.4901379504479374e-06, "loss": 0.5444, "step": 6086 }, { "epoch": 0.76, "grad_norm": 1.5776228687137166, "learning_rate": 1.488706903860585e-06, "loss": 0.4587, "step": 6087 }, { "epoch": 0.76, "grad_norm": 1.5765276916924604, "learning_rate": 1.487276424553556e-06, "loss": 0.4779, "step": 6088 }, { "epoch": 0.76, "grad_norm": 1.5607512892815054, "learning_rate": 1.4858465127579585e-06, "loss": 0.5154, "step": 6089 }, { "epoch": 0.76, "grad_norm": 1.5630673033403901, "learning_rate": 1.484417168704806e-06, "loss": 0.459, "step": 6090 }, { "epoch": 0.76, "grad_norm": 1.608404412381052, "learning_rate": 1.4829883926250238e-06, "loss": 0.5575, "step": 6091 }, { "epoch": 0.76, "grad_norm": 4.239238780180317, "learning_rate": 1.4815601847494388e-06, "loss": 0.5174, "step": 6092 }, { "epoch": 0.76, "grad_norm": 1.386493163183553, "learning_rate": 1.4801325453087967e-06, "loss": 0.4844, "step": 6093 }, { "epoch": 0.76, "grad_norm": 1.2527730800500003, "learning_rate": 1.4787054745337403e-06, "loss": 0.5151, "step": 6094 }, { "epoch": 0.76, "grad_norm": 1.4224906006832736, "learning_rate": 1.4772789726548293e-06, "loss": 0.47, "step": 6095 }, { "epoch": 0.76, "grad_norm": 1.4452147059422642, "learning_rate": 1.475853039902525e-06, "loss": 0.4718, "step": 6096 }, { "epoch": 0.76, "grad_norm": 1.3464263529106104, "learning_rate": 1.4744276765071991e-06, "loss": 0.4993, "step": 6097 }, { "epoch": 0.76, "grad_norm": 1.4470631571011383, "learning_rate": 1.4730028826991338e-06, "loss": 0.5509, "step": 6098 }, { "epoch": 0.76, "grad_norm": 2.2049191188781294, "learning_rate": 1.471578658708514e-06, "loss": 0.4637, "step": 6099 }, { "epoch": 0.76, "grad_norm": 2.5432552984631496, "learning_rate": 1.4701550047654378e-06, "loss": 0.5719, "step": 6100 }, { "epoch": 0.76, "grad_norm": 1.7224810211178514, "learning_rate": 1.4687319210999085e-06, "loss": 0.5144, "step": 6101 }, { "epoch": 0.76, "grad_norm": 1.3750536849035844, "learning_rate": 1.46730940794184e-06, "loss": 0.4745, "step": 6102 }, { "epoch": 0.76, "grad_norm": 3.2787319080534196, "learning_rate": 1.4658874655210476e-06, "loss": 0.5074, "step": 6103 }, { "epoch": 0.76, "grad_norm": 1.8074307407464616, "learning_rate": 1.4644660940672628e-06, "loss": 0.5201, "step": 6104 }, { "epoch": 0.76, "grad_norm": 1.3816429645259922, "learning_rate": 1.4630452938101187e-06, "loss": 0.4676, "step": 6105 }, { "epoch": 0.76, "grad_norm": 1.4917974592944019, "learning_rate": 1.4616250649791563e-06, "loss": 0.4782, "step": 6106 }, { "epoch": 0.76, "grad_norm": 1.7272759649744858, "learning_rate": 1.4602054078038296e-06, "loss": 0.5241, "step": 6107 }, { "epoch": 0.76, "grad_norm": 0.7922423149902295, "learning_rate": 1.4587863225134934e-06, "loss": 0.5275, "step": 6108 }, { "epoch": 0.76, "grad_norm": 3.0917670324044777, "learning_rate": 1.457367809337415e-06, "loss": 0.491, "step": 6109 }, { "epoch": 0.76, "grad_norm": 1.545252583770274, "learning_rate": 1.4559498685047679e-06, "loss": 0.5203, "step": 6110 }, { "epoch": 0.76, "grad_norm": 1.549050685649776, "learning_rate": 1.4545325002446337e-06, "loss": 0.5277, "step": 6111 }, { "epoch": 0.76, "grad_norm": 1.4392223393324777, "learning_rate": 1.453115704785999e-06, "loss": 0.485, "step": 6112 }, { "epoch": 0.76, "grad_norm": 1.4131394903061087, "learning_rate": 1.4516994823577613e-06, "loss": 0.479, "step": 6113 }, { "epoch": 0.76, "grad_norm": 1.386963970835647, "learning_rate": 1.4502838331887232e-06, "loss": 0.5144, "step": 6114 }, { "epoch": 0.76, "grad_norm": 1.5075390793671843, "learning_rate": 1.4488687575075932e-06, "loss": 0.4816, "step": 6115 }, { "epoch": 0.76, "grad_norm": 2.2656131827967814, "learning_rate": 1.4474542555429927e-06, "loss": 0.4684, "step": 6116 }, { "epoch": 0.76, "grad_norm": 1.6712536801227746, "learning_rate": 1.4460403275234425e-06, "loss": 0.4901, "step": 6117 }, { "epoch": 0.76, "grad_norm": 1.6216114085232909, "learning_rate": 1.4446269736773805e-06, "loss": 0.4703, "step": 6118 }, { "epoch": 0.76, "grad_norm": 1.4842233786242254, "learning_rate": 1.4432141942331423e-06, "loss": 0.5212, "step": 6119 }, { "epoch": 0.76, "grad_norm": 1.319943915507911, "learning_rate": 1.4418019894189783e-06, "loss": 0.4533, "step": 6120 }, { "epoch": 0.76, "grad_norm": 1.5602930700038722, "learning_rate": 1.4403903594630387e-06, "loss": 0.495, "step": 6121 }, { "epoch": 0.76, "grad_norm": 1.3866372003821805, "learning_rate": 1.4389793045933887e-06, "loss": 0.542, "step": 6122 }, { "epoch": 0.76, "grad_norm": 1.6214768264161543, "learning_rate": 1.4375688250379927e-06, "loss": 0.4438, "step": 6123 }, { "epoch": 0.76, "grad_norm": 1.5641216385268033, "learning_rate": 1.43615892102473e-06, "loss": 0.5375, "step": 6124 }, { "epoch": 0.76, "grad_norm": 1.5477655317021919, "learning_rate": 1.4347495927813797e-06, "loss": 0.5442, "step": 6125 }, { "epoch": 0.76, "grad_norm": 1.7259272691075274, "learning_rate": 1.4333408405356325e-06, "loss": 0.5016, "step": 6126 }, { "epoch": 0.76, "grad_norm": 1.5600227007704104, "learning_rate": 1.4319326645150861e-06, "loss": 0.5079, "step": 6127 }, { "epoch": 0.76, "grad_norm": 1.7365409956549633, "learning_rate": 1.4305250649472414e-06, "loss": 0.5232, "step": 6128 }, { "epoch": 0.76, "grad_norm": 1.392625333117977, "learning_rate": 1.4291180420595109e-06, "loss": 0.5264, "step": 6129 }, { "epoch": 0.76, "grad_norm": 1.5697261875302033, "learning_rate": 1.4277115960792082e-06, "loss": 0.487, "step": 6130 }, { "epoch": 0.76, "grad_norm": 1.5277808999870366, "learning_rate": 1.4263057272335612e-06, "loss": 0.4424, "step": 6131 }, { "epoch": 0.76, "grad_norm": 1.4738957764427278, "learning_rate": 1.4249004357496965e-06, "loss": 0.5131, "step": 6132 }, { "epoch": 0.76, "grad_norm": 1.5456403435516437, "learning_rate": 1.4234957218546531e-06, "loss": 0.4809, "step": 6133 }, { "epoch": 0.76, "grad_norm": 1.6002757304383604, "learning_rate": 1.4220915857753765e-06, "loss": 0.4778, "step": 6134 }, { "epoch": 0.76, "grad_norm": 1.9512042216256624, "learning_rate": 1.4206880277387143e-06, "loss": 0.5432, "step": 6135 }, { "epoch": 0.76, "grad_norm": 2.780164308223862, "learning_rate": 1.419285047971426e-06, "loss": 0.501, "step": 6136 }, { "epoch": 0.76, "grad_norm": 1.4504743857264533, "learning_rate": 1.4178826467001733e-06, "loss": 0.524, "step": 6137 }, { "epoch": 0.76, "grad_norm": 2.146544480364636, "learning_rate": 1.4164808241515287e-06, "loss": 0.5136, "step": 6138 }, { "epoch": 0.76, "grad_norm": 1.6272967301645165, "learning_rate": 1.4150795805519663e-06, "loss": 0.4752, "step": 6139 }, { "epoch": 0.76, "grad_norm": 1.5723827414808167, "learning_rate": 1.4136789161278724e-06, "loss": 0.4918, "step": 6140 }, { "epoch": 0.76, "grad_norm": 1.3826988338638708, "learning_rate": 1.4122788311055335e-06, "loss": 0.4696, "step": 6141 }, { "epoch": 0.76, "grad_norm": 1.3597633810300407, "learning_rate": 1.410879325711147e-06, "loss": 0.5011, "step": 6142 }, { "epoch": 0.76, "grad_norm": 1.6124829679414978, "learning_rate": 1.4094804001708174e-06, "loss": 0.5958, "step": 6143 }, { "epoch": 0.76, "grad_norm": 1.542078020146645, "learning_rate": 1.4080820547105501e-06, "loss": 0.531, "step": 6144 }, { "epoch": 0.76, "grad_norm": 1.2953216401574426, "learning_rate": 1.4066842895562631e-06, "loss": 0.4607, "step": 6145 }, { "epoch": 0.76, "grad_norm": 1.7238163480996251, "learning_rate": 1.4052871049337752e-06, "loss": 0.4902, "step": 6146 }, { "epoch": 0.76, "grad_norm": 1.6268366494492497, "learning_rate": 1.4038905010688159e-06, "loss": 0.476, "step": 6147 }, { "epoch": 0.76, "grad_norm": 1.4296291021875145, "learning_rate": 1.4024944781870164e-06, "loss": 0.4979, "step": 6148 }, { "epoch": 0.76, "grad_norm": 1.3735107930351835, "learning_rate": 1.4010990365139194e-06, "loss": 0.474, "step": 6149 }, { "epoch": 0.76, "grad_norm": 1.5927504311548404, "learning_rate": 1.399704176274968e-06, "loss": 0.4923, "step": 6150 }, { "epoch": 0.76, "grad_norm": 1.4353602620740302, "learning_rate": 1.3983098976955157e-06, "loss": 0.4654, "step": 6151 }, { "epoch": 0.76, "grad_norm": 1.3544221085197357, "learning_rate": 1.3969162010008213e-06, "loss": 0.4948, "step": 6152 }, { "epoch": 0.76, "grad_norm": 0.7144003386631187, "learning_rate": 1.3955230864160462e-06, "loss": 0.5, "step": 6153 }, { "epoch": 0.76, "grad_norm": 1.350318088129962, "learning_rate": 1.394130554166264e-06, "loss": 0.4938, "step": 6154 }, { "epoch": 0.76, "grad_norm": 1.629170310706614, "learning_rate": 1.3927386044764468e-06, "loss": 0.5517, "step": 6155 }, { "epoch": 0.76, "grad_norm": 1.7072373672078287, "learning_rate": 1.3913472375714792e-06, "loss": 0.4441, "step": 6156 }, { "epoch": 0.76, "grad_norm": 1.9292365455123768, "learning_rate": 1.389956453676146e-06, "loss": 0.4999, "step": 6157 }, { "epoch": 0.76, "grad_norm": 1.9975739951652836, "learning_rate": 1.3885662530151422e-06, "loss": 0.5228, "step": 6158 }, { "epoch": 0.76, "grad_norm": 1.4946002774292722, "learning_rate": 1.3871766358130683e-06, "loss": 0.5346, "step": 6159 }, { "epoch": 0.76, "grad_norm": 1.3701032802565098, "learning_rate": 1.3857876022944266e-06, "loss": 0.5035, "step": 6160 }, { "epoch": 0.76, "grad_norm": 1.7139531071014826, "learning_rate": 1.3843991526836303e-06, "loss": 0.5413, "step": 6161 }, { "epoch": 0.76, "grad_norm": 2.266923031675677, "learning_rate": 1.3830112872049929e-06, "loss": 0.4469, "step": 6162 }, { "epoch": 0.76, "grad_norm": 1.5573888558700415, "learning_rate": 1.3816240060827385e-06, "loss": 0.4741, "step": 6163 }, { "epoch": 0.76, "grad_norm": 1.7676773716867706, "learning_rate": 1.3802373095409926e-06, "loss": 0.5104, "step": 6164 }, { "epoch": 0.77, "grad_norm": 1.5647130625334107, "learning_rate": 1.378851197803791e-06, "loss": 0.4796, "step": 6165 }, { "epoch": 0.77, "grad_norm": 1.2424931016563316, "learning_rate": 1.377465671095069e-06, "loss": 0.4755, "step": 6166 }, { "epoch": 0.77, "grad_norm": 1.3848258055425164, "learning_rate": 1.3760807296386725e-06, "loss": 0.4646, "step": 6167 }, { "epoch": 0.77, "grad_norm": 1.527042221518362, "learning_rate": 1.374696373658352e-06, "loss": 0.4795, "step": 6168 }, { "epoch": 0.77, "grad_norm": 1.2861442316512977, "learning_rate": 1.3733126033777599e-06, "loss": 0.488, "step": 6169 }, { "epoch": 0.77, "grad_norm": 0.724289675612865, "learning_rate": 1.371929419020459e-06, "loss": 0.5291, "step": 6170 }, { "epoch": 0.77, "grad_norm": 0.7075573869726784, "learning_rate": 1.370546820809912e-06, "loss": 0.4787, "step": 6171 }, { "epoch": 0.77, "grad_norm": 1.4051071751574435, "learning_rate": 1.3691648089694931e-06, "loss": 0.4678, "step": 6172 }, { "epoch": 0.77, "grad_norm": 1.4999698282828469, "learning_rate": 1.3677833837224757e-06, "loss": 0.4626, "step": 6173 }, { "epoch": 0.77, "grad_norm": 1.407283713252255, "learning_rate": 1.3664025452920421e-06, "loss": 0.4922, "step": 6174 }, { "epoch": 0.77, "grad_norm": 1.5992010864639077, "learning_rate": 1.3650222939012809e-06, "loss": 0.4771, "step": 6175 }, { "epoch": 0.77, "grad_norm": 1.6736440131195913, "learning_rate": 1.3636426297731803e-06, "loss": 0.5068, "step": 6176 }, { "epoch": 0.77, "grad_norm": 2.4355258158852493, "learning_rate": 1.3622635531306405e-06, "loss": 0.545, "step": 6177 }, { "epoch": 0.77, "grad_norm": 1.8904563872315112, "learning_rate": 1.3608850641964604e-06, "loss": 0.4989, "step": 6178 }, { "epoch": 0.77, "grad_norm": 1.3131385472915835, "learning_rate": 1.3595071631933504e-06, "loss": 0.4593, "step": 6179 }, { "epoch": 0.77, "grad_norm": 1.6578571419253867, "learning_rate": 1.3581298503439193e-06, "loss": 0.4936, "step": 6180 }, { "epoch": 0.77, "grad_norm": 1.517459438778973, "learning_rate": 1.3567531258706874e-06, "loss": 0.4905, "step": 6181 }, { "epoch": 0.77, "grad_norm": 1.281799319717632, "learning_rate": 1.3553769899960716e-06, "loss": 0.5137, "step": 6182 }, { "epoch": 0.77, "grad_norm": 1.5282590270708574, "learning_rate": 1.3540014429424049e-06, "loss": 0.542, "step": 6183 }, { "epoch": 0.77, "grad_norm": 1.4806309356958096, "learning_rate": 1.3526264849319166e-06, "loss": 0.413, "step": 6184 }, { "epoch": 0.77, "grad_norm": 1.4836012581304867, "learning_rate": 1.351252116186741e-06, "loss": 0.4292, "step": 6185 }, { "epoch": 0.77, "grad_norm": 1.3555241248525332, "learning_rate": 1.3498783369289226e-06, "loss": 0.4885, "step": 6186 }, { "epoch": 0.77, "grad_norm": 1.2483446342138798, "learning_rate": 1.348505147380405e-06, "loss": 0.4379, "step": 6187 }, { "epoch": 0.77, "grad_norm": 1.493588619382274, "learning_rate": 1.347132547763042e-06, "loss": 0.4947, "step": 6188 }, { "epoch": 0.77, "grad_norm": 2.088968788701897, "learning_rate": 1.3457605382985862e-06, "loss": 0.4351, "step": 6189 }, { "epoch": 0.77, "grad_norm": 1.3924279918426934, "learning_rate": 1.3443891192087e-06, "loss": 0.4355, "step": 6190 }, { "epoch": 0.77, "grad_norm": 1.3716290693646196, "learning_rate": 1.3430182907149447e-06, "loss": 0.4726, "step": 6191 }, { "epoch": 0.77, "grad_norm": 1.3336070112616283, "learning_rate": 1.3416480530387955e-06, "loss": 0.4395, "step": 6192 }, { "epoch": 0.77, "grad_norm": 1.6298967280551317, "learning_rate": 1.340278406401621e-06, "loss": 0.5065, "step": 6193 }, { "epoch": 0.77, "grad_norm": 1.4516402621866342, "learning_rate": 1.3389093510247043e-06, "loss": 0.4969, "step": 6194 }, { "epoch": 0.77, "grad_norm": 1.3431009790179682, "learning_rate": 1.3375408871292256e-06, "loss": 0.4955, "step": 6195 }, { "epoch": 0.77, "grad_norm": 1.5731051836007435, "learning_rate": 1.3361730149362706e-06, "loss": 0.4964, "step": 6196 }, { "epoch": 0.77, "grad_norm": 1.331718298876939, "learning_rate": 1.3348057346668353e-06, "loss": 0.5137, "step": 6197 }, { "epoch": 0.77, "grad_norm": 0.6625395922594366, "learning_rate": 1.3334390465418122e-06, "loss": 0.4958, "step": 6198 }, { "epoch": 0.77, "grad_norm": 1.4321764259365748, "learning_rate": 1.3320729507820029e-06, "loss": 0.5206, "step": 6199 }, { "epoch": 0.77, "grad_norm": 1.4436428193547688, "learning_rate": 1.3307074476081127e-06, "loss": 0.4634, "step": 6200 }, { "epoch": 0.77, "grad_norm": 1.4385882583497545, "learning_rate": 1.3293425372407526e-06, "loss": 0.4517, "step": 6201 }, { "epoch": 0.77, "grad_norm": 1.9622624537904163, "learning_rate": 1.3279782199004321e-06, "loss": 0.5084, "step": 6202 }, { "epoch": 0.77, "grad_norm": 1.2241283119219653, "learning_rate": 1.3266144958075717e-06, "loss": 0.4693, "step": 6203 }, { "epoch": 0.77, "grad_norm": 1.5993684273325757, "learning_rate": 1.325251365182492e-06, "loss": 0.4913, "step": 6204 }, { "epoch": 0.77, "grad_norm": 1.4483264471948116, "learning_rate": 1.323888828245417e-06, "loss": 0.5085, "step": 6205 }, { "epoch": 0.77, "grad_norm": 1.916745646680996, "learning_rate": 1.3225268852164797e-06, "loss": 0.5189, "step": 6206 }, { "epoch": 0.77, "grad_norm": 1.4769884347476094, "learning_rate": 1.3211655363157094e-06, "loss": 0.4837, "step": 6207 }, { "epoch": 0.77, "grad_norm": 1.5140704961515885, "learning_rate": 1.31980478176305e-06, "loss": 0.522, "step": 6208 }, { "epoch": 0.77, "grad_norm": 1.6809489514739075, "learning_rate": 1.3184446217783387e-06, "loss": 0.4498, "step": 6209 }, { "epoch": 0.77, "grad_norm": 1.4330904014880177, "learning_rate": 1.3170850565813243e-06, "loss": 0.5133, "step": 6210 }, { "epoch": 0.77, "grad_norm": 1.4340701334442918, "learning_rate": 1.3157260863916544e-06, "loss": 0.5312, "step": 6211 }, { "epoch": 0.77, "grad_norm": 1.4960968171210913, "learning_rate": 1.3143677114288845e-06, "loss": 0.5202, "step": 6212 }, { "epoch": 0.77, "grad_norm": 1.5065093070867135, "learning_rate": 1.3130099319124706e-06, "loss": 0.531, "step": 6213 }, { "epoch": 0.77, "grad_norm": 1.6062720817845197, "learning_rate": 1.3116527480617735e-06, "loss": 0.4971, "step": 6214 }, { "epoch": 0.77, "grad_norm": 1.4525219409854628, "learning_rate": 1.3102961600960584e-06, "loss": 0.5125, "step": 6215 }, { "epoch": 0.77, "grad_norm": 1.333691087311118, "learning_rate": 1.3089401682344955e-06, "loss": 0.5467, "step": 6216 }, { "epoch": 0.77, "grad_norm": 1.738900694292726, "learning_rate": 1.3075847726961571e-06, "loss": 0.52, "step": 6217 }, { "epoch": 0.77, "grad_norm": 3.2815307166243293, "learning_rate": 1.3062299737000173e-06, "loss": 0.5316, "step": 6218 }, { "epoch": 0.77, "grad_norm": 1.543516672086808, "learning_rate": 1.3048757714649585e-06, "loss": 0.5369, "step": 6219 }, { "epoch": 0.77, "grad_norm": 1.6412206549282686, "learning_rate": 1.3035221662097614e-06, "loss": 0.4903, "step": 6220 }, { "epoch": 0.77, "grad_norm": 1.4833664255463928, "learning_rate": 1.302169158153115e-06, "loss": 0.5045, "step": 6221 }, { "epoch": 0.77, "grad_norm": 1.5423653118353517, "learning_rate": 1.300816747513609e-06, "loss": 0.5146, "step": 6222 }, { "epoch": 0.77, "grad_norm": 1.5360441497197643, "learning_rate": 1.2994649345097354e-06, "loss": 0.4826, "step": 6223 }, { "epoch": 0.77, "grad_norm": 1.9787375634564193, "learning_rate": 1.298113719359893e-06, "loss": 0.5337, "step": 6224 }, { "epoch": 0.77, "grad_norm": 1.4540743401928375, "learning_rate": 1.2967631022823824e-06, "loss": 0.4575, "step": 6225 }, { "epoch": 0.77, "grad_norm": 2.262005236895857, "learning_rate": 1.29541308349541e-06, "loss": 0.5949, "step": 6226 }, { "epoch": 0.77, "grad_norm": 1.6664454512775195, "learning_rate": 1.294063663217079e-06, "loss": 0.4938, "step": 6227 }, { "epoch": 0.77, "grad_norm": 1.6280218423673491, "learning_rate": 1.2927148416654033e-06, "loss": 0.5394, "step": 6228 }, { "epoch": 0.77, "grad_norm": 1.430640150277463, "learning_rate": 1.2913666190582947e-06, "loss": 0.492, "step": 6229 }, { "epoch": 0.77, "grad_norm": 1.607236994045575, "learning_rate": 1.2900189956135728e-06, "loss": 0.4961, "step": 6230 }, { "epoch": 0.77, "grad_norm": 0.6616581269513682, "learning_rate": 1.288671971548956e-06, "loss": 0.4807, "step": 6231 }, { "epoch": 0.77, "grad_norm": 2.1011305393950583, "learning_rate": 1.2873255470820677e-06, "loss": 0.5271, "step": 6232 }, { "epoch": 0.77, "grad_norm": 1.3148633012922666, "learning_rate": 1.2859797224304378e-06, "loss": 0.4674, "step": 6233 }, { "epoch": 0.77, "grad_norm": 1.5799807696563415, "learning_rate": 1.2846344978114921e-06, "loss": 0.4335, "step": 6234 }, { "epoch": 0.77, "grad_norm": 1.2282627622228512, "learning_rate": 1.283289873442567e-06, "loss": 0.4612, "step": 6235 }, { "epoch": 0.77, "grad_norm": 1.5334044812396002, "learning_rate": 1.2819458495408948e-06, "loss": 0.4714, "step": 6236 }, { "epoch": 0.77, "grad_norm": 1.3003968312361376, "learning_rate": 1.2806024263236178e-06, "loss": 0.4699, "step": 6237 }, { "epoch": 0.77, "grad_norm": 2.214653471241372, "learning_rate": 1.2792596040077753e-06, "loss": 0.4541, "step": 6238 }, { "epoch": 0.77, "grad_norm": 1.4826821480376464, "learning_rate": 1.277917382810314e-06, "loss": 0.5076, "step": 6239 }, { "epoch": 0.77, "grad_norm": 3.823356149131492, "learning_rate": 1.27657576294808e-06, "loss": 0.505, "step": 6240 }, { "epoch": 0.77, "grad_norm": 1.6382532688362617, "learning_rate": 1.2752347446378238e-06, "loss": 0.5678, "step": 6241 }, { "epoch": 0.77, "grad_norm": 1.4384221892090772, "learning_rate": 1.2738943280962013e-06, "loss": 0.4839, "step": 6242 }, { "epoch": 0.77, "grad_norm": 1.342711899294759, "learning_rate": 1.272554513539765e-06, "loss": 0.481, "step": 6243 }, { "epoch": 0.77, "grad_norm": 2.044459208891881, "learning_rate": 1.271215301184977e-06, "loss": 0.4773, "step": 6244 }, { "epoch": 0.78, "grad_norm": 1.3545482257587682, "learning_rate": 1.2698766912481958e-06, "loss": 0.491, "step": 6245 }, { "epoch": 0.78, "grad_norm": 1.3167855784532667, "learning_rate": 1.2685386839456886e-06, "loss": 0.5057, "step": 6246 }, { "epoch": 0.78, "grad_norm": 1.8376158472318225, "learning_rate": 1.2672012794936195e-06, "loss": 0.4646, "step": 6247 }, { "epoch": 0.78, "grad_norm": 2.4815672785751532, "learning_rate": 1.2658644781080593e-06, "loss": 0.4576, "step": 6248 }, { "epoch": 0.78, "grad_norm": 1.8421058516258766, "learning_rate": 1.2645282800049812e-06, "loss": 0.5177, "step": 6249 }, { "epoch": 0.78, "grad_norm": 1.8411185131379155, "learning_rate": 1.2631926854002574e-06, "loss": 0.5084, "step": 6250 }, { "epoch": 0.78, "grad_norm": 1.3816045998659126, "learning_rate": 1.2618576945096671e-06, "loss": 0.5492, "step": 6251 }, { "epoch": 0.78, "grad_norm": 1.3650484318670382, "learning_rate": 1.2605233075488877e-06, "loss": 0.4511, "step": 6252 }, { "epoch": 0.78, "grad_norm": 1.4297217792305226, "learning_rate": 1.259189524733504e-06, "loss": 0.5025, "step": 6253 }, { "epoch": 0.78, "grad_norm": 1.7866400137312053, "learning_rate": 1.2578563462789967e-06, "loss": 0.5479, "step": 6254 }, { "epoch": 0.78, "grad_norm": 1.288055058437218, "learning_rate": 1.2565237724007563e-06, "loss": 0.5066, "step": 6255 }, { "epoch": 0.78, "grad_norm": 1.5601250969802751, "learning_rate": 1.2551918033140687e-06, "loss": 0.525, "step": 6256 }, { "epoch": 0.78, "grad_norm": 1.2428756455250791, "learning_rate": 1.2538604392341264e-06, "loss": 0.4523, "step": 6257 }, { "epoch": 0.78, "grad_norm": 1.306311357857756, "learning_rate": 1.2525296803760246e-06, "loss": 0.4819, "step": 6258 }, { "epoch": 0.78, "grad_norm": 1.5382898567298926, "learning_rate": 1.2511995269547566e-06, "loss": 0.5187, "step": 6259 }, { "epoch": 0.78, "grad_norm": 1.528938983892805, "learning_rate": 1.2498699791852225e-06, "loss": 0.4815, "step": 6260 }, { "epoch": 0.78, "grad_norm": 1.4201500893708132, "learning_rate": 1.2485410372822205e-06, "loss": 0.4682, "step": 6261 }, { "epoch": 0.78, "grad_norm": 1.4320467050540584, "learning_rate": 1.247212701460455e-06, "loss": 0.5452, "step": 6262 }, { "epoch": 0.78, "grad_norm": 1.5668199288766043, "learning_rate": 1.2458849719345279e-06, "loss": 0.515, "step": 6263 }, { "epoch": 0.78, "grad_norm": 1.504159923138868, "learning_rate": 1.244557848918948e-06, "loss": 0.5079, "step": 6264 }, { "epoch": 0.78, "grad_norm": 1.3188449069712342, "learning_rate": 1.2432313326281215e-06, "loss": 0.4766, "step": 6265 }, { "epoch": 0.78, "grad_norm": 1.44142880046114, "learning_rate": 1.2419054232763595e-06, "loss": 0.4971, "step": 6266 }, { "epoch": 0.78, "grad_norm": 1.5180567972225074, "learning_rate": 1.2405801210778762e-06, "loss": 0.4639, "step": 6267 }, { "epoch": 0.78, "grad_norm": 1.3471618262889407, "learning_rate": 1.2392554262467833e-06, "loss": 0.4822, "step": 6268 }, { "epoch": 0.78, "grad_norm": 1.7091520425341729, "learning_rate": 1.2379313389970992e-06, "loss": 0.5229, "step": 6269 }, { "epoch": 0.78, "grad_norm": 1.570282113403896, "learning_rate": 1.2366078595427389e-06, "loss": 0.5271, "step": 6270 }, { "epoch": 0.78, "grad_norm": 1.3949975771134056, "learning_rate": 1.235284988097526e-06, "loss": 0.4658, "step": 6271 }, { "epoch": 0.78, "grad_norm": 1.605511267248311, "learning_rate": 1.2339627248751778e-06, "loss": 0.5136, "step": 6272 }, { "epoch": 0.78, "grad_norm": 1.8807963700316133, "learning_rate": 1.2326410700893198e-06, "loss": 0.4902, "step": 6273 }, { "epoch": 0.78, "grad_norm": 1.465254290911377, "learning_rate": 1.231320023953479e-06, "loss": 0.4896, "step": 6274 }, { "epoch": 0.78, "grad_norm": 2.324978655566404, "learning_rate": 1.2299995866810777e-06, "loss": 0.4975, "step": 6275 }, { "epoch": 0.78, "grad_norm": 1.428055739224206, "learning_rate": 1.2286797584854482e-06, "loss": 0.5418, "step": 6276 }, { "epoch": 0.78, "grad_norm": 1.4792979849123264, "learning_rate": 1.2273605395798165e-06, "loss": 0.5015, "step": 6277 }, { "epoch": 0.78, "grad_norm": 1.3062914972392994, "learning_rate": 1.226041930177318e-06, "loss": 0.5003, "step": 6278 }, { "epoch": 0.78, "grad_norm": 1.3227903716457101, "learning_rate": 1.2247239304909825e-06, "loss": 0.4546, "step": 6279 }, { "epoch": 0.78, "grad_norm": 1.5132406211568603, "learning_rate": 1.223406540733747e-06, "loss": 0.512, "step": 6280 }, { "epoch": 0.78, "grad_norm": 1.5284383420872536, "learning_rate": 1.2220897611184429e-06, "loss": 0.4794, "step": 6281 }, { "epoch": 0.78, "grad_norm": 1.6839892207048355, "learning_rate": 1.220773591857814e-06, "loss": 0.5089, "step": 6282 }, { "epoch": 0.78, "grad_norm": 1.652141145541304, "learning_rate": 1.2194580331644955e-06, "loss": 0.4869, "step": 6283 }, { "epoch": 0.78, "grad_norm": 2.323465813497087, "learning_rate": 1.2181430852510268e-06, "loss": 0.5281, "step": 6284 }, { "epoch": 0.78, "grad_norm": 1.5468279291555211, "learning_rate": 1.2168287483298514e-06, "loss": 0.4227, "step": 6285 }, { "epoch": 0.78, "grad_norm": 1.5243814666158966, "learning_rate": 1.2155150226133094e-06, "loss": 0.5128, "step": 6286 }, { "epoch": 0.78, "grad_norm": 1.6629313928964435, "learning_rate": 1.2142019083136475e-06, "loss": 0.4846, "step": 6287 }, { "epoch": 0.78, "grad_norm": 1.453438332306153, "learning_rate": 1.2128894056430084e-06, "loss": 0.5031, "step": 6288 }, { "epoch": 0.78, "grad_norm": 2.169209631478575, "learning_rate": 1.2115775148134402e-06, "loss": 0.5078, "step": 6289 }, { "epoch": 0.78, "grad_norm": 1.7260170535617319, "learning_rate": 1.2102662360368893e-06, "loss": 0.5449, "step": 6290 }, { "epoch": 0.78, "grad_norm": 1.8277114237596315, "learning_rate": 1.2089555695252064e-06, "loss": 0.513, "step": 6291 }, { "epoch": 0.78, "grad_norm": 1.5156474325060718, "learning_rate": 1.20764551549014e-06, "loss": 0.5189, "step": 6292 }, { "epoch": 0.78, "grad_norm": 9.402604227104481, "learning_rate": 1.2063360741433393e-06, "loss": 0.5027, "step": 6293 }, { "epoch": 0.78, "grad_norm": 4.506607537413577, "learning_rate": 1.205027245696359e-06, "loss": 0.4435, "step": 6294 }, { "epoch": 0.78, "grad_norm": 1.2969275416545543, "learning_rate": 1.2037190303606489e-06, "loss": 0.4377, "step": 6295 }, { "epoch": 0.78, "grad_norm": 2.7527021815831167, "learning_rate": 1.2024114283475652e-06, "loss": 0.5807, "step": 6296 }, { "epoch": 0.78, "grad_norm": 1.3670555930535397, "learning_rate": 1.2011044398683586e-06, "loss": 0.4939, "step": 6297 }, { "epoch": 0.78, "grad_norm": 1.2207355669928845, "learning_rate": 1.1997980651341901e-06, "loss": 0.4313, "step": 6298 }, { "epoch": 0.78, "grad_norm": 1.6068755034967181, "learning_rate": 1.1984923043561124e-06, "loss": 0.4666, "step": 6299 }, { "epoch": 0.78, "grad_norm": 1.3700754190571853, "learning_rate": 1.197187157745085e-06, "loss": 0.4371, "step": 6300 }, { "epoch": 0.78, "grad_norm": 1.5182100119914927, "learning_rate": 1.1958826255119626e-06, "loss": 0.4806, "step": 6301 }, { "epoch": 0.78, "grad_norm": 1.6868763537558658, "learning_rate": 1.1945787078675075e-06, "loss": 0.4545, "step": 6302 }, { "epoch": 0.78, "grad_norm": 1.2726361777698496, "learning_rate": 1.1932754050223772e-06, "loss": 0.437, "step": 6303 }, { "epoch": 0.78, "grad_norm": 1.7997900027014602, "learning_rate": 1.1919727171871303e-06, "loss": 0.4812, "step": 6304 }, { "epoch": 0.78, "grad_norm": 1.7049836031903862, "learning_rate": 1.1906706445722304e-06, "loss": 0.5468, "step": 6305 }, { "epoch": 0.78, "grad_norm": 1.482623305676932, "learning_rate": 1.1893691873880348e-06, "loss": 0.4679, "step": 6306 }, { "epoch": 0.78, "grad_norm": 1.7546459793761726, "learning_rate": 1.1880683458448111e-06, "loss": 0.5173, "step": 6307 }, { "epoch": 0.78, "grad_norm": 2.0376418787164963, "learning_rate": 1.186768120152717e-06, "loss": 0.4762, "step": 6308 }, { "epoch": 0.78, "grad_norm": 1.5897112053677294, "learning_rate": 1.1854685105218183e-06, "loss": 0.5106, "step": 6309 }, { "epoch": 0.78, "grad_norm": 1.554638495563267, "learning_rate": 1.184169517162076e-06, "loss": 0.4453, "step": 6310 }, { "epoch": 0.78, "grad_norm": 1.4527954328226296, "learning_rate": 1.1828711402833559e-06, "loss": 0.4581, "step": 6311 }, { "epoch": 0.78, "grad_norm": 1.324097258318579, "learning_rate": 1.1815733800954221e-06, "loss": 0.4468, "step": 6312 }, { "epoch": 0.78, "grad_norm": 0.6615006974970603, "learning_rate": 1.1802762368079368e-06, "loss": 0.488, "step": 6313 }, { "epoch": 0.78, "grad_norm": 1.1942935989812935, "learning_rate": 1.1789797106304663e-06, "loss": 0.4155, "step": 6314 }, { "epoch": 0.78, "grad_norm": 3.6700048498627162, "learning_rate": 1.1776838017724762e-06, "loss": 0.4806, "step": 6315 }, { "epoch": 0.78, "grad_norm": 1.426500155071954, "learning_rate": 1.1763885104433331e-06, "loss": 0.5418, "step": 6316 }, { "epoch": 0.78, "grad_norm": 1.5091385876369006, "learning_rate": 1.1750938368523e-06, "loss": 0.5207, "step": 6317 }, { "epoch": 0.78, "grad_norm": 1.3085262428892623, "learning_rate": 1.1737997812085468e-06, "loss": 0.4497, "step": 6318 }, { "epoch": 0.78, "grad_norm": 1.4515944463730204, "learning_rate": 1.1725063437211349e-06, "loss": 0.4652, "step": 6319 }, { "epoch": 0.78, "grad_norm": 1.2357108620539061, "learning_rate": 1.1712135245990342e-06, "loss": 0.465, "step": 6320 }, { "epoch": 0.78, "grad_norm": 1.774785089411395, "learning_rate": 1.1699213240511092e-06, "loss": 0.4741, "step": 6321 }, { "epoch": 0.78, "grad_norm": 3.101966765503848, "learning_rate": 1.1686297422861241e-06, "loss": 0.4625, "step": 6322 }, { "epoch": 0.78, "grad_norm": 13.892565752711171, "learning_rate": 1.1673387795127505e-06, "loss": 0.467, "step": 6323 }, { "epoch": 0.78, "grad_norm": 1.6317216482292651, "learning_rate": 1.1660484359395514e-06, "loss": 0.563, "step": 6324 }, { "epoch": 0.78, "grad_norm": 1.440182272936019, "learning_rate": 1.1647587117749942e-06, "loss": 0.5386, "step": 6325 }, { "epoch": 0.79, "grad_norm": 1.6785539736597066, "learning_rate": 1.1634696072274444e-06, "loss": 0.5317, "step": 6326 }, { "epoch": 0.79, "grad_norm": 1.5445312568704768, "learning_rate": 1.1621811225051698e-06, "loss": 0.5147, "step": 6327 }, { "epoch": 0.79, "grad_norm": 1.7406342594313629, "learning_rate": 1.160893257816334e-06, "loss": 0.4757, "step": 6328 }, { "epoch": 0.79, "grad_norm": 1.3839958819337508, "learning_rate": 1.1596060133690057e-06, "loss": 0.5226, "step": 6329 }, { "epoch": 0.79, "grad_norm": 1.3961518978650618, "learning_rate": 1.1583193893711475e-06, "loss": 0.5169, "step": 6330 }, { "epoch": 0.79, "grad_norm": 1.6338408284297716, "learning_rate": 1.1570333860306265e-06, "loss": 0.5181, "step": 6331 }, { "epoch": 0.79, "grad_norm": 1.9004636131185335, "learning_rate": 1.1557480035552093e-06, "loss": 0.5224, "step": 6332 }, { "epoch": 0.79, "grad_norm": 1.8167801921999878, "learning_rate": 1.1544632421525576e-06, "loss": 0.5135, "step": 6333 }, { "epoch": 0.79, "grad_norm": 1.5028563153474552, "learning_rate": 1.1531791020302391e-06, "loss": 0.4808, "step": 6334 }, { "epoch": 0.79, "grad_norm": 1.4995582428022929, "learning_rate": 1.1518955833957153e-06, "loss": 0.5058, "step": 6335 }, { "epoch": 0.79, "grad_norm": 1.564743351446164, "learning_rate": 1.1506126864563522e-06, "loss": 0.5488, "step": 6336 }, { "epoch": 0.79, "grad_norm": 1.5702469015325229, "learning_rate": 1.149330411419411e-06, "loss": 0.5287, "step": 6337 }, { "epoch": 0.79, "grad_norm": 1.4508956069268264, "learning_rate": 1.1480487584920553e-06, "loss": 0.4943, "step": 6338 }, { "epoch": 0.79, "grad_norm": 1.4344574161100998, "learning_rate": 1.146767727881349e-06, "loss": 0.4971, "step": 6339 }, { "epoch": 0.79, "grad_norm": 1.7641317544981567, "learning_rate": 1.1454873197942507e-06, "loss": 0.5236, "step": 6340 }, { "epoch": 0.79, "grad_norm": 1.693249372026301, "learning_rate": 1.1442075344376253e-06, "loss": 0.535, "step": 6341 }, { "epoch": 0.79, "grad_norm": 1.5097494619684362, "learning_rate": 1.14292837201823e-06, "loss": 0.5031, "step": 6342 }, { "epoch": 0.79, "grad_norm": 1.6040740006328453, "learning_rate": 1.1416498327427278e-06, "loss": 0.5009, "step": 6343 }, { "epoch": 0.79, "grad_norm": 1.5067558032158819, "learning_rate": 1.1403719168176747e-06, "loss": 0.4709, "step": 6344 }, { "epoch": 0.79, "grad_norm": 2.2037937435097756, "learning_rate": 1.1390946244495327e-06, "loss": 0.5293, "step": 6345 }, { "epoch": 0.79, "grad_norm": 1.4940316941859748, "learning_rate": 1.1378179558446561e-06, "loss": 0.481, "step": 6346 }, { "epoch": 0.79, "grad_norm": 1.4547452020022014, "learning_rate": 1.136541911209304e-06, "loss": 0.5031, "step": 6347 }, { "epoch": 0.79, "grad_norm": 1.5577078497772556, "learning_rate": 1.135266490749634e-06, "loss": 0.5071, "step": 6348 }, { "epoch": 0.79, "grad_norm": 1.4071597547018102, "learning_rate": 1.1339916946716984e-06, "loss": 0.5052, "step": 6349 }, { "epoch": 0.79, "grad_norm": 1.237185372105183, "learning_rate": 1.1327175231814547e-06, "loss": 0.474, "step": 6350 }, { "epoch": 0.79, "grad_norm": 2.5312058336458136, "learning_rate": 1.131443976484754e-06, "loss": 0.4986, "step": 6351 }, { "epoch": 0.79, "grad_norm": 1.278808877803244, "learning_rate": 1.1301710547873512e-06, "loss": 0.4917, "step": 6352 }, { "epoch": 0.79, "grad_norm": 1.471052130513398, "learning_rate": 1.1288987582948956e-06, "loss": 0.4279, "step": 6353 }, { "epoch": 0.79, "grad_norm": 1.9214340906088334, "learning_rate": 1.1276270872129408e-06, "loss": 0.4989, "step": 6354 }, { "epoch": 0.79, "grad_norm": 1.6966020038555492, "learning_rate": 1.1263560417469332e-06, "loss": 0.4959, "step": 6355 }, { "epoch": 0.79, "grad_norm": 1.9936299392743482, "learning_rate": 1.1250856221022233e-06, "loss": 0.4889, "step": 6356 }, { "epoch": 0.79, "grad_norm": 1.2150697051618198, "learning_rate": 1.1238158284840594e-06, "loss": 0.4787, "step": 6357 }, { "epoch": 0.79, "grad_norm": 1.376086418479404, "learning_rate": 1.1225466610975854e-06, "loss": 0.4775, "step": 6358 }, { "epoch": 0.79, "grad_norm": 1.6770030416826525, "learning_rate": 1.1212781201478496e-06, "loss": 0.4983, "step": 6359 }, { "epoch": 0.79, "grad_norm": 2.3716829551919227, "learning_rate": 1.1200102058397927e-06, "loss": 0.5119, "step": 6360 }, { "epoch": 0.79, "grad_norm": 1.7293039440124984, "learning_rate": 1.11874291837826e-06, "loss": 0.4778, "step": 6361 }, { "epoch": 0.79, "grad_norm": 1.2486910175729624, "learning_rate": 1.117476257967991e-06, "loss": 0.4769, "step": 6362 }, { "epoch": 0.79, "grad_norm": 1.9053404412571056, "learning_rate": 1.1162102248136264e-06, "loss": 0.5044, "step": 6363 }, { "epoch": 0.79, "grad_norm": 1.6287425902785133, "learning_rate": 1.114944819119707e-06, "loss": 0.4668, "step": 6364 }, { "epoch": 0.79, "grad_norm": 1.7416565194171552, "learning_rate": 1.1136800410906672e-06, "loss": 0.4914, "step": 6365 }, { "epoch": 0.79, "grad_norm": 4.085037618074328, "learning_rate": 1.1124158909308458e-06, "loss": 0.4845, "step": 6366 }, { "epoch": 0.79, "grad_norm": 1.7980273651122092, "learning_rate": 1.1111523688444741e-06, "loss": 0.4781, "step": 6367 }, { "epoch": 0.79, "grad_norm": 1.883979465274335, "learning_rate": 1.1098894750356893e-06, "loss": 0.5242, "step": 6368 }, { "epoch": 0.79, "grad_norm": 1.313537169348473, "learning_rate": 1.1086272097085187e-06, "loss": 0.4718, "step": 6369 }, { "epoch": 0.79, "grad_norm": 1.9156618835053345, "learning_rate": 1.1073655730668965e-06, "loss": 0.5015, "step": 6370 }, { "epoch": 0.79, "grad_norm": 9.284402050371876, "learning_rate": 1.1061045653146463e-06, "loss": 0.4395, "step": 6371 }, { "epoch": 0.79, "grad_norm": 1.4106454134612665, "learning_rate": 1.1048441866555004e-06, "loss": 0.4857, "step": 6372 }, { "epoch": 0.79, "grad_norm": 1.622936999846198, "learning_rate": 1.103584437293082e-06, "loss": 0.4752, "step": 6373 }, { "epoch": 0.79, "grad_norm": 1.5432383878844, "learning_rate": 1.1023253174309128e-06, "loss": 0.4941, "step": 6374 }, { "epoch": 0.79, "grad_norm": 1.4826265580141185, "learning_rate": 1.1010668272724178e-06, "loss": 0.5047, "step": 6375 }, { "epoch": 0.79, "grad_norm": 1.4461541616248892, "learning_rate": 1.099808967020914e-06, "loss": 0.5171, "step": 6376 }, { "epoch": 0.79, "grad_norm": 1.6063058861931088, "learning_rate": 1.0985517368796227e-06, "loss": 0.5259, "step": 6377 }, { "epoch": 0.79, "grad_norm": 1.6309292102124595, "learning_rate": 1.0972951370516577e-06, "loss": 0.565, "step": 6378 }, { "epoch": 0.79, "grad_norm": 2.349710798461493, "learning_rate": 1.0960391677400373e-06, "loss": 0.4981, "step": 6379 }, { "epoch": 0.79, "grad_norm": 1.4939126100808078, "learning_rate": 1.0947838291476687e-06, "loss": 0.5147, "step": 6380 }, { "epoch": 0.79, "grad_norm": 1.443005036475542, "learning_rate": 1.0935291214773703e-06, "loss": 0.4891, "step": 6381 }, { "epoch": 0.79, "grad_norm": 1.5200846600311144, "learning_rate": 1.0922750449318464e-06, "loss": 0.4843, "step": 6382 }, { "epoch": 0.79, "grad_norm": 1.9827824644081335, "learning_rate": 1.0910215997137048e-06, "loss": 0.4668, "step": 6383 }, { "epoch": 0.79, "grad_norm": 2.380065844859165, "learning_rate": 1.0897687860254514e-06, "loss": 0.4843, "step": 6384 }, { "epoch": 0.79, "grad_norm": 1.6967284179344584, "learning_rate": 1.0885166040694884e-06, "loss": 0.5127, "step": 6385 }, { "epoch": 0.79, "grad_norm": 1.6695641296913672, "learning_rate": 1.0872650540481178e-06, "loss": 0.4706, "step": 6386 }, { "epoch": 0.79, "grad_norm": 1.5456696537393462, "learning_rate": 1.0860141361635374e-06, "loss": 0.4878, "step": 6387 }, { "epoch": 0.79, "grad_norm": 1.2956173483980842, "learning_rate": 1.0847638506178444e-06, "loss": 0.5433, "step": 6388 }, { "epoch": 0.79, "grad_norm": 1.3262687673658258, "learning_rate": 1.0835141976130331e-06, "loss": 0.4642, "step": 6389 }, { "epoch": 0.79, "grad_norm": 1.64537801718758, "learning_rate": 1.0822651773509984e-06, "loss": 0.5271, "step": 6390 }, { "epoch": 0.79, "grad_norm": 0.6997776708532146, "learning_rate": 1.0810167900335283e-06, "loss": 0.4831, "step": 6391 }, { "epoch": 0.79, "grad_norm": 1.6522170913115182, "learning_rate": 1.0797690358623103e-06, "loss": 0.4778, "step": 6392 }, { "epoch": 0.79, "grad_norm": 1.3692356724278743, "learning_rate": 1.078521915038932e-06, "loss": 0.4815, "step": 6393 }, { "epoch": 0.79, "grad_norm": 1.3408947630589219, "learning_rate": 1.0772754277648739e-06, "loss": 0.4349, "step": 6394 }, { "epoch": 0.79, "grad_norm": 1.8953971727722827, "learning_rate": 1.0760295742415205e-06, "loss": 0.4876, "step": 6395 }, { "epoch": 0.79, "grad_norm": 1.53510002475653, "learning_rate": 1.0747843546701452e-06, "loss": 0.4715, "step": 6396 }, { "epoch": 0.79, "grad_norm": 0.6980999378389511, "learning_rate": 1.0735397692519312e-06, "loss": 0.5138, "step": 6397 }, { "epoch": 0.79, "grad_norm": 2.558202819615058, "learning_rate": 1.0722958181879466e-06, "loss": 0.4924, "step": 6398 }, { "epoch": 0.79, "grad_norm": 1.7068775428592562, "learning_rate": 1.0710525016791667e-06, "loss": 0.5051, "step": 6399 }, { "epoch": 0.79, "grad_norm": 1.6023047898555305, "learning_rate": 1.0698098199264578e-06, "loss": 0.5152, "step": 6400 }, { "epoch": 0.79, "grad_norm": 1.4369425786969896, "learning_rate": 1.0685677731305855e-06, "loss": 0.4318, "step": 6401 }, { "epoch": 0.79, "grad_norm": 1.5998174512047107, "learning_rate": 1.0673263614922152e-06, "loss": 0.558, "step": 6402 }, { "epoch": 0.79, "grad_norm": 1.7877368017433597, "learning_rate": 1.0660855852119062e-06, "loss": 0.4984, "step": 6403 }, { "epoch": 0.79, "grad_norm": 1.9796036482449146, "learning_rate": 1.0648454444901179e-06, "loss": 0.4578, "step": 6404 }, { "epoch": 0.79, "grad_norm": 2.882827935795179, "learning_rate": 1.0636059395272053e-06, "loss": 0.4866, "step": 6405 }, { "epoch": 0.79, "grad_norm": 9.42440315165274, "learning_rate": 1.0623670705234235e-06, "loss": 0.515, "step": 6406 }, { "epoch": 0.8, "grad_norm": 2.3774620416132493, "learning_rate": 1.0611288376789198e-06, "loss": 0.4862, "step": 6407 }, { "epoch": 0.8, "grad_norm": 1.3592757543233827, "learning_rate": 1.059891241193744e-06, "loss": 0.5142, "step": 6408 }, { "epoch": 0.8, "grad_norm": 1.4592048124895594, "learning_rate": 1.058654281267838e-06, "loss": 0.5171, "step": 6409 }, { "epoch": 0.8, "grad_norm": 1.4238846191505734, "learning_rate": 1.057417958101047e-06, "loss": 0.5033, "step": 6410 }, { "epoch": 0.8, "grad_norm": 2.4464240773430705, "learning_rate": 1.0561822718931074e-06, "loss": 0.5016, "step": 6411 }, { "epoch": 0.8, "grad_norm": 1.6974068949415977, "learning_rate": 1.0549472228436535e-06, "loss": 0.4979, "step": 6412 }, { "epoch": 0.8, "grad_norm": 1.3602079975816623, "learning_rate": 1.0537128111522232e-06, "loss": 0.4518, "step": 6413 }, { "epoch": 0.8, "grad_norm": 0.6865326422197805, "learning_rate": 1.0524790370182431e-06, "loss": 0.4919, "step": 6414 }, { "epoch": 0.8, "grad_norm": 1.3161318499696908, "learning_rate": 1.0512459006410424e-06, "loss": 0.4716, "step": 6415 }, { "epoch": 0.8, "grad_norm": 1.3809273299426368, "learning_rate": 1.0500134022198421e-06, "loss": 0.4707, "step": 6416 }, { "epoch": 0.8, "grad_norm": 1.358364784189713, "learning_rate": 1.0487815419537672e-06, "loss": 0.4555, "step": 6417 }, { "epoch": 0.8, "grad_norm": 2.2188671693165967, "learning_rate": 1.047550320041832e-06, "loss": 0.4906, "step": 6418 }, { "epoch": 0.8, "grad_norm": 1.8062966823296596, "learning_rate": 1.0463197366829536e-06, "loss": 0.4798, "step": 6419 }, { "epoch": 0.8, "grad_norm": 1.5575137348150088, "learning_rate": 1.0450897920759422e-06, "loss": 0.515, "step": 6420 }, { "epoch": 0.8, "grad_norm": 1.4850541577111969, "learning_rate": 1.043860486419505e-06, "loss": 0.4679, "step": 6421 }, { "epoch": 0.8, "grad_norm": 1.4781014051994306, "learning_rate": 1.042631819912251e-06, "loss": 0.4556, "step": 6422 }, { "epoch": 0.8, "grad_norm": 3.3726640581035268, "learning_rate": 1.0414037927526782e-06, "loss": 0.4647, "step": 6423 }, { "epoch": 0.8, "grad_norm": 1.714250634087497, "learning_rate": 1.0401764051391888e-06, "loss": 0.4789, "step": 6424 }, { "epoch": 0.8, "grad_norm": 1.4200836700258241, "learning_rate": 1.0389496572700747e-06, "loss": 0.4973, "step": 6425 }, { "epoch": 0.8, "grad_norm": 1.3985361590853174, "learning_rate": 1.0377235493435301e-06, "loss": 0.4872, "step": 6426 }, { "epoch": 0.8, "grad_norm": 1.414545373279879, "learning_rate": 1.0364980815576419e-06, "loss": 0.4704, "step": 6427 }, { "epoch": 0.8, "grad_norm": 1.3847624550964073, "learning_rate": 1.0352732541103972e-06, "loss": 0.5241, "step": 6428 }, { "epoch": 0.8, "grad_norm": 1.5204777902527213, "learning_rate": 1.034049067199675e-06, "loss": 0.4473, "step": 6429 }, { "epoch": 0.8, "grad_norm": 1.3824734739922342, "learning_rate": 1.032825521023255e-06, "loss": 0.4806, "step": 6430 }, { "epoch": 0.8, "grad_norm": 1.35143148368396, "learning_rate": 1.031602615778814e-06, "loss": 0.4947, "step": 6431 }, { "epoch": 0.8, "grad_norm": 1.3017480279577303, "learning_rate": 1.0303803516639187e-06, "loss": 0.5221, "step": 6432 }, { "epoch": 0.8, "grad_norm": 1.314222059951463, "learning_rate": 1.0291587288760403e-06, "loss": 0.5007, "step": 6433 }, { "epoch": 0.8, "grad_norm": 1.7591906723145145, "learning_rate": 1.02793774761254e-06, "loss": 0.4792, "step": 6434 }, { "epoch": 0.8, "grad_norm": 1.495983252455482, "learning_rate": 1.026717408070681e-06, "loss": 0.4376, "step": 6435 }, { "epoch": 0.8, "grad_norm": 1.862959961522283, "learning_rate": 1.0254977104476166e-06, "loss": 0.4925, "step": 6436 }, { "epoch": 0.8, "grad_norm": 1.4445140615487064, "learning_rate": 1.0242786549404015e-06, "loss": 0.515, "step": 6437 }, { "epoch": 0.8, "grad_norm": 1.5709457175445063, "learning_rate": 1.0230602417459862e-06, "loss": 0.5253, "step": 6438 }, { "epoch": 0.8, "grad_norm": 1.4846376046602925, "learning_rate": 1.0218424710612135e-06, "loss": 0.539, "step": 6439 }, { "epoch": 0.8, "grad_norm": 1.805011782517603, "learning_rate": 1.0206253430828266e-06, "loss": 0.4773, "step": 6440 }, { "epoch": 0.8, "grad_norm": 1.4557146952088784, "learning_rate": 1.0194088580074618e-06, "loss": 0.465, "step": 6441 }, { "epoch": 0.8, "grad_norm": 1.380442482912851, "learning_rate": 1.0181930160316554e-06, "loss": 0.5045, "step": 6442 }, { "epoch": 0.8, "grad_norm": 1.321381721792749, "learning_rate": 1.0169778173518347e-06, "loss": 0.4964, "step": 6443 }, { "epoch": 0.8, "grad_norm": 4.081757579088301, "learning_rate": 1.0157632621643282e-06, "loss": 0.514, "step": 6444 }, { "epoch": 0.8, "grad_norm": 1.6201478985872608, "learning_rate": 1.0145493506653548e-06, "loss": 0.5069, "step": 6445 }, { "epoch": 0.8, "grad_norm": 1.6145747097101975, "learning_rate": 1.0133360830510352e-06, "loss": 0.4738, "step": 6446 }, { "epoch": 0.8, "grad_norm": 1.4517395862733522, "learning_rate": 1.0121234595173835e-06, "loss": 0.5372, "step": 6447 }, { "epoch": 0.8, "grad_norm": 2.3009646288536616, "learning_rate": 1.0109114802603081e-06, "loss": 0.4628, "step": 6448 }, { "epoch": 0.8, "grad_norm": 1.5314688288647906, "learning_rate": 1.0097001454756173e-06, "loss": 0.5247, "step": 6449 }, { "epoch": 0.8, "grad_norm": 1.3562688096522537, "learning_rate": 1.0084894553590098e-06, "loss": 0.4466, "step": 6450 }, { "epoch": 0.8, "grad_norm": 1.3705066231074705, "learning_rate": 1.0072794101060861e-06, "loss": 0.4579, "step": 6451 }, { "epoch": 0.8, "grad_norm": 1.5832753927850254, "learning_rate": 1.0060700099123372e-06, "loss": 0.4806, "step": 6452 }, { "epoch": 0.8, "grad_norm": 1.8103783315737578, "learning_rate": 1.004861254973154e-06, "loss": 0.4862, "step": 6453 }, { "epoch": 0.8, "grad_norm": 2.247061170385736, "learning_rate": 1.0036531454838216e-06, "loss": 0.4913, "step": 6454 }, { "epoch": 0.8, "grad_norm": 2.405826263478046, "learning_rate": 1.0024456816395195e-06, "loss": 0.511, "step": 6455 }, { "epoch": 0.8, "grad_norm": 1.800506386193242, "learning_rate": 1.001238863635326e-06, "loss": 0.4625, "step": 6456 }, { "epoch": 0.8, "grad_norm": 1.4412549678615536, "learning_rate": 1.0000326916662112e-06, "loss": 0.519, "step": 6457 }, { "epoch": 0.8, "grad_norm": 1.3940917749864985, "learning_rate": 9.988271659270444e-07, "loss": 0.4812, "step": 6458 }, { "epoch": 0.8, "grad_norm": 1.331626678614534, "learning_rate": 9.976222866125867e-07, "loss": 0.4947, "step": 6459 }, { "epoch": 0.8, "grad_norm": 1.7731658384307778, "learning_rate": 9.964180539175e-07, "loss": 0.5341, "step": 6460 }, { "epoch": 0.8, "grad_norm": 0.6299854711262632, "learning_rate": 9.952144680363358e-07, "loss": 0.5018, "step": 6461 }, { "epoch": 0.8, "grad_norm": 1.7760926328349766, "learning_rate": 9.940115291635448e-07, "loss": 0.4784, "step": 6462 }, { "epoch": 0.8, "grad_norm": 1.8962216803491019, "learning_rate": 9.92809237493475e-07, "loss": 0.47, "step": 6463 }, { "epoch": 0.8, "grad_norm": 1.4401955974492697, "learning_rate": 9.916075932203633e-07, "loss": 0.4428, "step": 6464 }, { "epoch": 0.8, "grad_norm": 1.4001781733936711, "learning_rate": 9.904065965383492e-07, "loss": 0.5315, "step": 6465 }, { "epoch": 0.8, "grad_norm": 1.7370364220018046, "learning_rate": 9.89206247641461e-07, "loss": 0.566, "step": 6466 }, { "epoch": 0.8, "grad_norm": 1.3805628292137204, "learning_rate": 9.880065467236289e-07, "loss": 0.4446, "step": 6467 }, { "epoch": 0.8, "grad_norm": 1.4475562347146755, "learning_rate": 9.868074939786727e-07, "loss": 0.5286, "step": 6468 }, { "epoch": 0.8, "grad_norm": 1.884552940235352, "learning_rate": 9.856090896003117e-07, "loss": 0.5077, "step": 6469 }, { "epoch": 0.8, "grad_norm": 1.2709276223524975, "learning_rate": 9.84411333782157e-07, "loss": 0.4425, "step": 6470 }, { "epoch": 0.8, "grad_norm": 1.7191717752920916, "learning_rate": 9.83214226717717e-07, "loss": 0.5699, "step": 6471 }, { "epoch": 0.8, "grad_norm": 1.426700264224924, "learning_rate": 9.820177686003972e-07, "loss": 0.505, "step": 6472 }, { "epoch": 0.8, "grad_norm": 1.6161588921373857, "learning_rate": 9.808219596234924e-07, "loss": 0.5044, "step": 6473 }, { "epoch": 0.8, "grad_norm": 1.565981060733389, "learning_rate": 9.796267999801995e-07, "loss": 0.5106, "step": 6474 }, { "epoch": 0.8, "grad_norm": 1.5211022442715696, "learning_rate": 9.784322898636034e-07, "loss": 0.5166, "step": 6475 }, { "epoch": 0.8, "grad_norm": 1.4123861555634079, "learning_rate": 9.772384294666915e-07, "loss": 0.5146, "step": 6476 }, { "epoch": 0.8, "grad_norm": 1.796078323450227, "learning_rate": 9.760452189823394e-07, "loss": 0.5041, "step": 6477 }, { "epoch": 0.8, "grad_norm": 0.655518718867122, "learning_rate": 9.748526586033219e-07, "loss": 0.465, "step": 6478 }, { "epoch": 0.8, "grad_norm": 1.321255408001864, "learning_rate": 9.736607485223076e-07, "loss": 0.4534, "step": 6479 }, { "epoch": 0.8, "grad_norm": 0.5973847957315321, "learning_rate": 9.724694889318615e-07, "loss": 0.469, "step": 6480 }, { "epoch": 0.8, "grad_norm": 1.7206749394132033, "learning_rate": 9.71278880024441e-07, "loss": 0.4653, "step": 6481 }, { "epoch": 0.8, "grad_norm": 1.456188547690438, "learning_rate": 9.700889219923975e-07, "loss": 0.4847, "step": 6482 }, { "epoch": 0.8, "grad_norm": 1.361231917322826, "learning_rate": 9.688996150279827e-07, "loss": 0.4994, "step": 6483 }, { "epoch": 0.8, "grad_norm": 2.0411586611106918, "learning_rate": 9.677109593233363e-07, "loss": 0.4838, "step": 6484 }, { "epoch": 0.8, "grad_norm": 1.6873790949681748, "learning_rate": 9.665229550704991e-07, "loss": 0.5037, "step": 6485 }, { "epoch": 0.8, "grad_norm": 1.6506240442314934, "learning_rate": 9.653356024613997e-07, "loss": 0.4929, "step": 6486 }, { "epoch": 0.81, "grad_norm": 0.6794439760619745, "learning_rate": 9.641489016878702e-07, "loss": 0.5285, "step": 6487 }, { "epoch": 0.81, "grad_norm": 1.4150040353312494, "learning_rate": 9.62962852941629e-07, "loss": 0.5077, "step": 6488 }, { "epoch": 0.81, "grad_norm": 1.390843234714611, "learning_rate": 9.61777456414295e-07, "loss": 0.4565, "step": 6489 }, { "epoch": 0.81, "grad_norm": 2.2235657869784267, "learning_rate": 9.60592712297379e-07, "loss": 0.4837, "step": 6490 }, { "epoch": 0.81, "grad_norm": 1.4018516966954073, "learning_rate": 9.594086207822845e-07, "loss": 0.4996, "step": 6491 }, { "epoch": 0.81, "grad_norm": 1.8108744655621944, "learning_rate": 9.58225182060315e-07, "loss": 0.4742, "step": 6492 }, { "epoch": 0.81, "grad_norm": 1.6366771711601844, "learning_rate": 9.570423963226632e-07, "loss": 0.4805, "step": 6493 }, { "epoch": 0.81, "grad_norm": 0.6650749303310582, "learning_rate": 9.558602637604192e-07, "loss": 0.5174, "step": 6494 }, { "epoch": 0.81, "grad_norm": 2.208185061384778, "learning_rate": 9.546787845645672e-07, "loss": 0.554, "step": 6495 }, { "epoch": 0.81, "grad_norm": 1.4893339674896775, "learning_rate": 9.534979589259869e-07, "loss": 0.4519, "step": 6496 }, { "epoch": 0.81, "grad_norm": 0.6165958466542163, "learning_rate": 9.523177870354483e-07, "loss": 0.4916, "step": 6497 }, { "epoch": 0.81, "grad_norm": 1.7710197037064592, "learning_rate": 9.511382690836213e-07, "loss": 0.5109, "step": 6498 }, { "epoch": 0.81, "grad_norm": 1.3793051055716132, "learning_rate": 9.499594052610661e-07, "loss": 0.4939, "step": 6499 }, { "epoch": 0.81, "grad_norm": 2.0578189131865265, "learning_rate": 9.487811957582376e-07, "loss": 0.4535, "step": 6500 }, { "epoch": 0.81, "grad_norm": 1.5081857196219226, "learning_rate": 9.476036407654881e-07, "loss": 0.4491, "step": 6501 }, { "epoch": 0.81, "grad_norm": 1.456257458370949, "learning_rate": 9.464267404730593e-07, "loss": 0.4809, "step": 6502 }, { "epoch": 0.81, "grad_norm": 8.474834198528999, "learning_rate": 9.452504950710911e-07, "loss": 0.4986, "step": 6503 }, { "epoch": 0.81, "grad_norm": 0.7138505129091549, "learning_rate": 9.440749047496168e-07, "loss": 0.5048, "step": 6504 }, { "epoch": 0.81, "grad_norm": 1.4093392810821976, "learning_rate": 9.428999696985642e-07, "loss": 0.4755, "step": 6505 }, { "epoch": 0.81, "grad_norm": 1.6833102214769327, "learning_rate": 9.417256901077515e-07, "loss": 0.487, "step": 6506 }, { "epoch": 0.81, "grad_norm": 1.3875066026251863, "learning_rate": 9.405520661668977e-07, "loss": 0.5175, "step": 6507 }, { "epoch": 0.81, "grad_norm": 1.4386284006670491, "learning_rate": 9.393790980656081e-07, "loss": 0.4967, "step": 6508 }, { "epoch": 0.81, "grad_norm": 1.477341431126602, "learning_rate": 9.382067859933897e-07, "loss": 0.4585, "step": 6509 }, { "epoch": 0.81, "grad_norm": 1.3587078333246, "learning_rate": 9.37035130139638e-07, "loss": 0.4865, "step": 6510 }, { "epoch": 0.81, "grad_norm": 1.4243793950309338, "learning_rate": 9.358641306936416e-07, "loss": 0.513, "step": 6511 }, { "epoch": 0.81, "grad_norm": 1.4195418236296353, "learning_rate": 9.346937878445916e-07, "loss": 0.4805, "step": 6512 }, { "epoch": 0.81, "grad_norm": 1.6071698001178198, "learning_rate": 9.335241017815627e-07, "loss": 0.5024, "step": 6513 }, { "epoch": 0.81, "grad_norm": 2.5025394272531454, "learning_rate": 9.323550726935304e-07, "loss": 0.562, "step": 6514 }, { "epoch": 0.81, "grad_norm": 1.531377625714681, "learning_rate": 9.311867007693598e-07, "loss": 0.4915, "step": 6515 }, { "epoch": 0.81, "grad_norm": 1.7240203212532423, "learning_rate": 9.300189861978143e-07, "loss": 0.4966, "step": 6516 }, { "epoch": 0.81, "grad_norm": 1.6216464797284622, "learning_rate": 9.288519291675451e-07, "loss": 0.4925, "step": 6517 }, { "epoch": 0.81, "grad_norm": 1.5411812159660658, "learning_rate": 9.276855298671033e-07, "loss": 0.5281, "step": 6518 }, { "epoch": 0.81, "grad_norm": 1.580795284714528, "learning_rate": 9.265197884849286e-07, "loss": 0.4922, "step": 6519 }, { "epoch": 0.81, "grad_norm": 1.5446521341749373, "learning_rate": 9.253547052093587e-07, "loss": 0.4868, "step": 6520 }, { "epoch": 0.81, "grad_norm": 1.3983084243758115, "learning_rate": 9.241902802286229e-07, "loss": 0.4888, "step": 6521 }, { "epoch": 0.81, "grad_norm": 1.308543603312496, "learning_rate": 9.23026513730843e-07, "loss": 0.4892, "step": 6522 }, { "epoch": 0.81, "grad_norm": 1.7423299968073849, "learning_rate": 9.218634059040377e-07, "loss": 0.493, "step": 6523 }, { "epoch": 0.81, "grad_norm": 1.3112906377656586, "learning_rate": 9.207009569361153e-07, "loss": 0.5326, "step": 6524 }, { "epoch": 0.81, "grad_norm": 1.4610564692928665, "learning_rate": 9.195391670148812e-07, "loss": 0.4936, "step": 6525 }, { "epoch": 0.81, "grad_norm": 1.890123244074039, "learning_rate": 9.183780363280314e-07, "loss": 0.4905, "step": 6526 }, { "epoch": 0.81, "grad_norm": 1.3160123399401755, "learning_rate": 9.172175650631576e-07, "loss": 0.4919, "step": 6527 }, { "epoch": 0.81, "grad_norm": 0.6255882144586874, "learning_rate": 9.160577534077453e-07, "loss": 0.445, "step": 6528 }, { "epoch": 0.81, "grad_norm": 1.5051745110594048, "learning_rate": 9.148986015491695e-07, "loss": 0.4883, "step": 6529 }, { "epoch": 0.81, "grad_norm": 0.6865982859562735, "learning_rate": 9.137401096747045e-07, "loss": 0.501, "step": 6530 }, { "epoch": 0.81, "grad_norm": 1.9954453456068366, "learning_rate": 9.125822779715121e-07, "loss": 0.4812, "step": 6531 }, { "epoch": 0.81, "grad_norm": 1.683593194230567, "learning_rate": 9.114251066266527e-07, "loss": 0.5226, "step": 6532 }, { "epoch": 0.81, "grad_norm": 2.165224842620645, "learning_rate": 9.102685958270746e-07, "loss": 0.5188, "step": 6533 }, { "epoch": 0.81, "grad_norm": 1.8715140158538006, "learning_rate": 9.09112745759626e-07, "loss": 0.4523, "step": 6534 }, { "epoch": 0.81, "grad_norm": 0.578535773480896, "learning_rate": 9.07957556611041e-07, "loss": 0.4604, "step": 6535 }, { "epoch": 0.81, "grad_norm": 1.1796850042322746, "learning_rate": 9.068030285679519e-07, "loss": 0.4398, "step": 6536 }, { "epoch": 0.81, "grad_norm": 0.7277912894351544, "learning_rate": 9.05649161816885e-07, "loss": 0.4712, "step": 6537 }, { "epoch": 0.81, "grad_norm": 1.6811486351535663, "learning_rate": 9.044959565442541e-07, "loss": 0.5187, "step": 6538 }, { "epoch": 0.81, "grad_norm": 1.3938299493600599, "learning_rate": 9.033434129363727e-07, "loss": 0.5106, "step": 6539 }, { "epoch": 0.81, "grad_norm": 1.865073353995794, "learning_rate": 9.021915311794421e-07, "loss": 0.4647, "step": 6540 }, { "epoch": 0.81, "grad_norm": 1.3106511091520945, "learning_rate": 9.010403114595612e-07, "loss": 0.4894, "step": 6541 }, { "epoch": 0.81, "grad_norm": 1.6391450455268382, "learning_rate": 8.998897539627172e-07, "loss": 0.4969, "step": 6542 }, { "epoch": 0.81, "grad_norm": 1.4456021413174263, "learning_rate": 8.987398588747948e-07, "loss": 0.4843, "step": 6543 }, { "epoch": 0.81, "grad_norm": 2.5590898039625034, "learning_rate": 8.975906263815681e-07, "loss": 0.5073, "step": 6544 }, { "epoch": 0.81, "grad_norm": 1.665956667537492, "learning_rate": 8.96442056668706e-07, "loss": 0.5101, "step": 6545 }, { "epoch": 0.81, "grad_norm": 1.8921706826375544, "learning_rate": 8.952941499217722e-07, "loss": 0.4772, "step": 6546 }, { "epoch": 0.81, "grad_norm": 3.2156733101385413, "learning_rate": 8.941469063262182e-07, "loss": 0.4468, "step": 6547 }, { "epoch": 0.81, "grad_norm": 1.5766155722354083, "learning_rate": 8.930003260673936e-07, "loss": 0.5071, "step": 6548 }, { "epoch": 0.81, "grad_norm": 1.545352202694005, "learning_rate": 8.91854409330537e-07, "loss": 0.4885, "step": 6549 }, { "epoch": 0.81, "grad_norm": 1.31182920256894, "learning_rate": 8.907091563007824e-07, "loss": 0.4635, "step": 6550 }, { "epoch": 0.81, "grad_norm": 1.6185349718289779, "learning_rate": 8.895645671631542e-07, "loss": 0.5057, "step": 6551 }, { "epoch": 0.81, "grad_norm": 1.540854118339132, "learning_rate": 8.884206421025715e-07, "loss": 0.4671, "step": 6552 }, { "epoch": 0.81, "grad_norm": 1.4774845635395661, "learning_rate": 8.872773813038466e-07, "loss": 0.5321, "step": 6553 }, { "epoch": 0.81, "grad_norm": 1.5020064700053972, "learning_rate": 8.861347849516816e-07, "loss": 0.5313, "step": 6554 }, { "epoch": 0.81, "grad_norm": 0.7142941992414023, "learning_rate": 8.849928532306745e-07, "loss": 0.5107, "step": 6555 }, { "epoch": 0.81, "grad_norm": 1.91307148790412, "learning_rate": 8.838515863253128e-07, "loss": 0.5381, "step": 6556 }, { "epoch": 0.81, "grad_norm": 2.6351845953656223, "learning_rate": 8.827109844199805e-07, "loss": 0.4757, "step": 6557 }, { "epoch": 0.81, "grad_norm": 1.6147448240438045, "learning_rate": 8.815710476989487e-07, "loss": 0.5138, "step": 6558 }, { "epoch": 0.81, "grad_norm": 2.1570591241507078, "learning_rate": 8.804317763463877e-07, "loss": 0.4723, "step": 6559 }, { "epoch": 0.81, "grad_norm": 2.0196638013532677, "learning_rate": 8.792931705463542e-07, "loss": 0.5151, "step": 6560 }, { "epoch": 0.81, "grad_norm": 1.358548606885228, "learning_rate": 8.781552304828006e-07, "loss": 0.4373, "step": 6561 }, { "epoch": 0.81, "grad_norm": 1.5555876320839892, "learning_rate": 8.770179563395725e-07, "loss": 0.5272, "step": 6562 }, { "epoch": 0.81, "grad_norm": 1.873411723591231, "learning_rate": 8.758813483004047e-07, "loss": 0.558, "step": 6563 }, { "epoch": 0.81, "grad_norm": 2.5335119418462217, "learning_rate": 8.74745406548928e-07, "loss": 0.4761, "step": 6564 }, { "epoch": 0.81, "grad_norm": 1.4249307697946578, "learning_rate": 8.736101312686618e-07, "loss": 0.525, "step": 6565 }, { "epoch": 0.81, "grad_norm": 1.7083767116323012, "learning_rate": 8.72475522643022e-07, "loss": 0.4557, "step": 6566 }, { "epoch": 0.81, "grad_norm": 1.5754117600788526, "learning_rate": 8.713415808553121e-07, "loss": 0.4816, "step": 6567 }, { "epoch": 0.82, "grad_norm": 1.6529075246510267, "learning_rate": 8.702083060887323e-07, "loss": 0.4781, "step": 6568 }, { "epoch": 0.82, "grad_norm": 1.5804308238440943, "learning_rate": 8.690756985263737e-07, "loss": 0.4972, "step": 6569 }, { "epoch": 0.82, "grad_norm": 1.7295768902466224, "learning_rate": 8.679437583512168e-07, "loss": 0.5174, "step": 6570 }, { "epoch": 0.82, "grad_norm": 0.6384249650212698, "learning_rate": 8.668124857461385e-07, "loss": 0.5158, "step": 6571 }, { "epoch": 0.82, "grad_norm": 3.3816051165201575, "learning_rate": 8.656818808939038e-07, "loss": 0.4673, "step": 6572 }, { "epoch": 0.82, "grad_norm": 1.4216938442872074, "learning_rate": 8.645519439771744e-07, "loss": 0.4542, "step": 6573 }, { "epoch": 0.82, "grad_norm": 1.4833293871723272, "learning_rate": 8.634226751784991e-07, "loss": 0.5507, "step": 6574 }, { "epoch": 0.82, "grad_norm": 1.606769771979235, "learning_rate": 8.622940746803238e-07, "loss": 0.5216, "step": 6575 }, { "epoch": 0.82, "grad_norm": 1.348401256388603, "learning_rate": 8.611661426649809e-07, "loss": 0.4688, "step": 6576 }, { "epoch": 0.82, "grad_norm": 1.6077809246600447, "learning_rate": 8.600388793146991e-07, "loss": 0.5163, "step": 6577 }, { "epoch": 0.82, "grad_norm": 2.4424136275117476, "learning_rate": 8.589122848115977e-07, "loss": 0.5012, "step": 6578 }, { "epoch": 0.82, "grad_norm": 2.6050305815590833, "learning_rate": 8.5778635933769e-07, "loss": 0.4884, "step": 6579 }, { "epoch": 0.82, "grad_norm": 1.4093501853324268, "learning_rate": 8.566611030748767e-07, "loss": 0.4972, "step": 6580 }, { "epoch": 0.82, "grad_norm": 1.4112177810139739, "learning_rate": 8.555365162049529e-07, "loss": 0.5028, "step": 6581 }, { "epoch": 0.82, "grad_norm": 2.0270135079817404, "learning_rate": 8.544125989096063e-07, "loss": 0.5033, "step": 6582 }, { "epoch": 0.82, "grad_norm": 1.2998776012858089, "learning_rate": 8.532893513704149e-07, "loss": 0.4833, "step": 6583 }, { "epoch": 0.82, "grad_norm": 1.483841216926302, "learning_rate": 8.52166773768851e-07, "loss": 0.472, "step": 6584 }, { "epoch": 0.82, "grad_norm": 2.3508207620418844, "learning_rate": 8.51044866286273e-07, "loss": 0.46, "step": 6585 }, { "epoch": 0.82, "grad_norm": 1.653697562371862, "learning_rate": 8.499236291039403e-07, "loss": 0.5096, "step": 6586 }, { "epoch": 0.82, "grad_norm": 1.5744504468038782, "learning_rate": 8.488030624029947e-07, "loss": 0.5106, "step": 6587 }, { "epoch": 0.82, "grad_norm": 1.463803355129119, "learning_rate": 8.476831663644758e-07, "loss": 0.4511, "step": 6588 }, { "epoch": 0.82, "grad_norm": 1.6581237351175009, "learning_rate": 8.465639411693116e-07, "loss": 0.5617, "step": 6589 }, { "epoch": 0.82, "grad_norm": 1.5768383400051669, "learning_rate": 8.45445386998322e-07, "loss": 0.5398, "step": 6590 }, { "epoch": 0.82, "grad_norm": 1.6111736629103783, "learning_rate": 8.443275040322213e-07, "loss": 0.4301, "step": 6591 }, { "epoch": 0.82, "grad_norm": 1.5076339706999673, "learning_rate": 8.432102924516112e-07, "loss": 0.4785, "step": 6592 }, { "epoch": 0.82, "grad_norm": 2.8147989208223385, "learning_rate": 8.420937524369876e-07, "loss": 0.5381, "step": 6593 }, { "epoch": 0.82, "grad_norm": 1.5861599435098752, "learning_rate": 8.409778841687383e-07, "loss": 0.4792, "step": 6594 }, { "epoch": 0.82, "grad_norm": 1.7391419145258895, "learning_rate": 8.398626878271421e-07, "loss": 0.5066, "step": 6595 }, { "epoch": 0.82, "grad_norm": 1.3562454526494723, "learning_rate": 8.387481635923667e-07, "loss": 0.4684, "step": 6596 }, { "epoch": 0.82, "grad_norm": 1.680088091115047, "learning_rate": 8.376343116444763e-07, "loss": 0.5065, "step": 6597 }, { "epoch": 0.82, "grad_norm": 0.6406248070930465, "learning_rate": 8.365211321634209e-07, "loss": 0.5093, "step": 6598 }, { "epoch": 0.82, "grad_norm": 1.4648655817523175, "learning_rate": 8.354086253290445e-07, "loss": 0.4623, "step": 6599 }, { "epoch": 0.82, "grad_norm": 0.7348006256424257, "learning_rate": 8.342967913210843e-07, "loss": 0.5108, "step": 6600 }, { "epoch": 0.82, "grad_norm": 1.6226349665887883, "learning_rate": 8.331856303191632e-07, "loss": 0.4747, "step": 6601 }, { "epoch": 0.82, "grad_norm": 1.3900268223214782, "learning_rate": 8.320751425028039e-07, "loss": 0.5242, "step": 6602 }, { "epoch": 0.82, "grad_norm": 1.5022996561742294, "learning_rate": 8.309653280514118e-07, "loss": 0.4604, "step": 6603 }, { "epoch": 0.82, "grad_norm": 0.6514066294972094, "learning_rate": 8.2985618714429e-07, "loss": 0.4741, "step": 6604 }, { "epoch": 0.82, "grad_norm": 1.3691633878283713, "learning_rate": 8.287477199606276e-07, "loss": 0.4871, "step": 6605 }, { "epoch": 0.82, "grad_norm": 1.5466999097901013, "learning_rate": 8.276399266795088e-07, "loss": 0.5516, "step": 6606 }, { "epoch": 0.82, "grad_norm": 1.4721951482256834, "learning_rate": 8.265328074799067e-07, "loss": 0.5031, "step": 6607 }, { "epoch": 0.82, "grad_norm": 0.6451556978401359, "learning_rate": 8.254263625406856e-07, "loss": 0.5001, "step": 6608 }, { "epoch": 0.82, "grad_norm": 1.4413842515562194, "learning_rate": 8.243205920406016e-07, "loss": 0.5116, "step": 6609 }, { "epoch": 0.82, "grad_norm": 1.287070423719794, "learning_rate": 8.232154961583017e-07, "loss": 0.4357, "step": 6610 }, { "epoch": 0.82, "grad_norm": 1.578682068297509, "learning_rate": 8.22111075072326e-07, "loss": 0.4841, "step": 6611 }, { "epoch": 0.82, "grad_norm": 1.4041042897849592, "learning_rate": 8.210073289611004e-07, "loss": 0.486, "step": 6612 }, { "epoch": 0.82, "grad_norm": 1.4408155137435312, "learning_rate": 8.19904258002947e-07, "loss": 0.4741, "step": 6613 }, { "epoch": 0.82, "grad_norm": 1.8826684892795102, "learning_rate": 8.188018623760747e-07, "loss": 0.4876, "step": 6614 }, { "epoch": 0.82, "grad_norm": 1.4705849420242674, "learning_rate": 8.177001422585873e-07, "loss": 0.4982, "step": 6615 }, { "epoch": 0.82, "grad_norm": 1.6166224577611759, "learning_rate": 8.165990978284749e-07, "loss": 0.483, "step": 6616 }, { "epoch": 0.82, "grad_norm": 0.659097390392933, "learning_rate": 8.154987292636241e-07, "loss": 0.471, "step": 6617 }, { "epoch": 0.82, "grad_norm": 1.5422971791203492, "learning_rate": 8.143990367418059e-07, "loss": 0.5401, "step": 6618 }, { "epoch": 0.82, "grad_norm": 3.252984211458063, "learning_rate": 8.133000204406871e-07, "loss": 0.4784, "step": 6619 }, { "epoch": 0.82, "grad_norm": 1.4384644065425785, "learning_rate": 8.122016805378241e-07, "loss": 0.5327, "step": 6620 }, { "epoch": 0.82, "grad_norm": 1.8644675432583973, "learning_rate": 8.11104017210661e-07, "loss": 0.5353, "step": 6621 }, { "epoch": 0.82, "grad_norm": 1.6273030259451302, "learning_rate": 8.100070306365382e-07, "loss": 0.4845, "step": 6622 }, { "epoch": 0.82, "grad_norm": 1.5151125444202667, "learning_rate": 8.0891072099268e-07, "loss": 0.4817, "step": 6623 }, { "epoch": 0.82, "grad_norm": 1.6395495200122552, "learning_rate": 8.07815088456208e-07, "loss": 0.4903, "step": 6624 }, { "epoch": 0.82, "grad_norm": 1.8833985121582997, "learning_rate": 8.067201332041286e-07, "loss": 0.5114, "step": 6625 }, { "epoch": 0.82, "grad_norm": 1.2573482842800296, "learning_rate": 8.056258554133433e-07, "loss": 0.4473, "step": 6626 }, { "epoch": 0.82, "grad_norm": 4.841027142125862, "learning_rate": 8.045322552606427e-07, "loss": 0.491, "step": 6627 }, { "epoch": 0.82, "grad_norm": 1.4684161021406357, "learning_rate": 8.034393329227052e-07, "loss": 0.5239, "step": 6628 }, { "epoch": 0.82, "grad_norm": 1.4732512569164948, "learning_rate": 8.023470885761053e-07, "loss": 0.4898, "step": 6629 }, { "epoch": 0.82, "grad_norm": 3.2874992148845803, "learning_rate": 8.012555223973012e-07, "loss": 0.4937, "step": 6630 }, { "epoch": 0.82, "grad_norm": 1.3111270367689432, "learning_rate": 8.001646345626479e-07, "loss": 0.4869, "step": 6631 }, { "epoch": 0.82, "grad_norm": 1.5229838431069287, "learning_rate": 7.990744252483856e-07, "loss": 0.512, "step": 6632 }, { "epoch": 0.82, "grad_norm": 1.2912528249724036, "learning_rate": 7.979848946306496e-07, "loss": 0.4723, "step": 6633 }, { "epoch": 0.82, "grad_norm": 1.5275938148321992, "learning_rate": 7.968960428854605e-07, "loss": 0.5204, "step": 6634 }, { "epoch": 0.82, "grad_norm": 1.5890097981606934, "learning_rate": 7.958078701887334e-07, "loss": 0.4763, "step": 6635 }, { "epoch": 0.82, "grad_norm": 1.2712850160856939, "learning_rate": 7.947203767162737e-07, "loss": 0.4592, "step": 6636 }, { "epoch": 0.82, "grad_norm": 1.6645130002721502, "learning_rate": 7.936335626437725e-07, "loss": 0.5086, "step": 6637 }, { "epoch": 0.82, "grad_norm": 1.5569392743648256, "learning_rate": 7.925474281468165e-07, "loss": 0.479, "step": 6638 }, { "epoch": 0.82, "grad_norm": 1.492305078322501, "learning_rate": 7.914619734008782e-07, "loss": 0.512, "step": 6639 }, { "epoch": 0.82, "grad_norm": 1.4290671588367234, "learning_rate": 7.903771985813252e-07, "loss": 0.4731, "step": 6640 }, { "epoch": 0.82, "grad_norm": 1.48286399941432, "learning_rate": 7.892931038634094e-07, "loss": 0.4881, "step": 6641 }, { "epoch": 0.82, "grad_norm": 2.04536664575143, "learning_rate": 7.882096894222774e-07, "loss": 0.5289, "step": 6642 }, { "epoch": 0.82, "grad_norm": 1.758295482436992, "learning_rate": 7.871269554329652e-07, "loss": 0.5026, "step": 6643 }, { "epoch": 0.82, "grad_norm": 1.3460918611676007, "learning_rate": 7.860449020703964e-07, "loss": 0.4616, "step": 6644 }, { "epoch": 0.82, "grad_norm": 2.689594433035852, "learning_rate": 7.849635295093877e-07, "loss": 0.4857, "step": 6645 }, { "epoch": 0.82, "grad_norm": 0.6720809223765922, "learning_rate": 7.838828379246422e-07, "loss": 0.4977, "step": 6646 }, { "epoch": 0.82, "grad_norm": 1.47095461092173, "learning_rate": 7.828028274907579e-07, "loss": 0.5237, "step": 6647 }, { "epoch": 0.83, "grad_norm": 1.5393842391301888, "learning_rate": 7.817234983822169e-07, "loss": 0.4689, "step": 6648 }, { "epoch": 0.83, "grad_norm": 1.4527967916406928, "learning_rate": 7.806448507733977e-07, "loss": 0.4854, "step": 6649 }, { "epoch": 0.83, "grad_norm": 2.9815323734000887, "learning_rate": 7.795668848385624e-07, "loss": 0.5219, "step": 6650 }, { "epoch": 0.83, "grad_norm": 1.7985118940600708, "learning_rate": 7.784896007518667e-07, "loss": 0.4939, "step": 6651 }, { "epoch": 0.83, "grad_norm": 2.2185676476628795, "learning_rate": 7.774129986873574e-07, "loss": 0.5087, "step": 6652 }, { "epoch": 0.83, "grad_norm": 1.635521286764626, "learning_rate": 7.763370788189656e-07, "loss": 0.5354, "step": 6653 }, { "epoch": 0.83, "grad_norm": 1.3406571460882213, "learning_rate": 7.752618413205193e-07, "loss": 0.4728, "step": 6654 }, { "epoch": 0.83, "grad_norm": 1.8504903138524444, "learning_rate": 7.74187286365729e-07, "loss": 0.5104, "step": 6655 }, { "epoch": 0.83, "grad_norm": 1.389096639498019, "learning_rate": 7.731134141282015e-07, "loss": 0.5099, "step": 6656 }, { "epoch": 0.83, "grad_norm": 5.5366339284376185, "learning_rate": 7.720402247814279e-07, "loss": 0.4948, "step": 6657 }, { "epoch": 0.83, "grad_norm": 4.842372319506879, "learning_rate": 7.70967718498794e-07, "loss": 0.4862, "step": 6658 }, { "epoch": 0.83, "grad_norm": 1.4324724171615184, "learning_rate": 7.698958954535696e-07, "loss": 0.5086, "step": 6659 }, { "epoch": 0.83, "grad_norm": 1.522130642732948, "learning_rate": 7.688247558189194e-07, "loss": 0.5072, "step": 6660 }, { "epoch": 0.83, "grad_norm": 1.4951917839977171, "learning_rate": 7.67754299767895e-07, "loss": 0.4972, "step": 6661 }, { "epoch": 0.83, "grad_norm": 1.5651674502519535, "learning_rate": 7.666845274734375e-07, "loss": 0.4754, "step": 6662 }, { "epoch": 0.83, "grad_norm": 1.5627366549655106, "learning_rate": 7.656154391083786e-07, "loss": 0.4705, "step": 6663 }, { "epoch": 0.83, "grad_norm": 0.6720391307867858, "learning_rate": 7.645470348454381e-07, "loss": 0.5589, "step": 6664 }, { "epoch": 0.83, "grad_norm": 3.376154003760944, "learning_rate": 7.634793148572273e-07, "loss": 0.4723, "step": 6665 }, { "epoch": 0.83, "grad_norm": 1.8004953958144452, "learning_rate": 7.624122793162442e-07, "loss": 0.5408, "step": 6666 }, { "epoch": 0.83, "grad_norm": 1.5105450022050773, "learning_rate": 7.613459283948788e-07, "loss": 0.5245, "step": 6667 }, { "epoch": 0.83, "grad_norm": 1.5470366339253507, "learning_rate": 7.602802622654109e-07, "loss": 0.479, "step": 6668 }, { "epoch": 0.83, "grad_norm": 1.4887463762505064, "learning_rate": 7.592152811000053e-07, "loss": 0.4652, "step": 6669 }, { "epoch": 0.83, "grad_norm": 0.6424167995495772, "learning_rate": 7.581509850707214e-07, "loss": 0.4651, "step": 6670 }, { "epoch": 0.83, "grad_norm": 1.493513154419008, "learning_rate": 7.570873743495039e-07, "loss": 0.5321, "step": 6671 }, { "epoch": 0.83, "grad_norm": 1.4814326981200936, "learning_rate": 7.560244491081903e-07, "loss": 0.4731, "step": 6672 }, { "epoch": 0.83, "grad_norm": 1.4213082522322267, "learning_rate": 7.549622095185033e-07, "loss": 0.5278, "step": 6673 }, { "epoch": 0.83, "grad_norm": 1.290850849983848, "learning_rate": 7.539006557520595e-07, "loss": 0.4822, "step": 6674 }, { "epoch": 0.83, "grad_norm": 1.4676546201481493, "learning_rate": 7.528397879803589e-07, "loss": 0.494, "step": 6675 }, { "epoch": 0.83, "grad_norm": 1.9356018002672504, "learning_rate": 7.517796063747984e-07, "loss": 0.4418, "step": 6676 }, { "epoch": 0.83, "grad_norm": 2.3003381438959716, "learning_rate": 7.507201111066581e-07, "loss": 0.5061, "step": 6677 }, { "epoch": 0.83, "grad_norm": 0.9853811495656869, "learning_rate": 7.496613023471061e-07, "loss": 0.468, "step": 6678 }, { "epoch": 0.83, "grad_norm": 1.9867056108884016, "learning_rate": 7.486031802672061e-07, "loss": 0.4867, "step": 6679 }, { "epoch": 0.83, "grad_norm": 2.2765450132478833, "learning_rate": 7.475457450379036e-07, "loss": 0.4914, "step": 6680 }, { "epoch": 0.83, "grad_norm": 1.230696818398601, "learning_rate": 7.464889968300398e-07, "loss": 0.4248, "step": 6681 }, { "epoch": 0.83, "grad_norm": 1.4779685084729597, "learning_rate": 7.454329358143392e-07, "loss": 0.5187, "step": 6682 }, { "epoch": 0.83, "grad_norm": 1.6978034202504741, "learning_rate": 7.44377562161418e-07, "loss": 0.525, "step": 6683 }, { "epoch": 0.83, "grad_norm": 1.902718193616752, "learning_rate": 7.433228760417827e-07, "loss": 0.4744, "step": 6684 }, { "epoch": 0.83, "grad_norm": 1.5683391925887717, "learning_rate": 7.422688776258268e-07, "loss": 0.4921, "step": 6685 }, { "epoch": 0.83, "grad_norm": 1.7998659135311224, "learning_rate": 7.412155670838311e-07, "loss": 0.5272, "step": 6686 }, { "epoch": 0.83, "grad_norm": 1.400954649684802, "learning_rate": 7.401629445859704e-07, "loss": 0.4615, "step": 6687 }, { "epoch": 0.83, "grad_norm": 1.7090806767697804, "learning_rate": 7.391110103023031e-07, "loss": 0.5015, "step": 6688 }, { "epoch": 0.83, "grad_norm": 1.3007966513175657, "learning_rate": 7.38059764402777e-07, "loss": 0.4713, "step": 6689 }, { "epoch": 0.83, "grad_norm": 1.4708094230997422, "learning_rate": 7.370092070572332e-07, "loss": 0.4907, "step": 6690 }, { "epoch": 0.83, "grad_norm": 2.5038268737583214, "learning_rate": 7.359593384353958e-07, "loss": 0.5051, "step": 6691 }, { "epoch": 0.83, "grad_norm": 2.524279014891403, "learning_rate": 7.349101587068819e-07, "loss": 0.5052, "step": 6692 }, { "epoch": 0.83, "grad_norm": 1.7172922101698749, "learning_rate": 7.338616680411953e-07, "loss": 0.4597, "step": 6693 }, { "epoch": 0.83, "grad_norm": 1.5930470211813232, "learning_rate": 7.3281386660773e-07, "loss": 0.515, "step": 6694 }, { "epoch": 0.83, "grad_norm": 1.8029058761861165, "learning_rate": 7.317667545757656e-07, "loss": 0.4888, "step": 6695 }, { "epoch": 0.83, "grad_norm": 1.4923651663363493, "learning_rate": 7.307203321144746e-07, "loss": 0.4664, "step": 6696 }, { "epoch": 0.83, "grad_norm": 1.6185986604616538, "learning_rate": 7.296745993929144e-07, "loss": 0.4886, "step": 6697 }, { "epoch": 0.83, "grad_norm": 1.2183265656631697, "learning_rate": 7.286295565800305e-07, "loss": 0.5012, "step": 6698 }, { "epoch": 0.83, "grad_norm": 3.0722141533527365, "learning_rate": 7.275852038446623e-07, "loss": 0.4462, "step": 6699 }, { "epoch": 0.83, "grad_norm": 1.5310571180094792, "learning_rate": 7.265415413555304e-07, "loss": 0.5104, "step": 6700 }, { "epoch": 0.83, "grad_norm": 1.5599434286674574, "learning_rate": 7.254985692812517e-07, "loss": 0.5282, "step": 6701 }, { "epoch": 0.83, "grad_norm": 4.125125729176223, "learning_rate": 7.244562877903244e-07, "loss": 0.5671, "step": 6702 }, { "epoch": 0.83, "grad_norm": 0.6534221527354256, "learning_rate": 7.234146970511408e-07, "loss": 0.4942, "step": 6703 }, { "epoch": 0.83, "grad_norm": 1.4397086788418985, "learning_rate": 7.22373797231976e-07, "loss": 0.488, "step": 6704 }, { "epoch": 0.83, "grad_norm": 1.4557116794569993, "learning_rate": 7.213335885010004e-07, "loss": 0.5201, "step": 6705 }, { "epoch": 0.83, "grad_norm": 1.5797733791302986, "learning_rate": 7.202940710262657e-07, "loss": 0.5304, "step": 6706 }, { "epoch": 0.83, "grad_norm": 0.6977736195425923, "learning_rate": 7.192552449757157e-07, "loss": 0.4691, "step": 6707 }, { "epoch": 0.83, "grad_norm": 1.5101471426826862, "learning_rate": 7.18217110517182e-07, "loss": 0.5608, "step": 6708 }, { "epoch": 0.83, "grad_norm": 1.5421036528060617, "learning_rate": 7.171796678183845e-07, "loss": 0.4554, "step": 6709 }, { "epoch": 0.83, "grad_norm": 1.8703537498179146, "learning_rate": 7.161429170469325e-07, "loss": 0.4687, "step": 6710 }, { "epoch": 0.83, "grad_norm": 3.48883273820731, "learning_rate": 7.151068583703202e-07, "loss": 0.4986, "step": 6711 }, { "epoch": 0.83, "grad_norm": 1.6717400136691807, "learning_rate": 7.140714919559339e-07, "loss": 0.5116, "step": 6712 }, { "epoch": 0.83, "grad_norm": 1.4106732687778272, "learning_rate": 7.13036817971044e-07, "loss": 0.5113, "step": 6713 }, { "epoch": 0.83, "grad_norm": 1.5991966251618026, "learning_rate": 7.120028365828136e-07, "loss": 0.5148, "step": 6714 }, { "epoch": 0.83, "grad_norm": 2.17589346803186, "learning_rate": 7.109695479582889e-07, "loss": 0.4669, "step": 6715 }, { "epoch": 0.83, "grad_norm": 2.206969652600979, "learning_rate": 7.099369522644084e-07, "loss": 0.4686, "step": 6716 }, { "epoch": 0.83, "grad_norm": 1.8786885049448072, "learning_rate": 7.089050496679983e-07, "loss": 0.4683, "step": 6717 }, { "epoch": 0.83, "grad_norm": 1.7313156690433527, "learning_rate": 7.078738403357682e-07, "loss": 0.4429, "step": 6718 }, { "epoch": 0.83, "grad_norm": 1.9177512900258369, "learning_rate": 7.068433244343221e-07, "loss": 0.4881, "step": 6719 }, { "epoch": 0.83, "grad_norm": 1.477028490334779, "learning_rate": 7.058135021301465e-07, "loss": 0.4805, "step": 6720 }, { "epoch": 0.83, "grad_norm": 1.5125368951161189, "learning_rate": 7.04784373589621e-07, "loss": 0.4786, "step": 6721 }, { "epoch": 0.83, "grad_norm": 1.4642545983208444, "learning_rate": 7.037559389790078e-07, "loss": 0.4947, "step": 6722 }, { "epoch": 0.83, "grad_norm": 1.3479241995411038, "learning_rate": 7.027281984644613e-07, "loss": 0.4791, "step": 6723 }, { "epoch": 0.83, "grad_norm": 2.4424745723584182, "learning_rate": 7.017011522120198e-07, "loss": 0.5482, "step": 6724 }, { "epoch": 0.83, "grad_norm": 1.6080376385555766, "learning_rate": 7.00674800387614e-07, "loss": 0.4837, "step": 6725 }, { "epoch": 0.83, "grad_norm": 1.548812317353501, "learning_rate": 6.996491431570601e-07, "loss": 0.478, "step": 6726 }, { "epoch": 0.83, "grad_norm": 2.0592868543349034, "learning_rate": 6.9862418068606e-07, "loss": 0.4935, "step": 6727 }, { "epoch": 0.83, "grad_norm": 1.4010202204985316, "learning_rate": 6.975999131402078e-07, "loss": 0.4488, "step": 6728 }, { "epoch": 0.84, "grad_norm": 1.3449621662643738, "learning_rate": 6.965763406849807e-07, "loss": 0.5375, "step": 6729 }, { "epoch": 0.84, "grad_norm": 1.4492464573692114, "learning_rate": 6.95553463485748e-07, "loss": 0.5089, "step": 6730 }, { "epoch": 0.84, "grad_norm": 2.11499981877359, "learning_rate": 6.945312817077627e-07, "loss": 0.5208, "step": 6731 }, { "epoch": 0.84, "grad_norm": 2.3102363888371937, "learning_rate": 6.935097955161685e-07, "loss": 0.4738, "step": 6732 }, { "epoch": 0.84, "grad_norm": 1.6401365279072655, "learning_rate": 6.924890050759942e-07, "loss": 0.4658, "step": 6733 }, { "epoch": 0.84, "grad_norm": 0.7261704136767818, "learning_rate": 6.914689105521583e-07, "loss": 0.5132, "step": 6734 }, { "epoch": 0.84, "grad_norm": 1.5893186137882933, "learning_rate": 6.904495121094673e-07, "loss": 0.5327, "step": 6735 }, { "epoch": 0.84, "grad_norm": 7.970506769573906, "learning_rate": 6.894308099126118e-07, "loss": 0.5151, "step": 6736 }, { "epoch": 0.84, "grad_norm": 1.5212801434657697, "learning_rate": 6.884128041261739e-07, "loss": 0.4688, "step": 6737 }, { "epoch": 0.84, "grad_norm": 1.6441088709954745, "learning_rate": 6.873954949146194e-07, "loss": 0.4809, "step": 6738 }, { "epoch": 0.84, "grad_norm": 1.5754933441117371, "learning_rate": 6.863788824423056e-07, "loss": 0.5092, "step": 6739 }, { "epoch": 0.84, "grad_norm": 1.5357523625887821, "learning_rate": 6.853629668734735e-07, "loss": 0.4985, "step": 6740 }, { "epoch": 0.84, "grad_norm": 1.7619076256572428, "learning_rate": 6.843477483722544e-07, "loss": 0.4898, "step": 6741 }, { "epoch": 0.84, "grad_norm": 1.5544198655012471, "learning_rate": 6.83333227102666e-07, "loss": 0.5098, "step": 6742 }, { "epoch": 0.84, "grad_norm": 1.5978962889844126, "learning_rate": 6.823194032286117e-07, "loss": 0.5124, "step": 6743 }, { "epoch": 0.84, "grad_norm": 1.71567831075207, "learning_rate": 6.813062769138856e-07, "loss": 0.4856, "step": 6744 }, { "epoch": 0.84, "grad_norm": 1.7832381321843704, "learning_rate": 6.80293848322165e-07, "loss": 0.5148, "step": 6745 }, { "epoch": 0.84, "grad_norm": 1.4160018873091047, "learning_rate": 6.792821176170189e-07, "loss": 0.4683, "step": 6746 }, { "epoch": 0.84, "grad_norm": 0.6406893819086323, "learning_rate": 6.782710849618989e-07, "loss": 0.4772, "step": 6747 }, { "epoch": 0.84, "grad_norm": 1.4935104545738345, "learning_rate": 6.772607505201489e-07, "loss": 0.5658, "step": 6748 }, { "epoch": 0.84, "grad_norm": 3.196383469215414, "learning_rate": 6.762511144549949e-07, "loss": 0.4915, "step": 6749 }, { "epoch": 0.84, "grad_norm": 1.548129351775618, "learning_rate": 6.75242176929553e-07, "loss": 0.5091, "step": 6750 }, { "epoch": 0.84, "grad_norm": 11.626900511568277, "learning_rate": 6.74233938106828e-07, "loss": 0.4861, "step": 6751 }, { "epoch": 0.84, "grad_norm": 2.3609349319146933, "learning_rate": 6.732263981497073e-07, "loss": 0.5313, "step": 6752 }, { "epoch": 0.84, "grad_norm": 1.650097378062769, "learning_rate": 6.722195572209694e-07, "loss": 0.4674, "step": 6753 }, { "epoch": 0.84, "grad_norm": 19.547528360647437, "learning_rate": 6.712134154832773e-07, "loss": 0.4418, "step": 6754 }, { "epoch": 0.84, "grad_norm": 1.4892496792692191, "learning_rate": 6.702079730991829e-07, "loss": 0.5574, "step": 6755 }, { "epoch": 0.84, "grad_norm": 1.3719467735091257, "learning_rate": 6.692032302311229e-07, "loss": 0.5561, "step": 6756 }, { "epoch": 0.84, "grad_norm": 10.027162632150969, "learning_rate": 6.68199187041424e-07, "loss": 0.4945, "step": 6757 }, { "epoch": 0.84, "grad_norm": 2.517854761362138, "learning_rate": 6.671958436922976e-07, "loss": 0.4923, "step": 6758 }, { "epoch": 0.84, "grad_norm": 1.982350398814244, "learning_rate": 6.661932003458421e-07, "loss": 0.4545, "step": 6759 }, { "epoch": 0.84, "grad_norm": 1.7313162513888793, "learning_rate": 6.651912571640451e-07, "loss": 0.5244, "step": 6760 }, { "epoch": 0.84, "grad_norm": 1.9088298322723904, "learning_rate": 6.641900143087765e-07, "loss": 0.4495, "step": 6761 }, { "epoch": 0.84, "grad_norm": 1.2008124010151733, "learning_rate": 6.631894719417987e-07, "loss": 0.4727, "step": 6762 }, { "epoch": 0.84, "grad_norm": 1.4947199189202311, "learning_rate": 6.62189630224756e-07, "loss": 0.484, "step": 6763 }, { "epoch": 0.84, "grad_norm": 1.3880490751486438, "learning_rate": 6.611904893191834e-07, "loss": 0.4469, "step": 6764 }, { "epoch": 0.84, "grad_norm": 1.5934160169118272, "learning_rate": 6.601920493864983e-07, "loss": 0.5014, "step": 6765 }, { "epoch": 0.84, "grad_norm": 1.5999476697876904, "learning_rate": 6.591943105880111e-07, "loss": 0.4419, "step": 6766 }, { "epoch": 0.84, "grad_norm": 4.5642275673683885, "learning_rate": 6.581972730849134e-07, "loss": 0.4993, "step": 6767 }, { "epoch": 0.84, "grad_norm": 2.8449583643704757, "learning_rate": 6.572009370382842e-07, "loss": 0.4553, "step": 6768 }, { "epoch": 0.84, "grad_norm": 1.3906538181873989, "learning_rate": 6.562053026090931e-07, "loss": 0.4385, "step": 6769 }, { "epoch": 0.84, "grad_norm": 1.352069179403521, "learning_rate": 6.552103699581908e-07, "loss": 0.46, "step": 6770 }, { "epoch": 0.84, "grad_norm": 1.3583931022423161, "learning_rate": 6.542161392463197e-07, "loss": 0.5101, "step": 6771 }, { "epoch": 0.84, "grad_norm": 1.297035235675263, "learning_rate": 6.53222610634105e-07, "loss": 0.4222, "step": 6772 }, { "epoch": 0.84, "grad_norm": 1.5077869483962805, "learning_rate": 6.522297842820613e-07, "loss": 0.5018, "step": 6773 }, { "epoch": 0.84, "grad_norm": 0.6769521844531408, "learning_rate": 6.512376603505855e-07, "loss": 0.5211, "step": 6774 }, { "epoch": 0.84, "grad_norm": 0.700422140158777, "learning_rate": 6.502462389999681e-07, "loss": 0.502, "step": 6775 }, { "epoch": 0.84, "grad_norm": 1.69152040435353, "learning_rate": 6.4925552039038e-07, "loss": 0.4762, "step": 6776 }, { "epoch": 0.84, "grad_norm": 1.3741472861278572, "learning_rate": 6.482655046818798e-07, "loss": 0.4722, "step": 6777 }, { "epoch": 0.84, "grad_norm": 1.3143366070549236, "learning_rate": 6.472761920344145e-07, "loss": 0.4673, "step": 6778 }, { "epoch": 0.84, "grad_norm": 1.7907775755887394, "learning_rate": 6.462875826078152e-07, "loss": 0.4486, "step": 6779 }, { "epoch": 0.84, "grad_norm": 2.1548052117194203, "learning_rate": 6.452996765618019e-07, "loss": 0.4873, "step": 6780 }, { "epoch": 0.84, "grad_norm": 1.4313653704255374, "learning_rate": 6.443124740559775e-07, "loss": 0.5491, "step": 6781 }, { "epoch": 0.84, "grad_norm": 1.4906237832229412, "learning_rate": 6.433259752498344e-07, "loss": 0.5144, "step": 6782 }, { "epoch": 0.84, "grad_norm": 2.533754994591377, "learning_rate": 6.423401803027502e-07, "loss": 0.4494, "step": 6783 }, { "epoch": 0.84, "grad_norm": 0.7584702113360715, "learning_rate": 6.413550893739901e-07, "loss": 0.4887, "step": 6784 }, { "epoch": 0.84, "grad_norm": 1.887154015113772, "learning_rate": 6.403707026227013e-07, "loss": 0.4916, "step": 6785 }, { "epoch": 0.84, "grad_norm": 1.3250076607832437, "learning_rate": 6.393870202079228e-07, "loss": 0.5126, "step": 6786 }, { "epoch": 0.84, "grad_norm": 1.7756467996112335, "learning_rate": 6.384040422885762e-07, "loss": 0.5473, "step": 6787 }, { "epoch": 0.84, "grad_norm": 1.3992838988740335, "learning_rate": 6.374217690234685e-07, "loss": 0.487, "step": 6788 }, { "epoch": 0.84, "grad_norm": 1.5766772797693478, "learning_rate": 6.364402005712978e-07, "loss": 0.4455, "step": 6789 }, { "epoch": 0.84, "grad_norm": 1.7894202623754922, "learning_rate": 6.354593370906414e-07, "loss": 0.452, "step": 6790 }, { "epoch": 0.84, "grad_norm": 1.3385213206944417, "learning_rate": 6.344791787399707e-07, "loss": 0.449, "step": 6791 }, { "epoch": 0.84, "grad_norm": 2.5542387817707777, "learning_rate": 6.334997256776354e-07, "loss": 0.5214, "step": 6792 }, { "epoch": 0.84, "grad_norm": 1.3368617749906255, "learning_rate": 6.325209780618769e-07, "loss": 0.4635, "step": 6793 }, { "epoch": 0.84, "grad_norm": 1.5667271151416986, "learning_rate": 6.31542936050819e-07, "loss": 0.4699, "step": 6794 }, { "epoch": 0.84, "grad_norm": 1.590647491310967, "learning_rate": 6.305655998024751e-07, "loss": 0.4396, "step": 6795 }, { "epoch": 0.84, "grad_norm": 1.4654616436837038, "learning_rate": 6.295889694747409e-07, "loss": 0.5049, "step": 6796 }, { "epoch": 0.84, "grad_norm": 1.2355365348432805, "learning_rate": 6.286130452253985e-07, "loss": 0.4376, "step": 6797 }, { "epoch": 0.84, "grad_norm": 2.384137486464471, "learning_rate": 6.276378272121187e-07, "loss": 0.4944, "step": 6798 }, { "epoch": 0.84, "grad_norm": 1.4240883171336052, "learning_rate": 6.266633155924556e-07, "loss": 0.5043, "step": 6799 }, { "epoch": 0.84, "grad_norm": 1.4914504853613988, "learning_rate": 6.256895105238526e-07, "loss": 0.4874, "step": 6800 }, { "epoch": 0.84, "grad_norm": 1.6128088096293538, "learning_rate": 6.247164121636335e-07, "loss": 0.4839, "step": 6801 }, { "epoch": 0.84, "grad_norm": 2.716479416737869, "learning_rate": 6.237440206690126e-07, "loss": 0.4588, "step": 6802 }, { "epoch": 0.84, "grad_norm": 1.5119129085354808, "learning_rate": 6.227723361970872e-07, "loss": 0.5006, "step": 6803 }, { "epoch": 0.84, "grad_norm": 2.338104109990617, "learning_rate": 6.218013589048427e-07, "loss": 0.4623, "step": 6804 }, { "epoch": 0.84, "grad_norm": 2.765034388598217, "learning_rate": 6.208310889491492e-07, "loss": 0.4845, "step": 6805 }, { "epoch": 0.84, "grad_norm": 1.43352133288454, "learning_rate": 6.198615264867586e-07, "loss": 0.5209, "step": 6806 }, { "epoch": 0.84, "grad_norm": 1.9436874397350934, "learning_rate": 6.188926716743176e-07, "loss": 0.5162, "step": 6807 }, { "epoch": 0.84, "grad_norm": 1.781493429515346, "learning_rate": 6.179245246683501e-07, "loss": 0.4996, "step": 6808 }, { "epoch": 0.84, "grad_norm": 1.4249171534997158, "learning_rate": 6.169570856252705e-07, "loss": 0.4656, "step": 6809 }, { "epoch": 0.85, "grad_norm": 0.6613674725633242, "learning_rate": 6.159903547013746e-07, "loss": 0.5147, "step": 6810 }, { "epoch": 0.85, "grad_norm": 2.0691800130247353, "learning_rate": 6.150243320528492e-07, "loss": 0.563, "step": 6811 }, { "epoch": 0.85, "grad_norm": 1.8332421182034946, "learning_rate": 6.140590178357613e-07, "loss": 0.4808, "step": 6812 }, { "epoch": 0.85, "grad_norm": 1.9932584487443314, "learning_rate": 6.130944122060678e-07, "loss": 0.4274, "step": 6813 }, { "epoch": 0.85, "grad_norm": 2.389824362923636, "learning_rate": 6.121305153196089e-07, "loss": 0.4536, "step": 6814 }, { "epoch": 0.85, "grad_norm": 1.6346590063909652, "learning_rate": 6.111673273321078e-07, "loss": 0.4885, "step": 6815 }, { "epoch": 0.85, "grad_norm": 1.9257813859222959, "learning_rate": 6.102048483991802e-07, "loss": 0.4537, "step": 6816 }, { "epoch": 0.85, "grad_norm": 2.2158152973201872, "learning_rate": 6.092430786763204e-07, "loss": 0.4991, "step": 6817 }, { "epoch": 0.85, "grad_norm": 1.7135611233357684, "learning_rate": 6.082820183189125e-07, "loss": 0.495, "step": 6818 }, { "epoch": 0.85, "grad_norm": 1.550687378435678, "learning_rate": 6.073216674822224e-07, "loss": 0.4899, "step": 6819 }, { "epoch": 0.85, "grad_norm": 0.6664152226717598, "learning_rate": 6.063620263214054e-07, "loss": 0.4895, "step": 6820 }, { "epoch": 0.85, "grad_norm": 0.6455128240796204, "learning_rate": 6.054030949914974e-07, "loss": 0.4846, "step": 6821 }, { "epoch": 0.85, "grad_norm": 1.371850460382941, "learning_rate": 6.044448736474246e-07, "loss": 0.4772, "step": 6822 }, { "epoch": 0.85, "grad_norm": 1.4973161988390176, "learning_rate": 6.03487362443994e-07, "loss": 0.5066, "step": 6823 }, { "epoch": 0.85, "grad_norm": 1.3825259689837013, "learning_rate": 6.025305615359012e-07, "loss": 0.4931, "step": 6824 }, { "epoch": 0.85, "grad_norm": 1.5546522556411053, "learning_rate": 6.015744710777272e-07, "loss": 0.4854, "step": 6825 }, { "epoch": 0.85, "grad_norm": 1.3147495445183237, "learning_rate": 6.006190912239335e-07, "loss": 0.502, "step": 6826 }, { "epoch": 0.85, "grad_norm": 1.4508606786369576, "learning_rate": 5.996644221288727e-07, "loss": 0.5091, "step": 6827 }, { "epoch": 0.85, "grad_norm": 1.4484167775380636, "learning_rate": 5.987104639467789e-07, "loss": 0.5087, "step": 6828 }, { "epoch": 0.85, "grad_norm": 1.4993726181242262, "learning_rate": 5.977572168317736e-07, "loss": 0.47, "step": 6829 }, { "epoch": 0.85, "grad_norm": 1.4413689595288772, "learning_rate": 5.968046809378603e-07, "loss": 0.4919, "step": 6830 }, { "epoch": 0.85, "grad_norm": 1.5899499033265325, "learning_rate": 5.958528564189303e-07, "loss": 0.4734, "step": 6831 }, { "epoch": 0.85, "grad_norm": 2.389840082506576, "learning_rate": 5.949017434287613e-07, "loss": 0.5051, "step": 6832 }, { "epoch": 0.85, "grad_norm": 1.3909594420089038, "learning_rate": 5.939513421210108e-07, "loss": 0.4938, "step": 6833 }, { "epoch": 0.85, "grad_norm": 1.5849512907541565, "learning_rate": 5.930016526492272e-07, "loss": 0.5064, "step": 6834 }, { "epoch": 0.85, "grad_norm": 1.492599074910955, "learning_rate": 5.920526751668388e-07, "loss": 0.4408, "step": 6835 }, { "epoch": 0.85, "grad_norm": 1.5042860804074434, "learning_rate": 5.911044098271634e-07, "loss": 0.5245, "step": 6836 }, { "epoch": 0.85, "grad_norm": 1.6754168298874146, "learning_rate": 5.901568567833999e-07, "loss": 0.4731, "step": 6837 }, { "epoch": 0.85, "grad_norm": 1.409445270913483, "learning_rate": 5.892100161886355e-07, "loss": 0.5193, "step": 6838 }, { "epoch": 0.85, "grad_norm": 1.6385356392034915, "learning_rate": 5.882638881958391e-07, "loss": 0.4756, "step": 6839 }, { "epoch": 0.85, "grad_norm": 1.9955726024647689, "learning_rate": 5.873184729578662e-07, "loss": 0.4707, "step": 6840 }, { "epoch": 0.85, "grad_norm": 1.4777711350788643, "learning_rate": 5.863737706274585e-07, "loss": 0.5064, "step": 6841 }, { "epoch": 0.85, "grad_norm": 1.323637595905769, "learning_rate": 5.854297813572391e-07, "loss": 0.463, "step": 6842 }, { "epoch": 0.85, "grad_norm": 2.2142497034160975, "learning_rate": 5.844865052997195e-07, "loss": 0.5102, "step": 6843 }, { "epoch": 0.85, "grad_norm": 0.6423488316310543, "learning_rate": 5.835439426072925e-07, "loss": 0.4905, "step": 6844 }, { "epoch": 0.85, "grad_norm": 2.0563604548077445, "learning_rate": 5.82602093432239e-07, "loss": 0.4863, "step": 6845 }, { "epoch": 0.85, "grad_norm": 2.095485821395087, "learning_rate": 5.816609579267213e-07, "loss": 0.522, "step": 6846 }, { "epoch": 0.85, "grad_norm": 1.4831526662475276, "learning_rate": 5.807205362427903e-07, "loss": 0.4434, "step": 6847 }, { "epoch": 0.85, "grad_norm": 1.3373394692004903, "learning_rate": 5.797808285323769e-07, "loss": 0.4971, "step": 6848 }, { "epoch": 0.85, "grad_norm": 1.644148456235197, "learning_rate": 5.788418349473007e-07, "loss": 0.4669, "step": 6849 }, { "epoch": 0.85, "grad_norm": 1.5073342737537523, "learning_rate": 5.779035556392649e-07, "loss": 0.5087, "step": 6850 }, { "epoch": 0.85, "grad_norm": 1.379628713431779, "learning_rate": 5.769659907598552e-07, "loss": 0.4415, "step": 6851 }, { "epoch": 0.85, "grad_norm": 2.564518382684953, "learning_rate": 5.760291404605456e-07, "loss": 0.5007, "step": 6852 }, { "epoch": 0.85, "grad_norm": 1.4022145113727098, "learning_rate": 5.750930048926894e-07, "loss": 0.4262, "step": 6853 }, { "epoch": 0.85, "grad_norm": 1.304688007201386, "learning_rate": 5.741575842075315e-07, "loss": 0.4666, "step": 6854 }, { "epoch": 0.85, "grad_norm": 1.545692701820653, "learning_rate": 5.732228785561933e-07, "loss": 0.5042, "step": 6855 }, { "epoch": 0.85, "grad_norm": 1.8449247267045616, "learning_rate": 5.722888880896871e-07, "loss": 0.4493, "step": 6856 }, { "epoch": 0.85, "grad_norm": 1.8883732598933922, "learning_rate": 5.713556129589076e-07, "loss": 0.477, "step": 6857 }, { "epoch": 0.85, "grad_norm": 1.386474045155772, "learning_rate": 5.704230533146321e-07, "loss": 0.4596, "step": 6858 }, { "epoch": 0.85, "grad_norm": 1.6987912717293612, "learning_rate": 5.69491209307525e-07, "loss": 0.4987, "step": 6859 }, { "epoch": 0.85, "grad_norm": 2.060497679451871, "learning_rate": 5.685600810881331e-07, "loss": 0.505, "step": 6860 }, { "epoch": 0.85, "grad_norm": 1.3193315097978286, "learning_rate": 5.676296688068894e-07, "loss": 0.5181, "step": 6861 }, { "epoch": 0.85, "grad_norm": 1.485638345755509, "learning_rate": 5.666999726141086e-07, "loss": 0.5405, "step": 6862 }, { "epoch": 0.85, "grad_norm": 2.3331619343085594, "learning_rate": 5.65770992659993e-07, "loss": 0.4544, "step": 6863 }, { "epoch": 0.85, "grad_norm": 0.6910977225802527, "learning_rate": 5.648427290946251e-07, "loss": 0.4943, "step": 6864 }, { "epoch": 0.85, "grad_norm": 1.5988001915255896, "learning_rate": 5.639151820679778e-07, "loss": 0.5161, "step": 6865 }, { "epoch": 0.85, "grad_norm": 0.5984809540074646, "learning_rate": 5.629883517299023e-07, "loss": 0.4703, "step": 6866 }, { "epoch": 0.85, "grad_norm": 2.0140295213578163, "learning_rate": 5.620622382301349e-07, "loss": 0.4887, "step": 6867 }, { "epoch": 0.85, "grad_norm": 1.6634956971310924, "learning_rate": 5.611368417183e-07, "loss": 0.4703, "step": 6868 }, { "epoch": 0.85, "grad_norm": 1.2212580229409449, "learning_rate": 5.602121623439016e-07, "loss": 0.5075, "step": 6869 }, { "epoch": 0.85, "grad_norm": 1.4546267165288047, "learning_rate": 5.592882002563316e-07, "loss": 0.5057, "step": 6870 }, { "epoch": 0.85, "grad_norm": 1.7950585081558756, "learning_rate": 5.583649556048615e-07, "loss": 0.4638, "step": 6871 }, { "epoch": 0.85, "grad_norm": 1.5665975330120265, "learning_rate": 5.574424285386515e-07, "loss": 0.5191, "step": 6872 }, { "epoch": 0.85, "grad_norm": 1.4222467915785417, "learning_rate": 5.56520619206744e-07, "loss": 0.543, "step": 6873 }, { "epoch": 0.85, "grad_norm": 1.5842470350852345, "learning_rate": 5.555995277580661e-07, "loss": 0.4898, "step": 6874 }, { "epoch": 0.85, "grad_norm": 1.5901026069408062, "learning_rate": 5.546791543414271e-07, "loss": 0.496, "step": 6875 }, { "epoch": 0.85, "grad_norm": 1.5356187517929056, "learning_rate": 5.537594991055207e-07, "loss": 0.4643, "step": 6876 }, { "epoch": 0.85, "grad_norm": 1.5491125842300477, "learning_rate": 5.52840562198927e-07, "loss": 0.4627, "step": 6877 }, { "epoch": 0.85, "grad_norm": 1.6161860408966715, "learning_rate": 5.519223437701066e-07, "loss": 0.5372, "step": 6878 }, { "epoch": 0.85, "grad_norm": 2.671789743114391, "learning_rate": 5.510048439674082e-07, "loss": 0.4862, "step": 6879 }, { "epoch": 0.85, "grad_norm": 1.320335868944061, "learning_rate": 5.500880629390581e-07, "loss": 0.4466, "step": 6880 }, { "epoch": 0.85, "grad_norm": 1.9753531052116389, "learning_rate": 5.491720008331747e-07, "loss": 0.479, "step": 6881 }, { "epoch": 0.85, "grad_norm": 1.4582939912346808, "learning_rate": 5.482566577977533e-07, "loss": 0.4891, "step": 6882 }, { "epoch": 0.85, "grad_norm": 1.5970603233940475, "learning_rate": 5.473420339806768e-07, "loss": 0.527, "step": 6883 }, { "epoch": 0.85, "grad_norm": 1.4712123078288173, "learning_rate": 5.464281295297108e-07, "loss": 0.5004, "step": 6884 }, { "epoch": 0.85, "grad_norm": 1.4977850469823213, "learning_rate": 5.455149445925023e-07, "loss": 0.51, "step": 6885 }, { "epoch": 0.85, "grad_norm": 1.5388104560848637, "learning_rate": 5.446024793165877e-07, "loss": 0.5139, "step": 6886 }, { "epoch": 0.85, "grad_norm": 1.7299515295271184, "learning_rate": 5.436907338493813e-07, "loss": 0.5657, "step": 6887 }, { "epoch": 0.85, "grad_norm": 1.5580953548348517, "learning_rate": 5.427797083381853e-07, "loss": 0.4934, "step": 6888 }, { "epoch": 0.85, "grad_norm": 1.3881940205970094, "learning_rate": 5.418694029301808e-07, "loss": 0.4988, "step": 6889 }, { "epoch": 0.86, "grad_norm": 1.5982128731435383, "learning_rate": 5.409598177724401e-07, "loss": 0.505, "step": 6890 }, { "epoch": 0.86, "grad_norm": 1.482857028697596, "learning_rate": 5.400509530119119e-07, "loss": 0.4682, "step": 6891 }, { "epoch": 0.86, "grad_norm": 1.502884208589032, "learning_rate": 5.391428087954326e-07, "loss": 0.4585, "step": 6892 }, { "epoch": 0.86, "grad_norm": 1.390095861718777, "learning_rate": 5.382353852697198e-07, "loss": 0.5184, "step": 6893 }, { "epoch": 0.86, "grad_norm": 1.380414240486957, "learning_rate": 5.373286825813767e-07, "loss": 0.4599, "step": 6894 }, { "epoch": 0.86, "grad_norm": 1.3751438932439002, "learning_rate": 5.364227008768886e-07, "loss": 0.4775, "step": 6895 }, { "epoch": 0.86, "grad_norm": 1.8372806556281678, "learning_rate": 5.355174403026242e-07, "loss": 0.477, "step": 6896 }, { "epoch": 0.86, "grad_norm": 1.6275339354203016, "learning_rate": 5.346129010048368e-07, "loss": 0.4783, "step": 6897 }, { "epoch": 0.86, "grad_norm": 0.6915073517649066, "learning_rate": 5.337090831296626e-07, "loss": 0.4772, "step": 6898 }, { "epoch": 0.86, "grad_norm": 1.722069587274079, "learning_rate": 5.328059868231228e-07, "loss": 0.4988, "step": 6899 }, { "epoch": 0.86, "grad_norm": 1.7647107186011264, "learning_rate": 5.319036122311184e-07, "loss": 0.501, "step": 6900 }, { "epoch": 0.86, "grad_norm": 1.5874068943608002, "learning_rate": 5.310019594994381e-07, "loss": 0.446, "step": 6901 }, { "epoch": 0.86, "grad_norm": 1.7929932108513464, "learning_rate": 5.301010287737496e-07, "loss": 0.5228, "step": 6902 }, { "epoch": 0.86, "grad_norm": 1.5965039152926832, "learning_rate": 5.292008201996079e-07, "loss": 0.5783, "step": 6903 }, { "epoch": 0.86, "grad_norm": 1.427082342840929, "learning_rate": 5.283013339224491e-07, "loss": 0.4827, "step": 6904 }, { "epoch": 0.86, "grad_norm": 1.6876122832076539, "learning_rate": 5.274025700875912e-07, "loss": 0.4663, "step": 6905 }, { "epoch": 0.86, "grad_norm": 1.398426594294747, "learning_rate": 5.265045288402415e-07, "loss": 0.449, "step": 6906 }, { "epoch": 0.86, "grad_norm": 1.3728899110215704, "learning_rate": 5.256072103254828e-07, "loss": 0.4561, "step": 6907 }, { "epoch": 0.86, "grad_norm": 3.2854946046937044, "learning_rate": 5.247106146882874e-07, "loss": 0.478, "step": 6908 }, { "epoch": 0.86, "grad_norm": 1.279134504546732, "learning_rate": 5.238147420735057e-07, "loss": 0.4776, "step": 6909 }, { "epoch": 0.86, "grad_norm": 1.8530388685628911, "learning_rate": 5.229195926258762e-07, "loss": 0.5059, "step": 6910 }, { "epoch": 0.86, "grad_norm": 1.436969838582139, "learning_rate": 5.220251664900161e-07, "loss": 0.4998, "step": 6911 }, { "epoch": 0.86, "grad_norm": 1.5032036608395283, "learning_rate": 5.211314638104303e-07, "loss": 0.5714, "step": 6912 }, { "epoch": 0.86, "grad_norm": 2.0512575600594394, "learning_rate": 5.202384847315017e-07, "loss": 0.4843, "step": 6913 }, { "epoch": 0.86, "grad_norm": 1.4050897740726445, "learning_rate": 5.193462293974999e-07, "loss": 0.4815, "step": 6914 }, { "epoch": 0.86, "grad_norm": 1.2195798154838986, "learning_rate": 5.184546979525779e-07, "loss": 0.4516, "step": 6915 }, { "epoch": 0.86, "grad_norm": 1.4815168906852805, "learning_rate": 5.175638905407682e-07, "loss": 0.4266, "step": 6916 }, { "epoch": 0.86, "grad_norm": 1.602255012128706, "learning_rate": 5.166738073059907e-07, "loss": 0.5068, "step": 6917 }, { "epoch": 0.86, "grad_norm": 2.0514319780060712, "learning_rate": 5.157844483920449e-07, "loss": 0.4871, "step": 6918 }, { "epoch": 0.86, "grad_norm": 1.3562324046688408, "learning_rate": 5.148958139426152e-07, "loss": 0.5221, "step": 6919 }, { "epoch": 0.86, "grad_norm": 1.3925440441575618, "learning_rate": 5.140079041012674e-07, "loss": 0.5112, "step": 6920 }, { "epoch": 0.86, "grad_norm": 1.463072794109092, "learning_rate": 5.131207190114518e-07, "loss": 0.436, "step": 6921 }, { "epoch": 0.86, "grad_norm": 1.7851821317438705, "learning_rate": 5.122342588165013e-07, "loss": 0.4625, "step": 6922 }, { "epoch": 0.86, "grad_norm": 1.4061491256502077, "learning_rate": 5.113485236596305e-07, "loss": 0.4975, "step": 6923 }, { "epoch": 0.86, "grad_norm": 1.7953328392218353, "learning_rate": 5.104635136839392e-07, "loss": 0.5293, "step": 6924 }, { "epoch": 0.86, "grad_norm": 4.883648584810455, "learning_rate": 5.09579229032407e-07, "loss": 0.4316, "step": 6925 }, { "epoch": 0.86, "grad_norm": 1.4734215631821337, "learning_rate": 5.08695669847899e-07, "loss": 0.5, "step": 6926 }, { "epoch": 0.86, "grad_norm": 1.390752070784834, "learning_rate": 5.078128362731605e-07, "loss": 0.4571, "step": 6927 }, { "epoch": 0.86, "grad_norm": 1.3760124274026402, "learning_rate": 5.06930728450823e-07, "loss": 0.4068, "step": 6928 }, { "epoch": 0.86, "grad_norm": 1.4446249003683183, "learning_rate": 5.060493465233962e-07, "loss": 0.5094, "step": 6929 }, { "epoch": 0.86, "grad_norm": 1.636254259184282, "learning_rate": 5.051686906332776e-07, "loss": 0.5108, "step": 6930 }, { "epoch": 0.86, "grad_norm": 1.267435946951365, "learning_rate": 5.042887609227442e-07, "loss": 0.4685, "step": 6931 }, { "epoch": 0.86, "grad_norm": 1.754610151389348, "learning_rate": 5.034095575339553e-07, "loss": 0.5163, "step": 6932 }, { "epoch": 0.86, "grad_norm": 1.463126296444401, "learning_rate": 5.025310806089562e-07, "loss": 0.4913, "step": 6933 }, { "epoch": 0.86, "grad_norm": 2.013428114686188, "learning_rate": 5.016533302896698e-07, "loss": 0.5233, "step": 6934 }, { "epoch": 0.86, "grad_norm": 1.3545099717129814, "learning_rate": 5.007763067179066e-07, "loss": 0.4909, "step": 6935 }, { "epoch": 0.86, "grad_norm": 0.733885022215113, "learning_rate": 4.999000100353562e-07, "loss": 0.5019, "step": 6936 }, { "epoch": 0.86, "grad_norm": 1.8120183817577111, "learning_rate": 4.990244403835936e-07, "loss": 0.4918, "step": 6937 }, { "epoch": 0.86, "grad_norm": 1.6096501007633839, "learning_rate": 4.981495979040723e-07, "loss": 0.4859, "step": 6938 }, { "epoch": 0.86, "grad_norm": 1.3699844039744509, "learning_rate": 4.972754827381327e-07, "loss": 0.4719, "step": 6939 }, { "epoch": 0.86, "grad_norm": 2.6133878006430424, "learning_rate": 4.964020950269955e-07, "loss": 0.4776, "step": 6940 }, { "epoch": 0.86, "grad_norm": 1.5580353495575774, "learning_rate": 4.955294349117634e-07, "loss": 0.4682, "step": 6941 }, { "epoch": 0.86, "grad_norm": 1.9898604663077117, "learning_rate": 4.946575025334244e-07, "loss": 0.4954, "step": 6942 }, { "epoch": 0.86, "grad_norm": 1.6918769235942603, "learning_rate": 4.937862980328439e-07, "loss": 0.4625, "step": 6943 }, { "epoch": 0.86, "grad_norm": 1.5010276257602275, "learning_rate": 4.929158215507751e-07, "loss": 0.4708, "step": 6944 }, { "epoch": 0.86, "grad_norm": 1.4238385712397128, "learning_rate": 4.920460732278493e-07, "loss": 0.4965, "step": 6945 }, { "epoch": 0.86, "grad_norm": 1.8676252330232028, "learning_rate": 4.911770532045828e-07, "loss": 0.425, "step": 6946 }, { "epoch": 0.86, "grad_norm": 1.8991603311100667, "learning_rate": 4.903087616213748e-07, "loss": 0.4908, "step": 6947 }, { "epoch": 0.86, "grad_norm": 1.8943841158665158, "learning_rate": 4.894411986185027e-07, "loss": 0.5346, "step": 6948 }, { "epoch": 0.86, "grad_norm": 1.7537682284072407, "learning_rate": 4.885743643361318e-07, "loss": 0.4971, "step": 6949 }, { "epoch": 0.86, "grad_norm": 1.5885510591798264, "learning_rate": 4.877082589143045e-07, "loss": 0.4881, "step": 6950 }, { "epoch": 0.86, "grad_norm": 1.647460623372295, "learning_rate": 4.868428824929494e-07, "loss": 0.5323, "step": 6951 }, { "epoch": 0.86, "grad_norm": 1.3393433538659767, "learning_rate": 4.859782352118742e-07, "loss": 0.4605, "step": 6952 }, { "epoch": 0.86, "grad_norm": 1.583819537463703, "learning_rate": 4.851143172107719e-07, "loss": 0.5271, "step": 6953 }, { "epoch": 0.86, "grad_norm": 1.594390898281684, "learning_rate": 4.84251128629214e-07, "loss": 0.4707, "step": 6954 }, { "epoch": 0.86, "grad_norm": 1.8417235223966422, "learning_rate": 4.833886696066581e-07, "loss": 0.4842, "step": 6955 }, { "epoch": 0.86, "grad_norm": 1.5608893660067085, "learning_rate": 4.825269402824417e-07, "loss": 0.5323, "step": 6956 }, { "epoch": 0.86, "grad_norm": 1.6182598019281986, "learning_rate": 4.816659407957835e-07, "loss": 0.4844, "step": 6957 }, { "epoch": 0.86, "grad_norm": 1.7888155570652113, "learning_rate": 4.808056712857878e-07, "loss": 0.4965, "step": 6958 }, { "epoch": 0.86, "grad_norm": 1.470841132601057, "learning_rate": 4.799461318914366e-07, "loss": 0.4736, "step": 6959 }, { "epoch": 0.86, "grad_norm": 1.5958096796885273, "learning_rate": 4.790873227515974e-07, "loss": 0.4258, "step": 6960 }, { "epoch": 0.86, "grad_norm": 1.4668180431159055, "learning_rate": 4.782292440050173e-07, "loss": 0.4788, "step": 6961 }, { "epoch": 0.86, "grad_norm": 1.5940426778726535, "learning_rate": 4.773718957903267e-07, "loss": 0.484, "step": 6962 }, { "epoch": 0.86, "grad_norm": 2.2904653920667863, "learning_rate": 4.7651527824603804e-07, "loss": 0.4734, "step": 6963 }, { "epoch": 0.86, "grad_norm": 1.552833442468006, "learning_rate": 4.7565939151054675e-07, "loss": 0.5049, "step": 6964 }, { "epoch": 0.86, "grad_norm": 1.7028529981961882, "learning_rate": 4.748042357221278e-07, "loss": 0.482, "step": 6965 }, { "epoch": 0.86, "grad_norm": 4.118028428478353, "learning_rate": 4.739498110189372e-07, "loss": 0.5504, "step": 6966 }, { "epoch": 0.86, "grad_norm": 1.5657740026232285, "learning_rate": 4.7309611753901806e-07, "loss": 0.4814, "step": 6967 }, { "epoch": 0.86, "grad_norm": 1.3267340843161755, "learning_rate": 4.7224315542028945e-07, "loss": 0.4931, "step": 6968 }, { "epoch": 0.86, "grad_norm": 1.3367205730794152, "learning_rate": 4.7139092480055657e-07, "loss": 0.4812, "step": 6969 }, { "epoch": 0.86, "grad_norm": 1.4363411485262998, "learning_rate": 4.705394258175039e-07, "loss": 0.5002, "step": 6970 }, { "epoch": 0.87, "grad_norm": 5.3872480517546, "learning_rate": 4.696886586086985e-07, "loss": 0.5091, "step": 6971 }, { "epoch": 0.87, "grad_norm": 1.2398113701487647, "learning_rate": 4.688386233115899e-07, "loss": 0.476, "step": 6972 }, { "epoch": 0.87, "grad_norm": 1.4385066929159718, "learning_rate": 4.679893200635094e-07, "loss": 0.4617, "step": 6973 }, { "epoch": 0.87, "grad_norm": 1.5019290438655046, "learning_rate": 4.671407490016688e-07, "loss": 0.4902, "step": 6974 }, { "epoch": 0.87, "grad_norm": 1.4173222510567882, "learning_rate": 4.662929102631614e-07, "loss": 0.4585, "step": 6975 }, { "epoch": 0.87, "grad_norm": 5.913621336040645, "learning_rate": 4.654458039849641e-07, "loss": 0.4994, "step": 6976 }, { "epoch": 0.87, "grad_norm": 1.5367385729264298, "learning_rate": 4.6459943030393307e-07, "loss": 0.5567, "step": 6977 }, { "epoch": 0.87, "grad_norm": 2.1268769033496473, "learning_rate": 4.637537893568095e-07, "loss": 0.4387, "step": 6978 }, { "epoch": 0.87, "grad_norm": 1.5296982464759212, "learning_rate": 4.629088812802113e-07, "loss": 0.4426, "step": 6979 }, { "epoch": 0.87, "grad_norm": 2.507568436514036, "learning_rate": 4.620647062106437e-07, "loss": 0.4643, "step": 6980 }, { "epoch": 0.87, "grad_norm": 1.5170270026171093, "learning_rate": 4.6122126428448875e-07, "loss": 0.4917, "step": 6981 }, { "epoch": 0.87, "grad_norm": 1.3505376511882239, "learning_rate": 4.603785556380136e-07, "loss": 0.4725, "step": 6982 }, { "epoch": 0.87, "grad_norm": 1.6328794550198478, "learning_rate": 4.595365804073637e-07, "loss": 0.5333, "step": 6983 }, { "epoch": 0.87, "grad_norm": 1.3863656552023294, "learning_rate": 4.5869533872856753e-07, "loss": 0.4819, "step": 6984 }, { "epoch": 0.87, "grad_norm": 1.4091310542928852, "learning_rate": 4.578548307375369e-07, "loss": 0.4823, "step": 6985 }, { "epoch": 0.87, "grad_norm": 1.3905944890206892, "learning_rate": 4.570150565700604e-07, "loss": 0.4707, "step": 6986 }, { "epoch": 0.87, "grad_norm": 1.3268258977962248, "learning_rate": 4.561760163618129e-07, "loss": 0.4803, "step": 6987 }, { "epoch": 0.87, "grad_norm": 0.6313590022212238, "learning_rate": 4.553377102483486e-07, "loss": 0.482, "step": 6988 }, { "epoch": 0.87, "grad_norm": 1.3878481573742947, "learning_rate": 4.545001383651032e-07, "loss": 0.4823, "step": 6989 }, { "epoch": 0.87, "grad_norm": 1.5216919725001328, "learning_rate": 4.536633008473934e-07, "loss": 0.486, "step": 6990 }, { "epoch": 0.87, "grad_norm": 1.6149779706162697, "learning_rate": 4.5282719783041883e-07, "loss": 0.518, "step": 6991 }, { "epoch": 0.87, "grad_norm": 1.3652599863085746, "learning_rate": 4.519918294492581e-07, "loss": 0.4898, "step": 6992 }, { "epoch": 0.87, "grad_norm": 1.4485925323053548, "learning_rate": 4.5115719583887164e-07, "loss": 0.4877, "step": 6993 }, { "epoch": 0.87, "grad_norm": 1.5871411096296153, "learning_rate": 4.503232971341037e-07, "loss": 0.4946, "step": 6994 }, { "epoch": 0.87, "grad_norm": 2.1794323859003204, "learning_rate": 4.494901334696755e-07, "loss": 0.4527, "step": 6995 }, { "epoch": 0.87, "grad_norm": 0.6742379324367876, "learning_rate": 4.4865770498019545e-07, "loss": 0.4836, "step": 6996 }, { "epoch": 0.87, "grad_norm": 1.4569596184809448, "learning_rate": 4.478260118001465e-07, "loss": 0.4605, "step": 6997 }, { "epoch": 0.87, "grad_norm": 2.4222433310926212, "learning_rate": 4.4699505406389844e-07, "loss": 0.5305, "step": 6998 }, { "epoch": 0.87, "grad_norm": 1.9163666236933992, "learning_rate": 4.4616483190569773e-07, "loss": 0.5253, "step": 6999 }, { "epoch": 0.87, "grad_norm": 1.4860749254374586, "learning_rate": 4.4533534545967593e-07, "loss": 0.4816, "step": 7000 }, { "epoch": 0.87, "grad_norm": 2.977852488541996, "learning_rate": 4.445065948598426e-07, "loss": 0.5104, "step": 7001 }, { "epoch": 0.87, "grad_norm": 1.4291979735578966, "learning_rate": 4.436785802400906e-07, "loss": 0.4516, "step": 7002 }, { "epoch": 0.87, "grad_norm": 1.6554197655711902, "learning_rate": 4.428513017341923e-07, "loss": 0.496, "step": 7003 }, { "epoch": 0.87, "grad_norm": 1.4514424167573279, "learning_rate": 4.4202475947579984e-07, "loss": 0.495, "step": 7004 }, { "epoch": 0.87, "grad_norm": 1.3707185593181825, "learning_rate": 4.411989535984529e-07, "loss": 0.4912, "step": 7005 }, { "epoch": 0.87, "grad_norm": 1.5072176077192196, "learning_rate": 4.403738842355643e-07, "loss": 0.4895, "step": 7006 }, { "epoch": 0.87, "grad_norm": 1.4085864240181591, "learning_rate": 4.3954955152043346e-07, "loss": 0.4813, "step": 7007 }, { "epoch": 0.87, "grad_norm": 1.8764392388366993, "learning_rate": 4.387259555862361e-07, "loss": 0.5289, "step": 7008 }, { "epoch": 0.87, "grad_norm": 1.3581652441850534, "learning_rate": 4.3790309656603356e-07, "loss": 0.4587, "step": 7009 }, { "epoch": 0.87, "grad_norm": 1.314257733383638, "learning_rate": 4.3708097459276454e-07, "loss": 0.4924, "step": 7010 }, { "epoch": 0.87, "grad_norm": 1.4795930320292494, "learning_rate": 4.3625958979925167e-07, "loss": 0.5266, "step": 7011 }, { "epoch": 0.87, "grad_norm": 1.6737719701590443, "learning_rate": 4.354389423181948e-07, "loss": 0.5088, "step": 7012 }, { "epoch": 0.87, "grad_norm": 2.0460952816878635, "learning_rate": 4.3461903228217806e-07, "loss": 0.4388, "step": 7013 }, { "epoch": 0.87, "grad_norm": 1.4735752787958016, "learning_rate": 4.3379985982366537e-07, "loss": 0.4596, "step": 7014 }, { "epoch": 0.87, "grad_norm": 1.811600709993341, "learning_rate": 4.329814250750003e-07, "loss": 0.4417, "step": 7015 }, { "epoch": 0.87, "grad_norm": 1.59423739643667, "learning_rate": 4.321637281684099e-07, "loss": 0.5272, "step": 7016 }, { "epoch": 0.87, "grad_norm": 1.5071981707243922, "learning_rate": 4.313467692359974e-07, "loss": 0.5227, "step": 7017 }, { "epoch": 0.87, "grad_norm": 1.3654563630416694, "learning_rate": 4.3053054840975325e-07, "loss": 0.4817, "step": 7018 }, { "epoch": 0.87, "grad_norm": 2.694570195541426, "learning_rate": 4.2971506582154155e-07, "loss": 0.4642, "step": 7019 }, { "epoch": 0.87, "grad_norm": 1.3821178205066278, "learning_rate": 4.289003216031129e-07, "loss": 0.5009, "step": 7020 }, { "epoch": 0.87, "grad_norm": 0.6802113063898786, "learning_rate": 4.2808631588609704e-07, "loss": 0.4776, "step": 7021 }, { "epoch": 0.87, "grad_norm": 1.5209079592364636, "learning_rate": 4.272730488020016e-07, "loss": 0.4953, "step": 7022 }, { "epoch": 0.87, "grad_norm": 1.4538938283349732, "learning_rate": 4.2646052048221866e-07, "loss": 0.5018, "step": 7023 }, { "epoch": 0.87, "grad_norm": 1.4972582845594824, "learning_rate": 4.2564873105801816e-07, "loss": 0.5248, "step": 7024 }, { "epoch": 0.87, "grad_norm": 1.6705282520765294, "learning_rate": 4.2483768066055365e-07, "loss": 0.5378, "step": 7025 }, { "epoch": 0.87, "grad_norm": 1.5286983823682547, "learning_rate": 4.240273694208552e-07, "loss": 0.476, "step": 7026 }, { "epoch": 0.87, "grad_norm": 1.5329349887772938, "learning_rate": 4.2321779746983706e-07, "loss": 0.5087, "step": 7027 }, { "epoch": 0.87, "grad_norm": 2.142670079847223, "learning_rate": 4.224089649382923e-07, "loss": 0.5523, "step": 7028 }, { "epoch": 0.87, "grad_norm": 4.099696836371022, "learning_rate": 4.216008719568948e-07, "loss": 0.4539, "step": 7029 }, { "epoch": 0.87, "grad_norm": 1.3233495269215116, "learning_rate": 4.2079351865620013e-07, "loss": 0.4979, "step": 7030 }, { "epoch": 0.87, "grad_norm": 1.369596831259944, "learning_rate": 4.1998690516664165e-07, "loss": 0.4557, "step": 7031 }, { "epoch": 0.87, "grad_norm": 1.6306367826899542, "learning_rate": 4.191810316185374e-07, "loss": 0.5057, "step": 7032 }, { "epoch": 0.87, "grad_norm": 1.498312849387136, "learning_rate": 4.183758981420799e-07, "loss": 0.5031, "step": 7033 }, { "epoch": 0.87, "grad_norm": 2.390904829889811, "learning_rate": 4.17571504867349e-07, "loss": 0.5169, "step": 7034 }, { "epoch": 0.87, "grad_norm": 1.6042617395300853, "learning_rate": 4.167678519242985e-07, "loss": 0.4576, "step": 7035 }, { "epoch": 0.87, "grad_norm": 5.941251307886222, "learning_rate": 4.159649394427673e-07, "loss": 0.5456, "step": 7036 }, { "epoch": 0.87, "grad_norm": 1.5270596546982536, "learning_rate": 4.151627675524733e-07, "loss": 0.4076, "step": 7037 }, { "epoch": 0.87, "grad_norm": 1.4227414585009857, "learning_rate": 4.1436133638301346e-07, "loss": 0.4517, "step": 7038 }, { "epoch": 0.87, "grad_norm": 0.6963363973200976, "learning_rate": 4.1356064606386694e-07, "loss": 0.5013, "step": 7039 }, { "epoch": 0.87, "grad_norm": 1.3894778049158558, "learning_rate": 4.1276069672439135e-07, "loss": 0.4925, "step": 7040 }, { "epoch": 0.87, "grad_norm": 1.6235082248499837, "learning_rate": 4.119614884938261e-07, "loss": 0.4537, "step": 7041 }, { "epoch": 0.87, "grad_norm": 1.6274598684100545, "learning_rate": 4.111630215012902e-07, "loss": 0.4917, "step": 7042 }, { "epoch": 0.87, "grad_norm": 1.6183380914641294, "learning_rate": 4.103652958757831e-07, "loss": 0.505, "step": 7043 }, { "epoch": 0.87, "grad_norm": 1.5196493987413147, "learning_rate": 4.0956831174618403e-07, "loss": 0.4673, "step": 7044 }, { "epoch": 0.87, "grad_norm": 1.383596502398442, "learning_rate": 4.0877206924125337e-07, "loss": 0.4677, "step": 7045 }, { "epoch": 0.87, "grad_norm": 1.8790719779796432, "learning_rate": 4.0797656848963086e-07, "loss": 0.5108, "step": 7046 }, { "epoch": 0.87, "grad_norm": 1.4487511887778446, "learning_rate": 4.0718180961983655e-07, "loss": 0.472, "step": 7047 }, { "epoch": 0.87, "grad_norm": 1.6373521545515435, "learning_rate": 4.0638779276027106e-07, "loss": 0.5152, "step": 7048 }, { "epoch": 0.87, "grad_norm": 0.6613826241580701, "learning_rate": 4.05594518039214e-07, "loss": 0.4638, "step": 7049 }, { "epoch": 0.87, "grad_norm": 2.698074527625873, "learning_rate": 4.048019855848273e-07, "loss": 0.4555, "step": 7050 }, { "epoch": 0.88, "grad_norm": 0.6727169009641928, "learning_rate": 4.040101955251502e-07, "loss": 0.4903, "step": 7051 }, { "epoch": 0.88, "grad_norm": 1.5725392983596518, "learning_rate": 4.0321914798810423e-07, "loss": 0.4994, "step": 7052 }, { "epoch": 0.88, "grad_norm": 1.4342289383946316, "learning_rate": 4.0242884310148945e-07, "loss": 0.4319, "step": 7053 }, { "epoch": 0.88, "grad_norm": 1.5702375613899564, "learning_rate": 4.0163928099298644e-07, "loss": 0.4696, "step": 7054 }, { "epoch": 0.88, "grad_norm": 1.3036488520632732, "learning_rate": 4.008504617901571e-07, "loss": 0.4305, "step": 7055 }, { "epoch": 0.88, "grad_norm": 0.6565508220144567, "learning_rate": 4.0006238562044054e-07, "loss": 0.476, "step": 7056 }, { "epoch": 0.88, "grad_norm": 1.8813873177790947, "learning_rate": 3.992750526111594e-07, "loss": 0.5009, "step": 7057 }, { "epoch": 0.88, "grad_norm": 1.313124075124981, "learning_rate": 3.984884628895119e-07, "loss": 0.4919, "step": 7058 }, { "epoch": 0.88, "grad_norm": 2.3541457299851034, "learning_rate": 3.9770261658258093e-07, "loss": 0.5033, "step": 7059 }, { "epoch": 0.88, "grad_norm": 1.4366559915321013, "learning_rate": 3.9691751381732423e-07, "loss": 0.4939, "step": 7060 }, { "epoch": 0.88, "grad_norm": 1.5378230470666459, "learning_rate": 3.961331547205838e-07, "loss": 0.4741, "step": 7061 }, { "epoch": 0.88, "grad_norm": 2.6909535505851894, "learning_rate": 3.953495394190804e-07, "loss": 0.5063, "step": 7062 }, { "epoch": 0.88, "grad_norm": 1.3864880882834227, "learning_rate": 3.9456666803941223e-07, "loss": 0.4779, "step": 7063 }, { "epoch": 0.88, "grad_norm": 1.451029212441224, "learning_rate": 3.937845407080604e-07, "loss": 0.4789, "step": 7064 }, { "epoch": 0.88, "grad_norm": 1.5687085727703403, "learning_rate": 3.930031575513837e-07, "loss": 0.5399, "step": 7065 }, { "epoch": 0.88, "grad_norm": 1.2924570237505366, "learning_rate": 3.9222251869562187e-07, "loss": 0.5184, "step": 7066 }, { "epoch": 0.88, "grad_norm": 1.500961660790129, "learning_rate": 3.9144262426689336e-07, "loss": 0.4618, "step": 7067 }, { "epoch": 0.88, "grad_norm": 1.7030292271341143, "learning_rate": 3.90663474391198e-07, "loss": 0.5014, "step": 7068 }, { "epoch": 0.88, "grad_norm": 1.2696125199636845, "learning_rate": 3.898850691944123e-07, "loss": 0.4243, "step": 7069 }, { "epoch": 0.88, "grad_norm": 1.5740029906704345, "learning_rate": 3.891074088022978e-07, "loss": 0.5329, "step": 7070 }, { "epoch": 0.88, "grad_norm": 1.6799717738407427, "learning_rate": 3.8833049334048964e-07, "loss": 0.4688, "step": 7071 }, { "epoch": 0.88, "grad_norm": 2.0888448681941014, "learning_rate": 3.875543229345069e-07, "loss": 0.473, "step": 7072 }, { "epoch": 0.88, "grad_norm": 2.220257275918847, "learning_rate": 3.8677889770974584e-07, "loss": 0.4829, "step": 7073 }, { "epoch": 0.88, "grad_norm": 1.9022525076310015, "learning_rate": 3.8600421779148303e-07, "loss": 0.5442, "step": 7074 }, { "epoch": 0.88, "grad_norm": 1.4515938616789295, "learning_rate": 3.85230283304876e-07, "loss": 0.5364, "step": 7075 }, { "epoch": 0.88, "grad_norm": 1.5089744744651061, "learning_rate": 3.844570943749593e-07, "loss": 0.4802, "step": 7076 }, { "epoch": 0.88, "grad_norm": 1.5771912659647935, "learning_rate": 3.83684651126649e-07, "loss": 0.506, "step": 7077 }, { "epoch": 0.88, "grad_norm": 1.5971997865078935, "learning_rate": 3.829129536847398e-07, "loss": 0.5213, "step": 7078 }, { "epoch": 0.88, "grad_norm": 1.8669592100775223, "learning_rate": 3.8214200217390806e-07, "loss": 0.5021, "step": 7079 }, { "epoch": 0.88, "grad_norm": 1.520345410466755, "learning_rate": 3.8137179671870527e-07, "loss": 0.5044, "step": 7080 }, { "epoch": 0.88, "grad_norm": 1.3843402750451081, "learning_rate": 3.8060233744356634e-07, "loss": 0.484, "step": 7081 }, { "epoch": 0.88, "grad_norm": 1.3083414725989773, "learning_rate": 3.798336244728046e-07, "loss": 0.4468, "step": 7082 }, { "epoch": 0.88, "grad_norm": 1.2366682560363815, "learning_rate": 3.790656579306101e-07, "loss": 0.512, "step": 7083 }, { "epoch": 0.88, "grad_norm": 2.3774229109773617, "learning_rate": 3.7829843794105703e-07, "loss": 0.4919, "step": 7084 }, { "epoch": 0.88, "grad_norm": 2.2780907545876765, "learning_rate": 3.7753196462809504e-07, "loss": 0.4593, "step": 7085 }, { "epoch": 0.88, "grad_norm": 0.6647475571312081, "learning_rate": 3.767662381155551e-07, "loss": 0.4928, "step": 7086 }, { "epoch": 0.88, "grad_norm": 1.4554371973142786, "learning_rate": 3.760012585271472e-07, "loss": 0.5152, "step": 7087 }, { "epoch": 0.88, "grad_norm": 1.2374825063762878, "learning_rate": 3.7523702598646185e-07, "loss": 0.4613, "step": 7088 }, { "epoch": 0.88, "grad_norm": 1.246405451093617, "learning_rate": 3.7447354061696474e-07, "loss": 0.4897, "step": 7089 }, { "epoch": 0.88, "grad_norm": 1.497491611425484, "learning_rate": 3.737108025420061e-07, "loss": 0.5095, "step": 7090 }, { "epoch": 0.88, "grad_norm": 2.448953188662499, "learning_rate": 3.7294881188481235e-07, "loss": 0.5111, "step": 7091 }, { "epoch": 0.88, "grad_norm": 1.3265262982691868, "learning_rate": 3.721875687684884e-07, "loss": 0.4747, "step": 7092 }, { "epoch": 0.88, "grad_norm": 1.4006392357014956, "learning_rate": 3.714270733160219e-07, "loss": 0.4648, "step": 7093 }, { "epoch": 0.88, "grad_norm": 1.4159650763549114, "learning_rate": 3.706673256502746e-07, "loss": 0.5373, "step": 7094 }, { "epoch": 0.88, "grad_norm": 1.71334454708964, "learning_rate": 3.6990832589399396e-07, "loss": 0.4858, "step": 7095 }, { "epoch": 0.88, "grad_norm": 1.4613986540202215, "learning_rate": 3.6915007416980067e-07, "loss": 0.4787, "step": 7096 }, { "epoch": 0.88, "grad_norm": 1.439666219997754, "learning_rate": 3.6839257060019895e-07, "loss": 0.5414, "step": 7097 }, { "epoch": 0.88, "grad_norm": 1.7148078587442241, "learning_rate": 3.6763581530756766e-07, "loss": 0.4941, "step": 7098 }, { "epoch": 0.88, "grad_norm": 1.7614900950085237, "learning_rate": 3.6687980841417005e-07, "loss": 0.5396, "step": 7099 }, { "epoch": 0.88, "grad_norm": 1.4900408278461268, "learning_rate": 3.661245500421429e-07, "loss": 0.5185, "step": 7100 }, { "epoch": 0.88, "grad_norm": 3.410188054615284, "learning_rate": 3.653700403135074e-07, "loss": 0.478, "step": 7101 }, { "epoch": 0.88, "grad_norm": 1.3467691276511204, "learning_rate": 3.6461627935015897e-07, "loss": 0.4917, "step": 7102 }, { "epoch": 0.88, "grad_norm": 1.476580030239518, "learning_rate": 3.638632672738757e-07, "loss": 0.459, "step": 7103 }, { "epoch": 0.88, "grad_norm": 1.6438546283641577, "learning_rate": 3.6311100420631405e-07, "loss": 0.4631, "step": 7104 }, { "epoch": 0.88, "grad_norm": 1.9040075114903101, "learning_rate": 3.623594902690064e-07, "loss": 0.4624, "step": 7105 }, { "epoch": 0.88, "grad_norm": 2.77764161446545, "learning_rate": 3.616087255833689e-07, "loss": 0.4732, "step": 7106 }, { "epoch": 0.88, "grad_norm": 1.5382156221576366, "learning_rate": 3.608587102706923e-07, "loss": 0.4758, "step": 7107 }, { "epoch": 0.88, "grad_norm": 1.880523836838473, "learning_rate": 3.601094444521497e-07, "loss": 0.5175, "step": 7108 }, { "epoch": 0.88, "grad_norm": 1.319183728535753, "learning_rate": 3.5936092824878976e-07, "loss": 0.4914, "step": 7109 }, { "epoch": 0.88, "grad_norm": 1.4221183759049574, "learning_rate": 3.586131617815436e-07, "loss": 0.527, "step": 7110 }, { "epoch": 0.88, "grad_norm": 1.7200757512072127, "learning_rate": 3.57866145171219e-07, "loss": 0.4504, "step": 7111 }, { "epoch": 0.88, "grad_norm": 1.5298123462961473, "learning_rate": 3.571198785385027e-07, "loss": 0.4757, "step": 7112 }, { "epoch": 0.88, "grad_norm": 1.4777977381633165, "learning_rate": 3.5637436200396115e-07, "loss": 0.5434, "step": 7113 }, { "epoch": 0.88, "grad_norm": 1.6301079688826285, "learning_rate": 3.5562959568803843e-07, "loss": 0.4849, "step": 7114 }, { "epoch": 0.88, "grad_norm": 1.3511275125402105, "learning_rate": 3.54885579711059e-07, "loss": 0.5205, "step": 7115 }, { "epoch": 0.88, "grad_norm": 1.4338245476889193, "learning_rate": 3.541423141932238e-07, "loss": 0.4767, "step": 7116 }, { "epoch": 0.88, "grad_norm": 1.5251989790197307, "learning_rate": 3.5339979925461566e-07, "loss": 0.4685, "step": 7117 }, { "epoch": 0.88, "grad_norm": 1.368497415552152, "learning_rate": 3.526580350151931e-07, "loss": 0.5087, "step": 7118 }, { "epoch": 0.88, "grad_norm": 1.4030800124288458, "learning_rate": 3.519170215947948e-07, "loss": 0.5299, "step": 7119 }, { "epoch": 0.88, "grad_norm": 1.8438357371279437, "learning_rate": 3.511767591131393e-07, "loss": 0.5338, "step": 7120 }, { "epoch": 0.88, "grad_norm": 2.089390945220485, "learning_rate": 3.504372476898205e-07, "loss": 0.5054, "step": 7121 }, { "epoch": 0.88, "grad_norm": 1.6357190621813416, "learning_rate": 3.4969848744431487e-07, "loss": 0.5339, "step": 7122 }, { "epoch": 0.88, "grad_norm": 1.5665757084370637, "learning_rate": 3.489604784959744e-07, "loss": 0.4957, "step": 7123 }, { "epoch": 0.88, "grad_norm": 1.4908392013334355, "learning_rate": 3.482232209640318e-07, "loss": 0.469, "step": 7124 }, { "epoch": 0.88, "grad_norm": 1.5897939270204158, "learning_rate": 3.474867149675959e-07, "loss": 0.5276, "step": 7125 }, { "epoch": 0.88, "grad_norm": 3.0001121398745645, "learning_rate": 3.467509606256581e-07, "loss": 0.519, "step": 7126 }, { "epoch": 0.88, "grad_norm": 1.7415972576660335, "learning_rate": 3.4601595805708375e-07, "loss": 0.4467, "step": 7127 }, { "epoch": 0.88, "grad_norm": 1.807146073626419, "learning_rate": 3.4528170738062027e-07, "loss": 0.5191, "step": 7128 }, { "epoch": 0.88, "grad_norm": 0.6450389504343121, "learning_rate": 3.445482087148927e-07, "loss": 0.4805, "step": 7129 }, { "epoch": 0.88, "grad_norm": 1.4979170319680737, "learning_rate": 3.438154621784029e-07, "loss": 0.4916, "step": 7130 }, { "epoch": 0.88, "grad_norm": 1.6597403892980278, "learning_rate": 3.4308346788953394e-07, "loss": 0.4517, "step": 7131 }, { "epoch": 0.89, "grad_norm": 1.4360278235152508, "learning_rate": 3.423522259665446e-07, "loss": 0.4837, "step": 7132 }, { "epoch": 0.89, "grad_norm": 2.78841914191125, "learning_rate": 3.4162173652757427e-07, "loss": 0.5079, "step": 7133 }, { "epoch": 0.89, "grad_norm": 1.7577408413013773, "learning_rate": 3.4089199969063957e-07, "loss": 0.5455, "step": 7134 }, { "epoch": 0.89, "grad_norm": 1.8621486529156506, "learning_rate": 3.4016301557363573e-07, "loss": 0.5004, "step": 7135 }, { "epoch": 0.89, "grad_norm": 3.3443240630108315, "learning_rate": 3.3943478429433797e-07, "loss": 0.4592, "step": 7136 }, { "epoch": 0.89, "grad_norm": 1.5636160522143439, "learning_rate": 3.38707305970396e-07, "loss": 0.4867, "step": 7137 }, { "epoch": 0.89, "grad_norm": 1.4797636371493548, "learning_rate": 3.3798058071934315e-07, "loss": 0.4948, "step": 7138 }, { "epoch": 0.89, "grad_norm": 1.7344713320689402, "learning_rate": 3.372546086585859e-07, "loss": 0.4734, "step": 7139 }, { "epoch": 0.89, "grad_norm": 1.8399894492094186, "learning_rate": 3.365293899054134e-07, "loss": 0.487, "step": 7140 }, { "epoch": 0.89, "grad_norm": 1.6768159138284706, "learning_rate": 3.35804924576989e-07, "loss": 0.5342, "step": 7141 }, { "epoch": 0.89, "grad_norm": 1.492573985054435, "learning_rate": 3.3508121279035856e-07, "loss": 0.4673, "step": 7142 }, { "epoch": 0.89, "grad_norm": 1.8692692734840493, "learning_rate": 3.3435825466244245e-07, "loss": 0.5184, "step": 7143 }, { "epoch": 0.89, "grad_norm": 10.846394080663176, "learning_rate": 3.3363605031004167e-07, "loss": 0.5093, "step": 7144 }, { "epoch": 0.89, "grad_norm": 2.8655230755218826, "learning_rate": 3.329145998498351e-07, "loss": 0.529, "step": 7145 }, { "epoch": 0.89, "grad_norm": 0.7549149164049279, "learning_rate": 3.321939033983784e-07, "loss": 0.5134, "step": 7146 }, { "epoch": 0.89, "grad_norm": 1.7333913256290412, "learning_rate": 3.314739610721074e-07, "loss": 0.4907, "step": 7147 }, { "epoch": 0.89, "grad_norm": 1.9064714456136993, "learning_rate": 3.307547729873339e-07, "loss": 0.4643, "step": 7148 }, { "epoch": 0.89, "grad_norm": 1.3797398845123694, "learning_rate": 3.3003633926025126e-07, "loss": 0.4881, "step": 7149 }, { "epoch": 0.89, "grad_norm": 1.465083071290942, "learning_rate": 3.293186600069259e-07, "loss": 0.4659, "step": 7150 }, { "epoch": 0.89, "grad_norm": 1.6122896595208838, "learning_rate": 3.2860173534330744e-07, "loss": 0.5195, "step": 7151 }, { "epoch": 0.89, "grad_norm": 1.5037231912700562, "learning_rate": 3.2788556538522086e-07, "loss": 0.4601, "step": 7152 }, { "epoch": 0.89, "grad_norm": 1.7022555579379326, "learning_rate": 3.2717015024836875e-07, "loss": 0.453, "step": 7153 }, { "epoch": 0.89, "grad_norm": 1.341508810355981, "learning_rate": 3.2645549004833464e-07, "loss": 0.5044, "step": 7154 }, { "epoch": 0.89, "grad_norm": 1.378762619010031, "learning_rate": 3.2574158490057573e-07, "loss": 0.4611, "step": 7155 }, { "epoch": 0.89, "grad_norm": 1.694921735627754, "learning_rate": 3.2502843492043246e-07, "loss": 0.5117, "step": 7156 }, { "epoch": 0.89, "grad_norm": 1.6336333360011273, "learning_rate": 3.243160402231177e-07, "loss": 0.4878, "step": 7157 }, { "epoch": 0.89, "grad_norm": 1.8195915630189345, "learning_rate": 3.2360440092372716e-07, "loss": 0.491, "step": 7158 }, { "epoch": 0.89, "grad_norm": 1.472846216891738, "learning_rate": 3.228935171372299e-07, "loss": 0.5043, "step": 7159 }, { "epoch": 0.89, "grad_norm": 1.5073396580443326, "learning_rate": 3.221833889784792e-07, "loss": 0.4583, "step": 7160 }, { "epoch": 0.89, "grad_norm": 1.628352258905142, "learning_rate": 3.214740165622005e-07, "loss": 0.5192, "step": 7161 }, { "epoch": 0.89, "grad_norm": 1.4070354758414292, "learning_rate": 3.2076540000299804e-07, "loss": 0.4868, "step": 7162 }, { "epoch": 0.89, "grad_norm": 1.8743589664505378, "learning_rate": 3.200575394153577e-07, "loss": 0.4941, "step": 7163 }, { "epoch": 0.89, "grad_norm": 1.6957401814278172, "learning_rate": 3.1935043491363784e-07, "loss": 0.458, "step": 7164 }, { "epoch": 0.89, "grad_norm": 2.5618551792774675, "learning_rate": 3.1864408661207935e-07, "loss": 0.4657, "step": 7165 }, { "epoch": 0.89, "grad_norm": 1.3676928671793762, "learning_rate": 3.1793849462479817e-07, "loss": 0.4656, "step": 7166 }, { "epoch": 0.89, "grad_norm": 1.3381120850497739, "learning_rate": 3.172336590657893e-07, "loss": 0.5007, "step": 7167 }, { "epoch": 0.89, "grad_norm": 1.7082831621344305, "learning_rate": 3.165295800489238e-07, "loss": 0.4443, "step": 7168 }, { "epoch": 0.89, "grad_norm": 2.805985383532289, "learning_rate": 3.158262576879545e-07, "loss": 0.4938, "step": 7169 }, { "epoch": 0.89, "grad_norm": 1.4085538252156595, "learning_rate": 3.1512369209650673e-07, "loss": 0.5054, "step": 7170 }, { "epoch": 0.89, "grad_norm": 1.3710216311892396, "learning_rate": 3.14421883388088e-07, "loss": 0.423, "step": 7171 }, { "epoch": 0.89, "grad_norm": 1.4943971761522419, "learning_rate": 3.137208316760809e-07, "loss": 0.4773, "step": 7172 }, { "epoch": 0.89, "grad_norm": 1.3146847075274068, "learning_rate": 3.1302053707374605e-07, "loss": 0.5013, "step": 7173 }, { "epoch": 0.89, "grad_norm": 1.5673236063433522, "learning_rate": 3.123209996942228e-07, "loss": 0.4954, "step": 7174 }, { "epoch": 0.89, "grad_norm": 1.5592195204413026, "learning_rate": 3.1162221965052753e-07, "loss": 0.4778, "step": 7175 }, { "epoch": 0.89, "grad_norm": 0.6967523625595746, "learning_rate": 3.109241970555538e-07, "loss": 0.4947, "step": 7176 }, { "epoch": 0.89, "grad_norm": 1.4297009420714675, "learning_rate": 3.1022693202207355e-07, "loss": 0.4709, "step": 7177 }, { "epoch": 0.89, "grad_norm": 1.7542963468229498, "learning_rate": 3.095304246627373e-07, "loss": 0.4753, "step": 7178 }, { "epoch": 0.89, "grad_norm": 1.5299932504585612, "learning_rate": 3.0883467509007005e-07, "loss": 0.4826, "step": 7179 }, { "epoch": 0.89, "grad_norm": 1.199621225915617, "learning_rate": 3.081396834164774e-07, "loss": 0.4789, "step": 7180 }, { "epoch": 0.89, "grad_norm": 1.539053220866323, "learning_rate": 3.0744544975424173e-07, "loss": 0.467, "step": 7181 }, { "epoch": 0.89, "grad_norm": 1.5034629040386387, "learning_rate": 3.0675197421552116e-07, "loss": 0.4767, "step": 7182 }, { "epoch": 0.89, "grad_norm": 1.7777352958898378, "learning_rate": 3.060592569123544e-07, "loss": 0.4573, "step": 7183 }, { "epoch": 0.89, "grad_norm": 1.5106190466274196, "learning_rate": 3.0536729795665355e-07, "loss": 0.5401, "step": 7184 }, { "epoch": 0.89, "grad_norm": 2.4379789088763033, "learning_rate": 3.046760974602136e-07, "loss": 0.446, "step": 7185 }, { "epoch": 0.89, "grad_norm": 1.2860703238892677, "learning_rate": 3.039856555347026e-07, "loss": 0.4643, "step": 7186 }, { "epoch": 0.89, "grad_norm": 1.3835890592762499, "learning_rate": 3.032959722916684e-07, "loss": 0.4692, "step": 7187 }, { "epoch": 0.89, "grad_norm": 0.6878989264050003, "learning_rate": 3.026070478425336e-07, "loss": 0.4983, "step": 7188 }, { "epoch": 0.89, "grad_norm": 1.6771846553556036, "learning_rate": 3.019188822986024e-07, "loss": 0.4646, "step": 7189 }, { "epoch": 0.89, "grad_norm": 1.6440426570746323, "learning_rate": 3.012314757710527e-07, "loss": 0.5425, "step": 7190 }, { "epoch": 0.89, "grad_norm": 2.074279077892209, "learning_rate": 3.0054482837094003e-07, "loss": 0.4386, "step": 7191 }, { "epoch": 0.89, "grad_norm": 1.5173362425262247, "learning_rate": 2.9985894020919946e-07, "loss": 0.504, "step": 7192 }, { "epoch": 0.89, "grad_norm": 1.4264982703753875, "learning_rate": 2.9917381139664245e-07, "loss": 0.5233, "step": 7193 }, { "epoch": 0.89, "grad_norm": 26.549343377170732, "learning_rate": 2.9848944204395827e-07, "loss": 0.5357, "step": 7194 }, { "epoch": 0.89, "grad_norm": 1.4586810658158686, "learning_rate": 2.978058322617111e-07, "loss": 0.4955, "step": 7195 }, { "epoch": 0.89, "grad_norm": 1.3883893237445497, "learning_rate": 2.9712298216034553e-07, "loss": 0.4755, "step": 7196 }, { "epoch": 0.89, "grad_norm": 1.4159920892476956, "learning_rate": 2.96440891850181e-07, "loss": 0.5324, "step": 7197 }, { "epoch": 0.89, "grad_norm": 1.415269551961386, "learning_rate": 2.95759561441416e-07, "loss": 0.5041, "step": 7198 }, { "epoch": 0.89, "grad_norm": 1.5364140965549926, "learning_rate": 2.950789910441254e-07, "loss": 0.5059, "step": 7199 }, { "epoch": 0.89, "grad_norm": 1.4547641326655592, "learning_rate": 2.9439918076826003e-07, "loss": 0.5275, "step": 7200 }, { "epoch": 0.89, "grad_norm": 0.6340603974161589, "learning_rate": 2.937201307236504e-07, "loss": 0.4743, "step": 7201 }, { "epoch": 0.89, "grad_norm": 1.6838431804403837, "learning_rate": 2.930418410200031e-07, "loss": 0.4303, "step": 7202 }, { "epoch": 0.89, "grad_norm": 2.59902466849343, "learning_rate": 2.923643117669023e-07, "loss": 0.4981, "step": 7203 }, { "epoch": 0.89, "grad_norm": 1.588337220386599, "learning_rate": 2.91687543073807e-07, "loss": 0.459, "step": 7204 }, { "epoch": 0.89, "grad_norm": 1.8065695379788074, "learning_rate": 2.9101153505005697e-07, "loss": 0.4769, "step": 7205 }, { "epoch": 0.89, "grad_norm": 1.2622077395268985, "learning_rate": 2.9033628780486603e-07, "loss": 0.4758, "step": 7206 }, { "epoch": 0.89, "grad_norm": 1.2822251089559757, "learning_rate": 2.8966180144732735e-07, "loss": 0.4679, "step": 7207 }, { "epoch": 0.89, "grad_norm": 2.7942440218571605, "learning_rate": 2.889880760864089e-07, "loss": 0.504, "step": 7208 }, { "epoch": 0.89, "grad_norm": 1.5395160273565343, "learning_rate": 2.883151118309574e-07, "loss": 0.4623, "step": 7209 }, { "epoch": 0.89, "grad_norm": 1.5010348696910691, "learning_rate": 2.8764290878969757e-07, "loss": 0.4999, "step": 7210 }, { "epoch": 0.89, "grad_norm": 1.6289662605286688, "learning_rate": 2.869714670712276e-07, "loss": 0.4778, "step": 7211 }, { "epoch": 0.9, "grad_norm": 0.6921986203870252, "learning_rate": 2.8630078678402673e-07, "loss": 0.4852, "step": 7212 }, { "epoch": 0.9, "grad_norm": 1.584016313678421, "learning_rate": 2.856308680364472e-07, "loss": 0.4692, "step": 7213 }, { "epoch": 0.9, "grad_norm": 1.4918658623049326, "learning_rate": 2.8496171093672255e-07, "loss": 0.5201, "step": 7214 }, { "epoch": 0.9, "grad_norm": 1.5396443968278153, "learning_rate": 2.842933155929589e-07, "loss": 0.4347, "step": 7215 }, { "epoch": 0.9, "grad_norm": 1.7675236400198393, "learning_rate": 2.8362568211314334e-07, "loss": 0.4514, "step": 7216 }, { "epoch": 0.9, "grad_norm": 1.568592089505673, "learning_rate": 2.8295881060513565e-07, "loss": 0.5031, "step": 7217 }, { "epoch": 0.9, "grad_norm": 1.4069262109055722, "learning_rate": 2.8229270117667686e-07, "loss": 0.5155, "step": 7218 }, { "epoch": 0.9, "grad_norm": 1.39470701848657, "learning_rate": 2.8162735393538197e-07, "loss": 0.5019, "step": 7219 }, { "epoch": 0.9, "grad_norm": 1.4321517994755029, "learning_rate": 2.8096276898874344e-07, "loss": 0.4615, "step": 7220 }, { "epoch": 0.9, "grad_norm": 1.6475657395982446, "learning_rate": 2.802989464441319e-07, "loss": 0.4951, "step": 7221 }, { "epoch": 0.9, "grad_norm": 1.3483897836284766, "learning_rate": 2.796358864087922e-07, "loss": 0.4781, "step": 7222 }, { "epoch": 0.9, "grad_norm": 2.2149006601586705, "learning_rate": 2.789735889898493e-07, "loss": 0.501, "step": 7223 }, { "epoch": 0.9, "grad_norm": 1.5858220803089245, "learning_rate": 2.7831205429430133e-07, "loss": 0.5271, "step": 7224 }, { "epoch": 0.9, "grad_norm": 1.7325956961464044, "learning_rate": 2.776512824290256e-07, "loss": 0.5268, "step": 7225 }, { "epoch": 0.9, "grad_norm": 1.7756493090951462, "learning_rate": 2.769912735007768e-07, "loss": 0.5266, "step": 7226 }, { "epoch": 0.9, "grad_norm": 1.9930583301822953, "learning_rate": 2.763320276161835e-07, "loss": 0.495, "step": 7227 }, { "epoch": 0.9, "grad_norm": 2.8473930690073925, "learning_rate": 2.7567354488175433e-07, "loss": 0.5277, "step": 7228 }, { "epoch": 0.9, "grad_norm": 1.692660262075829, "learning_rate": 2.750158254038715e-07, "loss": 0.525, "step": 7229 }, { "epoch": 0.9, "grad_norm": 1.3647834288816196, "learning_rate": 2.7435886928879663e-07, "loss": 0.4567, "step": 7230 }, { "epoch": 0.9, "grad_norm": 2.1593877264595376, "learning_rate": 2.737026766426654e-07, "loss": 0.5127, "step": 7231 }, { "epoch": 0.9, "grad_norm": 1.3555113213082137, "learning_rate": 2.730472475714929e-07, "loss": 0.4757, "step": 7232 }, { "epoch": 0.9, "grad_norm": 1.4278182115249216, "learning_rate": 2.723925821811685e-07, "loss": 0.489, "step": 7233 }, { "epoch": 0.9, "grad_norm": 1.8923024576982832, "learning_rate": 2.717386805774591e-07, "loss": 0.5259, "step": 7234 }, { "epoch": 0.9, "grad_norm": 1.3922482113704302, "learning_rate": 2.710855428660097e-07, "loss": 0.4448, "step": 7235 }, { "epoch": 0.9, "grad_norm": 1.50956630158752, "learning_rate": 2.7043316915233874e-07, "loss": 0.497, "step": 7236 }, { "epoch": 0.9, "grad_norm": 2.295771368551603, "learning_rate": 2.6978155954184403e-07, "loss": 0.4751, "step": 7237 }, { "epoch": 0.9, "grad_norm": 1.3927473634808414, "learning_rate": 2.691307141397975e-07, "loss": 0.4655, "step": 7238 }, { "epoch": 0.9, "grad_norm": 1.4048674764987086, "learning_rate": 2.684806330513512e-07, "loss": 0.4892, "step": 7239 }, { "epoch": 0.9, "grad_norm": 3.1409357943365657, "learning_rate": 2.6783131638152883e-07, "loss": 0.5432, "step": 7240 }, { "epoch": 0.9, "grad_norm": 1.547539150362807, "learning_rate": 2.6718276423523593e-07, "loss": 0.489, "step": 7241 }, { "epoch": 0.9, "grad_norm": 1.3301631048062446, "learning_rate": 2.665349767172493e-07, "loss": 0.5291, "step": 7242 }, { "epoch": 0.9, "grad_norm": 2.391096724620809, "learning_rate": 2.6588795393222566e-07, "loss": 0.4868, "step": 7243 }, { "epoch": 0.9, "grad_norm": 1.7189858035690682, "learning_rate": 2.6524169598469816e-07, "loss": 0.5221, "step": 7244 }, { "epoch": 0.9, "grad_norm": 1.4065853979337697, "learning_rate": 2.6459620297907386e-07, "loss": 0.5053, "step": 7245 }, { "epoch": 0.9, "grad_norm": 1.589793540733917, "learning_rate": 2.639514750196398e-07, "loss": 0.5439, "step": 7246 }, { "epoch": 0.9, "grad_norm": 1.694368040776559, "learning_rate": 2.633075122105555e-07, "loss": 0.503, "step": 7247 }, { "epoch": 0.9, "grad_norm": 1.3267658279966987, "learning_rate": 2.626643146558605e-07, "loss": 0.5074, "step": 7248 }, { "epoch": 0.9, "grad_norm": 1.586372075700058, "learning_rate": 2.6202188245946726e-07, "loss": 0.4962, "step": 7249 }, { "epoch": 0.9, "grad_norm": 1.3828261644604938, "learning_rate": 2.6138021572516715e-07, "loss": 0.4942, "step": 7250 }, { "epoch": 0.9, "grad_norm": 3.4093827303195634, "learning_rate": 2.607393145566284e-07, "loss": 0.4875, "step": 7251 }, { "epoch": 0.9, "grad_norm": 1.9936764361454529, "learning_rate": 2.6009917905739203e-07, "loss": 0.5106, "step": 7252 }, { "epoch": 0.9, "grad_norm": 1.422134201103214, "learning_rate": 2.594598093308792e-07, "loss": 0.4947, "step": 7253 }, { "epoch": 0.9, "grad_norm": 1.2892958431167894, "learning_rate": 2.588212054803846e-07, "loss": 0.4645, "step": 7254 }, { "epoch": 0.9, "grad_norm": 3.471846303409366, "learning_rate": 2.5818336760908115e-07, "loss": 0.5207, "step": 7255 }, { "epoch": 0.9, "grad_norm": 1.3972002061205235, "learning_rate": 2.5754629582001644e-07, "loss": 0.5183, "step": 7256 }, { "epoch": 0.9, "grad_norm": 1.4677880478954894, "learning_rate": 2.569099902161165e-07, "loss": 0.4941, "step": 7257 }, { "epoch": 0.9, "grad_norm": 1.985567571670727, "learning_rate": 2.5627445090017913e-07, "loss": 0.4583, "step": 7258 }, { "epoch": 0.9, "grad_norm": 1.5235852187263925, "learning_rate": 2.5563967797488487e-07, "loss": 0.4655, "step": 7259 }, { "epoch": 0.9, "grad_norm": 1.677128208654452, "learning_rate": 2.5500567154278567e-07, "loss": 0.5045, "step": 7260 }, { "epoch": 0.9, "grad_norm": 1.3204359303408124, "learning_rate": 2.54372431706309e-07, "loss": 0.5068, "step": 7261 }, { "epoch": 0.9, "grad_norm": 1.5599365732510986, "learning_rate": 2.537399585677625e-07, "loss": 0.5113, "step": 7262 }, { "epoch": 0.9, "grad_norm": 1.4635723228362618, "learning_rate": 2.531082522293266e-07, "loss": 0.5076, "step": 7263 }, { "epoch": 0.9, "grad_norm": 1.4179714784792024, "learning_rate": 2.524773127930602e-07, "loss": 0.4573, "step": 7264 }, { "epoch": 0.9, "grad_norm": 1.6028209650618992, "learning_rate": 2.518471403608952e-07, "loss": 0.5354, "step": 7265 }, { "epoch": 0.9, "grad_norm": 0.6732471951768726, "learning_rate": 2.512177350346434e-07, "loss": 0.5216, "step": 7266 }, { "epoch": 0.9, "grad_norm": 2.705772586239429, "learning_rate": 2.5058909691598966e-07, "loss": 0.5521, "step": 7267 }, { "epoch": 0.9, "grad_norm": 1.3672127403570247, "learning_rate": 2.499612261064971e-07, "loss": 0.4545, "step": 7268 }, { "epoch": 0.9, "grad_norm": 1.7575170328414065, "learning_rate": 2.4933412270760295e-07, "loss": 0.5077, "step": 7269 }, { "epoch": 0.9, "grad_norm": 1.9858809443827865, "learning_rate": 2.487077868206206e-07, "loss": 0.5322, "step": 7270 }, { "epoch": 0.9, "grad_norm": 1.4890330148958661, "learning_rate": 2.4808221854674185e-07, "loss": 0.493, "step": 7271 }, { "epoch": 0.9, "grad_norm": 1.3164375874859087, "learning_rate": 2.4745741798703094e-07, "loss": 0.4836, "step": 7272 }, { "epoch": 0.9, "grad_norm": 1.4383171376568136, "learning_rate": 2.4683338524243204e-07, "loss": 0.4913, "step": 7273 }, { "epoch": 0.9, "grad_norm": 1.578046977346187, "learning_rate": 2.4621012041376013e-07, "loss": 0.4647, "step": 7274 }, { "epoch": 0.9, "grad_norm": 1.6640790823621678, "learning_rate": 2.455876236017124e-07, "loss": 0.4695, "step": 7275 }, { "epoch": 0.9, "grad_norm": 1.5343616831889717, "learning_rate": 2.4496589490685617e-07, "loss": 0.4824, "step": 7276 }, { "epoch": 0.9, "grad_norm": 1.712511496470845, "learning_rate": 2.4434493442963944e-07, "loss": 0.5031, "step": 7277 }, { "epoch": 0.9, "grad_norm": 1.2402210784873, "learning_rate": 2.4372474227038145e-07, "loss": 0.4437, "step": 7278 }, { "epoch": 0.9, "grad_norm": 1.4835344448500538, "learning_rate": 2.43105318529282e-07, "loss": 0.4807, "step": 7279 }, { "epoch": 0.9, "grad_norm": 1.6450460043573967, "learning_rate": 2.424866633064127e-07, "loss": 0.4643, "step": 7280 }, { "epoch": 0.9, "grad_norm": 1.4435211933583696, "learning_rate": 2.418687767017236e-07, "loss": 0.5171, "step": 7281 }, { "epoch": 0.9, "grad_norm": 2.4803283735920534, "learning_rate": 2.412516588150393e-07, "loss": 0.4445, "step": 7282 }, { "epoch": 0.9, "grad_norm": 1.6756255272826075, "learning_rate": 2.406353097460601e-07, "loss": 0.5409, "step": 7283 }, { "epoch": 0.9, "grad_norm": 1.4945053637205326, "learning_rate": 2.400197295943646e-07, "loss": 0.4947, "step": 7284 }, { "epoch": 0.9, "grad_norm": 1.5668521984285715, "learning_rate": 2.394049184594027e-07, "loss": 0.4485, "step": 7285 }, { "epoch": 0.9, "grad_norm": 0.6935285110259156, "learning_rate": 2.3879087644050503e-07, "loss": 0.5001, "step": 7286 }, { "epoch": 0.9, "grad_norm": 1.478287517350354, "learning_rate": 2.3817760363687382e-07, "loss": 0.5318, "step": 7287 }, { "epoch": 0.9, "grad_norm": 1.5620851531817914, "learning_rate": 2.3756510014758926e-07, "loss": 0.5353, "step": 7288 }, { "epoch": 0.9, "grad_norm": 1.5637832675029486, "learning_rate": 2.3695336607160668e-07, "loss": 0.4819, "step": 7289 }, { "epoch": 0.9, "grad_norm": 1.6615999181013263, "learning_rate": 2.3634240150775645e-07, "loss": 0.5141, "step": 7290 }, { "epoch": 0.9, "grad_norm": 2.0650508896497906, "learning_rate": 2.3573220655474572e-07, "loss": 0.5006, "step": 7291 }, { "epoch": 0.9, "grad_norm": 1.5867828142828393, "learning_rate": 2.351227813111573e-07, "loss": 0.4795, "step": 7292 }, { "epoch": 0.91, "grad_norm": 0.677807090950925, "learning_rate": 2.3451412587544908e-07, "loss": 0.4779, "step": 7293 }, { "epoch": 0.91, "grad_norm": 1.3721886541775619, "learning_rate": 2.3390624034595356e-07, "loss": 0.5185, "step": 7294 }, { "epoch": 0.91, "grad_norm": 1.4681451485285608, "learning_rate": 2.3329912482088213e-07, "loss": 0.5293, "step": 7295 }, { "epoch": 0.91, "grad_norm": 1.4934042079383747, "learning_rate": 2.3269277939831748e-07, "loss": 0.4729, "step": 7296 }, { "epoch": 0.91, "grad_norm": 4.146107040808323, "learning_rate": 2.3208720417622177e-07, "loss": 0.4677, "step": 7297 }, { "epoch": 0.91, "grad_norm": 1.306195733810811, "learning_rate": 2.3148239925243067e-07, "loss": 0.4468, "step": 7298 }, { "epoch": 0.91, "grad_norm": 1.4923955412993348, "learning_rate": 2.3087836472465319e-07, "loss": 0.4738, "step": 7299 }, { "epoch": 0.91, "grad_norm": 1.7773578519524955, "learning_rate": 2.302751006904802e-07, "loss": 0.5329, "step": 7300 }, { "epoch": 0.91, "grad_norm": 1.4199512023896965, "learning_rate": 2.296726072473726e-07, "loss": 0.4481, "step": 7301 }, { "epoch": 0.91, "grad_norm": 1.4258423801499063, "learning_rate": 2.2907088449266867e-07, "loss": 0.5177, "step": 7302 }, { "epoch": 0.91, "grad_norm": 1.5231668326365035, "learning_rate": 2.2846993252358119e-07, "loss": 0.5138, "step": 7303 }, { "epoch": 0.91, "grad_norm": 1.4672551656917383, "learning_rate": 2.278697514372008e-07, "loss": 0.4154, "step": 7304 }, { "epoch": 0.91, "grad_norm": 1.6189360069251224, "learning_rate": 2.2727034133049054e-07, "loss": 0.5066, "step": 7305 }, { "epoch": 0.91, "grad_norm": 2.1915601216281146, "learning_rate": 2.2667170230029177e-07, "loss": 0.532, "step": 7306 }, { "epoch": 0.91, "grad_norm": 1.593834724413796, "learning_rate": 2.2607383444331888e-07, "loss": 0.5509, "step": 7307 }, { "epoch": 0.91, "grad_norm": 1.7054554094812038, "learning_rate": 2.2547673785616285e-07, "loss": 0.4572, "step": 7308 }, { "epoch": 0.91, "grad_norm": 1.8211515336626856, "learning_rate": 2.2488041263529048e-07, "loss": 0.4482, "step": 7309 }, { "epoch": 0.91, "grad_norm": 1.648506058182298, "learning_rate": 2.2428485887704244e-07, "loss": 0.4783, "step": 7310 }, { "epoch": 0.91, "grad_norm": 1.7043440299302586, "learning_rate": 2.2369007667763676e-07, "loss": 0.5503, "step": 7311 }, { "epoch": 0.91, "grad_norm": 1.493921056568484, "learning_rate": 2.2309606613316438e-07, "loss": 0.4892, "step": 7312 }, { "epoch": 0.91, "grad_norm": 1.5789653486264432, "learning_rate": 2.2250282733959462e-07, "loss": 0.4903, "step": 7313 }, { "epoch": 0.91, "grad_norm": 1.2578843844561818, "learning_rate": 2.2191036039276914e-07, "loss": 0.4967, "step": 7314 }, { "epoch": 0.91, "grad_norm": 1.4787303637879343, "learning_rate": 2.213186653884064e-07, "loss": 0.554, "step": 7315 }, { "epoch": 0.91, "grad_norm": 1.4311610016698448, "learning_rate": 2.2072774242210048e-07, "loss": 0.4968, "step": 7316 }, { "epoch": 0.91, "grad_norm": 1.8183962667671558, "learning_rate": 2.2013759158931947e-07, "loss": 0.4779, "step": 7317 }, { "epoch": 0.91, "grad_norm": 2.241204665605121, "learning_rate": 2.1954821298540873e-07, "loss": 0.4723, "step": 7318 }, { "epoch": 0.91, "grad_norm": 2.584852717960534, "learning_rate": 2.1895960670558548e-07, "loss": 0.5204, "step": 7319 }, { "epoch": 0.91, "grad_norm": 2.254694152059909, "learning_rate": 2.1837177284494637e-07, "loss": 0.5354, "step": 7320 }, { "epoch": 0.91, "grad_norm": 1.5623354567276635, "learning_rate": 2.1778471149845938e-07, "loss": 0.5041, "step": 7321 }, { "epoch": 0.91, "grad_norm": 2.440466493552607, "learning_rate": 2.171984227609708e-07, "loss": 0.4777, "step": 7322 }, { "epoch": 0.91, "grad_norm": 1.4591778128934314, "learning_rate": 2.1661290672719993e-07, "loss": 0.4227, "step": 7323 }, { "epoch": 0.91, "grad_norm": 1.335574212901139, "learning_rate": 2.160281634917416e-07, "loss": 0.452, "step": 7324 }, { "epoch": 0.91, "grad_norm": 1.5346708718374296, "learning_rate": 2.1544419314906805e-07, "loss": 0.4654, "step": 7325 }, { "epoch": 0.91, "grad_norm": 2.117331882237975, "learning_rate": 2.1486099579352327e-07, "loss": 0.5138, "step": 7326 }, { "epoch": 0.91, "grad_norm": 1.476418156466487, "learning_rate": 2.1427857151932851e-07, "loss": 0.4655, "step": 7327 }, { "epoch": 0.91, "grad_norm": 2.0906591898100957, "learning_rate": 2.136969204205791e-07, "loss": 0.5024, "step": 7328 }, { "epoch": 0.91, "grad_norm": 1.491630283610432, "learning_rate": 2.1311604259124653e-07, "loss": 0.4872, "step": 7329 }, { "epoch": 0.91, "grad_norm": 1.5464114719601534, "learning_rate": 2.125359381251757e-07, "loss": 0.5307, "step": 7330 }, { "epoch": 0.91, "grad_norm": 1.408479255570862, "learning_rate": 2.1195660711608944e-07, "loss": 0.512, "step": 7331 }, { "epoch": 0.91, "grad_norm": 2.005446125069675, "learning_rate": 2.113780496575818e-07, "loss": 0.4561, "step": 7332 }, { "epoch": 0.91, "grad_norm": 1.7105871509101782, "learning_rate": 2.1080026584312407e-07, "loss": 0.4906, "step": 7333 }, { "epoch": 0.91, "grad_norm": 1.6551695201751928, "learning_rate": 2.102232557660644e-07, "loss": 0.4628, "step": 7334 }, { "epoch": 0.91, "grad_norm": 1.515246530641419, "learning_rate": 2.0964701951962095e-07, "loss": 0.4869, "step": 7335 }, { "epoch": 0.91, "grad_norm": 1.3945580750786308, "learning_rate": 2.0907155719689208e-07, "loss": 0.4854, "step": 7336 }, { "epoch": 0.91, "grad_norm": 1.4034244421481015, "learning_rate": 2.0849686889084786e-07, "loss": 0.459, "step": 7337 }, { "epoch": 0.91, "grad_norm": 1.5868486583384376, "learning_rate": 2.07922954694334e-07, "loss": 0.4551, "step": 7338 }, { "epoch": 0.91, "grad_norm": 1.6710749272297594, "learning_rate": 2.0734981470007133e-07, "loss": 0.5157, "step": 7339 }, { "epoch": 0.91, "grad_norm": 1.874129184801048, "learning_rate": 2.0677744900065633e-07, "loss": 0.5156, "step": 7340 }, { "epoch": 0.91, "grad_norm": 1.8234472645431714, "learning_rate": 2.0620585768855949e-07, "loss": 0.4908, "step": 7341 }, { "epoch": 0.91, "grad_norm": 1.3063947819347423, "learning_rate": 2.0563504085612583e-07, "loss": 0.4893, "step": 7342 }, { "epoch": 0.91, "grad_norm": 2.418876188696213, "learning_rate": 2.0506499859557717e-07, "loss": 0.449, "step": 7343 }, { "epoch": 0.91, "grad_norm": 1.4762691073800227, "learning_rate": 2.04495730999007e-07, "loss": 0.493, "step": 7344 }, { "epoch": 0.91, "grad_norm": 1.554007050981706, "learning_rate": 2.0392723815838734e-07, "loss": 0.499, "step": 7345 }, { "epoch": 0.91, "grad_norm": 1.3702325490919527, "learning_rate": 2.0335952016556193e-07, "loss": 0.4782, "step": 7346 }, { "epoch": 0.91, "grad_norm": 1.6724464755354924, "learning_rate": 2.0279257711225186e-07, "loss": 0.4829, "step": 7347 }, { "epoch": 0.91, "grad_norm": 1.5667356213060892, "learning_rate": 2.0222640909004942e-07, "loss": 0.4719, "step": 7348 }, { "epoch": 0.91, "grad_norm": 1.3072445321244563, "learning_rate": 2.0166101619042643e-07, "loss": 0.4947, "step": 7349 }, { "epoch": 0.91, "grad_norm": 1.2520941586893453, "learning_rate": 2.0109639850472706e-07, "loss": 0.4779, "step": 7350 }, { "epoch": 0.91, "grad_norm": 2.4133855463711558, "learning_rate": 2.0053255612416832e-07, "loss": 0.4619, "step": 7351 }, { "epoch": 0.91, "grad_norm": 1.4728280235748008, "learning_rate": 1.9996948913984626e-07, "loss": 0.4631, "step": 7352 }, { "epoch": 0.91, "grad_norm": 0.6765788312000822, "learning_rate": 1.99407197642727e-07, "loss": 0.4636, "step": 7353 }, { "epoch": 0.91, "grad_norm": 1.9494597592574943, "learning_rate": 1.988456817236556e-07, "loss": 0.4529, "step": 7354 }, { "epoch": 0.91, "grad_norm": 1.443993759050577, "learning_rate": 1.9828494147334843e-07, "loss": 0.4866, "step": 7355 }, { "epoch": 0.91, "grad_norm": 1.5748048125426386, "learning_rate": 1.977249769823991e-07, "loss": 0.4593, "step": 7356 }, { "epoch": 0.91, "grad_norm": 1.3756291053600451, "learning_rate": 1.9716578834127366e-07, "loss": 0.5024, "step": 7357 }, { "epoch": 0.91, "grad_norm": 1.639060653594591, "learning_rate": 1.966073756403153e-07, "loss": 0.4482, "step": 7358 }, { "epoch": 0.91, "grad_norm": 1.3952555902533088, "learning_rate": 1.9604973896974022e-07, "loss": 0.4986, "step": 7359 }, { "epoch": 0.91, "grad_norm": 1.8896722139396491, "learning_rate": 1.9549287841963915e-07, "loss": 0.502, "step": 7360 }, { "epoch": 0.91, "grad_norm": 1.5602669134106195, "learning_rate": 1.949367940799779e-07, "loss": 0.5039, "step": 7361 }, { "epoch": 0.91, "grad_norm": 2.7246978242341773, "learning_rate": 1.9438148604059627e-07, "loss": 0.4685, "step": 7362 }, { "epoch": 0.91, "grad_norm": 2.2521584103521244, "learning_rate": 1.9382695439121024e-07, "loss": 0.4646, "step": 7363 }, { "epoch": 0.91, "grad_norm": 1.9572783096995119, "learning_rate": 1.9327319922140819e-07, "loss": 0.5122, "step": 7364 }, { "epoch": 0.91, "grad_norm": 1.3215317375129358, "learning_rate": 1.9272022062065466e-07, "loss": 0.4642, "step": 7365 }, { "epoch": 0.91, "grad_norm": 1.3334449214336, "learning_rate": 1.9216801867828817e-07, "loss": 0.4742, "step": 7366 }, { "epoch": 0.91, "grad_norm": 3.0968260435500023, "learning_rate": 1.916165934835229e-07, "loss": 0.456, "step": 7367 }, { "epoch": 0.91, "grad_norm": 1.390221874520958, "learning_rate": 1.9106594512544485e-07, "loss": 0.472, "step": 7368 }, { "epoch": 0.91, "grad_norm": 1.3826685396985134, "learning_rate": 1.9051607369301616e-07, "loss": 0.495, "step": 7369 }, { "epoch": 0.91, "grad_norm": 1.6508771302483614, "learning_rate": 1.899669792750747e-07, "loss": 0.5044, "step": 7370 }, { "epoch": 0.91, "grad_norm": 1.5330385791261132, "learning_rate": 1.8941866196032998e-07, "loss": 0.4936, "step": 7371 }, { "epoch": 0.91, "grad_norm": 1.7707144138711233, "learning_rate": 1.88871121837369e-07, "loss": 0.486, "step": 7372 }, { "epoch": 0.91, "grad_norm": 1.262247496461452, "learning_rate": 1.8832435899464984e-07, "loss": 0.4518, "step": 7373 }, { "epoch": 0.92, "grad_norm": 1.3873141734746803, "learning_rate": 1.8777837352050908e-07, "loss": 0.47, "step": 7374 }, { "epoch": 0.92, "grad_norm": 1.3693940975255388, "learning_rate": 1.872331655031534e-07, "loss": 0.5038, "step": 7375 }, { "epoch": 0.92, "grad_norm": 2.0349987280215167, "learning_rate": 1.8668873503066786e-07, "loss": 0.4869, "step": 7376 }, { "epoch": 0.92, "grad_norm": 1.6231415884536113, "learning_rate": 1.861450821910088e-07, "loss": 0.4838, "step": 7377 }, { "epoch": 0.92, "grad_norm": 1.6188759308500793, "learning_rate": 1.856022070720076e-07, "loss": 0.51, "step": 7378 }, { "epoch": 0.92, "grad_norm": 1.2550870322710346, "learning_rate": 1.8506010976137244e-07, "loss": 0.4649, "step": 7379 }, { "epoch": 0.92, "grad_norm": 1.3361841719564518, "learning_rate": 1.8451879034668163e-07, "loss": 0.4599, "step": 7380 }, { "epoch": 0.92, "grad_norm": 2.7603972626571474, "learning_rate": 1.839782489153913e-07, "loss": 0.5608, "step": 7381 }, { "epoch": 0.92, "grad_norm": 1.786862186824463, "learning_rate": 1.83438485554831e-07, "loss": 0.5361, "step": 7382 }, { "epoch": 0.92, "grad_norm": 1.3992880337173184, "learning_rate": 1.828995003522044e-07, "loss": 0.5047, "step": 7383 }, { "epoch": 0.92, "grad_norm": 1.3833373236184248, "learning_rate": 1.8236129339458787e-07, "loss": 0.4584, "step": 7384 }, { "epoch": 0.92, "grad_norm": 1.6915427558700735, "learning_rate": 1.8182386476893467e-07, "loss": 0.4778, "step": 7385 }, { "epoch": 0.92, "grad_norm": 1.5895090237630853, "learning_rate": 1.8128721456207088e-07, "loss": 0.4951, "step": 7386 }, { "epoch": 0.92, "grad_norm": 1.7293079354326406, "learning_rate": 1.8075134286069718e-07, "loss": 0.4858, "step": 7387 }, { "epoch": 0.92, "grad_norm": 1.465841554108314, "learning_rate": 1.802162497513882e-07, "loss": 0.4851, "step": 7388 }, { "epoch": 0.92, "grad_norm": 1.6913645552862504, "learning_rate": 1.7968193532059197e-07, "loss": 0.4761, "step": 7389 }, { "epoch": 0.92, "grad_norm": 1.3774108786510824, "learning_rate": 1.7914839965463339e-07, "loss": 0.4674, "step": 7390 }, { "epoch": 0.92, "grad_norm": 1.4307532756136354, "learning_rate": 1.78615642839709e-07, "loss": 0.4621, "step": 7391 }, { "epoch": 0.92, "grad_norm": 1.4096128289356364, "learning_rate": 1.780836649618911e-07, "loss": 0.5172, "step": 7392 }, { "epoch": 0.92, "grad_norm": 1.7973737870861728, "learning_rate": 1.7755246610712372e-07, "loss": 0.4674, "step": 7393 }, { "epoch": 0.92, "grad_norm": 1.388887248954646, "learning_rate": 1.7702204636122878e-07, "loss": 0.4694, "step": 7394 }, { "epoch": 0.92, "grad_norm": 1.4194576297864852, "learning_rate": 1.764924058098988e-07, "loss": 0.5377, "step": 7395 }, { "epoch": 0.92, "grad_norm": 1.466166965935639, "learning_rate": 1.7596354453870257e-07, "loss": 0.5266, "step": 7396 }, { "epoch": 0.92, "grad_norm": 1.4755667902189378, "learning_rate": 1.7543546263308176e-07, "loss": 0.4729, "step": 7397 }, { "epoch": 0.92, "grad_norm": 2.2663679978132127, "learning_rate": 1.74908160178352e-07, "loss": 0.5259, "step": 7398 }, { "epoch": 0.92, "grad_norm": 1.3924679571475973, "learning_rate": 1.7438163725970624e-07, "loss": 0.5001, "step": 7399 }, { "epoch": 0.92, "grad_norm": 1.8073122795850416, "learning_rate": 1.7385589396220592e-07, "loss": 0.4704, "step": 7400 }, { "epoch": 0.92, "grad_norm": 1.3579610533590791, "learning_rate": 1.7333093037079197e-07, "loss": 0.4827, "step": 7401 }, { "epoch": 0.92, "grad_norm": 1.4829654561982808, "learning_rate": 1.7280674657027486e-07, "loss": 0.4699, "step": 7402 }, { "epoch": 0.92, "grad_norm": 1.4002345695803489, "learning_rate": 1.7228334264534242e-07, "loss": 0.4516, "step": 7403 }, { "epoch": 0.92, "grad_norm": 1.440620235561422, "learning_rate": 1.7176071868055421e-07, "loss": 0.4559, "step": 7404 }, { "epoch": 0.92, "grad_norm": 2.1864763417305415, "learning_rate": 1.7123887476034607e-07, "loss": 0.5007, "step": 7405 }, { "epoch": 0.92, "grad_norm": 1.5826582958870667, "learning_rate": 1.7071781096902497e-07, "loss": 0.4817, "step": 7406 }, { "epoch": 0.92, "grad_norm": 1.6371850257202158, "learning_rate": 1.701975273907741e-07, "loss": 0.5102, "step": 7407 }, { "epoch": 0.92, "grad_norm": 0.6444450230778677, "learning_rate": 1.6967802410965016e-07, "loss": 0.4651, "step": 7408 }, { "epoch": 0.92, "grad_norm": 1.6004728101518253, "learning_rate": 1.6915930120958268e-07, "loss": 0.4958, "step": 7409 }, { "epoch": 0.92, "grad_norm": 1.7822773798269116, "learning_rate": 1.6864135877437683e-07, "loss": 0.4633, "step": 7410 }, { "epoch": 0.92, "grad_norm": 1.435767134768869, "learning_rate": 1.6812419688770953e-07, "loss": 0.467, "step": 7411 }, { "epoch": 0.92, "grad_norm": 1.372733854208186, "learning_rate": 1.6760781563313399e-07, "loss": 0.4703, "step": 7412 }, { "epoch": 0.92, "grad_norm": 1.526168994526638, "learning_rate": 1.670922150940757e-07, "loss": 0.4873, "step": 7413 }, { "epoch": 0.92, "grad_norm": 1.426658070398611, "learning_rate": 1.665773953538341e-07, "loss": 0.5046, "step": 7414 }, { "epoch": 0.92, "grad_norm": 1.7451714554121183, "learning_rate": 1.6606335649558436e-07, "loss": 0.4596, "step": 7415 }, { "epoch": 0.92, "grad_norm": 1.4002667303381664, "learning_rate": 1.655500986023717e-07, "loss": 0.4861, "step": 7416 }, { "epoch": 0.92, "grad_norm": 2.103839952829796, "learning_rate": 1.650376217571198e-07, "loss": 0.5538, "step": 7417 }, { "epoch": 0.92, "grad_norm": 1.8123136742602968, "learning_rate": 1.6452592604262185e-07, "loss": 0.4742, "step": 7418 }, { "epoch": 0.92, "grad_norm": 1.532392411596531, "learning_rate": 1.6401501154154786e-07, "loss": 0.4765, "step": 7419 }, { "epoch": 0.92, "grad_norm": 2.232248589830049, "learning_rate": 1.6350487833644012e-07, "loss": 0.5166, "step": 7420 }, { "epoch": 0.92, "grad_norm": 1.6485974896546554, "learning_rate": 1.629955265097155e-07, "loss": 0.4424, "step": 7421 }, { "epoch": 0.92, "grad_norm": 1.4977480346655385, "learning_rate": 1.6248695614366427e-07, "loss": 0.5384, "step": 7422 }, { "epoch": 0.92, "grad_norm": 1.5735385964767112, "learning_rate": 1.619791673204496e-07, "loss": 0.5221, "step": 7423 }, { "epoch": 0.92, "grad_norm": 1.5112925459666093, "learning_rate": 1.6147216012211087e-07, "loss": 0.5002, "step": 7424 }, { "epoch": 0.92, "grad_norm": 3.9905644059573038, "learning_rate": 1.6096593463055754e-07, "loss": 0.4811, "step": 7425 }, { "epoch": 0.92, "grad_norm": 1.881686217710671, "learning_rate": 1.60460490927577e-07, "loss": 0.5035, "step": 7426 }, { "epoch": 0.92, "grad_norm": 1.314687441900597, "learning_rate": 1.599558290948261e-07, "loss": 0.4807, "step": 7427 }, { "epoch": 0.92, "grad_norm": 0.6786577910900888, "learning_rate": 1.594519492138391e-07, "loss": 0.459, "step": 7428 }, { "epoch": 0.92, "grad_norm": 1.398277622031906, "learning_rate": 1.589488513660209e-07, "loss": 0.5031, "step": 7429 }, { "epoch": 0.92, "grad_norm": 1.373663322700556, "learning_rate": 1.58446535632652e-07, "loss": 0.4752, "step": 7430 }, { "epoch": 0.92, "grad_norm": 1.5268937260525122, "learning_rate": 1.579450020948864e-07, "loss": 0.5015, "step": 7431 }, { "epoch": 0.92, "grad_norm": 2.7816541926644356, "learning_rate": 1.574442508337498e-07, "loss": 0.4621, "step": 7432 }, { "epoch": 0.92, "grad_norm": 1.3089321631636746, "learning_rate": 1.5694428193014477e-07, "loss": 0.4985, "step": 7433 }, { "epoch": 0.92, "grad_norm": 1.4269437872264588, "learning_rate": 1.5644509546484387e-07, "loss": 0.5111, "step": 7434 }, { "epoch": 0.92, "grad_norm": 0.6211926111031364, "learning_rate": 1.5594669151849706e-07, "loss": 0.52, "step": 7435 }, { "epoch": 0.92, "grad_norm": 1.9188201433192622, "learning_rate": 1.5544907017162435e-07, "loss": 0.5104, "step": 7436 }, { "epoch": 0.92, "grad_norm": 1.4296650370385822, "learning_rate": 1.5495223150462145e-07, "loss": 0.5223, "step": 7437 }, { "epoch": 0.92, "grad_norm": 1.379650631045068, "learning_rate": 1.5445617559775694e-07, "loss": 0.5103, "step": 7438 }, { "epoch": 0.92, "grad_norm": 2.5477816134993425, "learning_rate": 1.5396090253117224e-07, "loss": 0.4758, "step": 7439 }, { "epoch": 0.92, "grad_norm": 1.3501455563275457, "learning_rate": 1.53466412384885e-07, "loss": 0.4398, "step": 7440 }, { "epoch": 0.92, "grad_norm": 1.4149457953921925, "learning_rate": 1.529727052387825e-07, "loss": 0.5128, "step": 7441 }, { "epoch": 0.92, "grad_norm": 1.3173573220357797, "learning_rate": 1.5247978117262918e-07, "loss": 0.4253, "step": 7442 }, { "epoch": 0.92, "grad_norm": 1.574707130020846, "learning_rate": 1.5198764026605918e-07, "loss": 0.5004, "step": 7443 }, { "epoch": 0.92, "grad_norm": 1.4846028133844213, "learning_rate": 1.514962825985844e-07, "loss": 0.5118, "step": 7444 }, { "epoch": 0.92, "grad_norm": 2.147310520687052, "learning_rate": 1.5100570824958638e-07, "loss": 0.498, "step": 7445 }, { "epoch": 0.92, "grad_norm": 1.3950785800094783, "learning_rate": 1.5051591729832282e-07, "loss": 0.4847, "step": 7446 }, { "epoch": 0.92, "grad_norm": 1.367432471953318, "learning_rate": 1.5002690982392264e-07, "loss": 0.5167, "step": 7447 }, { "epoch": 0.92, "grad_norm": 2.1658022233578893, "learning_rate": 1.4953868590538989e-07, "loss": 0.4746, "step": 7448 }, { "epoch": 0.92, "grad_norm": 1.7341710818649676, "learning_rate": 1.49051245621602e-07, "loss": 0.5009, "step": 7449 }, { "epoch": 0.92, "grad_norm": 1.9017135541191659, "learning_rate": 1.4856458905130823e-07, "loss": 0.5082, "step": 7450 }, { "epoch": 0.92, "grad_norm": 1.4883425706331486, "learning_rate": 1.480787162731334e-07, "loss": 0.5104, "step": 7451 }, { "epoch": 0.92, "grad_norm": 1.7310640824314047, "learning_rate": 1.4759362736557313e-07, "loss": 0.4518, "step": 7452 }, { "epoch": 0.92, "grad_norm": 1.6820489174655378, "learning_rate": 1.4710932240699915e-07, "loss": 0.4706, "step": 7453 }, { "epoch": 0.93, "grad_norm": 5.835461807837606, "learning_rate": 1.4662580147565386e-07, "loss": 0.5171, "step": 7454 }, { "epoch": 0.93, "grad_norm": 1.5063706588096515, "learning_rate": 1.4614306464965533e-07, "loss": 0.4654, "step": 7455 }, { "epoch": 0.93, "grad_norm": 0.6698220323564079, "learning_rate": 1.4566111200699396e-07, "loss": 0.5236, "step": 7456 }, { "epoch": 0.93, "grad_norm": 1.3900997845556018, "learning_rate": 1.45179943625533e-07, "loss": 0.5373, "step": 7457 }, { "epoch": 0.93, "grad_norm": 1.5143660815460784, "learning_rate": 1.4469955958300974e-07, "loss": 0.4765, "step": 7458 }, { "epoch": 0.93, "grad_norm": 2.626916081300669, "learning_rate": 1.442199599570343e-07, "loss": 0.5145, "step": 7459 }, { "epoch": 0.93, "grad_norm": 1.4632897847436472, "learning_rate": 1.4374114482509028e-07, "loss": 0.5348, "step": 7460 }, { "epoch": 0.93, "grad_norm": 1.4279017685403526, "learning_rate": 1.432631142645341e-07, "loss": 0.4946, "step": 7461 }, { "epoch": 0.93, "grad_norm": 2.2083577425437353, "learning_rate": 1.4278586835259622e-07, "loss": 0.458, "step": 7462 }, { "epoch": 0.93, "grad_norm": 1.4784130586185524, "learning_rate": 1.4230940716637943e-07, "loss": 0.5431, "step": 7463 }, { "epoch": 0.93, "grad_norm": 1.6445800312409866, "learning_rate": 1.41833730782861e-07, "loss": 0.539, "step": 7464 }, { "epoch": 0.93, "grad_norm": 1.2848076635613273, "learning_rate": 1.4135883927889006e-07, "loss": 0.5047, "step": 7465 }, { "epoch": 0.93, "grad_norm": 1.4212789763956692, "learning_rate": 1.408847327311902e-07, "loss": 0.5072, "step": 7466 }, { "epoch": 0.93, "grad_norm": 1.5426409834820622, "learning_rate": 1.4041141121635737e-07, "loss": 0.5697, "step": 7467 }, { "epoch": 0.93, "grad_norm": 1.507637441455359, "learning_rate": 1.399388748108599e-07, "loss": 0.4793, "step": 7468 }, { "epoch": 0.93, "grad_norm": 1.4319365540845996, "learning_rate": 1.394671235910411e-07, "loss": 0.5337, "step": 7469 }, { "epoch": 0.93, "grad_norm": 1.5313856313039929, "learning_rate": 1.389961576331156e-07, "loss": 0.492, "step": 7470 }, { "epoch": 0.93, "grad_norm": 1.505239142027003, "learning_rate": 1.3852597701317306e-07, "loss": 0.4844, "step": 7471 }, { "epoch": 0.93, "grad_norm": 2.2618369771812814, "learning_rate": 1.380565818071744e-07, "loss": 0.4765, "step": 7472 }, { "epoch": 0.93, "grad_norm": 4.497229469316913, "learning_rate": 1.375879720909562e-07, "loss": 0.4455, "step": 7473 }, { "epoch": 0.93, "grad_norm": 1.5833775904859986, "learning_rate": 1.3712014794022455e-07, "loss": 0.4677, "step": 7474 }, { "epoch": 0.93, "grad_norm": 1.567828763742299, "learning_rate": 1.366531094305623e-07, "loss": 0.4709, "step": 7475 }, { "epoch": 0.93, "grad_norm": 1.514977473626188, "learning_rate": 1.3618685663742248e-07, "loss": 0.5325, "step": 7476 }, { "epoch": 0.93, "grad_norm": 1.5067250409224675, "learning_rate": 1.3572138963613258e-07, "loss": 0.5067, "step": 7477 }, { "epoch": 0.93, "grad_norm": 2.087781436589823, "learning_rate": 1.35256708501893e-07, "loss": 0.4266, "step": 7478 }, { "epoch": 0.93, "grad_norm": 3.345971248358072, "learning_rate": 1.3479281330977646e-07, "loss": 0.4553, "step": 7479 }, { "epoch": 0.93, "grad_norm": 2.553935766243598, "learning_rate": 1.3432970413472967e-07, "loss": 0.5117, "step": 7480 }, { "epoch": 0.93, "grad_norm": 1.414846608514627, "learning_rate": 1.3386738105157281e-07, "loss": 0.5022, "step": 7481 }, { "epoch": 0.93, "grad_norm": 1.5417676396175475, "learning_rate": 1.334058441349978e-07, "loss": 0.4737, "step": 7482 }, { "epoch": 0.93, "grad_norm": 1.5825303379028746, "learning_rate": 1.3294509345956884e-07, "loss": 0.4209, "step": 7483 }, { "epoch": 0.93, "grad_norm": 1.4591388912703152, "learning_rate": 1.3248512909972643e-07, "loss": 0.4908, "step": 7484 }, { "epoch": 0.93, "grad_norm": 1.8270115930764523, "learning_rate": 1.3202595112977945e-07, "loss": 0.4857, "step": 7485 }, { "epoch": 0.93, "grad_norm": 1.430368141396785, "learning_rate": 1.3156755962391464e-07, "loss": 0.5005, "step": 7486 }, { "epoch": 0.93, "grad_norm": 1.3706232110255132, "learning_rate": 1.3110995465618725e-07, "loss": 0.4999, "step": 7487 }, { "epoch": 0.93, "grad_norm": 7.2975987019146755, "learning_rate": 1.3065313630052757e-07, "loss": 0.5456, "step": 7488 }, { "epoch": 0.93, "grad_norm": 1.532224608280657, "learning_rate": 1.3019710463073987e-07, "loss": 0.4886, "step": 7489 }, { "epoch": 0.93, "grad_norm": 1.5089578742075092, "learning_rate": 1.2974185972049858e-07, "loss": 0.4434, "step": 7490 }, { "epoch": 0.93, "grad_norm": 1.7338452161486364, "learning_rate": 1.2928740164335375e-07, "loss": 0.5407, "step": 7491 }, { "epoch": 0.93, "grad_norm": 1.8783834907040973, "learning_rate": 1.2883373047272663e-07, "loss": 0.49, "step": 7492 }, { "epoch": 0.93, "grad_norm": 1.6643538611499085, "learning_rate": 1.2838084628191195e-07, "loss": 0.4932, "step": 7493 }, { "epoch": 0.93, "grad_norm": 2.998442612275223, "learning_rate": 1.2792874914407617e-07, "loss": 0.466, "step": 7494 }, { "epoch": 0.93, "grad_norm": 1.3993279711964997, "learning_rate": 1.2747743913226086e-07, "loss": 0.481, "step": 7495 }, { "epoch": 0.93, "grad_norm": 1.4127255593346921, "learning_rate": 1.270269163193788e-07, "loss": 0.4884, "step": 7496 }, { "epoch": 0.93, "grad_norm": 2.1344092010920592, "learning_rate": 1.2657718077821512e-07, "loss": 0.5471, "step": 7497 }, { "epoch": 0.93, "grad_norm": 1.6925433386356652, "learning_rate": 1.2612823258142949e-07, "loss": 0.4819, "step": 7498 }, { "epoch": 0.93, "grad_norm": 1.444345067251979, "learning_rate": 1.2568007180155328e-07, "loss": 0.4577, "step": 7499 }, { "epoch": 0.93, "grad_norm": 1.8671287144526687, "learning_rate": 1.2523269851099085e-07, "loss": 0.51, "step": 7500 }, { "epoch": 0.93, "grad_norm": 2.5835192422472604, "learning_rate": 1.2478611278201824e-07, "loss": 0.4785, "step": 7501 }, { "epoch": 0.93, "grad_norm": 0.6868950034259951, "learning_rate": 1.243403146867872e-07, "loss": 0.4806, "step": 7502 }, { "epoch": 0.93, "grad_norm": 1.5403639562764544, "learning_rate": 1.2389530429731844e-07, "loss": 0.515, "step": 7503 }, { "epoch": 0.93, "grad_norm": 1.434244700079374, "learning_rate": 1.2345108168550836e-07, "loss": 0.4656, "step": 7504 }, { "epoch": 0.93, "grad_norm": 1.662107659471043, "learning_rate": 1.2300764692312507e-07, "loss": 0.5934, "step": 7505 }, { "epoch": 0.93, "grad_norm": 1.3295395274177795, "learning_rate": 1.225650000818085e-07, "loss": 0.4394, "step": 7506 }, { "epoch": 0.93, "grad_norm": 1.6382934586730635, "learning_rate": 1.2212314123307368e-07, "loss": 0.5056, "step": 7507 }, { "epoch": 0.93, "grad_norm": 1.2936263880044172, "learning_rate": 1.2168207044830572e-07, "loss": 0.4899, "step": 7508 }, { "epoch": 0.93, "grad_norm": 3.0376476751674737, "learning_rate": 1.2124178779876373e-07, "loss": 0.501, "step": 7509 }, { "epoch": 0.93, "grad_norm": 0.6932854553594543, "learning_rate": 1.2080229335557858e-07, "loss": 0.5038, "step": 7510 }, { "epoch": 0.93, "grad_norm": 1.3352372815688656, "learning_rate": 1.2036358718975572e-07, "loss": 0.4864, "step": 7511 }, { "epoch": 0.93, "grad_norm": 1.5051657873763646, "learning_rate": 1.1992566937217066e-07, "loss": 0.4696, "step": 7512 }, { "epoch": 0.93, "grad_norm": 1.3730238116729956, "learning_rate": 1.1948853997357402e-07, "loss": 0.5046, "step": 7513 }, { "epoch": 0.93, "grad_norm": 1.8058338907155638, "learning_rate": 1.1905219906458765e-07, "loss": 0.4973, "step": 7514 }, { "epoch": 0.93, "grad_norm": 1.5547767592050927, "learning_rate": 1.1861664671570517e-07, "loss": 0.4965, "step": 7515 }, { "epoch": 0.93, "grad_norm": 1.6235911597108474, "learning_rate": 1.1818188299729583e-07, "loss": 0.4996, "step": 7516 }, { "epoch": 0.93, "grad_norm": 1.452063347559451, "learning_rate": 1.177479079795979e-07, "loss": 0.4445, "step": 7517 }, { "epoch": 0.93, "grad_norm": 1.7283130334264376, "learning_rate": 1.1731472173272529e-07, "loss": 0.5076, "step": 7518 }, { "epoch": 0.93, "grad_norm": 1.700430020011993, "learning_rate": 1.1688232432666147e-07, "loss": 0.5086, "step": 7519 }, { "epoch": 0.93, "grad_norm": 2.1730840757222, "learning_rate": 1.1645071583126499e-07, "loss": 0.4975, "step": 7520 }, { "epoch": 0.93, "grad_norm": 1.403025967928819, "learning_rate": 1.1601989631626565e-07, "loss": 0.4639, "step": 7521 }, { "epoch": 0.93, "grad_norm": 0.6772365589825002, "learning_rate": 1.1558986585126608e-07, "loss": 0.4967, "step": 7522 }, { "epoch": 0.93, "grad_norm": 1.4835936516919237, "learning_rate": 1.1516062450574239e-07, "loss": 0.4588, "step": 7523 }, { "epoch": 0.93, "grad_norm": 0.6819121439101569, "learning_rate": 1.1473217234904133e-07, "loss": 0.4629, "step": 7524 }, { "epoch": 0.93, "grad_norm": 1.4333015642586124, "learning_rate": 1.1430450945038363e-07, "loss": 0.4882, "step": 7525 }, { "epoch": 0.93, "grad_norm": 1.8930763894354676, "learning_rate": 1.1387763587886181e-07, "loss": 0.493, "step": 7526 }, { "epoch": 0.93, "grad_norm": 1.8009081752597993, "learning_rate": 1.1345155170344124e-07, "loss": 0.5328, "step": 7527 }, { "epoch": 0.93, "grad_norm": 1.3986294432791426, "learning_rate": 1.1302625699295855e-07, "loss": 0.4201, "step": 7528 }, { "epoch": 0.93, "grad_norm": 1.4873467528400124, "learning_rate": 1.1260175181612488e-07, "loss": 0.5264, "step": 7529 }, { "epoch": 0.93, "grad_norm": 1.5485362346370584, "learning_rate": 1.1217803624152312e-07, "loss": 0.4878, "step": 7530 }, { "epoch": 0.93, "grad_norm": 1.542461414453805, "learning_rate": 1.1175511033760688e-07, "loss": 0.4484, "step": 7531 }, { "epoch": 0.93, "grad_norm": 1.4454517583014335, "learning_rate": 1.1133297417270539e-07, "loss": 0.5089, "step": 7532 }, { "epoch": 0.93, "grad_norm": 1.6930035810814124, "learning_rate": 1.1091162781501685e-07, "loss": 0.5337, "step": 7533 }, { "epoch": 0.93, "grad_norm": 1.248049538526929, "learning_rate": 1.104910713326146e-07, "loss": 0.4951, "step": 7534 }, { "epoch": 0.94, "grad_norm": 1.8676919633357776, "learning_rate": 1.1007130479344208e-07, "loss": 0.4885, "step": 7535 }, { "epoch": 0.94, "grad_norm": 1.5895304874985916, "learning_rate": 1.0965232826531725e-07, "loss": 0.5174, "step": 7536 }, { "epoch": 0.94, "grad_norm": 1.7402560764830721, "learning_rate": 1.0923414181592873e-07, "loss": 0.498, "step": 7537 }, { "epoch": 0.94, "grad_norm": 1.4436156231144772, "learning_rate": 1.088167455128386e-07, "loss": 0.4194, "step": 7538 }, { "epoch": 0.94, "grad_norm": 1.5020695344067279, "learning_rate": 1.0840013942348182e-07, "loss": 0.4603, "step": 7539 }, { "epoch": 0.94, "grad_norm": 1.905688093155993, "learning_rate": 1.0798432361516287e-07, "loss": 0.4891, "step": 7540 }, { "epoch": 0.94, "grad_norm": 1.4016492508769964, "learning_rate": 1.075692981550619e-07, "loss": 0.4704, "step": 7541 }, { "epoch": 0.94, "grad_norm": 2.3829597401522613, "learning_rate": 1.0715506311022972e-07, "loss": 0.486, "step": 7542 }, { "epoch": 0.94, "grad_norm": 1.5601454546027733, "learning_rate": 1.0674161854758947e-07, "loss": 0.5033, "step": 7543 }, { "epoch": 0.94, "grad_norm": 1.342026448883406, "learning_rate": 1.0632896453393605e-07, "loss": 0.481, "step": 7544 }, { "epoch": 0.94, "grad_norm": 2.4630019635397487, "learning_rate": 1.0591710113593834e-07, "loss": 0.4689, "step": 7545 }, { "epoch": 0.94, "grad_norm": 0.6749586920383936, "learning_rate": 1.0550602842013647e-07, "loss": 0.4868, "step": 7546 }, { "epoch": 0.94, "grad_norm": 1.4416026970217426, "learning_rate": 1.0509574645294173e-07, "loss": 0.5184, "step": 7547 }, { "epoch": 0.94, "grad_norm": 1.9095201673801285, "learning_rate": 1.0468625530064058e-07, "loss": 0.5482, "step": 7548 }, { "epoch": 0.94, "grad_norm": 1.4417897196789546, "learning_rate": 1.042775550293884e-07, "loss": 0.5059, "step": 7549 }, { "epoch": 0.94, "grad_norm": 2.012208951779018, "learning_rate": 1.0386964570521574e-07, "loss": 0.5351, "step": 7550 }, { "epoch": 0.94, "grad_norm": 1.37239360342099, "learning_rate": 1.0346252739402207e-07, "loss": 0.4915, "step": 7551 }, { "epoch": 0.94, "grad_norm": 2.1832857772124155, "learning_rate": 1.0305620016158258e-07, "loss": 0.469, "step": 7552 }, { "epoch": 0.94, "grad_norm": 1.4342619421717173, "learning_rate": 1.0265066407354196e-07, "loss": 0.4912, "step": 7553 }, { "epoch": 0.94, "grad_norm": 1.9992131575406364, "learning_rate": 1.0224591919541837e-07, "loss": 0.5002, "step": 7554 }, { "epoch": 0.94, "grad_norm": 1.3660580834026919, "learning_rate": 1.0184196559260229e-07, "loss": 0.4791, "step": 7555 }, { "epoch": 0.94, "grad_norm": 1.567805960522422, "learning_rate": 1.0143880333035594e-07, "loss": 0.4781, "step": 7556 }, { "epoch": 0.94, "grad_norm": 1.2868250415977684, "learning_rate": 1.010364324738139e-07, "loss": 0.456, "step": 7557 }, { "epoch": 0.94, "grad_norm": 1.4103965262983116, "learning_rate": 1.0063485308798193e-07, "loss": 0.4559, "step": 7558 }, { "epoch": 0.94, "grad_norm": 1.629549103806848, "learning_rate": 1.0023406523773981e-07, "loss": 0.4718, "step": 7559 }, { "epoch": 0.94, "grad_norm": 1.5189480072759118, "learning_rate": 9.983406898783688e-08, "loss": 0.4718, "step": 7560 }, { "epoch": 0.94, "grad_norm": 1.394972304534396, "learning_rate": 9.943486440289751e-08, "loss": 0.4669, "step": 7561 }, { "epoch": 0.94, "grad_norm": 1.5152927943746088, "learning_rate": 9.90364515474157e-08, "loss": 0.4931, "step": 7562 }, { "epoch": 0.94, "grad_norm": 1.4183916894876742, "learning_rate": 9.86388304857594e-08, "loss": 0.4588, "step": 7563 }, { "epoch": 0.94, "grad_norm": 1.6619140841376372, "learning_rate": 9.824200128216665e-08, "loss": 0.4911, "step": 7564 }, { "epoch": 0.94, "grad_norm": 1.4373377755286527, "learning_rate": 9.784596400075063e-08, "loss": 0.4961, "step": 7565 }, { "epoch": 0.94, "grad_norm": 1.3645511041619751, "learning_rate": 9.745071870549295e-08, "loss": 0.489, "step": 7566 }, { "epoch": 0.94, "grad_norm": 1.3053146652099252, "learning_rate": 9.705626546024916e-08, "loss": 0.4641, "step": 7567 }, { "epoch": 0.94, "grad_norm": 1.6799087105795374, "learning_rate": 9.666260432874719e-08, "loss": 0.4837, "step": 7568 }, { "epoch": 0.94, "grad_norm": 1.3840190159719987, "learning_rate": 9.626973537458561e-08, "loss": 0.4886, "step": 7569 }, { "epoch": 0.94, "grad_norm": 1.7803774263684327, "learning_rate": 9.5877658661237e-08, "loss": 0.5038, "step": 7570 }, { "epoch": 0.94, "grad_norm": 1.5466790610753376, "learning_rate": 9.548637425204399e-08, "loss": 0.4751, "step": 7571 }, { "epoch": 0.94, "grad_norm": 1.207045566540985, "learning_rate": 9.50958822102227e-08, "loss": 0.5139, "step": 7572 }, { "epoch": 0.94, "grad_norm": 2.0388938387697997, "learning_rate": 9.470618259885989e-08, "loss": 0.5066, "step": 7573 }, { "epoch": 0.94, "grad_norm": 0.6693846347721168, "learning_rate": 9.431727548091574e-08, "loss": 0.494, "step": 7574 }, { "epoch": 0.94, "grad_norm": 1.5269826421316284, "learning_rate": 9.392916091922111e-08, "loss": 0.5288, "step": 7575 }, { "epoch": 0.94, "grad_norm": 2.011778871310886, "learning_rate": 9.354183897647917e-08, "loss": 0.4874, "step": 7576 }, { "epoch": 0.94, "grad_norm": 1.5042801955481406, "learning_rate": 9.315530971526543e-08, "loss": 0.5289, "step": 7577 }, { "epoch": 0.94, "grad_norm": 0.6520762442159772, "learning_rate": 9.276957319802604e-08, "loss": 0.4724, "step": 7578 }, { "epoch": 0.94, "grad_norm": 1.7630841557824317, "learning_rate": 9.238462948708227e-08, "loss": 0.5152, "step": 7579 }, { "epoch": 0.94, "grad_norm": 1.3418160264322265, "learning_rate": 9.20004786446238e-08, "loss": 0.4603, "step": 7580 }, { "epoch": 0.94, "grad_norm": 1.6097567059166966, "learning_rate": 9.161712073271379e-08, "loss": 0.4397, "step": 7581 }, { "epoch": 0.94, "grad_norm": 1.4953389541248439, "learning_rate": 9.123455581328711e-08, "loss": 0.5223, "step": 7582 }, { "epoch": 0.94, "grad_norm": 1.2689736383401649, "learning_rate": 9.085278394815045e-08, "loss": 0.441, "step": 7583 }, { "epoch": 0.94, "grad_norm": 1.752001103494819, "learning_rate": 9.04718051989828e-08, "loss": 0.4691, "step": 7584 }, { "epoch": 0.94, "grad_norm": 17.642591161965804, "learning_rate": 9.009161962733327e-08, "loss": 0.4844, "step": 7585 }, { "epoch": 0.94, "grad_norm": 1.482388634847006, "learning_rate": 8.971222729462603e-08, "loss": 0.4791, "step": 7586 }, { "epoch": 0.94, "grad_norm": 1.3991761059629115, "learning_rate": 8.933362826215374e-08, "loss": 0.4641, "step": 7587 }, { "epoch": 0.94, "grad_norm": 1.344337928641865, "learning_rate": 8.895582259108415e-08, "loss": 0.4699, "step": 7588 }, { "epoch": 0.94, "grad_norm": 1.6337283293116154, "learning_rate": 8.857881034245285e-08, "loss": 0.5037, "step": 7589 }, { "epoch": 0.94, "grad_norm": 1.5404316087887595, "learning_rate": 8.82025915771717e-08, "loss": 0.5291, "step": 7590 }, { "epoch": 0.94, "grad_norm": 1.4641869888308263, "learning_rate": 8.78271663560204e-08, "loss": 0.4885, "step": 7591 }, { "epoch": 0.94, "grad_norm": 1.9766064882640204, "learning_rate": 8.745253473965376e-08, "loss": 0.5273, "step": 7592 }, { "epoch": 0.94, "grad_norm": 1.3515860790010006, "learning_rate": 8.707869678859504e-08, "loss": 0.4739, "step": 7593 }, { "epoch": 0.94, "grad_norm": 1.7362237175457764, "learning_rate": 8.67056525632426e-08, "loss": 0.5172, "step": 7594 }, { "epoch": 0.94, "grad_norm": 1.3652520626139257, "learning_rate": 8.633340212386431e-08, "loss": 0.4888, "step": 7595 }, { "epoch": 0.94, "grad_norm": 2.562635416592457, "learning_rate": 8.596194553060044e-08, "loss": 0.4362, "step": 7596 }, { "epoch": 0.94, "grad_norm": 0.6489051034073675, "learning_rate": 8.559128284346352e-08, "loss": 0.4773, "step": 7597 }, { "epoch": 0.94, "grad_norm": 1.3462427241427373, "learning_rate": 8.522141412233676e-08, "loss": 0.4857, "step": 7598 }, { "epoch": 0.94, "grad_norm": 1.4627023788707003, "learning_rate": 8.485233942697624e-08, "loss": 0.4767, "step": 7599 }, { "epoch": 0.94, "grad_norm": 2.6271714374623194, "learning_rate": 8.448405881700927e-08, "loss": 0.4635, "step": 7600 }, { "epoch": 0.94, "grad_norm": 1.2335539334688235, "learning_rate": 8.411657235193438e-08, "loss": 0.4921, "step": 7601 }, { "epoch": 0.94, "grad_norm": 1.406101675022231, "learning_rate": 8.37498800911224e-08, "loss": 0.4775, "step": 7602 }, { "epoch": 0.94, "grad_norm": 1.4206446049445998, "learning_rate": 8.338398209381537e-08, "loss": 0.5124, "step": 7603 }, { "epoch": 0.94, "grad_norm": 1.4878336030354353, "learning_rate": 8.301887841912881e-08, "loss": 0.4836, "step": 7604 }, { "epoch": 0.94, "grad_norm": 1.389099926530956, "learning_rate": 8.265456912604608e-08, "loss": 0.535, "step": 7605 }, { "epoch": 0.94, "grad_norm": 0.6773820458682531, "learning_rate": 8.229105427342676e-08, "loss": 0.4777, "step": 7606 }, { "epoch": 0.94, "grad_norm": 1.663011926443581, "learning_rate": 8.192833391999833e-08, "loss": 0.497, "step": 7607 }, { "epoch": 0.94, "grad_norm": 1.4798884001272063, "learning_rate": 8.156640812436278e-08, "loss": 0.5235, "step": 7608 }, { "epoch": 0.94, "grad_norm": 4.610926579403922, "learning_rate": 8.120527694499114e-08, "loss": 0.4755, "step": 7609 }, { "epoch": 0.94, "grad_norm": 2.4878269166594444, "learning_rate": 8.08449404402284e-08, "loss": 0.4835, "step": 7610 }, { "epoch": 0.94, "grad_norm": 1.5222386394029344, "learning_rate": 8.048539866828909e-08, "loss": 0.5179, "step": 7611 }, { "epoch": 0.94, "grad_norm": 0.6431193155061984, "learning_rate": 8.012665168726063e-08, "loss": 0.4471, "step": 7612 }, { "epoch": 0.94, "grad_norm": 1.4935441485253522, "learning_rate": 7.976869955510225e-08, "loss": 0.5175, "step": 7613 }, { "epoch": 0.94, "grad_norm": 1.3801497191756107, "learning_rate": 7.941154232964376e-08, "loss": 0.4915, "step": 7614 }, { "epoch": 0.95, "grad_norm": 0.6613682876564198, "learning_rate": 7.905518006858792e-08, "loss": 0.5026, "step": 7615 }, { "epoch": 0.95, "grad_norm": 1.811683475249906, "learning_rate": 7.8699612829507e-08, "loss": 0.4692, "step": 7616 }, { "epoch": 0.95, "grad_norm": 1.3027553272100894, "learning_rate": 7.834484066984727e-08, "loss": 0.5004, "step": 7617 }, { "epoch": 0.95, "grad_norm": 3.9372547346821705, "learning_rate": 7.799086364692343e-08, "loss": 0.4583, "step": 7618 }, { "epoch": 0.95, "grad_norm": 1.4098518044530997, "learning_rate": 7.76376818179253e-08, "loss": 0.4473, "step": 7619 }, { "epoch": 0.95, "grad_norm": 1.3656186635555496, "learning_rate": 7.728529523991224e-08, "loss": 0.4934, "step": 7620 }, { "epoch": 0.95, "grad_norm": 1.2652488484325881, "learning_rate": 7.693370396981481e-08, "loss": 0.4483, "step": 7621 }, { "epoch": 0.95, "grad_norm": 1.5197178165179743, "learning_rate": 7.658290806443647e-08, "loss": 0.4688, "step": 7622 }, { "epoch": 0.95, "grad_norm": 1.7503132976742892, "learning_rate": 7.623290758045021e-08, "loss": 0.5441, "step": 7623 }, { "epoch": 0.95, "grad_norm": 1.4192647679144745, "learning_rate": 7.588370257440303e-08, "loss": 0.4673, "step": 7624 }, { "epoch": 0.95, "grad_norm": 1.931190050772453, "learning_rate": 7.553529310271147e-08, "loss": 0.4878, "step": 7625 }, { "epoch": 0.95, "grad_norm": 1.440480842373573, "learning_rate": 7.518767922166381e-08, "loss": 0.5096, "step": 7626 }, { "epoch": 0.95, "grad_norm": 1.4898492224534166, "learning_rate": 7.484086098742016e-08, "loss": 0.4761, "step": 7627 }, { "epoch": 0.95, "grad_norm": 1.6981654852220975, "learning_rate": 7.449483845601291e-08, "loss": 0.5055, "step": 7628 }, { "epoch": 0.95, "grad_norm": 1.2724001278107187, "learning_rate": 7.4149611683344e-08, "loss": 0.4587, "step": 7629 }, { "epoch": 0.95, "grad_norm": 1.5613833744236691, "learning_rate": 7.380518072518883e-08, "loss": 0.4533, "step": 7630 }, { "epoch": 0.95, "grad_norm": 1.6580808902507445, "learning_rate": 7.346154563719232e-08, "loss": 0.4535, "step": 7631 }, { "epoch": 0.95, "grad_norm": 0.6613063750238234, "learning_rate": 7.311870647487229e-08, "loss": 0.4727, "step": 7632 }, { "epoch": 0.95, "grad_norm": 3.014698388336979, "learning_rate": 7.277666329361776e-08, "loss": 0.5082, "step": 7633 }, { "epoch": 0.95, "grad_norm": 1.2342182090689713, "learning_rate": 7.243541614868787e-08, "loss": 0.4175, "step": 7634 }, { "epoch": 0.95, "grad_norm": 1.8184782840077198, "learning_rate": 7.209496509521519e-08, "loss": 0.5313, "step": 7635 }, { "epoch": 0.95, "grad_norm": 1.4133824416213492, "learning_rate": 7.175531018820125e-08, "loss": 0.5306, "step": 7636 }, { "epoch": 0.95, "grad_norm": 1.6655108699828332, "learning_rate": 7.141645148252107e-08, "loss": 0.4792, "step": 7637 }, { "epoch": 0.95, "grad_norm": 1.8164807101058542, "learning_rate": 7.107838903292085e-08, "loss": 0.4482, "step": 7638 }, { "epoch": 0.95, "grad_norm": 1.1941787148976293, "learning_rate": 7.074112289401636e-08, "loss": 0.4537, "step": 7639 }, { "epoch": 0.95, "grad_norm": 1.378549116427674, "learning_rate": 7.040465312029731e-08, "loss": 0.4766, "step": 7640 }, { "epoch": 0.95, "grad_norm": 1.1018978404214064, "learning_rate": 7.006897976612192e-08, "loss": 0.4628, "step": 7641 }, { "epoch": 0.95, "grad_norm": 1.44923619105166, "learning_rate": 6.973410288572179e-08, "loss": 0.4854, "step": 7642 }, { "epoch": 0.95, "grad_norm": 2.9691991774920328, "learning_rate": 6.940002253319978e-08, "loss": 0.5281, "step": 7643 }, { "epoch": 0.95, "grad_norm": 1.537616321063936, "learning_rate": 6.906673876252822e-08, "loss": 0.5314, "step": 7644 }, { "epoch": 0.95, "grad_norm": 1.7123009207082152, "learning_rate": 6.873425162755354e-08, "loss": 0.5079, "step": 7645 }, { "epoch": 0.95, "grad_norm": 2.118658183151667, "learning_rate": 6.840256118199051e-08, "loss": 0.4906, "step": 7646 }, { "epoch": 0.95, "grad_norm": 1.6863783866001274, "learning_rate": 6.807166747942795e-08, "loss": 0.4469, "step": 7647 }, { "epoch": 0.95, "grad_norm": 1.524207423173094, "learning_rate": 6.774157057332365e-08, "loss": 0.488, "step": 7648 }, { "epoch": 0.95, "grad_norm": 1.3968831983784278, "learning_rate": 6.741227051700827e-08, "loss": 0.4683, "step": 7649 }, { "epoch": 0.95, "grad_norm": 0.632796678065021, "learning_rate": 6.70837673636826e-08, "loss": 0.4381, "step": 7650 }, { "epoch": 0.95, "grad_norm": 1.2330369250788322, "learning_rate": 6.675606116642031e-08, "loss": 0.4289, "step": 7651 }, { "epoch": 0.95, "grad_norm": 1.5967975832790167, "learning_rate": 6.642915197816347e-08, "loss": 0.4778, "step": 7652 }, { "epoch": 0.95, "grad_norm": 1.3934013360644446, "learning_rate": 6.610303985172873e-08, "loss": 0.4686, "step": 7653 }, { "epoch": 0.95, "grad_norm": 1.1985070265059699, "learning_rate": 6.577772483980228e-08, "loss": 0.4303, "step": 7654 }, { "epoch": 0.95, "grad_norm": 1.4211373045289915, "learning_rate": 6.54532069949404e-08, "loss": 0.4834, "step": 7655 }, { "epoch": 0.95, "grad_norm": 1.6044525557085323, "learning_rate": 6.512948636957284e-08, "loss": 0.5273, "step": 7656 }, { "epoch": 0.95, "grad_norm": 1.2829303434544845, "learning_rate": 6.480656301599886e-08, "loss": 0.4323, "step": 7657 }, { "epoch": 0.95, "grad_norm": 1.6705878337304934, "learning_rate": 6.448443698639062e-08, "loss": 0.4911, "step": 7658 }, { "epoch": 0.95, "grad_norm": 1.310649686550002, "learning_rate": 6.416310833278872e-08, "loss": 0.4296, "step": 7659 }, { "epoch": 0.95, "grad_norm": 1.4713096666729257, "learning_rate": 6.384257710710828e-08, "loss": 0.4754, "step": 7660 }, { "epoch": 0.95, "grad_norm": 3.0267556929527535, "learning_rate": 6.35228433611329e-08, "loss": 0.5011, "step": 7661 }, { "epoch": 0.95, "grad_norm": 0.7116531090517827, "learning_rate": 6.320390714651958e-08, "loss": 0.4999, "step": 7662 }, { "epoch": 0.95, "grad_norm": 2.0886268269934094, "learning_rate": 6.288576851479378e-08, "loss": 0.506, "step": 7663 }, { "epoch": 0.95, "grad_norm": 1.2864415516373995, "learning_rate": 6.256842751735492e-08, "loss": 0.4921, "step": 7664 }, { "epoch": 0.95, "grad_norm": 1.6822313096347454, "learning_rate": 6.22518842054709e-08, "loss": 0.471, "step": 7665 }, { "epoch": 0.95, "grad_norm": 1.5822354547473338, "learning_rate": 6.193613863028303e-08, "loss": 0.5187, "step": 7666 }, { "epoch": 0.95, "grad_norm": 1.2247180484116207, "learning_rate": 6.162119084280271e-08, "loss": 0.4228, "step": 7667 }, { "epoch": 0.95, "grad_norm": 1.932817199646369, "learning_rate": 6.130704089391259e-08, "loss": 0.535, "step": 7668 }, { "epoch": 0.95, "grad_norm": 1.4551478562969469, "learning_rate": 6.09936888343654e-08, "loss": 0.5047, "step": 7669 }, { "epoch": 0.95, "grad_norm": 1.5915916781050325, "learning_rate": 6.068113471478676e-08, "loss": 0.5171, "step": 7670 }, { "epoch": 0.95, "grad_norm": 1.898586512014985, "learning_rate": 6.036937858567294e-08, "loss": 0.4807, "step": 7671 }, { "epoch": 0.95, "grad_norm": 1.796050658097426, "learning_rate": 6.00584204973903e-08, "loss": 0.513, "step": 7672 }, { "epoch": 0.95, "grad_norm": 1.5586630964181434, "learning_rate": 5.974826050017701e-08, "loss": 0.4912, "step": 7673 }, { "epoch": 0.95, "grad_norm": 1.6303241842222034, "learning_rate": 5.943889864414243e-08, "loss": 0.508, "step": 7674 }, { "epoch": 0.95, "grad_norm": 1.3696136170171038, "learning_rate": 5.913033497926546e-08, "loss": 0.4956, "step": 7675 }, { "epoch": 0.95, "grad_norm": 1.1958120852784795, "learning_rate": 5.8822569555399e-08, "loss": 0.4919, "step": 7676 }, { "epoch": 0.95, "grad_norm": 1.3777355475873272, "learning_rate": 5.8515602422263287e-08, "loss": 0.495, "step": 7677 }, { "epoch": 0.95, "grad_norm": 1.391424721471707, "learning_rate": 5.820943362945364e-08, "loss": 0.4778, "step": 7678 }, { "epoch": 0.95, "grad_norm": 1.3233746300787803, "learning_rate": 5.790406322643327e-08, "loss": 0.4705, "step": 7679 }, { "epoch": 0.95, "grad_norm": 1.7454553079953834, "learning_rate": 5.759949126253772e-08, "loss": 0.559, "step": 7680 }, { "epoch": 0.95, "grad_norm": 1.8206013531681495, "learning_rate": 5.7295717786972625e-08, "loss": 0.4776, "step": 7681 }, { "epoch": 0.95, "grad_norm": 0.6651423869229104, "learning_rate": 5.6992742848815955e-08, "loss": 0.5239, "step": 7682 }, { "epoch": 0.95, "grad_norm": 1.5163882063428316, "learning_rate": 5.669056649701632e-08, "loss": 0.4811, "step": 7683 }, { "epoch": 0.95, "grad_norm": 1.6129136935060087, "learning_rate": 5.63891887803919e-08, "loss": 0.5292, "step": 7684 }, { "epoch": 0.95, "grad_norm": 1.4545287345230198, "learning_rate": 5.608860974763319e-08, "loss": 0.46, "step": 7685 }, { "epoch": 0.95, "grad_norm": 1.765077600660373, "learning_rate": 5.5788829447301906e-08, "loss": 0.5191, "step": 7686 }, { "epoch": 0.95, "grad_norm": 1.4372217535237481, "learning_rate": 5.548984792783041e-08, "loss": 0.4949, "step": 7687 }, { "epoch": 0.95, "grad_norm": 1.3253862390810809, "learning_rate": 5.519166523752117e-08, "loss": 0.5215, "step": 7688 }, { "epoch": 0.95, "grad_norm": 1.7964409105865975, "learning_rate": 5.4894281424548446e-08, "loss": 0.4757, "step": 7689 }, { "epoch": 0.95, "grad_norm": 1.3087364335845009, "learning_rate": 5.459769653695657e-08, "loss": 0.4752, "step": 7690 }, { "epoch": 0.95, "grad_norm": 1.627844249158136, "learning_rate": 5.430191062266277e-08, "loss": 0.4828, "step": 7691 }, { "epoch": 0.95, "grad_norm": 1.9735770293227999, "learning_rate": 5.400692372945271e-08, "loss": 0.5109, "step": 7692 }, { "epoch": 0.95, "grad_norm": 1.4459533226148706, "learning_rate": 5.371273590498438e-08, "loss": 0.4588, "step": 7693 }, { "epoch": 0.95, "grad_norm": 2.704100185893509, "learning_rate": 5.341934719678699e-08, "loss": 0.5431, "step": 7694 }, { "epoch": 0.95, "grad_norm": 1.7613191834669115, "learning_rate": 5.312675765225928e-08, "loss": 0.4894, "step": 7695 }, { "epoch": 0.96, "grad_norm": 4.259121277853792, "learning_rate": 5.283496731867288e-08, "loss": 0.5277, "step": 7696 }, { "epoch": 0.96, "grad_norm": 4.344515438831487, "learning_rate": 5.254397624316731e-08, "loss": 0.4692, "step": 7697 }, { "epoch": 0.96, "grad_norm": 1.458748155261817, "learning_rate": 5.2253784472756084e-08, "loss": 0.5377, "step": 7698 }, { "epoch": 0.96, "grad_norm": 1.480855482106529, "learning_rate": 5.1964392054321686e-08, "loss": 0.4587, "step": 7699 }, { "epoch": 0.96, "grad_norm": 1.33662859192495, "learning_rate": 5.167579903461839e-08, "loss": 0.5017, "step": 7700 }, { "epoch": 0.96, "grad_norm": 1.5791003334365201, "learning_rate": 5.1388005460270565e-08, "loss": 0.5071, "step": 7701 }, { "epoch": 0.96, "grad_norm": 1.4395511057076358, "learning_rate": 5.110101137777379e-08, "loss": 0.5164, "step": 7702 }, { "epoch": 0.96, "grad_norm": 1.2375084850602827, "learning_rate": 5.0814816833494876e-08, "loss": 0.4812, "step": 7703 }, { "epoch": 0.96, "grad_norm": 1.3507590154257363, "learning_rate": 5.052942187367016e-08, "loss": 0.4899, "step": 7704 }, { "epoch": 0.96, "grad_norm": 1.315754043115813, "learning_rate": 5.024482654440943e-08, "loss": 0.4567, "step": 7705 }, { "epoch": 0.96, "grad_norm": 1.495124890904158, "learning_rate": 4.99610308916898e-08, "loss": 0.4682, "step": 7706 }, { "epoch": 0.96, "grad_norm": 1.3499923101809124, "learning_rate": 4.967803496136181e-08, "loss": 0.4835, "step": 7707 }, { "epoch": 0.96, "grad_norm": 1.4715506582456879, "learning_rate": 4.9395838799146114e-08, "loss": 0.4866, "step": 7708 }, { "epoch": 0.96, "grad_norm": 1.5842712418136582, "learning_rate": 4.911444245063346e-08, "loss": 0.5489, "step": 7709 }, { "epoch": 0.96, "grad_norm": 1.8163171475763948, "learning_rate": 4.8833845961286375e-08, "loss": 0.4716, "step": 7710 }, { "epoch": 0.96, "grad_norm": 1.3644745215747869, "learning_rate": 4.855404937643693e-08, "loss": 0.4827, "step": 7711 }, { "epoch": 0.96, "grad_norm": 1.4313206438200663, "learning_rate": 4.827505274128952e-08, "loss": 0.4813, "step": 7712 }, { "epoch": 0.96, "grad_norm": 1.315013732174951, "learning_rate": 4.799685610091809e-08, "loss": 0.5051, "step": 7713 }, { "epoch": 0.96, "grad_norm": 1.4328290676809463, "learning_rate": 4.771945950026835e-08, "loss": 0.4569, "step": 7714 }, { "epoch": 0.96, "grad_norm": 2.6288798079160522, "learning_rate": 4.744286298415557e-08, "loss": 0.4907, "step": 7715 }, { "epoch": 0.96, "grad_norm": 1.7069136478926474, "learning_rate": 4.7167066597266776e-08, "loss": 0.4865, "step": 7716 }, { "epoch": 0.96, "grad_norm": 1.308945493190629, "learning_rate": 4.689207038415799e-08, "loss": 0.4076, "step": 7717 }, { "epoch": 0.96, "grad_norm": 1.4611682125753729, "learning_rate": 4.6617874389259246e-08, "loss": 0.5038, "step": 7718 }, { "epoch": 0.96, "grad_norm": 1.724751515685758, "learning_rate": 4.634447865686842e-08, "loss": 0.4707, "step": 7719 }, { "epoch": 0.96, "grad_norm": 1.3998749280496665, "learning_rate": 4.6071883231154077e-08, "loss": 0.4931, "step": 7720 }, { "epoch": 0.96, "grad_norm": 1.3858879631903585, "learning_rate": 4.5800088156158215e-08, "loss": 0.4985, "step": 7721 }, { "epoch": 0.96, "grad_norm": 1.467848420192936, "learning_rate": 4.5529093475790156e-08, "loss": 0.5039, "step": 7722 }, { "epoch": 0.96, "grad_norm": 1.5766696103475495, "learning_rate": 4.525889923383264e-08, "loss": 0.4582, "step": 7723 }, { "epoch": 0.96, "grad_norm": 1.330634533354273, "learning_rate": 4.498950547393743e-08, "loss": 0.4371, "step": 7724 }, { "epoch": 0.96, "grad_norm": 2.841870302597123, "learning_rate": 4.472091223962749e-08, "loss": 0.469, "step": 7725 }, { "epoch": 0.96, "grad_norm": 2.0793156583564767, "learning_rate": 4.445311957429588e-08, "loss": 0.4845, "step": 7726 }, { "epoch": 0.96, "grad_norm": 1.3752340510642325, "learning_rate": 4.418612752120743e-08, "loss": 0.5401, "step": 7727 }, { "epoch": 0.96, "grad_norm": 1.6006130720471534, "learning_rate": 4.3919936123497654e-08, "loss": 0.4821, "step": 7728 }, { "epoch": 0.96, "grad_norm": 1.3840103206152756, "learning_rate": 4.365454542417047e-08, "loss": 0.5033, "step": 7729 }, { "epoch": 0.96, "grad_norm": 1.4145112943608362, "learning_rate": 4.3389955466103804e-08, "loss": 0.4687, "step": 7730 }, { "epoch": 0.96, "grad_norm": 1.4189671569908613, "learning_rate": 4.312616629204347e-08, "loss": 0.478, "step": 7731 }, { "epoch": 0.96, "grad_norm": 1.503259140881166, "learning_rate": 4.2863177944607594e-08, "loss": 0.4842, "step": 7732 }, { "epoch": 0.96, "grad_norm": 1.2896560391253253, "learning_rate": 4.26009904662833e-08, "loss": 0.4583, "step": 7733 }, { "epoch": 0.96, "grad_norm": 1.4580249324439911, "learning_rate": 4.233960389943004e-08, "loss": 0.4627, "step": 7734 }, { "epoch": 0.96, "grad_norm": 1.3594047511001994, "learning_rate": 4.2079018286277365e-08, "loss": 0.4613, "step": 7735 }, { "epoch": 0.96, "grad_norm": 3.0801452303061256, "learning_rate": 4.1819233668924375e-08, "loss": 0.4604, "step": 7736 }, { "epoch": 0.96, "grad_norm": 1.2787454071154456, "learning_rate": 4.156025008934195e-08, "loss": 0.4782, "step": 7737 }, { "epoch": 0.96, "grad_norm": 0.6044864170581877, "learning_rate": 4.130206758937105e-08, "loss": 0.4766, "step": 7738 }, { "epoch": 0.96, "grad_norm": 1.5826720176373226, "learning_rate": 4.1044686210723884e-08, "loss": 0.4529, "step": 7739 }, { "epoch": 0.96, "grad_norm": 2.5454458656320416, "learning_rate": 4.078810599498162e-08, "loss": 0.4545, "step": 7740 }, { "epoch": 0.96, "grad_norm": 1.8520856327740807, "learning_rate": 4.053232698359832e-08, "loss": 0.5048, "step": 7741 }, { "epoch": 0.96, "grad_norm": 1.8044521412502397, "learning_rate": 4.027734921789594e-08, "loss": 0.5048, "step": 7742 }, { "epoch": 0.96, "grad_norm": 1.3926948369663805, "learning_rate": 4.002317273906986e-08, "loss": 0.4797, "step": 7743 }, { "epoch": 0.96, "grad_norm": 1.5154880391926469, "learning_rate": 3.976979758818389e-08, "loss": 0.4929, "step": 7744 }, { "epoch": 0.96, "grad_norm": 1.227339551797787, "learning_rate": 3.9517223806171956e-08, "loss": 0.4407, "step": 7745 }, { "epoch": 0.96, "grad_norm": 1.5725278008193808, "learning_rate": 3.926545143384142e-08, "loss": 0.4647, "step": 7746 }, { "epoch": 0.96, "grad_norm": 1.8648068300491099, "learning_rate": 3.901448051186696e-08, "loss": 0.5158, "step": 7747 }, { "epoch": 0.96, "grad_norm": 2.203877062516585, "learning_rate": 3.876431108079615e-08, "loss": 0.4707, "step": 7748 }, { "epoch": 0.96, "grad_norm": 1.6137307611030258, "learning_rate": 3.8514943181044984e-08, "loss": 0.5176, "step": 7749 }, { "epoch": 0.96, "grad_norm": 1.3832513856819502, "learning_rate": 3.826637685290236e-08, "loss": 0.48, "step": 7750 }, { "epoch": 0.96, "grad_norm": 1.7953339027244035, "learning_rate": 3.8018612136524466e-08, "loss": 0.5226, "step": 7751 }, { "epoch": 0.96, "grad_norm": 1.4591121678175754, "learning_rate": 3.777164907194209e-08, "loss": 0.4381, "step": 7752 }, { "epoch": 0.96, "grad_norm": 0.6930226256798032, "learning_rate": 3.752548769905273e-08, "loss": 0.4754, "step": 7753 }, { "epoch": 0.96, "grad_norm": 2.077763466139759, "learning_rate": 3.728012805762627e-08, "loss": 0.4937, "step": 7754 }, { "epoch": 0.96, "grad_norm": 1.2946149909988884, "learning_rate": 3.7035570187303195e-08, "loss": 0.4801, "step": 7755 }, { "epoch": 0.96, "grad_norm": 1.9916719865590282, "learning_rate": 3.6791814127593585e-08, "loss": 0.5081, "step": 7756 }, { "epoch": 0.96, "grad_norm": 1.458255870646246, "learning_rate": 3.654885991787816e-08, "loss": 0.4952, "step": 7757 }, { "epoch": 0.96, "grad_norm": 1.5908084020469206, "learning_rate": 3.630670759740884e-08, "loss": 0.4759, "step": 7758 }, { "epoch": 0.96, "grad_norm": 1.8546868327967143, "learning_rate": 3.6065357205307125e-08, "loss": 0.4787, "step": 7759 }, { "epoch": 0.96, "grad_norm": 1.5645776854853, "learning_rate": 3.582480878056516e-08, "loss": 0.5225, "step": 7760 }, { "epoch": 0.96, "grad_norm": 1.602317185925536, "learning_rate": 3.5585062362046284e-08, "loss": 0.5119, "step": 7761 }, { "epoch": 0.96, "grad_norm": 2.407875740303682, "learning_rate": 3.534611798848286e-08, "loss": 0.4688, "step": 7762 }, { "epoch": 0.96, "grad_norm": 1.5088178343160268, "learning_rate": 3.510797569847957e-08, "loss": 0.5119, "step": 7763 }, { "epoch": 0.96, "grad_norm": 1.6905489646928964, "learning_rate": 3.487063553050896e-08, "loss": 0.4833, "step": 7764 }, { "epoch": 0.96, "grad_norm": 1.3799093539466991, "learning_rate": 3.4634097522916464e-08, "loss": 0.5413, "step": 7765 }, { "epoch": 0.96, "grad_norm": 1.3819950617851147, "learning_rate": 3.4398361713916526e-08, "loss": 0.4909, "step": 7766 }, { "epoch": 0.96, "grad_norm": 1.3018371723847642, "learning_rate": 3.416342814159423e-08, "loss": 0.4656, "step": 7767 }, { "epoch": 0.96, "grad_norm": 1.29448541490877, "learning_rate": 3.392929684390533e-08, "loss": 0.4505, "step": 7768 }, { "epoch": 0.96, "grad_norm": 1.5527951190707798, "learning_rate": 3.3695967858675685e-08, "loss": 0.4814, "step": 7769 }, { "epoch": 0.96, "grad_norm": 1.9497720926334228, "learning_rate": 3.346344122360179e-08, "loss": 0.5077, "step": 7770 }, { "epoch": 0.96, "grad_norm": 1.3985591816774912, "learning_rate": 3.323171697625027e-08, "loss": 0.4473, "step": 7771 }, { "epoch": 0.96, "grad_norm": 1.413311741709775, "learning_rate": 3.300079515405841e-08, "loss": 0.5118, "step": 7772 }, { "epoch": 0.96, "grad_norm": 1.8534187425790147, "learning_rate": 3.2770675794334126e-08, "loss": 0.4712, "step": 7773 }, { "epoch": 0.96, "grad_norm": 0.6653584338759195, "learning_rate": 3.254135893425381e-08, "loss": 0.4437, "step": 7774 }, { "epoch": 0.96, "grad_norm": 1.7163977797998067, "learning_rate": 3.231284461086615e-08, "loss": 0.5157, "step": 7775 }, { "epoch": 0.97, "grad_norm": 0.6106701632470143, "learning_rate": 3.208513286109049e-08, "loss": 0.4645, "step": 7776 }, { "epoch": 0.97, "grad_norm": 1.7946249031013422, "learning_rate": 3.185822372171521e-08, "loss": 0.5111, "step": 7777 }, { "epoch": 0.97, "grad_norm": 1.5144254795001317, "learning_rate": 3.163211722939874e-08, "loss": 0.4857, "step": 7778 }, { "epoch": 0.97, "grad_norm": 1.4056587916463201, "learning_rate": 3.140681342067187e-08, "loss": 0.5091, "step": 7779 }, { "epoch": 0.97, "grad_norm": 0.6749938480844588, "learning_rate": 3.1182312331933254e-08, "loss": 0.4518, "step": 7780 }, { "epoch": 0.97, "grad_norm": 2.024631804677709, "learning_rate": 3.095861399945388e-08, "loss": 0.4645, "step": 7781 }, { "epoch": 0.97, "grad_norm": 1.5604921011767916, "learning_rate": 3.073571845937373e-08, "loss": 0.4301, "step": 7782 }, { "epoch": 0.97, "grad_norm": 1.3798606656257772, "learning_rate": 3.0513625747703446e-08, "loss": 0.4492, "step": 7783 }, { "epoch": 0.97, "grad_norm": 1.8501868863665396, "learning_rate": 3.029233590032432e-08, "loss": 0.4584, "step": 7784 }, { "epoch": 0.97, "grad_norm": 2.6542302064268632, "learning_rate": 3.007184895298776e-08, "loss": 0.4523, "step": 7785 }, { "epoch": 0.97, "grad_norm": 3.515767367030657, "learning_rate": 2.9852164941315266e-08, "loss": 0.5063, "step": 7786 }, { "epoch": 0.97, "grad_norm": 1.492474367639044, "learning_rate": 2.963328390079845e-08, "loss": 0.517, "step": 7787 }, { "epoch": 0.97, "grad_norm": 1.7311554006788261, "learning_rate": 2.9415205866800133e-08, "loss": 0.5292, "step": 7788 }, { "epoch": 0.97, "grad_norm": 1.6439229672503788, "learning_rate": 2.9197930874551584e-08, "loss": 0.4827, "step": 7789 }, { "epoch": 0.97, "grad_norm": 1.3395622287881843, "learning_rate": 2.898145895915694e-08, "loss": 0.4924, "step": 7790 }, { "epoch": 0.97, "grad_norm": 1.9080209785459703, "learning_rate": 2.8765790155588223e-08, "loss": 0.5357, "step": 7791 }, { "epoch": 0.97, "grad_norm": 2.2865527623095567, "learning_rate": 2.8550924498688127e-08, "loss": 0.4766, "step": 7792 }, { "epoch": 0.97, "grad_norm": 0.6958372990417212, "learning_rate": 2.833686202317165e-08, "loss": 0.5184, "step": 7793 }, { "epoch": 0.97, "grad_norm": 1.5048457616257143, "learning_rate": 2.812360276362114e-08, "loss": 0.5324, "step": 7794 }, { "epoch": 0.97, "grad_norm": 1.3447727190681364, "learning_rate": 2.7911146754491247e-08, "loss": 0.4206, "step": 7795 }, { "epoch": 0.97, "grad_norm": 2.355496696654266, "learning_rate": 2.7699494030105633e-08, "loss": 0.4686, "step": 7796 }, { "epoch": 0.97, "grad_norm": 1.7543877273172854, "learning_rate": 2.7488644624659165e-08, "loss": 0.5243, "step": 7797 }, { "epoch": 0.97, "grad_norm": 1.3222072531274944, "learning_rate": 2.7278598572216264e-08, "loss": 0.4562, "step": 7798 }, { "epoch": 0.97, "grad_norm": 1.3822661254883901, "learning_rate": 2.7069355906711447e-08, "loss": 0.4683, "step": 7799 }, { "epoch": 0.97, "grad_norm": 1.2333982581816603, "learning_rate": 2.6860916661949344e-08, "loss": 0.425, "step": 7800 }, { "epoch": 0.97, "grad_norm": 2.4989416221823086, "learning_rate": 2.665328087160579e-08, "loss": 0.5049, "step": 7801 }, { "epoch": 0.97, "grad_norm": 1.1683052174068702, "learning_rate": 2.6446448569226735e-08, "loss": 0.4297, "step": 7802 }, { "epoch": 0.97, "grad_norm": 14.415304359454165, "learning_rate": 2.6240419788226557e-08, "loss": 0.5134, "step": 7803 }, { "epoch": 0.97, "grad_norm": 1.4887720785871432, "learning_rate": 2.603519456189141e-08, "loss": 0.5064, "step": 7804 }, { "epoch": 0.97, "grad_norm": 2.269943717078514, "learning_rate": 2.5830772923377544e-08, "loss": 0.5161, "step": 7805 }, { "epoch": 0.97, "grad_norm": 1.5539420147292864, "learning_rate": 2.5627154905710772e-08, "loss": 0.5005, "step": 7806 }, { "epoch": 0.97, "grad_norm": 3.1212492048854474, "learning_rate": 2.542434054178755e-08, "loss": 0.5048, "step": 7807 }, { "epoch": 0.97, "grad_norm": 1.3866045054200524, "learning_rate": 2.5222329864373897e-08, "loss": 0.4657, "step": 7808 }, { "epoch": 0.97, "grad_norm": 1.7650098436343118, "learning_rate": 2.5021122906107588e-08, "loss": 0.4732, "step": 7809 }, { "epoch": 0.97, "grad_norm": 1.4154524445769976, "learning_rate": 2.4820719699493735e-08, "loss": 0.4984, "step": 7810 }, { "epoch": 0.97, "grad_norm": 1.3844456717672102, "learning_rate": 2.462112027691088e-08, "loss": 0.5104, "step": 7811 }, { "epoch": 0.97, "grad_norm": 1.729652869759931, "learning_rate": 2.44223246706049e-08, "loss": 0.4732, "step": 7812 }, { "epoch": 0.97, "grad_norm": 1.4065463489571486, "learning_rate": 2.4224332912693438e-08, "loss": 0.497, "step": 7813 }, { "epoch": 0.97, "grad_norm": 1.647805148838936, "learning_rate": 2.4027145035163124e-08, "loss": 0.492, "step": 7814 }, { "epoch": 0.97, "grad_norm": 1.541283830966554, "learning_rate": 2.3830761069872922e-08, "loss": 0.5116, "step": 7815 }, { "epoch": 0.97, "grad_norm": 1.5878840426569956, "learning_rate": 2.363518104854856e-08, "loss": 0.5096, "step": 7816 }, { "epoch": 0.97, "grad_norm": 1.367993865190107, "learning_rate": 2.3440405002789214e-08, "loss": 0.4724, "step": 7817 }, { "epoch": 0.97, "grad_norm": 1.4697283027569419, "learning_rate": 2.3246432964061928e-08, "loss": 0.5135, "step": 7818 }, { "epoch": 0.97, "grad_norm": 2.5397683114755623, "learning_rate": 2.3053264963704413e-08, "loss": 0.5403, "step": 7819 }, { "epoch": 0.97, "grad_norm": 2.1536563276716563, "learning_rate": 2.2860901032925597e-08, "loss": 0.4726, "step": 7820 }, { "epoch": 0.97, "grad_norm": 1.4454911065646863, "learning_rate": 2.2669341202802285e-08, "loss": 0.503, "step": 7821 }, { "epoch": 0.97, "grad_norm": 1.3626319884830418, "learning_rate": 2.2478585504284168e-08, "loss": 0.481, "step": 7822 }, { "epoch": 0.97, "grad_norm": 1.322301916185995, "learning_rate": 2.22886339681877e-08, "loss": 0.4919, "step": 7823 }, { "epoch": 0.97, "grad_norm": 1.2743290032865122, "learning_rate": 2.209948662520278e-08, "loss": 0.4753, "step": 7824 }, { "epoch": 0.97, "grad_norm": 1.7346529047342756, "learning_rate": 2.1911143505887188e-08, "loss": 0.4928, "step": 7825 }, { "epoch": 0.97, "grad_norm": 1.4341911057238383, "learning_rate": 2.1723604640668804e-08, "loss": 0.4799, "step": 7826 }, { "epoch": 0.97, "grad_norm": 1.6480767999457715, "learning_rate": 2.1536870059847835e-08, "loss": 0.4848, "step": 7827 }, { "epoch": 0.97, "grad_norm": 1.5460470829974238, "learning_rate": 2.1350939793591264e-08, "loss": 0.5232, "step": 7828 }, { "epoch": 0.97, "grad_norm": 1.5786795687722468, "learning_rate": 2.116581387193839e-08, "loss": 0.5959, "step": 7829 }, { "epoch": 0.97, "grad_norm": 1.3439945497910952, "learning_rate": 2.0981492324798068e-08, "loss": 0.512, "step": 7830 }, { "epoch": 0.97, "grad_norm": 22.31225001066693, "learning_rate": 2.079797518194926e-08, "loss": 0.4968, "step": 7831 }, { "epoch": 0.97, "grad_norm": 2.2889659992198874, "learning_rate": 2.061526247303991e-08, "loss": 0.4042, "step": 7832 }, { "epoch": 0.97, "grad_norm": 1.571354792462103, "learning_rate": 2.043335422758974e-08, "loss": 0.5359, "step": 7833 }, { "epoch": 0.97, "grad_norm": 1.4696440018354946, "learning_rate": 2.0252250474987468e-08, "loss": 0.5617, "step": 7834 }, { "epoch": 0.97, "grad_norm": 1.7384848762491998, "learning_rate": 2.007195124449135e-08, "loss": 0.4767, "step": 7835 }, { "epoch": 0.97, "grad_norm": 1.5219894565268821, "learning_rate": 1.9892456565230866e-08, "loss": 0.4939, "step": 7836 }, { "epoch": 0.97, "grad_norm": 1.516993884796863, "learning_rate": 1.9713766466205043e-08, "loss": 0.4976, "step": 7837 }, { "epoch": 0.97, "grad_norm": 1.6486390321432192, "learning_rate": 1.9535880976283007e-08, "loss": 0.5048, "step": 7838 }, { "epoch": 0.97, "grad_norm": 1.6886966252760525, "learning_rate": 1.935880012420288e-08, "loss": 0.5321, "step": 7839 }, { "epoch": 0.97, "grad_norm": 1.881517861414415, "learning_rate": 1.918252393857456e-08, "loss": 0.496, "step": 7840 }, { "epoch": 0.97, "grad_norm": 2.227334661334372, "learning_rate": 1.9007052447876373e-08, "loss": 0.4894, "step": 7841 }, { "epoch": 0.97, "grad_norm": 1.5709242214792047, "learning_rate": 1.8832385680457866e-08, "loss": 0.5064, "step": 7842 }, { "epoch": 0.97, "grad_norm": 0.6058301693578824, "learning_rate": 1.865852366453702e-08, "loss": 0.4816, "step": 7843 }, { "epoch": 0.97, "grad_norm": 1.4782888331040613, "learning_rate": 1.8485466428204145e-08, "loss": 0.4699, "step": 7844 }, { "epoch": 0.97, "grad_norm": 1.2843569767793035, "learning_rate": 1.831321399941688e-08, "loss": 0.4522, "step": 7845 }, { "epoch": 0.97, "grad_norm": 1.820698828941528, "learning_rate": 1.814176640600518e-08, "loss": 0.4923, "step": 7846 }, { "epoch": 0.97, "grad_norm": 1.3360093042205863, "learning_rate": 1.797112367566689e-08, "loss": 0.4754, "step": 7847 }, { "epoch": 0.97, "grad_norm": 1.3404701278536049, "learning_rate": 1.7801285835971627e-08, "loss": 0.4751, "step": 7848 }, { "epoch": 0.97, "grad_norm": 1.645967978646397, "learning_rate": 1.7632252914357994e-08, "loss": 0.4745, "step": 7849 }, { "epoch": 0.97, "grad_norm": 1.816582910025013, "learning_rate": 1.746402493813415e-08, "loss": 0.4759, "step": 7850 }, { "epoch": 0.97, "grad_norm": 1.4984141287718775, "learning_rate": 1.7296601934480018e-08, "loss": 0.5054, "step": 7851 }, { "epoch": 0.97, "grad_norm": 1.2956987510462552, "learning_rate": 1.7129983930443405e-08, "loss": 0.5119, "step": 7852 }, { "epoch": 0.97, "grad_norm": 1.2386501615892918, "learning_rate": 1.696417095294278e-08, "loss": 0.4774, "step": 7853 }, { "epoch": 0.97, "grad_norm": 1.624533253640688, "learning_rate": 1.6799163028767273e-08, "loss": 0.4785, "step": 7854 }, { "epoch": 0.97, "grad_norm": 1.3569411519411598, "learning_rate": 1.6634960184575e-08, "loss": 0.4949, "step": 7855 }, { "epoch": 0.97, "grad_norm": 3.2807250837591946, "learning_rate": 1.647156244689474e-08, "loss": 0.4969, "step": 7856 }, { "epoch": 0.98, "grad_norm": 0.7328035968027572, "learning_rate": 1.6308969842124266e-08, "loss": 0.4832, "step": 7857 }, { "epoch": 0.98, "grad_norm": 1.702462953664373, "learning_rate": 1.6147182396532013e-08, "loss": 0.5399, "step": 7858 }, { "epoch": 0.98, "grad_norm": 1.5917224380993937, "learning_rate": 1.5986200136256512e-08, "loss": 0.4511, "step": 7859 }, { "epoch": 0.98, "grad_norm": 1.7284270235794241, "learning_rate": 1.5826023087305853e-08, "loss": 0.4766, "step": 7860 }, { "epoch": 0.98, "grad_norm": 1.5717015680984, "learning_rate": 1.5666651275557664e-08, "loss": 0.5071, "step": 7861 }, { "epoch": 0.98, "grad_norm": 3.5548574313851318, "learning_rate": 1.550808472676024e-08, "loss": 0.5269, "step": 7862 }, { "epoch": 0.98, "grad_norm": 1.540841191632159, "learning_rate": 1.535032346653087e-08, "loss": 0.4903, "step": 7863 }, { "epoch": 0.98, "grad_norm": 0.7457093299726929, "learning_rate": 1.5193367520357493e-08, "loss": 0.4557, "step": 7864 }, { "epoch": 0.98, "grad_norm": 1.7486686969238991, "learning_rate": 1.5037216913598163e-08, "loss": 0.4794, "step": 7865 }, { "epoch": 0.98, "grad_norm": 1.3896525495364775, "learning_rate": 1.488187167147992e-08, "loss": 0.4907, "step": 7866 }, { "epoch": 0.98, "grad_norm": 2.3235501419649687, "learning_rate": 1.4727331819100466e-08, "loss": 0.4744, "step": 7867 }, { "epoch": 0.98, "grad_norm": 1.567961452961845, "learning_rate": 1.4573597381427052e-08, "loss": 0.4887, "step": 7868 }, { "epoch": 0.98, "grad_norm": 1.3819936970432891, "learning_rate": 1.4420668383296476e-08, "loss": 0.5158, "step": 7869 }, { "epoch": 0.98, "grad_norm": 1.4597986889667054, "learning_rate": 1.4268544849416199e-08, "loss": 0.5161, "step": 7870 }, { "epoch": 0.98, "grad_norm": 1.7559433201397077, "learning_rate": 1.411722680436267e-08, "loss": 0.4797, "step": 7871 }, { "epoch": 0.98, "grad_norm": 1.7793292300008385, "learning_rate": 1.3966714272583004e-08, "loss": 0.5119, "step": 7872 }, { "epoch": 0.98, "grad_norm": 0.6959078402252198, "learning_rate": 1.3817007278393302e-08, "loss": 0.4507, "step": 7873 }, { "epoch": 0.98, "grad_norm": 1.2366883966054931, "learning_rate": 1.3668105845980883e-08, "loss": 0.4014, "step": 7874 }, { "epoch": 0.98, "grad_norm": 1.3055286853521844, "learning_rate": 1.3520009999401506e-08, "loss": 0.5215, "step": 7875 }, { "epoch": 0.98, "grad_norm": 1.5049883730512816, "learning_rate": 1.3372719762581587e-08, "loss": 0.4989, "step": 7876 }, { "epoch": 0.98, "grad_norm": 1.2385683699552819, "learning_rate": 1.3226235159317091e-08, "loss": 0.497, "step": 7877 }, { "epoch": 0.98, "grad_norm": 0.6753540862673787, "learning_rate": 1.3080556213274087e-08, "loss": 0.4658, "step": 7878 }, { "epoch": 0.98, "grad_norm": 1.3777811871545333, "learning_rate": 1.2935682947987638e-08, "loss": 0.4743, "step": 7879 }, { "epoch": 0.98, "grad_norm": 1.7929465749642053, "learning_rate": 1.279161538686402e-08, "loss": 0.4985, "step": 7880 }, { "epoch": 0.98, "grad_norm": 1.5867881570055768, "learning_rate": 1.2648353553178506e-08, "loss": 0.5077, "step": 7881 }, { "epoch": 0.98, "grad_norm": 1.496988232167877, "learning_rate": 1.2505897470075357e-08, "loss": 0.4619, "step": 7882 }, { "epoch": 0.98, "grad_norm": 1.388582596738789, "learning_rate": 1.2364247160571164e-08, "loss": 0.4528, "step": 7883 }, { "epoch": 0.98, "grad_norm": 1.3925588795777673, "learning_rate": 1.2223402647549843e-08, "loss": 0.4751, "step": 7884 }, { "epoch": 0.98, "grad_norm": 1.315233321300705, "learning_rate": 1.2083363953766525e-08, "loss": 0.5084, "step": 7885 }, { "epoch": 0.98, "grad_norm": 1.4819177522796911, "learning_rate": 1.194413110184478e-08, "loss": 0.5214, "step": 7886 }, { "epoch": 0.98, "grad_norm": 1.2908476184298494, "learning_rate": 1.1805704114279948e-08, "loss": 0.4593, "step": 7887 }, { "epoch": 0.98, "grad_norm": 1.3019806390033144, "learning_rate": 1.1668083013435804e-08, "loss": 0.5032, "step": 7888 }, { "epoch": 0.98, "grad_norm": 0.7250265062600212, "learning_rate": 1.1531267821545678e-08, "loss": 0.4961, "step": 7889 }, { "epoch": 0.98, "grad_norm": 1.506622513165299, "learning_rate": 1.1395258560714106e-08, "loss": 0.5137, "step": 7890 }, { "epoch": 0.98, "grad_norm": 1.388515573850204, "learning_rate": 1.1260055252914071e-08, "loss": 0.5433, "step": 7891 }, { "epoch": 0.98, "grad_norm": 1.648138461522593, "learning_rate": 1.1125657919989208e-08, "loss": 0.5046, "step": 7892 }, { "epoch": 0.98, "grad_norm": 1.8268093577316844, "learning_rate": 1.099206658365215e-08, "loss": 0.474, "step": 7893 }, { "epoch": 0.98, "grad_norm": 1.462649421981926, "learning_rate": 1.0859281265486189e-08, "loss": 0.4886, "step": 7894 }, { "epoch": 0.98, "grad_norm": 1.8737725933823501, "learning_rate": 1.0727301986943605e-08, "loss": 0.4958, "step": 7895 }, { "epoch": 0.98, "grad_norm": 1.4911950734488788, "learning_rate": 1.0596128769347347e-08, "loss": 0.4844, "step": 7896 }, { "epoch": 0.98, "grad_norm": 1.638525328685385, "learning_rate": 1.0465761633888793e-08, "loss": 0.5159, "step": 7897 }, { "epoch": 0.98, "grad_norm": 1.3760722132055998, "learning_rate": 1.0336200601630542e-08, "loss": 0.504, "step": 7898 }, { "epoch": 0.98, "grad_norm": 1.525924868180361, "learning_rate": 1.020744569350418e-08, "loss": 0.531, "step": 7899 }, { "epoch": 0.98, "grad_norm": 1.6230687050084631, "learning_rate": 1.007949693031085e-08, "loss": 0.4862, "step": 7900 }, { "epoch": 0.98, "grad_norm": 1.759421855350358, "learning_rate": 9.952354332722348e-09, "loss": 0.4731, "step": 7901 }, { "epoch": 0.98, "grad_norm": 3.7801379899361045, "learning_rate": 9.826017921279463e-09, "loss": 0.4566, "step": 7902 }, { "epoch": 0.98, "grad_norm": 1.52962165080147, "learning_rate": 9.700487716392537e-09, "loss": 0.5379, "step": 7903 }, { "epoch": 0.98, "grad_norm": 1.963892250825473, "learning_rate": 9.57576373834257e-09, "loss": 0.4842, "step": 7904 }, { "epoch": 0.98, "grad_norm": 1.373974698728333, "learning_rate": 9.451846007280107e-09, "loss": 0.5251, "step": 7905 }, { "epoch": 0.98, "grad_norm": 0.716667262902133, "learning_rate": 9.328734543224138e-09, "loss": 0.4694, "step": 7906 }, { "epoch": 0.98, "grad_norm": 2.6714175262182387, "learning_rate": 9.206429366065418e-09, "loss": 0.4735, "step": 7907 }, { "epoch": 0.98, "grad_norm": 1.833029537151372, "learning_rate": 9.084930495563138e-09, "loss": 0.5255, "step": 7908 }, { "epoch": 0.98, "grad_norm": 3.5869767827391525, "learning_rate": 8.964237951346044e-09, "loss": 0.4631, "step": 7909 }, { "epoch": 0.98, "grad_norm": 1.4241008230767385, "learning_rate": 8.844351752913539e-09, "loss": 0.5049, "step": 7910 }, { "epoch": 0.98, "grad_norm": 1.5467926959639, "learning_rate": 8.72527191963457e-09, "loss": 0.5108, "step": 7911 }, { "epoch": 0.98, "grad_norm": 1.5800485283358539, "learning_rate": 8.606998470746531e-09, "loss": 0.5012, "step": 7912 }, { "epoch": 0.98, "grad_norm": 1.2881792953126658, "learning_rate": 8.489531425358576e-09, "loss": 0.4238, "step": 7913 }, { "epoch": 0.98, "grad_norm": 1.5260835129310093, "learning_rate": 8.372870802447752e-09, "loss": 0.4783, "step": 7914 }, { "epoch": 0.98, "grad_norm": 1.3749626334965497, "learning_rate": 8.257016620862313e-09, "loss": 0.5112, "step": 7915 }, { "epoch": 0.98, "grad_norm": 1.3489776684036383, "learning_rate": 8.141968899318953e-09, "loss": 0.5131, "step": 7916 }, { "epoch": 0.98, "grad_norm": 1.5186970614708848, "learning_rate": 8.027727656405027e-09, "loss": 0.5219, "step": 7917 }, { "epoch": 0.98, "grad_norm": 1.5606982472763145, "learning_rate": 7.914292910576882e-09, "loss": 0.5179, "step": 7918 }, { "epoch": 0.98, "grad_norm": 1.1687304109309202, "learning_rate": 7.801664680161526e-09, "loss": 0.4706, "step": 7919 }, { "epoch": 0.98, "grad_norm": 2.9162988368769898, "learning_rate": 7.6898429833544e-09, "loss": 0.4693, "step": 7920 }, { "epoch": 0.98, "grad_norm": 1.9704069261789112, "learning_rate": 7.578827838221059e-09, "loss": 0.5034, "step": 7921 }, { "epoch": 0.98, "grad_norm": 1.9394228000122264, "learning_rate": 7.468619262697708e-09, "loss": 0.5163, "step": 7922 }, { "epoch": 0.98, "grad_norm": 1.7271604647231455, "learning_rate": 7.359217274589547e-09, "loss": 0.4904, "step": 7923 }, { "epoch": 0.98, "grad_norm": 1.4181225934413368, "learning_rate": 7.250621891570775e-09, "loss": 0.5419, "step": 7924 }, { "epoch": 0.98, "grad_norm": 1.3000477016700693, "learning_rate": 7.142833131186244e-09, "loss": 0.473, "step": 7925 }, { "epoch": 0.98, "grad_norm": 1.2807710850926293, "learning_rate": 7.035851010850359e-09, "loss": 0.4878, "step": 7926 }, { "epoch": 0.98, "grad_norm": 1.4349642139062453, "learning_rate": 6.929675547846515e-09, "loss": 0.4818, "step": 7927 }, { "epoch": 0.98, "grad_norm": 1.4312178958680224, "learning_rate": 6.824306759328769e-09, "loss": 0.5231, "step": 7928 }, { "epoch": 0.98, "grad_norm": 3.5932973980498493, "learning_rate": 6.719744662320172e-09, "loss": 0.5194, "step": 7929 }, { "epoch": 0.98, "grad_norm": 1.7986015396528459, "learning_rate": 6.6159892737138746e-09, "loss": 0.4934, "step": 7930 }, { "epoch": 0.98, "grad_norm": 2.5386905898599568, "learning_rate": 6.5130406102720255e-09, "loss": 0.4808, "step": 7931 }, { "epoch": 0.98, "grad_norm": 1.596510821692502, "learning_rate": 6.4108986886274315e-09, "loss": 0.4887, "step": 7932 }, { "epoch": 0.98, "grad_norm": 1.44232300560664, "learning_rate": 6.309563525281337e-09, "loss": 0.4932, "step": 7933 }, { "epoch": 0.98, "grad_norm": 1.8056468590688406, "learning_rate": 6.2090351366062005e-09, "loss": 0.5026, "step": 7934 }, { "epoch": 0.98, "grad_norm": 1.4217766938218368, "learning_rate": 6.10931353884292e-09, "loss": 0.4779, "step": 7935 }, { "epoch": 0.98, "grad_norm": 1.289948288498926, "learning_rate": 6.0103987481019425e-09, "loss": 0.4627, "step": 7936 }, { "epoch": 0.98, "grad_norm": 1.5237895784464568, "learning_rate": 5.912290780364371e-09, "loss": 0.4737, "step": 7937 }, { "epoch": 0.99, "grad_norm": 1.2976296732878239, "learning_rate": 5.814989651480307e-09, "loss": 0.4797, "step": 7938 }, { "epoch": 0.99, "grad_norm": 1.4923362635589985, "learning_rate": 5.718495377169398e-09, "loss": 0.5031, "step": 7939 }, { "epoch": 0.99, "grad_norm": 1.6018338958446563, "learning_rate": 5.6228079730208386e-09, "loss": 0.5023, "step": 7940 }, { "epoch": 0.99, "grad_norm": 1.453318636362527, "learning_rate": 5.527927454495041e-09, "loss": 0.5132, "step": 7941 }, { "epoch": 0.99, "grad_norm": 0.6526477392008091, "learning_rate": 5.43385383691919e-09, "loss": 0.5141, "step": 7942 }, { "epoch": 0.99, "grad_norm": 1.527193581772258, "learning_rate": 5.340587135492792e-09, "loss": 0.4674, "step": 7943 }, { "epoch": 0.99, "grad_norm": 1.5351659484527673, "learning_rate": 5.248127365283795e-09, "loss": 0.5062, "step": 7944 }, { "epoch": 0.99, "grad_norm": 1.5048077567795504, "learning_rate": 5.156474541229694e-09, "loss": 0.4739, "step": 7945 }, { "epoch": 0.99, "grad_norm": 1.5923104159215158, "learning_rate": 5.065628678137535e-09, "loss": 0.442, "step": 7946 }, { "epoch": 0.99, "grad_norm": 1.5367669913165014, "learning_rate": 4.975589790684465e-09, "loss": 0.538, "step": 7947 }, { "epoch": 0.99, "grad_norm": 1.710977541997585, "learning_rate": 4.8863578934171815e-09, "loss": 0.5158, "step": 7948 }, { "epoch": 0.99, "grad_norm": 1.408633133018979, "learning_rate": 4.797933000751376e-09, "loss": 0.5119, "step": 7949 }, { "epoch": 0.99, "grad_norm": 1.5436028172282805, "learning_rate": 4.710315126973952e-09, "loss": 0.4909, "step": 7950 }, { "epoch": 0.99, "grad_norm": 3.0541233013668996, "learning_rate": 4.6235042862397e-09, "loss": 0.4652, "step": 7951 }, { "epoch": 0.99, "grad_norm": 3.019705736632635, "learning_rate": 4.53750049257351e-09, "loss": 0.5356, "step": 7952 }, { "epoch": 0.99, "grad_norm": 1.4453039044862797, "learning_rate": 4.452303759869825e-09, "loss": 0.4711, "step": 7953 }, { "epoch": 0.99, "grad_norm": 1.3822195378285782, "learning_rate": 4.3679141018937445e-09, "loss": 0.4825, "step": 7954 }, { "epoch": 0.99, "grad_norm": 1.4037807224753245, "learning_rate": 4.284331532278807e-09, "loss": 0.4862, "step": 7955 }, { "epoch": 0.99, "grad_norm": 1.5031811880438142, "learning_rate": 4.201556064528101e-09, "loss": 0.5291, "step": 7956 }, { "epoch": 0.99, "grad_norm": 1.307167431767514, "learning_rate": 4.1195877120153716e-09, "loss": 0.4643, "step": 7957 }, { "epoch": 0.99, "grad_norm": 1.5174307919187913, "learning_rate": 4.038426487982805e-09, "loss": 0.4864, "step": 7958 }, { "epoch": 0.99, "grad_norm": 1.5627982372472973, "learning_rate": 3.9580724055432455e-09, "loss": 0.5064, "step": 7959 }, { "epoch": 0.99, "grad_norm": 5.398204333643108, "learning_rate": 3.878525477677975e-09, "loss": 0.4993, "step": 7960 }, { "epoch": 0.99, "grad_norm": 1.2810352158757143, "learning_rate": 3.799785717238935e-09, "loss": 0.4969, "step": 7961 }, { "epoch": 0.99, "grad_norm": 1.709476098049419, "learning_rate": 3.7218531369476174e-09, "loss": 0.4781, "step": 7962 }, { "epoch": 0.99, "grad_norm": 1.5636347778347486, "learning_rate": 3.644727749393395e-09, "loss": 0.4709, "step": 7963 }, { "epoch": 0.99, "grad_norm": 0.63189574021935, "learning_rate": 3.568409567037967e-09, "loss": 0.521, "step": 7964 }, { "epoch": 0.99, "grad_norm": 1.6745069970963564, "learning_rate": 3.4928986022103595e-09, "loss": 0.4898, "step": 7965 }, { "epoch": 0.99, "grad_norm": 1.2734109830619809, "learning_rate": 3.4181948671102583e-09, "loss": 0.5043, "step": 7966 }, { "epoch": 0.99, "grad_norm": 1.7824674763979274, "learning_rate": 3.344298373806898e-09, "loss": 0.4987, "step": 7967 }, { "epoch": 0.99, "grad_norm": 1.4767364501336402, "learning_rate": 3.2712091342385065e-09, "loss": 0.5063, "step": 7968 }, { "epoch": 0.99, "grad_norm": 1.4764672841746604, "learning_rate": 3.1989271602134163e-09, "loss": 0.4688, "step": 7969 }, { "epoch": 0.99, "grad_norm": 1.4718649596668767, "learning_rate": 3.1274524634100634e-09, "loss": 0.4937, "step": 7970 }, { "epoch": 0.99, "grad_norm": 1.9146013021572743, "learning_rate": 3.0567850553747667e-09, "loss": 0.4767, "step": 7971 }, { "epoch": 0.99, "grad_norm": 1.5376106330790187, "learning_rate": 2.9869249475256155e-09, "loss": 0.4888, "step": 7972 }, { "epoch": 0.99, "grad_norm": 1.6629488262473051, "learning_rate": 2.917872151148027e-09, "loss": 0.4718, "step": 7973 }, { "epoch": 0.99, "grad_norm": 1.5242519408275426, "learning_rate": 2.8496266773986314e-09, "loss": 0.5081, "step": 7974 }, { "epoch": 0.99, "grad_norm": 1.3755774278666677, "learning_rate": 2.7821885373030545e-09, "loss": 0.5311, "step": 7975 }, { "epoch": 0.99, "grad_norm": 1.385990845021792, "learning_rate": 2.715557741756469e-09, "loss": 0.478, "step": 7976 }, { "epoch": 0.99, "grad_norm": 1.2967126233409623, "learning_rate": 2.6497343015235987e-09, "loss": 0.4459, "step": 7977 }, { "epoch": 0.99, "grad_norm": 1.4684511791320787, "learning_rate": 2.584718227239269e-09, "loss": 0.4752, "step": 7978 }, { "epoch": 0.99, "grad_norm": 1.519683551519481, "learning_rate": 2.520509529406745e-09, "loss": 0.4846, "step": 7979 }, { "epoch": 0.99, "grad_norm": 1.5136123778915815, "learning_rate": 2.4571082183993955e-09, "loss": 0.4975, "step": 7980 }, { "epoch": 0.99, "grad_norm": 1.440141895316327, "learning_rate": 2.3945143044612485e-09, "loss": 0.4474, "step": 7981 }, { "epoch": 0.99, "grad_norm": 1.7085897376426566, "learning_rate": 2.33272779770366e-09, "loss": 0.4575, "step": 7982 }, { "epoch": 0.99, "grad_norm": 1.4363383949547228, "learning_rate": 2.271748708110311e-09, "loss": 0.4879, "step": 7983 }, { "epoch": 0.99, "grad_norm": 1.5093556934051071, "learning_rate": 2.211577045531099e-09, "loss": 0.5451, "step": 7984 }, { "epoch": 0.99, "grad_norm": 1.4565013995684928, "learning_rate": 2.1522128196888036e-09, "loss": 0.5185, "step": 7985 }, { "epoch": 0.99, "grad_norm": 1.4807416392206982, "learning_rate": 2.0936560401735306e-09, "loss": 0.4876, "step": 7986 }, { "epoch": 0.99, "grad_norm": 1.7294427571785986, "learning_rate": 2.0359067164454905e-09, "loss": 0.4929, "step": 7987 }, { "epoch": 0.99, "grad_norm": 1.646187430172883, "learning_rate": 1.9789648578349975e-09, "loss": 0.492, "step": 7988 }, { "epoch": 0.99, "grad_norm": 1.5770653317502463, "learning_rate": 1.9228304735413594e-09, "loss": 0.5405, "step": 7989 }, { "epoch": 0.99, "grad_norm": 1.5488248308485255, "learning_rate": 1.8675035726339883e-09, "loss": 0.4976, "step": 7990 }, { "epoch": 0.99, "grad_norm": 1.5937102313405058, "learning_rate": 1.812984164050735e-09, "loss": 0.544, "step": 7991 }, { "epoch": 0.99, "grad_norm": 1.5565998451857792, "learning_rate": 1.7592722565995536e-09, "loss": 0.4508, "step": 7992 }, { "epoch": 0.99, "grad_norm": 4.208208978134417, "learning_rate": 1.706367858959057e-09, "loss": 0.5158, "step": 7993 }, { "epoch": 0.99, "grad_norm": 0.6943100273181849, "learning_rate": 1.6542709796757429e-09, "loss": 0.4905, "step": 7994 }, { "epoch": 0.99, "grad_norm": 1.4754337734216012, "learning_rate": 1.6029816271667664e-09, "loss": 0.4433, "step": 7995 }, { "epoch": 0.99, "grad_norm": 1.7784248749244436, "learning_rate": 1.5524998097177225e-09, "loss": 0.5054, "step": 7996 }, { "epoch": 0.99, "grad_norm": 1.6584613863369149, "learning_rate": 1.502825535484309e-09, "loss": 0.4693, "step": 7997 }, { "epoch": 0.99, "grad_norm": 1.3627989829776788, "learning_rate": 1.4539588124928838e-09, "loss": 0.4536, "step": 7998 }, { "epoch": 0.99, "grad_norm": 1.6144518696085406, "learning_rate": 1.4058996486376875e-09, "loss": 0.5055, "step": 7999 }, { "epoch": 0.99, "grad_norm": 3.069795091114158, "learning_rate": 1.3586480516825096e-09, "loss": 0.4743, "step": 8000 } ], "logging_steps": 1.0, "max_steps": 8058, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 4101554005508096.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }