{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 2156, "global_step": 21560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.6382189239332094e-05, "grad_norm": 248.3904266357422, "learning_rate": 4.6296296296296295e-08, "loss": 1.3716, "step": 1 }, { "epoch": 9.276437847866419e-05, "grad_norm": 189.84117126464844, "learning_rate": 9.259259259259259e-08, "loss": 1.4317, "step": 2 }, { "epoch": 0.0001391465677179963, "grad_norm": 114.2202377319336, "learning_rate": 1.3888888888888888e-07, "loss": 1.6067, "step": 3 }, { "epoch": 0.00018552875695732838, "grad_norm": 220.6165313720703, "learning_rate": 1.8518518518518518e-07, "loss": 1.717, "step": 4 }, { "epoch": 0.00023191094619666049, "grad_norm": 85.78903198242188, "learning_rate": 2.3148148148148148e-07, "loss": 0.9554, "step": 5 }, { "epoch": 0.0002782931354359926, "grad_norm": 176.37661743164062, "learning_rate": 2.7777777777777776e-07, "loss": 1.2589, "step": 6 }, { "epoch": 0.0003246753246753247, "grad_norm": 49.737144470214844, "learning_rate": 3.2407407407407406e-07, "loss": 0.8629, "step": 7 }, { "epoch": 0.00037105751391465676, "grad_norm": 264.3349914550781, "learning_rate": 3.7037037037037036e-07, "loss": 1.465, "step": 8 }, { "epoch": 0.0004174397031539889, "grad_norm": 197.21090698242188, "learning_rate": 4.1666666666666667e-07, "loss": 1.1485, "step": 9 }, { "epoch": 0.00046382189239332097, "grad_norm": 146.6044921875, "learning_rate": 4.6296296296296297e-07, "loss": 0.886, "step": 10 }, { "epoch": 0.0005102040816326531, "grad_norm": 191.51747131347656, "learning_rate": 5.092592592592593e-07, "loss": 1.2505, "step": 11 }, { "epoch": 0.0005565862708719852, "grad_norm": 186.2329864501953, "learning_rate": 5.555555555555555e-07, "loss": 1.2083, "step": 12 }, { "epoch": 0.0006029684601113173, "grad_norm": 70.10062408447266, "learning_rate": 6.018518518518519e-07, "loss": 0.8909, "step": 13 }, { "epoch": 0.0006493506493506494, "grad_norm": 77.60619354248047, "learning_rate": 6.481481481481481e-07, "loss": 0.8899, "step": 14 }, { "epoch": 0.0006957328385899814, "grad_norm": 223.03794860839844, "learning_rate": 6.944444444444446e-07, "loss": 1.5407, "step": 15 }, { "epoch": 0.0007421150278293135, "grad_norm": 74.99436950683594, "learning_rate": 7.407407407407407e-07, "loss": 0.7344, "step": 16 }, { "epoch": 0.0007884972170686456, "grad_norm": 137.9683074951172, "learning_rate": 7.870370370370371e-07, "loss": 1.5281, "step": 17 }, { "epoch": 0.0008348794063079778, "grad_norm": 109.30713653564453, "learning_rate": 8.333333333333333e-07, "loss": 0.9079, "step": 18 }, { "epoch": 0.0008812615955473099, "grad_norm": 72.10447692871094, "learning_rate": 8.796296296296297e-07, "loss": 0.7475, "step": 19 }, { "epoch": 0.0009276437847866419, "grad_norm": 48.2200813293457, "learning_rate": 9.259259259259259e-07, "loss": 0.76, "step": 20 }, { "epoch": 0.000974025974025974, "grad_norm": 115.83453369140625, "learning_rate": 9.722222222222224e-07, "loss": 0.7543, "step": 21 }, { "epoch": 0.0010204081632653062, "grad_norm": 33.81502914428711, "learning_rate": 1.0185185185185185e-06, "loss": 0.7603, "step": 22 }, { "epoch": 0.0010667903525046383, "grad_norm": 35.02587127685547, "learning_rate": 1.0648148148148149e-06, "loss": 0.8222, "step": 23 }, { "epoch": 0.0011131725417439704, "grad_norm": 66.84224700927734, "learning_rate": 1.111111111111111e-06, "loss": 0.6602, "step": 24 }, { "epoch": 0.0011595547309833025, "grad_norm": 19.236581802368164, "learning_rate": 1.1574074074074076e-06, "loss": 0.7513, "step": 25 }, { "epoch": 0.0012059369202226345, "grad_norm": 28.092620849609375, "learning_rate": 1.2037037037037037e-06, "loss": 0.5685, "step": 26 }, { "epoch": 0.0012523191094619666, "grad_norm": 22.672887802124023, "learning_rate": 1.25e-06, "loss": 0.6796, "step": 27 }, { "epoch": 0.0012987012987012987, "grad_norm": 27.65536117553711, "learning_rate": 1.2962962962962962e-06, "loss": 0.6373, "step": 28 }, { "epoch": 0.0013450834879406308, "grad_norm": 37.75365447998047, "learning_rate": 1.3425925925925928e-06, "loss": 0.8234, "step": 29 }, { "epoch": 0.0013914656771799629, "grad_norm": 31.744443893432617, "learning_rate": 1.3888888888888892e-06, "loss": 0.753, "step": 30 }, { "epoch": 0.001437847866419295, "grad_norm": 33.8834342956543, "learning_rate": 1.4351851851851853e-06, "loss": 0.7557, "step": 31 }, { "epoch": 0.001484230055658627, "grad_norm": 32.67366027832031, "learning_rate": 1.4814814814814815e-06, "loss": 0.5581, "step": 32 }, { "epoch": 0.001530612244897959, "grad_norm": 36.82262420654297, "learning_rate": 1.527777777777778e-06, "loss": 0.6893, "step": 33 }, { "epoch": 0.0015769944341372912, "grad_norm": 28.96379280090332, "learning_rate": 1.5740740740740742e-06, "loss": 0.6988, "step": 34 }, { "epoch": 0.0016233766233766235, "grad_norm": 21.258140563964844, "learning_rate": 1.6203703703703705e-06, "loss": 0.6924, "step": 35 }, { "epoch": 0.0016697588126159556, "grad_norm": 22.975051879882812, "learning_rate": 1.6666666666666667e-06, "loss": 0.5989, "step": 36 }, { "epoch": 0.0017161410018552876, "grad_norm": 41.03731155395508, "learning_rate": 1.7129629629629632e-06, "loss": 0.7193, "step": 37 }, { "epoch": 0.0017625231910946197, "grad_norm": 24.7891845703125, "learning_rate": 1.7592592592592594e-06, "loss": 0.6313, "step": 38 }, { "epoch": 0.0018089053803339518, "grad_norm": 30.025739669799805, "learning_rate": 1.8055555555555557e-06, "loss": 0.6311, "step": 39 }, { "epoch": 0.0018552875695732839, "grad_norm": 27.063064575195312, "learning_rate": 1.8518518518518519e-06, "loss": 0.6747, "step": 40 }, { "epoch": 0.001901669758812616, "grad_norm": 38.09992218017578, "learning_rate": 1.8981481481481484e-06, "loss": 0.6755, "step": 41 }, { "epoch": 0.001948051948051948, "grad_norm": 29.505916595458984, "learning_rate": 1.944444444444445e-06, "loss": 0.6025, "step": 42 }, { "epoch": 0.00199443413729128, "grad_norm": 31.521808624267578, "learning_rate": 1.9907407407407407e-06, "loss": 0.5771, "step": 43 }, { "epoch": 0.0020408163265306124, "grad_norm": 38.516841888427734, "learning_rate": 2.037037037037037e-06, "loss": 0.534, "step": 44 }, { "epoch": 0.0020871985157699443, "grad_norm": 45.36676025390625, "learning_rate": 2.0833333333333334e-06, "loss": 0.6117, "step": 45 }, { "epoch": 0.0021335807050092766, "grad_norm": 38.780269622802734, "learning_rate": 2.1296296296296298e-06, "loss": 0.5027, "step": 46 }, { "epoch": 0.0021799628942486085, "grad_norm": 69.26510620117188, "learning_rate": 2.175925925925926e-06, "loss": 0.5612, "step": 47 }, { "epoch": 0.0022263450834879408, "grad_norm": 37.920387268066406, "learning_rate": 2.222222222222222e-06, "loss": 0.5589, "step": 48 }, { "epoch": 0.0022727272727272726, "grad_norm": 87.3547592163086, "learning_rate": 2.268518518518519e-06, "loss": 0.7629, "step": 49 }, { "epoch": 0.002319109461966605, "grad_norm": 35.8912239074707, "learning_rate": 2.314814814814815e-06, "loss": 0.5608, "step": 50 }, { "epoch": 0.0023654916512059368, "grad_norm": 58.097007751464844, "learning_rate": 2.361111111111111e-06, "loss": 0.6501, "step": 51 }, { "epoch": 0.002411873840445269, "grad_norm": 36.649513244628906, "learning_rate": 2.4074074074074075e-06, "loss": 0.4953, "step": 52 }, { "epoch": 0.002458256029684601, "grad_norm": 28.74641990661621, "learning_rate": 2.453703703703704e-06, "loss": 0.5042, "step": 53 }, { "epoch": 0.0025046382189239332, "grad_norm": 25.44823455810547, "learning_rate": 2.5e-06, "loss": 0.5724, "step": 54 }, { "epoch": 0.002551020408163265, "grad_norm": 31.343860626220703, "learning_rate": 2.5462962962962966e-06, "loss": 0.4619, "step": 55 }, { "epoch": 0.0025974025974025974, "grad_norm": 36.52278518676758, "learning_rate": 2.5925925925925925e-06, "loss": 0.605, "step": 56 }, { "epoch": 0.0026437847866419297, "grad_norm": 14.084637641906738, "learning_rate": 2.6388888888888893e-06, "loss": 0.4773, "step": 57 }, { "epoch": 0.0026901669758812616, "grad_norm": 18.145557403564453, "learning_rate": 2.6851851851851856e-06, "loss": 0.4899, "step": 58 }, { "epoch": 0.002736549165120594, "grad_norm": 27.632888793945312, "learning_rate": 2.7314814814814816e-06, "loss": 0.5841, "step": 59 }, { "epoch": 0.0027829313543599257, "grad_norm": 16.01984977722168, "learning_rate": 2.7777777777777783e-06, "loss": 0.3896, "step": 60 }, { "epoch": 0.002829313543599258, "grad_norm": 21.238632202148438, "learning_rate": 2.8240740740740743e-06, "loss": 0.48, "step": 61 }, { "epoch": 0.00287569573283859, "grad_norm": 23.138965606689453, "learning_rate": 2.8703703703703706e-06, "loss": 0.4077, "step": 62 }, { "epoch": 0.002922077922077922, "grad_norm": 60.37529373168945, "learning_rate": 2.916666666666667e-06, "loss": 0.3696, "step": 63 }, { "epoch": 0.002968460111317254, "grad_norm": 27.562665939331055, "learning_rate": 2.962962962962963e-06, "loss": 0.4265, "step": 64 }, { "epoch": 0.0030148423005565863, "grad_norm": 53.105228424072266, "learning_rate": 3.0092592592592597e-06, "loss": 0.8515, "step": 65 }, { "epoch": 0.003061224489795918, "grad_norm": 32.86825180053711, "learning_rate": 3.055555555555556e-06, "loss": 0.5984, "step": 66 }, { "epoch": 0.0031076066790352505, "grad_norm": 24.942642211914062, "learning_rate": 3.101851851851852e-06, "loss": 0.4403, "step": 67 }, { "epoch": 0.0031539888682745824, "grad_norm": 36.29971694946289, "learning_rate": 3.1481481481481483e-06, "loss": 0.4858, "step": 68 }, { "epoch": 0.0032003710575139147, "grad_norm": 44.12169647216797, "learning_rate": 3.1944444444444443e-06, "loss": 0.499, "step": 69 }, { "epoch": 0.003246753246753247, "grad_norm": 27.13384246826172, "learning_rate": 3.240740740740741e-06, "loss": 0.4028, "step": 70 }, { "epoch": 0.003293135435992579, "grad_norm": 16.611913681030273, "learning_rate": 3.2870370370370374e-06, "loss": 0.4541, "step": 71 }, { "epoch": 0.003339517625231911, "grad_norm": 52.04288864135742, "learning_rate": 3.3333333333333333e-06, "loss": 0.7272, "step": 72 }, { "epoch": 0.003385899814471243, "grad_norm": 33.94670104980469, "learning_rate": 3.37962962962963e-06, "loss": 0.5227, "step": 73 }, { "epoch": 0.0034322820037105753, "grad_norm": 30.45289421081543, "learning_rate": 3.4259259259259265e-06, "loss": 0.4829, "step": 74 }, { "epoch": 0.003478664192949907, "grad_norm": 22.137596130371094, "learning_rate": 3.4722222222222224e-06, "loss": 0.5415, "step": 75 }, { "epoch": 0.0035250463821892395, "grad_norm": 14.146017074584961, "learning_rate": 3.5185185185185187e-06, "loss": 0.3679, "step": 76 }, { "epoch": 0.0035714285714285713, "grad_norm": 20.790040969848633, "learning_rate": 3.5648148148148147e-06, "loss": 0.4201, "step": 77 }, { "epoch": 0.0036178107606679036, "grad_norm": 19.753223419189453, "learning_rate": 3.6111111111111115e-06, "loss": 0.4092, "step": 78 }, { "epoch": 0.0036641929499072355, "grad_norm": 29.523998260498047, "learning_rate": 3.657407407407408e-06, "loss": 0.5527, "step": 79 }, { "epoch": 0.0037105751391465678, "grad_norm": 27.261743545532227, "learning_rate": 3.7037037037037037e-06, "loss": 0.626, "step": 80 }, { "epoch": 0.0037569573283858996, "grad_norm": 20.90980339050293, "learning_rate": 3.7500000000000005e-06, "loss": 0.4336, "step": 81 }, { "epoch": 0.003803339517625232, "grad_norm": 21.4825439453125, "learning_rate": 3.796296296296297e-06, "loss": 0.4845, "step": 82 }, { "epoch": 0.003849721706864564, "grad_norm": 24.37160873413086, "learning_rate": 3.842592592592592e-06, "loss": 0.4765, "step": 83 }, { "epoch": 0.003896103896103896, "grad_norm": 27.597877502441406, "learning_rate": 3.88888888888889e-06, "loss": 0.4065, "step": 84 }, { "epoch": 0.003942486085343228, "grad_norm": 16.933975219726562, "learning_rate": 3.935185185185186e-06, "loss": 0.4747, "step": 85 }, { "epoch": 0.00398886827458256, "grad_norm": 19.706310272216797, "learning_rate": 3.9814814814814814e-06, "loss": 0.4833, "step": 86 }, { "epoch": 0.004035250463821892, "grad_norm": 24.760974884033203, "learning_rate": 4.027777777777779e-06, "loss": 0.5166, "step": 87 }, { "epoch": 0.004081632653061225, "grad_norm": 25.386566162109375, "learning_rate": 4.074074074074074e-06, "loss": 0.5576, "step": 88 }, { "epoch": 0.004128014842300557, "grad_norm": 15.826007843017578, "learning_rate": 4.1203703703703705e-06, "loss": 0.4858, "step": 89 }, { "epoch": 0.004174397031539889, "grad_norm": 17.35371971130371, "learning_rate": 4.166666666666667e-06, "loss": 0.3543, "step": 90 }, { "epoch": 0.0042207792207792205, "grad_norm": 24.46353530883789, "learning_rate": 4.212962962962963e-06, "loss": 0.4022, "step": 91 }, { "epoch": 0.004267161410018553, "grad_norm": 21.782257080078125, "learning_rate": 4.2592592592592596e-06, "loss": 0.5237, "step": 92 }, { "epoch": 0.004313543599257885, "grad_norm": 33.445709228515625, "learning_rate": 4.305555555555556e-06, "loss": 0.3831, "step": 93 }, { "epoch": 0.004359925788497217, "grad_norm": 15.542022705078125, "learning_rate": 4.351851851851852e-06, "loss": 0.4803, "step": 94 }, { "epoch": 0.004406307977736549, "grad_norm": 10.620732307434082, "learning_rate": 4.398148148148149e-06, "loss": 0.3428, "step": 95 }, { "epoch": 0.0044526901669758815, "grad_norm": 28.12500762939453, "learning_rate": 4.444444444444444e-06, "loss": 0.4166, "step": 96 }, { "epoch": 0.004499072356215213, "grad_norm": 28.70795440673828, "learning_rate": 4.490740740740741e-06, "loss": 0.4892, "step": 97 }, { "epoch": 0.004545454545454545, "grad_norm": 32.46931076049805, "learning_rate": 4.537037037037038e-06, "loss": 0.6234, "step": 98 }, { "epoch": 0.004591836734693878, "grad_norm": 17.686321258544922, "learning_rate": 4.583333333333333e-06, "loss": 0.5086, "step": 99 }, { "epoch": 0.00463821892393321, "grad_norm": 14.262630462646484, "learning_rate": 4.62962962962963e-06, "loss": 0.443, "step": 100 }, { "epoch": 0.004684601113172542, "grad_norm": 23.3179931640625, "learning_rate": 4.675925925925927e-06, "loss": 0.5125, "step": 101 }, { "epoch": 0.0047309833024118736, "grad_norm": 17.022871017456055, "learning_rate": 4.722222222222222e-06, "loss": 0.407, "step": 102 }, { "epoch": 0.004777365491651206, "grad_norm": 26.460397720336914, "learning_rate": 4.768518518518519e-06, "loss": 0.4962, "step": 103 }, { "epoch": 0.004823747680890538, "grad_norm": 20.361270904541016, "learning_rate": 4.814814814814815e-06, "loss": 0.4726, "step": 104 }, { "epoch": 0.00487012987012987, "grad_norm": 19.013782501220703, "learning_rate": 4.861111111111111e-06, "loss": 0.4619, "step": 105 }, { "epoch": 0.004916512059369202, "grad_norm": 31.797544479370117, "learning_rate": 4.907407407407408e-06, "loss": 0.4808, "step": 106 }, { "epoch": 0.004962894248608535, "grad_norm": 34.64645004272461, "learning_rate": 4.953703703703704e-06, "loss": 0.4676, "step": 107 }, { "epoch": 0.0050092764378478665, "grad_norm": 22.613168716430664, "learning_rate": 5e-06, "loss": 0.5342, "step": 108 }, { "epoch": 0.005055658627087198, "grad_norm": 17.596174240112305, "learning_rate": 5.046296296296297e-06, "loss": 0.5013, "step": 109 }, { "epoch": 0.00510204081632653, "grad_norm": 10.634197235107422, "learning_rate": 5.092592592592593e-06, "loss": 0.4961, "step": 110 }, { "epoch": 0.005148423005565863, "grad_norm": 21.82354164123535, "learning_rate": 5.138888888888889e-06, "loss": 0.4253, "step": 111 }, { "epoch": 0.005194805194805195, "grad_norm": 17.840600967407227, "learning_rate": 5.185185185185185e-06, "loss": 0.5022, "step": 112 }, { "epoch": 0.005241187384044527, "grad_norm": 45.724910736083984, "learning_rate": 5.231481481481482e-06, "loss": 0.5157, "step": 113 }, { "epoch": 0.005287569573283859, "grad_norm": 14.325698852539062, "learning_rate": 5.2777777777777785e-06, "loss": 0.4326, "step": 114 }, { "epoch": 0.005333951762523191, "grad_norm": 17.872556686401367, "learning_rate": 5.324074074074075e-06, "loss": 0.4383, "step": 115 }, { "epoch": 0.005380333951762523, "grad_norm": 26.280485153198242, "learning_rate": 5.370370370370371e-06, "loss": 0.6054, "step": 116 }, { "epoch": 0.005426716141001855, "grad_norm": 28.133169174194336, "learning_rate": 5.416666666666667e-06, "loss": 0.4199, "step": 117 }, { "epoch": 0.005473098330241188, "grad_norm": 36.857120513916016, "learning_rate": 5.462962962962963e-06, "loss": 0.5221, "step": 118 }, { "epoch": 0.00551948051948052, "grad_norm": 19.058940887451172, "learning_rate": 5.5092592592592595e-06, "loss": 0.4491, "step": 119 }, { "epoch": 0.0055658627087198514, "grad_norm": 13.059062004089355, "learning_rate": 5.555555555555557e-06, "loss": 0.4326, "step": 120 }, { "epoch": 0.005612244897959183, "grad_norm": 13.126514434814453, "learning_rate": 5.601851851851853e-06, "loss": 0.5518, "step": 121 }, { "epoch": 0.005658627087198516, "grad_norm": 15.185569763183594, "learning_rate": 5.6481481481481485e-06, "loss": 0.4617, "step": 122 }, { "epoch": 0.005705009276437848, "grad_norm": 11.395536422729492, "learning_rate": 5.694444444444445e-06, "loss": 0.3653, "step": 123 }, { "epoch": 0.00575139146567718, "grad_norm": 16.398487091064453, "learning_rate": 5.740740740740741e-06, "loss": 0.494, "step": 124 }, { "epoch": 0.005797773654916512, "grad_norm": 38.78457260131836, "learning_rate": 5.787037037037038e-06, "loss": 0.569, "step": 125 }, { "epoch": 0.005844155844155844, "grad_norm": 24.32383918762207, "learning_rate": 5.833333333333334e-06, "loss": 0.4169, "step": 126 }, { "epoch": 0.005890538033395176, "grad_norm": 16.40294647216797, "learning_rate": 5.8796296296296295e-06, "loss": 0.5056, "step": 127 }, { "epoch": 0.005936920222634508, "grad_norm": 17.10957145690918, "learning_rate": 5.925925925925926e-06, "loss": 0.5309, "step": 128 }, { "epoch": 0.005983302411873841, "grad_norm": 18.695249557495117, "learning_rate": 5.972222222222222e-06, "loss": 0.4571, "step": 129 }, { "epoch": 0.006029684601113173, "grad_norm": 18.800140380859375, "learning_rate": 6.018518518518519e-06, "loss": 0.4061, "step": 130 }, { "epoch": 0.0060760667903525046, "grad_norm": 16.639188766479492, "learning_rate": 6.064814814814816e-06, "loss": 0.4612, "step": 131 }, { "epoch": 0.006122448979591836, "grad_norm": 19.14388084411621, "learning_rate": 6.111111111111112e-06, "loss": 0.5161, "step": 132 }, { "epoch": 0.006168831168831169, "grad_norm": 38.0191764831543, "learning_rate": 6.157407407407408e-06, "loss": 0.5633, "step": 133 }, { "epoch": 0.006215213358070501, "grad_norm": 17.42084312438965, "learning_rate": 6.203703703703704e-06, "loss": 0.3955, "step": 134 }, { "epoch": 0.006261595547309833, "grad_norm": 12.045740127563477, "learning_rate": 6.25e-06, "loss": 0.4235, "step": 135 }, { "epoch": 0.006307977736549165, "grad_norm": 19.178266525268555, "learning_rate": 6.296296296296297e-06, "loss": 0.5232, "step": 136 }, { "epoch": 0.0063543599257884975, "grad_norm": 25.558134078979492, "learning_rate": 6.342592592592594e-06, "loss": 0.3594, "step": 137 }, { "epoch": 0.006400742115027829, "grad_norm": 18.682819366455078, "learning_rate": 6.3888888888888885e-06, "loss": 0.4245, "step": 138 }, { "epoch": 0.006447124304267161, "grad_norm": 40.39377212524414, "learning_rate": 6.435185185185186e-06, "loss": 0.6054, "step": 139 }, { "epoch": 0.006493506493506494, "grad_norm": 34.703948974609375, "learning_rate": 6.481481481481482e-06, "loss": 0.7889, "step": 140 }, { "epoch": 0.006539888682745826, "grad_norm": 37.066749572753906, "learning_rate": 6.5277777777777784e-06, "loss": 0.5277, "step": 141 }, { "epoch": 0.006586270871985158, "grad_norm": 15.351313591003418, "learning_rate": 6.574074074074075e-06, "loss": 0.478, "step": 142 }, { "epoch": 0.0066326530612244895, "grad_norm": 18.699535369873047, "learning_rate": 6.620370370370371e-06, "loss": 0.5383, "step": 143 }, { "epoch": 0.006679035250463822, "grad_norm": 22.55130386352539, "learning_rate": 6.666666666666667e-06, "loss": 0.4803, "step": 144 }, { "epoch": 0.006725417439703154, "grad_norm": 33.01333999633789, "learning_rate": 6.712962962962963e-06, "loss": 0.4669, "step": 145 }, { "epoch": 0.006771799628942486, "grad_norm": 18.55819320678711, "learning_rate": 6.75925925925926e-06, "loss": 0.4983, "step": 146 }, { "epoch": 0.006818181818181818, "grad_norm": 17.93608856201172, "learning_rate": 6.8055555555555566e-06, "loss": 0.4539, "step": 147 }, { "epoch": 0.006864564007421151, "grad_norm": 20.645771026611328, "learning_rate": 6.851851851851853e-06, "loss": 0.4445, "step": 148 }, { "epoch": 0.0069109461966604824, "grad_norm": 33.56963348388672, "learning_rate": 6.898148148148148e-06, "loss": 0.6943, "step": 149 }, { "epoch": 0.006957328385899814, "grad_norm": 22.55126190185547, "learning_rate": 6.944444444444445e-06, "loss": 0.4997, "step": 150 }, { "epoch": 0.007003710575139146, "grad_norm": 19.058923721313477, "learning_rate": 6.990740740740741e-06, "loss": 0.4078, "step": 151 }, { "epoch": 0.007050092764378479, "grad_norm": 29.424488067626953, "learning_rate": 7.0370370370370375e-06, "loss": 0.6258, "step": 152 }, { "epoch": 0.007096474953617811, "grad_norm": 21.339170455932617, "learning_rate": 7.083333333333335e-06, "loss": 0.4828, "step": 153 }, { "epoch": 0.007142857142857143, "grad_norm": 21.30118751525879, "learning_rate": 7.129629629629629e-06, "loss": 0.5198, "step": 154 }, { "epoch": 0.007189239332096475, "grad_norm": 17.861112594604492, "learning_rate": 7.1759259259259266e-06, "loss": 0.4163, "step": 155 }, { "epoch": 0.007235621521335807, "grad_norm": 17.474773406982422, "learning_rate": 7.222222222222223e-06, "loss": 0.3534, "step": 156 }, { "epoch": 0.007282003710575139, "grad_norm": 17.728614807128906, "learning_rate": 7.268518518518519e-06, "loss": 0.4552, "step": 157 }, { "epoch": 0.007328385899814471, "grad_norm": 16.840599060058594, "learning_rate": 7.314814814814816e-06, "loss": 0.4046, "step": 158 }, { "epoch": 0.007374768089053804, "grad_norm": 11.588239669799805, "learning_rate": 7.361111111111112e-06, "loss": 0.444, "step": 159 }, { "epoch": 0.0074211502782931356, "grad_norm": 18.30010223388672, "learning_rate": 7.4074074074074075e-06, "loss": 0.5085, "step": 160 }, { "epoch": 0.007467532467532467, "grad_norm": 18.04405975341797, "learning_rate": 7.453703703703704e-06, "loss": 0.4626, "step": 161 }, { "epoch": 0.007513914656771799, "grad_norm": 18.88880157470703, "learning_rate": 7.500000000000001e-06, "loss": 0.4219, "step": 162 }, { "epoch": 0.007560296846011132, "grad_norm": 25.68965721130371, "learning_rate": 7.546296296296297e-06, "loss": 0.3625, "step": 163 }, { "epoch": 0.007606679035250464, "grad_norm": 14.91859245300293, "learning_rate": 7.592592592592594e-06, "loss": 0.4158, "step": 164 }, { "epoch": 0.007653061224489796, "grad_norm": 11.92503547668457, "learning_rate": 7.638888888888888e-06, "loss": 0.4126, "step": 165 }, { "epoch": 0.007699443413729128, "grad_norm": 21.755107879638672, "learning_rate": 7.685185185185185e-06, "loss": 0.4059, "step": 166 }, { "epoch": 0.00774582560296846, "grad_norm": 22.700660705566406, "learning_rate": 7.731481481481483e-06, "loss": 0.42, "step": 167 }, { "epoch": 0.007792207792207792, "grad_norm": 26.656124114990234, "learning_rate": 7.77777777777778e-06, "loss": 0.4388, "step": 168 }, { "epoch": 0.007838589981447125, "grad_norm": 20.434846878051758, "learning_rate": 7.824074074074076e-06, "loss": 0.4062, "step": 169 }, { "epoch": 0.007884972170686457, "grad_norm": 16.002790451049805, "learning_rate": 7.870370370370372e-06, "loss": 0.3971, "step": 170 }, { "epoch": 0.007931354359925789, "grad_norm": 15.378307342529297, "learning_rate": 7.916666666666667e-06, "loss": 0.5006, "step": 171 }, { "epoch": 0.00797773654916512, "grad_norm": 15.126199722290039, "learning_rate": 7.962962962962963e-06, "loss": 0.5408, "step": 172 }, { "epoch": 0.008024118738404452, "grad_norm": 27.122690200805664, "learning_rate": 8.00925925925926e-06, "loss": 0.4593, "step": 173 }, { "epoch": 0.008070500927643784, "grad_norm": 14.106827735900879, "learning_rate": 8.055555555555557e-06, "loss": 0.4859, "step": 174 }, { "epoch": 0.008116883116883116, "grad_norm": 17.783117294311523, "learning_rate": 8.101851851851854e-06, "loss": 0.3711, "step": 175 }, { "epoch": 0.00816326530612245, "grad_norm": 15.990792274475098, "learning_rate": 8.148148148148148e-06, "loss": 0.3722, "step": 176 }, { "epoch": 0.008209647495361782, "grad_norm": 13.611308097839355, "learning_rate": 8.194444444444445e-06, "loss": 0.2879, "step": 177 }, { "epoch": 0.008256029684601113, "grad_norm": 23.81829261779785, "learning_rate": 8.240740740740741e-06, "loss": 0.4116, "step": 178 }, { "epoch": 0.008302411873840445, "grad_norm": 28.194700241088867, "learning_rate": 8.287037037037037e-06, "loss": 0.4909, "step": 179 }, { "epoch": 0.008348794063079777, "grad_norm": 18.695884704589844, "learning_rate": 8.333333333333334e-06, "loss": 0.4177, "step": 180 }, { "epoch": 0.008395176252319109, "grad_norm": 20.357133865356445, "learning_rate": 8.37962962962963e-06, "loss": 0.5161, "step": 181 }, { "epoch": 0.008441558441558441, "grad_norm": 14.77469253540039, "learning_rate": 8.425925925925926e-06, "loss": 0.4294, "step": 182 }, { "epoch": 0.008487940630797774, "grad_norm": 16.5924015045166, "learning_rate": 8.472222222222223e-06, "loss": 0.3404, "step": 183 }, { "epoch": 0.008534322820037106, "grad_norm": 16.7503719329834, "learning_rate": 8.518518518518519e-06, "loss": 0.456, "step": 184 }, { "epoch": 0.008580705009276438, "grad_norm": 27.77326202392578, "learning_rate": 8.564814814814816e-06, "loss": 0.5776, "step": 185 }, { "epoch": 0.00862708719851577, "grad_norm": 16.222898483276367, "learning_rate": 8.611111111111112e-06, "loss": 0.4848, "step": 186 }, { "epoch": 0.008673469387755102, "grad_norm": 8.1627836227417, "learning_rate": 8.657407407407408e-06, "loss": 0.4879, "step": 187 }, { "epoch": 0.008719851576994434, "grad_norm": 18.217714309692383, "learning_rate": 8.703703703703705e-06, "loss": 0.4394, "step": 188 }, { "epoch": 0.008766233766233766, "grad_norm": 24.426122665405273, "learning_rate": 8.750000000000001e-06, "loss": 0.4053, "step": 189 }, { "epoch": 0.008812615955473098, "grad_norm": 15.591957092285156, "learning_rate": 8.796296296296297e-06, "loss": 0.3578, "step": 190 }, { "epoch": 0.008858998144712431, "grad_norm": 21.803455352783203, "learning_rate": 8.842592592592594e-06, "loss": 0.502, "step": 191 }, { "epoch": 0.008905380333951763, "grad_norm": 10.39694595336914, "learning_rate": 8.888888888888888e-06, "loss": 0.4493, "step": 192 }, { "epoch": 0.008951762523191095, "grad_norm": 20.1324462890625, "learning_rate": 8.935185185185186e-06, "loss": 0.4876, "step": 193 }, { "epoch": 0.008998144712430427, "grad_norm": 12.444053649902344, "learning_rate": 8.981481481481483e-06, "loss": 0.4698, "step": 194 }, { "epoch": 0.009044526901669759, "grad_norm": 20.23807716369629, "learning_rate": 9.027777777777779e-06, "loss": 0.4348, "step": 195 }, { "epoch": 0.00909090909090909, "grad_norm": 10.01034927368164, "learning_rate": 9.074074074074075e-06, "loss": 0.4218, "step": 196 }, { "epoch": 0.009137291280148422, "grad_norm": 23.90904998779297, "learning_rate": 9.120370370370372e-06, "loss": 0.511, "step": 197 }, { "epoch": 0.009183673469387756, "grad_norm": 17.908580780029297, "learning_rate": 9.166666666666666e-06, "loss": 0.4747, "step": 198 }, { "epoch": 0.009230055658627088, "grad_norm": 18.63309669494629, "learning_rate": 9.212962962962963e-06, "loss": 0.4375, "step": 199 }, { "epoch": 0.00927643784786642, "grad_norm": 21.594423294067383, "learning_rate": 9.25925925925926e-06, "loss": 0.5553, "step": 200 }, { "epoch": 0.009322820037105752, "grad_norm": 24.707162857055664, "learning_rate": 9.305555555555557e-06, "loss": 0.6026, "step": 201 }, { "epoch": 0.009369202226345083, "grad_norm": 9.65592098236084, "learning_rate": 9.351851851851854e-06, "loss": 0.3707, "step": 202 }, { "epoch": 0.009415584415584415, "grad_norm": 15.519442558288574, "learning_rate": 9.398148148148148e-06, "loss": 0.3786, "step": 203 }, { "epoch": 0.009461966604823747, "grad_norm": 29.226179122924805, "learning_rate": 9.444444444444445e-06, "loss": 0.3937, "step": 204 }, { "epoch": 0.009508348794063079, "grad_norm": 13.147674560546875, "learning_rate": 9.490740740740741e-06, "loss": 0.372, "step": 205 }, { "epoch": 0.009554730983302413, "grad_norm": 14.780241966247559, "learning_rate": 9.537037037037037e-06, "loss": 0.4372, "step": 206 }, { "epoch": 0.009601113172541744, "grad_norm": 12.463011741638184, "learning_rate": 9.583333333333335e-06, "loss": 0.453, "step": 207 }, { "epoch": 0.009647495361781076, "grad_norm": 14.030865669250488, "learning_rate": 9.62962962962963e-06, "loss": 0.4266, "step": 208 }, { "epoch": 0.009693877551020408, "grad_norm": 29.44032859802246, "learning_rate": 9.675925925925926e-06, "loss": 0.4298, "step": 209 }, { "epoch": 0.00974025974025974, "grad_norm": 7.691640853881836, "learning_rate": 9.722222222222223e-06, "loss": 0.3642, "step": 210 }, { "epoch": 0.009786641929499072, "grad_norm": 9.199444770812988, "learning_rate": 9.768518518518519e-06, "loss": 0.5096, "step": 211 }, { "epoch": 0.009833024118738404, "grad_norm": 18.878664016723633, "learning_rate": 9.814814814814815e-06, "loss": 0.4849, "step": 212 }, { "epoch": 0.009879406307977737, "grad_norm": 7.8507914543151855, "learning_rate": 9.861111111111112e-06, "loss": 0.4029, "step": 213 }, { "epoch": 0.00992578849721707, "grad_norm": 10.937633514404297, "learning_rate": 9.907407407407408e-06, "loss": 0.3098, "step": 214 }, { "epoch": 0.009972170686456401, "grad_norm": 17.6702823638916, "learning_rate": 9.953703703703704e-06, "loss": 0.4977, "step": 215 }, { "epoch": 0.010018552875695733, "grad_norm": 9.634824752807617, "learning_rate": 1e-05, "loss": 0.3659, "step": 216 }, { "epoch": 0.010064935064935065, "grad_norm": 21.626113891601562, "learning_rate": 9.999999945838819e-06, "loss": 0.498, "step": 217 }, { "epoch": 0.010111317254174397, "grad_norm": 14.348630905151367, "learning_rate": 9.999999783355277e-06, "loss": 0.4759, "step": 218 }, { "epoch": 0.010157699443413729, "grad_norm": 21.4575252532959, "learning_rate": 9.999999512549375e-06, "loss": 0.4246, "step": 219 }, { "epoch": 0.01020408163265306, "grad_norm": 22.242656707763672, "learning_rate": 9.999999133421123e-06, "loss": 0.5169, "step": 220 }, { "epoch": 0.010250463821892394, "grad_norm": 21.38971710205078, "learning_rate": 9.999998645970526e-06, "loss": 0.5591, "step": 221 }, { "epoch": 0.010296846011131726, "grad_norm": 18.817319869995117, "learning_rate": 9.999998050197595e-06, "loss": 0.5358, "step": 222 }, { "epoch": 0.010343228200371058, "grad_norm": 18.917001724243164, "learning_rate": 9.999997346102344e-06, "loss": 0.4409, "step": 223 }, { "epoch": 0.01038961038961039, "grad_norm": 18.104022979736328, "learning_rate": 9.999996533684785e-06, "loss": 0.5136, "step": 224 }, { "epoch": 0.010435992578849721, "grad_norm": 29.434711456298828, "learning_rate": 9.999995612944942e-06, "loss": 0.5047, "step": 225 }, { "epoch": 0.010482374768089053, "grad_norm": 18.5859317779541, "learning_rate": 9.99999458388283e-06, "loss": 0.5063, "step": 226 }, { "epoch": 0.010528756957328385, "grad_norm": 17.84111976623535, "learning_rate": 9.999993446498473e-06, "loss": 0.4167, "step": 227 }, { "epoch": 0.010575139146567719, "grad_norm": 32.19242858886719, "learning_rate": 9.999992200791895e-06, "loss": 0.6219, "step": 228 }, { "epoch": 0.01062152133580705, "grad_norm": 25.344070434570312, "learning_rate": 9.999990846763122e-06, "loss": 0.5314, "step": 229 }, { "epoch": 0.010667903525046383, "grad_norm": 12.134544372558594, "learning_rate": 9.999989384412185e-06, "loss": 0.3759, "step": 230 }, { "epoch": 0.010714285714285714, "grad_norm": 15.724902153015137, "learning_rate": 9.999987813739116e-06, "loss": 0.5006, "step": 231 }, { "epoch": 0.010760667903525046, "grad_norm": 25.48492431640625, "learning_rate": 9.999986134743949e-06, "loss": 0.5781, "step": 232 }, { "epoch": 0.010807050092764378, "grad_norm": 15.808874130249023, "learning_rate": 9.999984347426718e-06, "loss": 0.3598, "step": 233 }, { "epoch": 0.01085343228200371, "grad_norm": 24.319137573242188, "learning_rate": 9.999982451787464e-06, "loss": 0.5336, "step": 234 }, { "epoch": 0.010899814471243042, "grad_norm": 19.791593551635742, "learning_rate": 9.999980447826227e-06, "loss": 0.5894, "step": 235 }, { "epoch": 0.010946196660482375, "grad_norm": 16.52477264404297, "learning_rate": 9.999978335543053e-06, "loss": 0.5505, "step": 236 }, { "epoch": 0.010992578849721707, "grad_norm": 18.929452896118164, "learning_rate": 9.999976114937982e-06, "loss": 0.452, "step": 237 }, { "epoch": 0.01103896103896104, "grad_norm": 25.63051414489746, "learning_rate": 9.999973786011068e-06, "loss": 0.4213, "step": 238 }, { "epoch": 0.011085343228200371, "grad_norm": 12.311108589172363, "learning_rate": 9.999971348762358e-06, "loss": 0.4566, "step": 239 }, { "epoch": 0.011131725417439703, "grad_norm": 11.277704238891602, "learning_rate": 9.999968803191906e-06, "loss": 0.4672, "step": 240 }, { "epoch": 0.011178107606679035, "grad_norm": 39.40033721923828, "learning_rate": 9.999966149299768e-06, "loss": 0.5382, "step": 241 }, { "epoch": 0.011224489795918367, "grad_norm": 27.05375862121582, "learning_rate": 9.999963387086e-06, "loss": 0.5127, "step": 242 }, { "epoch": 0.0112708719851577, "grad_norm": 25.208600997924805, "learning_rate": 9.999960516550662e-06, "loss": 0.5508, "step": 243 }, { "epoch": 0.011317254174397032, "grad_norm": 41.71096420288086, "learning_rate": 9.999957537693818e-06, "loss": 0.6823, "step": 244 }, { "epoch": 0.011363636363636364, "grad_norm": 19.592487335205078, "learning_rate": 9.99995445051553e-06, "loss": 0.4573, "step": 245 }, { "epoch": 0.011410018552875696, "grad_norm": 22.575822830200195, "learning_rate": 9.999951255015867e-06, "loss": 0.522, "step": 246 }, { "epoch": 0.011456400742115028, "grad_norm": 24.38418960571289, "learning_rate": 9.999947951194895e-06, "loss": 0.5994, "step": 247 }, { "epoch": 0.01150278293135436, "grad_norm": 14.413005828857422, "learning_rate": 9.99994453905269e-06, "loss": 0.4272, "step": 248 }, { "epoch": 0.011549165120593691, "grad_norm": 14.36889362335205, "learning_rate": 9.999941018589323e-06, "loss": 0.4012, "step": 249 }, { "epoch": 0.011595547309833023, "grad_norm": 10.290558815002441, "learning_rate": 9.999937389804872e-06, "loss": 0.4706, "step": 250 }, { "epoch": 0.011641929499072357, "grad_norm": 15.544678688049316, "learning_rate": 9.999933652699414e-06, "loss": 0.4806, "step": 251 }, { "epoch": 0.011688311688311689, "grad_norm": 13.36431884765625, "learning_rate": 9.99992980727303e-06, "loss": 0.5246, "step": 252 }, { "epoch": 0.01173469387755102, "grad_norm": 16.347009658813477, "learning_rate": 9.999925853525804e-06, "loss": 0.4069, "step": 253 }, { "epoch": 0.011781076066790352, "grad_norm": 10.215469360351562, "learning_rate": 9.999921791457823e-06, "loss": 0.4391, "step": 254 }, { "epoch": 0.011827458256029684, "grad_norm": 9.577875137329102, "learning_rate": 9.999917621069172e-06, "loss": 0.4391, "step": 255 }, { "epoch": 0.011873840445269016, "grad_norm": 10.77310848236084, "learning_rate": 9.999913342359944e-06, "loss": 0.3939, "step": 256 }, { "epoch": 0.011920222634508348, "grad_norm": 29.845243453979492, "learning_rate": 9.99990895533023e-06, "loss": 0.561, "step": 257 }, { "epoch": 0.011966604823747682, "grad_norm": 19.473308563232422, "learning_rate": 9.999904459980125e-06, "loss": 0.4875, "step": 258 }, { "epoch": 0.012012987012987014, "grad_norm": 15.401079177856445, "learning_rate": 9.999899856309728e-06, "loss": 0.4034, "step": 259 }, { "epoch": 0.012059369202226345, "grad_norm": 11.751424789428711, "learning_rate": 9.999895144319139e-06, "loss": 0.3211, "step": 260 }, { "epoch": 0.012105751391465677, "grad_norm": 16.057451248168945, "learning_rate": 9.999890324008457e-06, "loss": 0.4444, "step": 261 }, { "epoch": 0.012152133580705009, "grad_norm": 15.862247467041016, "learning_rate": 9.999885395377788e-06, "loss": 0.4927, "step": 262 }, { "epoch": 0.012198515769944341, "grad_norm": 12.877408981323242, "learning_rate": 9.999880358427239e-06, "loss": 0.4934, "step": 263 }, { "epoch": 0.012244897959183673, "grad_norm": 12.830609321594238, "learning_rate": 9.999875213156919e-06, "loss": 0.5721, "step": 264 }, { "epoch": 0.012291280148423005, "grad_norm": 14.795201301574707, "learning_rate": 9.999869959566942e-06, "loss": 0.3257, "step": 265 }, { "epoch": 0.012337662337662338, "grad_norm": 10.316644668579102, "learning_rate": 9.999864597657419e-06, "loss": 0.4135, "step": 266 }, { "epoch": 0.01238404452690167, "grad_norm": 11.407381057739258, "learning_rate": 9.999859127428465e-06, "loss": 0.3759, "step": 267 }, { "epoch": 0.012430426716141002, "grad_norm": 19.35237693786621, "learning_rate": 9.999853548880201e-06, "loss": 0.4824, "step": 268 }, { "epoch": 0.012476808905380334, "grad_norm": 12.819117546081543, "learning_rate": 9.999847862012748e-06, "loss": 0.3113, "step": 269 }, { "epoch": 0.012523191094619666, "grad_norm": 29.586483001708984, "learning_rate": 9.999842066826226e-06, "loss": 0.499, "step": 270 }, { "epoch": 0.012569573283858998, "grad_norm": 15.29499340057373, "learning_rate": 9.999836163320763e-06, "loss": 0.3989, "step": 271 }, { "epoch": 0.01261595547309833, "grad_norm": 12.312557220458984, "learning_rate": 9.999830151496489e-06, "loss": 0.3662, "step": 272 }, { "epoch": 0.012662337662337663, "grad_norm": 21.5645751953125, "learning_rate": 9.99982403135353e-06, "loss": 0.6054, "step": 273 }, { "epoch": 0.012708719851576995, "grad_norm": 20.613798141479492, "learning_rate": 9.999817802892021e-06, "loss": 0.3967, "step": 274 }, { "epoch": 0.012755102040816327, "grad_norm": 12.558703422546387, "learning_rate": 9.999811466112097e-06, "loss": 0.3344, "step": 275 }, { "epoch": 0.012801484230055659, "grad_norm": 8.312406539916992, "learning_rate": 9.999805021013894e-06, "loss": 0.4454, "step": 276 }, { "epoch": 0.01284786641929499, "grad_norm": 5.033135890960693, "learning_rate": 9.999798467597552e-06, "loss": 0.3608, "step": 277 }, { "epoch": 0.012894248608534322, "grad_norm": 31.886476516723633, "learning_rate": 9.999791805863213e-06, "loss": 0.5432, "step": 278 }, { "epoch": 0.012940630797773654, "grad_norm": 13.164901733398438, "learning_rate": 9.999785035811023e-06, "loss": 0.438, "step": 279 }, { "epoch": 0.012987012987012988, "grad_norm": 15.021075248718262, "learning_rate": 9.999778157441126e-06, "loss": 0.3537, "step": 280 }, { "epoch": 0.01303339517625232, "grad_norm": 22.82093620300293, "learning_rate": 9.999771170753674e-06, "loss": 0.4871, "step": 281 }, { "epoch": 0.013079777365491652, "grad_norm": 22.16863250732422, "learning_rate": 9.999764075748815e-06, "loss": 0.5971, "step": 282 }, { "epoch": 0.013126159554730983, "grad_norm": 13.325957298278809, "learning_rate": 9.999756872426705e-06, "loss": 0.3969, "step": 283 }, { "epoch": 0.013172541743970315, "grad_norm": 17.813491821289062, "learning_rate": 9.999749560787501e-06, "loss": 0.4107, "step": 284 }, { "epoch": 0.013218923933209647, "grad_norm": 10.40810775756836, "learning_rate": 9.999742140831357e-06, "loss": 0.402, "step": 285 }, { "epoch": 0.013265306122448979, "grad_norm": 23.118030548095703, "learning_rate": 9.999734612558439e-06, "loss": 0.4592, "step": 286 }, { "epoch": 0.013311688311688311, "grad_norm": 13.456562995910645, "learning_rate": 9.999726975968907e-06, "loss": 0.501, "step": 287 }, { "epoch": 0.013358070500927645, "grad_norm": 11.347116470336914, "learning_rate": 9.999719231062926e-06, "loss": 0.3446, "step": 288 }, { "epoch": 0.013404452690166976, "grad_norm": 11.830185890197754, "learning_rate": 9.999711377840666e-06, "loss": 0.4292, "step": 289 }, { "epoch": 0.013450834879406308, "grad_norm": 14.376225471496582, "learning_rate": 9.999703416302296e-06, "loss": 0.3977, "step": 290 }, { "epoch": 0.01349721706864564, "grad_norm": 25.203556060791016, "learning_rate": 9.999695346447988e-06, "loss": 0.5429, "step": 291 }, { "epoch": 0.013543599257884972, "grad_norm": 14.70173168182373, "learning_rate": 9.999687168277918e-06, "loss": 0.4855, "step": 292 }, { "epoch": 0.013589981447124304, "grad_norm": 23.942310333251953, "learning_rate": 9.999678881792262e-06, "loss": 0.5477, "step": 293 }, { "epoch": 0.013636363636363636, "grad_norm": 25.921907424926758, "learning_rate": 9.999670486991201e-06, "loss": 0.5354, "step": 294 }, { "epoch": 0.01368274582560297, "grad_norm": 21.14668083190918, "learning_rate": 9.999661983874915e-06, "loss": 0.5013, "step": 295 }, { "epoch": 0.013729128014842301, "grad_norm": 15.631307601928711, "learning_rate": 9.99965337244359e-06, "loss": 0.6194, "step": 296 }, { "epoch": 0.013775510204081633, "grad_norm": 15.074217796325684, "learning_rate": 9.999644652697411e-06, "loss": 0.4061, "step": 297 }, { "epoch": 0.013821892393320965, "grad_norm": 10.668344497680664, "learning_rate": 9.999635824636568e-06, "loss": 0.411, "step": 298 }, { "epoch": 0.013868274582560297, "grad_norm": 11.901644706726074, "learning_rate": 9.999626888261254e-06, "loss": 0.4824, "step": 299 }, { "epoch": 0.013914656771799629, "grad_norm": 17.35301399230957, "learning_rate": 9.999617843571657e-06, "loss": 0.5142, "step": 300 }, { "epoch": 0.01396103896103896, "grad_norm": 15.18199634552002, "learning_rate": 9.999608690567978e-06, "loss": 0.4422, "step": 301 }, { "epoch": 0.014007421150278292, "grad_norm": 12.881999969482422, "learning_rate": 9.999599429250413e-06, "loss": 0.369, "step": 302 }, { "epoch": 0.014053803339517626, "grad_norm": 10.430438995361328, "learning_rate": 9.999590059619164e-06, "loss": 0.4652, "step": 303 }, { "epoch": 0.014100185528756958, "grad_norm": 17.699949264526367, "learning_rate": 9.999580581674434e-06, "loss": 0.3876, "step": 304 }, { "epoch": 0.01414656771799629, "grad_norm": 15.291873931884766, "learning_rate": 9.999570995416426e-06, "loss": 0.4256, "step": 305 }, { "epoch": 0.014192949907235622, "grad_norm": 17.903867721557617, "learning_rate": 9.99956130084535e-06, "loss": 0.4578, "step": 306 }, { "epoch": 0.014239332096474953, "grad_norm": 9.430750846862793, "learning_rate": 9.999551497961418e-06, "loss": 0.2735, "step": 307 }, { "epoch": 0.014285714285714285, "grad_norm": 17.75493621826172, "learning_rate": 9.999541586764836e-06, "loss": 0.501, "step": 308 }, { "epoch": 0.014332096474953617, "grad_norm": 12.722823143005371, "learning_rate": 9.999531567255825e-06, "loss": 0.3819, "step": 309 }, { "epoch": 0.01437847866419295, "grad_norm": 11.749302864074707, "learning_rate": 9.999521439434598e-06, "loss": 0.5403, "step": 310 }, { "epoch": 0.014424860853432283, "grad_norm": 22.554380416870117, "learning_rate": 9.999511203301377e-06, "loss": 0.5474, "step": 311 }, { "epoch": 0.014471243042671614, "grad_norm": 11.5840482711792, "learning_rate": 9.999500858856382e-06, "loss": 0.3366, "step": 312 }, { "epoch": 0.014517625231910946, "grad_norm": 16.634658813476562, "learning_rate": 9.999490406099839e-06, "loss": 0.5144, "step": 313 }, { "epoch": 0.014564007421150278, "grad_norm": 16.76579475402832, "learning_rate": 9.999479845031971e-06, "loss": 0.3636, "step": 314 }, { "epoch": 0.01461038961038961, "grad_norm": 5.699509620666504, "learning_rate": 9.999469175653012e-06, "loss": 0.4035, "step": 315 }, { "epoch": 0.014656771799628942, "grad_norm": 12.244647979736328, "learning_rate": 9.999458397963189e-06, "loss": 0.408, "step": 316 }, { "epoch": 0.014703153988868274, "grad_norm": 11.165861129760742, "learning_rate": 9.999447511962737e-06, "loss": 0.4678, "step": 317 }, { "epoch": 0.014749536178107607, "grad_norm": 10.717164039611816, "learning_rate": 9.99943651765189e-06, "loss": 0.4089, "step": 318 }, { "epoch": 0.01479591836734694, "grad_norm": 19.74306297302246, "learning_rate": 9.99942541503089e-06, "loss": 0.6156, "step": 319 }, { "epoch": 0.014842300556586271, "grad_norm": 19.468746185302734, "learning_rate": 9.999414204099973e-06, "loss": 0.4328, "step": 320 }, { "epoch": 0.014888682745825603, "grad_norm": 16.618478775024414, "learning_rate": 9.999402884859385e-06, "loss": 0.4234, "step": 321 }, { "epoch": 0.014935064935064935, "grad_norm": 20.19729232788086, "learning_rate": 9.999391457309371e-06, "loss": 0.6294, "step": 322 }, { "epoch": 0.014981447124304267, "grad_norm": 15.622380256652832, "learning_rate": 9.99937992145018e-06, "loss": 0.3728, "step": 323 }, { "epoch": 0.015027829313543599, "grad_norm": 10.599291801452637, "learning_rate": 9.999368277282056e-06, "loss": 0.3412, "step": 324 }, { "epoch": 0.015074211502782932, "grad_norm": 8.864948272705078, "learning_rate": 9.999356524805257e-06, "loss": 0.4066, "step": 325 }, { "epoch": 0.015120593692022264, "grad_norm": 12.395633697509766, "learning_rate": 9.999344664020037e-06, "loss": 0.5943, "step": 326 }, { "epoch": 0.015166975881261596, "grad_norm": 10.67740535736084, "learning_rate": 9.99933269492665e-06, "loss": 0.3988, "step": 327 }, { "epoch": 0.015213358070500928, "grad_norm": 14.923480033874512, "learning_rate": 9.999320617525356e-06, "loss": 0.4802, "step": 328 }, { "epoch": 0.01525974025974026, "grad_norm": 17.44751739501953, "learning_rate": 9.999308431816422e-06, "loss": 0.553, "step": 329 }, { "epoch": 0.015306122448979591, "grad_norm": 12.231328010559082, "learning_rate": 9.999296137800102e-06, "loss": 0.4153, "step": 330 }, { "epoch": 0.015352504638218923, "grad_norm": 12.884787559509277, "learning_rate": 9.999283735476673e-06, "loss": 0.4583, "step": 331 }, { "epoch": 0.015398886827458255, "grad_norm": 13.177725791931152, "learning_rate": 9.999271224846397e-06, "loss": 0.3817, "step": 332 }, { "epoch": 0.015445269016697589, "grad_norm": 11.400463104248047, "learning_rate": 9.999258605909545e-06, "loss": 0.5005, "step": 333 }, { "epoch": 0.01549165120593692, "grad_norm": 19.554197311401367, "learning_rate": 9.999245878666394e-06, "loss": 0.5241, "step": 334 }, { "epoch": 0.015538033395176253, "grad_norm": 15.878414154052734, "learning_rate": 9.999233043117219e-06, "loss": 0.4711, "step": 335 }, { "epoch": 0.015584415584415584, "grad_norm": 22.297386169433594, "learning_rate": 9.999220099262294e-06, "loss": 0.5309, "step": 336 }, { "epoch": 0.015630797773654916, "grad_norm": 18.70017433166504, "learning_rate": 9.999207047101903e-06, "loss": 0.5919, "step": 337 }, { "epoch": 0.01567717996289425, "grad_norm": 18.673444747924805, "learning_rate": 9.999193886636328e-06, "loss": 0.5728, "step": 338 }, { "epoch": 0.01572356215213358, "grad_norm": 13.823525428771973, "learning_rate": 9.999180617865855e-06, "loss": 0.4644, "step": 339 }, { "epoch": 0.015769944341372914, "grad_norm": 8.363287925720215, "learning_rate": 9.999167240790773e-06, "loss": 0.4813, "step": 340 }, { "epoch": 0.015816326530612244, "grad_norm": 12.232155799865723, "learning_rate": 9.999153755411366e-06, "loss": 0.3236, "step": 341 }, { "epoch": 0.015862708719851577, "grad_norm": 6.351889610290527, "learning_rate": 9.99914016172793e-06, "loss": 0.3829, "step": 342 }, { "epoch": 0.015909090909090907, "grad_norm": 22.591659545898438, "learning_rate": 9.999126459740761e-06, "loss": 0.6178, "step": 343 }, { "epoch": 0.01595547309833024, "grad_norm": 11.436149597167969, "learning_rate": 9.999112649450154e-06, "loss": 0.4593, "step": 344 }, { "epoch": 0.016001855287569575, "grad_norm": 12.440062522888184, "learning_rate": 9.999098730856407e-06, "loss": 0.4297, "step": 345 }, { "epoch": 0.016048237476808905, "grad_norm": 25.424175262451172, "learning_rate": 9.999084703959823e-06, "loss": 0.4324, "step": 346 }, { "epoch": 0.01609461966604824, "grad_norm": 8.49652099609375, "learning_rate": 9.999070568760705e-06, "loss": 0.3443, "step": 347 }, { "epoch": 0.01614100185528757, "grad_norm": 12.926887512207031, "learning_rate": 9.999056325259361e-06, "loss": 0.4195, "step": 348 }, { "epoch": 0.016187384044526902, "grad_norm": 18.043378829956055, "learning_rate": 9.999041973456098e-06, "loss": 0.3381, "step": 349 }, { "epoch": 0.016233766233766232, "grad_norm": 9.602046966552734, "learning_rate": 9.999027513351227e-06, "loss": 0.4306, "step": 350 }, { "epoch": 0.016280148423005566, "grad_norm": 10.777905464172363, "learning_rate": 9.999012944945062e-06, "loss": 0.4143, "step": 351 }, { "epoch": 0.0163265306122449, "grad_norm": 14.565689086914062, "learning_rate": 9.998998268237918e-06, "loss": 0.4354, "step": 352 }, { "epoch": 0.01637291280148423, "grad_norm": 22.20606803894043, "learning_rate": 9.998983483230113e-06, "loss": 0.5712, "step": 353 }, { "epoch": 0.016419294990723563, "grad_norm": 23.742887496948242, "learning_rate": 9.998968589921969e-06, "loss": 0.6575, "step": 354 }, { "epoch": 0.016465677179962893, "grad_norm": 12.960214614868164, "learning_rate": 9.998953588313806e-06, "loss": 0.4366, "step": 355 }, { "epoch": 0.016512059369202227, "grad_norm": 10.846632957458496, "learning_rate": 9.99893847840595e-06, "loss": 0.4319, "step": 356 }, { "epoch": 0.016558441558441557, "grad_norm": 14.653470039367676, "learning_rate": 9.99892326019873e-06, "loss": 0.4222, "step": 357 }, { "epoch": 0.01660482374768089, "grad_norm": 12.274347305297852, "learning_rate": 9.998907933692472e-06, "loss": 0.4161, "step": 358 }, { "epoch": 0.016651205936920224, "grad_norm": 13.709214210510254, "learning_rate": 9.998892498887512e-06, "loss": 0.4664, "step": 359 }, { "epoch": 0.016697588126159554, "grad_norm": 19.684133529663086, "learning_rate": 9.998876955784183e-06, "loss": 0.4536, "step": 360 }, { "epoch": 0.016743970315398888, "grad_norm": 15.420945167541504, "learning_rate": 9.99886130438282e-06, "loss": 0.292, "step": 361 }, { "epoch": 0.016790352504638218, "grad_norm": 25.888181686401367, "learning_rate": 9.998845544683764e-06, "loss": 0.5593, "step": 362 }, { "epoch": 0.01683673469387755, "grad_norm": 19.75338363647461, "learning_rate": 9.998829676687355e-06, "loss": 0.5017, "step": 363 }, { "epoch": 0.016883116883116882, "grad_norm": 13.90053939819336, "learning_rate": 9.998813700393938e-06, "loss": 0.4812, "step": 364 }, { "epoch": 0.016929499072356215, "grad_norm": 16.79246711730957, "learning_rate": 9.998797615803859e-06, "loss": 0.3768, "step": 365 }, { "epoch": 0.01697588126159555, "grad_norm": 20.55197525024414, "learning_rate": 9.998781422917467e-06, "loss": 0.6336, "step": 366 }, { "epoch": 0.01702226345083488, "grad_norm": 12.15927791595459, "learning_rate": 9.998765121735112e-06, "loss": 0.4556, "step": 367 }, { "epoch": 0.017068645640074213, "grad_norm": 14.10701847076416, "learning_rate": 9.998748712257147e-06, "loss": 0.5186, "step": 368 }, { "epoch": 0.017115027829313543, "grad_norm": 19.770206451416016, "learning_rate": 9.998732194483927e-06, "loss": 0.505, "step": 369 }, { "epoch": 0.017161410018552876, "grad_norm": 15.2058744430542, "learning_rate": 9.99871556841581e-06, "loss": 0.4126, "step": 370 }, { "epoch": 0.017207792207792207, "grad_norm": 16.413545608520508, "learning_rate": 9.99869883405316e-06, "loss": 0.4407, "step": 371 }, { "epoch": 0.01725417439703154, "grad_norm": 14.80174446105957, "learning_rate": 9.998681991396333e-06, "loss": 0.4197, "step": 372 }, { "epoch": 0.01730055658627087, "grad_norm": 10.173277854919434, "learning_rate": 9.998665040445698e-06, "loss": 0.3979, "step": 373 }, { "epoch": 0.017346938775510204, "grad_norm": 9.259140968322754, "learning_rate": 9.99864798120162e-06, "loss": 0.3542, "step": 374 }, { "epoch": 0.017393320964749538, "grad_norm": 16.258237838745117, "learning_rate": 9.998630813664474e-06, "loss": 0.4515, "step": 375 }, { "epoch": 0.017439703153988868, "grad_norm": 12.750629425048828, "learning_rate": 9.998613537834625e-06, "loss": 0.5065, "step": 376 }, { "epoch": 0.0174860853432282, "grad_norm": 12.918572425842285, "learning_rate": 9.998596153712451e-06, "loss": 0.4569, "step": 377 }, { "epoch": 0.01753246753246753, "grad_norm": 9.856410026550293, "learning_rate": 9.998578661298327e-06, "loss": 0.3399, "step": 378 }, { "epoch": 0.017578849721706865, "grad_norm": 18.138648986816406, "learning_rate": 9.998561060592633e-06, "loss": 0.4824, "step": 379 }, { "epoch": 0.017625231910946195, "grad_norm": 20.090335845947266, "learning_rate": 9.998543351595752e-06, "loss": 0.5341, "step": 380 }, { "epoch": 0.01767161410018553, "grad_norm": 8.582220077514648, "learning_rate": 9.998525534308064e-06, "loss": 0.4039, "step": 381 }, { "epoch": 0.017717996289424862, "grad_norm": 8.609671592712402, "learning_rate": 9.998507608729957e-06, "loss": 0.4061, "step": 382 }, { "epoch": 0.017764378478664192, "grad_norm": 14.410698890686035, "learning_rate": 9.998489574861818e-06, "loss": 0.4782, "step": 383 }, { "epoch": 0.017810760667903526, "grad_norm": 9.977296829223633, "learning_rate": 9.99847143270404e-06, "loss": 0.4916, "step": 384 }, { "epoch": 0.017857142857142856, "grad_norm": 12.635509490966797, "learning_rate": 9.998453182257015e-06, "loss": 0.4054, "step": 385 }, { "epoch": 0.01790352504638219, "grad_norm": 14.834879875183105, "learning_rate": 9.998434823521138e-06, "loss": 0.4919, "step": 386 }, { "epoch": 0.01794990723562152, "grad_norm": 15.421005249023438, "learning_rate": 9.998416356496807e-06, "loss": 0.5981, "step": 387 }, { "epoch": 0.017996289424860853, "grad_norm": 10.876689910888672, "learning_rate": 9.998397781184422e-06, "loss": 0.3334, "step": 388 }, { "epoch": 0.018042671614100187, "grad_norm": 18.08253288269043, "learning_rate": 9.998379097584386e-06, "loss": 0.5689, "step": 389 }, { "epoch": 0.018089053803339517, "grad_norm": 4.969534397125244, "learning_rate": 9.9983603056971e-06, "loss": 0.3385, "step": 390 }, { "epoch": 0.01813543599257885, "grad_norm": 9.288270950317383, "learning_rate": 9.998341405522977e-06, "loss": 0.4789, "step": 391 }, { "epoch": 0.01818181818181818, "grad_norm": 13.628630638122559, "learning_rate": 9.998322397062426e-06, "loss": 0.322, "step": 392 }, { "epoch": 0.018228200371057515, "grad_norm": 10.653026580810547, "learning_rate": 9.998303280315852e-06, "loss": 0.4467, "step": 393 }, { "epoch": 0.018274582560296845, "grad_norm": 10.981630325317383, "learning_rate": 9.998284055283677e-06, "loss": 0.4748, "step": 394 }, { "epoch": 0.018320964749536178, "grad_norm": 9.591090202331543, "learning_rate": 9.998264721966314e-06, "loss": 0.4616, "step": 395 }, { "epoch": 0.018367346938775512, "grad_norm": 25.453672409057617, "learning_rate": 9.99824528036418e-06, "loss": 0.4829, "step": 396 }, { "epoch": 0.018413729128014842, "grad_norm": 15.349074363708496, "learning_rate": 9.9982257304777e-06, "loss": 0.5076, "step": 397 }, { "epoch": 0.018460111317254176, "grad_norm": 7.021084308624268, "learning_rate": 9.998206072307296e-06, "loss": 0.3528, "step": 398 }, { "epoch": 0.018506493506493506, "grad_norm": 15.269336700439453, "learning_rate": 9.998186305853394e-06, "loss": 0.4418, "step": 399 }, { "epoch": 0.01855287569573284, "grad_norm": 10.455620765686035, "learning_rate": 9.998166431116421e-06, "loss": 0.4199, "step": 400 }, { "epoch": 0.01859925788497217, "grad_norm": 7.480299472808838, "learning_rate": 9.998146448096808e-06, "loss": 0.3783, "step": 401 }, { "epoch": 0.018645640074211503, "grad_norm": 14.57479190826416, "learning_rate": 9.99812635679499e-06, "loss": 0.4725, "step": 402 }, { "epoch": 0.018692022263450833, "grad_norm": 9.474591255187988, "learning_rate": 9.9981061572114e-06, "loss": 0.4423, "step": 403 }, { "epoch": 0.018738404452690167, "grad_norm": 12.353121757507324, "learning_rate": 9.998085849346474e-06, "loss": 0.4773, "step": 404 }, { "epoch": 0.0187847866419295, "grad_norm": 12.38192367553711, "learning_rate": 9.998065433200658e-06, "loss": 0.3668, "step": 405 }, { "epoch": 0.01883116883116883, "grad_norm": 12.570685386657715, "learning_rate": 9.998044908774389e-06, "loss": 0.5234, "step": 406 }, { "epoch": 0.018877551020408164, "grad_norm": 11.429500579833984, "learning_rate": 9.998024276068113e-06, "loss": 0.3646, "step": 407 }, { "epoch": 0.018923933209647494, "grad_norm": 20.052858352661133, "learning_rate": 9.99800353508228e-06, "loss": 0.3613, "step": 408 }, { "epoch": 0.018970315398886828, "grad_norm": 5.934525012969971, "learning_rate": 9.997982685817333e-06, "loss": 0.3007, "step": 409 }, { "epoch": 0.019016697588126158, "grad_norm": 9.791382789611816, "learning_rate": 9.997961728273729e-06, "loss": 0.2548, "step": 410 }, { "epoch": 0.01906307977736549, "grad_norm": 13.638093948364258, "learning_rate": 9.997940662451919e-06, "loss": 0.4609, "step": 411 }, { "epoch": 0.019109461966604825, "grad_norm": 19.470966339111328, "learning_rate": 9.997919488352361e-06, "loss": 0.5155, "step": 412 }, { "epoch": 0.019155844155844155, "grad_norm": 18.560630798339844, "learning_rate": 9.997898205975513e-06, "loss": 0.4947, "step": 413 }, { "epoch": 0.01920222634508349, "grad_norm": 17.138980865478516, "learning_rate": 9.997876815321839e-06, "loss": 0.4066, "step": 414 }, { "epoch": 0.01924860853432282, "grad_norm": 12.902755737304688, "learning_rate": 9.997855316391798e-06, "loss": 0.4668, "step": 415 }, { "epoch": 0.019294990723562153, "grad_norm": 20.309480667114258, "learning_rate": 9.997833709185858e-06, "loss": 0.6251, "step": 416 }, { "epoch": 0.019341372912801483, "grad_norm": 14.848982810974121, "learning_rate": 9.997811993704487e-06, "loss": 0.3955, "step": 417 }, { "epoch": 0.019387755102040816, "grad_norm": 9.513402938842773, "learning_rate": 9.997790169948154e-06, "loss": 0.3959, "step": 418 }, { "epoch": 0.01943413729128015, "grad_norm": 12.262441635131836, "learning_rate": 9.997768237917333e-06, "loss": 0.4122, "step": 419 }, { "epoch": 0.01948051948051948, "grad_norm": 8.94841194152832, "learning_rate": 9.997746197612502e-06, "loss": 0.4497, "step": 420 }, { "epoch": 0.019526901669758814, "grad_norm": 8.417898178100586, "learning_rate": 9.997724049034132e-06, "loss": 0.4746, "step": 421 }, { "epoch": 0.019573283858998144, "grad_norm": 9.352350234985352, "learning_rate": 9.99770179218271e-06, "loss": 0.3374, "step": 422 }, { "epoch": 0.019619666048237477, "grad_norm": 7.416864395141602, "learning_rate": 9.997679427058713e-06, "loss": 0.4466, "step": 423 }, { "epoch": 0.019666048237476808, "grad_norm": 15.214370727539062, "learning_rate": 9.997656953662627e-06, "loss": 0.3388, "step": 424 }, { "epoch": 0.01971243042671614, "grad_norm": 11.57961368560791, "learning_rate": 9.99763437199494e-06, "loss": 0.4667, "step": 425 }, { "epoch": 0.019758812615955475, "grad_norm": 32.25101852416992, "learning_rate": 9.99761168205614e-06, "loss": 0.4509, "step": 426 }, { "epoch": 0.019805194805194805, "grad_norm": 11.605915069580078, "learning_rate": 9.997588883846716e-06, "loss": 0.4625, "step": 427 }, { "epoch": 0.01985157699443414, "grad_norm": 6.423886775970459, "learning_rate": 9.997565977367167e-06, "loss": 0.3915, "step": 428 }, { "epoch": 0.01989795918367347, "grad_norm": 13.567317962646484, "learning_rate": 9.997542962617988e-06, "loss": 0.4429, "step": 429 }, { "epoch": 0.019944341372912802, "grad_norm": 11.648382186889648, "learning_rate": 9.997519839599677e-06, "loss": 0.4084, "step": 430 }, { "epoch": 0.019990723562152132, "grad_norm": 7.8050761222839355, "learning_rate": 9.997496608312733e-06, "loss": 0.3569, "step": 431 }, { "epoch": 0.020037105751391466, "grad_norm": 24.896263122558594, "learning_rate": 9.997473268757662e-06, "loss": 0.6417, "step": 432 }, { "epoch": 0.0200834879406308, "grad_norm": 13.97718620300293, "learning_rate": 9.997449820934969e-06, "loss": 0.6244, "step": 433 }, { "epoch": 0.02012987012987013, "grad_norm": 8.7470121383667, "learning_rate": 9.997426264845157e-06, "loss": 0.3854, "step": 434 }, { "epoch": 0.020176252319109463, "grad_norm": 14.742194175720215, "learning_rate": 9.997402600488745e-06, "loss": 0.3586, "step": 435 }, { "epoch": 0.020222634508348793, "grad_norm": 11.389725685119629, "learning_rate": 9.997378827866242e-06, "loss": 0.479, "step": 436 }, { "epoch": 0.020269016697588127, "grad_norm": 13.522912979125977, "learning_rate": 9.997354946978162e-06, "loss": 0.4663, "step": 437 }, { "epoch": 0.020315398886827457, "grad_norm": 11.85799503326416, "learning_rate": 9.997330957825021e-06, "loss": 0.4344, "step": 438 }, { "epoch": 0.02036178107606679, "grad_norm": 12.894128799438477, "learning_rate": 9.997306860407341e-06, "loss": 0.5099, "step": 439 }, { "epoch": 0.02040816326530612, "grad_norm": 8.857590675354004, "learning_rate": 9.997282654725645e-06, "loss": 0.402, "step": 440 }, { "epoch": 0.020454545454545454, "grad_norm": 9.031881332397461, "learning_rate": 9.997258340780455e-06, "loss": 0.4159, "step": 441 }, { "epoch": 0.020500927643784788, "grad_norm": 14.81790828704834, "learning_rate": 9.9972339185723e-06, "loss": 0.5193, "step": 442 }, { "epoch": 0.020547309833024118, "grad_norm": 16.07683563232422, "learning_rate": 9.997209388101707e-06, "loss": 0.5168, "step": 443 }, { "epoch": 0.020593692022263452, "grad_norm": 10.668745040893555, "learning_rate": 9.99718474936921e-06, "loss": 0.4428, "step": 444 }, { "epoch": 0.020640074211502782, "grad_norm": 18.621889114379883, "learning_rate": 9.997160002375338e-06, "loss": 0.5585, "step": 445 }, { "epoch": 0.020686456400742115, "grad_norm": 8.12794017791748, "learning_rate": 9.997135147120633e-06, "loss": 0.4615, "step": 446 }, { "epoch": 0.020732838589981446, "grad_norm": 5.926182746887207, "learning_rate": 9.99711018360563e-06, "loss": 0.3119, "step": 447 }, { "epoch": 0.02077922077922078, "grad_norm": 18.108600616455078, "learning_rate": 9.99708511183087e-06, "loss": 0.4182, "step": 448 }, { "epoch": 0.020825602968460113, "grad_norm": 14.037094116210938, "learning_rate": 9.997059931796897e-06, "loss": 0.4552, "step": 449 }, { "epoch": 0.020871985157699443, "grad_norm": 13.26927375793457, "learning_rate": 9.997034643504258e-06, "loss": 0.3549, "step": 450 }, { "epoch": 0.020918367346938777, "grad_norm": 15.737500190734863, "learning_rate": 9.997009246953497e-06, "loss": 0.3943, "step": 451 }, { "epoch": 0.020964749536178107, "grad_norm": 15.747722625732422, "learning_rate": 9.996983742145167e-06, "loss": 0.51, "step": 452 }, { "epoch": 0.02101113172541744, "grad_norm": 13.574549674987793, "learning_rate": 9.99695812907982e-06, "loss": 0.4029, "step": 453 }, { "epoch": 0.02105751391465677, "grad_norm": 5.155558109283447, "learning_rate": 9.996932407758012e-06, "loss": 0.2527, "step": 454 }, { "epoch": 0.021103896103896104, "grad_norm": 19.316267013549805, "learning_rate": 9.996906578180296e-06, "loss": 0.5069, "step": 455 }, { "epoch": 0.021150278293135438, "grad_norm": 7.984254360198975, "learning_rate": 9.996880640347234e-06, "loss": 0.4037, "step": 456 }, { "epoch": 0.021196660482374768, "grad_norm": 9.42639446258545, "learning_rate": 9.996854594259393e-06, "loss": 0.4262, "step": 457 }, { "epoch": 0.0212430426716141, "grad_norm": 18.78527069091797, "learning_rate": 9.996828439917329e-06, "loss": 0.4076, "step": 458 }, { "epoch": 0.02128942486085343, "grad_norm": 5.589349269866943, "learning_rate": 9.996802177321614e-06, "loss": 0.3992, "step": 459 }, { "epoch": 0.021335807050092765, "grad_norm": 16.680078506469727, "learning_rate": 9.996775806472816e-06, "loss": 0.5207, "step": 460 }, { "epoch": 0.021382189239332095, "grad_norm": 14.581684112548828, "learning_rate": 9.996749327371505e-06, "loss": 0.4545, "step": 461 }, { "epoch": 0.02142857142857143, "grad_norm": 12.732311248779297, "learning_rate": 9.996722740018255e-06, "loss": 0.3969, "step": 462 }, { "epoch": 0.021474953617810762, "grad_norm": 16.83061981201172, "learning_rate": 9.996696044413642e-06, "loss": 0.563, "step": 463 }, { "epoch": 0.021521335807050092, "grad_norm": 12.510784149169922, "learning_rate": 9.996669240558245e-06, "loss": 0.39, "step": 464 }, { "epoch": 0.021567717996289426, "grad_norm": 13.919502258300781, "learning_rate": 9.996642328452643e-06, "loss": 0.3743, "step": 465 }, { "epoch": 0.021614100185528756, "grad_norm": 21.372177124023438, "learning_rate": 9.996615308097424e-06, "loss": 0.4047, "step": 466 }, { "epoch": 0.02166048237476809, "grad_norm": 24.37531280517578, "learning_rate": 9.996588179493167e-06, "loss": 0.5668, "step": 467 }, { "epoch": 0.02170686456400742, "grad_norm": 10.380960464477539, "learning_rate": 9.996560942640464e-06, "loss": 0.493, "step": 468 }, { "epoch": 0.021753246753246754, "grad_norm": 10.565666198730469, "learning_rate": 9.996533597539901e-06, "loss": 0.4091, "step": 469 }, { "epoch": 0.021799628942486084, "grad_norm": 10.237813949584961, "learning_rate": 9.996506144192075e-06, "loss": 0.4081, "step": 470 }, { "epoch": 0.021846011131725417, "grad_norm": 15.6351957321167, "learning_rate": 9.99647858259758e-06, "loss": 0.4042, "step": 471 }, { "epoch": 0.02189239332096475, "grad_norm": 11.727354049682617, "learning_rate": 9.99645091275701e-06, "loss": 0.4863, "step": 472 }, { "epoch": 0.02193877551020408, "grad_norm": 13.934988021850586, "learning_rate": 9.996423134670967e-06, "loss": 0.5287, "step": 473 }, { "epoch": 0.021985157699443415, "grad_norm": 10.708784103393555, "learning_rate": 9.996395248340051e-06, "loss": 0.4513, "step": 474 }, { "epoch": 0.022031539888682745, "grad_norm": 14.386698722839355, "learning_rate": 9.996367253764868e-06, "loss": 0.3464, "step": 475 }, { "epoch": 0.02207792207792208, "grad_norm": 13.900192260742188, "learning_rate": 9.996339150946024e-06, "loss": 0.5366, "step": 476 }, { "epoch": 0.02212430426716141, "grad_norm": 12.127838134765625, "learning_rate": 9.996310939884128e-06, "loss": 0.4831, "step": 477 }, { "epoch": 0.022170686456400742, "grad_norm": 12.317657470703125, "learning_rate": 9.99628262057979e-06, "loss": 0.3565, "step": 478 }, { "epoch": 0.022217068645640076, "grad_norm": 8.326650619506836, "learning_rate": 9.996254193033625e-06, "loss": 0.4805, "step": 479 }, { "epoch": 0.022263450834879406, "grad_norm": 12.474386215209961, "learning_rate": 9.996225657246248e-06, "loss": 0.443, "step": 480 }, { "epoch": 0.02230983302411874, "grad_norm": 9.641541481018066, "learning_rate": 9.996197013218275e-06, "loss": 0.4209, "step": 481 }, { "epoch": 0.02235621521335807, "grad_norm": 12.268060684204102, "learning_rate": 9.996168260950333e-06, "loss": 0.4168, "step": 482 }, { "epoch": 0.022402597402597403, "grad_norm": 17.13990020751953, "learning_rate": 9.99613940044304e-06, "loss": 0.4545, "step": 483 }, { "epoch": 0.022448979591836733, "grad_norm": 12.269598960876465, "learning_rate": 9.99611043169702e-06, "loss": 0.3549, "step": 484 }, { "epoch": 0.022495361781076067, "grad_norm": 17.89301872253418, "learning_rate": 9.996081354712903e-06, "loss": 0.4391, "step": 485 }, { "epoch": 0.0225417439703154, "grad_norm": 18.883787155151367, "learning_rate": 9.99605216949132e-06, "loss": 0.3174, "step": 486 }, { "epoch": 0.02258812615955473, "grad_norm": 14.12934398651123, "learning_rate": 9.9960228760329e-06, "loss": 0.4598, "step": 487 }, { "epoch": 0.022634508348794064, "grad_norm": 20.61086654663086, "learning_rate": 9.99599347433828e-06, "loss": 0.5071, "step": 488 }, { "epoch": 0.022680890538033394, "grad_norm": 18.13511848449707, "learning_rate": 9.995963964408098e-06, "loss": 0.4067, "step": 489 }, { "epoch": 0.022727272727272728, "grad_norm": 15.059610366821289, "learning_rate": 9.995934346242991e-06, "loss": 0.4022, "step": 490 }, { "epoch": 0.022773654916512058, "grad_norm": 7.75626277923584, "learning_rate": 9.995904619843601e-06, "loss": 0.3735, "step": 491 }, { "epoch": 0.02282003710575139, "grad_norm": 17.870248794555664, "learning_rate": 9.995874785210573e-06, "loss": 0.5989, "step": 492 }, { "epoch": 0.022866419294990725, "grad_norm": 10.790718078613281, "learning_rate": 9.995844842344553e-06, "loss": 0.4733, "step": 493 }, { "epoch": 0.022912801484230055, "grad_norm": 6.733753681182861, "learning_rate": 9.99581479124619e-06, "loss": 0.3168, "step": 494 }, { "epoch": 0.02295918367346939, "grad_norm": 12.90187931060791, "learning_rate": 9.995784631916133e-06, "loss": 0.4726, "step": 495 }, { "epoch": 0.02300556586270872, "grad_norm": 8.824695587158203, "learning_rate": 9.995754364355037e-06, "loss": 0.5228, "step": 496 }, { "epoch": 0.023051948051948053, "grad_norm": 10.770858764648438, "learning_rate": 9.99572398856356e-06, "loss": 0.4192, "step": 497 }, { "epoch": 0.023098330241187383, "grad_norm": 9.161310195922852, "learning_rate": 9.995693504542355e-06, "loss": 0.3434, "step": 498 }, { "epoch": 0.023144712430426716, "grad_norm": 10.027313232421875, "learning_rate": 9.995662912292087e-06, "loss": 0.3593, "step": 499 }, { "epoch": 0.023191094619666047, "grad_norm": 9.69564151763916, "learning_rate": 9.995632211813415e-06, "loss": 0.4387, "step": 500 }, { "epoch": 0.02323747680890538, "grad_norm": 16.446680068969727, "learning_rate": 9.995601403107009e-06, "loss": 0.3991, "step": 501 }, { "epoch": 0.023283858998144714, "grad_norm": 7.468308925628662, "learning_rate": 9.995570486173531e-06, "loss": 0.3727, "step": 502 }, { "epoch": 0.023330241187384044, "grad_norm": 20.268829345703125, "learning_rate": 9.995539461013654e-06, "loss": 0.4834, "step": 503 }, { "epoch": 0.023376623376623377, "grad_norm": 13.176007270812988, "learning_rate": 9.99550832762805e-06, "loss": 0.473, "step": 504 }, { "epoch": 0.023423005565862708, "grad_norm": 15.586201667785645, "learning_rate": 9.995477086017392e-06, "loss": 0.3094, "step": 505 }, { "epoch": 0.02346938775510204, "grad_norm": 29.377565383911133, "learning_rate": 9.99544573618236e-06, "loss": 0.5329, "step": 506 }, { "epoch": 0.02351576994434137, "grad_norm": 22.84436798095703, "learning_rate": 9.995414278123628e-06, "loss": 0.4447, "step": 507 }, { "epoch": 0.023562152133580705, "grad_norm": 12.299846649169922, "learning_rate": 9.995382711841881e-06, "loss": 0.3868, "step": 508 }, { "epoch": 0.02360853432282004, "grad_norm": 14.411394119262695, "learning_rate": 9.995351037337804e-06, "loss": 0.4822, "step": 509 }, { "epoch": 0.02365491651205937, "grad_norm": 12.82713508605957, "learning_rate": 9.99531925461208e-06, "loss": 0.4948, "step": 510 }, { "epoch": 0.023701298701298702, "grad_norm": 11.944758415222168, "learning_rate": 9.9952873636654e-06, "loss": 0.4324, "step": 511 }, { "epoch": 0.023747680890538032, "grad_norm": 13.985873222351074, "learning_rate": 9.995255364498454e-06, "loss": 0.3997, "step": 512 }, { "epoch": 0.023794063079777366, "grad_norm": 13.756515502929688, "learning_rate": 9.995223257111935e-06, "loss": 0.494, "step": 513 }, { "epoch": 0.023840445269016696, "grad_norm": 11.292520523071289, "learning_rate": 9.995191041506538e-06, "loss": 0.4452, "step": 514 }, { "epoch": 0.02388682745825603, "grad_norm": 9.059032440185547, "learning_rate": 9.995158717682963e-06, "loss": 0.403, "step": 515 }, { "epoch": 0.023933209647495363, "grad_norm": 7.809190273284912, "learning_rate": 9.99512628564191e-06, "loss": 0.3812, "step": 516 }, { "epoch": 0.023979591836734693, "grad_norm": 14.084468841552734, "learning_rate": 9.995093745384079e-06, "loss": 0.4743, "step": 517 }, { "epoch": 0.024025974025974027, "grad_norm": 9.819110870361328, "learning_rate": 9.995061096910176e-06, "loss": 0.2726, "step": 518 }, { "epoch": 0.024072356215213357, "grad_norm": 11.324039459228516, "learning_rate": 9.99502834022091e-06, "loss": 0.3705, "step": 519 }, { "epoch": 0.02411873840445269, "grad_norm": 16.892650604248047, "learning_rate": 9.99499547531699e-06, "loss": 0.459, "step": 520 }, { "epoch": 0.02416512059369202, "grad_norm": 13.938953399658203, "learning_rate": 9.994962502199127e-06, "loss": 0.4644, "step": 521 }, { "epoch": 0.024211502782931354, "grad_norm": 18.024951934814453, "learning_rate": 9.994929420868036e-06, "loss": 0.3961, "step": 522 }, { "epoch": 0.024257884972170688, "grad_norm": 9.009638786315918, "learning_rate": 9.994896231324434e-06, "loss": 0.394, "step": 523 }, { "epoch": 0.024304267161410018, "grad_norm": 19.14435386657715, "learning_rate": 9.99486293356904e-06, "loss": 0.5224, "step": 524 }, { "epoch": 0.024350649350649352, "grad_norm": 9.587272644042969, "learning_rate": 9.994829527602575e-06, "loss": 0.2914, "step": 525 }, { "epoch": 0.024397031539888682, "grad_norm": 17.456771850585938, "learning_rate": 9.99479601342576e-06, "loss": 0.4573, "step": 526 }, { "epoch": 0.024443413729128016, "grad_norm": 8.389126777648926, "learning_rate": 9.994762391039327e-06, "loss": 0.4212, "step": 527 }, { "epoch": 0.024489795918367346, "grad_norm": 12.44912338256836, "learning_rate": 9.994728660443999e-06, "loss": 0.4215, "step": 528 }, { "epoch": 0.02453617810760668, "grad_norm": 12.084781646728516, "learning_rate": 9.994694821640512e-06, "loss": 0.4979, "step": 529 }, { "epoch": 0.02458256029684601, "grad_norm": 15.367430686950684, "learning_rate": 9.994660874629594e-06, "loss": 0.4926, "step": 530 }, { "epoch": 0.024628942486085343, "grad_norm": 11.93893814086914, "learning_rate": 9.994626819411984e-06, "loss": 0.3149, "step": 531 }, { "epoch": 0.024675324675324677, "grad_norm": 11.870102882385254, "learning_rate": 9.994592655988417e-06, "loss": 0.4665, "step": 532 }, { "epoch": 0.024721706864564007, "grad_norm": 12.897137641906738, "learning_rate": 9.994558384359634e-06, "loss": 0.4713, "step": 533 }, { "epoch": 0.02476808905380334, "grad_norm": 9.97575569152832, "learning_rate": 9.99452400452638e-06, "loss": 0.5078, "step": 534 }, { "epoch": 0.02481447124304267, "grad_norm": 8.825387954711914, "learning_rate": 9.994489516489396e-06, "loss": 0.444, "step": 535 }, { "epoch": 0.024860853432282004, "grad_norm": 9.033124923706055, "learning_rate": 9.994454920249433e-06, "loss": 0.5008, "step": 536 }, { "epoch": 0.024907235621521334, "grad_norm": 8.010178565979004, "learning_rate": 9.994420215807236e-06, "loss": 0.4505, "step": 537 }, { "epoch": 0.024953617810760668, "grad_norm": 12.437600135803223, "learning_rate": 9.99438540316356e-06, "loss": 0.3808, "step": 538 }, { "epoch": 0.025, "grad_norm": 11.359349250793457, "learning_rate": 9.99435048231916e-06, "loss": 0.5183, "step": 539 }, { "epoch": 0.02504638218923933, "grad_norm": 12.836058616638184, "learning_rate": 9.994315453274789e-06, "loss": 0.4866, "step": 540 }, { "epoch": 0.025092764378478665, "grad_norm": 11.59450626373291, "learning_rate": 9.994280316031208e-06, "loss": 0.2181, "step": 541 }, { "epoch": 0.025139146567717995, "grad_norm": 11.341219902038574, "learning_rate": 9.99424507058918e-06, "loss": 0.4528, "step": 542 }, { "epoch": 0.02518552875695733, "grad_norm": 16.884899139404297, "learning_rate": 9.994209716949466e-06, "loss": 0.6682, "step": 543 }, { "epoch": 0.02523191094619666, "grad_norm": 11.675948143005371, "learning_rate": 9.994174255112831e-06, "loss": 0.276, "step": 544 }, { "epoch": 0.025278293135435993, "grad_norm": 10.073548316955566, "learning_rate": 9.994138685080047e-06, "loss": 0.5035, "step": 545 }, { "epoch": 0.025324675324675326, "grad_norm": 6.844711780548096, "learning_rate": 9.994103006851881e-06, "loss": 0.4197, "step": 546 }, { "epoch": 0.025371057513914656, "grad_norm": 7.230904579162598, "learning_rate": 9.99406722042911e-06, "loss": 0.4242, "step": 547 }, { "epoch": 0.02541743970315399, "grad_norm": 10.018954277038574, "learning_rate": 9.994031325812503e-06, "loss": 0.4034, "step": 548 }, { "epoch": 0.02546382189239332, "grad_norm": 13.652670860290527, "learning_rate": 9.993995323002844e-06, "loss": 0.493, "step": 549 }, { "epoch": 0.025510204081632654, "grad_norm": 11.863456726074219, "learning_rate": 9.993959212000909e-06, "loss": 0.5011, "step": 550 }, { "epoch": 0.025556586270871984, "grad_norm": 8.024276733398438, "learning_rate": 9.993922992807483e-06, "loss": 0.4708, "step": 551 }, { "epoch": 0.025602968460111317, "grad_norm": 6.58811092376709, "learning_rate": 9.993886665423348e-06, "loss": 0.419, "step": 552 }, { "epoch": 0.02564935064935065, "grad_norm": 6.857876300811768, "learning_rate": 9.993850229849296e-06, "loss": 0.3728, "step": 553 }, { "epoch": 0.02569573283858998, "grad_norm": 6.044053077697754, "learning_rate": 9.99381368608611e-06, "loss": 0.3107, "step": 554 }, { "epoch": 0.025742115027829315, "grad_norm": 12.73462200164795, "learning_rate": 9.993777034134584e-06, "loss": 0.4956, "step": 555 }, { "epoch": 0.025788497217068645, "grad_norm": 8.504672050476074, "learning_rate": 9.993740273995512e-06, "loss": 0.4553, "step": 556 }, { "epoch": 0.02583487940630798, "grad_norm": 8.41345500946045, "learning_rate": 9.993703405669693e-06, "loss": 0.3715, "step": 557 }, { "epoch": 0.02588126159554731, "grad_norm": 10.607184410095215, "learning_rate": 9.993666429157922e-06, "loss": 0.2833, "step": 558 }, { "epoch": 0.025927643784786642, "grad_norm": 7.834974765777588, "learning_rate": 9.993629344461003e-06, "loss": 0.4235, "step": 559 }, { "epoch": 0.025974025974025976, "grad_norm": 10.577714920043945, "learning_rate": 9.993592151579736e-06, "loss": 0.3939, "step": 560 }, { "epoch": 0.026020408163265306, "grad_norm": 16.69390869140625, "learning_rate": 9.99355485051493e-06, "loss": 0.442, "step": 561 }, { "epoch": 0.02606679035250464, "grad_norm": 15.654104232788086, "learning_rate": 9.993517441267392e-06, "loss": 0.4389, "step": 562 }, { "epoch": 0.02611317254174397, "grad_norm": 10.259732246398926, "learning_rate": 9.993479923837934e-06, "loss": 0.445, "step": 563 }, { "epoch": 0.026159554730983303, "grad_norm": 10.195446014404297, "learning_rate": 9.993442298227365e-06, "loss": 0.3188, "step": 564 }, { "epoch": 0.026205936920222633, "grad_norm": 8.032925605773926, "learning_rate": 9.993404564436504e-06, "loss": 0.3971, "step": 565 }, { "epoch": 0.026252319109461967, "grad_norm": 10.978717803955078, "learning_rate": 9.993366722466166e-06, "loss": 0.4373, "step": 566 }, { "epoch": 0.026298701298701297, "grad_norm": 23.680706024169922, "learning_rate": 9.993328772317172e-06, "loss": 0.4768, "step": 567 }, { "epoch": 0.02634508348794063, "grad_norm": 17.50523567199707, "learning_rate": 9.993290713990343e-06, "loss": 0.5901, "step": 568 }, { "epoch": 0.026391465677179964, "grad_norm": 9.742980003356934, "learning_rate": 9.993252547486505e-06, "loss": 0.431, "step": 569 }, { "epoch": 0.026437847866419294, "grad_norm": 6.476775646209717, "learning_rate": 9.993214272806484e-06, "loss": 0.4359, "step": 570 }, { "epoch": 0.026484230055658628, "grad_norm": 20.474275588989258, "learning_rate": 9.99317588995111e-06, "loss": 0.3708, "step": 571 }, { "epoch": 0.026530612244897958, "grad_norm": 8.627716064453125, "learning_rate": 9.993137398921214e-06, "loss": 0.4816, "step": 572 }, { "epoch": 0.02657699443413729, "grad_norm": 6.348058700561523, "learning_rate": 9.993098799717628e-06, "loss": 0.489, "step": 573 }, { "epoch": 0.026623376623376622, "grad_norm": 19.524169921875, "learning_rate": 9.993060092341194e-06, "loss": 0.444, "step": 574 }, { "epoch": 0.026669758812615955, "grad_norm": 9.756953239440918, "learning_rate": 9.993021276792742e-06, "loss": 0.3158, "step": 575 }, { "epoch": 0.02671614100185529, "grad_norm": 17.339920043945312, "learning_rate": 9.992982353073122e-06, "loss": 0.3068, "step": 576 }, { "epoch": 0.02676252319109462, "grad_norm": 8.49893856048584, "learning_rate": 9.992943321183169e-06, "loss": 0.3602, "step": 577 }, { "epoch": 0.026808905380333953, "grad_norm": 24.471290588378906, "learning_rate": 9.992904181123735e-06, "loss": 0.3779, "step": 578 }, { "epoch": 0.026855287569573283, "grad_norm": 15.91001033782959, "learning_rate": 9.992864932895664e-06, "loss": 0.4455, "step": 579 }, { "epoch": 0.026901669758812616, "grad_norm": 17.615982055664062, "learning_rate": 9.992825576499808e-06, "loss": 0.5692, "step": 580 }, { "epoch": 0.026948051948051947, "grad_norm": 14.220132827758789, "learning_rate": 9.99278611193702e-06, "loss": 0.5432, "step": 581 }, { "epoch": 0.02699443413729128, "grad_norm": 11.56405258178711, "learning_rate": 9.992746539208153e-06, "loss": 0.399, "step": 582 }, { "epoch": 0.027040816326530614, "grad_norm": 15.66895580291748, "learning_rate": 9.992706858314067e-06, "loss": 0.4863, "step": 583 }, { "epoch": 0.027087198515769944, "grad_norm": 10.690546035766602, "learning_rate": 9.99266706925562e-06, "loss": 0.4374, "step": 584 }, { "epoch": 0.027133580705009278, "grad_norm": 9.170696258544922, "learning_rate": 9.992627172033674e-06, "loss": 0.3825, "step": 585 }, { "epoch": 0.027179962894248608, "grad_norm": 14.075667381286621, "learning_rate": 9.992587166649093e-06, "loss": 0.4512, "step": 586 }, { "epoch": 0.02722634508348794, "grad_norm": 9.495527267456055, "learning_rate": 9.992547053102745e-06, "loss": 0.3915, "step": 587 }, { "epoch": 0.02727272727272727, "grad_norm": 9.544137001037598, "learning_rate": 9.992506831395499e-06, "loss": 0.3407, "step": 588 }, { "epoch": 0.027319109461966605, "grad_norm": 13.509088516235352, "learning_rate": 9.992466501528227e-06, "loss": 0.5745, "step": 589 }, { "epoch": 0.02736549165120594, "grad_norm": 11.44873046875, "learning_rate": 9.9924260635018e-06, "loss": 0.3222, "step": 590 }, { "epoch": 0.02741187384044527, "grad_norm": 17.382095336914062, "learning_rate": 9.992385517317094e-06, "loss": 0.3629, "step": 591 }, { "epoch": 0.027458256029684602, "grad_norm": 7.359893321990967, "learning_rate": 9.99234486297499e-06, "loss": 0.4234, "step": 592 }, { "epoch": 0.027504638218923932, "grad_norm": 14.0462007522583, "learning_rate": 9.992304100476368e-06, "loss": 0.4207, "step": 593 }, { "epoch": 0.027551020408163266, "grad_norm": 4.875443935394287, "learning_rate": 9.99226322982211e-06, "loss": 0.2993, "step": 594 }, { "epoch": 0.027597402597402596, "grad_norm": 12.204375267028809, "learning_rate": 9.992222251013104e-06, "loss": 0.4032, "step": 595 }, { "epoch": 0.02764378478664193, "grad_norm": 14.028940200805664, "learning_rate": 9.992181164050235e-06, "loss": 0.3837, "step": 596 }, { "epoch": 0.02769016697588126, "grad_norm": 10.150957107543945, "learning_rate": 9.992139968934394e-06, "loss": 0.3738, "step": 597 }, { "epoch": 0.027736549165120594, "grad_norm": 13.797502517700195, "learning_rate": 9.992098665666474e-06, "loss": 0.2905, "step": 598 }, { "epoch": 0.027782931354359927, "grad_norm": 7.354369640350342, "learning_rate": 9.992057254247369e-06, "loss": 0.296, "step": 599 }, { "epoch": 0.027829313543599257, "grad_norm": 23.944326400756836, "learning_rate": 9.992015734677979e-06, "loss": 0.5173, "step": 600 }, { "epoch": 0.02787569573283859, "grad_norm": 7.5369181632995605, "learning_rate": 9.991974106959198e-06, "loss": 0.2932, "step": 601 }, { "epoch": 0.02792207792207792, "grad_norm": 32.88855743408203, "learning_rate": 9.991932371091932e-06, "loss": 0.5747, "step": 602 }, { "epoch": 0.027968460111317255, "grad_norm": 16.728864669799805, "learning_rate": 9.991890527077084e-06, "loss": 0.5085, "step": 603 }, { "epoch": 0.028014842300556585, "grad_norm": 26.773242950439453, "learning_rate": 9.991848574915562e-06, "loss": 0.436, "step": 604 }, { "epoch": 0.02806122448979592, "grad_norm": 15.864724159240723, "learning_rate": 9.991806514608272e-06, "loss": 0.5313, "step": 605 }, { "epoch": 0.028107606679035252, "grad_norm": 8.27059555053711, "learning_rate": 9.991764346156127e-06, "loss": 0.4048, "step": 606 }, { "epoch": 0.028153988868274582, "grad_norm": 16.1488037109375, "learning_rate": 9.991722069560041e-06, "loss": 0.32, "step": 607 }, { "epoch": 0.028200371057513916, "grad_norm": 15.515570640563965, "learning_rate": 9.991679684820929e-06, "loss": 0.4119, "step": 608 }, { "epoch": 0.028246753246753246, "grad_norm": 10.545952796936035, "learning_rate": 9.99163719193971e-06, "loss": 0.3386, "step": 609 }, { "epoch": 0.02829313543599258, "grad_norm": 16.754674911499023, "learning_rate": 9.991594590917302e-06, "loss": 0.4578, "step": 610 }, { "epoch": 0.02833951762523191, "grad_norm": 9.866851806640625, "learning_rate": 9.99155188175463e-06, "loss": 0.3846, "step": 611 }, { "epoch": 0.028385899814471243, "grad_norm": 9.596376419067383, "learning_rate": 9.991509064452623e-06, "loss": 0.4026, "step": 612 }, { "epoch": 0.028432282003710577, "grad_norm": 7.729685306549072, "learning_rate": 9.991466139012202e-06, "loss": 0.456, "step": 613 }, { "epoch": 0.028478664192949907, "grad_norm": 6.5927734375, "learning_rate": 9.991423105434301e-06, "loss": 0.4091, "step": 614 }, { "epoch": 0.02852504638218924, "grad_norm": 7.9781341552734375, "learning_rate": 9.99137996371985e-06, "loss": 0.2469, "step": 615 }, { "epoch": 0.02857142857142857, "grad_norm": 7.475320816040039, "learning_rate": 9.991336713869785e-06, "loss": 0.4097, "step": 616 }, { "epoch": 0.028617810760667904, "grad_norm": 7.90785551071167, "learning_rate": 9.991293355885043e-06, "loss": 0.4809, "step": 617 }, { "epoch": 0.028664192949907234, "grad_norm": 14.14836597442627, "learning_rate": 9.991249889766562e-06, "loss": 0.5563, "step": 618 }, { "epoch": 0.028710575139146568, "grad_norm": 6.848025798797607, "learning_rate": 9.991206315515287e-06, "loss": 0.338, "step": 619 }, { "epoch": 0.0287569573283859, "grad_norm": 6.614107131958008, "learning_rate": 9.991162633132157e-06, "loss": 0.3962, "step": 620 }, { "epoch": 0.02880333951762523, "grad_norm": 8.333148956298828, "learning_rate": 9.991118842618124e-06, "loss": 0.346, "step": 621 }, { "epoch": 0.028849721706864565, "grad_norm": 24.07065773010254, "learning_rate": 9.991074943974132e-06, "loss": 0.3637, "step": 622 }, { "epoch": 0.028896103896103895, "grad_norm": 9.248140335083008, "learning_rate": 9.991030937201134e-06, "loss": 0.5001, "step": 623 }, { "epoch": 0.02894248608534323, "grad_norm": 7.94632625579834, "learning_rate": 9.990986822300085e-06, "loss": 0.39, "step": 624 }, { "epoch": 0.02898886827458256, "grad_norm": 12.065841674804688, "learning_rate": 9.990942599271935e-06, "loss": 0.413, "step": 625 }, { "epoch": 0.029035250463821893, "grad_norm": 13.210033416748047, "learning_rate": 9.99089826811765e-06, "loss": 0.4077, "step": 626 }, { "epoch": 0.029081632653061223, "grad_norm": 5.425580024719238, "learning_rate": 9.990853828838186e-06, "loss": 0.428, "step": 627 }, { "epoch": 0.029128014842300556, "grad_norm": 6.882101535797119, "learning_rate": 9.990809281434504e-06, "loss": 0.354, "step": 628 }, { "epoch": 0.02917439703153989, "grad_norm": 12.667609214782715, "learning_rate": 9.990764625907572e-06, "loss": 0.4105, "step": 629 }, { "epoch": 0.02922077922077922, "grad_norm": 12.017352104187012, "learning_rate": 9.990719862258357e-06, "loss": 0.2113, "step": 630 }, { "epoch": 0.029267161410018554, "grad_norm": 10.831001281738281, "learning_rate": 9.99067499048783e-06, "loss": 0.4151, "step": 631 }, { "epoch": 0.029313543599257884, "grad_norm": 19.037355422973633, "learning_rate": 9.99063001059696e-06, "loss": 0.5292, "step": 632 }, { "epoch": 0.029359925788497217, "grad_norm": 14.773015975952148, "learning_rate": 9.990584922586724e-06, "loss": 0.397, "step": 633 }, { "epoch": 0.029406307977736548, "grad_norm": 10.940561294555664, "learning_rate": 9.990539726458096e-06, "loss": 0.4002, "step": 634 }, { "epoch": 0.02945269016697588, "grad_norm": 21.626678466796875, "learning_rate": 9.990494422212059e-06, "loss": 0.8116, "step": 635 }, { "epoch": 0.029499072356215215, "grad_norm": 12.339302062988281, "learning_rate": 9.990449009849591e-06, "loss": 0.4271, "step": 636 }, { "epoch": 0.029545454545454545, "grad_norm": 7.781222343444824, "learning_rate": 9.990403489371678e-06, "loss": 0.3753, "step": 637 }, { "epoch": 0.02959183673469388, "grad_norm": 14.954323768615723, "learning_rate": 9.990357860779307e-06, "loss": 0.3302, "step": 638 }, { "epoch": 0.02963821892393321, "grad_norm": 9.03328800201416, "learning_rate": 9.990312124073465e-06, "loss": 0.379, "step": 639 }, { "epoch": 0.029684601113172542, "grad_norm": 8.458647727966309, "learning_rate": 9.990266279255142e-06, "loss": 0.4556, "step": 640 }, { "epoch": 0.029730983302411872, "grad_norm": 16.327497482299805, "learning_rate": 9.990220326325331e-06, "loss": 0.548, "step": 641 }, { "epoch": 0.029777365491651206, "grad_norm": 8.636689186096191, "learning_rate": 9.990174265285029e-06, "loss": 0.3814, "step": 642 }, { "epoch": 0.02982374768089054, "grad_norm": 10.235777854919434, "learning_rate": 9.990128096135234e-06, "loss": 0.4296, "step": 643 }, { "epoch": 0.02987012987012987, "grad_norm": 9.520835876464844, "learning_rate": 9.990081818876946e-06, "loss": 0.4718, "step": 644 }, { "epoch": 0.029916512059369203, "grad_norm": 13.33792495727539, "learning_rate": 9.99003543351117e-06, "loss": 0.4724, "step": 645 }, { "epoch": 0.029962894248608533, "grad_norm": 7.5005412101745605, "learning_rate": 9.989988940038904e-06, "loss": 0.4087, "step": 646 }, { "epoch": 0.030009276437847867, "grad_norm": 7.611462593078613, "learning_rate": 9.989942338461163e-06, "loss": 0.381, "step": 647 }, { "epoch": 0.030055658627087197, "grad_norm": 16.582359313964844, "learning_rate": 9.989895628778952e-06, "loss": 0.5083, "step": 648 }, { "epoch": 0.03010204081632653, "grad_norm": 8.37784194946289, "learning_rate": 9.989848810993283e-06, "loss": 0.4505, "step": 649 }, { "epoch": 0.030148423005565864, "grad_norm": 6.933984279632568, "learning_rate": 9.989801885105172e-06, "loss": 0.4094, "step": 650 }, { "epoch": 0.030194805194805194, "grad_norm": 10.425581932067871, "learning_rate": 9.989754851115636e-06, "loss": 0.3251, "step": 651 }, { "epoch": 0.030241187384044528, "grad_norm": 8.51232624053955, "learning_rate": 9.98970770902569e-06, "loss": 0.2479, "step": 652 }, { "epoch": 0.030287569573283858, "grad_norm": 10.4830961227417, "learning_rate": 9.989660458836361e-06, "loss": 0.3392, "step": 653 }, { "epoch": 0.030333951762523192, "grad_norm": 7.811337947845459, "learning_rate": 9.98961310054867e-06, "loss": 0.3723, "step": 654 }, { "epoch": 0.030380333951762522, "grad_norm": 12.280510902404785, "learning_rate": 9.989565634163641e-06, "loss": 0.4311, "step": 655 }, { "epoch": 0.030426716141001856, "grad_norm": 16.44389533996582, "learning_rate": 9.989518059682306e-06, "loss": 0.454, "step": 656 }, { "epoch": 0.03047309833024119, "grad_norm": 7.389449596405029, "learning_rate": 9.989470377105693e-06, "loss": 0.3688, "step": 657 }, { "epoch": 0.03051948051948052, "grad_norm": 7.164580345153809, "learning_rate": 9.989422586434838e-06, "loss": 0.3496, "step": 658 }, { "epoch": 0.030565862708719853, "grad_norm": 10.180065155029297, "learning_rate": 9.989374687670773e-06, "loss": 0.3626, "step": 659 }, { "epoch": 0.030612244897959183, "grad_norm": 18.006481170654297, "learning_rate": 9.989326680814534e-06, "loss": 0.4991, "step": 660 }, { "epoch": 0.030658627087198517, "grad_norm": 16.26552963256836, "learning_rate": 9.989278565867168e-06, "loss": 0.4181, "step": 661 }, { "epoch": 0.030705009276437847, "grad_norm": 4.82702112197876, "learning_rate": 9.989230342829712e-06, "loss": 0.3446, "step": 662 }, { "epoch": 0.03075139146567718, "grad_norm": 17.144533157348633, "learning_rate": 9.989182011703212e-06, "loss": 0.493, "step": 663 }, { "epoch": 0.03079777365491651, "grad_norm": 11.673808097839355, "learning_rate": 9.989133572488716e-06, "loss": 0.472, "step": 664 }, { "epoch": 0.030844155844155844, "grad_norm": 20.05531883239746, "learning_rate": 9.989085025187273e-06, "loss": 0.4808, "step": 665 }, { "epoch": 0.030890538033395178, "grad_norm": 21.0502986907959, "learning_rate": 9.989036369799933e-06, "loss": 0.4969, "step": 666 }, { "epoch": 0.030936920222634508, "grad_norm": 12.237833976745605, "learning_rate": 9.98898760632775e-06, "loss": 0.4013, "step": 667 }, { "epoch": 0.03098330241187384, "grad_norm": 19.753517150878906, "learning_rate": 9.988938734771785e-06, "loss": 0.507, "step": 668 }, { "epoch": 0.03102968460111317, "grad_norm": 14.473896980285645, "learning_rate": 9.988889755133093e-06, "loss": 0.3514, "step": 669 }, { "epoch": 0.031076066790352505, "grad_norm": 14.682684898376465, "learning_rate": 9.988840667412736e-06, "loss": 0.4613, "step": 670 }, { "epoch": 0.031122448979591835, "grad_norm": 17.04216766357422, "learning_rate": 9.988791471611776e-06, "loss": 0.4498, "step": 671 }, { "epoch": 0.03116883116883117, "grad_norm": 14.268298149108887, "learning_rate": 9.988742167731282e-06, "loss": 0.5449, "step": 672 }, { "epoch": 0.031215213358070502, "grad_norm": 10.340070724487305, "learning_rate": 9.98869275577232e-06, "loss": 0.3898, "step": 673 }, { "epoch": 0.03126159554730983, "grad_norm": 14.324050903320312, "learning_rate": 9.988643235735958e-06, "loss": 0.3788, "step": 674 }, { "epoch": 0.03130797773654916, "grad_norm": 8.30114459991455, "learning_rate": 9.988593607623273e-06, "loss": 0.4787, "step": 675 }, { "epoch": 0.0313543599257885, "grad_norm": 8.05694580078125, "learning_rate": 9.988543871435342e-06, "loss": 0.308, "step": 676 }, { "epoch": 0.03140074211502783, "grad_norm": 6.547535419464111, "learning_rate": 9.988494027173235e-06, "loss": 0.3667, "step": 677 }, { "epoch": 0.03144712430426716, "grad_norm": 7.563650131225586, "learning_rate": 9.988444074838037e-06, "loss": 0.3124, "step": 678 }, { "epoch": 0.03149350649350649, "grad_norm": 9.088653564453125, "learning_rate": 9.988394014430829e-06, "loss": 0.4622, "step": 679 }, { "epoch": 0.03153988868274583, "grad_norm": 11.949199676513672, "learning_rate": 9.988343845952697e-06, "loss": 0.4398, "step": 680 }, { "epoch": 0.03158627087198516, "grad_norm": 16.407642364501953, "learning_rate": 9.988293569404725e-06, "loss": 0.2428, "step": 681 }, { "epoch": 0.03163265306122449, "grad_norm": 12.607670783996582, "learning_rate": 9.988243184788006e-06, "loss": 0.4833, "step": 682 }, { "epoch": 0.031679035250463825, "grad_norm": 11.433900833129883, "learning_rate": 9.988192692103628e-06, "loss": 0.4283, "step": 683 }, { "epoch": 0.031725417439703155, "grad_norm": 13.964388847351074, "learning_rate": 9.988142091352686e-06, "loss": 0.3529, "step": 684 }, { "epoch": 0.031771799628942485, "grad_norm": 8.083251953125, "learning_rate": 9.988091382536278e-06, "loss": 0.3646, "step": 685 }, { "epoch": 0.031818181818181815, "grad_norm": 13.005758285522461, "learning_rate": 9.9880405656555e-06, "loss": 0.5031, "step": 686 }, { "epoch": 0.03186456400742115, "grad_norm": 12.25972843170166, "learning_rate": 9.987989640711456e-06, "loss": 0.3431, "step": 687 }, { "epoch": 0.03191094619666048, "grad_norm": 8.441509246826172, "learning_rate": 9.987938607705244e-06, "loss": 0.4754, "step": 688 }, { "epoch": 0.03195732838589981, "grad_norm": 11.46064567565918, "learning_rate": 9.987887466637975e-06, "loss": 0.396, "step": 689 }, { "epoch": 0.03200371057513915, "grad_norm": 7.133488178253174, "learning_rate": 9.987836217510754e-06, "loss": 0.3709, "step": 690 }, { "epoch": 0.03205009276437848, "grad_norm": 7.304541110992432, "learning_rate": 9.987784860324692e-06, "loss": 0.4561, "step": 691 }, { "epoch": 0.03209647495361781, "grad_norm": 7.729724407196045, "learning_rate": 9.987733395080903e-06, "loss": 0.3909, "step": 692 }, { "epoch": 0.03214285714285714, "grad_norm": 7.348804473876953, "learning_rate": 9.987681821780502e-06, "loss": 0.3273, "step": 693 }, { "epoch": 0.03218923933209648, "grad_norm": 8.590633392333984, "learning_rate": 9.987630140424603e-06, "loss": 0.4068, "step": 694 }, { "epoch": 0.03223562152133581, "grad_norm": 9.752640724182129, "learning_rate": 9.987578351014327e-06, "loss": 0.4357, "step": 695 }, { "epoch": 0.03228200371057514, "grad_norm": 13.699004173278809, "learning_rate": 9.987526453550798e-06, "loss": 0.5252, "step": 696 }, { "epoch": 0.032328385899814474, "grad_norm": 17.532747268676758, "learning_rate": 9.987474448035138e-06, "loss": 0.5606, "step": 697 }, { "epoch": 0.032374768089053804, "grad_norm": 10.457918167114258, "learning_rate": 9.987422334468475e-06, "loss": 0.3678, "step": 698 }, { "epoch": 0.032421150278293134, "grad_norm": 12.378833770751953, "learning_rate": 9.98737011285194e-06, "loss": 0.4404, "step": 699 }, { "epoch": 0.032467532467532464, "grad_norm": 13.864004135131836, "learning_rate": 9.987317783186658e-06, "loss": 0.4492, "step": 700 }, { "epoch": 0.0325139146567718, "grad_norm": 13.651908874511719, "learning_rate": 9.98726534547377e-06, "loss": 0.3766, "step": 701 }, { "epoch": 0.03256029684601113, "grad_norm": 11.109723091125488, "learning_rate": 9.987212799714408e-06, "loss": 0.4904, "step": 702 }, { "epoch": 0.03260667903525046, "grad_norm": 12.462272644042969, "learning_rate": 9.987160145909711e-06, "loss": 0.3789, "step": 703 }, { "epoch": 0.0326530612244898, "grad_norm": 6.510255813598633, "learning_rate": 9.987107384060819e-06, "loss": 0.3476, "step": 704 }, { "epoch": 0.03269944341372913, "grad_norm": 6.871153831481934, "learning_rate": 9.987054514168878e-06, "loss": 0.4167, "step": 705 }, { "epoch": 0.03274582560296846, "grad_norm": 6.45035982131958, "learning_rate": 9.98700153623503e-06, "loss": 0.3281, "step": 706 }, { "epoch": 0.03279220779220779, "grad_norm": 11.014588356018066, "learning_rate": 9.986948450260423e-06, "loss": 0.4181, "step": 707 }, { "epoch": 0.032838589981447126, "grad_norm": 7.096123695373535, "learning_rate": 9.98689525624621e-06, "loss": 0.2598, "step": 708 }, { "epoch": 0.032884972170686456, "grad_norm": 16.792299270629883, "learning_rate": 9.98684195419354e-06, "loss": 0.2617, "step": 709 }, { "epoch": 0.03293135435992579, "grad_norm": 12.586376190185547, "learning_rate": 9.986788544103572e-06, "loss": 0.403, "step": 710 }, { "epoch": 0.032977736549165124, "grad_norm": 22.328882217407227, "learning_rate": 9.986735025977458e-06, "loss": 0.5201, "step": 711 }, { "epoch": 0.033024118738404454, "grad_norm": 21.296710968017578, "learning_rate": 9.98668139981636e-06, "loss": 0.6097, "step": 712 }, { "epoch": 0.033070500927643784, "grad_norm": 12.344807624816895, "learning_rate": 9.986627665621442e-06, "loss": 0.39, "step": 713 }, { "epoch": 0.033116883116883114, "grad_norm": 13.435027122497559, "learning_rate": 9.986573823393863e-06, "loss": 0.4444, "step": 714 }, { "epoch": 0.03316326530612245, "grad_norm": 10.54784870147705, "learning_rate": 9.986519873134793e-06, "loss": 0.4374, "step": 715 }, { "epoch": 0.03320964749536178, "grad_norm": 13.160649299621582, "learning_rate": 9.986465814845401e-06, "loss": 0.3572, "step": 716 }, { "epoch": 0.03325602968460111, "grad_norm": 13.856224060058594, "learning_rate": 9.986411648526858e-06, "loss": 0.3967, "step": 717 }, { "epoch": 0.03330241187384045, "grad_norm": 11.422259330749512, "learning_rate": 9.986357374180334e-06, "loss": 0.4199, "step": 718 }, { "epoch": 0.03334879406307978, "grad_norm": 9.083560943603516, "learning_rate": 9.986302991807009e-06, "loss": 0.3703, "step": 719 }, { "epoch": 0.03339517625231911, "grad_norm": 14.241143226623535, "learning_rate": 9.986248501408059e-06, "loss": 0.5112, "step": 720 }, { "epoch": 0.03344155844155844, "grad_norm": 9.334636688232422, "learning_rate": 9.986193902984666e-06, "loss": 0.3714, "step": 721 }, { "epoch": 0.033487940630797776, "grad_norm": 8.476656913757324, "learning_rate": 9.986139196538011e-06, "loss": 0.3355, "step": 722 }, { "epoch": 0.033534322820037106, "grad_norm": 16.219038009643555, "learning_rate": 9.986084382069282e-06, "loss": 0.4977, "step": 723 }, { "epoch": 0.033580705009276436, "grad_norm": 15.324316024780273, "learning_rate": 9.986029459579662e-06, "loss": 0.5268, "step": 724 }, { "epoch": 0.03362708719851577, "grad_norm": 8.332223892211914, "learning_rate": 9.985974429070346e-06, "loss": 0.4399, "step": 725 }, { "epoch": 0.0336734693877551, "grad_norm": 13.432218551635742, "learning_rate": 9.985919290542524e-06, "loss": 0.3489, "step": 726 }, { "epoch": 0.033719851576994433, "grad_norm": 13.483512878417969, "learning_rate": 9.985864043997389e-06, "loss": 0.4603, "step": 727 }, { "epoch": 0.033766233766233764, "grad_norm": 10.61400032043457, "learning_rate": 9.98580868943614e-06, "loss": 0.4621, "step": 728 }, { "epoch": 0.0338126159554731, "grad_norm": 8.620793342590332, "learning_rate": 9.985753226859975e-06, "loss": 0.3993, "step": 729 }, { "epoch": 0.03385899814471243, "grad_norm": 6.562267303466797, "learning_rate": 9.985697656270098e-06, "loss": 0.3441, "step": 730 }, { "epoch": 0.03390538033395176, "grad_norm": 10.874139785766602, "learning_rate": 9.985641977667708e-06, "loss": 0.4198, "step": 731 }, { "epoch": 0.0339517625231911, "grad_norm": 17.307632446289062, "learning_rate": 9.985586191054015e-06, "loss": 0.5495, "step": 732 }, { "epoch": 0.03399814471243043, "grad_norm": 7.928165912628174, "learning_rate": 9.985530296430229e-06, "loss": 0.3654, "step": 733 }, { "epoch": 0.03404452690166976, "grad_norm": 13.603765487670898, "learning_rate": 9.985474293797555e-06, "loss": 0.444, "step": 734 }, { "epoch": 0.03409090909090909, "grad_norm": 8.147464752197266, "learning_rate": 9.985418183157212e-06, "loss": 0.23, "step": 735 }, { "epoch": 0.034137291280148425, "grad_norm": 15.2279691696167, "learning_rate": 9.985361964510414e-06, "loss": 0.5232, "step": 736 }, { "epoch": 0.034183673469387756, "grad_norm": 8.874156951904297, "learning_rate": 9.985305637858377e-06, "loss": 0.3345, "step": 737 }, { "epoch": 0.034230055658627086, "grad_norm": 7.186215877532959, "learning_rate": 9.985249203202324e-06, "loss": 0.2815, "step": 738 }, { "epoch": 0.034276437847866416, "grad_norm": 14.20744514465332, "learning_rate": 9.985192660543477e-06, "loss": 0.3378, "step": 739 }, { "epoch": 0.03432282003710575, "grad_norm": 7.889177322387695, "learning_rate": 9.98513600988306e-06, "loss": 0.4459, "step": 740 }, { "epoch": 0.03436920222634508, "grad_norm": 9.669413566589355, "learning_rate": 9.985079251222301e-06, "loss": 0.316, "step": 741 }, { "epoch": 0.03441558441558441, "grad_norm": 14.516461372375488, "learning_rate": 9.985022384562428e-06, "loss": 0.5197, "step": 742 }, { "epoch": 0.03446196660482375, "grad_norm": 22.575439453125, "learning_rate": 9.984965409904677e-06, "loss": 0.3345, "step": 743 }, { "epoch": 0.03450834879406308, "grad_norm": 9.005615234375, "learning_rate": 9.984908327250278e-06, "loss": 0.3322, "step": 744 }, { "epoch": 0.03455473098330241, "grad_norm": 4.999899864196777, "learning_rate": 9.984851136600469e-06, "loss": 0.3064, "step": 745 }, { "epoch": 0.03460111317254174, "grad_norm": 9.446094512939453, "learning_rate": 9.98479383795649e-06, "loss": 0.3797, "step": 746 }, { "epoch": 0.03464749536178108, "grad_norm": 7.272648334503174, "learning_rate": 9.984736431319581e-06, "loss": 0.3927, "step": 747 }, { "epoch": 0.03469387755102041, "grad_norm": 21.25323486328125, "learning_rate": 9.984678916690986e-06, "loss": 0.6446, "step": 748 }, { "epoch": 0.03474025974025974, "grad_norm": 8.403424263000488, "learning_rate": 9.984621294071952e-06, "loss": 0.3173, "step": 749 }, { "epoch": 0.034786641929499075, "grad_norm": 9.938349723815918, "learning_rate": 9.984563563463728e-06, "loss": 0.3753, "step": 750 }, { "epoch": 0.034833024118738405, "grad_norm": 11.755522727966309, "learning_rate": 9.984505724867562e-06, "loss": 0.3555, "step": 751 }, { "epoch": 0.034879406307977735, "grad_norm": 4.977782249450684, "learning_rate": 9.98444777828471e-06, "loss": 0.4241, "step": 752 }, { "epoch": 0.034925788497217065, "grad_norm": 4.052342414855957, "learning_rate": 9.984389723716423e-06, "loss": 0.304, "step": 753 }, { "epoch": 0.0349721706864564, "grad_norm": 14.697955131530762, "learning_rate": 9.984331561163964e-06, "loss": 0.4626, "step": 754 }, { "epoch": 0.03501855287569573, "grad_norm": 27.53087615966797, "learning_rate": 9.984273290628591e-06, "loss": 0.5453, "step": 755 }, { "epoch": 0.03506493506493506, "grad_norm": 12.351605415344238, "learning_rate": 9.984214912111564e-06, "loss": 0.2931, "step": 756 }, { "epoch": 0.0351113172541744, "grad_norm": 18.61176872253418, "learning_rate": 9.984156425614152e-06, "loss": 0.6313, "step": 757 }, { "epoch": 0.03515769944341373, "grad_norm": 10.652955055236816, "learning_rate": 9.984097831137618e-06, "loss": 0.4005, "step": 758 }, { "epoch": 0.03520408163265306, "grad_norm": 12.417840957641602, "learning_rate": 9.984039128683236e-06, "loss": 0.3914, "step": 759 }, { "epoch": 0.03525046382189239, "grad_norm": 11.649137496948242, "learning_rate": 9.983980318252274e-06, "loss": 0.5123, "step": 760 }, { "epoch": 0.03529684601113173, "grad_norm": 15.802209854125977, "learning_rate": 9.983921399846006e-06, "loss": 0.4435, "step": 761 }, { "epoch": 0.03534322820037106, "grad_norm": 9.3419771194458, "learning_rate": 9.983862373465709e-06, "loss": 0.3611, "step": 762 }, { "epoch": 0.03538961038961039, "grad_norm": 10.945963859558105, "learning_rate": 9.983803239112663e-06, "loss": 0.4404, "step": 763 }, { "epoch": 0.035435992578849725, "grad_norm": 17.470632553100586, "learning_rate": 9.98374399678815e-06, "loss": 0.3825, "step": 764 }, { "epoch": 0.035482374768089055, "grad_norm": 12.076683044433594, "learning_rate": 9.983684646493452e-06, "loss": 0.4186, "step": 765 }, { "epoch": 0.035528756957328385, "grad_norm": 15.799609184265137, "learning_rate": 9.983625188229852e-06, "loss": 0.5676, "step": 766 }, { "epoch": 0.035575139146567715, "grad_norm": 8.955639839172363, "learning_rate": 9.983565621998644e-06, "loss": 0.5227, "step": 767 }, { "epoch": 0.03562152133580705, "grad_norm": 7.495518207550049, "learning_rate": 9.983505947801115e-06, "loss": 0.3713, "step": 768 }, { "epoch": 0.03566790352504638, "grad_norm": 25.122461318969727, "learning_rate": 9.983446165638557e-06, "loss": 0.6823, "step": 769 }, { "epoch": 0.03571428571428571, "grad_norm": 9.902099609375, "learning_rate": 9.983386275512265e-06, "loss": 0.3778, "step": 770 }, { "epoch": 0.03576066790352505, "grad_norm": 14.791496276855469, "learning_rate": 9.98332627742354e-06, "loss": 0.4302, "step": 771 }, { "epoch": 0.03580705009276438, "grad_norm": 13.415614128112793, "learning_rate": 9.98326617137368e-06, "loss": 0.4886, "step": 772 }, { "epoch": 0.03585343228200371, "grad_norm": 15.086644172668457, "learning_rate": 9.983205957363986e-06, "loss": 0.5212, "step": 773 }, { "epoch": 0.03589981447124304, "grad_norm": 7.756099700927734, "learning_rate": 9.983145635395764e-06, "loss": 0.4227, "step": 774 }, { "epoch": 0.03594619666048238, "grad_norm": 16.58232307434082, "learning_rate": 9.983085205470318e-06, "loss": 0.4908, "step": 775 }, { "epoch": 0.03599257884972171, "grad_norm": 12.061488151550293, "learning_rate": 9.983024667588961e-06, "loss": 0.3828, "step": 776 }, { "epoch": 0.03603896103896104, "grad_norm": 18.284639358520508, "learning_rate": 9.982964021753003e-06, "loss": 0.4115, "step": 777 }, { "epoch": 0.036085343228200374, "grad_norm": 8.87034797668457, "learning_rate": 9.982903267963756e-06, "loss": 0.4728, "step": 778 }, { "epoch": 0.036131725417439704, "grad_norm": 9.64841365814209, "learning_rate": 9.98284240622254e-06, "loss": 0.3773, "step": 779 }, { "epoch": 0.036178107606679034, "grad_norm": 13.15312385559082, "learning_rate": 9.982781436530669e-06, "loss": 0.3847, "step": 780 }, { "epoch": 0.036224489795918365, "grad_norm": 7.8866119384765625, "learning_rate": 9.982720358889469e-06, "loss": 0.3372, "step": 781 }, { "epoch": 0.0362708719851577, "grad_norm": 6.653123378753662, "learning_rate": 9.982659173300259e-06, "loss": 0.3685, "step": 782 }, { "epoch": 0.03631725417439703, "grad_norm": 5.536250114440918, "learning_rate": 9.982597879764367e-06, "loss": 0.2369, "step": 783 }, { "epoch": 0.03636363636363636, "grad_norm": 9.016667366027832, "learning_rate": 9.982536478283118e-06, "loss": 0.2934, "step": 784 }, { "epoch": 0.0364100185528757, "grad_norm": 14.111895561218262, "learning_rate": 9.982474968857846e-06, "loss": 0.5415, "step": 785 }, { "epoch": 0.03645640074211503, "grad_norm": 6.455445766448975, "learning_rate": 9.98241335148988e-06, "loss": 0.3394, "step": 786 }, { "epoch": 0.03650278293135436, "grad_norm": 22.32430648803711, "learning_rate": 9.98235162618056e-06, "loss": 0.4612, "step": 787 }, { "epoch": 0.03654916512059369, "grad_norm": 17.44993782043457, "learning_rate": 9.982289792931217e-06, "loss": 0.4898, "step": 788 }, { "epoch": 0.036595547309833026, "grad_norm": 17.878990173339844, "learning_rate": 9.982227851743195e-06, "loss": 0.5883, "step": 789 }, { "epoch": 0.036641929499072357, "grad_norm": 17.86383819580078, "learning_rate": 9.982165802617834e-06, "loss": 0.4562, "step": 790 }, { "epoch": 0.03668831168831169, "grad_norm": 14.043179512023926, "learning_rate": 9.98210364555648e-06, "loss": 0.4172, "step": 791 }, { "epoch": 0.036734693877551024, "grad_norm": 11.064116477966309, "learning_rate": 9.982041380560476e-06, "loss": 0.3379, "step": 792 }, { "epoch": 0.036781076066790354, "grad_norm": 9.865928649902344, "learning_rate": 9.981979007631173e-06, "loss": 0.4255, "step": 793 }, { "epoch": 0.036827458256029684, "grad_norm": 8.070916175842285, "learning_rate": 9.981916526769924e-06, "loss": 0.4522, "step": 794 }, { "epoch": 0.036873840445269014, "grad_norm": 7.384469985961914, "learning_rate": 9.981853937978082e-06, "loss": 0.4544, "step": 795 }, { "epoch": 0.03692022263450835, "grad_norm": 9.083176612854004, "learning_rate": 9.981791241257001e-06, "loss": 0.4028, "step": 796 }, { "epoch": 0.03696660482374768, "grad_norm": 10.485040664672852, "learning_rate": 9.98172843660804e-06, "loss": 0.3709, "step": 797 }, { "epoch": 0.03701298701298701, "grad_norm": 15.5316801071167, "learning_rate": 9.98166552403256e-06, "loss": 0.4413, "step": 798 }, { "epoch": 0.03705936920222635, "grad_norm": 12.758498191833496, "learning_rate": 9.981602503531924e-06, "loss": 0.3243, "step": 799 }, { "epoch": 0.03710575139146568, "grad_norm": 8.063529968261719, "learning_rate": 9.981539375107498e-06, "loss": 0.3361, "step": 800 }, { "epoch": 0.03715213358070501, "grad_norm": 14.091282844543457, "learning_rate": 9.981476138760648e-06, "loss": 0.4222, "step": 801 }, { "epoch": 0.03719851576994434, "grad_norm": 16.56045150756836, "learning_rate": 9.981412794492746e-06, "loss": 0.5919, "step": 802 }, { "epoch": 0.037244897959183676, "grad_norm": 11.067214965820312, "learning_rate": 9.981349342305163e-06, "loss": 0.3756, "step": 803 }, { "epoch": 0.037291280148423006, "grad_norm": 10.722183227539062, "learning_rate": 9.981285782199273e-06, "loss": 0.436, "step": 804 }, { "epoch": 0.037337662337662336, "grad_norm": 19.771745681762695, "learning_rate": 9.981222114176455e-06, "loss": 0.4176, "step": 805 }, { "epoch": 0.037384044526901666, "grad_norm": 9.836432456970215, "learning_rate": 9.981158338238088e-06, "loss": 0.4704, "step": 806 }, { "epoch": 0.037430426716141, "grad_norm": 10.770308494567871, "learning_rate": 9.98109445438555e-06, "loss": 0.317, "step": 807 }, { "epoch": 0.037476808905380334, "grad_norm": 10.054449081420898, "learning_rate": 9.98103046262023e-06, "loss": 0.4386, "step": 808 }, { "epoch": 0.037523191094619664, "grad_norm": 9.459250450134277, "learning_rate": 9.980966362943511e-06, "loss": 0.4599, "step": 809 }, { "epoch": 0.037569573283859, "grad_norm": 8.186063766479492, "learning_rate": 9.980902155356785e-06, "loss": 0.3367, "step": 810 }, { "epoch": 0.03761595547309833, "grad_norm": 10.644017219543457, "learning_rate": 9.980837839861438e-06, "loss": 0.4956, "step": 811 }, { "epoch": 0.03766233766233766, "grad_norm": 6.957908630371094, "learning_rate": 9.980773416458869e-06, "loss": 0.4071, "step": 812 }, { "epoch": 0.03770871985157699, "grad_norm": 3.8691039085388184, "learning_rate": 9.98070888515047e-06, "loss": 0.3096, "step": 813 }, { "epoch": 0.03775510204081633, "grad_norm": 6.411385536193848, "learning_rate": 9.98064424593764e-06, "loss": 0.3025, "step": 814 }, { "epoch": 0.03780148423005566, "grad_norm": 8.320751190185547, "learning_rate": 9.980579498821778e-06, "loss": 0.3716, "step": 815 }, { "epoch": 0.03784786641929499, "grad_norm": 16.830276489257812, "learning_rate": 9.98051464380429e-06, "loss": 0.3796, "step": 816 }, { "epoch": 0.037894248608534326, "grad_norm": 16.912853240966797, "learning_rate": 9.980449680886578e-06, "loss": 0.6815, "step": 817 }, { "epoch": 0.037940630797773656, "grad_norm": 11.99659252166748, "learning_rate": 9.980384610070052e-06, "loss": 0.4636, "step": 818 }, { "epoch": 0.037987012987012986, "grad_norm": 10.561948776245117, "learning_rate": 9.98031943135612e-06, "loss": 0.4388, "step": 819 }, { "epoch": 0.038033395176252316, "grad_norm": 12.384515762329102, "learning_rate": 9.980254144746195e-06, "loss": 0.2675, "step": 820 }, { "epoch": 0.03807977736549165, "grad_norm": 13.1101655960083, "learning_rate": 9.98018875024169e-06, "loss": 0.4974, "step": 821 }, { "epoch": 0.03812615955473098, "grad_norm": 7.316015243530273, "learning_rate": 9.980123247844021e-06, "loss": 0.4209, "step": 822 }, { "epoch": 0.03817254174397031, "grad_norm": 10.31053352355957, "learning_rate": 9.98005763755461e-06, "loss": 0.4484, "step": 823 }, { "epoch": 0.03821892393320965, "grad_norm": 13.817811965942383, "learning_rate": 9.979991919374877e-06, "loss": 0.477, "step": 824 }, { "epoch": 0.03826530612244898, "grad_norm": 13.026847839355469, "learning_rate": 9.979926093306246e-06, "loss": 0.4462, "step": 825 }, { "epoch": 0.03831168831168831, "grad_norm": 11.037748336791992, "learning_rate": 9.979860159350141e-06, "loss": 0.4862, "step": 826 }, { "epoch": 0.03835807050092764, "grad_norm": 6.956785202026367, "learning_rate": 9.979794117507996e-06, "loss": 0.4655, "step": 827 }, { "epoch": 0.03840445269016698, "grad_norm": 9.853188514709473, "learning_rate": 9.979727967781234e-06, "loss": 0.4015, "step": 828 }, { "epoch": 0.03845083487940631, "grad_norm": 5.6279497146606445, "learning_rate": 9.979661710171294e-06, "loss": 0.3578, "step": 829 }, { "epoch": 0.03849721706864564, "grad_norm": 8.551844596862793, "learning_rate": 9.979595344679609e-06, "loss": 0.405, "step": 830 }, { "epoch": 0.038543599257884975, "grad_norm": 12.510709762573242, "learning_rate": 9.979528871307617e-06, "loss": 0.6898, "step": 831 }, { "epoch": 0.038589981447124305, "grad_norm": 12.85183048248291, "learning_rate": 9.97946229005676e-06, "loss": 0.4121, "step": 832 }, { "epoch": 0.038636363636363635, "grad_norm": 10.489032745361328, "learning_rate": 9.97939560092848e-06, "loss": 0.3761, "step": 833 }, { "epoch": 0.038682745825602965, "grad_norm": 17.768630981445312, "learning_rate": 9.979328803924216e-06, "loss": 0.4824, "step": 834 }, { "epoch": 0.0387291280148423, "grad_norm": 10.114885330200195, "learning_rate": 9.979261899045423e-06, "loss": 0.4514, "step": 835 }, { "epoch": 0.03877551020408163, "grad_norm": 6.900608062744141, "learning_rate": 9.979194886293546e-06, "loss": 0.4567, "step": 836 }, { "epoch": 0.03882189239332096, "grad_norm": 12.42103385925293, "learning_rate": 9.979127765670038e-06, "loss": 0.3818, "step": 837 }, { "epoch": 0.0388682745825603, "grad_norm": 8.501192092895508, "learning_rate": 9.979060537176355e-06, "loss": 0.5068, "step": 838 }, { "epoch": 0.03891465677179963, "grad_norm": 10.258294105529785, "learning_rate": 9.97899320081395e-06, "loss": 0.3864, "step": 839 }, { "epoch": 0.03896103896103896, "grad_norm": 8.771685600280762, "learning_rate": 9.978925756584284e-06, "loss": 0.3348, "step": 840 }, { "epoch": 0.03900742115027829, "grad_norm": 8.89991283416748, "learning_rate": 9.978858204488818e-06, "loss": 0.425, "step": 841 }, { "epoch": 0.03905380333951763, "grad_norm": 30.22086524963379, "learning_rate": 9.978790544529014e-06, "loss": 0.4988, "step": 842 }, { "epoch": 0.03910018552875696, "grad_norm": 11.198404312133789, "learning_rate": 9.97872277670634e-06, "loss": 0.4896, "step": 843 }, { "epoch": 0.03914656771799629, "grad_norm": 14.923920631408691, "learning_rate": 9.978654901022262e-06, "loss": 0.3877, "step": 844 }, { "epoch": 0.039192949907235625, "grad_norm": 6.756741523742676, "learning_rate": 9.978586917478253e-06, "loss": 0.4277, "step": 845 }, { "epoch": 0.039239332096474955, "grad_norm": 5.162025451660156, "learning_rate": 9.978518826075782e-06, "loss": 0.3539, "step": 846 }, { "epoch": 0.039285714285714285, "grad_norm": 6.9853339195251465, "learning_rate": 9.97845062681633e-06, "loss": 0.415, "step": 847 }, { "epoch": 0.039332096474953615, "grad_norm": 6.891357898712158, "learning_rate": 9.978382319701368e-06, "loss": 0.2896, "step": 848 }, { "epoch": 0.03937847866419295, "grad_norm": 12.944729804992676, "learning_rate": 9.978313904732379e-06, "loss": 0.4566, "step": 849 }, { "epoch": 0.03942486085343228, "grad_norm": 10.087425231933594, "learning_rate": 9.978245381910847e-06, "loss": 0.4293, "step": 850 }, { "epoch": 0.03947124304267161, "grad_norm": 8.393118858337402, "learning_rate": 9.978176751238254e-06, "loss": 0.5035, "step": 851 }, { "epoch": 0.03951762523191095, "grad_norm": 10.911489486694336, "learning_rate": 9.978108012716086e-06, "loss": 0.4287, "step": 852 }, { "epoch": 0.03956400742115028, "grad_norm": 11.708649635314941, "learning_rate": 9.978039166345834e-06, "loss": 0.2923, "step": 853 }, { "epoch": 0.03961038961038961, "grad_norm": 8.20858383178711, "learning_rate": 9.97797021212899e-06, "loss": 0.3195, "step": 854 }, { "epoch": 0.03965677179962894, "grad_norm": 9.362435340881348, "learning_rate": 9.977901150067046e-06, "loss": 0.4576, "step": 855 }, { "epoch": 0.03970315398886828, "grad_norm": 4.80518102645874, "learning_rate": 9.9778319801615e-06, "loss": 0.3997, "step": 856 }, { "epoch": 0.03974953617810761, "grad_norm": 11.206049919128418, "learning_rate": 9.97776270241385e-06, "loss": 0.4134, "step": 857 }, { "epoch": 0.03979591836734694, "grad_norm": 9.13418197631836, "learning_rate": 9.977693316825595e-06, "loss": 0.3621, "step": 858 }, { "epoch": 0.039842300556586274, "grad_norm": 44.14113235473633, "learning_rate": 9.977623823398242e-06, "loss": 0.6868, "step": 859 }, { "epoch": 0.039888682745825604, "grad_norm": 10.745347023010254, "learning_rate": 9.977554222133293e-06, "loss": 0.3171, "step": 860 }, { "epoch": 0.039935064935064934, "grad_norm": 26.513526916503906, "learning_rate": 9.977484513032256e-06, "loss": 0.5821, "step": 861 }, { "epoch": 0.039981447124304265, "grad_norm": 10.17279052734375, "learning_rate": 9.977414696096643e-06, "loss": 0.4147, "step": 862 }, { "epoch": 0.0400278293135436, "grad_norm": 9.948614120483398, "learning_rate": 9.977344771327965e-06, "loss": 0.518, "step": 863 }, { "epoch": 0.04007421150278293, "grad_norm": 7.619440078735352, "learning_rate": 9.977274738727741e-06, "loss": 0.4947, "step": 864 }, { "epoch": 0.04012059369202226, "grad_norm": 7.910632133483887, "learning_rate": 9.977204598297483e-06, "loss": 0.4168, "step": 865 }, { "epoch": 0.0401669758812616, "grad_norm": 7.212706565856934, "learning_rate": 9.977134350038713e-06, "loss": 0.3964, "step": 866 }, { "epoch": 0.04021335807050093, "grad_norm": 8.271952629089355, "learning_rate": 9.977063993952952e-06, "loss": 0.4513, "step": 867 }, { "epoch": 0.04025974025974026, "grad_norm": 7.391940593719482, "learning_rate": 9.976993530041726e-06, "loss": 0.3897, "step": 868 }, { "epoch": 0.04030612244897959, "grad_norm": 11.249006271362305, "learning_rate": 9.976922958306559e-06, "loss": 0.3418, "step": 869 }, { "epoch": 0.040352504638218926, "grad_norm": 7.192676544189453, "learning_rate": 9.976852278748981e-06, "loss": 0.3389, "step": 870 }, { "epoch": 0.04039888682745826, "grad_norm": 15.041473388671875, "learning_rate": 9.976781491370523e-06, "loss": 0.4865, "step": 871 }, { "epoch": 0.04044526901669759, "grad_norm": 20.572750091552734, "learning_rate": 9.976710596172721e-06, "loss": 0.5336, "step": 872 }, { "epoch": 0.04049165120593692, "grad_norm": 15.14177417755127, "learning_rate": 9.976639593157107e-06, "loss": 0.5291, "step": 873 }, { "epoch": 0.040538033395176254, "grad_norm": 13.944427490234375, "learning_rate": 9.976568482325222e-06, "loss": 0.4126, "step": 874 }, { "epoch": 0.040584415584415584, "grad_norm": 13.986983299255371, "learning_rate": 9.976497263678604e-06, "loss": 0.5431, "step": 875 }, { "epoch": 0.040630797773654914, "grad_norm": 9.243247985839844, "learning_rate": 9.9764259372188e-06, "loss": 0.4098, "step": 876 }, { "epoch": 0.04067717996289425, "grad_norm": 11.866296768188477, "learning_rate": 9.976354502947352e-06, "loss": 0.3863, "step": 877 }, { "epoch": 0.04072356215213358, "grad_norm": 9.466934204101562, "learning_rate": 9.976282960865809e-06, "loss": 0.2985, "step": 878 }, { "epoch": 0.04076994434137291, "grad_norm": 17.092632293701172, "learning_rate": 9.976211310975718e-06, "loss": 0.6465, "step": 879 }, { "epoch": 0.04081632653061224, "grad_norm": 9.901687622070312, "learning_rate": 9.976139553278637e-06, "loss": 0.347, "step": 880 }, { "epoch": 0.04086270871985158, "grad_norm": 8.00385570526123, "learning_rate": 9.976067687776114e-06, "loss": 0.4445, "step": 881 }, { "epoch": 0.04090909090909091, "grad_norm": 8.631209373474121, "learning_rate": 9.975995714469711e-06, "loss": 0.4854, "step": 882 }, { "epoch": 0.04095547309833024, "grad_norm": 5.916597366333008, "learning_rate": 9.975923633360985e-06, "loss": 0.3678, "step": 883 }, { "epoch": 0.041001855287569576, "grad_norm": 9.249369621276855, "learning_rate": 9.9758514444515e-06, "loss": 0.3863, "step": 884 }, { "epoch": 0.041048237476808906, "grad_norm": 7.457449913024902, "learning_rate": 9.975779147742815e-06, "loss": 0.4203, "step": 885 }, { "epoch": 0.041094619666048236, "grad_norm": 9.586709976196289, "learning_rate": 9.9757067432365e-06, "loss": 0.4474, "step": 886 }, { "epoch": 0.041141001855287566, "grad_norm": 8.775933265686035, "learning_rate": 9.975634230934122e-06, "loss": 0.4217, "step": 887 }, { "epoch": 0.041187384044526903, "grad_norm": 9.382211685180664, "learning_rate": 9.975561610837254e-06, "loss": 0.5044, "step": 888 }, { "epoch": 0.041233766233766234, "grad_norm": 5.984456539154053, "learning_rate": 9.975488882947468e-06, "loss": 0.4028, "step": 889 }, { "epoch": 0.041280148423005564, "grad_norm": 12.406526565551758, "learning_rate": 9.975416047266339e-06, "loss": 0.5353, "step": 890 }, { "epoch": 0.0413265306122449, "grad_norm": 6.166219234466553, "learning_rate": 9.975343103795448e-06, "loss": 0.365, "step": 891 }, { "epoch": 0.04137291280148423, "grad_norm": 11.562482833862305, "learning_rate": 9.97527005253637e-06, "loss": 0.3759, "step": 892 }, { "epoch": 0.04141929499072356, "grad_norm": 16.990718841552734, "learning_rate": 9.975196893490692e-06, "loss": 0.3574, "step": 893 }, { "epoch": 0.04146567717996289, "grad_norm": 16.96734619140625, "learning_rate": 9.975123626659997e-06, "loss": 0.5668, "step": 894 }, { "epoch": 0.04151205936920223, "grad_norm": 6.819518566131592, "learning_rate": 9.975050252045872e-06, "loss": 0.3631, "step": 895 }, { "epoch": 0.04155844155844156, "grad_norm": 7.788445472717285, "learning_rate": 9.974976769649909e-06, "loss": 0.2979, "step": 896 }, { "epoch": 0.04160482374768089, "grad_norm": 7.305904865264893, "learning_rate": 9.974903179473696e-06, "loss": 0.369, "step": 897 }, { "epoch": 0.041651205936920226, "grad_norm": 9.834715843200684, "learning_rate": 9.974829481518833e-06, "loss": 0.4236, "step": 898 }, { "epoch": 0.041697588126159556, "grad_norm": 13.673022270202637, "learning_rate": 9.974755675786909e-06, "loss": 0.5584, "step": 899 }, { "epoch": 0.041743970315398886, "grad_norm": 10.94511890411377, "learning_rate": 9.97468176227953e-06, "loss": 0.5304, "step": 900 }, { "epoch": 0.041790352504638216, "grad_norm": 7.708440780639648, "learning_rate": 9.974607740998295e-06, "loss": 0.4474, "step": 901 }, { "epoch": 0.04183673469387755, "grad_norm": 9.187613487243652, "learning_rate": 9.974533611944805e-06, "loss": 0.3655, "step": 902 }, { "epoch": 0.04188311688311688, "grad_norm": 9.120685577392578, "learning_rate": 9.974459375120669e-06, "loss": 0.4586, "step": 903 }, { "epoch": 0.04192949907235621, "grad_norm": 10.971817016601562, "learning_rate": 9.974385030527496e-06, "loss": 0.2857, "step": 904 }, { "epoch": 0.04197588126159555, "grad_norm": 9.587824821472168, "learning_rate": 9.974310578166893e-06, "loss": 0.3872, "step": 905 }, { "epoch": 0.04202226345083488, "grad_norm": 7.294124126434326, "learning_rate": 9.974236018040476e-06, "loss": 0.3291, "step": 906 }, { "epoch": 0.04206864564007421, "grad_norm": 8.821385383605957, "learning_rate": 9.974161350149858e-06, "loss": 0.3531, "step": 907 }, { "epoch": 0.04211502782931354, "grad_norm": 6.219289779663086, "learning_rate": 9.974086574496657e-06, "loss": 0.3241, "step": 908 }, { "epoch": 0.04216141001855288, "grad_norm": 8.254718780517578, "learning_rate": 9.974011691082497e-06, "loss": 0.4321, "step": 909 }, { "epoch": 0.04220779220779221, "grad_norm": 8.203338623046875, "learning_rate": 9.973936699908994e-06, "loss": 0.2987, "step": 910 }, { "epoch": 0.04225417439703154, "grad_norm": 19.18236541748047, "learning_rate": 9.973861600977778e-06, "loss": 0.593, "step": 911 }, { "epoch": 0.042300556586270875, "grad_norm": 11.810781478881836, "learning_rate": 9.973786394290475e-06, "loss": 0.399, "step": 912 }, { "epoch": 0.042346938775510205, "grad_norm": 6.282923698425293, "learning_rate": 9.973711079848711e-06, "loss": 0.3678, "step": 913 }, { "epoch": 0.042393320964749535, "grad_norm": 9.032304763793945, "learning_rate": 9.973635657654122e-06, "loss": 0.4579, "step": 914 }, { "epoch": 0.042439703153988866, "grad_norm": 11.987093925476074, "learning_rate": 9.973560127708338e-06, "loss": 0.438, "step": 915 }, { "epoch": 0.0424860853432282, "grad_norm": 13.911323547363281, "learning_rate": 9.973484490012997e-06, "loss": 0.4666, "step": 916 }, { "epoch": 0.04253246753246753, "grad_norm": 8.441017150878906, "learning_rate": 9.973408744569739e-06, "loss": 0.4318, "step": 917 }, { "epoch": 0.04257884972170686, "grad_norm": 9.085371017456055, "learning_rate": 9.973332891380203e-06, "loss": 0.3758, "step": 918 }, { "epoch": 0.0426252319109462, "grad_norm": 6.638881683349609, "learning_rate": 9.973256930446032e-06, "loss": 0.337, "step": 919 }, { "epoch": 0.04267161410018553, "grad_norm": 9.77808666229248, "learning_rate": 9.973180861768874e-06, "loss": 0.415, "step": 920 }, { "epoch": 0.04271799628942486, "grad_norm": 8.215989112854004, "learning_rate": 9.973104685350377e-06, "loss": 0.3989, "step": 921 }, { "epoch": 0.04276437847866419, "grad_norm": 9.200830459594727, "learning_rate": 9.973028401192188e-06, "loss": 0.4533, "step": 922 }, { "epoch": 0.04281076066790353, "grad_norm": 15.316771507263184, "learning_rate": 9.972952009295963e-06, "loss": 0.5538, "step": 923 }, { "epoch": 0.04285714285714286, "grad_norm": 9.797103881835938, "learning_rate": 9.972875509663354e-06, "loss": 0.345, "step": 924 }, { "epoch": 0.04290352504638219, "grad_norm": 6.507185459136963, "learning_rate": 9.972798902296021e-06, "loss": 0.4772, "step": 925 }, { "epoch": 0.042949907235621525, "grad_norm": 6.267787933349609, "learning_rate": 9.972722187195622e-06, "loss": 0.3737, "step": 926 }, { "epoch": 0.042996289424860855, "grad_norm": 7.309338569641113, "learning_rate": 9.972645364363819e-06, "loss": 0.4073, "step": 927 }, { "epoch": 0.043042671614100185, "grad_norm": 15.258577346801758, "learning_rate": 9.972568433802278e-06, "loss": 0.4067, "step": 928 }, { "epoch": 0.043089053803339515, "grad_norm": 11.466996192932129, "learning_rate": 9.972491395512667e-06, "loss": 0.54, "step": 929 }, { "epoch": 0.04313543599257885, "grad_norm": 20.614242553710938, "learning_rate": 9.97241424949665e-06, "loss": 0.4474, "step": 930 }, { "epoch": 0.04318181818181818, "grad_norm": 8.470287322998047, "learning_rate": 9.972336995755901e-06, "loss": 0.3822, "step": 931 }, { "epoch": 0.04322820037105751, "grad_norm": 10.132938385009766, "learning_rate": 9.972259634292093e-06, "loss": 0.2366, "step": 932 }, { "epoch": 0.04327458256029684, "grad_norm": 16.700105667114258, "learning_rate": 9.972182165106904e-06, "loss": 0.3757, "step": 933 }, { "epoch": 0.04332096474953618, "grad_norm": 6.639756202697754, "learning_rate": 9.972104588202011e-06, "loss": 0.2738, "step": 934 }, { "epoch": 0.04336734693877551, "grad_norm": 11.574379920959473, "learning_rate": 9.972026903579095e-06, "loss": 0.5025, "step": 935 }, { "epoch": 0.04341372912801484, "grad_norm": 7.144610404968262, "learning_rate": 9.971949111239838e-06, "loss": 0.2928, "step": 936 }, { "epoch": 0.04346011131725418, "grad_norm": 14.368844985961914, "learning_rate": 9.971871211185926e-06, "loss": 0.4664, "step": 937 }, { "epoch": 0.04350649350649351, "grad_norm": 9.602409362792969, "learning_rate": 9.971793203419046e-06, "loss": 0.3508, "step": 938 }, { "epoch": 0.04355287569573284, "grad_norm": 6.9716267585754395, "learning_rate": 9.97171508794089e-06, "loss": 0.3542, "step": 939 }, { "epoch": 0.04359925788497217, "grad_norm": 12.60700798034668, "learning_rate": 9.971636864753147e-06, "loss": 0.4649, "step": 940 }, { "epoch": 0.043645640074211504, "grad_norm": 17.0604190826416, "learning_rate": 9.971558533857516e-06, "loss": 0.4539, "step": 941 }, { "epoch": 0.043692022263450835, "grad_norm": 10.673162460327148, "learning_rate": 9.971480095255688e-06, "loss": 0.2773, "step": 942 }, { "epoch": 0.043738404452690165, "grad_norm": 8.595359802246094, "learning_rate": 9.971401548949368e-06, "loss": 0.3238, "step": 943 }, { "epoch": 0.0437847866419295, "grad_norm": 12.472744941711426, "learning_rate": 9.971322894940256e-06, "loss": 0.4597, "step": 944 }, { "epoch": 0.04383116883116883, "grad_norm": 5.5114359855651855, "learning_rate": 9.971244133230055e-06, "loss": 0.3746, "step": 945 }, { "epoch": 0.04387755102040816, "grad_norm": 11.868242263793945, "learning_rate": 9.971165263820473e-06, "loss": 0.4065, "step": 946 }, { "epoch": 0.04392393320964749, "grad_norm": 11.012418746948242, "learning_rate": 9.971086286713216e-06, "loss": 0.3878, "step": 947 }, { "epoch": 0.04397031539888683, "grad_norm": 19.23223304748535, "learning_rate": 9.971007201909997e-06, "loss": 0.3476, "step": 948 }, { "epoch": 0.04401669758812616, "grad_norm": 7.625611782073975, "learning_rate": 9.970928009412528e-06, "loss": 0.3826, "step": 949 }, { "epoch": 0.04406307977736549, "grad_norm": 11.126355171203613, "learning_rate": 9.970848709222527e-06, "loss": 0.4246, "step": 950 }, { "epoch": 0.04410946196660483, "grad_norm": 12.664572715759277, "learning_rate": 9.97076930134171e-06, "loss": 0.4382, "step": 951 }, { "epoch": 0.04415584415584416, "grad_norm": 12.361783981323242, "learning_rate": 9.970689785771798e-06, "loss": 0.4868, "step": 952 }, { "epoch": 0.04420222634508349, "grad_norm": 9.482993125915527, "learning_rate": 9.970610162514514e-06, "loss": 0.4334, "step": 953 }, { "epoch": 0.04424860853432282, "grad_norm": 14.602068901062012, "learning_rate": 9.970530431571583e-06, "loss": 0.3936, "step": 954 }, { "epoch": 0.044294990723562154, "grad_norm": 11.912742614746094, "learning_rate": 9.970450592944732e-06, "loss": 0.4787, "step": 955 }, { "epoch": 0.044341372912801484, "grad_norm": 10.348937034606934, "learning_rate": 9.970370646635689e-06, "loss": 0.4445, "step": 956 }, { "epoch": 0.044387755102040814, "grad_norm": 13.130661964416504, "learning_rate": 9.970290592646188e-06, "loss": 0.4511, "step": 957 }, { "epoch": 0.04443413729128015, "grad_norm": 11.332514762878418, "learning_rate": 9.970210430977962e-06, "loss": 0.419, "step": 958 }, { "epoch": 0.04448051948051948, "grad_norm": 11.612855911254883, "learning_rate": 9.97013016163275e-06, "loss": 0.3568, "step": 959 }, { "epoch": 0.04452690166975881, "grad_norm": 13.216611862182617, "learning_rate": 9.970049784612291e-06, "loss": 0.4387, "step": 960 }, { "epoch": 0.04457328385899814, "grad_norm": 10.062793731689453, "learning_rate": 9.96996929991832e-06, "loss": 0.3562, "step": 961 }, { "epoch": 0.04461966604823748, "grad_norm": 8.651711463928223, "learning_rate": 9.969888707552589e-06, "loss": 0.3968, "step": 962 }, { "epoch": 0.04466604823747681, "grad_norm": 16.190807342529297, "learning_rate": 9.96980800751684e-06, "loss": 0.4088, "step": 963 }, { "epoch": 0.04471243042671614, "grad_norm": 15.230914115905762, "learning_rate": 9.969727199812822e-06, "loss": 0.4642, "step": 964 }, { "epoch": 0.044758812615955476, "grad_norm": 12.874356269836426, "learning_rate": 9.969646284442286e-06, "loss": 0.4663, "step": 965 }, { "epoch": 0.044805194805194806, "grad_norm": 15.12104320526123, "learning_rate": 9.969565261406984e-06, "loss": 0.3764, "step": 966 }, { "epoch": 0.044851576994434136, "grad_norm": 7.958413124084473, "learning_rate": 9.96948413070867e-06, "loss": 0.4747, "step": 967 }, { "epoch": 0.044897959183673466, "grad_norm": 12.80143928527832, "learning_rate": 9.969402892349105e-06, "loss": 0.5706, "step": 968 }, { "epoch": 0.044944341372912804, "grad_norm": 12.405646324157715, "learning_rate": 9.96932154633005e-06, "loss": 0.4884, "step": 969 }, { "epoch": 0.044990723562152134, "grad_norm": 12.654348373413086, "learning_rate": 9.96924009265326e-06, "loss": 0.4786, "step": 970 }, { "epoch": 0.045037105751391464, "grad_norm": 17.204269409179688, "learning_rate": 9.969158531320506e-06, "loss": 0.4757, "step": 971 }, { "epoch": 0.0450834879406308, "grad_norm": 9.203259468078613, "learning_rate": 9.969076862333556e-06, "loss": 0.3878, "step": 972 }, { "epoch": 0.04512987012987013, "grad_norm": 5.965961456298828, "learning_rate": 9.968995085694173e-06, "loss": 0.415, "step": 973 }, { "epoch": 0.04517625231910946, "grad_norm": 10.339615821838379, "learning_rate": 9.968913201404134e-06, "loss": 0.2411, "step": 974 }, { "epoch": 0.04522263450834879, "grad_norm": 7.6709794998168945, "learning_rate": 9.96883120946521e-06, "loss": 0.3879, "step": 975 }, { "epoch": 0.04526901669758813, "grad_norm": 15.120798110961914, "learning_rate": 9.968749109879181e-06, "loss": 0.436, "step": 976 }, { "epoch": 0.04531539888682746, "grad_norm": 8.843931198120117, "learning_rate": 9.968666902647823e-06, "loss": 0.4646, "step": 977 }, { "epoch": 0.04536178107606679, "grad_norm": 10.094457626342773, "learning_rate": 9.968584587772916e-06, "loss": 0.4708, "step": 978 }, { "epoch": 0.045408163265306126, "grad_norm": 9.51596450805664, "learning_rate": 9.968502165256245e-06, "loss": 0.4456, "step": 979 }, { "epoch": 0.045454545454545456, "grad_norm": 9.69333267211914, "learning_rate": 9.968419635099596e-06, "loss": 0.3863, "step": 980 }, { "epoch": 0.045500927643784786, "grad_norm": 7.107630729675293, "learning_rate": 9.968336997304753e-06, "loss": 0.4025, "step": 981 }, { "epoch": 0.045547309833024116, "grad_norm": 12.772420883178711, "learning_rate": 9.968254251873512e-06, "loss": 0.3237, "step": 982 }, { "epoch": 0.04559369202226345, "grad_norm": 16.35306739807129, "learning_rate": 9.968171398807665e-06, "loss": 0.5095, "step": 983 }, { "epoch": 0.04564007421150278, "grad_norm": 8.391264915466309, "learning_rate": 9.968088438109002e-06, "loss": 0.2957, "step": 984 }, { "epoch": 0.04568645640074211, "grad_norm": 10.56345272064209, "learning_rate": 9.968005369779323e-06, "loss": 0.4431, "step": 985 }, { "epoch": 0.04573283858998145, "grad_norm": 7.569792747497559, "learning_rate": 9.96792219382043e-06, "loss": 0.3593, "step": 986 }, { "epoch": 0.04577922077922078, "grad_norm": 8.300820350646973, "learning_rate": 9.967838910234123e-06, "loss": 0.4132, "step": 987 }, { "epoch": 0.04582560296846011, "grad_norm": 7.068622589111328, "learning_rate": 9.967755519022205e-06, "loss": 0.3525, "step": 988 }, { "epoch": 0.04587198515769944, "grad_norm": 8.0853271484375, "learning_rate": 9.967672020186485e-06, "loss": 0.3815, "step": 989 }, { "epoch": 0.04591836734693878, "grad_norm": 9.991074562072754, "learning_rate": 9.96758841372877e-06, "loss": 0.464, "step": 990 }, { "epoch": 0.04596474953617811, "grad_norm": 30.066137313842773, "learning_rate": 9.967504699650873e-06, "loss": 0.5648, "step": 991 }, { "epoch": 0.04601113172541744, "grad_norm": 7.51839017868042, "learning_rate": 9.967420877954605e-06, "loss": 0.2858, "step": 992 }, { "epoch": 0.046057513914656775, "grad_norm": 7.245944023132324, "learning_rate": 9.967336948641787e-06, "loss": 0.4347, "step": 993 }, { "epoch": 0.046103896103896105, "grad_norm": 16.130538940429688, "learning_rate": 9.967252911714232e-06, "loss": 0.6118, "step": 994 }, { "epoch": 0.046150278293135436, "grad_norm": 6.69675874710083, "learning_rate": 9.967168767173762e-06, "loss": 0.3457, "step": 995 }, { "epoch": 0.046196660482374766, "grad_norm": 8.242250442504883, "learning_rate": 9.967084515022201e-06, "loss": 0.4483, "step": 996 }, { "epoch": 0.0462430426716141, "grad_norm": 6.43617582321167, "learning_rate": 9.967000155261374e-06, "loss": 0.4209, "step": 997 }, { "epoch": 0.04628942486085343, "grad_norm": 21.648340225219727, "learning_rate": 9.966915687893109e-06, "loss": 0.4896, "step": 998 }, { "epoch": 0.04633580705009276, "grad_norm": 7.697157382965088, "learning_rate": 9.966831112919235e-06, "loss": 0.4674, "step": 999 }, { "epoch": 0.04638218923933209, "grad_norm": 13.449706077575684, "learning_rate": 9.966746430341584e-06, "loss": 0.372, "step": 1000 }, { "epoch": 0.04642857142857143, "grad_norm": 10.43430233001709, "learning_rate": 9.966661640161991e-06, "loss": 0.406, "step": 1001 }, { "epoch": 0.04647495361781076, "grad_norm": 11.75523853302002, "learning_rate": 9.966576742382294e-06, "loss": 0.4092, "step": 1002 }, { "epoch": 0.04652133580705009, "grad_norm": 8.909481048583984, "learning_rate": 9.966491737004332e-06, "loss": 0.3715, "step": 1003 }, { "epoch": 0.04656771799628943, "grad_norm": 7.0916314125061035, "learning_rate": 9.966406624029946e-06, "loss": 0.3929, "step": 1004 }, { "epoch": 0.04661410018552876, "grad_norm": 9.768636703491211, "learning_rate": 9.966321403460979e-06, "loss": 0.3457, "step": 1005 }, { "epoch": 0.04666048237476809, "grad_norm": 14.74716854095459, "learning_rate": 9.966236075299279e-06, "loss": 0.414, "step": 1006 }, { "epoch": 0.04670686456400742, "grad_norm": 13.926876068115234, "learning_rate": 9.966150639546695e-06, "loss": 0.3519, "step": 1007 }, { "epoch": 0.046753246753246755, "grad_norm": 6.271501541137695, "learning_rate": 9.966065096205076e-06, "loss": 0.5039, "step": 1008 }, { "epoch": 0.046799628942486085, "grad_norm": 5.127688884735107, "learning_rate": 9.965979445276274e-06, "loss": 0.3777, "step": 1009 }, { "epoch": 0.046846011131725415, "grad_norm": 11.604674339294434, "learning_rate": 9.965893686762148e-06, "loss": 0.4735, "step": 1010 }, { "epoch": 0.04689239332096475, "grad_norm": 12.585679054260254, "learning_rate": 9.965807820664555e-06, "loss": 0.4056, "step": 1011 }, { "epoch": 0.04693877551020408, "grad_norm": 22.850130081176758, "learning_rate": 9.965721846985355e-06, "loss": 0.5802, "step": 1012 }, { "epoch": 0.04698515769944341, "grad_norm": 11.325098991394043, "learning_rate": 9.96563576572641e-06, "loss": 0.3179, "step": 1013 }, { "epoch": 0.04703153988868274, "grad_norm": 6.602332592010498, "learning_rate": 9.965549576889584e-06, "loss": 0.2719, "step": 1014 }, { "epoch": 0.04707792207792208, "grad_norm": 19.024145126342773, "learning_rate": 9.965463280476746e-06, "loss": 0.3375, "step": 1015 }, { "epoch": 0.04712430426716141, "grad_norm": 12.56495475769043, "learning_rate": 9.965376876489765e-06, "loss": 0.4552, "step": 1016 }, { "epoch": 0.04717068645640074, "grad_norm": 5.800212860107422, "learning_rate": 9.965290364930513e-06, "loss": 0.4366, "step": 1017 }, { "epoch": 0.04721706864564008, "grad_norm": 11.657938957214355, "learning_rate": 9.965203745800864e-06, "loss": 0.3953, "step": 1018 }, { "epoch": 0.04726345083487941, "grad_norm": 6.598074913024902, "learning_rate": 9.965117019102697e-06, "loss": 0.4153, "step": 1019 }, { "epoch": 0.04730983302411874, "grad_norm": 7.643691062927246, "learning_rate": 9.965030184837886e-06, "loss": 0.3553, "step": 1020 }, { "epoch": 0.04735621521335807, "grad_norm": 12.792657852172852, "learning_rate": 9.964943243008316e-06, "loss": 0.3779, "step": 1021 }, { "epoch": 0.047402597402597405, "grad_norm": 13.31843376159668, "learning_rate": 9.964856193615868e-06, "loss": 0.5039, "step": 1022 }, { "epoch": 0.047448979591836735, "grad_norm": 9.499555587768555, "learning_rate": 9.964769036662432e-06, "loss": 0.3833, "step": 1023 }, { "epoch": 0.047495361781076065, "grad_norm": 9.9034423828125, "learning_rate": 9.964681772149891e-06, "loss": 0.4067, "step": 1024 }, { "epoch": 0.0475417439703154, "grad_norm": 7.019118309020996, "learning_rate": 9.964594400080139e-06, "loss": 0.3318, "step": 1025 }, { "epoch": 0.04758812615955473, "grad_norm": 13.739688873291016, "learning_rate": 9.964506920455067e-06, "loss": 0.5022, "step": 1026 }, { "epoch": 0.04763450834879406, "grad_norm": 15.450751304626465, "learning_rate": 9.964419333276571e-06, "loss": 0.4101, "step": 1027 }, { "epoch": 0.04768089053803339, "grad_norm": 15.785492897033691, "learning_rate": 9.96433163854655e-06, "loss": 0.4078, "step": 1028 }, { "epoch": 0.04772727272727273, "grad_norm": 8.952168464660645, "learning_rate": 9.964243836266902e-06, "loss": 0.3634, "step": 1029 }, { "epoch": 0.04777365491651206, "grad_norm": 8.445525169372559, "learning_rate": 9.964155926439529e-06, "loss": 0.3419, "step": 1030 }, { "epoch": 0.04782003710575139, "grad_norm": 10.40766429901123, "learning_rate": 9.964067909066337e-06, "loss": 0.4329, "step": 1031 }, { "epoch": 0.04786641929499073, "grad_norm": 9.632638931274414, "learning_rate": 9.963979784149232e-06, "loss": 0.3609, "step": 1032 }, { "epoch": 0.04791280148423006, "grad_norm": 9.561664581298828, "learning_rate": 9.963891551690123e-06, "loss": 0.4024, "step": 1033 }, { "epoch": 0.04795918367346939, "grad_norm": 11.055066108703613, "learning_rate": 9.96380321169092e-06, "loss": 0.466, "step": 1034 }, { "epoch": 0.04800556586270872, "grad_norm": 10.116277694702148, "learning_rate": 9.96371476415354e-06, "loss": 0.4317, "step": 1035 }, { "epoch": 0.048051948051948054, "grad_norm": 6.122961521148682, "learning_rate": 9.9636262090799e-06, "loss": 0.3338, "step": 1036 }, { "epoch": 0.048098330241187384, "grad_norm": 7.72927713394165, "learning_rate": 9.963537546471913e-06, "loss": 0.4399, "step": 1037 }, { "epoch": 0.048144712430426714, "grad_norm": 24.35207748413086, "learning_rate": 9.963448776331503e-06, "loss": 0.4161, "step": 1038 }, { "epoch": 0.04819109461966605, "grad_norm": 9.348984718322754, "learning_rate": 9.963359898660594e-06, "loss": 0.3646, "step": 1039 }, { "epoch": 0.04823747680890538, "grad_norm": 10.056234359741211, "learning_rate": 9.963270913461111e-06, "loss": 0.3895, "step": 1040 }, { "epoch": 0.04828385899814471, "grad_norm": 16.391950607299805, "learning_rate": 9.963181820734981e-06, "loss": 0.5798, "step": 1041 }, { "epoch": 0.04833024118738404, "grad_norm": 10.997910499572754, "learning_rate": 9.963092620484136e-06, "loss": 0.5192, "step": 1042 }, { "epoch": 0.04837662337662338, "grad_norm": 8.50693130493164, "learning_rate": 9.963003312710506e-06, "loss": 0.4415, "step": 1043 }, { "epoch": 0.04842300556586271, "grad_norm": 13.636285781860352, "learning_rate": 9.962913897416029e-06, "loss": 0.3721, "step": 1044 }, { "epoch": 0.04846938775510204, "grad_norm": 7.688148021697998, "learning_rate": 9.962824374602638e-06, "loss": 0.4779, "step": 1045 }, { "epoch": 0.048515769944341376, "grad_norm": 9.585451126098633, "learning_rate": 9.962734744272274e-06, "loss": 0.4649, "step": 1046 }, { "epoch": 0.048562152133580706, "grad_norm": 6.543616771697998, "learning_rate": 9.962645006426881e-06, "loss": 0.3028, "step": 1047 }, { "epoch": 0.048608534322820036, "grad_norm": 5.940668106079102, "learning_rate": 9.962555161068401e-06, "loss": 0.3602, "step": 1048 }, { "epoch": 0.04865491651205937, "grad_norm": 46.65264892578125, "learning_rate": 9.96246520819878e-06, "loss": 0.4652, "step": 1049 }, { "epoch": 0.048701298701298704, "grad_norm": 14.268065452575684, "learning_rate": 9.962375147819966e-06, "loss": 0.4448, "step": 1050 }, { "epoch": 0.048747680890538034, "grad_norm": 6.0597991943359375, "learning_rate": 9.962284979933917e-06, "loss": 0.3163, "step": 1051 }, { "epoch": 0.048794063079777364, "grad_norm": 10.550209999084473, "learning_rate": 9.962194704542577e-06, "loss": 0.4302, "step": 1052 }, { "epoch": 0.0488404452690167, "grad_norm": 8.628168106079102, "learning_rate": 9.962104321647907e-06, "loss": 0.311, "step": 1053 }, { "epoch": 0.04888682745825603, "grad_norm": 7.447996139526367, "learning_rate": 9.962013831251864e-06, "loss": 0.3954, "step": 1054 }, { "epoch": 0.04893320964749536, "grad_norm": 11.761287689208984, "learning_rate": 9.96192323335641e-06, "loss": 0.4963, "step": 1055 }, { "epoch": 0.04897959183673469, "grad_norm": 9.627856254577637, "learning_rate": 9.961832527963504e-06, "loss": 0.4488, "step": 1056 }, { "epoch": 0.04902597402597403, "grad_norm": 10.404844284057617, "learning_rate": 9.961741715075115e-06, "loss": 0.3934, "step": 1057 }, { "epoch": 0.04907235621521336, "grad_norm": 11.240641593933105, "learning_rate": 9.961650794693209e-06, "loss": 0.3963, "step": 1058 }, { "epoch": 0.04911873840445269, "grad_norm": 6.139443397521973, "learning_rate": 9.961559766819755e-06, "loss": 0.3655, "step": 1059 }, { "epoch": 0.04916512059369202, "grad_norm": 7.049687385559082, "learning_rate": 9.961468631456725e-06, "loss": 0.2781, "step": 1060 }, { "epoch": 0.049211502782931356, "grad_norm": 7.527011871337891, "learning_rate": 9.961377388606095e-06, "loss": 0.4309, "step": 1061 }, { "epoch": 0.049257884972170686, "grad_norm": 16.402816772460938, "learning_rate": 9.96128603826984e-06, "loss": 0.4385, "step": 1062 }, { "epoch": 0.049304267161410016, "grad_norm": 18.73649024963379, "learning_rate": 9.96119458044994e-06, "loss": 0.4576, "step": 1063 }, { "epoch": 0.04935064935064935, "grad_norm": 12.909950256347656, "learning_rate": 9.961103015148376e-06, "loss": 0.3092, "step": 1064 }, { "epoch": 0.04939703153988868, "grad_norm": 12.027917861938477, "learning_rate": 9.961011342367133e-06, "loss": 0.4228, "step": 1065 }, { "epoch": 0.04944341372912801, "grad_norm": 6.38170862197876, "learning_rate": 9.960919562108194e-06, "loss": 0.3951, "step": 1066 }, { "epoch": 0.049489795918367344, "grad_norm": 14.91861629486084, "learning_rate": 9.96082767437355e-06, "loss": 0.4877, "step": 1067 }, { "epoch": 0.04953617810760668, "grad_norm": 9.494978904724121, "learning_rate": 9.960735679165191e-06, "loss": 0.3868, "step": 1068 }, { "epoch": 0.04958256029684601, "grad_norm": 8.275296211242676, "learning_rate": 9.960643576485112e-06, "loss": 0.3962, "step": 1069 }, { "epoch": 0.04962894248608534, "grad_norm": 7.02292537689209, "learning_rate": 9.960551366335304e-06, "loss": 0.3503, "step": 1070 }, { "epoch": 0.04967532467532468, "grad_norm": 5.685388088226318, "learning_rate": 9.960459048717768e-06, "loss": 0.3907, "step": 1071 }, { "epoch": 0.04972170686456401, "grad_norm": 19.940231323242188, "learning_rate": 9.960366623634503e-06, "loss": 0.4933, "step": 1072 }, { "epoch": 0.04976808905380334, "grad_norm": 6.991631031036377, "learning_rate": 9.96027409108751e-06, "loss": 0.4004, "step": 1073 }, { "epoch": 0.04981447124304267, "grad_norm": 9.649443626403809, "learning_rate": 9.960181451078798e-06, "loss": 0.5338, "step": 1074 }, { "epoch": 0.049860853432282005, "grad_norm": 8.972970008850098, "learning_rate": 9.96008870361037e-06, "loss": 0.4422, "step": 1075 }, { "epoch": 0.049907235621521336, "grad_norm": 9.875809669494629, "learning_rate": 9.959995848684234e-06, "loss": 0.3398, "step": 1076 }, { "epoch": 0.049953617810760666, "grad_norm": 8.354192733764648, "learning_rate": 9.959902886302406e-06, "loss": 0.4201, "step": 1077 }, { "epoch": 0.05, "grad_norm": 6.699743270874023, "learning_rate": 9.959809816466898e-06, "loss": 0.2437, "step": 1078 }, { "epoch": 0.05004638218923933, "grad_norm": 11.673683166503906, "learning_rate": 9.959716639179726e-06, "loss": 0.3801, "step": 1079 }, { "epoch": 0.05009276437847866, "grad_norm": 8.308085441589355, "learning_rate": 9.95962335444291e-06, "loss": 0.4884, "step": 1080 }, { "epoch": 0.05013914656771799, "grad_norm": 8.367311477661133, "learning_rate": 9.959529962258469e-06, "loss": 0.4413, "step": 1081 }, { "epoch": 0.05018552875695733, "grad_norm": 13.02929973602295, "learning_rate": 9.959436462628428e-06, "loss": 0.5111, "step": 1082 }, { "epoch": 0.05023191094619666, "grad_norm": 11.46027660369873, "learning_rate": 9.959342855554811e-06, "loss": 0.3578, "step": 1083 }, { "epoch": 0.05027829313543599, "grad_norm": 14.044285774230957, "learning_rate": 9.959249141039648e-06, "loss": 0.448, "step": 1084 }, { "epoch": 0.05032467532467533, "grad_norm": 9.618843078613281, "learning_rate": 9.959155319084966e-06, "loss": 0.3986, "step": 1085 }, { "epoch": 0.05037105751391466, "grad_norm": 12.158838272094727, "learning_rate": 9.9590613896928e-06, "loss": 0.5216, "step": 1086 }, { "epoch": 0.05041743970315399, "grad_norm": 24.04228973388672, "learning_rate": 9.958967352865188e-06, "loss": 0.51, "step": 1087 }, { "epoch": 0.05046382189239332, "grad_norm": 9.937569618225098, "learning_rate": 9.95887320860416e-06, "loss": 0.3738, "step": 1088 }, { "epoch": 0.050510204081632655, "grad_norm": 6.318461894989014, "learning_rate": 9.95877895691176e-06, "loss": 0.4188, "step": 1089 }, { "epoch": 0.050556586270871985, "grad_norm": 11.793315887451172, "learning_rate": 9.958684597790031e-06, "loss": 0.3958, "step": 1090 }, { "epoch": 0.050602968460111315, "grad_norm": 8.565855026245117, "learning_rate": 9.958590131241015e-06, "loss": 0.3839, "step": 1091 }, { "epoch": 0.05064935064935065, "grad_norm": 12.259551048278809, "learning_rate": 9.958495557266761e-06, "loss": 0.3942, "step": 1092 }, { "epoch": 0.05069573283858998, "grad_norm": 12.974520683288574, "learning_rate": 9.958400875869314e-06, "loss": 0.4554, "step": 1093 }, { "epoch": 0.05074211502782931, "grad_norm": 9.935175895690918, "learning_rate": 9.958306087050727e-06, "loss": 0.3618, "step": 1094 }, { "epoch": 0.05078849721706864, "grad_norm": 7.114926338195801, "learning_rate": 9.958211190813054e-06, "loss": 0.261, "step": 1095 }, { "epoch": 0.05083487940630798, "grad_norm": 6.381222248077393, "learning_rate": 9.958116187158351e-06, "loss": 0.4353, "step": 1096 }, { "epoch": 0.05088126159554731, "grad_norm": 9.555322647094727, "learning_rate": 9.958021076088676e-06, "loss": 0.4488, "step": 1097 }, { "epoch": 0.05092764378478664, "grad_norm": 11.265775680541992, "learning_rate": 9.95792585760609e-06, "loss": 0.4845, "step": 1098 }, { "epoch": 0.05097402597402598, "grad_norm": 15.961447715759277, "learning_rate": 9.957830531712656e-06, "loss": 0.3936, "step": 1099 }, { "epoch": 0.05102040816326531, "grad_norm": 12.817883491516113, "learning_rate": 9.957735098410438e-06, "loss": 0.4375, "step": 1100 }, { "epoch": 0.05106679035250464, "grad_norm": 11.241129875183105, "learning_rate": 9.957639557701501e-06, "loss": 0.4411, "step": 1101 }, { "epoch": 0.05111317254174397, "grad_norm": 7.0827531814575195, "learning_rate": 9.957543909587921e-06, "loss": 0.3671, "step": 1102 }, { "epoch": 0.051159554730983305, "grad_norm": 6.719121932983398, "learning_rate": 9.957448154071768e-06, "loss": 0.3585, "step": 1103 }, { "epoch": 0.051205936920222635, "grad_norm": 13.89179801940918, "learning_rate": 9.957352291155112e-06, "loss": 0.371, "step": 1104 }, { "epoch": 0.051252319109461965, "grad_norm": 8.37582015991211, "learning_rate": 9.957256320840036e-06, "loss": 0.3718, "step": 1105 }, { "epoch": 0.0512987012987013, "grad_norm": 7.01788330078125, "learning_rate": 9.957160243128614e-06, "loss": 0.4145, "step": 1106 }, { "epoch": 0.05134508348794063, "grad_norm": 5.477202415466309, "learning_rate": 9.957064058022932e-06, "loss": 0.3478, "step": 1107 }, { "epoch": 0.05139146567717996, "grad_norm": 25.479999542236328, "learning_rate": 9.956967765525072e-06, "loss": 0.4649, "step": 1108 }, { "epoch": 0.05143784786641929, "grad_norm": 6.532976150512695, "learning_rate": 9.95687136563712e-06, "loss": 0.3359, "step": 1109 }, { "epoch": 0.05148423005565863, "grad_norm": 6.6452202796936035, "learning_rate": 9.956774858361162e-06, "loss": 0.3311, "step": 1110 }, { "epoch": 0.05153061224489796, "grad_norm": 11.328365325927734, "learning_rate": 9.956678243699294e-06, "loss": 0.4812, "step": 1111 }, { "epoch": 0.05157699443413729, "grad_norm": 9.814220428466797, "learning_rate": 9.956581521653604e-06, "loss": 0.4357, "step": 1112 }, { "epoch": 0.05162337662337663, "grad_norm": 6.354764938354492, "learning_rate": 9.956484692226192e-06, "loss": 0.3893, "step": 1113 }, { "epoch": 0.05166975881261596, "grad_norm": 13.991436004638672, "learning_rate": 9.95638775541915e-06, "loss": 0.4815, "step": 1114 }, { "epoch": 0.05171614100185529, "grad_norm": 23.669553756713867, "learning_rate": 9.956290711234585e-06, "loss": 0.3088, "step": 1115 }, { "epoch": 0.05176252319109462, "grad_norm": 15.774659156799316, "learning_rate": 9.956193559674595e-06, "loss": 0.2986, "step": 1116 }, { "epoch": 0.051808905380333954, "grad_norm": 12.51264476776123, "learning_rate": 9.956096300741284e-06, "loss": 0.4738, "step": 1117 }, { "epoch": 0.051855287569573284, "grad_norm": 7.4270806312561035, "learning_rate": 9.955998934436761e-06, "loss": 0.3385, "step": 1118 }, { "epoch": 0.051901669758812614, "grad_norm": 9.91375732421875, "learning_rate": 9.955901460763138e-06, "loss": 0.3136, "step": 1119 }, { "epoch": 0.05194805194805195, "grad_norm": 6.224292755126953, "learning_rate": 9.95580387972252e-06, "loss": 0.4367, "step": 1120 }, { "epoch": 0.05199443413729128, "grad_norm": 6.21212911605835, "learning_rate": 9.955706191317028e-06, "loss": 0.3773, "step": 1121 }, { "epoch": 0.05204081632653061, "grad_norm": 9.28734016418457, "learning_rate": 9.955608395548772e-06, "loss": 0.469, "step": 1122 }, { "epoch": 0.05208719851576994, "grad_norm": 8.490169525146484, "learning_rate": 9.955510492419877e-06, "loss": 0.41, "step": 1123 }, { "epoch": 0.05213358070500928, "grad_norm": 10.295958518981934, "learning_rate": 9.955412481932459e-06, "loss": 0.5213, "step": 1124 }, { "epoch": 0.05217996289424861, "grad_norm": 8.467177391052246, "learning_rate": 9.955314364088645e-06, "loss": 0.3083, "step": 1125 }, { "epoch": 0.05222634508348794, "grad_norm": 9.29789924621582, "learning_rate": 9.955216138890558e-06, "loss": 0.4372, "step": 1126 }, { "epoch": 0.05227272727272727, "grad_norm": 7.685262203216553, "learning_rate": 9.955117806340326e-06, "loss": 0.3945, "step": 1127 }, { "epoch": 0.052319109461966606, "grad_norm": 10.450419425964355, "learning_rate": 9.955019366440082e-06, "loss": 0.2207, "step": 1128 }, { "epoch": 0.052365491651205937, "grad_norm": 12.715388298034668, "learning_rate": 9.954920819191955e-06, "loss": 0.5788, "step": 1129 }, { "epoch": 0.05241187384044527, "grad_norm": 8.32487678527832, "learning_rate": 9.954822164598084e-06, "loss": 0.4047, "step": 1130 }, { "epoch": 0.052458256029684604, "grad_norm": 7.632226943969727, "learning_rate": 9.954723402660602e-06, "loss": 0.3725, "step": 1131 }, { "epoch": 0.052504638218923934, "grad_norm": 13.698641777038574, "learning_rate": 9.954624533381653e-06, "loss": 0.4297, "step": 1132 }, { "epoch": 0.052551020408163264, "grad_norm": 10.394265174865723, "learning_rate": 9.954525556763375e-06, "loss": 0.3515, "step": 1133 }, { "epoch": 0.052597402597402594, "grad_norm": 9.479758262634277, "learning_rate": 9.954426472807915e-06, "loss": 0.4822, "step": 1134 }, { "epoch": 0.05264378478664193, "grad_norm": 19.25859832763672, "learning_rate": 9.954327281517418e-06, "loss": 0.5262, "step": 1135 }, { "epoch": 0.05269016697588126, "grad_norm": 7.5072221755981445, "learning_rate": 9.954227982894034e-06, "loss": 0.3942, "step": 1136 }, { "epoch": 0.05273654916512059, "grad_norm": 6.714529991149902, "learning_rate": 9.954128576939914e-06, "loss": 0.4388, "step": 1137 }, { "epoch": 0.05278293135435993, "grad_norm": 9.01496410369873, "learning_rate": 9.954029063657212e-06, "loss": 0.3925, "step": 1138 }, { "epoch": 0.05282931354359926, "grad_norm": 16.290786743164062, "learning_rate": 9.953929443048083e-06, "loss": 0.4986, "step": 1139 }, { "epoch": 0.05287569573283859, "grad_norm": 8.405654907226562, "learning_rate": 9.953829715114685e-06, "loss": 0.3959, "step": 1140 }, { "epoch": 0.05292207792207792, "grad_norm": 7.600898742675781, "learning_rate": 9.953729879859179e-06, "loss": 0.4438, "step": 1141 }, { "epoch": 0.052968460111317256, "grad_norm": 9.146568298339844, "learning_rate": 9.953629937283728e-06, "loss": 0.4553, "step": 1142 }, { "epoch": 0.053014842300556586, "grad_norm": 9.56600570678711, "learning_rate": 9.953529887390498e-06, "loss": 0.3874, "step": 1143 }, { "epoch": 0.053061224489795916, "grad_norm": 10.06430435180664, "learning_rate": 9.953429730181653e-06, "loss": 0.3497, "step": 1144 }, { "epoch": 0.05310760667903525, "grad_norm": 10.365438461303711, "learning_rate": 9.953329465659369e-06, "loss": 0.5151, "step": 1145 }, { "epoch": 0.05315398886827458, "grad_norm": 10.179767608642578, "learning_rate": 9.953229093825813e-06, "loss": 0.3576, "step": 1146 }, { "epoch": 0.053200371057513914, "grad_norm": 6.0966901779174805, "learning_rate": 9.953128614683163e-06, "loss": 0.4417, "step": 1147 }, { "epoch": 0.053246753246753244, "grad_norm": 17.38235092163086, "learning_rate": 9.953028028233591e-06, "loss": 0.3901, "step": 1148 }, { "epoch": 0.05329313543599258, "grad_norm": 13.715578079223633, "learning_rate": 9.952927334479282e-06, "loss": 0.3323, "step": 1149 }, { "epoch": 0.05333951762523191, "grad_norm": 10.9841947555542, "learning_rate": 9.952826533422412e-06, "loss": 0.5436, "step": 1150 }, { "epoch": 0.05338589981447124, "grad_norm": 13.747766494750977, "learning_rate": 9.95272562506517e-06, "loss": 0.4769, "step": 1151 }, { "epoch": 0.05343228200371058, "grad_norm": 8.253847122192383, "learning_rate": 9.95262460940974e-06, "loss": 0.4559, "step": 1152 }, { "epoch": 0.05347866419294991, "grad_norm": 10.49879264831543, "learning_rate": 9.952523486458307e-06, "loss": 0.4555, "step": 1153 }, { "epoch": 0.05352504638218924, "grad_norm": 4.2435688972473145, "learning_rate": 9.952422256213069e-06, "loss": 0.3002, "step": 1154 }, { "epoch": 0.05357142857142857, "grad_norm": 12.67695140838623, "learning_rate": 9.952320918676211e-06, "loss": 0.4126, "step": 1155 }, { "epoch": 0.053617810760667906, "grad_norm": 9.909932136535645, "learning_rate": 9.952219473849934e-06, "loss": 0.4911, "step": 1156 }, { "epoch": 0.053664192949907236, "grad_norm": 11.208426475524902, "learning_rate": 9.952117921736434e-06, "loss": 0.3252, "step": 1157 }, { "epoch": 0.053710575139146566, "grad_norm": 14.345599174499512, "learning_rate": 9.95201626233791e-06, "loss": 0.3864, "step": 1158 }, { "epoch": 0.0537569573283859, "grad_norm": 9.363155364990234, "learning_rate": 9.95191449565657e-06, "loss": 0.409, "step": 1159 }, { "epoch": 0.05380333951762523, "grad_norm": 10.735494613647461, "learning_rate": 9.95181262169461e-06, "loss": 0.4612, "step": 1160 }, { "epoch": 0.05384972170686456, "grad_norm": 9.010823249816895, "learning_rate": 9.951710640454241e-06, "loss": 0.4947, "step": 1161 }, { "epoch": 0.05389610389610389, "grad_norm": 16.996517181396484, "learning_rate": 9.951608551937676e-06, "loss": 0.6661, "step": 1162 }, { "epoch": 0.05394248608534323, "grad_norm": 6.554820537567139, "learning_rate": 9.951506356147121e-06, "loss": 0.3768, "step": 1163 }, { "epoch": 0.05398886827458256, "grad_norm": 8.04642105102539, "learning_rate": 9.951404053084795e-06, "loss": 0.5129, "step": 1164 }, { "epoch": 0.05403525046382189, "grad_norm": 11.493038177490234, "learning_rate": 9.951301642752909e-06, "loss": 0.4431, "step": 1165 }, { "epoch": 0.05408163265306123, "grad_norm": 14.146081924438477, "learning_rate": 9.951199125153686e-06, "loss": 0.4542, "step": 1166 }, { "epoch": 0.05412801484230056, "grad_norm": 9.127884864807129, "learning_rate": 9.951096500289344e-06, "loss": 0.3161, "step": 1167 }, { "epoch": 0.05417439703153989, "grad_norm": 6.451577663421631, "learning_rate": 9.950993768162109e-06, "loss": 0.3953, "step": 1168 }, { "epoch": 0.05422077922077922, "grad_norm": 9.204740524291992, "learning_rate": 9.950890928774204e-06, "loss": 0.4205, "step": 1169 }, { "epoch": 0.054267161410018555, "grad_norm": 9.371588706970215, "learning_rate": 9.950787982127859e-06, "loss": 0.3899, "step": 1170 }, { "epoch": 0.054313543599257885, "grad_norm": 8.84147834777832, "learning_rate": 9.950684928225305e-06, "loss": 0.4137, "step": 1171 }, { "epoch": 0.054359925788497215, "grad_norm": 7.185426235198975, "learning_rate": 9.950581767068772e-06, "loss": 0.441, "step": 1172 }, { "epoch": 0.05440630797773655, "grad_norm": 10.043595314025879, "learning_rate": 9.950478498660496e-06, "loss": 0.3346, "step": 1173 }, { "epoch": 0.05445269016697588, "grad_norm": 11.092812538146973, "learning_rate": 9.950375123002715e-06, "loss": 0.3945, "step": 1174 }, { "epoch": 0.05449907235621521, "grad_norm": 12.490714073181152, "learning_rate": 9.950271640097668e-06, "loss": 0.5222, "step": 1175 }, { "epoch": 0.05454545454545454, "grad_norm": 8.266407012939453, "learning_rate": 9.950168049947597e-06, "loss": 0.3463, "step": 1176 }, { "epoch": 0.05459183673469388, "grad_norm": 12.054036140441895, "learning_rate": 9.950064352554745e-06, "loss": 0.4271, "step": 1177 }, { "epoch": 0.05463821892393321, "grad_norm": 11.344940185546875, "learning_rate": 9.94996054792136e-06, "loss": 0.422, "step": 1178 }, { "epoch": 0.05468460111317254, "grad_norm": 6.490151405334473, "learning_rate": 9.949856636049692e-06, "loss": 0.4098, "step": 1179 }, { "epoch": 0.05473098330241188, "grad_norm": 16.836231231689453, "learning_rate": 9.94975261694199e-06, "loss": 0.4793, "step": 1180 }, { "epoch": 0.05477736549165121, "grad_norm": 11.564423561096191, "learning_rate": 9.949648490600507e-06, "loss": 0.4721, "step": 1181 }, { "epoch": 0.05482374768089054, "grad_norm": 6.921444892883301, "learning_rate": 9.949544257027503e-06, "loss": 0.4005, "step": 1182 }, { "epoch": 0.05487012987012987, "grad_norm": 17.664579391479492, "learning_rate": 9.94943991622523e-06, "loss": 0.5284, "step": 1183 }, { "epoch": 0.054916512059369205, "grad_norm": 6.382064342498779, "learning_rate": 9.949335468195954e-06, "loss": 0.3746, "step": 1184 }, { "epoch": 0.054962894248608535, "grad_norm": 9.30562686920166, "learning_rate": 9.949230912941934e-06, "loss": 0.4771, "step": 1185 }, { "epoch": 0.055009276437847865, "grad_norm": 8.788667678833008, "learning_rate": 9.949126250465439e-06, "loss": 0.4448, "step": 1186 }, { "epoch": 0.0550556586270872, "grad_norm": 13.75639533996582, "learning_rate": 9.949021480768733e-06, "loss": 0.2743, "step": 1187 }, { "epoch": 0.05510204081632653, "grad_norm": 7.495946884155273, "learning_rate": 9.948916603854087e-06, "loss": 0.3598, "step": 1188 }, { "epoch": 0.05514842300556586, "grad_norm": 9.753880500793457, "learning_rate": 9.948811619723772e-06, "loss": 0.3373, "step": 1189 }, { "epoch": 0.05519480519480519, "grad_norm": 10.126957893371582, "learning_rate": 9.948706528380064e-06, "loss": 0.3469, "step": 1190 }, { "epoch": 0.05524118738404453, "grad_norm": 14.901663780212402, "learning_rate": 9.94860132982524e-06, "loss": 0.6178, "step": 1191 }, { "epoch": 0.05528756957328386, "grad_norm": 8.655092239379883, "learning_rate": 9.948496024061577e-06, "loss": 0.3507, "step": 1192 }, { "epoch": 0.05533395176252319, "grad_norm": 5.688948631286621, "learning_rate": 9.948390611091358e-06, "loss": 0.3106, "step": 1193 }, { "epoch": 0.05538033395176252, "grad_norm": 13.258387565612793, "learning_rate": 9.948285090916867e-06, "loss": 0.3409, "step": 1194 }, { "epoch": 0.05542671614100186, "grad_norm": 8.049616813659668, "learning_rate": 9.948179463540391e-06, "loss": 0.4477, "step": 1195 }, { "epoch": 0.05547309833024119, "grad_norm": 19.185409545898438, "learning_rate": 9.948073728964215e-06, "loss": 0.4504, "step": 1196 }, { "epoch": 0.05551948051948052, "grad_norm": 6.917566299438477, "learning_rate": 9.947967887190632e-06, "loss": 0.2219, "step": 1197 }, { "epoch": 0.055565862708719854, "grad_norm": 4.775965213775635, "learning_rate": 9.947861938221934e-06, "loss": 0.2868, "step": 1198 }, { "epoch": 0.055612244897959184, "grad_norm": 10.148934364318848, "learning_rate": 9.947755882060418e-06, "loss": 0.2561, "step": 1199 }, { "epoch": 0.055658627087198514, "grad_norm": 9.460061073303223, "learning_rate": 9.947649718708379e-06, "loss": 0.4041, "step": 1200 }, { "epoch": 0.055705009276437845, "grad_norm": 15.154009819030762, "learning_rate": 9.947543448168118e-06, "loss": 0.5233, "step": 1201 }, { "epoch": 0.05575139146567718, "grad_norm": 12.705288887023926, "learning_rate": 9.947437070441938e-06, "loss": 0.4398, "step": 1202 }, { "epoch": 0.05579777365491651, "grad_norm": 13.999703407287598, "learning_rate": 9.947330585532146e-06, "loss": 0.5189, "step": 1203 }, { "epoch": 0.05584415584415584, "grad_norm": 15.424460411071777, "learning_rate": 9.947223993441044e-06, "loss": 0.3963, "step": 1204 }, { "epoch": 0.05589053803339518, "grad_norm": 16.261505126953125, "learning_rate": 9.947117294170944e-06, "loss": 0.3393, "step": 1205 }, { "epoch": 0.05593692022263451, "grad_norm": 11.353160858154297, "learning_rate": 9.947010487724157e-06, "loss": 0.3422, "step": 1206 }, { "epoch": 0.05598330241187384, "grad_norm": 10.947964668273926, "learning_rate": 9.946903574102997e-06, "loss": 0.403, "step": 1207 }, { "epoch": 0.05602968460111317, "grad_norm": 7.07208251953125, "learning_rate": 9.94679655330978e-06, "loss": 0.343, "step": 1208 }, { "epoch": 0.056076066790352506, "grad_norm": 21.38117218017578, "learning_rate": 9.946689425346826e-06, "loss": 0.3929, "step": 1209 }, { "epoch": 0.05612244897959184, "grad_norm": 24.476646423339844, "learning_rate": 9.946582190216454e-06, "loss": 0.6737, "step": 1210 }, { "epoch": 0.05616883116883117, "grad_norm": 18.5245361328125, "learning_rate": 9.946474847920988e-06, "loss": 0.4798, "step": 1211 }, { "epoch": 0.056215213358070504, "grad_norm": 16.609376907348633, "learning_rate": 9.946367398462753e-06, "loss": 0.6111, "step": 1212 }, { "epoch": 0.056261595547309834, "grad_norm": 9.090283393859863, "learning_rate": 9.946259841844079e-06, "loss": 0.3548, "step": 1213 }, { "epoch": 0.056307977736549164, "grad_norm": 6.861843109130859, "learning_rate": 9.946152178067291e-06, "loss": 0.5039, "step": 1214 }, { "epoch": 0.056354359925788494, "grad_norm": 12.465056419372559, "learning_rate": 9.946044407134727e-06, "loss": 0.4015, "step": 1215 }, { "epoch": 0.05640074211502783, "grad_norm": 9.80840015411377, "learning_rate": 9.945936529048721e-06, "loss": 0.3312, "step": 1216 }, { "epoch": 0.05644712430426716, "grad_norm": 6.005768775939941, "learning_rate": 9.945828543811607e-06, "loss": 0.3973, "step": 1217 }, { "epoch": 0.05649350649350649, "grad_norm": 6.316249847412109, "learning_rate": 9.945720451425726e-06, "loss": 0.2999, "step": 1218 }, { "epoch": 0.05653988868274583, "grad_norm": 6.80589485168457, "learning_rate": 9.945612251893422e-06, "loss": 0.3789, "step": 1219 }, { "epoch": 0.05658627087198516, "grad_norm": 12.135485649108887, "learning_rate": 9.945503945217034e-06, "loss": 0.4697, "step": 1220 }, { "epoch": 0.05663265306122449, "grad_norm": 8.805011749267578, "learning_rate": 9.945395531398914e-06, "loss": 0.4101, "step": 1221 }, { "epoch": 0.05667903525046382, "grad_norm": 7.323430061340332, "learning_rate": 9.945287010441406e-06, "loss": 0.3884, "step": 1222 }, { "epoch": 0.056725417439703156, "grad_norm": 14.05087661743164, "learning_rate": 9.945178382346867e-06, "loss": 0.5133, "step": 1223 }, { "epoch": 0.056771799628942486, "grad_norm": 8.362431526184082, "learning_rate": 9.945069647117645e-06, "loss": 0.3886, "step": 1224 }, { "epoch": 0.056818181818181816, "grad_norm": 6.716300964355469, "learning_rate": 9.944960804756096e-06, "loss": 0.4257, "step": 1225 }, { "epoch": 0.05686456400742115, "grad_norm": 6.464472770690918, "learning_rate": 9.944851855264579e-06, "loss": 0.3904, "step": 1226 }, { "epoch": 0.056910946196660483, "grad_norm": 7.375547409057617, "learning_rate": 9.944742798645456e-06, "loss": 0.3709, "step": 1227 }, { "epoch": 0.056957328385899814, "grad_norm": 7.700148105621338, "learning_rate": 9.94463363490109e-06, "loss": 0.4104, "step": 1228 }, { "epoch": 0.057003710575139144, "grad_norm": 6.068897247314453, "learning_rate": 9.94452436403384e-06, "loss": 0.3125, "step": 1229 }, { "epoch": 0.05705009276437848, "grad_norm": 7.104934215545654, "learning_rate": 9.944414986046081e-06, "loss": 0.3474, "step": 1230 }, { "epoch": 0.05709647495361781, "grad_norm": 11.830253601074219, "learning_rate": 9.944305500940178e-06, "loss": 0.5539, "step": 1231 }, { "epoch": 0.05714285714285714, "grad_norm": 10.386882781982422, "learning_rate": 9.944195908718506e-06, "loss": 0.4718, "step": 1232 }, { "epoch": 0.05718923933209648, "grad_norm": 9.430087089538574, "learning_rate": 9.944086209383435e-06, "loss": 0.4413, "step": 1233 }, { "epoch": 0.05723562152133581, "grad_norm": 8.103569030761719, "learning_rate": 9.943976402937345e-06, "loss": 0.3879, "step": 1234 }, { "epoch": 0.05728200371057514, "grad_norm": 10.856221199035645, "learning_rate": 9.943866489382615e-06, "loss": 0.4343, "step": 1235 }, { "epoch": 0.05732838589981447, "grad_norm": 15.78971004486084, "learning_rate": 9.943756468721623e-06, "loss": 0.3808, "step": 1236 }, { "epoch": 0.057374768089053806, "grad_norm": 12.333218574523926, "learning_rate": 9.943646340956757e-06, "loss": 0.4435, "step": 1237 }, { "epoch": 0.057421150278293136, "grad_norm": 6.570777893066406, "learning_rate": 9.943536106090398e-06, "loss": 0.3929, "step": 1238 }, { "epoch": 0.057467532467532466, "grad_norm": 8.9866361618042, "learning_rate": 9.943425764124938e-06, "loss": 0.3857, "step": 1239 }, { "epoch": 0.0575139146567718, "grad_norm": 6.922204494476318, "learning_rate": 9.943315315062766e-06, "loss": 0.3507, "step": 1240 }, { "epoch": 0.05756029684601113, "grad_norm": 7.802117347717285, "learning_rate": 9.943204758906275e-06, "loss": 0.3658, "step": 1241 }, { "epoch": 0.05760667903525046, "grad_norm": 9.737003326416016, "learning_rate": 9.94309409565786e-06, "loss": 0.421, "step": 1242 }, { "epoch": 0.05765306122448979, "grad_norm": 9.073628425598145, "learning_rate": 9.94298332531992e-06, "loss": 0.3864, "step": 1243 }, { "epoch": 0.05769944341372913, "grad_norm": 8.675249099731445, "learning_rate": 9.942872447894852e-06, "loss": 0.3928, "step": 1244 }, { "epoch": 0.05774582560296846, "grad_norm": 8.170516014099121, "learning_rate": 9.94276146338506e-06, "loss": 0.4543, "step": 1245 }, { "epoch": 0.05779220779220779, "grad_norm": 11.754997253417969, "learning_rate": 9.942650371792948e-06, "loss": 0.4979, "step": 1246 }, { "epoch": 0.05783858998144713, "grad_norm": 7.981929302215576, "learning_rate": 9.942539173120922e-06, "loss": 0.4288, "step": 1247 }, { "epoch": 0.05788497217068646, "grad_norm": 6.407234191894531, "learning_rate": 9.942427867371391e-06, "loss": 0.366, "step": 1248 }, { "epoch": 0.05793135435992579, "grad_norm": 10.973243713378906, "learning_rate": 9.942316454546769e-06, "loss": 0.3512, "step": 1249 }, { "epoch": 0.05797773654916512, "grad_norm": 10.33973503112793, "learning_rate": 9.942204934649467e-06, "loss": 0.3517, "step": 1250 }, { "epoch": 0.058024118738404455, "grad_norm": 6.681507110595703, "learning_rate": 9.942093307681901e-06, "loss": 0.4291, "step": 1251 }, { "epoch": 0.058070500927643785, "grad_norm": 13.078390121459961, "learning_rate": 9.941981573646493e-06, "loss": 0.5442, "step": 1252 }, { "epoch": 0.058116883116883115, "grad_norm": 8.714883804321289, "learning_rate": 9.941869732545657e-06, "loss": 0.3271, "step": 1253 }, { "epoch": 0.058163265306122446, "grad_norm": 11.160974502563477, "learning_rate": 9.941757784381823e-06, "loss": 0.386, "step": 1254 }, { "epoch": 0.05820964749536178, "grad_norm": 6.072278022766113, "learning_rate": 9.941645729157413e-06, "loss": 0.3695, "step": 1255 }, { "epoch": 0.05825602968460111, "grad_norm": 6.732720851898193, "learning_rate": 9.941533566874852e-06, "loss": 0.4131, "step": 1256 }, { "epoch": 0.05830241187384044, "grad_norm": 12.86107063293457, "learning_rate": 9.941421297536574e-06, "loss": 0.3645, "step": 1257 }, { "epoch": 0.05834879406307978, "grad_norm": 22.35491371154785, "learning_rate": 9.941308921145009e-06, "loss": 0.4079, "step": 1258 }, { "epoch": 0.05839517625231911, "grad_norm": 14.407185554504395, "learning_rate": 9.941196437702595e-06, "loss": 0.4621, "step": 1259 }, { "epoch": 0.05844155844155844, "grad_norm": 14.084193229675293, "learning_rate": 9.941083847211765e-06, "loss": 0.4068, "step": 1260 }, { "epoch": 0.05848794063079777, "grad_norm": 12.235149383544922, "learning_rate": 9.940971149674961e-06, "loss": 0.3621, "step": 1261 }, { "epoch": 0.05853432282003711, "grad_norm": 5.420975208282471, "learning_rate": 9.940858345094621e-06, "loss": 0.2782, "step": 1262 }, { "epoch": 0.05858070500927644, "grad_norm": 14.126303672790527, "learning_rate": 9.940745433473191e-06, "loss": 0.4433, "step": 1263 }, { "epoch": 0.05862708719851577, "grad_norm": 10.326460838317871, "learning_rate": 9.940632414813119e-06, "loss": 0.4206, "step": 1264 }, { "epoch": 0.058673469387755105, "grad_norm": 9.685284614562988, "learning_rate": 9.94051928911685e-06, "loss": 0.2326, "step": 1265 }, { "epoch": 0.058719851576994435, "grad_norm": 9.98775577545166, "learning_rate": 9.940406056386836e-06, "loss": 0.4766, "step": 1266 }, { "epoch": 0.058766233766233765, "grad_norm": 6.357358932495117, "learning_rate": 9.940292716625532e-06, "loss": 0.3066, "step": 1267 }, { "epoch": 0.058812615955473095, "grad_norm": 12.163618087768555, "learning_rate": 9.940179269835392e-06, "loss": 0.3679, "step": 1268 }, { "epoch": 0.05885899814471243, "grad_norm": 7.487850189208984, "learning_rate": 9.940065716018874e-06, "loss": 0.3063, "step": 1269 }, { "epoch": 0.05890538033395176, "grad_norm": 7.911416530609131, "learning_rate": 9.939952055178437e-06, "loss": 0.3568, "step": 1270 }, { "epoch": 0.05895176252319109, "grad_norm": 15.233190536499023, "learning_rate": 9.939838287316544e-06, "loss": 0.7319, "step": 1271 }, { "epoch": 0.05899814471243043, "grad_norm": 10.747784614562988, "learning_rate": 9.939724412435661e-06, "loss": 0.4308, "step": 1272 }, { "epoch": 0.05904452690166976, "grad_norm": 12.153280258178711, "learning_rate": 9.939610430538254e-06, "loss": 0.4517, "step": 1273 }, { "epoch": 0.05909090909090909, "grad_norm": 6.817577362060547, "learning_rate": 9.939496341626791e-06, "loss": 0.411, "step": 1274 }, { "epoch": 0.05913729128014842, "grad_norm": 12.61811637878418, "learning_rate": 9.939382145703747e-06, "loss": 0.4926, "step": 1275 }, { "epoch": 0.05918367346938776, "grad_norm": 5.788643836975098, "learning_rate": 9.939267842771592e-06, "loss": 0.2565, "step": 1276 }, { "epoch": 0.05923005565862709, "grad_norm": 7.338935852050781, "learning_rate": 9.939153432832807e-06, "loss": 0.4773, "step": 1277 }, { "epoch": 0.05927643784786642, "grad_norm": 13.264572143554688, "learning_rate": 9.939038915889865e-06, "loss": 0.4342, "step": 1278 }, { "epoch": 0.059322820037105754, "grad_norm": 8.270136833190918, "learning_rate": 9.938924291945251e-06, "loss": 0.3921, "step": 1279 }, { "epoch": 0.059369202226345084, "grad_norm": 5.965403079986572, "learning_rate": 9.938809561001447e-06, "loss": 0.4166, "step": 1280 }, { "epoch": 0.059415584415584415, "grad_norm": 8.735358238220215, "learning_rate": 9.938694723060941e-06, "loss": 0.437, "step": 1281 }, { "epoch": 0.059461966604823745, "grad_norm": 13.311240196228027, "learning_rate": 9.938579778126217e-06, "loss": 0.5667, "step": 1282 }, { "epoch": 0.05950834879406308, "grad_norm": 14.379464149475098, "learning_rate": 9.938464726199768e-06, "loss": 0.4077, "step": 1283 }, { "epoch": 0.05955473098330241, "grad_norm": 6.654997825622559, "learning_rate": 9.938349567284084e-06, "loss": 0.422, "step": 1284 }, { "epoch": 0.05960111317254174, "grad_norm": 6.829489707946777, "learning_rate": 9.938234301381662e-06, "loss": 0.3796, "step": 1285 }, { "epoch": 0.05964749536178108, "grad_norm": 8.694972038269043, "learning_rate": 9.938118928494997e-06, "loss": 0.4336, "step": 1286 }, { "epoch": 0.05969387755102041, "grad_norm": 7.489324569702148, "learning_rate": 9.938003448626591e-06, "loss": 0.4103, "step": 1287 }, { "epoch": 0.05974025974025974, "grad_norm": 13.60302734375, "learning_rate": 9.937887861778947e-06, "loss": 0.4683, "step": 1288 }, { "epoch": 0.05978664192949907, "grad_norm": 4.805753707885742, "learning_rate": 9.937772167954565e-06, "loss": 0.3227, "step": 1289 }, { "epoch": 0.05983302411873841, "grad_norm": 12.032485961914062, "learning_rate": 9.937656367155954e-06, "loss": 0.4041, "step": 1290 }, { "epoch": 0.05987940630797774, "grad_norm": 11.272552490234375, "learning_rate": 9.937540459385622e-06, "loss": 0.4439, "step": 1291 }, { "epoch": 0.05992578849721707, "grad_norm": 16.105419158935547, "learning_rate": 9.93742444464608e-06, "loss": 0.579, "step": 1292 }, { "epoch": 0.059972170686456404, "grad_norm": 10.560561180114746, "learning_rate": 9.93730832293984e-06, "loss": 0.3795, "step": 1293 }, { "epoch": 0.060018552875695734, "grad_norm": 5.873281955718994, "learning_rate": 9.937192094269422e-06, "loss": 0.4759, "step": 1294 }, { "epoch": 0.060064935064935064, "grad_norm": 11.31070613861084, "learning_rate": 9.93707575863734e-06, "loss": 0.3394, "step": 1295 }, { "epoch": 0.060111317254174394, "grad_norm": 8.815508842468262, "learning_rate": 9.936959316046117e-06, "loss": 0.4217, "step": 1296 }, { "epoch": 0.06015769944341373, "grad_norm": 11.364646911621094, "learning_rate": 9.936842766498274e-06, "loss": 0.5109, "step": 1297 }, { "epoch": 0.06020408163265306, "grad_norm": 11.168000221252441, "learning_rate": 9.936726109996336e-06, "loss": 0.4522, "step": 1298 }, { "epoch": 0.06025046382189239, "grad_norm": 8.842950820922852, "learning_rate": 9.93660934654283e-06, "loss": 0.411, "step": 1299 }, { "epoch": 0.06029684601113173, "grad_norm": 8.598922729492188, "learning_rate": 9.936492476140286e-06, "loss": 0.3587, "step": 1300 }, { "epoch": 0.06034322820037106, "grad_norm": 18.199100494384766, "learning_rate": 9.936375498791238e-06, "loss": 0.5276, "step": 1301 }, { "epoch": 0.06038961038961039, "grad_norm": 16.649002075195312, "learning_rate": 9.936258414498217e-06, "loss": 0.4742, "step": 1302 }, { "epoch": 0.06043599257884972, "grad_norm": 5.578676223754883, "learning_rate": 9.936141223263762e-06, "loss": 0.3324, "step": 1303 }, { "epoch": 0.060482374768089056, "grad_norm": 9.958718299865723, "learning_rate": 9.93602392509041e-06, "loss": 0.4322, "step": 1304 }, { "epoch": 0.060528756957328386, "grad_norm": 6.9497528076171875, "learning_rate": 9.935906519980703e-06, "loss": 0.3865, "step": 1305 }, { "epoch": 0.060575139146567716, "grad_norm": 20.15897560119629, "learning_rate": 9.935789007937185e-06, "loss": 0.5569, "step": 1306 }, { "epoch": 0.06062152133580705, "grad_norm": 8.020066261291504, "learning_rate": 9.935671388962403e-06, "loss": 0.3751, "step": 1307 }, { "epoch": 0.060667903525046384, "grad_norm": 10.196487426757812, "learning_rate": 9.9355536630589e-06, "loss": 0.361, "step": 1308 }, { "epoch": 0.060714285714285714, "grad_norm": 5.826897621154785, "learning_rate": 9.935435830229234e-06, "loss": 0.3234, "step": 1309 }, { "epoch": 0.060760667903525044, "grad_norm": 6.45468282699585, "learning_rate": 9.935317890475954e-06, "loss": 0.3239, "step": 1310 }, { "epoch": 0.06080705009276438, "grad_norm": 5.734870910644531, "learning_rate": 9.935199843801613e-06, "loss": 0.2894, "step": 1311 }, { "epoch": 0.06085343228200371, "grad_norm": 9.090092658996582, "learning_rate": 9.93508169020877e-06, "loss": 0.4274, "step": 1312 }, { "epoch": 0.06089981447124304, "grad_norm": 14.268732070922852, "learning_rate": 9.934963429699986e-06, "loss": 0.4834, "step": 1313 }, { "epoch": 0.06094619666048238, "grad_norm": 4.4667792320251465, "learning_rate": 9.934845062277823e-06, "loss": 0.3224, "step": 1314 }, { "epoch": 0.06099257884972171, "grad_norm": 12.479571342468262, "learning_rate": 9.934726587944843e-06, "loss": 0.4438, "step": 1315 }, { "epoch": 0.06103896103896104, "grad_norm": 6.0644049644470215, "learning_rate": 9.934608006703615e-06, "loss": 0.3916, "step": 1316 }, { "epoch": 0.06108534322820037, "grad_norm": 10.78809642791748, "learning_rate": 9.934489318556709e-06, "loss": 0.4014, "step": 1317 }, { "epoch": 0.061131725417439706, "grad_norm": 14.804876327514648, "learning_rate": 9.934370523506691e-06, "loss": 0.4373, "step": 1318 }, { "epoch": 0.061178107606679036, "grad_norm": 16.87528419494629, "learning_rate": 9.934251621556141e-06, "loss": 0.5408, "step": 1319 }, { "epoch": 0.061224489795918366, "grad_norm": 5.6390061378479, "learning_rate": 9.934132612707631e-06, "loss": 0.4369, "step": 1320 }, { "epoch": 0.061270871985157696, "grad_norm": 8.53203010559082, "learning_rate": 9.934013496963741e-06, "loss": 0.3857, "step": 1321 }, { "epoch": 0.06131725417439703, "grad_norm": 18.71330451965332, "learning_rate": 9.933894274327051e-06, "loss": 0.4668, "step": 1322 }, { "epoch": 0.06136363636363636, "grad_norm": 11.73261547088623, "learning_rate": 9.933774944800145e-06, "loss": 0.315, "step": 1323 }, { "epoch": 0.06141001855287569, "grad_norm": 8.932649612426758, "learning_rate": 9.933655508385605e-06, "loss": 0.3326, "step": 1324 }, { "epoch": 0.06145640074211503, "grad_norm": 12.903444290161133, "learning_rate": 9.933535965086021e-06, "loss": 0.4432, "step": 1325 }, { "epoch": 0.06150278293135436, "grad_norm": 12.25141716003418, "learning_rate": 9.933416314903983e-06, "loss": 0.4868, "step": 1326 }, { "epoch": 0.06154916512059369, "grad_norm": 11.005752563476562, "learning_rate": 9.933296557842083e-06, "loss": 0.4079, "step": 1327 }, { "epoch": 0.06159554730983302, "grad_norm": 10.703104972839355, "learning_rate": 9.933176693902915e-06, "loss": 0.448, "step": 1328 }, { "epoch": 0.06164192949907236, "grad_norm": 8.799384117126465, "learning_rate": 9.933056723089075e-06, "loss": 0.3946, "step": 1329 }, { "epoch": 0.06168831168831169, "grad_norm": 7.763978004455566, "learning_rate": 9.932936645403164e-06, "loss": 0.4218, "step": 1330 }, { "epoch": 0.06173469387755102, "grad_norm": 7.21451997756958, "learning_rate": 9.932816460847783e-06, "loss": 0.3711, "step": 1331 }, { "epoch": 0.061781076066790355, "grad_norm": 20.982580184936523, "learning_rate": 9.932696169425534e-06, "loss": 0.5118, "step": 1332 }, { "epoch": 0.061827458256029685, "grad_norm": 13.432997703552246, "learning_rate": 9.932575771139024e-06, "loss": 0.4327, "step": 1333 }, { "epoch": 0.061873840445269015, "grad_norm": 11.486774444580078, "learning_rate": 9.932455265990862e-06, "loss": 0.3913, "step": 1334 }, { "epoch": 0.061920222634508346, "grad_norm": 11.716168403625488, "learning_rate": 9.932334653983657e-06, "loss": 0.4052, "step": 1335 }, { "epoch": 0.06196660482374768, "grad_norm": 8.099894523620605, "learning_rate": 9.932213935120025e-06, "loss": 0.4048, "step": 1336 }, { "epoch": 0.06201298701298701, "grad_norm": 10.947154998779297, "learning_rate": 9.932093109402579e-06, "loss": 0.3491, "step": 1337 }, { "epoch": 0.06205936920222634, "grad_norm": 8.234682083129883, "learning_rate": 9.931972176833938e-06, "loss": 0.3597, "step": 1338 }, { "epoch": 0.06210575139146568, "grad_norm": 12.529253005981445, "learning_rate": 9.931851137416721e-06, "loss": 0.4229, "step": 1339 }, { "epoch": 0.06215213358070501, "grad_norm": 9.84092903137207, "learning_rate": 9.93172999115355e-06, "loss": 0.3471, "step": 1340 }, { "epoch": 0.06219851576994434, "grad_norm": 9.730408668518066, "learning_rate": 9.93160873804705e-06, "loss": 0.4727, "step": 1341 }, { "epoch": 0.06224489795918367, "grad_norm": 16.93484878540039, "learning_rate": 9.931487378099847e-06, "loss": 0.5455, "step": 1342 }, { "epoch": 0.06229128014842301, "grad_norm": 12.750736236572266, "learning_rate": 9.931365911314573e-06, "loss": 0.2963, "step": 1343 }, { "epoch": 0.06233766233766234, "grad_norm": 11.34011459350586, "learning_rate": 9.931244337693855e-06, "loss": 0.3492, "step": 1344 }, { "epoch": 0.06238404452690167, "grad_norm": 10.092450141906738, "learning_rate": 9.93112265724033e-06, "loss": 0.5179, "step": 1345 }, { "epoch": 0.062430426716141005, "grad_norm": 9.408333778381348, "learning_rate": 9.931000869956636e-06, "loss": 0.4465, "step": 1346 }, { "epoch": 0.062476808905380335, "grad_norm": 6.9669508934021, "learning_rate": 9.930878975845406e-06, "loss": 0.3082, "step": 1347 }, { "epoch": 0.06252319109461967, "grad_norm": 12.878951072692871, "learning_rate": 9.930756974909285e-06, "loss": 0.409, "step": 1348 }, { "epoch": 0.062569573283859, "grad_norm": 15.08791732788086, "learning_rate": 9.930634867150914e-06, "loss": 0.4015, "step": 1349 }, { "epoch": 0.06261595547309833, "grad_norm": 10.048559188842773, "learning_rate": 9.930512652572941e-06, "loss": 0.5046, "step": 1350 }, { "epoch": 0.06266233766233766, "grad_norm": 5.3148369789123535, "learning_rate": 9.93039033117801e-06, "loss": 0.3224, "step": 1351 }, { "epoch": 0.062708719851577, "grad_norm": 10.506560325622559, "learning_rate": 9.930267902968774e-06, "loss": 0.3027, "step": 1352 }, { "epoch": 0.06275510204081633, "grad_norm": 10.768289566040039, "learning_rate": 9.930145367947885e-06, "loss": 0.4405, "step": 1353 }, { "epoch": 0.06280148423005566, "grad_norm": 11.156792640686035, "learning_rate": 9.930022726117996e-06, "loss": 0.3784, "step": 1354 }, { "epoch": 0.06284786641929499, "grad_norm": 5.880578994750977, "learning_rate": 9.929899977481764e-06, "loss": 0.4805, "step": 1355 }, { "epoch": 0.06289424860853432, "grad_norm": 16.220550537109375, "learning_rate": 9.929777122041852e-06, "loss": 0.4274, "step": 1356 }, { "epoch": 0.06294063079777365, "grad_norm": 9.951373100280762, "learning_rate": 9.929654159800917e-06, "loss": 0.393, "step": 1357 }, { "epoch": 0.06298701298701298, "grad_norm": 15.356340408325195, "learning_rate": 9.929531090761624e-06, "loss": 0.4339, "step": 1358 }, { "epoch": 0.06303339517625232, "grad_norm": 5.575047016143799, "learning_rate": 9.92940791492664e-06, "loss": 0.3149, "step": 1359 }, { "epoch": 0.06307977736549165, "grad_norm": 8.301352500915527, "learning_rate": 9.929284632298638e-06, "loss": 0.4897, "step": 1360 }, { "epoch": 0.06312615955473098, "grad_norm": 4.6974287033081055, "learning_rate": 9.929161242880279e-06, "loss": 0.375, "step": 1361 }, { "epoch": 0.06317254174397031, "grad_norm": 9.50131893157959, "learning_rate": 9.929037746674245e-06, "loss": 0.3408, "step": 1362 }, { "epoch": 0.06321892393320964, "grad_norm": 15.921456336975098, "learning_rate": 9.928914143683204e-06, "loss": 0.5116, "step": 1363 }, { "epoch": 0.06326530612244897, "grad_norm": 5.851163387298584, "learning_rate": 9.92879043390984e-06, "loss": 0.3792, "step": 1364 }, { "epoch": 0.0633116883116883, "grad_norm": 7.927270889282227, "learning_rate": 9.928666617356832e-06, "loss": 0.3328, "step": 1365 }, { "epoch": 0.06335807050092765, "grad_norm": 11.103459358215332, "learning_rate": 9.928542694026862e-06, "loss": 0.3389, "step": 1366 }, { "epoch": 0.06340445269016698, "grad_norm": 8.914826393127441, "learning_rate": 9.92841866392261e-06, "loss": 0.3789, "step": 1367 }, { "epoch": 0.06345083487940631, "grad_norm": 10.920031547546387, "learning_rate": 9.928294527046771e-06, "loss": 0.4982, "step": 1368 }, { "epoch": 0.06349721706864564, "grad_norm": 9.020547866821289, "learning_rate": 9.928170283402029e-06, "loss": 0.4073, "step": 1369 }, { "epoch": 0.06354359925788497, "grad_norm": 20.360715866088867, "learning_rate": 9.928045932991077e-06, "loss": 0.5798, "step": 1370 }, { "epoch": 0.0635899814471243, "grad_norm": 14.208456039428711, "learning_rate": 9.92792147581661e-06, "loss": 0.5157, "step": 1371 }, { "epoch": 0.06363636363636363, "grad_norm": 11.176518440246582, "learning_rate": 9.927796911881323e-06, "loss": 0.2721, "step": 1372 }, { "epoch": 0.06368274582560297, "grad_norm": 7.140837669372559, "learning_rate": 9.927672241187914e-06, "loss": 0.357, "step": 1373 }, { "epoch": 0.0637291280148423, "grad_norm": 13.992012977600098, "learning_rate": 9.927547463739086e-06, "loss": 0.3823, "step": 1374 }, { "epoch": 0.06377551020408163, "grad_norm": 6.9012956619262695, "learning_rate": 9.927422579537541e-06, "loss": 0.375, "step": 1375 }, { "epoch": 0.06382189239332096, "grad_norm": 10.22557544708252, "learning_rate": 9.927297588585984e-06, "loss": 0.4081, "step": 1376 }, { "epoch": 0.0638682745825603, "grad_norm": 8.518318176269531, "learning_rate": 9.927172490887125e-06, "loss": 0.3865, "step": 1377 }, { "epoch": 0.06391465677179962, "grad_norm": 5.773858547210693, "learning_rate": 9.927047286443673e-06, "loss": 0.33, "step": 1378 }, { "epoch": 0.06396103896103895, "grad_norm": 22.291954040527344, "learning_rate": 9.926921975258339e-06, "loss": 0.6258, "step": 1379 }, { "epoch": 0.0640074211502783, "grad_norm": 6.734996795654297, "learning_rate": 9.926796557333839e-06, "loss": 0.3423, "step": 1380 }, { "epoch": 0.06405380333951763, "grad_norm": 6.747849941253662, "learning_rate": 9.926671032672892e-06, "loss": 0.3671, "step": 1381 }, { "epoch": 0.06410018552875696, "grad_norm": 5.327066898345947, "learning_rate": 9.926545401278213e-06, "loss": 0.2996, "step": 1382 }, { "epoch": 0.06414656771799629, "grad_norm": 14.165864944458008, "learning_rate": 9.926419663152529e-06, "loss": 0.3644, "step": 1383 }, { "epoch": 0.06419294990723562, "grad_norm": 6.78801155090332, "learning_rate": 9.92629381829856e-06, "loss": 0.3934, "step": 1384 }, { "epoch": 0.06423933209647495, "grad_norm": 12.977914810180664, "learning_rate": 9.926167866719034e-06, "loss": 0.4838, "step": 1385 }, { "epoch": 0.06428571428571428, "grad_norm": 9.514668464660645, "learning_rate": 9.92604180841668e-06, "loss": 0.4457, "step": 1386 }, { "epoch": 0.06433209647495362, "grad_norm": 13.666686058044434, "learning_rate": 9.925915643394228e-06, "loss": 0.3569, "step": 1387 }, { "epoch": 0.06437847866419295, "grad_norm": 31.415159225463867, "learning_rate": 9.92578937165441e-06, "loss": 0.4558, "step": 1388 }, { "epoch": 0.06442486085343228, "grad_norm": 10.57362174987793, "learning_rate": 9.925662993199966e-06, "loss": 0.4585, "step": 1389 }, { "epoch": 0.06447124304267161, "grad_norm": 7.15994119644165, "learning_rate": 9.925536508033631e-06, "loss": 0.3791, "step": 1390 }, { "epoch": 0.06451762523191094, "grad_norm": 20.003503799438477, "learning_rate": 9.925409916158143e-06, "loss": 0.5566, "step": 1391 }, { "epoch": 0.06456400742115027, "grad_norm": 8.382892608642578, "learning_rate": 9.925283217576248e-06, "loss": 0.3852, "step": 1392 }, { "epoch": 0.0646103896103896, "grad_norm": 11.158108711242676, "learning_rate": 9.925156412290693e-06, "loss": 0.4245, "step": 1393 }, { "epoch": 0.06465677179962895, "grad_norm": 6.012226581573486, "learning_rate": 9.92502950030422e-06, "loss": 0.3516, "step": 1394 }, { "epoch": 0.06470315398886828, "grad_norm": 10.319758415222168, "learning_rate": 9.92490248161958e-06, "loss": 0.5099, "step": 1395 }, { "epoch": 0.06474953617810761, "grad_norm": 10.154341697692871, "learning_rate": 9.924775356239525e-06, "loss": 0.3491, "step": 1396 }, { "epoch": 0.06479591836734694, "grad_norm": 6.1909565925598145, "learning_rate": 9.924648124166812e-06, "loss": 0.3138, "step": 1397 }, { "epoch": 0.06484230055658627, "grad_norm": 12.077561378479004, "learning_rate": 9.924520785404191e-06, "loss": 0.306, "step": 1398 }, { "epoch": 0.0648886827458256, "grad_norm": 5.212360382080078, "learning_rate": 9.924393339954427e-06, "loss": 0.3685, "step": 1399 }, { "epoch": 0.06493506493506493, "grad_norm": 5.916830062866211, "learning_rate": 9.924265787820279e-06, "loss": 0.4001, "step": 1400 }, { "epoch": 0.06498144712430427, "grad_norm": 5.9332990646362305, "learning_rate": 9.924138129004508e-06, "loss": 0.3969, "step": 1401 }, { "epoch": 0.0650278293135436, "grad_norm": 12.20215129852295, "learning_rate": 9.924010363509884e-06, "loss": 0.4196, "step": 1402 }, { "epoch": 0.06507421150278293, "grad_norm": 9.393488883972168, "learning_rate": 9.923882491339172e-06, "loss": 0.3899, "step": 1403 }, { "epoch": 0.06512059369202226, "grad_norm": 11.696869850158691, "learning_rate": 9.92375451249514e-06, "loss": 0.4789, "step": 1404 }, { "epoch": 0.0651669758812616, "grad_norm": 12.012641906738281, "learning_rate": 9.923626426980568e-06, "loss": 0.4665, "step": 1405 }, { "epoch": 0.06521335807050092, "grad_norm": 16.967205047607422, "learning_rate": 9.923498234798223e-06, "loss": 0.4075, "step": 1406 }, { "epoch": 0.06525974025974025, "grad_norm": 8.738852500915527, "learning_rate": 9.923369935950889e-06, "loss": 0.372, "step": 1407 }, { "epoch": 0.0653061224489796, "grad_norm": 7.579010009765625, "learning_rate": 9.92324153044134e-06, "loss": 0.3769, "step": 1408 }, { "epoch": 0.06535250463821893, "grad_norm": 9.776737213134766, "learning_rate": 9.92311301827236e-06, "loss": 0.4066, "step": 1409 }, { "epoch": 0.06539888682745826, "grad_norm": 6.290853500366211, "learning_rate": 9.92298439944673e-06, "loss": 0.4241, "step": 1410 }, { "epoch": 0.06544526901669759, "grad_norm": 12.611382484436035, "learning_rate": 9.922855673967245e-06, "loss": 0.3522, "step": 1411 }, { "epoch": 0.06549165120593692, "grad_norm": 20.147926330566406, "learning_rate": 9.922726841836685e-06, "loss": 0.439, "step": 1412 }, { "epoch": 0.06553803339517625, "grad_norm": 10.097787857055664, "learning_rate": 9.922597903057847e-06, "loss": 0.462, "step": 1413 }, { "epoch": 0.06558441558441558, "grad_norm": 8.907024383544922, "learning_rate": 9.92246885763352e-06, "loss": 0.4757, "step": 1414 }, { "epoch": 0.06563079777365492, "grad_norm": 27.9068603515625, "learning_rate": 9.922339705566502e-06, "loss": 0.4411, "step": 1415 }, { "epoch": 0.06567717996289425, "grad_norm": 9.298561096191406, "learning_rate": 9.92221044685959e-06, "loss": 0.3891, "step": 1416 }, { "epoch": 0.06572356215213358, "grad_norm": 11.60869312286377, "learning_rate": 9.922081081515585e-06, "loss": 0.54, "step": 1417 }, { "epoch": 0.06576994434137291, "grad_norm": 13.095379829406738, "learning_rate": 9.921951609537291e-06, "loss": 0.5008, "step": 1418 }, { "epoch": 0.06581632653061224, "grad_norm": 11.097042083740234, "learning_rate": 9.92182203092751e-06, "loss": 0.4206, "step": 1419 }, { "epoch": 0.06586270871985157, "grad_norm": 7.749431133270264, "learning_rate": 9.92169234568905e-06, "loss": 0.3934, "step": 1420 }, { "epoch": 0.0659090909090909, "grad_norm": 5.378985404968262, "learning_rate": 9.921562553824721e-06, "loss": 0.4329, "step": 1421 }, { "epoch": 0.06595547309833025, "grad_norm": 7.7063117027282715, "learning_rate": 9.921432655337337e-06, "loss": 0.4173, "step": 1422 }, { "epoch": 0.06600185528756958, "grad_norm": 12.22278881072998, "learning_rate": 9.92130265022971e-06, "loss": 0.5109, "step": 1423 }, { "epoch": 0.06604823747680891, "grad_norm": 8.013806343078613, "learning_rate": 9.921172538504658e-06, "loss": 0.4449, "step": 1424 }, { "epoch": 0.06609461966604824, "grad_norm": 4.056331157684326, "learning_rate": 9.921042320164995e-06, "loss": 0.3317, "step": 1425 }, { "epoch": 0.06614100185528757, "grad_norm": 7.183748722076416, "learning_rate": 9.920911995213549e-06, "loss": 0.385, "step": 1426 }, { "epoch": 0.0661873840445269, "grad_norm": 7.975857257843018, "learning_rate": 9.92078156365314e-06, "loss": 0.3206, "step": 1427 }, { "epoch": 0.06623376623376623, "grad_norm": 14.13263988494873, "learning_rate": 9.920651025486592e-06, "loss": 0.3909, "step": 1428 }, { "epoch": 0.06628014842300557, "grad_norm": 18.932212829589844, "learning_rate": 9.920520380716735e-06, "loss": 0.4073, "step": 1429 }, { "epoch": 0.0663265306122449, "grad_norm": 18.734233856201172, "learning_rate": 9.9203896293464e-06, "loss": 0.4511, "step": 1430 }, { "epoch": 0.06637291280148423, "grad_norm": 8.85221004486084, "learning_rate": 9.92025877137842e-06, "loss": 0.3345, "step": 1431 }, { "epoch": 0.06641929499072356, "grad_norm": 10.716764450073242, "learning_rate": 9.920127806815627e-06, "loss": 0.4115, "step": 1432 }, { "epoch": 0.06646567717996289, "grad_norm": 9.528071403503418, "learning_rate": 9.91999673566086e-06, "loss": 0.4176, "step": 1433 }, { "epoch": 0.06651205936920222, "grad_norm": 7.039771556854248, "learning_rate": 9.91986555791696e-06, "loss": 0.3416, "step": 1434 }, { "epoch": 0.06655844155844155, "grad_norm": 7.141538619995117, "learning_rate": 9.919734273586767e-06, "loss": 0.3735, "step": 1435 }, { "epoch": 0.0666048237476809, "grad_norm": 8.129880905151367, "learning_rate": 9.919602882673127e-06, "loss": 0.5043, "step": 1436 }, { "epoch": 0.06665120593692023, "grad_norm": 12.437419891357422, "learning_rate": 9.919471385178884e-06, "loss": 0.254, "step": 1437 }, { "epoch": 0.06669758812615956, "grad_norm": 11.301762580871582, "learning_rate": 9.919339781106887e-06, "loss": 0.395, "step": 1438 }, { "epoch": 0.06674397031539889, "grad_norm": 13.796494483947754, "learning_rate": 9.919208070459992e-06, "loss": 0.3899, "step": 1439 }, { "epoch": 0.06679035250463822, "grad_norm": 24.989452362060547, "learning_rate": 9.919076253241048e-06, "loss": 0.3191, "step": 1440 }, { "epoch": 0.06683673469387755, "grad_norm": 8.446212768554688, "learning_rate": 9.918944329452909e-06, "loss": 0.3682, "step": 1441 }, { "epoch": 0.06688311688311688, "grad_norm": 9.151593208312988, "learning_rate": 9.918812299098437e-06, "loss": 0.4122, "step": 1442 }, { "epoch": 0.06692949907235622, "grad_norm": 12.929594039916992, "learning_rate": 9.91868016218049e-06, "loss": 0.545, "step": 1443 }, { "epoch": 0.06697588126159555, "grad_norm": 11.132752418518066, "learning_rate": 9.918547918701933e-06, "loss": 0.355, "step": 1444 }, { "epoch": 0.06702226345083488, "grad_norm": 11.164765357971191, "learning_rate": 9.91841556866563e-06, "loss": 0.3762, "step": 1445 }, { "epoch": 0.06706864564007421, "grad_norm": 11.814048767089844, "learning_rate": 9.918283112074444e-06, "loss": 0.3855, "step": 1446 }, { "epoch": 0.06711502782931354, "grad_norm": 7.94216251373291, "learning_rate": 9.918150548931254e-06, "loss": 0.3651, "step": 1447 }, { "epoch": 0.06716141001855287, "grad_norm": 8.36584758758545, "learning_rate": 9.918017879238922e-06, "loss": 0.382, "step": 1448 }, { "epoch": 0.0672077922077922, "grad_norm": 12.419130325317383, "learning_rate": 9.917885103000329e-06, "loss": 0.545, "step": 1449 }, { "epoch": 0.06725417439703155, "grad_norm": 9.852365493774414, "learning_rate": 9.917752220218348e-06, "loss": 0.4404, "step": 1450 }, { "epoch": 0.06730055658627088, "grad_norm": 9.991074562072754, "learning_rate": 9.91761923089586e-06, "loss": 0.4921, "step": 1451 }, { "epoch": 0.0673469387755102, "grad_norm": 7.387878894805908, "learning_rate": 9.917486135035745e-06, "loss": 0.4056, "step": 1452 }, { "epoch": 0.06739332096474954, "grad_norm": 14.026016235351562, "learning_rate": 9.917352932640886e-06, "loss": 0.4602, "step": 1453 }, { "epoch": 0.06743970315398887, "grad_norm": 6.933674335479736, "learning_rate": 9.91721962371417e-06, "loss": 0.4547, "step": 1454 }, { "epoch": 0.0674860853432282, "grad_norm": 5.38724422454834, "learning_rate": 9.917086208258483e-06, "loss": 0.3966, "step": 1455 }, { "epoch": 0.06753246753246753, "grad_norm": 6.950605392456055, "learning_rate": 9.91695268627672e-06, "loss": 0.3848, "step": 1456 }, { "epoch": 0.06757884972170687, "grad_norm": 9.132477760314941, "learning_rate": 9.916819057771767e-06, "loss": 0.4497, "step": 1457 }, { "epoch": 0.0676252319109462, "grad_norm": 7.811252117156982, "learning_rate": 9.916685322746524e-06, "loss": 0.343, "step": 1458 }, { "epoch": 0.06767161410018553, "grad_norm": 8.617566108703613, "learning_rate": 9.916551481203886e-06, "loss": 0.403, "step": 1459 }, { "epoch": 0.06771799628942486, "grad_norm": 9.373895645141602, "learning_rate": 9.916417533146754e-06, "loss": 0.4577, "step": 1460 }, { "epoch": 0.06776437847866419, "grad_norm": 5.3867573738098145, "learning_rate": 9.91628347857803e-06, "loss": 0.2562, "step": 1461 }, { "epoch": 0.06781076066790352, "grad_norm": 7.291882514953613, "learning_rate": 9.916149317500616e-06, "loss": 0.4496, "step": 1462 }, { "epoch": 0.06785714285714285, "grad_norm": 14.43814754486084, "learning_rate": 9.91601504991742e-06, "loss": 0.6133, "step": 1463 }, { "epoch": 0.0679035250463822, "grad_norm": 15.094282150268555, "learning_rate": 9.915880675831352e-06, "loss": 0.3642, "step": 1464 }, { "epoch": 0.06794990723562153, "grad_norm": 11.56502914428711, "learning_rate": 9.915746195245323e-06, "loss": 0.5523, "step": 1465 }, { "epoch": 0.06799628942486086, "grad_norm": 9.459424018859863, "learning_rate": 9.915611608162243e-06, "loss": 0.3541, "step": 1466 }, { "epoch": 0.06804267161410019, "grad_norm": 12.182523727416992, "learning_rate": 9.915476914585032e-06, "loss": 0.3343, "step": 1467 }, { "epoch": 0.06808905380333952, "grad_norm": 6.906976222991943, "learning_rate": 9.915342114516606e-06, "loss": 0.3286, "step": 1468 }, { "epoch": 0.06813543599257885, "grad_norm": 10.134421348571777, "learning_rate": 9.915207207959886e-06, "loss": 0.4203, "step": 1469 }, { "epoch": 0.06818181818181818, "grad_norm": 6.920685768127441, "learning_rate": 9.915072194917791e-06, "loss": 0.3465, "step": 1470 }, { "epoch": 0.06822820037105752, "grad_norm": 10.712104797363281, "learning_rate": 9.914937075393254e-06, "loss": 0.4817, "step": 1471 }, { "epoch": 0.06827458256029685, "grad_norm": 8.295568466186523, "learning_rate": 9.914801849389194e-06, "loss": 0.4846, "step": 1472 }, { "epoch": 0.06832096474953618, "grad_norm": 9.285951614379883, "learning_rate": 9.914666516908546e-06, "loss": 0.3246, "step": 1473 }, { "epoch": 0.06836734693877551, "grad_norm": 8.633328437805176, "learning_rate": 9.91453107795424e-06, "loss": 0.3885, "step": 1474 }, { "epoch": 0.06841372912801484, "grad_norm": 8.941401481628418, "learning_rate": 9.914395532529212e-06, "loss": 0.4084, "step": 1475 }, { "epoch": 0.06846011131725417, "grad_norm": 9.460816383361816, "learning_rate": 9.914259880636395e-06, "loss": 0.3873, "step": 1476 }, { "epoch": 0.0685064935064935, "grad_norm": 9.058016777038574, "learning_rate": 9.91412412227873e-06, "loss": 0.4704, "step": 1477 }, { "epoch": 0.06855287569573283, "grad_norm": 9.46414566040039, "learning_rate": 9.913988257459157e-06, "loss": 0.456, "step": 1478 }, { "epoch": 0.06859925788497218, "grad_norm": 5.4654154777526855, "learning_rate": 9.91385228618062e-06, "loss": 0.3266, "step": 1479 }, { "epoch": 0.0686456400742115, "grad_norm": 9.407960891723633, "learning_rate": 9.913716208446067e-06, "loss": 0.4416, "step": 1480 }, { "epoch": 0.06869202226345084, "grad_norm": 5.933557510375977, "learning_rate": 9.913580024258442e-06, "loss": 0.3576, "step": 1481 }, { "epoch": 0.06873840445269017, "grad_norm": 17.78913688659668, "learning_rate": 9.913443733620699e-06, "loss": 0.4068, "step": 1482 }, { "epoch": 0.0687847866419295, "grad_norm": 9.9055814743042, "learning_rate": 9.91330733653579e-06, "loss": 0.4981, "step": 1483 }, { "epoch": 0.06883116883116883, "grad_norm": 8.763126373291016, "learning_rate": 9.913170833006666e-06, "loss": 0.4199, "step": 1484 }, { "epoch": 0.06887755102040816, "grad_norm": 5.000636100769043, "learning_rate": 9.913034223036289e-06, "loss": 0.3664, "step": 1485 }, { "epoch": 0.0689239332096475, "grad_norm": 8.259180068969727, "learning_rate": 9.912897506627617e-06, "loss": 0.3751, "step": 1486 }, { "epoch": 0.06897031539888683, "grad_norm": 8.058727264404297, "learning_rate": 9.912760683783611e-06, "loss": 0.3246, "step": 1487 }, { "epoch": 0.06901669758812616, "grad_norm": 8.438179969787598, "learning_rate": 9.912623754507237e-06, "loss": 0.4494, "step": 1488 }, { "epoch": 0.06906307977736549, "grad_norm": 7.081662178039551, "learning_rate": 9.91248671880146e-06, "loss": 0.3979, "step": 1489 }, { "epoch": 0.06910946196660482, "grad_norm": 6.56158971786499, "learning_rate": 9.912349576669249e-06, "loss": 0.3837, "step": 1490 }, { "epoch": 0.06915584415584415, "grad_norm": 7.971704959869385, "learning_rate": 9.912212328113575e-06, "loss": 0.43, "step": 1491 }, { "epoch": 0.06920222634508348, "grad_norm": 13.761358261108398, "learning_rate": 9.912074973137413e-06, "loss": 0.2919, "step": 1492 }, { "epoch": 0.06924860853432283, "grad_norm": 6.294440269470215, "learning_rate": 9.911937511743737e-06, "loss": 0.3192, "step": 1493 }, { "epoch": 0.06929499072356216, "grad_norm": 8.141592979431152, "learning_rate": 9.911799943935527e-06, "loss": 0.4087, "step": 1494 }, { "epoch": 0.06934137291280149, "grad_norm": 5.996993064880371, "learning_rate": 9.911662269715761e-06, "loss": 0.519, "step": 1495 }, { "epoch": 0.06938775510204082, "grad_norm": 9.286766052246094, "learning_rate": 9.91152448908742e-06, "loss": 0.3841, "step": 1496 }, { "epoch": 0.06943413729128015, "grad_norm": 5.521185398101807, "learning_rate": 9.911386602053494e-06, "loss": 0.3851, "step": 1497 }, { "epoch": 0.06948051948051948, "grad_norm": 6.36549186706543, "learning_rate": 9.911248608616968e-06, "loss": 0.4854, "step": 1498 }, { "epoch": 0.0695269016697588, "grad_norm": 10.266451835632324, "learning_rate": 9.91111050878083e-06, "loss": 0.4263, "step": 1499 }, { "epoch": 0.06957328385899815, "grad_norm": 8.985712051391602, "learning_rate": 9.910972302548075e-06, "loss": 0.4184, "step": 1500 }, { "epoch": 0.06961966604823748, "grad_norm": 6.369113922119141, "learning_rate": 9.910833989921695e-06, "loss": 0.3712, "step": 1501 }, { "epoch": 0.06966604823747681, "grad_norm": 14.061965942382812, "learning_rate": 9.910695570904687e-06, "loss": 0.4341, "step": 1502 }, { "epoch": 0.06971243042671614, "grad_norm": 8.449063301086426, "learning_rate": 9.910557045500047e-06, "loss": 0.3406, "step": 1503 }, { "epoch": 0.06975881261595547, "grad_norm": 6.013739585876465, "learning_rate": 9.91041841371078e-06, "loss": 0.3647, "step": 1504 }, { "epoch": 0.0698051948051948, "grad_norm": 6.72538948059082, "learning_rate": 9.910279675539889e-06, "loss": 0.3407, "step": 1505 }, { "epoch": 0.06985157699443413, "grad_norm": 11.727437973022461, "learning_rate": 9.910140830990378e-06, "loss": 0.3819, "step": 1506 }, { "epoch": 0.06989795918367347, "grad_norm": 8.58132266998291, "learning_rate": 9.910001880065256e-06, "loss": 0.2624, "step": 1507 }, { "epoch": 0.0699443413729128, "grad_norm": 10.449089050292969, "learning_rate": 9.909862822767533e-06, "loss": 0.5223, "step": 1508 }, { "epoch": 0.06999072356215214, "grad_norm": 17.431640625, "learning_rate": 9.909723659100221e-06, "loss": 0.5834, "step": 1509 }, { "epoch": 0.07003710575139147, "grad_norm": 20.827594757080078, "learning_rate": 9.909584389066336e-06, "loss": 0.5179, "step": 1510 }, { "epoch": 0.0700834879406308, "grad_norm": 17.05020523071289, "learning_rate": 9.909445012668894e-06, "loss": 0.4933, "step": 1511 }, { "epoch": 0.07012987012987013, "grad_norm": 13.5900297164917, "learning_rate": 9.909305529910917e-06, "loss": 0.4571, "step": 1512 }, { "epoch": 0.07017625231910946, "grad_norm": 14.767382621765137, "learning_rate": 9.909165940795425e-06, "loss": 0.5792, "step": 1513 }, { "epoch": 0.0702226345083488, "grad_norm": 6.515346527099609, "learning_rate": 9.909026245325442e-06, "loss": 0.3405, "step": 1514 }, { "epoch": 0.07026901669758813, "grad_norm": 7.224216938018799, "learning_rate": 9.908886443503994e-06, "loss": 0.3287, "step": 1515 }, { "epoch": 0.07031539888682746, "grad_norm": 6.571375846862793, "learning_rate": 9.908746535334108e-06, "loss": 0.4211, "step": 1516 }, { "epoch": 0.07036178107606679, "grad_norm": 6.652623653411865, "learning_rate": 9.908606520818821e-06, "loss": 0.4177, "step": 1517 }, { "epoch": 0.07040816326530612, "grad_norm": 5.15885066986084, "learning_rate": 9.90846639996116e-06, "loss": 0.2447, "step": 1518 }, { "epoch": 0.07045454545454545, "grad_norm": 13.11173152923584, "learning_rate": 9.908326172764166e-06, "loss": 0.3603, "step": 1519 }, { "epoch": 0.07050092764378478, "grad_norm": 8.866739273071289, "learning_rate": 9.908185839230873e-06, "loss": 0.3126, "step": 1520 }, { "epoch": 0.07054730983302412, "grad_norm": 9.015592575073242, "learning_rate": 9.908045399364324e-06, "loss": 0.45, "step": 1521 }, { "epoch": 0.07059369202226345, "grad_norm": 9.790328025817871, "learning_rate": 9.907904853167558e-06, "loss": 0.343, "step": 1522 }, { "epoch": 0.07064007421150278, "grad_norm": 11.232175827026367, "learning_rate": 9.907764200643623e-06, "loss": 0.4114, "step": 1523 }, { "epoch": 0.07068645640074211, "grad_norm": 6.963201999664307, "learning_rate": 9.907623441795566e-06, "loss": 0.4265, "step": 1524 }, { "epoch": 0.07073283858998144, "grad_norm": 8.039623260498047, "learning_rate": 9.907482576626434e-06, "loss": 0.4701, "step": 1525 }, { "epoch": 0.07077922077922078, "grad_norm": 11.24545669555664, "learning_rate": 9.907341605139282e-06, "loss": 0.4431, "step": 1526 }, { "epoch": 0.0708256029684601, "grad_norm": 7.150320053100586, "learning_rate": 9.90720052733716e-06, "loss": 0.3432, "step": 1527 }, { "epoch": 0.07087198515769945, "grad_norm": 9.77342414855957, "learning_rate": 9.907059343223129e-06, "loss": 0.4888, "step": 1528 }, { "epoch": 0.07091836734693878, "grad_norm": 18.251609802246094, "learning_rate": 9.906918052800245e-06, "loss": 0.4804, "step": 1529 }, { "epoch": 0.07096474953617811, "grad_norm": 11.045990943908691, "learning_rate": 9.906776656071569e-06, "loss": 0.3583, "step": 1530 }, { "epoch": 0.07101113172541744, "grad_norm": 5.638646125793457, "learning_rate": 9.906635153040166e-06, "loss": 0.4403, "step": 1531 }, { "epoch": 0.07105751391465677, "grad_norm": 5.7549729347229, "learning_rate": 9.906493543709099e-06, "loss": 0.3973, "step": 1532 }, { "epoch": 0.0711038961038961, "grad_norm": 5.926742076873779, "learning_rate": 9.906351828081439e-06, "loss": 0.3885, "step": 1533 }, { "epoch": 0.07115027829313543, "grad_norm": 6.403658390045166, "learning_rate": 9.906210006160253e-06, "loss": 0.3582, "step": 1534 }, { "epoch": 0.07119666048237477, "grad_norm": 5.853074550628662, "learning_rate": 9.906068077948616e-06, "loss": 0.3647, "step": 1535 }, { "epoch": 0.0712430426716141, "grad_norm": 10.181866645812988, "learning_rate": 9.905926043449601e-06, "loss": 0.3504, "step": 1536 }, { "epoch": 0.07128942486085343, "grad_norm": 11.209775924682617, "learning_rate": 9.905783902666286e-06, "loss": 0.3733, "step": 1537 }, { "epoch": 0.07133580705009276, "grad_norm": 17.368999481201172, "learning_rate": 9.90564165560175e-06, "loss": 0.4284, "step": 1538 }, { "epoch": 0.0713821892393321, "grad_norm": 8.968035697937012, "learning_rate": 9.905499302259077e-06, "loss": 0.4696, "step": 1539 }, { "epoch": 0.07142857142857142, "grad_norm": 5.070900917053223, "learning_rate": 9.905356842641345e-06, "loss": 0.3364, "step": 1540 }, { "epoch": 0.07147495361781075, "grad_norm": 9.276000022888184, "learning_rate": 9.905214276751649e-06, "loss": 0.4251, "step": 1541 }, { "epoch": 0.0715213358070501, "grad_norm": 7.339794635772705, "learning_rate": 9.90507160459307e-06, "loss": 0.3857, "step": 1542 }, { "epoch": 0.07156771799628943, "grad_norm": 10.61128044128418, "learning_rate": 9.904928826168702e-06, "loss": 0.4932, "step": 1543 }, { "epoch": 0.07161410018552876, "grad_norm": 11.40589427947998, "learning_rate": 9.904785941481638e-06, "loss": 0.3222, "step": 1544 }, { "epoch": 0.07166048237476809, "grad_norm": 6.4346418380737305, "learning_rate": 9.904642950534974e-06, "loss": 0.3812, "step": 1545 }, { "epoch": 0.07170686456400742, "grad_norm": 8.487907409667969, "learning_rate": 9.904499853331808e-06, "loss": 0.3386, "step": 1546 }, { "epoch": 0.07175324675324675, "grad_norm": 6.518478870391846, "learning_rate": 9.90435664987524e-06, "loss": 0.3584, "step": 1547 }, { "epoch": 0.07179962894248608, "grad_norm": 11.609885215759277, "learning_rate": 9.904213340168369e-06, "loss": 0.4251, "step": 1548 }, { "epoch": 0.07184601113172542, "grad_norm": 12.552114486694336, "learning_rate": 9.904069924214306e-06, "loss": 0.5339, "step": 1549 }, { "epoch": 0.07189239332096475, "grad_norm": 9.770230293273926, "learning_rate": 9.903926402016153e-06, "loss": 0.4383, "step": 1550 }, { "epoch": 0.07193877551020408, "grad_norm": 10.418357849121094, "learning_rate": 9.903782773577023e-06, "loss": 0.5006, "step": 1551 }, { "epoch": 0.07198515769944341, "grad_norm": 10.9395170211792, "learning_rate": 9.903639038900023e-06, "loss": 0.3608, "step": 1552 }, { "epoch": 0.07203153988868274, "grad_norm": 7.351165294647217, "learning_rate": 9.90349519798827e-06, "loss": 0.4279, "step": 1553 }, { "epoch": 0.07207792207792207, "grad_norm": 7.841404914855957, "learning_rate": 9.903351250844882e-06, "loss": 0.4441, "step": 1554 }, { "epoch": 0.0721243042671614, "grad_norm": 8.621072769165039, "learning_rate": 9.903207197472973e-06, "loss": 0.5108, "step": 1555 }, { "epoch": 0.07217068645640075, "grad_norm": 9.334050178527832, "learning_rate": 9.903063037875667e-06, "loss": 0.3803, "step": 1556 }, { "epoch": 0.07221706864564008, "grad_norm": 7.676994800567627, "learning_rate": 9.902918772056087e-06, "loss": 0.3931, "step": 1557 }, { "epoch": 0.07226345083487941, "grad_norm": 12.775057792663574, "learning_rate": 9.902774400017357e-06, "loss": 0.4311, "step": 1558 }, { "epoch": 0.07230983302411874, "grad_norm": 10.599860191345215, "learning_rate": 9.902629921762607e-06, "loss": 0.4435, "step": 1559 }, { "epoch": 0.07235621521335807, "grad_norm": 6.8902268409729, "learning_rate": 9.902485337294965e-06, "loss": 0.3776, "step": 1560 }, { "epoch": 0.0724025974025974, "grad_norm": 7.785923480987549, "learning_rate": 9.902340646617564e-06, "loss": 0.4853, "step": 1561 }, { "epoch": 0.07244897959183673, "grad_norm": 10.217902183532715, "learning_rate": 9.902195849733537e-06, "loss": 0.401, "step": 1562 }, { "epoch": 0.07249536178107607, "grad_norm": 18.460039138793945, "learning_rate": 9.902050946646024e-06, "loss": 0.4608, "step": 1563 }, { "epoch": 0.0725417439703154, "grad_norm": 9.6320219039917, "learning_rate": 9.901905937358163e-06, "loss": 0.4431, "step": 1564 }, { "epoch": 0.07258812615955473, "grad_norm": 9.89252758026123, "learning_rate": 9.901760821873093e-06, "loss": 0.4656, "step": 1565 }, { "epoch": 0.07263450834879406, "grad_norm": 6.526942253112793, "learning_rate": 9.901615600193963e-06, "loss": 0.3603, "step": 1566 }, { "epoch": 0.0726808905380334, "grad_norm": 8.337512969970703, "learning_rate": 9.901470272323916e-06, "loss": 0.4907, "step": 1567 }, { "epoch": 0.07272727272727272, "grad_norm": 11.226006507873535, "learning_rate": 9.9013248382661e-06, "loss": 0.4678, "step": 1568 }, { "epoch": 0.07277365491651205, "grad_norm": 8.106898307800293, "learning_rate": 9.901179298023667e-06, "loss": 0.3922, "step": 1569 }, { "epoch": 0.0728200371057514, "grad_norm": 6.212292671203613, "learning_rate": 9.90103365159977e-06, "loss": 0.4168, "step": 1570 }, { "epoch": 0.07286641929499073, "grad_norm": 14.321982383728027, "learning_rate": 9.900887898997563e-06, "loss": 0.4789, "step": 1571 }, { "epoch": 0.07291280148423006, "grad_norm": 6.102934837341309, "learning_rate": 9.900742040220204e-06, "loss": 0.33, "step": 1572 }, { "epoch": 0.07295918367346939, "grad_norm": 9.807042121887207, "learning_rate": 9.900596075270855e-06, "loss": 0.5043, "step": 1573 }, { "epoch": 0.07300556586270872, "grad_norm": 6.906396865844727, "learning_rate": 9.900450004152677e-06, "loss": 0.3265, "step": 1574 }, { "epoch": 0.07305194805194805, "grad_norm": 10.470260620117188, "learning_rate": 9.900303826868834e-06, "loss": 0.386, "step": 1575 }, { "epoch": 0.07309833024118738, "grad_norm": 13.083870887756348, "learning_rate": 9.900157543422493e-06, "loss": 0.4323, "step": 1576 }, { "epoch": 0.07314471243042672, "grad_norm": 16.893251419067383, "learning_rate": 9.900011153816821e-06, "loss": 0.3061, "step": 1577 }, { "epoch": 0.07319109461966605, "grad_norm": 7.960744857788086, "learning_rate": 9.899864658054996e-06, "loss": 0.339, "step": 1578 }, { "epoch": 0.07323747680890538, "grad_norm": 9.740241050720215, "learning_rate": 9.899718056140187e-06, "loss": 0.427, "step": 1579 }, { "epoch": 0.07328385899814471, "grad_norm": 6.685577392578125, "learning_rate": 9.899571348075569e-06, "loss": 0.2719, "step": 1580 }, { "epoch": 0.07333024118738404, "grad_norm": 11.253717422485352, "learning_rate": 9.899424533864322e-06, "loss": 0.4074, "step": 1581 }, { "epoch": 0.07337662337662337, "grad_norm": 8.725791931152344, "learning_rate": 9.899277613509627e-06, "loss": 0.422, "step": 1582 }, { "epoch": 0.0734230055658627, "grad_norm": 7.77284049987793, "learning_rate": 9.899130587014666e-06, "loss": 0.3355, "step": 1583 }, { "epoch": 0.07346938775510205, "grad_norm": 15.851858139038086, "learning_rate": 9.898983454382627e-06, "loss": 0.5554, "step": 1584 }, { "epoch": 0.07351576994434138, "grad_norm": 18.274280548095703, "learning_rate": 9.898836215616693e-06, "loss": 0.5018, "step": 1585 }, { "epoch": 0.07356215213358071, "grad_norm": 12.223053932189941, "learning_rate": 9.898688870720057e-06, "loss": 0.359, "step": 1586 }, { "epoch": 0.07360853432282004, "grad_norm": 13.691314697265625, "learning_rate": 9.89854141969591e-06, "loss": 0.4932, "step": 1587 }, { "epoch": 0.07365491651205937, "grad_norm": 9.386008262634277, "learning_rate": 9.89839386254745e-06, "loss": 0.4108, "step": 1588 }, { "epoch": 0.0737012987012987, "grad_norm": 4.094961643218994, "learning_rate": 9.898246199277867e-06, "loss": 0.2912, "step": 1589 }, { "epoch": 0.07374768089053803, "grad_norm": 7.9121317863464355, "learning_rate": 9.898098429890363e-06, "loss": 0.4921, "step": 1590 }, { "epoch": 0.07379406307977737, "grad_norm": 4.8838067054748535, "learning_rate": 9.897950554388143e-06, "loss": 0.3435, "step": 1591 }, { "epoch": 0.0738404452690167, "grad_norm": 9.524635314941406, "learning_rate": 9.897802572774407e-06, "loss": 0.4286, "step": 1592 }, { "epoch": 0.07388682745825603, "grad_norm": 5.625526428222656, "learning_rate": 9.897654485052361e-06, "loss": 0.3729, "step": 1593 }, { "epoch": 0.07393320964749536, "grad_norm": 10.1749849319458, "learning_rate": 9.897506291225214e-06, "loss": 0.33, "step": 1594 }, { "epoch": 0.07397959183673469, "grad_norm": 6.833531379699707, "learning_rate": 9.897357991296175e-06, "loss": 0.3716, "step": 1595 }, { "epoch": 0.07402597402597402, "grad_norm": 11.445122718811035, "learning_rate": 9.897209585268459e-06, "loss": 0.4129, "step": 1596 }, { "epoch": 0.07407235621521335, "grad_norm": 7.901847839355469, "learning_rate": 9.897061073145282e-06, "loss": 0.2784, "step": 1597 }, { "epoch": 0.0741187384044527, "grad_norm": 4.615967273712158, "learning_rate": 9.896912454929857e-06, "loss": 0.3535, "step": 1598 }, { "epoch": 0.07416512059369203, "grad_norm": 10.042531967163086, "learning_rate": 9.89676373062541e-06, "loss": 0.4432, "step": 1599 }, { "epoch": 0.07421150278293136, "grad_norm": 19.618072509765625, "learning_rate": 9.896614900235157e-06, "loss": 0.5697, "step": 1600 }, { "epoch": 0.07425788497217069, "grad_norm": 9.040536880493164, "learning_rate": 9.896465963762326e-06, "loss": 0.4275, "step": 1601 }, { "epoch": 0.07430426716141002, "grad_norm": 6.890982151031494, "learning_rate": 9.89631692121014e-06, "loss": 0.3732, "step": 1602 }, { "epoch": 0.07435064935064935, "grad_norm": 14.203887939453125, "learning_rate": 9.896167772581833e-06, "loss": 0.4995, "step": 1603 }, { "epoch": 0.07439703153988868, "grad_norm": 8.472126007080078, "learning_rate": 9.896018517880634e-06, "loss": 0.4614, "step": 1604 }, { "epoch": 0.07444341372912801, "grad_norm": 11.721049308776855, "learning_rate": 9.895869157109775e-06, "loss": 0.4181, "step": 1605 }, { "epoch": 0.07448979591836735, "grad_norm": 15.64094066619873, "learning_rate": 9.895719690272493e-06, "loss": 0.4992, "step": 1606 }, { "epoch": 0.07453617810760668, "grad_norm": 10.629791259765625, "learning_rate": 9.895570117372026e-06, "loss": 0.4645, "step": 1607 }, { "epoch": 0.07458256029684601, "grad_norm": 8.812399864196777, "learning_rate": 9.895420438411616e-06, "loss": 0.4158, "step": 1608 }, { "epoch": 0.07462894248608534, "grad_norm": 7.919140338897705, "learning_rate": 9.895270653394501e-06, "loss": 0.3887, "step": 1609 }, { "epoch": 0.07467532467532467, "grad_norm": 12.560503005981445, "learning_rate": 9.895120762323933e-06, "loss": 0.4176, "step": 1610 }, { "epoch": 0.074721706864564, "grad_norm": 7.549661159515381, "learning_rate": 9.894970765203153e-06, "loss": 0.3184, "step": 1611 }, { "epoch": 0.07476808905380333, "grad_norm": 7.897232532501221, "learning_rate": 9.894820662035416e-06, "loss": 0.4222, "step": 1612 }, { "epoch": 0.07481447124304268, "grad_norm": 10.421616554260254, "learning_rate": 9.89467045282397e-06, "loss": 0.4519, "step": 1613 }, { "epoch": 0.074860853432282, "grad_norm": 6.681651592254639, "learning_rate": 9.894520137572069e-06, "loss": 0.4768, "step": 1614 }, { "epoch": 0.07490723562152134, "grad_norm": 7.329443454742432, "learning_rate": 9.894369716282971e-06, "loss": 0.3233, "step": 1615 }, { "epoch": 0.07495361781076067, "grad_norm": 11.575569152832031, "learning_rate": 9.894219188959935e-06, "loss": 0.3806, "step": 1616 }, { "epoch": 0.075, "grad_norm": 9.020162582397461, "learning_rate": 9.894068555606222e-06, "loss": 0.4315, "step": 1617 }, { "epoch": 0.07504638218923933, "grad_norm": 7.394064426422119, "learning_rate": 9.893917816225095e-06, "loss": 0.3008, "step": 1618 }, { "epoch": 0.07509276437847866, "grad_norm": 9.892121315002441, "learning_rate": 9.89376697081982e-06, "loss": 0.4621, "step": 1619 }, { "epoch": 0.075139146567718, "grad_norm": 8.54519271850586, "learning_rate": 9.893616019393663e-06, "loss": 0.4032, "step": 1620 }, { "epoch": 0.07518552875695733, "grad_norm": 17.036008834838867, "learning_rate": 9.8934649619499e-06, "loss": 0.5775, "step": 1621 }, { "epoch": 0.07523191094619666, "grad_norm": 5.156581878662109, "learning_rate": 9.893313798491794e-06, "loss": 0.3836, "step": 1622 }, { "epoch": 0.07527829313543599, "grad_norm": 5.186354637145996, "learning_rate": 9.893162529022631e-06, "loss": 0.3191, "step": 1623 }, { "epoch": 0.07532467532467532, "grad_norm": 5.057391166687012, "learning_rate": 9.893011153545679e-06, "loss": 0.3165, "step": 1624 }, { "epoch": 0.07537105751391465, "grad_norm": 8.095443725585938, "learning_rate": 9.892859672064224e-06, "loss": 0.4695, "step": 1625 }, { "epoch": 0.07541743970315398, "grad_norm": 8.784985542297363, "learning_rate": 9.892708084581542e-06, "loss": 0.3837, "step": 1626 }, { "epoch": 0.07546382189239333, "grad_norm": 3.9122869968414307, "learning_rate": 9.892556391100921e-06, "loss": 0.27, "step": 1627 }, { "epoch": 0.07551020408163266, "grad_norm": 9.202192306518555, "learning_rate": 9.892404591625647e-06, "loss": 0.4719, "step": 1628 }, { "epoch": 0.07555658627087199, "grad_norm": 8.467674255371094, "learning_rate": 9.892252686159007e-06, "loss": 0.3685, "step": 1629 }, { "epoch": 0.07560296846011132, "grad_norm": 8.993501663208008, "learning_rate": 9.892100674704295e-06, "loss": 0.4704, "step": 1630 }, { "epoch": 0.07564935064935065, "grad_norm": 10.673452377319336, "learning_rate": 9.891948557264801e-06, "loss": 0.4604, "step": 1631 }, { "epoch": 0.07569573283858998, "grad_norm": 9.8616361618042, "learning_rate": 9.891796333843821e-06, "loss": 0.4144, "step": 1632 }, { "epoch": 0.07574211502782931, "grad_norm": 5.546168804168701, "learning_rate": 9.891644004444654e-06, "loss": 0.3306, "step": 1633 }, { "epoch": 0.07578849721706865, "grad_norm": 9.97414779663086, "learning_rate": 9.8914915690706e-06, "loss": 0.456, "step": 1634 }, { "epoch": 0.07583487940630798, "grad_norm": 7.436406135559082, "learning_rate": 9.891339027724962e-06, "loss": 0.4225, "step": 1635 }, { "epoch": 0.07588126159554731, "grad_norm": 13.416162490844727, "learning_rate": 9.891186380411043e-06, "loss": 0.5373, "step": 1636 }, { "epoch": 0.07592764378478664, "grad_norm": 7.075533866882324, "learning_rate": 9.89103362713215e-06, "loss": 0.2787, "step": 1637 }, { "epoch": 0.07597402597402597, "grad_norm": 10.460691452026367, "learning_rate": 9.890880767891593e-06, "loss": 0.2441, "step": 1638 }, { "epoch": 0.0760204081632653, "grad_norm": 11.58005142211914, "learning_rate": 9.890727802692686e-06, "loss": 0.3291, "step": 1639 }, { "epoch": 0.07606679035250463, "grad_norm": 26.257251739501953, "learning_rate": 9.89057473153874e-06, "loss": 0.4482, "step": 1640 }, { "epoch": 0.07611317254174398, "grad_norm": 14.275768280029297, "learning_rate": 9.890421554433071e-06, "loss": 0.4343, "step": 1641 }, { "epoch": 0.0761595547309833, "grad_norm": 12.311911582946777, "learning_rate": 9.890268271379e-06, "loss": 0.4898, "step": 1642 }, { "epoch": 0.07620593692022264, "grad_norm": 9.644384384155273, "learning_rate": 9.890114882379844e-06, "loss": 0.5111, "step": 1643 }, { "epoch": 0.07625231910946197, "grad_norm": 7.115758895874023, "learning_rate": 9.889961387438932e-06, "loss": 0.3546, "step": 1644 }, { "epoch": 0.0762987012987013, "grad_norm": 11.029818534851074, "learning_rate": 9.889807786559583e-06, "loss": 0.3715, "step": 1645 }, { "epoch": 0.07634508348794063, "grad_norm": 16.3395938873291, "learning_rate": 9.889654079745125e-06, "loss": 0.5443, "step": 1646 }, { "epoch": 0.07639146567717996, "grad_norm": 7.506332874298096, "learning_rate": 9.889500266998896e-06, "loss": 0.4523, "step": 1647 }, { "epoch": 0.0764378478664193, "grad_norm": 16.62588119506836, "learning_rate": 9.889346348324218e-06, "loss": 0.4564, "step": 1648 }, { "epoch": 0.07648423005565863, "grad_norm": 6.062844276428223, "learning_rate": 9.889192323724432e-06, "loss": 0.3678, "step": 1649 }, { "epoch": 0.07653061224489796, "grad_norm": 7.317836761474609, "learning_rate": 9.889038193202874e-06, "loss": 0.268, "step": 1650 }, { "epoch": 0.07657699443413729, "grad_norm": 13.7401762008667, "learning_rate": 9.88888395676288e-06, "loss": 0.6041, "step": 1651 }, { "epoch": 0.07662337662337662, "grad_norm": 12.036697387695312, "learning_rate": 9.888729614407796e-06, "loss": 0.3767, "step": 1652 }, { "epoch": 0.07666975881261595, "grad_norm": 6.718796253204346, "learning_rate": 9.888575166140963e-06, "loss": 0.3215, "step": 1653 }, { "epoch": 0.07671614100185528, "grad_norm": 8.818647384643555, "learning_rate": 9.888420611965726e-06, "loss": 0.3696, "step": 1654 }, { "epoch": 0.07676252319109463, "grad_norm": 15.281747817993164, "learning_rate": 9.888265951885437e-06, "loss": 0.5375, "step": 1655 }, { "epoch": 0.07680890538033396, "grad_norm": 16.433876037597656, "learning_rate": 9.888111185903442e-06, "loss": 0.3838, "step": 1656 }, { "epoch": 0.07685528756957329, "grad_norm": 6.003076553344727, "learning_rate": 9.887956314023097e-06, "loss": 0.3357, "step": 1657 }, { "epoch": 0.07690166975881262, "grad_norm": 10.678915023803711, "learning_rate": 9.887801336247758e-06, "loss": 0.448, "step": 1658 }, { "epoch": 0.07694805194805195, "grad_norm": 7.918118000030518, "learning_rate": 9.88764625258078e-06, "loss": 0.4228, "step": 1659 }, { "epoch": 0.07699443413729128, "grad_norm": 8.506476402282715, "learning_rate": 9.887491063025525e-06, "loss": 0.4323, "step": 1660 }, { "epoch": 0.0770408163265306, "grad_norm": 5.888410568237305, "learning_rate": 9.887335767585353e-06, "loss": 0.4324, "step": 1661 }, { "epoch": 0.07708719851576995, "grad_norm": 7.521121978759766, "learning_rate": 9.887180366263629e-06, "loss": 0.245, "step": 1662 }, { "epoch": 0.07713358070500928, "grad_norm": 8.600191116333008, "learning_rate": 9.887024859063721e-06, "loss": 0.4142, "step": 1663 }, { "epoch": 0.07717996289424861, "grad_norm": 11.9995698928833, "learning_rate": 9.886869245988997e-06, "loss": 0.506, "step": 1664 }, { "epoch": 0.07722634508348794, "grad_norm": 9.656668663024902, "learning_rate": 9.886713527042828e-06, "loss": 0.5018, "step": 1665 }, { "epoch": 0.07727272727272727, "grad_norm": 5.051121234893799, "learning_rate": 9.886557702228588e-06, "loss": 0.2237, "step": 1666 }, { "epoch": 0.0773191094619666, "grad_norm": 13.189322471618652, "learning_rate": 9.886401771549652e-06, "loss": 0.5168, "step": 1667 }, { "epoch": 0.07736549165120593, "grad_norm": 6.240417957305908, "learning_rate": 9.8862457350094e-06, "loss": 0.4356, "step": 1668 }, { "epoch": 0.07741187384044527, "grad_norm": 9.669219017028809, "learning_rate": 9.886089592611211e-06, "loss": 0.4047, "step": 1669 }, { "epoch": 0.0774582560296846, "grad_norm": 8.205099105834961, "learning_rate": 9.885933344358468e-06, "loss": 0.4483, "step": 1670 }, { "epoch": 0.07750463821892394, "grad_norm": 9.231122970581055, "learning_rate": 9.885776990254557e-06, "loss": 0.4611, "step": 1671 }, { "epoch": 0.07755102040816327, "grad_norm": 5.954568386077881, "learning_rate": 9.885620530302865e-06, "loss": 0.3708, "step": 1672 }, { "epoch": 0.0775974025974026, "grad_norm": 8.258077621459961, "learning_rate": 9.885463964506779e-06, "loss": 0.4032, "step": 1673 }, { "epoch": 0.07764378478664193, "grad_norm": 6.189254283905029, "learning_rate": 9.885307292869693e-06, "loss": 0.3956, "step": 1674 }, { "epoch": 0.07769016697588126, "grad_norm": 9.42783260345459, "learning_rate": 9.885150515395003e-06, "loss": 0.4367, "step": 1675 }, { "epoch": 0.0777365491651206, "grad_norm": 7.5031352043151855, "learning_rate": 9.884993632086103e-06, "loss": 0.3461, "step": 1676 }, { "epoch": 0.07778293135435993, "grad_norm": 10.274894714355469, "learning_rate": 9.884836642946392e-06, "loss": 0.2999, "step": 1677 }, { "epoch": 0.07782931354359926, "grad_norm": 6.234572410583496, "learning_rate": 9.884679547979273e-06, "loss": 0.3965, "step": 1678 }, { "epoch": 0.07787569573283859, "grad_norm": 11.252989768981934, "learning_rate": 9.884522347188146e-06, "loss": 0.4652, "step": 1679 }, { "epoch": 0.07792207792207792, "grad_norm": 9.244441032409668, "learning_rate": 9.88436504057642e-06, "loss": 0.3179, "step": 1680 }, { "epoch": 0.07796846011131725, "grad_norm": 7.456929683685303, "learning_rate": 9.884207628147501e-06, "loss": 0.3983, "step": 1681 }, { "epoch": 0.07801484230055658, "grad_norm": 9.453229904174805, "learning_rate": 9.8840501099048e-06, "loss": 0.2292, "step": 1682 }, { "epoch": 0.07806122448979592, "grad_norm": 7.6068291664123535, "learning_rate": 9.883892485851729e-06, "loss": 0.3719, "step": 1683 }, { "epoch": 0.07810760667903525, "grad_norm": 17.310190200805664, "learning_rate": 9.883734755991704e-06, "loss": 0.4659, "step": 1684 }, { "epoch": 0.07815398886827458, "grad_norm": 6.569539546966553, "learning_rate": 9.883576920328142e-06, "loss": 0.3701, "step": 1685 }, { "epoch": 0.07820037105751391, "grad_norm": 12.00133228302002, "learning_rate": 9.88341897886446e-06, "loss": 0.4249, "step": 1686 }, { "epoch": 0.07824675324675325, "grad_norm": 10.125386238098145, "learning_rate": 9.883260931604084e-06, "loss": 0.4353, "step": 1687 }, { "epoch": 0.07829313543599258, "grad_norm": 7.618310451507568, "learning_rate": 9.883102778550434e-06, "loss": 0.3689, "step": 1688 }, { "epoch": 0.0783395176252319, "grad_norm": 8.175498962402344, "learning_rate": 9.882944519706938e-06, "loss": 0.3711, "step": 1689 }, { "epoch": 0.07838589981447125, "grad_norm": 6.236047267913818, "learning_rate": 9.882786155077024e-06, "loss": 0.3089, "step": 1690 }, { "epoch": 0.07843228200371058, "grad_norm": 7.573027610778809, "learning_rate": 9.882627684664126e-06, "loss": 0.3621, "step": 1691 }, { "epoch": 0.07847866419294991, "grad_norm": 14.750383377075195, "learning_rate": 9.882469108471672e-06, "loss": 0.4499, "step": 1692 }, { "epoch": 0.07852504638218924, "grad_norm": 5.78452205657959, "learning_rate": 9.8823104265031e-06, "loss": 0.3916, "step": 1693 }, { "epoch": 0.07857142857142857, "grad_norm": 17.333593368530273, "learning_rate": 9.882151638761848e-06, "loss": 0.5232, "step": 1694 }, { "epoch": 0.0786178107606679, "grad_norm": 9.169943809509277, "learning_rate": 9.881992745251357e-06, "loss": 0.4205, "step": 1695 }, { "epoch": 0.07866419294990723, "grad_norm": 4.893043041229248, "learning_rate": 9.881833745975067e-06, "loss": 0.3464, "step": 1696 }, { "epoch": 0.07871057513914657, "grad_norm": 15.998039245605469, "learning_rate": 9.881674640936424e-06, "loss": 0.432, "step": 1697 }, { "epoch": 0.0787569573283859, "grad_norm": 7.270008563995361, "learning_rate": 9.881515430138875e-06, "loss": 0.3557, "step": 1698 }, { "epoch": 0.07880333951762523, "grad_norm": 8.34329891204834, "learning_rate": 9.881356113585868e-06, "loss": 0.3603, "step": 1699 }, { "epoch": 0.07884972170686456, "grad_norm": 8.074356079101562, "learning_rate": 9.881196691280856e-06, "loss": 0.3202, "step": 1700 }, { "epoch": 0.0788961038961039, "grad_norm": 9.246953964233398, "learning_rate": 9.881037163227293e-06, "loss": 0.3848, "step": 1701 }, { "epoch": 0.07894248608534322, "grad_norm": 5.3219733238220215, "learning_rate": 9.880877529428634e-06, "loss": 0.4305, "step": 1702 }, { "epoch": 0.07898886827458255, "grad_norm": 13.218489646911621, "learning_rate": 9.880717789888337e-06, "loss": 0.4499, "step": 1703 }, { "epoch": 0.0790352504638219, "grad_norm": 5.56295108795166, "learning_rate": 9.880557944609863e-06, "loss": 0.3921, "step": 1704 }, { "epoch": 0.07908163265306123, "grad_norm": 9.116189002990723, "learning_rate": 9.880397993596677e-06, "loss": 0.4374, "step": 1705 }, { "epoch": 0.07912801484230056, "grad_norm": 7.7431206703186035, "learning_rate": 9.880237936852242e-06, "loss": 0.3345, "step": 1706 }, { "epoch": 0.07917439703153989, "grad_norm": 9.77683162689209, "learning_rate": 9.880077774380025e-06, "loss": 0.464, "step": 1707 }, { "epoch": 0.07922077922077922, "grad_norm": 5.294711112976074, "learning_rate": 9.879917506183498e-06, "loss": 0.2905, "step": 1708 }, { "epoch": 0.07926716141001855, "grad_norm": 8.508414268493652, "learning_rate": 9.879757132266132e-06, "loss": 0.5099, "step": 1709 }, { "epoch": 0.07931354359925788, "grad_norm": 12.247174263000488, "learning_rate": 9.879596652631402e-06, "loss": 0.478, "step": 1710 }, { "epoch": 0.07935992578849722, "grad_norm": 5.7554931640625, "learning_rate": 9.879436067282784e-06, "loss": 0.3359, "step": 1711 }, { "epoch": 0.07940630797773655, "grad_norm": 10.424495697021484, "learning_rate": 9.879275376223757e-06, "loss": 0.3955, "step": 1712 }, { "epoch": 0.07945269016697588, "grad_norm": 7.169469833374023, "learning_rate": 9.879114579457802e-06, "loss": 0.3989, "step": 1713 }, { "epoch": 0.07949907235621521, "grad_norm": 11.65039348602295, "learning_rate": 9.878953676988404e-06, "loss": 0.5018, "step": 1714 }, { "epoch": 0.07954545454545454, "grad_norm": 9.429436683654785, "learning_rate": 9.878792668819049e-06, "loss": 0.2901, "step": 1715 }, { "epoch": 0.07959183673469387, "grad_norm": 9.933744430541992, "learning_rate": 9.878631554953223e-06, "loss": 0.3344, "step": 1716 }, { "epoch": 0.0796382189239332, "grad_norm": 23.086565017700195, "learning_rate": 9.878470335394417e-06, "loss": 0.5726, "step": 1717 }, { "epoch": 0.07968460111317255, "grad_norm": 5.152658462524414, "learning_rate": 9.878309010146123e-06, "loss": 0.3068, "step": 1718 }, { "epoch": 0.07973098330241188, "grad_norm": 11.647034645080566, "learning_rate": 9.878147579211839e-06, "loss": 0.3976, "step": 1719 }, { "epoch": 0.07977736549165121, "grad_norm": 7.461597442626953, "learning_rate": 9.877986042595062e-06, "loss": 0.3853, "step": 1720 }, { "epoch": 0.07982374768089054, "grad_norm": 8.715690612792969, "learning_rate": 9.877824400299288e-06, "loss": 0.4232, "step": 1721 }, { "epoch": 0.07987012987012987, "grad_norm": 11.591468811035156, "learning_rate": 9.877662652328023e-06, "loss": 0.4967, "step": 1722 }, { "epoch": 0.0799165120593692, "grad_norm": 7.065646171569824, "learning_rate": 9.877500798684769e-06, "loss": 0.3901, "step": 1723 }, { "epoch": 0.07996289424860853, "grad_norm": 8.120121955871582, "learning_rate": 9.877338839373032e-06, "loss": 0.3169, "step": 1724 }, { "epoch": 0.08000927643784787, "grad_norm": 7.143196105957031, "learning_rate": 9.877176774396322e-06, "loss": 0.4006, "step": 1725 }, { "epoch": 0.0800556586270872, "grad_norm": 13.326051712036133, "learning_rate": 9.877014603758148e-06, "loss": 0.4243, "step": 1726 }, { "epoch": 0.08010204081632653, "grad_norm": 16.720069885253906, "learning_rate": 9.876852327462027e-06, "loss": 0.4566, "step": 1727 }, { "epoch": 0.08014842300556586, "grad_norm": 10.192374229431152, "learning_rate": 9.876689945511472e-06, "loss": 0.3005, "step": 1728 }, { "epoch": 0.0801948051948052, "grad_norm": 10.913646697998047, "learning_rate": 9.87652745791e-06, "loss": 0.4003, "step": 1729 }, { "epoch": 0.08024118738404452, "grad_norm": 14.520634651184082, "learning_rate": 9.876364864661135e-06, "loss": 0.3921, "step": 1730 }, { "epoch": 0.08028756957328385, "grad_norm": 10.461444854736328, "learning_rate": 9.876202165768397e-06, "loss": 0.4728, "step": 1731 }, { "epoch": 0.0803339517625232, "grad_norm": 8.826254844665527, "learning_rate": 9.87603936123531e-06, "loss": 0.3344, "step": 1732 }, { "epoch": 0.08038033395176253, "grad_norm": 7.81748628616333, "learning_rate": 9.875876451065403e-06, "loss": 0.355, "step": 1733 }, { "epoch": 0.08042671614100186, "grad_norm": 11.198054313659668, "learning_rate": 9.875713435262205e-06, "loss": 0.4262, "step": 1734 }, { "epoch": 0.08047309833024119, "grad_norm": 6.742555141448975, "learning_rate": 9.875550313829245e-06, "loss": 0.3308, "step": 1735 }, { "epoch": 0.08051948051948052, "grad_norm": 6.656951904296875, "learning_rate": 9.87538708677006e-06, "loss": 0.4385, "step": 1736 }, { "epoch": 0.08056586270871985, "grad_norm": 10.575294494628906, "learning_rate": 9.875223754088186e-06, "loss": 0.3801, "step": 1737 }, { "epoch": 0.08061224489795918, "grad_norm": 7.210186958312988, "learning_rate": 9.87506031578716e-06, "loss": 0.3118, "step": 1738 }, { "epoch": 0.08065862708719851, "grad_norm": 7.275099754333496, "learning_rate": 9.874896771870523e-06, "loss": 0.3681, "step": 1739 }, { "epoch": 0.08070500927643785, "grad_norm": 11.818473815917969, "learning_rate": 9.87473312234182e-06, "loss": 0.2601, "step": 1740 }, { "epoch": 0.08075139146567718, "grad_norm": 9.028118133544922, "learning_rate": 9.874569367204594e-06, "loss": 0.397, "step": 1741 }, { "epoch": 0.08079777365491651, "grad_norm": 4.184270858764648, "learning_rate": 9.874405506462394e-06, "loss": 0.4233, "step": 1742 }, { "epoch": 0.08084415584415584, "grad_norm": 6.471487522125244, "learning_rate": 9.87424154011877e-06, "loss": 0.4232, "step": 1743 }, { "epoch": 0.08089053803339517, "grad_norm": 5.286855220794678, "learning_rate": 9.874077468177275e-06, "loss": 0.2882, "step": 1744 }, { "epoch": 0.0809369202226345, "grad_norm": 5.909308910369873, "learning_rate": 9.873913290641459e-06, "loss": 0.3792, "step": 1745 }, { "epoch": 0.08098330241187383, "grad_norm": 7.21044397354126, "learning_rate": 9.873749007514886e-06, "loss": 0.3492, "step": 1746 }, { "epoch": 0.08102968460111318, "grad_norm": 5.204335689544678, "learning_rate": 9.87358461880111e-06, "loss": 0.2868, "step": 1747 }, { "epoch": 0.08107606679035251, "grad_norm": 10.84028148651123, "learning_rate": 9.873420124503693e-06, "loss": 0.4347, "step": 1748 }, { "epoch": 0.08112244897959184, "grad_norm": 6.8630852699279785, "learning_rate": 9.8732555246262e-06, "loss": 0.2956, "step": 1749 }, { "epoch": 0.08116883116883117, "grad_norm": 11.757412910461426, "learning_rate": 9.873090819172197e-06, "loss": 0.466, "step": 1750 }, { "epoch": 0.0812152133580705, "grad_norm": 6.050594806671143, "learning_rate": 9.872926008145251e-06, "loss": 0.3877, "step": 1751 }, { "epoch": 0.08126159554730983, "grad_norm": 14.53018856048584, "learning_rate": 9.872761091548933e-06, "loss": 0.4527, "step": 1752 }, { "epoch": 0.08130797773654916, "grad_norm": 10.327205657958984, "learning_rate": 9.872596069386816e-06, "loss": 0.4794, "step": 1753 }, { "epoch": 0.0813543599257885, "grad_norm": 9.04432201385498, "learning_rate": 9.872430941662476e-06, "loss": 0.4706, "step": 1754 }, { "epoch": 0.08140074211502783, "grad_norm": 11.605061531066895, "learning_rate": 9.872265708379491e-06, "loss": 0.39, "step": 1755 }, { "epoch": 0.08144712430426716, "grad_norm": 9.680621147155762, "learning_rate": 9.872100369541437e-06, "loss": 0.4214, "step": 1756 }, { "epoch": 0.08149350649350649, "grad_norm": 9.988808631896973, "learning_rate": 9.8719349251519e-06, "loss": 0.3012, "step": 1757 }, { "epoch": 0.08153988868274582, "grad_norm": 6.4801859855651855, "learning_rate": 9.87176937521446e-06, "loss": 0.353, "step": 1758 }, { "epoch": 0.08158627087198515, "grad_norm": 10.118090629577637, "learning_rate": 9.871603719732708e-06, "loss": 0.3442, "step": 1759 }, { "epoch": 0.08163265306122448, "grad_norm": 9.499702453613281, "learning_rate": 9.871437958710231e-06, "loss": 0.3478, "step": 1760 }, { "epoch": 0.08167903525046383, "grad_norm": 8.951160430908203, "learning_rate": 9.871272092150618e-06, "loss": 0.4358, "step": 1761 }, { "epoch": 0.08172541743970316, "grad_norm": 7.089564800262451, "learning_rate": 9.871106120057467e-06, "loss": 0.2782, "step": 1762 }, { "epoch": 0.08177179962894249, "grad_norm": 8.835188865661621, "learning_rate": 9.870940042434369e-06, "loss": 0.2921, "step": 1763 }, { "epoch": 0.08181818181818182, "grad_norm": 8.947344779968262, "learning_rate": 9.870773859284926e-06, "loss": 0.3796, "step": 1764 }, { "epoch": 0.08186456400742115, "grad_norm": 7.018903732299805, "learning_rate": 9.870607570612736e-06, "loss": 0.3678, "step": 1765 }, { "epoch": 0.08191094619666048, "grad_norm": 9.784744262695312, "learning_rate": 9.870441176421401e-06, "loss": 0.4196, "step": 1766 }, { "epoch": 0.08195732838589981, "grad_norm": 8.502982139587402, "learning_rate": 9.870274676714527e-06, "loss": 0.2536, "step": 1767 }, { "epoch": 0.08200371057513915, "grad_norm": 12.871807098388672, "learning_rate": 9.870108071495721e-06, "loss": 0.476, "step": 1768 }, { "epoch": 0.08205009276437848, "grad_norm": 8.336403846740723, "learning_rate": 9.869941360768594e-06, "loss": 0.4001, "step": 1769 }, { "epoch": 0.08209647495361781, "grad_norm": 9.37166976928711, "learning_rate": 9.869774544536754e-06, "loss": 0.4815, "step": 1770 }, { "epoch": 0.08214285714285714, "grad_norm": 13.348092079162598, "learning_rate": 9.869607622803818e-06, "loss": 0.4107, "step": 1771 }, { "epoch": 0.08218923933209647, "grad_norm": 10.180688858032227, "learning_rate": 9.8694405955734e-06, "loss": 0.3659, "step": 1772 }, { "epoch": 0.0822356215213358, "grad_norm": 8.338189125061035, "learning_rate": 9.869273462849121e-06, "loss": 0.3681, "step": 1773 }, { "epoch": 0.08228200371057513, "grad_norm": 7.038373947143555, "learning_rate": 9.869106224634601e-06, "loss": 0.455, "step": 1774 }, { "epoch": 0.08232838589981448, "grad_norm": 5.347375869750977, "learning_rate": 9.868938880933463e-06, "loss": 0.3495, "step": 1775 }, { "epoch": 0.08237476808905381, "grad_norm": 7.277772903442383, "learning_rate": 9.86877143174933e-06, "loss": 0.4682, "step": 1776 }, { "epoch": 0.08242115027829314, "grad_norm": 6.4095282554626465, "learning_rate": 9.868603877085834e-06, "loss": 0.2953, "step": 1777 }, { "epoch": 0.08246753246753247, "grad_norm": 11.589584350585938, "learning_rate": 9.868436216946604e-06, "loss": 0.4173, "step": 1778 }, { "epoch": 0.0825139146567718, "grad_norm": 13.30026912689209, "learning_rate": 9.86826845133527e-06, "loss": 0.347, "step": 1779 }, { "epoch": 0.08256029684601113, "grad_norm": 7.4309000968933105, "learning_rate": 9.868100580255466e-06, "loss": 0.3122, "step": 1780 }, { "epoch": 0.08260667903525046, "grad_norm": 9.35327434539795, "learning_rate": 9.867932603710832e-06, "loss": 0.4123, "step": 1781 }, { "epoch": 0.0826530612244898, "grad_norm": 6.186493396759033, "learning_rate": 9.867764521705006e-06, "loss": 0.3532, "step": 1782 }, { "epoch": 0.08269944341372913, "grad_norm": 15.862626075744629, "learning_rate": 9.86759633424163e-06, "loss": 0.4713, "step": 1783 }, { "epoch": 0.08274582560296846, "grad_norm": 9.53009033203125, "learning_rate": 9.867428041324345e-06, "loss": 0.3096, "step": 1784 }, { "epoch": 0.08279220779220779, "grad_norm": 14.275022506713867, "learning_rate": 9.867259642956799e-06, "loss": 0.3188, "step": 1785 }, { "epoch": 0.08283858998144712, "grad_norm": 9.62984848022461, "learning_rate": 9.86709113914264e-06, "loss": 0.3462, "step": 1786 }, { "epoch": 0.08288497217068645, "grad_norm": 12.771395683288574, "learning_rate": 9.866922529885518e-06, "loss": 0.5248, "step": 1787 }, { "epoch": 0.08293135435992578, "grad_norm": 10.383371353149414, "learning_rate": 9.866753815189089e-06, "loss": 0.3588, "step": 1788 }, { "epoch": 0.08297773654916513, "grad_norm": 11.187471389770508, "learning_rate": 9.866584995057004e-06, "loss": 0.3326, "step": 1789 }, { "epoch": 0.08302411873840446, "grad_norm": 8.634135246276855, "learning_rate": 9.86641606949292e-06, "loss": 0.3537, "step": 1790 }, { "epoch": 0.08307050092764379, "grad_norm": 11.492875099182129, "learning_rate": 9.866247038500503e-06, "loss": 0.3548, "step": 1791 }, { "epoch": 0.08311688311688312, "grad_norm": 10.23592472076416, "learning_rate": 9.866077902083408e-06, "loss": 0.4269, "step": 1792 }, { "epoch": 0.08316326530612245, "grad_norm": 7.556457996368408, "learning_rate": 9.865908660245303e-06, "loss": 0.4813, "step": 1793 }, { "epoch": 0.08320964749536178, "grad_norm": 9.981407165527344, "learning_rate": 9.865739312989851e-06, "loss": 0.4039, "step": 1794 }, { "epoch": 0.08325602968460111, "grad_norm": 7.368923664093018, "learning_rate": 9.865569860320725e-06, "loss": 0.3242, "step": 1795 }, { "epoch": 0.08330241187384045, "grad_norm": 13.818697929382324, "learning_rate": 9.865400302241593e-06, "loss": 0.488, "step": 1796 }, { "epoch": 0.08334879406307978, "grad_norm": 11.601330757141113, "learning_rate": 9.865230638756131e-06, "loss": 0.3528, "step": 1797 }, { "epoch": 0.08339517625231911, "grad_norm": 7.84343147277832, "learning_rate": 9.865060869868012e-06, "loss": 0.4181, "step": 1798 }, { "epoch": 0.08344155844155844, "grad_norm": 4.821139812469482, "learning_rate": 9.864890995580919e-06, "loss": 0.376, "step": 1799 }, { "epoch": 0.08348794063079777, "grad_norm": 8.319772720336914, "learning_rate": 9.864721015898524e-06, "loss": 0.4066, "step": 1800 }, { "epoch": 0.0835343228200371, "grad_norm": 6.094742298126221, "learning_rate": 9.864550930824516e-06, "loss": 0.3529, "step": 1801 }, { "epoch": 0.08358070500927643, "grad_norm": 5.841383934020996, "learning_rate": 9.864380740362578e-06, "loss": 0.3915, "step": 1802 }, { "epoch": 0.08362708719851578, "grad_norm": 7.089098930358887, "learning_rate": 9.864210444516396e-06, "loss": 0.3868, "step": 1803 }, { "epoch": 0.0836734693877551, "grad_norm": 6.069279193878174, "learning_rate": 9.864040043289662e-06, "loss": 0.4066, "step": 1804 }, { "epoch": 0.08371985157699444, "grad_norm": 15.142096519470215, "learning_rate": 9.863869536686063e-06, "loss": 0.3821, "step": 1805 }, { "epoch": 0.08376623376623377, "grad_norm": 7.003573417663574, "learning_rate": 9.8636989247093e-06, "loss": 0.359, "step": 1806 }, { "epoch": 0.0838126159554731, "grad_norm": 11.828981399536133, "learning_rate": 9.863528207363062e-06, "loss": 0.3849, "step": 1807 }, { "epoch": 0.08385899814471243, "grad_norm": 9.550284385681152, "learning_rate": 9.863357384651051e-06, "loss": 0.4087, "step": 1808 }, { "epoch": 0.08390538033395176, "grad_norm": 7.848781585693359, "learning_rate": 9.863186456576968e-06, "loss": 0.2882, "step": 1809 }, { "epoch": 0.0839517625231911, "grad_norm": 8.546414375305176, "learning_rate": 9.863015423144516e-06, "loss": 0.3455, "step": 1810 }, { "epoch": 0.08399814471243043, "grad_norm": 13.372135162353516, "learning_rate": 9.8628442843574e-06, "loss": 0.5664, "step": 1811 }, { "epoch": 0.08404452690166976, "grad_norm": 6.420201301574707, "learning_rate": 9.862673040219326e-06, "loss": 0.3502, "step": 1812 }, { "epoch": 0.08409090909090909, "grad_norm": 13.16835880279541, "learning_rate": 9.862501690734007e-06, "loss": 0.4911, "step": 1813 }, { "epoch": 0.08413729128014842, "grad_norm": 7.511894226074219, "learning_rate": 9.862330235905153e-06, "loss": 0.4086, "step": 1814 }, { "epoch": 0.08418367346938775, "grad_norm": 8.174295425415039, "learning_rate": 9.862158675736479e-06, "loss": 0.3542, "step": 1815 }, { "epoch": 0.08423005565862708, "grad_norm": 10.026754379272461, "learning_rate": 9.861987010231701e-06, "loss": 0.3672, "step": 1816 }, { "epoch": 0.08427643784786643, "grad_norm": 7.24601411819458, "learning_rate": 9.861815239394539e-06, "loss": 0.3606, "step": 1817 }, { "epoch": 0.08432282003710576, "grad_norm": 7.4481000900268555, "learning_rate": 9.861643363228714e-06, "loss": 0.3156, "step": 1818 }, { "epoch": 0.08436920222634509, "grad_norm": 4.773353576660156, "learning_rate": 9.861471381737952e-06, "loss": 0.3635, "step": 1819 }, { "epoch": 0.08441558441558442, "grad_norm": 10.19943618774414, "learning_rate": 9.861299294925975e-06, "loss": 0.3188, "step": 1820 }, { "epoch": 0.08446196660482375, "grad_norm": 8.475303649902344, "learning_rate": 9.861127102796513e-06, "loss": 0.284, "step": 1821 }, { "epoch": 0.08450834879406308, "grad_norm": 8.339495658874512, "learning_rate": 9.860954805353295e-06, "loss": 0.3437, "step": 1822 }, { "epoch": 0.0845547309833024, "grad_norm": 7.278757572174072, "learning_rate": 9.860782402600057e-06, "loss": 0.3904, "step": 1823 }, { "epoch": 0.08460111317254175, "grad_norm": 7.433480262756348, "learning_rate": 9.860609894540531e-06, "loss": 0.3468, "step": 1824 }, { "epoch": 0.08464749536178108, "grad_norm": 11.086435317993164, "learning_rate": 9.860437281178456e-06, "loss": 0.4054, "step": 1825 }, { "epoch": 0.08469387755102041, "grad_norm": 7.372996807098389, "learning_rate": 9.860264562517571e-06, "loss": 0.3866, "step": 1826 }, { "epoch": 0.08474025974025974, "grad_norm": 7.471354007720947, "learning_rate": 9.860091738561616e-06, "loss": 0.3261, "step": 1827 }, { "epoch": 0.08478664192949907, "grad_norm": 13.178203582763672, "learning_rate": 9.85991880931434e-06, "loss": 0.4449, "step": 1828 }, { "epoch": 0.0848330241187384, "grad_norm": 10.11679744720459, "learning_rate": 9.859745774779483e-06, "loss": 0.368, "step": 1829 }, { "epoch": 0.08487940630797773, "grad_norm": 6.046088218688965, "learning_rate": 9.8595726349608e-06, "loss": 0.3463, "step": 1830 }, { "epoch": 0.08492578849721708, "grad_norm": 7.866136074066162, "learning_rate": 9.859399389862037e-06, "loss": 0.3779, "step": 1831 }, { "epoch": 0.0849721706864564, "grad_norm": 11.087183952331543, "learning_rate": 9.85922603948695e-06, "loss": 0.2981, "step": 1832 }, { "epoch": 0.08501855287569574, "grad_norm": 7.668825149536133, "learning_rate": 9.859052583839294e-06, "loss": 0.3734, "step": 1833 }, { "epoch": 0.08506493506493507, "grad_norm": 7.043103218078613, "learning_rate": 9.858879022922826e-06, "loss": 0.3885, "step": 1834 }, { "epoch": 0.0851113172541744, "grad_norm": 5.010466575622559, "learning_rate": 9.858705356741307e-06, "loss": 0.2891, "step": 1835 }, { "epoch": 0.08515769944341373, "grad_norm": 9.545989036560059, "learning_rate": 9.8585315852985e-06, "loss": 0.4586, "step": 1836 }, { "epoch": 0.08520408163265306, "grad_norm": 16.5853271484375, "learning_rate": 9.858357708598168e-06, "loss": 0.4812, "step": 1837 }, { "epoch": 0.0852504638218924, "grad_norm": 12.90611743927002, "learning_rate": 9.85818372664408e-06, "loss": 0.4729, "step": 1838 }, { "epoch": 0.08529684601113173, "grad_norm": 8.039264678955078, "learning_rate": 9.858009639440003e-06, "loss": 0.3875, "step": 1839 }, { "epoch": 0.08534322820037106, "grad_norm": 13.777863502502441, "learning_rate": 9.857835446989708e-06, "loss": 0.4906, "step": 1840 }, { "epoch": 0.08538961038961039, "grad_norm": 10.801657676696777, "learning_rate": 9.857661149296972e-06, "loss": 0.4143, "step": 1841 }, { "epoch": 0.08543599257884972, "grad_norm": 6.844780445098877, "learning_rate": 9.857486746365568e-06, "loss": 0.4079, "step": 1842 }, { "epoch": 0.08548237476808905, "grad_norm": 11.02305793762207, "learning_rate": 9.857312238199277e-06, "loss": 0.5118, "step": 1843 }, { "epoch": 0.08552875695732838, "grad_norm": 15.785171508789062, "learning_rate": 9.857137624801879e-06, "loss": 0.3779, "step": 1844 }, { "epoch": 0.08557513914656772, "grad_norm": 12.600257873535156, "learning_rate": 9.856962906177155e-06, "loss": 0.3954, "step": 1845 }, { "epoch": 0.08562152133580705, "grad_norm": 6.028163909912109, "learning_rate": 9.856788082328893e-06, "loss": 0.3393, "step": 1846 }, { "epoch": 0.08566790352504638, "grad_norm": 10.171009063720703, "learning_rate": 9.856613153260876e-06, "loss": 0.4249, "step": 1847 }, { "epoch": 0.08571428571428572, "grad_norm": 8.192780494689941, "learning_rate": 9.856438118976899e-06, "loss": 0.2885, "step": 1848 }, { "epoch": 0.08576066790352505, "grad_norm": 7.207990646362305, "learning_rate": 9.85626297948075e-06, "loss": 0.49, "step": 1849 }, { "epoch": 0.08580705009276438, "grad_norm": 7.028314590454102, "learning_rate": 9.856087734776226e-06, "loss": 0.4534, "step": 1850 }, { "epoch": 0.0858534322820037, "grad_norm": 7.435530662536621, "learning_rate": 9.855912384867122e-06, "loss": 0.2488, "step": 1851 }, { "epoch": 0.08589981447124305, "grad_norm": 5.13930082321167, "learning_rate": 9.855736929757237e-06, "loss": 0.375, "step": 1852 }, { "epoch": 0.08594619666048238, "grad_norm": 8.346001625061035, "learning_rate": 9.855561369450375e-06, "loss": 0.4008, "step": 1853 }, { "epoch": 0.08599257884972171, "grad_norm": 5.841650485992432, "learning_rate": 9.855385703950335e-06, "loss": 0.3279, "step": 1854 }, { "epoch": 0.08603896103896104, "grad_norm": 4.700623512268066, "learning_rate": 9.855209933260925e-06, "loss": 0.231, "step": 1855 }, { "epoch": 0.08608534322820037, "grad_norm": 11.449919700622559, "learning_rate": 9.855034057385953e-06, "loss": 0.3846, "step": 1856 }, { "epoch": 0.0861317254174397, "grad_norm": 10.277853012084961, "learning_rate": 9.854858076329228e-06, "loss": 0.3059, "step": 1857 }, { "epoch": 0.08617810760667903, "grad_norm": 8.44060230255127, "learning_rate": 9.854681990094563e-06, "loss": 0.4639, "step": 1858 }, { "epoch": 0.08622448979591837, "grad_norm": 8.64978313446045, "learning_rate": 9.854505798685774e-06, "loss": 0.399, "step": 1859 }, { "epoch": 0.0862708719851577, "grad_norm": 5.616610050201416, "learning_rate": 9.854329502106678e-06, "loss": 0.277, "step": 1860 }, { "epoch": 0.08631725417439703, "grad_norm": 12.171481132507324, "learning_rate": 9.854153100361095e-06, "loss": 0.4287, "step": 1861 }, { "epoch": 0.08636363636363636, "grad_norm": 14.953989028930664, "learning_rate": 9.853976593452843e-06, "loss": 0.5206, "step": 1862 }, { "epoch": 0.0864100185528757, "grad_norm": 22.584941864013672, "learning_rate": 9.853799981385748e-06, "loss": 0.458, "step": 1863 }, { "epoch": 0.08645640074211502, "grad_norm": 8.940023422241211, "learning_rate": 9.853623264163638e-06, "loss": 0.3088, "step": 1864 }, { "epoch": 0.08650278293135436, "grad_norm": 8.25175952911377, "learning_rate": 9.853446441790339e-06, "loss": 0.3199, "step": 1865 }, { "epoch": 0.08654916512059369, "grad_norm": 7.382426738739014, "learning_rate": 9.853269514269684e-06, "loss": 0.3406, "step": 1866 }, { "epoch": 0.08659554730983303, "grad_norm": 8.103839874267578, "learning_rate": 9.853092481605504e-06, "loss": 0.3716, "step": 1867 }, { "epoch": 0.08664192949907236, "grad_norm": 8.459081649780273, "learning_rate": 9.852915343801635e-06, "loss": 0.4337, "step": 1868 }, { "epoch": 0.08668831168831169, "grad_norm": 9.259143829345703, "learning_rate": 9.852738100861916e-06, "loss": 0.4273, "step": 1869 }, { "epoch": 0.08673469387755102, "grad_norm": 9.290444374084473, "learning_rate": 9.852560752790183e-06, "loss": 0.2666, "step": 1870 }, { "epoch": 0.08678107606679035, "grad_norm": 11.055408477783203, "learning_rate": 9.852383299590284e-06, "loss": 0.4996, "step": 1871 }, { "epoch": 0.08682745825602968, "grad_norm": 11.169453620910645, "learning_rate": 9.852205741266058e-06, "loss": 0.4033, "step": 1872 }, { "epoch": 0.08687384044526901, "grad_norm": 12.41319751739502, "learning_rate": 9.852028077821353e-06, "loss": 0.5627, "step": 1873 }, { "epoch": 0.08692022263450835, "grad_norm": 8.773523330688477, "learning_rate": 9.851850309260021e-06, "loss": 0.3947, "step": 1874 }, { "epoch": 0.08696660482374768, "grad_norm": 9.011260986328125, "learning_rate": 9.85167243558591e-06, "loss": 0.3385, "step": 1875 }, { "epoch": 0.08701298701298701, "grad_norm": 9.317134857177734, "learning_rate": 9.851494456802875e-06, "loss": 0.4461, "step": 1876 }, { "epoch": 0.08705936920222634, "grad_norm": 12.981162071228027, "learning_rate": 9.851316372914772e-06, "loss": 0.5028, "step": 1877 }, { "epoch": 0.08710575139146567, "grad_norm": 7.833569049835205, "learning_rate": 9.85113818392546e-06, "loss": 0.3384, "step": 1878 }, { "epoch": 0.087152133580705, "grad_norm": 34.2563362121582, "learning_rate": 9.850959889838795e-06, "loss": 0.4116, "step": 1879 }, { "epoch": 0.08719851576994433, "grad_norm": 8.212505340576172, "learning_rate": 9.850781490658643e-06, "loss": 0.4345, "step": 1880 }, { "epoch": 0.08724489795918368, "grad_norm": 13.00154972076416, "learning_rate": 9.85060298638887e-06, "loss": 0.4291, "step": 1881 }, { "epoch": 0.08729128014842301, "grad_norm": 11.033926010131836, "learning_rate": 9.85042437703334e-06, "loss": 0.406, "step": 1882 }, { "epoch": 0.08733766233766234, "grad_norm": 10.034682273864746, "learning_rate": 9.850245662595925e-06, "loss": 0.4459, "step": 1883 }, { "epoch": 0.08738404452690167, "grad_norm": 8.333170890808105, "learning_rate": 9.850066843080496e-06, "loss": 0.4271, "step": 1884 }, { "epoch": 0.087430426716141, "grad_norm": 11.421411514282227, "learning_rate": 9.849887918490928e-06, "loss": 0.4614, "step": 1885 }, { "epoch": 0.08747680890538033, "grad_norm": 9.21015739440918, "learning_rate": 9.849708888831096e-06, "loss": 0.3993, "step": 1886 }, { "epoch": 0.08752319109461966, "grad_norm": 7.925516128540039, "learning_rate": 9.849529754104877e-06, "loss": 0.3889, "step": 1887 }, { "epoch": 0.087569573283859, "grad_norm": 19.284997940063477, "learning_rate": 9.849350514316157e-06, "loss": 0.4378, "step": 1888 }, { "epoch": 0.08761595547309833, "grad_norm": 4.764822483062744, "learning_rate": 9.849171169468812e-06, "loss": 0.4426, "step": 1889 }, { "epoch": 0.08766233766233766, "grad_norm": 8.024073600769043, "learning_rate": 9.848991719566734e-06, "loss": 0.3575, "step": 1890 }, { "epoch": 0.087708719851577, "grad_norm": 5.839965343475342, "learning_rate": 9.848812164613805e-06, "loss": 0.3418, "step": 1891 }, { "epoch": 0.08775510204081632, "grad_norm": 5.79045295715332, "learning_rate": 9.848632504613922e-06, "loss": 0.415, "step": 1892 }, { "epoch": 0.08780148423005565, "grad_norm": 7.690230846405029, "learning_rate": 9.848452739570971e-06, "loss": 0.4468, "step": 1893 }, { "epoch": 0.08784786641929498, "grad_norm": 7.6489739418029785, "learning_rate": 9.848272869488848e-06, "loss": 0.3646, "step": 1894 }, { "epoch": 0.08789424860853433, "grad_norm": 14.664509773254395, "learning_rate": 9.848092894371453e-06, "loss": 0.4113, "step": 1895 }, { "epoch": 0.08794063079777366, "grad_norm": 7.361799716949463, "learning_rate": 9.84791281422268e-06, "loss": 0.3516, "step": 1896 }, { "epoch": 0.08798701298701299, "grad_norm": 6.445760250091553, "learning_rate": 9.847732629046433e-06, "loss": 0.3735, "step": 1897 }, { "epoch": 0.08803339517625232, "grad_norm": 5.813187122344971, "learning_rate": 9.847552338846617e-06, "loss": 0.2525, "step": 1898 }, { "epoch": 0.08807977736549165, "grad_norm": 5.510781764984131, "learning_rate": 9.847371943627135e-06, "loss": 0.2801, "step": 1899 }, { "epoch": 0.08812615955473098, "grad_norm": 8.796648025512695, "learning_rate": 9.847191443391898e-06, "loss": 0.4247, "step": 1900 }, { "epoch": 0.08817254174397031, "grad_norm": 21.16259765625, "learning_rate": 9.847010838144815e-06, "loss": 0.5352, "step": 1901 }, { "epoch": 0.08821892393320965, "grad_norm": 7.421379566192627, "learning_rate": 9.846830127889797e-06, "loss": 0.3585, "step": 1902 }, { "epoch": 0.08826530612244898, "grad_norm": 12.908018112182617, "learning_rate": 9.846649312630763e-06, "loss": 0.4708, "step": 1903 }, { "epoch": 0.08831168831168831, "grad_norm": 10.249358177185059, "learning_rate": 9.846468392371627e-06, "loss": 0.3937, "step": 1904 }, { "epoch": 0.08835807050092764, "grad_norm": 5.895416259765625, "learning_rate": 9.846287367116307e-06, "loss": 0.2916, "step": 1905 }, { "epoch": 0.08840445269016697, "grad_norm": 5.877336502075195, "learning_rate": 9.84610623686873e-06, "loss": 0.3171, "step": 1906 }, { "epoch": 0.0884508348794063, "grad_norm": 10.51580810546875, "learning_rate": 9.845925001632817e-06, "loss": 0.248, "step": 1907 }, { "epoch": 0.08849721706864563, "grad_norm": 10.722336769104004, "learning_rate": 9.845743661412494e-06, "loss": 0.3184, "step": 1908 }, { "epoch": 0.08854359925788498, "grad_norm": 9.797179222106934, "learning_rate": 9.845562216211693e-06, "loss": 0.2598, "step": 1909 }, { "epoch": 0.08858998144712431, "grad_norm": 7.832176685333252, "learning_rate": 9.84538066603434e-06, "loss": 0.3041, "step": 1910 }, { "epoch": 0.08863636363636364, "grad_norm": 9.054018020629883, "learning_rate": 9.845199010884372e-06, "loss": 0.4383, "step": 1911 }, { "epoch": 0.08868274582560297, "grad_norm": 7.224438667297363, "learning_rate": 9.845017250765721e-06, "loss": 0.4117, "step": 1912 }, { "epoch": 0.0887291280148423, "grad_norm": 12.300139427185059, "learning_rate": 9.84483538568233e-06, "loss": 0.409, "step": 1913 }, { "epoch": 0.08877551020408163, "grad_norm": 15.150297164916992, "learning_rate": 9.844653415638133e-06, "loss": 0.3446, "step": 1914 }, { "epoch": 0.08882189239332096, "grad_norm": 7.483668327331543, "learning_rate": 9.844471340637074e-06, "loss": 0.3249, "step": 1915 }, { "epoch": 0.0888682745825603, "grad_norm": 20.93041229248047, "learning_rate": 9.8442891606831e-06, "loss": 0.4189, "step": 1916 }, { "epoch": 0.08891465677179963, "grad_norm": 7.060400009155273, "learning_rate": 9.844106875780157e-06, "loss": 0.3258, "step": 1917 }, { "epoch": 0.08896103896103896, "grad_norm": 3.9635443687438965, "learning_rate": 9.843924485932195e-06, "loss": 0.2374, "step": 1918 }, { "epoch": 0.08900742115027829, "grad_norm": 7.10048770904541, "learning_rate": 9.843741991143161e-06, "loss": 0.3682, "step": 1919 }, { "epoch": 0.08905380333951762, "grad_norm": 6.547480583190918, "learning_rate": 9.843559391417013e-06, "loss": 0.3748, "step": 1920 }, { "epoch": 0.08910018552875695, "grad_norm": 6.7112135887146, "learning_rate": 9.843376686757706e-06, "loss": 0.3844, "step": 1921 }, { "epoch": 0.08914656771799628, "grad_norm": 10.655341148376465, "learning_rate": 9.843193877169195e-06, "loss": 0.3375, "step": 1922 }, { "epoch": 0.08919294990723563, "grad_norm": 6.337501049041748, "learning_rate": 9.843010962655446e-06, "loss": 0.4097, "step": 1923 }, { "epoch": 0.08923933209647496, "grad_norm": 5.333899974822998, "learning_rate": 9.842827943220419e-06, "loss": 0.3051, "step": 1924 }, { "epoch": 0.08928571428571429, "grad_norm": 12.84621810913086, "learning_rate": 9.842644818868077e-06, "loss": 0.4696, "step": 1925 }, { "epoch": 0.08933209647495362, "grad_norm": 9.627644538879395, "learning_rate": 9.84246158960239e-06, "loss": 0.3289, "step": 1926 }, { "epoch": 0.08937847866419295, "grad_norm": 7.510189056396484, "learning_rate": 9.842278255427327e-06, "loss": 0.3291, "step": 1927 }, { "epoch": 0.08942486085343228, "grad_norm": 11.53294563293457, "learning_rate": 9.84209481634686e-06, "loss": 0.3674, "step": 1928 }, { "epoch": 0.08947124304267161, "grad_norm": 10.82751178741455, "learning_rate": 9.841911272364962e-06, "loss": 0.3132, "step": 1929 }, { "epoch": 0.08951762523191095, "grad_norm": 6.540507793426514, "learning_rate": 9.841727623485611e-06, "loss": 0.432, "step": 1930 }, { "epoch": 0.08956400742115028, "grad_norm": 14.086009979248047, "learning_rate": 9.841543869712784e-06, "loss": 0.3806, "step": 1931 }, { "epoch": 0.08961038961038961, "grad_norm": 7.437479496002197, "learning_rate": 9.841360011050462e-06, "loss": 0.3938, "step": 1932 }, { "epoch": 0.08965677179962894, "grad_norm": 6.364312648773193, "learning_rate": 9.84117604750263e-06, "loss": 0.3946, "step": 1933 }, { "epoch": 0.08970315398886827, "grad_norm": 9.357587814331055, "learning_rate": 9.840991979073273e-06, "loss": 0.3299, "step": 1934 }, { "epoch": 0.0897495361781076, "grad_norm": 5.177455425262451, "learning_rate": 9.840807805766378e-06, "loss": 0.3666, "step": 1935 }, { "epoch": 0.08979591836734693, "grad_norm": 11.000337600708008, "learning_rate": 9.840623527585933e-06, "loss": 0.3838, "step": 1936 }, { "epoch": 0.08984230055658628, "grad_norm": 11.332047462463379, "learning_rate": 9.840439144535935e-06, "loss": 0.4436, "step": 1937 }, { "epoch": 0.08988868274582561, "grad_norm": 8.897135734558105, "learning_rate": 9.840254656620375e-06, "loss": 0.3188, "step": 1938 }, { "epoch": 0.08993506493506494, "grad_norm": 13.059622764587402, "learning_rate": 9.84007006384325e-06, "loss": 0.4717, "step": 1939 }, { "epoch": 0.08998144712430427, "grad_norm": 12.717272758483887, "learning_rate": 9.839885366208562e-06, "loss": 0.4078, "step": 1940 }, { "epoch": 0.0900278293135436, "grad_norm": 8.961374282836914, "learning_rate": 9.83970056372031e-06, "loss": 0.4197, "step": 1941 }, { "epoch": 0.09007421150278293, "grad_norm": 9.92188549041748, "learning_rate": 9.839515656382494e-06, "loss": 0.363, "step": 1942 }, { "epoch": 0.09012059369202226, "grad_norm": 7.837194442749023, "learning_rate": 9.839330644199127e-06, "loss": 0.3606, "step": 1943 }, { "epoch": 0.0901669758812616, "grad_norm": 11.882426261901855, "learning_rate": 9.839145527174216e-06, "loss": 0.4182, "step": 1944 }, { "epoch": 0.09021335807050093, "grad_norm": 7.253396511077881, "learning_rate": 9.838960305311766e-06, "loss": 0.3886, "step": 1945 }, { "epoch": 0.09025974025974026, "grad_norm": 13.487602233886719, "learning_rate": 9.838774978615796e-06, "loss": 0.5483, "step": 1946 }, { "epoch": 0.09030612244897959, "grad_norm": 6.337312698364258, "learning_rate": 9.838589547090316e-06, "loss": 0.4129, "step": 1947 }, { "epoch": 0.09035250463821892, "grad_norm": 14.879129409790039, "learning_rate": 9.838404010739346e-06, "loss": 0.544, "step": 1948 }, { "epoch": 0.09039888682745825, "grad_norm": 9.309020042419434, "learning_rate": 9.838218369566908e-06, "loss": 0.3365, "step": 1949 }, { "epoch": 0.09044526901669758, "grad_norm": 8.349298477172852, "learning_rate": 9.838032623577017e-06, "loss": 0.3233, "step": 1950 }, { "epoch": 0.09049165120593693, "grad_norm": 5.795839309692383, "learning_rate": 9.837846772773703e-06, "loss": 0.4362, "step": 1951 }, { "epoch": 0.09053803339517626, "grad_norm": 8.457387924194336, "learning_rate": 9.83766081716099e-06, "loss": 0.408, "step": 1952 }, { "epoch": 0.09058441558441559, "grad_norm": 9.248271942138672, "learning_rate": 9.837474756742907e-06, "loss": 0.4064, "step": 1953 }, { "epoch": 0.09063079777365492, "grad_norm": 7.735722541809082, "learning_rate": 9.837288591523485e-06, "loss": 0.4042, "step": 1954 }, { "epoch": 0.09067717996289425, "grad_norm": 5.9172682762146, "learning_rate": 9.837102321506757e-06, "loss": 0.295, "step": 1955 }, { "epoch": 0.09072356215213358, "grad_norm": 7.102360248565674, "learning_rate": 9.83691594669676e-06, "loss": 0.4117, "step": 1956 }, { "epoch": 0.09076994434137291, "grad_norm": 18.786222457885742, "learning_rate": 9.836729467097528e-06, "loss": 0.4474, "step": 1957 }, { "epoch": 0.09081632653061225, "grad_norm": 8.701238632202148, "learning_rate": 9.836542882713102e-06, "loss": 0.354, "step": 1958 }, { "epoch": 0.09086270871985158, "grad_norm": 6.769214153289795, "learning_rate": 9.836356193547529e-06, "loss": 0.3865, "step": 1959 }, { "epoch": 0.09090909090909091, "grad_norm": 13.220824241638184, "learning_rate": 9.836169399604846e-06, "loss": 0.4577, "step": 1960 }, { "epoch": 0.09095547309833024, "grad_norm": 7.688075065612793, "learning_rate": 9.835982500889108e-06, "loss": 0.4747, "step": 1961 }, { "epoch": 0.09100185528756957, "grad_norm": 6.149162292480469, "learning_rate": 9.835795497404357e-06, "loss": 0.3629, "step": 1962 }, { "epoch": 0.0910482374768089, "grad_norm": 11.384419441223145, "learning_rate": 9.835608389154649e-06, "loss": 0.4923, "step": 1963 }, { "epoch": 0.09109461966604823, "grad_norm": 13.973649024963379, "learning_rate": 9.835421176144035e-06, "loss": 0.4536, "step": 1964 }, { "epoch": 0.09114100185528758, "grad_norm": 9.537012100219727, "learning_rate": 9.835233858376572e-06, "loss": 0.4559, "step": 1965 }, { "epoch": 0.0911873840445269, "grad_norm": 6.886740684509277, "learning_rate": 9.835046435856316e-06, "loss": 0.4087, "step": 1966 }, { "epoch": 0.09123376623376624, "grad_norm": 7.324423789978027, "learning_rate": 9.834858908587331e-06, "loss": 0.3763, "step": 1967 }, { "epoch": 0.09128014842300557, "grad_norm": 6.148913860321045, "learning_rate": 9.834671276573677e-06, "loss": 0.3654, "step": 1968 }, { "epoch": 0.0913265306122449, "grad_norm": 7.243524551391602, "learning_rate": 9.83448353981942e-06, "loss": 0.4664, "step": 1969 }, { "epoch": 0.09137291280148423, "grad_norm": 11.61533260345459, "learning_rate": 9.834295698328627e-06, "loss": 0.4375, "step": 1970 }, { "epoch": 0.09141929499072356, "grad_norm": 12.312883377075195, "learning_rate": 9.834107752105369e-06, "loss": 0.3673, "step": 1971 }, { "epoch": 0.0914656771799629, "grad_norm": 16.153200149536133, "learning_rate": 9.833919701153714e-06, "loss": 0.4249, "step": 1972 }, { "epoch": 0.09151205936920223, "grad_norm": 4.956932544708252, "learning_rate": 9.83373154547774e-06, "loss": 0.3996, "step": 1973 }, { "epoch": 0.09155844155844156, "grad_norm": 6.867321491241455, "learning_rate": 9.833543285081521e-06, "loss": 0.4009, "step": 1974 }, { "epoch": 0.09160482374768089, "grad_norm": 9.786941528320312, "learning_rate": 9.833354919969135e-06, "loss": 0.4036, "step": 1975 }, { "epoch": 0.09165120593692022, "grad_norm": 9.24079704284668, "learning_rate": 9.833166450144665e-06, "loss": 0.3516, "step": 1976 }, { "epoch": 0.09169758812615955, "grad_norm": 9.225930213928223, "learning_rate": 9.832977875612192e-06, "loss": 0.3515, "step": 1977 }, { "epoch": 0.09174397031539888, "grad_norm": 15.521783828735352, "learning_rate": 9.832789196375805e-06, "loss": 0.6408, "step": 1978 }, { "epoch": 0.09179035250463823, "grad_norm": 8.393463134765625, "learning_rate": 9.832600412439588e-06, "loss": 0.2528, "step": 1979 }, { "epoch": 0.09183673469387756, "grad_norm": 11.163141250610352, "learning_rate": 9.832411523807632e-06, "loss": 0.3783, "step": 1980 }, { "epoch": 0.09188311688311689, "grad_norm": 32.3309211730957, "learning_rate": 9.832222530484028e-06, "loss": 0.4176, "step": 1981 }, { "epoch": 0.09192949907235622, "grad_norm": 7.487981796264648, "learning_rate": 9.832033432472872e-06, "loss": 0.3648, "step": 1982 }, { "epoch": 0.09197588126159555, "grad_norm": 11.28844928741455, "learning_rate": 9.831844229778261e-06, "loss": 0.4281, "step": 1983 }, { "epoch": 0.09202226345083488, "grad_norm": 6.801535129547119, "learning_rate": 9.831654922404294e-06, "loss": 0.3634, "step": 1984 }, { "epoch": 0.0920686456400742, "grad_norm": 5.5602707862854, "learning_rate": 9.831465510355069e-06, "loss": 0.3221, "step": 1985 }, { "epoch": 0.09211502782931355, "grad_norm": 6.341254711151123, "learning_rate": 9.831275993634694e-06, "loss": 0.354, "step": 1986 }, { "epoch": 0.09216141001855288, "grad_norm": 7.621610641479492, "learning_rate": 9.831086372247273e-06, "loss": 0.3554, "step": 1987 }, { "epoch": 0.09220779220779221, "grad_norm": 8.644728660583496, "learning_rate": 9.830896646196912e-06, "loss": 0.4347, "step": 1988 }, { "epoch": 0.09225417439703154, "grad_norm": 12.000982284545898, "learning_rate": 9.830706815487725e-06, "loss": 0.4228, "step": 1989 }, { "epoch": 0.09230055658627087, "grad_norm": 5.2126359939575195, "learning_rate": 9.830516880123823e-06, "loss": 0.2788, "step": 1990 }, { "epoch": 0.0923469387755102, "grad_norm": 11.282500267028809, "learning_rate": 9.83032684010932e-06, "loss": 0.5198, "step": 1991 }, { "epoch": 0.09239332096474953, "grad_norm": 7.472198009490967, "learning_rate": 9.830136695448334e-06, "loss": 0.351, "step": 1992 }, { "epoch": 0.09243970315398886, "grad_norm": 7.174782752990723, "learning_rate": 9.829946446144984e-06, "loss": 0.3269, "step": 1993 }, { "epoch": 0.0924860853432282, "grad_norm": 12.818531036376953, "learning_rate": 9.82975609220339e-06, "loss": 0.426, "step": 1994 }, { "epoch": 0.09253246753246754, "grad_norm": 9.209427833557129, "learning_rate": 9.82956563362768e-06, "loss": 0.2371, "step": 1995 }, { "epoch": 0.09257884972170687, "grad_norm": 8.591608047485352, "learning_rate": 9.829375070421978e-06, "loss": 0.5024, "step": 1996 }, { "epoch": 0.0926252319109462, "grad_norm": 7.510241985321045, "learning_rate": 9.82918440259041e-06, "loss": 0.3518, "step": 1997 }, { "epoch": 0.09267161410018553, "grad_norm": 5.795971870422363, "learning_rate": 9.828993630137111e-06, "loss": 0.3856, "step": 1998 }, { "epoch": 0.09271799628942486, "grad_norm": 4.182537078857422, "learning_rate": 9.82880275306621e-06, "loss": 0.3556, "step": 1999 }, { "epoch": 0.09276437847866419, "grad_norm": 6.836847305297852, "learning_rate": 9.828611771381847e-06, "loss": 0.3265, "step": 2000 }, { "epoch": 0.09281076066790353, "grad_norm": 12.201886177062988, "learning_rate": 9.828420685088156e-06, "loss": 0.4181, "step": 2001 }, { "epoch": 0.09285714285714286, "grad_norm": 11.512421607971191, "learning_rate": 9.828229494189277e-06, "loss": 0.5087, "step": 2002 }, { "epoch": 0.09290352504638219, "grad_norm": 9.009681701660156, "learning_rate": 9.828038198689354e-06, "loss": 0.2878, "step": 2003 }, { "epoch": 0.09294990723562152, "grad_norm": 16.73767852783203, "learning_rate": 9.827846798592529e-06, "loss": 0.3856, "step": 2004 }, { "epoch": 0.09299628942486085, "grad_norm": 5.037207126617432, "learning_rate": 9.82765529390295e-06, "loss": 0.3216, "step": 2005 }, { "epoch": 0.09304267161410018, "grad_norm": 13.205702781677246, "learning_rate": 9.827463684624763e-06, "loss": 0.5185, "step": 2006 }, { "epoch": 0.09308905380333951, "grad_norm": 10.732611656188965, "learning_rate": 9.827271970762123e-06, "loss": 0.3417, "step": 2007 }, { "epoch": 0.09313543599257885, "grad_norm": 5.0009565353393555, "learning_rate": 9.827080152319182e-06, "loss": 0.3548, "step": 2008 }, { "epoch": 0.09318181818181819, "grad_norm": 11.470600128173828, "learning_rate": 9.826888229300096e-06, "loss": 0.3387, "step": 2009 }, { "epoch": 0.09322820037105752, "grad_norm": 8.298450469970703, "learning_rate": 9.826696201709022e-06, "loss": 0.3564, "step": 2010 }, { "epoch": 0.09327458256029685, "grad_norm": 9.412457466125488, "learning_rate": 9.82650406955012e-06, "loss": 0.3298, "step": 2011 }, { "epoch": 0.09332096474953618, "grad_norm": 6.91060209274292, "learning_rate": 9.826311832827554e-06, "loss": 0.3578, "step": 2012 }, { "epoch": 0.0933673469387755, "grad_norm": 8.208511352539062, "learning_rate": 9.826119491545487e-06, "loss": 0.3257, "step": 2013 }, { "epoch": 0.09341372912801484, "grad_norm": 8.750782012939453, "learning_rate": 9.825927045708087e-06, "loss": 0.285, "step": 2014 }, { "epoch": 0.09346011131725418, "grad_norm": 7.257091999053955, "learning_rate": 9.825734495319524e-06, "loss": 0.2728, "step": 2015 }, { "epoch": 0.09350649350649351, "grad_norm": 7.338875770568848, "learning_rate": 9.825541840383967e-06, "loss": 0.3509, "step": 2016 }, { "epoch": 0.09355287569573284, "grad_norm": 9.955730438232422, "learning_rate": 9.825349080905593e-06, "loss": 0.2459, "step": 2017 }, { "epoch": 0.09359925788497217, "grad_norm": 6.537896156311035, "learning_rate": 9.825156216888573e-06, "loss": 0.3529, "step": 2018 }, { "epoch": 0.0936456400742115, "grad_norm": 6.604704856872559, "learning_rate": 9.82496324833709e-06, "loss": 0.4252, "step": 2019 }, { "epoch": 0.09369202226345083, "grad_norm": 5.537231922149658, "learning_rate": 9.824770175255326e-06, "loss": 0.3882, "step": 2020 }, { "epoch": 0.09373840445269016, "grad_norm": 4.952191352844238, "learning_rate": 9.824576997647459e-06, "loss": 0.2887, "step": 2021 }, { "epoch": 0.0937847866419295, "grad_norm": 8.033323287963867, "learning_rate": 9.824383715517676e-06, "loss": 0.344, "step": 2022 }, { "epoch": 0.09383116883116883, "grad_norm": 9.865200996398926, "learning_rate": 9.824190328870163e-06, "loss": 0.5082, "step": 2023 }, { "epoch": 0.09387755102040816, "grad_norm": 6.917645454406738, "learning_rate": 9.823996837709114e-06, "loss": 0.1996, "step": 2024 }, { "epoch": 0.0939239332096475, "grad_norm": 10.247098922729492, "learning_rate": 9.823803242038715e-06, "loss": 0.3567, "step": 2025 }, { "epoch": 0.09397031539888683, "grad_norm": 8.817115783691406, "learning_rate": 9.823609541863163e-06, "loss": 0.3721, "step": 2026 }, { "epoch": 0.09401669758812616, "grad_norm": 8.66623592376709, "learning_rate": 9.823415737186656e-06, "loss": 0.4805, "step": 2027 }, { "epoch": 0.09406307977736549, "grad_norm": 6.747928619384766, "learning_rate": 9.823221828013392e-06, "loss": 0.3171, "step": 2028 }, { "epoch": 0.09410946196660483, "grad_norm": 7.851754665374756, "learning_rate": 9.82302781434757e-06, "loss": 0.4117, "step": 2029 }, { "epoch": 0.09415584415584416, "grad_norm": 7.970576763153076, "learning_rate": 9.822833696193393e-06, "loss": 0.3818, "step": 2030 }, { "epoch": 0.09420222634508349, "grad_norm": 7.969043731689453, "learning_rate": 9.822639473555068e-06, "loss": 0.3872, "step": 2031 }, { "epoch": 0.09424860853432282, "grad_norm": 9.879464149475098, "learning_rate": 9.822445146436805e-06, "loss": 0.3476, "step": 2032 }, { "epoch": 0.09429499072356215, "grad_norm": 24.239974975585938, "learning_rate": 9.82225071484281e-06, "loss": 0.4716, "step": 2033 }, { "epoch": 0.09434137291280148, "grad_norm": 5.277352809906006, "learning_rate": 9.822056178777296e-06, "loss": 0.3979, "step": 2034 }, { "epoch": 0.09438775510204081, "grad_norm": 8.110301971435547, "learning_rate": 9.821861538244479e-06, "loss": 0.3257, "step": 2035 }, { "epoch": 0.09443413729128015, "grad_norm": 7.781154155731201, "learning_rate": 9.821666793248574e-06, "loss": 0.4693, "step": 2036 }, { "epoch": 0.09448051948051948, "grad_norm": 7.430489540100098, "learning_rate": 9.821471943793803e-06, "loss": 0.3678, "step": 2037 }, { "epoch": 0.09452690166975881, "grad_norm": 9.342096328735352, "learning_rate": 9.821276989884384e-06, "loss": 0.3424, "step": 2038 }, { "epoch": 0.09457328385899814, "grad_norm": 6.399441242218018, "learning_rate": 9.821081931524543e-06, "loss": 0.3913, "step": 2039 }, { "epoch": 0.09461966604823747, "grad_norm": 7.3184590339660645, "learning_rate": 9.820886768718503e-06, "loss": 0.316, "step": 2040 }, { "epoch": 0.0946660482374768, "grad_norm": 9.631980895996094, "learning_rate": 9.820691501470496e-06, "loss": 0.3961, "step": 2041 }, { "epoch": 0.09471243042671613, "grad_norm": 5.9354329109191895, "learning_rate": 9.820496129784749e-06, "loss": 0.3879, "step": 2042 }, { "epoch": 0.09475881261595548, "grad_norm": 7.36320686340332, "learning_rate": 9.820300653665495e-06, "loss": 0.2243, "step": 2043 }, { "epoch": 0.09480519480519481, "grad_norm": 9.663556098937988, "learning_rate": 9.820105073116973e-06, "loss": 0.321, "step": 2044 }, { "epoch": 0.09485157699443414, "grad_norm": 8.053409576416016, "learning_rate": 9.819909388143414e-06, "loss": 0.344, "step": 2045 }, { "epoch": 0.09489795918367347, "grad_norm": 18.33388900756836, "learning_rate": 9.819713598749061e-06, "loss": 0.477, "step": 2046 }, { "epoch": 0.0949443413729128, "grad_norm": 15.445647239685059, "learning_rate": 9.819517704938156e-06, "loss": 0.3628, "step": 2047 }, { "epoch": 0.09499072356215213, "grad_norm": 12.910977363586426, "learning_rate": 9.81932170671494e-06, "loss": 0.4797, "step": 2048 }, { "epoch": 0.09503710575139146, "grad_norm": 7.021546363830566, "learning_rate": 9.819125604083663e-06, "loss": 0.3154, "step": 2049 }, { "epoch": 0.0950834879406308, "grad_norm": 16.045825958251953, "learning_rate": 9.81892939704857e-06, "loss": 0.375, "step": 2050 }, { "epoch": 0.09512987012987013, "grad_norm": 12.207967758178711, "learning_rate": 9.818733085613916e-06, "loss": 0.4005, "step": 2051 }, { "epoch": 0.09517625231910946, "grad_norm": 8.959598541259766, "learning_rate": 9.818536669783949e-06, "loss": 0.3905, "step": 2052 }, { "epoch": 0.0952226345083488, "grad_norm": 10.790656089782715, "learning_rate": 9.818340149562927e-06, "loss": 0.4574, "step": 2053 }, { "epoch": 0.09526901669758812, "grad_norm": 10.03626537322998, "learning_rate": 9.818143524955108e-06, "loss": 0.4457, "step": 2054 }, { "epoch": 0.09531539888682745, "grad_norm": 6.00896692276001, "learning_rate": 9.81794679596475e-06, "loss": 0.2242, "step": 2055 }, { "epoch": 0.09536178107606678, "grad_norm": 12.044793128967285, "learning_rate": 9.817749962596115e-06, "loss": 0.5809, "step": 2056 }, { "epoch": 0.09540816326530613, "grad_norm": 15.035238265991211, "learning_rate": 9.81755302485347e-06, "loss": 0.5046, "step": 2057 }, { "epoch": 0.09545454545454546, "grad_norm": 8.270768165588379, "learning_rate": 9.817355982741078e-06, "loss": 0.4211, "step": 2058 }, { "epoch": 0.09550092764378479, "grad_norm": 9.962562561035156, "learning_rate": 9.817158836263211e-06, "loss": 0.4569, "step": 2059 }, { "epoch": 0.09554730983302412, "grad_norm": 14.672916412353516, "learning_rate": 9.816961585424139e-06, "loss": 0.5869, "step": 2060 }, { "epoch": 0.09559369202226345, "grad_norm": 7.844688415527344, "learning_rate": 9.816764230228133e-06, "loss": 0.4314, "step": 2061 }, { "epoch": 0.09564007421150278, "grad_norm": 10.464105606079102, "learning_rate": 9.816566770679472e-06, "loss": 0.3901, "step": 2062 }, { "epoch": 0.09568645640074211, "grad_norm": 18.298471450805664, "learning_rate": 9.816369206782431e-06, "loss": 0.4211, "step": 2063 }, { "epoch": 0.09573283858998145, "grad_norm": 8.15936279296875, "learning_rate": 9.816171538541293e-06, "loss": 0.3853, "step": 2064 }, { "epoch": 0.09577922077922078, "grad_norm": 8.648954391479492, "learning_rate": 9.815973765960339e-06, "loss": 0.3431, "step": 2065 }, { "epoch": 0.09582560296846011, "grad_norm": 11.328388214111328, "learning_rate": 9.815775889043852e-06, "loss": 0.407, "step": 2066 }, { "epoch": 0.09587198515769944, "grad_norm": 7.18965482711792, "learning_rate": 9.815577907796121e-06, "loss": 0.3574, "step": 2067 }, { "epoch": 0.09591836734693877, "grad_norm": 6.635911464691162, "learning_rate": 9.815379822221433e-06, "loss": 0.3313, "step": 2068 }, { "epoch": 0.0959647495361781, "grad_norm": 8.98434066772461, "learning_rate": 9.815181632324085e-06, "loss": 0.4033, "step": 2069 }, { "epoch": 0.09601113172541743, "grad_norm": 10.710533142089844, "learning_rate": 9.814983338108363e-06, "loss": 0.4454, "step": 2070 }, { "epoch": 0.09605751391465678, "grad_norm": 10.95933723449707, "learning_rate": 9.814784939578569e-06, "loss": 0.5075, "step": 2071 }, { "epoch": 0.09610389610389611, "grad_norm": 6.5820794105529785, "learning_rate": 9.814586436738998e-06, "loss": 0.3594, "step": 2072 }, { "epoch": 0.09615027829313544, "grad_norm": 8.521617889404297, "learning_rate": 9.81438782959395e-06, "loss": 0.3875, "step": 2073 }, { "epoch": 0.09619666048237477, "grad_norm": 6.4285888671875, "learning_rate": 9.81418911814773e-06, "loss": 0.2944, "step": 2074 }, { "epoch": 0.0962430426716141, "grad_norm": 5.866112232208252, "learning_rate": 9.813990302404642e-06, "loss": 0.3165, "step": 2075 }, { "epoch": 0.09628942486085343, "grad_norm": 6.774673938751221, "learning_rate": 9.813791382368993e-06, "loss": 0.3518, "step": 2076 }, { "epoch": 0.09633580705009276, "grad_norm": 6.512081146240234, "learning_rate": 9.813592358045094e-06, "loss": 0.3265, "step": 2077 }, { "epoch": 0.0963821892393321, "grad_norm": 5.716059684753418, "learning_rate": 9.813393229437255e-06, "loss": 0.3817, "step": 2078 }, { "epoch": 0.09642857142857143, "grad_norm": 9.998895645141602, "learning_rate": 9.81319399654979e-06, "loss": 0.448, "step": 2079 }, { "epoch": 0.09647495361781076, "grad_norm": 8.08979606628418, "learning_rate": 9.812994659387015e-06, "loss": 0.372, "step": 2080 }, { "epoch": 0.0965213358070501, "grad_norm": 11.915661811828613, "learning_rate": 9.812795217953251e-06, "loss": 0.3482, "step": 2081 }, { "epoch": 0.09656771799628942, "grad_norm": 15.545802116394043, "learning_rate": 9.812595672252816e-06, "loss": 0.5634, "step": 2082 }, { "epoch": 0.09661410018552875, "grad_norm": 4.702352046966553, "learning_rate": 9.812396022290033e-06, "loss": 0.3553, "step": 2083 }, { "epoch": 0.09666048237476808, "grad_norm": 7.963425636291504, "learning_rate": 9.812196268069229e-06, "loss": 0.4371, "step": 2084 }, { "epoch": 0.09670686456400743, "grad_norm": 11.13977336883545, "learning_rate": 9.811996409594733e-06, "loss": 0.3555, "step": 2085 }, { "epoch": 0.09675324675324676, "grad_norm": 7.8518147468566895, "learning_rate": 9.811796446870872e-06, "loss": 0.4345, "step": 2086 }, { "epoch": 0.09679962894248609, "grad_norm": 7.270717144012451, "learning_rate": 9.811596379901978e-06, "loss": 0.279, "step": 2087 }, { "epoch": 0.09684601113172542, "grad_norm": 11.568063735961914, "learning_rate": 9.811396208692387e-06, "loss": 0.4912, "step": 2088 }, { "epoch": 0.09689239332096475, "grad_norm": 7.984249114990234, "learning_rate": 9.811195933246434e-06, "loss": 0.4038, "step": 2089 }, { "epoch": 0.09693877551020408, "grad_norm": 7.477077007293701, "learning_rate": 9.81099555356846e-06, "loss": 0.3098, "step": 2090 }, { "epoch": 0.09698515769944341, "grad_norm": 6.4273762702941895, "learning_rate": 9.810795069662804e-06, "loss": 0.3112, "step": 2091 }, { "epoch": 0.09703153988868275, "grad_norm": 9.81246566772461, "learning_rate": 9.81059448153381e-06, "loss": 0.3901, "step": 2092 }, { "epoch": 0.09707792207792208, "grad_norm": 12.649370193481445, "learning_rate": 9.810393789185826e-06, "loss": 0.3962, "step": 2093 }, { "epoch": 0.09712430426716141, "grad_norm": 9.070586204528809, "learning_rate": 9.810192992623196e-06, "loss": 0.3631, "step": 2094 }, { "epoch": 0.09717068645640074, "grad_norm": 8.81347942352295, "learning_rate": 9.809992091850271e-06, "loss": 0.4396, "step": 2095 }, { "epoch": 0.09721706864564007, "grad_norm": 14.214592933654785, "learning_rate": 9.809791086871405e-06, "loss": 0.5053, "step": 2096 }, { "epoch": 0.0972634508348794, "grad_norm": 6.463985443115234, "learning_rate": 9.809589977690954e-06, "loss": 0.4031, "step": 2097 }, { "epoch": 0.09730983302411873, "grad_norm": 8.005892753601074, "learning_rate": 9.80938876431327e-06, "loss": 0.2877, "step": 2098 }, { "epoch": 0.09735621521335808, "grad_norm": 8.184711456298828, "learning_rate": 9.809187446742717e-06, "loss": 0.4816, "step": 2099 }, { "epoch": 0.09740259740259741, "grad_norm": 8.88301944732666, "learning_rate": 9.808986024983654e-06, "loss": 0.3481, "step": 2100 }, { "epoch": 0.09744897959183674, "grad_norm": 6.256278991699219, "learning_rate": 9.808784499040446e-06, "loss": 0.2917, "step": 2101 }, { "epoch": 0.09749536178107607, "grad_norm": 8.971595764160156, "learning_rate": 9.808582868917458e-06, "loss": 0.3004, "step": 2102 }, { "epoch": 0.0975417439703154, "grad_norm": 6.472075462341309, "learning_rate": 9.808381134619057e-06, "loss": 0.4067, "step": 2103 }, { "epoch": 0.09758812615955473, "grad_norm": 11.41530704498291, "learning_rate": 9.808179296149616e-06, "loss": 0.5029, "step": 2104 }, { "epoch": 0.09763450834879406, "grad_norm": 5.455165863037109, "learning_rate": 9.807977353513507e-06, "loss": 0.3444, "step": 2105 }, { "epoch": 0.0976808905380334, "grad_norm": 12.43828296661377, "learning_rate": 9.807775306715104e-06, "loss": 0.4751, "step": 2106 }, { "epoch": 0.09772727272727273, "grad_norm": 8.959589958190918, "learning_rate": 9.807573155758783e-06, "loss": 0.386, "step": 2107 }, { "epoch": 0.09777365491651206, "grad_norm": 5.538989543914795, "learning_rate": 9.807370900648928e-06, "loss": 0.3537, "step": 2108 }, { "epoch": 0.09782003710575139, "grad_norm": 4.757747173309326, "learning_rate": 9.807168541389918e-06, "loss": 0.2561, "step": 2109 }, { "epoch": 0.09786641929499072, "grad_norm": 10.25355339050293, "learning_rate": 9.806966077986135e-06, "loss": 0.4562, "step": 2110 }, { "epoch": 0.09791280148423005, "grad_norm": 10.565245628356934, "learning_rate": 9.806763510441969e-06, "loss": 0.4238, "step": 2111 }, { "epoch": 0.09795918367346938, "grad_norm": 10.1389799118042, "learning_rate": 9.806560838761807e-06, "loss": 0.4447, "step": 2112 }, { "epoch": 0.09800556586270873, "grad_norm": 4.695957183837891, "learning_rate": 9.806358062950038e-06, "loss": 0.371, "step": 2113 }, { "epoch": 0.09805194805194806, "grad_norm": 6.7416276931762695, "learning_rate": 9.806155183011058e-06, "loss": 0.3109, "step": 2114 }, { "epoch": 0.09809833024118739, "grad_norm": 8.531482696533203, "learning_rate": 9.80595219894926e-06, "loss": 0.4729, "step": 2115 }, { "epoch": 0.09814471243042672, "grad_norm": 5.01001501083374, "learning_rate": 9.805749110769044e-06, "loss": 0.384, "step": 2116 }, { "epoch": 0.09819109461966605, "grad_norm": 12.06481647491455, "learning_rate": 9.805545918474807e-06, "loss": 0.485, "step": 2117 }, { "epoch": 0.09823747680890538, "grad_norm": 18.067873001098633, "learning_rate": 9.805342622070953e-06, "loss": 0.5813, "step": 2118 }, { "epoch": 0.09828385899814471, "grad_norm": 10.16108226776123, "learning_rate": 9.805139221561886e-06, "loss": 0.3183, "step": 2119 }, { "epoch": 0.09833024118738404, "grad_norm": 50.48110580444336, "learning_rate": 9.804935716952011e-06, "loss": 0.4687, "step": 2120 }, { "epoch": 0.09837662337662338, "grad_norm": 8.191619873046875, "learning_rate": 9.804732108245737e-06, "loss": 0.4283, "step": 2121 }, { "epoch": 0.09842300556586271, "grad_norm": 7.38520622253418, "learning_rate": 9.80452839544748e-06, "loss": 0.2629, "step": 2122 }, { "epoch": 0.09846938775510204, "grad_norm": 8.005030632019043, "learning_rate": 9.804324578561646e-06, "loss": 0.4282, "step": 2123 }, { "epoch": 0.09851576994434137, "grad_norm": 6.946767807006836, "learning_rate": 9.804120657592654e-06, "loss": 0.3915, "step": 2124 }, { "epoch": 0.0985621521335807, "grad_norm": 10.284974098205566, "learning_rate": 9.803916632544925e-06, "loss": 0.3116, "step": 2125 }, { "epoch": 0.09860853432282003, "grad_norm": 8.722403526306152, "learning_rate": 9.803712503422874e-06, "loss": 0.437, "step": 2126 }, { "epoch": 0.09865491651205936, "grad_norm": 15.9364652633667, "learning_rate": 9.803508270230924e-06, "loss": 0.4226, "step": 2127 }, { "epoch": 0.0987012987012987, "grad_norm": 7.607192516326904, "learning_rate": 9.803303932973503e-06, "loss": 0.4142, "step": 2128 }, { "epoch": 0.09874768089053804, "grad_norm": 4.656370639801025, "learning_rate": 9.803099491655034e-06, "loss": 0.3418, "step": 2129 }, { "epoch": 0.09879406307977737, "grad_norm": 5.081202983856201, "learning_rate": 9.802894946279951e-06, "loss": 0.2967, "step": 2130 }, { "epoch": 0.0988404452690167, "grad_norm": 9.021353721618652, "learning_rate": 9.802690296852679e-06, "loss": 0.3674, "step": 2131 }, { "epoch": 0.09888682745825603, "grad_norm": 8.9274263381958, "learning_rate": 9.802485543377655e-06, "loss": 0.4244, "step": 2132 }, { "epoch": 0.09893320964749536, "grad_norm": 14.09942626953125, "learning_rate": 9.802280685859316e-06, "loss": 0.6016, "step": 2133 }, { "epoch": 0.09897959183673469, "grad_norm": 7.274792671203613, "learning_rate": 9.802075724302099e-06, "loss": 0.3393, "step": 2134 }, { "epoch": 0.09902597402597403, "grad_norm": 9.52963638305664, "learning_rate": 9.801870658710443e-06, "loss": 0.3279, "step": 2135 }, { "epoch": 0.09907235621521336, "grad_norm": 16.716899871826172, "learning_rate": 9.801665489088795e-06, "loss": 0.5141, "step": 2136 }, { "epoch": 0.09911873840445269, "grad_norm": 10.963375091552734, "learning_rate": 9.801460215441593e-06, "loss": 0.5342, "step": 2137 }, { "epoch": 0.09916512059369202, "grad_norm": 8.651922225952148, "learning_rate": 9.80125483777329e-06, "loss": 0.3477, "step": 2138 }, { "epoch": 0.09921150278293135, "grad_norm": 7.030391216278076, "learning_rate": 9.801049356088332e-06, "loss": 0.3144, "step": 2139 }, { "epoch": 0.09925788497217068, "grad_norm": 6.830869197845459, "learning_rate": 9.800843770391174e-06, "loss": 0.2702, "step": 2140 }, { "epoch": 0.09930426716141001, "grad_norm": 4.30133056640625, "learning_rate": 9.800638080686266e-06, "loss": 0.32, "step": 2141 }, { "epoch": 0.09935064935064936, "grad_norm": 11.365145683288574, "learning_rate": 9.800432286978067e-06, "loss": 0.4799, "step": 2142 }, { "epoch": 0.09939703153988869, "grad_norm": 12.949376106262207, "learning_rate": 9.800226389271034e-06, "loss": 0.3023, "step": 2143 }, { "epoch": 0.09944341372912802, "grad_norm": 12.202448844909668, "learning_rate": 9.80002038756963e-06, "loss": 0.5618, "step": 2144 }, { "epoch": 0.09948979591836735, "grad_norm": 9.129544258117676, "learning_rate": 9.799814281878313e-06, "loss": 0.2661, "step": 2145 }, { "epoch": 0.09953617810760668, "grad_norm": 8.832876205444336, "learning_rate": 9.799608072201553e-06, "loss": 0.4495, "step": 2146 }, { "epoch": 0.099582560296846, "grad_norm": 9.598489761352539, "learning_rate": 9.799401758543816e-06, "loss": 0.3773, "step": 2147 }, { "epoch": 0.09962894248608534, "grad_norm": 10.369868278503418, "learning_rate": 9.799195340909569e-06, "loss": 0.4875, "step": 2148 }, { "epoch": 0.09967532467532468, "grad_norm": 7.587883949279785, "learning_rate": 9.798988819303289e-06, "loss": 0.3409, "step": 2149 }, { "epoch": 0.09972170686456401, "grad_norm": 13.841405868530273, "learning_rate": 9.798782193729446e-06, "loss": 0.4747, "step": 2150 }, { "epoch": 0.09976808905380334, "grad_norm": 6.044116020202637, "learning_rate": 9.798575464192518e-06, "loss": 0.2761, "step": 2151 }, { "epoch": 0.09981447124304267, "grad_norm": 14.743644714355469, "learning_rate": 9.798368630696984e-06, "loss": 0.3328, "step": 2152 }, { "epoch": 0.099860853432282, "grad_norm": 7.714452266693115, "learning_rate": 9.798161693247323e-06, "loss": 0.4039, "step": 2153 }, { "epoch": 0.09990723562152133, "grad_norm": 9.446669578552246, "learning_rate": 9.797954651848022e-06, "loss": 0.4506, "step": 2154 }, { "epoch": 0.09995361781076066, "grad_norm": 5.324394702911377, "learning_rate": 9.797747506503564e-06, "loss": 0.3161, "step": 2155 }, { "epoch": 0.1, "grad_norm": 17.574371337890625, "learning_rate": 9.797540257218434e-06, "loss": 0.4242, "step": 2156 }, { "epoch": 0.1, "eval_loss": 0.3844095766544342, "eval_runtime": 38.0215, "eval_samples_per_second": 45.842, "eval_steps_per_second": 5.734, "step": 2156 }, { "epoch": 0.10004638218923934, "grad_norm": 6.1865339279174805, "learning_rate": 9.797332903997127e-06, "loss": 0.4193, "step": 2157 }, { "epoch": 0.10009276437847867, "grad_norm": 10.759994506835938, "learning_rate": 9.797125446844134e-06, "loss": 0.4106, "step": 2158 }, { "epoch": 0.100139146567718, "grad_norm": 8.018890380859375, "learning_rate": 9.796917885763946e-06, "loss": 0.384, "step": 2159 }, { "epoch": 0.10018552875695733, "grad_norm": 5.965665340423584, "learning_rate": 9.796710220761063e-06, "loss": 0.3062, "step": 2160 }, { "epoch": 0.10023191094619666, "grad_norm": 9.352410316467285, "learning_rate": 9.796502451839984e-06, "loss": 0.4355, "step": 2161 }, { "epoch": 0.10027829313543599, "grad_norm": 6.623101234436035, "learning_rate": 9.796294579005207e-06, "loss": 0.3296, "step": 2162 }, { "epoch": 0.10032467532467533, "grad_norm": 12.306051254272461, "learning_rate": 9.79608660226124e-06, "loss": 0.3668, "step": 2163 }, { "epoch": 0.10037105751391466, "grad_norm": 8.150002479553223, "learning_rate": 9.795878521612584e-06, "loss": 0.4017, "step": 2164 }, { "epoch": 0.10041743970315399, "grad_norm": 8.32210636138916, "learning_rate": 9.795670337063752e-06, "loss": 0.4515, "step": 2165 }, { "epoch": 0.10046382189239332, "grad_norm": 11.973034858703613, "learning_rate": 9.795462048619251e-06, "loss": 0.4873, "step": 2166 }, { "epoch": 0.10051020408163265, "grad_norm": 5.081139087677002, "learning_rate": 9.795253656283595e-06, "loss": 0.3629, "step": 2167 }, { "epoch": 0.10055658627087198, "grad_norm": 6.16492223739624, "learning_rate": 9.795045160061295e-06, "loss": 0.4406, "step": 2168 }, { "epoch": 0.10060296846011131, "grad_norm": 13.212956428527832, "learning_rate": 9.794836559956873e-06, "loss": 0.4933, "step": 2169 }, { "epoch": 0.10064935064935066, "grad_norm": 9.294377326965332, "learning_rate": 9.794627855974844e-06, "loss": 0.4128, "step": 2170 }, { "epoch": 0.10069573283858999, "grad_norm": 5.691891670227051, "learning_rate": 9.794419048119733e-06, "loss": 0.3392, "step": 2171 }, { "epoch": 0.10074211502782932, "grad_norm": 13.381682395935059, "learning_rate": 9.794210136396061e-06, "loss": 0.3545, "step": 2172 }, { "epoch": 0.10078849721706865, "grad_norm": 11.21221923828125, "learning_rate": 9.794001120808356e-06, "loss": 0.3265, "step": 2173 }, { "epoch": 0.10083487940630798, "grad_norm": 10.18570327758789, "learning_rate": 9.793792001361145e-06, "loss": 0.4129, "step": 2174 }, { "epoch": 0.1008812615955473, "grad_norm": 6.923311233520508, "learning_rate": 9.793582778058959e-06, "loss": 0.3952, "step": 2175 }, { "epoch": 0.10092764378478664, "grad_norm": 13.28649616241455, "learning_rate": 9.793373450906329e-06, "loss": 0.4709, "step": 2176 }, { "epoch": 0.10097402597402598, "grad_norm": 12.448244094848633, "learning_rate": 9.793164019907795e-06, "loss": 0.4833, "step": 2177 }, { "epoch": 0.10102040816326531, "grad_norm": 9.425625801086426, "learning_rate": 9.792954485067886e-06, "loss": 0.406, "step": 2178 }, { "epoch": 0.10106679035250464, "grad_norm": 6.834273338317871, "learning_rate": 9.79274484639115e-06, "loss": 0.3829, "step": 2179 }, { "epoch": 0.10111317254174397, "grad_norm": 5.4191460609436035, "learning_rate": 9.792535103882122e-06, "loss": 0.3656, "step": 2180 }, { "epoch": 0.1011595547309833, "grad_norm": 5.867753028869629, "learning_rate": 9.79232525754535e-06, "loss": 0.3647, "step": 2181 }, { "epoch": 0.10120593692022263, "grad_norm": 11.467836380004883, "learning_rate": 9.792115307385378e-06, "loss": 0.4494, "step": 2182 }, { "epoch": 0.10125231910946196, "grad_norm": 8.433326721191406, "learning_rate": 9.791905253406758e-06, "loss": 0.4443, "step": 2183 }, { "epoch": 0.1012987012987013, "grad_norm": 8.904069900512695, "learning_rate": 9.791695095614036e-06, "loss": 0.3893, "step": 2184 }, { "epoch": 0.10134508348794063, "grad_norm": 14.182951927185059, "learning_rate": 9.791484834011768e-06, "loss": 0.5583, "step": 2185 }, { "epoch": 0.10139146567717996, "grad_norm": 7.72596549987793, "learning_rate": 9.791274468604508e-06, "loss": 0.3438, "step": 2186 }, { "epoch": 0.1014378478664193, "grad_norm": 10.19367504119873, "learning_rate": 9.791063999396814e-06, "loss": 0.4419, "step": 2187 }, { "epoch": 0.10148423005565863, "grad_norm": 8.77328109741211, "learning_rate": 9.790853426393246e-06, "loss": 0.4084, "step": 2188 }, { "epoch": 0.10153061224489796, "grad_norm": 16.323654174804688, "learning_rate": 9.790642749598363e-06, "loss": 0.3787, "step": 2189 }, { "epoch": 0.10157699443413729, "grad_norm": 12.654973983764648, "learning_rate": 9.790431969016734e-06, "loss": 0.4466, "step": 2190 }, { "epoch": 0.10162337662337663, "grad_norm": 5.396020412445068, "learning_rate": 9.790221084652922e-06, "loss": 0.3389, "step": 2191 }, { "epoch": 0.10166975881261596, "grad_norm": 11.54733943939209, "learning_rate": 9.790010096511498e-06, "loss": 0.4418, "step": 2192 }, { "epoch": 0.10171614100185529, "grad_norm": 7.8839850425720215, "learning_rate": 9.789799004597029e-06, "loss": 0.3463, "step": 2193 }, { "epoch": 0.10176252319109462, "grad_norm": 10.878827095031738, "learning_rate": 9.789587808914094e-06, "loss": 0.4455, "step": 2194 }, { "epoch": 0.10180890538033395, "grad_norm": 15.41444206237793, "learning_rate": 9.789376509467264e-06, "loss": 0.378, "step": 2195 }, { "epoch": 0.10185528756957328, "grad_norm": 6.084177017211914, "learning_rate": 9.789165106261119e-06, "loss": 0.4111, "step": 2196 }, { "epoch": 0.10190166975881261, "grad_norm": 16.624984741210938, "learning_rate": 9.788953599300236e-06, "loss": 0.4655, "step": 2197 }, { "epoch": 0.10194805194805195, "grad_norm": 8.302910804748535, "learning_rate": 9.7887419885892e-06, "loss": 0.3856, "step": 2198 }, { "epoch": 0.10199443413729128, "grad_norm": 11.153830528259277, "learning_rate": 9.788530274132594e-06, "loss": 0.2738, "step": 2199 }, { "epoch": 0.10204081632653061, "grad_norm": 6.189416885375977, "learning_rate": 9.788318455935008e-06, "loss": 0.3262, "step": 2200 }, { "epoch": 0.10208719851576994, "grad_norm": 8.398209571838379, "learning_rate": 9.788106534001025e-06, "loss": 0.5191, "step": 2201 }, { "epoch": 0.10213358070500927, "grad_norm": 8.40770435333252, "learning_rate": 9.787894508335243e-06, "loss": 0.4676, "step": 2202 }, { "epoch": 0.1021799628942486, "grad_norm": 6.761023044586182, "learning_rate": 9.78768237894225e-06, "loss": 0.4178, "step": 2203 }, { "epoch": 0.10222634508348794, "grad_norm": 8.186811447143555, "learning_rate": 9.787470145826643e-06, "loss": 0.4366, "step": 2204 }, { "epoch": 0.10227272727272728, "grad_norm": 8.242009162902832, "learning_rate": 9.78725780899302e-06, "loss": 0.3824, "step": 2205 }, { "epoch": 0.10231910946196661, "grad_norm": 10.712882995605469, "learning_rate": 9.787045368445982e-06, "loss": 0.4021, "step": 2206 }, { "epoch": 0.10236549165120594, "grad_norm": 14.119919776916504, "learning_rate": 9.78683282419013e-06, "loss": 0.5174, "step": 2207 }, { "epoch": 0.10241187384044527, "grad_norm": 8.432201385498047, "learning_rate": 9.786620176230074e-06, "loss": 0.3681, "step": 2208 }, { "epoch": 0.1024582560296846, "grad_norm": 12.382034301757812, "learning_rate": 9.786407424570412e-06, "loss": 0.462, "step": 2209 }, { "epoch": 0.10250463821892393, "grad_norm": 7.788295745849609, "learning_rate": 9.78619456921576e-06, "loss": 0.3798, "step": 2210 }, { "epoch": 0.10255102040816326, "grad_norm": 7.908099174499512, "learning_rate": 9.785981610170725e-06, "loss": 0.4499, "step": 2211 }, { "epoch": 0.1025974025974026, "grad_norm": 10.6487455368042, "learning_rate": 9.785768547439924e-06, "loss": 0.4139, "step": 2212 }, { "epoch": 0.10264378478664193, "grad_norm": 7.056860446929932, "learning_rate": 9.785555381027971e-06, "loss": 0.2795, "step": 2213 }, { "epoch": 0.10269016697588126, "grad_norm": 5.923229694366455, "learning_rate": 9.785342110939486e-06, "loss": 0.2908, "step": 2214 }, { "epoch": 0.1027365491651206, "grad_norm": 7.42340087890625, "learning_rate": 9.785128737179088e-06, "loss": 0.4856, "step": 2215 }, { "epoch": 0.10278293135435992, "grad_norm": 11.134204864501953, "learning_rate": 9.7849152597514e-06, "loss": 0.5032, "step": 2216 }, { "epoch": 0.10282931354359925, "grad_norm": 9.403404235839844, "learning_rate": 9.784701678661045e-06, "loss": 0.3974, "step": 2217 }, { "epoch": 0.10287569573283858, "grad_norm": 8.003717422485352, "learning_rate": 9.784487993912652e-06, "loss": 0.3846, "step": 2218 }, { "epoch": 0.10292207792207793, "grad_norm": 10.52755069732666, "learning_rate": 9.784274205510851e-06, "loss": 0.3947, "step": 2219 }, { "epoch": 0.10296846011131726, "grad_norm": 5.481959342956543, "learning_rate": 9.784060313460274e-06, "loss": 0.3394, "step": 2220 }, { "epoch": 0.10301484230055659, "grad_norm": 4.598709583282471, "learning_rate": 9.783846317765552e-06, "loss": 0.3802, "step": 2221 }, { "epoch": 0.10306122448979592, "grad_norm": 4.979848384857178, "learning_rate": 9.783632218431323e-06, "loss": 0.3652, "step": 2222 }, { "epoch": 0.10310760667903525, "grad_norm": 12.521197319030762, "learning_rate": 9.783418015462227e-06, "loss": 0.4566, "step": 2223 }, { "epoch": 0.10315398886827458, "grad_norm": 11.830066680908203, "learning_rate": 9.783203708862901e-06, "loss": 0.3776, "step": 2224 }, { "epoch": 0.10320037105751391, "grad_norm": 7.199710845947266, "learning_rate": 9.782989298637989e-06, "loss": 0.3767, "step": 2225 }, { "epoch": 0.10324675324675325, "grad_norm": 7.477212905883789, "learning_rate": 9.782774784792137e-06, "loss": 0.3999, "step": 2226 }, { "epoch": 0.10329313543599258, "grad_norm": 6.920771598815918, "learning_rate": 9.782560167329992e-06, "loss": 0.2965, "step": 2227 }, { "epoch": 0.10333951762523191, "grad_norm": 9.763494491577148, "learning_rate": 9.782345446256202e-06, "loss": 0.5109, "step": 2228 }, { "epoch": 0.10338589981447124, "grad_norm": 7.813385963439941, "learning_rate": 9.782130621575422e-06, "loss": 0.4098, "step": 2229 }, { "epoch": 0.10343228200371057, "grad_norm": 9.435808181762695, "learning_rate": 9.781915693292305e-06, "loss": 0.4501, "step": 2230 }, { "epoch": 0.1034786641929499, "grad_norm": 8.324934959411621, "learning_rate": 9.781700661411506e-06, "loss": 0.5505, "step": 2231 }, { "epoch": 0.10352504638218923, "grad_norm": 9.920231819152832, "learning_rate": 9.781485525937683e-06, "loss": 0.4675, "step": 2232 }, { "epoch": 0.10357142857142858, "grad_norm": 9.014622688293457, "learning_rate": 9.781270286875502e-06, "loss": 0.4188, "step": 2233 }, { "epoch": 0.10361781076066791, "grad_norm": 10.578664779663086, "learning_rate": 9.781054944229618e-06, "loss": 0.4014, "step": 2234 }, { "epoch": 0.10366419294990724, "grad_norm": 8.619400024414062, "learning_rate": 9.780839498004701e-06, "loss": 0.4008, "step": 2235 }, { "epoch": 0.10371057513914657, "grad_norm": 11.292678833007812, "learning_rate": 9.780623948205419e-06, "loss": 0.5446, "step": 2236 }, { "epoch": 0.1037569573283859, "grad_norm": 7.594767093658447, "learning_rate": 9.78040829483644e-06, "loss": 0.3979, "step": 2237 }, { "epoch": 0.10380333951762523, "grad_norm": 11.925005912780762, "learning_rate": 9.780192537902437e-06, "loss": 0.4735, "step": 2238 }, { "epoch": 0.10384972170686456, "grad_norm": 14.028444290161133, "learning_rate": 9.779976677408085e-06, "loss": 0.5239, "step": 2239 }, { "epoch": 0.1038961038961039, "grad_norm": 8.34058952331543, "learning_rate": 9.77976071335806e-06, "loss": 0.4137, "step": 2240 }, { "epoch": 0.10394248608534323, "grad_norm": 11.023406028747559, "learning_rate": 9.779544645757037e-06, "loss": 0.4178, "step": 2241 }, { "epoch": 0.10398886827458256, "grad_norm": 9.651286125183105, "learning_rate": 9.779328474609702e-06, "loss": 0.4123, "step": 2242 }, { "epoch": 0.1040352504638219, "grad_norm": 5.5740966796875, "learning_rate": 9.779112199920737e-06, "loss": 0.426, "step": 2243 }, { "epoch": 0.10408163265306122, "grad_norm": 7.310338020324707, "learning_rate": 9.778895821694826e-06, "loss": 0.4348, "step": 2244 }, { "epoch": 0.10412801484230055, "grad_norm": 10.451340675354004, "learning_rate": 9.778679339936659e-06, "loss": 0.2658, "step": 2245 }, { "epoch": 0.10417439703153988, "grad_norm": 4.7637786865234375, "learning_rate": 9.778462754650922e-06, "loss": 0.4502, "step": 2246 }, { "epoch": 0.10422077922077921, "grad_norm": 8.502481460571289, "learning_rate": 9.778246065842312e-06, "loss": 0.4145, "step": 2247 }, { "epoch": 0.10426716141001856, "grad_norm": 9.472567558288574, "learning_rate": 9.778029273515519e-06, "loss": 0.4599, "step": 2248 }, { "epoch": 0.10431354359925789, "grad_norm": 10.03969669342041, "learning_rate": 9.777812377675245e-06, "loss": 0.3549, "step": 2249 }, { "epoch": 0.10435992578849722, "grad_norm": 7.785267353057861, "learning_rate": 9.777595378326184e-06, "loss": 0.3661, "step": 2250 }, { "epoch": 0.10440630797773655, "grad_norm": 5.028570175170898, "learning_rate": 9.77737827547304e-06, "loss": 0.4059, "step": 2251 }, { "epoch": 0.10445269016697588, "grad_norm": 9.887518882751465, "learning_rate": 9.777161069120513e-06, "loss": 0.4712, "step": 2252 }, { "epoch": 0.10449907235621521, "grad_norm": 12.683966636657715, "learning_rate": 9.776943759273314e-06, "loss": 0.431, "step": 2253 }, { "epoch": 0.10454545454545454, "grad_norm": 7.154293060302734, "learning_rate": 9.776726345936147e-06, "loss": 0.2755, "step": 2254 }, { "epoch": 0.10459183673469388, "grad_norm": 16.28367042541504, "learning_rate": 9.776508829113722e-06, "loss": 0.44, "step": 2255 }, { "epoch": 0.10463821892393321, "grad_norm": 6.215921878814697, "learning_rate": 9.776291208810754e-06, "loss": 0.3838, "step": 2256 }, { "epoch": 0.10468460111317254, "grad_norm": 8.655538558959961, "learning_rate": 9.776073485031956e-06, "loss": 0.3535, "step": 2257 }, { "epoch": 0.10473098330241187, "grad_norm": 8.472025871276855, "learning_rate": 9.775855657782045e-06, "loss": 0.3942, "step": 2258 }, { "epoch": 0.1047773654916512, "grad_norm": 17.610231399536133, "learning_rate": 9.775637727065741e-06, "loss": 0.4208, "step": 2259 }, { "epoch": 0.10482374768089053, "grad_norm": 14.3260498046875, "learning_rate": 9.775419692887764e-06, "loss": 0.5627, "step": 2260 }, { "epoch": 0.10487012987012986, "grad_norm": 8.449742317199707, "learning_rate": 9.775201555252837e-06, "loss": 0.4121, "step": 2261 }, { "epoch": 0.10491651205936921, "grad_norm": 14.611302375793457, "learning_rate": 9.774983314165686e-06, "loss": 0.5376, "step": 2262 }, { "epoch": 0.10496289424860854, "grad_norm": 14.01212215423584, "learning_rate": 9.774764969631042e-06, "loss": 0.3916, "step": 2263 }, { "epoch": 0.10500927643784787, "grad_norm": 7.709319114685059, "learning_rate": 9.774546521653633e-06, "loss": 0.3313, "step": 2264 }, { "epoch": 0.1050556586270872, "grad_norm": 5.692338943481445, "learning_rate": 9.774327970238193e-06, "loss": 0.3574, "step": 2265 }, { "epoch": 0.10510204081632653, "grad_norm": 8.059266090393066, "learning_rate": 9.774109315389455e-06, "loss": 0.4396, "step": 2266 }, { "epoch": 0.10514842300556586, "grad_norm": 5.38188362121582, "learning_rate": 9.773890557112157e-06, "loss": 0.3633, "step": 2267 }, { "epoch": 0.10519480519480519, "grad_norm": 4.127233982086182, "learning_rate": 9.773671695411038e-06, "loss": 0.3217, "step": 2268 }, { "epoch": 0.10524118738404453, "grad_norm": 6.652303218841553, "learning_rate": 9.773452730290837e-06, "loss": 0.2757, "step": 2269 }, { "epoch": 0.10528756957328386, "grad_norm": 8.932190895080566, "learning_rate": 9.773233661756305e-06, "loss": 0.3793, "step": 2270 }, { "epoch": 0.10533395176252319, "grad_norm": 5.984103202819824, "learning_rate": 9.77301448981218e-06, "loss": 0.376, "step": 2271 }, { "epoch": 0.10538033395176252, "grad_norm": 6.059223175048828, "learning_rate": 9.772795214463215e-06, "loss": 0.3732, "step": 2272 }, { "epoch": 0.10542671614100185, "grad_norm": 4.6215596199035645, "learning_rate": 9.772575835714158e-06, "loss": 0.3108, "step": 2273 }, { "epoch": 0.10547309833024118, "grad_norm": 6.793663501739502, "learning_rate": 9.772356353569763e-06, "loss": 0.3369, "step": 2274 }, { "epoch": 0.10551948051948051, "grad_norm": 16.188098907470703, "learning_rate": 9.772136768034786e-06, "loss": 0.4547, "step": 2275 }, { "epoch": 0.10556586270871986, "grad_norm": 11.67348861694336, "learning_rate": 9.771917079113982e-06, "loss": 0.3908, "step": 2276 }, { "epoch": 0.10561224489795919, "grad_norm": 4.4576287269592285, "learning_rate": 9.77169728681211e-06, "loss": 0.3612, "step": 2277 }, { "epoch": 0.10565862708719852, "grad_norm": 5.718044757843018, "learning_rate": 9.771477391133935e-06, "loss": 0.3165, "step": 2278 }, { "epoch": 0.10570500927643785, "grad_norm": 13.143453598022461, "learning_rate": 9.771257392084217e-06, "loss": 0.3646, "step": 2279 }, { "epoch": 0.10575139146567718, "grad_norm": 11.596417427062988, "learning_rate": 9.771037289667726e-06, "loss": 0.4609, "step": 2280 }, { "epoch": 0.10579777365491651, "grad_norm": 6.6548919677734375, "learning_rate": 9.77081708388923e-06, "loss": 0.3799, "step": 2281 }, { "epoch": 0.10584415584415584, "grad_norm": 5.1582136154174805, "learning_rate": 9.770596774753496e-06, "loss": 0.3431, "step": 2282 }, { "epoch": 0.10589053803339518, "grad_norm": 6.508259296417236, "learning_rate": 9.770376362265298e-06, "loss": 0.3775, "step": 2283 }, { "epoch": 0.10593692022263451, "grad_norm": 12.475532531738281, "learning_rate": 9.770155846429415e-06, "loss": 0.3505, "step": 2284 }, { "epoch": 0.10598330241187384, "grad_norm": 6.792490005493164, "learning_rate": 9.76993522725062e-06, "loss": 0.4821, "step": 2285 }, { "epoch": 0.10602968460111317, "grad_norm": 9.171649932861328, "learning_rate": 9.769714504733695e-06, "loss": 0.3054, "step": 2286 }, { "epoch": 0.1060760667903525, "grad_norm": 17.82230567932129, "learning_rate": 9.76949367888342e-06, "loss": 0.4257, "step": 2287 }, { "epoch": 0.10612244897959183, "grad_norm": 9.545005798339844, "learning_rate": 9.76927274970458e-06, "loss": 0.4333, "step": 2288 }, { "epoch": 0.10616883116883116, "grad_norm": 11.924223899841309, "learning_rate": 9.769051717201963e-06, "loss": 0.4354, "step": 2289 }, { "epoch": 0.1062152133580705, "grad_norm": 8.832306861877441, "learning_rate": 9.768830581380354e-06, "loss": 0.5067, "step": 2290 }, { "epoch": 0.10626159554730984, "grad_norm": 8.858625411987305, "learning_rate": 9.768609342244548e-06, "loss": 0.4159, "step": 2291 }, { "epoch": 0.10630797773654917, "grad_norm": 9.552692413330078, "learning_rate": 9.768387999799333e-06, "loss": 0.4291, "step": 2292 }, { "epoch": 0.1063543599257885, "grad_norm": 8.179280281066895, "learning_rate": 9.76816655404951e-06, "loss": 0.4743, "step": 2293 }, { "epoch": 0.10640074211502783, "grad_norm": 9.213638305664062, "learning_rate": 9.767945004999873e-06, "loss": 0.4139, "step": 2294 }, { "epoch": 0.10644712430426716, "grad_norm": 7.582453727722168, "learning_rate": 9.767723352655221e-06, "loss": 0.3355, "step": 2295 }, { "epoch": 0.10649350649350649, "grad_norm": 9.233976364135742, "learning_rate": 9.767501597020357e-06, "loss": 0.3161, "step": 2296 }, { "epoch": 0.10653988868274583, "grad_norm": 10.229665756225586, "learning_rate": 9.767279738100087e-06, "loss": 0.513, "step": 2297 }, { "epoch": 0.10658627087198516, "grad_norm": 8.446182250976562, "learning_rate": 9.767057775899214e-06, "loss": 0.3806, "step": 2298 }, { "epoch": 0.10663265306122449, "grad_norm": 8.237029075622559, "learning_rate": 9.76683571042255e-06, "loss": 0.3728, "step": 2299 }, { "epoch": 0.10667903525046382, "grad_norm": 10.014049530029297, "learning_rate": 9.766613541674905e-06, "loss": 0.3562, "step": 2300 }, { "epoch": 0.10672541743970315, "grad_norm": 7.4884538650512695, "learning_rate": 9.766391269661091e-06, "loss": 0.245, "step": 2301 }, { "epoch": 0.10677179962894248, "grad_norm": 9.511631965637207, "learning_rate": 9.766168894385923e-06, "loss": 0.4412, "step": 2302 }, { "epoch": 0.10681818181818181, "grad_norm": 15.406063079833984, "learning_rate": 9.765946415854222e-06, "loss": 0.2762, "step": 2303 }, { "epoch": 0.10686456400742116, "grad_norm": 9.122702598571777, "learning_rate": 9.765723834070805e-06, "loss": 0.4235, "step": 2304 }, { "epoch": 0.10691094619666049, "grad_norm": 7.548721790313721, "learning_rate": 9.765501149040495e-06, "loss": 0.367, "step": 2305 }, { "epoch": 0.10695732838589982, "grad_norm": 7.735436916351318, "learning_rate": 9.765278360768116e-06, "loss": 0.3934, "step": 2306 }, { "epoch": 0.10700371057513915, "grad_norm": 17.503921508789062, "learning_rate": 9.765055469258493e-06, "loss": 0.5226, "step": 2307 }, { "epoch": 0.10705009276437848, "grad_norm": 6.142821788787842, "learning_rate": 9.764832474516459e-06, "loss": 0.3746, "step": 2308 }, { "epoch": 0.1070964749536178, "grad_norm": 9.304234504699707, "learning_rate": 9.764609376546841e-06, "loss": 0.3205, "step": 2309 }, { "epoch": 0.10714285714285714, "grad_norm": 7.024699687957764, "learning_rate": 9.764386175354474e-06, "loss": 0.284, "step": 2310 }, { "epoch": 0.10718923933209648, "grad_norm": 6.983170509338379, "learning_rate": 9.764162870944194e-06, "loss": 0.4446, "step": 2311 }, { "epoch": 0.10723562152133581, "grad_norm": 10.79858112335205, "learning_rate": 9.76393946332084e-06, "loss": 0.3677, "step": 2312 }, { "epoch": 0.10728200371057514, "grad_norm": 4.875386714935303, "learning_rate": 9.763715952489248e-06, "loss": 0.378, "step": 2313 }, { "epoch": 0.10732838589981447, "grad_norm": 7.268110275268555, "learning_rate": 9.763492338454263e-06, "loss": 0.2614, "step": 2314 }, { "epoch": 0.1073747680890538, "grad_norm": 12.107802391052246, "learning_rate": 9.763268621220729e-06, "loss": 0.3038, "step": 2315 }, { "epoch": 0.10742115027829313, "grad_norm": 14.20657730102539, "learning_rate": 9.763044800793491e-06, "loss": 0.5361, "step": 2316 }, { "epoch": 0.10746753246753246, "grad_norm": 6.978774070739746, "learning_rate": 9.762820877177402e-06, "loss": 0.3918, "step": 2317 }, { "epoch": 0.1075139146567718, "grad_norm": 8.381545066833496, "learning_rate": 9.76259685037731e-06, "loss": 0.4179, "step": 2318 }, { "epoch": 0.10756029684601114, "grad_norm": 9.029717445373535, "learning_rate": 9.76237272039807e-06, "loss": 0.4087, "step": 2319 }, { "epoch": 0.10760667903525047, "grad_norm": 7.067843914031982, "learning_rate": 9.762148487244536e-06, "loss": 0.438, "step": 2320 }, { "epoch": 0.1076530612244898, "grad_norm": 6.085370063781738, "learning_rate": 9.761924150921567e-06, "loss": 0.3199, "step": 2321 }, { "epoch": 0.10769944341372913, "grad_norm": 9.32269287109375, "learning_rate": 9.761699711434023e-06, "loss": 0.468, "step": 2322 }, { "epoch": 0.10774582560296846, "grad_norm": 8.652162551879883, "learning_rate": 9.761475168786766e-06, "loss": 0.4383, "step": 2323 }, { "epoch": 0.10779220779220779, "grad_norm": 8.691279411315918, "learning_rate": 9.761250522984661e-06, "loss": 0.4407, "step": 2324 }, { "epoch": 0.10783858998144713, "grad_norm": 5.046542644500732, "learning_rate": 9.761025774032575e-06, "loss": 0.2729, "step": 2325 }, { "epoch": 0.10788497217068646, "grad_norm": 7.213562965393066, "learning_rate": 9.760800921935376e-06, "loss": 0.4366, "step": 2326 }, { "epoch": 0.10793135435992579, "grad_norm": 8.625876426696777, "learning_rate": 9.760575966697938e-06, "loss": 0.3333, "step": 2327 }, { "epoch": 0.10797773654916512, "grad_norm": 8.340567588806152, "learning_rate": 9.760350908325131e-06, "loss": 0.4684, "step": 2328 }, { "epoch": 0.10802411873840445, "grad_norm": 11.982513427734375, "learning_rate": 9.760125746821833e-06, "loss": 0.5849, "step": 2329 }, { "epoch": 0.10807050092764378, "grad_norm": 6.167661666870117, "learning_rate": 9.75990048219292e-06, "loss": 0.3002, "step": 2330 }, { "epoch": 0.10811688311688311, "grad_norm": 5.357827663421631, "learning_rate": 9.759675114443274e-06, "loss": 0.2832, "step": 2331 }, { "epoch": 0.10816326530612246, "grad_norm": 9.847532272338867, "learning_rate": 9.759449643577779e-06, "loss": 0.2501, "step": 2332 }, { "epoch": 0.10820964749536179, "grad_norm": 9.005549430847168, "learning_rate": 9.759224069601316e-06, "loss": 0.3608, "step": 2333 }, { "epoch": 0.10825602968460112, "grad_norm": 6.975674152374268, "learning_rate": 9.758998392518775e-06, "loss": 0.3466, "step": 2334 }, { "epoch": 0.10830241187384045, "grad_norm": 7.237766742706299, "learning_rate": 9.758772612335043e-06, "loss": 0.3564, "step": 2335 }, { "epoch": 0.10834879406307978, "grad_norm": 7.972733497619629, "learning_rate": 9.758546729055012e-06, "loss": 0.3348, "step": 2336 }, { "epoch": 0.1083951762523191, "grad_norm": 17.49663543701172, "learning_rate": 9.758320742683578e-06, "loss": 0.5374, "step": 2337 }, { "epoch": 0.10844155844155844, "grad_norm": 8.425139427185059, "learning_rate": 9.758094653225633e-06, "loss": 0.431, "step": 2338 }, { "epoch": 0.10848794063079778, "grad_norm": 7.842796325683594, "learning_rate": 9.757868460686078e-06, "loss": 0.3852, "step": 2339 }, { "epoch": 0.10853432282003711, "grad_norm": 13.946714401245117, "learning_rate": 9.757642165069812e-06, "loss": 0.5283, "step": 2340 }, { "epoch": 0.10858070500927644, "grad_norm": 6.171222686767578, "learning_rate": 9.757415766381736e-06, "loss": 0.2515, "step": 2341 }, { "epoch": 0.10862708719851577, "grad_norm": 12.767132759094238, "learning_rate": 9.75718926462676e-06, "loss": 0.4934, "step": 2342 }, { "epoch": 0.1086734693877551, "grad_norm": 9.197601318359375, "learning_rate": 9.756962659809785e-06, "loss": 0.4493, "step": 2343 }, { "epoch": 0.10871985157699443, "grad_norm": 13.108115196228027, "learning_rate": 9.756735951935725e-06, "loss": 0.3736, "step": 2344 }, { "epoch": 0.10876623376623376, "grad_norm": 10.599634170532227, "learning_rate": 9.75650914100949e-06, "loss": 0.3461, "step": 2345 }, { "epoch": 0.1088126159554731, "grad_norm": 7.777130603790283, "learning_rate": 9.756282227035991e-06, "loss": 0.4441, "step": 2346 }, { "epoch": 0.10885899814471243, "grad_norm": 11.145914077758789, "learning_rate": 9.756055210020147e-06, "loss": 0.4488, "step": 2347 }, { "epoch": 0.10890538033395177, "grad_norm": 8.645659446716309, "learning_rate": 9.755828089966877e-06, "loss": 0.2585, "step": 2348 }, { "epoch": 0.1089517625231911, "grad_norm": 9.654170036315918, "learning_rate": 9.755600866881098e-06, "loss": 0.4376, "step": 2349 }, { "epoch": 0.10899814471243043, "grad_norm": 17.941307067871094, "learning_rate": 9.755373540767734e-06, "loss": 0.4411, "step": 2350 }, { "epoch": 0.10904452690166976, "grad_norm": 6.4167022705078125, "learning_rate": 9.755146111631712e-06, "loss": 0.3274, "step": 2351 }, { "epoch": 0.10909090909090909, "grad_norm": 11.849666595458984, "learning_rate": 9.754918579477956e-06, "loss": 0.3853, "step": 2352 }, { "epoch": 0.10913729128014843, "grad_norm": 14.593050956726074, "learning_rate": 9.754690944311399e-06, "loss": 0.3932, "step": 2353 }, { "epoch": 0.10918367346938776, "grad_norm": 8.365323066711426, "learning_rate": 9.754463206136969e-06, "loss": 0.3723, "step": 2354 }, { "epoch": 0.10923005565862709, "grad_norm": 4.5916547775268555, "learning_rate": 9.754235364959602e-06, "loss": 0.3613, "step": 2355 }, { "epoch": 0.10927643784786642, "grad_norm": 9.832925796508789, "learning_rate": 9.754007420784233e-06, "loss": 0.4604, "step": 2356 }, { "epoch": 0.10932282003710575, "grad_norm": 17.1370792388916, "learning_rate": 9.753779373615802e-06, "loss": 0.4908, "step": 2357 }, { "epoch": 0.10936920222634508, "grad_norm": 6.06451940536499, "learning_rate": 9.753551223459247e-06, "loss": 0.3672, "step": 2358 }, { "epoch": 0.10941558441558441, "grad_norm": 7.74780797958374, "learning_rate": 9.753322970319511e-06, "loss": 0.3857, "step": 2359 }, { "epoch": 0.10946196660482375, "grad_norm": 6.8916015625, "learning_rate": 9.753094614201542e-06, "loss": 0.4569, "step": 2360 }, { "epoch": 0.10950834879406308, "grad_norm": 6.79953670501709, "learning_rate": 9.752866155110284e-06, "loss": 0.3807, "step": 2361 }, { "epoch": 0.10955473098330241, "grad_norm": 10.795165061950684, "learning_rate": 9.752637593050689e-06, "loss": 0.4405, "step": 2362 }, { "epoch": 0.10960111317254174, "grad_norm": 18.201339721679688, "learning_rate": 9.752408928027705e-06, "loss": 0.4263, "step": 2363 }, { "epoch": 0.10964749536178107, "grad_norm": 13.646550178527832, "learning_rate": 9.752180160046289e-06, "loss": 0.4283, "step": 2364 }, { "epoch": 0.1096938775510204, "grad_norm": 6.9519429206848145, "learning_rate": 9.751951289111398e-06, "loss": 0.4294, "step": 2365 }, { "epoch": 0.10974025974025974, "grad_norm": 12.214200019836426, "learning_rate": 9.751722315227987e-06, "loss": 0.3969, "step": 2366 }, { "epoch": 0.10978664192949908, "grad_norm": 12.582486152648926, "learning_rate": 9.75149323840102e-06, "loss": 0.4773, "step": 2367 }, { "epoch": 0.10983302411873841, "grad_norm": 7.8881120681762695, "learning_rate": 9.751264058635457e-06, "loss": 0.3195, "step": 2368 }, { "epoch": 0.10987940630797774, "grad_norm": 5.9498724937438965, "learning_rate": 9.751034775936263e-06, "loss": 0.3993, "step": 2369 }, { "epoch": 0.10992578849721707, "grad_norm": 8.547598838806152, "learning_rate": 9.750805390308408e-06, "loss": 0.4256, "step": 2370 }, { "epoch": 0.1099721706864564, "grad_norm": 5.191287994384766, "learning_rate": 9.75057590175686e-06, "loss": 0.381, "step": 2371 }, { "epoch": 0.11001855287569573, "grad_norm": 12.554064750671387, "learning_rate": 9.75034631028659e-06, "loss": 0.404, "step": 2372 }, { "epoch": 0.11006493506493506, "grad_norm": 5.632107734680176, "learning_rate": 9.750116615902574e-06, "loss": 0.4363, "step": 2373 }, { "epoch": 0.1101113172541744, "grad_norm": 9.755102157592773, "learning_rate": 9.749886818609786e-06, "loss": 0.4132, "step": 2374 }, { "epoch": 0.11015769944341373, "grad_norm": 8.431133270263672, "learning_rate": 9.749656918413205e-06, "loss": 0.4793, "step": 2375 }, { "epoch": 0.11020408163265306, "grad_norm": 9.181760787963867, "learning_rate": 9.749426915317812e-06, "loss": 0.3233, "step": 2376 }, { "epoch": 0.1102504638218924, "grad_norm": 6.957909107208252, "learning_rate": 9.749196809328592e-06, "loss": 0.2822, "step": 2377 }, { "epoch": 0.11029684601113172, "grad_norm": 6.841213703155518, "learning_rate": 9.748966600450526e-06, "loss": 0.4349, "step": 2378 }, { "epoch": 0.11034322820037105, "grad_norm": 8.51158332824707, "learning_rate": 9.748736288688602e-06, "loss": 0.3662, "step": 2379 }, { "epoch": 0.11038961038961038, "grad_norm": 6.379548072814941, "learning_rate": 9.748505874047815e-06, "loss": 0.3801, "step": 2380 }, { "epoch": 0.11043599257884971, "grad_norm": 10.207423210144043, "learning_rate": 9.74827535653315e-06, "loss": 0.4862, "step": 2381 }, { "epoch": 0.11048237476808906, "grad_norm": 6.054224014282227, "learning_rate": 9.748044736149605e-06, "loss": 0.3321, "step": 2382 }, { "epoch": 0.11052875695732839, "grad_norm": 11.236869812011719, "learning_rate": 9.747814012902175e-06, "loss": 0.5036, "step": 2383 }, { "epoch": 0.11057513914656772, "grad_norm": 8.996439933776855, "learning_rate": 9.747583186795857e-06, "loss": 0.4032, "step": 2384 }, { "epoch": 0.11062152133580705, "grad_norm": 11.32408332824707, "learning_rate": 9.747352257835656e-06, "loss": 0.4414, "step": 2385 }, { "epoch": 0.11066790352504638, "grad_norm": 6.087433338165283, "learning_rate": 9.74712122602657e-06, "loss": 0.3666, "step": 2386 }, { "epoch": 0.11071428571428571, "grad_norm": 6.130930423736572, "learning_rate": 9.746890091373609e-06, "loss": 0.2932, "step": 2387 }, { "epoch": 0.11076066790352504, "grad_norm": 7.479212284088135, "learning_rate": 9.746658853881776e-06, "loss": 0.4466, "step": 2388 }, { "epoch": 0.11080705009276438, "grad_norm": 5.808692455291748, "learning_rate": 9.746427513556082e-06, "loss": 0.2888, "step": 2389 }, { "epoch": 0.11085343228200371, "grad_norm": 6.528022289276123, "learning_rate": 9.74619607040154e-06, "loss": 0.3881, "step": 2390 }, { "epoch": 0.11089981447124304, "grad_norm": 10.945649147033691, "learning_rate": 9.745964524423164e-06, "loss": 0.3928, "step": 2391 }, { "epoch": 0.11094619666048237, "grad_norm": 7.759605407714844, "learning_rate": 9.74573287562597e-06, "loss": 0.334, "step": 2392 }, { "epoch": 0.1109925788497217, "grad_norm": 8.825139999389648, "learning_rate": 9.745501124014976e-06, "loss": 0.3438, "step": 2393 }, { "epoch": 0.11103896103896103, "grad_norm": 8.775651931762695, "learning_rate": 9.745269269595203e-06, "loss": 0.4102, "step": 2394 }, { "epoch": 0.11108534322820036, "grad_norm": 7.545877933502197, "learning_rate": 9.745037312371675e-06, "loss": 0.4427, "step": 2395 }, { "epoch": 0.11113172541743971, "grad_norm": 5.898342609405518, "learning_rate": 9.744805252349415e-06, "loss": 0.2609, "step": 2396 }, { "epoch": 0.11117810760667904, "grad_norm": 7.4761738777160645, "learning_rate": 9.744573089533454e-06, "loss": 0.3075, "step": 2397 }, { "epoch": 0.11122448979591837, "grad_norm": 8.431875228881836, "learning_rate": 9.744340823928818e-06, "loss": 0.4196, "step": 2398 }, { "epoch": 0.1112708719851577, "grad_norm": 15.163379669189453, "learning_rate": 9.74410845554054e-06, "loss": 0.5069, "step": 2399 }, { "epoch": 0.11131725417439703, "grad_norm": 7.348803520202637, "learning_rate": 9.743875984373654e-06, "loss": 0.3588, "step": 2400 }, { "epoch": 0.11136363636363636, "grad_norm": 6.535592079162598, "learning_rate": 9.743643410433198e-06, "loss": 0.4239, "step": 2401 }, { "epoch": 0.11141001855287569, "grad_norm": 7.778662204742432, "learning_rate": 9.74341073372421e-06, "loss": 0.4969, "step": 2402 }, { "epoch": 0.11145640074211503, "grad_norm": 7.84032678604126, "learning_rate": 9.743177954251728e-06, "loss": 0.3607, "step": 2403 }, { "epoch": 0.11150278293135436, "grad_norm": 10.975639343261719, "learning_rate": 9.7429450720208e-06, "loss": 0.4714, "step": 2404 }, { "epoch": 0.1115491651205937, "grad_norm": 7.944551944732666, "learning_rate": 9.742712087036468e-06, "loss": 0.2632, "step": 2405 }, { "epoch": 0.11159554730983302, "grad_norm": 4.244024753570557, "learning_rate": 9.74247899930378e-06, "loss": 0.4133, "step": 2406 }, { "epoch": 0.11164192949907235, "grad_norm": 10.038718223571777, "learning_rate": 9.742245808827786e-06, "loss": 0.3721, "step": 2407 }, { "epoch": 0.11168831168831168, "grad_norm": 13.130175590515137, "learning_rate": 9.742012515613536e-06, "loss": 0.483, "step": 2408 }, { "epoch": 0.11173469387755101, "grad_norm": 9.599424362182617, "learning_rate": 9.741779119666089e-06, "loss": 0.4045, "step": 2409 }, { "epoch": 0.11178107606679036, "grad_norm": 9.491507530212402, "learning_rate": 9.741545620990497e-06, "loss": 0.3765, "step": 2410 }, { "epoch": 0.11182745825602969, "grad_norm": 10.860553741455078, "learning_rate": 9.74131201959182e-06, "loss": 0.431, "step": 2411 }, { "epoch": 0.11187384044526902, "grad_norm": 8.124934196472168, "learning_rate": 9.74107831547512e-06, "loss": 0.3196, "step": 2412 }, { "epoch": 0.11192022263450835, "grad_norm": 4.682875156402588, "learning_rate": 9.740844508645457e-06, "loss": 0.2862, "step": 2413 }, { "epoch": 0.11196660482374768, "grad_norm": 12.481598854064941, "learning_rate": 9.7406105991079e-06, "loss": 0.4111, "step": 2414 }, { "epoch": 0.11201298701298701, "grad_norm": 5.687943935394287, "learning_rate": 9.740376586867515e-06, "loss": 0.2991, "step": 2415 }, { "epoch": 0.11205936920222634, "grad_norm": 15.286090850830078, "learning_rate": 9.740142471929369e-06, "loss": 0.4483, "step": 2416 }, { "epoch": 0.11210575139146568, "grad_norm": 8.943389892578125, "learning_rate": 9.739908254298539e-06, "loss": 0.3636, "step": 2417 }, { "epoch": 0.11215213358070501, "grad_norm": 5.8629679679870605, "learning_rate": 9.739673933980095e-06, "loss": 0.254, "step": 2418 }, { "epoch": 0.11219851576994434, "grad_norm": 15.801810264587402, "learning_rate": 9.739439510979116e-06, "loss": 0.446, "step": 2419 }, { "epoch": 0.11224489795918367, "grad_norm": 13.441216468811035, "learning_rate": 9.73920498530068e-06, "loss": 0.5135, "step": 2420 }, { "epoch": 0.112291280148423, "grad_norm": 14.846925735473633, "learning_rate": 9.738970356949866e-06, "loss": 0.5193, "step": 2421 }, { "epoch": 0.11233766233766233, "grad_norm": 14.057066917419434, "learning_rate": 9.73873562593176e-06, "loss": 0.4083, "step": 2422 }, { "epoch": 0.11238404452690166, "grad_norm": 6.806555271148682, "learning_rate": 9.738500792251447e-06, "loss": 0.2965, "step": 2423 }, { "epoch": 0.11243042671614101, "grad_norm": 5.824227809906006, "learning_rate": 9.738265855914014e-06, "loss": 0.3619, "step": 2424 }, { "epoch": 0.11247680890538034, "grad_norm": 7.944322109222412, "learning_rate": 9.738030816924549e-06, "loss": 0.4083, "step": 2425 }, { "epoch": 0.11252319109461967, "grad_norm": 6.637465000152588, "learning_rate": 9.737795675288144e-06, "loss": 0.2234, "step": 2426 }, { "epoch": 0.112569573283859, "grad_norm": 8.191855430603027, "learning_rate": 9.737560431009897e-06, "loss": 0.2947, "step": 2427 }, { "epoch": 0.11261595547309833, "grad_norm": 6.666734218597412, "learning_rate": 9.737325084094902e-06, "loss": 0.4263, "step": 2428 }, { "epoch": 0.11266233766233766, "grad_norm": 8.26844596862793, "learning_rate": 9.737089634548256e-06, "loss": 0.2502, "step": 2429 }, { "epoch": 0.11270871985157699, "grad_norm": 6.731276512145996, "learning_rate": 9.736854082375063e-06, "loss": 0.3554, "step": 2430 }, { "epoch": 0.11275510204081633, "grad_norm": 9.74500846862793, "learning_rate": 9.736618427580424e-06, "loss": 0.3352, "step": 2431 }, { "epoch": 0.11280148423005566, "grad_norm": 11.178169250488281, "learning_rate": 9.736382670169447e-06, "loss": 0.3834, "step": 2432 }, { "epoch": 0.11284786641929499, "grad_norm": 7.039275646209717, "learning_rate": 9.736146810147236e-06, "loss": 0.2598, "step": 2433 }, { "epoch": 0.11289424860853432, "grad_norm": 6.76424503326416, "learning_rate": 9.735910847518902e-06, "loss": 0.4345, "step": 2434 }, { "epoch": 0.11294063079777365, "grad_norm": 7.752687931060791, "learning_rate": 9.735674782289557e-06, "loss": 0.4328, "step": 2435 }, { "epoch": 0.11298701298701298, "grad_norm": 4.745444297790527, "learning_rate": 9.735438614464316e-06, "loss": 0.2519, "step": 2436 }, { "epoch": 0.11303339517625231, "grad_norm": 9.701120376586914, "learning_rate": 9.735202344048297e-06, "loss": 0.5721, "step": 2437 }, { "epoch": 0.11307977736549166, "grad_norm": 8.99350357055664, "learning_rate": 9.734965971046614e-06, "loss": 0.3913, "step": 2438 }, { "epoch": 0.11312615955473099, "grad_norm": 10.150267601013184, "learning_rate": 9.734729495464394e-06, "loss": 0.4165, "step": 2439 }, { "epoch": 0.11317254174397032, "grad_norm": 5.441448211669922, "learning_rate": 9.734492917306754e-06, "loss": 0.4057, "step": 2440 }, { "epoch": 0.11321892393320965, "grad_norm": 5.386634826660156, "learning_rate": 9.734256236578824e-06, "loss": 0.3557, "step": 2441 }, { "epoch": 0.11326530612244898, "grad_norm": 8.652061462402344, "learning_rate": 9.734019453285728e-06, "loss": 0.4489, "step": 2442 }, { "epoch": 0.11331168831168831, "grad_norm": 5.917917728424072, "learning_rate": 9.733782567432598e-06, "loss": 0.3376, "step": 2443 }, { "epoch": 0.11335807050092764, "grad_norm": 4.621977806091309, "learning_rate": 9.733545579024566e-06, "loss": 0.2664, "step": 2444 }, { "epoch": 0.11340445269016698, "grad_norm": 8.144176483154297, "learning_rate": 9.733308488066766e-06, "loss": 0.4578, "step": 2445 }, { "epoch": 0.11345083487940631, "grad_norm": 7.220272064208984, "learning_rate": 9.733071294564334e-06, "loss": 0.4768, "step": 2446 }, { "epoch": 0.11349721706864564, "grad_norm": 8.098747253417969, "learning_rate": 9.732833998522408e-06, "loss": 0.3566, "step": 2447 }, { "epoch": 0.11354359925788497, "grad_norm": 11.848838806152344, "learning_rate": 9.73259659994613e-06, "loss": 0.4098, "step": 2448 }, { "epoch": 0.1135899814471243, "grad_norm": 10.190613746643066, "learning_rate": 9.732359098840642e-06, "loss": 0.3762, "step": 2449 }, { "epoch": 0.11363636363636363, "grad_norm": 6.873950481414795, "learning_rate": 9.732121495211091e-06, "loss": 0.3731, "step": 2450 }, { "epoch": 0.11368274582560296, "grad_norm": 6.236372947692871, "learning_rate": 9.731883789062623e-06, "loss": 0.3224, "step": 2451 }, { "epoch": 0.1137291280148423, "grad_norm": 6.426462650299072, "learning_rate": 9.73164598040039e-06, "loss": 0.3422, "step": 2452 }, { "epoch": 0.11377551020408164, "grad_norm": 14.109602928161621, "learning_rate": 9.731408069229543e-06, "loss": 0.4817, "step": 2453 }, { "epoch": 0.11382189239332097, "grad_norm": 5.933311939239502, "learning_rate": 9.731170055555235e-06, "loss": 0.3573, "step": 2454 }, { "epoch": 0.1138682745825603, "grad_norm": 8.875250816345215, "learning_rate": 9.730931939382622e-06, "loss": 0.3787, "step": 2455 }, { "epoch": 0.11391465677179963, "grad_norm": 11.96336555480957, "learning_rate": 9.730693720716866e-06, "loss": 0.4449, "step": 2456 }, { "epoch": 0.11396103896103896, "grad_norm": 9.043020248413086, "learning_rate": 9.730455399563124e-06, "loss": 0.3197, "step": 2457 }, { "epoch": 0.11400742115027829, "grad_norm": 5.976144313812256, "learning_rate": 9.730216975926562e-06, "loss": 0.3046, "step": 2458 }, { "epoch": 0.11405380333951763, "grad_norm": 9.711324691772461, "learning_rate": 9.729978449812344e-06, "loss": 0.4848, "step": 2459 }, { "epoch": 0.11410018552875696, "grad_norm": 6.20064115524292, "learning_rate": 9.729739821225635e-06, "loss": 0.3688, "step": 2460 }, { "epoch": 0.11414656771799629, "grad_norm": 6.91449499130249, "learning_rate": 9.72950109017161e-06, "loss": 0.268, "step": 2461 }, { "epoch": 0.11419294990723562, "grad_norm": 6.0720415115356445, "learning_rate": 9.729262256655438e-06, "loss": 0.3479, "step": 2462 }, { "epoch": 0.11423933209647495, "grad_norm": 6.418968677520752, "learning_rate": 9.729023320682294e-06, "loss": 0.3384, "step": 2463 }, { "epoch": 0.11428571428571428, "grad_norm": 6.137502193450928, "learning_rate": 9.728784282257353e-06, "loss": 0.3968, "step": 2464 }, { "epoch": 0.11433209647495361, "grad_norm": 5.475340843200684, "learning_rate": 9.728545141385796e-06, "loss": 0.4409, "step": 2465 }, { "epoch": 0.11437847866419296, "grad_norm": 10.518699645996094, "learning_rate": 9.728305898072801e-06, "loss": 0.4408, "step": 2466 }, { "epoch": 0.11442486085343229, "grad_norm": 4.328507423400879, "learning_rate": 9.728066552323554e-06, "loss": 0.272, "step": 2467 }, { "epoch": 0.11447124304267162, "grad_norm": 8.972005844116211, "learning_rate": 9.727827104143239e-06, "loss": 0.3865, "step": 2468 }, { "epoch": 0.11451762523191095, "grad_norm": 3.943951368331909, "learning_rate": 9.727587553537043e-06, "loss": 0.2848, "step": 2469 }, { "epoch": 0.11456400742115028, "grad_norm": 5.000097274780273, "learning_rate": 9.727347900510155e-06, "loss": 0.2443, "step": 2470 }, { "epoch": 0.1146103896103896, "grad_norm": 7.497814655303955, "learning_rate": 9.72710814506777e-06, "loss": 0.3694, "step": 2471 }, { "epoch": 0.11465677179962894, "grad_norm": 8.4337158203125, "learning_rate": 9.72686828721508e-06, "loss": 0.4383, "step": 2472 }, { "epoch": 0.11470315398886828, "grad_norm": 9.143965721130371, "learning_rate": 9.726628326957282e-06, "loss": 0.3949, "step": 2473 }, { "epoch": 0.11474953617810761, "grad_norm": 6.484223365783691, "learning_rate": 9.726388264299573e-06, "loss": 0.4098, "step": 2474 }, { "epoch": 0.11479591836734694, "grad_norm": 7.464224815368652, "learning_rate": 9.726148099247155e-06, "loss": 0.3348, "step": 2475 }, { "epoch": 0.11484230055658627, "grad_norm": 4.773985862731934, "learning_rate": 9.725907831805233e-06, "loss": 0.3509, "step": 2476 }, { "epoch": 0.1148886827458256, "grad_norm": 7.904591083526611, "learning_rate": 9.725667461979009e-06, "loss": 0.434, "step": 2477 }, { "epoch": 0.11493506493506493, "grad_norm": 6.650365829467773, "learning_rate": 9.725426989773692e-06, "loss": 0.4328, "step": 2478 }, { "epoch": 0.11498144712430426, "grad_norm": 14.349906921386719, "learning_rate": 9.725186415194493e-06, "loss": 0.4879, "step": 2479 }, { "epoch": 0.1150278293135436, "grad_norm": 8.050241470336914, "learning_rate": 9.724945738246622e-06, "loss": 0.5601, "step": 2480 }, { "epoch": 0.11507421150278294, "grad_norm": 8.427783966064453, "learning_rate": 9.724704958935294e-06, "loss": 0.4061, "step": 2481 }, { "epoch": 0.11512059369202227, "grad_norm": 7.217484474182129, "learning_rate": 9.724464077265723e-06, "loss": 0.3542, "step": 2482 }, { "epoch": 0.1151669758812616, "grad_norm": 4.7009711265563965, "learning_rate": 9.724223093243132e-06, "loss": 0.3292, "step": 2483 }, { "epoch": 0.11521335807050093, "grad_norm": 6.393856048583984, "learning_rate": 9.723982006872738e-06, "loss": 0.3478, "step": 2484 }, { "epoch": 0.11525974025974026, "grad_norm": 5.520620346069336, "learning_rate": 9.723740818159767e-06, "loss": 0.3648, "step": 2485 }, { "epoch": 0.11530612244897959, "grad_norm": 4.303919315338135, "learning_rate": 9.723499527109442e-06, "loss": 0.3816, "step": 2486 }, { "epoch": 0.11535250463821893, "grad_norm": 7.257421493530273, "learning_rate": 9.72325813372699e-06, "loss": 0.3348, "step": 2487 }, { "epoch": 0.11539888682745826, "grad_norm": 5.744921684265137, "learning_rate": 9.723016638017644e-06, "loss": 0.3678, "step": 2488 }, { "epoch": 0.11544526901669759, "grad_norm": 10.548845291137695, "learning_rate": 9.72277503998663e-06, "loss": 0.3689, "step": 2489 }, { "epoch": 0.11549165120593692, "grad_norm": 7.3668389320373535, "learning_rate": 9.722533339639191e-06, "loss": 0.4853, "step": 2490 }, { "epoch": 0.11553803339517625, "grad_norm": 5.604081153869629, "learning_rate": 9.722291536980554e-06, "loss": 0.3508, "step": 2491 }, { "epoch": 0.11558441558441558, "grad_norm": 5.782707214355469, "learning_rate": 9.722049632015965e-06, "loss": 0.2317, "step": 2492 }, { "epoch": 0.11563079777365491, "grad_norm": 5.956707000732422, "learning_rate": 9.721807624750658e-06, "loss": 0.2914, "step": 2493 }, { "epoch": 0.11567717996289426, "grad_norm": 6.122177600860596, "learning_rate": 9.72156551518988e-06, "loss": 0.3833, "step": 2494 }, { "epoch": 0.11572356215213359, "grad_norm": 8.634376525878906, "learning_rate": 9.721323303338876e-06, "loss": 0.4291, "step": 2495 }, { "epoch": 0.11576994434137292, "grad_norm": 11.016212463378906, "learning_rate": 9.721080989202894e-06, "loss": 0.3656, "step": 2496 }, { "epoch": 0.11581632653061225, "grad_norm": 10.428672790527344, "learning_rate": 9.72083857278718e-06, "loss": 0.5324, "step": 2497 }, { "epoch": 0.11586270871985158, "grad_norm": 4.468198776245117, "learning_rate": 9.72059605409699e-06, "loss": 0.1945, "step": 2498 }, { "epoch": 0.1159090909090909, "grad_norm": 8.385214805603027, "learning_rate": 9.720353433137576e-06, "loss": 0.2825, "step": 2499 }, { "epoch": 0.11595547309833024, "grad_norm": 6.257286548614502, "learning_rate": 9.720110709914194e-06, "loss": 0.3243, "step": 2500 }, { "epoch": 0.11600185528756958, "grad_norm": 14.554330825805664, "learning_rate": 9.719867884432104e-06, "loss": 0.4549, "step": 2501 }, { "epoch": 0.11604823747680891, "grad_norm": 8.444414138793945, "learning_rate": 9.719624956696565e-06, "loss": 0.3041, "step": 2502 }, { "epoch": 0.11609461966604824, "grad_norm": 11.04151725769043, "learning_rate": 9.719381926712842e-06, "loss": 0.3297, "step": 2503 }, { "epoch": 0.11614100185528757, "grad_norm": 13.582706451416016, "learning_rate": 9.719138794486198e-06, "loss": 0.4058, "step": 2504 }, { "epoch": 0.1161873840445269, "grad_norm": 8.181230545043945, "learning_rate": 9.7188955600219e-06, "loss": 0.4886, "step": 2505 }, { "epoch": 0.11623376623376623, "grad_norm": 8.96206283569336, "learning_rate": 9.71865222332522e-06, "loss": 0.5203, "step": 2506 }, { "epoch": 0.11628014842300556, "grad_norm": 9.522283554077148, "learning_rate": 9.718408784401427e-06, "loss": 0.5173, "step": 2507 }, { "epoch": 0.11632653061224489, "grad_norm": 6.111891269683838, "learning_rate": 9.7181652432558e-06, "loss": 0.3721, "step": 2508 }, { "epoch": 0.11637291280148424, "grad_norm": 11.249005317687988, "learning_rate": 9.717921599893607e-06, "loss": 0.4917, "step": 2509 }, { "epoch": 0.11641929499072357, "grad_norm": 14.779200553894043, "learning_rate": 9.717677854320133e-06, "loss": 0.3591, "step": 2510 }, { "epoch": 0.1164656771799629, "grad_norm": 7.310931205749512, "learning_rate": 9.717434006540657e-06, "loss": 0.3816, "step": 2511 }, { "epoch": 0.11651205936920223, "grad_norm": 6.273972988128662, "learning_rate": 9.71719005656046e-06, "loss": 0.3561, "step": 2512 }, { "epoch": 0.11655844155844156, "grad_norm": 14.669228553771973, "learning_rate": 9.716946004384831e-06, "loss": 0.5405, "step": 2513 }, { "epoch": 0.11660482374768089, "grad_norm": 7.401534557342529, "learning_rate": 9.716701850019053e-06, "loss": 0.3274, "step": 2514 }, { "epoch": 0.11665120593692022, "grad_norm": 10.30138874053955, "learning_rate": 9.716457593468418e-06, "loss": 0.3842, "step": 2515 }, { "epoch": 0.11669758812615956, "grad_norm": 7.627170562744141, "learning_rate": 9.716213234738216e-06, "loss": 0.4433, "step": 2516 }, { "epoch": 0.11674397031539889, "grad_norm": 15.92538070678711, "learning_rate": 9.715968773833741e-06, "loss": 0.534, "step": 2517 }, { "epoch": 0.11679035250463822, "grad_norm": 7.847415924072266, "learning_rate": 9.715724210760291e-06, "loss": 0.4409, "step": 2518 }, { "epoch": 0.11683673469387755, "grad_norm": 6.432397842407227, "learning_rate": 9.715479545523165e-06, "loss": 0.3394, "step": 2519 }, { "epoch": 0.11688311688311688, "grad_norm": 5.1174116134643555, "learning_rate": 9.715234778127658e-06, "loss": 0.2925, "step": 2520 }, { "epoch": 0.11692949907235621, "grad_norm": 10.688702583312988, "learning_rate": 9.71498990857908e-06, "loss": 0.3123, "step": 2521 }, { "epoch": 0.11697588126159554, "grad_norm": 4.864617824554443, "learning_rate": 9.714744936882732e-06, "loss": 0.2912, "step": 2522 }, { "epoch": 0.11702226345083488, "grad_norm": 7.627715587615967, "learning_rate": 9.71449986304392e-06, "loss": 0.3878, "step": 2523 }, { "epoch": 0.11706864564007421, "grad_norm": 4.68123722076416, "learning_rate": 9.714254687067957e-06, "loss": 0.3185, "step": 2524 }, { "epoch": 0.11711502782931354, "grad_norm": 5.839293479919434, "learning_rate": 9.714009408960152e-06, "loss": 0.3863, "step": 2525 }, { "epoch": 0.11716141001855288, "grad_norm": 7.541346549987793, "learning_rate": 9.713764028725818e-06, "loss": 0.2882, "step": 2526 }, { "epoch": 0.1172077922077922, "grad_norm": 8.465347290039062, "learning_rate": 9.713518546370275e-06, "loss": 0.4969, "step": 2527 }, { "epoch": 0.11725417439703154, "grad_norm": 8.848076820373535, "learning_rate": 9.713272961898837e-06, "loss": 0.4135, "step": 2528 }, { "epoch": 0.11730055658627087, "grad_norm": 8.917451858520508, "learning_rate": 9.713027275316827e-06, "loss": 0.4596, "step": 2529 }, { "epoch": 0.11734693877551021, "grad_norm": 6.1006855964660645, "learning_rate": 9.712781486629567e-06, "loss": 0.3512, "step": 2530 }, { "epoch": 0.11739332096474954, "grad_norm": 17.517169952392578, "learning_rate": 9.712535595842382e-06, "loss": 0.4405, "step": 2531 }, { "epoch": 0.11743970315398887, "grad_norm": 9.386740684509277, "learning_rate": 9.712289602960598e-06, "loss": 0.438, "step": 2532 }, { "epoch": 0.1174860853432282, "grad_norm": 7.563076496124268, "learning_rate": 9.712043507989545e-06, "loss": 0.4915, "step": 2533 }, { "epoch": 0.11753246753246753, "grad_norm": 8.284646987915039, "learning_rate": 9.711797310934556e-06, "loss": 0.3322, "step": 2534 }, { "epoch": 0.11757884972170686, "grad_norm": 8.12000846862793, "learning_rate": 9.711551011800964e-06, "loss": 0.3934, "step": 2535 }, { "epoch": 0.11762523191094619, "grad_norm": 7.808898448944092, "learning_rate": 9.711304610594104e-06, "loss": 0.3882, "step": 2536 }, { "epoch": 0.11767161410018553, "grad_norm": 5.432461738586426, "learning_rate": 9.711058107319313e-06, "loss": 0.3289, "step": 2537 }, { "epoch": 0.11771799628942486, "grad_norm": 7.638829708099365, "learning_rate": 9.710811501981933e-06, "loss": 0.3386, "step": 2538 }, { "epoch": 0.1177643784786642, "grad_norm": 9.068074226379395, "learning_rate": 9.71056479458731e-06, "loss": 0.3908, "step": 2539 }, { "epoch": 0.11781076066790352, "grad_norm": 8.25275993347168, "learning_rate": 9.710317985140782e-06, "loss": 0.2947, "step": 2540 }, { "epoch": 0.11785714285714285, "grad_norm": 9.952778816223145, "learning_rate": 9.7100710736477e-06, "loss": 0.4301, "step": 2541 }, { "epoch": 0.11790352504638218, "grad_norm": 6.778037071228027, "learning_rate": 9.709824060113414e-06, "loss": 0.3414, "step": 2542 }, { "epoch": 0.11794990723562152, "grad_norm": 8.742866516113281, "learning_rate": 9.70957694454327e-06, "loss": 0.3803, "step": 2543 }, { "epoch": 0.11799628942486086, "grad_norm": 8.561656951904297, "learning_rate": 9.709329726942628e-06, "loss": 0.4575, "step": 2544 }, { "epoch": 0.11804267161410019, "grad_norm": 7.421713352203369, "learning_rate": 9.709082407316842e-06, "loss": 0.3257, "step": 2545 }, { "epoch": 0.11808905380333952, "grad_norm": 6.998983860015869, "learning_rate": 9.708834985671269e-06, "loss": 0.3908, "step": 2546 }, { "epoch": 0.11813543599257885, "grad_norm": 5.091124534606934, "learning_rate": 9.70858746201127e-06, "loss": 0.2903, "step": 2547 }, { "epoch": 0.11818181818181818, "grad_norm": 12.457508087158203, "learning_rate": 9.708339836342207e-06, "loss": 0.4632, "step": 2548 }, { "epoch": 0.11822820037105751, "grad_norm": 8.784845352172852, "learning_rate": 9.708092108669444e-06, "loss": 0.3872, "step": 2549 }, { "epoch": 0.11827458256029684, "grad_norm": 7.43298864364624, "learning_rate": 9.707844278998349e-06, "loss": 0.4111, "step": 2550 }, { "epoch": 0.11832096474953618, "grad_norm": 9.056387901306152, "learning_rate": 9.707596347334292e-06, "loss": 0.3936, "step": 2551 }, { "epoch": 0.11836734693877551, "grad_norm": 14.84138011932373, "learning_rate": 9.70734831368264e-06, "loss": 0.3716, "step": 2552 }, { "epoch": 0.11841372912801484, "grad_norm": 6.627893447875977, "learning_rate": 9.707100178048772e-06, "loss": 0.3628, "step": 2553 }, { "epoch": 0.11846011131725417, "grad_norm": 7.612411975860596, "learning_rate": 9.706851940438062e-06, "loss": 0.3681, "step": 2554 }, { "epoch": 0.1185064935064935, "grad_norm": 7.066629886627197, "learning_rate": 9.706603600855887e-06, "loss": 0.3547, "step": 2555 }, { "epoch": 0.11855287569573283, "grad_norm": 9.476188659667969, "learning_rate": 9.706355159307627e-06, "loss": 0.4069, "step": 2556 }, { "epoch": 0.11859925788497216, "grad_norm": 12.247170448303223, "learning_rate": 9.706106615798665e-06, "loss": 0.4041, "step": 2557 }, { "epoch": 0.11864564007421151, "grad_norm": 15.3515625, "learning_rate": 9.705857970334385e-06, "loss": 0.4011, "step": 2558 }, { "epoch": 0.11869202226345084, "grad_norm": 10.347610473632812, "learning_rate": 9.705609222920173e-06, "loss": 0.4332, "step": 2559 }, { "epoch": 0.11873840445269017, "grad_norm": 13.052828788757324, "learning_rate": 9.70536037356142e-06, "loss": 0.397, "step": 2560 }, { "epoch": 0.1187847866419295, "grad_norm": 7.499804973602295, "learning_rate": 9.705111422263518e-06, "loss": 0.3735, "step": 2561 }, { "epoch": 0.11883116883116883, "grad_norm": 7.811633110046387, "learning_rate": 9.704862369031857e-06, "loss": 0.4746, "step": 2562 }, { "epoch": 0.11887755102040816, "grad_norm": 10.296438217163086, "learning_rate": 9.704613213871836e-06, "loss": 0.4362, "step": 2563 }, { "epoch": 0.11892393320964749, "grad_norm": 6.774866104125977, "learning_rate": 9.70436395678885e-06, "loss": 0.3402, "step": 2564 }, { "epoch": 0.11897031539888683, "grad_norm": 7.344481945037842, "learning_rate": 9.7041145977883e-06, "loss": 0.3934, "step": 2565 }, { "epoch": 0.11901669758812616, "grad_norm": 11.244707107543945, "learning_rate": 9.703865136875589e-06, "loss": 0.3897, "step": 2566 }, { "epoch": 0.1190630797773655, "grad_norm": 6.208740711212158, "learning_rate": 9.70361557405612e-06, "loss": 0.4089, "step": 2567 }, { "epoch": 0.11910946196660482, "grad_norm": 15.085216522216797, "learning_rate": 9.7033659093353e-06, "loss": 0.546, "step": 2568 }, { "epoch": 0.11915584415584415, "grad_norm": 11.769935607910156, "learning_rate": 9.70311614271854e-06, "loss": 0.515, "step": 2569 }, { "epoch": 0.11920222634508348, "grad_norm": 7.078968524932861, "learning_rate": 9.702866274211248e-06, "loss": 0.3215, "step": 2570 }, { "epoch": 0.11924860853432281, "grad_norm": 11.0460844039917, "learning_rate": 9.70261630381884e-06, "loss": 0.4547, "step": 2571 }, { "epoch": 0.11929499072356216, "grad_norm": 7.823240756988525, "learning_rate": 9.70236623154673e-06, "loss": 0.348, "step": 2572 }, { "epoch": 0.11934137291280149, "grad_norm": 5.259389400482178, "learning_rate": 9.702116057400335e-06, "loss": 0.294, "step": 2573 }, { "epoch": 0.11938775510204082, "grad_norm": 14.449197769165039, "learning_rate": 9.701865781385075e-06, "loss": 0.3873, "step": 2574 }, { "epoch": 0.11943413729128015, "grad_norm": 9.160083770751953, "learning_rate": 9.701615403506375e-06, "loss": 0.3928, "step": 2575 }, { "epoch": 0.11948051948051948, "grad_norm": 9.025311470031738, "learning_rate": 9.701364923769656e-06, "loss": 0.4857, "step": 2576 }, { "epoch": 0.11952690166975881, "grad_norm": 5.27549934387207, "learning_rate": 9.701114342180346e-06, "loss": 0.3239, "step": 2577 }, { "epoch": 0.11957328385899814, "grad_norm": 38.854549407958984, "learning_rate": 9.700863658743872e-06, "loss": 0.3952, "step": 2578 }, { "epoch": 0.11961966604823748, "grad_norm": 7.295217514038086, "learning_rate": 9.700612873465667e-06, "loss": 0.4528, "step": 2579 }, { "epoch": 0.11966604823747681, "grad_norm": 7.0645012855529785, "learning_rate": 9.700361986351164e-06, "loss": 0.3738, "step": 2580 }, { "epoch": 0.11971243042671614, "grad_norm": 8.292814254760742, "learning_rate": 9.700110997405798e-06, "loss": 0.3907, "step": 2581 }, { "epoch": 0.11975881261595547, "grad_norm": 8.29224681854248, "learning_rate": 9.699859906635005e-06, "loss": 0.3828, "step": 2582 }, { "epoch": 0.1198051948051948, "grad_norm": 5.48253870010376, "learning_rate": 9.699608714044226e-06, "loss": 0.3581, "step": 2583 }, { "epoch": 0.11985157699443413, "grad_norm": 5.950146675109863, "learning_rate": 9.699357419638904e-06, "loss": 0.4455, "step": 2584 }, { "epoch": 0.11989795918367346, "grad_norm": 4.81254243850708, "learning_rate": 9.699106023424482e-06, "loss": 0.3196, "step": 2585 }, { "epoch": 0.11994434137291281, "grad_norm": 7.69331169128418, "learning_rate": 9.698854525406408e-06, "loss": 0.3202, "step": 2586 }, { "epoch": 0.11999072356215214, "grad_norm": 5.417251110076904, "learning_rate": 9.698602925590126e-06, "loss": 0.4225, "step": 2587 }, { "epoch": 0.12003710575139147, "grad_norm": 8.95651912689209, "learning_rate": 9.698351223981091e-06, "loss": 0.3285, "step": 2588 }, { "epoch": 0.1200834879406308, "grad_norm": 7.837121486663818, "learning_rate": 9.698099420584757e-06, "loss": 0.2036, "step": 2589 }, { "epoch": 0.12012987012987013, "grad_norm": 7.865808486938477, "learning_rate": 9.697847515406574e-06, "loss": 0.3726, "step": 2590 }, { "epoch": 0.12017625231910946, "grad_norm": 5.869401454925537, "learning_rate": 9.697595508452004e-06, "loss": 0.3453, "step": 2591 }, { "epoch": 0.12022263450834879, "grad_norm": 10.042312622070312, "learning_rate": 9.697343399726505e-06, "loss": 0.3415, "step": 2592 }, { "epoch": 0.12026901669758813, "grad_norm": 17.45145606994629, "learning_rate": 9.697091189235539e-06, "loss": 0.4872, "step": 2593 }, { "epoch": 0.12031539888682746, "grad_norm": 6.379114151000977, "learning_rate": 9.696838876984568e-06, "loss": 0.3453, "step": 2594 }, { "epoch": 0.12036178107606679, "grad_norm": 12.490964889526367, "learning_rate": 9.696586462979062e-06, "loss": 0.4781, "step": 2595 }, { "epoch": 0.12040816326530612, "grad_norm": 8.916146278381348, "learning_rate": 9.696333947224488e-06, "loss": 0.4423, "step": 2596 }, { "epoch": 0.12045454545454545, "grad_norm": 6.855483055114746, "learning_rate": 9.696081329726314e-06, "loss": 0.2913, "step": 2597 }, { "epoch": 0.12050092764378478, "grad_norm": 6.721715927124023, "learning_rate": 9.695828610490016e-06, "loss": 0.2459, "step": 2598 }, { "epoch": 0.12054730983302411, "grad_norm": 10.316420555114746, "learning_rate": 9.695575789521068e-06, "loss": 0.4186, "step": 2599 }, { "epoch": 0.12059369202226346, "grad_norm": 6.0228705406188965, "learning_rate": 9.695322866824948e-06, "loss": 0.409, "step": 2600 }, { "epoch": 0.12064007421150279, "grad_norm": 6.900842189788818, "learning_rate": 9.695069842407133e-06, "loss": 0.3782, "step": 2601 }, { "epoch": 0.12068645640074212, "grad_norm": 7.4234089851379395, "learning_rate": 9.694816716273106e-06, "loss": 0.3787, "step": 2602 }, { "epoch": 0.12073283858998145, "grad_norm": 5.628103733062744, "learning_rate": 9.694563488428353e-06, "loss": 0.3414, "step": 2603 }, { "epoch": 0.12077922077922078, "grad_norm": 5.665472507476807, "learning_rate": 9.694310158878357e-06, "loss": 0.3631, "step": 2604 }, { "epoch": 0.12082560296846011, "grad_norm": 10.974515914916992, "learning_rate": 9.694056727628607e-06, "loss": 0.4263, "step": 2605 }, { "epoch": 0.12087198515769944, "grad_norm": 20.15072250366211, "learning_rate": 9.693803194684594e-06, "loss": 0.2899, "step": 2606 }, { "epoch": 0.12091836734693878, "grad_norm": 9.215827941894531, "learning_rate": 9.693549560051812e-06, "loss": 0.3634, "step": 2607 }, { "epoch": 0.12096474953617811, "grad_norm": 7.898979187011719, "learning_rate": 9.693295823735754e-06, "loss": 0.2668, "step": 2608 }, { "epoch": 0.12101113172541744, "grad_norm": 5.978367805480957, "learning_rate": 9.693041985741915e-06, "loss": 0.2579, "step": 2609 }, { "epoch": 0.12105751391465677, "grad_norm": 8.337867736816406, "learning_rate": 9.692788046075799e-06, "loss": 0.4393, "step": 2610 }, { "epoch": 0.1211038961038961, "grad_norm": 6.866217613220215, "learning_rate": 9.692534004742906e-06, "loss": 0.3735, "step": 2611 }, { "epoch": 0.12115027829313543, "grad_norm": 15.994229316711426, "learning_rate": 9.692279861748737e-06, "loss": 0.4875, "step": 2612 }, { "epoch": 0.12119666048237476, "grad_norm": 5.568896293640137, "learning_rate": 9.6920256170988e-06, "loss": 0.3155, "step": 2613 }, { "epoch": 0.1212430426716141, "grad_norm": 7.53630256652832, "learning_rate": 9.691771270798602e-06, "loss": 0.3709, "step": 2614 }, { "epoch": 0.12128942486085344, "grad_norm": 6.804693222045898, "learning_rate": 9.691516822853656e-06, "loss": 0.2679, "step": 2615 }, { "epoch": 0.12133580705009277, "grad_norm": 8.958477973937988, "learning_rate": 9.691262273269472e-06, "loss": 0.3784, "step": 2616 }, { "epoch": 0.1213821892393321, "grad_norm": 10.98318099975586, "learning_rate": 9.691007622051564e-06, "loss": 0.3901, "step": 2617 }, { "epoch": 0.12142857142857143, "grad_norm": 8.463451385498047, "learning_rate": 9.690752869205452e-06, "loss": 0.351, "step": 2618 }, { "epoch": 0.12147495361781076, "grad_norm": 7.818624973297119, "learning_rate": 9.690498014736651e-06, "loss": 0.3838, "step": 2619 }, { "epoch": 0.12152133580705009, "grad_norm": 16.04256248474121, "learning_rate": 9.690243058650686e-06, "loss": 0.483, "step": 2620 }, { "epoch": 0.12156771799628943, "grad_norm": 8.286072731018066, "learning_rate": 9.689988000953079e-06, "loss": 0.3674, "step": 2621 }, { "epoch": 0.12161410018552876, "grad_norm": 6.411365509033203, "learning_rate": 9.689732841649356e-06, "loss": 0.3376, "step": 2622 }, { "epoch": 0.12166048237476809, "grad_norm": 19.356172561645508, "learning_rate": 9.689477580745043e-06, "loss": 0.4801, "step": 2623 }, { "epoch": 0.12170686456400742, "grad_norm": 7.500217914581299, "learning_rate": 9.689222218245673e-06, "loss": 0.3973, "step": 2624 }, { "epoch": 0.12175324675324675, "grad_norm": 6.56366491317749, "learning_rate": 9.688966754156776e-06, "loss": 0.2131, "step": 2625 }, { "epoch": 0.12179962894248608, "grad_norm": 9.786648750305176, "learning_rate": 9.688711188483888e-06, "loss": 0.3119, "step": 2626 }, { "epoch": 0.12184601113172541, "grad_norm": 6.409265995025635, "learning_rate": 9.688455521232545e-06, "loss": 0.3415, "step": 2627 }, { "epoch": 0.12189239332096476, "grad_norm": 6.430669784545898, "learning_rate": 9.688199752408285e-06, "loss": 0.3978, "step": 2628 }, { "epoch": 0.12193877551020409, "grad_norm": 8.451266288757324, "learning_rate": 9.687943882016652e-06, "loss": 0.3745, "step": 2629 }, { "epoch": 0.12198515769944342, "grad_norm": 7.182975769042969, "learning_rate": 9.687687910063186e-06, "loss": 0.3891, "step": 2630 }, { "epoch": 0.12203153988868275, "grad_norm": 13.537126541137695, "learning_rate": 9.687431836553435e-06, "loss": 0.4233, "step": 2631 }, { "epoch": 0.12207792207792208, "grad_norm": 6.237149715423584, "learning_rate": 9.687175661492944e-06, "loss": 0.4015, "step": 2632 }, { "epoch": 0.12212430426716141, "grad_norm": 7.812453746795654, "learning_rate": 9.686919384887267e-06, "loss": 0.3943, "step": 2633 }, { "epoch": 0.12217068645640074, "grad_norm": 12.154934883117676, "learning_rate": 9.686663006741952e-06, "loss": 0.4724, "step": 2634 }, { "epoch": 0.12221706864564007, "grad_norm": 4.37749719619751, "learning_rate": 9.686406527062556e-06, "loss": 0.3057, "step": 2635 }, { "epoch": 0.12226345083487941, "grad_norm": 7.072873592376709, "learning_rate": 9.686149945854632e-06, "loss": 0.367, "step": 2636 }, { "epoch": 0.12230983302411874, "grad_norm": 7.7284932136535645, "learning_rate": 9.685893263123744e-06, "loss": 0.4534, "step": 2637 }, { "epoch": 0.12235621521335807, "grad_norm": 8.732033729553223, "learning_rate": 9.685636478875448e-06, "loss": 0.3852, "step": 2638 }, { "epoch": 0.1224025974025974, "grad_norm": 11.368253707885742, "learning_rate": 9.685379593115309e-06, "loss": 0.4497, "step": 2639 }, { "epoch": 0.12244897959183673, "grad_norm": 11.362695693969727, "learning_rate": 9.685122605848894e-06, "loss": 0.4745, "step": 2640 }, { "epoch": 0.12249536178107606, "grad_norm": 11.634739875793457, "learning_rate": 9.684865517081768e-06, "loss": 0.4673, "step": 2641 }, { "epoch": 0.12254174397031539, "grad_norm": 7.988464832305908, "learning_rate": 9.684608326819503e-06, "loss": 0.3681, "step": 2642 }, { "epoch": 0.12258812615955474, "grad_norm": 6.211366176605225, "learning_rate": 9.684351035067668e-06, "loss": 0.3458, "step": 2643 }, { "epoch": 0.12263450834879407, "grad_norm": 11.339733123779297, "learning_rate": 9.684093641831838e-06, "loss": 0.5061, "step": 2644 }, { "epoch": 0.1226808905380334, "grad_norm": 10.195477485656738, "learning_rate": 9.68383614711759e-06, "loss": 0.4647, "step": 2645 }, { "epoch": 0.12272727272727273, "grad_norm": 6.945659637451172, "learning_rate": 9.683578550930503e-06, "loss": 0.3375, "step": 2646 }, { "epoch": 0.12277365491651206, "grad_norm": 5.350811958312988, "learning_rate": 9.683320853276158e-06, "loss": 0.3916, "step": 2647 }, { "epoch": 0.12282003710575139, "grad_norm": 15.074614524841309, "learning_rate": 9.683063054160136e-06, "loss": 0.332, "step": 2648 }, { "epoch": 0.12286641929499072, "grad_norm": 5.098145008087158, "learning_rate": 9.682805153588022e-06, "loss": 0.366, "step": 2649 }, { "epoch": 0.12291280148423006, "grad_norm": 7.3777079582214355, "learning_rate": 9.682547151565405e-06, "loss": 0.4242, "step": 2650 }, { "epoch": 0.12295918367346939, "grad_norm": 6.616337776184082, "learning_rate": 9.682289048097875e-06, "loss": 0.4065, "step": 2651 }, { "epoch": 0.12300556586270872, "grad_norm": 6.2924370765686035, "learning_rate": 9.682030843191021e-06, "loss": 0.2425, "step": 2652 }, { "epoch": 0.12305194805194805, "grad_norm": 5.62634801864624, "learning_rate": 9.681772536850439e-06, "loss": 0.2753, "step": 2653 }, { "epoch": 0.12309833024118738, "grad_norm": 10.748969078063965, "learning_rate": 9.681514129081725e-06, "loss": 0.443, "step": 2654 }, { "epoch": 0.12314471243042671, "grad_norm": 4.435182094573975, "learning_rate": 9.681255619890475e-06, "loss": 0.271, "step": 2655 }, { "epoch": 0.12319109461966604, "grad_norm": 11.063043594360352, "learning_rate": 9.680997009282291e-06, "loss": 0.4386, "step": 2656 }, { "epoch": 0.12323747680890539, "grad_norm": 5.152188777923584, "learning_rate": 9.680738297262777e-06, "loss": 0.3346, "step": 2657 }, { "epoch": 0.12328385899814472, "grad_norm": 15.617369651794434, "learning_rate": 9.680479483837534e-06, "loss": 0.5333, "step": 2658 }, { "epoch": 0.12333024118738405, "grad_norm": 15.824263572692871, "learning_rate": 9.680220569012176e-06, "loss": 0.533, "step": 2659 }, { "epoch": 0.12337662337662338, "grad_norm": 6.748260021209717, "learning_rate": 9.679961552792304e-06, "loss": 0.3934, "step": 2660 }, { "epoch": 0.1234230055658627, "grad_norm": 6.928738117218018, "learning_rate": 9.679702435183536e-06, "loss": 0.4132, "step": 2661 }, { "epoch": 0.12346938775510204, "grad_norm": 9.569464683532715, "learning_rate": 9.679443216191482e-06, "loss": 0.4171, "step": 2662 }, { "epoch": 0.12351576994434137, "grad_norm": 8.072686195373535, "learning_rate": 9.67918389582176e-06, "loss": 0.2857, "step": 2663 }, { "epoch": 0.12356215213358071, "grad_norm": 9.701398849487305, "learning_rate": 9.678924474079986e-06, "loss": 0.4448, "step": 2664 }, { "epoch": 0.12360853432282004, "grad_norm": 7.814606189727783, "learning_rate": 9.678664950971778e-06, "loss": 0.3707, "step": 2665 }, { "epoch": 0.12365491651205937, "grad_norm": 6.8174309730529785, "learning_rate": 9.678405326502767e-06, "loss": 0.3564, "step": 2666 }, { "epoch": 0.1237012987012987, "grad_norm": 8.277583122253418, "learning_rate": 9.678145600678569e-06, "loss": 0.451, "step": 2667 }, { "epoch": 0.12374768089053803, "grad_norm": 5.000206470489502, "learning_rate": 9.677885773504816e-06, "loss": 0.2843, "step": 2668 }, { "epoch": 0.12379406307977736, "grad_norm": 11.854022979736328, "learning_rate": 9.677625844987133e-06, "loss": 0.5773, "step": 2669 }, { "epoch": 0.12384044526901669, "grad_norm": 9.831462860107422, "learning_rate": 9.677365815131155e-06, "loss": 0.4481, "step": 2670 }, { "epoch": 0.12388682745825604, "grad_norm": 5.673962116241455, "learning_rate": 9.677105683942511e-06, "loss": 0.4704, "step": 2671 }, { "epoch": 0.12393320964749537, "grad_norm": 9.139973640441895, "learning_rate": 9.676845451426841e-06, "loss": 0.3369, "step": 2672 }, { "epoch": 0.1239795918367347, "grad_norm": 8.317221641540527, "learning_rate": 9.676585117589781e-06, "loss": 0.2931, "step": 2673 }, { "epoch": 0.12402597402597403, "grad_norm": 9.418966293334961, "learning_rate": 9.676324682436973e-06, "loss": 0.4985, "step": 2674 }, { "epoch": 0.12407235621521336, "grad_norm": 8.62631607055664, "learning_rate": 9.676064145974055e-06, "loss": 0.3051, "step": 2675 }, { "epoch": 0.12411873840445269, "grad_norm": 10.471818923950195, "learning_rate": 9.675803508206673e-06, "loss": 0.5371, "step": 2676 }, { "epoch": 0.12416512059369202, "grad_norm": 8.609306335449219, "learning_rate": 9.675542769140476e-06, "loss": 0.3984, "step": 2677 }, { "epoch": 0.12421150278293136, "grad_norm": 6.714972496032715, "learning_rate": 9.67528192878111e-06, "loss": 0.3056, "step": 2678 }, { "epoch": 0.12425788497217069, "grad_norm": 10.916147232055664, "learning_rate": 9.675020987134227e-06, "loss": 0.3318, "step": 2679 }, { "epoch": 0.12430426716141002, "grad_norm": 10.705193519592285, "learning_rate": 9.67475994420548e-06, "loss": 0.5065, "step": 2680 }, { "epoch": 0.12435064935064935, "grad_norm": 10.113852500915527, "learning_rate": 9.674498800000526e-06, "loss": 0.2828, "step": 2681 }, { "epoch": 0.12439703153988868, "grad_norm": 8.370561599731445, "learning_rate": 9.67423755452502e-06, "loss": 0.3293, "step": 2682 }, { "epoch": 0.12444341372912801, "grad_norm": 6.0902228355407715, "learning_rate": 9.673976207784623e-06, "loss": 0.2686, "step": 2683 }, { "epoch": 0.12448979591836734, "grad_norm": 6.5246124267578125, "learning_rate": 9.673714759784996e-06, "loss": 0.3894, "step": 2684 }, { "epoch": 0.12453617810760668, "grad_norm": 11.680013656616211, "learning_rate": 9.673453210531804e-06, "loss": 0.4606, "step": 2685 }, { "epoch": 0.12458256029684601, "grad_norm": 7.876079559326172, "learning_rate": 9.673191560030715e-06, "loss": 0.3514, "step": 2686 }, { "epoch": 0.12462894248608535, "grad_norm": 8.68073558807373, "learning_rate": 9.672929808287393e-06, "loss": 0.2096, "step": 2687 }, { "epoch": 0.12467532467532468, "grad_norm": 23.150178909301758, "learning_rate": 9.672667955307513e-06, "loss": 0.4744, "step": 2688 }, { "epoch": 0.124721706864564, "grad_norm": 13.657896995544434, "learning_rate": 9.672406001096746e-06, "loss": 0.5164, "step": 2689 }, { "epoch": 0.12476808905380334, "grad_norm": 4.312485694885254, "learning_rate": 9.672143945660768e-06, "loss": 0.3052, "step": 2690 }, { "epoch": 0.12481447124304267, "grad_norm": 5.446333408355713, "learning_rate": 9.671881789005255e-06, "loss": 0.3167, "step": 2691 }, { "epoch": 0.12486085343228201, "grad_norm": 5.8898186683654785, "learning_rate": 9.671619531135887e-06, "loss": 0.2796, "step": 2692 }, { "epoch": 0.12490723562152134, "grad_norm": 12.91630744934082, "learning_rate": 9.671357172058347e-06, "loss": 0.4022, "step": 2693 }, { "epoch": 0.12495361781076067, "grad_norm": 14.158639907836914, "learning_rate": 9.671094711778314e-06, "loss": 0.3254, "step": 2694 }, { "epoch": 0.125, "grad_norm": 8.940656661987305, "learning_rate": 9.670832150301482e-06, "loss": 0.3642, "step": 2695 }, { "epoch": 0.12504638218923933, "grad_norm": 7.707342624664307, "learning_rate": 9.670569487633534e-06, "loss": 0.3587, "step": 2696 }, { "epoch": 0.12509276437847866, "grad_norm": 5.025057315826416, "learning_rate": 9.670306723780163e-06, "loss": 0.3126, "step": 2697 }, { "epoch": 0.125139146567718, "grad_norm": 14.7017183303833, "learning_rate": 9.670043858747057e-06, "loss": 0.5172, "step": 2698 }, { "epoch": 0.12518552875695732, "grad_norm": 6.433660984039307, "learning_rate": 9.669780892539915e-06, "loss": 0.3206, "step": 2699 }, { "epoch": 0.12523191094619665, "grad_norm": 8.621437072753906, "learning_rate": 9.669517825164435e-06, "loss": 0.4074, "step": 2700 }, { "epoch": 0.12527829313543598, "grad_norm": 11.673209190368652, "learning_rate": 9.669254656626312e-06, "loss": 0.5778, "step": 2701 }, { "epoch": 0.1253246753246753, "grad_norm": 7.581501483917236, "learning_rate": 9.668991386931251e-06, "loss": 0.406, "step": 2702 }, { "epoch": 0.12537105751391467, "grad_norm": 5.853889465332031, "learning_rate": 9.668728016084953e-06, "loss": 0.3515, "step": 2703 }, { "epoch": 0.125417439703154, "grad_norm": 7.8165283203125, "learning_rate": 9.668464544093125e-06, "loss": 0.5029, "step": 2704 }, { "epoch": 0.12546382189239333, "grad_norm": 7.936046123504639, "learning_rate": 9.668200970961477e-06, "loss": 0.3311, "step": 2705 }, { "epoch": 0.12551020408163266, "grad_norm": 13.161494255065918, "learning_rate": 9.667937296695715e-06, "loss": 0.5909, "step": 2706 }, { "epoch": 0.125556586270872, "grad_norm": 11.022899627685547, "learning_rate": 9.667673521301555e-06, "loss": 0.3843, "step": 2707 }, { "epoch": 0.12560296846011132, "grad_norm": 3.927999973297119, "learning_rate": 9.667409644784708e-06, "loss": 0.3373, "step": 2708 }, { "epoch": 0.12564935064935065, "grad_norm": 4.358797073364258, "learning_rate": 9.667145667150894e-06, "loss": 0.3154, "step": 2709 }, { "epoch": 0.12569573283858998, "grad_norm": 5.515873908996582, "learning_rate": 9.666881588405832e-06, "loss": 0.3498, "step": 2710 }, { "epoch": 0.1257421150278293, "grad_norm": 7.795820713043213, "learning_rate": 9.66661740855524e-06, "loss": 0.3089, "step": 2711 }, { "epoch": 0.12578849721706864, "grad_norm": 13.651185989379883, "learning_rate": 9.666353127604845e-06, "loss": 0.4572, "step": 2712 }, { "epoch": 0.12583487940630797, "grad_norm": 4.4429826736450195, "learning_rate": 9.66608874556037e-06, "loss": 0.2966, "step": 2713 }, { "epoch": 0.1258812615955473, "grad_norm": 12.186704635620117, "learning_rate": 9.665824262427543e-06, "loss": 0.3584, "step": 2714 }, { "epoch": 0.12592764378478663, "grad_norm": 8.502074241638184, "learning_rate": 9.665559678212095e-06, "loss": 0.4054, "step": 2715 }, { "epoch": 0.12597402597402596, "grad_norm": 5.6989359855651855, "learning_rate": 9.665294992919758e-06, "loss": 0.2499, "step": 2716 }, { "epoch": 0.12602040816326532, "grad_norm": 7.91625452041626, "learning_rate": 9.665030206556265e-06, "loss": 0.3814, "step": 2717 }, { "epoch": 0.12606679035250465, "grad_norm": 6.669426918029785, "learning_rate": 9.66476531912735e-06, "loss": 0.3199, "step": 2718 }, { "epoch": 0.12611317254174398, "grad_norm": 6.3061842918396, "learning_rate": 9.664500330638759e-06, "loss": 0.3847, "step": 2719 }, { "epoch": 0.1261595547309833, "grad_norm": 8.351737976074219, "learning_rate": 9.664235241096228e-06, "loss": 0.5059, "step": 2720 }, { "epoch": 0.12620593692022264, "grad_norm": 8.567158699035645, "learning_rate": 9.6639700505055e-06, "loss": 0.3257, "step": 2721 }, { "epoch": 0.12625231910946197, "grad_norm": 11.310830116271973, "learning_rate": 9.663704758872321e-06, "loss": 0.3896, "step": 2722 }, { "epoch": 0.1262987012987013, "grad_norm": 5.8929667472839355, "learning_rate": 9.663439366202438e-06, "loss": 0.4192, "step": 2723 }, { "epoch": 0.12634508348794063, "grad_norm": 7.826566219329834, "learning_rate": 9.6631738725016e-06, "loss": 0.4022, "step": 2724 }, { "epoch": 0.12639146567717996, "grad_norm": 11.449487686157227, "learning_rate": 9.662908277775562e-06, "loss": 0.3379, "step": 2725 }, { "epoch": 0.1264378478664193, "grad_norm": 7.35101318359375, "learning_rate": 9.662642582030073e-06, "loss": 0.3205, "step": 2726 }, { "epoch": 0.12648423005565862, "grad_norm": 20.227724075317383, "learning_rate": 9.662376785270893e-06, "loss": 0.5346, "step": 2727 }, { "epoch": 0.12653061224489795, "grad_norm": 10.92877197265625, "learning_rate": 9.66211088750378e-06, "loss": 0.4925, "step": 2728 }, { "epoch": 0.12657699443413728, "grad_norm": 10.850037574768066, "learning_rate": 9.661844888734492e-06, "loss": 0.4826, "step": 2729 }, { "epoch": 0.1266233766233766, "grad_norm": 7.240566253662109, "learning_rate": 9.661578788968794e-06, "loss": 0.4176, "step": 2730 }, { "epoch": 0.12666975881261597, "grad_norm": 12.071174621582031, "learning_rate": 9.66131258821245e-06, "loss": 0.4503, "step": 2731 }, { "epoch": 0.1267161410018553, "grad_norm": 10.579204559326172, "learning_rate": 9.661046286471227e-06, "loss": 0.3586, "step": 2732 }, { "epoch": 0.12676252319109463, "grad_norm": 16.051401138305664, "learning_rate": 9.660779883750895e-06, "loss": 0.3434, "step": 2733 }, { "epoch": 0.12680890538033396, "grad_norm": 6.942300319671631, "learning_rate": 9.660513380057226e-06, "loss": 0.4642, "step": 2734 }, { "epoch": 0.1268552875695733, "grad_norm": 7.962601184844971, "learning_rate": 9.660246775395992e-06, "loss": 0.5144, "step": 2735 }, { "epoch": 0.12690166975881262, "grad_norm": 6.278521537780762, "learning_rate": 9.65998006977297e-06, "loss": 0.3937, "step": 2736 }, { "epoch": 0.12694805194805195, "grad_norm": 7.11600923538208, "learning_rate": 9.659713263193937e-06, "loss": 0.3885, "step": 2737 }, { "epoch": 0.12699443413729128, "grad_norm": 4.823896408081055, "learning_rate": 9.659446355664674e-06, "loss": 0.261, "step": 2738 }, { "epoch": 0.1270408163265306, "grad_norm": 4.817630290985107, "learning_rate": 9.659179347190963e-06, "loss": 0.3504, "step": 2739 }, { "epoch": 0.12708719851576994, "grad_norm": 10.275074005126953, "learning_rate": 9.65891223777859e-06, "loss": 0.4669, "step": 2740 }, { "epoch": 0.12713358070500927, "grad_norm": 5.832691669464111, "learning_rate": 9.65864502743334e-06, "loss": 0.3882, "step": 2741 }, { "epoch": 0.1271799628942486, "grad_norm": 6.630307674407959, "learning_rate": 9.658377716161003e-06, "loss": 0.3449, "step": 2742 }, { "epoch": 0.12722634508348793, "grad_norm": 10.483549118041992, "learning_rate": 9.658110303967369e-06, "loss": 0.5634, "step": 2743 }, { "epoch": 0.12727272727272726, "grad_norm": 7.811934471130371, "learning_rate": 9.657842790858235e-06, "loss": 0.3795, "step": 2744 }, { "epoch": 0.12731910946196662, "grad_norm": 10.396733283996582, "learning_rate": 9.65757517683939e-06, "loss": 0.3469, "step": 2745 }, { "epoch": 0.12736549165120595, "grad_norm": 11.381608009338379, "learning_rate": 9.657307461916637e-06, "loss": 0.5568, "step": 2746 }, { "epoch": 0.12741187384044528, "grad_norm": 13.103425979614258, "learning_rate": 9.657039646095774e-06, "loss": 0.4867, "step": 2747 }, { "epoch": 0.1274582560296846, "grad_norm": 8.437664031982422, "learning_rate": 9.656771729382603e-06, "loss": 0.4018, "step": 2748 }, { "epoch": 0.12750463821892394, "grad_norm": 13.831628799438477, "learning_rate": 9.656503711782929e-06, "loss": 0.3652, "step": 2749 }, { "epoch": 0.12755102040816327, "grad_norm": 8.042244911193848, "learning_rate": 9.656235593302559e-06, "loss": 0.4771, "step": 2750 }, { "epoch": 0.1275974025974026, "grad_norm": 22.5938663482666, "learning_rate": 9.655967373947301e-06, "loss": 0.4921, "step": 2751 }, { "epoch": 0.12764378478664193, "grad_norm": 8.671542167663574, "learning_rate": 9.655699053722965e-06, "loss": 0.4244, "step": 2752 }, { "epoch": 0.12769016697588126, "grad_norm": 5.918262958526611, "learning_rate": 9.655430632635364e-06, "loss": 0.3933, "step": 2753 }, { "epoch": 0.1277365491651206, "grad_norm": 8.861992835998535, "learning_rate": 9.655162110690314e-06, "loss": 0.5086, "step": 2754 }, { "epoch": 0.12778293135435992, "grad_norm": 8.18018627166748, "learning_rate": 9.654893487893633e-06, "loss": 0.4325, "step": 2755 }, { "epoch": 0.12782931354359925, "grad_norm": 5.951850891113281, "learning_rate": 9.65462476425114e-06, "loss": 0.3591, "step": 2756 }, { "epoch": 0.12787569573283858, "grad_norm": 11.492191314697266, "learning_rate": 9.654355939768654e-06, "loss": 0.3191, "step": 2757 }, { "epoch": 0.1279220779220779, "grad_norm": 7.712540149688721, "learning_rate": 9.654087014452004e-06, "loss": 0.2986, "step": 2758 }, { "epoch": 0.12796846011131727, "grad_norm": 7.232623100280762, "learning_rate": 9.653817988307013e-06, "loss": 0.4198, "step": 2759 }, { "epoch": 0.1280148423005566, "grad_norm": 10.257439613342285, "learning_rate": 9.65354886133951e-06, "loss": 0.3955, "step": 2760 }, { "epoch": 0.12806122448979593, "grad_norm": 6.270656585693359, "learning_rate": 9.653279633555325e-06, "loss": 0.4075, "step": 2761 }, { "epoch": 0.12810760667903526, "grad_norm": 13.623743057250977, "learning_rate": 9.65301030496029e-06, "loss": 0.4231, "step": 2762 }, { "epoch": 0.1281539888682746, "grad_norm": 7.744356632232666, "learning_rate": 9.652740875560242e-06, "loss": 0.3531, "step": 2763 }, { "epoch": 0.12820037105751392, "grad_norm": 8.11872386932373, "learning_rate": 9.652471345361018e-06, "loss": 0.4462, "step": 2764 }, { "epoch": 0.12824675324675325, "grad_norm": 8.853718757629395, "learning_rate": 9.652201714368457e-06, "loss": 0.348, "step": 2765 }, { "epoch": 0.12829313543599258, "grad_norm": 10.7180757522583, "learning_rate": 9.651931982588397e-06, "loss": 0.4636, "step": 2766 }, { "epoch": 0.1283395176252319, "grad_norm": 7.636736869812012, "learning_rate": 9.651662150026686e-06, "loss": 0.373, "step": 2767 }, { "epoch": 0.12838589981447124, "grad_norm": 6.335357189178467, "learning_rate": 9.651392216689167e-06, "loss": 0.3252, "step": 2768 }, { "epoch": 0.12843228200371057, "grad_norm": 10.816840171813965, "learning_rate": 9.651122182581689e-06, "loss": 0.3528, "step": 2769 }, { "epoch": 0.1284786641929499, "grad_norm": 8.632196426391602, "learning_rate": 9.650852047710101e-06, "loss": 0.3627, "step": 2770 }, { "epoch": 0.12852504638218923, "grad_norm": 4.005206108093262, "learning_rate": 9.65058181208026e-06, "loss": 0.2471, "step": 2771 }, { "epoch": 0.12857142857142856, "grad_norm": 10.17265796661377, "learning_rate": 9.650311475698014e-06, "loss": 0.3259, "step": 2772 }, { "epoch": 0.12861781076066792, "grad_norm": 8.16305923461914, "learning_rate": 9.650041038569225e-06, "loss": 0.3997, "step": 2773 }, { "epoch": 0.12866419294990725, "grad_norm": 11.147943496704102, "learning_rate": 9.649770500699747e-06, "loss": 0.3657, "step": 2774 }, { "epoch": 0.12871057513914658, "grad_norm": 7.9831223487854, "learning_rate": 9.649499862095443e-06, "loss": 0.302, "step": 2775 }, { "epoch": 0.1287569573283859, "grad_norm": 6.147249698638916, "learning_rate": 9.64922912276218e-06, "loss": 0.35, "step": 2776 }, { "epoch": 0.12880333951762524, "grad_norm": 9.300284385681152, "learning_rate": 9.648958282705819e-06, "loss": 0.3561, "step": 2777 }, { "epoch": 0.12884972170686457, "grad_norm": 9.265612602233887, "learning_rate": 9.648687341932228e-06, "loss": 0.4503, "step": 2778 }, { "epoch": 0.1288961038961039, "grad_norm": 7.392070293426514, "learning_rate": 9.648416300447278e-06, "loss": 0.3557, "step": 2779 }, { "epoch": 0.12894248608534323, "grad_norm": 20.045644760131836, "learning_rate": 9.648145158256842e-06, "loss": 0.3913, "step": 2780 }, { "epoch": 0.12898886827458256, "grad_norm": 12.441835403442383, "learning_rate": 9.647873915366792e-06, "loss": 0.4158, "step": 2781 }, { "epoch": 0.1290352504638219, "grad_norm": 6.689111709594727, "learning_rate": 9.647602571783004e-06, "loss": 0.3399, "step": 2782 }, { "epoch": 0.12908163265306122, "grad_norm": 10.490873336791992, "learning_rate": 9.647331127511359e-06, "loss": 0.4937, "step": 2783 }, { "epoch": 0.12912801484230055, "grad_norm": 9.762886047363281, "learning_rate": 9.647059582557737e-06, "loss": 0.3348, "step": 2784 }, { "epoch": 0.12917439703153988, "grad_norm": 8.524284362792969, "learning_rate": 9.646787936928017e-06, "loss": 0.4349, "step": 2785 }, { "epoch": 0.1292207792207792, "grad_norm": 10.58316707611084, "learning_rate": 9.64651619062809e-06, "loss": 0.4055, "step": 2786 }, { "epoch": 0.12926716141001857, "grad_norm": 8.061237335205078, "learning_rate": 9.64624434366384e-06, "loss": 0.3451, "step": 2787 }, { "epoch": 0.1293135435992579, "grad_norm": 8.824085235595703, "learning_rate": 9.645972396041156e-06, "loss": 0.382, "step": 2788 }, { "epoch": 0.12935992578849723, "grad_norm": 6.478639602661133, "learning_rate": 9.645700347765932e-06, "loss": 0.39, "step": 2789 }, { "epoch": 0.12940630797773656, "grad_norm": 7.419658184051514, "learning_rate": 9.64542819884406e-06, "loss": 0.319, "step": 2790 }, { "epoch": 0.1294526901669759, "grad_norm": 8.186975479125977, "learning_rate": 9.645155949281436e-06, "loss": 0.4224, "step": 2791 }, { "epoch": 0.12949907235621522, "grad_norm": 5.61479377746582, "learning_rate": 9.644883599083959e-06, "loss": 0.3532, "step": 2792 }, { "epoch": 0.12954545454545455, "grad_norm": 7.578190803527832, "learning_rate": 9.644611148257528e-06, "loss": 0.3096, "step": 2793 }, { "epoch": 0.12959183673469388, "grad_norm": 8.521087646484375, "learning_rate": 9.644338596808045e-06, "loss": 0.268, "step": 2794 }, { "epoch": 0.1296382189239332, "grad_norm": 6.73854923248291, "learning_rate": 9.644065944741417e-06, "loss": 0.3212, "step": 2795 }, { "epoch": 0.12968460111317254, "grad_norm": 13.225534439086914, "learning_rate": 9.64379319206355e-06, "loss": 0.4166, "step": 2796 }, { "epoch": 0.12973098330241187, "grad_norm": 8.129964828491211, "learning_rate": 9.643520338780354e-06, "loss": 0.3947, "step": 2797 }, { "epoch": 0.1297773654916512, "grad_norm": 7.377765655517578, "learning_rate": 9.643247384897737e-06, "loss": 0.2727, "step": 2798 }, { "epoch": 0.12982374768089053, "grad_norm": 23.069204330444336, "learning_rate": 9.642974330421616e-06, "loss": 0.417, "step": 2799 }, { "epoch": 0.12987012987012986, "grad_norm": 11.728180885314941, "learning_rate": 9.642701175357905e-06, "loss": 0.538, "step": 2800 }, { "epoch": 0.12991651205936922, "grad_norm": 7.470340728759766, "learning_rate": 9.642427919712521e-06, "loss": 0.3014, "step": 2801 }, { "epoch": 0.12996289424860855, "grad_norm": 6.736328601837158, "learning_rate": 9.642154563491385e-06, "loss": 0.3828, "step": 2802 }, { "epoch": 0.13000927643784788, "grad_norm": 10.645236015319824, "learning_rate": 9.64188110670042e-06, "loss": 0.3463, "step": 2803 }, { "epoch": 0.1300556586270872, "grad_norm": 10.01425552368164, "learning_rate": 9.641607549345546e-06, "loss": 0.3997, "step": 2804 }, { "epoch": 0.13010204081632654, "grad_norm": 16.18962287902832, "learning_rate": 9.641333891432695e-06, "loss": 0.5612, "step": 2805 }, { "epoch": 0.13014842300556587, "grad_norm": 9.417101860046387, "learning_rate": 9.641060132967793e-06, "loss": 0.3901, "step": 2806 }, { "epoch": 0.1301948051948052, "grad_norm": 5.57171106338501, "learning_rate": 9.640786273956772e-06, "loss": 0.349, "step": 2807 }, { "epoch": 0.13024118738404453, "grad_norm": 8.494061470031738, "learning_rate": 9.640512314405563e-06, "loss": 0.4326, "step": 2808 }, { "epoch": 0.13028756957328386, "grad_norm": 9.700638771057129, "learning_rate": 9.640238254320103e-06, "loss": 0.3861, "step": 2809 }, { "epoch": 0.1303339517625232, "grad_norm": 7.64077091217041, "learning_rate": 9.639964093706327e-06, "loss": 0.3512, "step": 2810 }, { "epoch": 0.13038033395176252, "grad_norm": 10.083430290222168, "learning_rate": 9.639689832570178e-06, "loss": 0.3698, "step": 2811 }, { "epoch": 0.13042671614100185, "grad_norm": 21.95828628540039, "learning_rate": 9.639415470917595e-06, "loss": 0.4218, "step": 2812 }, { "epoch": 0.13047309833024118, "grad_norm": 9.227144241333008, "learning_rate": 9.639141008754524e-06, "loss": 0.296, "step": 2813 }, { "epoch": 0.1305194805194805, "grad_norm": 7.779021263122559, "learning_rate": 9.63886644608691e-06, "loss": 0.3307, "step": 2814 }, { "epoch": 0.13056586270871987, "grad_norm": 10.639341354370117, "learning_rate": 9.638591782920698e-06, "loss": 0.3512, "step": 2815 }, { "epoch": 0.1306122448979592, "grad_norm": 6.381546974182129, "learning_rate": 9.638317019261845e-06, "loss": 0.4046, "step": 2816 }, { "epoch": 0.13065862708719853, "grad_norm": 6.538912773132324, "learning_rate": 9.6380421551163e-06, "loss": 0.4206, "step": 2817 }, { "epoch": 0.13070500927643786, "grad_norm": 6.981396198272705, "learning_rate": 9.637767190490018e-06, "loss": 0.487, "step": 2818 }, { "epoch": 0.13075139146567719, "grad_norm": 10.820474624633789, "learning_rate": 9.637492125388954e-06, "loss": 0.5285, "step": 2819 }, { "epoch": 0.13079777365491652, "grad_norm": 8.945185661315918, "learning_rate": 9.63721695981907e-06, "loss": 0.3369, "step": 2820 }, { "epoch": 0.13084415584415585, "grad_norm": 6.538008689880371, "learning_rate": 9.636941693786328e-06, "loss": 0.3519, "step": 2821 }, { "epoch": 0.13089053803339518, "grad_norm": 9.478346824645996, "learning_rate": 9.63666632729669e-06, "loss": 0.3722, "step": 2822 }, { "epoch": 0.1309369202226345, "grad_norm": 12.731701850891113, "learning_rate": 9.636390860356119e-06, "loss": 0.3993, "step": 2823 }, { "epoch": 0.13098330241187384, "grad_norm": 11.477499008178711, "learning_rate": 9.636115292970587e-06, "loss": 0.4342, "step": 2824 }, { "epoch": 0.13102968460111317, "grad_norm": 13.874221801757812, "learning_rate": 9.635839625146062e-06, "loss": 0.3799, "step": 2825 }, { "epoch": 0.1310760667903525, "grad_norm": 5.6285600662231445, "learning_rate": 9.635563856888516e-06, "loss": 0.2943, "step": 2826 }, { "epoch": 0.13112244897959183, "grad_norm": 5.7630438804626465, "learning_rate": 9.635287988203926e-06, "loss": 0.3204, "step": 2827 }, { "epoch": 0.13116883116883116, "grad_norm": 5.6036295890808105, "learning_rate": 9.635012019098265e-06, "loss": 0.4161, "step": 2828 }, { "epoch": 0.1312152133580705, "grad_norm": 11.413254737854004, "learning_rate": 9.634735949577514e-06, "loss": 0.5108, "step": 2829 }, { "epoch": 0.13126159554730985, "grad_norm": 8.15827751159668, "learning_rate": 9.634459779647653e-06, "loss": 0.4169, "step": 2830 }, { "epoch": 0.13130797773654918, "grad_norm": 6.235762596130371, "learning_rate": 9.634183509314667e-06, "loss": 0.3567, "step": 2831 }, { "epoch": 0.1313543599257885, "grad_norm": 14.55509090423584, "learning_rate": 9.633907138584538e-06, "loss": 0.4095, "step": 2832 }, { "epoch": 0.13140074211502784, "grad_norm": 5.729394435882568, "learning_rate": 9.633630667463254e-06, "loss": 0.3054, "step": 2833 }, { "epoch": 0.13144712430426717, "grad_norm": 5.693118095397949, "learning_rate": 9.633354095956806e-06, "loss": 0.2836, "step": 2834 }, { "epoch": 0.1314935064935065, "grad_norm": 10.091691970825195, "learning_rate": 9.633077424071187e-06, "loss": 0.3138, "step": 2835 }, { "epoch": 0.13153988868274583, "grad_norm": 14.295193672180176, "learning_rate": 9.632800651812389e-06, "loss": 0.3084, "step": 2836 }, { "epoch": 0.13158627087198516, "grad_norm": 7.902765274047852, "learning_rate": 9.632523779186407e-06, "loss": 0.382, "step": 2837 }, { "epoch": 0.13163265306122449, "grad_norm": 9.810620307922363, "learning_rate": 9.632246806199242e-06, "loss": 0.435, "step": 2838 }, { "epoch": 0.13167903525046382, "grad_norm": 7.368998050689697, "learning_rate": 9.631969732856893e-06, "loss": 0.3307, "step": 2839 }, { "epoch": 0.13172541743970315, "grad_norm": 6.2303466796875, "learning_rate": 9.63169255916536e-06, "loss": 0.317, "step": 2840 }, { "epoch": 0.13177179962894248, "grad_norm": 6.627070426940918, "learning_rate": 9.631415285130655e-06, "loss": 0.3193, "step": 2841 }, { "epoch": 0.1318181818181818, "grad_norm": 12.704377174377441, "learning_rate": 9.631137910758778e-06, "loss": 0.2609, "step": 2842 }, { "epoch": 0.13186456400742114, "grad_norm": 11.84454345703125, "learning_rate": 9.630860436055741e-06, "loss": 0.5353, "step": 2843 }, { "epoch": 0.1319109461966605, "grad_norm": 6.50977897644043, "learning_rate": 9.630582861027556e-06, "loss": 0.3478, "step": 2844 }, { "epoch": 0.13195732838589982, "grad_norm": 8.456235885620117, "learning_rate": 9.630305185680236e-06, "loss": 0.2499, "step": 2845 }, { "epoch": 0.13200371057513915, "grad_norm": 4.831812858581543, "learning_rate": 9.630027410019796e-06, "loss": 0.2902, "step": 2846 }, { "epoch": 0.13205009276437848, "grad_norm": 13.896519660949707, "learning_rate": 9.629749534052253e-06, "loss": 0.416, "step": 2847 }, { "epoch": 0.13209647495361782, "grad_norm": 4.4379754066467285, "learning_rate": 9.629471557783629e-06, "loss": 0.3249, "step": 2848 }, { "epoch": 0.13214285714285715, "grad_norm": 9.388497352600098, "learning_rate": 9.629193481219948e-06, "loss": 0.3626, "step": 2849 }, { "epoch": 0.13218923933209648, "grad_norm": 8.405763626098633, "learning_rate": 9.628915304367228e-06, "loss": 0.3778, "step": 2850 }, { "epoch": 0.1322356215213358, "grad_norm": 5.937280178070068, "learning_rate": 9.6286370272315e-06, "loss": 0.3344, "step": 2851 }, { "epoch": 0.13228200371057514, "grad_norm": 10.405071258544922, "learning_rate": 9.628358649818794e-06, "loss": 0.4794, "step": 2852 }, { "epoch": 0.13232838589981447, "grad_norm": 7.354053497314453, "learning_rate": 9.628080172135139e-06, "loss": 0.3294, "step": 2853 }, { "epoch": 0.1323747680890538, "grad_norm": 7.569945335388184, "learning_rate": 9.627801594186568e-06, "loss": 0.35, "step": 2854 }, { "epoch": 0.13242115027829313, "grad_norm": 18.897146224975586, "learning_rate": 9.627522915979116e-06, "loss": 0.6317, "step": 2855 }, { "epoch": 0.13246753246753246, "grad_norm": 6.601134777069092, "learning_rate": 9.627244137518821e-06, "loss": 0.3326, "step": 2856 }, { "epoch": 0.1325139146567718, "grad_norm": 10.728853225708008, "learning_rate": 9.62696525881172e-06, "loss": 0.339, "step": 2857 }, { "epoch": 0.13256029684601114, "grad_norm": 5.756510257720947, "learning_rate": 9.626686279863859e-06, "loss": 0.3007, "step": 2858 }, { "epoch": 0.13260667903525047, "grad_norm": 7.098876476287842, "learning_rate": 9.626407200681281e-06, "loss": 0.3628, "step": 2859 }, { "epoch": 0.1326530612244898, "grad_norm": 7.492963790893555, "learning_rate": 9.626128021270032e-06, "loss": 0.3959, "step": 2860 }, { "epoch": 0.13269944341372913, "grad_norm": 4.106863975524902, "learning_rate": 9.625848741636156e-06, "loss": 0.3996, "step": 2861 }, { "epoch": 0.13274582560296846, "grad_norm": 9.528974533081055, "learning_rate": 9.625569361785709e-06, "loss": 0.4649, "step": 2862 }, { "epoch": 0.1327922077922078, "grad_norm": 7.731560230255127, "learning_rate": 9.625289881724743e-06, "loss": 0.3406, "step": 2863 }, { "epoch": 0.13283858998144712, "grad_norm": 11.27430534362793, "learning_rate": 9.625010301459308e-06, "loss": 0.4344, "step": 2864 }, { "epoch": 0.13288497217068646, "grad_norm": 7.134929656982422, "learning_rate": 9.624730620995466e-06, "loss": 0.3626, "step": 2865 }, { "epoch": 0.13293135435992579, "grad_norm": 10.42935848236084, "learning_rate": 9.624450840339275e-06, "loss": 0.3867, "step": 2866 }, { "epoch": 0.13297773654916512, "grad_norm": 9.26103401184082, "learning_rate": 9.624170959496794e-06, "loss": 0.3417, "step": 2867 }, { "epoch": 0.13302411873840445, "grad_norm": 7.104258060455322, "learning_rate": 9.623890978474089e-06, "loss": 0.2278, "step": 2868 }, { "epoch": 0.13307050092764378, "grad_norm": 6.922877311706543, "learning_rate": 9.623610897277225e-06, "loss": 0.3582, "step": 2869 }, { "epoch": 0.1331168831168831, "grad_norm": 8.303643226623535, "learning_rate": 9.62333071591227e-06, "loss": 0.4083, "step": 2870 }, { "epoch": 0.13316326530612244, "grad_norm": 9.57475471496582, "learning_rate": 9.623050434385293e-06, "loss": 0.4208, "step": 2871 }, { "epoch": 0.1332096474953618, "grad_norm": 7.8092875480651855, "learning_rate": 9.622770052702366e-06, "loss": 0.3795, "step": 2872 }, { "epoch": 0.13325602968460112, "grad_norm": 7.001712799072266, "learning_rate": 9.622489570869566e-06, "loss": 0.3687, "step": 2873 }, { "epoch": 0.13330241187384045, "grad_norm": 7.529606342315674, "learning_rate": 9.622208988892966e-06, "loss": 0.4617, "step": 2874 }, { "epoch": 0.13334879406307978, "grad_norm": 9.212821006774902, "learning_rate": 9.621928306778647e-06, "loss": 0.4199, "step": 2875 }, { "epoch": 0.13339517625231911, "grad_norm": 10.210067749023438, "learning_rate": 9.621647524532688e-06, "loss": 0.4525, "step": 2876 }, { "epoch": 0.13344155844155844, "grad_norm": 7.4513115882873535, "learning_rate": 9.621366642161174e-06, "loss": 0.3262, "step": 2877 }, { "epoch": 0.13348794063079777, "grad_norm": 14.063202857971191, "learning_rate": 9.621085659670188e-06, "loss": 0.4334, "step": 2878 }, { "epoch": 0.1335343228200371, "grad_norm": 8.093035697937012, "learning_rate": 9.62080457706582e-06, "loss": 0.3336, "step": 2879 }, { "epoch": 0.13358070500927643, "grad_norm": 8.282144546508789, "learning_rate": 9.620523394354158e-06, "loss": 0.3329, "step": 2880 }, { "epoch": 0.13362708719851576, "grad_norm": 14.974542617797852, "learning_rate": 9.620242111541293e-06, "loss": 0.3345, "step": 2881 }, { "epoch": 0.1336734693877551, "grad_norm": 11.213883399963379, "learning_rate": 9.61996072863332e-06, "loss": 0.3948, "step": 2882 }, { "epoch": 0.13371985157699443, "grad_norm": 6.795157432556152, "learning_rate": 9.619679245636334e-06, "loss": 0.4181, "step": 2883 }, { "epoch": 0.13376623376623376, "grad_norm": 12.827706336975098, "learning_rate": 9.619397662556434e-06, "loss": 0.4209, "step": 2884 }, { "epoch": 0.13381261595547309, "grad_norm": 5.008548259735107, "learning_rate": 9.61911597939972e-06, "loss": 0.2588, "step": 2885 }, { "epoch": 0.13385899814471244, "grad_norm": 12.117061614990234, "learning_rate": 9.618834196172295e-06, "loss": 0.3462, "step": 2886 }, { "epoch": 0.13390538033395177, "grad_norm": 9.375110626220703, "learning_rate": 9.618552312880264e-06, "loss": 0.4268, "step": 2887 }, { "epoch": 0.1339517625231911, "grad_norm": 8.587984085083008, "learning_rate": 9.618270329529734e-06, "loss": 0.3225, "step": 2888 }, { "epoch": 0.13399814471243043, "grad_norm": 7.882594108581543, "learning_rate": 9.617988246126811e-06, "loss": 0.3575, "step": 2889 }, { "epoch": 0.13404452690166976, "grad_norm": 11.78418254852295, "learning_rate": 9.617706062677608e-06, "loss": 0.482, "step": 2890 }, { "epoch": 0.1340909090909091, "grad_norm": 6.974329948425293, "learning_rate": 9.617423779188239e-06, "loss": 0.3642, "step": 2891 }, { "epoch": 0.13413729128014842, "grad_norm": 10.139154434204102, "learning_rate": 9.617141395664821e-06, "loss": 0.3461, "step": 2892 }, { "epoch": 0.13418367346938775, "grad_norm": 9.29815673828125, "learning_rate": 9.61685891211347e-06, "loss": 0.4944, "step": 2893 }, { "epoch": 0.13423005565862708, "grad_norm": 13.2628173828125, "learning_rate": 9.616576328540304e-06, "loss": 0.4602, "step": 2894 }, { "epoch": 0.13427643784786641, "grad_norm": 12.063140869140625, "learning_rate": 9.616293644951448e-06, "loss": 0.5161, "step": 2895 }, { "epoch": 0.13432282003710574, "grad_norm": 16.619443893432617, "learning_rate": 9.616010861353025e-06, "loss": 0.4296, "step": 2896 }, { "epoch": 0.13436920222634507, "grad_norm": 7.931696891784668, "learning_rate": 9.61572797775116e-06, "loss": 0.4051, "step": 2897 }, { "epoch": 0.1344155844155844, "grad_norm": 7.782049179077148, "learning_rate": 9.615444994151984e-06, "loss": 0.4042, "step": 2898 }, { "epoch": 0.13446196660482373, "grad_norm": 12.585897445678711, "learning_rate": 9.615161910561627e-06, "loss": 0.3639, "step": 2899 }, { "epoch": 0.1345083487940631, "grad_norm": 5.027262210845947, "learning_rate": 9.61487872698622e-06, "loss": 0.4379, "step": 2900 }, { "epoch": 0.13455473098330242, "grad_norm": 6.71103572845459, "learning_rate": 9.614595443431901e-06, "loss": 0.3565, "step": 2901 }, { "epoch": 0.13460111317254175, "grad_norm": 5.86973237991333, "learning_rate": 9.614312059904806e-06, "loss": 0.3019, "step": 2902 }, { "epoch": 0.13464749536178108, "grad_norm": 12.242582321166992, "learning_rate": 9.614028576411072e-06, "loss": 0.4795, "step": 2903 }, { "epoch": 0.1346938775510204, "grad_norm": 8.14031982421875, "learning_rate": 9.613744992956844e-06, "loss": 0.4557, "step": 2904 }, { "epoch": 0.13474025974025974, "grad_norm": 7.993523597717285, "learning_rate": 9.613461309548264e-06, "loss": 0.3038, "step": 2905 }, { "epoch": 0.13478664192949907, "grad_norm": 8.079764366149902, "learning_rate": 9.613177526191478e-06, "loss": 0.4409, "step": 2906 }, { "epoch": 0.1348330241187384, "grad_norm": 6.824124336242676, "learning_rate": 9.612893642892635e-06, "loss": 0.3682, "step": 2907 }, { "epoch": 0.13487940630797773, "grad_norm": 6.722688674926758, "learning_rate": 9.612609659657883e-06, "loss": 0.4497, "step": 2908 }, { "epoch": 0.13492578849721706, "grad_norm": 8.996682167053223, "learning_rate": 9.612325576493375e-06, "loss": 0.4111, "step": 2909 }, { "epoch": 0.1349721706864564, "grad_norm": 6.1246771812438965, "learning_rate": 9.612041393405266e-06, "loss": 0.3627, "step": 2910 }, { "epoch": 0.13501855287569572, "grad_norm": 5.359704494476318, "learning_rate": 9.611757110399715e-06, "loss": 0.2923, "step": 2911 }, { "epoch": 0.13506493506493505, "grad_norm": 6.186345100402832, "learning_rate": 9.611472727482877e-06, "loss": 0.3211, "step": 2912 }, { "epoch": 0.13511131725417438, "grad_norm": 10.501598358154297, "learning_rate": 9.611188244660916e-06, "loss": 0.4854, "step": 2913 }, { "epoch": 0.13515769944341374, "grad_norm": 10.846824645996094, "learning_rate": 9.610903661939995e-06, "loss": 0.3546, "step": 2914 }, { "epoch": 0.13520408163265307, "grad_norm": 11.249231338500977, "learning_rate": 9.610618979326273e-06, "loss": 0.3853, "step": 2915 }, { "epoch": 0.1352504638218924, "grad_norm": 9.370808601379395, "learning_rate": 9.610334196825928e-06, "loss": 0.357, "step": 2916 }, { "epoch": 0.13529684601113173, "grad_norm": 8.692170143127441, "learning_rate": 9.610049314445122e-06, "loss": 0.4796, "step": 2917 }, { "epoch": 0.13534322820037106, "grad_norm": 7.995572566986084, "learning_rate": 9.609764332190029e-06, "loss": 0.3379, "step": 2918 }, { "epoch": 0.1353896103896104, "grad_norm": 8.919164657592773, "learning_rate": 9.609479250066824e-06, "loss": 0.249, "step": 2919 }, { "epoch": 0.13543599257884972, "grad_norm": 13.026778221130371, "learning_rate": 9.609194068081682e-06, "loss": 0.435, "step": 2920 }, { "epoch": 0.13548237476808905, "grad_norm": 8.009319305419922, "learning_rate": 9.60890878624078e-06, "loss": 0.3818, "step": 2921 }, { "epoch": 0.13552875695732838, "grad_norm": 4.691335678100586, "learning_rate": 9.608623404550302e-06, "loss": 0.3748, "step": 2922 }, { "epoch": 0.1355751391465677, "grad_norm": 6.31721830368042, "learning_rate": 9.60833792301643e-06, "loss": 0.3042, "step": 2923 }, { "epoch": 0.13562152133580704, "grad_norm": 6.811092853546143, "learning_rate": 9.608052341645344e-06, "loss": 0.3833, "step": 2924 }, { "epoch": 0.13566790352504637, "grad_norm": 4.5915913581848145, "learning_rate": 9.607766660443236e-06, "loss": 0.2552, "step": 2925 }, { "epoch": 0.1357142857142857, "grad_norm": 5.293720245361328, "learning_rate": 9.607480879416295e-06, "loss": 0.2646, "step": 2926 }, { "epoch": 0.13576066790352503, "grad_norm": 5.599786281585693, "learning_rate": 9.60719499857071e-06, "loss": 0.3197, "step": 2927 }, { "epoch": 0.1358070500927644, "grad_norm": 5.891587734222412, "learning_rate": 9.606909017912674e-06, "loss": 0.3623, "step": 2928 }, { "epoch": 0.13585343228200372, "grad_norm": 5.927166938781738, "learning_rate": 9.606622937448386e-06, "loss": 0.3356, "step": 2929 }, { "epoch": 0.13589981447124305, "grad_norm": 19.8474178314209, "learning_rate": 9.606336757184041e-06, "loss": 0.5531, "step": 2930 }, { "epoch": 0.13594619666048238, "grad_norm": 16.924983978271484, "learning_rate": 9.606050477125839e-06, "loss": 0.5791, "step": 2931 }, { "epoch": 0.1359925788497217, "grad_norm": 6.422410488128662, "learning_rate": 9.605764097279984e-06, "loss": 0.2869, "step": 2932 }, { "epoch": 0.13603896103896104, "grad_norm": 7.362586975097656, "learning_rate": 9.605477617652678e-06, "loss": 0.3242, "step": 2933 }, { "epoch": 0.13608534322820037, "grad_norm": 11.469298362731934, "learning_rate": 9.60519103825013e-06, "loss": 0.418, "step": 2934 }, { "epoch": 0.1361317254174397, "grad_norm": 9.806001663208008, "learning_rate": 9.604904359078547e-06, "loss": 0.3703, "step": 2935 }, { "epoch": 0.13617810760667903, "grad_norm": 7.091019153594971, "learning_rate": 9.60461758014414e-06, "loss": 0.3254, "step": 2936 }, { "epoch": 0.13622448979591836, "grad_norm": 5.5115251541137695, "learning_rate": 9.604330701453121e-06, "loss": 0.3276, "step": 2937 }, { "epoch": 0.1362708719851577, "grad_norm": 8.72243595123291, "learning_rate": 9.604043723011705e-06, "loss": 0.4402, "step": 2938 }, { "epoch": 0.13631725417439702, "grad_norm": 10.955229759216309, "learning_rate": 9.603756644826112e-06, "loss": 0.3666, "step": 2939 }, { "epoch": 0.13636363636363635, "grad_norm": 6.9836249351501465, "learning_rate": 9.603469466902559e-06, "loss": 0.3126, "step": 2940 }, { "epoch": 0.13641001855287568, "grad_norm": 9.507083892822266, "learning_rate": 9.603182189247266e-06, "loss": 0.3911, "step": 2941 }, { "epoch": 0.13645640074211504, "grad_norm": 6.436991214752197, "learning_rate": 9.602894811866461e-06, "loss": 0.4212, "step": 2942 }, { "epoch": 0.13650278293135437, "grad_norm": 5.16743803024292, "learning_rate": 9.602607334766367e-06, "loss": 0.3454, "step": 2943 }, { "epoch": 0.1365491651205937, "grad_norm": 8.028952598571777, "learning_rate": 9.602319757953213e-06, "loss": 0.3429, "step": 2944 }, { "epoch": 0.13659554730983303, "grad_norm": 8.57649040222168, "learning_rate": 9.60203208143323e-06, "loss": 0.3007, "step": 2945 }, { "epoch": 0.13664192949907236, "grad_norm": 5.892724990844727, "learning_rate": 9.601744305212648e-06, "loss": 0.3446, "step": 2946 }, { "epoch": 0.1366883116883117, "grad_norm": 9.434779167175293, "learning_rate": 9.601456429297703e-06, "loss": 0.4791, "step": 2947 }, { "epoch": 0.13673469387755102, "grad_norm": 13.24988842010498, "learning_rate": 9.601168453694631e-06, "loss": 0.3675, "step": 2948 }, { "epoch": 0.13678107606679035, "grad_norm": 11.88906478881836, "learning_rate": 9.600880378409672e-06, "loss": 0.4746, "step": 2949 }, { "epoch": 0.13682745825602968, "grad_norm": 6.710061550140381, "learning_rate": 9.600592203449066e-06, "loss": 0.2499, "step": 2950 }, { "epoch": 0.136873840445269, "grad_norm": 23.223447799682617, "learning_rate": 9.600303928819057e-06, "loss": 0.5968, "step": 2951 }, { "epoch": 0.13692022263450834, "grad_norm": 9.645950317382812, "learning_rate": 9.60001555452589e-06, "loss": 0.3734, "step": 2952 }, { "epoch": 0.13696660482374767, "grad_norm": 6.821942329406738, "learning_rate": 9.599727080575811e-06, "loss": 0.3327, "step": 2953 }, { "epoch": 0.137012987012987, "grad_norm": 11.844600677490234, "learning_rate": 9.599438506975071e-06, "loss": 0.4534, "step": 2954 }, { "epoch": 0.13705936920222633, "grad_norm": 10.0664644241333, "learning_rate": 9.599149833729922e-06, "loss": 0.3543, "step": 2955 }, { "epoch": 0.13710575139146566, "grad_norm": 10.404685974121094, "learning_rate": 9.598861060846617e-06, "loss": 0.2588, "step": 2956 }, { "epoch": 0.13715213358070502, "grad_norm": 9.433145523071289, "learning_rate": 9.598572188331415e-06, "loss": 0.3677, "step": 2957 }, { "epoch": 0.13719851576994435, "grad_norm": 8.232545852661133, "learning_rate": 9.59828321619057e-06, "loss": 0.3918, "step": 2958 }, { "epoch": 0.13724489795918368, "grad_norm": 5.45314884185791, "learning_rate": 9.597994144430345e-06, "loss": 0.3897, "step": 2959 }, { "epoch": 0.137291280148423, "grad_norm": 5.196531772613525, "learning_rate": 9.597704973057003e-06, "loss": 0.3434, "step": 2960 }, { "epoch": 0.13733766233766234, "grad_norm": 9.719941139221191, "learning_rate": 9.597415702076806e-06, "loss": 0.4279, "step": 2961 }, { "epoch": 0.13738404452690167, "grad_norm": 7.252481937408447, "learning_rate": 9.597126331496023e-06, "loss": 0.3262, "step": 2962 }, { "epoch": 0.137430426716141, "grad_norm": 25.51727867126465, "learning_rate": 9.596836861320924e-06, "loss": 0.53, "step": 2963 }, { "epoch": 0.13747680890538033, "grad_norm": 5.080368995666504, "learning_rate": 9.596547291557779e-06, "loss": 0.3529, "step": 2964 }, { "epoch": 0.13752319109461966, "grad_norm": 5.575477600097656, "learning_rate": 9.596257622212862e-06, "loss": 0.3209, "step": 2965 }, { "epoch": 0.137569573283859, "grad_norm": 9.75389289855957, "learning_rate": 9.595967853292445e-06, "loss": 0.4341, "step": 2966 }, { "epoch": 0.13761595547309832, "grad_norm": 8.994118690490723, "learning_rate": 9.595677984802811e-06, "loss": 0.4074, "step": 2967 }, { "epoch": 0.13766233766233765, "grad_norm": 8.865456581115723, "learning_rate": 9.595388016750236e-06, "loss": 0.4265, "step": 2968 }, { "epoch": 0.13770871985157698, "grad_norm": 8.488494873046875, "learning_rate": 9.595097949141003e-06, "loss": 0.2465, "step": 2969 }, { "epoch": 0.1377551020408163, "grad_norm": 11.062304496765137, "learning_rate": 9.594807781981399e-06, "loss": 0.4574, "step": 2970 }, { "epoch": 0.13780148423005567, "grad_norm": 7.5551886558532715, "learning_rate": 9.594517515277705e-06, "loss": 0.2721, "step": 2971 }, { "epoch": 0.137847866419295, "grad_norm": 11.419105529785156, "learning_rate": 9.594227149036215e-06, "loss": 0.5078, "step": 2972 }, { "epoch": 0.13789424860853433, "grad_norm": 6.930784702301025, "learning_rate": 9.593936683263216e-06, "loss": 0.267, "step": 2973 }, { "epoch": 0.13794063079777366, "grad_norm": 5.049108028411865, "learning_rate": 9.593646117965001e-06, "loss": 0.3993, "step": 2974 }, { "epoch": 0.137987012987013, "grad_norm": 15.302162170410156, "learning_rate": 9.593355453147869e-06, "loss": 0.5187, "step": 2975 }, { "epoch": 0.13803339517625232, "grad_norm": 5.3373308181762695, "learning_rate": 9.59306468881811e-06, "loss": 0.371, "step": 2976 }, { "epoch": 0.13807977736549165, "grad_norm": 8.28668212890625, "learning_rate": 9.592773824982029e-06, "loss": 0.4044, "step": 2977 }, { "epoch": 0.13812615955473098, "grad_norm": 6.201292514801025, "learning_rate": 9.592482861645925e-06, "loss": 0.3711, "step": 2978 }, { "epoch": 0.1381725417439703, "grad_norm": 11.100418090820312, "learning_rate": 9.592191798816101e-06, "loss": 0.3177, "step": 2979 }, { "epoch": 0.13821892393320964, "grad_norm": 7.4316864013671875, "learning_rate": 9.591900636498865e-06, "loss": 0.3706, "step": 2980 }, { "epoch": 0.13826530612244897, "grad_norm": 6.751199722290039, "learning_rate": 9.591609374700523e-06, "loss": 0.372, "step": 2981 }, { "epoch": 0.1383116883116883, "grad_norm": 6.645961284637451, "learning_rate": 9.591318013427387e-06, "loss": 0.333, "step": 2982 }, { "epoch": 0.13835807050092763, "grad_norm": 8.046194076538086, "learning_rate": 9.591026552685767e-06, "loss": 0.5133, "step": 2983 }, { "epoch": 0.13840445269016696, "grad_norm": 6.866132736206055, "learning_rate": 9.590734992481978e-06, "loss": 0.3807, "step": 2984 }, { "epoch": 0.13845083487940632, "grad_norm": 9.862552642822266, "learning_rate": 9.590443332822338e-06, "loss": 0.4622, "step": 2985 }, { "epoch": 0.13849721706864565, "grad_norm": 6.816839218139648, "learning_rate": 9.590151573713163e-06, "loss": 0.3967, "step": 2986 }, { "epoch": 0.13854359925788498, "grad_norm": 9.798393249511719, "learning_rate": 9.589859715160777e-06, "loss": 0.2683, "step": 2987 }, { "epoch": 0.1385899814471243, "grad_norm": 6.929235458374023, "learning_rate": 9.589567757171498e-06, "loss": 0.373, "step": 2988 }, { "epoch": 0.13863636363636364, "grad_norm": 13.97670841217041, "learning_rate": 9.589275699751656e-06, "loss": 0.3743, "step": 2989 }, { "epoch": 0.13868274582560297, "grad_norm": 12.399890899658203, "learning_rate": 9.588983542907578e-06, "loss": 0.4339, "step": 2990 }, { "epoch": 0.1387291280148423, "grad_norm": 7.7005696296691895, "learning_rate": 9.588691286645591e-06, "loss": 0.4496, "step": 2991 }, { "epoch": 0.13877551020408163, "grad_norm": 7.446300983428955, "learning_rate": 9.588398930972028e-06, "loss": 0.4149, "step": 2992 }, { "epoch": 0.13882189239332096, "grad_norm": 22.800561904907227, "learning_rate": 9.58810647589322e-06, "loss": 0.4353, "step": 2993 }, { "epoch": 0.1388682745825603, "grad_norm": 10.632572174072266, "learning_rate": 9.587813921415507e-06, "loss": 0.5095, "step": 2994 }, { "epoch": 0.13891465677179962, "grad_norm": 6.114190578460693, "learning_rate": 9.587521267545226e-06, "loss": 0.3524, "step": 2995 }, { "epoch": 0.13896103896103895, "grad_norm": 9.69186019897461, "learning_rate": 9.587228514288716e-06, "loss": 0.3875, "step": 2996 }, { "epoch": 0.13900742115027828, "grad_norm": 14.097745895385742, "learning_rate": 9.58693566165232e-06, "loss": 0.5053, "step": 2997 }, { "epoch": 0.1390538033395176, "grad_norm": 5.651580810546875, "learning_rate": 9.58664270964238e-06, "loss": 0.3412, "step": 2998 }, { "epoch": 0.13910018552875697, "grad_norm": 9.966137886047363, "learning_rate": 9.586349658265245e-06, "loss": 0.3046, "step": 2999 }, { "epoch": 0.1391465677179963, "grad_norm": 10.991254806518555, "learning_rate": 9.586056507527266e-06, "loss": 0.2695, "step": 3000 }, { "epoch": 0.13919294990723563, "grad_norm": 17.30319595336914, "learning_rate": 9.585763257434791e-06, "loss": 0.5996, "step": 3001 }, { "epoch": 0.13923933209647496, "grad_norm": 6.976191520690918, "learning_rate": 9.585469907994173e-06, "loss": 0.4146, "step": 3002 }, { "epoch": 0.1392857142857143, "grad_norm": 9.976237297058105, "learning_rate": 9.585176459211769e-06, "loss": 0.3682, "step": 3003 }, { "epoch": 0.13933209647495362, "grad_norm": 4.951038837432861, "learning_rate": 9.584882911093935e-06, "loss": 0.303, "step": 3004 }, { "epoch": 0.13937847866419295, "grad_norm": 6.039548397064209, "learning_rate": 9.584589263647031e-06, "loss": 0.2974, "step": 3005 }, { "epoch": 0.13942486085343228, "grad_norm": 8.229700088500977, "learning_rate": 9.584295516877418e-06, "loss": 0.4336, "step": 3006 }, { "epoch": 0.1394712430426716, "grad_norm": 10.163483619689941, "learning_rate": 9.58400167079146e-06, "loss": 0.4054, "step": 3007 }, { "epoch": 0.13951762523191094, "grad_norm": 5.6648101806640625, "learning_rate": 9.583707725395526e-06, "loss": 0.3328, "step": 3008 }, { "epoch": 0.13956400742115027, "grad_norm": 9.249089241027832, "learning_rate": 9.58341368069598e-06, "loss": 0.3073, "step": 3009 }, { "epoch": 0.1396103896103896, "grad_norm": 7.047428607940674, "learning_rate": 9.583119536699195e-06, "loss": 0.3218, "step": 3010 }, { "epoch": 0.13965677179962893, "grad_norm": 14.241738319396973, "learning_rate": 9.582825293411541e-06, "loss": 0.4502, "step": 3011 }, { "epoch": 0.13970315398886826, "grad_norm": 5.566911697387695, "learning_rate": 9.582530950839397e-06, "loss": 0.3436, "step": 3012 }, { "epoch": 0.13974953617810762, "grad_norm": 8.27416706085205, "learning_rate": 9.582236508989135e-06, "loss": 0.3617, "step": 3013 }, { "epoch": 0.13979591836734695, "grad_norm": 10.931900024414062, "learning_rate": 9.581941967867136e-06, "loss": 0.5609, "step": 3014 }, { "epoch": 0.13984230055658628, "grad_norm": 7.917172431945801, "learning_rate": 9.581647327479782e-06, "loss": 0.4057, "step": 3015 }, { "epoch": 0.1398886827458256, "grad_norm": 7.291550636291504, "learning_rate": 9.581352587833455e-06, "loss": 0.3149, "step": 3016 }, { "epoch": 0.13993506493506494, "grad_norm": 21.19748306274414, "learning_rate": 9.581057748934541e-06, "loss": 0.4184, "step": 3017 }, { "epoch": 0.13998144712430427, "grad_norm": 4.950630187988281, "learning_rate": 9.580762810789426e-06, "loss": 0.2932, "step": 3018 }, { "epoch": 0.1400278293135436, "grad_norm": 7.578801155090332, "learning_rate": 9.5804677734045e-06, "loss": 0.3943, "step": 3019 }, { "epoch": 0.14007421150278293, "grad_norm": 8.590635299682617, "learning_rate": 9.580172636786158e-06, "loss": 0.3395, "step": 3020 }, { "epoch": 0.14012059369202226, "grad_norm": 6.805634498596191, "learning_rate": 9.579877400940789e-06, "loss": 0.3469, "step": 3021 }, { "epoch": 0.1401669758812616, "grad_norm": 6.311460494995117, "learning_rate": 9.579582065874794e-06, "loss": 0.2621, "step": 3022 }, { "epoch": 0.14021335807050092, "grad_norm": 5.666399955749512, "learning_rate": 9.579286631594569e-06, "loss": 0.3391, "step": 3023 }, { "epoch": 0.14025974025974025, "grad_norm": 6.913588523864746, "learning_rate": 9.578991098106512e-06, "loss": 0.3938, "step": 3024 }, { "epoch": 0.14030612244897958, "grad_norm": 7.128274917602539, "learning_rate": 9.57869546541703e-06, "loss": 0.3718, "step": 3025 }, { "epoch": 0.1403525046382189, "grad_norm": 8.740790367126465, "learning_rate": 9.578399733532525e-06, "loss": 0.3759, "step": 3026 }, { "epoch": 0.14039888682745827, "grad_norm": 6.787901401519775, "learning_rate": 9.578103902459406e-06, "loss": 0.4151, "step": 3027 }, { "epoch": 0.1404452690166976, "grad_norm": 6.862914562225342, "learning_rate": 9.577807972204079e-06, "loss": 0.2441, "step": 3028 }, { "epoch": 0.14049165120593693, "grad_norm": 6.4883880615234375, "learning_rate": 9.577511942772957e-06, "loss": 0.3112, "step": 3029 }, { "epoch": 0.14053803339517626, "grad_norm": 5.816178798675537, "learning_rate": 9.577215814172453e-06, "loss": 0.3857, "step": 3030 }, { "epoch": 0.1405844155844156, "grad_norm": 10.110045433044434, "learning_rate": 9.576919586408983e-06, "loss": 0.3404, "step": 3031 }, { "epoch": 0.14063079777365492, "grad_norm": 8.908896446228027, "learning_rate": 9.576623259488966e-06, "loss": 0.442, "step": 3032 }, { "epoch": 0.14067717996289425, "grad_norm": 6.467118740081787, "learning_rate": 9.576326833418818e-06, "loss": 0.3672, "step": 3033 }, { "epoch": 0.14072356215213358, "grad_norm": 6.970015525817871, "learning_rate": 9.576030308204964e-06, "loss": 0.3389, "step": 3034 }, { "epoch": 0.1407699443413729, "grad_norm": 16.36017608642578, "learning_rate": 9.575733683853824e-06, "loss": 0.4395, "step": 3035 }, { "epoch": 0.14081632653061224, "grad_norm": 7.615310192108154, "learning_rate": 9.57543696037183e-06, "loss": 0.3867, "step": 3036 }, { "epoch": 0.14086270871985157, "grad_norm": 5.732385158538818, "learning_rate": 9.575140137765408e-06, "loss": 0.2975, "step": 3037 }, { "epoch": 0.1409090909090909, "grad_norm": 8.890869140625, "learning_rate": 9.574843216040987e-06, "loss": 0.3302, "step": 3038 }, { "epoch": 0.14095547309833023, "grad_norm": 7.783227443695068, "learning_rate": 9.574546195205001e-06, "loss": 0.3867, "step": 3039 }, { "epoch": 0.14100185528756956, "grad_norm": 5.580079555511475, "learning_rate": 9.574249075263885e-06, "loss": 0.2588, "step": 3040 }, { "epoch": 0.14104823747680892, "grad_norm": 11.217182159423828, "learning_rate": 9.573951856224076e-06, "loss": 0.466, "step": 3041 }, { "epoch": 0.14109461966604825, "grad_norm": 22.765155792236328, "learning_rate": 9.57365453809201e-06, "loss": 0.4631, "step": 3042 }, { "epoch": 0.14114100185528758, "grad_norm": 10.199326515197754, "learning_rate": 9.573357120874132e-06, "loss": 0.4287, "step": 3043 }, { "epoch": 0.1411873840445269, "grad_norm": 5.82615852355957, "learning_rate": 9.573059604576884e-06, "loss": 0.3985, "step": 3044 }, { "epoch": 0.14123376623376624, "grad_norm": 13.204206466674805, "learning_rate": 9.572761989206712e-06, "loss": 0.4056, "step": 3045 }, { "epoch": 0.14128014842300557, "grad_norm": 5.9283857345581055, "learning_rate": 9.572464274770063e-06, "loss": 0.435, "step": 3046 }, { "epoch": 0.1413265306122449, "grad_norm": 6.453741073608398, "learning_rate": 9.572166461273389e-06, "loss": 0.3968, "step": 3047 }, { "epoch": 0.14137291280148423, "grad_norm": 12.298917770385742, "learning_rate": 9.571868548723137e-06, "loss": 0.5423, "step": 3048 }, { "epoch": 0.14141929499072356, "grad_norm": 8.575348854064941, "learning_rate": 9.571570537125767e-06, "loss": 0.4201, "step": 3049 }, { "epoch": 0.1414656771799629, "grad_norm": 9.062766075134277, "learning_rate": 9.571272426487732e-06, "loss": 0.3056, "step": 3050 }, { "epoch": 0.14151205936920222, "grad_norm": 5.551932334899902, "learning_rate": 9.57097421681549e-06, "loss": 0.412, "step": 3051 }, { "epoch": 0.14155844155844155, "grad_norm": 10.088099479675293, "learning_rate": 9.570675908115503e-06, "loss": 0.3484, "step": 3052 }, { "epoch": 0.14160482374768088, "grad_norm": 8.749051094055176, "learning_rate": 9.570377500394234e-06, "loss": 0.4354, "step": 3053 }, { "epoch": 0.1416512059369202, "grad_norm": 11.640469551086426, "learning_rate": 9.570078993658147e-06, "loss": 0.4862, "step": 3054 }, { "epoch": 0.14169758812615957, "grad_norm": 9.4189453125, "learning_rate": 9.569780387913707e-06, "loss": 0.3958, "step": 3055 }, { "epoch": 0.1417439703153989, "grad_norm": 9.069602966308594, "learning_rate": 9.569481683167388e-06, "loss": 0.3286, "step": 3056 }, { "epoch": 0.14179035250463823, "grad_norm": 10.163983345031738, "learning_rate": 9.569182879425656e-06, "loss": 0.4164, "step": 3057 }, { "epoch": 0.14183673469387756, "grad_norm": 5.127231121063232, "learning_rate": 9.56888397669499e-06, "loss": 0.3575, "step": 3058 }, { "epoch": 0.1418831168831169, "grad_norm": 13.883390426635742, "learning_rate": 9.56858497498186e-06, "loss": 0.5418, "step": 3059 }, { "epoch": 0.14192949907235622, "grad_norm": 8.909729957580566, "learning_rate": 9.568285874292746e-06, "loss": 0.451, "step": 3060 }, { "epoch": 0.14197588126159555, "grad_norm": 4.900751113891602, "learning_rate": 9.567986674634128e-06, "loss": 0.3016, "step": 3061 }, { "epoch": 0.14202226345083488, "grad_norm": 5.490914821624756, "learning_rate": 9.567687376012488e-06, "loss": 0.454, "step": 3062 }, { "epoch": 0.1420686456400742, "grad_norm": 3.8335037231445312, "learning_rate": 9.567387978434312e-06, "loss": 0.3639, "step": 3063 }, { "epoch": 0.14211502782931354, "grad_norm": 6.72628927230835, "learning_rate": 9.567088481906084e-06, "loss": 0.2815, "step": 3064 }, { "epoch": 0.14216141001855287, "grad_norm": 9.990432739257812, "learning_rate": 9.566788886434293e-06, "loss": 0.5402, "step": 3065 }, { "epoch": 0.1422077922077922, "grad_norm": 6.402757167816162, "learning_rate": 9.56648919202543e-06, "loss": 0.3628, "step": 3066 }, { "epoch": 0.14225417439703153, "grad_norm": 6.462105751037598, "learning_rate": 9.566189398685986e-06, "loss": 0.3876, "step": 3067 }, { "epoch": 0.14230055658627086, "grad_norm": 5.110078811645508, "learning_rate": 9.565889506422457e-06, "loss": 0.3382, "step": 3068 }, { "epoch": 0.14234693877551022, "grad_norm": 7.93704891204834, "learning_rate": 9.56558951524134e-06, "loss": 0.3985, "step": 3069 }, { "epoch": 0.14239332096474955, "grad_norm": 8.153634071350098, "learning_rate": 9.565289425149134e-06, "loss": 0.4267, "step": 3070 }, { "epoch": 0.14243970315398888, "grad_norm": 8.936190605163574, "learning_rate": 9.564989236152343e-06, "loss": 0.4354, "step": 3071 }, { "epoch": 0.1424860853432282, "grad_norm": 10.401971817016602, "learning_rate": 9.564688948257466e-06, "loss": 0.4172, "step": 3072 }, { "epoch": 0.14253246753246754, "grad_norm": 13.133710861206055, "learning_rate": 9.56438856147101e-06, "loss": 0.5971, "step": 3073 }, { "epoch": 0.14257884972170687, "grad_norm": 13.887351989746094, "learning_rate": 9.564088075799484e-06, "loss": 0.4616, "step": 3074 }, { "epoch": 0.1426252319109462, "grad_norm": 10.643508911132812, "learning_rate": 9.563787491249396e-06, "loss": 0.2866, "step": 3075 }, { "epoch": 0.14267161410018553, "grad_norm": 7.527498245239258, "learning_rate": 9.56348680782726e-06, "loss": 0.3732, "step": 3076 }, { "epoch": 0.14271799628942486, "grad_norm": 10.538958549499512, "learning_rate": 9.56318602553959e-06, "loss": 0.3948, "step": 3077 }, { "epoch": 0.1427643784786642, "grad_norm": 8.078094482421875, "learning_rate": 9.562885144392902e-06, "loss": 0.4717, "step": 3078 }, { "epoch": 0.14281076066790352, "grad_norm": 8.56544303894043, "learning_rate": 9.562584164393713e-06, "loss": 0.3416, "step": 3079 }, { "epoch": 0.14285714285714285, "grad_norm": 11.458020210266113, "learning_rate": 9.562283085548546e-06, "loss": 0.4049, "step": 3080 }, { "epoch": 0.14290352504638218, "grad_norm": 7.767672061920166, "learning_rate": 9.56198190786392e-06, "loss": 0.3451, "step": 3081 }, { "epoch": 0.1429499072356215, "grad_norm": 8.673761367797852, "learning_rate": 9.561680631346364e-06, "loss": 0.4627, "step": 3082 }, { "epoch": 0.14299628942486084, "grad_norm": 7.5970988273620605, "learning_rate": 9.561379256002402e-06, "loss": 0.3461, "step": 3083 }, { "epoch": 0.1430426716141002, "grad_norm": 10.917089462280273, "learning_rate": 9.561077781838564e-06, "loss": 0.3102, "step": 3084 }, { "epoch": 0.14308905380333953, "grad_norm": 9.846426010131836, "learning_rate": 9.560776208861383e-06, "loss": 0.4471, "step": 3085 }, { "epoch": 0.14313543599257886, "grad_norm": 15.892309188842773, "learning_rate": 9.560474537077391e-06, "loss": 0.5276, "step": 3086 }, { "epoch": 0.1431818181818182, "grad_norm": 11.056203842163086, "learning_rate": 9.560172766493121e-06, "loss": 0.4772, "step": 3087 }, { "epoch": 0.14322820037105752, "grad_norm": 9.392976760864258, "learning_rate": 9.559870897115115e-06, "loss": 0.3154, "step": 3088 }, { "epoch": 0.14327458256029685, "grad_norm": 10.178154945373535, "learning_rate": 9.559568928949913e-06, "loss": 0.4163, "step": 3089 }, { "epoch": 0.14332096474953618, "grad_norm": 8.391048431396484, "learning_rate": 9.559266862004053e-06, "loss": 0.406, "step": 3090 }, { "epoch": 0.1433673469387755, "grad_norm": 8.689509391784668, "learning_rate": 9.55896469628408e-06, "loss": 0.4002, "step": 3091 }, { "epoch": 0.14341372912801484, "grad_norm": 8.610767364501953, "learning_rate": 9.558662431796544e-06, "loss": 0.2385, "step": 3092 }, { "epoch": 0.14346011131725417, "grad_norm": 12.253988265991211, "learning_rate": 9.55836006854799e-06, "loss": 0.5706, "step": 3093 }, { "epoch": 0.1435064935064935, "grad_norm": 6.698322296142578, "learning_rate": 9.558057606544969e-06, "loss": 0.3686, "step": 3094 }, { "epoch": 0.14355287569573283, "grad_norm": 7.314149856567383, "learning_rate": 9.557755045794035e-06, "loss": 0.3991, "step": 3095 }, { "epoch": 0.14359925788497216, "grad_norm": 7.905384540557861, "learning_rate": 9.55745238630174e-06, "loss": 0.3637, "step": 3096 }, { "epoch": 0.1436456400742115, "grad_norm": 5.964815139770508, "learning_rate": 9.557149628074645e-06, "loss": 0.2288, "step": 3097 }, { "epoch": 0.14369202226345085, "grad_norm": 7.555202484130859, "learning_rate": 9.556846771119306e-06, "loss": 0.3611, "step": 3098 }, { "epoch": 0.14373840445269018, "grad_norm": 7.300815582275391, "learning_rate": 9.556543815442286e-06, "loss": 0.3618, "step": 3099 }, { "epoch": 0.1437847866419295, "grad_norm": 9.8679780960083, "learning_rate": 9.556240761050146e-06, "loss": 0.4956, "step": 3100 }, { "epoch": 0.14383116883116884, "grad_norm": 7.271358489990234, "learning_rate": 9.555937607949452e-06, "loss": 0.3651, "step": 3101 }, { "epoch": 0.14387755102040817, "grad_norm": 8.292926788330078, "learning_rate": 9.555634356146775e-06, "loss": 0.3044, "step": 3102 }, { "epoch": 0.1439239332096475, "grad_norm": 5.611334800720215, "learning_rate": 9.555331005648683e-06, "loss": 0.3077, "step": 3103 }, { "epoch": 0.14397031539888683, "grad_norm": 5.49520206451416, "learning_rate": 9.555027556461747e-06, "loss": 0.3041, "step": 3104 }, { "epoch": 0.14401669758812616, "grad_norm": 12.27502155303955, "learning_rate": 9.55472400859254e-06, "loss": 0.3755, "step": 3105 }, { "epoch": 0.1440630797773655, "grad_norm": 7.800805568695068, "learning_rate": 9.554420362047641e-06, "loss": 0.4135, "step": 3106 }, { "epoch": 0.14410946196660482, "grad_norm": 6.856680870056152, "learning_rate": 9.554116616833627e-06, "loss": 0.3975, "step": 3107 }, { "epoch": 0.14415584415584415, "grad_norm": 9.348366737365723, "learning_rate": 9.553812772957078e-06, "loss": 0.2693, "step": 3108 }, { "epoch": 0.14420222634508348, "grad_norm": 10.56647777557373, "learning_rate": 9.553508830424579e-06, "loss": 0.3171, "step": 3109 }, { "epoch": 0.1442486085343228, "grad_norm": 5.424107074737549, "learning_rate": 9.553204789242711e-06, "loss": 0.4138, "step": 3110 }, { "epoch": 0.14429499072356214, "grad_norm": 5.157892227172852, "learning_rate": 9.552900649418061e-06, "loss": 0.4347, "step": 3111 }, { "epoch": 0.1443413729128015, "grad_norm": 7.4243292808532715, "learning_rate": 9.552596410957224e-06, "loss": 0.3871, "step": 3112 }, { "epoch": 0.14438775510204083, "grad_norm": 7.713744163513184, "learning_rate": 9.552292073866785e-06, "loss": 0.3247, "step": 3113 }, { "epoch": 0.14443413729128016, "grad_norm": 5.130343437194824, "learning_rate": 9.551987638153339e-06, "loss": 0.3962, "step": 3114 }, { "epoch": 0.1444805194805195, "grad_norm": 6.990132808685303, "learning_rate": 9.551683103823482e-06, "loss": 0.3378, "step": 3115 }, { "epoch": 0.14452690166975882, "grad_norm": 7.916896343231201, "learning_rate": 9.551378470883813e-06, "loss": 0.3857, "step": 3116 }, { "epoch": 0.14457328385899815, "grad_norm": 8.900256156921387, "learning_rate": 9.551073739340926e-06, "loss": 0.2721, "step": 3117 }, { "epoch": 0.14461966604823748, "grad_norm": 12.610336303710938, "learning_rate": 9.550768909201431e-06, "loss": 0.4294, "step": 3118 }, { "epoch": 0.1446660482374768, "grad_norm": 7.878386974334717, "learning_rate": 9.550463980471926e-06, "loss": 0.3041, "step": 3119 }, { "epoch": 0.14471243042671614, "grad_norm": 6.74741268157959, "learning_rate": 9.550158953159019e-06, "loss": 0.3553, "step": 3120 }, { "epoch": 0.14475881261595547, "grad_norm": 15.304070472717285, "learning_rate": 9.549853827269317e-06, "loss": 0.505, "step": 3121 }, { "epoch": 0.1448051948051948, "grad_norm": 10.056863784790039, "learning_rate": 9.549548602809434e-06, "loss": 0.478, "step": 3122 }, { "epoch": 0.14485157699443413, "grad_norm": 7.436342716217041, "learning_rate": 9.54924327978598e-06, "loss": 0.3929, "step": 3123 }, { "epoch": 0.14489795918367346, "grad_norm": 13.317652702331543, "learning_rate": 9.54893785820557e-06, "loss": 0.4806, "step": 3124 }, { "epoch": 0.1449443413729128, "grad_norm": 10.18309211730957, "learning_rate": 9.548632338074818e-06, "loss": 0.3458, "step": 3125 }, { "epoch": 0.14499072356215215, "grad_norm": 7.820944309234619, "learning_rate": 9.548326719400346e-06, "loss": 0.5039, "step": 3126 }, { "epoch": 0.14503710575139148, "grad_norm": 4.79063081741333, "learning_rate": 9.548021002188777e-06, "loss": 0.3746, "step": 3127 }, { "epoch": 0.1450834879406308, "grad_norm": 6.460827827453613, "learning_rate": 9.547715186446732e-06, "loss": 0.3689, "step": 3128 }, { "epoch": 0.14512987012987014, "grad_norm": 10.725167274475098, "learning_rate": 9.547409272180835e-06, "loss": 0.518, "step": 3129 }, { "epoch": 0.14517625231910947, "grad_norm": 8.998867988586426, "learning_rate": 9.547103259397713e-06, "loss": 0.34, "step": 3130 }, { "epoch": 0.1452226345083488, "grad_norm": 10.907371520996094, "learning_rate": 9.546797148103999e-06, "loss": 0.3644, "step": 3131 }, { "epoch": 0.14526901669758813, "grad_norm": 13.811958312988281, "learning_rate": 9.546490938306323e-06, "loss": 0.4406, "step": 3132 }, { "epoch": 0.14531539888682746, "grad_norm": 6.7738566398620605, "learning_rate": 9.546184630011317e-06, "loss": 0.3423, "step": 3133 }, { "epoch": 0.1453617810760668, "grad_norm": 10.622868537902832, "learning_rate": 9.54587822322562e-06, "loss": 0.3352, "step": 3134 }, { "epoch": 0.14540816326530612, "grad_norm": 6.314622402191162, "learning_rate": 9.545571717955871e-06, "loss": 0.3548, "step": 3135 }, { "epoch": 0.14545454545454545, "grad_norm": 15.354731559753418, "learning_rate": 9.545265114208706e-06, "loss": 0.4951, "step": 3136 }, { "epoch": 0.14550092764378478, "grad_norm": 10.787800788879395, "learning_rate": 9.54495841199077e-06, "loss": 0.371, "step": 3137 }, { "epoch": 0.1455473098330241, "grad_norm": 4.77965784072876, "learning_rate": 9.544651611308709e-06, "loss": 0.3557, "step": 3138 }, { "epoch": 0.14559369202226344, "grad_norm": 6.405579090118408, "learning_rate": 9.544344712169164e-06, "loss": 0.4098, "step": 3139 }, { "epoch": 0.1456400742115028, "grad_norm": 11.913166046142578, "learning_rate": 9.544037714578792e-06, "loss": 0.5193, "step": 3140 }, { "epoch": 0.14568645640074213, "grad_norm": 5.479671478271484, "learning_rate": 9.543730618544236e-06, "loss": 0.3292, "step": 3141 }, { "epoch": 0.14573283858998146, "grad_norm": 6.006195068359375, "learning_rate": 9.543423424072156e-06, "loss": 0.2779, "step": 3142 }, { "epoch": 0.1457792207792208, "grad_norm": 8.421915054321289, "learning_rate": 9.543116131169202e-06, "loss": 0.3592, "step": 3143 }, { "epoch": 0.14582560296846012, "grad_norm": 8.272126197814941, "learning_rate": 9.542808739842034e-06, "loss": 0.2977, "step": 3144 }, { "epoch": 0.14587198515769945, "grad_norm": 5.780688285827637, "learning_rate": 9.54250125009731e-06, "loss": 0.3573, "step": 3145 }, { "epoch": 0.14591836734693878, "grad_norm": 6.452991485595703, "learning_rate": 9.542193661941692e-06, "loss": 0.4092, "step": 3146 }, { "epoch": 0.1459647495361781, "grad_norm": 18.79931640625, "learning_rate": 9.541885975381846e-06, "loss": 0.7424, "step": 3147 }, { "epoch": 0.14601113172541744, "grad_norm": 7.323483943939209, "learning_rate": 9.541578190424435e-06, "loss": 0.3356, "step": 3148 }, { "epoch": 0.14605751391465677, "grad_norm": 6.332387924194336, "learning_rate": 9.541270307076128e-06, "loss": 0.4108, "step": 3149 }, { "epoch": 0.1461038961038961, "grad_norm": 11.540311813354492, "learning_rate": 9.540962325343592e-06, "loss": 0.5271, "step": 3150 }, { "epoch": 0.14615027829313543, "grad_norm": 4.471089839935303, "learning_rate": 9.540654245233507e-06, "loss": 0.3549, "step": 3151 }, { "epoch": 0.14619666048237476, "grad_norm": 6.640982151031494, "learning_rate": 9.540346066752539e-06, "loss": 0.381, "step": 3152 }, { "epoch": 0.1462430426716141, "grad_norm": 6.16079044342041, "learning_rate": 9.540037789907371e-06, "loss": 0.3546, "step": 3153 }, { "epoch": 0.14628942486085345, "grad_norm": 12.462011337280273, "learning_rate": 9.539729414704677e-06, "loss": 0.409, "step": 3154 }, { "epoch": 0.14633580705009278, "grad_norm": 8.292695999145508, "learning_rate": 9.53942094115114e-06, "loss": 0.4346, "step": 3155 }, { "epoch": 0.1463821892393321, "grad_norm": 19.485422134399414, "learning_rate": 9.539112369253445e-06, "loss": 0.4966, "step": 3156 }, { "epoch": 0.14642857142857144, "grad_norm": 19.143966674804688, "learning_rate": 9.538803699018272e-06, "loss": 0.4305, "step": 3157 }, { "epoch": 0.14647495361781077, "grad_norm": 5.240145206451416, "learning_rate": 9.538494930452312e-06, "loss": 0.3881, "step": 3158 }, { "epoch": 0.1465213358070501, "grad_norm": 6.098468780517578, "learning_rate": 9.538186063562253e-06, "loss": 0.4188, "step": 3159 }, { "epoch": 0.14656771799628943, "grad_norm": 6.436062335968018, "learning_rate": 9.537877098354787e-06, "loss": 0.392, "step": 3160 }, { "epoch": 0.14661410018552876, "grad_norm": 7.829648494720459, "learning_rate": 9.537568034836606e-06, "loss": 0.3872, "step": 3161 }, { "epoch": 0.1466604823747681, "grad_norm": 8.848292350769043, "learning_rate": 9.537258873014408e-06, "loss": 0.3784, "step": 3162 }, { "epoch": 0.14670686456400742, "grad_norm": 8.258782386779785, "learning_rate": 9.536949612894888e-06, "loss": 0.3758, "step": 3163 }, { "epoch": 0.14675324675324675, "grad_norm": 5.815758228302002, "learning_rate": 9.536640254484748e-06, "loss": 0.2584, "step": 3164 }, { "epoch": 0.14679962894248608, "grad_norm": 9.408515930175781, "learning_rate": 9.536330797790691e-06, "loss": 0.3097, "step": 3165 }, { "epoch": 0.1468460111317254, "grad_norm": 10.732638359069824, "learning_rate": 9.53602124281942e-06, "loss": 0.4199, "step": 3166 }, { "epoch": 0.14689239332096474, "grad_norm": 9.507155418395996, "learning_rate": 9.53571158957764e-06, "loss": 0.3744, "step": 3167 }, { "epoch": 0.1469387755102041, "grad_norm": 6.875708103179932, "learning_rate": 9.535401838072062e-06, "loss": 0.3747, "step": 3168 }, { "epoch": 0.14698515769944343, "grad_norm": 7.934098720550537, "learning_rate": 9.535091988309395e-06, "loss": 0.4464, "step": 3169 }, { "epoch": 0.14703153988868276, "grad_norm": 13.675769805908203, "learning_rate": 9.53478204029635e-06, "loss": 0.3671, "step": 3170 }, { "epoch": 0.14707792207792209, "grad_norm": 7.19978666305542, "learning_rate": 9.534471994039646e-06, "loss": 0.3136, "step": 3171 }, { "epoch": 0.14712430426716142, "grad_norm": 8.089760780334473, "learning_rate": 9.534161849545997e-06, "loss": 0.2772, "step": 3172 }, { "epoch": 0.14717068645640075, "grad_norm": 20.55471420288086, "learning_rate": 9.533851606822125e-06, "loss": 0.5814, "step": 3173 }, { "epoch": 0.14721706864564008, "grad_norm": 7.051401138305664, "learning_rate": 9.533541265874748e-06, "loss": 0.4393, "step": 3174 }, { "epoch": 0.1472634508348794, "grad_norm": 5.076112747192383, "learning_rate": 9.533230826710591e-06, "loss": 0.3078, "step": 3175 }, { "epoch": 0.14730983302411874, "grad_norm": 9.652226448059082, "learning_rate": 9.532920289336378e-06, "loss": 0.3557, "step": 3176 }, { "epoch": 0.14735621521335807, "grad_norm": 6.393198490142822, "learning_rate": 9.53260965375884e-06, "loss": 0.2943, "step": 3177 }, { "epoch": 0.1474025974025974, "grad_norm": 11.784300804138184, "learning_rate": 9.532298919984701e-06, "loss": 0.3351, "step": 3178 }, { "epoch": 0.14744897959183673, "grad_norm": 11.777868270874023, "learning_rate": 9.531988088020701e-06, "loss": 0.5136, "step": 3179 }, { "epoch": 0.14749536178107606, "grad_norm": 6.071252346038818, "learning_rate": 9.531677157873566e-06, "loss": 0.3664, "step": 3180 }, { "epoch": 0.1475417439703154, "grad_norm": 5.728254795074463, "learning_rate": 9.531366129550037e-06, "loss": 0.4232, "step": 3181 }, { "epoch": 0.14758812615955474, "grad_norm": 9.142988204956055, "learning_rate": 9.53105500305685e-06, "loss": 0.3903, "step": 3182 }, { "epoch": 0.14763450834879407, "grad_norm": 9.026300430297852, "learning_rate": 9.530743778400747e-06, "loss": 0.4157, "step": 3183 }, { "epoch": 0.1476808905380334, "grad_norm": 6.3504204750061035, "learning_rate": 9.53043245558847e-06, "loss": 0.4007, "step": 3184 }, { "epoch": 0.14772727272727273, "grad_norm": 7.445947647094727, "learning_rate": 9.530121034626762e-06, "loss": 0.4921, "step": 3185 }, { "epoch": 0.14777365491651206, "grad_norm": 4.918888092041016, "learning_rate": 9.529809515522373e-06, "loss": 0.2926, "step": 3186 }, { "epoch": 0.1478200371057514, "grad_norm": 8.191694259643555, "learning_rate": 9.529497898282048e-06, "loss": 0.4261, "step": 3187 }, { "epoch": 0.14786641929499073, "grad_norm": 7.410702705383301, "learning_rate": 9.529186182912542e-06, "loss": 0.3281, "step": 3188 }, { "epoch": 0.14791280148423006, "grad_norm": 8.46715259552002, "learning_rate": 9.528874369420606e-06, "loss": 0.3642, "step": 3189 }, { "epoch": 0.14795918367346939, "grad_norm": 10.073895454406738, "learning_rate": 9.528562457812993e-06, "loss": 0.5455, "step": 3190 }, { "epoch": 0.14800556586270872, "grad_norm": 7.48680305480957, "learning_rate": 9.528250448096467e-06, "loss": 0.338, "step": 3191 }, { "epoch": 0.14805194805194805, "grad_norm": 8.63542366027832, "learning_rate": 9.52793834027778e-06, "loss": 0.3207, "step": 3192 }, { "epoch": 0.14809833024118738, "grad_norm": 7.574570655822754, "learning_rate": 9.5276261343637e-06, "loss": 0.3279, "step": 3193 }, { "epoch": 0.1481447124304267, "grad_norm": 8.89976692199707, "learning_rate": 9.527313830360984e-06, "loss": 0.361, "step": 3194 }, { "epoch": 0.14819109461966604, "grad_norm": 6.051562786102295, "learning_rate": 9.527001428276406e-06, "loss": 0.3717, "step": 3195 }, { "epoch": 0.1482374768089054, "grad_norm": 9.766242980957031, "learning_rate": 9.526688928116728e-06, "loss": 0.4319, "step": 3196 }, { "epoch": 0.14828385899814472, "grad_norm": 10.077509880065918, "learning_rate": 9.526376329888721e-06, "loss": 0.4101, "step": 3197 }, { "epoch": 0.14833024118738405, "grad_norm": 7.7862043380737305, "learning_rate": 9.52606363359916e-06, "loss": 0.3787, "step": 3198 }, { "epoch": 0.14837662337662338, "grad_norm": 6.3317952156066895, "learning_rate": 9.525750839254816e-06, "loss": 0.4204, "step": 3199 }, { "epoch": 0.14842300556586271, "grad_norm": 6.4668869972229, "learning_rate": 9.525437946862468e-06, "loss": 0.3111, "step": 3200 }, { "epoch": 0.14846938775510204, "grad_norm": 6.970216274261475, "learning_rate": 9.525124956428894e-06, "loss": 0.383, "step": 3201 }, { "epoch": 0.14851576994434137, "grad_norm": 6.5851521492004395, "learning_rate": 9.524811867960874e-06, "loss": 0.3785, "step": 3202 }, { "epoch": 0.1485621521335807, "grad_norm": 8.261178016662598, "learning_rate": 9.524498681465192e-06, "loss": 0.3153, "step": 3203 }, { "epoch": 0.14860853432282004, "grad_norm": 7.467803955078125, "learning_rate": 9.524185396948631e-06, "loss": 0.4867, "step": 3204 }, { "epoch": 0.14865491651205937, "grad_norm": 10.757431030273438, "learning_rate": 9.523872014417982e-06, "loss": 0.3296, "step": 3205 }, { "epoch": 0.1487012987012987, "grad_norm": 13.270112037658691, "learning_rate": 9.52355853388003e-06, "loss": 0.357, "step": 3206 }, { "epoch": 0.14874768089053803, "grad_norm": 8.562253952026367, "learning_rate": 9.523244955341569e-06, "loss": 0.3827, "step": 3207 }, { "epoch": 0.14879406307977736, "grad_norm": 6.72128438949585, "learning_rate": 9.522931278809393e-06, "loss": 0.4186, "step": 3208 }, { "epoch": 0.14884044526901669, "grad_norm": 10.43346118927002, "learning_rate": 9.522617504290295e-06, "loss": 0.474, "step": 3209 }, { "epoch": 0.14888682745825602, "grad_norm": 6.834056854248047, "learning_rate": 9.522303631791074e-06, "loss": 0.3796, "step": 3210 }, { "epoch": 0.14893320964749537, "grad_norm": 10.40219783782959, "learning_rate": 9.521989661318532e-06, "loss": 0.3447, "step": 3211 }, { "epoch": 0.1489795918367347, "grad_norm": 4.877387046813965, "learning_rate": 9.521675592879467e-06, "loss": 0.3916, "step": 3212 }, { "epoch": 0.14902597402597403, "grad_norm": 8.06818675994873, "learning_rate": 9.521361426480686e-06, "loss": 0.3629, "step": 3213 }, { "epoch": 0.14907235621521336, "grad_norm": 5.136203289031982, "learning_rate": 9.521047162128994e-06, "loss": 0.3791, "step": 3214 }, { "epoch": 0.1491187384044527, "grad_norm": 6.704014301300049, "learning_rate": 9.5207327998312e-06, "loss": 0.3742, "step": 3215 }, { "epoch": 0.14916512059369202, "grad_norm": 6.543520927429199, "learning_rate": 9.520418339594115e-06, "loss": 0.2458, "step": 3216 }, { "epoch": 0.14921150278293135, "grad_norm": 13.852320671081543, "learning_rate": 9.52010378142455e-06, "loss": 0.4559, "step": 3217 }, { "epoch": 0.14925788497217068, "grad_norm": 7.596766471862793, "learning_rate": 9.51978912532932e-06, "loss": 0.5137, "step": 3218 }, { "epoch": 0.14930426716141001, "grad_norm": 11.611621856689453, "learning_rate": 9.519474371315244e-06, "loss": 0.3508, "step": 3219 }, { "epoch": 0.14935064935064934, "grad_norm": 11.80346965789795, "learning_rate": 9.519159519389141e-06, "loss": 0.413, "step": 3220 }, { "epoch": 0.14939703153988868, "grad_norm": 12.580756187438965, "learning_rate": 9.518844569557828e-06, "loss": 0.5233, "step": 3221 }, { "epoch": 0.149443413729128, "grad_norm": 9.490913391113281, "learning_rate": 9.518529521828132e-06, "loss": 0.3858, "step": 3222 }, { "epoch": 0.14948979591836734, "grad_norm": 5.996954917907715, "learning_rate": 9.518214376206876e-06, "loss": 0.2962, "step": 3223 }, { "epoch": 0.14953617810760667, "grad_norm": 18.755258560180664, "learning_rate": 9.517899132700889e-06, "loss": 0.4321, "step": 3224 }, { "epoch": 0.14958256029684602, "grad_norm": 8.413195610046387, "learning_rate": 9.517583791317e-06, "loss": 0.4449, "step": 3225 }, { "epoch": 0.14962894248608535, "grad_norm": 8.341048240661621, "learning_rate": 9.517268352062043e-06, "loss": 0.2816, "step": 3226 }, { "epoch": 0.14967532467532468, "grad_norm": 24.2689151763916, "learning_rate": 9.516952814942847e-06, "loss": 0.473, "step": 3227 }, { "epoch": 0.149721706864564, "grad_norm": 13.656481742858887, "learning_rate": 9.516637179966254e-06, "loss": 0.4005, "step": 3228 }, { "epoch": 0.14976808905380334, "grad_norm": 9.229218482971191, "learning_rate": 9.516321447139096e-06, "loss": 0.3842, "step": 3229 }, { "epoch": 0.14981447124304267, "grad_norm": 7.28914213180542, "learning_rate": 9.516005616468218e-06, "loss": 0.3735, "step": 3230 }, { "epoch": 0.149860853432282, "grad_norm": 21.706741333007812, "learning_rate": 9.515689687960459e-06, "loss": 0.4348, "step": 3231 }, { "epoch": 0.14990723562152133, "grad_norm": 7.134343147277832, "learning_rate": 9.515373661622665e-06, "loss": 0.2675, "step": 3232 }, { "epoch": 0.14995361781076066, "grad_norm": 11.648024559020996, "learning_rate": 9.515057537461682e-06, "loss": 0.3091, "step": 3233 }, { "epoch": 0.15, "grad_norm": 5.932558059692383, "learning_rate": 9.514741315484358e-06, "loss": 0.3581, "step": 3234 }, { "epoch": 0.15004638218923932, "grad_norm": 5.246791362762451, "learning_rate": 9.514424995697547e-06, "loss": 0.2781, "step": 3235 }, { "epoch": 0.15009276437847865, "grad_norm": 12.259147644042969, "learning_rate": 9.514108578108097e-06, "loss": 0.4424, "step": 3236 }, { "epoch": 0.15013914656771798, "grad_norm": 10.343071937561035, "learning_rate": 9.513792062722866e-06, "loss": 0.4234, "step": 3237 }, { "epoch": 0.15018552875695731, "grad_norm": 12.424776077270508, "learning_rate": 9.513475449548713e-06, "loss": 0.4382, "step": 3238 }, { "epoch": 0.15023191094619667, "grad_norm": 67.89166259765625, "learning_rate": 9.513158738592493e-06, "loss": 0.5434, "step": 3239 }, { "epoch": 0.150278293135436, "grad_norm": 6.504655361175537, "learning_rate": 9.512841929861069e-06, "loss": 0.3406, "step": 3240 }, { "epoch": 0.15032467532467533, "grad_norm": 5.504758358001709, "learning_rate": 9.512525023361307e-06, "loss": 0.3776, "step": 3241 }, { "epoch": 0.15037105751391466, "grad_norm": 6.713589668273926, "learning_rate": 9.512208019100068e-06, "loss": 0.3613, "step": 3242 }, { "epoch": 0.150417439703154, "grad_norm": 5.870182991027832, "learning_rate": 9.511890917084224e-06, "loss": 0.3487, "step": 3243 }, { "epoch": 0.15046382189239332, "grad_norm": 6.87131404876709, "learning_rate": 9.511573717320644e-06, "loss": 0.3134, "step": 3244 }, { "epoch": 0.15051020408163265, "grad_norm": 7.555490970611572, "learning_rate": 9.511256419816198e-06, "loss": 0.3508, "step": 3245 }, { "epoch": 0.15055658627087198, "grad_norm": 7.275595188140869, "learning_rate": 9.51093902457776e-06, "loss": 0.3809, "step": 3246 }, { "epoch": 0.15060296846011131, "grad_norm": 12.752788543701172, "learning_rate": 9.510621531612207e-06, "loss": 0.4641, "step": 3247 }, { "epoch": 0.15064935064935064, "grad_norm": 6.315047740936279, "learning_rate": 9.510303940926418e-06, "loss": 0.3535, "step": 3248 }, { "epoch": 0.15069573283858997, "grad_norm": 7.099930763244629, "learning_rate": 9.509986252527275e-06, "loss": 0.276, "step": 3249 }, { "epoch": 0.1507421150278293, "grad_norm": 6.020030498504639, "learning_rate": 9.509668466421656e-06, "loss": 0.4398, "step": 3250 }, { "epoch": 0.15078849721706863, "grad_norm": 6.051369667053223, "learning_rate": 9.50935058261645e-06, "loss": 0.3009, "step": 3251 }, { "epoch": 0.15083487940630796, "grad_norm": 8.533278465270996, "learning_rate": 9.509032601118541e-06, "loss": 0.45, "step": 3252 }, { "epoch": 0.15088126159554732, "grad_norm": 12.041597366333008, "learning_rate": 9.50871452193482e-06, "loss": 0.5351, "step": 3253 }, { "epoch": 0.15092764378478665, "grad_norm": 8.759512901306152, "learning_rate": 9.508396345072177e-06, "loss": 0.4298, "step": 3254 }, { "epoch": 0.15097402597402598, "grad_norm": 10.671260833740234, "learning_rate": 9.508078070537505e-06, "loss": 0.3743, "step": 3255 }, { "epoch": 0.1510204081632653, "grad_norm": 7.978993892669678, "learning_rate": 9.507759698337698e-06, "loss": 0.4271, "step": 3256 }, { "epoch": 0.15106679035250464, "grad_norm": 10.65917682647705, "learning_rate": 9.507441228479655e-06, "loss": 0.4341, "step": 3257 }, { "epoch": 0.15111317254174397, "grad_norm": 12.201339721679688, "learning_rate": 9.507122660970278e-06, "loss": 0.4314, "step": 3258 }, { "epoch": 0.1511595547309833, "grad_norm": 12.39246940612793, "learning_rate": 9.506803995816463e-06, "loss": 0.4182, "step": 3259 }, { "epoch": 0.15120593692022263, "grad_norm": 12.653697967529297, "learning_rate": 9.506485233025117e-06, "loss": 0.5635, "step": 3260 }, { "epoch": 0.15125231910946196, "grad_norm": 6.538372993469238, "learning_rate": 9.506166372603145e-06, "loss": 0.4047, "step": 3261 }, { "epoch": 0.1512987012987013, "grad_norm": 7.218308448791504, "learning_rate": 9.505847414557457e-06, "loss": 0.4343, "step": 3262 }, { "epoch": 0.15134508348794062, "grad_norm": 7.549492835998535, "learning_rate": 9.50552835889496e-06, "loss": 0.3865, "step": 3263 }, { "epoch": 0.15139146567717995, "grad_norm": 7.546477794647217, "learning_rate": 9.505209205622567e-06, "loss": 0.4271, "step": 3264 }, { "epoch": 0.15143784786641928, "grad_norm": 5.752878665924072, "learning_rate": 9.504889954747194e-06, "loss": 0.2805, "step": 3265 }, { "epoch": 0.15148423005565861, "grad_norm": 10.528766632080078, "learning_rate": 9.504570606275757e-06, "loss": 0.4092, "step": 3266 }, { "epoch": 0.15153061224489797, "grad_norm": 5.766369819641113, "learning_rate": 9.504251160215171e-06, "loss": 0.2611, "step": 3267 }, { "epoch": 0.1515769944341373, "grad_norm": 4.492600917816162, "learning_rate": 9.503931616572362e-06, "loss": 0.3641, "step": 3268 }, { "epoch": 0.15162337662337663, "grad_norm": 9.016024589538574, "learning_rate": 9.503611975354248e-06, "loss": 0.3139, "step": 3269 }, { "epoch": 0.15166975881261596, "grad_norm": 7.587086200714111, "learning_rate": 9.503292236567756e-06, "loss": 0.3516, "step": 3270 }, { "epoch": 0.1517161410018553, "grad_norm": 13.019851684570312, "learning_rate": 9.502972400219814e-06, "loss": 0.4486, "step": 3271 }, { "epoch": 0.15176252319109462, "grad_norm": 11.254544258117676, "learning_rate": 9.50265246631735e-06, "loss": 0.4339, "step": 3272 }, { "epoch": 0.15180890538033395, "grad_norm": 12.697038650512695, "learning_rate": 9.502332434867296e-06, "loss": 0.4476, "step": 3273 }, { "epoch": 0.15185528756957328, "grad_norm": 10.054926872253418, "learning_rate": 9.502012305876583e-06, "loss": 0.5663, "step": 3274 }, { "epoch": 0.1519016697588126, "grad_norm": 10.652621269226074, "learning_rate": 9.50169207935215e-06, "loss": 0.4054, "step": 3275 }, { "epoch": 0.15194805194805194, "grad_norm": 5.792475700378418, "learning_rate": 9.50137175530093e-06, "loss": 0.3407, "step": 3276 }, { "epoch": 0.15199443413729127, "grad_norm": 13.269488334655762, "learning_rate": 9.501051333729867e-06, "loss": 0.2834, "step": 3277 }, { "epoch": 0.1520408163265306, "grad_norm": 7.262105941772461, "learning_rate": 9.5007308146459e-06, "loss": 0.3751, "step": 3278 }, { "epoch": 0.15208719851576993, "grad_norm": 7.877135276794434, "learning_rate": 9.500410198055973e-06, "loss": 0.4015, "step": 3279 }, { "epoch": 0.15213358070500926, "grad_norm": 6.886824607849121, "learning_rate": 9.500089483967034e-06, "loss": 0.4682, "step": 3280 }, { "epoch": 0.15217996289424862, "grad_norm": 8.201306343078613, "learning_rate": 9.49976867238603e-06, "loss": 0.4111, "step": 3281 }, { "epoch": 0.15222634508348795, "grad_norm": 9.277331352233887, "learning_rate": 9.499447763319911e-06, "loss": 0.3413, "step": 3282 }, { "epoch": 0.15227272727272728, "grad_norm": 9.055710792541504, "learning_rate": 9.49912675677563e-06, "loss": 0.3367, "step": 3283 }, { "epoch": 0.1523191094619666, "grad_norm": 6.929777145385742, "learning_rate": 9.498805652760139e-06, "loss": 0.3515, "step": 3284 }, { "epoch": 0.15236549165120594, "grad_norm": 4.346687316894531, "learning_rate": 9.498484451280398e-06, "loss": 0.2277, "step": 3285 }, { "epoch": 0.15241187384044527, "grad_norm": 9.410420417785645, "learning_rate": 9.498163152343362e-06, "loss": 0.4537, "step": 3286 }, { "epoch": 0.1524582560296846, "grad_norm": 6.816463947296143, "learning_rate": 9.497841755955997e-06, "loss": 0.3057, "step": 3287 }, { "epoch": 0.15250463821892393, "grad_norm": 9.14013671875, "learning_rate": 9.49752026212526e-06, "loss": 0.3717, "step": 3288 }, { "epoch": 0.15255102040816326, "grad_norm": 12.054356575012207, "learning_rate": 9.49719867085812e-06, "loss": 0.3719, "step": 3289 }, { "epoch": 0.1525974025974026, "grad_norm": 5.569713592529297, "learning_rate": 9.496876982161543e-06, "loss": 0.3492, "step": 3290 }, { "epoch": 0.15264378478664192, "grad_norm": 13.929530143737793, "learning_rate": 9.496555196042496e-06, "loss": 0.4474, "step": 3291 }, { "epoch": 0.15269016697588125, "grad_norm": 12.297765731811523, "learning_rate": 9.496233312507955e-06, "loss": 0.4731, "step": 3292 }, { "epoch": 0.15273654916512058, "grad_norm": 10.753870010375977, "learning_rate": 9.49591133156489e-06, "loss": 0.356, "step": 3293 }, { "epoch": 0.1527829313543599, "grad_norm": 8.815567016601562, "learning_rate": 9.495589253220277e-06, "loss": 0.3828, "step": 3294 }, { "epoch": 0.15282931354359927, "grad_norm": 7.626843452453613, "learning_rate": 9.495267077481094e-06, "loss": 0.3694, "step": 3295 }, { "epoch": 0.1528756957328386, "grad_norm": 16.539363861083984, "learning_rate": 9.49494480435432e-06, "loss": 0.3787, "step": 3296 }, { "epoch": 0.15292207792207793, "grad_norm": 15.180830001831055, "learning_rate": 9.494622433846939e-06, "loss": 0.5126, "step": 3297 }, { "epoch": 0.15296846011131726, "grad_norm": 5.981568813323975, "learning_rate": 9.494299965965935e-06, "loss": 0.2743, "step": 3298 }, { "epoch": 0.1530148423005566, "grad_norm": 11.427383422851562, "learning_rate": 9.49397740071829e-06, "loss": 0.4406, "step": 3299 }, { "epoch": 0.15306122448979592, "grad_norm": 35.53485870361328, "learning_rate": 9.493654738110996e-06, "loss": 0.4766, "step": 3300 }, { "epoch": 0.15310760667903525, "grad_norm": 9.737631797790527, "learning_rate": 9.493331978151041e-06, "loss": 0.4683, "step": 3301 }, { "epoch": 0.15315398886827458, "grad_norm": 14.155850410461426, "learning_rate": 9.49300912084542e-06, "loss": 0.2296, "step": 3302 }, { "epoch": 0.1532003710575139, "grad_norm": 8.794875144958496, "learning_rate": 9.492686166201125e-06, "loss": 0.4311, "step": 3303 }, { "epoch": 0.15324675324675324, "grad_norm": 7.011448860168457, "learning_rate": 9.492363114225156e-06, "loss": 0.3874, "step": 3304 }, { "epoch": 0.15329313543599257, "grad_norm": 5.657992839813232, "learning_rate": 9.492039964924509e-06, "loss": 0.4161, "step": 3305 }, { "epoch": 0.1533395176252319, "grad_norm": 5.311514854431152, "learning_rate": 9.491716718306184e-06, "loss": 0.3533, "step": 3306 }, { "epoch": 0.15338589981447123, "grad_norm": 9.248211860656738, "learning_rate": 9.491393374377187e-06, "loss": 0.3835, "step": 3307 }, { "epoch": 0.15343228200371056, "grad_norm": 6.1134538650512695, "learning_rate": 9.49106993314452e-06, "loss": 0.3956, "step": 3308 }, { "epoch": 0.15347866419294992, "grad_norm": 10.415624618530273, "learning_rate": 9.490746394615194e-06, "loss": 0.491, "step": 3309 }, { "epoch": 0.15352504638218925, "grad_norm": 7.1283769607543945, "learning_rate": 9.490422758796214e-06, "loss": 0.4308, "step": 3310 }, { "epoch": 0.15357142857142858, "grad_norm": 9.306464195251465, "learning_rate": 9.490099025694592e-06, "loss": 0.527, "step": 3311 }, { "epoch": 0.1536178107606679, "grad_norm": 6.698577880859375, "learning_rate": 9.489775195317346e-06, "loss": 0.3664, "step": 3312 }, { "epoch": 0.15366419294990724, "grad_norm": 7.796428203582764, "learning_rate": 9.489451267671489e-06, "loss": 0.4275, "step": 3313 }, { "epoch": 0.15371057513914657, "grad_norm": 4.966303825378418, "learning_rate": 9.489127242764035e-06, "loss": 0.3201, "step": 3314 }, { "epoch": 0.1537569573283859, "grad_norm": 7.699331760406494, "learning_rate": 9.488803120602007e-06, "loss": 0.3942, "step": 3315 }, { "epoch": 0.15380333951762523, "grad_norm": 4.918903827667236, "learning_rate": 9.488478901192431e-06, "loss": 0.3515, "step": 3316 }, { "epoch": 0.15384972170686456, "grad_norm": 5.961235046386719, "learning_rate": 9.488154584542323e-06, "loss": 0.3022, "step": 3317 }, { "epoch": 0.1538961038961039, "grad_norm": 5.653087139129639, "learning_rate": 9.487830170658715e-06, "loss": 0.3664, "step": 3318 }, { "epoch": 0.15394248608534322, "grad_norm": 8.796485900878906, "learning_rate": 9.487505659548632e-06, "loss": 0.3884, "step": 3319 }, { "epoch": 0.15398886827458255, "grad_norm": 6.878520488739014, "learning_rate": 9.487181051219107e-06, "loss": 0.2882, "step": 3320 }, { "epoch": 0.15403525046382188, "grad_norm": 8.96938419342041, "learning_rate": 9.486856345677173e-06, "loss": 0.3502, "step": 3321 }, { "epoch": 0.1540816326530612, "grad_norm": 4.976649284362793, "learning_rate": 9.48653154292986e-06, "loss": 0.2626, "step": 3322 }, { "epoch": 0.15412801484230057, "grad_norm": 8.76458740234375, "learning_rate": 9.48620664298421e-06, "loss": 0.4447, "step": 3323 }, { "epoch": 0.1541743970315399, "grad_norm": 6.991619110107422, "learning_rate": 9.485881645847257e-06, "loss": 0.3225, "step": 3324 }, { "epoch": 0.15422077922077923, "grad_norm": 9.879266738891602, "learning_rate": 9.485556551526045e-06, "loss": 0.2845, "step": 3325 }, { "epoch": 0.15426716141001856, "grad_norm": 11.5641450881958, "learning_rate": 9.485231360027617e-06, "loss": 0.5653, "step": 3326 }, { "epoch": 0.1543135435992579, "grad_norm": 8.897397994995117, "learning_rate": 9.484906071359017e-06, "loss": 0.3712, "step": 3327 }, { "epoch": 0.15435992578849722, "grad_norm": 9.636303901672363, "learning_rate": 9.484580685527292e-06, "loss": 0.5061, "step": 3328 }, { "epoch": 0.15440630797773655, "grad_norm": 6.765988349914551, "learning_rate": 9.484255202539491e-06, "loss": 0.2621, "step": 3329 }, { "epoch": 0.15445269016697588, "grad_norm": 8.516458511352539, "learning_rate": 9.483929622402668e-06, "loss": 0.3239, "step": 3330 }, { "epoch": 0.1544990723562152, "grad_norm": 8.202204704284668, "learning_rate": 9.483603945123874e-06, "loss": 0.4049, "step": 3331 }, { "epoch": 0.15454545454545454, "grad_norm": 4.982245922088623, "learning_rate": 9.483278170710166e-06, "loss": 0.2889, "step": 3332 }, { "epoch": 0.15459183673469387, "grad_norm": 11.377192497253418, "learning_rate": 9.482952299168599e-06, "loss": 0.4525, "step": 3333 }, { "epoch": 0.1546382189239332, "grad_norm": 4.651195526123047, "learning_rate": 9.482626330506238e-06, "loss": 0.3171, "step": 3334 }, { "epoch": 0.15468460111317253, "grad_norm": 9.459156036376953, "learning_rate": 9.482300264730138e-06, "loss": 0.467, "step": 3335 }, { "epoch": 0.15473098330241186, "grad_norm": 7.35720157623291, "learning_rate": 9.481974101847371e-06, "loss": 0.3372, "step": 3336 }, { "epoch": 0.1547773654916512, "grad_norm": 6.221092224121094, "learning_rate": 9.481647841864995e-06, "loss": 0.2701, "step": 3337 }, { "epoch": 0.15482374768089055, "grad_norm": 7.695028781890869, "learning_rate": 9.481321484790086e-06, "loss": 0.3746, "step": 3338 }, { "epoch": 0.15487012987012988, "grad_norm": 11.44375991821289, "learning_rate": 9.48099503062971e-06, "loss": 0.4791, "step": 3339 }, { "epoch": 0.1549165120593692, "grad_norm": 4.538843631744385, "learning_rate": 9.480668479390939e-06, "loss": 0.3143, "step": 3340 }, { "epoch": 0.15496289424860854, "grad_norm": 6.9134979248046875, "learning_rate": 9.480341831080849e-06, "loss": 0.3922, "step": 3341 }, { "epoch": 0.15500927643784787, "grad_norm": 8.404030799865723, "learning_rate": 9.480015085706517e-06, "loss": 0.3082, "step": 3342 }, { "epoch": 0.1550556586270872, "grad_norm": 13.871604919433594, "learning_rate": 9.47968824327502e-06, "loss": 0.4318, "step": 3343 }, { "epoch": 0.15510204081632653, "grad_norm": 4.370174407958984, "learning_rate": 9.479361303793441e-06, "loss": 0.3009, "step": 3344 }, { "epoch": 0.15514842300556586, "grad_norm": 10.801545143127441, "learning_rate": 9.479034267268861e-06, "loss": 0.3929, "step": 3345 }, { "epoch": 0.1551948051948052, "grad_norm": 7.200533390045166, "learning_rate": 9.478707133708368e-06, "loss": 0.3494, "step": 3346 }, { "epoch": 0.15524118738404452, "grad_norm": 11.774714469909668, "learning_rate": 9.478379903119046e-06, "loss": 0.4358, "step": 3347 }, { "epoch": 0.15528756957328385, "grad_norm": 9.66439151763916, "learning_rate": 9.478052575507983e-06, "loss": 0.4391, "step": 3348 }, { "epoch": 0.15533395176252318, "grad_norm": 12.090330123901367, "learning_rate": 9.477725150882276e-06, "loss": 0.3897, "step": 3349 }, { "epoch": 0.1553803339517625, "grad_norm": 8.188817024230957, "learning_rate": 9.477397629249015e-06, "loss": 0.2348, "step": 3350 }, { "epoch": 0.15542671614100184, "grad_norm": 6.6640472412109375, "learning_rate": 9.477070010615295e-06, "loss": 0.4231, "step": 3351 }, { "epoch": 0.1554730983302412, "grad_norm": 11.375706672668457, "learning_rate": 9.476742294988214e-06, "loss": 0.438, "step": 3352 }, { "epoch": 0.15551948051948053, "grad_norm": 7.149500370025635, "learning_rate": 9.476414482374875e-06, "loss": 0.3079, "step": 3353 }, { "epoch": 0.15556586270871986, "grad_norm": 9.199604034423828, "learning_rate": 9.476086572782375e-06, "loss": 0.3273, "step": 3354 }, { "epoch": 0.1556122448979592, "grad_norm": 8.261510848999023, "learning_rate": 9.475758566217824e-06, "loss": 0.3439, "step": 3355 }, { "epoch": 0.15565862708719852, "grad_norm": 12.785767555236816, "learning_rate": 9.47543046268832e-06, "loss": 0.3705, "step": 3356 }, { "epoch": 0.15570500927643785, "grad_norm": 10.163084983825684, "learning_rate": 9.475102262200977e-06, "loss": 0.5051, "step": 3357 }, { "epoch": 0.15575139146567718, "grad_norm": 7.788730144500732, "learning_rate": 9.474773964762904e-06, "loss": 0.4083, "step": 3358 }, { "epoch": 0.1557977736549165, "grad_norm": 4.629958629608154, "learning_rate": 9.474445570381212e-06, "loss": 0.2526, "step": 3359 }, { "epoch": 0.15584415584415584, "grad_norm": 9.180010795593262, "learning_rate": 9.474117079063019e-06, "loss": 0.3167, "step": 3360 }, { "epoch": 0.15589053803339517, "grad_norm": 7.975658416748047, "learning_rate": 9.473788490815438e-06, "loss": 0.4395, "step": 3361 }, { "epoch": 0.1559369202226345, "grad_norm": 6.155855655670166, "learning_rate": 9.473459805645589e-06, "loss": 0.2944, "step": 3362 }, { "epoch": 0.15598330241187383, "grad_norm": 4.5830793380737305, "learning_rate": 9.473131023560593e-06, "loss": 0.2343, "step": 3363 }, { "epoch": 0.15602968460111316, "grad_norm": 9.410418510437012, "learning_rate": 9.472802144567573e-06, "loss": 0.3716, "step": 3364 }, { "epoch": 0.1560760667903525, "grad_norm": 8.551363945007324, "learning_rate": 9.472473168673654e-06, "loss": 0.3471, "step": 3365 }, { "epoch": 0.15612244897959185, "grad_norm": 11.162662506103516, "learning_rate": 9.472144095885962e-06, "loss": 0.4164, "step": 3366 }, { "epoch": 0.15616883116883118, "grad_norm": 8.213863372802734, "learning_rate": 9.471814926211628e-06, "loss": 0.3222, "step": 3367 }, { "epoch": 0.1562152133580705, "grad_norm": 7.841702461242676, "learning_rate": 9.471485659657782e-06, "loss": 0.3635, "step": 3368 }, { "epoch": 0.15626159554730984, "grad_norm": 9.07311725616455, "learning_rate": 9.471156296231557e-06, "loss": 0.4667, "step": 3369 }, { "epoch": 0.15630797773654917, "grad_norm": 11.725456237792969, "learning_rate": 9.470826835940089e-06, "loss": 0.4715, "step": 3370 }, { "epoch": 0.1563543599257885, "grad_norm": 7.458972454071045, "learning_rate": 9.470497278790515e-06, "loss": 0.2585, "step": 3371 }, { "epoch": 0.15640074211502783, "grad_norm": 9.506555557250977, "learning_rate": 9.470167624789977e-06, "loss": 0.4546, "step": 3372 }, { "epoch": 0.15644712430426716, "grad_norm": 15.84103012084961, "learning_rate": 9.469837873945615e-06, "loss": 0.4411, "step": 3373 }, { "epoch": 0.1564935064935065, "grad_norm": 8.165645599365234, "learning_rate": 9.469508026264574e-06, "loss": 0.19, "step": 3374 }, { "epoch": 0.15653988868274582, "grad_norm": 8.662262916564941, "learning_rate": 9.469178081753996e-06, "loss": 0.3436, "step": 3375 }, { "epoch": 0.15658627087198515, "grad_norm": 4.1528825759887695, "learning_rate": 9.468848040421035e-06, "loss": 0.3884, "step": 3376 }, { "epoch": 0.15663265306122448, "grad_norm": 7.848420143127441, "learning_rate": 9.468517902272835e-06, "loss": 0.3699, "step": 3377 }, { "epoch": 0.1566790352504638, "grad_norm": 6.483770847320557, "learning_rate": 9.468187667316555e-06, "loss": 0.3244, "step": 3378 }, { "epoch": 0.15672541743970314, "grad_norm": 5.903493881225586, "learning_rate": 9.467857335559344e-06, "loss": 0.265, "step": 3379 }, { "epoch": 0.1567717996289425, "grad_norm": 11.99654483795166, "learning_rate": 9.467526907008362e-06, "loss": 0.4472, "step": 3380 }, { "epoch": 0.15681818181818183, "grad_norm": 7.116504192352295, "learning_rate": 9.467196381670765e-06, "loss": 0.269, "step": 3381 }, { "epoch": 0.15686456400742116, "grad_norm": 12.417790412902832, "learning_rate": 9.466865759553714e-06, "loss": 0.4543, "step": 3382 }, { "epoch": 0.1569109461966605, "grad_norm": 6.23379373550415, "learning_rate": 9.466535040664374e-06, "loss": 0.3504, "step": 3383 }, { "epoch": 0.15695732838589982, "grad_norm": 5.772726535797119, "learning_rate": 9.466204225009905e-06, "loss": 0.2488, "step": 3384 }, { "epoch": 0.15700371057513915, "grad_norm": 14.687508583068848, "learning_rate": 9.46587331259748e-06, "loss": 0.5173, "step": 3385 }, { "epoch": 0.15705009276437848, "grad_norm": 14.050257682800293, "learning_rate": 9.465542303434264e-06, "loss": 0.3634, "step": 3386 }, { "epoch": 0.1570964749536178, "grad_norm": 8.789999961853027, "learning_rate": 9.465211197527428e-06, "loss": 0.3151, "step": 3387 }, { "epoch": 0.15714285714285714, "grad_norm": 9.48953914642334, "learning_rate": 9.46487999488415e-06, "loss": 0.3179, "step": 3388 }, { "epoch": 0.15718923933209647, "grad_norm": 8.709644317626953, "learning_rate": 9.4645486955116e-06, "loss": 0.2993, "step": 3389 }, { "epoch": 0.1572356215213358, "grad_norm": 7.182007312774658, "learning_rate": 9.464217299416956e-06, "loss": 0.3037, "step": 3390 }, { "epoch": 0.15728200371057513, "grad_norm": 11.42570686340332, "learning_rate": 9.463885806607402e-06, "loss": 0.4144, "step": 3391 }, { "epoch": 0.15732838589981446, "grad_norm": 5.932999134063721, "learning_rate": 9.463554217090114e-06, "loss": 0.3347, "step": 3392 }, { "epoch": 0.1573747680890538, "grad_norm": 9.311579704284668, "learning_rate": 9.463222530872278e-06, "loss": 0.4301, "step": 3393 }, { "epoch": 0.15742115027829315, "grad_norm": 6.531132221221924, "learning_rate": 9.462890747961082e-06, "loss": 0.3158, "step": 3394 }, { "epoch": 0.15746753246753248, "grad_norm": 5.857607841491699, "learning_rate": 9.46255886836371e-06, "loss": 0.4343, "step": 3395 }, { "epoch": 0.1575139146567718, "grad_norm": 10.77376937866211, "learning_rate": 9.462226892087355e-06, "loss": 0.4388, "step": 3396 }, { "epoch": 0.15756029684601114, "grad_norm": 10.193819046020508, "learning_rate": 9.461894819139207e-06, "loss": 0.3743, "step": 3397 }, { "epoch": 0.15760667903525047, "grad_norm": 8.382216453552246, "learning_rate": 9.461562649526462e-06, "loss": 0.3298, "step": 3398 }, { "epoch": 0.1576530612244898, "grad_norm": 5.730118751525879, "learning_rate": 9.461230383256314e-06, "loss": 0.3311, "step": 3399 }, { "epoch": 0.15769944341372913, "grad_norm": 10.550509452819824, "learning_rate": 9.460898020335964e-06, "loss": 0.4048, "step": 3400 }, { "epoch": 0.15774582560296846, "grad_norm": 5.974307537078857, "learning_rate": 9.460565560772613e-06, "loss": 0.3411, "step": 3401 }, { "epoch": 0.1577922077922078, "grad_norm": 5.088113307952881, "learning_rate": 9.46023300457346e-06, "loss": 0.3625, "step": 3402 }, { "epoch": 0.15783858998144712, "grad_norm": 7.315636157989502, "learning_rate": 9.45990035174571e-06, "loss": 0.3206, "step": 3403 }, { "epoch": 0.15788497217068645, "grad_norm": 4.736874580383301, "learning_rate": 9.459567602296573e-06, "loss": 0.3244, "step": 3404 }, { "epoch": 0.15793135435992578, "grad_norm": 15.296063423156738, "learning_rate": 9.459234756233255e-06, "loss": 0.3627, "step": 3405 }, { "epoch": 0.1579777365491651, "grad_norm": 28.079814910888672, "learning_rate": 9.45890181356297e-06, "loss": 0.4493, "step": 3406 }, { "epoch": 0.15802411873840444, "grad_norm": 9.7229585647583, "learning_rate": 9.458568774292928e-06, "loss": 0.3976, "step": 3407 }, { "epoch": 0.1580705009276438, "grad_norm": 6.772511005401611, "learning_rate": 9.458235638430345e-06, "loss": 0.3596, "step": 3408 }, { "epoch": 0.15811688311688313, "grad_norm": 10.688858032226562, "learning_rate": 9.457902405982438e-06, "loss": 0.3848, "step": 3409 }, { "epoch": 0.15816326530612246, "grad_norm": 8.71306037902832, "learning_rate": 9.457569076956427e-06, "loss": 0.3673, "step": 3410 }, { "epoch": 0.1582096474953618, "grad_norm": 5.103772163391113, "learning_rate": 9.457235651359533e-06, "loss": 0.3934, "step": 3411 }, { "epoch": 0.15825602968460112, "grad_norm": 10.21319580078125, "learning_rate": 9.456902129198979e-06, "loss": 0.449, "step": 3412 }, { "epoch": 0.15830241187384045, "grad_norm": 12.42799186706543, "learning_rate": 9.456568510481993e-06, "loss": 0.4908, "step": 3413 }, { "epoch": 0.15834879406307978, "grad_norm": 16.346595764160156, "learning_rate": 9.456234795215799e-06, "loss": 0.5157, "step": 3414 }, { "epoch": 0.1583951762523191, "grad_norm": 8.995285034179688, "learning_rate": 9.455900983407629e-06, "loss": 0.3922, "step": 3415 }, { "epoch": 0.15844155844155844, "grad_norm": 11.234823226928711, "learning_rate": 9.455567075064715e-06, "loss": 0.41, "step": 3416 }, { "epoch": 0.15848794063079777, "grad_norm": 9.84694766998291, "learning_rate": 9.45523307019429e-06, "loss": 0.5594, "step": 3417 }, { "epoch": 0.1585343228200371, "grad_norm": 9.726473808288574, "learning_rate": 9.45489896880359e-06, "loss": 0.3482, "step": 3418 }, { "epoch": 0.15858070500927643, "grad_norm": 7.033463001251221, "learning_rate": 9.454564770899855e-06, "loss": 0.3313, "step": 3419 }, { "epoch": 0.15862708719851576, "grad_norm": 11.934697151184082, "learning_rate": 9.454230476490323e-06, "loss": 0.4314, "step": 3420 }, { "epoch": 0.1586734693877551, "grad_norm": 8.186285972595215, "learning_rate": 9.453896085582236e-06, "loss": 0.3923, "step": 3421 }, { "epoch": 0.15871985157699445, "grad_norm": 10.355289459228516, "learning_rate": 9.453561598182842e-06, "loss": 0.3587, "step": 3422 }, { "epoch": 0.15876623376623378, "grad_norm": 7.266480445861816, "learning_rate": 9.453227014299382e-06, "loss": 0.3588, "step": 3423 }, { "epoch": 0.1588126159554731, "grad_norm": 5.7413811683654785, "learning_rate": 9.45289233393911e-06, "loss": 0.3641, "step": 3424 }, { "epoch": 0.15885899814471244, "grad_norm": 7.0630693435668945, "learning_rate": 9.452557557109272e-06, "loss": 0.3406, "step": 3425 }, { "epoch": 0.15890538033395177, "grad_norm": 6.527876853942871, "learning_rate": 9.452222683817124e-06, "loss": 0.2952, "step": 3426 }, { "epoch": 0.1589517625231911, "grad_norm": 8.145832061767578, "learning_rate": 9.451887714069921e-06, "loss": 0.4448, "step": 3427 }, { "epoch": 0.15899814471243043, "grad_norm": 3.7495031356811523, "learning_rate": 9.451552647874918e-06, "loss": 0.3008, "step": 3428 }, { "epoch": 0.15904452690166976, "grad_norm": 4.352504253387451, "learning_rate": 9.451217485239373e-06, "loss": 0.2636, "step": 3429 }, { "epoch": 0.1590909090909091, "grad_norm": 10.328414916992188, "learning_rate": 9.45088222617055e-06, "loss": 0.4668, "step": 3430 }, { "epoch": 0.15913729128014842, "grad_norm": 10.566361427307129, "learning_rate": 9.450546870675712e-06, "loss": 0.3995, "step": 3431 }, { "epoch": 0.15918367346938775, "grad_norm": 6.065738201141357, "learning_rate": 9.450211418762123e-06, "loss": 0.3234, "step": 3432 }, { "epoch": 0.15923005565862708, "grad_norm": 7.997493267059326, "learning_rate": 9.449875870437053e-06, "loss": 0.4476, "step": 3433 }, { "epoch": 0.1592764378478664, "grad_norm": 4.949219226837158, "learning_rate": 9.449540225707766e-06, "loss": 0.3203, "step": 3434 }, { "epoch": 0.15932282003710574, "grad_norm": 4.15817928314209, "learning_rate": 9.449204484581539e-06, "loss": 0.2932, "step": 3435 }, { "epoch": 0.1593692022263451, "grad_norm": 4.9416022300720215, "learning_rate": 9.448868647065644e-06, "loss": 0.29, "step": 3436 }, { "epoch": 0.15941558441558443, "grad_norm": 8.299478530883789, "learning_rate": 9.448532713167354e-06, "loss": 0.3953, "step": 3437 }, { "epoch": 0.15946196660482376, "grad_norm": 12.42672348022461, "learning_rate": 9.44819668289395e-06, "loss": 0.3574, "step": 3438 }, { "epoch": 0.1595083487940631, "grad_norm": 8.87352180480957, "learning_rate": 9.447860556252712e-06, "loss": 0.4389, "step": 3439 }, { "epoch": 0.15955473098330242, "grad_norm": 8.449625015258789, "learning_rate": 9.44752433325092e-06, "loss": 0.3929, "step": 3440 }, { "epoch": 0.15960111317254175, "grad_norm": 9.306459426879883, "learning_rate": 9.447188013895859e-06, "loss": 0.4743, "step": 3441 }, { "epoch": 0.15964749536178108, "grad_norm": 5.504213809967041, "learning_rate": 9.446851598194817e-06, "loss": 0.4391, "step": 3442 }, { "epoch": 0.1596938775510204, "grad_norm": 5.877998352050781, "learning_rate": 9.44651508615508e-06, "loss": 0.3832, "step": 3443 }, { "epoch": 0.15974025974025974, "grad_norm": 25.302268981933594, "learning_rate": 9.446178477783938e-06, "loss": 0.2923, "step": 3444 }, { "epoch": 0.15978664192949907, "grad_norm": 5.371648788452148, "learning_rate": 9.445841773088684e-06, "loss": 0.3665, "step": 3445 }, { "epoch": 0.1598330241187384, "grad_norm": 8.608285903930664, "learning_rate": 9.445504972076614e-06, "loss": 0.3154, "step": 3446 }, { "epoch": 0.15987940630797773, "grad_norm": 12.44825267791748, "learning_rate": 9.445168074755023e-06, "loss": 0.4613, "step": 3447 }, { "epoch": 0.15992578849721706, "grad_norm": 8.615156173706055, "learning_rate": 9.444831081131209e-06, "loss": 0.3054, "step": 3448 }, { "epoch": 0.1599721706864564, "grad_norm": 8.16757583618164, "learning_rate": 9.444493991212476e-06, "loss": 0.2662, "step": 3449 }, { "epoch": 0.16001855287569575, "grad_norm": 8.329231262207031, "learning_rate": 9.444156805006125e-06, "loss": 0.3987, "step": 3450 }, { "epoch": 0.16006493506493508, "grad_norm": 5.067461967468262, "learning_rate": 9.44381952251946e-06, "loss": 0.3729, "step": 3451 }, { "epoch": 0.1601113172541744, "grad_norm": 8.074993133544922, "learning_rate": 9.44348214375979e-06, "loss": 0.4666, "step": 3452 }, { "epoch": 0.16015769944341374, "grad_norm": 11.698902130126953, "learning_rate": 9.443144668734423e-06, "loss": 0.3152, "step": 3453 }, { "epoch": 0.16020408163265307, "grad_norm": 5.899063587188721, "learning_rate": 9.442807097450668e-06, "loss": 0.3756, "step": 3454 }, { "epoch": 0.1602504638218924, "grad_norm": 6.860467910766602, "learning_rate": 9.442469429915843e-06, "loss": 0.3386, "step": 3455 }, { "epoch": 0.16029684601113173, "grad_norm": 5.289626598358154, "learning_rate": 9.44213166613726e-06, "loss": 0.3322, "step": 3456 }, { "epoch": 0.16034322820037106, "grad_norm": 6.144717693328857, "learning_rate": 9.441793806122238e-06, "loss": 0.3792, "step": 3457 }, { "epoch": 0.1603896103896104, "grad_norm": 7.178347587585449, "learning_rate": 9.441455849878093e-06, "loss": 0.3587, "step": 3458 }, { "epoch": 0.16043599257884972, "grad_norm": 3.7061474323272705, "learning_rate": 9.441117797412155e-06, "loss": 0.3851, "step": 3459 }, { "epoch": 0.16048237476808905, "grad_norm": 3.777451276779175, "learning_rate": 9.440779648731737e-06, "loss": 0.2392, "step": 3460 }, { "epoch": 0.16052875695732838, "grad_norm": 8.758207321166992, "learning_rate": 9.440441403844173e-06, "loss": 0.3477, "step": 3461 }, { "epoch": 0.1605751391465677, "grad_norm": 6.226269721984863, "learning_rate": 9.440103062756789e-06, "loss": 0.2652, "step": 3462 }, { "epoch": 0.16062152133580704, "grad_norm": 6.445071220397949, "learning_rate": 9.439764625476913e-06, "loss": 0.3537, "step": 3463 }, { "epoch": 0.1606679035250464, "grad_norm": 5.945757865905762, "learning_rate": 9.439426092011877e-06, "loss": 0.3598, "step": 3464 }, { "epoch": 0.16071428571428573, "grad_norm": 8.118852615356445, "learning_rate": 9.439087462369016e-06, "loss": 0.4396, "step": 3465 }, { "epoch": 0.16076066790352506, "grad_norm": 15.111766815185547, "learning_rate": 9.43874873655567e-06, "loss": 0.3899, "step": 3466 }, { "epoch": 0.1608070500927644, "grad_norm": 8.934120178222656, "learning_rate": 9.43840991457917e-06, "loss": 0.4126, "step": 3467 }, { "epoch": 0.16085343228200372, "grad_norm": 8.383219718933105, "learning_rate": 9.438070996446862e-06, "loss": 0.3432, "step": 3468 }, { "epoch": 0.16089981447124305, "grad_norm": 13.237502098083496, "learning_rate": 9.437731982166086e-06, "loss": 0.4211, "step": 3469 }, { "epoch": 0.16094619666048238, "grad_norm": 8.267974853515625, "learning_rate": 9.437392871744188e-06, "loss": 0.4948, "step": 3470 }, { "epoch": 0.1609925788497217, "grad_norm": 7.867684364318848, "learning_rate": 9.437053665188514e-06, "loss": 0.3717, "step": 3471 }, { "epoch": 0.16103896103896104, "grad_norm": 9.739006042480469, "learning_rate": 9.436714362506412e-06, "loss": 0.4222, "step": 3472 }, { "epoch": 0.16108534322820037, "grad_norm": 12.355666160583496, "learning_rate": 9.436374963705234e-06, "loss": 0.5596, "step": 3473 }, { "epoch": 0.1611317254174397, "grad_norm": 8.549964904785156, "learning_rate": 9.436035468792331e-06, "loss": 0.4566, "step": 3474 }, { "epoch": 0.16117810760667903, "grad_norm": 8.272258758544922, "learning_rate": 9.435695877775061e-06, "loss": 0.4332, "step": 3475 }, { "epoch": 0.16122448979591836, "grad_norm": 6.015355587005615, "learning_rate": 9.435356190660778e-06, "loss": 0.2993, "step": 3476 }, { "epoch": 0.1612708719851577, "grad_norm": 7.683106422424316, "learning_rate": 9.435016407456843e-06, "loss": 0.4323, "step": 3477 }, { "epoch": 0.16131725417439702, "grad_norm": 13.76278018951416, "learning_rate": 9.434676528170619e-06, "loss": 0.4195, "step": 3478 }, { "epoch": 0.16136363636363638, "grad_norm": 10.912585258483887, "learning_rate": 9.434336552809464e-06, "loss": 0.4905, "step": 3479 }, { "epoch": 0.1614100185528757, "grad_norm": 11.912643432617188, "learning_rate": 9.433996481380747e-06, "loss": 0.4624, "step": 3480 }, { "epoch": 0.16145640074211504, "grad_norm": 7.955239772796631, "learning_rate": 9.433656313891837e-06, "loss": 0.3254, "step": 3481 }, { "epoch": 0.16150278293135437, "grad_norm": 7.497670650482178, "learning_rate": 9.433316050350099e-06, "loss": 0.3403, "step": 3482 }, { "epoch": 0.1615491651205937, "grad_norm": 6.948604583740234, "learning_rate": 9.432975690762908e-06, "loss": 0.4608, "step": 3483 }, { "epoch": 0.16159554730983303, "grad_norm": 9.467940330505371, "learning_rate": 9.432635235137638e-06, "loss": 0.33, "step": 3484 }, { "epoch": 0.16164192949907236, "grad_norm": 7.4635210037231445, "learning_rate": 9.432294683481663e-06, "loss": 0.3316, "step": 3485 }, { "epoch": 0.1616883116883117, "grad_norm": 6.387885570526123, "learning_rate": 9.43195403580236e-06, "loss": 0.2935, "step": 3486 }, { "epoch": 0.16173469387755102, "grad_norm": 7.955474853515625, "learning_rate": 9.431613292107112e-06, "loss": 0.4666, "step": 3487 }, { "epoch": 0.16178107606679035, "grad_norm": 5.970771312713623, "learning_rate": 9.431272452403298e-06, "loss": 0.3157, "step": 3488 }, { "epoch": 0.16182745825602968, "grad_norm": 4.651127338409424, "learning_rate": 9.430931516698305e-06, "loss": 0.2971, "step": 3489 }, { "epoch": 0.161873840445269, "grad_norm": 9.090520858764648, "learning_rate": 9.430590484999517e-06, "loss": 0.4655, "step": 3490 }, { "epoch": 0.16192022263450834, "grad_norm": 8.350889205932617, "learning_rate": 9.430249357314322e-06, "loss": 0.3666, "step": 3491 }, { "epoch": 0.16196660482374767, "grad_norm": 10.343417167663574, "learning_rate": 9.429908133650115e-06, "loss": 0.4102, "step": 3492 }, { "epoch": 0.16201298701298703, "grad_norm": 5.4278244972229, "learning_rate": 9.429566814014282e-06, "loss": 0.3298, "step": 3493 }, { "epoch": 0.16205936920222636, "grad_norm": 7.144266128540039, "learning_rate": 9.42922539841422e-06, "loss": 0.2134, "step": 3494 }, { "epoch": 0.16210575139146569, "grad_norm": 5.674810886383057, "learning_rate": 9.428883886857327e-06, "loss": 0.2419, "step": 3495 }, { "epoch": 0.16215213358070502, "grad_norm": 6.140927314758301, "learning_rate": 9.428542279351e-06, "loss": 0.3343, "step": 3496 }, { "epoch": 0.16219851576994435, "grad_norm": 7.827427864074707, "learning_rate": 9.42820057590264e-06, "loss": 0.352, "step": 3497 }, { "epoch": 0.16224489795918368, "grad_norm": 7.836908340454102, "learning_rate": 9.42785877651965e-06, "loss": 0.3733, "step": 3498 }, { "epoch": 0.162291280148423, "grad_norm": 6.226983547210693, "learning_rate": 9.427516881209437e-06, "loss": 0.383, "step": 3499 }, { "epoch": 0.16233766233766234, "grad_norm": 12.144635200500488, "learning_rate": 9.427174889979404e-06, "loss": 0.3518, "step": 3500 }, { "epoch": 0.16238404452690167, "grad_norm": 5.329715251922607, "learning_rate": 9.426832802836962e-06, "loss": 0.3233, "step": 3501 }, { "epoch": 0.162430426716141, "grad_norm": 9.444198608398438, "learning_rate": 9.426490619789525e-06, "loss": 0.3476, "step": 3502 }, { "epoch": 0.16247680890538033, "grad_norm": 11.372613906860352, "learning_rate": 9.4261483408445e-06, "loss": 0.3715, "step": 3503 }, { "epoch": 0.16252319109461966, "grad_norm": 8.605856895446777, "learning_rate": 9.42580596600931e-06, "loss": 0.4196, "step": 3504 }, { "epoch": 0.162569573283859, "grad_norm": 10.84053897857666, "learning_rate": 9.425463495291364e-06, "loss": 0.4932, "step": 3505 }, { "epoch": 0.16261595547309832, "grad_norm": 5.802679538726807, "learning_rate": 9.425120928698088e-06, "loss": 0.3009, "step": 3506 }, { "epoch": 0.16266233766233767, "grad_norm": 13.125895500183105, "learning_rate": 9.424778266236899e-06, "loss": 0.4497, "step": 3507 }, { "epoch": 0.162708719851577, "grad_norm": 8.26341438293457, "learning_rate": 9.424435507915224e-06, "loss": 0.4284, "step": 3508 }, { "epoch": 0.16275510204081634, "grad_norm": 7.186121940612793, "learning_rate": 9.424092653740486e-06, "loss": 0.2815, "step": 3509 }, { "epoch": 0.16280148423005567, "grad_norm": 3.8707199096679688, "learning_rate": 9.423749703720116e-06, "loss": 0.3363, "step": 3510 }, { "epoch": 0.162847866419295, "grad_norm": 6.335047721862793, "learning_rate": 9.423406657861542e-06, "loss": 0.4311, "step": 3511 }, { "epoch": 0.16289424860853433, "grad_norm": 8.70814037322998, "learning_rate": 9.423063516172195e-06, "loss": 0.3704, "step": 3512 }, { "epoch": 0.16294063079777366, "grad_norm": 9.51812744140625, "learning_rate": 9.42272027865951e-06, "loss": 0.3251, "step": 3513 }, { "epoch": 0.16298701298701299, "grad_norm": 5.954259872436523, "learning_rate": 9.422376945330922e-06, "loss": 0.2635, "step": 3514 }, { "epoch": 0.16303339517625232, "grad_norm": 8.568974494934082, "learning_rate": 9.422033516193871e-06, "loss": 0.4197, "step": 3515 }, { "epoch": 0.16307977736549165, "grad_norm": 6.8943257331848145, "learning_rate": 9.421689991255795e-06, "loss": 0.3208, "step": 3516 }, { "epoch": 0.16312615955473098, "grad_norm": 7.711054801940918, "learning_rate": 9.42134637052414e-06, "loss": 0.37, "step": 3517 }, { "epoch": 0.1631725417439703, "grad_norm": 20.491840362548828, "learning_rate": 9.421002654006347e-06, "loss": 0.3254, "step": 3518 }, { "epoch": 0.16321892393320964, "grad_norm": 5.077530860900879, "learning_rate": 9.420658841709861e-06, "loss": 0.3832, "step": 3519 }, { "epoch": 0.16326530612244897, "grad_norm": 8.110923767089844, "learning_rate": 9.420314933642136e-06, "loss": 0.3554, "step": 3520 }, { "epoch": 0.16331168831168832, "grad_norm": 7.037778854370117, "learning_rate": 9.419970929810618e-06, "loss": 0.347, "step": 3521 }, { "epoch": 0.16335807050092765, "grad_norm": 6.219718933105469, "learning_rate": 9.419626830222762e-06, "loss": 0.3583, "step": 3522 }, { "epoch": 0.16340445269016698, "grad_norm": 7.055413246154785, "learning_rate": 9.419282634886021e-06, "loss": 0.3947, "step": 3523 }, { "epoch": 0.16345083487940631, "grad_norm": 7.283405780792236, "learning_rate": 9.418938343807852e-06, "loss": 0.3992, "step": 3524 }, { "epoch": 0.16349721706864564, "grad_norm": 4.804226875305176, "learning_rate": 9.418593956995716e-06, "loss": 0.3298, "step": 3525 }, { "epoch": 0.16354359925788498, "grad_norm": 8.04886531829834, "learning_rate": 9.418249474457072e-06, "loss": 0.3814, "step": 3526 }, { "epoch": 0.1635899814471243, "grad_norm": 10.988924026489258, "learning_rate": 9.417904896199384e-06, "loss": 0.5093, "step": 3527 }, { "epoch": 0.16363636363636364, "grad_norm": 7.564973831176758, "learning_rate": 9.417560222230115e-06, "loss": 0.2548, "step": 3528 }, { "epoch": 0.16368274582560297, "grad_norm": 4.465160846710205, "learning_rate": 9.417215452556735e-06, "loss": 0.3346, "step": 3529 }, { "epoch": 0.1637291280148423, "grad_norm": 4.713077545166016, "learning_rate": 9.416870587186713e-06, "loss": 0.285, "step": 3530 }, { "epoch": 0.16377551020408163, "grad_norm": 8.806744575500488, "learning_rate": 9.416525626127517e-06, "loss": 0.3019, "step": 3531 }, { "epoch": 0.16382189239332096, "grad_norm": 7.7362799644470215, "learning_rate": 9.416180569386623e-06, "loss": 0.4048, "step": 3532 }, { "epoch": 0.16386827458256029, "grad_norm": 6.4977827072143555, "learning_rate": 9.415835416971509e-06, "loss": 0.4406, "step": 3533 }, { "epoch": 0.16391465677179962, "grad_norm": 5.66124963760376, "learning_rate": 9.415490168889648e-06, "loss": 0.2203, "step": 3534 }, { "epoch": 0.16396103896103897, "grad_norm": 5.59488582611084, "learning_rate": 9.41514482514852e-06, "loss": 0.3135, "step": 3535 }, { "epoch": 0.1640074211502783, "grad_norm": 6.033235549926758, "learning_rate": 9.41479938575561e-06, "loss": 0.3384, "step": 3536 }, { "epoch": 0.16405380333951763, "grad_norm": 6.08078145980835, "learning_rate": 9.4144538507184e-06, "loss": 0.4245, "step": 3537 }, { "epoch": 0.16410018552875696, "grad_norm": 5.911843299865723, "learning_rate": 9.414108220044376e-06, "loss": 0.2786, "step": 3538 }, { "epoch": 0.1641465677179963, "grad_norm": 10.993107795715332, "learning_rate": 9.413762493741023e-06, "loss": 0.3909, "step": 3539 }, { "epoch": 0.16419294990723562, "grad_norm": 9.868727684020996, "learning_rate": 9.413416671815836e-06, "loss": 0.3928, "step": 3540 }, { "epoch": 0.16423933209647495, "grad_norm": 5.854091644287109, "learning_rate": 9.413070754276305e-06, "loss": 0.3299, "step": 3541 }, { "epoch": 0.16428571428571428, "grad_norm": 10.621386528015137, "learning_rate": 9.412724741129922e-06, "loss": 0.4839, "step": 3542 }, { "epoch": 0.16433209647495362, "grad_norm": 8.886075019836426, "learning_rate": 9.412378632384185e-06, "loss": 0.3439, "step": 3543 }, { "epoch": 0.16437847866419295, "grad_norm": 11.419907569885254, "learning_rate": 9.412032428046594e-06, "loss": 0.3737, "step": 3544 }, { "epoch": 0.16442486085343228, "grad_norm": 12.236812591552734, "learning_rate": 9.411686128124647e-06, "loss": 0.4753, "step": 3545 }, { "epoch": 0.1644712430426716, "grad_norm": 23.510082244873047, "learning_rate": 9.411339732625846e-06, "loss": 0.3222, "step": 3546 }, { "epoch": 0.16451762523191094, "grad_norm": 5.428773880004883, "learning_rate": 9.410993241557696e-06, "loss": 0.3479, "step": 3547 }, { "epoch": 0.16456400742115027, "grad_norm": 16.37918472290039, "learning_rate": 9.410646654927705e-06, "loss": 0.5808, "step": 3548 }, { "epoch": 0.16461038961038962, "grad_norm": 9.775601387023926, "learning_rate": 9.410299972743381e-06, "loss": 0.3281, "step": 3549 }, { "epoch": 0.16465677179962895, "grad_norm": 3.7462241649627686, "learning_rate": 9.409953195012234e-06, "loss": 0.3594, "step": 3550 }, { "epoch": 0.16470315398886828, "grad_norm": 5.147068500518799, "learning_rate": 9.409606321741776e-06, "loss": 0.2869, "step": 3551 }, { "epoch": 0.16474953617810761, "grad_norm": 7.479738235473633, "learning_rate": 9.409259352939524e-06, "loss": 0.3646, "step": 3552 }, { "epoch": 0.16479591836734694, "grad_norm": 6.1219482421875, "learning_rate": 9.408912288612992e-06, "loss": 0.2422, "step": 3553 }, { "epoch": 0.16484230055658627, "grad_norm": 5.924253463745117, "learning_rate": 9.408565128769702e-06, "loss": 0.3984, "step": 3554 }, { "epoch": 0.1648886827458256, "grad_norm": 5.950102806091309, "learning_rate": 9.408217873417175e-06, "loss": 0.3044, "step": 3555 }, { "epoch": 0.16493506493506493, "grad_norm": 6.862806797027588, "learning_rate": 9.407870522562931e-06, "loss": 0.3936, "step": 3556 }, { "epoch": 0.16498144712430426, "grad_norm": 6.464425563812256, "learning_rate": 9.407523076214497e-06, "loss": 0.3752, "step": 3557 }, { "epoch": 0.1650278293135436, "grad_norm": 10.418134689331055, "learning_rate": 9.407175534379402e-06, "loss": 0.3876, "step": 3558 }, { "epoch": 0.16507421150278292, "grad_norm": 9.94890022277832, "learning_rate": 9.406827897065174e-06, "loss": 0.4531, "step": 3559 }, { "epoch": 0.16512059369202226, "grad_norm": 7.593417644500732, "learning_rate": 9.40648016427934e-06, "loss": 0.3647, "step": 3560 }, { "epoch": 0.16516697588126159, "grad_norm": 11.52369213104248, "learning_rate": 9.406132336029441e-06, "loss": 0.4278, "step": 3561 }, { "epoch": 0.16521335807050092, "grad_norm": 7.124959945678711, "learning_rate": 9.40578441232301e-06, "loss": 0.343, "step": 3562 }, { "epoch": 0.16525974025974027, "grad_norm": 6.448947429656982, "learning_rate": 9.40543639316758e-06, "loss": 0.3482, "step": 3563 }, { "epoch": 0.1653061224489796, "grad_norm": 10.404911994934082, "learning_rate": 9.405088278570696e-06, "loss": 0.4218, "step": 3564 }, { "epoch": 0.16535250463821893, "grad_norm": 8.833681106567383, "learning_rate": 9.404740068539896e-06, "loss": 0.3831, "step": 3565 }, { "epoch": 0.16539888682745826, "grad_norm": 7.210230827331543, "learning_rate": 9.404391763082729e-06, "loss": 0.3596, "step": 3566 }, { "epoch": 0.1654452690166976, "grad_norm": 7.555116653442383, "learning_rate": 9.404043362206736e-06, "loss": 0.2812, "step": 3567 }, { "epoch": 0.16549165120593692, "grad_norm": 6.7588090896606445, "learning_rate": 9.403694865919466e-06, "loss": 0.3576, "step": 3568 }, { "epoch": 0.16553803339517625, "grad_norm": 17.06918716430664, "learning_rate": 9.40334627422847e-06, "loss": 0.5531, "step": 3569 }, { "epoch": 0.16558441558441558, "grad_norm": 9.426286697387695, "learning_rate": 9.402997587141299e-06, "loss": 0.3675, "step": 3570 }, { "epoch": 0.16563079777365491, "grad_norm": 8.960399627685547, "learning_rate": 9.402648804665506e-06, "loss": 0.3035, "step": 3571 }, { "epoch": 0.16567717996289424, "grad_norm": 10.944805145263672, "learning_rate": 9.402299926808652e-06, "loss": 0.3985, "step": 3572 }, { "epoch": 0.16572356215213357, "grad_norm": 4.38409948348999, "learning_rate": 9.40195095357829e-06, "loss": 0.3046, "step": 3573 }, { "epoch": 0.1657699443413729, "grad_norm": 11.06094741821289, "learning_rate": 9.401601884981983e-06, "loss": 0.5106, "step": 3574 }, { "epoch": 0.16581632653061223, "grad_norm": 4.26694393157959, "learning_rate": 9.401252721027292e-06, "loss": 0.337, "step": 3575 }, { "epoch": 0.16586270871985156, "grad_norm": 10.609789848327637, "learning_rate": 9.400903461721783e-06, "loss": 0.3841, "step": 3576 }, { "epoch": 0.16590909090909092, "grad_norm": 5.687535285949707, "learning_rate": 9.400554107073022e-06, "loss": 0.4447, "step": 3577 }, { "epoch": 0.16595547309833025, "grad_norm": 4.690797328948975, "learning_rate": 9.400204657088576e-06, "loss": 0.3373, "step": 3578 }, { "epoch": 0.16600185528756958, "grad_norm": 12.486828804016113, "learning_rate": 9.39985511177602e-06, "loss": 0.5415, "step": 3579 }, { "epoch": 0.1660482374768089, "grad_norm": 11.583039283752441, "learning_rate": 9.39950547114292e-06, "loss": 0.5135, "step": 3580 }, { "epoch": 0.16609461966604824, "grad_norm": 7.186366558074951, "learning_rate": 9.399155735196856e-06, "loss": 0.3478, "step": 3581 }, { "epoch": 0.16614100185528757, "grad_norm": 9.381363868713379, "learning_rate": 9.398805903945405e-06, "loss": 0.3888, "step": 3582 }, { "epoch": 0.1661873840445269, "grad_norm": 7.892239570617676, "learning_rate": 9.398455977396141e-06, "loss": 0.3331, "step": 3583 }, { "epoch": 0.16623376623376623, "grad_norm": 11.723597526550293, "learning_rate": 9.398105955556651e-06, "loss": 0.5151, "step": 3584 }, { "epoch": 0.16628014842300556, "grad_norm": 10.78656005859375, "learning_rate": 9.397755838434513e-06, "loss": 0.3869, "step": 3585 }, { "epoch": 0.1663265306122449, "grad_norm": 7.326401710510254, "learning_rate": 9.397405626037316e-06, "loss": 0.3293, "step": 3586 }, { "epoch": 0.16637291280148422, "grad_norm": 4.70090389251709, "learning_rate": 9.397055318372645e-06, "loss": 0.3625, "step": 3587 }, { "epoch": 0.16641929499072355, "grad_norm": 9.604775428771973, "learning_rate": 9.39670491544809e-06, "loss": 0.4566, "step": 3588 }, { "epoch": 0.16646567717996288, "grad_norm": 11.052757263183594, "learning_rate": 9.39635441727124e-06, "loss": 0.4539, "step": 3589 }, { "epoch": 0.16651205936920221, "grad_norm": 7.250787258148193, "learning_rate": 9.396003823849694e-06, "loss": 0.3568, "step": 3590 }, { "epoch": 0.16655844155844157, "grad_norm": 5.180657863616943, "learning_rate": 9.395653135191041e-06, "loss": 0.3767, "step": 3591 }, { "epoch": 0.1666048237476809, "grad_norm": 9.126535415649414, "learning_rate": 9.395302351302881e-06, "loss": 0.3085, "step": 3592 }, { "epoch": 0.16665120593692023, "grad_norm": 5.75246524810791, "learning_rate": 9.394951472192816e-06, "loss": 0.4795, "step": 3593 }, { "epoch": 0.16669758812615956, "grad_norm": 11.097784042358398, "learning_rate": 9.394600497868444e-06, "loss": 0.3782, "step": 3594 }, { "epoch": 0.1667439703153989, "grad_norm": 5.182065010070801, "learning_rate": 9.394249428337371e-06, "loss": 0.3566, "step": 3595 }, { "epoch": 0.16679035250463822, "grad_norm": 11.874783515930176, "learning_rate": 9.393898263607202e-06, "loss": 0.4443, "step": 3596 }, { "epoch": 0.16683673469387755, "grad_norm": 5.511650085449219, "learning_rate": 9.393547003685543e-06, "loss": 0.3492, "step": 3597 }, { "epoch": 0.16688311688311688, "grad_norm": 9.23291301727295, "learning_rate": 9.393195648580007e-06, "loss": 0.357, "step": 3598 }, { "epoch": 0.1669294990723562, "grad_norm": 10.436295509338379, "learning_rate": 9.392844198298204e-06, "loss": 0.4252, "step": 3599 }, { "epoch": 0.16697588126159554, "grad_norm": 16.556941986083984, "learning_rate": 9.392492652847749e-06, "loss": 0.4963, "step": 3600 }, { "epoch": 0.16702226345083487, "grad_norm": 6.324976444244385, "learning_rate": 9.392141012236256e-06, "loss": 0.3557, "step": 3601 }, { "epoch": 0.1670686456400742, "grad_norm": 14.991517066955566, "learning_rate": 9.391789276471346e-06, "loss": 0.5003, "step": 3602 }, { "epoch": 0.16711502782931353, "grad_norm": 9.082845687866211, "learning_rate": 9.391437445560637e-06, "loss": 0.434, "step": 3603 }, { "epoch": 0.16716141001855286, "grad_norm": 9.466453552246094, "learning_rate": 9.391085519511752e-06, "loss": 0.333, "step": 3604 }, { "epoch": 0.1672077922077922, "grad_norm": 13.2134370803833, "learning_rate": 9.390733498332315e-06, "loss": 0.4583, "step": 3605 }, { "epoch": 0.16725417439703155, "grad_norm": 6.031942367553711, "learning_rate": 9.390381382029954e-06, "loss": 0.323, "step": 3606 }, { "epoch": 0.16730055658627088, "grad_norm": 7.690602779388428, "learning_rate": 9.390029170612296e-06, "loss": 0.3519, "step": 3607 }, { "epoch": 0.1673469387755102, "grad_norm": 11.920902252197266, "learning_rate": 9.38967686408697e-06, "loss": 0.4307, "step": 3608 }, { "epoch": 0.16739332096474954, "grad_norm": 7.779712677001953, "learning_rate": 9.389324462461611e-06, "loss": 0.4123, "step": 3609 }, { "epoch": 0.16743970315398887, "grad_norm": 7.911290168762207, "learning_rate": 9.388971965743851e-06, "loss": 0.4111, "step": 3610 }, { "epoch": 0.1674860853432282, "grad_norm": 17.722883224487305, "learning_rate": 9.388619373941331e-06, "loss": 0.5708, "step": 3611 }, { "epoch": 0.16753246753246753, "grad_norm": 5.587979316711426, "learning_rate": 9.388266687061686e-06, "loss": 0.4058, "step": 3612 }, { "epoch": 0.16757884972170686, "grad_norm": 6.074456214904785, "learning_rate": 9.387913905112557e-06, "loss": 0.2686, "step": 3613 }, { "epoch": 0.1676252319109462, "grad_norm": 4.6835150718688965, "learning_rate": 9.387561028101588e-06, "loss": 0.4352, "step": 3614 }, { "epoch": 0.16767161410018552, "grad_norm": 10.552300453186035, "learning_rate": 9.387208056036424e-06, "loss": 0.4212, "step": 3615 }, { "epoch": 0.16771799628942485, "grad_norm": 5.515955924987793, "learning_rate": 9.386854988924712e-06, "loss": 0.4594, "step": 3616 }, { "epoch": 0.16776437847866418, "grad_norm": 6.800716400146484, "learning_rate": 9.3865018267741e-06, "loss": 0.3048, "step": 3617 }, { "epoch": 0.1678107606679035, "grad_norm": 9.90027141571045, "learning_rate": 9.386148569592238e-06, "loss": 0.4962, "step": 3618 }, { "epoch": 0.16785714285714284, "grad_norm": 7.418543338775635, "learning_rate": 9.385795217386781e-06, "loss": 0.301, "step": 3619 }, { "epoch": 0.1679035250463822, "grad_norm": 8.396547317504883, "learning_rate": 9.385441770165385e-06, "loss": 0.3841, "step": 3620 }, { "epoch": 0.16794990723562153, "grad_norm": 9.279963493347168, "learning_rate": 9.385088227935705e-06, "loss": 0.3893, "step": 3621 }, { "epoch": 0.16799628942486086, "grad_norm": 5.815038204193115, "learning_rate": 9.384734590705404e-06, "loss": 0.3579, "step": 3622 }, { "epoch": 0.1680426716141002, "grad_norm": 17.805871963500977, "learning_rate": 9.384380858482139e-06, "loss": 0.4647, "step": 3623 }, { "epoch": 0.16808905380333952, "grad_norm": 5.936177730560303, "learning_rate": 9.384027031273575e-06, "loss": 0.2374, "step": 3624 }, { "epoch": 0.16813543599257885, "grad_norm": 5.3958210945129395, "learning_rate": 9.383673109087378e-06, "loss": 0.3209, "step": 3625 }, { "epoch": 0.16818181818181818, "grad_norm": 7.759224891662598, "learning_rate": 9.383319091931216e-06, "loss": 0.3614, "step": 3626 }, { "epoch": 0.1682282003710575, "grad_norm": 7.922688007354736, "learning_rate": 9.382964979812756e-06, "loss": 0.3507, "step": 3627 }, { "epoch": 0.16827458256029684, "grad_norm": 6.890294551849365, "learning_rate": 9.382610772739674e-06, "loss": 0.3173, "step": 3628 }, { "epoch": 0.16832096474953617, "grad_norm": 4.669735431671143, "learning_rate": 9.382256470719639e-06, "loss": 0.309, "step": 3629 }, { "epoch": 0.1683673469387755, "grad_norm": 8.551046371459961, "learning_rate": 9.38190207376033e-06, "loss": 0.3194, "step": 3630 }, { "epoch": 0.16841372912801483, "grad_norm": 5.308271408081055, "learning_rate": 9.381547581869424e-06, "loss": 0.3948, "step": 3631 }, { "epoch": 0.16846011131725416, "grad_norm": 7.22235631942749, "learning_rate": 9.3811929950546e-06, "loss": 0.3236, "step": 3632 }, { "epoch": 0.1685064935064935, "grad_norm": 5.86630916595459, "learning_rate": 9.38083831332354e-06, "loss": 0.3772, "step": 3633 }, { "epoch": 0.16855287569573285, "grad_norm": 5.679804801940918, "learning_rate": 9.38048353668393e-06, "loss": 0.2813, "step": 3634 }, { "epoch": 0.16859925788497218, "grad_norm": 6.604586601257324, "learning_rate": 9.380128665143454e-06, "loss": 0.3122, "step": 3635 }, { "epoch": 0.1686456400742115, "grad_norm": 6.491007328033447, "learning_rate": 9.379773698709801e-06, "loss": 0.3977, "step": 3636 }, { "epoch": 0.16869202226345084, "grad_norm": 5.823358535766602, "learning_rate": 9.379418637390661e-06, "loss": 0.2581, "step": 3637 }, { "epoch": 0.16873840445269017, "grad_norm": 14.276763916015625, "learning_rate": 9.379063481193725e-06, "loss": 0.3918, "step": 3638 }, { "epoch": 0.1687847866419295, "grad_norm": 10.545175552368164, "learning_rate": 9.378708230126689e-06, "loss": 0.4591, "step": 3639 }, { "epoch": 0.16883116883116883, "grad_norm": 17.458057403564453, "learning_rate": 9.37835288419725e-06, "loss": 0.3325, "step": 3640 }, { "epoch": 0.16887755102040816, "grad_norm": 7.802474498748779, "learning_rate": 9.377997443413104e-06, "loss": 0.4349, "step": 3641 }, { "epoch": 0.1689239332096475, "grad_norm": 8.920366287231445, "learning_rate": 9.377641907781952e-06, "loss": 0.4597, "step": 3642 }, { "epoch": 0.16897031539888682, "grad_norm": 9.32718276977539, "learning_rate": 9.377286277311496e-06, "loss": 0.3588, "step": 3643 }, { "epoch": 0.16901669758812615, "grad_norm": 8.786078453063965, "learning_rate": 9.376930552009444e-06, "loss": 0.4955, "step": 3644 }, { "epoch": 0.16906307977736548, "grad_norm": 8.719551086425781, "learning_rate": 9.3765747318835e-06, "loss": 0.3184, "step": 3645 }, { "epoch": 0.1691094619666048, "grad_norm": 8.405311584472656, "learning_rate": 9.376218816941373e-06, "loss": 0.4396, "step": 3646 }, { "epoch": 0.16915584415584414, "grad_norm": 10.097498893737793, "learning_rate": 9.375862807190773e-06, "loss": 0.3799, "step": 3647 }, { "epoch": 0.1692022263450835, "grad_norm": 6.154495716094971, "learning_rate": 9.375506702639412e-06, "loss": 0.3872, "step": 3648 }, { "epoch": 0.16924860853432283, "grad_norm": 12.653352737426758, "learning_rate": 9.375150503295008e-06, "loss": 0.273, "step": 3649 }, { "epoch": 0.16929499072356216, "grad_norm": 11.88670825958252, "learning_rate": 9.374794209165275e-06, "loss": 0.4252, "step": 3650 }, { "epoch": 0.1693413729128015, "grad_norm": 5.93619441986084, "learning_rate": 9.374437820257933e-06, "loss": 0.3477, "step": 3651 }, { "epoch": 0.16938775510204082, "grad_norm": 10.312410354614258, "learning_rate": 9.374081336580702e-06, "loss": 0.4088, "step": 3652 }, { "epoch": 0.16943413729128015, "grad_norm": 13.992110252380371, "learning_rate": 9.373724758141309e-06, "loss": 0.4902, "step": 3653 }, { "epoch": 0.16948051948051948, "grad_norm": 8.729028701782227, "learning_rate": 9.373368084947472e-06, "loss": 0.4624, "step": 3654 }, { "epoch": 0.1695269016697588, "grad_norm": 6.373997688293457, "learning_rate": 9.373011317006926e-06, "loss": 0.4462, "step": 3655 }, { "epoch": 0.16957328385899814, "grad_norm": 23.745859146118164, "learning_rate": 9.372654454327394e-06, "loss": 0.3639, "step": 3656 }, { "epoch": 0.16961966604823747, "grad_norm": 5.203268527984619, "learning_rate": 9.372297496916609e-06, "loss": 0.2539, "step": 3657 }, { "epoch": 0.1696660482374768, "grad_norm": 8.017507553100586, "learning_rate": 9.371940444782307e-06, "loss": 0.2877, "step": 3658 }, { "epoch": 0.16971243042671613, "grad_norm": 11.894733428955078, "learning_rate": 9.37158329793222e-06, "loss": 0.4053, "step": 3659 }, { "epoch": 0.16975881261595546, "grad_norm": 7.583353519439697, "learning_rate": 9.371226056374087e-06, "loss": 0.3916, "step": 3660 }, { "epoch": 0.1698051948051948, "grad_norm": 3.819403886795044, "learning_rate": 9.370868720115647e-06, "loss": 0.2924, "step": 3661 }, { "epoch": 0.16985157699443415, "grad_norm": 4.029099941253662, "learning_rate": 9.37051128916464e-06, "loss": 0.3243, "step": 3662 }, { "epoch": 0.16989795918367348, "grad_norm": 8.672162055969238, "learning_rate": 9.370153763528813e-06, "loss": 0.4359, "step": 3663 }, { "epoch": 0.1699443413729128, "grad_norm": 7.0270915031433105, "learning_rate": 9.369796143215911e-06, "loss": 0.4132, "step": 3664 }, { "epoch": 0.16999072356215214, "grad_norm": 12.78452205657959, "learning_rate": 9.369438428233677e-06, "loss": 0.477, "step": 3665 }, { "epoch": 0.17003710575139147, "grad_norm": 6.017186164855957, "learning_rate": 9.369080618589866e-06, "loss": 0.3622, "step": 3666 }, { "epoch": 0.1700834879406308, "grad_norm": 5.8748860359191895, "learning_rate": 9.368722714292228e-06, "loss": 0.3937, "step": 3667 }, { "epoch": 0.17012987012987013, "grad_norm": 4.205530166625977, "learning_rate": 9.368364715348514e-06, "loss": 0.3474, "step": 3668 }, { "epoch": 0.17017625231910946, "grad_norm": 8.845744132995605, "learning_rate": 9.368006621766485e-06, "loss": 0.4388, "step": 3669 }, { "epoch": 0.1702226345083488, "grad_norm": 10.221552848815918, "learning_rate": 9.367648433553898e-06, "loss": 0.3129, "step": 3670 }, { "epoch": 0.17026901669758812, "grad_norm": 12.570569038391113, "learning_rate": 9.367290150718509e-06, "loss": 0.4052, "step": 3671 }, { "epoch": 0.17031539888682745, "grad_norm": 10.556955337524414, "learning_rate": 9.366931773268083e-06, "loss": 0.319, "step": 3672 }, { "epoch": 0.17036178107606678, "grad_norm": 11.956494331359863, "learning_rate": 9.366573301210383e-06, "loss": 0.454, "step": 3673 }, { "epoch": 0.1704081632653061, "grad_norm": 6.116375923156738, "learning_rate": 9.366214734553176e-06, "loss": 0.4087, "step": 3674 }, { "epoch": 0.17045454545454544, "grad_norm": 10.557247161865234, "learning_rate": 9.365856073304231e-06, "loss": 0.2725, "step": 3675 }, { "epoch": 0.1705009276437848, "grad_norm": 13.91407585144043, "learning_rate": 9.365497317471315e-06, "loss": 0.4965, "step": 3676 }, { "epoch": 0.17054730983302413, "grad_norm": 8.225388526916504, "learning_rate": 9.365138467062202e-06, "loss": 0.3581, "step": 3677 }, { "epoch": 0.17059369202226346, "grad_norm": 5.7891974449157715, "learning_rate": 9.364779522084667e-06, "loss": 0.3275, "step": 3678 }, { "epoch": 0.1706400742115028, "grad_norm": 4.976036548614502, "learning_rate": 9.364420482546487e-06, "loss": 0.4202, "step": 3679 }, { "epoch": 0.17068645640074212, "grad_norm": 24.862213134765625, "learning_rate": 9.364061348455438e-06, "loss": 0.5246, "step": 3680 }, { "epoch": 0.17073283858998145, "grad_norm": 12.921120643615723, "learning_rate": 9.363702119819302e-06, "loss": 0.4747, "step": 3681 }, { "epoch": 0.17077922077922078, "grad_norm": 7.688897609710693, "learning_rate": 9.363342796645861e-06, "loss": 0.2549, "step": 3682 }, { "epoch": 0.1708256029684601, "grad_norm": 10.325559616088867, "learning_rate": 9.3629833789429e-06, "loss": 0.3268, "step": 3683 }, { "epoch": 0.17087198515769944, "grad_norm": 9.96579647064209, "learning_rate": 9.362623866718205e-06, "loss": 0.492, "step": 3684 }, { "epoch": 0.17091836734693877, "grad_norm": 4.900984287261963, "learning_rate": 9.362264259979565e-06, "loss": 0.3791, "step": 3685 }, { "epoch": 0.1709647495361781, "grad_norm": 5.009983539581299, "learning_rate": 9.36190455873477e-06, "loss": 0.3, "step": 3686 }, { "epoch": 0.17101113172541743, "grad_norm": 8.363057136535645, "learning_rate": 9.361544762991616e-06, "loss": 0.5169, "step": 3687 }, { "epoch": 0.17105751391465676, "grad_norm": 7.788073539733887, "learning_rate": 9.361184872757894e-06, "loss": 0.442, "step": 3688 }, { "epoch": 0.1711038961038961, "grad_norm": 6.1099772453308105, "learning_rate": 9.3608248880414e-06, "loss": 0.3789, "step": 3689 }, { "epoch": 0.17115027829313545, "grad_norm": 10.227490425109863, "learning_rate": 9.360464808849936e-06, "loss": 0.4733, "step": 3690 }, { "epoch": 0.17119666048237478, "grad_norm": 7.9303202629089355, "learning_rate": 9.360104635191303e-06, "loss": 0.324, "step": 3691 }, { "epoch": 0.1712430426716141, "grad_norm": 8.218338012695312, "learning_rate": 9.359744367073303e-06, "loss": 0.3636, "step": 3692 }, { "epoch": 0.17128942486085344, "grad_norm": 7.80859899520874, "learning_rate": 9.359384004503739e-06, "loss": 0.3833, "step": 3693 }, { "epoch": 0.17133580705009277, "grad_norm": 6.166488170623779, "learning_rate": 9.35902354749042e-06, "loss": 0.403, "step": 3694 }, { "epoch": 0.1713821892393321, "grad_norm": 10.973861694335938, "learning_rate": 9.358662996041157e-06, "loss": 0.4138, "step": 3695 }, { "epoch": 0.17142857142857143, "grad_norm": 8.741853713989258, "learning_rate": 9.358302350163758e-06, "loss": 0.5291, "step": 3696 }, { "epoch": 0.17147495361781076, "grad_norm": 8.180475234985352, "learning_rate": 9.357941609866037e-06, "loss": 0.3851, "step": 3697 }, { "epoch": 0.1715213358070501, "grad_norm": 5.319466590881348, "learning_rate": 9.357580775155809e-06, "loss": 0.3255, "step": 3698 }, { "epoch": 0.17156771799628942, "grad_norm": 6.554000377655029, "learning_rate": 9.357219846040893e-06, "loss": 0.4076, "step": 3699 }, { "epoch": 0.17161410018552875, "grad_norm": 5.210821151733398, "learning_rate": 9.356858822529108e-06, "loss": 0.3824, "step": 3700 }, { "epoch": 0.17166048237476808, "grad_norm": 6.373410224914551, "learning_rate": 9.356497704628272e-06, "loss": 0.3699, "step": 3701 }, { "epoch": 0.1717068645640074, "grad_norm": 11.97033977508545, "learning_rate": 9.356136492346214e-06, "loss": 0.4219, "step": 3702 }, { "epoch": 0.17175324675324674, "grad_norm": 13.508135795593262, "learning_rate": 9.355775185690755e-06, "loss": 0.3265, "step": 3703 }, { "epoch": 0.1717996289424861, "grad_norm": 5.809004306793213, "learning_rate": 9.355413784669722e-06, "loss": 0.2962, "step": 3704 }, { "epoch": 0.17184601113172543, "grad_norm": 12.625161170959473, "learning_rate": 9.355052289290949e-06, "loss": 0.2667, "step": 3705 }, { "epoch": 0.17189239332096476, "grad_norm": 5.49626350402832, "learning_rate": 9.354690699562266e-06, "loss": 0.3755, "step": 3706 }, { "epoch": 0.1719387755102041, "grad_norm": 8.381120681762695, "learning_rate": 9.354329015491503e-06, "loss": 0.385, "step": 3707 }, { "epoch": 0.17198515769944342, "grad_norm": 3.982849359512329, "learning_rate": 9.353967237086501e-06, "loss": 0.3145, "step": 3708 }, { "epoch": 0.17203153988868275, "grad_norm": 6.314438819885254, "learning_rate": 9.353605364355093e-06, "loss": 0.2997, "step": 3709 }, { "epoch": 0.17207792207792208, "grad_norm": 7.140683174133301, "learning_rate": 9.353243397305124e-06, "loss": 0.3787, "step": 3710 }, { "epoch": 0.1721243042671614, "grad_norm": 5.928689479827881, "learning_rate": 9.352881335944432e-06, "loss": 0.326, "step": 3711 }, { "epoch": 0.17217068645640074, "grad_norm": 7.853849411010742, "learning_rate": 9.352519180280862e-06, "loss": 0.3949, "step": 3712 }, { "epoch": 0.17221706864564007, "grad_norm": 14.927027702331543, "learning_rate": 9.352156930322257e-06, "loss": 0.567, "step": 3713 }, { "epoch": 0.1722634508348794, "grad_norm": 6.595493316650391, "learning_rate": 9.35179458607647e-06, "loss": 0.4121, "step": 3714 }, { "epoch": 0.17230983302411873, "grad_norm": 17.3238525390625, "learning_rate": 9.35143214755135e-06, "loss": 0.4713, "step": 3715 }, { "epoch": 0.17235621521335806, "grad_norm": 6.759720802307129, "learning_rate": 9.351069614754744e-06, "loss": 0.3439, "step": 3716 }, { "epoch": 0.1724025974025974, "grad_norm": 10.281311988830566, "learning_rate": 9.350706987694513e-06, "loss": 0.3645, "step": 3717 }, { "epoch": 0.17244897959183675, "grad_norm": 7.30851411819458, "learning_rate": 9.350344266378509e-06, "loss": 0.4251, "step": 3718 }, { "epoch": 0.17249536178107608, "grad_norm": 5.52047872543335, "learning_rate": 9.349981450814589e-06, "loss": 0.3979, "step": 3719 }, { "epoch": 0.1725417439703154, "grad_norm": 4.871979713439941, "learning_rate": 9.349618541010616e-06, "loss": 0.3712, "step": 3720 }, { "epoch": 0.17258812615955474, "grad_norm": 11.085448265075684, "learning_rate": 9.349255536974454e-06, "loss": 0.296, "step": 3721 }, { "epoch": 0.17263450834879407, "grad_norm": 11.152515411376953, "learning_rate": 9.348892438713962e-06, "loss": 0.3228, "step": 3722 }, { "epoch": 0.1726808905380334, "grad_norm": 5.239099502563477, "learning_rate": 9.34852924623701e-06, "loss": 0.2949, "step": 3723 }, { "epoch": 0.17272727272727273, "grad_norm": 5.3899054527282715, "learning_rate": 9.348165959551466e-06, "loss": 0.3243, "step": 3724 }, { "epoch": 0.17277365491651206, "grad_norm": 13.816150665283203, "learning_rate": 9.347802578665199e-06, "loss": 0.6161, "step": 3725 }, { "epoch": 0.1728200371057514, "grad_norm": 5.6719465255737305, "learning_rate": 9.347439103586085e-06, "loss": 0.3541, "step": 3726 }, { "epoch": 0.17286641929499072, "grad_norm": 19.735658645629883, "learning_rate": 9.347075534321992e-06, "loss": 0.4259, "step": 3727 }, { "epoch": 0.17291280148423005, "grad_norm": 10.47272777557373, "learning_rate": 9.346711870880802e-06, "loss": 0.4807, "step": 3728 }, { "epoch": 0.17295918367346938, "grad_norm": 8.442590713500977, "learning_rate": 9.346348113270393e-06, "loss": 0.3534, "step": 3729 }, { "epoch": 0.1730055658627087, "grad_norm": 9.53626823425293, "learning_rate": 9.345984261498644e-06, "loss": 0.3994, "step": 3730 }, { "epoch": 0.17305194805194804, "grad_norm": 7.949401378631592, "learning_rate": 9.345620315573438e-06, "loss": 0.362, "step": 3731 }, { "epoch": 0.17309833024118737, "grad_norm": 5.166259288787842, "learning_rate": 9.345256275502658e-06, "loss": 0.3854, "step": 3732 }, { "epoch": 0.17314471243042673, "grad_norm": 13.40192985534668, "learning_rate": 9.344892141294197e-06, "loss": 0.3961, "step": 3733 }, { "epoch": 0.17319109461966606, "grad_norm": 6.240100860595703, "learning_rate": 9.344527912955936e-06, "loss": 0.3483, "step": 3734 }, { "epoch": 0.1732374768089054, "grad_norm": 20.547584533691406, "learning_rate": 9.34416359049577e-06, "loss": 0.5087, "step": 3735 }, { "epoch": 0.17328385899814472, "grad_norm": 7.334588527679443, "learning_rate": 9.343799173921591e-06, "loss": 0.4492, "step": 3736 }, { "epoch": 0.17333024118738405, "grad_norm": 6.33034610748291, "learning_rate": 9.343434663241295e-06, "loss": 0.345, "step": 3737 }, { "epoch": 0.17337662337662338, "grad_norm": 11.772659301757812, "learning_rate": 9.343070058462778e-06, "loss": 0.4163, "step": 3738 }, { "epoch": 0.1734230055658627, "grad_norm": 7.654214859008789, "learning_rate": 9.342705359593938e-06, "loss": 0.3326, "step": 3739 }, { "epoch": 0.17346938775510204, "grad_norm": 11.204254150390625, "learning_rate": 9.342340566642676e-06, "loss": 0.365, "step": 3740 }, { "epoch": 0.17351576994434137, "grad_norm": 11.682965278625488, "learning_rate": 9.341975679616898e-06, "loss": 0.3158, "step": 3741 }, { "epoch": 0.1735621521335807, "grad_norm": 8.501358032226562, "learning_rate": 9.341610698524507e-06, "loss": 0.3403, "step": 3742 }, { "epoch": 0.17360853432282003, "grad_norm": 6.676298141479492, "learning_rate": 9.341245623373409e-06, "loss": 0.2572, "step": 3743 }, { "epoch": 0.17365491651205936, "grad_norm": 12.001404762268066, "learning_rate": 9.340880454171515e-06, "loss": 0.569, "step": 3744 }, { "epoch": 0.1737012987012987, "grad_norm": 4.839502334594727, "learning_rate": 9.340515190926736e-06, "loss": 0.333, "step": 3745 }, { "epoch": 0.17374768089053802, "grad_norm": 11.519521713256836, "learning_rate": 9.340149833646984e-06, "loss": 0.511, "step": 3746 }, { "epoch": 0.17379406307977738, "grad_norm": 8.467246055603027, "learning_rate": 9.339784382340174e-06, "loss": 0.3709, "step": 3747 }, { "epoch": 0.1738404452690167, "grad_norm": 7.257287502288818, "learning_rate": 9.339418837014226e-06, "loss": 0.2775, "step": 3748 }, { "epoch": 0.17388682745825604, "grad_norm": 9.578885078430176, "learning_rate": 9.339053197677058e-06, "loss": 0.4522, "step": 3749 }, { "epoch": 0.17393320964749537, "grad_norm": 10.774893760681152, "learning_rate": 9.33868746433659e-06, "loss": 0.4951, "step": 3750 }, { "epoch": 0.1739795918367347, "grad_norm": 10.062929153442383, "learning_rate": 9.338321637000746e-06, "loss": 0.4036, "step": 3751 }, { "epoch": 0.17402597402597403, "grad_norm": 5.534184455871582, "learning_rate": 9.337955715677452e-06, "loss": 0.3696, "step": 3752 }, { "epoch": 0.17407235621521336, "grad_norm": 11.859132766723633, "learning_rate": 9.337589700374636e-06, "loss": 0.5712, "step": 3753 }, { "epoch": 0.1741187384044527, "grad_norm": 7.217539310455322, "learning_rate": 9.337223591100228e-06, "loss": 0.411, "step": 3754 }, { "epoch": 0.17416512059369202, "grad_norm": 9.590609550476074, "learning_rate": 9.336857387862158e-06, "loss": 0.4313, "step": 3755 }, { "epoch": 0.17421150278293135, "grad_norm": 12.13876724243164, "learning_rate": 9.33649109066836e-06, "loss": 0.5511, "step": 3756 }, { "epoch": 0.17425788497217068, "grad_norm": 5.863702297210693, "learning_rate": 9.33612469952677e-06, "loss": 0.3656, "step": 3757 }, { "epoch": 0.17430426716141, "grad_norm": 7.062560081481934, "learning_rate": 9.335758214445323e-06, "loss": 0.4535, "step": 3758 }, { "epoch": 0.17435064935064934, "grad_norm": 4.782497406005859, "learning_rate": 9.335391635431965e-06, "loss": 0.3857, "step": 3759 }, { "epoch": 0.17439703153988867, "grad_norm": 14.06700325012207, "learning_rate": 9.335024962494633e-06, "loss": 0.4886, "step": 3760 }, { "epoch": 0.17444341372912803, "grad_norm": 7.00813627243042, "learning_rate": 9.334658195641269e-06, "loss": 0.3558, "step": 3761 }, { "epoch": 0.17448979591836736, "grad_norm": 7.9293670654296875, "learning_rate": 9.334291334879824e-06, "loss": 0.3513, "step": 3762 }, { "epoch": 0.1745361781076067, "grad_norm": 6.613221645355225, "learning_rate": 9.333924380218243e-06, "loss": 0.5107, "step": 3763 }, { "epoch": 0.17458256029684602, "grad_norm": 11.059221267700195, "learning_rate": 9.333557331664476e-06, "loss": 0.4298, "step": 3764 }, { "epoch": 0.17462894248608535, "grad_norm": 12.143928527832031, "learning_rate": 9.333190189226473e-06, "loss": 0.3774, "step": 3765 }, { "epoch": 0.17467532467532468, "grad_norm": 10.322587013244629, "learning_rate": 9.332822952912193e-06, "loss": 0.4939, "step": 3766 }, { "epoch": 0.174721706864564, "grad_norm": 13.318778038024902, "learning_rate": 9.332455622729588e-06, "loss": 0.5263, "step": 3767 }, { "epoch": 0.17476808905380334, "grad_norm": 6.74970006942749, "learning_rate": 9.332088198686618e-06, "loss": 0.3567, "step": 3768 }, { "epoch": 0.17481447124304267, "grad_norm": 10.925301551818848, "learning_rate": 9.331720680791241e-06, "loss": 0.4738, "step": 3769 }, { "epoch": 0.174860853432282, "grad_norm": 8.016068458557129, "learning_rate": 9.331353069051418e-06, "loss": 0.4758, "step": 3770 }, { "epoch": 0.17490723562152133, "grad_norm": 4.561720848083496, "learning_rate": 9.330985363475119e-06, "loss": 0.383, "step": 3771 }, { "epoch": 0.17495361781076066, "grad_norm": 9.157550811767578, "learning_rate": 9.330617564070305e-06, "loss": 0.4168, "step": 3772 }, { "epoch": 0.175, "grad_norm": 4.884336471557617, "learning_rate": 9.330249670844946e-06, "loss": 0.366, "step": 3773 }, { "epoch": 0.17504638218923932, "grad_norm": 9.55639362335205, "learning_rate": 9.329881683807011e-06, "loss": 0.4515, "step": 3774 }, { "epoch": 0.17509276437847868, "grad_norm": 11.789533615112305, "learning_rate": 9.329513602964474e-06, "loss": 0.4224, "step": 3775 }, { "epoch": 0.175139146567718, "grad_norm": 5.877440452575684, "learning_rate": 9.329145428325308e-06, "loss": 0.3999, "step": 3776 }, { "epoch": 0.17518552875695734, "grad_norm": 5.231929779052734, "learning_rate": 9.328777159897491e-06, "loss": 0.3001, "step": 3777 }, { "epoch": 0.17523191094619667, "grad_norm": 7.449274063110352, "learning_rate": 9.328408797689e-06, "loss": 0.3186, "step": 3778 }, { "epoch": 0.175278293135436, "grad_norm": 5.030575752258301, "learning_rate": 9.328040341707813e-06, "loss": 0.3929, "step": 3779 }, { "epoch": 0.17532467532467533, "grad_norm": 7.906985759735107, "learning_rate": 9.327671791961917e-06, "loss": 0.3706, "step": 3780 }, { "epoch": 0.17537105751391466, "grad_norm": 6.849950313568115, "learning_rate": 9.327303148459293e-06, "loss": 0.4072, "step": 3781 }, { "epoch": 0.175417439703154, "grad_norm": 8.712120056152344, "learning_rate": 9.326934411207928e-06, "loss": 0.3633, "step": 3782 }, { "epoch": 0.17546382189239332, "grad_norm": 9.591653823852539, "learning_rate": 9.326565580215811e-06, "loss": 0.311, "step": 3783 }, { "epoch": 0.17551020408163265, "grad_norm": 6.754834175109863, "learning_rate": 9.326196655490935e-06, "loss": 0.2641, "step": 3784 }, { "epoch": 0.17555658627087198, "grad_norm": 4.835880756378174, "learning_rate": 9.32582763704129e-06, "loss": 0.3027, "step": 3785 }, { "epoch": 0.1756029684601113, "grad_norm": 8.097328186035156, "learning_rate": 9.32545852487487e-06, "loss": 0.3373, "step": 3786 }, { "epoch": 0.17564935064935064, "grad_norm": 14.581475257873535, "learning_rate": 9.325089318999672e-06, "loss": 0.5083, "step": 3787 }, { "epoch": 0.17569573283858997, "grad_norm": 8.229424476623535, "learning_rate": 9.324720019423696e-06, "loss": 0.3806, "step": 3788 }, { "epoch": 0.17574211502782933, "grad_norm": 6.067429065704346, "learning_rate": 9.32435062615494e-06, "loss": 0.3586, "step": 3789 }, { "epoch": 0.17578849721706866, "grad_norm": 10.562784194946289, "learning_rate": 9.323981139201412e-06, "loss": 0.5017, "step": 3790 }, { "epoch": 0.175834879406308, "grad_norm": 17.358692169189453, "learning_rate": 9.32361155857111e-06, "loss": 0.3836, "step": 3791 }, { "epoch": 0.17588126159554732, "grad_norm": 7.957591533660889, "learning_rate": 9.323241884272046e-06, "loss": 0.1812, "step": 3792 }, { "epoch": 0.17592764378478665, "grad_norm": 13.382670402526855, "learning_rate": 9.322872116312227e-06, "loss": 0.4678, "step": 3793 }, { "epoch": 0.17597402597402598, "grad_norm": 7.050805568695068, "learning_rate": 9.322502254699663e-06, "loss": 0.3966, "step": 3794 }, { "epoch": 0.1760204081632653, "grad_norm": 9.922597885131836, "learning_rate": 9.322132299442368e-06, "loss": 0.4698, "step": 3795 }, { "epoch": 0.17606679035250464, "grad_norm": 5.74974250793457, "learning_rate": 9.321762250548355e-06, "loss": 0.2746, "step": 3796 }, { "epoch": 0.17611317254174397, "grad_norm": 7.7230706214904785, "learning_rate": 9.321392108025644e-06, "loss": 0.3965, "step": 3797 }, { "epoch": 0.1761595547309833, "grad_norm": 11.043614387512207, "learning_rate": 9.321021871882251e-06, "loss": 0.4713, "step": 3798 }, { "epoch": 0.17620593692022263, "grad_norm": 10.255559921264648, "learning_rate": 9.320651542126197e-06, "loss": 0.4075, "step": 3799 }, { "epoch": 0.17625231910946196, "grad_norm": 13.116429328918457, "learning_rate": 9.32028111876551e-06, "loss": 0.3328, "step": 3800 }, { "epoch": 0.1762987012987013, "grad_norm": 6.697628498077393, "learning_rate": 9.31991060180821e-06, "loss": 0.3458, "step": 3801 }, { "epoch": 0.17634508348794062, "grad_norm": 8.531011581420898, "learning_rate": 9.319539991262324e-06, "loss": 0.3804, "step": 3802 }, { "epoch": 0.17639146567717998, "grad_norm": 6.866191864013672, "learning_rate": 9.319169287135882e-06, "loss": 0.3643, "step": 3803 }, { "epoch": 0.1764378478664193, "grad_norm": 5.161471366882324, "learning_rate": 9.318798489436917e-06, "loss": 0.3539, "step": 3804 }, { "epoch": 0.17648423005565864, "grad_norm": 7.998917579650879, "learning_rate": 9.31842759817346e-06, "loss": 0.3975, "step": 3805 }, { "epoch": 0.17653061224489797, "grad_norm": 8.259306907653809, "learning_rate": 9.318056613353548e-06, "loss": 0.3235, "step": 3806 }, { "epoch": 0.1765769944341373, "grad_norm": 5.627623558044434, "learning_rate": 9.317685534985216e-06, "loss": 0.3037, "step": 3807 }, { "epoch": 0.17662337662337663, "grad_norm": 10.28633975982666, "learning_rate": 9.317314363076503e-06, "loss": 0.4201, "step": 3808 }, { "epoch": 0.17666975881261596, "grad_norm": 9.51871109008789, "learning_rate": 9.316943097635453e-06, "loss": 0.3235, "step": 3809 }, { "epoch": 0.1767161410018553, "grad_norm": 5.301543712615967, "learning_rate": 9.316571738670109e-06, "loss": 0.3493, "step": 3810 }, { "epoch": 0.17676252319109462, "grad_norm": 13.708061218261719, "learning_rate": 9.316200286188513e-06, "loss": 0.4039, "step": 3811 }, { "epoch": 0.17680890538033395, "grad_norm": 6.177794456481934, "learning_rate": 9.315828740198714e-06, "loss": 0.2907, "step": 3812 }, { "epoch": 0.17685528756957328, "grad_norm": 5.286633491516113, "learning_rate": 9.315457100708763e-06, "loss": 0.3404, "step": 3813 }, { "epoch": 0.1769016697588126, "grad_norm": 8.585195541381836, "learning_rate": 9.31508536772671e-06, "loss": 0.342, "step": 3814 }, { "epoch": 0.17694805194805194, "grad_norm": 6.988979339599609, "learning_rate": 9.314713541260607e-06, "loss": 0.3825, "step": 3815 }, { "epoch": 0.17699443413729127, "grad_norm": 5.208272457122803, "learning_rate": 9.314341621318512e-06, "loss": 0.2896, "step": 3816 }, { "epoch": 0.17704081632653063, "grad_norm": 12.327544212341309, "learning_rate": 9.313969607908481e-06, "loss": 0.5607, "step": 3817 }, { "epoch": 0.17708719851576996, "grad_norm": 31.117082595825195, "learning_rate": 9.313597501038576e-06, "loss": 0.3927, "step": 3818 }, { "epoch": 0.17713358070500929, "grad_norm": 7.433912754058838, "learning_rate": 9.313225300716853e-06, "loss": 0.3691, "step": 3819 }, { "epoch": 0.17717996289424862, "grad_norm": 4.60242223739624, "learning_rate": 9.312853006951381e-06, "loss": 0.2348, "step": 3820 }, { "epoch": 0.17722634508348795, "grad_norm": 7.8543477058410645, "learning_rate": 9.312480619750224e-06, "loss": 0.3052, "step": 3821 }, { "epoch": 0.17727272727272728, "grad_norm": 5.329947471618652, "learning_rate": 9.312108139121447e-06, "loss": 0.2091, "step": 3822 }, { "epoch": 0.1773191094619666, "grad_norm": 10.388788223266602, "learning_rate": 9.311735565073123e-06, "loss": 0.3693, "step": 3823 }, { "epoch": 0.17736549165120594, "grad_norm": 11.877656936645508, "learning_rate": 9.311362897613321e-06, "loss": 0.3659, "step": 3824 }, { "epoch": 0.17741187384044527, "grad_norm": 6.721286296844482, "learning_rate": 9.310990136750116e-06, "loss": 0.4054, "step": 3825 }, { "epoch": 0.1774582560296846, "grad_norm": 7.696870803833008, "learning_rate": 9.310617282491583e-06, "loss": 0.3998, "step": 3826 }, { "epoch": 0.17750463821892393, "grad_norm": 11.02636432647705, "learning_rate": 9.310244334845802e-06, "loss": 0.404, "step": 3827 }, { "epoch": 0.17755102040816326, "grad_norm": 11.72508716583252, "learning_rate": 9.30987129382085e-06, "loss": 0.4114, "step": 3828 }, { "epoch": 0.1775974025974026, "grad_norm": 6.995614051818848, "learning_rate": 9.309498159424808e-06, "loss": 0.3766, "step": 3829 }, { "epoch": 0.17764378478664192, "grad_norm": 10.775339126586914, "learning_rate": 9.309124931665765e-06, "loss": 0.3779, "step": 3830 }, { "epoch": 0.17769016697588128, "grad_norm": 5.221616268157959, "learning_rate": 9.3087516105518e-06, "loss": 0.3419, "step": 3831 }, { "epoch": 0.1777365491651206, "grad_norm": 11.282209396362305, "learning_rate": 9.308378196091006e-06, "loss": 0.5301, "step": 3832 }, { "epoch": 0.17778293135435994, "grad_norm": 6.311471939086914, "learning_rate": 9.30800468829147e-06, "loss": 0.4168, "step": 3833 }, { "epoch": 0.17782931354359927, "grad_norm": 9.830855369567871, "learning_rate": 9.307631087161285e-06, "loss": 0.357, "step": 3834 }, { "epoch": 0.1778756957328386, "grad_norm": 5.50068473815918, "learning_rate": 9.307257392708544e-06, "loss": 0.3805, "step": 3835 }, { "epoch": 0.17792207792207793, "grad_norm": 13.286962509155273, "learning_rate": 9.306883604941343e-06, "loss": 0.4677, "step": 3836 }, { "epoch": 0.17796846011131726, "grad_norm": 12.728489875793457, "learning_rate": 9.30650972386778e-06, "loss": 0.5492, "step": 3837 }, { "epoch": 0.17801484230055659, "grad_norm": 6.720578670501709, "learning_rate": 9.306135749495958e-06, "loss": 0.3631, "step": 3838 }, { "epoch": 0.17806122448979592, "grad_norm": 9.150167465209961, "learning_rate": 9.305761681833974e-06, "loss": 0.4012, "step": 3839 }, { "epoch": 0.17810760667903525, "grad_norm": 12.275242805480957, "learning_rate": 9.305387520889933e-06, "loss": 0.5497, "step": 3840 }, { "epoch": 0.17815398886827458, "grad_norm": 8.781206130981445, "learning_rate": 9.305013266671946e-06, "loss": 0.276, "step": 3841 }, { "epoch": 0.1782003710575139, "grad_norm": 16.216835021972656, "learning_rate": 9.304638919188114e-06, "loss": 0.7083, "step": 3842 }, { "epoch": 0.17824675324675324, "grad_norm": 6.001860618591309, "learning_rate": 9.304264478446552e-06, "loss": 0.3223, "step": 3843 }, { "epoch": 0.17829313543599257, "grad_norm": 8.475574493408203, "learning_rate": 9.303889944455369e-06, "loss": 0.4043, "step": 3844 }, { "epoch": 0.17833951762523192, "grad_norm": 7.4745378494262695, "learning_rate": 9.303515317222681e-06, "loss": 0.3383, "step": 3845 }, { "epoch": 0.17838589981447125, "grad_norm": 5.078770637512207, "learning_rate": 9.303140596756604e-06, "loss": 0.4505, "step": 3846 }, { "epoch": 0.17843228200371059, "grad_norm": 9.591777801513672, "learning_rate": 9.302765783065256e-06, "loss": 0.449, "step": 3847 }, { "epoch": 0.17847866419294992, "grad_norm": 9.068686485290527, "learning_rate": 9.302390876156756e-06, "loss": 0.3723, "step": 3848 }, { "epoch": 0.17852504638218925, "grad_norm": 11.267647743225098, "learning_rate": 9.302015876039226e-06, "loss": 0.3626, "step": 3849 }, { "epoch": 0.17857142857142858, "grad_norm": 8.056449890136719, "learning_rate": 9.301640782720792e-06, "loss": 0.34, "step": 3850 }, { "epoch": 0.1786178107606679, "grad_norm": 10.14171028137207, "learning_rate": 9.30126559620958e-06, "loss": 0.3564, "step": 3851 }, { "epoch": 0.17866419294990724, "grad_norm": 7.916632652282715, "learning_rate": 9.300890316513717e-06, "loss": 0.4214, "step": 3852 }, { "epoch": 0.17871057513914657, "grad_norm": 10.088420867919922, "learning_rate": 9.300514943641333e-06, "loss": 0.5105, "step": 3853 }, { "epoch": 0.1787569573283859, "grad_norm": 8.517714500427246, "learning_rate": 9.300139477600563e-06, "loss": 0.4072, "step": 3854 }, { "epoch": 0.17880333951762523, "grad_norm": 3.910628080368042, "learning_rate": 9.299763918399536e-06, "loss": 0.3086, "step": 3855 }, { "epoch": 0.17884972170686456, "grad_norm": 10.8518648147583, "learning_rate": 9.299388266046394e-06, "loss": 0.3553, "step": 3856 }, { "epoch": 0.1788961038961039, "grad_norm": 8.80921459197998, "learning_rate": 9.299012520549273e-06, "loss": 0.336, "step": 3857 }, { "epoch": 0.17894248608534322, "grad_norm": 5.728402137756348, "learning_rate": 9.298636681916313e-06, "loss": 0.361, "step": 3858 }, { "epoch": 0.17898886827458255, "grad_norm": 9.671545028686523, "learning_rate": 9.298260750155657e-06, "loss": 0.44, "step": 3859 }, { "epoch": 0.1790352504638219, "grad_norm": 15.472256660461426, "learning_rate": 9.297884725275448e-06, "loss": 0.4205, "step": 3860 }, { "epoch": 0.17908163265306123, "grad_norm": 5.931141376495361, "learning_rate": 9.297508607283833e-06, "loss": 0.3934, "step": 3861 }, { "epoch": 0.17912801484230056, "grad_norm": 4.900632858276367, "learning_rate": 9.297132396188963e-06, "loss": 0.368, "step": 3862 }, { "epoch": 0.1791743970315399, "grad_norm": 8.545400619506836, "learning_rate": 9.296756091998984e-06, "loss": 0.2596, "step": 3863 }, { "epoch": 0.17922077922077922, "grad_norm": 9.072455406188965, "learning_rate": 9.296379694722051e-06, "loss": 0.4554, "step": 3864 }, { "epoch": 0.17926716141001856, "grad_norm": 5.894612789154053, "learning_rate": 9.296003204366318e-06, "loss": 0.3914, "step": 3865 }, { "epoch": 0.17931354359925789, "grad_norm": 8.404152870178223, "learning_rate": 9.295626620939942e-06, "loss": 0.3764, "step": 3866 }, { "epoch": 0.17935992578849722, "grad_norm": 7.6020002365112305, "learning_rate": 9.29524994445108e-06, "loss": 0.4515, "step": 3867 }, { "epoch": 0.17940630797773655, "grad_norm": 5.782734394073486, "learning_rate": 9.294873174907895e-06, "loss": 0.3325, "step": 3868 }, { "epoch": 0.17945269016697588, "grad_norm": 5.012332916259766, "learning_rate": 9.294496312318547e-06, "loss": 0.2514, "step": 3869 }, { "epoch": 0.1794990723562152, "grad_norm": 8.315007209777832, "learning_rate": 9.2941193566912e-06, "loss": 0.397, "step": 3870 }, { "epoch": 0.17954545454545454, "grad_norm": 8.454005241394043, "learning_rate": 9.293742308034025e-06, "loss": 0.3095, "step": 3871 }, { "epoch": 0.17959183673469387, "grad_norm": 6.3899312019348145, "learning_rate": 9.293365166355186e-06, "loss": 0.4003, "step": 3872 }, { "epoch": 0.1796382189239332, "grad_norm": 6.837860107421875, "learning_rate": 9.292987931662855e-06, "loss": 0.3715, "step": 3873 }, { "epoch": 0.17968460111317255, "grad_norm": 4.678715229034424, "learning_rate": 9.292610603965204e-06, "loss": 0.3058, "step": 3874 }, { "epoch": 0.17973098330241188, "grad_norm": 4.293529510498047, "learning_rate": 9.29223318327041e-06, "loss": 0.3109, "step": 3875 }, { "epoch": 0.17977736549165121, "grad_norm": 10.476397514343262, "learning_rate": 9.291855669586648e-06, "loss": 0.3495, "step": 3876 }, { "epoch": 0.17982374768089054, "grad_norm": 8.026792526245117, "learning_rate": 9.291478062922097e-06, "loss": 0.3791, "step": 3877 }, { "epoch": 0.17987012987012987, "grad_norm": 12.015070915222168, "learning_rate": 9.291100363284935e-06, "loss": 0.3931, "step": 3878 }, { "epoch": 0.1799165120593692, "grad_norm": 5.345142841339111, "learning_rate": 9.29072257068335e-06, "loss": 0.2668, "step": 3879 }, { "epoch": 0.17996289424860853, "grad_norm": 10.298885345458984, "learning_rate": 9.29034468512552e-06, "loss": 0.3376, "step": 3880 }, { "epoch": 0.18000927643784786, "grad_norm": 10.035630226135254, "learning_rate": 9.28996670661964e-06, "loss": 0.4218, "step": 3881 }, { "epoch": 0.1800556586270872, "grad_norm": 5.835219860076904, "learning_rate": 9.28958863517389e-06, "loss": 0.2371, "step": 3882 }, { "epoch": 0.18010204081632653, "grad_norm": 10.37907600402832, "learning_rate": 9.289210470796465e-06, "loss": 0.4231, "step": 3883 }, { "epoch": 0.18014842300556586, "grad_norm": 7.592935085296631, "learning_rate": 9.28883221349556e-06, "loss": 0.2419, "step": 3884 }, { "epoch": 0.18019480519480519, "grad_norm": 7.971673488616943, "learning_rate": 9.288453863279365e-06, "loss": 0.3526, "step": 3885 }, { "epoch": 0.18024118738404452, "grad_norm": 8.602524757385254, "learning_rate": 9.28807542015608e-06, "loss": 0.2782, "step": 3886 }, { "epoch": 0.18028756957328385, "grad_norm": 17.227535247802734, "learning_rate": 9.287696884133903e-06, "loss": 0.4237, "step": 3887 }, { "epoch": 0.1803339517625232, "grad_norm": 9.142858505249023, "learning_rate": 9.287318255221033e-06, "loss": 0.3199, "step": 3888 }, { "epoch": 0.18038033395176253, "grad_norm": 9.16249942779541, "learning_rate": 9.286939533425676e-06, "loss": 0.4164, "step": 3889 }, { "epoch": 0.18042671614100186, "grad_norm": 8.48632526397705, "learning_rate": 9.286560718756036e-06, "loss": 0.4424, "step": 3890 }, { "epoch": 0.1804730983302412, "grad_norm": 5.491569995880127, "learning_rate": 9.286181811220317e-06, "loss": 0.3004, "step": 3891 }, { "epoch": 0.18051948051948052, "grad_norm": 5.727294445037842, "learning_rate": 9.28580281082673e-06, "loss": 0.4131, "step": 3892 }, { "epoch": 0.18056586270871985, "grad_norm": 7.3872389793396, "learning_rate": 9.285423717583484e-06, "loss": 0.4107, "step": 3893 }, { "epoch": 0.18061224489795918, "grad_norm": 9.144086837768555, "learning_rate": 9.285044531498797e-06, "loss": 0.3357, "step": 3894 }, { "epoch": 0.18065862708719851, "grad_norm": 8.326022148132324, "learning_rate": 9.284665252580878e-06, "loss": 0.3361, "step": 3895 }, { "epoch": 0.18070500927643784, "grad_norm": 11.322463989257812, "learning_rate": 9.284285880837947e-06, "loss": 0.3102, "step": 3896 }, { "epoch": 0.18075139146567717, "grad_norm": 5.883931636810303, "learning_rate": 9.283906416278222e-06, "loss": 0.3713, "step": 3897 }, { "epoch": 0.1807977736549165, "grad_norm": 7.054422378540039, "learning_rate": 9.283526858909924e-06, "loss": 0.3455, "step": 3898 }, { "epoch": 0.18084415584415584, "grad_norm": 11.48198413848877, "learning_rate": 9.283147208741276e-06, "loss": 0.4691, "step": 3899 }, { "epoch": 0.18089053803339517, "grad_norm": 5.404477596282959, "learning_rate": 9.282767465780502e-06, "loss": 0.3784, "step": 3900 }, { "epoch": 0.1809369202226345, "grad_norm": 9.649360656738281, "learning_rate": 9.282387630035833e-06, "loss": 0.4448, "step": 3901 }, { "epoch": 0.18098330241187385, "grad_norm": 9.454025268554688, "learning_rate": 9.28200770151549e-06, "loss": 0.4454, "step": 3902 }, { "epoch": 0.18102968460111318, "grad_norm": 7.615608215332031, "learning_rate": 9.281627680227711e-06, "loss": 0.3946, "step": 3903 }, { "epoch": 0.1810760667903525, "grad_norm": 5.410079002380371, "learning_rate": 9.281247566180727e-06, "loss": 0.3159, "step": 3904 }, { "epoch": 0.18112244897959184, "grad_norm": 20.122676849365234, "learning_rate": 9.280867359382772e-06, "loss": 0.7319, "step": 3905 }, { "epoch": 0.18116883116883117, "grad_norm": 10.993858337402344, "learning_rate": 9.280487059842085e-06, "loss": 0.3686, "step": 3906 }, { "epoch": 0.1812152133580705, "grad_norm": 7.593179702758789, "learning_rate": 9.280106667566902e-06, "loss": 0.3996, "step": 3907 }, { "epoch": 0.18126159554730983, "grad_norm": 7.177037715911865, "learning_rate": 9.279726182565466e-06, "loss": 0.4276, "step": 3908 }, { "epoch": 0.18130797773654916, "grad_norm": 6.87033748626709, "learning_rate": 9.27934560484602e-06, "loss": 0.3482, "step": 3909 }, { "epoch": 0.1813543599257885, "grad_norm": 9.429956436157227, "learning_rate": 9.278964934416807e-06, "loss": 0.4268, "step": 3910 }, { "epoch": 0.18140074211502782, "grad_norm": 6.693358421325684, "learning_rate": 9.278584171286078e-06, "loss": 0.4628, "step": 3911 }, { "epoch": 0.18144712430426715, "grad_norm": 12.95610237121582, "learning_rate": 9.278203315462078e-06, "loss": 0.3395, "step": 3912 }, { "epoch": 0.18149350649350648, "grad_norm": 5.906711578369141, "learning_rate": 9.27782236695306e-06, "loss": 0.3334, "step": 3913 }, { "epoch": 0.18153988868274581, "grad_norm": 4.392107963562012, "learning_rate": 9.277441325767277e-06, "loss": 0.3076, "step": 3914 }, { "epoch": 0.18158627087198514, "grad_norm": 4.2124481201171875, "learning_rate": 9.277060191912984e-06, "loss": 0.201, "step": 3915 }, { "epoch": 0.1816326530612245, "grad_norm": 9.03668212890625, "learning_rate": 9.276678965398436e-06, "loss": 0.3135, "step": 3916 }, { "epoch": 0.18167903525046383, "grad_norm": 11.177755355834961, "learning_rate": 9.276297646231896e-06, "loss": 0.484, "step": 3917 }, { "epoch": 0.18172541743970316, "grad_norm": 15.565506935119629, "learning_rate": 9.275916234421622e-06, "loss": 0.4032, "step": 3918 }, { "epoch": 0.1817717996289425, "grad_norm": 5.356657028198242, "learning_rate": 9.27553472997588e-06, "loss": 0.2776, "step": 3919 }, { "epoch": 0.18181818181818182, "grad_norm": 6.727745056152344, "learning_rate": 9.275153132902929e-06, "loss": 0.4031, "step": 3920 }, { "epoch": 0.18186456400742115, "grad_norm": 11.863397598266602, "learning_rate": 9.274771443211045e-06, "loss": 0.3162, "step": 3921 }, { "epoch": 0.18191094619666048, "grad_norm": 8.575291633605957, "learning_rate": 9.27438966090849e-06, "loss": 0.2992, "step": 3922 }, { "epoch": 0.1819573283858998, "grad_norm": 7.214102745056152, "learning_rate": 9.274007786003538e-06, "loss": 0.2733, "step": 3923 }, { "epoch": 0.18200371057513914, "grad_norm": 10.704472541809082, "learning_rate": 9.273625818504461e-06, "loss": 0.4569, "step": 3924 }, { "epoch": 0.18205009276437847, "grad_norm": 9.398892402648926, "learning_rate": 9.273243758419534e-06, "loss": 0.3932, "step": 3925 }, { "epoch": 0.1820964749536178, "grad_norm": 6.87859582901001, "learning_rate": 9.272861605757038e-06, "loss": 0.396, "step": 3926 }, { "epoch": 0.18214285714285713, "grad_norm": 6.430315971374512, "learning_rate": 9.272479360525246e-06, "loss": 0.3205, "step": 3927 }, { "epoch": 0.18218923933209646, "grad_norm": 5.25290060043335, "learning_rate": 9.272097022732444e-06, "loss": 0.3022, "step": 3928 }, { "epoch": 0.1822356215213358, "grad_norm": 4.723984718322754, "learning_rate": 9.271714592386913e-06, "loss": 0.2414, "step": 3929 }, { "epoch": 0.18228200371057515, "grad_norm": 8.640439987182617, "learning_rate": 9.271332069496937e-06, "loss": 0.4014, "step": 3930 }, { "epoch": 0.18232838589981448, "grad_norm": 6.784631252288818, "learning_rate": 9.270949454070806e-06, "loss": 0.3355, "step": 3931 }, { "epoch": 0.1823747680890538, "grad_norm": 12.047910690307617, "learning_rate": 9.270566746116808e-06, "loss": 0.4635, "step": 3932 }, { "epoch": 0.18242115027829314, "grad_norm": 7.024743556976318, "learning_rate": 9.270183945643232e-06, "loss": 0.3759, "step": 3933 }, { "epoch": 0.18246753246753247, "grad_norm": 7.166797637939453, "learning_rate": 9.269801052658375e-06, "loss": 0.3093, "step": 3934 }, { "epoch": 0.1825139146567718, "grad_norm": 7.051729679107666, "learning_rate": 9.26941806717053e-06, "loss": 0.3947, "step": 3935 }, { "epoch": 0.18256029684601113, "grad_norm": 5.395791530609131, "learning_rate": 9.269034989187993e-06, "loss": 0.4051, "step": 3936 }, { "epoch": 0.18260667903525046, "grad_norm": 6.543740749359131, "learning_rate": 9.268651818719066e-06, "loss": 0.469, "step": 3937 }, { "epoch": 0.1826530612244898, "grad_norm": 10.493880271911621, "learning_rate": 9.268268555772049e-06, "loss": 0.4126, "step": 3938 }, { "epoch": 0.18269944341372912, "grad_norm": 6.42924165725708, "learning_rate": 9.267885200355244e-06, "loss": 0.2977, "step": 3939 }, { "epoch": 0.18274582560296845, "grad_norm": 4.5065765380859375, "learning_rate": 9.267501752476959e-06, "loss": 0.2972, "step": 3940 }, { "epoch": 0.18279220779220778, "grad_norm": 7.743593215942383, "learning_rate": 9.267118212145498e-06, "loss": 0.4107, "step": 3941 }, { "epoch": 0.1828385899814471, "grad_norm": 5.113829135894775, "learning_rate": 9.266734579369172e-06, "loss": 0.2754, "step": 3942 }, { "epoch": 0.18288497217068644, "grad_norm": 8.543607711791992, "learning_rate": 9.266350854156291e-06, "loss": 0.4844, "step": 3943 }, { "epoch": 0.1829313543599258, "grad_norm": 5.175968647003174, "learning_rate": 9.26596703651517e-06, "loss": 0.2723, "step": 3944 }, { "epoch": 0.18297773654916513, "grad_norm": 11.178633689880371, "learning_rate": 9.265583126454123e-06, "loss": 0.3064, "step": 3945 }, { "epoch": 0.18302411873840446, "grad_norm": 10.087087631225586, "learning_rate": 9.265199123981467e-06, "loss": 0.388, "step": 3946 }, { "epoch": 0.1830705009276438, "grad_norm": 4.994277477264404, "learning_rate": 9.264815029105522e-06, "loss": 0.3195, "step": 3947 }, { "epoch": 0.18311688311688312, "grad_norm": 9.810434341430664, "learning_rate": 9.264430841834609e-06, "loss": 0.3399, "step": 3948 }, { "epoch": 0.18316326530612245, "grad_norm": 5.9633283615112305, "learning_rate": 9.26404656217705e-06, "loss": 0.313, "step": 3949 }, { "epoch": 0.18320964749536178, "grad_norm": 9.632469177246094, "learning_rate": 9.263662190141172e-06, "loss": 0.3798, "step": 3950 }, { "epoch": 0.1832560296846011, "grad_norm": 7.427568435668945, "learning_rate": 9.263277725735302e-06, "loss": 0.3012, "step": 3951 }, { "epoch": 0.18330241187384044, "grad_norm": 6.841253757476807, "learning_rate": 9.262893168967769e-06, "loss": 0.3768, "step": 3952 }, { "epoch": 0.18334879406307977, "grad_norm": 5.630781650543213, "learning_rate": 9.262508519846902e-06, "loss": 0.3746, "step": 3953 }, { "epoch": 0.1833951762523191, "grad_norm": 8.802659034729004, "learning_rate": 9.262123778381037e-06, "loss": 0.3784, "step": 3954 }, { "epoch": 0.18344155844155843, "grad_norm": 10.586771011352539, "learning_rate": 9.26173894457851e-06, "loss": 0.3173, "step": 3955 }, { "epoch": 0.18348794063079776, "grad_norm": 7.240850925445557, "learning_rate": 9.261354018447654e-06, "loss": 0.3273, "step": 3956 }, { "epoch": 0.1835343228200371, "grad_norm": 11.167135238647461, "learning_rate": 9.260968999996814e-06, "loss": 0.3924, "step": 3957 }, { "epoch": 0.18358070500927645, "grad_norm": 9.088523864746094, "learning_rate": 9.260583889234325e-06, "loss": 0.3712, "step": 3958 }, { "epoch": 0.18362708719851578, "grad_norm": 9.659728050231934, "learning_rate": 9.260198686168535e-06, "loss": 0.354, "step": 3959 }, { "epoch": 0.1836734693877551, "grad_norm": 6.179988861083984, "learning_rate": 9.259813390807788e-06, "loss": 0.3585, "step": 3960 }, { "epoch": 0.18371985157699444, "grad_norm": 7.714034557342529, "learning_rate": 9.259428003160429e-06, "loss": 0.2995, "step": 3961 }, { "epoch": 0.18376623376623377, "grad_norm": 7.594012260437012, "learning_rate": 9.259042523234809e-06, "loss": 0.3418, "step": 3962 }, { "epoch": 0.1838126159554731, "grad_norm": 9.440080642700195, "learning_rate": 9.25865695103928e-06, "loss": 0.421, "step": 3963 }, { "epoch": 0.18385899814471243, "grad_norm": 8.821744918823242, "learning_rate": 9.258271286582195e-06, "loss": 0.5097, "step": 3964 }, { "epoch": 0.18390538033395176, "grad_norm": 8.046146392822266, "learning_rate": 9.257885529871908e-06, "loss": 0.3092, "step": 3965 }, { "epoch": 0.1839517625231911, "grad_norm": 9.363040924072266, "learning_rate": 9.257499680916776e-06, "loss": 0.4031, "step": 3966 }, { "epoch": 0.18399814471243042, "grad_norm": 7.467981815338135, "learning_rate": 9.257113739725159e-06, "loss": 0.2622, "step": 3967 }, { "epoch": 0.18404452690166975, "grad_norm": 11.3922119140625, "learning_rate": 9.256727706305419e-06, "loss": 0.5192, "step": 3968 }, { "epoch": 0.18409090909090908, "grad_norm": 9.332676887512207, "learning_rate": 9.256341580665919e-06, "loss": 0.3519, "step": 3969 }, { "epoch": 0.1841372912801484, "grad_norm": 8.067358016967773, "learning_rate": 9.255955362815024e-06, "loss": 0.424, "step": 3970 }, { "epoch": 0.18418367346938774, "grad_norm": 10.8992338180542, "learning_rate": 9.255569052761098e-06, "loss": 0.4295, "step": 3971 }, { "epoch": 0.1842300556586271, "grad_norm": 4.948319435119629, "learning_rate": 9.255182650512515e-06, "loss": 0.2713, "step": 3972 }, { "epoch": 0.18427643784786643, "grad_norm": 11.800863265991211, "learning_rate": 9.254796156077644e-06, "loss": 0.5094, "step": 3973 }, { "epoch": 0.18432282003710576, "grad_norm": 3.83608341217041, "learning_rate": 9.254409569464859e-06, "loss": 0.2973, "step": 3974 }, { "epoch": 0.1843692022263451, "grad_norm": 11.326279640197754, "learning_rate": 9.254022890682532e-06, "loss": 0.3393, "step": 3975 }, { "epoch": 0.18441558441558442, "grad_norm": 6.5938520431518555, "learning_rate": 9.253636119739046e-06, "loss": 0.4504, "step": 3976 }, { "epoch": 0.18446196660482375, "grad_norm": 6.39707612991333, "learning_rate": 9.253249256642776e-06, "loss": 0.3519, "step": 3977 }, { "epoch": 0.18450834879406308, "grad_norm": 11.82512092590332, "learning_rate": 9.252862301402104e-06, "loss": 0.3353, "step": 3978 }, { "epoch": 0.1845547309833024, "grad_norm": 9.877978324890137, "learning_rate": 9.252475254025413e-06, "loss": 0.3511, "step": 3979 }, { "epoch": 0.18460111317254174, "grad_norm": 10.040103912353516, "learning_rate": 9.25208811452109e-06, "loss": 0.4187, "step": 3980 }, { "epoch": 0.18464749536178107, "grad_norm": 6.091075420379639, "learning_rate": 9.25170088289752e-06, "loss": 0.3211, "step": 3981 }, { "epoch": 0.1846938775510204, "grad_norm": 5.280747413635254, "learning_rate": 9.251313559163092e-06, "loss": 0.339, "step": 3982 }, { "epoch": 0.18474025974025973, "grad_norm": 5.641803741455078, "learning_rate": 9.2509261433262e-06, "loss": 0.316, "step": 3983 }, { "epoch": 0.18478664192949906, "grad_norm": 6.035042762756348, "learning_rate": 9.250538635395234e-06, "loss": 0.3681, "step": 3984 }, { "epoch": 0.1848330241187384, "grad_norm": 8.357024192810059, "learning_rate": 9.25015103537859e-06, "loss": 0.23, "step": 3985 }, { "epoch": 0.18487940630797772, "grad_norm": 10.885080337524414, "learning_rate": 9.249763343284665e-06, "loss": 0.3987, "step": 3986 }, { "epoch": 0.18492578849721708, "grad_norm": 9.152222633361816, "learning_rate": 9.24937555912186e-06, "loss": 0.3872, "step": 3987 }, { "epoch": 0.1849721706864564, "grad_norm": 7.976752281188965, "learning_rate": 9.248987682898576e-06, "loss": 0.3457, "step": 3988 }, { "epoch": 0.18501855287569574, "grad_norm": 6.253233909606934, "learning_rate": 9.248599714623212e-06, "loss": 0.3286, "step": 3989 }, { "epoch": 0.18506493506493507, "grad_norm": 6.6796369552612305, "learning_rate": 9.248211654304177e-06, "loss": 0.4234, "step": 3990 }, { "epoch": 0.1851113172541744, "grad_norm": 8.776063919067383, "learning_rate": 9.247823501949879e-06, "loss": 0.3528, "step": 3991 }, { "epoch": 0.18515769944341373, "grad_norm": 13.043463706970215, "learning_rate": 9.247435257568724e-06, "loss": 0.4247, "step": 3992 }, { "epoch": 0.18520408163265306, "grad_norm": 8.740269660949707, "learning_rate": 9.247046921169125e-06, "loss": 0.3171, "step": 3993 }, { "epoch": 0.1852504638218924, "grad_norm": 9.023076057434082, "learning_rate": 9.246658492759493e-06, "loss": 0.2951, "step": 3994 }, { "epoch": 0.18529684601113172, "grad_norm": 14.12374496459961, "learning_rate": 9.246269972348246e-06, "loss": 0.5058, "step": 3995 }, { "epoch": 0.18534322820037105, "grad_norm": 6.171804904937744, "learning_rate": 9.2458813599438e-06, "loss": 0.2995, "step": 3996 }, { "epoch": 0.18538961038961038, "grad_norm": 8.671135902404785, "learning_rate": 9.245492655554573e-06, "loss": 0.4642, "step": 3997 }, { "epoch": 0.1854359925788497, "grad_norm": 5.667021751403809, "learning_rate": 9.245103859188987e-06, "loss": 0.3207, "step": 3998 }, { "epoch": 0.18548237476808904, "grad_norm": 7.274017810821533, "learning_rate": 9.244714970855466e-06, "loss": 0.2331, "step": 3999 }, { "epoch": 0.18552875695732837, "grad_norm": 18.60085678100586, "learning_rate": 9.244325990562433e-06, "loss": 0.3957, "step": 4000 }, { "epoch": 0.18557513914656773, "grad_norm": 11.510812759399414, "learning_rate": 9.243936918318315e-06, "loss": 0.4044, "step": 4001 }, { "epoch": 0.18562152133580706, "grad_norm": 7.845262050628662, "learning_rate": 9.243547754131543e-06, "loss": 0.4933, "step": 4002 }, { "epoch": 0.1856679035250464, "grad_norm": 29.391422271728516, "learning_rate": 9.243158498010546e-06, "loss": 0.5871, "step": 4003 }, { "epoch": 0.18571428571428572, "grad_norm": 8.069348335266113, "learning_rate": 9.24276914996376e-06, "loss": 0.2172, "step": 4004 }, { "epoch": 0.18576066790352505, "grad_norm": 126.42618560791016, "learning_rate": 9.242379709999619e-06, "loss": 0.6497, "step": 4005 }, { "epoch": 0.18580705009276438, "grad_norm": 13.099586486816406, "learning_rate": 9.241990178126558e-06, "loss": 0.3694, "step": 4006 }, { "epoch": 0.1858534322820037, "grad_norm": 9.301576614379883, "learning_rate": 9.241600554353016e-06, "loss": 0.3942, "step": 4007 }, { "epoch": 0.18589981447124304, "grad_norm": 4.552908897399902, "learning_rate": 9.241210838687438e-06, "loss": 0.3184, "step": 4008 }, { "epoch": 0.18594619666048237, "grad_norm": 10.256155014038086, "learning_rate": 9.240821031138261e-06, "loss": 0.5011, "step": 4009 }, { "epoch": 0.1859925788497217, "grad_norm": 8.144552230834961, "learning_rate": 9.240431131713935e-06, "loss": 0.3434, "step": 4010 }, { "epoch": 0.18603896103896103, "grad_norm": 6.930857181549072, "learning_rate": 9.240041140422904e-06, "loss": 0.3264, "step": 4011 }, { "epoch": 0.18608534322820036, "grad_norm": 9.630350112915039, "learning_rate": 9.23965105727362e-06, "loss": 0.4436, "step": 4012 }, { "epoch": 0.1861317254174397, "grad_norm": 7.725118637084961, "learning_rate": 9.239260882274531e-06, "loss": 0.3172, "step": 4013 }, { "epoch": 0.18617810760667902, "grad_norm": 14.078282356262207, "learning_rate": 9.23887061543409e-06, "loss": 0.3498, "step": 4014 }, { "epoch": 0.18622448979591838, "grad_norm": 13.121434211730957, "learning_rate": 9.238480256760755e-06, "loss": 0.5567, "step": 4015 }, { "epoch": 0.1862708719851577, "grad_norm": 8.743853569030762, "learning_rate": 9.23808980626298e-06, "loss": 0.4229, "step": 4016 }, { "epoch": 0.18631725417439704, "grad_norm": 8.130082130432129, "learning_rate": 9.237699263949224e-06, "loss": 0.397, "step": 4017 }, { "epoch": 0.18636363636363637, "grad_norm": 8.546466827392578, "learning_rate": 9.237308629827949e-06, "loss": 0.4192, "step": 4018 }, { "epoch": 0.1864100185528757, "grad_norm": 8.139748573303223, "learning_rate": 9.236917903907616e-06, "loss": 0.3383, "step": 4019 }, { "epoch": 0.18645640074211503, "grad_norm": 5.881506443023682, "learning_rate": 9.236527086196694e-06, "loss": 0.3581, "step": 4020 }, { "epoch": 0.18650278293135436, "grad_norm": 9.000797271728516, "learning_rate": 9.236136176703646e-06, "loss": 0.4325, "step": 4021 }, { "epoch": 0.1865491651205937, "grad_norm": 11.559913635253906, "learning_rate": 9.23574517543694e-06, "loss": 0.4459, "step": 4022 }, { "epoch": 0.18659554730983302, "grad_norm": 7.753173828125, "learning_rate": 9.235354082405053e-06, "loss": 0.3503, "step": 4023 }, { "epoch": 0.18664192949907235, "grad_norm": 9.883310317993164, "learning_rate": 9.23496289761645e-06, "loss": 0.5549, "step": 4024 }, { "epoch": 0.18668831168831168, "grad_norm": 7.602514266967773, "learning_rate": 9.234571621079613e-06, "loss": 0.3729, "step": 4025 }, { "epoch": 0.186734693877551, "grad_norm": 7.0691728591918945, "learning_rate": 9.234180252803013e-06, "loss": 0.3081, "step": 4026 }, { "epoch": 0.18678107606679034, "grad_norm": 5.643435955047607, "learning_rate": 9.23378879279513e-06, "loss": 0.3062, "step": 4027 }, { "epoch": 0.18682745825602967, "grad_norm": 5.3831682205200195, "learning_rate": 9.233397241064447e-06, "loss": 0.3054, "step": 4028 }, { "epoch": 0.18687384044526903, "grad_norm": 10.168814659118652, "learning_rate": 9.233005597619447e-06, "loss": 0.3084, "step": 4029 }, { "epoch": 0.18692022263450836, "grad_norm": 5.841395378112793, "learning_rate": 9.232613862468614e-06, "loss": 0.3362, "step": 4030 }, { "epoch": 0.1869666048237477, "grad_norm": 6.244377136230469, "learning_rate": 9.232222035620432e-06, "loss": 0.4123, "step": 4031 }, { "epoch": 0.18701298701298702, "grad_norm": 15.60364055633545, "learning_rate": 9.231830117083393e-06, "loss": 0.396, "step": 4032 }, { "epoch": 0.18705936920222635, "grad_norm": 5.809175491333008, "learning_rate": 9.231438106865985e-06, "loss": 0.2744, "step": 4033 }, { "epoch": 0.18710575139146568, "grad_norm": 4.653163433074951, "learning_rate": 9.231046004976704e-06, "loss": 0.2778, "step": 4034 }, { "epoch": 0.187152133580705, "grad_norm": 8.493446350097656, "learning_rate": 9.230653811424042e-06, "loss": 0.4181, "step": 4035 }, { "epoch": 0.18719851576994434, "grad_norm": 4.744879245758057, "learning_rate": 9.230261526216498e-06, "loss": 0.1936, "step": 4036 }, { "epoch": 0.18724489795918367, "grad_norm": 6.63523006439209, "learning_rate": 9.229869149362568e-06, "loss": 0.3049, "step": 4037 }, { "epoch": 0.187291280148423, "grad_norm": 7.01294469833374, "learning_rate": 9.229476680870753e-06, "loss": 0.4517, "step": 4038 }, { "epoch": 0.18733766233766233, "grad_norm": 7.252603054046631, "learning_rate": 9.229084120749558e-06, "loss": 0.4151, "step": 4039 }, { "epoch": 0.18738404452690166, "grad_norm": 9.271159172058105, "learning_rate": 9.228691469007487e-06, "loss": 0.3577, "step": 4040 }, { "epoch": 0.187430426716141, "grad_norm": 6.523041248321533, "learning_rate": 9.228298725653043e-06, "loss": 0.4046, "step": 4041 }, { "epoch": 0.18747680890538032, "grad_norm": 14.37662410736084, "learning_rate": 9.22790589069474e-06, "loss": 0.5219, "step": 4042 }, { "epoch": 0.18752319109461968, "grad_norm": 10.919751167297363, "learning_rate": 9.227512964141084e-06, "loss": 0.4481, "step": 4043 }, { "epoch": 0.187569573283859, "grad_norm": 9.796332359313965, "learning_rate": 9.22711994600059e-06, "loss": 0.4578, "step": 4044 }, { "epoch": 0.18761595547309834, "grad_norm": 11.445942878723145, "learning_rate": 9.226726836281772e-06, "loss": 0.4727, "step": 4045 }, { "epoch": 0.18766233766233767, "grad_norm": 12.200002670288086, "learning_rate": 9.226333634993147e-06, "loss": 0.4861, "step": 4046 }, { "epoch": 0.187708719851577, "grad_norm": 12.221457481384277, "learning_rate": 9.225940342143232e-06, "loss": 0.435, "step": 4047 }, { "epoch": 0.18775510204081633, "grad_norm": 9.549421310424805, "learning_rate": 9.225546957740549e-06, "loss": 0.4796, "step": 4048 }, { "epoch": 0.18780148423005566, "grad_norm": 9.38355541229248, "learning_rate": 9.22515348179362e-06, "loss": 0.4427, "step": 4049 }, { "epoch": 0.187847866419295, "grad_norm": 7.584944248199463, "learning_rate": 9.22475991431097e-06, "loss": 0.4541, "step": 4050 }, { "epoch": 0.18789424860853432, "grad_norm": 6.220884799957275, "learning_rate": 9.224366255301122e-06, "loss": 0.3804, "step": 4051 }, { "epoch": 0.18794063079777365, "grad_norm": 8.145674705505371, "learning_rate": 9.223972504772609e-06, "loss": 0.3837, "step": 4052 }, { "epoch": 0.18798701298701298, "grad_norm": 7.031424045562744, "learning_rate": 9.22357866273396e-06, "loss": 0.3256, "step": 4053 }, { "epoch": 0.1880333951762523, "grad_norm": 6.1495490074157715, "learning_rate": 9.223184729193706e-06, "loss": 0.4789, "step": 4054 }, { "epoch": 0.18807977736549164, "grad_norm": 6.126339435577393, "learning_rate": 9.222790704160385e-06, "loss": 0.3835, "step": 4055 }, { "epoch": 0.18812615955473097, "grad_norm": 7.064233303070068, "learning_rate": 9.222396587642528e-06, "loss": 0.2669, "step": 4056 }, { "epoch": 0.18817254174397033, "grad_norm": 10.567845344543457, "learning_rate": 9.222002379648676e-06, "loss": 0.4006, "step": 4057 }, { "epoch": 0.18821892393320966, "grad_norm": 10.715837478637695, "learning_rate": 9.221608080187371e-06, "loss": 0.3937, "step": 4058 }, { "epoch": 0.188265306122449, "grad_norm": 7.222912311553955, "learning_rate": 9.221213689267152e-06, "loss": 0.2515, "step": 4059 }, { "epoch": 0.18831168831168832, "grad_norm": 5.3429975509643555, "learning_rate": 9.220819206896564e-06, "loss": 0.354, "step": 4060 }, { "epoch": 0.18835807050092765, "grad_norm": 8.284340858459473, "learning_rate": 9.220424633084158e-06, "loss": 0.4211, "step": 4061 }, { "epoch": 0.18840445269016698, "grad_norm": 14.26911449432373, "learning_rate": 9.220029967838476e-06, "loss": 0.6116, "step": 4062 }, { "epoch": 0.1884508348794063, "grad_norm": 5.50434684753418, "learning_rate": 9.21963521116807e-06, "loss": 0.3843, "step": 4063 }, { "epoch": 0.18849721706864564, "grad_norm": 7.887859344482422, "learning_rate": 9.219240363081494e-06, "loss": 0.3575, "step": 4064 }, { "epoch": 0.18854359925788497, "grad_norm": 12.921756744384766, "learning_rate": 9.2188454235873e-06, "loss": 0.5321, "step": 4065 }, { "epoch": 0.1885899814471243, "grad_norm": 9.519311904907227, "learning_rate": 9.218450392694046e-06, "loss": 0.4467, "step": 4066 }, { "epoch": 0.18863636363636363, "grad_norm": 8.346500396728516, "learning_rate": 9.218055270410289e-06, "loss": 0.3741, "step": 4067 }, { "epoch": 0.18868274582560296, "grad_norm": 18.374263763427734, "learning_rate": 9.21766005674459e-06, "loss": 0.3276, "step": 4068 }, { "epoch": 0.1887291280148423, "grad_norm": 6.350709438323975, "learning_rate": 9.217264751705509e-06, "loss": 0.4198, "step": 4069 }, { "epoch": 0.18877551020408162, "grad_norm": 6.312807083129883, "learning_rate": 9.21686935530161e-06, "loss": 0.3478, "step": 4070 }, { "epoch": 0.18882189239332098, "grad_norm": 9.011956214904785, "learning_rate": 9.216473867541462e-06, "loss": 0.3308, "step": 4071 }, { "epoch": 0.1888682745825603, "grad_norm": 6.925941467285156, "learning_rate": 9.216078288433632e-06, "loss": 0.3333, "step": 4072 }, { "epoch": 0.18891465677179964, "grad_norm": 6.997020244598389, "learning_rate": 9.215682617986691e-06, "loss": 0.3719, "step": 4073 }, { "epoch": 0.18896103896103897, "grad_norm": 9.291326522827148, "learning_rate": 9.215286856209209e-06, "loss": 0.3207, "step": 4074 }, { "epoch": 0.1890074211502783, "grad_norm": 7.218262195587158, "learning_rate": 9.214891003109759e-06, "loss": 0.363, "step": 4075 }, { "epoch": 0.18905380333951763, "grad_norm": 8.198336601257324, "learning_rate": 9.21449505869692e-06, "loss": 0.4186, "step": 4076 }, { "epoch": 0.18910018552875696, "grad_norm": 14.957575798034668, "learning_rate": 9.214099022979268e-06, "loss": 0.3192, "step": 4077 }, { "epoch": 0.1891465677179963, "grad_norm": 12.709395408630371, "learning_rate": 9.213702895965383e-06, "loss": 0.4558, "step": 4078 }, { "epoch": 0.18919294990723562, "grad_norm": 7.635259628295898, "learning_rate": 9.21330667766385e-06, "loss": 0.3232, "step": 4079 }, { "epoch": 0.18923933209647495, "grad_norm": 5.556662082672119, "learning_rate": 9.212910368083246e-06, "loss": 0.3108, "step": 4080 }, { "epoch": 0.18928571428571428, "grad_norm": 10.745624542236328, "learning_rate": 9.212513967232165e-06, "loss": 0.4064, "step": 4081 }, { "epoch": 0.1893320964749536, "grad_norm": 7.6302409172058105, "learning_rate": 9.212117475119187e-06, "loss": 0.4232, "step": 4082 }, { "epoch": 0.18937847866419294, "grad_norm": 11.130942344665527, "learning_rate": 9.211720891752907e-06, "loss": 0.4322, "step": 4083 }, { "epoch": 0.18942486085343227, "grad_norm": 10.02829360961914, "learning_rate": 9.211324217141916e-06, "loss": 0.399, "step": 4084 }, { "epoch": 0.18947124304267163, "grad_norm": 6.546996593475342, "learning_rate": 9.210927451294805e-06, "loss": 0.3056, "step": 4085 }, { "epoch": 0.18951762523191096, "grad_norm": 5.01097297668457, "learning_rate": 9.210530594220173e-06, "loss": 0.2866, "step": 4086 }, { "epoch": 0.1895640074211503, "grad_norm": 8.214977264404297, "learning_rate": 9.210133645926618e-06, "loss": 0.4079, "step": 4087 }, { "epoch": 0.18961038961038962, "grad_norm": 9.93572998046875, "learning_rate": 9.209736606422736e-06, "loss": 0.472, "step": 4088 }, { "epoch": 0.18965677179962895, "grad_norm": 5.481119632720947, "learning_rate": 9.209339475717132e-06, "loss": 0.3633, "step": 4089 }, { "epoch": 0.18970315398886828, "grad_norm": 11.786310195922852, "learning_rate": 9.208942253818409e-06, "loss": 0.4556, "step": 4090 }, { "epoch": 0.1897495361781076, "grad_norm": 6.194089889526367, "learning_rate": 9.208544940735171e-06, "loss": 0.3049, "step": 4091 }, { "epoch": 0.18979591836734694, "grad_norm": 8.067075729370117, "learning_rate": 9.208147536476026e-06, "loss": 0.4155, "step": 4092 }, { "epoch": 0.18984230055658627, "grad_norm": 5.673080921173096, "learning_rate": 9.207750041049585e-06, "loss": 0.2847, "step": 4093 }, { "epoch": 0.1898886827458256, "grad_norm": 7.63908052444458, "learning_rate": 9.20735245446446e-06, "loss": 0.3174, "step": 4094 }, { "epoch": 0.18993506493506493, "grad_norm": 9.332966804504395, "learning_rate": 9.206954776729263e-06, "loss": 0.4308, "step": 4095 }, { "epoch": 0.18998144712430426, "grad_norm": 6.348172187805176, "learning_rate": 9.206557007852609e-06, "loss": 0.2751, "step": 4096 }, { "epoch": 0.1900278293135436, "grad_norm": 9.357654571533203, "learning_rate": 9.206159147843114e-06, "loss": 0.3407, "step": 4097 }, { "epoch": 0.19007421150278292, "grad_norm": 9.644183158874512, "learning_rate": 9.205761196709401e-06, "loss": 0.3728, "step": 4098 }, { "epoch": 0.19012059369202228, "grad_norm": 9.070297241210938, "learning_rate": 9.205363154460092e-06, "loss": 0.3913, "step": 4099 }, { "epoch": 0.1901669758812616, "grad_norm": 9.549038887023926, "learning_rate": 9.204965021103807e-06, "loss": 0.4562, "step": 4100 }, { "epoch": 0.19021335807050094, "grad_norm": 7.375398635864258, "learning_rate": 9.204566796649173e-06, "loss": 0.385, "step": 4101 }, { "epoch": 0.19025974025974027, "grad_norm": 7.185600280761719, "learning_rate": 9.204168481104815e-06, "loss": 0.3382, "step": 4102 }, { "epoch": 0.1903061224489796, "grad_norm": 7.660928249359131, "learning_rate": 9.203770074479366e-06, "loss": 0.335, "step": 4103 }, { "epoch": 0.19035250463821893, "grad_norm": 4.63876485824585, "learning_rate": 9.203371576781457e-06, "loss": 0.2639, "step": 4104 }, { "epoch": 0.19039888682745826, "grad_norm": 6.05841064453125, "learning_rate": 9.202972988019718e-06, "loss": 0.3527, "step": 4105 }, { "epoch": 0.1904452690166976, "grad_norm": 8.081572532653809, "learning_rate": 9.202574308202787e-06, "loss": 0.4339, "step": 4106 }, { "epoch": 0.19049165120593692, "grad_norm": 10.65229320526123, "learning_rate": 9.202175537339299e-06, "loss": 0.5463, "step": 4107 }, { "epoch": 0.19053803339517625, "grad_norm": 7.751626491546631, "learning_rate": 9.201776675437894e-06, "loss": 0.3379, "step": 4108 }, { "epoch": 0.19058441558441558, "grad_norm": 6.974202632904053, "learning_rate": 9.201377722507215e-06, "loss": 0.4235, "step": 4109 }, { "epoch": 0.1906307977736549, "grad_norm": 10.260826110839844, "learning_rate": 9.200978678555902e-06, "loss": 0.4111, "step": 4110 }, { "epoch": 0.19067717996289424, "grad_norm": 7.660218238830566, "learning_rate": 9.200579543592605e-06, "loss": 0.3158, "step": 4111 }, { "epoch": 0.19072356215213357, "grad_norm": 5.048135280609131, "learning_rate": 9.200180317625965e-06, "loss": 0.3208, "step": 4112 }, { "epoch": 0.1907699443413729, "grad_norm": 10.763307571411133, "learning_rate": 9.199781000664636e-06, "loss": 0.3559, "step": 4113 }, { "epoch": 0.19081632653061226, "grad_norm": 7.606403827667236, "learning_rate": 9.199381592717265e-06, "loss": 0.3636, "step": 4114 }, { "epoch": 0.1908627087198516, "grad_norm": 10.980170249938965, "learning_rate": 9.198982093792506e-06, "loss": 0.505, "step": 4115 }, { "epoch": 0.19090909090909092, "grad_norm": 9.655377388000488, "learning_rate": 9.198582503899015e-06, "loss": 0.3417, "step": 4116 }, { "epoch": 0.19095547309833025, "grad_norm": 6.930221080780029, "learning_rate": 9.198182823045449e-06, "loss": 0.2386, "step": 4117 }, { "epoch": 0.19100185528756958, "grad_norm": 4.891430377960205, "learning_rate": 9.197783051240467e-06, "loss": 0.3144, "step": 4118 }, { "epoch": 0.1910482374768089, "grad_norm": 18.658567428588867, "learning_rate": 9.19738318849273e-06, "loss": 0.3485, "step": 4119 }, { "epoch": 0.19109461966604824, "grad_norm": 11.999173164367676, "learning_rate": 9.1969832348109e-06, "loss": 0.4618, "step": 4120 }, { "epoch": 0.19114100185528757, "grad_norm": 10.706559181213379, "learning_rate": 9.19658319020364e-06, "loss": 0.3634, "step": 4121 }, { "epoch": 0.1911873840445269, "grad_norm": 8.379729270935059, "learning_rate": 9.19618305467962e-06, "loss": 0.3495, "step": 4122 }, { "epoch": 0.19123376623376623, "grad_norm": 7.230635643005371, "learning_rate": 9.195782828247506e-06, "loss": 0.4229, "step": 4123 }, { "epoch": 0.19128014842300556, "grad_norm": 6.11415433883667, "learning_rate": 9.195382510915972e-06, "loss": 0.3393, "step": 4124 }, { "epoch": 0.1913265306122449, "grad_norm": 7.5357255935668945, "learning_rate": 9.194982102693687e-06, "loss": 0.3774, "step": 4125 }, { "epoch": 0.19137291280148422, "grad_norm": 13.01251220703125, "learning_rate": 9.194581603589327e-06, "loss": 0.4493, "step": 4126 }, { "epoch": 0.19141929499072355, "grad_norm": 10.035676956176758, "learning_rate": 9.194181013611571e-06, "loss": 0.5033, "step": 4127 }, { "epoch": 0.1914656771799629, "grad_norm": 8.810013771057129, "learning_rate": 9.193780332769096e-06, "loss": 0.4329, "step": 4128 }, { "epoch": 0.19151205936920224, "grad_norm": 8.702353477478027, "learning_rate": 9.193379561070579e-06, "loss": 0.3233, "step": 4129 }, { "epoch": 0.19155844155844157, "grad_norm": 9.900546073913574, "learning_rate": 9.192978698524708e-06, "loss": 0.4285, "step": 4130 }, { "epoch": 0.1916048237476809, "grad_norm": 6.47041130065918, "learning_rate": 9.192577745140164e-06, "loss": 0.4275, "step": 4131 }, { "epoch": 0.19165120593692023, "grad_norm": 7.4873247146606445, "learning_rate": 9.192176700925635e-06, "loss": 0.3773, "step": 4132 }, { "epoch": 0.19169758812615956, "grad_norm": 6.486086368560791, "learning_rate": 9.191775565889809e-06, "loss": 0.3566, "step": 4133 }, { "epoch": 0.1917439703153989, "grad_norm": 8.296743392944336, "learning_rate": 9.191374340041377e-06, "loss": 0.2746, "step": 4134 }, { "epoch": 0.19179035250463822, "grad_norm": 7.702235698699951, "learning_rate": 9.19097302338903e-06, "loss": 0.4649, "step": 4135 }, { "epoch": 0.19183673469387755, "grad_norm": 8.907331466674805, "learning_rate": 9.190571615941462e-06, "loss": 0.3304, "step": 4136 }, { "epoch": 0.19188311688311688, "grad_norm": 7.3209967613220215, "learning_rate": 9.190170117707371e-06, "loss": 0.3244, "step": 4137 }, { "epoch": 0.1919294990723562, "grad_norm": 7.423183441162109, "learning_rate": 9.189768528695454e-06, "loss": 0.3079, "step": 4138 }, { "epoch": 0.19197588126159554, "grad_norm": 18.932214736938477, "learning_rate": 9.189366848914412e-06, "loss": 0.5203, "step": 4139 }, { "epoch": 0.19202226345083487, "grad_norm": 10.159141540527344, "learning_rate": 9.188965078372947e-06, "loss": 0.3947, "step": 4140 }, { "epoch": 0.1920686456400742, "grad_norm": 12.027705192565918, "learning_rate": 9.188563217079763e-06, "loss": 0.5237, "step": 4141 }, { "epoch": 0.19211502782931356, "grad_norm": 11.280561447143555, "learning_rate": 9.188161265043566e-06, "loss": 0.3676, "step": 4142 }, { "epoch": 0.1921614100185529, "grad_norm": 7.281896114349365, "learning_rate": 9.187759222273065e-06, "loss": 0.4047, "step": 4143 }, { "epoch": 0.19220779220779222, "grad_norm": 10.476625442504883, "learning_rate": 9.18735708877697e-06, "loss": 0.4738, "step": 4144 }, { "epoch": 0.19225417439703155, "grad_norm": 13.001069068908691, "learning_rate": 9.18695486456399e-06, "loss": 0.4803, "step": 4145 }, { "epoch": 0.19230055658627088, "grad_norm": 10.872950553894043, "learning_rate": 9.186552549642842e-06, "loss": 0.3016, "step": 4146 }, { "epoch": 0.1923469387755102, "grad_norm": 13.630463600158691, "learning_rate": 9.18615014402224e-06, "loss": 0.6374, "step": 4147 }, { "epoch": 0.19239332096474954, "grad_norm": 9.055700302124023, "learning_rate": 9.185747647710905e-06, "loss": 0.4812, "step": 4148 }, { "epoch": 0.19243970315398887, "grad_norm": 6.527790069580078, "learning_rate": 9.185345060717554e-06, "loss": 0.4028, "step": 4149 }, { "epoch": 0.1924860853432282, "grad_norm": 8.20816421508789, "learning_rate": 9.184942383050912e-06, "loss": 0.3416, "step": 4150 }, { "epoch": 0.19253246753246753, "grad_norm": 10.263325691223145, "learning_rate": 9.184539614719699e-06, "loss": 0.4395, "step": 4151 }, { "epoch": 0.19257884972170686, "grad_norm": 5.235101699829102, "learning_rate": 9.18413675573264e-06, "loss": 0.3021, "step": 4152 }, { "epoch": 0.1926252319109462, "grad_norm": 6.630423069000244, "learning_rate": 9.183733806098467e-06, "loss": 0.2684, "step": 4153 }, { "epoch": 0.19267161410018552, "grad_norm": 5.943879127502441, "learning_rate": 9.183330765825909e-06, "loss": 0.264, "step": 4154 }, { "epoch": 0.19271799628942485, "grad_norm": 6.830891132354736, "learning_rate": 9.182927634923695e-06, "loss": 0.3599, "step": 4155 }, { "epoch": 0.1927643784786642, "grad_norm": 8.946185111999512, "learning_rate": 9.182524413400562e-06, "loss": 0.4477, "step": 4156 }, { "epoch": 0.19281076066790354, "grad_norm": 9.30547046661377, "learning_rate": 9.18212110126524e-06, "loss": 0.4149, "step": 4157 }, { "epoch": 0.19285714285714287, "grad_norm": 8.02385425567627, "learning_rate": 9.181717698526473e-06, "loss": 0.3084, "step": 4158 }, { "epoch": 0.1929035250463822, "grad_norm": 11.816709518432617, "learning_rate": 9.181314205192998e-06, "loss": 0.3828, "step": 4159 }, { "epoch": 0.19294990723562153, "grad_norm": 11.748003005981445, "learning_rate": 9.180910621273555e-06, "loss": 0.4839, "step": 4160 }, { "epoch": 0.19299628942486086, "grad_norm": 5.415382385253906, "learning_rate": 9.180506946776888e-06, "loss": 0.3175, "step": 4161 }, { "epoch": 0.1930426716141002, "grad_norm": 8.671095848083496, "learning_rate": 9.180103181711744e-06, "loss": 0.364, "step": 4162 }, { "epoch": 0.19308905380333952, "grad_norm": 5.720832347869873, "learning_rate": 9.17969932608687e-06, "loss": 0.4298, "step": 4163 }, { "epoch": 0.19313543599257885, "grad_norm": 7.077210426330566, "learning_rate": 9.179295379911013e-06, "loss": 0.2704, "step": 4164 }, { "epoch": 0.19318181818181818, "grad_norm": 5.470776081085205, "learning_rate": 9.178891343192927e-06, "loss": 0.3614, "step": 4165 }, { "epoch": 0.1932282003710575, "grad_norm": 6.107975482940674, "learning_rate": 9.178487215941365e-06, "loss": 0.435, "step": 4166 }, { "epoch": 0.19327458256029684, "grad_norm": 6.063448905944824, "learning_rate": 9.178082998165079e-06, "loss": 0.348, "step": 4167 }, { "epoch": 0.19332096474953617, "grad_norm": 10.561271667480469, "learning_rate": 9.177678689872831e-06, "loss": 0.4517, "step": 4168 }, { "epoch": 0.1933673469387755, "grad_norm": 11.761211395263672, "learning_rate": 9.177274291073375e-06, "loss": 0.4987, "step": 4169 }, { "epoch": 0.19341372912801486, "grad_norm": 7.904168128967285, "learning_rate": 9.176869801775476e-06, "loss": 0.4205, "step": 4170 }, { "epoch": 0.19346011131725419, "grad_norm": 5.809233665466309, "learning_rate": 9.176465221987898e-06, "loss": 0.2455, "step": 4171 }, { "epoch": 0.19350649350649352, "grad_norm": 5.2643938064575195, "learning_rate": 9.176060551719402e-06, "loss": 0.2838, "step": 4172 }, { "epoch": 0.19355287569573285, "grad_norm": 12.575604438781738, "learning_rate": 9.175655790978756e-06, "loss": 0.4695, "step": 4173 }, { "epoch": 0.19359925788497218, "grad_norm": 13.01799488067627, "learning_rate": 9.17525093977473e-06, "loss": 0.3598, "step": 4174 }, { "epoch": 0.1936456400742115, "grad_norm": 5.908766269683838, "learning_rate": 9.174845998116095e-06, "loss": 0.346, "step": 4175 }, { "epoch": 0.19369202226345084, "grad_norm": 7.4158034324646, "learning_rate": 9.174440966011624e-06, "loss": 0.3405, "step": 4176 }, { "epoch": 0.19373840445269017, "grad_norm": 10.045889854431152, "learning_rate": 9.174035843470091e-06, "loss": 0.3763, "step": 4177 }, { "epoch": 0.1937847866419295, "grad_norm": 13.643577575683594, "learning_rate": 9.173630630500272e-06, "loss": 0.5202, "step": 4178 }, { "epoch": 0.19383116883116883, "grad_norm": 7.736743450164795, "learning_rate": 9.173225327110948e-06, "loss": 0.4073, "step": 4179 }, { "epoch": 0.19387755102040816, "grad_norm": 6.89799690246582, "learning_rate": 9.172819933310898e-06, "loss": 0.3536, "step": 4180 }, { "epoch": 0.1939239332096475, "grad_norm": 7.212481498718262, "learning_rate": 9.172414449108905e-06, "loss": 0.3804, "step": 4181 }, { "epoch": 0.19397031539888682, "grad_norm": 5.850274085998535, "learning_rate": 9.172008874513754e-06, "loss": 0.3612, "step": 4182 }, { "epoch": 0.19401669758812615, "grad_norm": 12.694055557250977, "learning_rate": 9.17160320953423e-06, "loss": 0.359, "step": 4183 }, { "epoch": 0.1940630797773655, "grad_norm": 7.9545087814331055, "learning_rate": 9.171197454179124e-06, "loss": 0.2857, "step": 4184 }, { "epoch": 0.19410946196660483, "grad_norm": 4.938877582550049, "learning_rate": 9.170791608457226e-06, "loss": 0.4237, "step": 4185 }, { "epoch": 0.19415584415584416, "grad_norm": 5.20473051071167, "learning_rate": 9.170385672377327e-06, "loss": 0.3872, "step": 4186 }, { "epoch": 0.1942022263450835, "grad_norm": 8.585348129272461, "learning_rate": 9.169979645948221e-06, "loss": 0.4265, "step": 4187 }, { "epoch": 0.19424860853432283, "grad_norm": 14.437453269958496, "learning_rate": 9.169573529178706e-06, "loss": 0.4427, "step": 4188 }, { "epoch": 0.19429499072356216, "grad_norm": 5.417640686035156, "learning_rate": 9.16916732207758e-06, "loss": 0.2365, "step": 4189 }, { "epoch": 0.19434137291280149, "grad_norm": 7.069871425628662, "learning_rate": 9.168761024653644e-06, "loss": 0.3968, "step": 4190 }, { "epoch": 0.19438775510204082, "grad_norm": 11.634572982788086, "learning_rate": 9.168354636915695e-06, "loss": 0.3599, "step": 4191 }, { "epoch": 0.19443413729128015, "grad_norm": 6.319276809692383, "learning_rate": 9.167948158872546e-06, "loss": 0.3106, "step": 4192 }, { "epoch": 0.19448051948051948, "grad_norm": 6.322761535644531, "learning_rate": 9.167541590532997e-06, "loss": 0.4228, "step": 4193 }, { "epoch": 0.1945269016697588, "grad_norm": 4.42485237121582, "learning_rate": 9.167134931905857e-06, "loss": 0.3672, "step": 4194 }, { "epoch": 0.19457328385899814, "grad_norm": 8.955887794494629, "learning_rate": 9.166728182999937e-06, "loss": 0.5194, "step": 4195 }, { "epoch": 0.19461966604823747, "grad_norm": 11.053460121154785, "learning_rate": 9.166321343824048e-06, "loss": 0.2811, "step": 4196 }, { "epoch": 0.1946660482374768, "grad_norm": 5.525869846343994, "learning_rate": 9.165914414387007e-06, "loss": 0.3387, "step": 4197 }, { "epoch": 0.19471243042671615, "grad_norm": 5.296645641326904, "learning_rate": 9.165507394697624e-06, "loss": 0.4157, "step": 4198 }, { "epoch": 0.19475881261595548, "grad_norm": 9.808177947998047, "learning_rate": 9.165100284764723e-06, "loss": 0.3174, "step": 4199 }, { "epoch": 0.19480519480519481, "grad_norm": 7.2524237632751465, "learning_rate": 9.16469308459712e-06, "loss": 0.3716, "step": 4200 }, { "epoch": 0.19485157699443414, "grad_norm": 11.77589225769043, "learning_rate": 9.164285794203638e-06, "loss": 0.3571, "step": 4201 }, { "epoch": 0.19489795918367347, "grad_norm": 7.735377311706543, "learning_rate": 9.163878413593102e-06, "loss": 0.3624, "step": 4202 }, { "epoch": 0.1949443413729128, "grad_norm": 5.062862873077393, "learning_rate": 9.163470942774336e-06, "loss": 0.3334, "step": 4203 }, { "epoch": 0.19499072356215214, "grad_norm": 6.90485954284668, "learning_rate": 9.163063381756166e-06, "loss": 0.2346, "step": 4204 }, { "epoch": 0.19503710575139147, "grad_norm": 6.540371894836426, "learning_rate": 9.162655730547424e-06, "loss": 0.307, "step": 4205 }, { "epoch": 0.1950834879406308, "grad_norm": 6.8877153396606445, "learning_rate": 9.162247989156943e-06, "loss": 0.3329, "step": 4206 }, { "epoch": 0.19512987012987013, "grad_norm": 8.724496841430664, "learning_rate": 9.161840157593555e-06, "loss": 0.4435, "step": 4207 }, { "epoch": 0.19517625231910946, "grad_norm": 9.409204483032227, "learning_rate": 9.161432235866093e-06, "loss": 0.3791, "step": 4208 }, { "epoch": 0.19522263450834879, "grad_norm": 8.73858642578125, "learning_rate": 9.161024223983397e-06, "loss": 0.4493, "step": 4209 }, { "epoch": 0.19526901669758812, "grad_norm": 6.1249542236328125, "learning_rate": 9.160616121954307e-06, "loss": 0.3865, "step": 4210 }, { "epoch": 0.19531539888682745, "grad_norm": 6.102635383605957, "learning_rate": 9.160207929787663e-06, "loss": 0.3979, "step": 4211 }, { "epoch": 0.1953617810760668, "grad_norm": 6.259039402008057, "learning_rate": 9.159799647492309e-06, "loss": 0.3945, "step": 4212 }, { "epoch": 0.19540816326530613, "grad_norm": 8.42081356048584, "learning_rate": 9.159391275077088e-06, "loss": 0.4434, "step": 4213 }, { "epoch": 0.19545454545454546, "grad_norm": 5.0384979248046875, "learning_rate": 9.15898281255085e-06, "loss": 0.3008, "step": 4214 }, { "epoch": 0.1955009276437848, "grad_norm": 8.637419700622559, "learning_rate": 9.158574259922442e-06, "loss": 0.4226, "step": 4215 }, { "epoch": 0.19554730983302412, "grad_norm": 5.388361930847168, "learning_rate": 9.158165617200717e-06, "loss": 0.3791, "step": 4216 }, { "epoch": 0.19559369202226345, "grad_norm": 5.078531742095947, "learning_rate": 9.157756884394526e-06, "loss": 0.4347, "step": 4217 }, { "epoch": 0.19564007421150278, "grad_norm": 5.622830867767334, "learning_rate": 9.157348061512728e-06, "loss": 0.31, "step": 4218 }, { "epoch": 0.19568645640074211, "grad_norm": 5.091793537139893, "learning_rate": 9.156939148564175e-06, "loss": 0.2804, "step": 4219 }, { "epoch": 0.19573283858998144, "grad_norm": 9.355329513549805, "learning_rate": 9.156530145557728e-06, "loss": 0.4356, "step": 4220 }, { "epoch": 0.19577922077922078, "grad_norm": 17.83006477355957, "learning_rate": 9.156121052502247e-06, "loss": 0.5289, "step": 4221 }, { "epoch": 0.1958256029684601, "grad_norm": 4.348945617675781, "learning_rate": 9.155711869406595e-06, "loss": 0.3266, "step": 4222 }, { "epoch": 0.19587198515769944, "grad_norm": 7.504858016967773, "learning_rate": 9.155302596279639e-06, "loss": 0.4087, "step": 4223 }, { "epoch": 0.19591836734693877, "grad_norm": 12.847906112670898, "learning_rate": 9.154893233130244e-06, "loss": 0.3738, "step": 4224 }, { "epoch": 0.1959647495361781, "grad_norm": 11.024242401123047, "learning_rate": 9.154483779967278e-06, "loss": 0.3819, "step": 4225 }, { "epoch": 0.19601113172541745, "grad_norm": 4.923705577850342, "learning_rate": 9.154074236799612e-06, "loss": 0.3924, "step": 4226 }, { "epoch": 0.19605751391465678, "grad_norm": 9.074556350708008, "learning_rate": 9.153664603636119e-06, "loss": 0.4104, "step": 4227 }, { "epoch": 0.1961038961038961, "grad_norm": 5.859532356262207, "learning_rate": 9.153254880485672e-06, "loss": 0.3373, "step": 4228 }, { "epoch": 0.19615027829313544, "grad_norm": 3.478079080581665, "learning_rate": 9.15284506735715e-06, "loss": 0.2643, "step": 4229 }, { "epoch": 0.19619666048237477, "grad_norm": 6.7746477127075195, "learning_rate": 9.15243516425943e-06, "loss": 0.3081, "step": 4230 }, { "epoch": 0.1962430426716141, "grad_norm": 10.055850982666016, "learning_rate": 9.152025171201391e-06, "loss": 0.3641, "step": 4231 }, { "epoch": 0.19628942486085343, "grad_norm": 10.262338638305664, "learning_rate": 9.151615088191918e-06, "loss": 0.3934, "step": 4232 }, { "epoch": 0.19633580705009276, "grad_norm": 11.380640983581543, "learning_rate": 9.151204915239895e-06, "loss": 0.3996, "step": 4233 }, { "epoch": 0.1963821892393321, "grad_norm": 15.354714393615723, "learning_rate": 9.150794652354204e-06, "loss": 0.5263, "step": 4234 }, { "epoch": 0.19642857142857142, "grad_norm": 7.556028366088867, "learning_rate": 9.150384299543738e-06, "loss": 0.2953, "step": 4235 }, { "epoch": 0.19647495361781075, "grad_norm": 7.9016828536987305, "learning_rate": 9.149973856817384e-06, "loss": 0.3088, "step": 4236 }, { "epoch": 0.19652133580705008, "grad_norm": 11.628388404846191, "learning_rate": 9.149563324184037e-06, "loss": 0.5026, "step": 4237 }, { "epoch": 0.19656771799628941, "grad_norm": 9.877241134643555, "learning_rate": 9.149152701652589e-06, "loss": 0.3609, "step": 4238 }, { "epoch": 0.19661410018552875, "grad_norm": 8.399917602539062, "learning_rate": 9.148741989231935e-06, "loss": 0.2752, "step": 4239 }, { "epoch": 0.19666048237476808, "grad_norm": 7.689035415649414, "learning_rate": 9.148331186930976e-06, "loss": 0.1632, "step": 4240 }, { "epoch": 0.19670686456400743, "grad_norm": 8.322147369384766, "learning_rate": 9.147920294758608e-06, "loss": 0.3199, "step": 4241 }, { "epoch": 0.19675324675324676, "grad_norm": 7.3094801902771, "learning_rate": 9.147509312723735e-06, "loss": 0.4088, "step": 4242 }, { "epoch": 0.1967996289424861, "grad_norm": 9.143739700317383, "learning_rate": 9.14709824083526e-06, "loss": 0.384, "step": 4243 }, { "epoch": 0.19684601113172542, "grad_norm": 8.327105522155762, "learning_rate": 9.14668707910209e-06, "loss": 0.3065, "step": 4244 }, { "epoch": 0.19689239332096475, "grad_norm": 8.910225868225098, "learning_rate": 9.146275827533132e-06, "loss": 0.3366, "step": 4245 }, { "epoch": 0.19693877551020408, "grad_norm": 11.364848136901855, "learning_rate": 9.145864486137292e-06, "loss": 0.3011, "step": 4246 }, { "epoch": 0.19698515769944341, "grad_norm": 8.212836265563965, "learning_rate": 9.145453054923487e-06, "loss": 0.4186, "step": 4247 }, { "epoch": 0.19703153988868274, "grad_norm": 18.548267364501953, "learning_rate": 9.14504153390063e-06, "loss": 0.6223, "step": 4248 }, { "epoch": 0.19707792207792207, "grad_norm": 7.335703372955322, "learning_rate": 9.14462992307763e-06, "loss": 0.3034, "step": 4249 }, { "epoch": 0.1971243042671614, "grad_norm": 8.400747299194336, "learning_rate": 9.144218222463412e-06, "loss": 0.353, "step": 4250 }, { "epoch": 0.19717068645640073, "grad_norm": 4.7246317863464355, "learning_rate": 9.14380643206689e-06, "loss": 0.3497, "step": 4251 }, { "epoch": 0.19721706864564006, "grad_norm": 9.296831130981445, "learning_rate": 9.14339455189699e-06, "loss": 0.3766, "step": 4252 }, { "epoch": 0.1972634508348794, "grad_norm": 9.934972763061523, "learning_rate": 9.14298258196263e-06, "loss": 0.474, "step": 4253 }, { "epoch": 0.19730983302411872, "grad_norm": 7.028653144836426, "learning_rate": 9.142570522272739e-06, "loss": 0.3681, "step": 4254 }, { "epoch": 0.19735621521335808, "grad_norm": 4.495406627655029, "learning_rate": 9.142158372836243e-06, "loss": 0.3677, "step": 4255 }, { "epoch": 0.1974025974025974, "grad_norm": 10.415885925292969, "learning_rate": 9.141746133662069e-06, "loss": 0.4481, "step": 4256 }, { "epoch": 0.19744897959183674, "grad_norm": 7.748898506164551, "learning_rate": 9.14133380475915e-06, "loss": 0.5029, "step": 4257 }, { "epoch": 0.19749536178107607, "grad_norm": 8.166258811950684, "learning_rate": 9.140921386136418e-06, "loss": 0.3905, "step": 4258 }, { "epoch": 0.1975417439703154, "grad_norm": 8.957175254821777, "learning_rate": 9.14050887780281e-06, "loss": 0.3851, "step": 4259 }, { "epoch": 0.19758812615955473, "grad_norm": 5.1901445388793945, "learning_rate": 9.140096279767259e-06, "loss": 0.3442, "step": 4260 }, { "epoch": 0.19763450834879406, "grad_norm": 8.885198593139648, "learning_rate": 9.139683592038707e-06, "loss": 0.3879, "step": 4261 }, { "epoch": 0.1976808905380334, "grad_norm": 5.1843461990356445, "learning_rate": 9.139270814626094e-06, "loss": 0.3231, "step": 4262 }, { "epoch": 0.19772727272727272, "grad_norm": 12.298827171325684, "learning_rate": 9.13885794753836e-06, "loss": 0.4207, "step": 4263 }, { "epoch": 0.19777365491651205, "grad_norm": 6.876817226409912, "learning_rate": 9.138444990784455e-06, "loss": 0.3011, "step": 4264 }, { "epoch": 0.19782003710575138, "grad_norm": 11.58022403717041, "learning_rate": 9.138031944373317e-06, "loss": 0.4759, "step": 4265 }, { "epoch": 0.19786641929499071, "grad_norm": 26.259267807006836, "learning_rate": 9.137618808313904e-06, "loss": 0.4026, "step": 4266 }, { "epoch": 0.19791280148423004, "grad_norm": 9.116877555847168, "learning_rate": 9.13720558261516e-06, "loss": 0.4281, "step": 4267 }, { "epoch": 0.19795918367346937, "grad_norm": 8.982630729675293, "learning_rate": 9.136792267286039e-06, "loss": 0.3496, "step": 4268 }, { "epoch": 0.19800556586270873, "grad_norm": 9.840606689453125, "learning_rate": 9.136378862335496e-06, "loss": 0.3131, "step": 4269 }, { "epoch": 0.19805194805194806, "grad_norm": 7.143320083618164, "learning_rate": 9.135965367772488e-06, "loss": 0.4146, "step": 4270 }, { "epoch": 0.1980983302411874, "grad_norm": 4.642670154571533, "learning_rate": 9.135551783605969e-06, "loss": 0.3238, "step": 4271 }, { "epoch": 0.19814471243042672, "grad_norm": 5.349984169006348, "learning_rate": 9.135138109844904e-06, "loss": 0.3572, "step": 4272 }, { "epoch": 0.19819109461966605, "grad_norm": 5.5584869384765625, "learning_rate": 9.134724346498251e-06, "loss": 0.353, "step": 4273 }, { "epoch": 0.19823747680890538, "grad_norm": 14.400432586669922, "learning_rate": 9.134310493574978e-06, "loss": 0.4182, "step": 4274 }, { "epoch": 0.1982838589981447, "grad_norm": 7.865018844604492, "learning_rate": 9.133896551084048e-06, "loss": 0.4405, "step": 4275 }, { "epoch": 0.19833024118738404, "grad_norm": 7.597966194152832, "learning_rate": 9.133482519034428e-06, "loss": 0.4091, "step": 4276 }, { "epoch": 0.19837662337662337, "grad_norm": 6.551705837249756, "learning_rate": 9.133068397435092e-06, "loss": 0.4587, "step": 4277 }, { "epoch": 0.1984230055658627, "grad_norm": 6.909735679626465, "learning_rate": 9.13265418629501e-06, "loss": 0.4281, "step": 4278 }, { "epoch": 0.19846938775510203, "grad_norm": 5.568824291229248, "learning_rate": 9.13223988562315e-06, "loss": 0.3208, "step": 4279 }, { "epoch": 0.19851576994434136, "grad_norm": 7.332042217254639, "learning_rate": 9.131825495428496e-06, "loss": 0.3618, "step": 4280 }, { "epoch": 0.1985621521335807, "grad_norm": 7.672429084777832, "learning_rate": 9.131411015720022e-06, "loss": 0.3772, "step": 4281 }, { "epoch": 0.19860853432282002, "grad_norm": 5.437365531921387, "learning_rate": 9.130996446506706e-06, "loss": 0.2839, "step": 4282 }, { "epoch": 0.19865491651205938, "grad_norm": 7.641140937805176, "learning_rate": 9.130581787797533e-06, "loss": 0.3327, "step": 4283 }, { "epoch": 0.1987012987012987, "grad_norm": 5.883161544799805, "learning_rate": 9.130167039601481e-06, "loss": 0.2442, "step": 4284 }, { "epoch": 0.19874768089053804, "grad_norm": 6.011423587799072, "learning_rate": 9.129752201927541e-06, "loss": 0.3048, "step": 4285 }, { "epoch": 0.19879406307977737, "grad_norm": 8.44573974609375, "learning_rate": 9.129337274784697e-06, "loss": 0.4695, "step": 4286 }, { "epoch": 0.1988404452690167, "grad_norm": 30.710599899291992, "learning_rate": 9.128922258181938e-06, "loss": 0.4843, "step": 4287 }, { "epoch": 0.19888682745825603, "grad_norm": 7.132471084594727, "learning_rate": 9.128507152128257e-06, "loss": 0.2149, "step": 4288 }, { "epoch": 0.19893320964749536, "grad_norm": 13.933753967285156, "learning_rate": 9.128091956632646e-06, "loss": 0.5514, "step": 4289 }, { "epoch": 0.1989795918367347, "grad_norm": 8.414201736450195, "learning_rate": 9.127676671704102e-06, "loss": 0.4161, "step": 4290 }, { "epoch": 0.19902597402597402, "grad_norm": 6.038534641265869, "learning_rate": 9.127261297351617e-06, "loss": 0.3936, "step": 4291 }, { "epoch": 0.19907235621521335, "grad_norm": 7.460756778717041, "learning_rate": 9.126845833584195e-06, "loss": 0.3776, "step": 4292 }, { "epoch": 0.19911873840445268, "grad_norm": 11.919180870056152, "learning_rate": 9.126430280410833e-06, "loss": 0.4153, "step": 4293 }, { "epoch": 0.199165120593692, "grad_norm": 6.417735576629639, "learning_rate": 9.126014637840536e-06, "loss": 0.3996, "step": 4294 }, { "epoch": 0.19921150278293134, "grad_norm": 4.269655227661133, "learning_rate": 9.125598905882309e-06, "loss": 0.2806, "step": 4295 }, { "epoch": 0.19925788497217067, "grad_norm": 9.549982070922852, "learning_rate": 9.125183084545158e-06, "loss": 0.3948, "step": 4296 }, { "epoch": 0.19930426716141003, "grad_norm": 9.341156005859375, "learning_rate": 9.124767173838089e-06, "loss": 0.4174, "step": 4297 }, { "epoch": 0.19935064935064936, "grad_norm": 6.000349044799805, "learning_rate": 9.124351173770116e-06, "loss": 0.3081, "step": 4298 }, { "epoch": 0.1993970315398887, "grad_norm": 6.867153167724609, "learning_rate": 9.123935084350251e-06, "loss": 0.4733, "step": 4299 }, { "epoch": 0.19944341372912802, "grad_norm": 5.595615863800049, "learning_rate": 9.123518905587507e-06, "loss": 0.3457, "step": 4300 }, { "epoch": 0.19948979591836735, "grad_norm": 9.08625602722168, "learning_rate": 9.1231026374909e-06, "loss": 0.4293, "step": 4301 }, { "epoch": 0.19953617810760668, "grad_norm": 22.95142364501953, "learning_rate": 9.12268628006945e-06, "loss": 0.4165, "step": 4302 }, { "epoch": 0.199582560296846, "grad_norm": 11.056045532226562, "learning_rate": 9.122269833332175e-06, "loss": 0.3559, "step": 4303 }, { "epoch": 0.19962894248608534, "grad_norm": 10.399261474609375, "learning_rate": 9.1218532972881e-06, "loss": 0.3797, "step": 4304 }, { "epoch": 0.19967532467532467, "grad_norm": 6.780184745788574, "learning_rate": 9.121436671946247e-06, "loss": 0.3695, "step": 4305 }, { "epoch": 0.199721706864564, "grad_norm": 8.641016006469727, "learning_rate": 9.121019957315642e-06, "loss": 0.4158, "step": 4306 }, { "epoch": 0.19976808905380333, "grad_norm": 4.787994861602783, "learning_rate": 9.120603153405314e-06, "loss": 0.3841, "step": 4307 }, { "epoch": 0.19981447124304266, "grad_norm": 6.560143947601318, "learning_rate": 9.120186260224291e-06, "loss": 0.4121, "step": 4308 }, { "epoch": 0.199860853432282, "grad_norm": 5.598817825317383, "learning_rate": 9.119769277781606e-06, "loss": 0.3553, "step": 4309 }, { "epoch": 0.19990723562152132, "grad_norm": 6.541743755340576, "learning_rate": 9.119352206086292e-06, "loss": 0.3225, "step": 4310 }, { "epoch": 0.19995361781076068, "grad_norm": 26.45427703857422, "learning_rate": 9.118935045147387e-06, "loss": 0.3034, "step": 4311 }, { "epoch": 0.2, "grad_norm": 5.476614952087402, "learning_rate": 9.118517794973925e-06, "loss": 0.236, "step": 4312 }, { "epoch": 0.2, "eval_loss": 0.3643307387828827, "eval_runtime": 38.0174, "eval_samples_per_second": 45.847, "eval_steps_per_second": 5.734, "step": 4312 }, { "epoch": 0.20004638218923934, "grad_norm": 5.340691566467285, "learning_rate": 9.118100455574948e-06, "loss": 0.3385, "step": 4313 }, { "epoch": 0.20009276437847867, "grad_norm": 5.440051078796387, "learning_rate": 9.117683026959496e-06, "loss": 0.2967, "step": 4314 }, { "epoch": 0.200139146567718, "grad_norm": 6.182126522064209, "learning_rate": 9.117265509136615e-06, "loss": 0.3129, "step": 4315 }, { "epoch": 0.20018552875695733, "grad_norm": 5.030552864074707, "learning_rate": 9.116847902115346e-06, "loss": 0.3013, "step": 4316 }, { "epoch": 0.20023191094619666, "grad_norm": 7.544722080230713, "learning_rate": 9.116430205904741e-06, "loss": 0.372, "step": 4317 }, { "epoch": 0.200278293135436, "grad_norm": 9.23619270324707, "learning_rate": 9.116012420513845e-06, "loss": 0.3849, "step": 4318 }, { "epoch": 0.20032467532467532, "grad_norm": 7.266181468963623, "learning_rate": 9.115594545951711e-06, "loss": 0.258, "step": 4319 }, { "epoch": 0.20037105751391465, "grad_norm": 16.193340301513672, "learning_rate": 9.115176582227393e-06, "loss": 0.543, "step": 4320 }, { "epoch": 0.20041743970315398, "grad_norm": 5.330473899841309, "learning_rate": 9.114758529349943e-06, "loss": 0.3217, "step": 4321 }, { "epoch": 0.2004638218923933, "grad_norm": 5.3258514404296875, "learning_rate": 9.114340387328422e-06, "loss": 0.2658, "step": 4322 }, { "epoch": 0.20051020408163264, "grad_norm": 9.256635665893555, "learning_rate": 9.113922156171885e-06, "loss": 0.2784, "step": 4323 }, { "epoch": 0.20055658627087197, "grad_norm": 8.381485939025879, "learning_rate": 9.113503835889395e-06, "loss": 0.2943, "step": 4324 }, { "epoch": 0.20060296846011133, "grad_norm": 15.825518608093262, "learning_rate": 9.113085426490013e-06, "loss": 0.4866, "step": 4325 }, { "epoch": 0.20064935064935066, "grad_norm": 6.646139144897461, "learning_rate": 9.112666927982807e-06, "loss": 0.377, "step": 4326 }, { "epoch": 0.20069573283859, "grad_norm": 8.483918190002441, "learning_rate": 9.11224834037684e-06, "loss": 0.4139, "step": 4327 }, { "epoch": 0.20074211502782932, "grad_norm": 10.590506553649902, "learning_rate": 9.111829663681182e-06, "loss": 0.4845, "step": 4328 }, { "epoch": 0.20078849721706865, "grad_norm": 9.874958992004395, "learning_rate": 9.111410897904902e-06, "loss": 0.5463, "step": 4329 }, { "epoch": 0.20083487940630798, "grad_norm": 12.386466979980469, "learning_rate": 9.110992043057074e-06, "loss": 0.5235, "step": 4330 }, { "epoch": 0.2008812615955473, "grad_norm": 9.75733470916748, "learning_rate": 9.110573099146772e-06, "loss": 0.4644, "step": 4331 }, { "epoch": 0.20092764378478664, "grad_norm": 19.06218147277832, "learning_rate": 9.11015406618307e-06, "loss": 0.4038, "step": 4332 }, { "epoch": 0.20097402597402597, "grad_norm": 11.648454666137695, "learning_rate": 9.109734944175051e-06, "loss": 0.57, "step": 4333 }, { "epoch": 0.2010204081632653, "grad_norm": 10.98659610748291, "learning_rate": 9.109315733131792e-06, "loss": 0.4352, "step": 4334 }, { "epoch": 0.20106679035250463, "grad_norm": 8.696817398071289, "learning_rate": 9.108896433062373e-06, "loss": 0.3389, "step": 4335 }, { "epoch": 0.20111317254174396, "grad_norm": 10.37982177734375, "learning_rate": 9.10847704397588e-06, "loss": 0.3911, "step": 4336 }, { "epoch": 0.2011595547309833, "grad_norm": 10.364462852478027, "learning_rate": 9.108057565881402e-06, "loss": 0.3291, "step": 4337 }, { "epoch": 0.20120593692022262, "grad_norm": 10.171812057495117, "learning_rate": 9.107637998788019e-06, "loss": 0.4134, "step": 4338 }, { "epoch": 0.20125231910946198, "grad_norm": 13.707184791564941, "learning_rate": 9.107218342704828e-06, "loss": 0.3789, "step": 4339 }, { "epoch": 0.2012987012987013, "grad_norm": 6.477540969848633, "learning_rate": 9.106798597640918e-06, "loss": 0.4114, "step": 4340 }, { "epoch": 0.20134508348794064, "grad_norm": 5.546287536621094, "learning_rate": 9.106378763605382e-06, "loss": 0.3938, "step": 4341 }, { "epoch": 0.20139146567717997, "grad_norm": 5.959407329559326, "learning_rate": 9.105958840607317e-06, "loss": 0.3521, "step": 4342 }, { "epoch": 0.2014378478664193, "grad_norm": 5.0629353523254395, "learning_rate": 9.105538828655817e-06, "loss": 0.3548, "step": 4343 }, { "epoch": 0.20148423005565863, "grad_norm": 8.366875648498535, "learning_rate": 9.105118727759984e-06, "loss": 0.4107, "step": 4344 }, { "epoch": 0.20153061224489796, "grad_norm": 7.282784938812256, "learning_rate": 9.104698537928923e-06, "loss": 0.3701, "step": 4345 }, { "epoch": 0.2015769944341373, "grad_norm": 6.8702921867370605, "learning_rate": 9.104278259171728e-06, "loss": 0.4774, "step": 4346 }, { "epoch": 0.20162337662337662, "grad_norm": 4.891806125640869, "learning_rate": 9.103857891497512e-06, "loss": 0.3803, "step": 4347 }, { "epoch": 0.20166975881261595, "grad_norm": 5.126269340515137, "learning_rate": 9.103437434915378e-06, "loss": 0.3401, "step": 4348 }, { "epoch": 0.20171614100185528, "grad_norm": 9.508248329162598, "learning_rate": 9.103016889434439e-06, "loss": 0.412, "step": 4349 }, { "epoch": 0.2017625231910946, "grad_norm": 5.356814861297607, "learning_rate": 9.1025962550638e-06, "loss": 0.3491, "step": 4350 }, { "epoch": 0.20180890538033394, "grad_norm": 7.5432329177856445, "learning_rate": 9.10217553181258e-06, "loss": 0.2618, "step": 4351 }, { "epoch": 0.20185528756957327, "grad_norm": 5.580086708068848, "learning_rate": 9.101754719689888e-06, "loss": 0.2943, "step": 4352 }, { "epoch": 0.20190166975881263, "grad_norm": 5.848945140838623, "learning_rate": 9.101333818704846e-06, "loss": 0.3271, "step": 4353 }, { "epoch": 0.20194805194805196, "grad_norm": 7.856817245483398, "learning_rate": 9.100912828866568e-06, "loss": 0.3656, "step": 4354 }, { "epoch": 0.2019944341372913, "grad_norm": 9.529833793640137, "learning_rate": 9.100491750184177e-06, "loss": 0.4708, "step": 4355 }, { "epoch": 0.20204081632653062, "grad_norm": 9.08266544342041, "learning_rate": 9.100070582666796e-06, "loss": 0.4033, "step": 4356 }, { "epoch": 0.20208719851576995, "grad_norm": 11.665471076965332, "learning_rate": 9.099649326323547e-06, "loss": 0.3949, "step": 4357 }, { "epoch": 0.20213358070500928, "grad_norm": 7.237819671630859, "learning_rate": 9.099227981163558e-06, "loss": 0.4057, "step": 4358 }, { "epoch": 0.2021799628942486, "grad_norm": 8.300483703613281, "learning_rate": 9.098806547195957e-06, "loss": 0.428, "step": 4359 }, { "epoch": 0.20222634508348794, "grad_norm": 15.745335578918457, "learning_rate": 9.098385024429875e-06, "loss": 0.5587, "step": 4360 }, { "epoch": 0.20227272727272727, "grad_norm": 8.622188568115234, "learning_rate": 9.097963412874444e-06, "loss": 0.3095, "step": 4361 }, { "epoch": 0.2023191094619666, "grad_norm": 13.648703575134277, "learning_rate": 9.097541712538794e-06, "loss": 0.2938, "step": 4362 }, { "epoch": 0.20236549165120593, "grad_norm": 13.685961723327637, "learning_rate": 9.097119923432066e-06, "loss": 0.4222, "step": 4363 }, { "epoch": 0.20241187384044526, "grad_norm": 5.298348426818848, "learning_rate": 9.096698045563396e-06, "loss": 0.4178, "step": 4364 }, { "epoch": 0.2024582560296846, "grad_norm": 7.450130462646484, "learning_rate": 9.096276078941923e-06, "loss": 0.3594, "step": 4365 }, { "epoch": 0.20250463821892392, "grad_norm": 6.679621696472168, "learning_rate": 9.095854023576789e-06, "loss": 0.2985, "step": 4366 }, { "epoch": 0.20255102040816325, "grad_norm": 15.332673072814941, "learning_rate": 9.095431879477139e-06, "loss": 0.6936, "step": 4367 }, { "epoch": 0.2025974025974026, "grad_norm": 8.349851608276367, "learning_rate": 9.095009646652116e-06, "loss": 0.4063, "step": 4368 }, { "epoch": 0.20264378478664194, "grad_norm": 4.736597537994385, "learning_rate": 9.09458732511087e-06, "loss": 0.3365, "step": 4369 }, { "epoch": 0.20269016697588127, "grad_norm": 10.837636947631836, "learning_rate": 9.09416491486255e-06, "loss": 0.4225, "step": 4370 }, { "epoch": 0.2027365491651206, "grad_norm": 14.10335922241211, "learning_rate": 9.093742415916305e-06, "loss": 0.3695, "step": 4371 }, { "epoch": 0.20278293135435993, "grad_norm": 5.028407096862793, "learning_rate": 9.09331982828129e-06, "loss": 0.3083, "step": 4372 }, { "epoch": 0.20282931354359926, "grad_norm": 10.752408981323242, "learning_rate": 9.09289715196666e-06, "loss": 0.4083, "step": 4373 }, { "epoch": 0.2028756957328386, "grad_norm": 7.972959041595459, "learning_rate": 9.092474386981571e-06, "loss": 0.4459, "step": 4374 }, { "epoch": 0.20292207792207792, "grad_norm": 8.45997142791748, "learning_rate": 9.092051533335184e-06, "loss": 0.3274, "step": 4375 }, { "epoch": 0.20296846011131725, "grad_norm": 5.448707103729248, "learning_rate": 9.09162859103666e-06, "loss": 0.3418, "step": 4376 }, { "epoch": 0.20301484230055658, "grad_norm": 5.095795631408691, "learning_rate": 9.09120556009516e-06, "loss": 0.4106, "step": 4377 }, { "epoch": 0.2030612244897959, "grad_norm": 5.5924248695373535, "learning_rate": 9.090782440519849e-06, "loss": 0.3476, "step": 4378 }, { "epoch": 0.20310760667903524, "grad_norm": 6.513180732727051, "learning_rate": 9.090359232319894e-06, "loss": 0.3943, "step": 4379 }, { "epoch": 0.20315398886827457, "grad_norm": 6.2144455909729, "learning_rate": 9.089935935504464e-06, "loss": 0.3954, "step": 4380 }, { "epoch": 0.2032003710575139, "grad_norm": 8.560693740844727, "learning_rate": 9.089512550082728e-06, "loss": 0.3663, "step": 4381 }, { "epoch": 0.20324675324675326, "grad_norm": 7.322731018066406, "learning_rate": 9.089089076063861e-06, "loss": 0.293, "step": 4382 }, { "epoch": 0.2032931354359926, "grad_norm": 6.648344993591309, "learning_rate": 9.088665513457035e-06, "loss": 0.4118, "step": 4383 }, { "epoch": 0.20333951762523192, "grad_norm": 12.08459758758545, "learning_rate": 9.08824186227143e-06, "loss": 0.366, "step": 4384 }, { "epoch": 0.20338589981447125, "grad_norm": 12.053764343261719, "learning_rate": 9.087818122516218e-06, "loss": 0.4481, "step": 4385 }, { "epoch": 0.20343228200371058, "grad_norm": 9.009000778198242, "learning_rate": 9.087394294200584e-06, "loss": 0.3754, "step": 4386 }, { "epoch": 0.2034786641929499, "grad_norm": 5.325361251831055, "learning_rate": 9.086970377333709e-06, "loss": 0.368, "step": 4387 }, { "epoch": 0.20352504638218924, "grad_norm": 20.059085845947266, "learning_rate": 9.086546371924774e-06, "loss": 0.3562, "step": 4388 }, { "epoch": 0.20357142857142857, "grad_norm": 10.435193061828613, "learning_rate": 9.08612227798297e-06, "loss": 0.3014, "step": 4389 }, { "epoch": 0.2036178107606679, "grad_norm": 15.938621520996094, "learning_rate": 9.085698095517481e-06, "loss": 0.3865, "step": 4390 }, { "epoch": 0.20366419294990723, "grad_norm": 26.625356674194336, "learning_rate": 9.085273824537497e-06, "loss": 0.7104, "step": 4391 }, { "epoch": 0.20371057513914656, "grad_norm": 6.391693115234375, "learning_rate": 9.08484946505221e-06, "loss": 0.282, "step": 4392 }, { "epoch": 0.2037569573283859, "grad_norm": 6.917435646057129, "learning_rate": 9.084425017070815e-06, "loss": 0.3581, "step": 4393 }, { "epoch": 0.20380333951762522, "grad_norm": 5.7324700355529785, "learning_rate": 9.084000480602506e-06, "loss": 0.3478, "step": 4394 }, { "epoch": 0.20384972170686455, "grad_norm": 18.857086181640625, "learning_rate": 9.083575855656482e-06, "loss": 0.293, "step": 4395 }, { "epoch": 0.2038961038961039, "grad_norm": 5.56373405456543, "learning_rate": 9.083151142241939e-06, "loss": 0.3325, "step": 4396 }, { "epoch": 0.20394248608534324, "grad_norm": 6.345362186431885, "learning_rate": 9.082726340368082e-06, "loss": 0.3311, "step": 4397 }, { "epoch": 0.20398886827458257, "grad_norm": 8.098514556884766, "learning_rate": 9.082301450044111e-06, "loss": 0.4367, "step": 4398 }, { "epoch": 0.2040352504638219, "grad_norm": 5.741435527801514, "learning_rate": 9.081876471279231e-06, "loss": 0.2417, "step": 4399 }, { "epoch": 0.20408163265306123, "grad_norm": 17.895816802978516, "learning_rate": 9.081451404082653e-06, "loss": 0.4572, "step": 4400 }, { "epoch": 0.20412801484230056, "grad_norm": 7.0713653564453125, "learning_rate": 9.08102624846358e-06, "loss": 0.3635, "step": 4401 }, { "epoch": 0.2041743970315399, "grad_norm": 4.881788730621338, "learning_rate": 9.08060100443123e-06, "loss": 0.24, "step": 4402 }, { "epoch": 0.20422077922077922, "grad_norm": 14.49062442779541, "learning_rate": 9.080175671994808e-06, "loss": 0.3673, "step": 4403 }, { "epoch": 0.20426716141001855, "grad_norm": 4.8907318115234375, "learning_rate": 9.079750251163533e-06, "loss": 0.3737, "step": 4404 }, { "epoch": 0.20431354359925788, "grad_norm": 7.269566059112549, "learning_rate": 9.079324741946621e-06, "loss": 0.3537, "step": 4405 }, { "epoch": 0.2043599257884972, "grad_norm": 12.784070014953613, "learning_rate": 9.07889914435329e-06, "loss": 0.3345, "step": 4406 }, { "epoch": 0.20440630797773654, "grad_norm": 13.313289642333984, "learning_rate": 9.07847345839276e-06, "loss": 0.4377, "step": 4407 }, { "epoch": 0.20445269016697587, "grad_norm": 9.037408828735352, "learning_rate": 9.078047684074254e-06, "loss": 0.4835, "step": 4408 }, { "epoch": 0.2044990723562152, "grad_norm": 6.805792331695557, "learning_rate": 9.077621821406997e-06, "loss": 0.4376, "step": 4409 }, { "epoch": 0.20454545454545456, "grad_norm": 6.3961029052734375, "learning_rate": 9.077195870400213e-06, "loss": 0.3142, "step": 4410 }, { "epoch": 0.2045918367346939, "grad_norm": 10.597763061523438, "learning_rate": 9.07676983106313e-06, "loss": 0.4224, "step": 4411 }, { "epoch": 0.20463821892393322, "grad_norm": 6.217561721801758, "learning_rate": 9.076343703404981e-06, "loss": 0.3071, "step": 4412 }, { "epoch": 0.20468460111317255, "grad_norm": 8.79317569732666, "learning_rate": 9.075917487434995e-06, "loss": 0.4627, "step": 4413 }, { "epoch": 0.20473098330241188, "grad_norm": 6.584141731262207, "learning_rate": 9.075491183162405e-06, "loss": 0.3042, "step": 4414 }, { "epoch": 0.2047773654916512, "grad_norm": 6.981025218963623, "learning_rate": 9.075064790596449e-06, "loss": 0.3883, "step": 4415 }, { "epoch": 0.20482374768089054, "grad_norm": 6.665295124053955, "learning_rate": 9.074638309746364e-06, "loss": 0.3518, "step": 4416 }, { "epoch": 0.20487012987012987, "grad_norm": 8.220443725585938, "learning_rate": 9.074211740621388e-06, "loss": 0.4067, "step": 4417 }, { "epoch": 0.2049165120593692, "grad_norm": 9.631556510925293, "learning_rate": 9.073785083230765e-06, "loss": 0.3622, "step": 4418 }, { "epoch": 0.20496289424860853, "grad_norm": 5.421600818634033, "learning_rate": 9.073358337583736e-06, "loss": 0.3235, "step": 4419 }, { "epoch": 0.20500927643784786, "grad_norm": 8.810172080993652, "learning_rate": 9.072931503689549e-06, "loss": 0.3563, "step": 4420 }, { "epoch": 0.2050556586270872, "grad_norm": 12.109829902648926, "learning_rate": 9.072504581557447e-06, "loss": 0.4617, "step": 4421 }, { "epoch": 0.20510204081632652, "grad_norm": 11.290298461914062, "learning_rate": 9.072077571196681e-06, "loss": 0.3457, "step": 4422 }, { "epoch": 0.20514842300556585, "grad_norm": 7.358119487762451, "learning_rate": 9.071650472616505e-06, "loss": 0.3407, "step": 4423 }, { "epoch": 0.2051948051948052, "grad_norm": 10.980154991149902, "learning_rate": 9.071223285826166e-06, "loss": 0.501, "step": 4424 }, { "epoch": 0.20524118738404454, "grad_norm": 9.29425048828125, "learning_rate": 9.070796010834924e-06, "loss": 0.4143, "step": 4425 }, { "epoch": 0.20528756957328387, "grad_norm": 7.246909141540527, "learning_rate": 9.070368647652031e-06, "loss": 0.4258, "step": 4426 }, { "epoch": 0.2053339517625232, "grad_norm": 4.91298770904541, "learning_rate": 9.069941196286751e-06, "loss": 0.3925, "step": 4427 }, { "epoch": 0.20538033395176253, "grad_norm": 11.216337203979492, "learning_rate": 9.06951365674834e-06, "loss": 0.4835, "step": 4428 }, { "epoch": 0.20542671614100186, "grad_norm": 6.343753814697266, "learning_rate": 9.069086029046062e-06, "loss": 0.3586, "step": 4429 }, { "epoch": 0.2054730983302412, "grad_norm": 6.709954738616943, "learning_rate": 9.068658313189182e-06, "loss": 0.3709, "step": 4430 }, { "epoch": 0.20551948051948052, "grad_norm": 8.80737590789795, "learning_rate": 9.068230509186966e-06, "loss": 0.3381, "step": 4431 }, { "epoch": 0.20556586270871985, "grad_norm": 3.8935654163360596, "learning_rate": 9.067802617048681e-06, "loss": 0.2225, "step": 4432 }, { "epoch": 0.20561224489795918, "grad_norm": 5.388129234313965, "learning_rate": 9.067374636783597e-06, "loss": 0.292, "step": 4433 }, { "epoch": 0.2056586270871985, "grad_norm": 8.385149002075195, "learning_rate": 9.066946568400989e-06, "loss": 0.3806, "step": 4434 }, { "epoch": 0.20570500927643784, "grad_norm": 5.629362106323242, "learning_rate": 9.066518411910128e-06, "loss": 0.3226, "step": 4435 }, { "epoch": 0.20575139146567717, "grad_norm": 6.446146011352539, "learning_rate": 9.06609016732029e-06, "loss": 0.4332, "step": 4436 }, { "epoch": 0.2057977736549165, "grad_norm": 10.446371078491211, "learning_rate": 9.065661834640754e-06, "loss": 0.4532, "step": 4437 }, { "epoch": 0.20584415584415586, "grad_norm": 7.584158897399902, "learning_rate": 9.065233413880797e-06, "loss": 0.3995, "step": 4438 }, { "epoch": 0.2058905380333952, "grad_norm": 5.643247604370117, "learning_rate": 9.064804905049704e-06, "loss": 0.3316, "step": 4439 }, { "epoch": 0.20593692022263452, "grad_norm": 10.180736541748047, "learning_rate": 9.064376308156754e-06, "loss": 0.4225, "step": 4440 }, { "epoch": 0.20598330241187385, "grad_norm": 7.683657169342041, "learning_rate": 9.063947623211238e-06, "loss": 0.2632, "step": 4441 }, { "epoch": 0.20602968460111318, "grad_norm": 10.322540283203125, "learning_rate": 9.06351885022244e-06, "loss": 0.3752, "step": 4442 }, { "epoch": 0.2060760667903525, "grad_norm": 8.003693580627441, "learning_rate": 9.06308998919965e-06, "loss": 0.2767, "step": 4443 }, { "epoch": 0.20612244897959184, "grad_norm": 5.141932010650635, "learning_rate": 9.062661040152156e-06, "loss": 0.326, "step": 4444 }, { "epoch": 0.20616883116883117, "grad_norm": 9.160114288330078, "learning_rate": 9.062232003089255e-06, "loss": 0.3225, "step": 4445 }, { "epoch": 0.2062152133580705, "grad_norm": 9.747364044189453, "learning_rate": 9.061802878020239e-06, "loss": 0.3734, "step": 4446 }, { "epoch": 0.20626159554730983, "grad_norm": 6.577299118041992, "learning_rate": 9.061373664954407e-06, "loss": 0.2805, "step": 4447 }, { "epoch": 0.20630797773654916, "grad_norm": 6.629884243011475, "learning_rate": 9.060944363901057e-06, "loss": 0.3207, "step": 4448 }, { "epoch": 0.2063543599257885, "grad_norm": 4.71614933013916, "learning_rate": 9.060514974869488e-06, "loss": 0.3125, "step": 4449 }, { "epoch": 0.20640074211502782, "grad_norm": 7.143789291381836, "learning_rate": 9.060085497869004e-06, "loss": 0.4213, "step": 4450 }, { "epoch": 0.20644712430426715, "grad_norm": 15.762077331542969, "learning_rate": 9.059655932908911e-06, "loss": 0.4086, "step": 4451 }, { "epoch": 0.2064935064935065, "grad_norm": 7.975466251373291, "learning_rate": 9.059226279998512e-06, "loss": 0.3392, "step": 4452 }, { "epoch": 0.20653988868274584, "grad_norm": 16.935583114624023, "learning_rate": 9.058796539147116e-06, "loss": 0.3153, "step": 4453 }, { "epoch": 0.20658627087198517, "grad_norm": 9.811349868774414, "learning_rate": 9.058366710364035e-06, "loss": 0.2849, "step": 4454 }, { "epoch": 0.2066326530612245, "grad_norm": 8.108723640441895, "learning_rate": 9.05793679365858e-06, "loss": 0.3165, "step": 4455 }, { "epoch": 0.20667903525046383, "grad_norm": 12.776420593261719, "learning_rate": 9.057506789040063e-06, "loss": 0.4269, "step": 4456 }, { "epoch": 0.20672541743970316, "grad_norm": 7.331071376800537, "learning_rate": 9.057076696517804e-06, "loss": 0.4715, "step": 4457 }, { "epoch": 0.2067717996289425, "grad_norm": 10.491442680358887, "learning_rate": 9.056646516101117e-06, "loss": 0.3838, "step": 4458 }, { "epoch": 0.20681818181818182, "grad_norm": 7.892411231994629, "learning_rate": 9.056216247799321e-06, "loss": 0.4233, "step": 4459 }, { "epoch": 0.20686456400742115, "grad_norm": 7.445043087005615, "learning_rate": 9.055785891621742e-06, "loss": 0.3978, "step": 4460 }, { "epoch": 0.20691094619666048, "grad_norm": 6.562314987182617, "learning_rate": 9.0553554475777e-06, "loss": 0.3273, "step": 4461 }, { "epoch": 0.2069573283858998, "grad_norm": 6.598990440368652, "learning_rate": 9.054924915676522e-06, "loss": 0.5128, "step": 4462 }, { "epoch": 0.20700371057513914, "grad_norm": 9.758688926696777, "learning_rate": 9.054494295927533e-06, "loss": 0.2896, "step": 4463 }, { "epoch": 0.20705009276437847, "grad_norm": 7.292151927947998, "learning_rate": 9.054063588340065e-06, "loss": 0.252, "step": 4464 }, { "epoch": 0.2070964749536178, "grad_norm": 11.326153755187988, "learning_rate": 9.053632792923446e-06, "loss": 0.4109, "step": 4465 }, { "epoch": 0.20714285714285716, "grad_norm": 10.437353134155273, "learning_rate": 9.053201909687012e-06, "loss": 0.365, "step": 4466 }, { "epoch": 0.2071892393320965, "grad_norm": 8.064574241638184, "learning_rate": 9.052770938640096e-06, "loss": 0.3775, "step": 4467 }, { "epoch": 0.20723562152133582, "grad_norm": 9.35800552368164, "learning_rate": 9.052339879792036e-06, "loss": 0.3616, "step": 4468 }, { "epoch": 0.20728200371057515, "grad_norm": 14.404386520385742, "learning_rate": 9.051908733152169e-06, "loss": 0.4957, "step": 4469 }, { "epoch": 0.20732838589981448, "grad_norm": 13.610822677612305, "learning_rate": 9.051477498729837e-06, "loss": 0.4914, "step": 4470 }, { "epoch": 0.2073747680890538, "grad_norm": 5.408411979675293, "learning_rate": 9.051046176534381e-06, "loss": 0.3779, "step": 4471 }, { "epoch": 0.20742115027829314, "grad_norm": 6.71986722946167, "learning_rate": 9.050614766575147e-06, "loss": 0.4104, "step": 4472 }, { "epoch": 0.20746753246753247, "grad_norm": 10.601140022277832, "learning_rate": 9.05018326886148e-06, "loss": 0.4874, "step": 4473 }, { "epoch": 0.2075139146567718, "grad_norm": 16.74728012084961, "learning_rate": 9.049751683402728e-06, "loss": 0.5108, "step": 4474 }, { "epoch": 0.20756029684601113, "grad_norm": 7.059424877166748, "learning_rate": 9.049320010208244e-06, "loss": 0.3592, "step": 4475 }, { "epoch": 0.20760667903525046, "grad_norm": 4.462340831756592, "learning_rate": 9.048888249287376e-06, "loss": 0.3997, "step": 4476 }, { "epoch": 0.2076530612244898, "grad_norm": 11.75732707977295, "learning_rate": 9.048456400649482e-06, "loss": 0.4228, "step": 4477 }, { "epoch": 0.20769944341372912, "grad_norm": 5.1492109298706055, "learning_rate": 9.048024464303913e-06, "loss": 0.2726, "step": 4478 }, { "epoch": 0.20774582560296845, "grad_norm": 6.854251384735107, "learning_rate": 9.047592440260029e-06, "loss": 0.3655, "step": 4479 }, { "epoch": 0.2077922077922078, "grad_norm": 7.549292087554932, "learning_rate": 9.047160328527191e-06, "loss": 0.5198, "step": 4480 }, { "epoch": 0.20783858998144714, "grad_norm": 5.906645774841309, "learning_rate": 9.04672812911476e-06, "loss": 0.4429, "step": 4481 }, { "epoch": 0.20788497217068647, "grad_norm": 4.235157012939453, "learning_rate": 9.046295842032095e-06, "loss": 0.3049, "step": 4482 }, { "epoch": 0.2079313543599258, "grad_norm": 7.427152633666992, "learning_rate": 9.045863467288568e-06, "loss": 0.3845, "step": 4483 }, { "epoch": 0.20797773654916513, "grad_norm": 7.423494815826416, "learning_rate": 9.045431004893541e-06, "loss": 0.4247, "step": 4484 }, { "epoch": 0.20802411873840446, "grad_norm": 15.095953941345215, "learning_rate": 9.044998454856386e-06, "loss": 0.3902, "step": 4485 }, { "epoch": 0.2080705009276438, "grad_norm": 10.388871192932129, "learning_rate": 9.044565817186473e-06, "loss": 0.3872, "step": 4486 }, { "epoch": 0.20811688311688312, "grad_norm": 8.601607322692871, "learning_rate": 9.044133091893175e-06, "loss": 0.4034, "step": 4487 }, { "epoch": 0.20816326530612245, "grad_norm": 7.267849445343018, "learning_rate": 9.043700278985867e-06, "loss": 0.3693, "step": 4488 }, { "epoch": 0.20820964749536178, "grad_norm": 6.3974995613098145, "learning_rate": 9.043267378473923e-06, "loss": 0.3473, "step": 4489 }, { "epoch": 0.2082560296846011, "grad_norm": 8.989727020263672, "learning_rate": 9.042834390366725e-06, "loss": 0.2586, "step": 4490 }, { "epoch": 0.20830241187384044, "grad_norm": 7.250370502471924, "learning_rate": 9.042401314673654e-06, "loss": 0.3482, "step": 4491 }, { "epoch": 0.20834879406307977, "grad_norm": 5.164369583129883, "learning_rate": 9.04196815140409e-06, "loss": 0.2162, "step": 4492 }, { "epoch": 0.2083951762523191, "grad_norm": 9.716873168945312, "learning_rate": 9.041534900567416e-06, "loss": 0.3895, "step": 4493 }, { "epoch": 0.20844155844155843, "grad_norm": 5.711949825286865, "learning_rate": 9.041101562173023e-06, "loss": 0.3053, "step": 4494 }, { "epoch": 0.20848794063079779, "grad_norm": 10.23173999786377, "learning_rate": 9.040668136230295e-06, "loss": 0.4517, "step": 4495 }, { "epoch": 0.20853432282003712, "grad_norm": 7.240836143493652, "learning_rate": 9.040234622748621e-06, "loss": 0.3538, "step": 4496 }, { "epoch": 0.20858070500927645, "grad_norm": 10.770426750183105, "learning_rate": 9.039801021737399e-06, "loss": 0.4586, "step": 4497 }, { "epoch": 0.20862708719851578, "grad_norm": 4.626760005950928, "learning_rate": 9.039367333206016e-06, "loss": 0.2766, "step": 4498 }, { "epoch": 0.2086734693877551, "grad_norm": 9.39961051940918, "learning_rate": 9.03893355716387e-06, "loss": 0.4219, "step": 4499 }, { "epoch": 0.20871985157699444, "grad_norm": 8.255375862121582, "learning_rate": 9.038499693620358e-06, "loss": 0.45, "step": 4500 }, { "epoch": 0.20876623376623377, "grad_norm": 8.202654838562012, "learning_rate": 9.038065742584881e-06, "loss": 0.3986, "step": 4501 }, { "epoch": 0.2088126159554731, "grad_norm": 7.930265426635742, "learning_rate": 9.03763170406684e-06, "loss": 0.286, "step": 4502 }, { "epoch": 0.20885899814471243, "grad_norm": 9.425943374633789, "learning_rate": 9.037197578075638e-06, "loss": 0.4387, "step": 4503 }, { "epoch": 0.20890538033395176, "grad_norm": 6.243587493896484, "learning_rate": 9.03676336462068e-06, "loss": 0.3852, "step": 4504 }, { "epoch": 0.2089517625231911, "grad_norm": 8.39057731628418, "learning_rate": 9.036329063711373e-06, "loss": 0.2893, "step": 4505 }, { "epoch": 0.20899814471243042, "grad_norm": 12.302547454833984, "learning_rate": 9.035894675357124e-06, "loss": 0.4585, "step": 4506 }, { "epoch": 0.20904452690166975, "grad_norm": 10.817858695983887, "learning_rate": 9.035460199567348e-06, "loss": 0.3163, "step": 4507 }, { "epoch": 0.20909090909090908, "grad_norm": 7.591307640075684, "learning_rate": 9.035025636351453e-06, "loss": 0.3497, "step": 4508 }, { "epoch": 0.20913729128014844, "grad_norm": 9.699825286865234, "learning_rate": 9.034590985718859e-06, "loss": 0.3561, "step": 4509 }, { "epoch": 0.20918367346938777, "grad_norm": 5.155811786651611, "learning_rate": 9.034156247678975e-06, "loss": 0.3673, "step": 4510 }, { "epoch": 0.2092300556586271, "grad_norm": 8.044986724853516, "learning_rate": 9.033721422241227e-06, "loss": 0.3594, "step": 4511 }, { "epoch": 0.20927643784786643, "grad_norm": 7.626506328582764, "learning_rate": 9.03328650941503e-06, "loss": 0.3548, "step": 4512 }, { "epoch": 0.20932282003710576, "grad_norm": 8.569232940673828, "learning_rate": 9.03285150920981e-06, "loss": 0.4375, "step": 4513 }, { "epoch": 0.20936920222634509, "grad_norm": 7.211613178253174, "learning_rate": 9.032416421634989e-06, "loss": 0.3717, "step": 4514 }, { "epoch": 0.20941558441558442, "grad_norm": 6.0437140464782715, "learning_rate": 9.031981246699991e-06, "loss": 0.3699, "step": 4515 }, { "epoch": 0.20946196660482375, "grad_norm": 12.245404243469238, "learning_rate": 9.031545984414247e-06, "loss": 0.3982, "step": 4516 }, { "epoch": 0.20950834879406308, "grad_norm": 12.816804885864258, "learning_rate": 9.031110634787185e-06, "loss": 0.4756, "step": 4517 }, { "epoch": 0.2095547309833024, "grad_norm": 8.46778678894043, "learning_rate": 9.03067519782824e-06, "loss": 0.4177, "step": 4518 }, { "epoch": 0.20960111317254174, "grad_norm": 5.894327640533447, "learning_rate": 9.030239673546841e-06, "loss": 0.3745, "step": 4519 }, { "epoch": 0.20964749536178107, "grad_norm": 4.7513651847839355, "learning_rate": 9.029804061952426e-06, "loss": 0.426, "step": 4520 }, { "epoch": 0.2096938775510204, "grad_norm": 4.340932369232178, "learning_rate": 9.02936836305443e-06, "loss": 0.3511, "step": 4521 }, { "epoch": 0.20974025974025973, "grad_norm": 5.157323360443115, "learning_rate": 9.028932576862294e-06, "loss": 0.3152, "step": 4522 }, { "epoch": 0.20978664192949908, "grad_norm": 9.178534507751465, "learning_rate": 9.028496703385459e-06, "loss": 0.3576, "step": 4523 }, { "epoch": 0.20983302411873841, "grad_norm": 9.409279823303223, "learning_rate": 9.028060742633368e-06, "loss": 0.3765, "step": 4524 }, { "epoch": 0.20987940630797774, "grad_norm": 6.63694953918457, "learning_rate": 9.027624694615464e-06, "loss": 0.3412, "step": 4525 }, { "epoch": 0.20992578849721708, "grad_norm": 4.485083103179932, "learning_rate": 9.027188559341198e-06, "loss": 0.4267, "step": 4526 }, { "epoch": 0.2099721706864564, "grad_norm": 16.13921356201172, "learning_rate": 9.026752336820016e-06, "loss": 0.4136, "step": 4527 }, { "epoch": 0.21001855287569574, "grad_norm": 13.428851127624512, "learning_rate": 9.026316027061365e-06, "loss": 0.3394, "step": 4528 }, { "epoch": 0.21006493506493507, "grad_norm": 5.024357795715332, "learning_rate": 9.025879630074704e-06, "loss": 0.4075, "step": 4529 }, { "epoch": 0.2101113172541744, "grad_norm": 8.482527732849121, "learning_rate": 9.025443145869483e-06, "loss": 0.4757, "step": 4530 }, { "epoch": 0.21015769944341373, "grad_norm": 4.201209545135498, "learning_rate": 9.02500657445516e-06, "loss": 0.3429, "step": 4531 }, { "epoch": 0.21020408163265306, "grad_norm": 5.215038299560547, "learning_rate": 9.024569915841193e-06, "loss": 0.4628, "step": 4532 }, { "epoch": 0.21025046382189239, "grad_norm": 10.627257347106934, "learning_rate": 9.02413317003704e-06, "loss": 0.4338, "step": 4533 }, { "epoch": 0.21029684601113172, "grad_norm": 7.602200508117676, "learning_rate": 9.023696337052166e-06, "loss": 0.4399, "step": 4534 }, { "epoch": 0.21034322820037105, "grad_norm": 6.60173225402832, "learning_rate": 9.023259416896034e-06, "loss": 0.3434, "step": 4535 }, { "epoch": 0.21038961038961038, "grad_norm": 9.180002212524414, "learning_rate": 9.022822409578106e-06, "loss": 0.432, "step": 4536 }, { "epoch": 0.21043599257884973, "grad_norm": 6.127184867858887, "learning_rate": 9.022385315107853e-06, "loss": 0.2906, "step": 4537 }, { "epoch": 0.21048237476808906, "grad_norm": 4.993959426879883, "learning_rate": 9.021948133494744e-06, "loss": 0.3731, "step": 4538 }, { "epoch": 0.2105287569573284, "grad_norm": 9.025038719177246, "learning_rate": 9.02151086474825e-06, "loss": 0.423, "step": 4539 }, { "epoch": 0.21057513914656772, "grad_norm": 6.709224224090576, "learning_rate": 9.021073508877845e-06, "loss": 0.4551, "step": 4540 }, { "epoch": 0.21062152133580705, "grad_norm": 7.376088619232178, "learning_rate": 9.020636065893003e-06, "loss": 0.3703, "step": 4541 }, { "epoch": 0.21066790352504638, "grad_norm": 8.452442169189453, "learning_rate": 9.020198535803201e-06, "loss": 0.4382, "step": 4542 }, { "epoch": 0.21071428571428572, "grad_norm": 8.082258224487305, "learning_rate": 9.01976091861792e-06, "loss": 0.2847, "step": 4543 }, { "epoch": 0.21076066790352505, "grad_norm": 6.026214122772217, "learning_rate": 9.019323214346635e-06, "loss": 0.3382, "step": 4544 }, { "epoch": 0.21080705009276438, "grad_norm": 8.966155052185059, "learning_rate": 9.018885422998835e-06, "loss": 0.4035, "step": 4545 }, { "epoch": 0.2108534322820037, "grad_norm": 5.42690372467041, "learning_rate": 9.018447544584004e-06, "loss": 0.4532, "step": 4546 }, { "epoch": 0.21089981447124304, "grad_norm": 5.595372676849365, "learning_rate": 9.018009579111624e-06, "loss": 0.3228, "step": 4547 }, { "epoch": 0.21094619666048237, "grad_norm": 6.7832255363464355, "learning_rate": 9.017571526591185e-06, "loss": 0.4605, "step": 4548 }, { "epoch": 0.2109925788497217, "grad_norm": 6.426490306854248, "learning_rate": 9.01713338703218e-06, "loss": 0.3665, "step": 4549 }, { "epoch": 0.21103896103896103, "grad_norm": 8.617286682128906, "learning_rate": 9.016695160444097e-06, "loss": 0.4664, "step": 4550 }, { "epoch": 0.21108534322820038, "grad_norm": 6.622670650482178, "learning_rate": 9.016256846836434e-06, "loss": 0.3277, "step": 4551 }, { "epoch": 0.21113172541743971, "grad_norm": 7.467658519744873, "learning_rate": 9.015818446218683e-06, "loss": 0.3729, "step": 4552 }, { "epoch": 0.21117810760667904, "grad_norm": 5.768959999084473, "learning_rate": 9.015379958600344e-06, "loss": 0.4045, "step": 4553 }, { "epoch": 0.21122448979591837, "grad_norm": 6.852146625518799, "learning_rate": 9.014941383990916e-06, "loss": 0.3506, "step": 4554 }, { "epoch": 0.2112708719851577, "grad_norm": 4.883603096008301, "learning_rate": 9.0145027223999e-06, "loss": 0.3576, "step": 4555 }, { "epoch": 0.21131725417439703, "grad_norm": 4.902010917663574, "learning_rate": 9.014063973836801e-06, "loss": 0.3457, "step": 4556 }, { "epoch": 0.21136363636363636, "grad_norm": 4.977878093719482, "learning_rate": 9.013625138311124e-06, "loss": 0.3628, "step": 4557 }, { "epoch": 0.2114100185528757, "grad_norm": 5.402626037597656, "learning_rate": 9.013186215832374e-06, "loss": 0.2825, "step": 4558 }, { "epoch": 0.21145640074211502, "grad_norm": 4.185169219970703, "learning_rate": 9.012747206410062e-06, "loss": 0.353, "step": 4559 }, { "epoch": 0.21150278293135436, "grad_norm": 9.783507347106934, "learning_rate": 9.012308110053699e-06, "loss": 0.3276, "step": 4560 }, { "epoch": 0.21154916512059369, "grad_norm": 15.27508544921875, "learning_rate": 9.011868926772795e-06, "loss": 0.5383, "step": 4561 }, { "epoch": 0.21159554730983302, "grad_norm": 8.778165817260742, "learning_rate": 9.011429656576868e-06, "loss": 0.4028, "step": 4562 }, { "epoch": 0.21164192949907235, "grad_norm": 7.8694915771484375, "learning_rate": 9.010990299475432e-06, "loss": 0.445, "step": 4563 }, { "epoch": 0.21168831168831168, "grad_norm": 5.1016998291015625, "learning_rate": 9.01055085547801e-06, "loss": 0.2771, "step": 4564 }, { "epoch": 0.21173469387755103, "grad_norm": 9.889738082885742, "learning_rate": 9.010111324594115e-06, "loss": 0.3094, "step": 4565 }, { "epoch": 0.21178107606679036, "grad_norm": 4.8669843673706055, "learning_rate": 9.009671706833275e-06, "loss": 0.3466, "step": 4566 }, { "epoch": 0.2118274582560297, "grad_norm": 8.284692764282227, "learning_rate": 9.009232002205012e-06, "loss": 0.4662, "step": 4567 }, { "epoch": 0.21187384044526902, "grad_norm": 7.047654151916504, "learning_rate": 9.008792210718854e-06, "loss": 0.2801, "step": 4568 }, { "epoch": 0.21192022263450835, "grad_norm": 5.2135701179504395, "learning_rate": 9.008352332384326e-06, "loss": 0.3068, "step": 4569 }, { "epoch": 0.21196660482374768, "grad_norm": 11.481973648071289, "learning_rate": 9.007912367210958e-06, "loss": 0.4898, "step": 4570 }, { "epoch": 0.21201298701298701, "grad_norm": 6.383413791656494, "learning_rate": 9.007472315208283e-06, "loss": 0.4155, "step": 4571 }, { "epoch": 0.21205936920222634, "grad_norm": 5.923857688903809, "learning_rate": 9.007032176385836e-06, "loss": 0.3501, "step": 4572 }, { "epoch": 0.21210575139146567, "grad_norm": 10.538372039794922, "learning_rate": 9.006591950753148e-06, "loss": 0.4035, "step": 4573 }, { "epoch": 0.212152133580705, "grad_norm": 5.7953009605407715, "learning_rate": 9.00615163831976e-06, "loss": 0.3205, "step": 4574 }, { "epoch": 0.21219851576994433, "grad_norm": 7.846291542053223, "learning_rate": 9.00571123909521e-06, "loss": 0.3702, "step": 4575 }, { "epoch": 0.21224489795918366, "grad_norm": 16.245027542114258, "learning_rate": 9.005270753089038e-06, "loss": 0.3502, "step": 4576 }, { "epoch": 0.212291280148423, "grad_norm": 3.839092254638672, "learning_rate": 9.004830180310786e-06, "loss": 0.2948, "step": 4577 }, { "epoch": 0.21233766233766233, "grad_norm": 9.014863967895508, "learning_rate": 9.004389520770003e-06, "loss": 0.4149, "step": 4578 }, { "epoch": 0.21238404452690168, "grad_norm": 9.448513984680176, "learning_rate": 9.003948774476232e-06, "loss": 0.4495, "step": 4579 }, { "epoch": 0.212430426716141, "grad_norm": 11.488373756408691, "learning_rate": 9.003507941439023e-06, "loss": 0.3976, "step": 4580 }, { "epoch": 0.21247680890538034, "grad_norm": 5.229945659637451, "learning_rate": 9.003067021667926e-06, "loss": 0.373, "step": 4581 }, { "epoch": 0.21252319109461967, "grad_norm": 4.768326759338379, "learning_rate": 9.002626015172493e-06, "loss": 0.2886, "step": 4582 }, { "epoch": 0.212569573283859, "grad_norm": 4.675800800323486, "learning_rate": 9.002184921962281e-06, "loss": 0.2443, "step": 4583 }, { "epoch": 0.21261595547309833, "grad_norm": 8.517631530761719, "learning_rate": 9.00174374204684e-06, "loss": 0.3379, "step": 4584 }, { "epoch": 0.21266233766233766, "grad_norm": 8.873661994934082, "learning_rate": 9.001302475435734e-06, "loss": 0.4205, "step": 4585 }, { "epoch": 0.212708719851577, "grad_norm": 11.99337100982666, "learning_rate": 9.000861122138518e-06, "loss": 0.3675, "step": 4586 }, { "epoch": 0.21275510204081632, "grad_norm": 8.332409858703613, "learning_rate": 9.000419682164758e-06, "loss": 0.4417, "step": 4587 }, { "epoch": 0.21280148423005565, "grad_norm": 5.912045001983643, "learning_rate": 8.999978155524014e-06, "loss": 0.3073, "step": 4588 }, { "epoch": 0.21284786641929498, "grad_norm": 8.603184700012207, "learning_rate": 8.999536542225855e-06, "loss": 0.3212, "step": 4589 }, { "epoch": 0.21289424860853431, "grad_norm": 5.588308334350586, "learning_rate": 8.999094842279846e-06, "loss": 0.3679, "step": 4590 }, { "epoch": 0.21294063079777364, "grad_norm": 6.816889762878418, "learning_rate": 8.998653055695556e-06, "loss": 0.341, "step": 4591 }, { "epoch": 0.21298701298701297, "grad_norm": 12.646439552307129, "learning_rate": 8.998211182482557e-06, "loss": 0.3677, "step": 4592 }, { "epoch": 0.21303339517625233, "grad_norm": 6.961002826690674, "learning_rate": 8.99776922265042e-06, "loss": 0.3266, "step": 4593 }, { "epoch": 0.21307977736549166, "grad_norm": 6.982672214508057, "learning_rate": 8.997327176208723e-06, "loss": 0.3627, "step": 4594 }, { "epoch": 0.213126159554731, "grad_norm": 10.63470458984375, "learning_rate": 8.996885043167041e-06, "loss": 0.4044, "step": 4595 }, { "epoch": 0.21317254174397032, "grad_norm": 9.94924259185791, "learning_rate": 8.996442823534953e-06, "loss": 0.4364, "step": 4596 }, { "epoch": 0.21321892393320965, "grad_norm": 9.14416790008545, "learning_rate": 8.996000517322037e-06, "loss": 0.3212, "step": 4597 }, { "epoch": 0.21326530612244898, "grad_norm": 10.66522216796875, "learning_rate": 8.99555812453788e-06, "loss": 0.3961, "step": 4598 }, { "epoch": 0.2133116883116883, "grad_norm": 5.355692386627197, "learning_rate": 8.995115645192063e-06, "loss": 0.3657, "step": 4599 }, { "epoch": 0.21335807050092764, "grad_norm": 5.143002986907959, "learning_rate": 8.994673079294171e-06, "loss": 0.2471, "step": 4600 }, { "epoch": 0.21340445269016697, "grad_norm": 8.547355651855469, "learning_rate": 8.994230426853795e-06, "loss": 0.3259, "step": 4601 }, { "epoch": 0.2134508348794063, "grad_norm": 8.160365104675293, "learning_rate": 8.993787687880525e-06, "loss": 0.3403, "step": 4602 }, { "epoch": 0.21349721706864563, "grad_norm": 7.187836170196533, "learning_rate": 8.993344862383948e-06, "loss": 0.3743, "step": 4603 }, { "epoch": 0.21354359925788496, "grad_norm": 8.12452507019043, "learning_rate": 8.992901950373663e-06, "loss": 0.4066, "step": 4604 }, { "epoch": 0.2135899814471243, "grad_norm": 5.816075801849365, "learning_rate": 8.992458951859264e-06, "loss": 0.2561, "step": 4605 }, { "epoch": 0.21363636363636362, "grad_norm": 9.00903034210205, "learning_rate": 8.992015866850346e-06, "loss": 0.2677, "step": 4606 }, { "epoch": 0.21368274582560298, "grad_norm": 7.3577375411987305, "learning_rate": 8.99157269535651e-06, "loss": 0.2976, "step": 4607 }, { "epoch": 0.2137291280148423, "grad_norm": 6.01020622253418, "learning_rate": 8.991129437387357e-06, "loss": 0.2777, "step": 4608 }, { "epoch": 0.21377551020408164, "grad_norm": 8.115180969238281, "learning_rate": 8.99068609295249e-06, "loss": 0.4363, "step": 4609 }, { "epoch": 0.21382189239332097, "grad_norm": 14.498950958251953, "learning_rate": 8.990242662061515e-06, "loss": 0.3616, "step": 4610 }, { "epoch": 0.2138682745825603, "grad_norm": 4.82216739654541, "learning_rate": 8.989799144724035e-06, "loss": 0.3504, "step": 4611 }, { "epoch": 0.21391465677179963, "grad_norm": 6.962738037109375, "learning_rate": 8.989355540949663e-06, "loss": 0.3203, "step": 4612 }, { "epoch": 0.21396103896103896, "grad_norm": 8.054535865783691, "learning_rate": 8.988911850748006e-06, "loss": 0.2375, "step": 4613 }, { "epoch": 0.2140074211502783, "grad_norm": 10.262262344360352, "learning_rate": 8.988468074128677e-06, "loss": 0.3972, "step": 4614 }, { "epoch": 0.21405380333951762, "grad_norm": 5.688897132873535, "learning_rate": 8.988024211101292e-06, "loss": 0.3729, "step": 4615 }, { "epoch": 0.21410018552875695, "grad_norm": 8.925195693969727, "learning_rate": 8.987580261675466e-06, "loss": 0.4701, "step": 4616 }, { "epoch": 0.21414656771799628, "grad_norm": 6.4035749435424805, "learning_rate": 8.987136225860818e-06, "loss": 0.3164, "step": 4617 }, { "epoch": 0.2141929499072356, "grad_norm": 7.310822486877441, "learning_rate": 8.986692103666964e-06, "loss": 0.2371, "step": 4618 }, { "epoch": 0.21423933209647494, "grad_norm": 10.086380004882812, "learning_rate": 8.98624789510353e-06, "loss": 0.3519, "step": 4619 }, { "epoch": 0.21428571428571427, "grad_norm": 25.176782608032227, "learning_rate": 8.98580360018014e-06, "loss": 0.5125, "step": 4620 }, { "epoch": 0.2143320964749536, "grad_norm": 11.525350570678711, "learning_rate": 8.985359218906414e-06, "loss": 0.3854, "step": 4621 }, { "epoch": 0.21437847866419296, "grad_norm": 6.836849212646484, "learning_rate": 8.984914751291982e-06, "loss": 0.2899, "step": 4622 }, { "epoch": 0.2144248608534323, "grad_norm": 21.595983505249023, "learning_rate": 8.984470197346476e-06, "loss": 0.518, "step": 4623 }, { "epoch": 0.21447124304267162, "grad_norm": 16.971923828125, "learning_rate": 8.984025557079523e-06, "loss": 0.4851, "step": 4624 }, { "epoch": 0.21451762523191095, "grad_norm": 14.08719253540039, "learning_rate": 8.98358083050076e-06, "loss": 0.5177, "step": 4625 }, { "epoch": 0.21456400742115028, "grad_norm": 5.841560363769531, "learning_rate": 8.983136017619817e-06, "loss": 0.1726, "step": 4626 }, { "epoch": 0.2146103896103896, "grad_norm": 7.994672775268555, "learning_rate": 8.982691118446334e-06, "loss": 0.3796, "step": 4627 }, { "epoch": 0.21465677179962894, "grad_norm": 6.084538459777832, "learning_rate": 8.982246132989949e-06, "loss": 0.3228, "step": 4628 }, { "epoch": 0.21470315398886827, "grad_norm": 11.016231536865234, "learning_rate": 8.981801061260303e-06, "loss": 0.3671, "step": 4629 }, { "epoch": 0.2147495361781076, "grad_norm": 5.048189640045166, "learning_rate": 8.981355903267035e-06, "loss": 0.3394, "step": 4630 }, { "epoch": 0.21479591836734693, "grad_norm": 9.544883728027344, "learning_rate": 8.980910659019793e-06, "loss": 0.3429, "step": 4631 }, { "epoch": 0.21484230055658626, "grad_norm": 13.924182891845703, "learning_rate": 8.98046532852822e-06, "loss": 0.594, "step": 4632 }, { "epoch": 0.2148886827458256, "grad_norm": 7.096821308135986, "learning_rate": 8.980019911801964e-06, "loss": 0.3571, "step": 4633 }, { "epoch": 0.21493506493506492, "grad_norm": 12.366703987121582, "learning_rate": 8.97957440885068e-06, "loss": 0.4724, "step": 4634 }, { "epoch": 0.21498144712430425, "grad_norm": 18.911500930786133, "learning_rate": 8.97912881968401e-06, "loss": 0.4159, "step": 4635 }, { "epoch": 0.2150278293135436, "grad_norm": 17.821754455566406, "learning_rate": 8.978683144311617e-06, "loss": 0.4888, "step": 4636 }, { "epoch": 0.21507421150278294, "grad_norm": 8.249853134155273, "learning_rate": 8.978237382743151e-06, "loss": 0.4263, "step": 4637 }, { "epoch": 0.21512059369202227, "grad_norm": 7.4716715812683105, "learning_rate": 8.97779153498827e-06, "loss": 0.3672, "step": 4638 }, { "epoch": 0.2151669758812616, "grad_norm": 8.141136169433594, "learning_rate": 8.977345601056635e-06, "loss": 0.26, "step": 4639 }, { "epoch": 0.21521335807050093, "grad_norm": 7.6721649169921875, "learning_rate": 8.976899580957904e-06, "loss": 0.3069, "step": 4640 }, { "epoch": 0.21525974025974026, "grad_norm": 5.973554611206055, "learning_rate": 8.97645347470174e-06, "loss": 0.3824, "step": 4641 }, { "epoch": 0.2153061224489796, "grad_norm": 11.520037651062012, "learning_rate": 8.976007282297812e-06, "loss": 0.4949, "step": 4642 }, { "epoch": 0.21535250463821892, "grad_norm": 11.743578910827637, "learning_rate": 8.975561003755782e-06, "loss": 0.4097, "step": 4643 }, { "epoch": 0.21539888682745825, "grad_norm": 9.801916122436523, "learning_rate": 8.975114639085318e-06, "loss": 0.4256, "step": 4644 }, { "epoch": 0.21544526901669758, "grad_norm": 7.934720993041992, "learning_rate": 8.974668188296095e-06, "loss": 0.3136, "step": 4645 }, { "epoch": 0.2154916512059369, "grad_norm": 13.671958923339844, "learning_rate": 8.974221651397778e-06, "loss": 0.4303, "step": 4646 }, { "epoch": 0.21553803339517624, "grad_norm": 6.731056213378906, "learning_rate": 8.97377502840005e-06, "loss": 0.3345, "step": 4647 }, { "epoch": 0.21558441558441557, "grad_norm": 10.162240028381348, "learning_rate": 8.973328319312577e-06, "loss": 0.4344, "step": 4648 }, { "epoch": 0.2156307977736549, "grad_norm": 10.803829193115234, "learning_rate": 8.972881524145045e-06, "loss": 0.4235, "step": 4649 }, { "epoch": 0.21567717996289426, "grad_norm": 7.816615104675293, "learning_rate": 8.97243464290713e-06, "loss": 0.2875, "step": 4650 }, { "epoch": 0.2157235621521336, "grad_norm": 4.4593186378479, "learning_rate": 8.971987675608513e-06, "loss": 0.3298, "step": 4651 }, { "epoch": 0.21576994434137292, "grad_norm": 5.051895618438721, "learning_rate": 8.971540622258878e-06, "loss": 0.3375, "step": 4652 }, { "epoch": 0.21581632653061225, "grad_norm": 13.036535263061523, "learning_rate": 8.97109348286791e-06, "loss": 0.4926, "step": 4653 }, { "epoch": 0.21586270871985158, "grad_norm": 4.806154727935791, "learning_rate": 8.970646257445298e-06, "loss": 0.3457, "step": 4654 }, { "epoch": 0.2159090909090909, "grad_norm": 17.651762008666992, "learning_rate": 8.970198946000726e-06, "loss": 0.3854, "step": 4655 }, { "epoch": 0.21595547309833024, "grad_norm": 8.224531173706055, "learning_rate": 8.969751548543891e-06, "loss": 0.3724, "step": 4656 }, { "epoch": 0.21600185528756957, "grad_norm": 8.96833610534668, "learning_rate": 8.96930406508448e-06, "loss": 0.3979, "step": 4657 }, { "epoch": 0.2160482374768089, "grad_norm": 6.50739049911499, "learning_rate": 8.96885649563219e-06, "loss": 0.3758, "step": 4658 }, { "epoch": 0.21609461966604823, "grad_norm": 6.268558979034424, "learning_rate": 8.968408840196721e-06, "loss": 0.4108, "step": 4659 }, { "epoch": 0.21614100185528756, "grad_norm": 6.420322418212891, "learning_rate": 8.967961098787763e-06, "loss": 0.338, "step": 4660 }, { "epoch": 0.2161873840445269, "grad_norm": 12.074728012084961, "learning_rate": 8.967513271415024e-06, "loss": 0.4675, "step": 4661 }, { "epoch": 0.21623376623376622, "grad_norm": 20.613086700439453, "learning_rate": 8.9670653580882e-06, "loss": 0.4944, "step": 4662 }, { "epoch": 0.21628014842300555, "grad_norm": 6.964514255523682, "learning_rate": 8.966617358817e-06, "loss": 0.3171, "step": 4663 }, { "epoch": 0.2163265306122449, "grad_norm": 6.388608455657959, "learning_rate": 8.966169273611125e-06, "loss": 0.3016, "step": 4664 }, { "epoch": 0.21637291280148424, "grad_norm": 4.316862106323242, "learning_rate": 8.965721102480287e-06, "loss": 0.3238, "step": 4665 }, { "epoch": 0.21641929499072357, "grad_norm": 8.95700740814209, "learning_rate": 8.965272845434192e-06, "loss": 0.3551, "step": 4666 }, { "epoch": 0.2164656771799629, "grad_norm": 6.665661334991455, "learning_rate": 8.964824502482552e-06, "loss": 0.4078, "step": 4667 }, { "epoch": 0.21651205936920223, "grad_norm": 4.771523952484131, "learning_rate": 8.964376073635079e-06, "loss": 0.3968, "step": 4668 }, { "epoch": 0.21655844155844156, "grad_norm": 7.663930416107178, "learning_rate": 8.963927558901491e-06, "loss": 0.3675, "step": 4669 }, { "epoch": 0.2166048237476809, "grad_norm": 4.66069221496582, "learning_rate": 8.963478958291502e-06, "loss": 0.3523, "step": 4670 }, { "epoch": 0.21665120593692022, "grad_norm": 5.832301616668701, "learning_rate": 8.963030271814832e-06, "loss": 0.3248, "step": 4671 }, { "epoch": 0.21669758812615955, "grad_norm": 10.096776008605957, "learning_rate": 8.962581499481201e-06, "loss": 0.3499, "step": 4672 }, { "epoch": 0.21674397031539888, "grad_norm": 8.213166236877441, "learning_rate": 8.962132641300334e-06, "loss": 0.3783, "step": 4673 }, { "epoch": 0.2167903525046382, "grad_norm": 6.966603755950928, "learning_rate": 8.961683697281953e-06, "loss": 0.3186, "step": 4674 }, { "epoch": 0.21683673469387754, "grad_norm": 11.887578010559082, "learning_rate": 8.961234667435784e-06, "loss": 0.4056, "step": 4675 }, { "epoch": 0.21688311688311687, "grad_norm": 6.081905841827393, "learning_rate": 8.960785551771554e-06, "loss": 0.4122, "step": 4676 }, { "epoch": 0.2169294990723562, "grad_norm": 4.07222843170166, "learning_rate": 8.960336350298995e-06, "loss": 0.3669, "step": 4677 }, { "epoch": 0.21697588126159556, "grad_norm": 7.5163421630859375, "learning_rate": 8.959887063027837e-06, "loss": 0.3402, "step": 4678 }, { "epoch": 0.2170222634508349, "grad_norm": 6.08029842376709, "learning_rate": 8.959437689967815e-06, "loss": 0.3359, "step": 4679 }, { "epoch": 0.21706864564007422, "grad_norm": 10.026299476623535, "learning_rate": 8.958988231128665e-06, "loss": 0.4474, "step": 4680 }, { "epoch": 0.21711502782931355, "grad_norm": 20.205381393432617, "learning_rate": 8.95853868652012e-06, "loss": 0.2986, "step": 4681 }, { "epoch": 0.21716141001855288, "grad_norm": 7.695411682128906, "learning_rate": 8.958089056151925e-06, "loss": 0.3951, "step": 4682 }, { "epoch": 0.2172077922077922, "grad_norm": 5.581291198730469, "learning_rate": 8.957639340033817e-06, "loss": 0.2606, "step": 4683 }, { "epoch": 0.21725417439703154, "grad_norm": 8.42507266998291, "learning_rate": 8.957189538175542e-06, "loss": 0.3551, "step": 4684 }, { "epoch": 0.21730055658627087, "grad_norm": 11.439802169799805, "learning_rate": 8.95673965058684e-06, "loss": 0.6153, "step": 4685 }, { "epoch": 0.2173469387755102, "grad_norm": 7.167774200439453, "learning_rate": 8.956289677277462e-06, "loss": 0.3091, "step": 4686 }, { "epoch": 0.21739332096474953, "grad_norm": 7.796206951141357, "learning_rate": 8.955839618257154e-06, "loss": 0.4322, "step": 4687 }, { "epoch": 0.21743970315398886, "grad_norm": 7.980612754821777, "learning_rate": 8.955389473535669e-06, "loss": 0.4039, "step": 4688 }, { "epoch": 0.2174860853432282, "grad_norm": 13.072613716125488, "learning_rate": 8.954939243122755e-06, "loss": 0.3415, "step": 4689 }, { "epoch": 0.21753246753246752, "grad_norm": 5.57443904876709, "learning_rate": 8.954488927028171e-06, "loss": 0.3778, "step": 4690 }, { "epoch": 0.21757884972170685, "grad_norm": 5.392963409423828, "learning_rate": 8.954038525261668e-06, "loss": 0.4426, "step": 4691 }, { "epoch": 0.2176252319109462, "grad_norm": 9.145360946655273, "learning_rate": 8.953588037833007e-06, "loss": 0.4037, "step": 4692 }, { "epoch": 0.21767161410018554, "grad_norm": 8.970038414001465, "learning_rate": 8.953137464751947e-06, "loss": 0.4614, "step": 4693 }, { "epoch": 0.21771799628942487, "grad_norm": 8.324956893920898, "learning_rate": 8.952686806028246e-06, "loss": 0.3269, "step": 4694 }, { "epoch": 0.2177643784786642, "grad_norm": 10.149004936218262, "learning_rate": 8.952236061671673e-06, "loss": 0.4728, "step": 4695 }, { "epoch": 0.21781076066790353, "grad_norm": 6.575514793395996, "learning_rate": 8.95178523169199e-06, "loss": 0.3563, "step": 4696 }, { "epoch": 0.21785714285714286, "grad_norm": 5.664851665496826, "learning_rate": 8.951334316098964e-06, "loss": 0.3614, "step": 4697 }, { "epoch": 0.2179035250463822, "grad_norm": 8.317560195922852, "learning_rate": 8.950883314902363e-06, "loss": 0.3946, "step": 4698 }, { "epoch": 0.21794990723562152, "grad_norm": 7.97291374206543, "learning_rate": 8.950432228111959e-06, "loss": 0.4337, "step": 4699 }, { "epoch": 0.21799628942486085, "grad_norm": 5.467984199523926, "learning_rate": 8.949981055737526e-06, "loss": 0.3851, "step": 4700 }, { "epoch": 0.21804267161410018, "grad_norm": 8.366501808166504, "learning_rate": 8.949529797788836e-06, "loss": 0.3627, "step": 4701 }, { "epoch": 0.2180890538033395, "grad_norm": 7.1914496421813965, "learning_rate": 8.949078454275666e-06, "loss": 0.3338, "step": 4702 }, { "epoch": 0.21813543599257884, "grad_norm": 14.640104293823242, "learning_rate": 8.948627025207795e-06, "loss": 0.4006, "step": 4703 }, { "epoch": 0.21818181818181817, "grad_norm": 10.882369995117188, "learning_rate": 8.948175510595001e-06, "loss": 0.5223, "step": 4704 }, { "epoch": 0.2182282003710575, "grad_norm": 12.130036354064941, "learning_rate": 8.947723910447067e-06, "loss": 0.436, "step": 4705 }, { "epoch": 0.21827458256029686, "grad_norm": 8.064545631408691, "learning_rate": 8.947272224773777e-06, "loss": 0.3848, "step": 4706 }, { "epoch": 0.2183209647495362, "grad_norm": 5.60015344619751, "learning_rate": 8.946820453584917e-06, "loss": 0.3419, "step": 4707 }, { "epoch": 0.21836734693877552, "grad_norm": 7.116881370544434, "learning_rate": 8.946368596890273e-06, "loss": 0.3401, "step": 4708 }, { "epoch": 0.21841372912801485, "grad_norm": 5.214982986450195, "learning_rate": 8.945916654699633e-06, "loss": 0.3874, "step": 4709 }, { "epoch": 0.21846011131725418, "grad_norm": 17.2553768157959, "learning_rate": 8.945464627022793e-06, "loss": 0.4654, "step": 4710 }, { "epoch": 0.2185064935064935, "grad_norm": 12.916769027709961, "learning_rate": 8.945012513869542e-06, "loss": 0.3168, "step": 4711 }, { "epoch": 0.21855287569573284, "grad_norm": 6.800549030303955, "learning_rate": 8.944560315249676e-06, "loss": 0.4015, "step": 4712 }, { "epoch": 0.21859925788497217, "grad_norm": 4.356716156005859, "learning_rate": 8.94410803117299e-06, "loss": 0.2457, "step": 4713 }, { "epoch": 0.2186456400742115, "grad_norm": 7.881803035736084, "learning_rate": 8.943655661649284e-06, "loss": 0.4148, "step": 4714 }, { "epoch": 0.21869202226345083, "grad_norm": 4.767059326171875, "learning_rate": 8.943203206688358e-06, "loss": 0.3514, "step": 4715 }, { "epoch": 0.21873840445269016, "grad_norm": 9.101734161376953, "learning_rate": 8.942750666300015e-06, "loss": 0.4462, "step": 4716 }, { "epoch": 0.2187847866419295, "grad_norm": 6.896411418914795, "learning_rate": 8.942298040494059e-06, "loss": 0.3841, "step": 4717 }, { "epoch": 0.21883116883116882, "grad_norm": 6.140946388244629, "learning_rate": 8.941845329280293e-06, "loss": 0.241, "step": 4718 }, { "epoch": 0.21887755102040815, "grad_norm": 5.5856032371521, "learning_rate": 8.94139253266853e-06, "loss": 0.3777, "step": 4719 }, { "epoch": 0.2189239332096475, "grad_norm": 6.937580585479736, "learning_rate": 8.940939650668575e-06, "loss": 0.4298, "step": 4720 }, { "epoch": 0.21897031539888684, "grad_norm": 6.046900272369385, "learning_rate": 8.940486683290242e-06, "loss": 0.3659, "step": 4721 }, { "epoch": 0.21901669758812617, "grad_norm": 4.854815483093262, "learning_rate": 8.940033630543341e-06, "loss": 0.3746, "step": 4722 }, { "epoch": 0.2190630797773655, "grad_norm": 12.837692260742188, "learning_rate": 8.939580492437691e-06, "loss": 0.4603, "step": 4723 }, { "epoch": 0.21910946196660483, "grad_norm": 6.985339164733887, "learning_rate": 8.93912726898311e-06, "loss": 0.3976, "step": 4724 }, { "epoch": 0.21915584415584416, "grad_norm": 10.25035572052002, "learning_rate": 8.938673960189411e-06, "loss": 0.3563, "step": 4725 }, { "epoch": 0.2192022263450835, "grad_norm": 10.9679536819458, "learning_rate": 8.93822056606642e-06, "loss": 0.2996, "step": 4726 }, { "epoch": 0.21924860853432282, "grad_norm": 5.119958400726318, "learning_rate": 8.937767086623956e-06, "loss": 0.2529, "step": 4727 }, { "epoch": 0.21929499072356215, "grad_norm": 10.394858360290527, "learning_rate": 8.937313521871846e-06, "loss": 0.4538, "step": 4728 }, { "epoch": 0.21934137291280148, "grad_norm": 10.077132225036621, "learning_rate": 8.936859871819917e-06, "loss": 0.3055, "step": 4729 }, { "epoch": 0.2193877551020408, "grad_norm": 4.503937721252441, "learning_rate": 8.936406136477993e-06, "loss": 0.3753, "step": 4730 }, { "epoch": 0.21943413729128014, "grad_norm": 6.99869966506958, "learning_rate": 8.935952315855907e-06, "loss": 0.3772, "step": 4731 }, { "epoch": 0.21948051948051947, "grad_norm": 12.224483489990234, "learning_rate": 8.93549840996349e-06, "loss": 0.485, "step": 4732 }, { "epoch": 0.2195269016697588, "grad_norm": 21.72043228149414, "learning_rate": 8.935044418810574e-06, "loss": 0.325, "step": 4733 }, { "epoch": 0.21957328385899816, "grad_norm": 5.323511123657227, "learning_rate": 8.934590342407e-06, "loss": 0.2909, "step": 4734 }, { "epoch": 0.2196196660482375, "grad_norm": 5.427466869354248, "learning_rate": 8.934136180762597e-06, "loss": 0.369, "step": 4735 }, { "epoch": 0.21966604823747682, "grad_norm": 6.7100629806518555, "learning_rate": 8.933681933887213e-06, "loss": 0.2739, "step": 4736 }, { "epoch": 0.21971243042671615, "grad_norm": 6.908677577972412, "learning_rate": 8.933227601790681e-06, "loss": 0.3625, "step": 4737 }, { "epoch": 0.21975881261595548, "grad_norm": 4.540355682373047, "learning_rate": 8.93277318448285e-06, "loss": 0.3483, "step": 4738 }, { "epoch": 0.2198051948051948, "grad_norm": 5.310037612915039, "learning_rate": 8.932318681973561e-06, "loss": 0.3265, "step": 4739 }, { "epoch": 0.21985157699443414, "grad_norm": 3.8454856872558594, "learning_rate": 8.931864094272663e-06, "loss": 0.3433, "step": 4740 }, { "epoch": 0.21989795918367347, "grad_norm": 11.105977058410645, "learning_rate": 8.931409421390003e-06, "loss": 0.3959, "step": 4741 }, { "epoch": 0.2199443413729128, "grad_norm": 5.304581642150879, "learning_rate": 8.930954663335432e-06, "loss": 0.3081, "step": 4742 }, { "epoch": 0.21999072356215213, "grad_norm": 11.611924171447754, "learning_rate": 8.9304998201188e-06, "loss": 0.3951, "step": 4743 }, { "epoch": 0.22003710575139146, "grad_norm": 6.274210453033447, "learning_rate": 8.930044891749962e-06, "loss": 0.4288, "step": 4744 }, { "epoch": 0.2200834879406308, "grad_norm": 5.041075706481934, "learning_rate": 8.929589878238778e-06, "loss": 0.3755, "step": 4745 }, { "epoch": 0.22012987012987012, "grad_norm": 4.835516452789307, "learning_rate": 8.929134779595098e-06, "loss": 0.3479, "step": 4746 }, { "epoch": 0.22017625231910945, "grad_norm": 8.334017753601074, "learning_rate": 8.928679595828787e-06, "loss": 0.4085, "step": 4747 }, { "epoch": 0.2202226345083488, "grad_norm": 8.967647552490234, "learning_rate": 8.928224326949704e-06, "loss": 0.4083, "step": 4748 }, { "epoch": 0.22026901669758814, "grad_norm": 9.127791404724121, "learning_rate": 8.927768972967714e-06, "loss": 0.3412, "step": 4749 }, { "epoch": 0.22031539888682747, "grad_norm": 7.131044387817383, "learning_rate": 8.92731353389268e-06, "loss": 0.3962, "step": 4750 }, { "epoch": 0.2203617810760668, "grad_norm": 5.126603603363037, "learning_rate": 8.92685800973447e-06, "loss": 0.2509, "step": 4751 }, { "epoch": 0.22040816326530613, "grad_norm": 4.773247718811035, "learning_rate": 8.926402400502953e-06, "loss": 0.3252, "step": 4752 }, { "epoch": 0.22045454545454546, "grad_norm": 4.8956451416015625, "learning_rate": 8.925946706207998e-06, "loss": 0.3658, "step": 4753 }, { "epoch": 0.2205009276437848, "grad_norm": 6.762062072753906, "learning_rate": 8.925490926859479e-06, "loss": 0.3596, "step": 4754 }, { "epoch": 0.22054730983302412, "grad_norm": 7.796882152557373, "learning_rate": 8.92503506246727e-06, "loss": 0.4527, "step": 4755 }, { "epoch": 0.22059369202226345, "grad_norm": 7.200868129730225, "learning_rate": 8.924579113041245e-06, "loss": 0.2957, "step": 4756 }, { "epoch": 0.22064007421150278, "grad_norm": 5.847177505493164, "learning_rate": 8.924123078591284e-06, "loss": 0.3215, "step": 4757 }, { "epoch": 0.2206864564007421, "grad_norm": 6.404829025268555, "learning_rate": 8.923666959127269e-06, "loss": 0.2788, "step": 4758 }, { "epoch": 0.22073283858998144, "grad_norm": 5.768311500549316, "learning_rate": 8.923210754659075e-06, "loss": 0.3265, "step": 4759 }, { "epoch": 0.22077922077922077, "grad_norm": 8.203597068786621, "learning_rate": 8.922754465196591e-06, "loss": 0.3575, "step": 4760 }, { "epoch": 0.2208256029684601, "grad_norm": 9.745584487915039, "learning_rate": 8.922298090749701e-06, "loss": 0.3938, "step": 4761 }, { "epoch": 0.22087198515769943, "grad_norm": 7.765432357788086, "learning_rate": 8.921841631328291e-06, "loss": 0.3508, "step": 4762 }, { "epoch": 0.2209183673469388, "grad_norm": 11.099061965942383, "learning_rate": 8.921385086942251e-06, "loss": 0.3656, "step": 4763 }, { "epoch": 0.22096474953617812, "grad_norm": 5.890514850616455, "learning_rate": 8.92092845760147e-06, "loss": 0.296, "step": 4764 }, { "epoch": 0.22101113172541745, "grad_norm": 13.267755508422852, "learning_rate": 8.920471743315844e-06, "loss": 0.4267, "step": 4765 }, { "epoch": 0.22105751391465678, "grad_norm": 15.259363174438477, "learning_rate": 8.920014944095265e-06, "loss": 0.6926, "step": 4766 }, { "epoch": 0.2211038961038961, "grad_norm": 5.204401969909668, "learning_rate": 8.91955805994963e-06, "loss": 0.2842, "step": 4767 }, { "epoch": 0.22115027829313544, "grad_norm": 8.014059066772461, "learning_rate": 8.919101090888836e-06, "loss": 0.364, "step": 4768 }, { "epoch": 0.22119666048237477, "grad_norm": 6.232207775115967, "learning_rate": 8.918644036922783e-06, "loss": 0.3519, "step": 4769 }, { "epoch": 0.2212430426716141, "grad_norm": 6.9543232917785645, "learning_rate": 8.918186898061377e-06, "loss": 0.3321, "step": 4770 }, { "epoch": 0.22128942486085343, "grad_norm": 19.14679718017578, "learning_rate": 8.917729674314517e-06, "loss": 0.5108, "step": 4771 }, { "epoch": 0.22133580705009276, "grad_norm": 14.222450256347656, "learning_rate": 8.91727236569211e-06, "loss": 0.3929, "step": 4772 }, { "epoch": 0.2213821892393321, "grad_norm": 9.723995208740234, "learning_rate": 8.916814972204062e-06, "loss": 0.3044, "step": 4773 }, { "epoch": 0.22142857142857142, "grad_norm": 4.153388500213623, "learning_rate": 8.916357493860284e-06, "loss": 0.2814, "step": 4774 }, { "epoch": 0.22147495361781075, "grad_norm": 8.276947975158691, "learning_rate": 8.91589993067069e-06, "loss": 0.3631, "step": 4775 }, { "epoch": 0.22152133580705008, "grad_norm": 6.645641803741455, "learning_rate": 8.915442282645183e-06, "loss": 0.3565, "step": 4776 }, { "epoch": 0.22156771799628944, "grad_norm": 10.005952835083008, "learning_rate": 8.91498454979369e-06, "loss": 0.4315, "step": 4777 }, { "epoch": 0.22161410018552877, "grad_norm": 9.971116065979004, "learning_rate": 8.914526732126118e-06, "loss": 0.4726, "step": 4778 }, { "epoch": 0.2216604823747681, "grad_norm": 7.1625447273254395, "learning_rate": 8.914068829652391e-06, "loss": 0.3813, "step": 4779 }, { "epoch": 0.22170686456400743, "grad_norm": 7.944698333740234, "learning_rate": 8.913610842382428e-06, "loss": 0.3317, "step": 4780 }, { "epoch": 0.22175324675324676, "grad_norm": 11.848055839538574, "learning_rate": 8.913152770326148e-06, "loss": 0.4761, "step": 4781 }, { "epoch": 0.2217996289424861, "grad_norm": 14.415000915527344, "learning_rate": 8.912694613493477e-06, "loss": 0.6061, "step": 4782 }, { "epoch": 0.22184601113172542, "grad_norm": 5.890816688537598, "learning_rate": 8.912236371894343e-06, "loss": 0.3493, "step": 4783 }, { "epoch": 0.22189239332096475, "grad_norm": 11.357436180114746, "learning_rate": 8.91177804553867e-06, "loss": 0.4636, "step": 4784 }, { "epoch": 0.22193877551020408, "grad_norm": 5.54958438873291, "learning_rate": 8.911319634436391e-06, "loss": 0.3329, "step": 4785 }, { "epoch": 0.2219851576994434, "grad_norm": 8.298007011413574, "learning_rate": 8.910861138597433e-06, "loss": 0.32, "step": 4786 }, { "epoch": 0.22203153988868274, "grad_norm": 9.814260482788086, "learning_rate": 8.910402558031732e-06, "loss": 0.4841, "step": 4787 }, { "epoch": 0.22207792207792207, "grad_norm": 5.642449378967285, "learning_rate": 8.909943892749221e-06, "loss": 0.4084, "step": 4788 }, { "epoch": 0.2221243042671614, "grad_norm": 4.895956516265869, "learning_rate": 8.90948514275984e-06, "loss": 0.2472, "step": 4789 }, { "epoch": 0.22217068645640073, "grad_norm": 5.8062663078308105, "learning_rate": 8.909026308073523e-06, "loss": 0.3176, "step": 4790 }, { "epoch": 0.2222170686456401, "grad_norm": 12.30709171295166, "learning_rate": 8.908567388700214e-06, "loss": 0.5194, "step": 4791 }, { "epoch": 0.22226345083487942, "grad_norm": 7.963207721710205, "learning_rate": 8.908108384649856e-06, "loss": 0.3312, "step": 4792 }, { "epoch": 0.22230983302411875, "grad_norm": 7.243757247924805, "learning_rate": 8.907649295932388e-06, "loss": 0.3745, "step": 4793 }, { "epoch": 0.22235621521335808, "grad_norm": 8.81664752960205, "learning_rate": 8.90719012255776e-06, "loss": 0.38, "step": 4794 }, { "epoch": 0.2224025974025974, "grad_norm": 6.048609256744385, "learning_rate": 8.90673086453592e-06, "loss": 0.3816, "step": 4795 }, { "epoch": 0.22244897959183674, "grad_norm": 12.242450714111328, "learning_rate": 8.906271521876815e-06, "loss": 0.5037, "step": 4796 }, { "epoch": 0.22249536178107607, "grad_norm": 9.173819541931152, "learning_rate": 8.905812094590398e-06, "loss": 0.3643, "step": 4797 }, { "epoch": 0.2225417439703154, "grad_norm": 12.963942527770996, "learning_rate": 8.905352582686622e-06, "loss": 0.4208, "step": 4798 }, { "epoch": 0.22258812615955473, "grad_norm": 4.55687952041626, "learning_rate": 8.904892986175444e-06, "loss": 0.2765, "step": 4799 }, { "epoch": 0.22263450834879406, "grad_norm": 4.950169563293457, "learning_rate": 8.904433305066818e-06, "loss": 0.3192, "step": 4800 }, { "epoch": 0.2226808905380334, "grad_norm": 6.373635292053223, "learning_rate": 8.903973539370702e-06, "loss": 0.2275, "step": 4801 }, { "epoch": 0.22272727272727272, "grad_norm": 8.303792953491211, "learning_rate": 8.90351368909706e-06, "loss": 0.4673, "step": 4802 }, { "epoch": 0.22277365491651205, "grad_norm": 10.693490982055664, "learning_rate": 8.903053754255854e-06, "loss": 0.5686, "step": 4803 }, { "epoch": 0.22282003710575138, "grad_norm": 4.852739334106445, "learning_rate": 8.902593734857046e-06, "loss": 0.3153, "step": 4804 }, { "epoch": 0.22286641929499074, "grad_norm": 7.705358028411865, "learning_rate": 8.902133630910603e-06, "loss": 0.3456, "step": 4805 }, { "epoch": 0.22291280148423007, "grad_norm": 16.272598266601562, "learning_rate": 8.901673442426492e-06, "loss": 0.4271, "step": 4806 }, { "epoch": 0.2229591836734694, "grad_norm": 7.171494007110596, "learning_rate": 8.901213169414685e-06, "loss": 0.2808, "step": 4807 }, { "epoch": 0.22300556586270873, "grad_norm": 3.9637413024902344, "learning_rate": 8.900752811885152e-06, "loss": 0.2404, "step": 4808 }, { "epoch": 0.22305194805194806, "grad_norm": 10.115901947021484, "learning_rate": 8.900292369847866e-06, "loss": 0.4861, "step": 4809 }, { "epoch": 0.2230983302411874, "grad_norm": 9.25144100189209, "learning_rate": 8.899831843312805e-06, "loss": 0.3303, "step": 4810 }, { "epoch": 0.22314471243042672, "grad_norm": 12.882155418395996, "learning_rate": 8.899371232289943e-06, "loss": 0.4671, "step": 4811 }, { "epoch": 0.22319109461966605, "grad_norm": 5.803696632385254, "learning_rate": 8.89891053678926e-06, "loss": 0.2864, "step": 4812 }, { "epoch": 0.22323747680890538, "grad_norm": 7.023453235626221, "learning_rate": 8.898449756820737e-06, "loss": 0.3443, "step": 4813 }, { "epoch": 0.2232838589981447, "grad_norm": 9.300724983215332, "learning_rate": 8.897988892394356e-06, "loss": 0.4101, "step": 4814 }, { "epoch": 0.22333024118738404, "grad_norm": 6.827544689178467, "learning_rate": 8.897527943520101e-06, "loss": 0.3625, "step": 4815 }, { "epoch": 0.22337662337662337, "grad_norm": 6.363280773162842, "learning_rate": 8.897066910207958e-06, "loss": 0.3886, "step": 4816 }, { "epoch": 0.2234230055658627, "grad_norm": 19.346467971801758, "learning_rate": 8.896605792467919e-06, "loss": 0.5178, "step": 4817 }, { "epoch": 0.22346938775510203, "grad_norm": 7.671358108520508, "learning_rate": 8.896144590309968e-06, "loss": 0.4245, "step": 4818 }, { "epoch": 0.22351576994434139, "grad_norm": 8.952940940856934, "learning_rate": 8.895683303744101e-06, "loss": 0.4242, "step": 4819 }, { "epoch": 0.22356215213358072, "grad_norm": 8.582958221435547, "learning_rate": 8.895221932780308e-06, "loss": 0.4527, "step": 4820 }, { "epoch": 0.22360853432282005, "grad_norm": 10.363530158996582, "learning_rate": 8.894760477428588e-06, "loss": 0.3526, "step": 4821 }, { "epoch": 0.22365491651205938, "grad_norm": 3.877509593963623, "learning_rate": 8.894298937698936e-06, "loss": 0.3092, "step": 4822 }, { "epoch": 0.2237012987012987, "grad_norm": 6.348064422607422, "learning_rate": 8.893837313601352e-06, "loss": 0.2452, "step": 4823 }, { "epoch": 0.22374768089053804, "grad_norm": 9.659207344055176, "learning_rate": 8.893375605145837e-06, "loss": 0.4539, "step": 4824 }, { "epoch": 0.22379406307977737, "grad_norm": 8.700492858886719, "learning_rate": 8.892913812342392e-06, "loss": 0.3697, "step": 4825 }, { "epoch": 0.2238404452690167, "grad_norm": 12.726099967956543, "learning_rate": 8.892451935201023e-06, "loss": 0.5525, "step": 4826 }, { "epoch": 0.22388682745825603, "grad_norm": 9.19735050201416, "learning_rate": 8.891989973731735e-06, "loss": 0.4342, "step": 4827 }, { "epoch": 0.22393320964749536, "grad_norm": 14.652799606323242, "learning_rate": 8.891527927944538e-06, "loss": 0.4964, "step": 4828 }, { "epoch": 0.2239795918367347, "grad_norm": 7.451423645019531, "learning_rate": 8.89106579784944e-06, "loss": 0.2861, "step": 4829 }, { "epoch": 0.22402597402597402, "grad_norm": 6.359726905822754, "learning_rate": 8.890603583456454e-06, "loss": 0.4202, "step": 4830 }, { "epoch": 0.22407235621521335, "grad_norm": 7.5032172203063965, "learning_rate": 8.890141284775596e-06, "loss": 0.2489, "step": 4831 }, { "epoch": 0.22411873840445268, "grad_norm": 5.961744785308838, "learning_rate": 8.889678901816875e-06, "loss": 0.3706, "step": 4832 }, { "epoch": 0.22416512059369204, "grad_norm": 8.722047805786133, "learning_rate": 8.889216434590315e-06, "loss": 0.4087, "step": 4833 }, { "epoch": 0.22421150278293137, "grad_norm": 5.1691789627075195, "learning_rate": 8.888753883105933e-06, "loss": 0.3623, "step": 4834 }, { "epoch": 0.2242578849721707, "grad_norm": 6.710554599761963, "learning_rate": 8.888291247373746e-06, "loss": 0.3749, "step": 4835 }, { "epoch": 0.22430426716141003, "grad_norm": 10.970551490783691, "learning_rate": 8.887828527403782e-06, "loss": 0.4424, "step": 4836 }, { "epoch": 0.22435064935064936, "grad_norm": 3.9224202632904053, "learning_rate": 8.887365723206061e-06, "loss": 0.1898, "step": 4837 }, { "epoch": 0.22439703153988869, "grad_norm": 4.327276229858398, "learning_rate": 8.886902834790616e-06, "loss": 0.3313, "step": 4838 }, { "epoch": 0.22444341372912802, "grad_norm": 4.542832374572754, "learning_rate": 8.886439862167468e-06, "loss": 0.3617, "step": 4839 }, { "epoch": 0.22448979591836735, "grad_norm": 5.22550630569458, "learning_rate": 8.885976805346651e-06, "loss": 0.347, "step": 4840 }, { "epoch": 0.22453617810760668, "grad_norm": 6.042749881744385, "learning_rate": 8.885513664338197e-06, "loss": 0.3207, "step": 4841 }, { "epoch": 0.224582560296846, "grad_norm": 7.742114067077637, "learning_rate": 8.885050439152138e-06, "loss": 0.4626, "step": 4842 }, { "epoch": 0.22462894248608534, "grad_norm": 11.123775482177734, "learning_rate": 8.884587129798511e-06, "loss": 0.3844, "step": 4843 }, { "epoch": 0.22467532467532467, "grad_norm": 11.384135246276855, "learning_rate": 8.884123736287353e-06, "loss": 0.4678, "step": 4844 }, { "epoch": 0.224721706864564, "grad_norm": 5.839734077453613, "learning_rate": 8.883660258628703e-06, "loss": 0.2956, "step": 4845 }, { "epoch": 0.22476808905380333, "grad_norm": 6.00986385345459, "learning_rate": 8.883196696832602e-06, "loss": 0.3541, "step": 4846 }, { "epoch": 0.22481447124304269, "grad_norm": 11.907238006591797, "learning_rate": 8.882733050909092e-06, "loss": 0.3501, "step": 4847 }, { "epoch": 0.22486085343228202, "grad_norm": 6.462866306304932, "learning_rate": 8.882269320868218e-06, "loss": 0.3432, "step": 4848 }, { "epoch": 0.22490723562152135, "grad_norm": 7.131966590881348, "learning_rate": 8.881805506720028e-06, "loss": 0.2772, "step": 4849 }, { "epoch": 0.22495361781076068, "grad_norm": 11.87358283996582, "learning_rate": 8.881341608474569e-06, "loss": 0.4323, "step": 4850 }, { "epoch": 0.225, "grad_norm": 9.703450202941895, "learning_rate": 8.88087762614189e-06, "loss": 0.4796, "step": 4851 }, { "epoch": 0.22504638218923934, "grad_norm": 4.223055362701416, "learning_rate": 8.880413559732046e-06, "loss": 0.2866, "step": 4852 }, { "epoch": 0.22509276437847867, "grad_norm": 6.096066474914551, "learning_rate": 8.879949409255087e-06, "loss": 0.2271, "step": 4853 }, { "epoch": 0.225139146567718, "grad_norm": 11.822192192077637, "learning_rate": 8.879485174721072e-06, "loss": 0.4908, "step": 4854 }, { "epoch": 0.22518552875695733, "grad_norm": 12.423650741577148, "learning_rate": 8.879020856140056e-06, "loss": 0.4198, "step": 4855 }, { "epoch": 0.22523191094619666, "grad_norm": 9.018067359924316, "learning_rate": 8.8785564535221e-06, "loss": 0.2913, "step": 4856 }, { "epoch": 0.225278293135436, "grad_norm": 12.24878215789795, "learning_rate": 8.878091966877263e-06, "loss": 0.4945, "step": 4857 }, { "epoch": 0.22532467532467532, "grad_norm": 8.713738441467285, "learning_rate": 8.87762739621561e-06, "loss": 0.4075, "step": 4858 }, { "epoch": 0.22537105751391465, "grad_norm": 23.621036529541016, "learning_rate": 8.877162741547204e-06, "loss": 0.3787, "step": 4859 }, { "epoch": 0.22541743970315398, "grad_norm": 6.1137590408325195, "learning_rate": 8.876698002882113e-06, "loss": 0.4201, "step": 4860 }, { "epoch": 0.22546382189239333, "grad_norm": 8.030590057373047, "learning_rate": 8.876233180230405e-06, "loss": 0.4712, "step": 4861 }, { "epoch": 0.22551020408163266, "grad_norm": 4.478220462799072, "learning_rate": 8.875768273602148e-06, "loss": 0.2173, "step": 4862 }, { "epoch": 0.225556586270872, "grad_norm": 4.510802745819092, "learning_rate": 8.875303283007417e-06, "loss": 0.2381, "step": 4863 }, { "epoch": 0.22560296846011132, "grad_norm": 7.929255485534668, "learning_rate": 8.874838208456283e-06, "loss": 0.4181, "step": 4864 }, { "epoch": 0.22564935064935066, "grad_norm": 9.509572982788086, "learning_rate": 8.874373049958824e-06, "loss": 0.4622, "step": 4865 }, { "epoch": 0.22569573283858999, "grad_norm": 6.953028202056885, "learning_rate": 8.873907807525115e-06, "loss": 0.3871, "step": 4866 }, { "epoch": 0.22574211502782932, "grad_norm": 9.1614351272583, "learning_rate": 8.873442481165238e-06, "loss": 0.3204, "step": 4867 }, { "epoch": 0.22578849721706865, "grad_norm": 5.446469306945801, "learning_rate": 8.872977070889273e-06, "loss": 0.3682, "step": 4868 }, { "epoch": 0.22583487940630798, "grad_norm": 5.285614013671875, "learning_rate": 8.8725115767073e-06, "loss": 0.3631, "step": 4869 }, { "epoch": 0.2258812615955473, "grad_norm": 7.551178455352783, "learning_rate": 8.87204599862941e-06, "loss": 0.3316, "step": 4870 }, { "epoch": 0.22592764378478664, "grad_norm": 6.435991287231445, "learning_rate": 8.871580336665685e-06, "loss": 0.4387, "step": 4871 }, { "epoch": 0.22597402597402597, "grad_norm": 7.618910789489746, "learning_rate": 8.871114590826211e-06, "loss": 0.386, "step": 4872 }, { "epoch": 0.2260204081632653, "grad_norm": 14.689817428588867, "learning_rate": 8.870648761121083e-06, "loss": 0.3307, "step": 4873 }, { "epoch": 0.22606679035250463, "grad_norm": 6.468154430389404, "learning_rate": 8.87018284756039e-06, "loss": 0.1926, "step": 4874 }, { "epoch": 0.22611317254174398, "grad_norm": 7.8625078201293945, "learning_rate": 8.869716850154231e-06, "loss": 0.2971, "step": 4875 }, { "epoch": 0.22615955473098331, "grad_norm": 7.318641185760498, "learning_rate": 8.869250768912694e-06, "loss": 0.2867, "step": 4876 }, { "epoch": 0.22620593692022264, "grad_norm": 10.306707382202148, "learning_rate": 8.86878460384588e-06, "loss": 0.3859, "step": 4877 }, { "epoch": 0.22625231910946197, "grad_norm": 5.499630928039551, "learning_rate": 8.86831835496389e-06, "loss": 0.2628, "step": 4878 }, { "epoch": 0.2262987012987013, "grad_norm": 13.182162284851074, "learning_rate": 8.867852022276821e-06, "loss": 0.3942, "step": 4879 }, { "epoch": 0.22634508348794063, "grad_norm": 5.852871894836426, "learning_rate": 8.867385605794779e-06, "loss": 0.3791, "step": 4880 }, { "epoch": 0.22639146567717996, "grad_norm": 3.9739270210266113, "learning_rate": 8.866919105527867e-06, "loss": 0.2245, "step": 4881 }, { "epoch": 0.2264378478664193, "grad_norm": 6.186591625213623, "learning_rate": 8.866452521486192e-06, "loss": 0.4201, "step": 4882 }, { "epoch": 0.22648423005565863, "grad_norm": 9.655303955078125, "learning_rate": 8.865985853679866e-06, "loss": 0.3929, "step": 4883 }, { "epoch": 0.22653061224489796, "grad_norm": 9.81385326385498, "learning_rate": 8.86551910211899e-06, "loss": 0.3864, "step": 4884 }, { "epoch": 0.22657699443413729, "grad_norm": 6.260024070739746, "learning_rate": 8.865052266813686e-06, "loss": 0.314, "step": 4885 }, { "epoch": 0.22662337662337662, "grad_norm": 6.042171955108643, "learning_rate": 8.864585347774061e-06, "loss": 0.3803, "step": 4886 }, { "epoch": 0.22666975881261595, "grad_norm": 9.220235824584961, "learning_rate": 8.864118345010233e-06, "loss": 0.443, "step": 4887 }, { "epoch": 0.22671614100185528, "grad_norm": 13.3972806930542, "learning_rate": 8.86365125853232e-06, "loss": 0.6045, "step": 4888 }, { "epoch": 0.2267625231910946, "grad_norm": 9.601094245910645, "learning_rate": 8.86318408835044e-06, "loss": 0.4054, "step": 4889 }, { "epoch": 0.22680890538033396, "grad_norm": 8.264838218688965, "learning_rate": 8.862716834474714e-06, "loss": 0.3319, "step": 4890 }, { "epoch": 0.2268552875695733, "grad_norm": 8.643636703491211, "learning_rate": 8.862249496915265e-06, "loss": 0.4344, "step": 4891 }, { "epoch": 0.22690166975881262, "grad_norm": 5.0217366218566895, "learning_rate": 8.861782075682219e-06, "loss": 0.39, "step": 4892 }, { "epoch": 0.22694805194805195, "grad_norm": 17.410728454589844, "learning_rate": 8.861314570785699e-06, "loss": 0.4607, "step": 4893 }, { "epoch": 0.22699443413729128, "grad_norm": 7.628273010253906, "learning_rate": 8.860846982235837e-06, "loss": 0.3457, "step": 4894 }, { "epoch": 0.22704081632653061, "grad_norm": 9.12720012664795, "learning_rate": 8.860379310042761e-06, "loss": 0.4847, "step": 4895 }, { "epoch": 0.22708719851576994, "grad_norm": 6.889496326446533, "learning_rate": 8.859911554216605e-06, "loss": 0.1915, "step": 4896 }, { "epoch": 0.22713358070500927, "grad_norm": 15.774773597717285, "learning_rate": 8.8594437147675e-06, "loss": 0.392, "step": 4897 }, { "epoch": 0.2271799628942486, "grad_norm": 6.96838903427124, "learning_rate": 8.858975791705581e-06, "loss": 0.3301, "step": 4898 }, { "epoch": 0.22722634508348794, "grad_norm": 10.042006492614746, "learning_rate": 8.858507785040989e-06, "loss": 0.3692, "step": 4899 }, { "epoch": 0.22727272727272727, "grad_norm": 6.389703750610352, "learning_rate": 8.85803969478386e-06, "loss": 0.3775, "step": 4900 }, { "epoch": 0.2273191094619666, "grad_norm": 5.902683734893799, "learning_rate": 8.857571520944336e-06, "loss": 0.3111, "step": 4901 }, { "epoch": 0.22736549165120593, "grad_norm": 13.077878952026367, "learning_rate": 8.85710326353256e-06, "loss": 0.4227, "step": 4902 }, { "epoch": 0.22741187384044526, "grad_norm": 6.14008903503418, "learning_rate": 8.856634922558676e-06, "loss": 0.3735, "step": 4903 }, { "epoch": 0.2274582560296846, "grad_norm": 9.96030330657959, "learning_rate": 8.85616649803283e-06, "loss": 0.4549, "step": 4904 }, { "epoch": 0.22750463821892394, "grad_norm": 6.553175926208496, "learning_rate": 8.85569798996517e-06, "loss": 0.438, "step": 4905 }, { "epoch": 0.22755102040816327, "grad_norm": 9.374247550964355, "learning_rate": 8.855229398365848e-06, "loss": 0.3719, "step": 4906 }, { "epoch": 0.2275974025974026, "grad_norm": 9.641942024230957, "learning_rate": 8.854760723245016e-06, "loss": 0.3912, "step": 4907 }, { "epoch": 0.22764378478664193, "grad_norm": 5.404511451721191, "learning_rate": 8.854291964612824e-06, "loss": 0.3065, "step": 4908 }, { "epoch": 0.22769016697588126, "grad_norm": 7.807247161865234, "learning_rate": 8.85382312247943e-06, "loss": 0.4028, "step": 4909 }, { "epoch": 0.2277365491651206, "grad_norm": 8.375845909118652, "learning_rate": 8.85335419685499e-06, "loss": 0.3898, "step": 4910 }, { "epoch": 0.22778293135435992, "grad_norm": 5.3835320472717285, "learning_rate": 8.852885187749666e-06, "loss": 0.3966, "step": 4911 }, { "epoch": 0.22782931354359925, "grad_norm": 6.19058084487915, "learning_rate": 8.852416095173615e-06, "loss": 0.3287, "step": 4912 }, { "epoch": 0.22787569573283858, "grad_norm": 7.129560947418213, "learning_rate": 8.851946919137001e-06, "loss": 0.3868, "step": 4913 }, { "epoch": 0.22792207792207791, "grad_norm": 5.615076541900635, "learning_rate": 8.85147765964999e-06, "loss": 0.3075, "step": 4914 }, { "epoch": 0.22796846011131724, "grad_norm": 6.319705963134766, "learning_rate": 8.851008316722745e-06, "loss": 0.4335, "step": 4915 }, { "epoch": 0.22801484230055657, "grad_norm": 6.55674934387207, "learning_rate": 8.850538890365438e-06, "loss": 0.3764, "step": 4916 }, { "epoch": 0.2280612244897959, "grad_norm": 10.476622581481934, "learning_rate": 8.850069380588236e-06, "loss": 0.3972, "step": 4917 }, { "epoch": 0.22810760667903526, "grad_norm": 7.954525947570801, "learning_rate": 8.84959978740131e-06, "loss": 0.5379, "step": 4918 }, { "epoch": 0.2281539888682746, "grad_norm": 6.602640151977539, "learning_rate": 8.849130110814837e-06, "loss": 0.3137, "step": 4919 }, { "epoch": 0.22820037105751392, "grad_norm": 4.102738380432129, "learning_rate": 8.84866035083899e-06, "loss": 0.2103, "step": 4920 }, { "epoch": 0.22824675324675325, "grad_norm": 10.249284744262695, "learning_rate": 8.848190507483946e-06, "loss": 0.4613, "step": 4921 }, { "epoch": 0.22829313543599258, "grad_norm": 6.879027843475342, "learning_rate": 8.847720580759885e-06, "loss": 0.3926, "step": 4922 }, { "epoch": 0.2283395176252319, "grad_norm": 9.42109203338623, "learning_rate": 8.847250570676987e-06, "loss": 0.4703, "step": 4923 }, { "epoch": 0.22838589981447124, "grad_norm": 7.2768683433532715, "learning_rate": 8.846780477245435e-06, "loss": 0.3374, "step": 4924 }, { "epoch": 0.22843228200371057, "grad_norm": 7.719288349151611, "learning_rate": 8.846310300475412e-06, "loss": 0.3204, "step": 4925 }, { "epoch": 0.2284786641929499, "grad_norm": 8.239294052124023, "learning_rate": 8.845840040377105e-06, "loss": 0.2602, "step": 4926 }, { "epoch": 0.22852504638218923, "grad_norm": 7.005068302154541, "learning_rate": 8.845369696960703e-06, "loss": 0.403, "step": 4927 }, { "epoch": 0.22857142857142856, "grad_norm": 8.346541404724121, "learning_rate": 8.844899270236393e-06, "loss": 0.4113, "step": 4928 }, { "epoch": 0.2286178107606679, "grad_norm": 7.6781511306762695, "learning_rate": 8.84442876021437e-06, "loss": 0.3403, "step": 4929 }, { "epoch": 0.22866419294990722, "grad_norm": 5.589034080505371, "learning_rate": 8.843958166904824e-06, "loss": 0.23, "step": 4930 }, { "epoch": 0.22871057513914655, "grad_norm": 8.205994606018066, "learning_rate": 8.843487490317954e-06, "loss": 0.327, "step": 4931 }, { "epoch": 0.2287569573283859, "grad_norm": 18.57159996032715, "learning_rate": 8.843016730463953e-06, "loss": 0.4196, "step": 4932 }, { "epoch": 0.22880333951762524, "grad_norm": 9.695507049560547, "learning_rate": 8.842545887353022e-06, "loss": 0.4014, "step": 4933 }, { "epoch": 0.22884972170686457, "grad_norm": 7.93945837020874, "learning_rate": 8.842074960995362e-06, "loss": 0.4341, "step": 4934 }, { "epoch": 0.2288961038961039, "grad_norm": 9.268058776855469, "learning_rate": 8.841603951401174e-06, "loss": 0.2521, "step": 4935 }, { "epoch": 0.22894248608534323, "grad_norm": 6.690122127532959, "learning_rate": 8.841132858580661e-06, "loss": 0.3689, "step": 4936 }, { "epoch": 0.22898886827458256, "grad_norm": 6.244087219238281, "learning_rate": 8.840661682544033e-06, "loss": 0.4249, "step": 4937 }, { "epoch": 0.2290352504638219, "grad_norm": 8.681095123291016, "learning_rate": 8.840190423301493e-06, "loss": 0.347, "step": 4938 }, { "epoch": 0.22908163265306122, "grad_norm": 7.216220855712891, "learning_rate": 8.839719080863255e-06, "loss": 0.3818, "step": 4939 }, { "epoch": 0.22912801484230055, "grad_norm": 6.825351238250732, "learning_rate": 8.839247655239528e-06, "loss": 0.3426, "step": 4940 }, { "epoch": 0.22917439703153988, "grad_norm": 4.5462141036987305, "learning_rate": 8.838776146440525e-06, "loss": 0.3296, "step": 4941 }, { "epoch": 0.2292207792207792, "grad_norm": 6.321938991546631, "learning_rate": 8.838304554476462e-06, "loss": 0.3444, "step": 4942 }, { "epoch": 0.22926716141001854, "grad_norm": 8.052131652832031, "learning_rate": 8.837832879357555e-06, "loss": 0.3192, "step": 4943 }, { "epoch": 0.22931354359925787, "grad_norm": 8.505196571350098, "learning_rate": 8.837361121094022e-06, "loss": 0.4865, "step": 4944 }, { "epoch": 0.2293599257884972, "grad_norm": 11.081387519836426, "learning_rate": 8.836889279696086e-06, "loss": 0.5053, "step": 4945 }, { "epoch": 0.22940630797773656, "grad_norm": 15.775344848632812, "learning_rate": 8.836417355173966e-06, "loss": 0.4044, "step": 4946 }, { "epoch": 0.2294526901669759, "grad_norm": 12.134913444519043, "learning_rate": 8.835945347537889e-06, "loss": 0.3846, "step": 4947 }, { "epoch": 0.22949907235621522, "grad_norm": 5.6196818351745605, "learning_rate": 8.835473256798077e-06, "loss": 0.3858, "step": 4948 }, { "epoch": 0.22954545454545455, "grad_norm": 4.408173084259033, "learning_rate": 8.835001082964763e-06, "loss": 0.2931, "step": 4949 }, { "epoch": 0.22959183673469388, "grad_norm": 4.7872772216796875, "learning_rate": 8.834528826048172e-06, "loss": 0.3666, "step": 4950 }, { "epoch": 0.2296382189239332, "grad_norm": 7.0791192054748535, "learning_rate": 8.834056486058536e-06, "loss": 0.3595, "step": 4951 }, { "epoch": 0.22968460111317254, "grad_norm": 7.030756950378418, "learning_rate": 8.833584063006088e-06, "loss": 0.3795, "step": 4952 }, { "epoch": 0.22973098330241187, "grad_norm": 10.495265007019043, "learning_rate": 8.833111556901065e-06, "loss": 0.2616, "step": 4953 }, { "epoch": 0.2297773654916512, "grad_norm": 7.603267669677734, "learning_rate": 8.832638967753699e-06, "loss": 0.3443, "step": 4954 }, { "epoch": 0.22982374768089053, "grad_norm": 4.387239456176758, "learning_rate": 8.832166295574234e-06, "loss": 0.3274, "step": 4955 }, { "epoch": 0.22987012987012986, "grad_norm": 11.00197982788086, "learning_rate": 8.831693540372905e-06, "loss": 0.3367, "step": 4956 }, { "epoch": 0.2299165120593692, "grad_norm": 9.21556282043457, "learning_rate": 8.83122070215996e-06, "loss": 0.4278, "step": 4957 }, { "epoch": 0.22996289424860852, "grad_norm": 9.333945274353027, "learning_rate": 8.830747780945636e-06, "loss": 0.4806, "step": 4958 }, { "epoch": 0.23000927643784785, "grad_norm": 6.701463222503662, "learning_rate": 8.830274776740183e-06, "loss": 0.3896, "step": 4959 }, { "epoch": 0.2300556586270872, "grad_norm": 10.146110534667969, "learning_rate": 8.829801689553848e-06, "loss": 0.4605, "step": 4960 }, { "epoch": 0.23010204081632654, "grad_norm": 7.432321071624756, "learning_rate": 8.829328519396878e-06, "loss": 0.3794, "step": 4961 }, { "epoch": 0.23014842300556587, "grad_norm": 9.508614540100098, "learning_rate": 8.828855266279526e-06, "loss": 0.3867, "step": 4962 }, { "epoch": 0.2301948051948052, "grad_norm": 4.576114177703857, "learning_rate": 8.828381930212045e-06, "loss": 0.2993, "step": 4963 }, { "epoch": 0.23024118738404453, "grad_norm": 8.693541526794434, "learning_rate": 8.827908511204688e-06, "loss": 0.3043, "step": 4964 }, { "epoch": 0.23028756957328386, "grad_norm": 7.165652751922607, "learning_rate": 8.827435009267711e-06, "loss": 0.4003, "step": 4965 }, { "epoch": 0.2303339517625232, "grad_norm": 6.456260681152344, "learning_rate": 8.826961424411376e-06, "loss": 0.3409, "step": 4966 }, { "epoch": 0.23038033395176252, "grad_norm": 8.8837308883667, "learning_rate": 8.826487756645938e-06, "loss": 0.392, "step": 4967 }, { "epoch": 0.23042671614100185, "grad_norm": 7.180461406707764, "learning_rate": 8.826014005981662e-06, "loss": 0.3988, "step": 4968 }, { "epoch": 0.23047309833024118, "grad_norm": 12.443035125732422, "learning_rate": 8.82554017242881e-06, "loss": 0.3696, "step": 4969 }, { "epoch": 0.2305194805194805, "grad_norm": 7.486672878265381, "learning_rate": 8.825066255997648e-06, "loss": 0.4055, "step": 4970 }, { "epoch": 0.23056586270871984, "grad_norm": 11.447973251342773, "learning_rate": 8.824592256698444e-06, "loss": 0.255, "step": 4971 }, { "epoch": 0.23061224489795917, "grad_norm": 8.527395248413086, "learning_rate": 8.824118174541464e-06, "loss": 0.349, "step": 4972 }, { "epoch": 0.2306586270871985, "grad_norm": 7.207907676696777, "learning_rate": 8.823644009536983e-06, "loss": 0.4023, "step": 4973 }, { "epoch": 0.23070500927643786, "grad_norm": 9.35189437866211, "learning_rate": 8.82316976169527e-06, "loss": 0.3704, "step": 4974 }, { "epoch": 0.2307513914656772, "grad_norm": 6.155200481414795, "learning_rate": 8.8226954310266e-06, "loss": 0.4311, "step": 4975 }, { "epoch": 0.23079777365491652, "grad_norm": 8.926700592041016, "learning_rate": 8.82222101754125e-06, "loss": 0.5342, "step": 4976 }, { "epoch": 0.23084415584415585, "grad_norm": 7.184947967529297, "learning_rate": 8.8217465212495e-06, "loss": 0.3221, "step": 4977 }, { "epoch": 0.23089053803339518, "grad_norm": 11.878738403320312, "learning_rate": 8.821271942161624e-06, "loss": 0.3936, "step": 4978 }, { "epoch": 0.2309369202226345, "grad_norm": 7.571997165679932, "learning_rate": 8.820797280287909e-06, "loss": 0.317, "step": 4979 }, { "epoch": 0.23098330241187384, "grad_norm": 5.6169114112854, "learning_rate": 8.820322535638638e-06, "loss": 0.3788, "step": 4980 }, { "epoch": 0.23102968460111317, "grad_norm": 12.89258861541748, "learning_rate": 8.81984770822409e-06, "loss": 0.3735, "step": 4981 }, { "epoch": 0.2310760667903525, "grad_norm": 5.033885955810547, "learning_rate": 8.819372798054559e-06, "loss": 0.3807, "step": 4982 }, { "epoch": 0.23112244897959183, "grad_norm": 7.085668563842773, "learning_rate": 8.818897805140329e-06, "loss": 0.4007, "step": 4983 }, { "epoch": 0.23116883116883116, "grad_norm": 10.944612503051758, "learning_rate": 8.818422729491693e-06, "loss": 0.6702, "step": 4984 }, { "epoch": 0.2312152133580705, "grad_norm": 5.443280220031738, "learning_rate": 8.817947571118944e-06, "loss": 0.3022, "step": 4985 }, { "epoch": 0.23126159554730982, "grad_norm": 8.480233192443848, "learning_rate": 8.817472330032373e-06, "loss": 0.4428, "step": 4986 }, { "epoch": 0.23130797773654915, "grad_norm": 12.791061401367188, "learning_rate": 8.816997006242278e-06, "loss": 0.5095, "step": 4987 }, { "epoch": 0.2313543599257885, "grad_norm": 9.32852554321289, "learning_rate": 8.816521599758956e-06, "loss": 0.4385, "step": 4988 }, { "epoch": 0.23140074211502784, "grad_norm": 10.34683895111084, "learning_rate": 8.816046110592707e-06, "loss": 0.4914, "step": 4989 }, { "epoch": 0.23144712430426717, "grad_norm": 7.419781684875488, "learning_rate": 8.815570538753833e-06, "loss": 0.3454, "step": 4990 }, { "epoch": 0.2314935064935065, "grad_norm": 5.269930362701416, "learning_rate": 8.815094884252634e-06, "loss": 0.3069, "step": 4991 }, { "epoch": 0.23153988868274583, "grad_norm": 7.03040075302124, "learning_rate": 8.814619147099417e-06, "loss": 0.3472, "step": 4992 }, { "epoch": 0.23158627087198516, "grad_norm": 4.8744425773620605, "learning_rate": 8.814143327304488e-06, "loss": 0.3318, "step": 4993 }, { "epoch": 0.2316326530612245, "grad_norm": 4.485311031341553, "learning_rate": 8.813667424878153e-06, "loss": 0.3311, "step": 4994 }, { "epoch": 0.23167903525046382, "grad_norm": 9.008476257324219, "learning_rate": 8.813191439830729e-06, "loss": 0.386, "step": 4995 }, { "epoch": 0.23172541743970315, "grad_norm": 7.3273491859436035, "learning_rate": 8.812715372172522e-06, "loss": 0.3495, "step": 4996 }, { "epoch": 0.23177179962894248, "grad_norm": 8.416301727294922, "learning_rate": 8.812239221913847e-06, "loss": 0.4447, "step": 4997 }, { "epoch": 0.2318181818181818, "grad_norm": 10.706235885620117, "learning_rate": 8.81176298906502e-06, "loss": 0.5498, "step": 4998 }, { "epoch": 0.23186456400742114, "grad_norm": 14.053934097290039, "learning_rate": 8.81128667363636e-06, "loss": 0.513, "step": 4999 }, { "epoch": 0.23191094619666047, "grad_norm": 5.37425422668457, "learning_rate": 8.810810275638183e-06, "loss": 0.388, "step": 5000 }, { "epoch": 0.2319573283858998, "grad_norm": 11.923826217651367, "learning_rate": 8.810333795080813e-06, "loss": 0.4317, "step": 5001 }, { "epoch": 0.23200371057513916, "grad_norm": 9.354204177856445, "learning_rate": 8.809857231974568e-06, "loss": 0.3726, "step": 5002 }, { "epoch": 0.2320500927643785, "grad_norm": 15.35142707824707, "learning_rate": 8.809380586329778e-06, "loss": 0.5794, "step": 5003 }, { "epoch": 0.23209647495361782, "grad_norm": 8.854461669921875, "learning_rate": 8.808903858156766e-06, "loss": 0.4268, "step": 5004 }, { "epoch": 0.23214285714285715, "grad_norm": 7.852575302124023, "learning_rate": 8.808427047465862e-06, "loss": 0.3593, "step": 5005 }, { "epoch": 0.23218923933209648, "grad_norm": 6.687551975250244, "learning_rate": 8.807950154267393e-06, "loss": 0.2569, "step": 5006 }, { "epoch": 0.2322356215213358, "grad_norm": 8.679941177368164, "learning_rate": 8.807473178571692e-06, "loss": 0.3385, "step": 5007 }, { "epoch": 0.23228200371057514, "grad_norm": 8.43472957611084, "learning_rate": 8.806996120389096e-06, "loss": 0.2494, "step": 5008 }, { "epoch": 0.23232838589981447, "grad_norm": 5.471770763397217, "learning_rate": 8.806518979729934e-06, "loss": 0.414, "step": 5009 }, { "epoch": 0.2323747680890538, "grad_norm": 12.395873069763184, "learning_rate": 8.806041756604547e-06, "loss": 0.5599, "step": 5010 }, { "epoch": 0.23242115027829313, "grad_norm": 8.439431190490723, "learning_rate": 8.805564451023273e-06, "loss": 0.3266, "step": 5011 }, { "epoch": 0.23246753246753246, "grad_norm": 6.783400535583496, "learning_rate": 8.805087062996452e-06, "loss": 0.298, "step": 5012 }, { "epoch": 0.2325139146567718, "grad_norm": 6.812885284423828, "learning_rate": 8.804609592534427e-06, "loss": 0.4019, "step": 5013 }, { "epoch": 0.23256029684601112, "grad_norm": 5.498708248138428, "learning_rate": 8.804132039647542e-06, "loss": 0.3565, "step": 5014 }, { "epoch": 0.23260667903525045, "grad_norm": 6.610374450683594, "learning_rate": 8.803654404346142e-06, "loss": 0.2715, "step": 5015 }, { "epoch": 0.23265306122448978, "grad_norm": 7.958767414093018, "learning_rate": 8.803176686640577e-06, "loss": 0.3766, "step": 5016 }, { "epoch": 0.23269944341372914, "grad_norm": 5.55361795425415, "learning_rate": 8.802698886541194e-06, "loss": 0.2421, "step": 5017 }, { "epoch": 0.23274582560296847, "grad_norm": 8.790413856506348, "learning_rate": 8.802221004058346e-06, "loss": 0.4215, "step": 5018 }, { "epoch": 0.2327922077922078, "grad_norm": 9.538187026977539, "learning_rate": 8.801743039202386e-06, "loss": 0.3451, "step": 5019 }, { "epoch": 0.23283858998144713, "grad_norm": 6.421465873718262, "learning_rate": 8.801264991983667e-06, "loss": 0.3095, "step": 5020 }, { "epoch": 0.23288497217068646, "grad_norm": 27.251100540161133, "learning_rate": 8.800786862412547e-06, "loss": 0.5535, "step": 5021 }, { "epoch": 0.2329313543599258, "grad_norm": 7.011402606964111, "learning_rate": 8.800308650499385e-06, "loss": 0.4003, "step": 5022 }, { "epoch": 0.23297773654916512, "grad_norm": 6.970159530639648, "learning_rate": 8.79983035625454e-06, "loss": 0.4293, "step": 5023 }, { "epoch": 0.23302411873840445, "grad_norm": 7.234253406524658, "learning_rate": 8.799351979688375e-06, "loss": 0.4359, "step": 5024 }, { "epoch": 0.23307050092764378, "grad_norm": 5.206501007080078, "learning_rate": 8.798873520811253e-06, "loss": 0.2639, "step": 5025 }, { "epoch": 0.2331168831168831, "grad_norm": 7.337706565856934, "learning_rate": 8.79839497963354e-06, "loss": 0.3903, "step": 5026 }, { "epoch": 0.23316326530612244, "grad_norm": 5.733938694000244, "learning_rate": 8.797916356165602e-06, "loss": 0.359, "step": 5027 }, { "epoch": 0.23320964749536177, "grad_norm": 9.05062198638916, "learning_rate": 8.79743765041781e-06, "loss": 0.4888, "step": 5028 }, { "epoch": 0.2332560296846011, "grad_norm": 8.556578636169434, "learning_rate": 8.796958862400535e-06, "loss": 0.4658, "step": 5029 }, { "epoch": 0.23330241187384043, "grad_norm": 4.976393222808838, "learning_rate": 8.79647999212415e-06, "loss": 0.2519, "step": 5030 }, { "epoch": 0.2333487940630798, "grad_norm": 5.607377052307129, "learning_rate": 8.796001039599025e-06, "loss": 0.3439, "step": 5031 }, { "epoch": 0.23339517625231912, "grad_norm": 6.738199234008789, "learning_rate": 8.795522004835543e-06, "loss": 0.353, "step": 5032 }, { "epoch": 0.23344155844155845, "grad_norm": 8.620035171508789, "learning_rate": 8.795042887844077e-06, "loss": 0.3793, "step": 5033 }, { "epoch": 0.23348794063079778, "grad_norm": 10.11410140991211, "learning_rate": 8.794563688635009e-06, "loss": 0.3748, "step": 5034 }, { "epoch": 0.2335343228200371, "grad_norm": 5.829498291015625, "learning_rate": 8.79408440721872e-06, "loss": 0.4284, "step": 5035 }, { "epoch": 0.23358070500927644, "grad_norm": 4.11688756942749, "learning_rate": 8.793605043605594e-06, "loss": 0.3566, "step": 5036 }, { "epoch": 0.23362708719851577, "grad_norm": 8.833001136779785, "learning_rate": 8.793125597806014e-06, "loss": 0.4845, "step": 5037 }, { "epoch": 0.2336734693877551, "grad_norm": 6.3737359046936035, "learning_rate": 8.79264606983037e-06, "loss": 0.3887, "step": 5038 }, { "epoch": 0.23371985157699443, "grad_norm": 15.833592414855957, "learning_rate": 8.792166459689048e-06, "loss": 0.3225, "step": 5039 }, { "epoch": 0.23376623376623376, "grad_norm": 8.37936782836914, "learning_rate": 8.791686767392442e-06, "loss": 0.3573, "step": 5040 }, { "epoch": 0.2338126159554731, "grad_norm": 7.46975564956665, "learning_rate": 8.79120699295094e-06, "loss": 0.2612, "step": 5041 }, { "epoch": 0.23385899814471242, "grad_norm": 4.329302787780762, "learning_rate": 8.79072713637494e-06, "loss": 0.3557, "step": 5042 }, { "epoch": 0.23390538033395175, "grad_norm": 11.118525505065918, "learning_rate": 8.790247197674835e-06, "loss": 0.3702, "step": 5043 }, { "epoch": 0.23395176252319108, "grad_norm": 9.80412769317627, "learning_rate": 8.789767176861023e-06, "loss": 0.3862, "step": 5044 }, { "epoch": 0.23399814471243044, "grad_norm": 8.275327682495117, "learning_rate": 8.789287073943905e-06, "loss": 0.4529, "step": 5045 }, { "epoch": 0.23404452690166977, "grad_norm": 8.126862525939941, "learning_rate": 8.788806888933881e-06, "loss": 0.5025, "step": 5046 }, { "epoch": 0.2340909090909091, "grad_norm": 4.919994354248047, "learning_rate": 8.788326621841354e-06, "loss": 0.2528, "step": 5047 }, { "epoch": 0.23413729128014843, "grad_norm": 4.363003253936768, "learning_rate": 8.787846272676728e-06, "loss": 0.2973, "step": 5048 }, { "epoch": 0.23418367346938776, "grad_norm": 5.3301496505737305, "learning_rate": 8.787365841450411e-06, "loss": 0.3388, "step": 5049 }, { "epoch": 0.2342300556586271, "grad_norm": 5.514976501464844, "learning_rate": 8.786885328172811e-06, "loss": 0.3656, "step": 5050 }, { "epoch": 0.23427643784786642, "grad_norm": 4.384842395782471, "learning_rate": 8.786404732854338e-06, "loss": 0.2761, "step": 5051 }, { "epoch": 0.23432282003710575, "grad_norm": 7.0082926750183105, "learning_rate": 8.785924055505404e-06, "loss": 0.3514, "step": 5052 }, { "epoch": 0.23436920222634508, "grad_norm": 11.176201820373535, "learning_rate": 8.785443296136422e-06, "loss": 0.4529, "step": 5053 }, { "epoch": 0.2344155844155844, "grad_norm": 6.822598457336426, "learning_rate": 8.784962454757806e-06, "loss": 0.3655, "step": 5054 }, { "epoch": 0.23446196660482374, "grad_norm": 9.215909957885742, "learning_rate": 8.784481531379975e-06, "loss": 0.3468, "step": 5055 }, { "epoch": 0.23450834879406307, "grad_norm": 14.144158363342285, "learning_rate": 8.78400052601335e-06, "loss": 0.4667, "step": 5056 }, { "epoch": 0.2345547309833024, "grad_norm": 7.135702133178711, "learning_rate": 8.783519438668347e-06, "loss": 0.3906, "step": 5057 }, { "epoch": 0.23460111317254173, "grad_norm": 5.032436370849609, "learning_rate": 8.783038269355392e-06, "loss": 0.3168, "step": 5058 }, { "epoch": 0.2346474953617811, "grad_norm": 12.154167175292969, "learning_rate": 8.78255701808491e-06, "loss": 0.3878, "step": 5059 }, { "epoch": 0.23469387755102042, "grad_norm": 11.43956470489502, "learning_rate": 8.782075684867322e-06, "loss": 0.36, "step": 5060 }, { "epoch": 0.23474025974025975, "grad_norm": 10.591726303100586, "learning_rate": 8.78159426971306e-06, "loss": 0.3684, "step": 5061 }, { "epoch": 0.23478664192949908, "grad_norm": 8.044568061828613, "learning_rate": 8.781112772632554e-06, "loss": 0.3368, "step": 5062 }, { "epoch": 0.2348330241187384, "grad_norm": 4.9467573165893555, "learning_rate": 8.780631193636233e-06, "loss": 0.3075, "step": 5063 }, { "epoch": 0.23487940630797774, "grad_norm": 5.80935525894165, "learning_rate": 8.780149532734531e-06, "loss": 0.3325, "step": 5064 }, { "epoch": 0.23492578849721707, "grad_norm": 5.509751796722412, "learning_rate": 8.779667789937885e-06, "loss": 0.4106, "step": 5065 }, { "epoch": 0.2349721706864564, "grad_norm": 12.145313262939453, "learning_rate": 8.779185965256728e-06, "loss": 0.4022, "step": 5066 }, { "epoch": 0.23501855287569573, "grad_norm": 10.322038650512695, "learning_rate": 8.778704058701501e-06, "loss": 0.4847, "step": 5067 }, { "epoch": 0.23506493506493506, "grad_norm": 14.086165428161621, "learning_rate": 8.778222070282645e-06, "loss": 0.5434, "step": 5068 }, { "epoch": 0.2351113172541744, "grad_norm": 13.45026969909668, "learning_rate": 8.7777400000106e-06, "loss": 0.5586, "step": 5069 }, { "epoch": 0.23515769944341372, "grad_norm": 11.330999374389648, "learning_rate": 8.77725784789581e-06, "loss": 0.4245, "step": 5070 }, { "epoch": 0.23520408163265305, "grad_norm": 10.718338966369629, "learning_rate": 8.77677561394872e-06, "loss": 0.4, "step": 5071 }, { "epoch": 0.23525046382189238, "grad_norm": 5.620733737945557, "learning_rate": 8.776293298179779e-06, "loss": 0.3875, "step": 5072 }, { "epoch": 0.23529684601113174, "grad_norm": 9.23205852508545, "learning_rate": 8.775810900599435e-06, "loss": 0.4241, "step": 5073 }, { "epoch": 0.23534322820037107, "grad_norm": 6.484734058380127, "learning_rate": 8.775328421218142e-06, "loss": 0.3918, "step": 5074 }, { "epoch": 0.2353896103896104, "grad_norm": 5.33280611038208, "learning_rate": 8.774845860046349e-06, "loss": 0.4608, "step": 5075 }, { "epoch": 0.23543599257884973, "grad_norm": 8.850046157836914, "learning_rate": 8.77436321709451e-06, "loss": 0.4442, "step": 5076 }, { "epoch": 0.23548237476808906, "grad_norm": 6.9148712158203125, "learning_rate": 8.773880492373082e-06, "loss": 0.3062, "step": 5077 }, { "epoch": 0.2355287569573284, "grad_norm": 6.952733039855957, "learning_rate": 8.773397685892525e-06, "loss": 0.4329, "step": 5078 }, { "epoch": 0.23557513914656772, "grad_norm": 5.1653947830200195, "learning_rate": 8.772914797663298e-06, "loss": 0.3233, "step": 5079 }, { "epoch": 0.23562152133580705, "grad_norm": 6.702409744262695, "learning_rate": 8.772431827695862e-06, "loss": 0.394, "step": 5080 }, { "epoch": 0.23566790352504638, "grad_norm": 10.361186027526855, "learning_rate": 8.771948776000679e-06, "loss": 0.5738, "step": 5081 }, { "epoch": 0.2357142857142857, "grad_norm": 8.437675476074219, "learning_rate": 8.771465642588215e-06, "loss": 0.3792, "step": 5082 }, { "epoch": 0.23576066790352504, "grad_norm": 16.172103881835938, "learning_rate": 8.770982427468938e-06, "loss": 0.4041, "step": 5083 }, { "epoch": 0.23580705009276437, "grad_norm": 4.521551132202148, "learning_rate": 8.770499130653317e-06, "loss": 0.3157, "step": 5084 }, { "epoch": 0.2358534322820037, "grad_norm": 4.978273868560791, "learning_rate": 8.770015752151818e-06, "loss": 0.4164, "step": 5085 }, { "epoch": 0.23589981447124303, "grad_norm": 7.044912338256836, "learning_rate": 8.769532291974919e-06, "loss": 0.3367, "step": 5086 }, { "epoch": 0.2359461966604824, "grad_norm": 14.586060523986816, "learning_rate": 8.76904875013309e-06, "loss": 0.6082, "step": 5087 }, { "epoch": 0.23599257884972172, "grad_norm": 7.5682053565979, "learning_rate": 8.768565126636806e-06, "loss": 0.436, "step": 5088 }, { "epoch": 0.23603896103896105, "grad_norm": 5.662740707397461, "learning_rate": 8.768081421496549e-06, "loss": 0.4502, "step": 5089 }, { "epoch": 0.23608534322820038, "grad_norm": 3.4225516319274902, "learning_rate": 8.767597634722793e-06, "loss": 0.3081, "step": 5090 }, { "epoch": 0.2361317254174397, "grad_norm": 5.971452713012695, "learning_rate": 8.767113766326023e-06, "loss": 0.3547, "step": 5091 }, { "epoch": 0.23617810760667904, "grad_norm": 5.261438369750977, "learning_rate": 8.766629816316722e-06, "loss": 0.3855, "step": 5092 }, { "epoch": 0.23622448979591837, "grad_norm": 4.333755970001221, "learning_rate": 8.766145784705368e-06, "loss": 0.257, "step": 5093 }, { "epoch": 0.2362708719851577, "grad_norm": 8.020179748535156, "learning_rate": 8.765661671502457e-06, "loss": 0.5453, "step": 5094 }, { "epoch": 0.23631725417439703, "grad_norm": 4.702314853668213, "learning_rate": 8.76517747671847e-06, "loss": 0.3549, "step": 5095 }, { "epoch": 0.23636363636363636, "grad_norm": 8.183516502380371, "learning_rate": 8.764693200363897e-06, "loss": 0.3264, "step": 5096 }, { "epoch": 0.2364100185528757, "grad_norm": 5.537232398986816, "learning_rate": 8.764208842449234e-06, "loss": 0.4082, "step": 5097 }, { "epoch": 0.23645640074211502, "grad_norm": 9.194929122924805, "learning_rate": 8.76372440298497e-06, "loss": 0.3809, "step": 5098 }, { "epoch": 0.23650278293135435, "grad_norm": 9.067070007324219, "learning_rate": 8.763239881981604e-06, "loss": 0.4067, "step": 5099 }, { "epoch": 0.23654916512059368, "grad_norm": 8.349956512451172, "learning_rate": 8.762755279449629e-06, "loss": 0.39, "step": 5100 }, { "epoch": 0.23659554730983304, "grad_norm": 10.810526847839355, "learning_rate": 8.762270595399547e-06, "loss": 0.3731, "step": 5101 }, { "epoch": 0.23664192949907237, "grad_norm": 9.154191017150879, "learning_rate": 8.761785829841857e-06, "loss": 0.4933, "step": 5102 }, { "epoch": 0.2366883116883117, "grad_norm": 5.758466720581055, "learning_rate": 8.76130098278706e-06, "loss": 0.3009, "step": 5103 }, { "epoch": 0.23673469387755103, "grad_norm": 12.31881046295166, "learning_rate": 8.760816054245662e-06, "loss": 0.394, "step": 5104 }, { "epoch": 0.23678107606679036, "grad_norm": 9.373419761657715, "learning_rate": 8.760331044228167e-06, "loss": 0.4604, "step": 5105 }, { "epoch": 0.2368274582560297, "grad_norm": 9.14036750793457, "learning_rate": 8.759845952745083e-06, "loss": 0.4244, "step": 5106 }, { "epoch": 0.23687384044526902, "grad_norm": 7.886539459228516, "learning_rate": 8.75936077980692e-06, "loss": 0.3056, "step": 5107 }, { "epoch": 0.23692022263450835, "grad_norm": 5.7310590744018555, "learning_rate": 8.75887552542419e-06, "loss": 0.4104, "step": 5108 }, { "epoch": 0.23696660482374768, "grad_norm": 7.747710704803467, "learning_rate": 8.758390189607402e-06, "loss": 0.3811, "step": 5109 }, { "epoch": 0.237012987012987, "grad_norm": 12.068671226501465, "learning_rate": 8.757904772367075e-06, "loss": 0.3765, "step": 5110 }, { "epoch": 0.23705936920222634, "grad_norm": 5.310039520263672, "learning_rate": 8.75741927371372e-06, "loss": 0.3405, "step": 5111 }, { "epoch": 0.23710575139146567, "grad_norm": 9.042022705078125, "learning_rate": 8.756933693657863e-06, "loss": 0.4053, "step": 5112 }, { "epoch": 0.237152133580705, "grad_norm": 6.851674556732178, "learning_rate": 8.756448032210016e-06, "loss": 0.4253, "step": 5113 }, { "epoch": 0.23719851576994433, "grad_norm": 8.837174415588379, "learning_rate": 8.755962289380704e-06, "loss": 0.4015, "step": 5114 }, { "epoch": 0.2372448979591837, "grad_norm": 6.153519153594971, "learning_rate": 8.755476465180451e-06, "loss": 0.3694, "step": 5115 }, { "epoch": 0.23729128014842302, "grad_norm": 7.578761577606201, "learning_rate": 8.75499055961978e-06, "loss": 0.3624, "step": 5116 }, { "epoch": 0.23733766233766235, "grad_norm": 5.219972610473633, "learning_rate": 8.75450457270922e-06, "loss": 0.424, "step": 5117 }, { "epoch": 0.23738404452690168, "grad_norm": 13.262004852294922, "learning_rate": 8.754018504459298e-06, "loss": 0.3959, "step": 5118 }, { "epoch": 0.237430426716141, "grad_norm": 6.331966876983643, "learning_rate": 8.753532354880545e-06, "loss": 0.3792, "step": 5119 }, { "epoch": 0.23747680890538034, "grad_norm": 9.303799629211426, "learning_rate": 8.753046123983494e-06, "loss": 0.468, "step": 5120 }, { "epoch": 0.23752319109461967, "grad_norm": 6.588095664978027, "learning_rate": 8.752559811778678e-06, "loss": 0.4203, "step": 5121 }, { "epoch": 0.237569573283859, "grad_norm": 10.54261589050293, "learning_rate": 8.752073418276633e-06, "loss": 0.3993, "step": 5122 }, { "epoch": 0.23761595547309833, "grad_norm": 7.082550525665283, "learning_rate": 8.751586943487897e-06, "loss": 0.3346, "step": 5123 }, { "epoch": 0.23766233766233766, "grad_norm": 7.419008255004883, "learning_rate": 8.751100387423007e-06, "loss": 0.3667, "step": 5124 }, { "epoch": 0.237708719851577, "grad_norm": 6.617929458618164, "learning_rate": 8.750613750092508e-06, "loss": 0.2189, "step": 5125 }, { "epoch": 0.23775510204081632, "grad_norm": 7.768576622009277, "learning_rate": 8.750127031506937e-06, "loss": 0.3789, "step": 5126 }, { "epoch": 0.23780148423005565, "grad_norm": 11.555643081665039, "learning_rate": 8.749640231676845e-06, "loss": 0.4516, "step": 5127 }, { "epoch": 0.23784786641929498, "grad_norm": 9.069271087646484, "learning_rate": 8.749153350612774e-06, "loss": 0.4195, "step": 5128 }, { "epoch": 0.23789424860853434, "grad_norm": 8.379877090454102, "learning_rate": 8.748666388325274e-06, "loss": 0.4133, "step": 5129 }, { "epoch": 0.23794063079777367, "grad_norm": 10.627845764160156, "learning_rate": 8.748179344824891e-06, "loss": 0.3867, "step": 5130 }, { "epoch": 0.237987012987013, "grad_norm": 11.741691589355469, "learning_rate": 8.747692220122181e-06, "loss": 0.4687, "step": 5131 }, { "epoch": 0.23803339517625233, "grad_norm": 13.716130256652832, "learning_rate": 8.747205014227697e-06, "loss": 0.396, "step": 5132 }, { "epoch": 0.23807977736549166, "grad_norm": 11.46983528137207, "learning_rate": 8.746717727151992e-06, "loss": 0.334, "step": 5133 }, { "epoch": 0.238126159554731, "grad_norm": 10.280959129333496, "learning_rate": 8.746230358905624e-06, "loss": 0.296, "step": 5134 }, { "epoch": 0.23817254174397032, "grad_norm": 5.520815372467041, "learning_rate": 8.74574290949915e-06, "loss": 0.4349, "step": 5135 }, { "epoch": 0.23821892393320965, "grad_norm": 4.987325191497803, "learning_rate": 8.745255378943133e-06, "loss": 0.39, "step": 5136 }, { "epoch": 0.23826530612244898, "grad_norm": 7.591817378997803, "learning_rate": 8.744767767248132e-06, "loss": 0.3925, "step": 5137 }, { "epoch": 0.2383116883116883, "grad_norm": 9.06141471862793, "learning_rate": 8.744280074424713e-06, "loss": 0.346, "step": 5138 }, { "epoch": 0.23835807050092764, "grad_norm": 7.260236740112305, "learning_rate": 8.743792300483443e-06, "loss": 0.5277, "step": 5139 }, { "epoch": 0.23840445269016697, "grad_norm": 6.057880401611328, "learning_rate": 8.743304445434885e-06, "loss": 0.3532, "step": 5140 }, { "epoch": 0.2384508348794063, "grad_norm": 9.109480857849121, "learning_rate": 8.742816509289612e-06, "loss": 0.4564, "step": 5141 }, { "epoch": 0.23849721706864563, "grad_norm": 13.673375129699707, "learning_rate": 8.742328492058193e-06, "loss": 0.531, "step": 5142 }, { "epoch": 0.23854359925788496, "grad_norm": 8.82250690460205, "learning_rate": 8.741840393751202e-06, "loss": 0.267, "step": 5143 }, { "epoch": 0.23858998144712432, "grad_norm": 4.239908695220947, "learning_rate": 8.74135221437921e-06, "loss": 0.2918, "step": 5144 }, { "epoch": 0.23863636363636365, "grad_norm": 8.99647331237793, "learning_rate": 8.740863953952798e-06, "loss": 0.3909, "step": 5145 }, { "epoch": 0.23868274582560298, "grad_norm": 4.845118999481201, "learning_rate": 8.740375612482541e-06, "loss": 0.3589, "step": 5146 }, { "epoch": 0.2387291280148423, "grad_norm": 5.82830286026001, "learning_rate": 8.73988718997902e-06, "loss": 0.4409, "step": 5147 }, { "epoch": 0.23877551020408164, "grad_norm": 5.625066757202148, "learning_rate": 8.739398686452813e-06, "loss": 0.3821, "step": 5148 }, { "epoch": 0.23882189239332097, "grad_norm": 8.827219009399414, "learning_rate": 8.738910101914509e-06, "loss": 0.3851, "step": 5149 }, { "epoch": 0.2388682745825603, "grad_norm": 8.46009349822998, "learning_rate": 8.738421436374686e-06, "loss": 0.4711, "step": 5150 }, { "epoch": 0.23891465677179963, "grad_norm": 4.404256820678711, "learning_rate": 8.737932689843938e-06, "loss": 0.3883, "step": 5151 }, { "epoch": 0.23896103896103896, "grad_norm": 12.466126441955566, "learning_rate": 8.737443862332849e-06, "loss": 0.4531, "step": 5152 }, { "epoch": 0.2390074211502783, "grad_norm": 10.782852172851562, "learning_rate": 8.736954953852008e-06, "loss": 0.4266, "step": 5153 }, { "epoch": 0.23905380333951762, "grad_norm": 8.8035306930542, "learning_rate": 8.736465964412011e-06, "loss": 0.4456, "step": 5154 }, { "epoch": 0.23910018552875695, "grad_norm": 5.178637504577637, "learning_rate": 8.735976894023449e-06, "loss": 0.3832, "step": 5155 }, { "epoch": 0.23914656771799628, "grad_norm": 5.8519134521484375, "learning_rate": 8.735487742696917e-06, "loss": 0.3803, "step": 5156 }, { "epoch": 0.2391929499072356, "grad_norm": 5.482799053192139, "learning_rate": 8.734998510443016e-06, "loss": 0.3424, "step": 5157 }, { "epoch": 0.23923933209647497, "grad_norm": 7.775164604187012, "learning_rate": 8.734509197272339e-06, "loss": 0.3408, "step": 5158 }, { "epoch": 0.2392857142857143, "grad_norm": 3.8406178951263428, "learning_rate": 8.734019803195492e-06, "loss": 0.3322, "step": 5159 }, { "epoch": 0.23933209647495363, "grad_norm": 9.33578872680664, "learning_rate": 8.733530328223076e-06, "loss": 0.4033, "step": 5160 }, { "epoch": 0.23937847866419296, "grad_norm": 10.945588111877441, "learning_rate": 8.733040772365692e-06, "loss": 0.3655, "step": 5161 }, { "epoch": 0.2394248608534323, "grad_norm": 7.329286575317383, "learning_rate": 8.73255113563395e-06, "loss": 0.4547, "step": 5162 }, { "epoch": 0.23947124304267162, "grad_norm": 9.127802848815918, "learning_rate": 8.732061418038458e-06, "loss": 0.3315, "step": 5163 }, { "epoch": 0.23951762523191095, "grad_norm": 7.586606979370117, "learning_rate": 8.731571619589822e-06, "loss": 0.3508, "step": 5164 }, { "epoch": 0.23956400742115028, "grad_norm": 11.222304344177246, "learning_rate": 8.731081740298657e-06, "loss": 0.3493, "step": 5165 }, { "epoch": 0.2396103896103896, "grad_norm": 4.609006404876709, "learning_rate": 8.730591780175572e-06, "loss": 0.2879, "step": 5166 }, { "epoch": 0.23965677179962894, "grad_norm": 9.721795082092285, "learning_rate": 8.730101739231185e-06, "loss": 0.4019, "step": 5167 }, { "epoch": 0.23970315398886827, "grad_norm": 8.111342430114746, "learning_rate": 8.72961161747611e-06, "loss": 0.2894, "step": 5168 }, { "epoch": 0.2397495361781076, "grad_norm": 9.28612232208252, "learning_rate": 8.729121414920966e-06, "loss": 0.5769, "step": 5169 }, { "epoch": 0.23979591836734693, "grad_norm": 7.516360282897949, "learning_rate": 8.728631131576376e-06, "loss": 0.3875, "step": 5170 }, { "epoch": 0.23984230055658626, "grad_norm": 5.5107550621032715, "learning_rate": 8.728140767452956e-06, "loss": 0.3062, "step": 5171 }, { "epoch": 0.23988868274582562, "grad_norm": 6.8396196365356445, "learning_rate": 8.727650322561337e-06, "loss": 0.3748, "step": 5172 }, { "epoch": 0.23993506493506495, "grad_norm": 6.150742053985596, "learning_rate": 8.727159796912137e-06, "loss": 0.339, "step": 5173 }, { "epoch": 0.23998144712430428, "grad_norm": 6.020337104797363, "learning_rate": 8.726669190515986e-06, "loss": 0.305, "step": 5174 }, { "epoch": 0.2400278293135436, "grad_norm": 6.5438714027404785, "learning_rate": 8.726178503383513e-06, "loss": 0.3253, "step": 5175 }, { "epoch": 0.24007421150278294, "grad_norm": 14.002525329589844, "learning_rate": 8.725687735525347e-06, "loss": 0.3781, "step": 5176 }, { "epoch": 0.24012059369202227, "grad_norm": 13.418303489685059, "learning_rate": 8.725196886952122e-06, "loss": 0.4974, "step": 5177 }, { "epoch": 0.2401669758812616, "grad_norm": 8.624680519104004, "learning_rate": 8.724705957674473e-06, "loss": 0.5262, "step": 5178 }, { "epoch": 0.24021335807050093, "grad_norm": 6.604357719421387, "learning_rate": 8.724214947703032e-06, "loss": 0.3119, "step": 5179 }, { "epoch": 0.24025974025974026, "grad_norm": 19.071868896484375, "learning_rate": 8.723723857048438e-06, "loss": 0.3706, "step": 5180 }, { "epoch": 0.2403061224489796, "grad_norm": 5.249638557434082, "learning_rate": 8.723232685721332e-06, "loss": 0.4094, "step": 5181 }, { "epoch": 0.24035250463821892, "grad_norm": 5.902254581451416, "learning_rate": 8.722741433732354e-06, "loss": 0.3417, "step": 5182 }, { "epoch": 0.24039888682745825, "grad_norm": 6.013190269470215, "learning_rate": 8.722250101092145e-06, "loss": 0.3953, "step": 5183 }, { "epoch": 0.24044526901669758, "grad_norm": 4.568800449371338, "learning_rate": 8.721758687811353e-06, "loss": 0.3263, "step": 5184 }, { "epoch": 0.2404916512059369, "grad_norm": 7.728143215179443, "learning_rate": 8.72126719390062e-06, "loss": 0.422, "step": 5185 }, { "epoch": 0.24053803339517627, "grad_norm": 8.540508270263672, "learning_rate": 8.720775619370596e-06, "loss": 0.3403, "step": 5186 }, { "epoch": 0.2405844155844156, "grad_norm": 8.643879890441895, "learning_rate": 8.720283964231933e-06, "loss": 0.4384, "step": 5187 }, { "epoch": 0.24063079777365493, "grad_norm": 6.4369730949401855, "learning_rate": 8.719792228495278e-06, "loss": 0.4078, "step": 5188 }, { "epoch": 0.24067717996289426, "grad_norm": 12.215349197387695, "learning_rate": 8.719300412171285e-06, "loss": 0.461, "step": 5189 }, { "epoch": 0.24072356215213359, "grad_norm": 5.0006794929504395, "learning_rate": 8.718808515270613e-06, "loss": 0.316, "step": 5190 }, { "epoch": 0.24076994434137292, "grad_norm": 9.308856010437012, "learning_rate": 8.718316537803915e-06, "loss": 0.3225, "step": 5191 }, { "epoch": 0.24081632653061225, "grad_norm": 7.811200141906738, "learning_rate": 8.71782447978185e-06, "loss": 0.3725, "step": 5192 }, { "epoch": 0.24086270871985158, "grad_norm": 8.670872688293457, "learning_rate": 8.71733234121508e-06, "loss": 0.3779, "step": 5193 }, { "epoch": 0.2409090909090909, "grad_norm": 18.788053512573242, "learning_rate": 8.716840122114265e-06, "loss": 0.6052, "step": 5194 }, { "epoch": 0.24095547309833024, "grad_norm": 7.068164825439453, "learning_rate": 8.716347822490068e-06, "loss": 0.3669, "step": 5195 }, { "epoch": 0.24100185528756957, "grad_norm": 4.8282151222229, "learning_rate": 8.715855442353157e-06, "loss": 0.3415, "step": 5196 }, { "epoch": 0.2410482374768089, "grad_norm": 3.8240606784820557, "learning_rate": 8.715362981714197e-06, "loss": 0.387, "step": 5197 }, { "epoch": 0.24109461966604823, "grad_norm": 6.389121055603027, "learning_rate": 8.714870440583859e-06, "loss": 0.3343, "step": 5198 }, { "epoch": 0.24114100185528756, "grad_norm": 9.145369529724121, "learning_rate": 8.71437781897281e-06, "loss": 0.4216, "step": 5199 }, { "epoch": 0.24118738404452691, "grad_norm": 5.468220233917236, "learning_rate": 8.713885116891724e-06, "loss": 0.3601, "step": 5200 }, { "epoch": 0.24123376623376624, "grad_norm": 9.422167778015137, "learning_rate": 8.713392334351279e-06, "loss": 0.2527, "step": 5201 }, { "epoch": 0.24128014842300557, "grad_norm": 5.763785362243652, "learning_rate": 8.712899471362146e-06, "loss": 0.4187, "step": 5202 }, { "epoch": 0.2413265306122449, "grad_norm": 9.695267677307129, "learning_rate": 8.712406527935005e-06, "loss": 0.4395, "step": 5203 }, { "epoch": 0.24137291280148424, "grad_norm": 13.41118335723877, "learning_rate": 8.711913504080534e-06, "loss": 0.5004, "step": 5204 }, { "epoch": 0.24141929499072357, "grad_norm": 5.851684093475342, "learning_rate": 8.711420399809416e-06, "loss": 0.3001, "step": 5205 }, { "epoch": 0.2414656771799629, "grad_norm": 5.233016014099121, "learning_rate": 8.710927215132332e-06, "loss": 0.3763, "step": 5206 }, { "epoch": 0.24151205936920223, "grad_norm": 4.218788146972656, "learning_rate": 8.710433950059966e-06, "loss": 0.3446, "step": 5207 }, { "epoch": 0.24155844155844156, "grad_norm": 7.273904323577881, "learning_rate": 8.709940604603006e-06, "loss": 0.3148, "step": 5208 }, { "epoch": 0.24160482374768089, "grad_norm": 10.913249969482422, "learning_rate": 8.70944717877214e-06, "loss": 0.3507, "step": 5209 }, { "epoch": 0.24165120593692022, "grad_norm": 6.536726474761963, "learning_rate": 8.708953672578057e-06, "loss": 0.31, "step": 5210 }, { "epoch": 0.24169758812615955, "grad_norm": 9.562882423400879, "learning_rate": 8.70846008603145e-06, "loss": 0.3787, "step": 5211 }, { "epoch": 0.24174397031539888, "grad_norm": 5.916938781738281, "learning_rate": 8.70796641914301e-06, "loss": 0.3304, "step": 5212 }, { "epoch": 0.2417903525046382, "grad_norm": 4.504034042358398, "learning_rate": 8.707472671923433e-06, "loss": 0.3676, "step": 5213 }, { "epoch": 0.24183673469387756, "grad_norm": 8.509995460510254, "learning_rate": 8.706978844383418e-06, "loss": 0.399, "step": 5214 }, { "epoch": 0.2418831168831169, "grad_norm": 12.302345275878906, "learning_rate": 8.70648493653366e-06, "loss": 0.4717, "step": 5215 }, { "epoch": 0.24192949907235622, "grad_norm": 5.498287677764893, "learning_rate": 8.705990948384863e-06, "loss": 0.3269, "step": 5216 }, { "epoch": 0.24197588126159555, "grad_norm": 12.687846183776855, "learning_rate": 8.705496879947723e-06, "loss": 0.4194, "step": 5217 }, { "epoch": 0.24202226345083488, "grad_norm": 5.588613033294678, "learning_rate": 8.705002731232951e-06, "loss": 0.2469, "step": 5218 }, { "epoch": 0.24206864564007421, "grad_norm": 10.286958694458008, "learning_rate": 8.704508502251247e-06, "loss": 0.3157, "step": 5219 }, { "epoch": 0.24211502782931354, "grad_norm": 5.438316345214844, "learning_rate": 8.704014193013321e-06, "loss": 0.4165, "step": 5220 }, { "epoch": 0.24216141001855288, "grad_norm": 7.555276870727539, "learning_rate": 8.703519803529881e-06, "loss": 0.4812, "step": 5221 }, { "epoch": 0.2422077922077922, "grad_norm": 9.117146492004395, "learning_rate": 8.703025333811639e-06, "loss": 0.4727, "step": 5222 }, { "epoch": 0.24225417439703154, "grad_norm": 20.446338653564453, "learning_rate": 8.702530783869304e-06, "loss": 0.4931, "step": 5223 }, { "epoch": 0.24230055658627087, "grad_norm": 15.693017959594727, "learning_rate": 8.702036153713594e-06, "loss": 0.4207, "step": 5224 }, { "epoch": 0.2423469387755102, "grad_norm": 7.682109832763672, "learning_rate": 8.701541443355224e-06, "loss": 0.2816, "step": 5225 }, { "epoch": 0.24239332096474953, "grad_norm": 6.356196880340576, "learning_rate": 8.701046652804909e-06, "loss": 0.2256, "step": 5226 }, { "epoch": 0.24243970315398886, "grad_norm": 6.478034019470215, "learning_rate": 8.700551782073373e-06, "loss": 0.4025, "step": 5227 }, { "epoch": 0.2424860853432282, "grad_norm": 6.156540870666504, "learning_rate": 8.70005683117133e-06, "loss": 0.3656, "step": 5228 }, { "epoch": 0.24253246753246754, "grad_norm": 11.91112995147705, "learning_rate": 8.699561800109511e-06, "loss": 0.4916, "step": 5229 }, { "epoch": 0.24257884972170687, "grad_norm": 5.0655107498168945, "learning_rate": 8.699066688898636e-06, "loss": 0.2758, "step": 5230 }, { "epoch": 0.2426252319109462, "grad_norm": 9.049759864807129, "learning_rate": 8.698571497549432e-06, "loss": 0.4544, "step": 5231 }, { "epoch": 0.24267161410018553, "grad_norm": 9.072227478027344, "learning_rate": 8.698076226072628e-06, "loss": 0.2939, "step": 5232 }, { "epoch": 0.24271799628942486, "grad_norm": 5.7445387840271, "learning_rate": 8.697580874478952e-06, "loss": 0.3959, "step": 5233 }, { "epoch": 0.2427643784786642, "grad_norm": 5.129321098327637, "learning_rate": 8.697085442779139e-06, "loss": 0.3674, "step": 5234 }, { "epoch": 0.24281076066790352, "grad_norm": 8.442296028137207, "learning_rate": 8.696589930983918e-06, "loss": 0.4104, "step": 5235 }, { "epoch": 0.24285714285714285, "grad_norm": 8.897485733032227, "learning_rate": 8.696094339104024e-06, "loss": 0.4843, "step": 5236 }, { "epoch": 0.24290352504638218, "grad_norm": 10.09192943572998, "learning_rate": 8.6955986671502e-06, "loss": 0.4621, "step": 5237 }, { "epoch": 0.24294990723562152, "grad_norm": 6.903140068054199, "learning_rate": 8.695102915133176e-06, "loss": 0.4357, "step": 5238 }, { "epoch": 0.24299628942486085, "grad_norm": 4.5715532302856445, "learning_rate": 8.6946070830637e-06, "loss": 0.3189, "step": 5239 }, { "epoch": 0.24304267161410018, "grad_norm": 7.232194900512695, "learning_rate": 8.694111170952508e-06, "loss": 0.3365, "step": 5240 }, { "epoch": 0.2430890538033395, "grad_norm": 12.425163269042969, "learning_rate": 8.693615178810346e-06, "loss": 0.3903, "step": 5241 }, { "epoch": 0.24313543599257886, "grad_norm": 5.109088897705078, "learning_rate": 8.693119106647958e-06, "loss": 0.2663, "step": 5242 }, { "epoch": 0.2431818181818182, "grad_norm": 9.803217887878418, "learning_rate": 8.692622954476096e-06, "loss": 0.448, "step": 5243 }, { "epoch": 0.24322820037105752, "grad_norm": 9.780147552490234, "learning_rate": 8.692126722305503e-06, "loss": 0.409, "step": 5244 }, { "epoch": 0.24327458256029685, "grad_norm": 6.011251926422119, "learning_rate": 8.691630410146931e-06, "loss": 0.2713, "step": 5245 }, { "epoch": 0.24332096474953618, "grad_norm": 8.557689666748047, "learning_rate": 8.691134018011136e-06, "loss": 0.3165, "step": 5246 }, { "epoch": 0.24336734693877551, "grad_norm": 8.874813079833984, "learning_rate": 8.690637545908868e-06, "loss": 0.2999, "step": 5247 }, { "epoch": 0.24341372912801484, "grad_norm": 5.2203569412231445, "learning_rate": 8.690140993850884e-06, "loss": 0.3158, "step": 5248 }, { "epoch": 0.24346011131725417, "grad_norm": 6.566399097442627, "learning_rate": 8.689644361847943e-06, "loss": 0.295, "step": 5249 }, { "epoch": 0.2435064935064935, "grad_norm": 8.73420524597168, "learning_rate": 8.689147649910801e-06, "loss": 0.4052, "step": 5250 }, { "epoch": 0.24355287569573283, "grad_norm": 10.322296142578125, "learning_rate": 8.688650858050222e-06, "loss": 0.3621, "step": 5251 }, { "epoch": 0.24359925788497216, "grad_norm": 7.491859436035156, "learning_rate": 8.688153986276969e-06, "loss": 0.4482, "step": 5252 }, { "epoch": 0.2436456400742115, "grad_norm": 8.621410369873047, "learning_rate": 8.687657034601801e-06, "loss": 0.3639, "step": 5253 }, { "epoch": 0.24369202226345082, "grad_norm": 6.4631147384643555, "learning_rate": 8.687160003035493e-06, "loss": 0.2457, "step": 5254 }, { "epoch": 0.24373840445269015, "grad_norm": 9.053229331970215, "learning_rate": 8.686662891588807e-06, "loss": 0.3849, "step": 5255 }, { "epoch": 0.2437847866419295, "grad_norm": 9.213759422302246, "learning_rate": 8.686165700272513e-06, "loss": 0.4365, "step": 5256 }, { "epoch": 0.24383116883116884, "grad_norm": 5.7996015548706055, "learning_rate": 8.685668429097382e-06, "loss": 0.404, "step": 5257 }, { "epoch": 0.24387755102040817, "grad_norm": 9.390847206115723, "learning_rate": 8.685171078074191e-06, "loss": 0.3642, "step": 5258 }, { "epoch": 0.2439239332096475, "grad_norm": 9.887353897094727, "learning_rate": 8.684673647213712e-06, "loss": 0.3792, "step": 5259 }, { "epoch": 0.24397031539888683, "grad_norm": 5.486147403717041, "learning_rate": 8.684176136526722e-06, "loss": 0.3477, "step": 5260 }, { "epoch": 0.24401669758812616, "grad_norm": 6.419400215148926, "learning_rate": 8.683678546023998e-06, "loss": 0.3803, "step": 5261 }, { "epoch": 0.2440630797773655, "grad_norm": 14.253686904907227, "learning_rate": 8.683180875716323e-06, "loss": 0.5631, "step": 5262 }, { "epoch": 0.24410946196660482, "grad_norm": 5.801263332366943, "learning_rate": 8.682683125614475e-06, "loss": 0.2822, "step": 5263 }, { "epoch": 0.24415584415584415, "grad_norm": 7.04274845123291, "learning_rate": 8.682185295729241e-06, "loss": 0.3264, "step": 5264 }, { "epoch": 0.24420222634508348, "grad_norm": 10.640695571899414, "learning_rate": 8.681687386071405e-06, "loss": 0.4452, "step": 5265 }, { "epoch": 0.24424860853432281, "grad_norm": 12.60137939453125, "learning_rate": 8.681189396651752e-06, "loss": 0.4245, "step": 5266 }, { "epoch": 0.24429499072356214, "grad_norm": 5.668396472930908, "learning_rate": 8.680691327481073e-06, "loss": 0.3033, "step": 5267 }, { "epoch": 0.24434137291280147, "grad_norm": 11.99423599243164, "learning_rate": 8.680193178570157e-06, "loss": 0.3857, "step": 5268 }, { "epoch": 0.2443877551020408, "grad_norm": 4.150265216827393, "learning_rate": 8.6796949499298e-06, "loss": 0.3338, "step": 5269 }, { "epoch": 0.24443413729128013, "grad_norm": 8.724181175231934, "learning_rate": 8.67919664157079e-06, "loss": 0.3264, "step": 5270 }, { "epoch": 0.2444805194805195, "grad_norm": 6.316312789916992, "learning_rate": 8.678698253503925e-06, "loss": 0.4065, "step": 5271 }, { "epoch": 0.24452690166975882, "grad_norm": 10.651338577270508, "learning_rate": 8.678199785740003e-06, "loss": 0.3889, "step": 5272 }, { "epoch": 0.24457328385899815, "grad_norm": 8.081408500671387, "learning_rate": 8.677701238289824e-06, "loss": 0.3856, "step": 5273 }, { "epoch": 0.24461966604823748, "grad_norm": 8.465667724609375, "learning_rate": 8.677202611164188e-06, "loss": 0.3583, "step": 5274 }, { "epoch": 0.2446660482374768, "grad_norm": 5.858188629150391, "learning_rate": 8.676703904373894e-06, "loss": 0.3242, "step": 5275 }, { "epoch": 0.24471243042671614, "grad_norm": 9.723709106445312, "learning_rate": 8.676205117929752e-06, "loss": 0.3669, "step": 5276 }, { "epoch": 0.24475881261595547, "grad_norm": 5.992940425872803, "learning_rate": 8.675706251842564e-06, "loss": 0.327, "step": 5277 }, { "epoch": 0.2448051948051948, "grad_norm": 5.329188346862793, "learning_rate": 8.675207306123139e-06, "loss": 0.3745, "step": 5278 }, { "epoch": 0.24485157699443413, "grad_norm": 7.44303560256958, "learning_rate": 8.674708280782285e-06, "loss": 0.4516, "step": 5279 }, { "epoch": 0.24489795918367346, "grad_norm": 10.167285919189453, "learning_rate": 8.674209175830815e-06, "loss": 0.3912, "step": 5280 }, { "epoch": 0.2449443413729128, "grad_norm": 10.260669708251953, "learning_rate": 8.673709991279542e-06, "loss": 0.4817, "step": 5281 }, { "epoch": 0.24499072356215212, "grad_norm": 4.308885097503662, "learning_rate": 8.673210727139278e-06, "loss": 0.3554, "step": 5282 }, { "epoch": 0.24503710575139145, "grad_norm": 6.041687965393066, "learning_rate": 8.672711383420842e-06, "loss": 0.3469, "step": 5283 }, { "epoch": 0.24508348794063078, "grad_norm": 5.917057037353516, "learning_rate": 8.672211960135053e-06, "loss": 0.36, "step": 5284 }, { "epoch": 0.24512987012987014, "grad_norm": 6.555882453918457, "learning_rate": 8.671712457292725e-06, "loss": 0.3581, "step": 5285 }, { "epoch": 0.24517625231910947, "grad_norm": 29.441303253173828, "learning_rate": 8.671212874904685e-06, "loss": 0.3433, "step": 5286 }, { "epoch": 0.2452226345083488, "grad_norm": 8.16075325012207, "learning_rate": 8.670713212981754e-06, "loss": 0.3453, "step": 5287 }, { "epoch": 0.24526901669758813, "grad_norm": 7.165824890136719, "learning_rate": 8.670213471534759e-06, "loss": 0.2464, "step": 5288 }, { "epoch": 0.24531539888682746, "grad_norm": 11.232694625854492, "learning_rate": 8.669713650574524e-06, "loss": 0.3107, "step": 5289 }, { "epoch": 0.2453617810760668, "grad_norm": 8.07663631439209, "learning_rate": 8.669213750111877e-06, "loss": 0.3738, "step": 5290 }, { "epoch": 0.24540816326530612, "grad_norm": 8.765604019165039, "learning_rate": 8.668713770157653e-06, "loss": 0.4137, "step": 5291 }, { "epoch": 0.24545454545454545, "grad_norm": 8.017051696777344, "learning_rate": 8.668213710722678e-06, "loss": 0.4146, "step": 5292 }, { "epoch": 0.24550092764378478, "grad_norm": 7.444226264953613, "learning_rate": 8.667713571817786e-06, "loss": 0.381, "step": 5293 }, { "epoch": 0.2455473098330241, "grad_norm": 4.876974582672119, "learning_rate": 8.667213353453816e-06, "loss": 0.3179, "step": 5294 }, { "epoch": 0.24559369202226344, "grad_norm": 8.987226486206055, "learning_rate": 8.666713055641603e-06, "loss": 0.4356, "step": 5295 }, { "epoch": 0.24564007421150277, "grad_norm": 6.583431720733643, "learning_rate": 8.666212678391985e-06, "loss": 0.2742, "step": 5296 }, { "epoch": 0.2456864564007421, "grad_norm": 6.303699493408203, "learning_rate": 8.665712221715802e-06, "loss": 0.324, "step": 5297 }, { "epoch": 0.24573283858998143, "grad_norm": 16.96002960205078, "learning_rate": 8.6652116856239e-06, "loss": 0.4371, "step": 5298 }, { "epoch": 0.2457792207792208, "grad_norm": 11.146276473999023, "learning_rate": 8.664711070127117e-06, "loss": 0.2946, "step": 5299 }, { "epoch": 0.24582560296846012, "grad_norm": 5.499355792999268, "learning_rate": 8.664210375236304e-06, "loss": 0.421, "step": 5300 }, { "epoch": 0.24587198515769945, "grad_norm": 11.219053268432617, "learning_rate": 8.663709600962305e-06, "loss": 0.4445, "step": 5301 }, { "epoch": 0.24591836734693878, "grad_norm": 6.222542762756348, "learning_rate": 8.66320874731597e-06, "loss": 0.3064, "step": 5302 }, { "epoch": 0.2459647495361781, "grad_norm": 6.334029674530029, "learning_rate": 8.66270781430815e-06, "loss": 0.4402, "step": 5303 }, { "epoch": 0.24601113172541744, "grad_norm": 6.845684051513672, "learning_rate": 8.662206801949694e-06, "loss": 0.3947, "step": 5304 }, { "epoch": 0.24605751391465677, "grad_norm": 4.546755790710449, "learning_rate": 8.661705710251462e-06, "loss": 0.3741, "step": 5305 }, { "epoch": 0.2461038961038961, "grad_norm": 5.266145706176758, "learning_rate": 8.661204539224309e-06, "loss": 0.2951, "step": 5306 }, { "epoch": 0.24615027829313543, "grad_norm": 8.125436782836914, "learning_rate": 8.660703288879087e-06, "loss": 0.4622, "step": 5307 }, { "epoch": 0.24619666048237476, "grad_norm": 9.03173828125, "learning_rate": 8.660201959226662e-06, "loss": 0.4471, "step": 5308 }, { "epoch": 0.2462430426716141, "grad_norm": 5.788270950317383, "learning_rate": 8.65970055027789e-06, "loss": 0.2525, "step": 5309 }, { "epoch": 0.24628942486085342, "grad_norm": 8.874767303466797, "learning_rate": 8.659199062043638e-06, "loss": 0.4174, "step": 5310 }, { "epoch": 0.24633580705009275, "grad_norm": 6.728924751281738, "learning_rate": 8.658697494534766e-06, "loss": 0.3811, "step": 5311 }, { "epoch": 0.24638218923933208, "grad_norm": 6.884638786315918, "learning_rate": 8.658195847762144e-06, "loss": 0.3167, "step": 5312 }, { "epoch": 0.24642857142857144, "grad_norm": 17.397253036499023, "learning_rate": 8.657694121736638e-06, "loss": 0.3893, "step": 5313 }, { "epoch": 0.24647495361781077, "grad_norm": 12.194684028625488, "learning_rate": 8.657192316469118e-06, "loss": 0.4601, "step": 5314 }, { "epoch": 0.2465213358070501, "grad_norm": 4.30626916885376, "learning_rate": 8.656690431970456e-06, "loss": 0.3337, "step": 5315 }, { "epoch": 0.24656771799628943, "grad_norm": 8.75407600402832, "learning_rate": 8.656188468251523e-06, "loss": 0.4665, "step": 5316 }, { "epoch": 0.24661410018552876, "grad_norm": 7.092494010925293, "learning_rate": 8.655686425323196e-06, "loss": 0.3867, "step": 5317 }, { "epoch": 0.2466604823747681, "grad_norm": 5.634532928466797, "learning_rate": 8.65518430319635e-06, "loss": 0.3277, "step": 5318 }, { "epoch": 0.24670686456400742, "grad_norm": 9.632662773132324, "learning_rate": 8.654682101881866e-06, "loss": 0.529, "step": 5319 }, { "epoch": 0.24675324675324675, "grad_norm": 5.84660005569458, "learning_rate": 8.65417982139062e-06, "loss": 0.4003, "step": 5320 }, { "epoch": 0.24679962894248608, "grad_norm": 3.804738998413086, "learning_rate": 8.653677461733498e-06, "loss": 0.3289, "step": 5321 }, { "epoch": 0.2468460111317254, "grad_norm": 8.32066822052002, "learning_rate": 8.65317502292138e-06, "loss": 0.3611, "step": 5322 }, { "epoch": 0.24689239332096474, "grad_norm": 5.206508636474609, "learning_rate": 8.65267250496515e-06, "loss": 0.2986, "step": 5323 }, { "epoch": 0.24693877551020407, "grad_norm": 7.536147594451904, "learning_rate": 8.652169907875698e-06, "loss": 0.33, "step": 5324 }, { "epoch": 0.2469851576994434, "grad_norm": 8.852384567260742, "learning_rate": 8.651667231663911e-06, "loss": 0.5244, "step": 5325 }, { "epoch": 0.24703153988868273, "grad_norm": 4.434336185455322, "learning_rate": 8.651164476340681e-06, "loss": 0.3059, "step": 5326 }, { "epoch": 0.2470779220779221, "grad_norm": 7.49375581741333, "learning_rate": 8.650661641916897e-06, "loss": 0.3586, "step": 5327 }, { "epoch": 0.24712430426716142, "grad_norm": 5.712112903594971, "learning_rate": 8.650158728403454e-06, "loss": 0.3896, "step": 5328 }, { "epoch": 0.24717068645640075, "grad_norm": 6.13677453994751, "learning_rate": 8.649655735811248e-06, "loss": 0.3958, "step": 5329 }, { "epoch": 0.24721706864564008, "grad_norm": 5.2773637771606445, "learning_rate": 8.649152664151175e-06, "loss": 0.3439, "step": 5330 }, { "epoch": 0.2472634508348794, "grad_norm": 5.857161521911621, "learning_rate": 8.648649513434135e-06, "loss": 0.274, "step": 5331 }, { "epoch": 0.24730983302411874, "grad_norm": 9.597196578979492, "learning_rate": 8.648146283671028e-06, "loss": 0.3423, "step": 5332 }, { "epoch": 0.24735621521335807, "grad_norm": 8.244443893432617, "learning_rate": 8.647642974872753e-06, "loss": 0.2658, "step": 5333 }, { "epoch": 0.2474025974025974, "grad_norm": 7.991871356964111, "learning_rate": 8.647139587050222e-06, "loss": 0.2965, "step": 5334 }, { "epoch": 0.24744897959183673, "grad_norm": 5.774975299835205, "learning_rate": 8.64663612021433e-06, "loss": 0.4017, "step": 5335 }, { "epoch": 0.24749536178107606, "grad_norm": 5.295396327972412, "learning_rate": 8.646132574375994e-06, "loss": 0.2492, "step": 5336 }, { "epoch": 0.2475417439703154, "grad_norm": 12.437670707702637, "learning_rate": 8.645628949546117e-06, "loss": 0.3744, "step": 5337 }, { "epoch": 0.24758812615955472, "grad_norm": 4.595913410186768, "learning_rate": 8.645125245735613e-06, "loss": 0.2802, "step": 5338 }, { "epoch": 0.24763450834879405, "grad_norm": 9.03078842163086, "learning_rate": 8.64462146295539e-06, "loss": 0.2677, "step": 5339 }, { "epoch": 0.24768089053803338, "grad_norm": 5.199023246765137, "learning_rate": 8.644117601216368e-06, "loss": 0.3284, "step": 5340 }, { "epoch": 0.24772727272727274, "grad_norm": 4.420584201812744, "learning_rate": 8.64361366052946e-06, "loss": 0.2943, "step": 5341 }, { "epoch": 0.24777365491651207, "grad_norm": 8.771437644958496, "learning_rate": 8.643109640905585e-06, "loss": 0.3308, "step": 5342 }, { "epoch": 0.2478200371057514, "grad_norm": 7.261916637420654, "learning_rate": 8.64260554235566e-06, "loss": 0.3593, "step": 5343 }, { "epoch": 0.24786641929499073, "grad_norm": 18.689865112304688, "learning_rate": 8.642101364890605e-06, "loss": 0.3352, "step": 5344 }, { "epoch": 0.24791280148423006, "grad_norm": 5.7406535148620605, "learning_rate": 8.641597108521348e-06, "loss": 0.34, "step": 5345 }, { "epoch": 0.2479591836734694, "grad_norm": 8.364068031311035, "learning_rate": 8.64109277325881e-06, "loss": 0.2687, "step": 5346 }, { "epoch": 0.24800556586270872, "grad_norm": 13.903412818908691, "learning_rate": 8.640588359113916e-06, "loss": 0.4631, "step": 5347 }, { "epoch": 0.24805194805194805, "grad_norm": 7.486071586608887, "learning_rate": 8.640083866097598e-06, "loss": 0.3121, "step": 5348 }, { "epoch": 0.24809833024118738, "grad_norm": 9.946727752685547, "learning_rate": 8.639579294220779e-06, "loss": 0.3599, "step": 5349 }, { "epoch": 0.2481447124304267, "grad_norm": 5.16436243057251, "learning_rate": 8.639074643494397e-06, "loss": 0.3305, "step": 5350 }, { "epoch": 0.24819109461966604, "grad_norm": 17.066762924194336, "learning_rate": 8.638569913929383e-06, "loss": 0.4177, "step": 5351 }, { "epoch": 0.24823747680890537, "grad_norm": 19.500490188598633, "learning_rate": 8.638065105536669e-06, "loss": 0.4595, "step": 5352 }, { "epoch": 0.2482838589981447, "grad_norm": 10.42997932434082, "learning_rate": 8.637560218327195e-06, "loss": 0.4876, "step": 5353 }, { "epoch": 0.24833024118738403, "grad_norm": 6.407313823699951, "learning_rate": 8.637055252311895e-06, "loss": 0.3894, "step": 5354 }, { "epoch": 0.2483766233766234, "grad_norm": 8.860834121704102, "learning_rate": 8.636550207501713e-06, "loss": 0.4489, "step": 5355 }, { "epoch": 0.24842300556586272, "grad_norm": 7.794561862945557, "learning_rate": 8.63604508390759e-06, "loss": 0.32, "step": 5356 }, { "epoch": 0.24846938775510205, "grad_norm": 9.226594924926758, "learning_rate": 8.635539881540465e-06, "loss": 0.5696, "step": 5357 }, { "epoch": 0.24851576994434138, "grad_norm": 10.69282054901123, "learning_rate": 8.63503460041129e-06, "loss": 0.3598, "step": 5358 }, { "epoch": 0.2485621521335807, "grad_norm": 9.188520431518555, "learning_rate": 8.634529240531005e-06, "loss": 0.4067, "step": 5359 }, { "epoch": 0.24860853432282004, "grad_norm": 7.672842502593994, "learning_rate": 8.63402380191056e-06, "loss": 0.3885, "step": 5360 }, { "epoch": 0.24865491651205937, "grad_norm": 13.598668098449707, "learning_rate": 8.633518284560909e-06, "loss": 0.3972, "step": 5361 }, { "epoch": 0.2487012987012987, "grad_norm": 8.710692405700684, "learning_rate": 8.633012688492999e-06, "loss": 0.3364, "step": 5362 }, { "epoch": 0.24874768089053803, "grad_norm": 7.405848026275635, "learning_rate": 8.632507013717787e-06, "loss": 0.4222, "step": 5363 }, { "epoch": 0.24879406307977736, "grad_norm": 7.209578514099121, "learning_rate": 8.632001260246224e-06, "loss": 0.4077, "step": 5364 }, { "epoch": 0.2488404452690167, "grad_norm": 9.062460899353027, "learning_rate": 8.63149542808927e-06, "loss": 0.4353, "step": 5365 }, { "epoch": 0.24888682745825602, "grad_norm": 12.063535690307617, "learning_rate": 8.630989517257883e-06, "loss": 0.4749, "step": 5366 }, { "epoch": 0.24893320964749535, "grad_norm": 6.803989887237549, "learning_rate": 8.630483527763026e-06, "loss": 0.35, "step": 5367 }, { "epoch": 0.24897959183673468, "grad_norm": 8.47415542602539, "learning_rate": 8.629977459615655e-06, "loss": 0.3745, "step": 5368 }, { "epoch": 0.24902597402597404, "grad_norm": 7.486081123352051, "learning_rate": 8.629471312826738e-06, "loss": 0.3853, "step": 5369 }, { "epoch": 0.24907235621521337, "grad_norm": 5.634616374969482, "learning_rate": 8.62896508740724e-06, "loss": 0.3681, "step": 5370 }, { "epoch": 0.2491187384044527, "grad_norm": 5.703787326812744, "learning_rate": 8.628458783368127e-06, "loss": 0.3158, "step": 5371 }, { "epoch": 0.24916512059369203, "grad_norm": 7.7124834060668945, "learning_rate": 8.627952400720367e-06, "loss": 0.3676, "step": 5372 }, { "epoch": 0.24921150278293136, "grad_norm": 9.205632209777832, "learning_rate": 8.627445939474934e-06, "loss": 0.3399, "step": 5373 }, { "epoch": 0.2492578849721707, "grad_norm": 7.459084987640381, "learning_rate": 8.626939399642796e-06, "loss": 0.405, "step": 5374 }, { "epoch": 0.24930426716141002, "grad_norm": 11.270029067993164, "learning_rate": 8.62643278123493e-06, "loss": 0.4255, "step": 5375 }, { "epoch": 0.24935064935064935, "grad_norm": 6.970704555511475, "learning_rate": 8.62592608426231e-06, "loss": 0.4135, "step": 5376 }, { "epoch": 0.24939703153988868, "grad_norm": 10.680215835571289, "learning_rate": 8.625419308735916e-06, "loss": 0.5317, "step": 5377 }, { "epoch": 0.249443413729128, "grad_norm": 14.565299034118652, "learning_rate": 8.624912454666722e-06, "loss": 0.3904, "step": 5378 }, { "epoch": 0.24948979591836734, "grad_norm": 6.530433177947998, "learning_rate": 8.624405522065713e-06, "loss": 0.2292, "step": 5379 }, { "epoch": 0.24953617810760667, "grad_norm": 5.736461639404297, "learning_rate": 8.623898510943869e-06, "loss": 0.3987, "step": 5380 }, { "epoch": 0.249582560296846, "grad_norm": 13.757833480834961, "learning_rate": 8.623391421312177e-06, "loss": 0.4493, "step": 5381 }, { "epoch": 0.24962894248608533, "grad_norm": 16.002429962158203, "learning_rate": 8.622884253181619e-06, "loss": 0.5317, "step": 5382 }, { "epoch": 0.2496753246753247, "grad_norm": 5.54602575302124, "learning_rate": 8.622377006563185e-06, "loss": 0.368, "step": 5383 }, { "epoch": 0.24972170686456402, "grad_norm": 7.901761054992676, "learning_rate": 8.621869681467865e-06, "loss": 0.4104, "step": 5384 }, { "epoch": 0.24976808905380335, "grad_norm": 10.724677085876465, "learning_rate": 8.62136227790665e-06, "loss": 0.4352, "step": 5385 }, { "epoch": 0.24981447124304268, "grad_norm": 7.7599196434021, "learning_rate": 8.620854795890528e-06, "loss": 0.3255, "step": 5386 }, { "epoch": 0.249860853432282, "grad_norm": 5.667036056518555, "learning_rate": 8.620347235430497e-06, "loss": 0.3527, "step": 5387 }, { "epoch": 0.24990723562152134, "grad_norm": 5.608233451843262, "learning_rate": 8.619839596537554e-06, "loss": 0.2871, "step": 5388 }, { "epoch": 0.24995361781076067, "grad_norm": 9.521110534667969, "learning_rate": 8.619331879222695e-06, "loss": 0.476, "step": 5389 }, { "epoch": 0.25, "grad_norm": 8.802046775817871, "learning_rate": 8.61882408349692e-06, "loss": 0.364, "step": 5390 }, { "epoch": 0.25004638218923936, "grad_norm": 10.802689552307129, "learning_rate": 8.618316209371229e-06, "loss": 0.3825, "step": 5391 }, { "epoch": 0.25009276437847866, "grad_norm": 5.383559226989746, "learning_rate": 8.617808256856627e-06, "loss": 0.3326, "step": 5392 }, { "epoch": 0.250139146567718, "grad_norm": 7.375587463378906, "learning_rate": 8.617300225964116e-06, "loss": 0.3917, "step": 5393 }, { "epoch": 0.2501855287569573, "grad_norm": 4.359292030334473, "learning_rate": 8.616792116704704e-06, "loss": 0.2614, "step": 5394 }, { "epoch": 0.2502319109461967, "grad_norm": 9.67160415649414, "learning_rate": 8.616283929089399e-06, "loss": 0.2943, "step": 5395 }, { "epoch": 0.250278293135436, "grad_norm": 4.95345401763916, "learning_rate": 8.61577566312921e-06, "loss": 0.3488, "step": 5396 }, { "epoch": 0.25032467532467534, "grad_norm": 12.08731460571289, "learning_rate": 8.615267318835148e-06, "loss": 0.439, "step": 5397 }, { "epoch": 0.25037105751391464, "grad_norm": 6.764693260192871, "learning_rate": 8.614758896218227e-06, "loss": 0.4148, "step": 5398 }, { "epoch": 0.250417439703154, "grad_norm": 9.85821533203125, "learning_rate": 8.614250395289461e-06, "loss": 0.3227, "step": 5399 }, { "epoch": 0.2504638218923933, "grad_norm": 10.806824684143066, "learning_rate": 8.613741816059867e-06, "loss": 0.4629, "step": 5400 }, { "epoch": 0.25051020408163266, "grad_norm": 9.05398941040039, "learning_rate": 8.613233158540462e-06, "loss": 0.3498, "step": 5401 }, { "epoch": 0.25055658627087196, "grad_norm": 11.337015151977539, "learning_rate": 8.612724422742266e-06, "loss": 0.4929, "step": 5402 }, { "epoch": 0.2506029684601113, "grad_norm": 7.162823677062988, "learning_rate": 8.6122156086763e-06, "loss": 0.3364, "step": 5403 }, { "epoch": 0.2506493506493506, "grad_norm": 9.73654556274414, "learning_rate": 8.611706716353589e-06, "loss": 0.4722, "step": 5404 }, { "epoch": 0.25069573283859, "grad_norm": 8.023624420166016, "learning_rate": 8.611197745785158e-06, "loss": 0.2954, "step": 5405 }, { "epoch": 0.25074211502782934, "grad_norm": 5.578254699707031, "learning_rate": 8.610688696982032e-06, "loss": 0.3643, "step": 5406 }, { "epoch": 0.25078849721706864, "grad_norm": 5.717254161834717, "learning_rate": 8.61017956995524e-06, "loss": 0.2657, "step": 5407 }, { "epoch": 0.250834879406308, "grad_norm": 9.755125999450684, "learning_rate": 8.60967036471581e-06, "loss": 0.3092, "step": 5408 }, { "epoch": 0.2508812615955473, "grad_norm": 4.3865647315979, "learning_rate": 8.609161081274777e-06, "loss": 0.3563, "step": 5409 }, { "epoch": 0.25092764378478666, "grad_norm": 12.284668922424316, "learning_rate": 8.608651719643173e-06, "loss": 0.4535, "step": 5410 }, { "epoch": 0.25097402597402596, "grad_norm": 11.85039234161377, "learning_rate": 8.608142279832033e-06, "loss": 0.6149, "step": 5411 }, { "epoch": 0.2510204081632653, "grad_norm": 7.269989013671875, "learning_rate": 8.607632761852393e-06, "loss": 0.3865, "step": 5412 }, { "epoch": 0.2510667903525046, "grad_norm": 11.07872200012207, "learning_rate": 8.607123165715292e-06, "loss": 0.3296, "step": 5413 }, { "epoch": 0.251113172541744, "grad_norm": 6.408553600311279, "learning_rate": 8.60661349143177e-06, "loss": 0.3304, "step": 5414 }, { "epoch": 0.2511595547309833, "grad_norm": 6.880693435668945, "learning_rate": 8.60610373901287e-06, "loss": 0.3827, "step": 5415 }, { "epoch": 0.25120593692022264, "grad_norm": 7.162353992462158, "learning_rate": 8.605593908469635e-06, "loss": 0.3992, "step": 5416 }, { "epoch": 0.25125231910946194, "grad_norm": 15.916535377502441, "learning_rate": 8.605083999813107e-06, "loss": 0.484, "step": 5417 }, { "epoch": 0.2512987012987013, "grad_norm": 6.7285919189453125, "learning_rate": 8.604574013054336e-06, "loss": 0.3669, "step": 5418 }, { "epoch": 0.25134508348794066, "grad_norm": 7.596413612365723, "learning_rate": 8.604063948204373e-06, "loss": 0.3705, "step": 5419 }, { "epoch": 0.25139146567717996, "grad_norm": 5.562628269195557, "learning_rate": 8.603553805274263e-06, "loss": 0.3439, "step": 5420 }, { "epoch": 0.2514378478664193, "grad_norm": 9.744871139526367, "learning_rate": 8.603043584275061e-06, "loss": 0.4519, "step": 5421 }, { "epoch": 0.2514842300556586, "grad_norm": 7.838254451751709, "learning_rate": 8.602533285217823e-06, "loss": 0.424, "step": 5422 }, { "epoch": 0.251530612244898, "grad_norm": 9.707820892333984, "learning_rate": 8.602022908113598e-06, "loss": 0.3996, "step": 5423 }, { "epoch": 0.2515769944341373, "grad_norm": 5.224076271057129, "learning_rate": 8.601512452973447e-06, "loss": 0.2586, "step": 5424 }, { "epoch": 0.25162337662337664, "grad_norm": 7.103202819824219, "learning_rate": 8.60100191980843e-06, "loss": 0.2187, "step": 5425 }, { "epoch": 0.25166975881261594, "grad_norm": 4.551959037780762, "learning_rate": 8.600491308629604e-06, "loss": 0.2981, "step": 5426 }, { "epoch": 0.2517161410018553, "grad_norm": 5.97824239730835, "learning_rate": 8.599980619448035e-06, "loss": 0.3175, "step": 5427 }, { "epoch": 0.2517625231910946, "grad_norm": 9.64063835144043, "learning_rate": 8.599469852274782e-06, "loss": 0.3181, "step": 5428 }, { "epoch": 0.25180890538033396, "grad_norm": 13.139558792114258, "learning_rate": 8.598959007120914e-06, "loss": 0.4072, "step": 5429 }, { "epoch": 0.25185528756957326, "grad_norm": 5.486507892608643, "learning_rate": 8.598448083997498e-06, "loss": 0.3228, "step": 5430 }, { "epoch": 0.2519016697588126, "grad_norm": 8.550185203552246, "learning_rate": 8.597937082915602e-06, "loss": 0.3394, "step": 5431 }, { "epoch": 0.2519480519480519, "grad_norm": 7.38287878036499, "learning_rate": 8.597426003886295e-06, "loss": 0.3239, "step": 5432 }, { "epoch": 0.2519944341372913, "grad_norm": 7.566426753997803, "learning_rate": 8.596914846920655e-06, "loss": 0.3939, "step": 5433 }, { "epoch": 0.25204081632653064, "grad_norm": 6.107957363128662, "learning_rate": 8.596403612029749e-06, "loss": 0.2711, "step": 5434 }, { "epoch": 0.25208719851576994, "grad_norm": 4.3418288230896, "learning_rate": 8.595892299224658e-06, "loss": 0.329, "step": 5435 }, { "epoch": 0.2521335807050093, "grad_norm": 6.332068920135498, "learning_rate": 8.595380908516454e-06, "loss": 0.309, "step": 5436 }, { "epoch": 0.2521799628942486, "grad_norm": 6.954379558563232, "learning_rate": 8.59486943991622e-06, "loss": 0.3413, "step": 5437 }, { "epoch": 0.25222634508348796, "grad_norm": 14.148621559143066, "learning_rate": 8.594357893435038e-06, "loss": 0.2869, "step": 5438 }, { "epoch": 0.25227272727272726, "grad_norm": 4.777061939239502, "learning_rate": 8.593846269083985e-06, "loss": 0.2777, "step": 5439 }, { "epoch": 0.2523191094619666, "grad_norm": 10.528339385986328, "learning_rate": 8.59333456687415e-06, "loss": 0.3826, "step": 5440 }, { "epoch": 0.2523654916512059, "grad_norm": 6.429917812347412, "learning_rate": 8.592822786816616e-06, "loss": 0.3566, "step": 5441 }, { "epoch": 0.2524118738404453, "grad_norm": 11.359230041503906, "learning_rate": 8.592310928922471e-06, "loss": 0.3594, "step": 5442 }, { "epoch": 0.2524582560296846, "grad_norm": 10.823894500732422, "learning_rate": 8.591798993202806e-06, "loss": 0.56, "step": 5443 }, { "epoch": 0.25250463821892394, "grad_norm": 9.033380508422852, "learning_rate": 8.591286979668708e-06, "loss": 0.2758, "step": 5444 }, { "epoch": 0.25255102040816324, "grad_norm": 9.934440612792969, "learning_rate": 8.590774888331275e-06, "loss": 0.4301, "step": 5445 }, { "epoch": 0.2525974025974026, "grad_norm": 9.330608367919922, "learning_rate": 8.590262719201595e-06, "loss": 0.3915, "step": 5446 }, { "epoch": 0.25264378478664196, "grad_norm": 8.041563987731934, "learning_rate": 8.589750472290767e-06, "loss": 0.3845, "step": 5447 }, { "epoch": 0.25269016697588126, "grad_norm": 4.990623474121094, "learning_rate": 8.58923814760989e-06, "loss": 0.3258, "step": 5448 }, { "epoch": 0.2527365491651206, "grad_norm": 7.950121879577637, "learning_rate": 8.58872574517006e-06, "loss": 0.3673, "step": 5449 }, { "epoch": 0.2527829313543599, "grad_norm": 9.341514587402344, "learning_rate": 8.588213264982383e-06, "loss": 0.4929, "step": 5450 }, { "epoch": 0.2528293135435993, "grad_norm": 13.391879081726074, "learning_rate": 8.587700707057955e-06, "loss": 0.3086, "step": 5451 }, { "epoch": 0.2528756957328386, "grad_norm": 7.66222620010376, "learning_rate": 8.587188071407885e-06, "loss": 0.4303, "step": 5452 }, { "epoch": 0.25292207792207794, "grad_norm": 5.866328716278076, "learning_rate": 8.586675358043277e-06, "loss": 0.3334, "step": 5453 }, { "epoch": 0.25296846011131724, "grad_norm": 5.656925201416016, "learning_rate": 8.586162566975237e-06, "loss": 0.3755, "step": 5454 }, { "epoch": 0.2530148423005566, "grad_norm": 9.011177062988281, "learning_rate": 8.58564969821488e-06, "loss": 0.3836, "step": 5455 }, { "epoch": 0.2530612244897959, "grad_norm": 6.260150909423828, "learning_rate": 8.585136751773312e-06, "loss": 0.4, "step": 5456 }, { "epoch": 0.25310760667903526, "grad_norm": 13.521712303161621, "learning_rate": 8.584623727661648e-06, "loss": 0.3886, "step": 5457 }, { "epoch": 0.25315398886827456, "grad_norm": 6.54185676574707, "learning_rate": 8.584110625891e-06, "loss": 0.3428, "step": 5458 }, { "epoch": 0.2532003710575139, "grad_norm": 7.16403865814209, "learning_rate": 8.583597446472487e-06, "loss": 0.3751, "step": 5459 }, { "epoch": 0.2532467532467532, "grad_norm": 8.822924613952637, "learning_rate": 8.583084189417225e-06, "loss": 0.3166, "step": 5460 }, { "epoch": 0.2532931354359926, "grad_norm": 7.39223051071167, "learning_rate": 8.582570854736334e-06, "loss": 0.3128, "step": 5461 }, { "epoch": 0.25333951762523194, "grad_norm": 7.498646259307861, "learning_rate": 8.582057442440934e-06, "loss": 0.3703, "step": 5462 }, { "epoch": 0.25338589981447124, "grad_norm": 5.303943157196045, "learning_rate": 8.581543952542151e-06, "loss": 0.3203, "step": 5463 }, { "epoch": 0.2534322820037106, "grad_norm": 11.91205883026123, "learning_rate": 8.581030385051105e-06, "loss": 0.5382, "step": 5464 }, { "epoch": 0.2534786641929499, "grad_norm": 7.544009685516357, "learning_rate": 8.580516739978925e-06, "loss": 0.3323, "step": 5465 }, { "epoch": 0.25352504638218926, "grad_norm": 8.021052360534668, "learning_rate": 8.580003017336739e-06, "loss": 0.4854, "step": 5466 }, { "epoch": 0.25357142857142856, "grad_norm": 7.089771747589111, "learning_rate": 8.579489217135678e-06, "loss": 0.3794, "step": 5467 }, { "epoch": 0.2536178107606679, "grad_norm": 4.692978382110596, "learning_rate": 8.578975339386868e-06, "loss": 0.297, "step": 5468 }, { "epoch": 0.2536641929499072, "grad_norm": 7.749425411224365, "learning_rate": 8.578461384101446e-06, "loss": 0.3728, "step": 5469 }, { "epoch": 0.2537105751391466, "grad_norm": 9.804543495178223, "learning_rate": 8.577947351290545e-06, "loss": 0.3785, "step": 5470 }, { "epoch": 0.2537569573283859, "grad_norm": 8.254634857177734, "learning_rate": 8.577433240965302e-06, "loss": 0.3543, "step": 5471 }, { "epoch": 0.25380333951762524, "grad_norm": 6.478123664855957, "learning_rate": 8.576919053136856e-06, "loss": 0.2629, "step": 5472 }, { "epoch": 0.25384972170686454, "grad_norm": 5.662680625915527, "learning_rate": 8.576404787816344e-06, "loss": 0.299, "step": 5473 }, { "epoch": 0.2538961038961039, "grad_norm": 5.526602745056152, "learning_rate": 8.57589044501491e-06, "loss": 0.3222, "step": 5474 }, { "epoch": 0.25394248608534326, "grad_norm": 9.646074295043945, "learning_rate": 8.575376024743693e-06, "loss": 0.3023, "step": 5475 }, { "epoch": 0.25398886827458256, "grad_norm": 10.576157569885254, "learning_rate": 8.574861527013842e-06, "loss": 0.3641, "step": 5476 }, { "epoch": 0.2540352504638219, "grad_norm": 3.5233089923858643, "learning_rate": 8.574346951836503e-06, "loss": 0.3585, "step": 5477 }, { "epoch": 0.2540816326530612, "grad_norm": 5.909780979156494, "learning_rate": 8.57383229922282e-06, "loss": 0.4182, "step": 5478 }, { "epoch": 0.2541280148423006, "grad_norm": 7.7597975730896, "learning_rate": 8.573317569183946e-06, "loss": 0.355, "step": 5479 }, { "epoch": 0.2541743970315399, "grad_norm": 11.375909805297852, "learning_rate": 8.572802761731031e-06, "loss": 0.4784, "step": 5480 }, { "epoch": 0.25422077922077924, "grad_norm": 6.50355339050293, "learning_rate": 8.572287876875231e-06, "loss": 0.3046, "step": 5481 }, { "epoch": 0.25426716141001854, "grad_norm": 12.061079025268555, "learning_rate": 8.571772914627696e-06, "loss": 0.417, "step": 5482 }, { "epoch": 0.2543135435992579, "grad_norm": 9.816302299499512, "learning_rate": 8.571257874999586e-06, "loss": 0.4387, "step": 5483 }, { "epoch": 0.2543599257884972, "grad_norm": 5.910557746887207, "learning_rate": 8.570742758002057e-06, "loss": 0.3236, "step": 5484 }, { "epoch": 0.25440630797773656, "grad_norm": 8.150900840759277, "learning_rate": 8.570227563646269e-06, "loss": 0.2986, "step": 5485 }, { "epoch": 0.25445269016697586, "grad_norm": 12.631685256958008, "learning_rate": 8.569712291943386e-06, "loss": 0.3175, "step": 5486 }, { "epoch": 0.2544990723562152, "grad_norm": 8.569816589355469, "learning_rate": 8.569196942904568e-06, "loss": 0.3622, "step": 5487 }, { "epoch": 0.2545454545454545, "grad_norm": 4.528901100158691, "learning_rate": 8.56868151654098e-06, "loss": 0.3274, "step": 5488 }, { "epoch": 0.2545918367346939, "grad_norm": 11.4827241897583, "learning_rate": 8.56816601286379e-06, "loss": 0.3077, "step": 5489 }, { "epoch": 0.25463821892393323, "grad_norm": 4.469351768493652, "learning_rate": 8.567650431884164e-06, "loss": 0.268, "step": 5490 }, { "epoch": 0.25468460111317254, "grad_norm": 7.774649143218994, "learning_rate": 8.567134773613274e-06, "loss": 0.3757, "step": 5491 }, { "epoch": 0.2547309833024119, "grad_norm": 10.555798530578613, "learning_rate": 8.566619038062292e-06, "loss": 0.4038, "step": 5492 }, { "epoch": 0.2547773654916512, "grad_norm": 8.232624053955078, "learning_rate": 8.566103225242389e-06, "loss": 0.4038, "step": 5493 }, { "epoch": 0.25482374768089056, "grad_norm": 7.2487592697143555, "learning_rate": 8.56558733516474e-06, "loss": 0.4469, "step": 5494 }, { "epoch": 0.25487012987012986, "grad_norm": 6.478918552398682, "learning_rate": 8.565071367840521e-06, "loss": 0.4616, "step": 5495 }, { "epoch": 0.2549165120593692, "grad_norm": 5.143167018890381, "learning_rate": 8.564555323280913e-06, "loss": 0.2812, "step": 5496 }, { "epoch": 0.2549628942486085, "grad_norm": 10.480171203613281, "learning_rate": 8.564039201497095e-06, "loss": 0.5685, "step": 5497 }, { "epoch": 0.2550092764378479, "grad_norm": 13.928622245788574, "learning_rate": 8.563523002500245e-06, "loss": 0.3056, "step": 5498 }, { "epoch": 0.2550556586270872, "grad_norm": 9.79161262512207, "learning_rate": 8.563006726301549e-06, "loss": 0.4845, "step": 5499 }, { "epoch": 0.25510204081632654, "grad_norm": 7.155390739440918, "learning_rate": 8.562490372912192e-06, "loss": 0.3804, "step": 5500 }, { "epoch": 0.25514842300556584, "grad_norm": 4.041744232177734, "learning_rate": 8.56197394234336e-06, "loss": 0.2837, "step": 5501 }, { "epoch": 0.2551948051948052, "grad_norm": 8.30201244354248, "learning_rate": 8.561457434606242e-06, "loss": 0.3461, "step": 5502 }, { "epoch": 0.25524118738404455, "grad_norm": 7.396879196166992, "learning_rate": 8.560940849712027e-06, "loss": 0.3347, "step": 5503 }, { "epoch": 0.25528756957328386, "grad_norm": 7.012959957122803, "learning_rate": 8.560424187671905e-06, "loss": 0.3561, "step": 5504 }, { "epoch": 0.2553339517625232, "grad_norm": 6.523127555847168, "learning_rate": 8.559907448497073e-06, "loss": 0.3905, "step": 5505 }, { "epoch": 0.2553803339517625, "grad_norm": 7.388011932373047, "learning_rate": 8.559390632198723e-06, "loss": 0.424, "step": 5506 }, { "epoch": 0.2554267161410019, "grad_norm": 8.686408996582031, "learning_rate": 8.558873738788052e-06, "loss": 0.4126, "step": 5507 }, { "epoch": 0.2554730983302412, "grad_norm": 6.537751197814941, "learning_rate": 8.55835676827626e-06, "loss": 0.3684, "step": 5508 }, { "epoch": 0.25551948051948054, "grad_norm": 7.7789626121521, "learning_rate": 8.557839720674544e-06, "loss": 0.3214, "step": 5509 }, { "epoch": 0.25556586270871984, "grad_norm": 5.369006156921387, "learning_rate": 8.557322595994107e-06, "loss": 0.3099, "step": 5510 }, { "epoch": 0.2556122448979592, "grad_norm": 5.950103759765625, "learning_rate": 8.556805394246154e-06, "loss": 0.3537, "step": 5511 }, { "epoch": 0.2556586270871985, "grad_norm": 5.686035633087158, "learning_rate": 8.556288115441887e-06, "loss": 0.4326, "step": 5512 }, { "epoch": 0.25570500927643786, "grad_norm": 4.007159233093262, "learning_rate": 8.555770759592513e-06, "loss": 0.3654, "step": 5513 }, { "epoch": 0.25575139146567716, "grad_norm": 13.435465812683105, "learning_rate": 8.555253326709241e-06, "loss": 0.3012, "step": 5514 }, { "epoch": 0.2557977736549165, "grad_norm": 3.854224443435669, "learning_rate": 8.554735816803282e-06, "loss": 0.2913, "step": 5515 }, { "epoch": 0.2558441558441558, "grad_norm": 5.454697608947754, "learning_rate": 8.554218229885848e-06, "loss": 0.3652, "step": 5516 }, { "epoch": 0.2558905380333952, "grad_norm": 5.4483160972595215, "learning_rate": 8.553700565968146e-06, "loss": 0.3396, "step": 5517 }, { "epoch": 0.25593692022263453, "grad_norm": 7.792217254638672, "learning_rate": 8.5531828250614e-06, "loss": 0.3845, "step": 5518 }, { "epoch": 0.25598330241187384, "grad_norm": 4.983402729034424, "learning_rate": 8.55266500717682e-06, "loss": 0.3628, "step": 5519 }, { "epoch": 0.2560296846011132, "grad_norm": 6.278584003448486, "learning_rate": 8.552147112325626e-06, "loss": 0.3715, "step": 5520 }, { "epoch": 0.2560760667903525, "grad_norm": 7.3079071044921875, "learning_rate": 8.55162914051904e-06, "loss": 0.3366, "step": 5521 }, { "epoch": 0.25612244897959185, "grad_norm": 6.676608562469482, "learning_rate": 8.55111109176828e-06, "loss": 0.4015, "step": 5522 }, { "epoch": 0.25616883116883116, "grad_norm": 5.8136210441589355, "learning_rate": 8.550592966084574e-06, "loss": 0.3363, "step": 5523 }, { "epoch": 0.2562152133580705, "grad_norm": 13.815117835998535, "learning_rate": 8.55007476347914e-06, "loss": 0.3668, "step": 5524 }, { "epoch": 0.2562615955473098, "grad_norm": 6.865794658660889, "learning_rate": 8.549556483963213e-06, "loss": 0.2638, "step": 5525 }, { "epoch": 0.2563079777365492, "grad_norm": 13.454963684082031, "learning_rate": 8.549038127548015e-06, "loss": 0.4156, "step": 5526 }, { "epoch": 0.2563543599257885, "grad_norm": 8.083511352539062, "learning_rate": 8.548519694244778e-06, "loss": 0.234, "step": 5527 }, { "epoch": 0.25640074211502784, "grad_norm": 8.375085830688477, "learning_rate": 8.548001184064733e-06, "loss": 0.341, "step": 5528 }, { "epoch": 0.25644712430426714, "grad_norm": 5.277151107788086, "learning_rate": 8.547482597019114e-06, "loss": 0.3288, "step": 5529 }, { "epoch": 0.2564935064935065, "grad_norm": 4.277940273284912, "learning_rate": 8.546963933119157e-06, "loss": 0.3185, "step": 5530 }, { "epoch": 0.2565398886827458, "grad_norm": 9.788788795471191, "learning_rate": 8.546445192376097e-06, "loss": 0.4226, "step": 5531 }, { "epoch": 0.25658627087198516, "grad_norm": 130.87689208984375, "learning_rate": 8.54592637480117e-06, "loss": 0.28, "step": 5532 }, { "epoch": 0.2566326530612245, "grad_norm": 9.414965629577637, "learning_rate": 8.54540748040562e-06, "loss": 0.3871, "step": 5533 }, { "epoch": 0.2566790352504638, "grad_norm": 10.48946762084961, "learning_rate": 8.544888509200689e-06, "loss": 0.4091, "step": 5534 }, { "epoch": 0.2567254174397032, "grad_norm": 9.877151489257812, "learning_rate": 8.544369461197614e-06, "loss": 0.3239, "step": 5535 }, { "epoch": 0.2567717996289425, "grad_norm": 8.438162803649902, "learning_rate": 8.543850336407647e-06, "loss": 0.4134, "step": 5536 }, { "epoch": 0.25681818181818183, "grad_norm": 7.819791316986084, "learning_rate": 8.543331134842031e-06, "loss": 0.3782, "step": 5537 }, { "epoch": 0.25686456400742114, "grad_norm": 7.825135707855225, "learning_rate": 8.542811856512014e-06, "loss": 0.466, "step": 5538 }, { "epoch": 0.2569109461966605, "grad_norm": 4.469893455505371, "learning_rate": 8.542292501428849e-06, "loss": 0.3942, "step": 5539 }, { "epoch": 0.2569573283858998, "grad_norm": 6.466174125671387, "learning_rate": 8.541773069603783e-06, "loss": 0.2976, "step": 5540 }, { "epoch": 0.25700371057513915, "grad_norm": 11.597848892211914, "learning_rate": 8.541253561048074e-06, "loss": 0.4245, "step": 5541 }, { "epoch": 0.25705009276437846, "grad_norm": 6.9683709144592285, "learning_rate": 8.540733975772971e-06, "loss": 0.3864, "step": 5542 }, { "epoch": 0.2570964749536178, "grad_norm": 8.018817901611328, "learning_rate": 8.540214313789737e-06, "loss": 0.4078, "step": 5543 }, { "epoch": 0.2571428571428571, "grad_norm": 9.50058364868164, "learning_rate": 8.539694575109626e-06, "loss": 0.4071, "step": 5544 }, { "epoch": 0.2571892393320965, "grad_norm": 10.25351333618164, "learning_rate": 8.539174759743898e-06, "loss": 0.2367, "step": 5545 }, { "epoch": 0.25723562152133583, "grad_norm": 8.76201343536377, "learning_rate": 8.538654867703817e-06, "loss": 0.3853, "step": 5546 }, { "epoch": 0.25728200371057514, "grad_norm": 10.005645751953125, "learning_rate": 8.538134899000643e-06, "loss": 0.3218, "step": 5547 }, { "epoch": 0.2573283858998145, "grad_norm": 6.199625492095947, "learning_rate": 8.537614853645643e-06, "loss": 0.2928, "step": 5548 }, { "epoch": 0.2573747680890538, "grad_norm": 3.903900384902954, "learning_rate": 8.537094731650086e-06, "loss": 0.3123, "step": 5549 }, { "epoch": 0.25742115027829315, "grad_norm": 6.032087802886963, "learning_rate": 8.536574533025234e-06, "loss": 0.3398, "step": 5550 }, { "epoch": 0.25746753246753246, "grad_norm": 8.897037506103516, "learning_rate": 8.536054257782362e-06, "loss": 0.3974, "step": 5551 }, { "epoch": 0.2575139146567718, "grad_norm": 5.90828275680542, "learning_rate": 8.535533905932739e-06, "loss": 0.345, "step": 5552 }, { "epoch": 0.2575602968460111, "grad_norm": 8.935713768005371, "learning_rate": 8.535013477487637e-06, "loss": 0.3723, "step": 5553 }, { "epoch": 0.2576066790352505, "grad_norm": 4.8818039894104, "learning_rate": 8.534492972458334e-06, "loss": 0.2933, "step": 5554 }, { "epoch": 0.2576530612244898, "grad_norm": 8.257109642028809, "learning_rate": 8.533972390856106e-06, "loss": 0.4146, "step": 5555 }, { "epoch": 0.25769944341372913, "grad_norm": 7.656642913818359, "learning_rate": 8.533451732692229e-06, "loss": 0.3702, "step": 5556 }, { "epoch": 0.25774582560296844, "grad_norm": 8.922417640686035, "learning_rate": 8.532930997977984e-06, "loss": 0.3294, "step": 5557 }, { "epoch": 0.2577922077922078, "grad_norm": 9.11826229095459, "learning_rate": 8.532410186724653e-06, "loss": 0.269, "step": 5558 }, { "epoch": 0.2578385899814471, "grad_norm": 5.805410861968994, "learning_rate": 8.531889298943518e-06, "loss": 0.4474, "step": 5559 }, { "epoch": 0.25788497217068646, "grad_norm": 6.511167526245117, "learning_rate": 8.531368334645865e-06, "loss": 0.3195, "step": 5560 }, { "epoch": 0.2579313543599258, "grad_norm": 11.856184959411621, "learning_rate": 8.530847293842978e-06, "loss": 0.4506, "step": 5561 }, { "epoch": 0.2579777365491651, "grad_norm": 5.930220603942871, "learning_rate": 8.530326176546147e-06, "loss": 0.3631, "step": 5562 }, { "epoch": 0.2580241187384045, "grad_norm": 10.332606315612793, "learning_rate": 8.529804982766664e-06, "loss": 0.3014, "step": 5563 }, { "epoch": 0.2580705009276438, "grad_norm": 5.907065391540527, "learning_rate": 8.529283712515814e-06, "loss": 0.2514, "step": 5564 }, { "epoch": 0.25811688311688313, "grad_norm": 6.208094120025635, "learning_rate": 8.528762365804895e-06, "loss": 0.2996, "step": 5565 }, { "epoch": 0.25816326530612244, "grad_norm": 7.733068943023682, "learning_rate": 8.528240942645202e-06, "loss": 0.4866, "step": 5566 }, { "epoch": 0.2582096474953618, "grad_norm": 5.45554780960083, "learning_rate": 8.527719443048028e-06, "loss": 0.341, "step": 5567 }, { "epoch": 0.2582560296846011, "grad_norm": 7.251816272735596, "learning_rate": 8.527197867024675e-06, "loss": 0.306, "step": 5568 }, { "epoch": 0.25830241187384045, "grad_norm": 9.98855972290039, "learning_rate": 8.52667621458644e-06, "loss": 0.4194, "step": 5569 }, { "epoch": 0.25834879406307976, "grad_norm": 6.0439348220825195, "learning_rate": 8.526154485744624e-06, "loss": 0.3547, "step": 5570 }, { "epoch": 0.2583951762523191, "grad_norm": 7.594733715057373, "learning_rate": 8.525632680510531e-06, "loss": 0.3611, "step": 5571 }, { "epoch": 0.2584415584415584, "grad_norm": 16.35628318786621, "learning_rate": 8.525110798895467e-06, "loss": 0.4783, "step": 5572 }, { "epoch": 0.2584879406307978, "grad_norm": 7.230947494506836, "learning_rate": 8.524588840910737e-06, "loss": 0.3375, "step": 5573 }, { "epoch": 0.25853432282003713, "grad_norm": 7.103320598602295, "learning_rate": 8.524066806567646e-06, "loss": 0.4017, "step": 5574 }, { "epoch": 0.25858070500927643, "grad_norm": 6.126349449157715, "learning_rate": 8.523544695877508e-06, "loss": 0.3507, "step": 5575 }, { "epoch": 0.2586270871985158, "grad_norm": 8.956646919250488, "learning_rate": 8.523022508851634e-06, "loss": 0.5021, "step": 5576 }, { "epoch": 0.2586734693877551, "grad_norm": 7.042535305023193, "learning_rate": 8.522500245501333e-06, "loss": 0.2868, "step": 5577 }, { "epoch": 0.25871985157699445, "grad_norm": 11.263429641723633, "learning_rate": 8.521977905837924e-06, "loss": 0.5696, "step": 5578 }, { "epoch": 0.25876623376623376, "grad_norm": 23.371665954589844, "learning_rate": 8.521455489872721e-06, "loss": 0.594, "step": 5579 }, { "epoch": 0.2588126159554731, "grad_norm": 7.2702813148498535, "learning_rate": 8.520932997617043e-06, "loss": 0.3974, "step": 5580 }, { "epoch": 0.2588589981447124, "grad_norm": 5.212826728820801, "learning_rate": 8.520410429082206e-06, "loss": 0.342, "step": 5581 }, { "epoch": 0.2589053803339518, "grad_norm": 6.387678623199463, "learning_rate": 8.519887784279536e-06, "loss": 0.3296, "step": 5582 }, { "epoch": 0.2589517625231911, "grad_norm": 10.622568130493164, "learning_rate": 8.519365063220353e-06, "loss": 0.479, "step": 5583 }, { "epoch": 0.25899814471243043, "grad_norm": 4.98512601852417, "learning_rate": 8.518842265915982e-06, "loss": 0.3306, "step": 5584 }, { "epoch": 0.25904452690166974, "grad_norm": 7.301801681518555, "learning_rate": 8.51831939237775e-06, "loss": 0.3689, "step": 5585 }, { "epoch": 0.2590909090909091, "grad_norm": 6.166232585906982, "learning_rate": 8.517796442616983e-06, "loss": 0.386, "step": 5586 }, { "epoch": 0.2591372912801484, "grad_norm": 11.647482872009277, "learning_rate": 8.517273416645014e-06, "loss": 0.4099, "step": 5587 }, { "epoch": 0.25918367346938775, "grad_norm": 5.822303771972656, "learning_rate": 8.51675031447317e-06, "loss": 0.3359, "step": 5588 }, { "epoch": 0.2592300556586271, "grad_norm": 6.962406635284424, "learning_rate": 8.516227136112784e-06, "loss": 0.348, "step": 5589 }, { "epoch": 0.2592764378478664, "grad_norm": 4.286168575286865, "learning_rate": 8.515703881575192e-06, "loss": 0.3145, "step": 5590 }, { "epoch": 0.2593228200371058, "grad_norm": 5.391976833343506, "learning_rate": 8.515180550871733e-06, "loss": 0.3572, "step": 5591 }, { "epoch": 0.2593692022263451, "grad_norm": 9.147720336914062, "learning_rate": 8.514657144013738e-06, "loss": 0.3571, "step": 5592 }, { "epoch": 0.25941558441558443, "grad_norm": 10.036361694335938, "learning_rate": 8.514133661012551e-06, "loss": 0.4682, "step": 5593 }, { "epoch": 0.25946196660482373, "grad_norm": 6.303242206573486, "learning_rate": 8.513610101879512e-06, "loss": 0.3358, "step": 5594 }, { "epoch": 0.2595083487940631, "grad_norm": 7.18497371673584, "learning_rate": 8.513086466625963e-06, "loss": 0.4114, "step": 5595 }, { "epoch": 0.2595547309833024, "grad_norm": 9.129664421081543, "learning_rate": 8.512562755263248e-06, "loss": 0.4626, "step": 5596 }, { "epoch": 0.25960111317254175, "grad_norm": 16.55327606201172, "learning_rate": 8.512038967802713e-06, "loss": 0.6311, "step": 5597 }, { "epoch": 0.25964749536178106, "grad_norm": 12.551899909973145, "learning_rate": 8.51151510425571e-06, "loss": 0.44, "step": 5598 }, { "epoch": 0.2596938775510204, "grad_norm": 5.311028480529785, "learning_rate": 8.510991164633582e-06, "loss": 0.2985, "step": 5599 }, { "epoch": 0.2597402597402597, "grad_norm": 5.890344142913818, "learning_rate": 8.510467148947682e-06, "loss": 0.2792, "step": 5600 }, { "epoch": 0.2597866419294991, "grad_norm": 12.883611679077148, "learning_rate": 8.509943057209361e-06, "loss": 0.4498, "step": 5601 }, { "epoch": 0.25983302411873843, "grad_norm": 7.725830078125, "learning_rate": 8.509418889429976e-06, "loss": 0.2275, "step": 5602 }, { "epoch": 0.25987940630797773, "grad_norm": 5.239678382873535, "learning_rate": 8.508894645620884e-06, "loss": 0.3037, "step": 5603 }, { "epoch": 0.2599257884972171, "grad_norm": 7.09971284866333, "learning_rate": 8.50837032579344e-06, "loss": 0.3822, "step": 5604 }, { "epoch": 0.2599721706864564, "grad_norm": 6.546484470367432, "learning_rate": 8.507845929959e-06, "loss": 0.3145, "step": 5605 }, { "epoch": 0.26001855287569575, "grad_norm": 11.877906799316406, "learning_rate": 8.507321458128932e-06, "loss": 0.368, "step": 5606 }, { "epoch": 0.26006493506493505, "grad_norm": 10.60345458984375, "learning_rate": 8.506796910314593e-06, "loss": 0.3507, "step": 5607 }, { "epoch": 0.2601113172541744, "grad_norm": 8.015522003173828, "learning_rate": 8.506272286527346e-06, "loss": 0.3552, "step": 5608 }, { "epoch": 0.2601576994434137, "grad_norm": 6.518163204193115, "learning_rate": 8.505747586778563e-06, "loss": 0.41, "step": 5609 }, { "epoch": 0.2602040816326531, "grad_norm": 7.538214683532715, "learning_rate": 8.505222811079607e-06, "loss": 0.208, "step": 5610 }, { "epoch": 0.2602504638218924, "grad_norm": 9.812582969665527, "learning_rate": 8.504697959441848e-06, "loss": 0.4195, "step": 5611 }, { "epoch": 0.26029684601113173, "grad_norm": 7.708030700683594, "learning_rate": 8.504173031876655e-06, "loss": 0.3141, "step": 5612 }, { "epoch": 0.26034322820037104, "grad_norm": 8.95694637298584, "learning_rate": 8.503648028395401e-06, "loss": 0.3351, "step": 5613 }, { "epoch": 0.2603896103896104, "grad_norm": 10.536831855773926, "learning_rate": 8.503122949009461e-06, "loss": 0.4544, "step": 5614 }, { "epoch": 0.2604359925788497, "grad_norm": 7.186984062194824, "learning_rate": 8.50259779373021e-06, "loss": 0.3365, "step": 5615 }, { "epoch": 0.26048237476808905, "grad_norm": 5.514638423919678, "learning_rate": 8.502072562569025e-06, "loss": 0.3832, "step": 5616 }, { "epoch": 0.2605287569573284, "grad_norm": 13.320182800292969, "learning_rate": 8.501547255537286e-06, "loss": 0.5977, "step": 5617 }, { "epoch": 0.2605751391465677, "grad_norm": 8.888381958007812, "learning_rate": 8.501021872646373e-06, "loss": 0.3512, "step": 5618 }, { "epoch": 0.26062152133580707, "grad_norm": 6.903696060180664, "learning_rate": 8.500496413907666e-06, "loss": 0.3715, "step": 5619 }, { "epoch": 0.2606679035250464, "grad_norm": 4.467525005340576, "learning_rate": 8.499970879332551e-06, "loss": 0.4006, "step": 5620 }, { "epoch": 0.26071428571428573, "grad_norm": 5.887279033660889, "learning_rate": 8.499445268932414e-06, "loss": 0.2947, "step": 5621 }, { "epoch": 0.26076066790352503, "grad_norm": 8.590315818786621, "learning_rate": 8.498919582718642e-06, "loss": 0.3974, "step": 5622 }, { "epoch": 0.2608070500927644, "grad_norm": 13.423182487487793, "learning_rate": 8.498393820702619e-06, "loss": 0.4795, "step": 5623 }, { "epoch": 0.2608534322820037, "grad_norm": 14.682903289794922, "learning_rate": 8.497867982895741e-06, "loss": 0.351, "step": 5624 }, { "epoch": 0.26089981447124305, "grad_norm": 7.842977046966553, "learning_rate": 8.497342069309398e-06, "loss": 0.3385, "step": 5625 }, { "epoch": 0.26094619666048235, "grad_norm": 7.948644638061523, "learning_rate": 8.496816079954984e-06, "loss": 0.4103, "step": 5626 }, { "epoch": 0.2609925788497217, "grad_norm": 6.603088855743408, "learning_rate": 8.496290014843893e-06, "loss": 0.4087, "step": 5627 }, { "epoch": 0.261038961038961, "grad_norm": 10.636164665222168, "learning_rate": 8.495763873987526e-06, "loss": 0.3889, "step": 5628 }, { "epoch": 0.2610853432282004, "grad_norm": 4.881674289703369, "learning_rate": 8.495237657397276e-06, "loss": 0.38, "step": 5629 }, { "epoch": 0.26113172541743973, "grad_norm": 10.3915376663208, "learning_rate": 8.494711365084546e-06, "loss": 0.3228, "step": 5630 }, { "epoch": 0.26117810760667903, "grad_norm": 7.422152042388916, "learning_rate": 8.494184997060738e-06, "loss": 0.3361, "step": 5631 }, { "epoch": 0.2612244897959184, "grad_norm": 5.165135860443115, "learning_rate": 8.493658553337254e-06, "loss": 0.2809, "step": 5632 }, { "epoch": 0.2612708719851577, "grad_norm": 6.691599369049072, "learning_rate": 8.4931320339255e-06, "loss": 0.4015, "step": 5633 }, { "epoch": 0.26131725417439705, "grad_norm": 6.4507975578308105, "learning_rate": 8.492605438836883e-06, "loss": 0.3074, "step": 5634 }, { "epoch": 0.26136363636363635, "grad_norm": 8.861767768859863, "learning_rate": 8.492078768082811e-06, "loss": 0.3915, "step": 5635 }, { "epoch": 0.2614100185528757, "grad_norm": 4.90020751953125, "learning_rate": 8.491552021674698e-06, "loss": 0.2859, "step": 5636 }, { "epoch": 0.261456400742115, "grad_norm": 11.042719841003418, "learning_rate": 8.491025199623948e-06, "loss": 0.2785, "step": 5637 }, { "epoch": 0.26150278293135437, "grad_norm": 5.646698474884033, "learning_rate": 8.490498301941981e-06, "loss": 0.353, "step": 5638 }, { "epoch": 0.2615491651205937, "grad_norm": 4.92921257019043, "learning_rate": 8.489971328640207e-06, "loss": 0.3128, "step": 5639 }, { "epoch": 0.26159554730983303, "grad_norm": 7.616726875305176, "learning_rate": 8.489444279730046e-06, "loss": 0.32, "step": 5640 }, { "epoch": 0.26164192949907233, "grad_norm": 8.122201919555664, "learning_rate": 8.488917155222915e-06, "loss": 0.4551, "step": 5641 }, { "epoch": 0.2616883116883117, "grad_norm": 5.392989635467529, "learning_rate": 8.488389955130235e-06, "loss": 0.3411, "step": 5642 }, { "epoch": 0.261734693877551, "grad_norm": 15.038497924804688, "learning_rate": 8.487862679463425e-06, "loss": 0.4384, "step": 5643 }, { "epoch": 0.26178107606679035, "grad_norm": 5.723567008972168, "learning_rate": 8.487335328233912e-06, "loss": 0.3068, "step": 5644 }, { "epoch": 0.2618274582560297, "grad_norm": 10.63525104522705, "learning_rate": 8.486807901453117e-06, "loss": 0.4503, "step": 5645 }, { "epoch": 0.261873840445269, "grad_norm": 8.683012962341309, "learning_rate": 8.486280399132468e-06, "loss": 0.2044, "step": 5646 }, { "epoch": 0.26192022263450837, "grad_norm": 5.117380142211914, "learning_rate": 8.485752821283393e-06, "loss": 0.2295, "step": 5647 }, { "epoch": 0.2619666048237477, "grad_norm": 6.687877178192139, "learning_rate": 8.48522516791732e-06, "loss": 0.4146, "step": 5648 }, { "epoch": 0.26201298701298703, "grad_norm": 10.920989990234375, "learning_rate": 8.484697439045685e-06, "loss": 0.4586, "step": 5649 }, { "epoch": 0.26205936920222633, "grad_norm": 5.78951358795166, "learning_rate": 8.484169634679917e-06, "loss": 0.2751, "step": 5650 }, { "epoch": 0.2621057513914657, "grad_norm": 6.868066310882568, "learning_rate": 8.483641754831451e-06, "loss": 0.3395, "step": 5651 }, { "epoch": 0.262152133580705, "grad_norm": 8.22270393371582, "learning_rate": 8.483113799511725e-06, "loss": 0.2687, "step": 5652 }, { "epoch": 0.26219851576994435, "grad_norm": 11.28420639038086, "learning_rate": 8.482585768732173e-06, "loss": 0.5135, "step": 5653 }, { "epoch": 0.26224489795918365, "grad_norm": 6.688146591186523, "learning_rate": 8.48205766250424e-06, "loss": 0.3765, "step": 5654 }, { "epoch": 0.262291280148423, "grad_norm": 6.720828056335449, "learning_rate": 8.481529480839362e-06, "loss": 0.3683, "step": 5655 }, { "epoch": 0.2623376623376623, "grad_norm": 6.464635372161865, "learning_rate": 8.481001223748986e-06, "loss": 0.2534, "step": 5656 }, { "epoch": 0.26238404452690167, "grad_norm": 10.766173362731934, "learning_rate": 8.480472891244553e-06, "loss": 0.4503, "step": 5657 }, { "epoch": 0.262430426716141, "grad_norm": 6.3016204833984375, "learning_rate": 8.479944483337512e-06, "loss": 0.4211, "step": 5658 }, { "epoch": 0.26247680890538033, "grad_norm": 6.695189476013184, "learning_rate": 8.479416000039308e-06, "loss": 0.3306, "step": 5659 }, { "epoch": 0.2625231910946197, "grad_norm": 4.382102012634277, "learning_rate": 8.478887441361392e-06, "loss": 0.3638, "step": 5660 }, { "epoch": 0.262569573283859, "grad_norm": 5.828487396240234, "learning_rate": 8.478358807315215e-06, "loss": 0.386, "step": 5661 }, { "epoch": 0.26261595547309835, "grad_norm": 6.240355014801025, "learning_rate": 8.477830097912229e-06, "loss": 0.4232, "step": 5662 }, { "epoch": 0.26266233766233765, "grad_norm": 13.240147590637207, "learning_rate": 8.477301313163888e-06, "loss": 0.3929, "step": 5663 }, { "epoch": 0.262708719851577, "grad_norm": 12.447931289672852, "learning_rate": 8.476772453081647e-06, "loss": 0.514, "step": 5664 }, { "epoch": 0.2627551020408163, "grad_norm": 6.469792366027832, "learning_rate": 8.476243517676967e-06, "loss": 0.3551, "step": 5665 }, { "epoch": 0.26280148423005567, "grad_norm": 5.185888290405273, "learning_rate": 8.475714506961304e-06, "loss": 0.2542, "step": 5666 }, { "epoch": 0.262847866419295, "grad_norm": 5.025758266448975, "learning_rate": 8.475185420946119e-06, "loss": 0.3956, "step": 5667 }, { "epoch": 0.26289424860853433, "grad_norm": 7.016086578369141, "learning_rate": 8.474656259642874e-06, "loss": 0.4194, "step": 5668 }, { "epoch": 0.26294063079777363, "grad_norm": 7.869761943817139, "learning_rate": 8.474127023063035e-06, "loss": 0.4419, "step": 5669 }, { "epoch": 0.262987012987013, "grad_norm": 13.004270553588867, "learning_rate": 8.473597711218066e-06, "loss": 0.4705, "step": 5670 }, { "epoch": 0.2630333951762523, "grad_norm": 6.177770614624023, "learning_rate": 8.473068324119435e-06, "loss": 0.4282, "step": 5671 }, { "epoch": 0.26307977736549165, "grad_norm": 5.225286483764648, "learning_rate": 8.47253886177861e-06, "loss": 0.4088, "step": 5672 }, { "epoch": 0.263126159554731, "grad_norm": 6.25249719619751, "learning_rate": 8.472009324207062e-06, "loss": 0.3835, "step": 5673 }, { "epoch": 0.2631725417439703, "grad_norm": 6.8686604499816895, "learning_rate": 8.471479711416263e-06, "loss": 0.3344, "step": 5674 }, { "epoch": 0.26321892393320967, "grad_norm": 4.386002540588379, "learning_rate": 8.470950023417688e-06, "loss": 0.374, "step": 5675 }, { "epoch": 0.26326530612244897, "grad_norm": 7.159342288970947, "learning_rate": 8.470420260222813e-06, "loss": 0.4551, "step": 5676 }, { "epoch": 0.26331168831168833, "grad_norm": 6.0644378662109375, "learning_rate": 8.469890421843112e-06, "loss": 0.3162, "step": 5677 }, { "epoch": 0.26335807050092763, "grad_norm": 7.544323444366455, "learning_rate": 8.469360508290065e-06, "loss": 0.4088, "step": 5678 }, { "epoch": 0.263404452690167, "grad_norm": 7.411047458648682, "learning_rate": 8.468830519575153e-06, "loss": 0.3043, "step": 5679 }, { "epoch": 0.2634508348794063, "grad_norm": 9.248833656311035, "learning_rate": 8.468300455709856e-06, "loss": 0.3525, "step": 5680 }, { "epoch": 0.26349721706864565, "grad_norm": 5.313661098480225, "learning_rate": 8.46777031670566e-06, "loss": 0.3579, "step": 5681 }, { "epoch": 0.26354359925788495, "grad_norm": 7.145869255065918, "learning_rate": 8.467240102574049e-06, "loss": 0.498, "step": 5682 }, { "epoch": 0.2635899814471243, "grad_norm": 6.777723789215088, "learning_rate": 8.46670981332651e-06, "loss": 0.4075, "step": 5683 }, { "epoch": 0.2636363636363636, "grad_norm": 13.020097732543945, "learning_rate": 8.466179448974531e-06, "loss": 0.621, "step": 5684 }, { "epoch": 0.26368274582560297, "grad_norm": 12.878372192382812, "learning_rate": 8.465649009529603e-06, "loss": 0.473, "step": 5685 }, { "epoch": 0.2637291280148423, "grad_norm": 16.10079574584961, "learning_rate": 8.465118495003217e-06, "loss": 0.4101, "step": 5686 }, { "epoch": 0.26377551020408163, "grad_norm": 9.549074172973633, "learning_rate": 8.464587905406866e-06, "loss": 0.384, "step": 5687 }, { "epoch": 0.263821892393321, "grad_norm": 6.622583389282227, "learning_rate": 8.464057240752046e-06, "loss": 0.4259, "step": 5688 }, { "epoch": 0.2638682745825603, "grad_norm": 4.925198078155518, "learning_rate": 8.463526501050253e-06, "loss": 0.2816, "step": 5689 }, { "epoch": 0.26391465677179965, "grad_norm": 6.7645111083984375, "learning_rate": 8.462995686312985e-06, "loss": 0.4899, "step": 5690 }, { "epoch": 0.26396103896103895, "grad_norm": 5.380436897277832, "learning_rate": 8.462464796551743e-06, "loss": 0.3652, "step": 5691 }, { "epoch": 0.2640074211502783, "grad_norm": 5.2002177238464355, "learning_rate": 8.461933831778026e-06, "loss": 0.3332, "step": 5692 }, { "epoch": 0.2640538033395176, "grad_norm": 9.125946998596191, "learning_rate": 8.46140279200334e-06, "loss": 0.3786, "step": 5693 }, { "epoch": 0.26410018552875697, "grad_norm": 5.387612342834473, "learning_rate": 8.460871677239187e-06, "loss": 0.3372, "step": 5694 }, { "epoch": 0.2641465677179963, "grad_norm": 6.0925612449646, "learning_rate": 8.460340487497074e-06, "loss": 0.2939, "step": 5695 }, { "epoch": 0.26419294990723563, "grad_norm": 16.175710678100586, "learning_rate": 8.459809222788511e-06, "loss": 0.4074, "step": 5696 }, { "epoch": 0.26423933209647493, "grad_norm": 5.22007417678833, "learning_rate": 8.459277883125005e-06, "loss": 0.3783, "step": 5697 }, { "epoch": 0.2642857142857143, "grad_norm": 12.349264144897461, "learning_rate": 8.458746468518067e-06, "loss": 0.4377, "step": 5698 }, { "epoch": 0.2643320964749536, "grad_norm": 9.887223243713379, "learning_rate": 8.458214978979213e-06, "loss": 0.5218, "step": 5699 }, { "epoch": 0.26437847866419295, "grad_norm": 15.769933700561523, "learning_rate": 8.457683414519954e-06, "loss": 0.4208, "step": 5700 }, { "epoch": 0.2644248608534323, "grad_norm": 12.759142875671387, "learning_rate": 8.45715177515181e-06, "loss": 0.4756, "step": 5701 }, { "epoch": 0.2644712430426716, "grad_norm": 6.345090389251709, "learning_rate": 8.456620060886296e-06, "loss": 0.319, "step": 5702 }, { "epoch": 0.26451762523191097, "grad_norm": 6.651876449584961, "learning_rate": 8.45608827173493e-06, "loss": 0.3701, "step": 5703 }, { "epoch": 0.26456400742115027, "grad_norm": 7.297259330749512, "learning_rate": 8.455556407709235e-06, "loss": 0.4146, "step": 5704 }, { "epoch": 0.26461038961038963, "grad_norm": 11.04594612121582, "learning_rate": 8.455024468820732e-06, "loss": 0.4183, "step": 5705 }, { "epoch": 0.26465677179962893, "grad_norm": 8.628520965576172, "learning_rate": 8.454492455080946e-06, "loss": 0.4242, "step": 5706 }, { "epoch": 0.2647031539888683, "grad_norm": 8.277562141418457, "learning_rate": 8.453960366501407e-06, "loss": 0.3595, "step": 5707 }, { "epoch": 0.2647495361781076, "grad_norm": 6.614653587341309, "learning_rate": 8.453428203093635e-06, "loss": 0.3298, "step": 5708 }, { "epoch": 0.26479591836734695, "grad_norm": 5.370288372039795, "learning_rate": 8.452895964869162e-06, "loss": 0.3126, "step": 5709 }, { "epoch": 0.26484230055658625, "grad_norm": 5.555915832519531, "learning_rate": 8.452363651839522e-06, "loss": 0.3919, "step": 5710 }, { "epoch": 0.2648886827458256, "grad_norm": 7.867930889129639, "learning_rate": 8.451831264016242e-06, "loss": 0.2687, "step": 5711 }, { "epoch": 0.2649350649350649, "grad_norm": 15.099323272705078, "learning_rate": 8.45129880141086e-06, "loss": 0.3993, "step": 5712 }, { "epoch": 0.26498144712430427, "grad_norm": 4.355682849884033, "learning_rate": 8.450766264034907e-06, "loss": 0.3846, "step": 5713 }, { "epoch": 0.2650278293135436, "grad_norm": 6.956098556518555, "learning_rate": 8.450233651899925e-06, "loss": 0.353, "step": 5714 }, { "epoch": 0.26507421150278293, "grad_norm": 8.296442031860352, "learning_rate": 8.44970096501745e-06, "loss": 0.3735, "step": 5715 }, { "epoch": 0.2651205936920223, "grad_norm": 8.269926071166992, "learning_rate": 8.449168203399024e-06, "loss": 0.4594, "step": 5716 }, { "epoch": 0.2651669758812616, "grad_norm": 5.606106758117676, "learning_rate": 8.448635367056188e-06, "loss": 0.3504, "step": 5717 }, { "epoch": 0.26521335807050095, "grad_norm": 12.343705177307129, "learning_rate": 8.448102456000487e-06, "loss": 0.4754, "step": 5718 }, { "epoch": 0.26525974025974025, "grad_norm": 4.86424446105957, "learning_rate": 8.447569470243462e-06, "loss": 0.4615, "step": 5719 }, { "epoch": 0.2653061224489796, "grad_norm": 4.697738170623779, "learning_rate": 8.447036409796663e-06, "loss": 0.3074, "step": 5720 }, { "epoch": 0.2653525046382189, "grad_norm": 10.499025344848633, "learning_rate": 8.44650327467164e-06, "loss": 0.4597, "step": 5721 }, { "epoch": 0.26539888682745827, "grad_norm": 4.626427173614502, "learning_rate": 8.445970064879942e-06, "loss": 0.3535, "step": 5722 }, { "epoch": 0.26544526901669757, "grad_norm": 8.576273918151855, "learning_rate": 8.445436780433119e-06, "loss": 0.4151, "step": 5723 }, { "epoch": 0.26549165120593693, "grad_norm": 5.113775730133057, "learning_rate": 8.444903421342725e-06, "loss": 0.2476, "step": 5724 }, { "epoch": 0.26553803339517623, "grad_norm": 9.817730903625488, "learning_rate": 8.444369987620315e-06, "loss": 0.4999, "step": 5725 }, { "epoch": 0.2655844155844156, "grad_norm": 6.915263652801514, "learning_rate": 8.44383647927745e-06, "loss": 0.3948, "step": 5726 }, { "epoch": 0.2656307977736549, "grad_norm": 5.683027744293213, "learning_rate": 8.443302896325679e-06, "loss": 0.3395, "step": 5727 }, { "epoch": 0.26567717996289425, "grad_norm": 6.422658920288086, "learning_rate": 8.442769238776569e-06, "loss": 0.3634, "step": 5728 }, { "epoch": 0.2657235621521336, "grad_norm": 9.623695373535156, "learning_rate": 8.44223550664168e-06, "loss": 0.2223, "step": 5729 }, { "epoch": 0.2657699443413729, "grad_norm": 14.659257888793945, "learning_rate": 8.441701699932573e-06, "loss": 0.4335, "step": 5730 }, { "epoch": 0.26581632653061227, "grad_norm": 4.032608509063721, "learning_rate": 8.441167818660814e-06, "loss": 0.3103, "step": 5731 }, { "epoch": 0.26586270871985157, "grad_norm": 12.412201881408691, "learning_rate": 8.440633862837971e-06, "loss": 0.4591, "step": 5732 }, { "epoch": 0.26590909090909093, "grad_norm": 7.1720290184021, "learning_rate": 8.440099832475608e-06, "loss": 0.3448, "step": 5733 }, { "epoch": 0.26595547309833023, "grad_norm": 6.982126712799072, "learning_rate": 8.439565727585297e-06, "loss": 0.3215, "step": 5734 }, { "epoch": 0.2660018552875696, "grad_norm": 12.270798683166504, "learning_rate": 8.439031548178608e-06, "loss": 0.4121, "step": 5735 }, { "epoch": 0.2660482374768089, "grad_norm": 10.8046236038208, "learning_rate": 8.438497294267117e-06, "loss": 0.2583, "step": 5736 }, { "epoch": 0.26609461966604825, "grad_norm": 6.884448051452637, "learning_rate": 8.437962965862394e-06, "loss": 0.3973, "step": 5737 }, { "epoch": 0.26614100185528755, "grad_norm": 6.31255578994751, "learning_rate": 8.437428562976018e-06, "loss": 0.3358, "step": 5738 }, { "epoch": 0.2661873840445269, "grad_norm": 4.769742012023926, "learning_rate": 8.436894085619563e-06, "loss": 0.3358, "step": 5739 }, { "epoch": 0.2662337662337662, "grad_norm": 5.307952880859375, "learning_rate": 8.436359533804613e-06, "loss": 0.3409, "step": 5740 }, { "epoch": 0.26628014842300557, "grad_norm": 12.578134536743164, "learning_rate": 8.435824907542745e-06, "loss": 0.5325, "step": 5741 }, { "epoch": 0.26632653061224487, "grad_norm": 4.812826156616211, "learning_rate": 8.435290206845542e-06, "loss": 0.3342, "step": 5742 }, { "epoch": 0.26637291280148423, "grad_norm": 11.767098426818848, "learning_rate": 8.434755431724588e-06, "loss": 0.4202, "step": 5743 }, { "epoch": 0.2664192949907236, "grad_norm": 17.237539291381836, "learning_rate": 8.43422058219147e-06, "loss": 0.4009, "step": 5744 }, { "epoch": 0.2664656771799629, "grad_norm": 26.192655563354492, "learning_rate": 8.433685658257775e-06, "loss": 0.3855, "step": 5745 }, { "epoch": 0.26651205936920225, "grad_norm": 9.577068328857422, "learning_rate": 8.433150659935093e-06, "loss": 0.2952, "step": 5746 }, { "epoch": 0.26655844155844155, "grad_norm": 13.170719146728516, "learning_rate": 8.432615587235009e-06, "loss": 0.3588, "step": 5747 }, { "epoch": 0.2666048237476809, "grad_norm": 8.33715534210205, "learning_rate": 8.432080440169121e-06, "loss": 0.2634, "step": 5748 }, { "epoch": 0.2666512059369202, "grad_norm": 14.702434539794922, "learning_rate": 8.43154521874902e-06, "loss": 0.4526, "step": 5749 }, { "epoch": 0.26669758812615957, "grad_norm": 9.274591445922852, "learning_rate": 8.431009922986301e-06, "loss": 0.3381, "step": 5750 }, { "epoch": 0.26674397031539887, "grad_norm": 5.702117443084717, "learning_rate": 8.430474552892563e-06, "loss": 0.3047, "step": 5751 }, { "epoch": 0.26679035250463823, "grad_norm": 5.306009292602539, "learning_rate": 8.429939108479403e-06, "loss": 0.3274, "step": 5752 }, { "epoch": 0.26683673469387753, "grad_norm": 10.841390609741211, "learning_rate": 8.42940358975842e-06, "loss": 0.3856, "step": 5753 }, { "epoch": 0.2668831168831169, "grad_norm": 5.192894458770752, "learning_rate": 8.42886799674122e-06, "loss": 0.3411, "step": 5754 }, { "epoch": 0.2669294990723562, "grad_norm": 8.232300758361816, "learning_rate": 8.428332329439398e-06, "loss": 0.4624, "step": 5755 }, { "epoch": 0.26697588126159555, "grad_norm": 5.951506614685059, "learning_rate": 8.42779658786457e-06, "loss": 0.4213, "step": 5756 }, { "epoch": 0.2670222634508349, "grad_norm": 5.951786041259766, "learning_rate": 8.427260772028334e-06, "loss": 0.3752, "step": 5757 }, { "epoch": 0.2670686456400742, "grad_norm": 7.45609712600708, "learning_rate": 8.426724881942301e-06, "loss": 0.3622, "step": 5758 }, { "epoch": 0.26711502782931357, "grad_norm": 4.680865287780762, "learning_rate": 8.426188917618083e-06, "loss": 0.385, "step": 5759 }, { "epoch": 0.26716141001855287, "grad_norm": 6.086927890777588, "learning_rate": 8.425652879067288e-06, "loss": 0.4228, "step": 5760 }, { "epoch": 0.2672077922077922, "grad_norm": 4.022092342376709, "learning_rate": 8.42511676630153e-06, "loss": 0.2834, "step": 5761 }, { "epoch": 0.26725417439703153, "grad_norm": 4.736149311065674, "learning_rate": 8.424580579332424e-06, "loss": 0.339, "step": 5762 }, { "epoch": 0.2673005565862709, "grad_norm": 5.45811128616333, "learning_rate": 8.424044318171585e-06, "loss": 0.3178, "step": 5763 }, { "epoch": 0.2673469387755102, "grad_norm": 3.6430845260620117, "learning_rate": 8.423507982830634e-06, "loss": 0.1854, "step": 5764 }, { "epoch": 0.26739332096474955, "grad_norm": 4.918901443481445, "learning_rate": 8.422971573321185e-06, "loss": 0.3179, "step": 5765 }, { "epoch": 0.26743970315398885, "grad_norm": 5.455570220947266, "learning_rate": 8.422435089654865e-06, "loss": 0.2755, "step": 5766 }, { "epoch": 0.2674860853432282, "grad_norm": 5.3329315185546875, "learning_rate": 8.421898531843293e-06, "loss": 0.4093, "step": 5767 }, { "epoch": 0.2675324675324675, "grad_norm": 5.980104923248291, "learning_rate": 8.421361899898095e-06, "loss": 0.334, "step": 5768 }, { "epoch": 0.26757884972170687, "grad_norm": 4.545799732208252, "learning_rate": 8.420825193830895e-06, "loss": 0.2802, "step": 5769 }, { "epoch": 0.26762523191094617, "grad_norm": 6.728959083557129, "learning_rate": 8.42028841365332e-06, "loss": 0.4232, "step": 5770 }, { "epoch": 0.26767161410018553, "grad_norm": 7.741830348968506, "learning_rate": 8.419751559377004e-06, "loss": 0.4656, "step": 5771 }, { "epoch": 0.2677179962894249, "grad_norm": 8.57797908782959, "learning_rate": 8.419214631013573e-06, "loss": 0.4294, "step": 5772 }, { "epoch": 0.2677643784786642, "grad_norm": 8.422035217285156, "learning_rate": 8.418677628574661e-06, "loss": 0.3634, "step": 5773 }, { "epoch": 0.26781076066790355, "grad_norm": 7.943534851074219, "learning_rate": 8.418140552071898e-06, "loss": 0.2863, "step": 5774 }, { "epoch": 0.26785714285714285, "grad_norm": 3.590439796447754, "learning_rate": 8.417603401516928e-06, "loss": 0.2319, "step": 5775 }, { "epoch": 0.2679035250463822, "grad_norm": 6.645326137542725, "learning_rate": 8.417066176921377e-06, "loss": 0.3582, "step": 5776 }, { "epoch": 0.2679499072356215, "grad_norm": 20.354154586791992, "learning_rate": 8.416528878296894e-06, "loss": 0.5006, "step": 5777 }, { "epoch": 0.26799628942486087, "grad_norm": 7.361868381500244, "learning_rate": 8.415991505655114e-06, "loss": 0.3032, "step": 5778 }, { "epoch": 0.26804267161410017, "grad_norm": 6.915802001953125, "learning_rate": 8.41545405900768e-06, "loss": 0.347, "step": 5779 }, { "epoch": 0.2680890538033395, "grad_norm": 5.982047080993652, "learning_rate": 8.414916538366233e-06, "loss": 0.2966, "step": 5780 }, { "epoch": 0.26813543599257883, "grad_norm": 10.63657283782959, "learning_rate": 8.414378943742422e-06, "loss": 0.3035, "step": 5781 }, { "epoch": 0.2681818181818182, "grad_norm": 8.140867233276367, "learning_rate": 8.413841275147893e-06, "loss": 0.3176, "step": 5782 }, { "epoch": 0.2682282003710575, "grad_norm": 6.369822025299072, "learning_rate": 8.413303532594293e-06, "loss": 0.2493, "step": 5783 }, { "epoch": 0.26827458256029685, "grad_norm": 8.48449993133545, "learning_rate": 8.412765716093273e-06, "loss": 0.4547, "step": 5784 }, { "epoch": 0.26832096474953615, "grad_norm": 9.507203102111816, "learning_rate": 8.412227825656481e-06, "loss": 0.3743, "step": 5785 }, { "epoch": 0.2683673469387755, "grad_norm": 7.921911239624023, "learning_rate": 8.411689861295574e-06, "loss": 0.3422, "step": 5786 }, { "epoch": 0.26841372912801487, "grad_norm": 6.652505874633789, "learning_rate": 8.411151823022207e-06, "loss": 0.3468, "step": 5787 }, { "epoch": 0.26846011131725417, "grad_norm": 6.609920978546143, "learning_rate": 8.410613710848033e-06, "loss": 0.3163, "step": 5788 }, { "epoch": 0.2685064935064935, "grad_norm": 6.9715447425842285, "learning_rate": 8.410075524784713e-06, "loss": 0.3114, "step": 5789 }, { "epoch": 0.26855287569573283, "grad_norm": 14.098889350891113, "learning_rate": 8.409537264843905e-06, "loss": 0.3823, "step": 5790 }, { "epoch": 0.2685992578849722, "grad_norm": 5.016712188720703, "learning_rate": 8.40899893103727e-06, "loss": 0.3961, "step": 5791 }, { "epoch": 0.2686456400742115, "grad_norm": 6.63980770111084, "learning_rate": 8.408460523376474e-06, "loss": 0.2811, "step": 5792 }, { "epoch": 0.26869202226345085, "grad_norm": 7.072758197784424, "learning_rate": 8.407922041873175e-06, "loss": 0.3927, "step": 5793 }, { "epoch": 0.26873840445269015, "grad_norm": 5.902547836303711, "learning_rate": 8.407383486539046e-06, "loss": 0.3191, "step": 5794 }, { "epoch": 0.2687847866419295, "grad_norm": 9.855809211730957, "learning_rate": 8.406844857385749e-06, "loss": 0.3837, "step": 5795 }, { "epoch": 0.2688311688311688, "grad_norm": 4.056225776672363, "learning_rate": 8.406306154424954e-06, "loss": 0.3671, "step": 5796 }, { "epoch": 0.26887755102040817, "grad_norm": 10.475679397583008, "learning_rate": 8.405767377668335e-06, "loss": 0.3494, "step": 5797 }, { "epoch": 0.26892393320964747, "grad_norm": 9.709959030151367, "learning_rate": 8.40522852712756e-06, "loss": 0.2724, "step": 5798 }, { "epoch": 0.26897031539888683, "grad_norm": 8.063468933105469, "learning_rate": 8.404689602814307e-06, "loss": 0.378, "step": 5799 }, { "epoch": 0.2690166975881262, "grad_norm": 5.9362053871154785, "learning_rate": 8.404150604740248e-06, "loss": 0.3155, "step": 5800 }, { "epoch": 0.2690630797773655, "grad_norm": 11.921891212463379, "learning_rate": 8.403611532917062e-06, "loss": 0.5518, "step": 5801 }, { "epoch": 0.26910946196660485, "grad_norm": 5.358422756195068, "learning_rate": 8.403072387356428e-06, "loss": 0.288, "step": 5802 }, { "epoch": 0.26915584415584415, "grad_norm": 21.892974853515625, "learning_rate": 8.402533168070026e-06, "loss": 0.486, "step": 5803 }, { "epoch": 0.2692022263450835, "grad_norm": 7.836783409118652, "learning_rate": 8.401993875069536e-06, "loss": 0.3718, "step": 5804 }, { "epoch": 0.2692486085343228, "grad_norm": 5.001290798187256, "learning_rate": 8.401454508366644e-06, "loss": 0.3092, "step": 5805 }, { "epoch": 0.26929499072356217, "grad_norm": 5.003910541534424, "learning_rate": 8.400915067973034e-06, "loss": 0.2896, "step": 5806 }, { "epoch": 0.26934137291280147, "grad_norm": 12.86901569366455, "learning_rate": 8.400375553900392e-06, "loss": 0.5008, "step": 5807 }, { "epoch": 0.2693877551020408, "grad_norm": 7.83561897277832, "learning_rate": 8.39983596616041e-06, "loss": 0.4349, "step": 5808 }, { "epoch": 0.26943413729128013, "grad_norm": 6.187758922576904, "learning_rate": 8.399296304764772e-06, "loss": 0.3236, "step": 5809 }, { "epoch": 0.2694805194805195, "grad_norm": 8.647452354431152, "learning_rate": 8.398756569725173e-06, "loss": 0.2946, "step": 5810 }, { "epoch": 0.2695269016697588, "grad_norm": 5.180936336517334, "learning_rate": 8.398216761053307e-06, "loss": 0.3774, "step": 5811 }, { "epoch": 0.26957328385899815, "grad_norm": 5.651988506317139, "learning_rate": 8.397676878760867e-06, "loss": 0.3738, "step": 5812 }, { "epoch": 0.26961966604823745, "grad_norm": 15.670141220092773, "learning_rate": 8.397136922859548e-06, "loss": 0.3321, "step": 5813 }, { "epoch": 0.2696660482374768, "grad_norm": 7.012675762176514, "learning_rate": 8.396596893361051e-06, "loss": 0.2575, "step": 5814 }, { "epoch": 0.26971243042671617, "grad_norm": 6.7398905754089355, "learning_rate": 8.396056790277071e-06, "loss": 0.3843, "step": 5815 }, { "epoch": 0.26975881261595547, "grad_norm": 5.341550827026367, "learning_rate": 8.395516613619315e-06, "loss": 0.3282, "step": 5816 }, { "epoch": 0.2698051948051948, "grad_norm": 6.180640697479248, "learning_rate": 8.394976363399482e-06, "loss": 0.2767, "step": 5817 }, { "epoch": 0.26985157699443413, "grad_norm": 9.822604179382324, "learning_rate": 8.394436039629276e-06, "loss": 0.3597, "step": 5818 }, { "epoch": 0.2698979591836735, "grad_norm": 8.504215240478516, "learning_rate": 8.393895642320404e-06, "loss": 0.4148, "step": 5819 }, { "epoch": 0.2699443413729128, "grad_norm": 17.77184295654297, "learning_rate": 8.393355171484572e-06, "loss": 0.41, "step": 5820 }, { "epoch": 0.26999072356215215, "grad_norm": 7.611297607421875, "learning_rate": 8.392814627133491e-06, "loss": 0.4056, "step": 5821 }, { "epoch": 0.27003710575139145, "grad_norm": 5.14197301864624, "learning_rate": 8.392274009278871e-06, "loss": 0.3169, "step": 5822 }, { "epoch": 0.2700834879406308, "grad_norm": 7.455539703369141, "learning_rate": 8.391733317932422e-06, "loss": 0.3697, "step": 5823 }, { "epoch": 0.2701298701298701, "grad_norm": 7.235495090484619, "learning_rate": 8.391192553105862e-06, "loss": 0.3888, "step": 5824 }, { "epoch": 0.27017625231910947, "grad_norm": 6.359317302703857, "learning_rate": 8.390651714810903e-06, "loss": 0.2647, "step": 5825 }, { "epoch": 0.27022263450834877, "grad_norm": 16.190176010131836, "learning_rate": 8.390110803059263e-06, "loss": 0.3774, "step": 5826 }, { "epoch": 0.2702690166975881, "grad_norm": 9.034467697143555, "learning_rate": 8.389569817862661e-06, "loss": 0.4105, "step": 5827 }, { "epoch": 0.2703153988868275, "grad_norm": 5.9842753410339355, "learning_rate": 8.389028759232816e-06, "loss": 0.4011, "step": 5828 }, { "epoch": 0.2703617810760668, "grad_norm": 4.240108013153076, "learning_rate": 8.38848762718145e-06, "loss": 0.2587, "step": 5829 }, { "epoch": 0.27040816326530615, "grad_norm": 13.596214294433594, "learning_rate": 8.387946421720288e-06, "loss": 0.3918, "step": 5830 }, { "epoch": 0.27045454545454545, "grad_norm": 25.88575553894043, "learning_rate": 8.387405142861052e-06, "loss": 0.439, "step": 5831 }, { "epoch": 0.2705009276437848, "grad_norm": 7.09814977645874, "learning_rate": 8.386863790615472e-06, "loss": 0.4469, "step": 5832 }, { "epoch": 0.2705473098330241, "grad_norm": 10.468170166015625, "learning_rate": 8.386322364995274e-06, "loss": 0.3114, "step": 5833 }, { "epoch": 0.27059369202226347, "grad_norm": 5.610857963562012, "learning_rate": 8.385780866012188e-06, "loss": 0.3344, "step": 5834 }, { "epoch": 0.27064007421150277, "grad_norm": 9.551852226257324, "learning_rate": 8.385239293677945e-06, "loss": 0.3835, "step": 5835 }, { "epoch": 0.2706864564007421, "grad_norm": 4.722013473510742, "learning_rate": 8.384697648004277e-06, "loss": 0.2415, "step": 5836 }, { "epoch": 0.27073283858998143, "grad_norm": 11.711141586303711, "learning_rate": 8.384155929002921e-06, "loss": 0.4701, "step": 5837 }, { "epoch": 0.2707792207792208, "grad_norm": 9.570680618286133, "learning_rate": 8.383614136685612e-06, "loss": 0.3682, "step": 5838 }, { "epoch": 0.2708256029684601, "grad_norm": 5.684506893157959, "learning_rate": 8.383072271064088e-06, "loss": 0.3362, "step": 5839 }, { "epoch": 0.27087198515769945, "grad_norm": 6.435517311096191, "learning_rate": 8.382530332150086e-06, "loss": 0.353, "step": 5840 }, { "epoch": 0.27091836734693875, "grad_norm": 10.300448417663574, "learning_rate": 8.38198831995535e-06, "loss": 0.434, "step": 5841 }, { "epoch": 0.2709647495361781, "grad_norm": 8.381901741027832, "learning_rate": 8.381446234491618e-06, "loss": 0.4078, "step": 5842 }, { "epoch": 0.27101113172541746, "grad_norm": 6.655397891998291, "learning_rate": 8.38090407577064e-06, "loss": 0.3256, "step": 5843 }, { "epoch": 0.27105751391465677, "grad_norm": 9.188159942626953, "learning_rate": 8.38036184380416e-06, "loss": 0.3773, "step": 5844 }, { "epoch": 0.2711038961038961, "grad_norm": 5.756896018981934, "learning_rate": 8.379819538603919e-06, "loss": 0.3022, "step": 5845 }, { "epoch": 0.2711502782931354, "grad_norm": 5.79107666015625, "learning_rate": 8.379277160181674e-06, "loss": 0.2664, "step": 5846 }, { "epoch": 0.2711966604823748, "grad_norm": 5.341737747192383, "learning_rate": 8.378734708549169e-06, "loss": 0.3976, "step": 5847 }, { "epoch": 0.2712430426716141, "grad_norm": 8.570359230041504, "learning_rate": 8.378192183718158e-06, "loss": 0.3922, "step": 5848 }, { "epoch": 0.27128942486085345, "grad_norm": 6.744729518890381, "learning_rate": 8.3776495857004e-06, "loss": 0.3121, "step": 5849 }, { "epoch": 0.27133580705009275, "grad_norm": 8.498664855957031, "learning_rate": 8.37710691450764e-06, "loss": 0.365, "step": 5850 }, { "epoch": 0.2713821892393321, "grad_norm": 6.786217212677002, "learning_rate": 8.376564170151642e-06, "loss": 0.4458, "step": 5851 }, { "epoch": 0.2714285714285714, "grad_norm": 6.633615493774414, "learning_rate": 8.376021352644163e-06, "loss": 0.411, "step": 5852 }, { "epoch": 0.27147495361781077, "grad_norm": 7.004059314727783, "learning_rate": 8.375478461996961e-06, "loss": 0.4104, "step": 5853 }, { "epoch": 0.27152133580705007, "grad_norm": 7.460503101348877, "learning_rate": 8.374935498221799e-06, "loss": 0.4002, "step": 5854 }, { "epoch": 0.2715677179962894, "grad_norm": 5.661675930023193, "learning_rate": 8.37439246133044e-06, "loss": 0.2714, "step": 5855 }, { "epoch": 0.2716141001855288, "grad_norm": 5.441799640655518, "learning_rate": 8.373849351334647e-06, "loss": 0.3091, "step": 5856 }, { "epoch": 0.2716604823747681, "grad_norm": 6.383084774017334, "learning_rate": 8.37330616824619e-06, "loss": 0.3147, "step": 5857 }, { "epoch": 0.27170686456400744, "grad_norm": 14.460041046142578, "learning_rate": 8.37276291207683e-06, "loss": 0.3558, "step": 5858 }, { "epoch": 0.27175324675324675, "grad_norm": 9.181824684143066, "learning_rate": 8.372219582838343e-06, "loss": 0.3552, "step": 5859 }, { "epoch": 0.2717996289424861, "grad_norm": 9.302083969116211, "learning_rate": 8.371676180542497e-06, "loss": 0.3687, "step": 5860 }, { "epoch": 0.2718460111317254, "grad_norm": 7.616462230682373, "learning_rate": 8.371132705201064e-06, "loss": 0.48, "step": 5861 }, { "epoch": 0.27189239332096476, "grad_norm": 7.174198150634766, "learning_rate": 8.37058915682582e-06, "loss": 0.3259, "step": 5862 }, { "epoch": 0.27193877551020407, "grad_norm": 12.834290504455566, "learning_rate": 8.37004553542854e-06, "loss": 0.3926, "step": 5863 }, { "epoch": 0.2719851576994434, "grad_norm": 9.460381507873535, "learning_rate": 8.369501841021e-06, "loss": 0.3513, "step": 5864 }, { "epoch": 0.2720315398886827, "grad_norm": 12.041094779968262, "learning_rate": 8.36895807361498e-06, "loss": 0.6248, "step": 5865 }, { "epoch": 0.2720779220779221, "grad_norm": 6.134873867034912, "learning_rate": 8.368414233222261e-06, "loss": 0.3734, "step": 5866 }, { "epoch": 0.2721243042671614, "grad_norm": 10.082146644592285, "learning_rate": 8.367870319854623e-06, "loss": 0.3918, "step": 5867 }, { "epoch": 0.27217068645640075, "grad_norm": 5.011146068572998, "learning_rate": 8.367326333523853e-06, "loss": 0.3275, "step": 5868 }, { "epoch": 0.27221706864564005, "grad_norm": 7.033048152923584, "learning_rate": 8.366782274241731e-06, "loss": 0.3208, "step": 5869 }, { "epoch": 0.2722634508348794, "grad_norm": 10.252134323120117, "learning_rate": 8.36623814202005e-06, "loss": 0.448, "step": 5870 }, { "epoch": 0.27230983302411876, "grad_norm": 3.984602928161621, "learning_rate": 8.365693936870594e-06, "loss": 0.3174, "step": 5871 }, { "epoch": 0.27235621521335807, "grad_norm": 6.039788722991943, "learning_rate": 8.365149658805153e-06, "loss": 0.4362, "step": 5872 }, { "epoch": 0.2724025974025974, "grad_norm": 6.919215202331543, "learning_rate": 8.36460530783552e-06, "loss": 0.4038, "step": 5873 }, { "epoch": 0.2724489795918367, "grad_norm": 6.03280782699585, "learning_rate": 8.364060883973488e-06, "loss": 0.3117, "step": 5874 }, { "epoch": 0.2724953617810761, "grad_norm": 4.513929843902588, "learning_rate": 8.363516387230852e-06, "loss": 0.3508, "step": 5875 }, { "epoch": 0.2725417439703154, "grad_norm": 14.394092559814453, "learning_rate": 8.362971817619408e-06, "loss": 0.2886, "step": 5876 }, { "epoch": 0.27258812615955474, "grad_norm": 12.204484939575195, "learning_rate": 8.36242717515095e-06, "loss": 0.4053, "step": 5877 }, { "epoch": 0.27263450834879405, "grad_norm": 6.715791702270508, "learning_rate": 8.361882459837285e-06, "loss": 0.3982, "step": 5878 }, { "epoch": 0.2726808905380334, "grad_norm": 8.692234992980957, "learning_rate": 8.361337671690207e-06, "loss": 0.3883, "step": 5879 }, { "epoch": 0.2727272727272727, "grad_norm": 5.230277061462402, "learning_rate": 8.360792810721522e-06, "loss": 0.28, "step": 5880 }, { "epoch": 0.27277365491651206, "grad_norm": 6.9207763671875, "learning_rate": 8.360247876943034e-06, "loss": 0.414, "step": 5881 }, { "epoch": 0.27282003710575137, "grad_norm": 6.375596046447754, "learning_rate": 8.359702870366548e-06, "loss": 0.3481, "step": 5882 }, { "epoch": 0.2728664192949907, "grad_norm": 6.385673999786377, "learning_rate": 8.35915779100387e-06, "loss": 0.405, "step": 5883 }, { "epoch": 0.2729128014842301, "grad_norm": 6.003655910491943, "learning_rate": 8.35861263886681e-06, "loss": 0.3587, "step": 5884 }, { "epoch": 0.2729591836734694, "grad_norm": 4.239771842956543, "learning_rate": 8.358067413967182e-06, "loss": 0.3567, "step": 5885 }, { "epoch": 0.27300556586270874, "grad_norm": 5.713298797607422, "learning_rate": 8.357522116316791e-06, "loss": 0.3866, "step": 5886 }, { "epoch": 0.27305194805194805, "grad_norm": 4.703770160675049, "learning_rate": 8.356976745927457e-06, "loss": 0.3588, "step": 5887 }, { "epoch": 0.2730983302411874, "grad_norm": 6.437841415405273, "learning_rate": 8.35643130281099e-06, "loss": 0.3429, "step": 5888 }, { "epoch": 0.2731447124304267, "grad_norm": 7.843280792236328, "learning_rate": 8.35588578697921e-06, "loss": 0.4185, "step": 5889 }, { "epoch": 0.27319109461966606, "grad_norm": 4.893518447875977, "learning_rate": 8.355340198443935e-06, "loss": 0.3171, "step": 5890 }, { "epoch": 0.27323747680890537, "grad_norm": 4.883599758148193, "learning_rate": 8.354794537216984e-06, "loss": 0.2927, "step": 5891 }, { "epoch": 0.2732838589981447, "grad_norm": 5.9306230545043945, "learning_rate": 8.354248803310179e-06, "loss": 0.3914, "step": 5892 }, { "epoch": 0.273330241187384, "grad_norm": 8.49905014038086, "learning_rate": 8.353702996735343e-06, "loss": 0.3819, "step": 5893 }, { "epoch": 0.2733766233766234, "grad_norm": 13.301146507263184, "learning_rate": 8.3531571175043e-06, "loss": 0.4951, "step": 5894 }, { "epoch": 0.2734230055658627, "grad_norm": 15.500553131103516, "learning_rate": 8.352611165628877e-06, "loss": 0.4564, "step": 5895 }, { "epoch": 0.27346938775510204, "grad_norm": 6.946603298187256, "learning_rate": 8.352065141120902e-06, "loss": 0.271, "step": 5896 }, { "epoch": 0.27351576994434135, "grad_norm": 8.445130348205566, "learning_rate": 8.351519043992203e-06, "loss": 0.3663, "step": 5897 }, { "epoch": 0.2735621521335807, "grad_norm": 8.49433708190918, "learning_rate": 8.350972874254611e-06, "loss": 0.2425, "step": 5898 }, { "epoch": 0.27360853432282006, "grad_norm": 7.66556978225708, "learning_rate": 8.35042663191996e-06, "loss": 0.2964, "step": 5899 }, { "epoch": 0.27365491651205937, "grad_norm": 6.954695701599121, "learning_rate": 8.349880317000083e-06, "loss": 0.4053, "step": 5900 }, { "epoch": 0.2737012987012987, "grad_norm": 6.956564426422119, "learning_rate": 8.349333929506816e-06, "loss": 0.3776, "step": 5901 }, { "epoch": 0.273747680890538, "grad_norm": 8.549681663513184, "learning_rate": 8.348787469451995e-06, "loss": 0.3492, "step": 5902 }, { "epoch": 0.2737940630797774, "grad_norm": 6.972499370574951, "learning_rate": 8.348240936847461e-06, "loss": 0.3614, "step": 5903 }, { "epoch": 0.2738404452690167, "grad_norm": 13.666081428527832, "learning_rate": 8.347694331705055e-06, "loss": 0.3047, "step": 5904 }, { "epoch": 0.27388682745825604, "grad_norm": 5.623721599578857, "learning_rate": 8.347147654036614e-06, "loss": 0.2984, "step": 5905 }, { "epoch": 0.27393320964749535, "grad_norm": 5.062753677368164, "learning_rate": 8.346600903853985e-06, "loss": 0.2937, "step": 5906 }, { "epoch": 0.2739795918367347, "grad_norm": 5.001401901245117, "learning_rate": 8.346054081169013e-06, "loss": 0.3258, "step": 5907 }, { "epoch": 0.274025974025974, "grad_norm": 6.058474063873291, "learning_rate": 8.345507185993543e-06, "loss": 0.3428, "step": 5908 }, { "epoch": 0.27407235621521336, "grad_norm": 6.392746925354004, "learning_rate": 8.344960218339426e-06, "loss": 0.3686, "step": 5909 }, { "epoch": 0.27411873840445267, "grad_norm": 8.677370071411133, "learning_rate": 8.344413178218509e-06, "loss": 0.3838, "step": 5910 }, { "epoch": 0.274165120593692, "grad_norm": 20.236717224121094, "learning_rate": 8.343866065642645e-06, "loss": 0.3045, "step": 5911 }, { "epoch": 0.2742115027829313, "grad_norm": 6.436163425445557, "learning_rate": 8.343318880623688e-06, "loss": 0.3815, "step": 5912 }, { "epoch": 0.2742578849721707, "grad_norm": 4.783575057983398, "learning_rate": 8.342771623173488e-06, "loss": 0.3022, "step": 5913 }, { "epoch": 0.27430426716141004, "grad_norm": 6.093877792358398, "learning_rate": 8.342224293303905e-06, "loss": 0.4034, "step": 5914 }, { "epoch": 0.27435064935064934, "grad_norm": 6.413084030151367, "learning_rate": 8.341676891026796e-06, "loss": 0.3775, "step": 5915 }, { "epoch": 0.2743970315398887, "grad_norm": 6.194128513336182, "learning_rate": 8.341129416354019e-06, "loss": 0.332, "step": 5916 }, { "epoch": 0.274443413729128, "grad_norm": 8.705808639526367, "learning_rate": 8.340581869297438e-06, "loss": 0.3464, "step": 5917 }, { "epoch": 0.27448979591836736, "grad_norm": 6.003963947296143, "learning_rate": 8.34003424986891e-06, "loss": 0.2971, "step": 5918 }, { "epoch": 0.27453617810760667, "grad_norm": 13.356710433959961, "learning_rate": 8.3394865580803e-06, "loss": 0.4826, "step": 5919 }, { "epoch": 0.274582560296846, "grad_norm": 9.710067749023438, "learning_rate": 8.338938793943478e-06, "loss": 0.4132, "step": 5920 }, { "epoch": 0.2746289424860853, "grad_norm": 4.83155632019043, "learning_rate": 8.338390957470308e-06, "loss": 0.2945, "step": 5921 }, { "epoch": 0.2746753246753247, "grad_norm": 7.242469787597656, "learning_rate": 8.337843048672659e-06, "loss": 0.3651, "step": 5922 }, { "epoch": 0.274721706864564, "grad_norm": 11.314872741699219, "learning_rate": 8.337295067562401e-06, "loss": 0.4006, "step": 5923 }, { "epoch": 0.27476808905380334, "grad_norm": 7.442512512207031, "learning_rate": 8.336747014151404e-06, "loss": 0.342, "step": 5924 }, { "epoch": 0.27481447124304265, "grad_norm": 7.407868385314941, "learning_rate": 8.336198888451543e-06, "loss": 0.4154, "step": 5925 }, { "epoch": 0.274860853432282, "grad_norm": 7.1133599281311035, "learning_rate": 8.335650690474694e-06, "loss": 0.3861, "step": 5926 }, { "epoch": 0.27490723562152136, "grad_norm": 6.318007469177246, "learning_rate": 8.335102420232732e-06, "loss": 0.3886, "step": 5927 }, { "epoch": 0.27495361781076066, "grad_norm": 10.915841102600098, "learning_rate": 8.334554077737535e-06, "loss": 0.5174, "step": 5928 }, { "epoch": 0.275, "grad_norm": 4.190064907073975, "learning_rate": 8.334005663000981e-06, "loss": 0.3748, "step": 5929 }, { "epoch": 0.2750463821892393, "grad_norm": 10.748100280761719, "learning_rate": 8.333457176034954e-06, "loss": 0.442, "step": 5930 }, { "epoch": 0.2750927643784787, "grad_norm": 7.8945393562316895, "learning_rate": 8.332908616851337e-06, "loss": 0.4046, "step": 5931 }, { "epoch": 0.275139146567718, "grad_norm": 10.229519844055176, "learning_rate": 8.33235998546201e-06, "loss": 0.4193, "step": 5932 }, { "epoch": 0.27518552875695734, "grad_norm": 16.1021671295166, "learning_rate": 8.331811281878864e-06, "loss": 0.5148, "step": 5933 }, { "epoch": 0.27523191094619665, "grad_norm": 6.721494197845459, "learning_rate": 8.331262506113784e-06, "loss": 0.3744, "step": 5934 }, { "epoch": 0.275278293135436, "grad_norm": 8.02243423461914, "learning_rate": 8.330713658178658e-06, "loss": 0.359, "step": 5935 }, { "epoch": 0.2753246753246753, "grad_norm": 4.071835041046143, "learning_rate": 8.330164738085376e-06, "loss": 0.3877, "step": 5936 }, { "epoch": 0.27537105751391466, "grad_norm": 6.162367820739746, "learning_rate": 8.329615745845834e-06, "loss": 0.3307, "step": 5937 }, { "epoch": 0.27541743970315397, "grad_norm": 6.442251682281494, "learning_rate": 8.32906668147192e-06, "loss": 0.3139, "step": 5938 }, { "epoch": 0.2754638218923933, "grad_norm": 8.858814239501953, "learning_rate": 8.328517544975535e-06, "loss": 0.3779, "step": 5939 }, { "epoch": 0.2755102040816326, "grad_norm": 7.936983585357666, "learning_rate": 8.327968336368572e-06, "loss": 0.3494, "step": 5940 }, { "epoch": 0.275556586270872, "grad_norm": 7.2490739822387695, "learning_rate": 8.327419055662929e-06, "loss": 0.2856, "step": 5941 }, { "epoch": 0.27560296846011134, "grad_norm": 12.065979957580566, "learning_rate": 8.32686970287051e-06, "loss": 0.4252, "step": 5942 }, { "epoch": 0.27564935064935064, "grad_norm": 8.161510467529297, "learning_rate": 8.326320278003211e-06, "loss": 0.4312, "step": 5943 }, { "epoch": 0.27569573283859, "grad_norm": 10.124732971191406, "learning_rate": 8.325770781072939e-06, "loss": 0.3963, "step": 5944 }, { "epoch": 0.2757421150278293, "grad_norm": 6.512473106384277, "learning_rate": 8.325221212091597e-06, "loss": 0.4477, "step": 5945 }, { "epoch": 0.27578849721706866, "grad_norm": 7.050482273101807, "learning_rate": 8.324671571071091e-06, "loss": 0.346, "step": 5946 }, { "epoch": 0.27583487940630796, "grad_norm": 5.4860453605651855, "learning_rate": 8.324121858023328e-06, "loss": 0.3662, "step": 5947 }, { "epoch": 0.2758812615955473, "grad_norm": 5.878078460693359, "learning_rate": 8.32357207296022e-06, "loss": 0.3545, "step": 5948 }, { "epoch": 0.2759276437847866, "grad_norm": 10.533652305603027, "learning_rate": 8.323022215893675e-06, "loss": 0.4939, "step": 5949 }, { "epoch": 0.275974025974026, "grad_norm": 10.171955108642578, "learning_rate": 8.322472286835607e-06, "loss": 0.4433, "step": 5950 }, { "epoch": 0.2760204081632653, "grad_norm": 7.421659469604492, "learning_rate": 8.32192228579793e-06, "loss": 0.3803, "step": 5951 }, { "epoch": 0.27606679035250464, "grad_norm": 5.240976810455322, "learning_rate": 8.321372212792559e-06, "loss": 0.3216, "step": 5952 }, { "epoch": 0.27611317254174395, "grad_norm": 5.841787338256836, "learning_rate": 8.320822067831408e-06, "loss": 0.2857, "step": 5953 }, { "epoch": 0.2761595547309833, "grad_norm": 6.640200614929199, "learning_rate": 8.320271850926402e-06, "loss": 0.3624, "step": 5954 }, { "epoch": 0.27620593692022266, "grad_norm": 6.006932258605957, "learning_rate": 8.319721562089456e-06, "loss": 0.3137, "step": 5955 }, { "epoch": 0.27625231910946196, "grad_norm": 5.797845363616943, "learning_rate": 8.319171201332492e-06, "loss": 0.4327, "step": 5956 }, { "epoch": 0.2762987012987013, "grad_norm": 5.4439616203308105, "learning_rate": 8.318620768667438e-06, "loss": 0.3609, "step": 5957 }, { "epoch": 0.2763450834879406, "grad_norm": 6.188192367553711, "learning_rate": 8.318070264106213e-06, "loss": 0.3243, "step": 5958 }, { "epoch": 0.27639146567718, "grad_norm": 8.556986808776855, "learning_rate": 8.317519687660744e-06, "loss": 0.421, "step": 5959 }, { "epoch": 0.2764378478664193, "grad_norm": 10.873377799987793, "learning_rate": 8.316969039342963e-06, "loss": 0.3615, "step": 5960 }, { "epoch": 0.27648423005565864, "grad_norm": 6.80519962310791, "learning_rate": 8.316418319164798e-06, "loss": 0.4764, "step": 5961 }, { "epoch": 0.27653061224489794, "grad_norm": 9.137876510620117, "learning_rate": 8.315867527138179e-06, "loss": 0.3023, "step": 5962 }, { "epoch": 0.2765769944341373, "grad_norm": 10.643218994140625, "learning_rate": 8.315316663275038e-06, "loss": 0.4588, "step": 5963 }, { "epoch": 0.2766233766233766, "grad_norm": 5.160197734832764, "learning_rate": 8.31476572758731e-06, "loss": 0.242, "step": 5964 }, { "epoch": 0.27666975881261596, "grad_norm": 10.572071075439453, "learning_rate": 8.314214720086933e-06, "loss": 0.4042, "step": 5965 }, { "epoch": 0.27671614100185526, "grad_norm": 7.105179309844971, "learning_rate": 8.313663640785839e-06, "loss": 0.3088, "step": 5966 }, { "epoch": 0.2767625231910946, "grad_norm": 10.997693061828613, "learning_rate": 8.313112489695974e-06, "loss": 0.3923, "step": 5967 }, { "epoch": 0.2768089053803339, "grad_norm": 8.86191177368164, "learning_rate": 8.31256126682927e-06, "loss": 0.3567, "step": 5968 }, { "epoch": 0.2768552875695733, "grad_norm": 10.510013580322266, "learning_rate": 8.312009972197677e-06, "loss": 0.448, "step": 5969 }, { "epoch": 0.27690166975881264, "grad_norm": 6.562175273895264, "learning_rate": 8.311458605813133e-06, "loss": 0.2602, "step": 5970 }, { "epoch": 0.27694805194805194, "grad_norm": 16.40941619873047, "learning_rate": 8.310907167687585e-06, "loss": 0.3997, "step": 5971 }, { "epoch": 0.2769944341372913, "grad_norm": 15.465065002441406, "learning_rate": 8.31035565783298e-06, "loss": 0.4159, "step": 5972 }, { "epoch": 0.2770408163265306, "grad_norm": 6.31801176071167, "learning_rate": 8.309804076261265e-06, "loss": 0.3714, "step": 5973 }, { "epoch": 0.27708719851576996, "grad_norm": 10.86258602142334, "learning_rate": 8.30925242298439e-06, "loss": 0.3254, "step": 5974 }, { "epoch": 0.27713358070500926, "grad_norm": 13.961945533752441, "learning_rate": 8.308700698014308e-06, "loss": 0.5489, "step": 5975 }, { "epoch": 0.2771799628942486, "grad_norm": 11.814919471740723, "learning_rate": 8.30814890136297e-06, "loss": 0.2993, "step": 5976 }, { "epoch": 0.2772263450834879, "grad_norm": 7.536235809326172, "learning_rate": 8.30759703304233e-06, "loss": 0.4201, "step": 5977 }, { "epoch": 0.2772727272727273, "grad_norm": 4.836319923400879, "learning_rate": 8.307045093064347e-06, "loss": 0.3315, "step": 5978 }, { "epoch": 0.2773191094619666, "grad_norm": 9.064436912536621, "learning_rate": 8.306493081440974e-06, "loss": 0.3592, "step": 5979 }, { "epoch": 0.27736549165120594, "grad_norm": 7.9632768630981445, "learning_rate": 8.305940998184175e-06, "loss": 0.3938, "step": 5980 }, { "epoch": 0.27741187384044524, "grad_norm": 7.509748935699463, "learning_rate": 8.305388843305906e-06, "loss": 0.269, "step": 5981 }, { "epoch": 0.2774582560296846, "grad_norm": 8.468666076660156, "learning_rate": 8.30483661681813e-06, "loss": 0.3508, "step": 5982 }, { "epoch": 0.27750463821892396, "grad_norm": 8.492226600646973, "learning_rate": 8.304284318732815e-06, "loss": 0.398, "step": 5983 }, { "epoch": 0.27755102040816326, "grad_norm": 9.483390808105469, "learning_rate": 8.303731949061921e-06, "loss": 0.4266, "step": 5984 }, { "epoch": 0.2775974025974026, "grad_norm": 5.197929859161377, "learning_rate": 8.303179507817419e-06, "loss": 0.356, "step": 5985 }, { "epoch": 0.2776437847866419, "grad_norm": 7.086746692657471, "learning_rate": 8.302626995011272e-06, "loss": 0.3359, "step": 5986 }, { "epoch": 0.2776901669758813, "grad_norm": 9.32168960571289, "learning_rate": 8.302074410655456e-06, "loss": 0.5231, "step": 5987 }, { "epoch": 0.2777365491651206, "grad_norm": 8.566473960876465, "learning_rate": 8.301521754761936e-06, "loss": 0.3094, "step": 5988 }, { "epoch": 0.27778293135435994, "grad_norm": 9.010916709899902, "learning_rate": 8.300969027342692e-06, "loss": 0.4088, "step": 5989 }, { "epoch": 0.27782931354359924, "grad_norm": 9.413328170776367, "learning_rate": 8.300416228409693e-06, "loss": 0.3544, "step": 5990 }, { "epoch": 0.2778756957328386, "grad_norm": 10.021524429321289, "learning_rate": 8.29986335797492e-06, "loss": 0.3535, "step": 5991 }, { "epoch": 0.2779220779220779, "grad_norm": 10.458871841430664, "learning_rate": 8.299310416050345e-06, "loss": 0.3687, "step": 5992 }, { "epoch": 0.27796846011131726, "grad_norm": 10.96619701385498, "learning_rate": 8.298757402647952e-06, "loss": 0.4924, "step": 5993 }, { "epoch": 0.27801484230055656, "grad_norm": 4.514059543609619, "learning_rate": 8.298204317779718e-06, "loss": 0.3523, "step": 5994 }, { "epoch": 0.2780612244897959, "grad_norm": 4.865657806396484, "learning_rate": 8.297651161457627e-06, "loss": 0.3387, "step": 5995 }, { "epoch": 0.2781076066790352, "grad_norm": 8.529704093933105, "learning_rate": 8.297097933693665e-06, "loss": 0.4096, "step": 5996 }, { "epoch": 0.2781539888682746, "grad_norm": 7.501357078552246, "learning_rate": 8.296544634499814e-06, "loss": 0.4761, "step": 5997 }, { "epoch": 0.27820037105751394, "grad_norm": 6.427472114562988, "learning_rate": 8.295991263888062e-06, "loss": 0.3275, "step": 5998 }, { "epoch": 0.27824675324675324, "grad_norm": 7.4946746826171875, "learning_rate": 8.295437821870398e-06, "loss": 0.4527, "step": 5999 }, { "epoch": 0.2782931354359926, "grad_norm": 6.022600173950195, "learning_rate": 8.294884308458813e-06, "loss": 0.3173, "step": 6000 }, { "epoch": 0.2783395176252319, "grad_norm": 6.763225078582764, "learning_rate": 8.294330723665295e-06, "loss": 0.2727, "step": 6001 }, { "epoch": 0.27838589981447126, "grad_norm": 8.691146850585938, "learning_rate": 8.29377706750184e-06, "loss": 0.4434, "step": 6002 }, { "epoch": 0.27843228200371056, "grad_norm": 12.246674537658691, "learning_rate": 8.293223339980443e-06, "loss": 0.4516, "step": 6003 }, { "epoch": 0.2784786641929499, "grad_norm": 5.75493860244751, "learning_rate": 8.292669541113098e-06, "loss": 0.3827, "step": 6004 }, { "epoch": 0.2785250463821892, "grad_norm": 11.253523826599121, "learning_rate": 8.292115670911806e-06, "loss": 0.3752, "step": 6005 }, { "epoch": 0.2785714285714286, "grad_norm": 4.985452175140381, "learning_rate": 8.291561729388564e-06, "loss": 0.3117, "step": 6006 }, { "epoch": 0.2786178107606679, "grad_norm": 4.789318084716797, "learning_rate": 8.291007716555372e-06, "loss": 0.3606, "step": 6007 }, { "epoch": 0.27866419294990724, "grad_norm": 7.9639410972595215, "learning_rate": 8.290453632424236e-06, "loss": 0.3378, "step": 6008 }, { "epoch": 0.27871057513914654, "grad_norm": 6.012608051300049, "learning_rate": 8.289899477007155e-06, "loss": 0.2959, "step": 6009 }, { "epoch": 0.2787569573283859, "grad_norm": 7.5567708015441895, "learning_rate": 8.289345250316139e-06, "loss": 0.4119, "step": 6010 }, { "epoch": 0.27880333951762526, "grad_norm": 5.843697547912598, "learning_rate": 8.288790952363192e-06, "loss": 0.3341, "step": 6011 }, { "epoch": 0.27884972170686456, "grad_norm": 6.783288955688477, "learning_rate": 8.288236583160322e-06, "loss": 0.3836, "step": 6012 }, { "epoch": 0.2788961038961039, "grad_norm": 5.784948348999023, "learning_rate": 8.287682142719544e-06, "loss": 0.3751, "step": 6013 }, { "epoch": 0.2789424860853432, "grad_norm": 5.921298503875732, "learning_rate": 8.287127631052864e-06, "loss": 0.3832, "step": 6014 }, { "epoch": 0.2789888682745826, "grad_norm": 11.15989875793457, "learning_rate": 8.2865730481723e-06, "loss": 0.347, "step": 6015 }, { "epoch": 0.2790352504638219, "grad_norm": 8.42064094543457, "learning_rate": 8.286018394089864e-06, "loss": 0.1508, "step": 6016 }, { "epoch": 0.27908163265306124, "grad_norm": 13.380395889282227, "learning_rate": 8.28546366881757e-06, "loss": 0.332, "step": 6017 }, { "epoch": 0.27912801484230054, "grad_norm": 10.397356033325195, "learning_rate": 8.284908872367441e-06, "loss": 0.4096, "step": 6018 }, { "epoch": 0.2791743970315399, "grad_norm": 9.66244125366211, "learning_rate": 8.284354004751493e-06, "loss": 0.39, "step": 6019 }, { "epoch": 0.2792207792207792, "grad_norm": 5.205273151397705, "learning_rate": 8.283799065981748e-06, "loss": 0.2836, "step": 6020 }, { "epoch": 0.27926716141001856, "grad_norm": 9.310330390930176, "learning_rate": 8.28324405607023e-06, "loss": 0.3838, "step": 6021 }, { "epoch": 0.27931354359925786, "grad_norm": 4.936305046081543, "learning_rate": 8.282688975028958e-06, "loss": 0.4042, "step": 6022 }, { "epoch": 0.2793599257884972, "grad_norm": 12.103425025939941, "learning_rate": 8.282133822869962e-06, "loss": 0.4224, "step": 6023 }, { "epoch": 0.2794063079777365, "grad_norm": 9.214488983154297, "learning_rate": 8.281578599605269e-06, "loss": 0.35, "step": 6024 }, { "epoch": 0.2794526901669759, "grad_norm": 5.985262393951416, "learning_rate": 8.281023305246905e-06, "loss": 0.3056, "step": 6025 }, { "epoch": 0.27949907235621524, "grad_norm": 5.921624660491943, "learning_rate": 8.280467939806904e-06, "loss": 0.3266, "step": 6026 }, { "epoch": 0.27954545454545454, "grad_norm": 11.099969863891602, "learning_rate": 8.279912503297292e-06, "loss": 0.4088, "step": 6027 }, { "epoch": 0.2795918367346939, "grad_norm": 8.286886215209961, "learning_rate": 8.279356995730108e-06, "loss": 0.4791, "step": 6028 }, { "epoch": 0.2796382189239332, "grad_norm": 4.685475826263428, "learning_rate": 8.278801417117384e-06, "loss": 0.3746, "step": 6029 }, { "epoch": 0.27968460111317256, "grad_norm": 6.729917526245117, "learning_rate": 8.278245767471156e-06, "loss": 0.2826, "step": 6030 }, { "epoch": 0.27973098330241186, "grad_norm": 10.023064613342285, "learning_rate": 8.277690046803464e-06, "loss": 0.5097, "step": 6031 }, { "epoch": 0.2797773654916512, "grad_norm": 4.970213890075684, "learning_rate": 8.277134255126344e-06, "loss": 0.3567, "step": 6032 }, { "epoch": 0.2798237476808905, "grad_norm": 9.41793155670166, "learning_rate": 8.27657839245184e-06, "loss": 0.4021, "step": 6033 }, { "epoch": 0.2798701298701299, "grad_norm": 8.740954399108887, "learning_rate": 8.276022458791993e-06, "loss": 0.3497, "step": 6034 }, { "epoch": 0.2799165120593692, "grad_norm": 7.824139595031738, "learning_rate": 8.275466454158847e-06, "loss": 0.3761, "step": 6035 }, { "epoch": 0.27996289424860854, "grad_norm": 5.804128646850586, "learning_rate": 8.274910378564449e-06, "loss": 0.3849, "step": 6036 }, { "epoch": 0.28000927643784784, "grad_norm": 6.288814544677734, "learning_rate": 8.274354232020843e-06, "loss": 0.3515, "step": 6037 }, { "epoch": 0.2800556586270872, "grad_norm": 8.453373908996582, "learning_rate": 8.273798014540083e-06, "loss": 0.4033, "step": 6038 }, { "epoch": 0.2801020408163265, "grad_norm": 7.385097026824951, "learning_rate": 8.273241726134211e-06, "loss": 0.2882, "step": 6039 }, { "epoch": 0.28014842300556586, "grad_norm": 12.981243133544922, "learning_rate": 8.272685366815287e-06, "loss": 0.4734, "step": 6040 }, { "epoch": 0.2801948051948052, "grad_norm": 10.45538330078125, "learning_rate": 8.27212893659536e-06, "loss": 0.3932, "step": 6041 }, { "epoch": 0.2802411873840445, "grad_norm": 5.776510238647461, "learning_rate": 8.271572435486486e-06, "loss": 0.3227, "step": 6042 }, { "epoch": 0.2802875695732839, "grad_norm": 5.796933174133301, "learning_rate": 8.27101586350072e-06, "loss": 0.2793, "step": 6043 }, { "epoch": 0.2803339517625232, "grad_norm": 6.7205071449279785, "learning_rate": 8.27045922065012e-06, "loss": 0.4479, "step": 6044 }, { "epoch": 0.28038033395176254, "grad_norm": 5.539775371551514, "learning_rate": 8.269902506946746e-06, "loss": 0.2732, "step": 6045 }, { "epoch": 0.28042671614100184, "grad_norm": 4.292245864868164, "learning_rate": 8.26934572240266e-06, "loss": 0.3924, "step": 6046 }, { "epoch": 0.2804730983302412, "grad_norm": 6.620490550994873, "learning_rate": 8.268788867029921e-06, "loss": 0.3816, "step": 6047 }, { "epoch": 0.2805194805194805, "grad_norm": 7.6362786293029785, "learning_rate": 8.268231940840599e-06, "loss": 0.3403, "step": 6048 }, { "epoch": 0.28056586270871986, "grad_norm": 8.45055866241455, "learning_rate": 8.267674943846754e-06, "loss": 0.3269, "step": 6049 }, { "epoch": 0.28061224489795916, "grad_norm": 5.223228931427002, "learning_rate": 8.267117876060454e-06, "loss": 0.2648, "step": 6050 }, { "epoch": 0.2806586270871985, "grad_norm": 6.289086818695068, "learning_rate": 8.26656073749377e-06, "loss": 0.3617, "step": 6051 }, { "epoch": 0.2807050092764378, "grad_norm": 8.12002944946289, "learning_rate": 8.26600352815877e-06, "loss": 0.3745, "step": 6052 }, { "epoch": 0.2807513914656772, "grad_norm": 8.881630897521973, "learning_rate": 8.265446248067525e-06, "loss": 0.4429, "step": 6053 }, { "epoch": 0.28079777365491654, "grad_norm": 7.042565822601318, "learning_rate": 8.264888897232111e-06, "loss": 0.2872, "step": 6054 }, { "epoch": 0.28084415584415584, "grad_norm": 5.545934200286865, "learning_rate": 8.2643314756646e-06, "loss": 0.3163, "step": 6055 }, { "epoch": 0.2808905380333952, "grad_norm": 4.487893104553223, "learning_rate": 8.26377398337707e-06, "loss": 0.3426, "step": 6056 }, { "epoch": 0.2809369202226345, "grad_norm": 6.805025577545166, "learning_rate": 8.263216420381597e-06, "loss": 0.372, "step": 6057 }, { "epoch": 0.28098330241187386, "grad_norm": 6.142511367797852, "learning_rate": 8.262658786690262e-06, "loss": 0.3177, "step": 6058 }, { "epoch": 0.28102968460111316, "grad_norm": 4.956480979919434, "learning_rate": 8.262101082315146e-06, "loss": 0.4278, "step": 6059 }, { "epoch": 0.2810760667903525, "grad_norm": 6.997960567474365, "learning_rate": 8.261543307268332e-06, "loss": 0.337, "step": 6060 }, { "epoch": 0.2811224489795918, "grad_norm": 9.67713737487793, "learning_rate": 8.260985461561899e-06, "loss": 0.4593, "step": 6061 }, { "epoch": 0.2811688311688312, "grad_norm": 4.367258548736572, "learning_rate": 8.260427545207939e-06, "loss": 0.3531, "step": 6062 }, { "epoch": 0.2812152133580705, "grad_norm": 5.0588059425354, "learning_rate": 8.259869558218535e-06, "loss": 0.2979, "step": 6063 }, { "epoch": 0.28126159554730984, "grad_norm": 5.842654228210449, "learning_rate": 8.259311500605775e-06, "loss": 0.3728, "step": 6064 }, { "epoch": 0.28130797773654914, "grad_norm": 4.645956039428711, "learning_rate": 8.258753372381751e-06, "loss": 0.3332, "step": 6065 }, { "epoch": 0.2813543599257885, "grad_norm": 13.692597389221191, "learning_rate": 8.258195173558555e-06, "loss": 0.4598, "step": 6066 }, { "epoch": 0.2814007421150278, "grad_norm": 6.965138912200928, "learning_rate": 8.257636904148278e-06, "loss": 0.4485, "step": 6067 }, { "epoch": 0.28144712430426716, "grad_norm": 6.623927116394043, "learning_rate": 8.257078564163017e-06, "loss": 0.3758, "step": 6068 }, { "epoch": 0.2814935064935065, "grad_norm": 6.670796871185303, "learning_rate": 8.256520153614866e-06, "loss": 0.3298, "step": 6069 }, { "epoch": 0.2815398886827458, "grad_norm": 5.84296989440918, "learning_rate": 8.255961672515924e-06, "loss": 0.3754, "step": 6070 }, { "epoch": 0.2815862708719852, "grad_norm": 8.947554588317871, "learning_rate": 8.255403120878289e-06, "loss": 0.3826, "step": 6071 }, { "epoch": 0.2816326530612245, "grad_norm": 7.936049938201904, "learning_rate": 8.254844498714063e-06, "loss": 0.3713, "step": 6072 }, { "epoch": 0.28167903525046384, "grad_norm": 10.96638011932373, "learning_rate": 8.254285806035347e-06, "loss": 0.4107, "step": 6073 }, { "epoch": 0.28172541743970314, "grad_norm": 6.274903297424316, "learning_rate": 8.253727042854245e-06, "loss": 0.3614, "step": 6074 }, { "epoch": 0.2817717996289425, "grad_norm": 8.96418285369873, "learning_rate": 8.253168209182865e-06, "loss": 0.3086, "step": 6075 }, { "epoch": 0.2818181818181818, "grad_norm": 14.093154907226562, "learning_rate": 8.25260930503331e-06, "loss": 0.3064, "step": 6076 }, { "epoch": 0.28186456400742116, "grad_norm": 10.510924339294434, "learning_rate": 8.25205033041769e-06, "loss": 0.4857, "step": 6077 }, { "epoch": 0.28191094619666046, "grad_norm": 7.847565650939941, "learning_rate": 8.251491285348115e-06, "loss": 0.2878, "step": 6078 }, { "epoch": 0.2819573283858998, "grad_norm": 13.00903034210205, "learning_rate": 8.250932169836696e-06, "loss": 0.3589, "step": 6079 }, { "epoch": 0.2820037105751391, "grad_norm": 14.98937702178955, "learning_rate": 8.250372983895547e-06, "loss": 0.3592, "step": 6080 }, { "epoch": 0.2820500927643785, "grad_norm": 6.569676399230957, "learning_rate": 8.249813727536782e-06, "loss": 0.2692, "step": 6081 }, { "epoch": 0.28209647495361784, "grad_norm": 7.006595611572266, "learning_rate": 8.249254400772516e-06, "loss": 0.4407, "step": 6082 }, { "epoch": 0.28214285714285714, "grad_norm": 5.093496799468994, "learning_rate": 8.248695003614866e-06, "loss": 0.2102, "step": 6083 }, { "epoch": 0.2821892393320965, "grad_norm": 4.444668292999268, "learning_rate": 8.248135536075953e-06, "loss": 0.2734, "step": 6084 }, { "epoch": 0.2822356215213358, "grad_norm": 13.785707473754883, "learning_rate": 8.247575998167898e-06, "loss": 0.3928, "step": 6085 }, { "epoch": 0.28228200371057516, "grad_norm": 13.936583518981934, "learning_rate": 8.247016389902823e-06, "loss": 0.4274, "step": 6086 }, { "epoch": 0.28232838589981446, "grad_norm": 17.33866310119629, "learning_rate": 8.246456711292847e-06, "loss": 0.3567, "step": 6087 }, { "epoch": 0.2823747680890538, "grad_norm": 12.448392868041992, "learning_rate": 8.2458969623501e-06, "loss": 0.5028, "step": 6088 }, { "epoch": 0.2824211502782931, "grad_norm": 12.500411987304688, "learning_rate": 8.24533714308671e-06, "loss": 0.3901, "step": 6089 }, { "epoch": 0.2824675324675325, "grad_norm": 8.894434928894043, "learning_rate": 8.2447772535148e-06, "loss": 0.3212, "step": 6090 }, { "epoch": 0.2825139146567718, "grad_norm": 7.3207292556762695, "learning_rate": 8.244217293646502e-06, "loss": 0.3541, "step": 6091 }, { "epoch": 0.28256029684601114, "grad_norm": 4.176606178283691, "learning_rate": 8.243657263493948e-06, "loss": 0.2255, "step": 6092 }, { "epoch": 0.28260667903525044, "grad_norm": 8.0825834274292, "learning_rate": 8.24309716306927e-06, "loss": 0.3558, "step": 6093 }, { "epoch": 0.2826530612244898, "grad_norm": 9.405817031860352, "learning_rate": 8.242536992384602e-06, "loss": 0.3666, "step": 6094 }, { "epoch": 0.2826994434137291, "grad_norm": 5.452043056488037, "learning_rate": 8.241976751452082e-06, "loss": 0.3074, "step": 6095 }, { "epoch": 0.28274582560296846, "grad_norm": 10.0908842086792, "learning_rate": 8.241416440283844e-06, "loss": 0.5338, "step": 6096 }, { "epoch": 0.2827922077922078, "grad_norm": 6.844349384307861, "learning_rate": 8.240856058892031e-06, "loss": 0.3755, "step": 6097 }, { "epoch": 0.2828385899814471, "grad_norm": 7.722842693328857, "learning_rate": 8.240295607288777e-06, "loss": 0.3204, "step": 6098 }, { "epoch": 0.2828849721706865, "grad_norm": 8.571556091308594, "learning_rate": 8.239735085486232e-06, "loss": 0.458, "step": 6099 }, { "epoch": 0.2829313543599258, "grad_norm": 8.238235473632812, "learning_rate": 8.239174493496531e-06, "loss": 0.3693, "step": 6100 }, { "epoch": 0.28297773654916514, "grad_norm": 7.147428512573242, "learning_rate": 8.238613831331826e-06, "loss": 0.4368, "step": 6101 }, { "epoch": 0.28302411873840444, "grad_norm": 5.808351516723633, "learning_rate": 8.23805309900426e-06, "loss": 0.2613, "step": 6102 }, { "epoch": 0.2830705009276438, "grad_norm": 5.45400857925415, "learning_rate": 8.237492296525982e-06, "loss": 0.3097, "step": 6103 }, { "epoch": 0.2831168831168831, "grad_norm": 7.170193195343018, "learning_rate": 8.23693142390914e-06, "loss": 0.3687, "step": 6104 }, { "epoch": 0.28316326530612246, "grad_norm": 5.878767490386963, "learning_rate": 8.236370481165886e-06, "loss": 0.4267, "step": 6105 }, { "epoch": 0.28320964749536176, "grad_norm": 4.999505519866943, "learning_rate": 8.235809468308372e-06, "loss": 0.3055, "step": 6106 }, { "epoch": 0.2832560296846011, "grad_norm": 7.691093921661377, "learning_rate": 8.235248385348752e-06, "loss": 0.3779, "step": 6107 }, { "epoch": 0.2833024118738404, "grad_norm": 5.204257011413574, "learning_rate": 8.234687232299184e-06, "loss": 0.2805, "step": 6108 }, { "epoch": 0.2833487940630798, "grad_norm": 4.829584121704102, "learning_rate": 8.234126009171823e-06, "loss": 0.3442, "step": 6109 }, { "epoch": 0.28339517625231914, "grad_norm": 8.92611026763916, "learning_rate": 8.233564715978827e-06, "loss": 0.499, "step": 6110 }, { "epoch": 0.28344155844155844, "grad_norm": 6.4024434089660645, "learning_rate": 8.233003352732358e-06, "loss": 0.3459, "step": 6111 }, { "epoch": 0.2834879406307978, "grad_norm": 5.3969197273254395, "learning_rate": 8.232441919444576e-06, "loss": 0.3759, "step": 6112 }, { "epoch": 0.2835343228200371, "grad_norm": 11.58286190032959, "learning_rate": 8.231880416127644e-06, "loss": 0.4679, "step": 6113 }, { "epoch": 0.28358070500927646, "grad_norm": 10.009974479675293, "learning_rate": 8.23131884279373e-06, "loss": 0.279, "step": 6114 }, { "epoch": 0.28362708719851576, "grad_norm": 7.753548622131348, "learning_rate": 8.230757199454995e-06, "loss": 0.375, "step": 6115 }, { "epoch": 0.2836734693877551, "grad_norm": 8.44676685333252, "learning_rate": 8.23019548612361e-06, "loss": 0.2693, "step": 6116 }, { "epoch": 0.2837198515769944, "grad_norm": 10.158910751342773, "learning_rate": 8.229633702811743e-06, "loss": 0.5052, "step": 6117 }, { "epoch": 0.2837662337662338, "grad_norm": 12.440485000610352, "learning_rate": 8.229071849531567e-06, "loss": 0.3146, "step": 6118 }, { "epoch": 0.2838126159554731, "grad_norm": 6.758049488067627, "learning_rate": 8.22850992629525e-06, "loss": 0.3783, "step": 6119 }, { "epoch": 0.28385899814471244, "grad_norm": 9.079829216003418, "learning_rate": 8.227947933114971e-06, "loss": 0.3724, "step": 6120 }, { "epoch": 0.28390538033395174, "grad_norm": 7.438721179962158, "learning_rate": 8.227385870002901e-06, "loss": 0.3194, "step": 6121 }, { "epoch": 0.2839517625231911, "grad_norm": 4.830787181854248, "learning_rate": 8.22682373697122e-06, "loss": 0.3399, "step": 6122 }, { "epoch": 0.2839981447124304, "grad_norm": 8.797164916992188, "learning_rate": 8.226261534032102e-06, "loss": 0.3982, "step": 6123 }, { "epoch": 0.28404452690166976, "grad_norm": 6.672488212585449, "learning_rate": 8.225699261197731e-06, "loss": 0.2697, "step": 6124 }, { "epoch": 0.2840909090909091, "grad_norm": 6.465839862823486, "learning_rate": 8.225136918480287e-06, "loss": 0.2982, "step": 6125 }, { "epoch": 0.2841372912801484, "grad_norm": 5.840917110443115, "learning_rate": 8.224574505891954e-06, "loss": 0.3172, "step": 6126 }, { "epoch": 0.2841836734693878, "grad_norm": 7.839077949523926, "learning_rate": 8.224012023444912e-06, "loss": 0.2572, "step": 6127 }, { "epoch": 0.2842300556586271, "grad_norm": 10.630623817443848, "learning_rate": 8.223449471151352e-06, "loss": 0.3988, "step": 6128 }, { "epoch": 0.28427643784786644, "grad_norm": 7.623749732971191, "learning_rate": 8.22288684902346e-06, "loss": 0.3621, "step": 6129 }, { "epoch": 0.28432282003710574, "grad_norm": 12.439238548278809, "learning_rate": 8.222324157073422e-06, "loss": 0.5187, "step": 6130 }, { "epoch": 0.2843692022263451, "grad_norm": 13.690415382385254, "learning_rate": 8.221761395313434e-06, "loss": 0.3647, "step": 6131 }, { "epoch": 0.2844155844155844, "grad_norm": 6.56081485748291, "learning_rate": 8.221198563755683e-06, "loss": 0.4109, "step": 6132 }, { "epoch": 0.28446196660482376, "grad_norm": 5.819082260131836, "learning_rate": 8.220635662412363e-06, "loss": 0.4177, "step": 6133 }, { "epoch": 0.28450834879406306, "grad_norm": 8.309667587280273, "learning_rate": 8.220072691295672e-06, "loss": 0.4516, "step": 6134 }, { "epoch": 0.2845547309833024, "grad_norm": 5.859638214111328, "learning_rate": 8.219509650417802e-06, "loss": 0.2731, "step": 6135 }, { "epoch": 0.2846011131725417, "grad_norm": 7.485929489135742, "learning_rate": 8.218946539790957e-06, "loss": 0.4395, "step": 6136 }, { "epoch": 0.2846474953617811, "grad_norm": 8.685323715209961, "learning_rate": 8.218383359427331e-06, "loss": 0.3874, "step": 6137 }, { "epoch": 0.28469387755102044, "grad_norm": 12.355982780456543, "learning_rate": 8.217820109339127e-06, "loss": 0.4059, "step": 6138 }, { "epoch": 0.28474025974025974, "grad_norm": 24.25164031982422, "learning_rate": 8.217256789538548e-06, "loss": 0.4414, "step": 6139 }, { "epoch": 0.2847866419294991, "grad_norm": 5.683264255523682, "learning_rate": 8.216693400037797e-06, "loss": 0.3791, "step": 6140 }, { "epoch": 0.2848330241187384, "grad_norm": 12.852513313293457, "learning_rate": 8.21612994084908e-06, "loss": 0.5556, "step": 6141 }, { "epoch": 0.28487940630797776, "grad_norm": 6.826916694641113, "learning_rate": 8.215566411984606e-06, "loss": 0.4011, "step": 6142 }, { "epoch": 0.28492578849721706, "grad_norm": 8.808913230895996, "learning_rate": 8.215002813456579e-06, "loss": 0.3517, "step": 6143 }, { "epoch": 0.2849721706864564, "grad_norm": 7.733315467834473, "learning_rate": 8.214439145277215e-06, "loss": 0.4922, "step": 6144 }, { "epoch": 0.2850185528756957, "grad_norm": 5.183093070983887, "learning_rate": 8.21387540745872e-06, "loss": 0.385, "step": 6145 }, { "epoch": 0.2850649350649351, "grad_norm": 7.473920822143555, "learning_rate": 8.21331160001331e-06, "loss": 0.3893, "step": 6146 }, { "epoch": 0.2851113172541744, "grad_norm": 7.724489212036133, "learning_rate": 8.212747722953199e-06, "loss": 0.3928, "step": 6147 }, { "epoch": 0.28515769944341374, "grad_norm": 5.1458940505981445, "learning_rate": 8.212183776290603e-06, "loss": 0.3327, "step": 6148 }, { "epoch": 0.28520408163265304, "grad_norm": 8.619171142578125, "learning_rate": 8.211619760037739e-06, "loss": 0.409, "step": 6149 }, { "epoch": 0.2852504638218924, "grad_norm": 7.42241907119751, "learning_rate": 8.211055674206828e-06, "loss": 0.3396, "step": 6150 }, { "epoch": 0.2852968460111317, "grad_norm": 6.743535041809082, "learning_rate": 8.210491518810088e-06, "loss": 0.361, "step": 6151 }, { "epoch": 0.28534322820037106, "grad_norm": 4.7517924308776855, "learning_rate": 8.209927293859746e-06, "loss": 0.3527, "step": 6152 }, { "epoch": 0.2853896103896104, "grad_norm": 5.903269290924072, "learning_rate": 8.20936299936802e-06, "loss": 0.3375, "step": 6153 }, { "epoch": 0.2854359925788497, "grad_norm": 6.0652923583984375, "learning_rate": 8.208798635347136e-06, "loss": 0.4282, "step": 6154 }, { "epoch": 0.2854823747680891, "grad_norm": 5.525450229644775, "learning_rate": 8.208234201809323e-06, "loss": 0.3386, "step": 6155 }, { "epoch": 0.2855287569573284, "grad_norm": 8.615540504455566, "learning_rate": 8.207669698766809e-06, "loss": 0.3884, "step": 6156 }, { "epoch": 0.28557513914656774, "grad_norm": 5.863137245178223, "learning_rate": 8.207105126231823e-06, "loss": 0.3236, "step": 6157 }, { "epoch": 0.28562152133580704, "grad_norm": 6.336973190307617, "learning_rate": 8.206540484216596e-06, "loss": 0.4248, "step": 6158 }, { "epoch": 0.2856679035250464, "grad_norm": 7.6153388023376465, "learning_rate": 8.20597577273336e-06, "loss": 0.4378, "step": 6159 }, { "epoch": 0.2857142857142857, "grad_norm": 5.164461612701416, "learning_rate": 8.20541099179435e-06, "loss": 0.2956, "step": 6160 }, { "epoch": 0.28576066790352506, "grad_norm": 6.539797306060791, "learning_rate": 8.204846141411802e-06, "loss": 0.3729, "step": 6161 }, { "epoch": 0.28580705009276436, "grad_norm": 13.277115821838379, "learning_rate": 8.204281221597951e-06, "loss": 0.4211, "step": 6162 }, { "epoch": 0.2858534322820037, "grad_norm": 9.469216346740723, "learning_rate": 8.20371623236504e-06, "loss": 0.3523, "step": 6163 }, { "epoch": 0.285899814471243, "grad_norm": 9.90860366821289, "learning_rate": 8.203151173725306e-06, "loss": 0.3618, "step": 6164 }, { "epoch": 0.2859461966604824, "grad_norm": 8.731485366821289, "learning_rate": 8.20258604569099e-06, "loss": 0.4257, "step": 6165 }, { "epoch": 0.2859925788497217, "grad_norm": 10.841532707214355, "learning_rate": 8.202020848274337e-06, "loss": 0.2447, "step": 6166 }, { "epoch": 0.28603896103896104, "grad_norm": 4.7070112228393555, "learning_rate": 8.201455581487591e-06, "loss": 0.2726, "step": 6167 }, { "epoch": 0.2860853432282004, "grad_norm": 7.310079097747803, "learning_rate": 8.200890245342999e-06, "loss": 0.3713, "step": 6168 }, { "epoch": 0.2861317254174397, "grad_norm": 8.38708209991455, "learning_rate": 8.200324839852807e-06, "loss": 0.3757, "step": 6169 }, { "epoch": 0.28617810760667906, "grad_norm": 4.895601749420166, "learning_rate": 8.199759365029267e-06, "loss": 0.3064, "step": 6170 }, { "epoch": 0.28622448979591836, "grad_norm": 4.641935348510742, "learning_rate": 8.199193820884627e-06, "loss": 0.2534, "step": 6171 }, { "epoch": 0.2862708719851577, "grad_norm": 5.670950412750244, "learning_rate": 8.198628207431142e-06, "loss": 0.3369, "step": 6172 }, { "epoch": 0.286317254174397, "grad_norm": 12.405975341796875, "learning_rate": 8.19806252468106e-06, "loss": 0.3585, "step": 6173 }, { "epoch": 0.2863636363636364, "grad_norm": 8.402029991149902, "learning_rate": 8.197496772646644e-06, "loss": 0.3775, "step": 6174 }, { "epoch": 0.2864100185528757, "grad_norm": 5.473095893859863, "learning_rate": 8.196930951340145e-06, "loss": 0.4227, "step": 6175 }, { "epoch": 0.28645640074211504, "grad_norm": 24.905635833740234, "learning_rate": 8.196365060773825e-06, "loss": 0.4846, "step": 6176 }, { "epoch": 0.28650278293135434, "grad_norm": 4.970062255859375, "learning_rate": 8.19579910095994e-06, "loss": 0.2866, "step": 6177 }, { "epoch": 0.2865491651205937, "grad_norm": 14.731581687927246, "learning_rate": 8.195233071910754e-06, "loss": 0.3796, "step": 6178 }, { "epoch": 0.286595547309833, "grad_norm": 62.258880615234375, "learning_rate": 8.194666973638528e-06, "loss": 0.4746, "step": 6179 }, { "epoch": 0.28664192949907236, "grad_norm": 5.033396244049072, "learning_rate": 8.194100806155529e-06, "loss": 0.1298, "step": 6180 }, { "epoch": 0.2866883116883117, "grad_norm": 8.716364860534668, "learning_rate": 8.19353456947402e-06, "loss": 0.2707, "step": 6181 }, { "epoch": 0.286734693877551, "grad_norm": 7.39922571182251, "learning_rate": 8.192968263606267e-06, "loss": 0.3463, "step": 6182 }, { "epoch": 0.2867810760667904, "grad_norm": 7.195284366607666, "learning_rate": 8.192401888564542e-06, "loss": 0.2962, "step": 6183 }, { "epoch": 0.2868274582560297, "grad_norm": 9.590727806091309, "learning_rate": 8.191835444361113e-06, "loss": 0.3981, "step": 6184 }, { "epoch": 0.28687384044526903, "grad_norm": 16.311277389526367, "learning_rate": 8.191268931008254e-06, "loss": 0.4903, "step": 6185 }, { "epoch": 0.28692022263450834, "grad_norm": 3.8049569129943848, "learning_rate": 8.190702348518238e-06, "loss": 0.1784, "step": 6186 }, { "epoch": 0.2869666048237477, "grad_norm": 12.763829231262207, "learning_rate": 8.190135696903336e-06, "loss": 0.305, "step": 6187 }, { "epoch": 0.287012987012987, "grad_norm": 15.56362247467041, "learning_rate": 8.189568976175828e-06, "loss": 0.2793, "step": 6188 }, { "epoch": 0.28705936920222636, "grad_norm": 7.067087173461914, "learning_rate": 8.18900218634799e-06, "loss": 0.3161, "step": 6189 }, { "epoch": 0.28710575139146566, "grad_norm": 9.843475341796875, "learning_rate": 8.1884353274321e-06, "loss": 0.4023, "step": 6190 }, { "epoch": 0.287152133580705, "grad_norm": 16.44778060913086, "learning_rate": 8.187868399440444e-06, "loss": 0.4085, "step": 6191 }, { "epoch": 0.2871985157699443, "grad_norm": 6.563050746917725, "learning_rate": 8.187301402385299e-06, "loss": 0.3046, "step": 6192 }, { "epoch": 0.2872448979591837, "grad_norm": 6.331050395965576, "learning_rate": 8.186734336278948e-06, "loss": 0.3716, "step": 6193 }, { "epoch": 0.287291280148423, "grad_norm": 4.692493915557861, "learning_rate": 8.186167201133682e-06, "loss": 0.2618, "step": 6194 }, { "epoch": 0.28733766233766234, "grad_norm": 11.58687686920166, "learning_rate": 8.185599996961781e-06, "loss": 0.3764, "step": 6195 }, { "epoch": 0.2873840445269017, "grad_norm": 7.418350696563721, "learning_rate": 8.18503272377554e-06, "loss": 0.3814, "step": 6196 }, { "epoch": 0.287430426716141, "grad_norm": 6.345428943634033, "learning_rate": 8.184465381587242e-06, "loss": 0.3319, "step": 6197 }, { "epoch": 0.28747680890538035, "grad_norm": 11.331757545471191, "learning_rate": 8.183897970409181e-06, "loss": 0.4343, "step": 6198 }, { "epoch": 0.28752319109461966, "grad_norm": 11.299992561340332, "learning_rate": 8.183330490253651e-06, "loss": 0.4808, "step": 6199 }, { "epoch": 0.287569573283859, "grad_norm": 8.602935791015625, "learning_rate": 8.182762941132944e-06, "loss": 0.2339, "step": 6200 }, { "epoch": 0.2876159554730983, "grad_norm": 12.132781982421875, "learning_rate": 8.182195323059358e-06, "loss": 0.5116, "step": 6201 }, { "epoch": 0.2876623376623377, "grad_norm": 8.605842590332031, "learning_rate": 8.181627636045189e-06, "loss": 0.3895, "step": 6202 }, { "epoch": 0.287708719851577, "grad_norm": 10.080096244812012, "learning_rate": 8.181059880102733e-06, "loss": 0.522, "step": 6203 }, { "epoch": 0.28775510204081634, "grad_norm": 5.56599235534668, "learning_rate": 8.180492055244294e-06, "loss": 0.2649, "step": 6204 }, { "epoch": 0.28780148423005564, "grad_norm": 5.5046515464782715, "learning_rate": 8.179924161482173e-06, "loss": 0.3676, "step": 6205 }, { "epoch": 0.287847866419295, "grad_norm": 7.957090854644775, "learning_rate": 8.17935619882867e-06, "loss": 0.4022, "step": 6206 }, { "epoch": 0.2878942486085343, "grad_norm": 10.450660705566406, "learning_rate": 8.178788167296094e-06, "loss": 0.4649, "step": 6207 }, { "epoch": 0.28794063079777366, "grad_norm": 6.853631496429443, "learning_rate": 8.178220066896748e-06, "loss": 0.3318, "step": 6208 }, { "epoch": 0.287987012987013, "grad_norm": 11.37145709991455, "learning_rate": 8.17765189764294e-06, "loss": 0.3459, "step": 6209 }, { "epoch": 0.2880333951762523, "grad_norm": 7.950389862060547, "learning_rate": 8.177083659546982e-06, "loss": 0.4103, "step": 6210 }, { "epoch": 0.2880797773654917, "grad_norm": 9.398131370544434, "learning_rate": 8.17651535262118e-06, "loss": 0.4186, "step": 6211 }, { "epoch": 0.288126159554731, "grad_norm": 10.312822341918945, "learning_rate": 8.175946976877848e-06, "loss": 0.3525, "step": 6212 }, { "epoch": 0.28817254174397033, "grad_norm": 5.95821475982666, "learning_rate": 8.175378532329302e-06, "loss": 0.3342, "step": 6213 }, { "epoch": 0.28821892393320964, "grad_norm": 8.294635772705078, "learning_rate": 8.174810018987854e-06, "loss": 0.3593, "step": 6214 }, { "epoch": 0.288265306122449, "grad_norm": 6.918397903442383, "learning_rate": 8.174241436865822e-06, "loss": 0.341, "step": 6215 }, { "epoch": 0.2883116883116883, "grad_norm": 6.1062846183776855, "learning_rate": 8.173672785975522e-06, "loss": 0.3069, "step": 6216 }, { "epoch": 0.28835807050092765, "grad_norm": 8.705591201782227, "learning_rate": 8.173104066329275e-06, "loss": 0.3269, "step": 6217 }, { "epoch": 0.28840445269016696, "grad_norm": 9.570034980773926, "learning_rate": 8.172535277939404e-06, "loss": 0.4115, "step": 6218 }, { "epoch": 0.2884508348794063, "grad_norm": 6.019164085388184, "learning_rate": 8.171966420818227e-06, "loss": 0.3262, "step": 6219 }, { "epoch": 0.2884972170686456, "grad_norm": 7.928696632385254, "learning_rate": 8.171397494978073e-06, "loss": 0.4549, "step": 6220 }, { "epoch": 0.288543599257885, "grad_norm": 4.962610244750977, "learning_rate": 8.170828500431265e-06, "loss": 0.3934, "step": 6221 }, { "epoch": 0.2885899814471243, "grad_norm": 7.254619598388672, "learning_rate": 8.17025943719013e-06, "loss": 0.4063, "step": 6222 }, { "epoch": 0.28863636363636364, "grad_norm": 6.741583347320557, "learning_rate": 8.169690305266993e-06, "loss": 0.3394, "step": 6223 }, { "epoch": 0.288682745825603, "grad_norm": 5.982334613800049, "learning_rate": 8.169121104674192e-06, "loss": 0.2868, "step": 6224 }, { "epoch": 0.2887291280148423, "grad_norm": 7.079036712646484, "learning_rate": 8.16855183542405e-06, "loss": 0.3625, "step": 6225 }, { "epoch": 0.28877551020408165, "grad_norm": 7.497025012969971, "learning_rate": 8.167982497528906e-06, "loss": 0.4053, "step": 6226 }, { "epoch": 0.28882189239332096, "grad_norm": 9.113673210144043, "learning_rate": 8.167413091001091e-06, "loss": 0.4439, "step": 6227 }, { "epoch": 0.2888682745825603, "grad_norm": 14.177929878234863, "learning_rate": 8.166843615852942e-06, "loss": 0.5975, "step": 6228 }, { "epoch": 0.2889146567717996, "grad_norm": 10.14328384399414, "learning_rate": 8.166274072096799e-06, "loss": 0.3703, "step": 6229 }, { "epoch": 0.288961038961039, "grad_norm": 5.6463775634765625, "learning_rate": 8.165704459744994e-06, "loss": 0.2362, "step": 6230 }, { "epoch": 0.2890074211502783, "grad_norm": 8.381531715393066, "learning_rate": 8.165134778809873e-06, "loss": 0.4227, "step": 6231 }, { "epoch": 0.28905380333951763, "grad_norm": 9.290311813354492, "learning_rate": 8.16456502930378e-06, "loss": 0.4297, "step": 6232 }, { "epoch": 0.28910018552875694, "grad_norm": 9.986162185668945, "learning_rate": 8.163995211239051e-06, "loss": 0.4276, "step": 6233 }, { "epoch": 0.2891465677179963, "grad_norm": 7.604979038238525, "learning_rate": 8.163425324628034e-06, "loss": 0.3621, "step": 6234 }, { "epoch": 0.2891929499072356, "grad_norm": 9.920409202575684, "learning_rate": 8.162855369483078e-06, "loss": 0.3158, "step": 6235 }, { "epoch": 0.28923933209647495, "grad_norm": 6.354944229125977, "learning_rate": 8.162285345816527e-06, "loss": 0.3038, "step": 6236 }, { "epoch": 0.2892857142857143, "grad_norm": 7.9659743309021, "learning_rate": 8.161715253640734e-06, "loss": 0.3664, "step": 6237 }, { "epoch": 0.2893320964749536, "grad_norm": 5.767703056335449, "learning_rate": 8.161145092968046e-06, "loss": 0.3965, "step": 6238 }, { "epoch": 0.289378478664193, "grad_norm": 4.687185764312744, "learning_rate": 8.160574863810818e-06, "loss": 0.2932, "step": 6239 }, { "epoch": 0.2894248608534323, "grad_norm": 9.907134056091309, "learning_rate": 8.160004566181404e-06, "loss": 0.4944, "step": 6240 }, { "epoch": 0.28947124304267163, "grad_norm": 8.44916820526123, "learning_rate": 8.159434200092155e-06, "loss": 0.212, "step": 6241 }, { "epoch": 0.28951762523191094, "grad_norm": 13.395349502563477, "learning_rate": 8.15886376555543e-06, "loss": 0.4107, "step": 6242 }, { "epoch": 0.2895640074211503, "grad_norm": 6.937016010284424, "learning_rate": 8.15829326258359e-06, "loss": 0.3433, "step": 6243 }, { "epoch": 0.2896103896103896, "grad_norm": 7.1880340576171875, "learning_rate": 8.157722691188992e-06, "loss": 0.3811, "step": 6244 }, { "epoch": 0.28965677179962895, "grad_norm": 9.22458553314209, "learning_rate": 8.157152051383996e-06, "loss": 0.3069, "step": 6245 }, { "epoch": 0.28970315398886826, "grad_norm": 4.2241291999816895, "learning_rate": 8.156581343180967e-06, "loss": 0.3468, "step": 6246 }, { "epoch": 0.2897495361781076, "grad_norm": 10.403976440429688, "learning_rate": 8.156010566592267e-06, "loss": 0.344, "step": 6247 }, { "epoch": 0.2897959183673469, "grad_norm": 6.613167762756348, "learning_rate": 8.155439721630265e-06, "loss": 0.255, "step": 6248 }, { "epoch": 0.2898423005565863, "grad_norm": 10.203088760375977, "learning_rate": 8.154868808307324e-06, "loss": 0.3712, "step": 6249 }, { "epoch": 0.2898886827458256, "grad_norm": 6.532538890838623, "learning_rate": 8.154297826635815e-06, "loss": 0.2804, "step": 6250 }, { "epoch": 0.28993506493506493, "grad_norm": 4.68057918548584, "learning_rate": 8.153726776628106e-06, "loss": 0.3592, "step": 6251 }, { "epoch": 0.2899814471243043, "grad_norm": 25.07828140258789, "learning_rate": 8.153155658296572e-06, "loss": 0.4143, "step": 6252 }, { "epoch": 0.2900278293135436, "grad_norm": 5.898772716522217, "learning_rate": 8.152584471653581e-06, "loss": 0.4337, "step": 6253 }, { "epoch": 0.29007421150278295, "grad_norm": 8.035765647888184, "learning_rate": 8.152013216711512e-06, "loss": 0.3917, "step": 6254 }, { "epoch": 0.29012059369202226, "grad_norm": 5.909323692321777, "learning_rate": 8.15144189348274e-06, "loss": 0.2304, "step": 6255 }, { "epoch": 0.2901669758812616, "grad_norm": 11.050966262817383, "learning_rate": 8.15087050197964e-06, "loss": 0.3984, "step": 6256 }, { "epoch": 0.2902133580705009, "grad_norm": 13.707331657409668, "learning_rate": 8.150299042214594e-06, "loss": 0.3347, "step": 6257 }, { "epoch": 0.2902597402597403, "grad_norm": 3.9692578315734863, "learning_rate": 8.149727514199978e-06, "loss": 0.3286, "step": 6258 }, { "epoch": 0.2903061224489796, "grad_norm": 6.691097736358643, "learning_rate": 8.149155917948179e-06, "loss": 0.3682, "step": 6259 }, { "epoch": 0.29035250463821893, "grad_norm": 8.445900917053223, "learning_rate": 8.148584253471579e-06, "loss": 0.5307, "step": 6260 }, { "epoch": 0.29039888682745824, "grad_norm": 5.158410549163818, "learning_rate": 8.14801252078256e-06, "loss": 0.2703, "step": 6261 }, { "epoch": 0.2904452690166976, "grad_norm": 6.29531717300415, "learning_rate": 8.14744071989351e-06, "loss": 0.3407, "step": 6262 }, { "epoch": 0.2904916512059369, "grad_norm": 7.12031888961792, "learning_rate": 8.146868850816819e-06, "loss": 0.3338, "step": 6263 }, { "epoch": 0.29053803339517625, "grad_norm": 4.580161094665527, "learning_rate": 8.146296913564872e-06, "loss": 0.3379, "step": 6264 }, { "epoch": 0.2905844155844156, "grad_norm": 4.565007209777832, "learning_rate": 8.145724908150064e-06, "loss": 0.3172, "step": 6265 }, { "epoch": 0.2906307977736549, "grad_norm": 4.739018440246582, "learning_rate": 8.145152834584782e-06, "loss": 0.3897, "step": 6266 }, { "epoch": 0.29067717996289427, "grad_norm": 5.391056060791016, "learning_rate": 8.144580692881425e-06, "loss": 0.3417, "step": 6267 }, { "epoch": 0.2907235621521336, "grad_norm": 6.59898042678833, "learning_rate": 8.144008483052385e-06, "loss": 0.319, "step": 6268 }, { "epoch": 0.29076994434137293, "grad_norm": 7.311168670654297, "learning_rate": 8.14343620511006e-06, "loss": 0.3523, "step": 6269 }, { "epoch": 0.29081632653061223, "grad_norm": 6.403462886810303, "learning_rate": 8.142863859066848e-06, "loss": 0.3748, "step": 6270 }, { "epoch": 0.2908627087198516, "grad_norm": 4.725225925445557, "learning_rate": 8.142291444935147e-06, "loss": 0.3576, "step": 6271 }, { "epoch": 0.2909090909090909, "grad_norm": 8.676095008850098, "learning_rate": 8.14171896272736e-06, "loss": 0.3847, "step": 6272 }, { "epoch": 0.29095547309833025, "grad_norm": 6.473916530609131, "learning_rate": 8.141146412455888e-06, "loss": 0.2161, "step": 6273 }, { "epoch": 0.29100185528756956, "grad_norm": 7.069913387298584, "learning_rate": 8.140573794133137e-06, "loss": 0.415, "step": 6274 }, { "epoch": 0.2910482374768089, "grad_norm": 7.449296951293945, "learning_rate": 8.14000110777151e-06, "loss": 0.31, "step": 6275 }, { "epoch": 0.2910946196660482, "grad_norm": 7.398065567016602, "learning_rate": 8.139428353383416e-06, "loss": 0.3834, "step": 6276 }, { "epoch": 0.2911410018552876, "grad_norm": 8.3926420211792, "learning_rate": 8.138855530981263e-06, "loss": 0.3638, "step": 6277 }, { "epoch": 0.2911873840445269, "grad_norm": 4.997363090515137, "learning_rate": 8.138282640577459e-06, "loss": 0.3148, "step": 6278 }, { "epoch": 0.29123376623376623, "grad_norm": 8.99169921875, "learning_rate": 8.137709682184418e-06, "loss": 0.3789, "step": 6279 }, { "epoch": 0.2912801484230056, "grad_norm": 11.105337142944336, "learning_rate": 8.13713665581455e-06, "loss": 0.4115, "step": 6280 }, { "epoch": 0.2913265306122449, "grad_norm": 13.181046485900879, "learning_rate": 8.136563561480273e-06, "loss": 0.5159, "step": 6281 }, { "epoch": 0.29137291280148425, "grad_norm": 6.827955722808838, "learning_rate": 8.135990399194001e-06, "loss": 0.401, "step": 6282 }, { "epoch": 0.29141929499072355, "grad_norm": 8.095053672790527, "learning_rate": 8.13541716896815e-06, "loss": 0.3906, "step": 6283 }, { "epoch": 0.2914656771799629, "grad_norm": 9.888023376464844, "learning_rate": 8.13484387081514e-06, "loss": 0.3681, "step": 6284 }, { "epoch": 0.2915120593692022, "grad_norm": 5.3006086349487305, "learning_rate": 8.13427050474739e-06, "loss": 0.273, "step": 6285 }, { "epoch": 0.2915584415584416, "grad_norm": 8.372920989990234, "learning_rate": 8.133697070777324e-06, "loss": 0.3936, "step": 6286 }, { "epoch": 0.2916048237476809, "grad_norm": 12.835405349731445, "learning_rate": 8.133123568917365e-06, "loss": 0.3167, "step": 6287 }, { "epoch": 0.29165120593692023, "grad_norm": 4.192995548248291, "learning_rate": 8.132549999179934e-06, "loss": 0.3167, "step": 6288 }, { "epoch": 0.29169758812615953, "grad_norm": 11.829895973205566, "learning_rate": 8.13197636157746e-06, "loss": 0.3261, "step": 6289 }, { "epoch": 0.2917439703153989, "grad_norm": 9.601490020751953, "learning_rate": 8.131402656122372e-06, "loss": 0.3879, "step": 6290 }, { "epoch": 0.2917903525046382, "grad_norm": 5.062792778015137, "learning_rate": 8.130828882827094e-06, "loss": 0.3417, "step": 6291 }, { "epoch": 0.29183673469387755, "grad_norm": 10.22500991821289, "learning_rate": 8.130255041704063e-06, "loss": 0.3568, "step": 6292 }, { "epoch": 0.29188311688311686, "grad_norm": 8.363293647766113, "learning_rate": 8.129681132765705e-06, "loss": 0.3846, "step": 6293 }, { "epoch": 0.2919294990723562, "grad_norm": 5.96317195892334, "learning_rate": 8.129107156024455e-06, "loss": 0.309, "step": 6294 }, { "epoch": 0.29197588126159557, "grad_norm": 5.298001289367676, "learning_rate": 8.128533111492752e-06, "loss": 0.3526, "step": 6295 }, { "epoch": 0.2920222634508349, "grad_norm": 10.364675521850586, "learning_rate": 8.127958999183027e-06, "loss": 0.3723, "step": 6296 }, { "epoch": 0.29206864564007423, "grad_norm": 9.796384811401367, "learning_rate": 8.127384819107722e-06, "loss": 0.4656, "step": 6297 }, { "epoch": 0.29211502782931353, "grad_norm": 7.975378513336182, "learning_rate": 8.126810571279273e-06, "loss": 0.3764, "step": 6298 }, { "epoch": 0.2921614100185529, "grad_norm": 4.018299102783203, "learning_rate": 8.126236255710123e-06, "loss": 0.3405, "step": 6299 }, { "epoch": 0.2922077922077922, "grad_norm": 10.743943214416504, "learning_rate": 8.125661872412713e-06, "loss": 0.5494, "step": 6300 }, { "epoch": 0.29225417439703155, "grad_norm": 14.077674865722656, "learning_rate": 8.125087421399487e-06, "loss": 0.3804, "step": 6301 }, { "epoch": 0.29230055658627085, "grad_norm": 4.831739902496338, "learning_rate": 8.124512902682891e-06, "loss": 0.2333, "step": 6302 }, { "epoch": 0.2923469387755102, "grad_norm": 6.475256443023682, "learning_rate": 8.123938316275371e-06, "loss": 0.2514, "step": 6303 }, { "epoch": 0.2923933209647495, "grad_norm": 7.418537616729736, "learning_rate": 8.123363662189373e-06, "loss": 0.3589, "step": 6304 }, { "epoch": 0.2924397031539889, "grad_norm": 6.253316879272461, "learning_rate": 8.122788940437352e-06, "loss": 0.3229, "step": 6305 }, { "epoch": 0.2924860853432282, "grad_norm": 4.746002674102783, "learning_rate": 8.122214151031753e-06, "loss": 0.2679, "step": 6306 }, { "epoch": 0.29253246753246753, "grad_norm": 6.783196449279785, "learning_rate": 8.121639293985033e-06, "loss": 0.3028, "step": 6307 }, { "epoch": 0.2925788497217069, "grad_norm": 4.894598484039307, "learning_rate": 8.121064369309644e-06, "loss": 0.3072, "step": 6308 }, { "epoch": 0.2926252319109462, "grad_norm": 6.1168293952941895, "learning_rate": 8.12048937701804e-06, "loss": 0.348, "step": 6309 }, { "epoch": 0.29267161410018555, "grad_norm": 7.840660095214844, "learning_rate": 8.119914317122681e-06, "loss": 0.3243, "step": 6310 }, { "epoch": 0.29271799628942485, "grad_norm": 7.72324275970459, "learning_rate": 8.119339189636024e-06, "loss": 0.2559, "step": 6311 }, { "epoch": 0.2927643784786642, "grad_norm": 4.232672214508057, "learning_rate": 8.118763994570528e-06, "loss": 0.2968, "step": 6312 }, { "epoch": 0.2928107606679035, "grad_norm": 5.712223052978516, "learning_rate": 8.118188731938654e-06, "loss": 0.3556, "step": 6313 }, { "epoch": 0.29285714285714287, "grad_norm": 5.483803749084473, "learning_rate": 8.117613401752869e-06, "loss": 0.331, "step": 6314 }, { "epoch": 0.2929035250463822, "grad_norm": 7.023990631103516, "learning_rate": 8.11703800402563e-06, "loss": 0.4038, "step": 6315 }, { "epoch": 0.29294990723562153, "grad_norm": 10.901226997375488, "learning_rate": 8.11646253876941e-06, "loss": 0.3825, "step": 6316 }, { "epoch": 0.29299628942486083, "grad_norm": 6.924710273742676, "learning_rate": 8.115887005996672e-06, "loss": 0.3246, "step": 6317 }, { "epoch": 0.2930426716141002, "grad_norm": 7.608652114868164, "learning_rate": 8.115311405719885e-06, "loss": 0.2739, "step": 6318 }, { "epoch": 0.2930890538033395, "grad_norm": 6.259729862213135, "learning_rate": 8.11473573795152e-06, "loss": 0.3646, "step": 6319 }, { "epoch": 0.29313543599257885, "grad_norm": 6.226423263549805, "learning_rate": 8.114160002704048e-06, "loss": 0.4009, "step": 6320 }, { "epoch": 0.29318181818181815, "grad_norm": 8.610807418823242, "learning_rate": 8.113584199989941e-06, "loss": 0.4653, "step": 6321 }, { "epoch": 0.2932282003710575, "grad_norm": 22.365575790405273, "learning_rate": 8.113008329821675e-06, "loss": 0.643, "step": 6322 }, { "epoch": 0.29327458256029687, "grad_norm": 6.251905918121338, "learning_rate": 8.112432392211726e-06, "loss": 0.3304, "step": 6323 }, { "epoch": 0.2933209647495362, "grad_norm": 4.886246681213379, "learning_rate": 8.11185638717257e-06, "loss": 0.2393, "step": 6324 }, { "epoch": 0.29336734693877553, "grad_norm": 5.425588607788086, "learning_rate": 8.111280314716687e-06, "loss": 0.3746, "step": 6325 }, { "epoch": 0.29341372912801483, "grad_norm": 5.480897903442383, "learning_rate": 8.110704174856558e-06, "loss": 0.2654, "step": 6326 }, { "epoch": 0.2934601113172542, "grad_norm": 6.411409854888916, "learning_rate": 8.110127967604665e-06, "loss": 0.2789, "step": 6327 }, { "epoch": 0.2935064935064935, "grad_norm": 10.159387588500977, "learning_rate": 8.109551692973487e-06, "loss": 0.3636, "step": 6328 }, { "epoch": 0.29355287569573285, "grad_norm": 8.071340560913086, "learning_rate": 8.108975350975513e-06, "loss": 0.2765, "step": 6329 }, { "epoch": 0.29359925788497215, "grad_norm": 5.60606575012207, "learning_rate": 8.108398941623229e-06, "loss": 0.3306, "step": 6330 }, { "epoch": 0.2936456400742115, "grad_norm": 12.566670417785645, "learning_rate": 8.107822464929118e-06, "loss": 0.3756, "step": 6331 }, { "epoch": 0.2936920222634508, "grad_norm": 11.484832763671875, "learning_rate": 8.107245920905675e-06, "loss": 0.4105, "step": 6332 }, { "epoch": 0.29373840445269017, "grad_norm": 10.8466215133667, "learning_rate": 8.106669309565389e-06, "loss": 0.5083, "step": 6333 }, { "epoch": 0.2937847866419295, "grad_norm": 7.895463466644287, "learning_rate": 8.10609263092075e-06, "loss": 0.4595, "step": 6334 }, { "epoch": 0.29383116883116883, "grad_norm": 8.131087303161621, "learning_rate": 8.105515884984252e-06, "loss": 0.4451, "step": 6335 }, { "epoch": 0.2938775510204082, "grad_norm": 5.824517250061035, "learning_rate": 8.10493907176839e-06, "loss": 0.2393, "step": 6336 }, { "epoch": 0.2939239332096475, "grad_norm": 22.769838333129883, "learning_rate": 8.104362191285661e-06, "loss": 0.329, "step": 6337 }, { "epoch": 0.29397031539888685, "grad_norm": 11.460925102233887, "learning_rate": 8.103785243548563e-06, "loss": 0.4261, "step": 6338 }, { "epoch": 0.29401669758812615, "grad_norm": 5.213460445404053, "learning_rate": 8.103208228569595e-06, "loss": 0.2987, "step": 6339 }, { "epoch": 0.2940630797773655, "grad_norm": 6.234729290008545, "learning_rate": 8.102631146361256e-06, "loss": 0.3704, "step": 6340 }, { "epoch": 0.2941094619666048, "grad_norm": 10.619220733642578, "learning_rate": 8.10205399693605e-06, "loss": 0.4184, "step": 6341 }, { "epoch": 0.29415584415584417, "grad_norm": 6.012730598449707, "learning_rate": 8.101476780306481e-06, "loss": 0.3508, "step": 6342 }, { "epoch": 0.2942022263450835, "grad_norm": 9.14661693572998, "learning_rate": 8.100899496485055e-06, "loss": 0.437, "step": 6343 }, { "epoch": 0.29424860853432283, "grad_norm": 4.736847400665283, "learning_rate": 8.100322145484275e-06, "loss": 0.2975, "step": 6344 }, { "epoch": 0.29429499072356213, "grad_norm": 11.50524616241455, "learning_rate": 8.099744727316651e-06, "loss": 0.4783, "step": 6345 }, { "epoch": 0.2943413729128015, "grad_norm": 6.7232184410095215, "learning_rate": 8.099167241994692e-06, "loss": 0.4242, "step": 6346 }, { "epoch": 0.2943877551020408, "grad_norm": 10.706683158874512, "learning_rate": 8.098589689530912e-06, "loss": 0.4555, "step": 6347 }, { "epoch": 0.29443413729128015, "grad_norm": 3.7700929641723633, "learning_rate": 8.098012069937817e-06, "loss": 0.3565, "step": 6348 }, { "epoch": 0.29448051948051945, "grad_norm": 9.593456268310547, "learning_rate": 8.097434383227927e-06, "loss": 0.2892, "step": 6349 }, { "epoch": 0.2945269016697588, "grad_norm": 8.115884780883789, "learning_rate": 8.096856629413754e-06, "loss": 0.3222, "step": 6350 }, { "epoch": 0.29457328385899817, "grad_norm": 6.727236747741699, "learning_rate": 8.096278808507816e-06, "loss": 0.423, "step": 6351 }, { "epoch": 0.29461966604823747, "grad_norm": 10.077895164489746, "learning_rate": 8.09570092052263e-06, "loss": 0.4669, "step": 6352 }, { "epoch": 0.29466604823747683, "grad_norm": 9.960319519042969, "learning_rate": 8.095122965470717e-06, "loss": 0.5489, "step": 6353 }, { "epoch": 0.29471243042671613, "grad_norm": 7.7487406730651855, "learning_rate": 8.094544943364597e-06, "loss": 0.3403, "step": 6354 }, { "epoch": 0.2947588126159555, "grad_norm": 8.728962898254395, "learning_rate": 8.093966854216792e-06, "loss": 0.3727, "step": 6355 }, { "epoch": 0.2948051948051948, "grad_norm": 4.3514275550842285, "learning_rate": 8.09338869803983e-06, "loss": 0.354, "step": 6356 }, { "epoch": 0.29485157699443415, "grad_norm": 8.762027740478516, "learning_rate": 8.09281047484623e-06, "loss": 0.4227, "step": 6357 }, { "epoch": 0.29489795918367345, "grad_norm": 7.2420196533203125, "learning_rate": 8.092232184648525e-06, "loss": 0.4422, "step": 6358 }, { "epoch": 0.2949443413729128, "grad_norm": 8.585442543029785, "learning_rate": 8.091653827459241e-06, "loss": 0.4245, "step": 6359 }, { "epoch": 0.2949907235621521, "grad_norm": 6.286200523376465, "learning_rate": 8.091075403290905e-06, "loss": 0.4093, "step": 6360 }, { "epoch": 0.29503710575139147, "grad_norm": 11.134454727172852, "learning_rate": 8.090496912156051e-06, "loss": 0.4418, "step": 6361 }, { "epoch": 0.2950834879406308, "grad_norm": 5.167034149169922, "learning_rate": 8.089918354067213e-06, "loss": 0.3402, "step": 6362 }, { "epoch": 0.29512987012987013, "grad_norm": 6.749930381774902, "learning_rate": 8.089339729036925e-06, "loss": 0.2833, "step": 6363 }, { "epoch": 0.2951762523191095, "grad_norm": 4.521007537841797, "learning_rate": 8.088761037077718e-06, "loss": 0.3107, "step": 6364 }, { "epoch": 0.2952226345083488, "grad_norm": 5.687578201293945, "learning_rate": 8.088182278202133e-06, "loss": 0.3182, "step": 6365 }, { "epoch": 0.29526901669758815, "grad_norm": 7.363772869110107, "learning_rate": 8.08760345242271e-06, "loss": 0.3762, "step": 6366 }, { "epoch": 0.29531539888682745, "grad_norm": 5.615804672241211, "learning_rate": 8.087024559751984e-06, "loss": 0.3891, "step": 6367 }, { "epoch": 0.2953617810760668, "grad_norm": 5.701439380645752, "learning_rate": 8.086445600202502e-06, "loss": 0.239, "step": 6368 }, { "epoch": 0.2954081632653061, "grad_norm": 6.402019500732422, "learning_rate": 8.0858665737868e-06, "loss": 0.2668, "step": 6369 }, { "epoch": 0.29545454545454547, "grad_norm": 5.802917003631592, "learning_rate": 8.08528748051743e-06, "loss": 0.1565, "step": 6370 }, { "epoch": 0.29550092764378477, "grad_norm": 8.277812004089355, "learning_rate": 8.084708320406934e-06, "loss": 0.5395, "step": 6371 }, { "epoch": 0.29554730983302413, "grad_norm": 7.146137237548828, "learning_rate": 8.084129093467857e-06, "loss": 0.2478, "step": 6372 }, { "epoch": 0.29559369202226343, "grad_norm": 5.342002868652344, "learning_rate": 8.083549799712749e-06, "loss": 0.3128, "step": 6373 }, { "epoch": 0.2956400742115028, "grad_norm": 7.8617753982543945, "learning_rate": 8.082970439154164e-06, "loss": 0.3518, "step": 6374 }, { "epoch": 0.2956864564007421, "grad_norm": 4.996439456939697, "learning_rate": 8.082391011804649e-06, "loss": 0.3895, "step": 6375 }, { "epoch": 0.29573283858998145, "grad_norm": 5.535582065582275, "learning_rate": 8.081811517676759e-06, "loss": 0.3758, "step": 6376 }, { "epoch": 0.29577922077922075, "grad_norm": 7.563549995422363, "learning_rate": 8.081231956783048e-06, "loss": 0.376, "step": 6377 }, { "epoch": 0.2958256029684601, "grad_norm": 8.012236595153809, "learning_rate": 8.080652329136072e-06, "loss": 0.3376, "step": 6378 }, { "epoch": 0.29587198515769947, "grad_norm": 18.31106948852539, "learning_rate": 8.080072634748389e-06, "loss": 0.4268, "step": 6379 }, { "epoch": 0.29591836734693877, "grad_norm": 15.318674087524414, "learning_rate": 8.079492873632554e-06, "loss": 0.3507, "step": 6380 }, { "epoch": 0.29596474953617813, "grad_norm": 4.390234470367432, "learning_rate": 8.078913045801132e-06, "loss": 0.2942, "step": 6381 }, { "epoch": 0.29601113172541743, "grad_norm": 9.020916938781738, "learning_rate": 8.078333151266683e-06, "loss": 0.3394, "step": 6382 }, { "epoch": 0.2960575139146568, "grad_norm": 20.040769577026367, "learning_rate": 8.077753190041768e-06, "loss": 0.2505, "step": 6383 }, { "epoch": 0.2961038961038961, "grad_norm": 14.565251350402832, "learning_rate": 8.077173162138956e-06, "loss": 0.3965, "step": 6384 }, { "epoch": 0.29615027829313545, "grad_norm": 10.226795196533203, "learning_rate": 8.07659306757081e-06, "loss": 0.3983, "step": 6385 }, { "epoch": 0.29619666048237475, "grad_norm": 5.607831954956055, "learning_rate": 8.076012906349897e-06, "loss": 0.3716, "step": 6386 }, { "epoch": 0.2962430426716141, "grad_norm": 7.035378456115723, "learning_rate": 8.075432678488786e-06, "loss": 0.203, "step": 6387 }, { "epoch": 0.2962894248608534, "grad_norm": 4.190101623535156, "learning_rate": 8.074852384000052e-06, "loss": 0.3479, "step": 6388 }, { "epoch": 0.29633580705009277, "grad_norm": 6.478888988494873, "learning_rate": 8.07427202289626e-06, "loss": 0.3494, "step": 6389 }, { "epoch": 0.2963821892393321, "grad_norm": 8.572684288024902, "learning_rate": 8.073691595189987e-06, "loss": 0.4551, "step": 6390 }, { "epoch": 0.29642857142857143, "grad_norm": 8.824674606323242, "learning_rate": 8.073111100893805e-06, "loss": 0.4191, "step": 6391 }, { "epoch": 0.2964749536178108, "grad_norm": 7.2202863693237305, "learning_rate": 8.072530540020294e-06, "loss": 0.3333, "step": 6392 }, { "epoch": 0.2965213358070501, "grad_norm": 6.676168441772461, "learning_rate": 8.071949912582029e-06, "loss": 0.321, "step": 6393 }, { "epoch": 0.29656771799628945, "grad_norm": 6.046130180358887, "learning_rate": 8.071369218591588e-06, "loss": 0.2078, "step": 6394 }, { "epoch": 0.29661410018552875, "grad_norm": 9.526464462280273, "learning_rate": 8.070788458061555e-06, "loss": 0.3767, "step": 6395 }, { "epoch": 0.2966604823747681, "grad_norm": 11.954962730407715, "learning_rate": 8.07020763100451e-06, "loss": 0.4476, "step": 6396 }, { "epoch": 0.2967068645640074, "grad_norm": 9.74572467803955, "learning_rate": 8.069626737433033e-06, "loss": 0.3886, "step": 6397 }, { "epoch": 0.29675324675324677, "grad_norm": 6.057463645935059, "learning_rate": 8.069045777359713e-06, "loss": 0.3637, "step": 6398 }, { "epoch": 0.29679962894248607, "grad_norm": 15.97484302520752, "learning_rate": 8.068464750797135e-06, "loss": 0.4956, "step": 6399 }, { "epoch": 0.29684601113172543, "grad_norm": 10.114471435546875, "learning_rate": 8.067883657757888e-06, "loss": 0.3814, "step": 6400 }, { "epoch": 0.29689239332096473, "grad_norm": 6.035767555236816, "learning_rate": 8.067302498254558e-06, "loss": 0.3759, "step": 6401 }, { "epoch": 0.2969387755102041, "grad_norm": 7.873994827270508, "learning_rate": 8.06672127229974e-06, "loss": 0.3703, "step": 6402 }, { "epoch": 0.2969851576994434, "grad_norm": 20.956871032714844, "learning_rate": 8.06613997990602e-06, "loss": 0.4625, "step": 6403 }, { "epoch": 0.29703153988868275, "grad_norm": 7.224864959716797, "learning_rate": 8.065558621085997e-06, "loss": 0.3279, "step": 6404 }, { "epoch": 0.29707792207792205, "grad_norm": 5.205594062805176, "learning_rate": 8.064977195852261e-06, "loss": 0.3832, "step": 6405 }, { "epoch": 0.2971243042671614, "grad_norm": 5.655943393707275, "learning_rate": 8.064395704217412e-06, "loss": 0.3657, "step": 6406 }, { "epoch": 0.29717068645640077, "grad_norm": 9.464224815368652, "learning_rate": 8.063814146194047e-06, "loss": 0.4106, "step": 6407 }, { "epoch": 0.29721706864564007, "grad_norm": 9.03645133972168, "learning_rate": 8.063232521794762e-06, "loss": 0.3984, "step": 6408 }, { "epoch": 0.29726345083487943, "grad_norm": 12.839919090270996, "learning_rate": 8.062650831032163e-06, "loss": 0.4649, "step": 6409 }, { "epoch": 0.29730983302411873, "grad_norm": 8.408035278320312, "learning_rate": 8.062069073918848e-06, "loss": 0.372, "step": 6410 }, { "epoch": 0.2973562152133581, "grad_norm": 3.9582343101501465, "learning_rate": 8.06148725046742e-06, "loss": 0.3629, "step": 6411 }, { "epoch": 0.2974025974025974, "grad_norm": 9.72581958770752, "learning_rate": 8.060905360690488e-06, "loss": 0.4691, "step": 6412 }, { "epoch": 0.29744897959183675, "grad_norm": 5.822299480438232, "learning_rate": 8.060323404600655e-06, "loss": 0.3565, "step": 6413 }, { "epoch": 0.29749536178107605, "grad_norm": 8.018736839294434, "learning_rate": 8.059741382210528e-06, "loss": 0.4832, "step": 6414 }, { "epoch": 0.2975417439703154, "grad_norm": 6.292395114898682, "learning_rate": 8.05915929353272e-06, "loss": 0.3365, "step": 6415 }, { "epoch": 0.2975881261595547, "grad_norm": 12.073541641235352, "learning_rate": 8.05857713857984e-06, "loss": 0.4529, "step": 6416 }, { "epoch": 0.29763450834879407, "grad_norm": 7.484419822692871, "learning_rate": 8.057994917364497e-06, "loss": 0.2894, "step": 6417 }, { "epoch": 0.29768089053803337, "grad_norm": 17.72821617126465, "learning_rate": 8.057412629899309e-06, "loss": 0.4951, "step": 6418 }, { "epoch": 0.29772727272727273, "grad_norm": 11.636051177978516, "learning_rate": 8.056830276196888e-06, "loss": 0.3453, "step": 6419 }, { "epoch": 0.29777365491651203, "grad_norm": 5.846595287322998, "learning_rate": 8.056247856269853e-06, "loss": 0.3531, "step": 6420 }, { "epoch": 0.2978200371057514, "grad_norm": 6.0204267501831055, "learning_rate": 8.055665370130817e-06, "loss": 0.3794, "step": 6421 }, { "epoch": 0.29786641929499075, "grad_norm": 4.41120719909668, "learning_rate": 8.055082817792404e-06, "loss": 0.2942, "step": 6422 }, { "epoch": 0.29791280148423005, "grad_norm": 7.6142964363098145, "learning_rate": 8.054500199267234e-06, "loss": 0.4041, "step": 6423 }, { "epoch": 0.2979591836734694, "grad_norm": 7.11388635635376, "learning_rate": 8.053917514567927e-06, "loss": 0.3421, "step": 6424 }, { "epoch": 0.2980055658627087, "grad_norm": 9.510093688964844, "learning_rate": 8.053334763707109e-06, "loss": 0.4449, "step": 6425 }, { "epoch": 0.29805194805194807, "grad_norm": 8.380231857299805, "learning_rate": 8.052751946697403e-06, "loss": 0.3173, "step": 6426 }, { "epoch": 0.29809833024118737, "grad_norm": 7.148608684539795, "learning_rate": 8.052169063551436e-06, "loss": 0.3759, "step": 6427 }, { "epoch": 0.29814471243042673, "grad_norm": 5.0408935546875, "learning_rate": 8.051586114281837e-06, "loss": 0.3436, "step": 6428 }, { "epoch": 0.29819109461966603, "grad_norm": 5.793569087982178, "learning_rate": 8.051003098901233e-06, "loss": 0.3304, "step": 6429 }, { "epoch": 0.2982374768089054, "grad_norm": 7.110062122344971, "learning_rate": 8.050420017422258e-06, "loss": 0.3465, "step": 6430 }, { "epoch": 0.2982838589981447, "grad_norm": 10.59958267211914, "learning_rate": 8.049836869857541e-06, "loss": 0.407, "step": 6431 }, { "epoch": 0.29833024118738405, "grad_norm": 9.475691795349121, "learning_rate": 8.049253656219718e-06, "loss": 0.2944, "step": 6432 }, { "epoch": 0.29837662337662335, "grad_norm": 6.453728675842285, "learning_rate": 8.048670376521421e-06, "loss": 0.2814, "step": 6433 }, { "epoch": 0.2984230055658627, "grad_norm": 6.1046013832092285, "learning_rate": 8.048087030775288e-06, "loss": 0.3707, "step": 6434 }, { "epoch": 0.29846938775510207, "grad_norm": 9.136823654174805, "learning_rate": 8.04750361899396e-06, "loss": 0.4245, "step": 6435 }, { "epoch": 0.29851576994434137, "grad_norm": 10.783888816833496, "learning_rate": 8.046920141190071e-06, "loss": 0.4841, "step": 6436 }, { "epoch": 0.2985621521335807, "grad_norm": 8.230928421020508, "learning_rate": 8.046336597376266e-06, "loss": 0.1757, "step": 6437 }, { "epoch": 0.29860853432282003, "grad_norm": 8.811681747436523, "learning_rate": 8.045752987565184e-06, "loss": 0.4181, "step": 6438 }, { "epoch": 0.2986549165120594, "grad_norm": 7.232283592224121, "learning_rate": 8.04516931176947e-06, "loss": 0.3703, "step": 6439 }, { "epoch": 0.2987012987012987, "grad_norm": 6.0166826248168945, "learning_rate": 8.04458557000177e-06, "loss": 0.3941, "step": 6440 }, { "epoch": 0.29874768089053805, "grad_norm": 6.461480140686035, "learning_rate": 8.04400176227473e-06, "loss": 0.248, "step": 6441 }, { "epoch": 0.29879406307977735, "grad_norm": 12.190902709960938, "learning_rate": 8.043417888600996e-06, "loss": 0.4224, "step": 6442 }, { "epoch": 0.2988404452690167, "grad_norm": 7.5886406898498535, "learning_rate": 8.042833948993217e-06, "loss": 0.4297, "step": 6443 }, { "epoch": 0.298886827458256, "grad_norm": 5.844557285308838, "learning_rate": 8.042249943464048e-06, "loss": 0.3511, "step": 6444 }, { "epoch": 0.29893320964749537, "grad_norm": 4.949405193328857, "learning_rate": 8.04166587202614e-06, "loss": 0.2809, "step": 6445 }, { "epoch": 0.29897959183673467, "grad_norm": 8.267572402954102, "learning_rate": 8.041081734692143e-06, "loss": 0.3183, "step": 6446 }, { "epoch": 0.29902597402597403, "grad_norm": 9.569002151489258, "learning_rate": 8.040497531474713e-06, "loss": 0.4158, "step": 6447 }, { "epoch": 0.29907235621521333, "grad_norm": 6.23406982421875, "learning_rate": 8.03991326238651e-06, "loss": 0.3911, "step": 6448 }, { "epoch": 0.2991187384044527, "grad_norm": 7.076389312744141, "learning_rate": 8.039328927440188e-06, "loss": 0.4069, "step": 6449 }, { "epoch": 0.29916512059369205, "grad_norm": 7.392673015594482, "learning_rate": 8.03874452664841e-06, "loss": 0.3892, "step": 6450 }, { "epoch": 0.29921150278293135, "grad_norm": 6.436065673828125, "learning_rate": 8.038160060023834e-06, "loss": 0.3668, "step": 6451 }, { "epoch": 0.2992578849721707, "grad_norm": 4.751985549926758, "learning_rate": 8.03757552757912e-06, "loss": 0.3945, "step": 6452 }, { "epoch": 0.29930426716141, "grad_norm": 5.211161136627197, "learning_rate": 8.036990929326939e-06, "loss": 0.2988, "step": 6453 }, { "epoch": 0.29935064935064937, "grad_norm": 5.220190048217773, "learning_rate": 8.036406265279948e-06, "loss": 0.2611, "step": 6454 }, { "epoch": 0.29939703153988867, "grad_norm": 4.344400405883789, "learning_rate": 8.035821535450816e-06, "loss": 0.2373, "step": 6455 }, { "epoch": 0.299443413729128, "grad_norm": 6.794876575469971, "learning_rate": 8.035236739852214e-06, "loss": 0.2791, "step": 6456 }, { "epoch": 0.29948979591836733, "grad_norm": 6.399502277374268, "learning_rate": 8.034651878496808e-06, "loss": 0.4109, "step": 6457 }, { "epoch": 0.2995361781076067, "grad_norm": 7.455174922943115, "learning_rate": 8.03406695139727e-06, "loss": 0.3261, "step": 6458 }, { "epoch": 0.299582560296846, "grad_norm": 5.1313982009887695, "learning_rate": 8.033481958566271e-06, "loss": 0.28, "step": 6459 }, { "epoch": 0.29962894248608535, "grad_norm": 12.422232627868652, "learning_rate": 8.032896900016486e-06, "loss": 0.4801, "step": 6460 }, { "epoch": 0.29967532467532465, "grad_norm": 4.6689558029174805, "learning_rate": 8.032311775760591e-06, "loss": 0.383, "step": 6461 }, { "epoch": 0.299721706864564, "grad_norm": 4.568682670593262, "learning_rate": 8.031726585811257e-06, "loss": 0.2994, "step": 6462 }, { "epoch": 0.29976808905380337, "grad_norm": 7.66491174697876, "learning_rate": 8.031141330181168e-06, "loss": 0.2697, "step": 6463 }, { "epoch": 0.29981447124304267, "grad_norm": 4.682694435119629, "learning_rate": 8.030556008883e-06, "loss": 0.3702, "step": 6464 }, { "epoch": 0.299860853432282, "grad_norm": 13.993927955627441, "learning_rate": 8.029970621929434e-06, "loss": 0.4189, "step": 6465 }, { "epoch": 0.29990723562152133, "grad_norm": 7.9165191650390625, "learning_rate": 8.029385169333155e-06, "loss": 0.2682, "step": 6466 }, { "epoch": 0.2999536178107607, "grad_norm": 7.552942752838135, "learning_rate": 8.028799651106843e-06, "loss": 0.4264, "step": 6467 }, { "epoch": 0.3, "grad_norm": 24.749032974243164, "learning_rate": 8.028214067263183e-06, "loss": 0.6602, "step": 6468 }, { "epoch": 0.3, "eval_loss": 0.3521163761615753, "eval_runtime": 38.0051, "eval_samples_per_second": 45.862, "eval_steps_per_second": 5.736, "step": 6468 }, { "epoch": 0.30004638218923935, "grad_norm": 5.660698890686035, "learning_rate": 8.027628417814864e-06, "loss": 0.3436, "step": 6469 }, { "epoch": 0.30009276437847865, "grad_norm": 6.006377220153809, "learning_rate": 8.027042702774572e-06, "loss": 0.3649, "step": 6470 }, { "epoch": 0.300139146567718, "grad_norm": 7.104304790496826, "learning_rate": 8.026456922154995e-06, "loss": 0.3809, "step": 6471 }, { "epoch": 0.3001855287569573, "grad_norm": 9.955513000488281, "learning_rate": 8.025871075968828e-06, "loss": 0.3676, "step": 6472 }, { "epoch": 0.30023191094619667, "grad_norm": 5.976938247680664, "learning_rate": 8.025285164228757e-06, "loss": 0.3181, "step": 6473 }, { "epoch": 0.30027829313543597, "grad_norm": 7.513853549957275, "learning_rate": 8.02469918694748e-06, "loss": 0.3299, "step": 6474 }, { "epoch": 0.3003246753246753, "grad_norm": 11.685467720031738, "learning_rate": 8.02411314413769e-06, "loss": 0.4458, "step": 6475 }, { "epoch": 0.30037105751391463, "grad_norm": 18.165843963623047, "learning_rate": 8.023527035812085e-06, "loss": 0.3171, "step": 6476 }, { "epoch": 0.300417439703154, "grad_norm": 10.590509414672852, "learning_rate": 8.02294086198336e-06, "loss": 0.5322, "step": 6477 }, { "epoch": 0.30046382189239335, "grad_norm": 4.502852916717529, "learning_rate": 8.022354622664215e-06, "loss": 0.1793, "step": 6478 }, { "epoch": 0.30051020408163265, "grad_norm": 7.534884929656982, "learning_rate": 8.021768317867353e-06, "loss": 0.307, "step": 6479 }, { "epoch": 0.300556586270872, "grad_norm": 18.222007751464844, "learning_rate": 8.021181947605474e-06, "loss": 0.4018, "step": 6480 }, { "epoch": 0.3006029684601113, "grad_norm": 7.6165852546691895, "learning_rate": 8.02059551189128e-06, "loss": 0.3901, "step": 6481 }, { "epoch": 0.30064935064935067, "grad_norm": 4.261832237243652, "learning_rate": 8.02000901073748e-06, "loss": 0.3321, "step": 6482 }, { "epoch": 0.30069573283858997, "grad_norm": 9.455543518066406, "learning_rate": 8.019422444156776e-06, "loss": 0.3936, "step": 6483 }, { "epoch": 0.3007421150278293, "grad_norm": 6.150516986846924, "learning_rate": 8.018835812161877e-06, "loss": 0.3587, "step": 6484 }, { "epoch": 0.30078849721706863, "grad_norm": 4.804036617279053, "learning_rate": 8.018249114765493e-06, "loss": 0.3338, "step": 6485 }, { "epoch": 0.300834879406308, "grad_norm": 3.7908926010131836, "learning_rate": 8.017662351980334e-06, "loss": 0.3398, "step": 6486 }, { "epoch": 0.3008812615955473, "grad_norm": 5.919721603393555, "learning_rate": 8.017075523819111e-06, "loss": 0.3894, "step": 6487 }, { "epoch": 0.30092764378478665, "grad_norm": 7.089515209197998, "learning_rate": 8.016488630294539e-06, "loss": 0.3797, "step": 6488 }, { "epoch": 0.30097402597402595, "grad_norm": 7.0916595458984375, "learning_rate": 8.015901671419333e-06, "loss": 0.3568, "step": 6489 }, { "epoch": 0.3010204081632653, "grad_norm": 9.977424621582031, "learning_rate": 8.015314647206206e-06, "loss": 0.4943, "step": 6490 }, { "epoch": 0.30106679035250467, "grad_norm": 10.909955024719238, "learning_rate": 8.014727557667878e-06, "loss": 0.3261, "step": 6491 }, { "epoch": 0.30111317254174397, "grad_norm": 4.689964771270752, "learning_rate": 8.014140402817068e-06, "loss": 0.321, "step": 6492 }, { "epoch": 0.3011595547309833, "grad_norm": 8.856243133544922, "learning_rate": 8.013553182666496e-06, "loss": 0.4133, "step": 6493 }, { "epoch": 0.30120593692022263, "grad_norm": 5.043924808502197, "learning_rate": 8.012965897228884e-06, "loss": 0.2669, "step": 6494 }, { "epoch": 0.301252319109462, "grad_norm": 4.773975849151611, "learning_rate": 8.012378546516955e-06, "loss": 0.2232, "step": 6495 }, { "epoch": 0.3012987012987013, "grad_norm": 7.45326566696167, "learning_rate": 8.011791130543433e-06, "loss": 0.3733, "step": 6496 }, { "epoch": 0.30134508348794065, "grad_norm": 5.438383102416992, "learning_rate": 8.011203649321045e-06, "loss": 0.3603, "step": 6497 }, { "epoch": 0.30139146567717995, "grad_norm": 7.343680381774902, "learning_rate": 8.010616102862519e-06, "loss": 0.3275, "step": 6498 }, { "epoch": 0.3014378478664193, "grad_norm": 8.08145809173584, "learning_rate": 8.010028491180582e-06, "loss": 0.3259, "step": 6499 }, { "epoch": 0.3014842300556586, "grad_norm": 14.426738739013672, "learning_rate": 8.009440814287966e-06, "loss": 0.6156, "step": 6500 }, { "epoch": 0.30153061224489797, "grad_norm": 10.028082847595215, "learning_rate": 8.0088530721974e-06, "loss": 0.3994, "step": 6501 }, { "epoch": 0.30157699443413727, "grad_norm": 8.165569305419922, "learning_rate": 8.008265264921621e-06, "loss": 0.321, "step": 6502 }, { "epoch": 0.3016233766233766, "grad_norm": 6.880414009094238, "learning_rate": 8.007677392473362e-06, "loss": 0.4077, "step": 6503 }, { "epoch": 0.30166975881261593, "grad_norm": 8.967668533325195, "learning_rate": 8.007089454865358e-06, "loss": 0.3696, "step": 6504 }, { "epoch": 0.3017161410018553, "grad_norm": 6.147525310516357, "learning_rate": 8.006501452110347e-06, "loss": 0.317, "step": 6505 }, { "epoch": 0.30176252319109464, "grad_norm": 7.888523101806641, "learning_rate": 8.00591338422107e-06, "loss": 0.2849, "step": 6506 }, { "epoch": 0.30180890538033395, "grad_norm": 6.410292148590088, "learning_rate": 8.005325251210261e-06, "loss": 0.3459, "step": 6507 }, { "epoch": 0.3018552875695733, "grad_norm": 10.319759368896484, "learning_rate": 8.00473705309067e-06, "loss": 0.4084, "step": 6508 }, { "epoch": 0.3019016697588126, "grad_norm": 7.451125144958496, "learning_rate": 8.004148789875032e-06, "loss": 0.3699, "step": 6509 }, { "epoch": 0.30194805194805197, "grad_norm": 4.6738810539245605, "learning_rate": 8.003560461576095e-06, "loss": 0.262, "step": 6510 }, { "epoch": 0.30199443413729127, "grad_norm": 5.374911308288574, "learning_rate": 8.002972068206608e-06, "loss": 0.331, "step": 6511 }, { "epoch": 0.3020408163265306, "grad_norm": 7.9221296310424805, "learning_rate": 8.002383609779312e-06, "loss": 0.3247, "step": 6512 }, { "epoch": 0.30208719851576993, "grad_norm": 7.675661563873291, "learning_rate": 8.00179508630696e-06, "loss": 0.3349, "step": 6513 }, { "epoch": 0.3021335807050093, "grad_norm": 9.812383651733398, "learning_rate": 8.0012064978023e-06, "loss": 0.4403, "step": 6514 }, { "epoch": 0.3021799628942486, "grad_norm": 7.295226573944092, "learning_rate": 8.000617844278085e-06, "loss": 0.3657, "step": 6515 }, { "epoch": 0.30222634508348795, "grad_norm": 7.8614501953125, "learning_rate": 8.000029125747066e-06, "loss": 0.424, "step": 6516 }, { "epoch": 0.30227272727272725, "grad_norm": 12.295454978942871, "learning_rate": 7.999440342222e-06, "loss": 0.3586, "step": 6517 }, { "epoch": 0.3023191094619666, "grad_norm": 8.6423978805542, "learning_rate": 7.99885149371564e-06, "loss": 0.452, "step": 6518 }, { "epoch": 0.30236549165120596, "grad_norm": 10.17379093170166, "learning_rate": 7.998262580240744e-06, "loss": 0.4109, "step": 6519 }, { "epoch": 0.30241187384044527, "grad_norm": 7.722638130187988, "learning_rate": 7.997673601810071e-06, "loss": 0.3106, "step": 6520 }, { "epoch": 0.3024582560296846, "grad_norm": 9.92212200164795, "learning_rate": 7.99708455843638e-06, "loss": 0.4505, "step": 6521 }, { "epoch": 0.3025046382189239, "grad_norm": 9.2138090133667, "learning_rate": 7.996495450132433e-06, "loss": 0.4416, "step": 6522 }, { "epoch": 0.3025510204081633, "grad_norm": 5.079173564910889, "learning_rate": 7.995906276910994e-06, "loss": 0.2341, "step": 6523 }, { "epoch": 0.3025974025974026, "grad_norm": 7.724086284637451, "learning_rate": 7.995317038784825e-06, "loss": 0.4133, "step": 6524 }, { "epoch": 0.30264378478664195, "grad_norm": 4.758742809295654, "learning_rate": 7.994727735766691e-06, "loss": 0.2505, "step": 6525 }, { "epoch": 0.30269016697588125, "grad_norm": 9.36972427368164, "learning_rate": 7.99413836786936e-06, "loss": 0.3707, "step": 6526 }, { "epoch": 0.3027365491651206, "grad_norm": 6.078463077545166, "learning_rate": 7.993548935105602e-06, "loss": 0.2949, "step": 6527 }, { "epoch": 0.3027829313543599, "grad_norm": 4.744472503662109, "learning_rate": 7.992959437488185e-06, "loss": 0.1736, "step": 6528 }, { "epoch": 0.30282931354359927, "grad_norm": 9.307662010192871, "learning_rate": 7.99236987502988e-06, "loss": 0.3894, "step": 6529 }, { "epoch": 0.30287569573283857, "grad_norm": 5.219143390655518, "learning_rate": 7.99178024774346e-06, "loss": 0.245, "step": 6530 }, { "epoch": 0.3029220779220779, "grad_norm": 9.631994247436523, "learning_rate": 7.991190555641699e-06, "loss": 0.3859, "step": 6531 }, { "epoch": 0.30296846011131723, "grad_norm": 7.257126331329346, "learning_rate": 7.990600798737373e-06, "loss": 0.4175, "step": 6532 }, { "epoch": 0.3030148423005566, "grad_norm": 3.8156628608703613, "learning_rate": 7.990010977043257e-06, "loss": 0.2602, "step": 6533 }, { "epoch": 0.30306122448979594, "grad_norm": 13.373754501342773, "learning_rate": 7.98942109057213e-06, "loss": 0.3882, "step": 6534 }, { "epoch": 0.30310760667903525, "grad_norm": 6.531197547912598, "learning_rate": 7.988831139336773e-06, "loss": 0.3153, "step": 6535 }, { "epoch": 0.3031539888682746, "grad_norm": 13.120589256286621, "learning_rate": 7.988241123349965e-06, "loss": 0.5177, "step": 6536 }, { "epoch": 0.3032003710575139, "grad_norm": 3.558488130569458, "learning_rate": 7.98765104262449e-06, "loss": 0.3, "step": 6537 }, { "epoch": 0.30324675324675326, "grad_norm": 6.611358165740967, "learning_rate": 7.98706089717313e-06, "loss": 0.3583, "step": 6538 }, { "epoch": 0.30329313543599257, "grad_norm": 11.555540084838867, "learning_rate": 7.98647068700867e-06, "loss": 0.404, "step": 6539 }, { "epoch": 0.3033395176252319, "grad_norm": 6.386565685272217, "learning_rate": 7.9858804121439e-06, "loss": 0.3378, "step": 6540 }, { "epoch": 0.3033858998144712, "grad_norm": 9.673038482666016, "learning_rate": 7.985290072591605e-06, "loss": 0.313, "step": 6541 }, { "epoch": 0.3034322820037106, "grad_norm": 10.362113952636719, "learning_rate": 7.984699668364575e-06, "loss": 0.445, "step": 6542 }, { "epoch": 0.3034786641929499, "grad_norm": 5.7888336181640625, "learning_rate": 7.984109199475602e-06, "loss": 0.3882, "step": 6543 }, { "epoch": 0.30352504638218925, "grad_norm": 10.350811004638672, "learning_rate": 7.983518665937476e-06, "loss": 0.3012, "step": 6544 }, { "epoch": 0.30357142857142855, "grad_norm": 6.45941162109375, "learning_rate": 7.982928067762991e-06, "loss": 0.3684, "step": 6545 }, { "epoch": 0.3036178107606679, "grad_norm": 8.743221282958984, "learning_rate": 7.982337404964945e-06, "loss": 0.3546, "step": 6546 }, { "epoch": 0.3036641929499072, "grad_norm": 11.15596866607666, "learning_rate": 7.98174667755613e-06, "loss": 0.4955, "step": 6547 }, { "epoch": 0.30371057513914657, "grad_norm": 8.763431549072266, "learning_rate": 7.981155885549347e-06, "loss": 0.4742, "step": 6548 }, { "epoch": 0.3037569573283859, "grad_norm": 7.3060784339904785, "learning_rate": 7.980565028957394e-06, "loss": 0.4377, "step": 6549 }, { "epoch": 0.3038033395176252, "grad_norm": 7.271577835083008, "learning_rate": 7.979974107793073e-06, "loss": 0.3918, "step": 6550 }, { "epoch": 0.3038497217068646, "grad_norm": 9.467734336853027, "learning_rate": 7.979383122069181e-06, "loss": 0.3872, "step": 6551 }, { "epoch": 0.3038961038961039, "grad_norm": 4.966428279876709, "learning_rate": 7.97879207179853e-06, "loss": 0.4357, "step": 6552 }, { "epoch": 0.30394248608534324, "grad_norm": 7.647375106811523, "learning_rate": 7.978200956993917e-06, "loss": 0.2856, "step": 6553 }, { "epoch": 0.30398886827458255, "grad_norm": 8.893467903137207, "learning_rate": 7.977609777668151e-06, "loss": 0.3708, "step": 6554 }, { "epoch": 0.3040352504638219, "grad_norm": 4.780735492706299, "learning_rate": 7.97701853383404e-06, "loss": 0.3316, "step": 6555 }, { "epoch": 0.3040816326530612, "grad_norm": 7.042377948760986, "learning_rate": 7.976427225504393e-06, "loss": 0.3556, "step": 6556 }, { "epoch": 0.30412801484230056, "grad_norm": 13.016145706176758, "learning_rate": 7.975835852692022e-06, "loss": 0.3614, "step": 6557 }, { "epoch": 0.30417439703153987, "grad_norm": 4.976535320281982, "learning_rate": 7.975244415409733e-06, "loss": 0.3593, "step": 6558 }, { "epoch": 0.3042207792207792, "grad_norm": 6.302614212036133, "learning_rate": 7.974652913670346e-06, "loss": 0.4212, "step": 6559 }, { "epoch": 0.3042671614100185, "grad_norm": 10.15433406829834, "learning_rate": 7.97406134748667e-06, "loss": 0.3931, "step": 6560 }, { "epoch": 0.3043135435992579, "grad_norm": 6.354125499725342, "learning_rate": 7.973469716871526e-06, "loss": 0.2936, "step": 6561 }, { "epoch": 0.30435992578849724, "grad_norm": 9.81270694732666, "learning_rate": 7.972878021837728e-06, "loss": 0.335, "step": 6562 }, { "epoch": 0.30440630797773655, "grad_norm": 9.901357650756836, "learning_rate": 7.972286262398096e-06, "loss": 0.5375, "step": 6563 }, { "epoch": 0.3044526901669759, "grad_norm": 5.443024158477783, "learning_rate": 7.97169443856545e-06, "loss": 0.3733, "step": 6564 }, { "epoch": 0.3044990723562152, "grad_norm": 9.285303115844727, "learning_rate": 7.97110255035261e-06, "loss": 0.3916, "step": 6565 }, { "epoch": 0.30454545454545456, "grad_norm": 8.205103874206543, "learning_rate": 7.970510597772401e-06, "loss": 0.3317, "step": 6566 }, { "epoch": 0.30459183673469387, "grad_norm": 12.124882698059082, "learning_rate": 7.969918580837648e-06, "loss": 0.4556, "step": 6567 }, { "epoch": 0.3046382189239332, "grad_norm": 8.226354598999023, "learning_rate": 7.969326499561173e-06, "loss": 0.3442, "step": 6568 }, { "epoch": 0.3046846011131725, "grad_norm": 10.276263236999512, "learning_rate": 7.968734353955807e-06, "loss": 0.3948, "step": 6569 }, { "epoch": 0.3047309833024119, "grad_norm": 10.610200881958008, "learning_rate": 7.968142144034377e-06, "loss": 0.5162, "step": 6570 }, { "epoch": 0.3047773654916512, "grad_norm": 7.041722774505615, "learning_rate": 7.967549869809714e-06, "loss": 0.3184, "step": 6571 }, { "epoch": 0.30482374768089054, "grad_norm": 14.665306091308594, "learning_rate": 7.966957531294648e-06, "loss": 0.4804, "step": 6572 }, { "epoch": 0.30487012987012985, "grad_norm": 14.69828987121582, "learning_rate": 7.96636512850201e-06, "loss": 0.4847, "step": 6573 }, { "epoch": 0.3049165120593692, "grad_norm": 4.413754463195801, "learning_rate": 7.965772661444638e-06, "loss": 0.3078, "step": 6574 }, { "epoch": 0.3049628942486085, "grad_norm": 3.701728105545044, "learning_rate": 7.965180130135364e-06, "loss": 0.2582, "step": 6575 }, { "epoch": 0.30500927643784786, "grad_norm": 21.942106246948242, "learning_rate": 7.964587534587028e-06, "loss": 0.5619, "step": 6576 }, { "epoch": 0.3050556586270872, "grad_norm": 6.7743144035339355, "learning_rate": 7.963994874812466e-06, "loss": 0.3971, "step": 6577 }, { "epoch": 0.3051020408163265, "grad_norm": 5.596957683563232, "learning_rate": 7.963402150824517e-06, "loss": 0.32, "step": 6578 }, { "epoch": 0.3051484230055659, "grad_norm": 6.2859063148498535, "learning_rate": 7.962809362636025e-06, "loss": 0.4209, "step": 6579 }, { "epoch": 0.3051948051948052, "grad_norm": 6.868612289428711, "learning_rate": 7.962216510259832e-06, "loss": 0.2618, "step": 6580 }, { "epoch": 0.30524118738404454, "grad_norm": 7.136142730712891, "learning_rate": 7.961623593708779e-06, "loss": 0.3136, "step": 6581 }, { "epoch": 0.30528756957328385, "grad_norm": 7.245800018310547, "learning_rate": 7.961030612995712e-06, "loss": 0.4445, "step": 6582 }, { "epoch": 0.3053339517625232, "grad_norm": 7.258234977722168, "learning_rate": 7.96043756813348e-06, "loss": 0.3493, "step": 6583 }, { "epoch": 0.3053803339517625, "grad_norm": 8.945839881896973, "learning_rate": 7.95984445913493e-06, "loss": 0.387, "step": 6584 }, { "epoch": 0.30542671614100186, "grad_norm": 6.277941703796387, "learning_rate": 7.959251286012912e-06, "loss": 0.2523, "step": 6585 }, { "epoch": 0.30547309833024117, "grad_norm": 9.725934028625488, "learning_rate": 7.958658048780272e-06, "loss": 0.3662, "step": 6586 }, { "epoch": 0.3055194805194805, "grad_norm": 9.97158432006836, "learning_rate": 7.95806474744987e-06, "loss": 0.3637, "step": 6587 }, { "epoch": 0.3055658627087198, "grad_norm": 12.96428394317627, "learning_rate": 7.957471382034553e-06, "loss": 0.4501, "step": 6588 }, { "epoch": 0.3056122448979592, "grad_norm": 7.902505397796631, "learning_rate": 7.956877952547178e-06, "loss": 0.3228, "step": 6589 }, { "epoch": 0.30565862708719854, "grad_norm": 9.262397766113281, "learning_rate": 7.956284459000603e-06, "loss": 0.3644, "step": 6590 }, { "epoch": 0.30570500927643784, "grad_norm": 5.128401756286621, "learning_rate": 7.955690901407684e-06, "loss": 0.3223, "step": 6591 }, { "epoch": 0.3057513914656772, "grad_norm": 6.6401047706604, "learning_rate": 7.95509727978128e-06, "loss": 0.2424, "step": 6592 }, { "epoch": 0.3057977736549165, "grad_norm": 4.786444187164307, "learning_rate": 7.954503594134254e-06, "loss": 0.3485, "step": 6593 }, { "epoch": 0.30584415584415586, "grad_norm": 9.142252922058105, "learning_rate": 7.953909844479465e-06, "loss": 0.3968, "step": 6594 }, { "epoch": 0.30589053803339517, "grad_norm": 7.3316569328308105, "learning_rate": 7.953316030829778e-06, "loss": 0.4339, "step": 6595 }, { "epoch": 0.3059369202226345, "grad_norm": 4.326993465423584, "learning_rate": 7.952722153198054e-06, "loss": 0.3189, "step": 6596 }, { "epoch": 0.3059833024118738, "grad_norm": 11.952400207519531, "learning_rate": 7.952128211597165e-06, "loss": 0.4268, "step": 6597 }, { "epoch": 0.3060296846011132, "grad_norm": 8.69700813293457, "learning_rate": 7.951534206039975e-06, "loss": 0.3979, "step": 6598 }, { "epoch": 0.3060760667903525, "grad_norm": 6.317358493804932, "learning_rate": 7.950940136539351e-06, "loss": 0.3146, "step": 6599 }, { "epoch": 0.30612244897959184, "grad_norm": 8.51870346069336, "learning_rate": 7.950346003108167e-06, "loss": 0.3696, "step": 6600 }, { "epoch": 0.30616883116883115, "grad_norm": 9.501206398010254, "learning_rate": 7.949751805759294e-06, "loss": 0.2739, "step": 6601 }, { "epoch": 0.3062152133580705, "grad_norm": 7.28025484085083, "learning_rate": 7.949157544505602e-06, "loss": 0.4076, "step": 6602 }, { "epoch": 0.3062615955473098, "grad_norm": 11.700311660766602, "learning_rate": 7.948563219359969e-06, "loss": 0.4493, "step": 6603 }, { "epoch": 0.30630797773654916, "grad_norm": 5.846220016479492, "learning_rate": 7.947968830335268e-06, "loss": 0.3095, "step": 6604 }, { "epoch": 0.3063543599257885, "grad_norm": 5.1094207763671875, "learning_rate": 7.947374377444376e-06, "loss": 0.3162, "step": 6605 }, { "epoch": 0.3064007421150278, "grad_norm": 7.668980121612549, "learning_rate": 7.946779860700175e-06, "loss": 0.4022, "step": 6606 }, { "epoch": 0.3064471243042672, "grad_norm": 5.651705265045166, "learning_rate": 7.946185280115541e-06, "loss": 0.3654, "step": 6607 }, { "epoch": 0.3064935064935065, "grad_norm": 9.539050102233887, "learning_rate": 7.945590635703358e-06, "loss": 0.365, "step": 6608 }, { "epoch": 0.30653988868274584, "grad_norm": 6.1742329597473145, "learning_rate": 7.944995927476507e-06, "loss": 0.4377, "step": 6609 }, { "epoch": 0.30658627087198514, "grad_norm": 7.846907138824463, "learning_rate": 7.944401155447872e-06, "loss": 0.436, "step": 6610 }, { "epoch": 0.3066326530612245, "grad_norm": 5.428852558135986, "learning_rate": 7.943806319630339e-06, "loss": 0.3356, "step": 6611 }, { "epoch": 0.3066790352504638, "grad_norm": 8.101374626159668, "learning_rate": 7.943211420036797e-06, "loss": 0.4221, "step": 6612 }, { "epoch": 0.30672541743970316, "grad_norm": 4.661862373352051, "learning_rate": 7.94261645668013e-06, "loss": 0.2907, "step": 6613 }, { "epoch": 0.30677179962894247, "grad_norm": 5.990302562713623, "learning_rate": 7.94202142957323e-06, "loss": 0.3673, "step": 6614 }, { "epoch": 0.3068181818181818, "grad_norm": 6.940511703491211, "learning_rate": 7.941426338728987e-06, "loss": 0.3087, "step": 6615 }, { "epoch": 0.3068645640074211, "grad_norm": 5.87014627456665, "learning_rate": 7.940831184160294e-06, "loss": 0.3726, "step": 6616 }, { "epoch": 0.3069109461966605, "grad_norm": 4.69936990737915, "learning_rate": 7.940235965880045e-06, "loss": 0.3402, "step": 6617 }, { "epoch": 0.30695732838589984, "grad_norm": 7.2268877029418945, "learning_rate": 7.939640683901133e-06, "loss": 0.3448, "step": 6618 }, { "epoch": 0.30700371057513914, "grad_norm": 8.935818672180176, "learning_rate": 7.939045338236459e-06, "loss": 0.3592, "step": 6619 }, { "epoch": 0.3070500927643785, "grad_norm": 4.597105979919434, "learning_rate": 7.938449928898916e-06, "loss": 0.4036, "step": 6620 }, { "epoch": 0.3070964749536178, "grad_norm": 6.532268524169922, "learning_rate": 7.937854455901404e-06, "loss": 0.3326, "step": 6621 }, { "epoch": 0.30714285714285716, "grad_norm": 4.631979942321777, "learning_rate": 7.937258919256827e-06, "loss": 0.3099, "step": 6622 }, { "epoch": 0.30718923933209646, "grad_norm": 16.08728790283203, "learning_rate": 7.936663318978083e-06, "loss": 0.4887, "step": 6623 }, { "epoch": 0.3072356215213358, "grad_norm": 11.092802047729492, "learning_rate": 7.93606765507808e-06, "loss": 0.4656, "step": 6624 }, { "epoch": 0.3072820037105751, "grad_norm": 4.650210857391357, "learning_rate": 7.935471927569719e-06, "loss": 0.3993, "step": 6625 }, { "epoch": 0.3073283858998145, "grad_norm": 8.18890380859375, "learning_rate": 7.934876136465904e-06, "loss": 0.3599, "step": 6626 }, { "epoch": 0.3073747680890538, "grad_norm": 11.288844108581543, "learning_rate": 7.934280281779547e-06, "loss": 0.4098, "step": 6627 }, { "epoch": 0.30742115027829314, "grad_norm": 13.694306373596191, "learning_rate": 7.933684363523557e-06, "loss": 0.5219, "step": 6628 }, { "epoch": 0.30746753246753245, "grad_norm": 5.323026657104492, "learning_rate": 7.93308838171084e-06, "loss": 0.3645, "step": 6629 }, { "epoch": 0.3075139146567718, "grad_norm": 9.794517517089844, "learning_rate": 7.932492336354312e-06, "loss": 0.5611, "step": 6630 }, { "epoch": 0.3075602968460111, "grad_norm": 8.093450546264648, "learning_rate": 7.931896227466885e-06, "loss": 0.3508, "step": 6631 }, { "epoch": 0.30760667903525046, "grad_norm": 4.760022163391113, "learning_rate": 7.93130005506147e-06, "loss": 0.3012, "step": 6632 }, { "epoch": 0.3076530612244898, "grad_norm": 8.509040832519531, "learning_rate": 7.930703819150988e-06, "loss": 0.3117, "step": 6633 }, { "epoch": 0.3076994434137291, "grad_norm": 4.554834365844727, "learning_rate": 7.930107519748354e-06, "loss": 0.3115, "step": 6634 }, { "epoch": 0.3077458256029685, "grad_norm": 6.0497565269470215, "learning_rate": 7.929511156866483e-06, "loss": 0.2997, "step": 6635 }, { "epoch": 0.3077922077922078, "grad_norm": 4.7233076095581055, "learning_rate": 7.9289147305183e-06, "loss": 0.2675, "step": 6636 }, { "epoch": 0.30783858998144714, "grad_norm": 8.994569778442383, "learning_rate": 7.928318240716724e-06, "loss": 0.4129, "step": 6637 }, { "epoch": 0.30788497217068644, "grad_norm": 9.292582511901855, "learning_rate": 7.927721687474677e-06, "loss": 0.4897, "step": 6638 }, { "epoch": 0.3079313543599258, "grad_norm": 13.977930068969727, "learning_rate": 7.927125070805086e-06, "loss": 0.4501, "step": 6639 }, { "epoch": 0.3079777365491651, "grad_norm": 27.988807678222656, "learning_rate": 7.926528390720872e-06, "loss": 0.5186, "step": 6640 }, { "epoch": 0.30802411873840446, "grad_norm": 12.375200271606445, "learning_rate": 7.925931647234963e-06, "loss": 0.6084, "step": 6641 }, { "epoch": 0.30807050092764376, "grad_norm": 10.291000366210938, "learning_rate": 7.925334840360291e-06, "loss": 0.4748, "step": 6642 }, { "epoch": 0.3081168831168831, "grad_norm": 5.122748374938965, "learning_rate": 7.924737970109782e-06, "loss": 0.2558, "step": 6643 }, { "epoch": 0.3081632653061224, "grad_norm": 5.914286136627197, "learning_rate": 7.924141036496366e-06, "loss": 0.2838, "step": 6644 }, { "epoch": 0.3082096474953618, "grad_norm": 5.712826728820801, "learning_rate": 7.923544039532977e-06, "loss": 0.1843, "step": 6645 }, { "epoch": 0.30825602968460114, "grad_norm": 7.126655578613281, "learning_rate": 7.92294697923255e-06, "loss": 0.315, "step": 6646 }, { "epoch": 0.30830241187384044, "grad_norm": 24.342254638671875, "learning_rate": 7.922349855608019e-06, "loss": 0.329, "step": 6647 }, { "epoch": 0.3083487940630798, "grad_norm": 8.0646333694458, "learning_rate": 7.921752668672316e-06, "loss": 0.4525, "step": 6648 }, { "epoch": 0.3083951762523191, "grad_norm": 7.556521415710449, "learning_rate": 7.921155418438384e-06, "loss": 0.4008, "step": 6649 }, { "epoch": 0.30844155844155846, "grad_norm": 12.742416381835938, "learning_rate": 7.920558104919163e-06, "loss": 0.3285, "step": 6650 }, { "epoch": 0.30848794063079776, "grad_norm": 4.919981956481934, "learning_rate": 7.91996072812759e-06, "loss": 0.2796, "step": 6651 }, { "epoch": 0.3085343228200371, "grad_norm": 5.009596824645996, "learning_rate": 7.919363288076606e-06, "loss": 0.2266, "step": 6652 }, { "epoch": 0.3085807050092764, "grad_norm": 4.522464275360107, "learning_rate": 7.918765784779158e-06, "loss": 0.3009, "step": 6653 }, { "epoch": 0.3086270871985158, "grad_norm": 3.4211959838867188, "learning_rate": 7.918168218248189e-06, "loss": 0.2916, "step": 6654 }, { "epoch": 0.3086734693877551, "grad_norm": 10.72326374053955, "learning_rate": 7.917570588496643e-06, "loss": 0.3231, "step": 6655 }, { "epoch": 0.30871985157699444, "grad_norm": 5.456338882446289, "learning_rate": 7.916972895537471e-06, "loss": 0.3434, "step": 6656 }, { "epoch": 0.30876623376623374, "grad_norm": 8.732077598571777, "learning_rate": 7.91637513938362e-06, "loss": 0.3771, "step": 6657 }, { "epoch": 0.3088126159554731, "grad_norm": 6.208738803863525, "learning_rate": 7.915777320048036e-06, "loss": 0.4404, "step": 6658 }, { "epoch": 0.3088589981447124, "grad_norm": 9.445908546447754, "learning_rate": 7.915179437543679e-06, "loss": 0.3625, "step": 6659 }, { "epoch": 0.30890538033395176, "grad_norm": 7.25424861907959, "learning_rate": 7.914581491883495e-06, "loss": 0.3222, "step": 6660 }, { "epoch": 0.3089517625231911, "grad_norm": 10.132307052612305, "learning_rate": 7.91398348308044e-06, "loss": 0.3555, "step": 6661 }, { "epoch": 0.3089981447124304, "grad_norm": 5.33950138092041, "learning_rate": 7.91338541114747e-06, "loss": 0.2904, "step": 6662 }, { "epoch": 0.3090445269016698, "grad_norm": 8.4475736618042, "learning_rate": 7.91278727609754e-06, "loss": 0.3779, "step": 6663 }, { "epoch": 0.3090909090909091, "grad_norm": 7.8108229637146, "learning_rate": 7.912189077943613e-06, "loss": 0.3593, "step": 6664 }, { "epoch": 0.30913729128014844, "grad_norm": 8.735240936279297, "learning_rate": 7.911590816698644e-06, "loss": 0.4061, "step": 6665 }, { "epoch": 0.30918367346938774, "grad_norm": 28.028505325317383, "learning_rate": 7.910992492375598e-06, "loss": 0.3412, "step": 6666 }, { "epoch": 0.3092300556586271, "grad_norm": 8.10096263885498, "learning_rate": 7.910394104987432e-06, "loss": 0.4222, "step": 6667 }, { "epoch": 0.3092764378478664, "grad_norm": 10.002519607543945, "learning_rate": 7.909795654547114e-06, "loss": 0.3944, "step": 6668 }, { "epoch": 0.30932282003710576, "grad_norm": 7.863852024078369, "learning_rate": 7.909197141067609e-06, "loss": 0.3779, "step": 6669 }, { "epoch": 0.30936920222634506, "grad_norm": 12.210583686828613, "learning_rate": 7.90859856456188e-06, "loss": 0.6276, "step": 6670 }, { "epoch": 0.3094155844155844, "grad_norm": 4.980231761932373, "learning_rate": 7.907999925042898e-06, "loss": 0.287, "step": 6671 }, { "epoch": 0.3094619666048237, "grad_norm": 9.601852416992188, "learning_rate": 7.907401222523634e-06, "loss": 0.3604, "step": 6672 }, { "epoch": 0.3095083487940631, "grad_norm": 7.443408966064453, "learning_rate": 7.906802457017053e-06, "loss": 0.3803, "step": 6673 }, { "epoch": 0.3095547309833024, "grad_norm": 6.255833625793457, "learning_rate": 7.90620362853613e-06, "loss": 0.3977, "step": 6674 }, { "epoch": 0.30960111317254174, "grad_norm": 8.376765251159668, "learning_rate": 7.90560473709384e-06, "loss": 0.3561, "step": 6675 }, { "epoch": 0.3096474953617811, "grad_norm": 9.813031196594238, "learning_rate": 7.905005782703155e-06, "loss": 0.3114, "step": 6676 }, { "epoch": 0.3096938775510204, "grad_norm": 7.010604381561279, "learning_rate": 7.904406765377052e-06, "loss": 0.3199, "step": 6677 }, { "epoch": 0.30974025974025976, "grad_norm": 8.427871704101562, "learning_rate": 7.903807685128508e-06, "loss": 0.2502, "step": 6678 }, { "epoch": 0.30978664192949906, "grad_norm": 11.880263328552246, "learning_rate": 7.903208541970502e-06, "loss": 0.3617, "step": 6679 }, { "epoch": 0.3098330241187384, "grad_norm": 5.910560131072998, "learning_rate": 7.902609335916015e-06, "loss": 0.2712, "step": 6680 }, { "epoch": 0.3098794063079777, "grad_norm": 6.869740009307861, "learning_rate": 7.902010066978028e-06, "loss": 0.4126, "step": 6681 }, { "epoch": 0.3099257884972171, "grad_norm": 5.021996974945068, "learning_rate": 7.901410735169522e-06, "loss": 0.3467, "step": 6682 }, { "epoch": 0.3099721706864564, "grad_norm": 3.632023572921753, "learning_rate": 7.900811340503483e-06, "loss": 0.2813, "step": 6683 }, { "epoch": 0.31001855287569574, "grad_norm": 7.518237113952637, "learning_rate": 7.900211882992897e-06, "loss": 0.4322, "step": 6684 }, { "epoch": 0.31006493506493504, "grad_norm": 8.363354682922363, "learning_rate": 7.89961236265075e-06, "loss": 0.4282, "step": 6685 }, { "epoch": 0.3101113172541744, "grad_norm": 9.732295036315918, "learning_rate": 7.899012779490031e-06, "loss": 0.3549, "step": 6686 }, { "epoch": 0.3101576994434137, "grad_norm": 9.80800724029541, "learning_rate": 7.898413133523729e-06, "loss": 0.48, "step": 6687 }, { "epoch": 0.31020408163265306, "grad_norm": 14.28938102722168, "learning_rate": 7.897813424764836e-06, "loss": 0.3441, "step": 6688 }, { "epoch": 0.3102504638218924, "grad_norm": 10.78130054473877, "learning_rate": 7.897213653226344e-06, "loss": 0.3895, "step": 6689 }, { "epoch": 0.3102968460111317, "grad_norm": 6.246992588043213, "learning_rate": 7.896613818921244e-06, "loss": 0.2407, "step": 6690 }, { "epoch": 0.3103432282003711, "grad_norm": 8.257331848144531, "learning_rate": 7.896013921862534e-06, "loss": 0.369, "step": 6691 }, { "epoch": 0.3103896103896104, "grad_norm": 6.289729595184326, "learning_rate": 7.89541396206321e-06, "loss": 0.3697, "step": 6692 }, { "epoch": 0.31043599257884974, "grad_norm": 8.225309371948242, "learning_rate": 7.89481393953627e-06, "loss": 0.4015, "step": 6693 }, { "epoch": 0.31048237476808904, "grad_norm": 6.602002143859863, "learning_rate": 7.894213854294715e-06, "loss": 0.3074, "step": 6694 }, { "epoch": 0.3105287569573284, "grad_norm": 5.658491611480713, "learning_rate": 7.89361370635154e-06, "loss": 0.3435, "step": 6695 }, { "epoch": 0.3105751391465677, "grad_norm": 7.750551223754883, "learning_rate": 7.893013495719752e-06, "loss": 0.4281, "step": 6696 }, { "epoch": 0.31062152133580706, "grad_norm": 7.053256511688232, "learning_rate": 7.892413222412352e-06, "loss": 0.4274, "step": 6697 }, { "epoch": 0.31066790352504636, "grad_norm": 8.204873085021973, "learning_rate": 7.891812886442345e-06, "loss": 0.3817, "step": 6698 }, { "epoch": 0.3107142857142857, "grad_norm": 9.447630882263184, "learning_rate": 7.891212487822737e-06, "loss": 0.3243, "step": 6699 }, { "epoch": 0.310760667903525, "grad_norm": 5.650155067443848, "learning_rate": 7.890612026566536e-06, "loss": 0.3157, "step": 6700 }, { "epoch": 0.3108070500927644, "grad_norm": 8.065695762634277, "learning_rate": 7.89001150268675e-06, "loss": 0.3391, "step": 6701 }, { "epoch": 0.3108534322820037, "grad_norm": 7.771061420440674, "learning_rate": 7.88941091619639e-06, "loss": 0.3728, "step": 6702 }, { "epoch": 0.31089981447124304, "grad_norm": 8.635520935058594, "learning_rate": 7.888810267108464e-06, "loss": 0.3793, "step": 6703 }, { "epoch": 0.3109461966604824, "grad_norm": 10.367545127868652, "learning_rate": 7.888209555435988e-06, "loss": 0.4782, "step": 6704 }, { "epoch": 0.3109925788497217, "grad_norm": 8.300898551940918, "learning_rate": 7.887608781191978e-06, "loss": 0.3665, "step": 6705 }, { "epoch": 0.31103896103896106, "grad_norm": 9.598145484924316, "learning_rate": 7.887007944389444e-06, "loss": 0.3282, "step": 6706 }, { "epoch": 0.31108534322820036, "grad_norm": 8.276520729064941, "learning_rate": 7.886407045041406e-06, "loss": 0.3632, "step": 6707 }, { "epoch": 0.3111317254174397, "grad_norm": 5.843402862548828, "learning_rate": 7.885806083160883e-06, "loss": 0.2641, "step": 6708 }, { "epoch": 0.311178107606679, "grad_norm": 8.990304946899414, "learning_rate": 7.885205058760892e-06, "loss": 0.411, "step": 6709 }, { "epoch": 0.3112244897959184, "grad_norm": 6.1801605224609375, "learning_rate": 7.884603971854457e-06, "loss": 0.4039, "step": 6710 }, { "epoch": 0.3112708719851577, "grad_norm": 9.232931137084961, "learning_rate": 7.884002822454597e-06, "loss": 0.4025, "step": 6711 }, { "epoch": 0.31131725417439704, "grad_norm": 7.937490940093994, "learning_rate": 7.883401610574338e-06, "loss": 0.4192, "step": 6712 }, { "epoch": 0.31136363636363634, "grad_norm": 8.05920124053955, "learning_rate": 7.882800336226701e-06, "loss": 0.3814, "step": 6713 }, { "epoch": 0.3114100185528757, "grad_norm": 6.300525665283203, "learning_rate": 7.882198999424718e-06, "loss": 0.3133, "step": 6714 }, { "epoch": 0.311456400742115, "grad_norm": 6.2799859046936035, "learning_rate": 7.881597600181413e-06, "loss": 0.397, "step": 6715 }, { "epoch": 0.31150278293135436, "grad_norm": 9.187744140625, "learning_rate": 7.880996138509816e-06, "loss": 0.2924, "step": 6716 }, { "epoch": 0.3115491651205937, "grad_norm": 6.969238758087158, "learning_rate": 7.880394614422957e-06, "loss": 0.3799, "step": 6717 }, { "epoch": 0.311595547309833, "grad_norm": 30.0634708404541, "learning_rate": 7.87979302793387e-06, "loss": 0.47, "step": 6718 }, { "epoch": 0.3116419294990724, "grad_norm": 15.287216186523438, "learning_rate": 7.879191379055585e-06, "loss": 0.4438, "step": 6719 }, { "epoch": 0.3116883116883117, "grad_norm": 6.419962406158447, "learning_rate": 7.878589667801136e-06, "loss": 0.3569, "step": 6720 }, { "epoch": 0.31173469387755104, "grad_norm": 10.882224082946777, "learning_rate": 7.87798789418356e-06, "loss": 0.3832, "step": 6721 }, { "epoch": 0.31178107606679034, "grad_norm": 6.816159725189209, "learning_rate": 7.877386058215895e-06, "loss": 0.386, "step": 6722 }, { "epoch": 0.3118274582560297, "grad_norm": 12.995365142822266, "learning_rate": 7.876784159911179e-06, "loss": 0.5747, "step": 6723 }, { "epoch": 0.311873840445269, "grad_norm": 11.218213081359863, "learning_rate": 7.876182199282451e-06, "loss": 0.3651, "step": 6724 }, { "epoch": 0.31192022263450836, "grad_norm": 6.901298522949219, "learning_rate": 7.875580176342752e-06, "loss": 0.3085, "step": 6725 }, { "epoch": 0.31196660482374766, "grad_norm": 14.555046081542969, "learning_rate": 7.874978091105127e-06, "loss": 0.291, "step": 6726 }, { "epoch": 0.312012987012987, "grad_norm": 7.740196704864502, "learning_rate": 7.874375943582618e-06, "loss": 0.3501, "step": 6727 }, { "epoch": 0.3120593692022263, "grad_norm": 5.451420783996582, "learning_rate": 7.873773733788268e-06, "loss": 0.3269, "step": 6728 }, { "epoch": 0.3121057513914657, "grad_norm": 7.17331600189209, "learning_rate": 7.873171461735129e-06, "loss": 0.3438, "step": 6729 }, { "epoch": 0.312152133580705, "grad_norm": 5.611303806304932, "learning_rate": 7.872569127436242e-06, "loss": 0.3388, "step": 6730 }, { "epoch": 0.31219851576994434, "grad_norm": 11.222590446472168, "learning_rate": 7.871966730904663e-06, "loss": 0.4481, "step": 6731 }, { "epoch": 0.3122448979591837, "grad_norm": 4.606560707092285, "learning_rate": 7.87136427215344e-06, "loss": 0.2704, "step": 6732 }, { "epoch": 0.312291280148423, "grad_norm": 9.91932487487793, "learning_rate": 7.870761751195622e-06, "loss": 0.3758, "step": 6733 }, { "epoch": 0.31233766233766236, "grad_norm": 6.389863967895508, "learning_rate": 7.870159168044265e-06, "loss": 0.3269, "step": 6734 }, { "epoch": 0.31238404452690166, "grad_norm": 4.874887466430664, "learning_rate": 7.869556522712425e-06, "loss": 0.2717, "step": 6735 }, { "epoch": 0.312430426716141, "grad_norm": 6.983826160430908, "learning_rate": 7.868953815213157e-06, "loss": 0.2498, "step": 6736 }, { "epoch": 0.3124768089053803, "grad_norm": 7.5281219482421875, "learning_rate": 7.868351045559515e-06, "loss": 0.3775, "step": 6737 }, { "epoch": 0.3125231910946197, "grad_norm": 6.405583381652832, "learning_rate": 7.867748213764564e-06, "loss": 0.3676, "step": 6738 }, { "epoch": 0.312569573283859, "grad_norm": 6.3173980712890625, "learning_rate": 7.867145319841358e-06, "loss": 0.3163, "step": 6739 }, { "epoch": 0.31261595547309834, "grad_norm": 6.051529407501221, "learning_rate": 7.866542363802961e-06, "loss": 0.3829, "step": 6740 }, { "epoch": 0.31266233766233764, "grad_norm": 6.378538608551025, "learning_rate": 7.865939345662437e-06, "loss": 0.2978, "step": 6741 }, { "epoch": 0.312708719851577, "grad_norm": 13.335206031799316, "learning_rate": 7.865336265432847e-06, "loss": 0.3445, "step": 6742 }, { "epoch": 0.3127551020408163, "grad_norm": 5.691918849945068, "learning_rate": 7.86473312312726e-06, "loss": 0.3019, "step": 6743 }, { "epoch": 0.31280148423005566, "grad_norm": 7.5242414474487305, "learning_rate": 7.864129918758738e-06, "loss": 0.3345, "step": 6744 }, { "epoch": 0.312847866419295, "grad_norm": 7.135979175567627, "learning_rate": 7.863526652340354e-06, "loss": 0.4049, "step": 6745 }, { "epoch": 0.3128942486085343, "grad_norm": 13.786415100097656, "learning_rate": 7.862923323885177e-06, "loss": 0.395, "step": 6746 }, { "epoch": 0.3129406307977737, "grad_norm": 7.278458118438721, "learning_rate": 7.862319933406274e-06, "loss": 0.3594, "step": 6747 }, { "epoch": 0.312987012987013, "grad_norm": 9.785652160644531, "learning_rate": 7.86171648091672e-06, "loss": 0.2886, "step": 6748 }, { "epoch": 0.31303339517625234, "grad_norm": 6.010782241821289, "learning_rate": 7.861112966429588e-06, "loss": 0.282, "step": 6749 }, { "epoch": 0.31307977736549164, "grad_norm": 10.882081985473633, "learning_rate": 7.860509389957952e-06, "loss": 0.4049, "step": 6750 }, { "epoch": 0.313126159554731, "grad_norm": 6.091452598571777, "learning_rate": 7.85990575151489e-06, "loss": 0.3301, "step": 6751 }, { "epoch": 0.3131725417439703, "grad_norm": 4.878137588500977, "learning_rate": 7.859302051113476e-06, "loss": 0.319, "step": 6752 }, { "epoch": 0.31321892393320966, "grad_norm": 4.681980609893799, "learning_rate": 7.858698288766792e-06, "loss": 0.2527, "step": 6753 }, { "epoch": 0.31326530612244896, "grad_norm": 13.774539947509766, "learning_rate": 7.85809446448792e-06, "loss": 0.4141, "step": 6754 }, { "epoch": 0.3133116883116883, "grad_norm": 6.780941009521484, "learning_rate": 7.857490578289937e-06, "loss": 0.3145, "step": 6755 }, { "epoch": 0.3133580705009276, "grad_norm": 9.229458808898926, "learning_rate": 7.856886630185927e-06, "loss": 0.3154, "step": 6756 }, { "epoch": 0.313404452690167, "grad_norm": 15.398635864257812, "learning_rate": 7.856282620188976e-06, "loss": 0.4771, "step": 6757 }, { "epoch": 0.3134508348794063, "grad_norm": 12.817512512207031, "learning_rate": 7.855678548312169e-06, "loss": 0.5136, "step": 6758 }, { "epoch": 0.31349721706864564, "grad_norm": 10.583724975585938, "learning_rate": 7.855074414568591e-06, "loss": 0.3411, "step": 6759 }, { "epoch": 0.313543599257885, "grad_norm": 7.54749059677124, "learning_rate": 7.854470218971333e-06, "loss": 0.3636, "step": 6760 }, { "epoch": 0.3135899814471243, "grad_norm": 7.136335372924805, "learning_rate": 7.853865961533482e-06, "loss": 0.3243, "step": 6761 }, { "epoch": 0.31363636363636366, "grad_norm": 8.736671447753906, "learning_rate": 7.853261642268133e-06, "loss": 0.4016, "step": 6762 }, { "epoch": 0.31368274582560296, "grad_norm": 10.718208312988281, "learning_rate": 7.852657261188372e-06, "loss": 0.424, "step": 6763 }, { "epoch": 0.3137291280148423, "grad_norm": 10.157788276672363, "learning_rate": 7.852052818307298e-06, "loss": 0.4214, "step": 6764 }, { "epoch": 0.3137755102040816, "grad_norm": 9.63771915435791, "learning_rate": 7.851448313638003e-06, "loss": 0.3432, "step": 6765 }, { "epoch": 0.313821892393321, "grad_norm": 3.7064452171325684, "learning_rate": 7.850843747193586e-06, "loss": 0.2959, "step": 6766 }, { "epoch": 0.3138682745825603, "grad_norm": 19.222557067871094, "learning_rate": 7.850239118987141e-06, "loss": 0.436, "step": 6767 }, { "epoch": 0.31391465677179964, "grad_norm": 3.692403554916382, "learning_rate": 7.84963442903177e-06, "loss": 0.2969, "step": 6768 }, { "epoch": 0.31396103896103894, "grad_norm": 9.741988182067871, "learning_rate": 7.849029677340572e-06, "loss": 0.3191, "step": 6769 }, { "epoch": 0.3140074211502783, "grad_norm": 4.705011367797852, "learning_rate": 7.848424863926648e-06, "loss": 0.326, "step": 6770 }, { "epoch": 0.3140538033395176, "grad_norm": 6.507453918457031, "learning_rate": 7.847819988803103e-06, "loss": 0.308, "step": 6771 }, { "epoch": 0.31410018552875696, "grad_norm": 4.4242024421691895, "learning_rate": 7.84721505198304e-06, "loss": 0.3312, "step": 6772 }, { "epoch": 0.3141465677179963, "grad_norm": 4.863670349121094, "learning_rate": 7.846610053479562e-06, "loss": 0.3668, "step": 6773 }, { "epoch": 0.3141929499072356, "grad_norm": 7.329981803894043, "learning_rate": 7.84600499330578e-06, "loss": 0.3659, "step": 6774 }, { "epoch": 0.314239332096475, "grad_norm": 7.307796955108643, "learning_rate": 7.845399871474801e-06, "loss": 0.3396, "step": 6775 }, { "epoch": 0.3142857142857143, "grad_norm": 18.81024742126465, "learning_rate": 7.844794687999737e-06, "loss": 0.5876, "step": 6776 }, { "epoch": 0.31433209647495364, "grad_norm": 10.656399726867676, "learning_rate": 7.844189442893694e-06, "loss": 0.2453, "step": 6777 }, { "epoch": 0.31437847866419294, "grad_norm": 9.827254295349121, "learning_rate": 7.843584136169786e-06, "loss": 0.5221, "step": 6778 }, { "epoch": 0.3144248608534323, "grad_norm": 7.077070713043213, "learning_rate": 7.842978767841129e-06, "loss": 0.3574, "step": 6779 }, { "epoch": 0.3144712430426716, "grad_norm": 10.518348693847656, "learning_rate": 7.842373337920837e-06, "loss": 0.3712, "step": 6780 }, { "epoch": 0.31451762523191096, "grad_norm": 6.636781215667725, "learning_rate": 7.841767846422026e-06, "loss": 0.4581, "step": 6781 }, { "epoch": 0.31456400742115026, "grad_norm": 4.977673530578613, "learning_rate": 7.841162293357813e-06, "loss": 0.313, "step": 6782 }, { "epoch": 0.3146103896103896, "grad_norm": 5.046576976776123, "learning_rate": 7.840556678741316e-06, "loss": 0.3619, "step": 6783 }, { "epoch": 0.3146567717996289, "grad_norm": 10.54525089263916, "learning_rate": 7.839951002585657e-06, "loss": 0.5157, "step": 6784 }, { "epoch": 0.3147031539888683, "grad_norm": 6.066775321960449, "learning_rate": 7.83934526490396e-06, "loss": 0.336, "step": 6785 }, { "epoch": 0.3147495361781076, "grad_norm": 7.404804229736328, "learning_rate": 7.838739465709343e-06, "loss": 0.3457, "step": 6786 }, { "epoch": 0.31479591836734694, "grad_norm": 10.28349494934082, "learning_rate": 7.838133605014935e-06, "loss": 0.394, "step": 6787 }, { "epoch": 0.3148423005565863, "grad_norm": 4.450393199920654, "learning_rate": 7.837527682833857e-06, "loss": 0.3582, "step": 6788 }, { "epoch": 0.3148886827458256, "grad_norm": 6.745849609375, "learning_rate": 7.83692169917924e-06, "loss": 0.2788, "step": 6789 }, { "epoch": 0.31493506493506496, "grad_norm": 6.977816581726074, "learning_rate": 7.836315654064211e-06, "loss": 0.2858, "step": 6790 }, { "epoch": 0.31498144712430426, "grad_norm": 8.623005867004395, "learning_rate": 7.835709547501897e-06, "loss": 0.3668, "step": 6791 }, { "epoch": 0.3150278293135436, "grad_norm": 12.065895080566406, "learning_rate": 7.835103379505433e-06, "loss": 0.3602, "step": 6792 }, { "epoch": 0.3150742115027829, "grad_norm": 8.064010620117188, "learning_rate": 7.83449715008795e-06, "loss": 0.438, "step": 6793 }, { "epoch": 0.3151205936920223, "grad_norm": 4.69890022277832, "learning_rate": 7.833890859262579e-06, "loss": 0.2576, "step": 6794 }, { "epoch": 0.3151669758812616, "grad_norm": 6.741367340087891, "learning_rate": 7.83328450704246e-06, "loss": 0.3568, "step": 6795 }, { "epoch": 0.31521335807050094, "grad_norm": 7.804193019866943, "learning_rate": 7.832678093440726e-06, "loss": 0.4208, "step": 6796 }, { "epoch": 0.31525974025974024, "grad_norm": 6.479237079620361, "learning_rate": 7.832071618470513e-06, "loss": 0.3658, "step": 6797 }, { "epoch": 0.3153061224489796, "grad_norm": 8.834599494934082, "learning_rate": 7.831465082144964e-06, "loss": 0.2781, "step": 6798 }, { "epoch": 0.3153525046382189, "grad_norm": 7.929902076721191, "learning_rate": 7.830858484477218e-06, "loss": 0.4533, "step": 6799 }, { "epoch": 0.31539888682745826, "grad_norm": 5.84702730178833, "learning_rate": 7.830251825480415e-06, "loss": 0.3765, "step": 6800 }, { "epoch": 0.3154452690166976, "grad_norm": 4.111716270446777, "learning_rate": 7.8296451051677e-06, "loss": 0.2539, "step": 6801 }, { "epoch": 0.3154916512059369, "grad_norm": 7.288713455200195, "learning_rate": 7.829038323552216e-06, "loss": 0.2917, "step": 6802 }, { "epoch": 0.3155380333951763, "grad_norm": 10.597280502319336, "learning_rate": 7.828431480647109e-06, "loss": 0.4332, "step": 6803 }, { "epoch": 0.3155844155844156, "grad_norm": 9.813470840454102, "learning_rate": 7.827824576465525e-06, "loss": 0.4117, "step": 6804 }, { "epoch": 0.31563079777365494, "grad_norm": 5.6377272605896, "learning_rate": 7.827217611020612e-06, "loss": 0.2773, "step": 6805 }, { "epoch": 0.31567717996289424, "grad_norm": 12.601222038269043, "learning_rate": 7.826610584325524e-06, "loss": 0.393, "step": 6806 }, { "epoch": 0.3157235621521336, "grad_norm": 11.378238677978516, "learning_rate": 7.826003496393405e-06, "loss": 0.2512, "step": 6807 }, { "epoch": 0.3157699443413729, "grad_norm": 19.011940002441406, "learning_rate": 7.825396347237413e-06, "loss": 0.4445, "step": 6808 }, { "epoch": 0.31581632653061226, "grad_norm": 11.378427505493164, "learning_rate": 7.824789136870699e-06, "loss": 0.4276, "step": 6809 }, { "epoch": 0.31586270871985156, "grad_norm": 7.865798473358154, "learning_rate": 7.824181865306418e-06, "loss": 0.3462, "step": 6810 }, { "epoch": 0.3159090909090909, "grad_norm": 13.661776542663574, "learning_rate": 7.823574532557727e-06, "loss": 0.4694, "step": 6811 }, { "epoch": 0.3159554730983302, "grad_norm": 5.354075908660889, "learning_rate": 7.822967138637783e-06, "loss": 0.3311, "step": 6812 }, { "epoch": 0.3160018552875696, "grad_norm": 7.156092643737793, "learning_rate": 7.822359683559743e-06, "loss": 0.352, "step": 6813 }, { "epoch": 0.3160482374768089, "grad_norm": 10.620307922363281, "learning_rate": 7.821752167336772e-06, "loss": 0.4938, "step": 6814 }, { "epoch": 0.31609461966604824, "grad_norm": 13.163888931274414, "learning_rate": 7.821144589982027e-06, "loss": 0.4519, "step": 6815 }, { "epoch": 0.3161410018552876, "grad_norm": 6.689156532287598, "learning_rate": 7.820536951508672e-06, "loss": 0.2729, "step": 6816 }, { "epoch": 0.3161873840445269, "grad_norm": 9.059286117553711, "learning_rate": 7.819929251929873e-06, "loss": 0.3226, "step": 6817 }, { "epoch": 0.31623376623376626, "grad_norm": 7.248569965362549, "learning_rate": 7.819321491258793e-06, "loss": 0.3316, "step": 6818 }, { "epoch": 0.31628014842300556, "grad_norm": 5.4614362716674805, "learning_rate": 7.8187136695086e-06, "loss": 0.3731, "step": 6819 }, { "epoch": 0.3163265306122449, "grad_norm": 7.272862434387207, "learning_rate": 7.818105786692462e-06, "loss": 0.3895, "step": 6820 }, { "epoch": 0.3163729128014842, "grad_norm": 6.085302352905273, "learning_rate": 7.817497842823549e-06, "loss": 0.328, "step": 6821 }, { "epoch": 0.3164192949907236, "grad_norm": 5.5385541915893555, "learning_rate": 7.816889837915032e-06, "loss": 0.3112, "step": 6822 }, { "epoch": 0.3164656771799629, "grad_norm": 6.235110282897949, "learning_rate": 7.816281771980082e-06, "loss": 0.3177, "step": 6823 }, { "epoch": 0.31651205936920224, "grad_norm": 4.817189693450928, "learning_rate": 7.815673645031871e-06, "loss": 0.323, "step": 6824 }, { "epoch": 0.31655844155844154, "grad_norm": 4.888227462768555, "learning_rate": 7.81506545708358e-06, "loss": 0.3274, "step": 6825 }, { "epoch": 0.3166048237476809, "grad_norm": 12.112173080444336, "learning_rate": 7.814457208148376e-06, "loss": 0.476, "step": 6826 }, { "epoch": 0.3166512059369202, "grad_norm": 5.730503559112549, "learning_rate": 7.813848898239442e-06, "loss": 0.317, "step": 6827 }, { "epoch": 0.31669758812615956, "grad_norm": 5.695093631744385, "learning_rate": 7.813240527369958e-06, "loss": 0.3404, "step": 6828 }, { "epoch": 0.31674397031539886, "grad_norm": 12.699638366699219, "learning_rate": 7.8126320955531e-06, "loss": 0.4599, "step": 6829 }, { "epoch": 0.3167903525046382, "grad_norm": 5.421376705169678, "learning_rate": 7.812023602802053e-06, "loss": 0.3048, "step": 6830 }, { "epoch": 0.3168367346938776, "grad_norm": 10.230815887451172, "learning_rate": 7.811415049129995e-06, "loss": 0.4114, "step": 6831 }, { "epoch": 0.3168831168831169, "grad_norm": 8.146166801452637, "learning_rate": 7.810806434550114e-06, "loss": 0.3312, "step": 6832 }, { "epoch": 0.31692949907235624, "grad_norm": 5.309009075164795, "learning_rate": 7.810197759075595e-06, "loss": 0.4315, "step": 6833 }, { "epoch": 0.31697588126159554, "grad_norm": 6.613906383514404, "learning_rate": 7.809589022719623e-06, "loss": 0.3897, "step": 6834 }, { "epoch": 0.3170222634508349, "grad_norm": 6.92220401763916, "learning_rate": 7.808980225495386e-06, "loss": 0.3693, "step": 6835 }, { "epoch": 0.3170686456400742, "grad_norm": 4.198336124420166, "learning_rate": 7.808371367416076e-06, "loss": 0.3854, "step": 6836 }, { "epoch": 0.31711502782931356, "grad_norm": 6.531822204589844, "learning_rate": 7.80776244849488e-06, "loss": 0.3288, "step": 6837 }, { "epoch": 0.31716141001855286, "grad_norm": 7.073028087615967, "learning_rate": 7.807153468744992e-06, "loss": 0.2916, "step": 6838 }, { "epoch": 0.3172077922077922, "grad_norm": 6.699297904968262, "learning_rate": 7.806544428179606e-06, "loss": 0.3065, "step": 6839 }, { "epoch": 0.3172541743970315, "grad_norm": 4.509942054748535, "learning_rate": 7.805935326811913e-06, "loss": 0.284, "step": 6840 }, { "epoch": 0.3173005565862709, "grad_norm": 4.6520161628723145, "learning_rate": 7.805326164655111e-06, "loss": 0.2718, "step": 6841 }, { "epoch": 0.3173469387755102, "grad_norm": 6.96486234664917, "learning_rate": 7.804716941722398e-06, "loss": 0.4136, "step": 6842 }, { "epoch": 0.31739332096474954, "grad_norm": 13.963397979736328, "learning_rate": 7.804107658026974e-06, "loss": 0.4362, "step": 6843 }, { "epoch": 0.3174397031539889, "grad_norm": 6.983670711517334, "learning_rate": 7.803498313582034e-06, "loss": 0.3676, "step": 6844 }, { "epoch": 0.3174860853432282, "grad_norm": 9.935128211975098, "learning_rate": 7.802888908400782e-06, "loss": 0.3653, "step": 6845 }, { "epoch": 0.31753246753246755, "grad_norm": 6.35149621963501, "learning_rate": 7.802279442496421e-06, "loss": 0.3858, "step": 6846 }, { "epoch": 0.31757884972170686, "grad_norm": 10.401451110839844, "learning_rate": 7.801669915882152e-06, "loss": 0.4378, "step": 6847 }, { "epoch": 0.3176252319109462, "grad_norm": 8.325623512268066, "learning_rate": 7.801060328571186e-06, "loss": 0.3502, "step": 6848 }, { "epoch": 0.3176716141001855, "grad_norm": 4.8634114265441895, "learning_rate": 7.800450680576722e-06, "loss": 0.2625, "step": 6849 }, { "epoch": 0.3177179962894249, "grad_norm": 5.519956588745117, "learning_rate": 7.799840971911972e-06, "loss": 0.2954, "step": 6850 }, { "epoch": 0.3177643784786642, "grad_norm": 5.160170555114746, "learning_rate": 7.799231202590145e-06, "loss": 0.3236, "step": 6851 }, { "epoch": 0.31781076066790354, "grad_norm": 8.249631881713867, "learning_rate": 7.798621372624448e-06, "loss": 0.3295, "step": 6852 }, { "epoch": 0.31785714285714284, "grad_norm": 5.358874797821045, "learning_rate": 7.798011482028099e-06, "loss": 0.249, "step": 6853 }, { "epoch": 0.3179035250463822, "grad_norm": 7.387584209442139, "learning_rate": 7.797401530814303e-06, "loss": 0.3923, "step": 6854 }, { "epoch": 0.3179499072356215, "grad_norm": 7.3050360679626465, "learning_rate": 7.79679151899628e-06, "loss": 0.3219, "step": 6855 }, { "epoch": 0.31799628942486086, "grad_norm": 5.778231620788574, "learning_rate": 7.796181446587244e-06, "loss": 0.2486, "step": 6856 }, { "epoch": 0.31804267161410016, "grad_norm": 5.86193323135376, "learning_rate": 7.795571313600412e-06, "loss": 0.3253, "step": 6857 }, { "epoch": 0.3180890538033395, "grad_norm": 7.273956298828125, "learning_rate": 7.794961120049003e-06, "loss": 0.3595, "step": 6858 }, { "epoch": 0.3181354359925789, "grad_norm": 17.230484008789062, "learning_rate": 7.794350865946233e-06, "loss": 0.308, "step": 6859 }, { "epoch": 0.3181818181818182, "grad_norm": 7.595303058624268, "learning_rate": 7.793740551305326e-06, "loss": 0.3647, "step": 6860 }, { "epoch": 0.31822820037105753, "grad_norm": 7.164857387542725, "learning_rate": 7.793130176139506e-06, "loss": 0.2826, "step": 6861 }, { "epoch": 0.31827458256029684, "grad_norm": 5.5874457359313965, "learning_rate": 7.79251974046199e-06, "loss": 0.3602, "step": 6862 }, { "epoch": 0.3183209647495362, "grad_norm": 7.213801860809326, "learning_rate": 7.791909244286008e-06, "loss": 0.3772, "step": 6863 }, { "epoch": 0.3183673469387755, "grad_norm": 5.69183874130249, "learning_rate": 7.791298687624786e-06, "loss": 0.332, "step": 6864 }, { "epoch": 0.31841372912801486, "grad_norm": 9.814303398132324, "learning_rate": 7.79068807049155e-06, "loss": 0.2544, "step": 6865 }, { "epoch": 0.31846011131725416, "grad_norm": 11.72790813446045, "learning_rate": 7.790077392899528e-06, "loss": 0.4519, "step": 6866 }, { "epoch": 0.3185064935064935, "grad_norm": 6.802755832672119, "learning_rate": 7.789466654861952e-06, "loss": 0.4049, "step": 6867 }, { "epoch": 0.3185528756957328, "grad_norm": 4.082059860229492, "learning_rate": 7.788855856392053e-06, "loss": 0.338, "step": 6868 }, { "epoch": 0.3185992578849722, "grad_norm": 4.269477844238281, "learning_rate": 7.788244997503061e-06, "loss": 0.3166, "step": 6869 }, { "epoch": 0.3186456400742115, "grad_norm": 4.6526594161987305, "learning_rate": 7.787634078208213e-06, "loss": 0.3301, "step": 6870 }, { "epoch": 0.31869202226345084, "grad_norm": 8.84559154510498, "learning_rate": 7.787023098520743e-06, "loss": 0.3613, "step": 6871 }, { "epoch": 0.3187384044526902, "grad_norm": 6.415309906005859, "learning_rate": 7.786412058453886e-06, "loss": 0.3242, "step": 6872 }, { "epoch": 0.3187847866419295, "grad_norm": 6.776705265045166, "learning_rate": 7.785800958020881e-06, "loss": 0.3967, "step": 6873 }, { "epoch": 0.31883116883116885, "grad_norm": 6.827310085296631, "learning_rate": 7.785189797234972e-06, "loss": 0.4413, "step": 6874 }, { "epoch": 0.31887755102040816, "grad_norm": 5.478217124938965, "learning_rate": 7.78457857610939e-06, "loss": 0.3395, "step": 6875 }, { "epoch": 0.3189239332096475, "grad_norm": 8.566527366638184, "learning_rate": 7.783967294657384e-06, "loss": 0.4276, "step": 6876 }, { "epoch": 0.3189703153988868, "grad_norm": 7.816932678222656, "learning_rate": 7.783355952892195e-06, "loss": 0.2637, "step": 6877 }, { "epoch": 0.3190166975881262, "grad_norm": 15.928050994873047, "learning_rate": 7.782744550827067e-06, "loss": 0.3333, "step": 6878 }, { "epoch": 0.3190630797773655, "grad_norm": 6.611289978027344, "learning_rate": 7.782133088475246e-06, "loss": 0.3603, "step": 6879 }, { "epoch": 0.31910946196660483, "grad_norm": 10.138269424438477, "learning_rate": 7.78152156584998e-06, "loss": 0.4991, "step": 6880 }, { "epoch": 0.31915584415584414, "grad_norm": 6.780407428741455, "learning_rate": 7.780909982964515e-06, "loss": 0.3748, "step": 6881 }, { "epoch": 0.3192022263450835, "grad_norm": 31.779191970825195, "learning_rate": 7.780298339832104e-06, "loss": 0.4218, "step": 6882 }, { "epoch": 0.3192486085343228, "grad_norm": 7.9511871337890625, "learning_rate": 7.779686636465994e-06, "loss": 0.3431, "step": 6883 }, { "epoch": 0.31929499072356216, "grad_norm": 8.32032299041748, "learning_rate": 7.77907487287944e-06, "loss": 0.4559, "step": 6884 }, { "epoch": 0.31934137291280146, "grad_norm": 9.596941947937012, "learning_rate": 7.778463049085694e-06, "loss": 0.4009, "step": 6885 }, { "epoch": 0.3193877551020408, "grad_norm": 10.224947929382324, "learning_rate": 7.777851165098012e-06, "loss": 0.3815, "step": 6886 }, { "epoch": 0.3194341372912802, "grad_norm": 6.858285427093506, "learning_rate": 7.77723922092965e-06, "loss": 0.3847, "step": 6887 }, { "epoch": 0.3194805194805195, "grad_norm": 8.124277114868164, "learning_rate": 7.776627216593863e-06, "loss": 0.3929, "step": 6888 }, { "epoch": 0.31952690166975883, "grad_norm": 8.015926361083984, "learning_rate": 7.776015152103915e-06, "loss": 0.371, "step": 6889 }, { "epoch": 0.31957328385899814, "grad_norm": 11.478680610656738, "learning_rate": 7.775403027473061e-06, "loss": 0.3754, "step": 6890 }, { "epoch": 0.3196196660482375, "grad_norm": 5.263286113739014, "learning_rate": 7.774790842714566e-06, "loss": 0.3086, "step": 6891 }, { "epoch": 0.3196660482374768, "grad_norm": 6.213229179382324, "learning_rate": 7.774178597841688e-06, "loss": 0.2952, "step": 6892 }, { "epoch": 0.31971243042671615, "grad_norm": 6.740245342254639, "learning_rate": 7.773566292867698e-06, "loss": 0.3166, "step": 6893 }, { "epoch": 0.31975881261595546, "grad_norm": 7.9125566482543945, "learning_rate": 7.772953927805855e-06, "loss": 0.5347, "step": 6894 }, { "epoch": 0.3198051948051948, "grad_norm": 9.623334884643555, "learning_rate": 7.772341502669426e-06, "loss": 0.3135, "step": 6895 }, { "epoch": 0.3198515769944341, "grad_norm": 6.267141819000244, "learning_rate": 7.771729017471682e-06, "loss": 0.4024, "step": 6896 }, { "epoch": 0.3198979591836735, "grad_norm": 9.527612686157227, "learning_rate": 7.77111647222589e-06, "loss": 0.4415, "step": 6897 }, { "epoch": 0.3199443413729128, "grad_norm": 4.763093948364258, "learning_rate": 7.770503866945324e-06, "loss": 0.3397, "step": 6898 }, { "epoch": 0.31999072356215214, "grad_norm": 9.017087936401367, "learning_rate": 7.769891201643251e-06, "loss": 0.4258, "step": 6899 }, { "epoch": 0.3200371057513915, "grad_norm": 6.846952438354492, "learning_rate": 7.769278476332947e-06, "loss": 0.432, "step": 6900 }, { "epoch": 0.3200834879406308, "grad_norm": 5.187619209289551, "learning_rate": 7.768665691027684e-06, "loss": 0.2936, "step": 6901 }, { "epoch": 0.32012987012987015, "grad_norm": 4.6656880378723145, "learning_rate": 7.76805284574074e-06, "loss": 0.359, "step": 6902 }, { "epoch": 0.32017625231910946, "grad_norm": 11.647147178649902, "learning_rate": 7.76743994048539e-06, "loss": 0.403, "step": 6903 }, { "epoch": 0.3202226345083488, "grad_norm": 5.464483261108398, "learning_rate": 7.766826975274916e-06, "loss": 0.3516, "step": 6904 }, { "epoch": 0.3202690166975881, "grad_norm": 11.936877250671387, "learning_rate": 7.766213950122592e-06, "loss": 0.3877, "step": 6905 }, { "epoch": 0.3203153988868275, "grad_norm": 7.436601638793945, "learning_rate": 7.765600865041702e-06, "loss": 0.4127, "step": 6906 }, { "epoch": 0.3203617810760668, "grad_norm": 6.324100494384766, "learning_rate": 7.764987720045531e-06, "loss": 0.2724, "step": 6907 }, { "epoch": 0.32040816326530613, "grad_norm": 7.795073509216309, "learning_rate": 7.764374515147356e-06, "loss": 0.3157, "step": 6908 }, { "epoch": 0.32045454545454544, "grad_norm": 6.471841335296631, "learning_rate": 7.763761250360468e-06, "loss": 0.3949, "step": 6909 }, { "epoch": 0.3205009276437848, "grad_norm": 5.210553169250488, "learning_rate": 7.76314792569815e-06, "loss": 0.4104, "step": 6910 }, { "epoch": 0.3205473098330241, "grad_norm": 5.5149617195129395, "learning_rate": 7.762534541173687e-06, "loss": 0.3587, "step": 6911 }, { "epoch": 0.32059369202226345, "grad_norm": 4.331810474395752, "learning_rate": 7.761921096800372e-06, "loss": 0.2618, "step": 6912 }, { "epoch": 0.32064007421150276, "grad_norm": 6.797049045562744, "learning_rate": 7.761307592591493e-06, "loss": 0.1669, "step": 6913 }, { "epoch": 0.3206864564007421, "grad_norm": 5.4884257316589355, "learning_rate": 7.76069402856034e-06, "loss": 0.3439, "step": 6914 }, { "epoch": 0.3207328385899815, "grad_norm": 10.464438438415527, "learning_rate": 7.76008040472021e-06, "loss": 0.2557, "step": 6915 }, { "epoch": 0.3207792207792208, "grad_norm": 5.909363746643066, "learning_rate": 7.759466721084392e-06, "loss": 0.3901, "step": 6916 }, { "epoch": 0.32082560296846013, "grad_norm": 8.873967170715332, "learning_rate": 7.758852977666183e-06, "loss": 0.4304, "step": 6917 }, { "epoch": 0.32087198515769944, "grad_norm": 7.8285040855407715, "learning_rate": 7.758239174478878e-06, "loss": 0.3213, "step": 6918 }, { "epoch": 0.3209183673469388, "grad_norm": 11.027036666870117, "learning_rate": 7.757625311535779e-06, "loss": 0.3333, "step": 6919 }, { "epoch": 0.3209647495361781, "grad_norm": 5.963607311248779, "learning_rate": 7.75701138885018e-06, "loss": 0.355, "step": 6920 }, { "epoch": 0.32101113172541745, "grad_norm": 8.30594539642334, "learning_rate": 7.756397406435386e-06, "loss": 0.3002, "step": 6921 }, { "epoch": 0.32105751391465676, "grad_norm": 6.655942916870117, "learning_rate": 7.755783364304693e-06, "loss": 0.2737, "step": 6922 }, { "epoch": 0.3211038961038961, "grad_norm": 8.652191162109375, "learning_rate": 7.755169262471409e-06, "loss": 0.3143, "step": 6923 }, { "epoch": 0.3211502782931354, "grad_norm": 11.21679401397705, "learning_rate": 7.754555100948835e-06, "loss": 0.4707, "step": 6924 }, { "epoch": 0.3211966604823748, "grad_norm": 13.506495475769043, "learning_rate": 7.753940879750277e-06, "loss": 0.4477, "step": 6925 }, { "epoch": 0.3212430426716141, "grad_norm": 7.114480018615723, "learning_rate": 7.753326598889043e-06, "loss": 0.2495, "step": 6926 }, { "epoch": 0.32128942486085343, "grad_norm": 10.545119285583496, "learning_rate": 7.752712258378441e-06, "loss": 0.3883, "step": 6927 }, { "epoch": 0.3213358070500928, "grad_norm": 20.36526107788086, "learning_rate": 7.75209785823178e-06, "loss": 0.5513, "step": 6928 }, { "epoch": 0.3213821892393321, "grad_norm": 13.468684196472168, "learning_rate": 7.75148339846237e-06, "loss": 0.506, "step": 6929 }, { "epoch": 0.32142857142857145, "grad_norm": 8.792421340942383, "learning_rate": 7.750868879083523e-06, "loss": 0.3632, "step": 6930 }, { "epoch": 0.32147495361781075, "grad_norm": 7.647684574127197, "learning_rate": 7.750254300108552e-06, "loss": 0.4053, "step": 6931 }, { "epoch": 0.3215213358070501, "grad_norm": 7.874612331390381, "learning_rate": 7.749639661550775e-06, "loss": 0.5435, "step": 6932 }, { "epoch": 0.3215677179962894, "grad_norm": 8.069467544555664, "learning_rate": 7.749024963423504e-06, "loss": 0.3485, "step": 6933 }, { "epoch": 0.3216141001855288, "grad_norm": 6.254698753356934, "learning_rate": 7.748410205740055e-06, "loss": 0.2955, "step": 6934 }, { "epoch": 0.3216604823747681, "grad_norm": 5.480079650878906, "learning_rate": 7.747795388513752e-06, "loss": 0.3247, "step": 6935 }, { "epoch": 0.32170686456400743, "grad_norm": 7.679468154907227, "learning_rate": 7.747180511757908e-06, "loss": 0.3823, "step": 6936 }, { "epoch": 0.32175324675324674, "grad_norm": 7.772312641143799, "learning_rate": 7.746565575485849e-06, "loss": 0.4278, "step": 6937 }, { "epoch": 0.3217996289424861, "grad_norm": 9.137747764587402, "learning_rate": 7.745950579710894e-06, "loss": 0.341, "step": 6938 }, { "epoch": 0.3218460111317254, "grad_norm": 15.243170738220215, "learning_rate": 7.74533552444637e-06, "loss": 0.3796, "step": 6939 }, { "epoch": 0.32189239332096475, "grad_norm": 9.039963722229004, "learning_rate": 7.7447204097056e-06, "loss": 0.3169, "step": 6940 }, { "epoch": 0.32193877551020406, "grad_norm": 9.076138496398926, "learning_rate": 7.744105235501907e-06, "loss": 0.4169, "step": 6941 }, { "epoch": 0.3219851576994434, "grad_norm": 6.359004974365234, "learning_rate": 7.743490001848623e-06, "loss": 0.3889, "step": 6942 }, { "epoch": 0.32203153988868277, "grad_norm": 6.194614887237549, "learning_rate": 7.742874708759076e-06, "loss": 0.3733, "step": 6943 }, { "epoch": 0.3220779220779221, "grad_norm": 6.431843280792236, "learning_rate": 7.742259356246594e-06, "loss": 0.3875, "step": 6944 }, { "epoch": 0.32212430426716143, "grad_norm": 6.047683238983154, "learning_rate": 7.74164394432451e-06, "loss": 0.2555, "step": 6945 }, { "epoch": 0.32217068645640073, "grad_norm": 5.28187894821167, "learning_rate": 7.741028473006157e-06, "loss": 0.2901, "step": 6946 }, { "epoch": 0.3222170686456401, "grad_norm": 12.017054557800293, "learning_rate": 7.740412942304868e-06, "loss": 0.4585, "step": 6947 }, { "epoch": 0.3222634508348794, "grad_norm": 5.55962610244751, "learning_rate": 7.739797352233976e-06, "loss": 0.3247, "step": 6948 }, { "epoch": 0.32230983302411875, "grad_norm": 9.920971870422363, "learning_rate": 7.73918170280682e-06, "loss": 0.4301, "step": 6949 }, { "epoch": 0.32235621521335805, "grad_norm": 5.511466979980469, "learning_rate": 7.738565994036737e-06, "loss": 0.312, "step": 6950 }, { "epoch": 0.3224025974025974, "grad_norm": 10.786701202392578, "learning_rate": 7.737950225937068e-06, "loss": 0.403, "step": 6951 }, { "epoch": 0.3224489795918367, "grad_norm": 11.496036529541016, "learning_rate": 7.737334398521149e-06, "loss": 0.4886, "step": 6952 }, { "epoch": 0.3224953617810761, "grad_norm": 10.3765869140625, "learning_rate": 7.736718511802326e-06, "loss": 0.4398, "step": 6953 }, { "epoch": 0.3225417439703154, "grad_norm": 7.163707256317139, "learning_rate": 7.736102565793939e-06, "loss": 0.3531, "step": 6954 }, { "epoch": 0.32258812615955473, "grad_norm": 6.689340114593506, "learning_rate": 7.735486560509332e-06, "loss": 0.3925, "step": 6955 }, { "epoch": 0.32263450834879404, "grad_norm": 7.567995548248291, "learning_rate": 7.734870495961854e-06, "loss": 0.4536, "step": 6956 }, { "epoch": 0.3226808905380334, "grad_norm": 7.824258327484131, "learning_rate": 7.734254372164846e-06, "loss": 0.3916, "step": 6957 }, { "epoch": 0.32272727272727275, "grad_norm": 5.768098831176758, "learning_rate": 7.733638189131663e-06, "loss": 0.4211, "step": 6958 }, { "epoch": 0.32277365491651205, "grad_norm": 10.679652214050293, "learning_rate": 7.733021946875648e-06, "loss": 0.5276, "step": 6959 }, { "epoch": 0.3228200371057514, "grad_norm": 4.089277744293213, "learning_rate": 7.732405645410155e-06, "loss": 0.333, "step": 6960 }, { "epoch": 0.3228664192949907, "grad_norm": 8.516870498657227, "learning_rate": 7.731789284748534e-06, "loss": 0.3364, "step": 6961 }, { "epoch": 0.32291280148423007, "grad_norm": 12.494987487792969, "learning_rate": 7.73117286490414e-06, "loss": 0.422, "step": 6962 }, { "epoch": 0.3229591836734694, "grad_norm": 11.486512184143066, "learning_rate": 7.730556385890325e-06, "loss": 0.3414, "step": 6963 }, { "epoch": 0.32300556586270873, "grad_norm": 7.543997764587402, "learning_rate": 7.729939847720449e-06, "loss": 0.3575, "step": 6964 }, { "epoch": 0.32305194805194803, "grad_norm": 4.474428653717041, "learning_rate": 7.729323250407863e-06, "loss": 0.2597, "step": 6965 }, { "epoch": 0.3230983302411874, "grad_norm": 9.081430435180664, "learning_rate": 7.72870659396593e-06, "loss": 0.332, "step": 6966 }, { "epoch": 0.3231447124304267, "grad_norm": 9.5757417678833, "learning_rate": 7.728089878408008e-06, "loss": 0.4125, "step": 6967 }, { "epoch": 0.32319109461966605, "grad_norm": 6.941223621368408, "learning_rate": 7.727473103747456e-06, "loss": 0.3591, "step": 6968 }, { "epoch": 0.32323747680890536, "grad_norm": 6.520560264587402, "learning_rate": 7.726856269997637e-06, "loss": 0.2113, "step": 6969 }, { "epoch": 0.3232838589981447, "grad_norm": 10.04202938079834, "learning_rate": 7.726239377171919e-06, "loss": 0.6352, "step": 6970 }, { "epoch": 0.32333024118738407, "grad_norm": 6.831222057342529, "learning_rate": 7.725622425283662e-06, "loss": 0.3641, "step": 6971 }, { "epoch": 0.3233766233766234, "grad_norm": 9.951778411865234, "learning_rate": 7.725005414346229e-06, "loss": 0.3549, "step": 6972 }, { "epoch": 0.32342300556586273, "grad_norm": 4.845222473144531, "learning_rate": 7.724388344372995e-06, "loss": 0.2799, "step": 6973 }, { "epoch": 0.32346938775510203, "grad_norm": 5.523500919342041, "learning_rate": 7.723771215377322e-06, "loss": 0.3335, "step": 6974 }, { "epoch": 0.3235157699443414, "grad_norm": 5.18808126449585, "learning_rate": 7.723154027372583e-06, "loss": 0.2591, "step": 6975 }, { "epoch": 0.3235621521335807, "grad_norm": 6.7354888916015625, "learning_rate": 7.72253678037215e-06, "loss": 0.4071, "step": 6976 }, { "epoch": 0.32360853432282005, "grad_norm": 4.715390205383301, "learning_rate": 7.721919474389393e-06, "loss": 0.303, "step": 6977 }, { "epoch": 0.32365491651205935, "grad_norm": 7.521035671234131, "learning_rate": 7.721302109437686e-06, "loss": 0.3673, "step": 6978 }, { "epoch": 0.3237012987012987, "grad_norm": 7.578983306884766, "learning_rate": 7.720684685530402e-06, "loss": 0.3864, "step": 6979 }, { "epoch": 0.323747680890538, "grad_norm": 6.388587951660156, "learning_rate": 7.72006720268092e-06, "loss": 0.2656, "step": 6980 }, { "epoch": 0.32379406307977737, "grad_norm": 5.601187229156494, "learning_rate": 7.719449660902619e-06, "loss": 0.2428, "step": 6981 }, { "epoch": 0.3238404452690167, "grad_norm": 11.921627044677734, "learning_rate": 7.718832060208874e-06, "loss": 0.3902, "step": 6982 }, { "epoch": 0.32388682745825603, "grad_norm": 4.993022441864014, "learning_rate": 7.718214400613067e-06, "loss": 0.306, "step": 6983 }, { "epoch": 0.32393320964749533, "grad_norm": 14.480925559997559, "learning_rate": 7.717596682128578e-06, "loss": 0.3978, "step": 6984 }, { "epoch": 0.3239795918367347, "grad_norm": 6.799907684326172, "learning_rate": 7.71697890476879e-06, "loss": 0.387, "step": 6985 }, { "epoch": 0.32402597402597405, "grad_norm": 9.317767143249512, "learning_rate": 7.71636106854709e-06, "loss": 0.3721, "step": 6986 }, { "epoch": 0.32407235621521335, "grad_norm": 5.902972221374512, "learning_rate": 7.715743173476858e-06, "loss": 0.3307, "step": 6987 }, { "epoch": 0.3241187384044527, "grad_norm": 6.956401824951172, "learning_rate": 7.715125219571482e-06, "loss": 0.271, "step": 6988 }, { "epoch": 0.324165120593692, "grad_norm": 7.748335838317871, "learning_rate": 7.714507206844351e-06, "loss": 0.3579, "step": 6989 }, { "epoch": 0.32421150278293137, "grad_norm": 6.898051738739014, "learning_rate": 7.713889135308852e-06, "loss": 0.4057, "step": 6990 }, { "epoch": 0.3242578849721707, "grad_norm": 6.546020030975342, "learning_rate": 7.713271004978377e-06, "loss": 0.3259, "step": 6991 }, { "epoch": 0.32430426716141003, "grad_norm": 8.222891807556152, "learning_rate": 7.712652815866318e-06, "loss": 0.3887, "step": 6992 }, { "epoch": 0.32435064935064933, "grad_norm": 5.601700782775879, "learning_rate": 7.712034567986064e-06, "loss": 0.2312, "step": 6993 }, { "epoch": 0.3243970315398887, "grad_norm": 10.328327178955078, "learning_rate": 7.711416261351014e-06, "loss": 0.512, "step": 6994 }, { "epoch": 0.324443413729128, "grad_norm": 8.635270118713379, "learning_rate": 7.71079789597456e-06, "loss": 0.372, "step": 6995 }, { "epoch": 0.32448979591836735, "grad_norm": 6.1407790184021, "learning_rate": 7.710179471870099e-06, "loss": 0.3216, "step": 6996 }, { "epoch": 0.32453617810760665, "grad_norm": 10.938673973083496, "learning_rate": 7.709560989051028e-06, "loss": 0.4108, "step": 6997 }, { "epoch": 0.324582560296846, "grad_norm": 13.299076080322266, "learning_rate": 7.708942447530749e-06, "loss": 0.3815, "step": 6998 }, { "epoch": 0.32462894248608537, "grad_norm": 5.788538455963135, "learning_rate": 7.708323847322661e-06, "loss": 0.3542, "step": 6999 }, { "epoch": 0.3246753246753247, "grad_norm": 4.991219520568848, "learning_rate": 7.707705188440165e-06, "loss": 0.3478, "step": 7000 }, { "epoch": 0.32472170686456403, "grad_norm": 7.958574295043945, "learning_rate": 7.707086470896664e-06, "loss": 0.3526, "step": 7001 }, { "epoch": 0.32476808905380333, "grad_norm": 5.061476707458496, "learning_rate": 7.706467694705562e-06, "loss": 0.3781, "step": 7002 }, { "epoch": 0.3248144712430427, "grad_norm": 6.914111137390137, "learning_rate": 7.705848859880265e-06, "loss": 0.2883, "step": 7003 }, { "epoch": 0.324860853432282, "grad_norm": 5.63776159286499, "learning_rate": 7.70522996643418e-06, "loss": 0.2955, "step": 7004 }, { "epoch": 0.32490723562152135, "grad_norm": 6.567218780517578, "learning_rate": 7.704611014380714e-06, "loss": 0.4118, "step": 7005 }, { "epoch": 0.32495361781076065, "grad_norm": 7.200613975524902, "learning_rate": 7.703992003733278e-06, "loss": 0.3124, "step": 7006 }, { "epoch": 0.325, "grad_norm": 6.179540157318115, "learning_rate": 7.70337293450528e-06, "loss": 0.3143, "step": 7007 }, { "epoch": 0.3250463821892393, "grad_norm": 11.00744342803955, "learning_rate": 7.702753806710135e-06, "loss": 0.3482, "step": 7008 }, { "epoch": 0.32509276437847867, "grad_norm": 6.716924667358398, "learning_rate": 7.702134620361254e-06, "loss": 0.3042, "step": 7009 }, { "epoch": 0.325139146567718, "grad_norm": 7.694493293762207, "learning_rate": 7.70151537547205e-06, "loss": 0.3631, "step": 7010 }, { "epoch": 0.32518552875695733, "grad_norm": 8.082348823547363, "learning_rate": 7.700896072055943e-06, "loss": 0.3436, "step": 7011 }, { "epoch": 0.32523191094619663, "grad_norm": 9.489850997924805, "learning_rate": 7.700276710126344e-06, "loss": 0.4458, "step": 7012 }, { "epoch": 0.325278293135436, "grad_norm": 4.051633834838867, "learning_rate": 7.699657289696677e-06, "loss": 0.2983, "step": 7013 }, { "epoch": 0.32532467532467535, "grad_norm": 10.467902183532715, "learning_rate": 7.699037810780357e-06, "loss": 0.4286, "step": 7014 }, { "epoch": 0.32537105751391465, "grad_norm": 4.789430618286133, "learning_rate": 7.698418273390807e-06, "loss": 0.4203, "step": 7015 }, { "epoch": 0.325417439703154, "grad_norm": 6.6649017333984375, "learning_rate": 7.697798677541448e-06, "loss": 0.4034, "step": 7016 }, { "epoch": 0.3254638218923933, "grad_norm": 9.679984092712402, "learning_rate": 7.697179023245705e-06, "loss": 0.346, "step": 7017 }, { "epoch": 0.32551020408163267, "grad_norm": 6.108819484710693, "learning_rate": 7.696559310517e-06, "loss": 0.3313, "step": 7018 }, { "epoch": 0.325556586270872, "grad_norm": 5.763882160186768, "learning_rate": 7.695939539368762e-06, "loss": 0.288, "step": 7019 }, { "epoch": 0.32560296846011133, "grad_norm": 8.317675590515137, "learning_rate": 7.695319709814412e-06, "loss": 0.3182, "step": 7020 }, { "epoch": 0.32564935064935063, "grad_norm": 6.715518951416016, "learning_rate": 7.694699821867385e-06, "loss": 0.4375, "step": 7021 }, { "epoch": 0.32569573283859, "grad_norm": 6.695291519165039, "learning_rate": 7.694079875541106e-06, "loss": 0.316, "step": 7022 }, { "epoch": 0.3257421150278293, "grad_norm": 8.839198112487793, "learning_rate": 7.693459870849008e-06, "loss": 0.4149, "step": 7023 }, { "epoch": 0.32578849721706865, "grad_norm": 6.175842761993408, "learning_rate": 7.692839807804522e-06, "loss": 0.2603, "step": 7024 }, { "epoch": 0.32583487940630795, "grad_norm": 5.8801703453063965, "learning_rate": 7.692219686421083e-06, "loss": 0.4375, "step": 7025 }, { "epoch": 0.3258812615955473, "grad_norm": 6.50514554977417, "learning_rate": 7.691599506712124e-06, "loss": 0.3844, "step": 7026 }, { "epoch": 0.32592764378478667, "grad_norm": 4.703978061676025, "learning_rate": 7.690979268691081e-06, "loss": 0.3108, "step": 7027 }, { "epoch": 0.32597402597402597, "grad_norm": 6.441999435424805, "learning_rate": 7.690358972371393e-06, "loss": 0.2407, "step": 7028 }, { "epoch": 0.32602040816326533, "grad_norm": 8.25999641418457, "learning_rate": 7.689738617766495e-06, "loss": 0.3021, "step": 7029 }, { "epoch": 0.32606679035250463, "grad_norm": 5.631657123565674, "learning_rate": 7.68911820488983e-06, "loss": 0.381, "step": 7030 }, { "epoch": 0.326113172541744, "grad_norm": 6.688051223754883, "learning_rate": 7.688497733754836e-06, "loss": 0.4087, "step": 7031 }, { "epoch": 0.3261595547309833, "grad_norm": 9.4837646484375, "learning_rate": 7.687877204374957e-06, "loss": 0.3673, "step": 7032 }, { "epoch": 0.32620593692022265, "grad_norm": 6.890236854553223, "learning_rate": 7.687256616763637e-06, "loss": 0.2928, "step": 7033 }, { "epoch": 0.32625231910946195, "grad_norm": 5.655857086181641, "learning_rate": 7.686635970934319e-06, "loss": 0.2963, "step": 7034 }, { "epoch": 0.3262987012987013, "grad_norm": 3.7199554443359375, "learning_rate": 7.686015266900451e-06, "loss": 0.3644, "step": 7035 }, { "epoch": 0.3263450834879406, "grad_norm": 11.950477600097656, "learning_rate": 7.685394504675477e-06, "loss": 0.4771, "step": 7036 }, { "epoch": 0.32639146567717997, "grad_norm": 6.376729488372803, "learning_rate": 7.684773684272848e-06, "loss": 0.359, "step": 7037 }, { "epoch": 0.3264378478664193, "grad_norm": 6.41648006439209, "learning_rate": 7.684152805706014e-06, "loss": 0.324, "step": 7038 }, { "epoch": 0.32648423005565863, "grad_norm": 8.806434631347656, "learning_rate": 7.683531868988426e-06, "loss": 0.4246, "step": 7039 }, { "epoch": 0.32653061224489793, "grad_norm": 12.121081352233887, "learning_rate": 7.682910874133534e-06, "loss": 0.5596, "step": 7040 }, { "epoch": 0.3265769944341373, "grad_norm": 5.393649101257324, "learning_rate": 7.682289821154792e-06, "loss": 0.3313, "step": 7041 }, { "epoch": 0.32662337662337665, "grad_norm": 12.10495662689209, "learning_rate": 7.681668710065657e-06, "loss": 0.4696, "step": 7042 }, { "epoch": 0.32666975881261595, "grad_norm": 5.551220893859863, "learning_rate": 7.681047540879583e-06, "loss": 0.3804, "step": 7043 }, { "epoch": 0.3267161410018553, "grad_norm": 5.524238109588623, "learning_rate": 7.68042631361003e-06, "loss": 0.3199, "step": 7044 }, { "epoch": 0.3267625231910946, "grad_norm": 9.082586288452148, "learning_rate": 7.679805028270454e-06, "loss": 0.4098, "step": 7045 }, { "epoch": 0.32680890538033397, "grad_norm": 5.212239742279053, "learning_rate": 7.679183684874317e-06, "loss": 0.2829, "step": 7046 }, { "epoch": 0.32685528756957327, "grad_norm": 14.032553672790527, "learning_rate": 7.678562283435075e-06, "loss": 0.814, "step": 7047 }, { "epoch": 0.32690166975881263, "grad_norm": 10.945268630981445, "learning_rate": 7.677940823966196e-06, "loss": 0.3612, "step": 7048 }, { "epoch": 0.32694805194805193, "grad_norm": 6.930427551269531, "learning_rate": 7.67731930648114e-06, "loss": 0.2884, "step": 7049 }, { "epoch": 0.3269944341372913, "grad_norm": 8.320002555847168, "learning_rate": 7.676697730993376e-06, "loss": 0.3938, "step": 7050 }, { "epoch": 0.3270408163265306, "grad_norm": 10.517417907714844, "learning_rate": 7.676076097516366e-06, "loss": 0.3813, "step": 7051 }, { "epoch": 0.32708719851576995, "grad_norm": 5.77528190612793, "learning_rate": 7.675454406063579e-06, "loss": 0.299, "step": 7052 }, { "epoch": 0.32713358070500925, "grad_norm": 5.705280780792236, "learning_rate": 7.674832656648483e-06, "loss": 0.2639, "step": 7053 }, { "epoch": 0.3271799628942486, "grad_norm": 5.000393867492676, "learning_rate": 7.67421084928455e-06, "loss": 0.3173, "step": 7054 }, { "epoch": 0.32722634508348797, "grad_norm": 6.214705944061279, "learning_rate": 7.673588983985247e-06, "loss": 0.3595, "step": 7055 }, { "epoch": 0.32727272727272727, "grad_norm": 5.932878494262695, "learning_rate": 7.672967060764049e-06, "loss": 0.3457, "step": 7056 }, { "epoch": 0.32731910946196663, "grad_norm": 7.202866077423096, "learning_rate": 7.672345079634432e-06, "loss": 0.2735, "step": 7057 }, { "epoch": 0.32736549165120593, "grad_norm": 12.074288368225098, "learning_rate": 7.671723040609866e-06, "loss": 0.605, "step": 7058 }, { "epoch": 0.3274118738404453, "grad_norm": 5.785820960998535, "learning_rate": 7.67110094370383e-06, "loss": 0.3673, "step": 7059 }, { "epoch": 0.3274582560296846, "grad_norm": 6.751826763153076, "learning_rate": 7.670478788929803e-06, "loss": 0.3231, "step": 7060 }, { "epoch": 0.32750463821892395, "grad_norm": 5.991393089294434, "learning_rate": 7.669856576301258e-06, "loss": 0.4872, "step": 7061 }, { "epoch": 0.32755102040816325, "grad_norm": 7.645017623901367, "learning_rate": 7.669234305831682e-06, "loss": 0.3534, "step": 7062 }, { "epoch": 0.3275974025974026, "grad_norm": 7.1933512687683105, "learning_rate": 7.668611977534551e-06, "loss": 0.3533, "step": 7063 }, { "epoch": 0.3276437847866419, "grad_norm": 6.0831403732299805, "learning_rate": 7.667989591423349e-06, "loss": 0.3677, "step": 7064 }, { "epoch": 0.32769016697588127, "grad_norm": 4.278409481048584, "learning_rate": 7.667367147511557e-06, "loss": 0.2193, "step": 7065 }, { "epoch": 0.32773654916512057, "grad_norm": 6.1145243644714355, "learning_rate": 7.666744645812667e-06, "loss": 0.2992, "step": 7066 }, { "epoch": 0.32778293135435993, "grad_norm": 12.183521270751953, "learning_rate": 7.666122086340158e-06, "loss": 0.4413, "step": 7067 }, { "epoch": 0.32782931354359923, "grad_norm": 8.253209114074707, "learning_rate": 7.665499469107523e-06, "loss": 0.3965, "step": 7068 }, { "epoch": 0.3278756957328386, "grad_norm": 4.937024116516113, "learning_rate": 7.664876794128245e-06, "loss": 0.2788, "step": 7069 }, { "epoch": 0.32792207792207795, "grad_norm": 7.2994842529296875, "learning_rate": 7.664254061415818e-06, "loss": 0.3433, "step": 7070 }, { "epoch": 0.32796846011131725, "grad_norm": 4.786010265350342, "learning_rate": 7.663631270983733e-06, "loss": 0.3116, "step": 7071 }, { "epoch": 0.3280148423005566, "grad_norm": 8.259325981140137, "learning_rate": 7.663008422845479e-06, "loss": 0.4468, "step": 7072 }, { "epoch": 0.3280612244897959, "grad_norm": 7.904426574707031, "learning_rate": 7.662385517014554e-06, "loss": 0.419, "step": 7073 }, { "epoch": 0.32810760667903527, "grad_norm": 5.187025547027588, "learning_rate": 7.661762553504451e-06, "loss": 0.3195, "step": 7074 }, { "epoch": 0.32815398886827457, "grad_norm": 6.058932304382324, "learning_rate": 7.661139532328666e-06, "loss": 0.3285, "step": 7075 }, { "epoch": 0.32820037105751393, "grad_norm": 6.991052150726318, "learning_rate": 7.660516453500697e-06, "loss": 0.358, "step": 7076 }, { "epoch": 0.32824675324675323, "grad_norm": 6.978612422943115, "learning_rate": 7.659893317034041e-06, "loss": 0.3032, "step": 7077 }, { "epoch": 0.3282931354359926, "grad_norm": 10.82962417602539, "learning_rate": 7.6592701229422e-06, "loss": 0.4067, "step": 7078 }, { "epoch": 0.3283395176252319, "grad_norm": 5.525129795074463, "learning_rate": 7.658646871238675e-06, "loss": 0.3155, "step": 7079 }, { "epoch": 0.32838589981447125, "grad_norm": 22.528202056884766, "learning_rate": 7.658023561936966e-06, "loss": 0.467, "step": 7080 }, { "epoch": 0.32843228200371055, "grad_norm": 5.366362571716309, "learning_rate": 7.657400195050582e-06, "loss": 0.356, "step": 7081 }, { "epoch": 0.3284786641929499, "grad_norm": 9.192151069641113, "learning_rate": 7.656776770593022e-06, "loss": 0.4041, "step": 7082 }, { "epoch": 0.3285250463821892, "grad_norm": 15.133862495422363, "learning_rate": 7.656153288577794e-06, "loss": 0.3833, "step": 7083 }, { "epoch": 0.32857142857142857, "grad_norm": 13.269896507263184, "learning_rate": 7.655529749018407e-06, "loss": 0.4798, "step": 7084 }, { "epoch": 0.3286178107606679, "grad_norm": 5.841202735900879, "learning_rate": 7.654906151928368e-06, "loss": 0.3669, "step": 7085 }, { "epoch": 0.32866419294990723, "grad_norm": 7.636292934417725, "learning_rate": 7.654282497321188e-06, "loss": 0.3373, "step": 7086 }, { "epoch": 0.3287105751391466, "grad_norm": 5.366025924682617, "learning_rate": 7.653658785210378e-06, "loss": 0.3747, "step": 7087 }, { "epoch": 0.3287569573283859, "grad_norm": 7.403258323669434, "learning_rate": 7.653035015609449e-06, "loss": 0.3261, "step": 7088 }, { "epoch": 0.32880333951762525, "grad_norm": 6.756089687347412, "learning_rate": 7.652411188531916e-06, "loss": 0.3478, "step": 7089 }, { "epoch": 0.32884972170686455, "grad_norm": 9.204288482666016, "learning_rate": 7.651787303991297e-06, "loss": 0.335, "step": 7090 }, { "epoch": 0.3288961038961039, "grad_norm": 4.648422718048096, "learning_rate": 7.6511633620011e-06, "loss": 0.2674, "step": 7091 }, { "epoch": 0.3289424860853432, "grad_norm": 8.53663158416748, "learning_rate": 7.650539362574848e-06, "loss": 0.4551, "step": 7092 }, { "epoch": 0.32898886827458257, "grad_norm": 10.158501625061035, "learning_rate": 7.649915305726062e-06, "loss": 0.4103, "step": 7093 }, { "epoch": 0.32903525046382187, "grad_norm": 9.12099838256836, "learning_rate": 7.649291191468254e-06, "loss": 0.2855, "step": 7094 }, { "epoch": 0.32908163265306123, "grad_norm": 8.49252700805664, "learning_rate": 7.648667019814953e-06, "loss": 0.3684, "step": 7095 }, { "epoch": 0.32912801484230053, "grad_norm": 7.986525058746338, "learning_rate": 7.648042790779677e-06, "loss": 0.2905, "step": 7096 }, { "epoch": 0.3291743970315399, "grad_norm": 5.12455415725708, "learning_rate": 7.64741850437595e-06, "loss": 0.2664, "step": 7097 }, { "epoch": 0.32922077922077925, "grad_norm": 6.189352989196777, "learning_rate": 7.646794160617297e-06, "loss": 0.3426, "step": 7098 }, { "epoch": 0.32926716141001855, "grad_norm": 11.727717399597168, "learning_rate": 7.646169759517246e-06, "loss": 0.4133, "step": 7099 }, { "epoch": 0.3293135435992579, "grad_norm": 7.337381839752197, "learning_rate": 7.645545301089323e-06, "loss": 0.3423, "step": 7100 }, { "epoch": 0.3293599257884972, "grad_norm": 4.878810882568359, "learning_rate": 7.644920785347055e-06, "loss": 0.3257, "step": 7101 }, { "epoch": 0.32940630797773657, "grad_norm": 12.064544677734375, "learning_rate": 7.644296212303975e-06, "loss": 0.3969, "step": 7102 }, { "epoch": 0.32945269016697587, "grad_norm": 20.673242568969727, "learning_rate": 7.64367158197361e-06, "loss": 0.334, "step": 7103 }, { "epoch": 0.32949907235621523, "grad_norm": 5.244019031524658, "learning_rate": 7.643046894369497e-06, "loss": 0.3342, "step": 7104 }, { "epoch": 0.32954545454545453, "grad_norm": 5.748039722442627, "learning_rate": 7.642422149505166e-06, "loss": 0.3362, "step": 7105 }, { "epoch": 0.3295918367346939, "grad_norm": 4.449379920959473, "learning_rate": 7.641797347394153e-06, "loss": 0.2933, "step": 7106 }, { "epoch": 0.3296382189239332, "grad_norm": 7.310128211975098, "learning_rate": 7.641172488049994e-06, "loss": 0.3791, "step": 7107 }, { "epoch": 0.32968460111317255, "grad_norm": 8.859278678894043, "learning_rate": 7.640547571486226e-06, "loss": 0.4124, "step": 7108 }, { "epoch": 0.32973098330241185, "grad_norm": 11.876818656921387, "learning_rate": 7.639922597716387e-06, "loss": 0.5711, "step": 7109 }, { "epoch": 0.3297773654916512, "grad_norm": 3.7490131855010986, "learning_rate": 7.639297566754017e-06, "loss": 0.2759, "step": 7110 }, { "epoch": 0.3298237476808905, "grad_norm": 5.803919315338135, "learning_rate": 7.638672478612659e-06, "loss": 0.3723, "step": 7111 }, { "epoch": 0.32987012987012987, "grad_norm": 10.300260543823242, "learning_rate": 7.638047333305853e-06, "loss": 0.3668, "step": 7112 }, { "epoch": 0.3299165120593692, "grad_norm": 18.178749084472656, "learning_rate": 7.637422130847143e-06, "loss": 0.5514, "step": 7113 }, { "epoch": 0.32996289424860853, "grad_norm": 10.528131484985352, "learning_rate": 7.636796871250072e-06, "loss": 0.4505, "step": 7114 }, { "epoch": 0.3300092764378479, "grad_norm": 10.742843627929688, "learning_rate": 7.636171554528188e-06, "loss": 0.4008, "step": 7115 }, { "epoch": 0.3300556586270872, "grad_norm": 9.979267120361328, "learning_rate": 7.635546180695039e-06, "loss": 0.4366, "step": 7116 }, { "epoch": 0.33010204081632655, "grad_norm": 8.69389820098877, "learning_rate": 7.634920749764171e-06, "loss": 0.4313, "step": 7117 }, { "epoch": 0.33014842300556585, "grad_norm": 5.287749290466309, "learning_rate": 7.634295261749136e-06, "loss": 0.3852, "step": 7118 }, { "epoch": 0.3301948051948052, "grad_norm": 8.396710395812988, "learning_rate": 7.633669716663484e-06, "loss": 0.3181, "step": 7119 }, { "epoch": 0.3302411873840445, "grad_norm": 5.002260684967041, "learning_rate": 7.633044114520765e-06, "loss": 0.3384, "step": 7120 }, { "epoch": 0.33028756957328387, "grad_norm": 23.641372680664062, "learning_rate": 7.632418455334535e-06, "loss": 0.45, "step": 7121 }, { "epoch": 0.33033395176252317, "grad_norm": 6.592515468597412, "learning_rate": 7.631792739118346e-06, "loss": 0.4513, "step": 7122 }, { "epoch": 0.33038033395176253, "grad_norm": 4.420170783996582, "learning_rate": 7.631166965885759e-06, "loss": 0.2863, "step": 7123 }, { "epoch": 0.33042671614100183, "grad_norm": 5.402523994445801, "learning_rate": 7.630541135650324e-06, "loss": 0.4062, "step": 7124 }, { "epoch": 0.3304730983302412, "grad_norm": 8.441926956176758, "learning_rate": 7.629915248425603e-06, "loss": 0.4406, "step": 7125 }, { "epoch": 0.33051948051948055, "grad_norm": 6.351968288421631, "learning_rate": 7.629289304225157e-06, "loss": 0.3024, "step": 7126 }, { "epoch": 0.33056586270871985, "grad_norm": 9.354155540466309, "learning_rate": 7.628663303062545e-06, "loss": 0.4021, "step": 7127 }, { "epoch": 0.3306122448979592, "grad_norm": 5.872066497802734, "learning_rate": 7.628037244951328e-06, "loss": 0.4506, "step": 7128 }, { "epoch": 0.3306586270871985, "grad_norm": 4.378067970275879, "learning_rate": 7.62741112990507e-06, "loss": 0.3684, "step": 7129 }, { "epoch": 0.33070500927643787, "grad_norm": 14.572993278503418, "learning_rate": 7.626784957937337e-06, "loss": 0.3882, "step": 7130 }, { "epoch": 0.33075139146567717, "grad_norm": 10.649352073669434, "learning_rate": 7.626158729061692e-06, "loss": 0.3472, "step": 7131 }, { "epoch": 0.3307977736549165, "grad_norm": 8.623248100280762, "learning_rate": 7.625532443291703e-06, "loss": 0.4096, "step": 7132 }, { "epoch": 0.33084415584415583, "grad_norm": 3.6946310997009277, "learning_rate": 7.62490610064094e-06, "loss": 0.25, "step": 7133 }, { "epoch": 0.3308905380333952, "grad_norm": 3.885911226272583, "learning_rate": 7.62427970112297e-06, "loss": 0.2751, "step": 7134 }, { "epoch": 0.3309369202226345, "grad_norm": 5.876440525054932, "learning_rate": 7.6236532447513646e-06, "loss": 0.3663, "step": 7135 }, { "epoch": 0.33098330241187385, "grad_norm": 13.04700756072998, "learning_rate": 7.623026731539696e-06, "loss": 0.3611, "step": 7136 }, { "epoch": 0.33102968460111315, "grad_norm": 13.788005828857422, "learning_rate": 7.622400161501535e-06, "loss": 0.4367, "step": 7137 }, { "epoch": 0.3310760667903525, "grad_norm": 4.661031246185303, "learning_rate": 7.621773534650458e-06, "loss": 0.2978, "step": 7138 }, { "epoch": 0.3311224489795918, "grad_norm": 6.5134596824646, "learning_rate": 7.621146851000043e-06, "loss": 0.3567, "step": 7139 }, { "epoch": 0.33116883116883117, "grad_norm": 8.252421379089355, "learning_rate": 7.620520110563862e-06, "loss": 0.3529, "step": 7140 }, { "epoch": 0.3312152133580705, "grad_norm": 14.850028991699219, "learning_rate": 7.619893313355494e-06, "loss": 0.5666, "step": 7141 }, { "epoch": 0.33126159554730983, "grad_norm": 9.232784271240234, "learning_rate": 7.619266459388521e-06, "loss": 0.5168, "step": 7142 }, { "epoch": 0.3313079777365492, "grad_norm": 5.694993019104004, "learning_rate": 7.618639548676521e-06, "loss": 0.4098, "step": 7143 }, { "epoch": 0.3313543599257885, "grad_norm": 6.947449207305908, "learning_rate": 7.618012581233076e-06, "loss": 0.3157, "step": 7144 }, { "epoch": 0.33140074211502785, "grad_norm": 9.444722175598145, "learning_rate": 7.61738555707177e-06, "loss": 0.293, "step": 7145 }, { "epoch": 0.33144712430426715, "grad_norm": 13.019283294677734, "learning_rate": 7.616758476206185e-06, "loss": 0.4777, "step": 7146 }, { "epoch": 0.3314935064935065, "grad_norm": 6.531293869018555, "learning_rate": 7.616131338649908e-06, "loss": 0.3439, "step": 7147 }, { "epoch": 0.3315398886827458, "grad_norm": 5.159171104431152, "learning_rate": 7.6155041444165276e-06, "loss": 0.3001, "step": 7148 }, { "epoch": 0.33158627087198517, "grad_norm": 11.013694763183594, "learning_rate": 7.614876893519627e-06, "loss": 0.428, "step": 7149 }, { "epoch": 0.33163265306122447, "grad_norm": 6.295390605926514, "learning_rate": 7.614249585972799e-06, "loss": 0.3807, "step": 7150 }, { "epoch": 0.3316790352504638, "grad_norm": 7.660320281982422, "learning_rate": 7.613622221789634e-06, "loss": 0.4418, "step": 7151 }, { "epoch": 0.33172541743970313, "grad_norm": 8.572222709655762, "learning_rate": 7.61299480098372e-06, "loss": 0.2241, "step": 7152 }, { "epoch": 0.3317717996289425, "grad_norm": 10.831374168395996, "learning_rate": 7.6123673235686525e-06, "loss": 0.2369, "step": 7153 }, { "epoch": 0.33181818181818185, "grad_norm": 9.383490562438965, "learning_rate": 7.611739789558023e-06, "loss": 0.3578, "step": 7154 }, { "epoch": 0.33186456400742115, "grad_norm": 7.714204788208008, "learning_rate": 7.6111121989654315e-06, "loss": 0.3206, "step": 7155 }, { "epoch": 0.3319109461966605, "grad_norm": 12.923565864562988, "learning_rate": 7.61048455180447e-06, "loss": 0.4724, "step": 7156 }, { "epoch": 0.3319573283858998, "grad_norm": 9.051921844482422, "learning_rate": 7.609856848088737e-06, "loss": 0.3862, "step": 7157 }, { "epoch": 0.33200371057513917, "grad_norm": 6.034181594848633, "learning_rate": 7.609229087831833e-06, "loss": 0.3503, "step": 7158 }, { "epoch": 0.33205009276437847, "grad_norm": 8.342514991760254, "learning_rate": 7.608601271047356e-06, "loss": 0.3107, "step": 7159 }, { "epoch": 0.3320964749536178, "grad_norm": 7.244695663452148, "learning_rate": 7.607973397748909e-06, "loss": 0.4333, "step": 7160 }, { "epoch": 0.33214285714285713, "grad_norm": 8.835646629333496, "learning_rate": 7.607345467950095e-06, "loss": 0.3678, "step": 7161 }, { "epoch": 0.3321892393320965, "grad_norm": 8.304503440856934, "learning_rate": 7.606717481664515e-06, "loss": 0.4252, "step": 7162 }, { "epoch": 0.3322356215213358, "grad_norm": 9.144683837890625, "learning_rate": 7.606089438905776e-06, "loss": 0.2755, "step": 7163 }, { "epoch": 0.33228200371057515, "grad_norm": 8.148427963256836, "learning_rate": 7.605461339687486e-06, "loss": 0.3354, "step": 7164 }, { "epoch": 0.33232838589981445, "grad_norm": 10.131240844726562, "learning_rate": 7.604833184023247e-06, "loss": 0.336, "step": 7165 }, { "epoch": 0.3323747680890538, "grad_norm": 4.509088516235352, "learning_rate": 7.604204971926672e-06, "loss": 0.348, "step": 7166 }, { "epoch": 0.3324211502782931, "grad_norm": 5.96932315826416, "learning_rate": 7.60357670341137e-06, "loss": 0.4525, "step": 7167 }, { "epoch": 0.33246753246753247, "grad_norm": 4.9917402267456055, "learning_rate": 7.602948378490953e-06, "loss": 0.366, "step": 7168 }, { "epoch": 0.3325139146567718, "grad_norm": 3.9121034145355225, "learning_rate": 7.602319997179032e-06, "loss": 0.264, "step": 7169 }, { "epoch": 0.3325602968460111, "grad_norm": 5.795363903045654, "learning_rate": 7.601691559489219e-06, "loss": 0.3826, "step": 7170 }, { "epoch": 0.3326066790352505, "grad_norm": 5.812994480133057, "learning_rate": 7.601063065435133e-06, "loss": 0.4171, "step": 7171 }, { "epoch": 0.3326530612244898, "grad_norm": 5.613929748535156, "learning_rate": 7.600434515030386e-06, "loss": 0.3827, "step": 7172 }, { "epoch": 0.33269944341372915, "grad_norm": 11.90946102142334, "learning_rate": 7.599805908288598e-06, "loss": 0.3562, "step": 7173 }, { "epoch": 0.33274582560296845, "grad_norm": 5.074860572814941, "learning_rate": 7.599177245223385e-06, "loss": 0.4159, "step": 7174 }, { "epoch": 0.3327922077922078, "grad_norm": 4.966644763946533, "learning_rate": 7.598548525848369e-06, "loss": 0.3046, "step": 7175 }, { "epoch": 0.3328385899814471, "grad_norm": 3.6507833003997803, "learning_rate": 7.597919750177168e-06, "loss": 0.3149, "step": 7176 }, { "epoch": 0.33288497217068647, "grad_norm": 3.6857876777648926, "learning_rate": 7.597290918223408e-06, "loss": 0.3145, "step": 7177 }, { "epoch": 0.33293135435992577, "grad_norm": 12.588336944580078, "learning_rate": 7.59666203000071e-06, "loss": 0.3171, "step": 7178 }, { "epoch": 0.3329777365491651, "grad_norm": 4.622824192047119, "learning_rate": 7.5960330855226975e-06, "loss": 0.3424, "step": 7179 }, { "epoch": 0.33302411873840443, "grad_norm": 5.476066589355469, "learning_rate": 7.5954040848029975e-06, "loss": 0.341, "step": 7180 }, { "epoch": 0.3330705009276438, "grad_norm": 6.5852274894714355, "learning_rate": 7.594775027855238e-06, "loss": 0.3694, "step": 7181 }, { "epoch": 0.33311688311688314, "grad_norm": 10.16937255859375, "learning_rate": 7.594145914693045e-06, "loss": 0.4115, "step": 7182 }, { "epoch": 0.33316326530612245, "grad_norm": 6.912095069885254, "learning_rate": 7.593516745330051e-06, "loss": 0.3868, "step": 7183 }, { "epoch": 0.3332096474953618, "grad_norm": 6.083138465881348, "learning_rate": 7.592887519779883e-06, "loss": 0.2714, "step": 7184 }, { "epoch": 0.3332560296846011, "grad_norm": 10.994484901428223, "learning_rate": 7.592258238056174e-06, "loss": 0.5158, "step": 7185 }, { "epoch": 0.33330241187384047, "grad_norm": 5.490262031555176, "learning_rate": 7.591628900172558e-06, "loss": 0.2373, "step": 7186 }, { "epoch": 0.33334879406307977, "grad_norm": 8.174798965454102, "learning_rate": 7.590999506142669e-06, "loss": 0.3309, "step": 7187 }, { "epoch": 0.3333951762523191, "grad_norm": 6.064419269561768, "learning_rate": 7.590370055980143e-06, "loss": 0.4392, "step": 7188 }, { "epoch": 0.3334415584415584, "grad_norm": 9.033794403076172, "learning_rate": 7.589740549698616e-06, "loss": 0.2839, "step": 7189 }, { "epoch": 0.3334879406307978, "grad_norm": 6.996395587921143, "learning_rate": 7.589110987311725e-06, "loss": 0.4718, "step": 7190 }, { "epoch": 0.3335343228200371, "grad_norm": 6.402681350708008, "learning_rate": 7.588481368833111e-06, "loss": 0.3045, "step": 7191 }, { "epoch": 0.33358070500927645, "grad_norm": 7.277106761932373, "learning_rate": 7.587851694276412e-06, "loss": 0.4056, "step": 7192 }, { "epoch": 0.33362708719851575, "grad_norm": 4.259560585021973, "learning_rate": 7.587221963655272e-06, "loss": 0.3221, "step": 7193 }, { "epoch": 0.3336734693877551, "grad_norm": 5.286308288574219, "learning_rate": 7.586592176983333e-06, "loss": 0.2665, "step": 7194 }, { "epoch": 0.3337198515769944, "grad_norm": 7.3858723640441895, "learning_rate": 7.585962334274239e-06, "loss": 0.2971, "step": 7195 }, { "epoch": 0.33376623376623377, "grad_norm": 6.285533428192139, "learning_rate": 7.585332435541633e-06, "loss": 0.4379, "step": 7196 }, { "epoch": 0.3338126159554731, "grad_norm": 7.474552631378174, "learning_rate": 7.5847024807991655e-06, "loss": 0.3379, "step": 7197 }, { "epoch": 0.3338589981447124, "grad_norm": 10.524065017700195, "learning_rate": 7.584072470060482e-06, "loss": 0.3569, "step": 7198 }, { "epoch": 0.3339053803339518, "grad_norm": 8.166808128356934, "learning_rate": 7.583442403339231e-06, "loss": 0.3019, "step": 7199 }, { "epoch": 0.3339517625231911, "grad_norm": 14.386744499206543, "learning_rate": 7.582812280649061e-06, "loss": 0.35, "step": 7200 }, { "epoch": 0.33399814471243044, "grad_norm": 4.964208602905273, "learning_rate": 7.5821821020036275e-06, "loss": 0.277, "step": 7201 }, { "epoch": 0.33404452690166975, "grad_norm": 6.985608100891113, "learning_rate": 7.581551867416581e-06, "loss": 0.3874, "step": 7202 }, { "epoch": 0.3340909090909091, "grad_norm": 3.741079568862915, "learning_rate": 7.5809215769015734e-06, "loss": 0.3231, "step": 7203 }, { "epoch": 0.3341372912801484, "grad_norm": 9.503325462341309, "learning_rate": 7.5802912304722605e-06, "loss": 0.425, "step": 7204 }, { "epoch": 0.33418367346938777, "grad_norm": 7.1594696044921875, "learning_rate": 7.579660828142301e-06, "loss": 0.2907, "step": 7205 }, { "epoch": 0.33423005565862707, "grad_norm": 13.405790328979492, "learning_rate": 7.579030369925351e-06, "loss": 0.3982, "step": 7206 }, { "epoch": 0.3342764378478664, "grad_norm": 9.7553129196167, "learning_rate": 7.578399855835065e-06, "loss": 0.4318, "step": 7207 }, { "epoch": 0.33432282003710573, "grad_norm": 7.749016761779785, "learning_rate": 7.57776928588511e-06, "loss": 0.2831, "step": 7208 }, { "epoch": 0.3343692022263451, "grad_norm": 4.365159034729004, "learning_rate": 7.577138660089141e-06, "loss": 0.2924, "step": 7209 }, { "epoch": 0.3344155844155844, "grad_norm": 18.70303726196289, "learning_rate": 7.576507978460822e-06, "loss": 0.4403, "step": 7210 }, { "epoch": 0.33446196660482375, "grad_norm": 10.198567390441895, "learning_rate": 7.575877241013817e-06, "loss": 0.4324, "step": 7211 }, { "epoch": 0.3345083487940631, "grad_norm": 13.087430000305176, "learning_rate": 7.5752464477617905e-06, "loss": 0.3727, "step": 7212 }, { "epoch": 0.3345547309833024, "grad_norm": 12.563342094421387, "learning_rate": 7.574615598718407e-06, "loss": 0.3942, "step": 7213 }, { "epoch": 0.33460111317254176, "grad_norm": 8.752013206481934, "learning_rate": 7.573984693897336e-06, "loss": 0.353, "step": 7214 }, { "epoch": 0.33464749536178107, "grad_norm": 3.835707902908325, "learning_rate": 7.573353733312243e-06, "loss": 0.2995, "step": 7215 }, { "epoch": 0.3346938775510204, "grad_norm": 9.126351356506348, "learning_rate": 7.572722716976799e-06, "loss": 0.4575, "step": 7216 }, { "epoch": 0.3347402597402597, "grad_norm": 5.472165584564209, "learning_rate": 7.572091644904674e-06, "loss": 0.3193, "step": 7217 }, { "epoch": 0.3347866419294991, "grad_norm": 5.45149040222168, "learning_rate": 7.57146051710954e-06, "loss": 0.311, "step": 7218 }, { "epoch": 0.3348330241187384, "grad_norm": 4.248791217803955, "learning_rate": 7.570829333605072e-06, "loss": 0.3074, "step": 7219 }, { "epoch": 0.33487940630797774, "grad_norm": 13.154141426086426, "learning_rate": 7.570198094404941e-06, "loss": 0.3998, "step": 7220 }, { "epoch": 0.33492578849721705, "grad_norm": 5.184728145599365, "learning_rate": 7.569566799522825e-06, "loss": 0.3938, "step": 7221 }, { "epoch": 0.3349721706864564, "grad_norm": 4.274781703948975, "learning_rate": 7.5689354489723975e-06, "loss": 0.255, "step": 7222 }, { "epoch": 0.3350185528756957, "grad_norm": 6.887264728546143, "learning_rate": 7.568304042767341e-06, "loss": 0.3887, "step": 7223 }, { "epoch": 0.33506493506493507, "grad_norm": 4.1159491539001465, "learning_rate": 7.56767258092133e-06, "loss": 0.2785, "step": 7224 }, { "epoch": 0.3351113172541744, "grad_norm": 11.803261756896973, "learning_rate": 7.5670410634480475e-06, "loss": 0.4933, "step": 7225 }, { "epoch": 0.3351576994434137, "grad_norm": 6.674896240234375, "learning_rate": 7.566409490361174e-06, "loss": 0.3539, "step": 7226 }, { "epoch": 0.3352040816326531, "grad_norm": 5.633953094482422, "learning_rate": 7.565777861674393e-06, "loss": 0.3054, "step": 7227 }, { "epoch": 0.3352504638218924, "grad_norm": 6.598179340362549, "learning_rate": 7.565146177401388e-06, "loss": 0.3507, "step": 7228 }, { "epoch": 0.33529684601113174, "grad_norm": 5.590695381164551, "learning_rate": 7.564514437555843e-06, "loss": 0.3314, "step": 7229 }, { "epoch": 0.33534322820037105, "grad_norm": 8.039749145507812, "learning_rate": 7.563882642151448e-06, "loss": 0.3125, "step": 7230 }, { "epoch": 0.3353896103896104, "grad_norm": 8.184896469116211, "learning_rate": 7.563250791201886e-06, "loss": 0.3609, "step": 7231 }, { "epoch": 0.3354359925788497, "grad_norm": 15.142226219177246, "learning_rate": 7.562618884720847e-06, "loss": 0.5017, "step": 7232 }, { "epoch": 0.33548237476808906, "grad_norm": 6.694607734680176, "learning_rate": 7.561986922722022e-06, "loss": 0.3002, "step": 7233 }, { "epoch": 0.33552875695732837, "grad_norm": 13.033174514770508, "learning_rate": 7.561354905219102e-06, "loss": 0.3849, "step": 7234 }, { "epoch": 0.3355751391465677, "grad_norm": 18.457677841186523, "learning_rate": 7.56072283222578e-06, "loss": 0.3699, "step": 7235 }, { "epoch": 0.335621521335807, "grad_norm": 6.869650363922119, "learning_rate": 7.560090703755747e-06, "loss": 0.3323, "step": 7236 }, { "epoch": 0.3356679035250464, "grad_norm": 7.29749870300293, "learning_rate": 7.559458519822698e-06, "loss": 0.3038, "step": 7237 }, { "epoch": 0.3357142857142857, "grad_norm": 6.906503200531006, "learning_rate": 7.558826280440333e-06, "loss": 0.227, "step": 7238 }, { "epoch": 0.33576066790352505, "grad_norm": 6.189939498901367, "learning_rate": 7.558193985622344e-06, "loss": 0.2672, "step": 7239 }, { "epoch": 0.3358070500927644, "grad_norm": 9.667279243469238, "learning_rate": 7.557561635382433e-06, "loss": 0.3158, "step": 7240 }, { "epoch": 0.3358534322820037, "grad_norm": 5.128068923950195, "learning_rate": 7.556929229734298e-06, "loss": 0.3282, "step": 7241 }, { "epoch": 0.33589981447124306, "grad_norm": 4.710686683654785, "learning_rate": 7.556296768691639e-06, "loss": 0.2947, "step": 7242 }, { "epoch": 0.33594619666048237, "grad_norm": 10.189485549926758, "learning_rate": 7.55566425226816e-06, "loss": 0.5555, "step": 7243 }, { "epoch": 0.3359925788497217, "grad_norm": 8.902703285217285, "learning_rate": 7.5550316804775625e-06, "loss": 0.3757, "step": 7244 }, { "epoch": 0.336038961038961, "grad_norm": 6.4185638427734375, "learning_rate": 7.55439905333355e-06, "loss": 0.3434, "step": 7245 }, { "epoch": 0.3360853432282004, "grad_norm": 9.295994758605957, "learning_rate": 7.553766370849831e-06, "loss": 0.3891, "step": 7246 }, { "epoch": 0.3361317254174397, "grad_norm": 8.012937545776367, "learning_rate": 7.5531336330401105e-06, "loss": 0.3408, "step": 7247 }, { "epoch": 0.33617810760667904, "grad_norm": 13.485432624816895, "learning_rate": 7.552500839918095e-06, "loss": 0.387, "step": 7248 }, { "epoch": 0.33622448979591835, "grad_norm": 10.74498462677002, "learning_rate": 7.5518679914974964e-06, "loss": 0.3532, "step": 7249 }, { "epoch": 0.3362708719851577, "grad_norm": 5.3408589363098145, "learning_rate": 7.551235087792024e-06, "loss": 0.3949, "step": 7250 }, { "epoch": 0.336317254174397, "grad_norm": 7.665788650512695, "learning_rate": 7.550602128815387e-06, "loss": 0.2783, "step": 7251 }, { "epoch": 0.33636363636363636, "grad_norm": 8.55403995513916, "learning_rate": 7.549969114581303e-06, "loss": 0.4886, "step": 7252 }, { "epoch": 0.3364100185528757, "grad_norm": 19.686187744140625, "learning_rate": 7.549336045103481e-06, "loss": 0.4576, "step": 7253 }, { "epoch": 0.336456400742115, "grad_norm": 8.859379768371582, "learning_rate": 7.548702920395639e-06, "loss": 0.5109, "step": 7254 }, { "epoch": 0.3365027829313544, "grad_norm": 5.734750270843506, "learning_rate": 7.548069740471493e-06, "loss": 0.298, "step": 7255 }, { "epoch": 0.3365491651205937, "grad_norm": 8.553659439086914, "learning_rate": 7.54743650534476e-06, "loss": 0.4621, "step": 7256 }, { "epoch": 0.33659554730983304, "grad_norm": 11.162702560424805, "learning_rate": 7.546803215029157e-06, "loss": 0.3339, "step": 7257 }, { "epoch": 0.33664192949907235, "grad_norm": 4.054076671600342, "learning_rate": 7.546169869538408e-06, "loss": 0.3012, "step": 7258 }, { "epoch": 0.3366883116883117, "grad_norm": 15.895977020263672, "learning_rate": 7.54553646888623e-06, "loss": 0.3809, "step": 7259 }, { "epoch": 0.336734693877551, "grad_norm": 4.90053129196167, "learning_rate": 7.544903013086348e-06, "loss": 0.3857, "step": 7260 }, { "epoch": 0.33678107606679036, "grad_norm": 13.632959365844727, "learning_rate": 7.544269502152483e-06, "loss": 0.5501, "step": 7261 }, { "epoch": 0.33682745825602967, "grad_norm": 8.31934642791748, "learning_rate": 7.543635936098363e-06, "loss": 0.3896, "step": 7262 }, { "epoch": 0.336873840445269, "grad_norm": 9.164046287536621, "learning_rate": 7.543002314937713e-06, "loss": 0.4241, "step": 7263 }, { "epoch": 0.3369202226345083, "grad_norm": 5.512680530548096, "learning_rate": 7.542368638684257e-06, "loss": 0.3738, "step": 7264 }, { "epoch": 0.3369666048237477, "grad_norm": 7.59041690826416, "learning_rate": 7.541734907351725e-06, "loss": 0.3061, "step": 7265 }, { "epoch": 0.337012987012987, "grad_norm": 9.859586715698242, "learning_rate": 7.541101120953848e-06, "loss": 0.3917, "step": 7266 }, { "epoch": 0.33705936920222634, "grad_norm": 10.600213050842285, "learning_rate": 7.540467279504355e-06, "loss": 0.4407, "step": 7267 }, { "epoch": 0.3371057513914657, "grad_norm": 13.095834732055664, "learning_rate": 7.53983338301698e-06, "loss": 0.3585, "step": 7268 }, { "epoch": 0.337152133580705, "grad_norm": 7.171743869781494, "learning_rate": 7.539199431505453e-06, "loss": 0.3692, "step": 7269 }, { "epoch": 0.33719851576994436, "grad_norm": 4.943210124969482, "learning_rate": 7.5385654249835095e-06, "loss": 0.3607, "step": 7270 }, { "epoch": 0.33724489795918366, "grad_norm": 11.177330017089844, "learning_rate": 7.537931363464885e-06, "loss": 0.3394, "step": 7271 }, { "epoch": 0.337291280148423, "grad_norm": 5.606936454772949, "learning_rate": 7.537297246963316e-06, "loss": 0.3824, "step": 7272 }, { "epoch": 0.3373376623376623, "grad_norm": 4.508090496063232, "learning_rate": 7.536663075492542e-06, "loss": 0.2657, "step": 7273 }, { "epoch": 0.3373840445269017, "grad_norm": 6.401499271392822, "learning_rate": 7.536028849066299e-06, "loss": 0.2883, "step": 7274 }, { "epoch": 0.337430426716141, "grad_norm": 7.930229663848877, "learning_rate": 7.535394567698329e-06, "loss": 0.3668, "step": 7275 }, { "epoch": 0.33747680890538034, "grad_norm": 6.478511333465576, "learning_rate": 7.5347602314023725e-06, "loss": 0.3627, "step": 7276 }, { "epoch": 0.33752319109461965, "grad_norm": 6.9961771965026855, "learning_rate": 7.5341258401921724e-06, "loss": 0.4025, "step": 7277 }, { "epoch": 0.337569573283859, "grad_norm": 9.223443984985352, "learning_rate": 7.5334913940814726e-06, "loss": 0.3862, "step": 7278 }, { "epoch": 0.3376159554730983, "grad_norm": 8.827678680419922, "learning_rate": 7.5328568930840194e-06, "loss": 0.3733, "step": 7279 }, { "epoch": 0.33766233766233766, "grad_norm": 4.767545700073242, "learning_rate": 7.532222337213556e-06, "loss": 0.3523, "step": 7280 }, { "epoch": 0.337708719851577, "grad_norm": 9.455619812011719, "learning_rate": 7.531587726483831e-06, "loss": 0.4032, "step": 7281 }, { "epoch": 0.3377551020408163, "grad_norm": 8.336155891418457, "learning_rate": 7.530953060908594e-06, "loss": 0.3947, "step": 7282 }, { "epoch": 0.3378014842300557, "grad_norm": 4.152040481567383, "learning_rate": 7.530318340501595e-06, "loss": 0.2314, "step": 7283 }, { "epoch": 0.337847866419295, "grad_norm": 6.6933979988098145, "learning_rate": 7.529683565276582e-06, "loss": 0.4194, "step": 7284 }, { "epoch": 0.33789424860853434, "grad_norm": 5.777859210968018, "learning_rate": 7.5290487352473104e-06, "loss": 0.3853, "step": 7285 }, { "epoch": 0.33794063079777364, "grad_norm": 4.967930316925049, "learning_rate": 7.5284138504275296e-06, "loss": 0.2927, "step": 7286 }, { "epoch": 0.337987012987013, "grad_norm": 4.130683898925781, "learning_rate": 7.527778910830999e-06, "loss": 0.3692, "step": 7287 }, { "epoch": 0.3380333951762523, "grad_norm": 3.9745614528656006, "learning_rate": 7.5271439164714695e-06, "loss": 0.2625, "step": 7288 }, { "epoch": 0.33807977736549166, "grad_norm": 6.6514129638671875, "learning_rate": 7.5265088673627e-06, "loss": 0.3587, "step": 7289 }, { "epoch": 0.33812615955473097, "grad_norm": 5.733495712280273, "learning_rate": 7.525873763518449e-06, "loss": 0.2249, "step": 7290 }, { "epoch": 0.3381725417439703, "grad_norm": 10.307832717895508, "learning_rate": 7.525238604952476e-06, "loss": 0.5223, "step": 7291 }, { "epoch": 0.3382189239332096, "grad_norm": 6.783415794372559, "learning_rate": 7.524603391678541e-06, "loss": 0.3352, "step": 7292 }, { "epoch": 0.338265306122449, "grad_norm": 4.5890350341796875, "learning_rate": 7.523968123710403e-06, "loss": 0.2538, "step": 7293 }, { "epoch": 0.3383116883116883, "grad_norm": 3.6295270919799805, "learning_rate": 7.523332801061829e-06, "loss": 0.2521, "step": 7294 }, { "epoch": 0.33835807050092764, "grad_norm": 7.06195592880249, "learning_rate": 7.522697423746579e-06, "loss": 0.3732, "step": 7295 }, { "epoch": 0.338404452690167, "grad_norm": 4.921474456787109, "learning_rate": 7.52206199177842e-06, "loss": 0.3356, "step": 7296 }, { "epoch": 0.3384508348794063, "grad_norm": 7.053983688354492, "learning_rate": 7.521426505171119e-06, "loss": 0.4294, "step": 7297 }, { "epoch": 0.33849721706864566, "grad_norm": 5.992380142211914, "learning_rate": 7.520790963938443e-06, "loss": 0.3288, "step": 7298 }, { "epoch": 0.33854359925788496, "grad_norm": 6.171337127685547, "learning_rate": 7.52015536809416e-06, "loss": 0.4059, "step": 7299 }, { "epoch": 0.3385899814471243, "grad_norm": 5.916565895080566, "learning_rate": 7.519519717652039e-06, "loss": 0.3808, "step": 7300 }, { "epoch": 0.3386363636363636, "grad_norm": 18.504791259765625, "learning_rate": 7.518884012625853e-06, "loss": 0.5881, "step": 7301 }, { "epoch": 0.338682745825603, "grad_norm": 4.542404651641846, "learning_rate": 7.518248253029374e-06, "loss": 0.4002, "step": 7302 }, { "epoch": 0.3387291280148423, "grad_norm": 9.380105018615723, "learning_rate": 7.517612438876376e-06, "loss": 0.3454, "step": 7303 }, { "epoch": 0.33877551020408164, "grad_norm": 11.378561973571777, "learning_rate": 7.5169765701806295e-06, "loss": 0.3879, "step": 7304 }, { "epoch": 0.33882189239332094, "grad_norm": 4.618590354919434, "learning_rate": 7.5163406469559154e-06, "loss": 0.2693, "step": 7305 }, { "epoch": 0.3388682745825603, "grad_norm": 6.309258460998535, "learning_rate": 7.515704669216008e-06, "loss": 0.3845, "step": 7306 }, { "epoch": 0.3389146567717996, "grad_norm": 7.544618606567383, "learning_rate": 7.515068636974685e-06, "loss": 0.3482, "step": 7307 }, { "epoch": 0.33896103896103896, "grad_norm": 6.094422817230225, "learning_rate": 7.514432550245726e-06, "loss": 0.2647, "step": 7308 }, { "epoch": 0.3390074211502783, "grad_norm": 5.476874351501465, "learning_rate": 7.513796409042914e-06, "loss": 0.4003, "step": 7309 }, { "epoch": 0.3390538033395176, "grad_norm": 7.535468101501465, "learning_rate": 7.513160213380026e-06, "loss": 0.451, "step": 7310 }, { "epoch": 0.339100185528757, "grad_norm": 4.430295944213867, "learning_rate": 7.51252396327085e-06, "loss": 0.3679, "step": 7311 }, { "epoch": 0.3391465677179963, "grad_norm": 5.511984825134277, "learning_rate": 7.5118876587291666e-06, "loss": 0.3476, "step": 7312 }, { "epoch": 0.33919294990723564, "grad_norm": 6.046130180358887, "learning_rate": 7.511251299768763e-06, "loss": 0.3013, "step": 7313 }, { "epoch": 0.33923933209647494, "grad_norm": 8.502192497253418, "learning_rate": 7.510614886403424e-06, "loss": 0.3775, "step": 7314 }, { "epoch": 0.3392857142857143, "grad_norm": 11.9437894821167, "learning_rate": 7.509978418646937e-06, "loss": 0.2619, "step": 7315 }, { "epoch": 0.3393320964749536, "grad_norm": 6.9922099113464355, "learning_rate": 7.509341896513092e-06, "loss": 0.3292, "step": 7316 }, { "epoch": 0.33937847866419296, "grad_norm": 4.48910665512085, "learning_rate": 7.508705320015678e-06, "loss": 0.3129, "step": 7317 }, { "epoch": 0.33942486085343226, "grad_norm": 5.574756622314453, "learning_rate": 7.508068689168486e-06, "loss": 0.3296, "step": 7318 }, { "epoch": 0.3394712430426716, "grad_norm": 6.583471775054932, "learning_rate": 7.507432003985309e-06, "loss": 0.3862, "step": 7319 }, { "epoch": 0.3395176252319109, "grad_norm": 5.9585280418396, "learning_rate": 7.506795264479941e-06, "loss": 0.3778, "step": 7320 }, { "epoch": 0.3395640074211503, "grad_norm": 10.079486846923828, "learning_rate": 7.506158470666175e-06, "loss": 0.4491, "step": 7321 }, { "epoch": 0.3396103896103896, "grad_norm": 28.42516326904297, "learning_rate": 7.505521622557808e-06, "loss": 0.4758, "step": 7322 }, { "epoch": 0.33965677179962894, "grad_norm": 9.466361999511719, "learning_rate": 7.504884720168637e-06, "loss": 0.4156, "step": 7323 }, { "epoch": 0.3397031539888683, "grad_norm": 6.026791572570801, "learning_rate": 7.504247763512458e-06, "loss": 0.1878, "step": 7324 }, { "epoch": 0.3397495361781076, "grad_norm": 7.52990198135376, "learning_rate": 7.503610752603074e-06, "loss": 0.3639, "step": 7325 }, { "epoch": 0.33979591836734696, "grad_norm": 7.965583801269531, "learning_rate": 7.502973687454282e-06, "loss": 0.3143, "step": 7326 }, { "epoch": 0.33984230055658626, "grad_norm": 6.804784297943115, "learning_rate": 7.502336568079887e-06, "loss": 0.3949, "step": 7327 }, { "epoch": 0.3398886827458256, "grad_norm": 8.64907169342041, "learning_rate": 7.501699394493689e-06, "loss": 0.4641, "step": 7328 }, { "epoch": 0.3399350649350649, "grad_norm": 15.939351081848145, "learning_rate": 7.501062166709493e-06, "loss": 0.3184, "step": 7329 }, { "epoch": 0.3399814471243043, "grad_norm": 7.926705360412598, "learning_rate": 7.500424884741104e-06, "loss": 0.3686, "step": 7330 }, { "epoch": 0.3400278293135436, "grad_norm": 7.184744834899902, "learning_rate": 7.49978754860233e-06, "loss": 0.2729, "step": 7331 }, { "epoch": 0.34007421150278294, "grad_norm": 7.001676082611084, "learning_rate": 7.499150158306976e-06, "loss": 0.3985, "step": 7332 }, { "epoch": 0.34012059369202224, "grad_norm": 8.468015670776367, "learning_rate": 7.498512713868853e-06, "loss": 0.4114, "step": 7333 }, { "epoch": 0.3401669758812616, "grad_norm": 9.180434226989746, "learning_rate": 7.497875215301768e-06, "loss": 0.3789, "step": 7334 }, { "epoch": 0.3402133580705009, "grad_norm": 5.573418617248535, "learning_rate": 7.4972376626195364e-06, "loss": 0.3671, "step": 7335 }, { "epoch": 0.34025974025974026, "grad_norm": 6.621294975280762, "learning_rate": 7.4966000558359675e-06, "loss": 0.2888, "step": 7336 }, { "epoch": 0.34030612244897956, "grad_norm": 8.349143981933594, "learning_rate": 7.495962394964874e-06, "loss": 0.3712, "step": 7337 }, { "epoch": 0.3403525046382189, "grad_norm": 7.4393086433410645, "learning_rate": 7.495324680020072e-06, "loss": 0.4651, "step": 7338 }, { "epoch": 0.3403988868274583, "grad_norm": 6.499116897583008, "learning_rate": 7.494686911015379e-06, "loss": 0.3055, "step": 7339 }, { "epoch": 0.3404452690166976, "grad_norm": 5.469536304473877, "learning_rate": 7.494049087964607e-06, "loss": 0.4243, "step": 7340 }, { "epoch": 0.34049165120593694, "grad_norm": 11.751910209655762, "learning_rate": 7.493411210881578e-06, "loss": 0.4528, "step": 7341 }, { "epoch": 0.34053803339517624, "grad_norm": 6.672018051147461, "learning_rate": 7.492773279780111e-06, "loss": 0.3835, "step": 7342 }, { "epoch": 0.3405844155844156, "grad_norm": 12.165547370910645, "learning_rate": 7.492135294674024e-06, "loss": 0.487, "step": 7343 }, { "epoch": 0.3406307977736549, "grad_norm": 5.8165764808654785, "learning_rate": 7.491497255577143e-06, "loss": 0.3079, "step": 7344 }, { "epoch": 0.34067717996289426, "grad_norm": 6.4749579429626465, "learning_rate": 7.490859162503285e-06, "loss": 0.3116, "step": 7345 }, { "epoch": 0.34072356215213356, "grad_norm": 5.0206990242004395, "learning_rate": 7.490221015466279e-06, "loss": 0.2786, "step": 7346 }, { "epoch": 0.3407699443413729, "grad_norm": 4.942476749420166, "learning_rate": 7.4895828144799475e-06, "loss": 0.3321, "step": 7347 }, { "epoch": 0.3408163265306122, "grad_norm": 11.00274658203125, "learning_rate": 7.488944559558118e-06, "loss": 0.551, "step": 7348 }, { "epoch": 0.3408627087198516, "grad_norm": 4.1338419914245605, "learning_rate": 7.488306250714617e-06, "loss": 0.3755, "step": 7349 }, { "epoch": 0.3409090909090909, "grad_norm": 4.85352897644043, "learning_rate": 7.487667887963273e-06, "loss": 0.3324, "step": 7350 }, { "epoch": 0.34095547309833024, "grad_norm": 8.987479209899902, "learning_rate": 7.487029471317917e-06, "loss": 0.373, "step": 7351 }, { "epoch": 0.3410018552875696, "grad_norm": 4.258133411407471, "learning_rate": 7.486391000792379e-06, "loss": 0.3376, "step": 7352 }, { "epoch": 0.3410482374768089, "grad_norm": 12.588295936584473, "learning_rate": 7.485752476400492e-06, "loss": 0.4007, "step": 7353 }, { "epoch": 0.34109461966604826, "grad_norm": 7.5586395263671875, "learning_rate": 7.485113898156089e-06, "loss": 0.3133, "step": 7354 }, { "epoch": 0.34114100185528756, "grad_norm": 6.516199111938477, "learning_rate": 7.484475266073003e-06, "loss": 0.2731, "step": 7355 }, { "epoch": 0.3411873840445269, "grad_norm": 5.273731708526611, "learning_rate": 7.483836580165071e-06, "loss": 0.3476, "step": 7356 }, { "epoch": 0.3412337662337662, "grad_norm": 6.116049766540527, "learning_rate": 7.48319784044613e-06, "loss": 0.306, "step": 7357 }, { "epoch": 0.3412801484230056, "grad_norm": 4.5251946449279785, "learning_rate": 7.482559046930018e-06, "loss": 0.2305, "step": 7358 }, { "epoch": 0.3413265306122449, "grad_norm": 5.854769706726074, "learning_rate": 7.481920199630572e-06, "loss": 0.2669, "step": 7359 }, { "epoch": 0.34137291280148424, "grad_norm": 7.473107814788818, "learning_rate": 7.481281298561635e-06, "loss": 0.3611, "step": 7360 }, { "epoch": 0.34141929499072354, "grad_norm": 9.502701759338379, "learning_rate": 7.480642343737048e-06, "loss": 0.4228, "step": 7361 }, { "epoch": 0.3414656771799629, "grad_norm": 6.386837959289551, "learning_rate": 7.480003335170651e-06, "loss": 0.3654, "step": 7362 }, { "epoch": 0.3415120593692022, "grad_norm": 8.973142623901367, "learning_rate": 7.479364272876292e-06, "loss": 0.3713, "step": 7363 }, { "epoch": 0.34155844155844156, "grad_norm": 6.442826747894287, "learning_rate": 7.4787251568678135e-06, "loss": 0.3238, "step": 7364 }, { "epoch": 0.34160482374768086, "grad_norm": 8.135553359985352, "learning_rate": 7.478085987159061e-06, "loss": 0.4544, "step": 7365 }, { "epoch": 0.3416512059369202, "grad_norm": 17.26676368713379, "learning_rate": 7.477446763763882e-06, "loss": 0.4349, "step": 7366 }, { "epoch": 0.3416975881261596, "grad_norm": 6.867982864379883, "learning_rate": 7.476807486696127e-06, "loss": 0.3607, "step": 7367 }, { "epoch": 0.3417439703153989, "grad_norm": 4.671152114868164, "learning_rate": 7.476168155969643e-06, "loss": 0.2406, "step": 7368 }, { "epoch": 0.34179035250463824, "grad_norm": 5.164057731628418, "learning_rate": 7.475528771598284e-06, "loss": 0.3522, "step": 7369 }, { "epoch": 0.34183673469387754, "grad_norm": 8.968178749084473, "learning_rate": 7.474889333595898e-06, "loss": 0.4055, "step": 7370 }, { "epoch": 0.3418831168831169, "grad_norm": 6.542175769805908, "learning_rate": 7.4742498419763395e-06, "loss": 0.3845, "step": 7371 }, { "epoch": 0.3419294990723562, "grad_norm": 11.823898315429688, "learning_rate": 7.4736102967534654e-06, "loss": 0.2859, "step": 7372 }, { "epoch": 0.34197588126159556, "grad_norm": 9.92875862121582, "learning_rate": 7.472970697941127e-06, "loss": 0.2721, "step": 7373 }, { "epoch": 0.34202226345083486, "grad_norm": 11.573888778686523, "learning_rate": 7.472331045553184e-06, "loss": 0.5179, "step": 7374 }, { "epoch": 0.3420686456400742, "grad_norm": 12.099209785461426, "learning_rate": 7.471691339603493e-06, "loss": 0.3899, "step": 7375 }, { "epoch": 0.3421150278293135, "grad_norm": 10.867405891418457, "learning_rate": 7.471051580105912e-06, "loss": 0.3982, "step": 7376 }, { "epoch": 0.3421614100185529, "grad_norm": 8.60243034362793, "learning_rate": 7.470411767074301e-06, "loss": 0.3262, "step": 7377 }, { "epoch": 0.3422077922077922, "grad_norm": 7.358833312988281, "learning_rate": 7.469771900522523e-06, "loss": 0.2659, "step": 7378 }, { "epoch": 0.34225417439703154, "grad_norm": 8.619495391845703, "learning_rate": 7.469131980464439e-06, "loss": 0.4608, "step": 7379 }, { "epoch": 0.3423005565862709, "grad_norm": 5.73817777633667, "learning_rate": 7.468492006913914e-06, "loss": 0.3612, "step": 7380 }, { "epoch": 0.3423469387755102, "grad_norm": 8.609912872314453, "learning_rate": 7.467851979884811e-06, "loss": 0.327, "step": 7381 }, { "epoch": 0.34239332096474956, "grad_norm": 5.413196086883545, "learning_rate": 7.467211899390997e-06, "loss": 0.4696, "step": 7382 }, { "epoch": 0.34243970315398886, "grad_norm": 10.230436325073242, "learning_rate": 7.466571765446339e-06, "loss": 0.3006, "step": 7383 }, { "epoch": 0.3424860853432282, "grad_norm": 10.272634506225586, "learning_rate": 7.465931578064703e-06, "loss": 0.2699, "step": 7384 }, { "epoch": 0.3425324675324675, "grad_norm": 10.43117904663086, "learning_rate": 7.465291337259963e-06, "loss": 0.4201, "step": 7385 }, { "epoch": 0.3425788497217069, "grad_norm": 7.320267200469971, "learning_rate": 7.464651043045983e-06, "loss": 0.349, "step": 7386 }, { "epoch": 0.3426252319109462, "grad_norm": 8.956686019897461, "learning_rate": 7.464010695436639e-06, "loss": 0.4548, "step": 7387 }, { "epoch": 0.34267161410018554, "grad_norm": 8.468785285949707, "learning_rate": 7.463370294445804e-06, "loss": 0.4211, "step": 7388 }, { "epoch": 0.34271799628942484, "grad_norm": 4.655352592468262, "learning_rate": 7.462729840087351e-06, "loss": 0.3294, "step": 7389 }, { "epoch": 0.3427643784786642, "grad_norm": 11.864266395568848, "learning_rate": 7.462089332375153e-06, "loss": 0.5588, "step": 7390 }, { "epoch": 0.3428107606679035, "grad_norm": 6.150695323944092, "learning_rate": 7.46144877132309e-06, "loss": 0.4309, "step": 7391 }, { "epoch": 0.34285714285714286, "grad_norm": 8.158357620239258, "learning_rate": 7.4608081569450365e-06, "loss": 0.3748, "step": 7392 }, { "epoch": 0.34290352504638216, "grad_norm": 10.179734230041504, "learning_rate": 7.460167489254873e-06, "loss": 0.4284, "step": 7393 }, { "epoch": 0.3429499072356215, "grad_norm": 7.0986104011535645, "learning_rate": 7.459526768266478e-06, "loss": 0.3152, "step": 7394 }, { "epoch": 0.3429962894248609, "grad_norm": 5.733268737792969, "learning_rate": 7.458885993993734e-06, "loss": 0.2865, "step": 7395 }, { "epoch": 0.3430426716141002, "grad_norm": 6.82359504699707, "learning_rate": 7.458245166450522e-06, "loss": 0.3773, "step": 7396 }, { "epoch": 0.34308905380333954, "grad_norm": 5.032923221588135, "learning_rate": 7.4576042856507235e-06, "loss": 0.3215, "step": 7397 }, { "epoch": 0.34313543599257884, "grad_norm": 5.041071891784668, "learning_rate": 7.456963351608225e-06, "loss": 0.2834, "step": 7398 }, { "epoch": 0.3431818181818182, "grad_norm": 7.338356971740723, "learning_rate": 7.456322364336911e-06, "loss": 0.3188, "step": 7399 }, { "epoch": 0.3432282003710575, "grad_norm": 6.73621940612793, "learning_rate": 7.455681323850669e-06, "loss": 0.4431, "step": 7400 }, { "epoch": 0.34327458256029686, "grad_norm": 8.286916732788086, "learning_rate": 7.455040230163386e-06, "loss": 0.454, "step": 7401 }, { "epoch": 0.34332096474953616, "grad_norm": 8.153600692749023, "learning_rate": 7.454399083288952e-06, "loss": 0.3752, "step": 7402 }, { "epoch": 0.3433673469387755, "grad_norm": 5.4297308921813965, "learning_rate": 7.453757883241256e-06, "loss": 0.2985, "step": 7403 }, { "epoch": 0.3434137291280148, "grad_norm": 6.008816242218018, "learning_rate": 7.45311663003419e-06, "loss": 0.3712, "step": 7404 }, { "epoch": 0.3434601113172542, "grad_norm": 5.7679057121276855, "learning_rate": 7.452475323681645e-06, "loss": 0.2448, "step": 7405 }, { "epoch": 0.3435064935064935, "grad_norm": 6.4375901222229, "learning_rate": 7.4518339641975166e-06, "loss": 0.3898, "step": 7406 }, { "epoch": 0.34355287569573284, "grad_norm": 5.564090728759766, "learning_rate": 7.4511925515956985e-06, "loss": 0.3018, "step": 7407 }, { "epoch": 0.3435992578849722, "grad_norm": 6.89978551864624, "learning_rate": 7.450551085890087e-06, "loss": 0.3257, "step": 7408 }, { "epoch": 0.3436456400742115, "grad_norm": 11.178620338439941, "learning_rate": 7.449909567094579e-06, "loss": 0.4683, "step": 7409 }, { "epoch": 0.34369202226345086, "grad_norm": 8.454588890075684, "learning_rate": 7.44926799522307e-06, "loss": 0.4015, "step": 7410 }, { "epoch": 0.34373840445269016, "grad_norm": 8.878435134887695, "learning_rate": 7.448626370289465e-06, "loss": 0.4289, "step": 7411 }, { "epoch": 0.3437847866419295, "grad_norm": 8.288251876831055, "learning_rate": 7.447984692307659e-06, "loss": 0.321, "step": 7412 }, { "epoch": 0.3438311688311688, "grad_norm": 6.561913967132568, "learning_rate": 7.447342961291557e-06, "loss": 0.31, "step": 7413 }, { "epoch": 0.3438775510204082, "grad_norm": 7.5047712326049805, "learning_rate": 7.44670117725506e-06, "loss": 0.414, "step": 7414 }, { "epoch": 0.3439239332096475, "grad_norm": 6.0052595138549805, "learning_rate": 7.4460593402120725e-06, "loss": 0.3295, "step": 7415 }, { "epoch": 0.34397031539888684, "grad_norm": 6.796544551849365, "learning_rate": 7.4454174501765e-06, "loss": 0.4148, "step": 7416 }, { "epoch": 0.34401669758812614, "grad_norm": 5.836574554443359, "learning_rate": 7.444775507162247e-06, "loss": 0.2976, "step": 7417 }, { "epoch": 0.3440630797773655, "grad_norm": 6.074760913848877, "learning_rate": 7.444133511183225e-06, "loss": 0.3023, "step": 7418 }, { "epoch": 0.3441094619666048, "grad_norm": 6.77282190322876, "learning_rate": 7.443491462253336e-06, "loss": 0.2798, "step": 7419 }, { "epoch": 0.34415584415584416, "grad_norm": 5.615983009338379, "learning_rate": 7.442849360386495e-06, "loss": 0.3461, "step": 7420 }, { "epoch": 0.34420222634508346, "grad_norm": 6.294556617736816, "learning_rate": 7.442207205596612e-06, "loss": 0.3846, "step": 7421 }, { "epoch": 0.3442486085343228, "grad_norm": 7.960537433624268, "learning_rate": 7.441564997897597e-06, "loss": 0.3702, "step": 7422 }, { "epoch": 0.3442949907235622, "grad_norm": 9.308510780334473, "learning_rate": 7.440922737303363e-06, "loss": 0.4334, "step": 7423 }, { "epoch": 0.3443413729128015, "grad_norm": 16.285694122314453, "learning_rate": 7.440280423827827e-06, "loss": 0.3364, "step": 7424 }, { "epoch": 0.34438775510204084, "grad_norm": 6.115722179412842, "learning_rate": 7.439638057484901e-06, "loss": 0.3023, "step": 7425 }, { "epoch": 0.34443413729128014, "grad_norm": 5.956121921539307, "learning_rate": 7.438995638288506e-06, "loss": 0.2653, "step": 7426 }, { "epoch": 0.3444805194805195, "grad_norm": 13.88496208190918, "learning_rate": 7.438353166252555e-06, "loss": 0.4649, "step": 7427 }, { "epoch": 0.3445269016697588, "grad_norm": 8.32342529296875, "learning_rate": 7.437710641390969e-06, "loss": 0.3817, "step": 7428 }, { "epoch": 0.34457328385899816, "grad_norm": 4.915321350097656, "learning_rate": 7.437068063717668e-06, "loss": 0.3699, "step": 7429 }, { "epoch": 0.34461966604823746, "grad_norm": 6.994430065155029, "learning_rate": 7.436425433246572e-06, "loss": 0.4473, "step": 7430 }, { "epoch": 0.3446660482374768, "grad_norm": 6.927681922912598, "learning_rate": 7.4357827499916046e-06, "loss": 0.3951, "step": 7431 }, { "epoch": 0.3447124304267161, "grad_norm": 4.224460124969482, "learning_rate": 7.4351400139666894e-06, "loss": 0.3018, "step": 7432 }, { "epoch": 0.3447588126159555, "grad_norm": 4.756211280822754, "learning_rate": 7.434497225185749e-06, "loss": 0.2228, "step": 7433 }, { "epoch": 0.3448051948051948, "grad_norm": 10.907065391540527, "learning_rate": 7.4338543836627104e-06, "loss": 0.343, "step": 7434 }, { "epoch": 0.34485157699443414, "grad_norm": 12.741899490356445, "learning_rate": 7.433211489411503e-06, "loss": 0.371, "step": 7435 }, { "epoch": 0.3448979591836735, "grad_norm": 4.2967023849487305, "learning_rate": 7.432568542446048e-06, "loss": 0.3266, "step": 7436 }, { "epoch": 0.3449443413729128, "grad_norm": 5.269363880157471, "learning_rate": 7.431925542780281e-06, "loss": 0.3035, "step": 7437 }, { "epoch": 0.34499072356215216, "grad_norm": 7.705671787261963, "learning_rate": 7.43128249042813e-06, "loss": 0.2969, "step": 7438 }, { "epoch": 0.34503710575139146, "grad_norm": 6.707502365112305, "learning_rate": 7.430639385403525e-06, "loss": 0.3614, "step": 7439 }, { "epoch": 0.3450834879406308, "grad_norm": 6.429345607757568, "learning_rate": 7.429996227720401e-06, "loss": 0.3461, "step": 7440 }, { "epoch": 0.3451298701298701, "grad_norm": 5.020261764526367, "learning_rate": 7.429353017392688e-06, "loss": 0.3663, "step": 7441 }, { "epoch": 0.3451762523191095, "grad_norm": 3.9454009532928467, "learning_rate": 7.428709754434326e-06, "loss": 0.302, "step": 7442 }, { "epoch": 0.3452226345083488, "grad_norm": 5.0731611251831055, "learning_rate": 7.428066438859247e-06, "loss": 0.2942, "step": 7443 }, { "epoch": 0.34526901669758814, "grad_norm": 7.670047760009766, "learning_rate": 7.427423070681389e-06, "loss": 0.328, "step": 7444 }, { "epoch": 0.34531539888682744, "grad_norm": 8.571846961975098, "learning_rate": 7.4267796499146905e-06, "loss": 0.4881, "step": 7445 }, { "epoch": 0.3453617810760668, "grad_norm": 9.99770736694336, "learning_rate": 7.426136176573092e-06, "loss": 0.3903, "step": 7446 }, { "epoch": 0.3454081632653061, "grad_norm": 8.771170616149902, "learning_rate": 7.425492650670531e-06, "loss": 0.3294, "step": 7447 }, { "epoch": 0.34545454545454546, "grad_norm": 7.151189804077148, "learning_rate": 7.424849072220953e-06, "loss": 0.3831, "step": 7448 }, { "epoch": 0.34550092764378476, "grad_norm": 5.341944694519043, "learning_rate": 7.424205441238298e-06, "loss": 0.3324, "step": 7449 }, { "epoch": 0.3455473098330241, "grad_norm": 10.232987403869629, "learning_rate": 7.423561757736511e-06, "loss": 0.3721, "step": 7450 }, { "epoch": 0.3455936920222635, "grad_norm": 13.425687789916992, "learning_rate": 7.422918021729536e-06, "loss": 0.4545, "step": 7451 }, { "epoch": 0.3456400742115028, "grad_norm": 5.187999725341797, "learning_rate": 7.4222742332313206e-06, "loss": 0.3545, "step": 7452 }, { "epoch": 0.34568645640074214, "grad_norm": 5.101751327514648, "learning_rate": 7.421630392255811e-06, "loss": 0.2636, "step": 7453 }, { "epoch": 0.34573283858998144, "grad_norm": 6.808425426483154, "learning_rate": 7.420986498816958e-06, "loss": 0.3712, "step": 7454 }, { "epoch": 0.3457792207792208, "grad_norm": 5.484249114990234, "learning_rate": 7.420342552928707e-06, "loss": 0.3146, "step": 7455 }, { "epoch": 0.3458256029684601, "grad_norm": 8.617839813232422, "learning_rate": 7.419698554605013e-06, "loss": 0.3157, "step": 7456 }, { "epoch": 0.34587198515769946, "grad_norm": 6.327975273132324, "learning_rate": 7.419054503859825e-06, "loss": 0.3946, "step": 7457 }, { "epoch": 0.34591836734693876, "grad_norm": 4.822928428649902, "learning_rate": 7.418410400707097e-06, "loss": 0.4143, "step": 7458 }, { "epoch": 0.3459647495361781, "grad_norm": 7.25933313369751, "learning_rate": 7.4177662451607844e-06, "loss": 0.3957, "step": 7459 }, { "epoch": 0.3460111317254174, "grad_norm": 7.074621677398682, "learning_rate": 7.417122037234841e-06, "loss": 0.3343, "step": 7460 }, { "epoch": 0.3460575139146568, "grad_norm": 9.363039016723633, "learning_rate": 7.416477776943223e-06, "loss": 0.3877, "step": 7461 }, { "epoch": 0.3461038961038961, "grad_norm": 6.020611763000488, "learning_rate": 7.415833464299888e-06, "loss": 0.4311, "step": 7462 }, { "epoch": 0.34615027829313544, "grad_norm": 6.741611003875732, "learning_rate": 7.415189099318796e-06, "loss": 0.2228, "step": 7463 }, { "epoch": 0.34619666048237474, "grad_norm": 5.343765735626221, "learning_rate": 7.414544682013907e-06, "loss": 0.3046, "step": 7464 }, { "epoch": 0.3462430426716141, "grad_norm": 6.78915548324585, "learning_rate": 7.41390021239918e-06, "loss": 0.2614, "step": 7465 }, { "epoch": 0.34628942486085346, "grad_norm": 5.575591087341309, "learning_rate": 7.413255690488578e-06, "loss": 0.3388, "step": 7466 }, { "epoch": 0.34633580705009276, "grad_norm": 5.085278034210205, "learning_rate": 7.412611116296064e-06, "loss": 0.3897, "step": 7467 }, { "epoch": 0.3463821892393321, "grad_norm": 6.208313941955566, "learning_rate": 7.411966489835603e-06, "loss": 0.3901, "step": 7468 }, { "epoch": 0.3464285714285714, "grad_norm": 12.498067855834961, "learning_rate": 7.4113218111211606e-06, "loss": 0.4815, "step": 7469 }, { "epoch": 0.3464749536178108, "grad_norm": 8.735801696777344, "learning_rate": 7.410677080166703e-06, "loss": 0.352, "step": 7470 }, { "epoch": 0.3465213358070501, "grad_norm": 4.542500019073486, "learning_rate": 7.410032296986197e-06, "loss": 0.2982, "step": 7471 }, { "epoch": 0.34656771799628944, "grad_norm": 6.339366436004639, "learning_rate": 7.409387461593613e-06, "loss": 0.5033, "step": 7472 }, { "epoch": 0.34661410018552874, "grad_norm": 5.377523422241211, "learning_rate": 7.408742574002922e-06, "loss": 0.2368, "step": 7473 }, { "epoch": 0.3466604823747681, "grad_norm": 6.665317058563232, "learning_rate": 7.408097634228091e-06, "loss": 0.3448, "step": 7474 }, { "epoch": 0.3467068645640074, "grad_norm": 7.0867695808410645, "learning_rate": 7.407452642283096e-06, "loss": 0.2971, "step": 7475 }, { "epoch": 0.34675324675324676, "grad_norm": 7.901745796203613, "learning_rate": 7.40680759818191e-06, "loss": 0.3932, "step": 7476 }, { "epoch": 0.34679962894248606, "grad_norm": 7.588246822357178, "learning_rate": 7.4061625019385055e-06, "loss": 0.362, "step": 7477 }, { "epoch": 0.3468460111317254, "grad_norm": 15.951400756835938, "learning_rate": 7.405517353566861e-06, "loss": 0.3408, "step": 7478 }, { "epoch": 0.3468923933209648, "grad_norm": 9.583457946777344, "learning_rate": 7.404872153080951e-06, "loss": 0.3637, "step": 7479 }, { "epoch": 0.3469387755102041, "grad_norm": 9.451314926147461, "learning_rate": 7.404226900494753e-06, "loss": 0.4856, "step": 7480 }, { "epoch": 0.34698515769944344, "grad_norm": 7.078573226928711, "learning_rate": 7.4035815958222504e-06, "loss": 0.4183, "step": 7481 }, { "epoch": 0.34703153988868274, "grad_norm": 6.452495574951172, "learning_rate": 7.4029362390774185e-06, "loss": 0.2981, "step": 7482 }, { "epoch": 0.3470779220779221, "grad_norm": 7.4988884925842285, "learning_rate": 7.402290830274239e-06, "loss": 0.2915, "step": 7483 }, { "epoch": 0.3471243042671614, "grad_norm": 8.732932090759277, "learning_rate": 7.401645369426697e-06, "loss": 0.3382, "step": 7484 }, { "epoch": 0.34717068645640076, "grad_norm": 11.827970504760742, "learning_rate": 7.400999856548776e-06, "loss": 0.3401, "step": 7485 }, { "epoch": 0.34721706864564006, "grad_norm": 6.730310440063477, "learning_rate": 7.400354291654458e-06, "loss": 0.3595, "step": 7486 }, { "epoch": 0.3472634508348794, "grad_norm": 6.473360061645508, "learning_rate": 7.399708674757731e-06, "loss": 0.3447, "step": 7487 }, { "epoch": 0.3473098330241187, "grad_norm": 9.547922134399414, "learning_rate": 7.399063005872581e-06, "loss": 0.4077, "step": 7488 }, { "epoch": 0.3473562152133581, "grad_norm": 5.8050007820129395, "learning_rate": 7.398417285012996e-06, "loss": 0.3015, "step": 7489 }, { "epoch": 0.3474025974025974, "grad_norm": 6.1223931312561035, "learning_rate": 7.397771512192966e-06, "loss": 0.4052, "step": 7490 }, { "epoch": 0.34744897959183674, "grad_norm": 7.998903274536133, "learning_rate": 7.397125687426481e-06, "loss": 0.2976, "step": 7491 }, { "epoch": 0.34749536178107604, "grad_norm": 11.869071960449219, "learning_rate": 7.396479810727532e-06, "loss": 0.2794, "step": 7492 }, { "epoch": 0.3475417439703154, "grad_norm": 6.453667640686035, "learning_rate": 7.395833882110113e-06, "loss": 0.227, "step": 7493 }, { "epoch": 0.34758812615955476, "grad_norm": 7.387002944946289, "learning_rate": 7.395187901588214e-06, "loss": 0.3681, "step": 7494 }, { "epoch": 0.34763450834879406, "grad_norm": 9.236701965332031, "learning_rate": 7.394541869175835e-06, "loss": 0.4279, "step": 7495 }, { "epoch": 0.3476808905380334, "grad_norm": 5.046060085296631, "learning_rate": 7.3938957848869684e-06, "loss": 0.349, "step": 7496 }, { "epoch": 0.3477272727272727, "grad_norm": 6.422573089599609, "learning_rate": 7.393249648735613e-06, "loss": 0.365, "step": 7497 }, { "epoch": 0.3477736549165121, "grad_norm": 8.719648361206055, "learning_rate": 7.3926034607357654e-06, "loss": 0.3905, "step": 7498 }, { "epoch": 0.3478200371057514, "grad_norm": 11.484277725219727, "learning_rate": 7.391957220901426e-06, "loss": 0.4301, "step": 7499 }, { "epoch": 0.34786641929499074, "grad_norm": 5.423316478729248, "learning_rate": 7.391310929246596e-06, "loss": 0.3823, "step": 7500 }, { "epoch": 0.34791280148423004, "grad_norm": 5.746527671813965, "learning_rate": 7.390664585785274e-06, "loss": 0.4177, "step": 7501 }, { "epoch": 0.3479591836734694, "grad_norm": 6.244846820831299, "learning_rate": 7.3900181905314646e-06, "loss": 0.338, "step": 7502 }, { "epoch": 0.3480055658627087, "grad_norm": 11.230778694152832, "learning_rate": 7.389371743499174e-06, "loss": 0.4449, "step": 7503 }, { "epoch": 0.34805194805194806, "grad_norm": 5.625178337097168, "learning_rate": 7.3887252447024035e-06, "loss": 0.2462, "step": 7504 }, { "epoch": 0.34809833024118736, "grad_norm": 7.56683349609375, "learning_rate": 7.388078694155161e-06, "loss": 0.3254, "step": 7505 }, { "epoch": 0.3481447124304267, "grad_norm": 5.074612140655518, "learning_rate": 7.387432091871454e-06, "loss": 0.3524, "step": 7506 }, { "epoch": 0.3481910946196661, "grad_norm": 7.338743209838867, "learning_rate": 7.386785437865287e-06, "loss": 0.4058, "step": 7507 }, { "epoch": 0.3482374768089054, "grad_norm": 9.286444664001465, "learning_rate": 7.386138732150675e-06, "loss": 0.4622, "step": 7508 }, { "epoch": 0.34828385899814474, "grad_norm": 5.567071914672852, "learning_rate": 7.385491974741625e-06, "loss": 0.3825, "step": 7509 }, { "epoch": 0.34833024118738404, "grad_norm": 6.689778804779053, "learning_rate": 7.38484516565215e-06, "loss": 0.3744, "step": 7510 }, { "epoch": 0.3483766233766234, "grad_norm": 20.677257537841797, "learning_rate": 7.384198304896262e-06, "loss": 0.3201, "step": 7511 }, { "epoch": 0.3484230055658627, "grad_norm": 6.279991626739502, "learning_rate": 7.3835513924879755e-06, "loss": 0.2651, "step": 7512 }, { "epoch": 0.34846938775510206, "grad_norm": 6.344236850738525, "learning_rate": 7.382904428441305e-06, "loss": 0.3513, "step": 7513 }, { "epoch": 0.34851576994434136, "grad_norm": 6.286149024963379, "learning_rate": 7.382257412770267e-06, "loss": 0.2172, "step": 7514 }, { "epoch": 0.3485621521335807, "grad_norm": 6.946451663970947, "learning_rate": 7.381610345488879e-06, "loss": 0.3217, "step": 7515 }, { "epoch": 0.34860853432282, "grad_norm": 25.823001861572266, "learning_rate": 7.3809632266111575e-06, "loss": 0.5347, "step": 7516 }, { "epoch": 0.3486549165120594, "grad_norm": 6.5049285888671875, "learning_rate": 7.380316056151125e-06, "loss": 0.311, "step": 7517 }, { "epoch": 0.3487012987012987, "grad_norm": 8.812020301818848, "learning_rate": 7.3796688341228e-06, "loss": 0.4533, "step": 7518 }, { "epoch": 0.34874768089053804, "grad_norm": 6.309755325317383, "learning_rate": 7.379021560540207e-06, "loss": 0.3852, "step": 7519 }, { "epoch": 0.34879406307977734, "grad_norm": 12.515377044677734, "learning_rate": 7.378374235417365e-06, "loss": 0.421, "step": 7520 }, { "epoch": 0.3488404452690167, "grad_norm": 8.257294654846191, "learning_rate": 7.377726858768298e-06, "loss": 0.4125, "step": 7521 }, { "epoch": 0.34888682745825605, "grad_norm": 9.63463020324707, "learning_rate": 7.377079430607036e-06, "loss": 0.3934, "step": 7522 }, { "epoch": 0.34893320964749536, "grad_norm": 7.9250359535217285, "learning_rate": 7.376431950947599e-06, "loss": 0.4046, "step": 7523 }, { "epoch": 0.3489795918367347, "grad_norm": 5.3528947830200195, "learning_rate": 7.375784419804018e-06, "loss": 0.3055, "step": 7524 }, { "epoch": 0.349025974025974, "grad_norm": 6.27425479888916, "learning_rate": 7.375136837190322e-06, "loss": 0.2904, "step": 7525 }, { "epoch": 0.3490723562152134, "grad_norm": 5.034092426300049, "learning_rate": 7.374489203120538e-06, "loss": 0.2436, "step": 7526 }, { "epoch": 0.3491187384044527, "grad_norm": 13.598836898803711, "learning_rate": 7.3738415176086975e-06, "loss": 0.4908, "step": 7527 }, { "epoch": 0.34916512059369204, "grad_norm": 6.14892578125, "learning_rate": 7.373193780668835e-06, "loss": 0.3436, "step": 7528 }, { "epoch": 0.34921150278293134, "grad_norm": 6.608493804931641, "learning_rate": 7.372545992314978e-06, "loss": 0.4729, "step": 7529 }, { "epoch": 0.3492578849721707, "grad_norm": 7.072799205780029, "learning_rate": 7.371898152561166e-06, "loss": 0.4398, "step": 7530 }, { "epoch": 0.34930426716141, "grad_norm": 7.958694934844971, "learning_rate": 7.3712502614214285e-06, "loss": 0.3932, "step": 7531 }, { "epoch": 0.34935064935064936, "grad_norm": 7.253201961517334, "learning_rate": 7.370602318909806e-06, "loss": 0.4324, "step": 7532 }, { "epoch": 0.34939703153988866, "grad_norm": 6.260262489318848, "learning_rate": 7.369954325040336e-06, "loss": 0.4125, "step": 7533 }, { "epoch": 0.349443413729128, "grad_norm": 8.65664005279541, "learning_rate": 7.369306279827054e-06, "loss": 0.3881, "step": 7534 }, { "epoch": 0.3494897959183674, "grad_norm": 8.213288307189941, "learning_rate": 7.368658183284002e-06, "loss": 0.3117, "step": 7535 }, { "epoch": 0.3495361781076067, "grad_norm": 8.394448280334473, "learning_rate": 7.36801003542522e-06, "loss": 0.2554, "step": 7536 }, { "epoch": 0.34958256029684603, "grad_norm": 8.028907775878906, "learning_rate": 7.3673618362647495e-06, "loss": 0.2757, "step": 7537 }, { "epoch": 0.34962894248608534, "grad_norm": 7.297528266906738, "learning_rate": 7.3667135858166335e-06, "loss": 0.4648, "step": 7538 }, { "epoch": 0.3496753246753247, "grad_norm": 7.910453796386719, "learning_rate": 7.3660652840949155e-06, "loss": 0.3168, "step": 7539 }, { "epoch": 0.349721706864564, "grad_norm": 4.836520671844482, "learning_rate": 7.36541693111364e-06, "loss": 0.3062, "step": 7540 }, { "epoch": 0.34976808905380335, "grad_norm": 4.588162899017334, "learning_rate": 7.364768526886856e-06, "loss": 0.3411, "step": 7541 }, { "epoch": 0.34981447124304266, "grad_norm": 6.54990291595459, "learning_rate": 7.364120071428609e-06, "loss": 0.4334, "step": 7542 }, { "epoch": 0.349860853432282, "grad_norm": 9.34988784790039, "learning_rate": 7.3634715647529476e-06, "loss": 0.4504, "step": 7543 }, { "epoch": 0.3499072356215213, "grad_norm": 31.874427795410156, "learning_rate": 7.36282300687392e-06, "loss": 0.4021, "step": 7544 }, { "epoch": 0.3499536178107607, "grad_norm": 4.266424179077148, "learning_rate": 7.362174397805579e-06, "loss": 0.364, "step": 7545 }, { "epoch": 0.35, "grad_norm": 3.5953001976013184, "learning_rate": 7.361525737561978e-06, "loss": 0.303, "step": 7546 }, { "epoch": 0.35004638218923934, "grad_norm": 8.688979148864746, "learning_rate": 7.360877026157164e-06, "loss": 0.3825, "step": 7547 }, { "epoch": 0.35009276437847864, "grad_norm": 7.294922351837158, "learning_rate": 7.360228263605196e-06, "loss": 0.3947, "step": 7548 }, { "epoch": 0.350139146567718, "grad_norm": 8.480627059936523, "learning_rate": 7.359579449920127e-06, "loss": 0.3521, "step": 7549 }, { "epoch": 0.35018552875695735, "grad_norm": 10.92585277557373, "learning_rate": 7.358930585116014e-06, "loss": 0.261, "step": 7550 }, { "epoch": 0.35023191094619666, "grad_norm": 7.096709728240967, "learning_rate": 7.358281669206915e-06, "loss": 0.3654, "step": 7551 }, { "epoch": 0.350278293135436, "grad_norm": 7.720959663391113, "learning_rate": 7.357632702206886e-06, "loss": 0.4501, "step": 7552 }, { "epoch": 0.3503246753246753, "grad_norm": 5.5425286293029785, "learning_rate": 7.3569836841299905e-06, "loss": 0.3293, "step": 7553 }, { "epoch": 0.3503710575139147, "grad_norm": 4.835607528686523, "learning_rate": 7.356334614990284e-06, "loss": 0.3828, "step": 7554 }, { "epoch": 0.350417439703154, "grad_norm": 4.544653415679932, "learning_rate": 7.355685494801831e-06, "loss": 0.2848, "step": 7555 }, { "epoch": 0.35046382189239333, "grad_norm": 6.902688026428223, "learning_rate": 7.355036323578696e-06, "loss": 0.4068, "step": 7556 }, { "epoch": 0.35051020408163264, "grad_norm": 6.451077461242676, "learning_rate": 7.354387101334939e-06, "loss": 0.3315, "step": 7557 }, { "epoch": 0.350556586270872, "grad_norm": 4.270594596862793, "learning_rate": 7.353737828084628e-06, "loss": 0.2971, "step": 7558 }, { "epoch": 0.3506029684601113, "grad_norm": 17.69098472595215, "learning_rate": 7.35308850384183e-06, "loss": 0.4897, "step": 7559 }, { "epoch": 0.35064935064935066, "grad_norm": 4.2281813621521, "learning_rate": 7.35243912862061e-06, "loss": 0.246, "step": 7560 }, { "epoch": 0.35069573283858996, "grad_norm": 4.755935192108154, "learning_rate": 7.351789702435037e-06, "loss": 0.3216, "step": 7561 }, { "epoch": 0.3507421150278293, "grad_norm": 5.551161289215088, "learning_rate": 7.351140225299179e-06, "loss": 0.3071, "step": 7562 }, { "epoch": 0.3507884972170687, "grad_norm": 7.4957146644592285, "learning_rate": 7.35049069722711e-06, "loss": 0.3451, "step": 7563 }, { "epoch": 0.350834879406308, "grad_norm": 11.791380882263184, "learning_rate": 7.349841118232899e-06, "loss": 0.4548, "step": 7564 }, { "epoch": 0.35088126159554733, "grad_norm": 6.335854530334473, "learning_rate": 7.349191488330619e-06, "loss": 0.2549, "step": 7565 }, { "epoch": 0.35092764378478664, "grad_norm": 13.115816116333008, "learning_rate": 7.348541807534346e-06, "loss": 0.4385, "step": 7566 }, { "epoch": 0.350974025974026, "grad_norm": 11.36520767211914, "learning_rate": 7.347892075858152e-06, "loss": 0.4165, "step": 7567 }, { "epoch": 0.3510204081632653, "grad_norm": 7.4090447425842285, "learning_rate": 7.347242293316115e-06, "loss": 0.2722, "step": 7568 }, { "epoch": 0.35106679035250465, "grad_norm": 10.9607572555542, "learning_rate": 7.346592459922313e-06, "loss": 0.4765, "step": 7569 }, { "epoch": 0.35111317254174396, "grad_norm": 8.646228790283203, "learning_rate": 7.345942575690821e-06, "loss": 0.2062, "step": 7570 }, { "epoch": 0.3511595547309833, "grad_norm": 9.386663436889648, "learning_rate": 7.345292640635721e-06, "loss": 0.3217, "step": 7571 }, { "epoch": 0.3512059369202226, "grad_norm": 10.16871166229248, "learning_rate": 7.344642654771095e-06, "loss": 0.4383, "step": 7572 }, { "epoch": 0.351252319109462, "grad_norm": 11.256780624389648, "learning_rate": 7.34399261811102e-06, "loss": 0.4054, "step": 7573 }, { "epoch": 0.3512987012987013, "grad_norm": 5.517991065979004, "learning_rate": 7.3433425306695825e-06, "loss": 0.3284, "step": 7574 }, { "epoch": 0.35134508348794063, "grad_norm": 8.629050254821777, "learning_rate": 7.342692392460864e-06, "loss": 0.2959, "step": 7575 }, { "epoch": 0.35139146567717994, "grad_norm": 7.722752094268799, "learning_rate": 7.342042203498952e-06, "loss": 0.3574, "step": 7576 }, { "epoch": 0.3514378478664193, "grad_norm": 8.731023788452148, "learning_rate": 7.34139196379793e-06, "loss": 0.2425, "step": 7577 }, { "epoch": 0.35148423005565865, "grad_norm": 4.896998405456543, "learning_rate": 7.340741673371886e-06, "loss": 0.2248, "step": 7578 }, { "epoch": 0.35153061224489796, "grad_norm": 7.347400665283203, "learning_rate": 7.340091332234909e-06, "loss": 0.3489, "step": 7579 }, { "epoch": 0.3515769944341373, "grad_norm": 5.730246543884277, "learning_rate": 7.339440940401087e-06, "loss": 0.2985, "step": 7580 }, { "epoch": 0.3516233766233766, "grad_norm": 6.20880126953125, "learning_rate": 7.3387904978845104e-06, "loss": 0.4376, "step": 7581 }, { "epoch": 0.351669758812616, "grad_norm": 5.213616847991943, "learning_rate": 7.338140004699272e-06, "loss": 0.3355, "step": 7582 }, { "epoch": 0.3517161410018553, "grad_norm": 7.482257843017578, "learning_rate": 7.337489460859464e-06, "loss": 0.4387, "step": 7583 }, { "epoch": 0.35176252319109463, "grad_norm": 4.077905178070068, "learning_rate": 7.336838866379179e-06, "loss": 0.2254, "step": 7584 }, { "epoch": 0.35180890538033394, "grad_norm": 6.322109222412109, "learning_rate": 7.336188221272513e-06, "loss": 0.3201, "step": 7585 }, { "epoch": 0.3518552875695733, "grad_norm": 10.993964195251465, "learning_rate": 7.335537525553561e-06, "loss": 0.4151, "step": 7586 }, { "epoch": 0.3519016697588126, "grad_norm": 7.805482864379883, "learning_rate": 7.33488677923642e-06, "loss": 0.2207, "step": 7587 }, { "epoch": 0.35194805194805195, "grad_norm": 3.7659318447113037, "learning_rate": 7.334235982335189e-06, "loss": 0.2927, "step": 7588 }, { "epoch": 0.35199443413729126, "grad_norm": 6.60040807723999, "learning_rate": 7.333585134863966e-06, "loss": 0.3559, "step": 7589 }, { "epoch": 0.3520408163265306, "grad_norm": 4.8458709716796875, "learning_rate": 7.332934236836854e-06, "loss": 0.3228, "step": 7590 }, { "epoch": 0.3520871985157699, "grad_norm": 7.002963542938232, "learning_rate": 7.332283288267949e-06, "loss": 0.2406, "step": 7591 }, { "epoch": 0.3521335807050093, "grad_norm": 5.79461145401001, "learning_rate": 7.33163228917136e-06, "loss": 0.3105, "step": 7592 }, { "epoch": 0.35217996289424863, "grad_norm": 8.294127464294434, "learning_rate": 7.330981239561186e-06, "loss": 0.3715, "step": 7593 }, { "epoch": 0.35222634508348794, "grad_norm": 9.44486141204834, "learning_rate": 7.330330139451531e-06, "loss": 0.3524, "step": 7594 }, { "epoch": 0.3522727272727273, "grad_norm": 5.2508955001831055, "learning_rate": 7.329678988856504e-06, "loss": 0.2421, "step": 7595 }, { "epoch": 0.3523191094619666, "grad_norm": 5.116163730621338, "learning_rate": 7.329027787790212e-06, "loss": 0.3913, "step": 7596 }, { "epoch": 0.35236549165120595, "grad_norm": 10.617137908935547, "learning_rate": 7.328376536266759e-06, "loss": 0.2954, "step": 7597 }, { "epoch": 0.35241187384044526, "grad_norm": 5.594064712524414, "learning_rate": 7.327725234300258e-06, "loss": 0.2935, "step": 7598 }, { "epoch": 0.3524582560296846, "grad_norm": 11.041864395141602, "learning_rate": 7.3270738819048185e-06, "loss": 0.4521, "step": 7599 }, { "epoch": 0.3525046382189239, "grad_norm": 8.8766508102417, "learning_rate": 7.326422479094548e-06, "loss": 0.3351, "step": 7600 }, { "epoch": 0.3525510204081633, "grad_norm": 8.45409107208252, "learning_rate": 7.325771025883563e-06, "loss": 0.4823, "step": 7601 }, { "epoch": 0.3525974025974026, "grad_norm": 10.107551574707031, "learning_rate": 7.325119522285976e-06, "loss": 0.4093, "step": 7602 }, { "epoch": 0.35264378478664193, "grad_norm": 11.388524055480957, "learning_rate": 7.324467968315901e-06, "loss": 0.4647, "step": 7603 }, { "epoch": 0.35269016697588124, "grad_norm": 8.928329467773438, "learning_rate": 7.323816363987453e-06, "loss": 0.3575, "step": 7604 }, { "epoch": 0.3527365491651206, "grad_norm": 6.161660671234131, "learning_rate": 7.3231647093147475e-06, "loss": 0.4698, "step": 7605 }, { "epoch": 0.35278293135435995, "grad_norm": 9.153273582458496, "learning_rate": 7.322513004311905e-06, "loss": 0.3358, "step": 7606 }, { "epoch": 0.35282931354359925, "grad_norm": 7.228038311004639, "learning_rate": 7.321861248993044e-06, "loss": 0.3666, "step": 7607 }, { "epoch": 0.3528756957328386, "grad_norm": 8.626927375793457, "learning_rate": 7.321209443372284e-06, "loss": 0.4122, "step": 7608 }, { "epoch": 0.3529220779220779, "grad_norm": 8.05962085723877, "learning_rate": 7.320557587463743e-06, "loss": 0.4331, "step": 7609 }, { "epoch": 0.3529684601113173, "grad_norm": 6.242820739746094, "learning_rate": 7.319905681281549e-06, "loss": 0.3409, "step": 7610 }, { "epoch": 0.3530148423005566, "grad_norm": 9.854975700378418, "learning_rate": 7.319253724839821e-06, "loss": 0.4966, "step": 7611 }, { "epoch": 0.35306122448979593, "grad_norm": 4.774576187133789, "learning_rate": 7.318601718152685e-06, "loss": 0.3085, "step": 7612 }, { "epoch": 0.35310760667903524, "grad_norm": 12.39823055267334, "learning_rate": 7.317949661234265e-06, "loss": 0.5771, "step": 7613 }, { "epoch": 0.3531539888682746, "grad_norm": 9.701282501220703, "learning_rate": 7.317297554098688e-06, "loss": 0.3844, "step": 7614 }, { "epoch": 0.3532003710575139, "grad_norm": 7.667847633361816, "learning_rate": 7.316645396760081e-06, "loss": 0.4804, "step": 7615 }, { "epoch": 0.35324675324675325, "grad_norm": 16.940410614013672, "learning_rate": 7.3159931892325754e-06, "loss": 0.3986, "step": 7616 }, { "epoch": 0.35329313543599256, "grad_norm": 4.129483699798584, "learning_rate": 7.315340931530297e-06, "loss": 0.3344, "step": 7617 }, { "epoch": 0.3533395176252319, "grad_norm": 5.616858005523682, "learning_rate": 7.31468862366738e-06, "loss": 0.3624, "step": 7618 }, { "epoch": 0.3533858998144712, "grad_norm": 8.876551628112793, "learning_rate": 7.3140362656579535e-06, "loss": 0.3944, "step": 7619 }, { "epoch": 0.3534322820037106, "grad_norm": 7.275905609130859, "learning_rate": 7.3133838575161534e-06, "loss": 0.3983, "step": 7620 }, { "epoch": 0.35347866419294993, "grad_norm": 9.569831848144531, "learning_rate": 7.31273139925611e-06, "loss": 0.4864, "step": 7621 }, { "epoch": 0.35352504638218923, "grad_norm": 6.760860443115234, "learning_rate": 7.312078890891962e-06, "loss": 0.3734, "step": 7622 }, { "epoch": 0.3535714285714286, "grad_norm": 7.192045211791992, "learning_rate": 7.311426332437845e-06, "loss": 0.3671, "step": 7623 }, { "epoch": 0.3536178107606679, "grad_norm": 12.5098876953125, "learning_rate": 7.310773723907895e-06, "loss": 0.5408, "step": 7624 }, { "epoch": 0.35366419294990725, "grad_norm": 5.767961025238037, "learning_rate": 7.31012106531625e-06, "loss": 0.2734, "step": 7625 }, { "epoch": 0.35371057513914655, "grad_norm": 7.830362319946289, "learning_rate": 7.309468356677052e-06, "loss": 0.2461, "step": 7626 }, { "epoch": 0.3537569573283859, "grad_norm": 4.452605724334717, "learning_rate": 7.3088155980044396e-06, "loss": 0.3265, "step": 7627 }, { "epoch": 0.3538033395176252, "grad_norm": 8.594176292419434, "learning_rate": 7.308162789312555e-06, "loss": 0.3291, "step": 7628 }, { "epoch": 0.3538497217068646, "grad_norm": 8.798863410949707, "learning_rate": 7.3075099306155415e-06, "loss": 0.2673, "step": 7629 }, { "epoch": 0.3538961038961039, "grad_norm": 6.704944610595703, "learning_rate": 7.306857021927543e-06, "loss": 0.3375, "step": 7630 }, { "epoch": 0.35394248608534323, "grad_norm": 6.86163330078125, "learning_rate": 7.306204063262703e-06, "loss": 0.3065, "step": 7631 }, { "epoch": 0.35398886827458254, "grad_norm": 5.352630615234375, "learning_rate": 7.305551054635167e-06, "loss": 0.2989, "step": 7632 }, { "epoch": 0.3540352504638219, "grad_norm": 14.625016212463379, "learning_rate": 7.304897996059085e-06, "loss": 0.4519, "step": 7633 }, { "epoch": 0.35408163265306125, "grad_norm": 5.826683044433594, "learning_rate": 7.304244887548604e-06, "loss": 0.3048, "step": 7634 }, { "epoch": 0.35412801484230055, "grad_norm": 6.052479267120361, "learning_rate": 7.303591729117871e-06, "loss": 0.2826, "step": 7635 }, { "epoch": 0.3541743970315399, "grad_norm": 6.4696149826049805, "learning_rate": 7.30293852078104e-06, "loss": 0.3515, "step": 7636 }, { "epoch": 0.3542207792207792, "grad_norm": 7.235960006713867, "learning_rate": 7.302285262552261e-06, "loss": 0.2709, "step": 7637 }, { "epoch": 0.35426716141001857, "grad_norm": 4.668694019317627, "learning_rate": 7.301631954445685e-06, "loss": 0.2221, "step": 7638 }, { "epoch": 0.3543135435992579, "grad_norm": 10.456062316894531, "learning_rate": 7.300978596475467e-06, "loss": 0.2947, "step": 7639 }, { "epoch": 0.35435992578849723, "grad_norm": 6.2902021408081055, "learning_rate": 7.300325188655762e-06, "loss": 0.3201, "step": 7640 }, { "epoch": 0.35440630797773653, "grad_norm": 8.257512092590332, "learning_rate": 7.299671731000724e-06, "loss": 0.3312, "step": 7641 }, { "epoch": 0.3544526901669759, "grad_norm": 4.985842227935791, "learning_rate": 7.299018223524511e-06, "loss": 0.366, "step": 7642 }, { "epoch": 0.3544990723562152, "grad_norm": 4.925297260284424, "learning_rate": 7.298364666241281e-06, "loss": 0.3764, "step": 7643 }, { "epoch": 0.35454545454545455, "grad_norm": 9.61340618133545, "learning_rate": 7.297711059165193e-06, "loss": 0.4121, "step": 7644 }, { "epoch": 0.35459183673469385, "grad_norm": 4.1931328773498535, "learning_rate": 7.2970574023104065e-06, "loss": 0.3082, "step": 7645 }, { "epoch": 0.3546382189239332, "grad_norm": 13.4073486328125, "learning_rate": 7.296403695691084e-06, "loss": 0.4904, "step": 7646 }, { "epoch": 0.3546846011131725, "grad_norm": 6.5547356605529785, "learning_rate": 7.295749939321385e-06, "loss": 0.3943, "step": 7647 }, { "epoch": 0.3547309833024119, "grad_norm": 8.78962516784668, "learning_rate": 7.295096133215476e-06, "loss": 0.4481, "step": 7648 }, { "epoch": 0.35477736549165123, "grad_norm": 11.131970405578613, "learning_rate": 7.2944422773875175e-06, "loss": 0.4028, "step": 7649 }, { "epoch": 0.35482374768089053, "grad_norm": 5.379807472229004, "learning_rate": 7.293788371851679e-06, "loss": 0.3619, "step": 7650 }, { "epoch": 0.3548701298701299, "grad_norm": 4.918361186981201, "learning_rate": 7.293134416622125e-06, "loss": 0.3321, "step": 7651 }, { "epoch": 0.3549165120593692, "grad_norm": 7.969964027404785, "learning_rate": 7.292480411713023e-06, "loss": 0.4209, "step": 7652 }, { "epoch": 0.35496289424860855, "grad_norm": 10.220682144165039, "learning_rate": 7.291826357138544e-06, "loss": 0.3951, "step": 7653 }, { "epoch": 0.35500927643784785, "grad_norm": 4.293881416320801, "learning_rate": 7.291172252912854e-06, "loss": 0.3312, "step": 7654 }, { "epoch": 0.3550556586270872, "grad_norm": 4.959682464599609, "learning_rate": 7.290518099050125e-06, "loss": 0.3209, "step": 7655 }, { "epoch": 0.3551020408163265, "grad_norm": 8.56179428100586, "learning_rate": 7.289863895564531e-06, "loss": 0.4953, "step": 7656 }, { "epoch": 0.35514842300556587, "grad_norm": 7.089725017547607, "learning_rate": 7.289209642470243e-06, "loss": 0.3763, "step": 7657 }, { "epoch": 0.3551948051948052, "grad_norm": 9.057882308959961, "learning_rate": 7.288555339781434e-06, "loss": 0.4342, "step": 7658 }, { "epoch": 0.35524118738404453, "grad_norm": 7.430734157562256, "learning_rate": 7.287900987512283e-06, "loss": 0.3425, "step": 7659 }, { "epoch": 0.35528756957328383, "grad_norm": 5.213415622711182, "learning_rate": 7.2872465856769625e-06, "loss": 0.3314, "step": 7660 }, { "epoch": 0.3553339517625232, "grad_norm": 7.500868320465088, "learning_rate": 7.286592134289652e-06, "loss": 0.3493, "step": 7661 }, { "epoch": 0.35538033395176255, "grad_norm": 7.487919330596924, "learning_rate": 7.285937633364528e-06, "loss": 0.4825, "step": 7662 }, { "epoch": 0.35542671614100185, "grad_norm": 8.613683700561523, "learning_rate": 7.28528308291577e-06, "loss": 0.4256, "step": 7663 }, { "epoch": 0.3554730983302412, "grad_norm": 21.891826629638672, "learning_rate": 7.284628482957563e-06, "loss": 0.3671, "step": 7664 }, { "epoch": 0.3555194805194805, "grad_norm": 5.016006946563721, "learning_rate": 7.283973833504081e-06, "loss": 0.3791, "step": 7665 }, { "epoch": 0.35556586270871987, "grad_norm": 6.692221641540527, "learning_rate": 7.283319134569513e-06, "loss": 0.3318, "step": 7666 }, { "epoch": 0.3556122448979592, "grad_norm": 6.182792663574219, "learning_rate": 7.282664386168039e-06, "loss": 0.2328, "step": 7667 }, { "epoch": 0.35565862708719853, "grad_norm": 6.887511253356934, "learning_rate": 7.2820095883138456e-06, "loss": 0.2855, "step": 7668 }, { "epoch": 0.35570500927643783, "grad_norm": 5.686439514160156, "learning_rate": 7.2813547410211186e-06, "loss": 0.252, "step": 7669 }, { "epoch": 0.3557513914656772, "grad_norm": 15.031940460205078, "learning_rate": 7.280699844304044e-06, "loss": 0.5066, "step": 7670 }, { "epoch": 0.3557977736549165, "grad_norm": 6.379899024963379, "learning_rate": 7.2800448981768094e-06, "loss": 0.291, "step": 7671 }, { "epoch": 0.35584415584415585, "grad_norm": 4.827749729156494, "learning_rate": 7.279389902653606e-06, "loss": 0.2576, "step": 7672 }, { "epoch": 0.35589053803339515, "grad_norm": 5.236083030700684, "learning_rate": 7.278734857748621e-06, "loss": 0.3649, "step": 7673 }, { "epoch": 0.3559369202226345, "grad_norm": 6.41839075088501, "learning_rate": 7.278079763476048e-06, "loss": 0.3328, "step": 7674 }, { "epoch": 0.3559833024118738, "grad_norm": 8.689355850219727, "learning_rate": 7.277424619850079e-06, "loss": 0.4746, "step": 7675 }, { "epoch": 0.35602968460111317, "grad_norm": 8.173922538757324, "learning_rate": 7.2767694268849065e-06, "loss": 0.4195, "step": 7676 }, { "epoch": 0.35607606679035253, "grad_norm": 18.563343048095703, "learning_rate": 7.276114184594725e-06, "loss": 0.5172, "step": 7677 }, { "epoch": 0.35612244897959183, "grad_norm": 13.65859603881836, "learning_rate": 7.275458892993731e-06, "loss": 0.4298, "step": 7678 }, { "epoch": 0.3561688311688312, "grad_norm": 5.777818202972412, "learning_rate": 7.274803552096119e-06, "loss": 0.2742, "step": 7679 }, { "epoch": 0.3562152133580705, "grad_norm": 12.249480247497559, "learning_rate": 7.274148161916088e-06, "loss": 0.3559, "step": 7680 }, { "epoch": 0.35626159554730985, "grad_norm": 12.068827629089355, "learning_rate": 7.273492722467839e-06, "loss": 0.4738, "step": 7681 }, { "epoch": 0.35630797773654915, "grad_norm": 5.580172538757324, "learning_rate": 7.272837233765566e-06, "loss": 0.3405, "step": 7682 }, { "epoch": 0.3563543599257885, "grad_norm": 6.326277256011963, "learning_rate": 7.272181695823476e-06, "loss": 0.3872, "step": 7683 }, { "epoch": 0.3564007421150278, "grad_norm": 4.988012313842773, "learning_rate": 7.271526108655765e-06, "loss": 0.3991, "step": 7684 }, { "epoch": 0.35644712430426717, "grad_norm": 6.727252006530762, "learning_rate": 7.270870472276641e-06, "loss": 0.3991, "step": 7685 }, { "epoch": 0.3564935064935065, "grad_norm": 7.267383098602295, "learning_rate": 7.270214786700306e-06, "loss": 0.293, "step": 7686 }, { "epoch": 0.35653988868274583, "grad_norm": 5.659390449523926, "learning_rate": 7.269559051940964e-06, "loss": 0.396, "step": 7687 }, { "epoch": 0.35658627087198513, "grad_norm": 7.921783447265625, "learning_rate": 7.268903268012823e-06, "loss": 0.3322, "step": 7688 }, { "epoch": 0.3566326530612245, "grad_norm": 7.921095371246338, "learning_rate": 7.268247434930089e-06, "loss": 0.4667, "step": 7689 }, { "epoch": 0.35667903525046385, "grad_norm": 8.451475143432617, "learning_rate": 7.2675915527069705e-06, "loss": 0.2909, "step": 7690 }, { "epoch": 0.35672541743970315, "grad_norm": 13.000038146972656, "learning_rate": 7.266935621357678e-06, "loss": 0.427, "step": 7691 }, { "epoch": 0.3567717996289425, "grad_norm": 8.24911880493164, "learning_rate": 7.266279640896421e-06, "loss": 0.2335, "step": 7692 }, { "epoch": 0.3568181818181818, "grad_norm": 4.413698673248291, "learning_rate": 7.265623611337409e-06, "loss": 0.2463, "step": 7693 }, { "epoch": 0.35686456400742117, "grad_norm": 7.231980323791504, "learning_rate": 7.264967532694859e-06, "loss": 0.389, "step": 7694 }, { "epoch": 0.3569109461966605, "grad_norm": 5.86198616027832, "learning_rate": 7.2643114049829795e-06, "loss": 0.4741, "step": 7695 }, { "epoch": 0.35695732838589983, "grad_norm": 16.496679306030273, "learning_rate": 7.263655228215991e-06, "loss": 0.3284, "step": 7696 }, { "epoch": 0.35700371057513913, "grad_norm": 7.126336574554443, "learning_rate": 7.262999002408103e-06, "loss": 0.385, "step": 7697 }, { "epoch": 0.3570500927643785, "grad_norm": 5.254933834075928, "learning_rate": 7.262342727573536e-06, "loss": 0.2958, "step": 7698 }, { "epoch": 0.3570964749536178, "grad_norm": 11.195767402648926, "learning_rate": 7.261686403726508e-06, "loss": 0.4525, "step": 7699 }, { "epoch": 0.35714285714285715, "grad_norm": 8.218993186950684, "learning_rate": 7.261030030881236e-06, "loss": 0.3454, "step": 7700 }, { "epoch": 0.35718923933209645, "grad_norm": 6.430708408355713, "learning_rate": 7.260373609051942e-06, "loss": 0.4586, "step": 7701 }, { "epoch": 0.3572356215213358, "grad_norm": 4.593548774719238, "learning_rate": 7.259717138252844e-06, "loss": 0.346, "step": 7702 }, { "epoch": 0.3572820037105751, "grad_norm": 4.992074966430664, "learning_rate": 7.259060618498167e-06, "loss": 0.2864, "step": 7703 }, { "epoch": 0.35732838589981447, "grad_norm": 6.060727119445801, "learning_rate": 7.258404049802135e-06, "loss": 0.3008, "step": 7704 }, { "epoch": 0.35737476808905383, "grad_norm": 4.671330451965332, "learning_rate": 7.257747432178968e-06, "loss": 0.2908, "step": 7705 }, { "epoch": 0.35742115027829313, "grad_norm": 5.9426960945129395, "learning_rate": 7.257090765642894e-06, "loss": 0.3426, "step": 7706 }, { "epoch": 0.3574675324675325, "grad_norm": 6.6356353759765625, "learning_rate": 7.25643405020814e-06, "loss": 0.2207, "step": 7707 }, { "epoch": 0.3575139146567718, "grad_norm": 6.817548751831055, "learning_rate": 7.255777285888932e-06, "loss": 0.4173, "step": 7708 }, { "epoch": 0.35756029684601115, "grad_norm": 9.27723503112793, "learning_rate": 7.255120472699499e-06, "loss": 0.4988, "step": 7709 }, { "epoch": 0.35760667903525045, "grad_norm": 7.30925989151001, "learning_rate": 7.25446361065407e-06, "loss": 0.394, "step": 7710 }, { "epoch": 0.3576530612244898, "grad_norm": 5.991437911987305, "learning_rate": 7.253806699766878e-06, "loss": 0.2701, "step": 7711 }, { "epoch": 0.3576994434137291, "grad_norm": 7.643215656280518, "learning_rate": 7.25314974005215e-06, "loss": 0.3607, "step": 7712 }, { "epoch": 0.35774582560296847, "grad_norm": 5.735290050506592, "learning_rate": 7.252492731524122e-06, "loss": 0.3301, "step": 7713 }, { "epoch": 0.3577922077922078, "grad_norm": 9.308561325073242, "learning_rate": 7.2518356741970285e-06, "loss": 0.2669, "step": 7714 }, { "epoch": 0.35783858998144713, "grad_norm": 6.8409647941589355, "learning_rate": 7.251178568085101e-06, "loss": 0.3493, "step": 7715 }, { "epoch": 0.35788497217068643, "grad_norm": 5.518651008605957, "learning_rate": 7.250521413202578e-06, "loss": 0.3461, "step": 7716 }, { "epoch": 0.3579313543599258, "grad_norm": 8.197917938232422, "learning_rate": 7.249864209563695e-06, "loss": 0.3717, "step": 7717 }, { "epoch": 0.3579777365491651, "grad_norm": 8.135849952697754, "learning_rate": 7.249206957182692e-06, "loss": 0.41, "step": 7718 }, { "epoch": 0.35802411873840445, "grad_norm": 6.461931228637695, "learning_rate": 7.248549656073805e-06, "loss": 0.4005, "step": 7719 }, { "epoch": 0.3580705009276438, "grad_norm": 24.13715362548828, "learning_rate": 7.247892306251276e-06, "loss": 0.3703, "step": 7720 }, { "epoch": 0.3581168831168831, "grad_norm": 5.137388706207275, "learning_rate": 7.2472349077293465e-06, "loss": 0.2775, "step": 7721 }, { "epoch": 0.35816326530612247, "grad_norm": 6.589743614196777, "learning_rate": 7.246577460522259e-06, "loss": 0.3295, "step": 7722 }, { "epoch": 0.35820964749536177, "grad_norm": 10.190287590026855, "learning_rate": 7.245919964644254e-06, "loss": 0.4882, "step": 7723 }, { "epoch": 0.35825602968460113, "grad_norm": 4.5255632400512695, "learning_rate": 7.245262420109578e-06, "loss": 0.2894, "step": 7724 }, { "epoch": 0.35830241187384043, "grad_norm": 6.821828842163086, "learning_rate": 7.244604826932475e-06, "loss": 0.2634, "step": 7725 }, { "epoch": 0.3583487940630798, "grad_norm": 9.746335983276367, "learning_rate": 7.243947185127194e-06, "loss": 0.4857, "step": 7726 }, { "epoch": 0.3583951762523191, "grad_norm": 5.755262851715088, "learning_rate": 7.24328949470798e-06, "loss": 0.4098, "step": 7727 }, { "epoch": 0.35844155844155845, "grad_norm": 3.970459461212158, "learning_rate": 7.2426317556890825e-06, "loss": 0.2985, "step": 7728 }, { "epoch": 0.35848794063079775, "grad_norm": 7.909958839416504, "learning_rate": 7.24197396808475e-06, "loss": 0.3252, "step": 7729 }, { "epoch": 0.3585343228200371, "grad_norm": 5.901336669921875, "learning_rate": 7.2413161319092354e-06, "loss": 0.3691, "step": 7730 }, { "epoch": 0.3585807050092764, "grad_norm": 4.600536346435547, "learning_rate": 7.240658247176788e-06, "loss": 0.3283, "step": 7731 }, { "epoch": 0.35862708719851577, "grad_norm": 6.907156944274902, "learning_rate": 7.240000313901661e-06, "loss": 0.3386, "step": 7732 }, { "epoch": 0.35867346938775513, "grad_norm": 8.512638092041016, "learning_rate": 7.239342332098112e-06, "loss": 0.4968, "step": 7733 }, { "epoch": 0.35871985157699443, "grad_norm": 4.343321323394775, "learning_rate": 7.238684301780389e-06, "loss": 0.278, "step": 7734 }, { "epoch": 0.3587662337662338, "grad_norm": 9.4645414352417, "learning_rate": 7.238026222962753e-06, "loss": 0.4184, "step": 7735 }, { "epoch": 0.3588126159554731, "grad_norm": 5.5608229637146, "learning_rate": 7.237368095659459e-06, "loss": 0.349, "step": 7736 }, { "epoch": 0.35885899814471245, "grad_norm": 7.748444080352783, "learning_rate": 7.236709919884764e-06, "loss": 0.3988, "step": 7737 }, { "epoch": 0.35890538033395175, "grad_norm": 8.015893936157227, "learning_rate": 7.236051695652929e-06, "loss": 0.2674, "step": 7738 }, { "epoch": 0.3589517625231911, "grad_norm": 16.144145965576172, "learning_rate": 7.235393422978214e-06, "loss": 0.5539, "step": 7739 }, { "epoch": 0.3589981447124304, "grad_norm": 4.665989875793457, "learning_rate": 7.234735101874879e-06, "loss": 0.2948, "step": 7740 }, { "epoch": 0.35904452690166977, "grad_norm": 7.5288801193237305, "learning_rate": 7.234076732357187e-06, "loss": 0.3703, "step": 7741 }, { "epoch": 0.35909090909090907, "grad_norm": 9.571296691894531, "learning_rate": 7.233418314439401e-06, "loss": 0.3111, "step": 7742 }, { "epoch": 0.35913729128014843, "grad_norm": 10.015591621398926, "learning_rate": 7.232759848135787e-06, "loss": 0.4545, "step": 7743 }, { "epoch": 0.35918367346938773, "grad_norm": 10.421945571899414, "learning_rate": 7.232101333460605e-06, "loss": 0.4452, "step": 7744 }, { "epoch": 0.3592300556586271, "grad_norm": 13.267529487609863, "learning_rate": 7.231442770428127e-06, "loss": 0.3981, "step": 7745 }, { "epoch": 0.3592764378478664, "grad_norm": 5.282776355743408, "learning_rate": 7.230784159052619e-06, "loss": 0.2821, "step": 7746 }, { "epoch": 0.35932282003710575, "grad_norm": 8.84736442565918, "learning_rate": 7.230125499348346e-06, "loss": 0.4082, "step": 7747 }, { "epoch": 0.3593692022263451, "grad_norm": 20.859405517578125, "learning_rate": 7.2294667913295825e-06, "loss": 0.4973, "step": 7748 }, { "epoch": 0.3594155844155844, "grad_norm": 4.4166178703308105, "learning_rate": 7.228808035010597e-06, "loss": 0.2522, "step": 7749 }, { "epoch": 0.35946196660482377, "grad_norm": 10.000218391418457, "learning_rate": 7.22814923040566e-06, "loss": 0.3635, "step": 7750 }, { "epoch": 0.35950834879406307, "grad_norm": 9.636913299560547, "learning_rate": 7.227490377529045e-06, "loss": 0.3251, "step": 7751 }, { "epoch": 0.35955473098330243, "grad_norm": 8.05389404296875, "learning_rate": 7.226831476395028e-06, "loss": 0.299, "step": 7752 }, { "epoch": 0.35960111317254173, "grad_norm": 7.015909671783447, "learning_rate": 7.2261725270178795e-06, "loss": 0.3872, "step": 7753 }, { "epoch": 0.3596474953617811, "grad_norm": 8.209550857543945, "learning_rate": 7.225513529411878e-06, "loss": 0.41, "step": 7754 }, { "epoch": 0.3596938775510204, "grad_norm": 7.398043632507324, "learning_rate": 7.2248544835912995e-06, "loss": 0.3451, "step": 7755 }, { "epoch": 0.35974025974025975, "grad_norm": 6.364692211151123, "learning_rate": 7.224195389570422e-06, "loss": 0.3021, "step": 7756 }, { "epoch": 0.35978664192949905, "grad_norm": 8.924383163452148, "learning_rate": 7.223536247363525e-06, "loss": 0.3592, "step": 7757 }, { "epoch": 0.3598330241187384, "grad_norm": 13.247901916503906, "learning_rate": 7.222877056984888e-06, "loss": 0.4347, "step": 7758 }, { "epoch": 0.3598794063079777, "grad_norm": 6.221672058105469, "learning_rate": 7.222217818448791e-06, "loss": 0.4802, "step": 7759 }, { "epoch": 0.35992578849721707, "grad_norm": 7.860283851623535, "learning_rate": 7.221558531769519e-06, "loss": 0.415, "step": 7760 }, { "epoch": 0.3599721706864564, "grad_norm": 5.7665839195251465, "learning_rate": 7.220899196961352e-06, "loss": 0.2397, "step": 7761 }, { "epoch": 0.36001855287569573, "grad_norm": 7.92635440826416, "learning_rate": 7.2202398140385744e-06, "loss": 0.4291, "step": 7762 }, { "epoch": 0.3600649350649351, "grad_norm": 5.800205707550049, "learning_rate": 7.219580383015474e-06, "loss": 0.4007, "step": 7763 }, { "epoch": 0.3601113172541744, "grad_norm": 12.299821853637695, "learning_rate": 7.218920903906334e-06, "loss": 0.4531, "step": 7764 }, { "epoch": 0.36015769944341375, "grad_norm": 4.797526836395264, "learning_rate": 7.2182613767254436e-06, "loss": 0.43, "step": 7765 }, { "epoch": 0.36020408163265305, "grad_norm": 9.049525260925293, "learning_rate": 7.217601801487091e-06, "loss": 0.4722, "step": 7766 }, { "epoch": 0.3602504638218924, "grad_norm": 3.6764464378356934, "learning_rate": 7.216942178205564e-06, "loss": 0.3282, "step": 7767 }, { "epoch": 0.3602968460111317, "grad_norm": 5.23999547958374, "learning_rate": 7.216282506895155e-06, "loss": 0.3091, "step": 7768 }, { "epoch": 0.36034322820037107, "grad_norm": 5.127115726470947, "learning_rate": 7.215622787570154e-06, "loss": 0.3814, "step": 7769 }, { "epoch": 0.36038961038961037, "grad_norm": 6.791162967681885, "learning_rate": 7.214963020244853e-06, "loss": 0.3096, "step": 7770 }, { "epoch": 0.36043599257884973, "grad_norm": 7.413999557495117, "learning_rate": 7.214303204933548e-06, "loss": 0.376, "step": 7771 }, { "epoch": 0.36048237476808903, "grad_norm": 6.594059944152832, "learning_rate": 7.213643341650532e-06, "loss": 0.3716, "step": 7772 }, { "epoch": 0.3605287569573284, "grad_norm": 5.860900402069092, "learning_rate": 7.212983430410099e-06, "loss": 0.3947, "step": 7773 }, { "epoch": 0.3605751391465677, "grad_norm": 8.14990234375, "learning_rate": 7.212323471226548e-06, "loss": 0.3931, "step": 7774 }, { "epoch": 0.36062152133580705, "grad_norm": 8.120125770568848, "learning_rate": 7.211663464114176e-06, "loss": 0.3573, "step": 7775 }, { "epoch": 0.3606679035250464, "grad_norm": 4.137100696563721, "learning_rate": 7.211003409087282e-06, "loss": 0.2575, "step": 7776 }, { "epoch": 0.3607142857142857, "grad_norm": 7.384495735168457, "learning_rate": 7.210343306160163e-06, "loss": 0.3681, "step": 7777 }, { "epoch": 0.36076066790352507, "grad_norm": 8.080016136169434, "learning_rate": 7.209683155347125e-06, "loss": 0.4685, "step": 7778 }, { "epoch": 0.36080705009276437, "grad_norm": 7.002378463745117, "learning_rate": 7.209022956662466e-06, "loss": 0.3766, "step": 7779 }, { "epoch": 0.3608534322820037, "grad_norm": 5.737783432006836, "learning_rate": 7.208362710120489e-06, "loss": 0.3401, "step": 7780 }, { "epoch": 0.36089981447124303, "grad_norm": 9.039796829223633, "learning_rate": 7.2077024157354995e-06, "loss": 0.3681, "step": 7781 }, { "epoch": 0.3609461966604824, "grad_norm": 5.4664812088012695, "learning_rate": 7.207042073521801e-06, "loss": 0.3199, "step": 7782 }, { "epoch": 0.3609925788497217, "grad_norm": 4.735202312469482, "learning_rate": 7.2063816834937015e-06, "loss": 0.322, "step": 7783 }, { "epoch": 0.36103896103896105, "grad_norm": 5.180609703063965, "learning_rate": 7.2057212456655055e-06, "loss": 0.2841, "step": 7784 }, { "epoch": 0.36108534322820035, "grad_norm": 7.32249641418457, "learning_rate": 7.205060760051522e-06, "loss": 0.3817, "step": 7785 }, { "epoch": 0.3611317254174397, "grad_norm": 6.883784770965576, "learning_rate": 7.20440022666606e-06, "loss": 0.3309, "step": 7786 }, { "epoch": 0.361178107606679, "grad_norm": 11.994162559509277, "learning_rate": 7.203739645523431e-06, "loss": 0.4312, "step": 7787 }, { "epoch": 0.36122448979591837, "grad_norm": 8.919211387634277, "learning_rate": 7.2030790166379435e-06, "loss": 0.498, "step": 7788 }, { "epoch": 0.3612708719851577, "grad_norm": 6.535840034484863, "learning_rate": 7.202418340023911e-06, "loss": 0.337, "step": 7789 }, { "epoch": 0.36131725417439703, "grad_norm": 9.578956604003906, "learning_rate": 7.201757615695648e-06, "loss": 0.4417, "step": 7790 }, { "epoch": 0.3613636363636364, "grad_norm": 6.082922458648682, "learning_rate": 7.201096843667468e-06, "loss": 0.3852, "step": 7791 }, { "epoch": 0.3614100185528757, "grad_norm": 3.6892733573913574, "learning_rate": 7.200436023953683e-06, "loss": 0.2424, "step": 7792 }, { "epoch": 0.36145640074211505, "grad_norm": 5.487614631652832, "learning_rate": 7.199775156568616e-06, "loss": 0.292, "step": 7793 }, { "epoch": 0.36150278293135435, "grad_norm": 9.926793098449707, "learning_rate": 7.199114241526577e-06, "loss": 0.5207, "step": 7794 }, { "epoch": 0.3615491651205937, "grad_norm": 8.227435111999512, "learning_rate": 7.1984532788418905e-06, "loss": 0.4301, "step": 7795 }, { "epoch": 0.361595547309833, "grad_norm": 8.340143203735352, "learning_rate": 7.1977922685288715e-06, "loss": 0.2063, "step": 7796 }, { "epoch": 0.36164192949907237, "grad_norm": 10.51822566986084, "learning_rate": 7.197131210601843e-06, "loss": 0.3883, "step": 7797 }, { "epoch": 0.36168831168831167, "grad_norm": 11.236600875854492, "learning_rate": 7.196470105075126e-06, "loss": 0.3619, "step": 7798 }, { "epoch": 0.36173469387755103, "grad_norm": 7.078164577484131, "learning_rate": 7.1958089519630415e-06, "loss": 0.3149, "step": 7799 }, { "epoch": 0.36178107606679033, "grad_norm": 5.298100471496582, "learning_rate": 7.195147751279915e-06, "loss": 0.252, "step": 7800 }, { "epoch": 0.3618274582560297, "grad_norm": 5.3775858879089355, "learning_rate": 7.194486503040072e-06, "loss": 0.3759, "step": 7801 }, { "epoch": 0.361873840445269, "grad_norm": 6.1495561599731445, "learning_rate": 7.193825207257834e-06, "loss": 0.3312, "step": 7802 }, { "epoch": 0.36192022263450835, "grad_norm": 4.765497207641602, "learning_rate": 7.19316386394753e-06, "loss": 0.2574, "step": 7803 }, { "epoch": 0.3619666048237477, "grad_norm": 4.386087417602539, "learning_rate": 7.192502473123488e-06, "loss": 0.3522, "step": 7804 }, { "epoch": 0.362012987012987, "grad_norm": 7.070489883422852, "learning_rate": 7.191841034800038e-06, "loss": 0.3429, "step": 7805 }, { "epoch": 0.36205936920222637, "grad_norm": 6.425018787384033, "learning_rate": 7.191179548991507e-06, "loss": 0.3182, "step": 7806 }, { "epoch": 0.36210575139146567, "grad_norm": 7.945735931396484, "learning_rate": 7.190518015712226e-06, "loss": 0.3716, "step": 7807 }, { "epoch": 0.362152133580705, "grad_norm": 7.596715450286865, "learning_rate": 7.189856434976528e-06, "loss": 0.4226, "step": 7808 }, { "epoch": 0.36219851576994433, "grad_norm": 8.65969181060791, "learning_rate": 7.189194806798745e-06, "loss": 0.4897, "step": 7809 }, { "epoch": 0.3622448979591837, "grad_norm": 10.526537895202637, "learning_rate": 7.1885331311932115e-06, "loss": 0.5037, "step": 7810 }, { "epoch": 0.362291280148423, "grad_norm": 6.323055267333984, "learning_rate": 7.1878714081742625e-06, "loss": 0.4017, "step": 7811 }, { "epoch": 0.36233766233766235, "grad_norm": 5.218053817749023, "learning_rate": 7.187209637756234e-06, "loss": 0.3519, "step": 7812 }, { "epoch": 0.36238404452690165, "grad_norm": 8.6546049118042, "learning_rate": 7.18654781995346e-06, "loss": 0.3438, "step": 7813 }, { "epoch": 0.362430426716141, "grad_norm": 10.579463958740234, "learning_rate": 7.185885954780283e-06, "loss": 0.4573, "step": 7814 }, { "epoch": 0.3624768089053803, "grad_norm": 6.9811110496521, "learning_rate": 7.1852240422510386e-06, "loss": 0.3137, "step": 7815 }, { "epoch": 0.36252319109461967, "grad_norm": 16.140371322631836, "learning_rate": 7.184562082380069e-06, "loss": 0.418, "step": 7816 }, { "epoch": 0.362569573283859, "grad_norm": 5.605874538421631, "learning_rate": 7.1839000751817135e-06, "loss": 0.3726, "step": 7817 }, { "epoch": 0.36261595547309833, "grad_norm": 8.8289794921875, "learning_rate": 7.183238020670314e-06, "loss": 0.2884, "step": 7818 }, { "epoch": 0.3626623376623377, "grad_norm": 6.043944835662842, "learning_rate": 7.182575918860215e-06, "loss": 0.4005, "step": 7819 }, { "epoch": 0.362708719851577, "grad_norm": 6.7773308753967285, "learning_rate": 7.18191376976576e-06, "loss": 0.4656, "step": 7820 }, { "epoch": 0.36275510204081635, "grad_norm": 5.383027076721191, "learning_rate": 7.1812515734012935e-06, "loss": 0.4056, "step": 7821 }, { "epoch": 0.36280148423005565, "grad_norm": 6.302209377288818, "learning_rate": 7.180589329781163e-06, "loss": 0.4532, "step": 7822 }, { "epoch": 0.362847866419295, "grad_norm": 4.830052852630615, "learning_rate": 7.179927038919714e-06, "loss": 0.431, "step": 7823 }, { "epoch": 0.3628942486085343, "grad_norm": 5.546785831451416, "learning_rate": 7.179264700831296e-06, "loss": 0.3282, "step": 7824 }, { "epoch": 0.36294063079777367, "grad_norm": 10.953609466552734, "learning_rate": 7.178602315530259e-06, "loss": 0.4315, "step": 7825 }, { "epoch": 0.36298701298701297, "grad_norm": 6.559126853942871, "learning_rate": 7.177939883030949e-06, "loss": 0.3687, "step": 7826 }, { "epoch": 0.3630333951762523, "grad_norm": 4.3862175941467285, "learning_rate": 7.177277403347721e-06, "loss": 0.2892, "step": 7827 }, { "epoch": 0.36307977736549163, "grad_norm": 4.983239650726318, "learning_rate": 7.176614876494928e-06, "loss": 0.3414, "step": 7828 }, { "epoch": 0.363126159554731, "grad_norm": 4.354933738708496, "learning_rate": 7.175952302486921e-06, "loss": 0.3365, "step": 7829 }, { "epoch": 0.3631725417439703, "grad_norm": 4.076262950897217, "learning_rate": 7.175289681338055e-06, "loss": 0.3032, "step": 7830 }, { "epoch": 0.36321892393320965, "grad_norm": 6.17936372756958, "learning_rate": 7.174627013062685e-06, "loss": 0.2384, "step": 7831 }, { "epoch": 0.363265306122449, "grad_norm": 9.77566146850586, "learning_rate": 7.173964297675168e-06, "loss": 0.3476, "step": 7832 }, { "epoch": 0.3633116883116883, "grad_norm": 11.077418327331543, "learning_rate": 7.1733015351898605e-06, "loss": 0.4301, "step": 7833 }, { "epoch": 0.36335807050092767, "grad_norm": 8.036581039428711, "learning_rate": 7.172638725621123e-06, "loss": 0.4274, "step": 7834 }, { "epoch": 0.36340445269016697, "grad_norm": 6.525643825531006, "learning_rate": 7.1719758689833115e-06, "loss": 0.4538, "step": 7835 }, { "epoch": 0.3634508348794063, "grad_norm": 6.934205532073975, "learning_rate": 7.171312965290791e-06, "loss": 0.4472, "step": 7836 }, { "epoch": 0.36349721706864563, "grad_norm": 10.729644775390625, "learning_rate": 7.1706500145579185e-06, "loss": 0.4086, "step": 7837 }, { "epoch": 0.363543599257885, "grad_norm": 10.91327953338623, "learning_rate": 7.169987016799058e-06, "loss": 0.487, "step": 7838 }, { "epoch": 0.3635899814471243, "grad_norm": 8.28907585144043, "learning_rate": 7.169323972028576e-06, "loss": 0.5035, "step": 7839 }, { "epoch": 0.36363636363636365, "grad_norm": 7.531521797180176, "learning_rate": 7.168660880260831e-06, "loss": 0.4309, "step": 7840 }, { "epoch": 0.36368274582560295, "grad_norm": 5.850975513458252, "learning_rate": 7.167997741510194e-06, "loss": 0.3303, "step": 7841 }, { "epoch": 0.3637291280148423, "grad_norm": 9.605117797851562, "learning_rate": 7.167334555791029e-06, "loss": 0.3883, "step": 7842 }, { "epoch": 0.3637755102040816, "grad_norm": 4.644406318664551, "learning_rate": 7.1666713231177035e-06, "loss": 0.3711, "step": 7843 }, { "epoch": 0.36382189239332097, "grad_norm": 9.059109687805176, "learning_rate": 7.166008043504588e-06, "loss": 0.3781, "step": 7844 }, { "epoch": 0.36386827458256027, "grad_norm": 5.569645404815674, "learning_rate": 7.1653447169660485e-06, "loss": 0.4007, "step": 7845 }, { "epoch": 0.3639146567717996, "grad_norm": 6.369040489196777, "learning_rate": 7.16468134351646e-06, "loss": 0.3084, "step": 7846 }, { "epoch": 0.363961038961039, "grad_norm": 7.42556619644165, "learning_rate": 7.16401792317019e-06, "loss": 0.2868, "step": 7847 }, { "epoch": 0.3640074211502783, "grad_norm": 6.67011022567749, "learning_rate": 7.163354455941614e-06, "loss": 0.349, "step": 7848 }, { "epoch": 0.36405380333951765, "grad_norm": 5.173837184906006, "learning_rate": 7.162690941845105e-06, "loss": 0.2312, "step": 7849 }, { "epoch": 0.36410018552875695, "grad_norm": 38.769081115722656, "learning_rate": 7.162027380895036e-06, "loss": 0.7314, "step": 7850 }, { "epoch": 0.3641465677179963, "grad_norm": 6.2682881355285645, "learning_rate": 7.161363773105786e-06, "loss": 0.3458, "step": 7851 }, { "epoch": 0.3641929499072356, "grad_norm": 7.286962032318115, "learning_rate": 7.160700118491729e-06, "loss": 0.2425, "step": 7852 }, { "epoch": 0.36423933209647497, "grad_norm": 4.8429412841796875, "learning_rate": 7.160036417067243e-06, "loss": 0.3137, "step": 7853 }, { "epoch": 0.36428571428571427, "grad_norm": 6.416231632232666, "learning_rate": 7.159372668846709e-06, "loss": 0.2064, "step": 7854 }, { "epoch": 0.3643320964749536, "grad_norm": 4.6312150955200195, "learning_rate": 7.158708873844503e-06, "loss": 0.2555, "step": 7855 }, { "epoch": 0.36437847866419293, "grad_norm": 5.7159857749938965, "learning_rate": 7.15804503207501e-06, "loss": 0.433, "step": 7856 }, { "epoch": 0.3644248608534323, "grad_norm": 16.845149993896484, "learning_rate": 7.157381143552609e-06, "loss": 0.3913, "step": 7857 }, { "epoch": 0.3644712430426716, "grad_norm": 10.984071731567383, "learning_rate": 7.156717208291681e-06, "loss": 0.433, "step": 7858 }, { "epoch": 0.36451762523191095, "grad_norm": 5.870023727416992, "learning_rate": 7.156053226306616e-06, "loss": 0.3119, "step": 7859 }, { "epoch": 0.3645640074211503, "grad_norm": 11.979056358337402, "learning_rate": 7.155389197611793e-06, "loss": 0.436, "step": 7860 }, { "epoch": 0.3646103896103896, "grad_norm": 5.251551628112793, "learning_rate": 7.1547251222216e-06, "loss": 0.3178, "step": 7861 }, { "epoch": 0.36465677179962896, "grad_norm": 7.196136474609375, "learning_rate": 7.154061000150424e-06, "loss": 0.4513, "step": 7862 }, { "epoch": 0.36470315398886827, "grad_norm": 7.601300239562988, "learning_rate": 7.1533968314126525e-06, "loss": 0.2806, "step": 7863 }, { "epoch": 0.3647495361781076, "grad_norm": 8.212085723876953, "learning_rate": 7.152732616022675e-06, "loss": 0.4156, "step": 7864 }, { "epoch": 0.3647959183673469, "grad_norm": 9.744976997375488, "learning_rate": 7.152068353994879e-06, "loss": 0.5394, "step": 7865 }, { "epoch": 0.3648423005565863, "grad_norm": 15.490191459655762, "learning_rate": 7.15140404534366e-06, "loss": 0.3648, "step": 7866 }, { "epoch": 0.3648886827458256, "grad_norm": 9.132384300231934, "learning_rate": 7.1507396900834046e-06, "loss": 0.4528, "step": 7867 }, { "epoch": 0.36493506493506495, "grad_norm": 5.8421711921691895, "learning_rate": 7.150075288228509e-06, "loss": 0.2599, "step": 7868 }, { "epoch": 0.36498144712430425, "grad_norm": 7.1699299812316895, "learning_rate": 7.149410839793367e-06, "loss": 0.3727, "step": 7869 }, { "epoch": 0.3650278293135436, "grad_norm": 4.522149085998535, "learning_rate": 7.1487463447923725e-06, "loss": 0.3507, "step": 7870 }, { "epoch": 0.3650742115027829, "grad_norm": 6.422687530517578, "learning_rate": 7.14808180323992e-06, "loss": 0.2801, "step": 7871 }, { "epoch": 0.36512059369202227, "grad_norm": 5.63648796081543, "learning_rate": 7.147417215150411e-06, "loss": 0.2299, "step": 7872 }, { "epoch": 0.36516697588126157, "grad_norm": 14.819412231445312, "learning_rate": 7.146752580538239e-06, "loss": 0.3844, "step": 7873 }, { "epoch": 0.3652133580705009, "grad_norm": 11.002452850341797, "learning_rate": 7.146087899417805e-06, "loss": 0.3643, "step": 7874 }, { "epoch": 0.3652597402597403, "grad_norm": 5.097788333892822, "learning_rate": 7.1454231718035095e-06, "loss": 0.4563, "step": 7875 }, { "epoch": 0.3653061224489796, "grad_norm": 6.361294269561768, "learning_rate": 7.144758397709751e-06, "loss": 0.2991, "step": 7876 }, { "epoch": 0.36535250463821894, "grad_norm": 5.105811595916748, "learning_rate": 7.144093577150934e-06, "loss": 0.3209, "step": 7877 }, { "epoch": 0.36539888682745825, "grad_norm": 8.592853546142578, "learning_rate": 7.14342871014146e-06, "loss": 0.3244, "step": 7878 }, { "epoch": 0.3654452690166976, "grad_norm": 12.339111328125, "learning_rate": 7.142763796695734e-06, "loss": 0.432, "step": 7879 }, { "epoch": 0.3654916512059369, "grad_norm": 7.701406955718994, "learning_rate": 7.142098836828162e-06, "loss": 0.3293, "step": 7880 }, { "epoch": 0.36553803339517627, "grad_norm": 9.375007629394531, "learning_rate": 7.1414338305531464e-06, "loss": 0.5126, "step": 7881 }, { "epoch": 0.36558441558441557, "grad_norm": 5.000532627105713, "learning_rate": 7.140768777885096e-06, "loss": 0.2908, "step": 7882 }, { "epoch": 0.3656307977736549, "grad_norm": 5.244624614715576, "learning_rate": 7.140103678838422e-06, "loss": 0.3182, "step": 7883 }, { "epoch": 0.3656771799628942, "grad_norm": 5.70815896987915, "learning_rate": 7.139438533427528e-06, "loss": 0.4044, "step": 7884 }, { "epoch": 0.3657235621521336, "grad_norm": 7.623676300048828, "learning_rate": 7.138773341666827e-06, "loss": 0.2188, "step": 7885 }, { "epoch": 0.3657699443413729, "grad_norm": 8.258040428161621, "learning_rate": 7.13810810357073e-06, "loss": 0.4562, "step": 7886 }, { "epoch": 0.36581632653061225, "grad_norm": 6.852015495300293, "learning_rate": 7.1374428191536484e-06, "loss": 0.3112, "step": 7887 }, { "epoch": 0.3658627087198516, "grad_norm": 4.024997234344482, "learning_rate": 7.136777488429997e-06, "loss": 0.3793, "step": 7888 }, { "epoch": 0.3659090909090909, "grad_norm": 9.69008731842041, "learning_rate": 7.136112111414186e-06, "loss": 0.4275, "step": 7889 }, { "epoch": 0.36595547309833026, "grad_norm": 5.643921852111816, "learning_rate": 7.135446688120633e-06, "loss": 0.2655, "step": 7890 }, { "epoch": 0.36600185528756957, "grad_norm": 7.6649065017700195, "learning_rate": 7.134781218563756e-06, "loss": 0.3499, "step": 7891 }, { "epoch": 0.3660482374768089, "grad_norm": 6.473328590393066, "learning_rate": 7.134115702757969e-06, "loss": 0.3485, "step": 7892 }, { "epoch": 0.3660946196660482, "grad_norm": 6.717617511749268, "learning_rate": 7.133450140717689e-06, "loss": 0.2785, "step": 7893 }, { "epoch": 0.3661410018552876, "grad_norm": 5.993355751037598, "learning_rate": 7.13278453245734e-06, "loss": 0.3903, "step": 7894 }, { "epoch": 0.3661873840445269, "grad_norm": 10.402165412902832, "learning_rate": 7.1321188779913365e-06, "loss": 0.4979, "step": 7895 }, { "epoch": 0.36623376623376624, "grad_norm": 7.269048690795898, "learning_rate": 7.131453177334103e-06, "loss": 0.2655, "step": 7896 }, { "epoch": 0.36628014842300555, "grad_norm": 8.808813095092773, "learning_rate": 7.130787430500061e-06, "loss": 0.2929, "step": 7897 }, { "epoch": 0.3663265306122449, "grad_norm": 4.591689586639404, "learning_rate": 7.130121637503633e-06, "loss": 0.3625, "step": 7898 }, { "epoch": 0.3663729128014842, "grad_norm": 8.132316589355469, "learning_rate": 7.129455798359244e-06, "loss": 0.4153, "step": 7899 }, { "epoch": 0.36641929499072357, "grad_norm": 7.044368267059326, "learning_rate": 7.128789913081316e-06, "loss": 0.3786, "step": 7900 }, { "epoch": 0.36646567717996287, "grad_norm": 5.882034778594971, "learning_rate": 7.12812398168428e-06, "loss": 0.3954, "step": 7901 }, { "epoch": 0.3665120593692022, "grad_norm": 6.102203845977783, "learning_rate": 7.127458004182559e-06, "loss": 0.3468, "step": 7902 }, { "epoch": 0.3665584415584416, "grad_norm": 4.825474262237549, "learning_rate": 7.126791980590583e-06, "loss": 0.2586, "step": 7903 }, { "epoch": 0.3666048237476809, "grad_norm": 6.104541301727295, "learning_rate": 7.12612591092278e-06, "loss": 0.3515, "step": 7904 }, { "epoch": 0.36665120593692024, "grad_norm": 10.830824851989746, "learning_rate": 7.1254597951935815e-06, "loss": 0.4202, "step": 7905 }, { "epoch": 0.36669758812615955, "grad_norm": 6.414165496826172, "learning_rate": 7.124793633417416e-06, "loss": 0.2838, "step": 7906 }, { "epoch": 0.3667439703153989, "grad_norm": 8.954853057861328, "learning_rate": 7.124127425608718e-06, "loss": 0.5321, "step": 7907 }, { "epoch": 0.3667903525046382, "grad_norm": 3.9291064739227295, "learning_rate": 7.12346117178192e-06, "loss": 0.2831, "step": 7908 }, { "epoch": 0.36683673469387756, "grad_norm": 7.722306728363037, "learning_rate": 7.122794871951455e-06, "loss": 0.2552, "step": 7909 }, { "epoch": 0.36688311688311687, "grad_norm": 4.306624889373779, "learning_rate": 7.12212852613176e-06, "loss": 0.2553, "step": 7910 }, { "epoch": 0.3669294990723562, "grad_norm": 5.205840110778809, "learning_rate": 7.121462134337268e-06, "loss": 0.3081, "step": 7911 }, { "epoch": 0.3669758812615955, "grad_norm": 6.906773567199707, "learning_rate": 7.120795696582419e-06, "loss": 0.3177, "step": 7912 }, { "epoch": 0.3670222634508349, "grad_norm": 6.731937408447266, "learning_rate": 7.12012921288165e-06, "loss": 0.3472, "step": 7913 }, { "epoch": 0.3670686456400742, "grad_norm": 5.665799140930176, "learning_rate": 7.119462683249398e-06, "loss": 0.347, "step": 7914 }, { "epoch": 0.36711502782931354, "grad_norm": 13.5648193359375, "learning_rate": 7.118796107700107e-06, "loss": 0.5752, "step": 7915 }, { "epoch": 0.3671614100185529, "grad_norm": 6.336019992828369, "learning_rate": 7.1181294862482155e-06, "loss": 0.3557, "step": 7916 }, { "epoch": 0.3672077922077922, "grad_norm": 7.948930740356445, "learning_rate": 7.1174628189081655e-06, "loss": 0.3447, "step": 7917 }, { "epoch": 0.36725417439703156, "grad_norm": 5.487402439117432, "learning_rate": 7.116796105694401e-06, "loss": 0.3173, "step": 7918 }, { "epoch": 0.36730055658627087, "grad_norm": 8.638747215270996, "learning_rate": 7.116129346621365e-06, "loss": 0.3282, "step": 7919 }, { "epoch": 0.3673469387755102, "grad_norm": 6.881374359130859, "learning_rate": 7.115462541703502e-06, "loss": 0.4607, "step": 7920 }, { "epoch": 0.3673933209647495, "grad_norm": 9.345198631286621, "learning_rate": 7.11479569095526e-06, "loss": 0.3652, "step": 7921 }, { "epoch": 0.3674397031539889, "grad_norm": 6.392276763916016, "learning_rate": 7.114128794391084e-06, "loss": 0.2345, "step": 7922 }, { "epoch": 0.3674860853432282, "grad_norm": 5.9546895027160645, "learning_rate": 7.113461852025423e-06, "loss": 0.3357, "step": 7923 }, { "epoch": 0.36753246753246754, "grad_norm": 4.305790901184082, "learning_rate": 7.1127948638727264e-06, "loss": 0.395, "step": 7924 }, { "epoch": 0.36757884972170685, "grad_norm": 10.917684555053711, "learning_rate": 7.112127829947442e-06, "loss": 0.4243, "step": 7925 }, { "epoch": 0.3676252319109462, "grad_norm": 9.301647186279297, "learning_rate": 7.111460750264023e-06, "loss": 0.3953, "step": 7926 }, { "epoch": 0.3676716141001855, "grad_norm": 15.9717435836792, "learning_rate": 7.110793624836921e-06, "loss": 0.3866, "step": 7927 }, { "epoch": 0.36771799628942486, "grad_norm": 5.664473533630371, "learning_rate": 7.1101264536805885e-06, "loss": 0.3945, "step": 7928 }, { "epoch": 0.36776437847866417, "grad_norm": 8.251326560974121, "learning_rate": 7.109459236809478e-06, "loss": 0.3766, "step": 7929 }, { "epoch": 0.3678107606679035, "grad_norm": 4.775683403015137, "learning_rate": 7.108791974238047e-06, "loss": 0.3436, "step": 7930 }, { "epoch": 0.3678571428571429, "grad_norm": 10.272381782531738, "learning_rate": 7.108124665980749e-06, "loss": 0.4519, "step": 7931 }, { "epoch": 0.3679035250463822, "grad_norm": 7.855691432952881, "learning_rate": 7.107457312052044e-06, "loss": 0.2836, "step": 7932 }, { "epoch": 0.36794990723562154, "grad_norm": 10.04769515991211, "learning_rate": 7.106789912466385e-06, "loss": 0.4614, "step": 7933 }, { "epoch": 0.36799628942486085, "grad_norm": 4.592345237731934, "learning_rate": 7.1061224672382355e-06, "loss": 0.2549, "step": 7934 }, { "epoch": 0.3680426716141002, "grad_norm": 11.391972541809082, "learning_rate": 7.105454976382054e-06, "loss": 0.3676, "step": 7935 }, { "epoch": 0.3680890538033395, "grad_norm": 4.384055137634277, "learning_rate": 7.104787439912301e-06, "loss": 0.3018, "step": 7936 }, { "epoch": 0.36813543599257886, "grad_norm": 12.214399337768555, "learning_rate": 7.104119857843437e-06, "loss": 0.4044, "step": 7937 }, { "epoch": 0.36818181818181817, "grad_norm": 15.889992713928223, "learning_rate": 7.103452230189928e-06, "loss": 0.4071, "step": 7938 }, { "epoch": 0.3682282003710575, "grad_norm": 5.490484237670898, "learning_rate": 7.102784556966234e-06, "loss": 0.3228, "step": 7939 }, { "epoch": 0.3682745825602968, "grad_norm": 9.566776275634766, "learning_rate": 7.102116838186823e-06, "loss": 0.4569, "step": 7940 }, { "epoch": 0.3683209647495362, "grad_norm": 8.789652824401855, "learning_rate": 7.101449073866159e-06, "loss": 0.4014, "step": 7941 }, { "epoch": 0.3683673469387755, "grad_norm": 8.023425102233887, "learning_rate": 7.100781264018708e-06, "loss": 0.3245, "step": 7942 }, { "epoch": 0.36841372912801484, "grad_norm": 6.282118320465088, "learning_rate": 7.10011340865894e-06, "loss": 0.3403, "step": 7943 }, { "epoch": 0.3684601113172542, "grad_norm": 5.400543689727783, "learning_rate": 7.099445507801324e-06, "loss": 0.3238, "step": 7944 }, { "epoch": 0.3685064935064935, "grad_norm": 8.580368995666504, "learning_rate": 7.098777561460326e-06, "loss": 0.3661, "step": 7945 }, { "epoch": 0.36855287569573286, "grad_norm": 10.006352424621582, "learning_rate": 7.098109569650421e-06, "loss": 0.413, "step": 7946 }, { "epoch": 0.36859925788497216, "grad_norm": 6.920474052429199, "learning_rate": 7.097441532386078e-06, "loss": 0.2229, "step": 7947 }, { "epoch": 0.3686456400742115, "grad_norm": 5.114640712738037, "learning_rate": 7.096773449681771e-06, "loss": 0.3163, "step": 7948 }, { "epoch": 0.3686920222634508, "grad_norm": 6.700385570526123, "learning_rate": 7.096105321551972e-06, "loss": 0.5318, "step": 7949 }, { "epoch": 0.3687384044526902, "grad_norm": 6.146360874176025, "learning_rate": 7.0954371480111575e-06, "loss": 0.3638, "step": 7950 }, { "epoch": 0.3687847866419295, "grad_norm": 8.98802661895752, "learning_rate": 7.094768929073803e-06, "loss": 0.4173, "step": 7951 }, { "epoch": 0.36883116883116884, "grad_norm": 10.2426118850708, "learning_rate": 7.094100664754384e-06, "loss": 0.4436, "step": 7952 }, { "epoch": 0.36887755102040815, "grad_norm": 6.559292316436768, "learning_rate": 7.0934323550673775e-06, "loss": 0.2803, "step": 7953 }, { "epoch": 0.3689239332096475, "grad_norm": 15.582817077636719, "learning_rate": 7.092764000027266e-06, "loss": 0.4732, "step": 7954 }, { "epoch": 0.3689703153988868, "grad_norm": 5.327722072601318, "learning_rate": 7.092095599648523e-06, "loss": 0.2537, "step": 7955 }, { "epoch": 0.36901669758812616, "grad_norm": 5.590048313140869, "learning_rate": 7.091427153945634e-06, "loss": 0.4128, "step": 7956 }, { "epoch": 0.36906307977736547, "grad_norm": 8.557977676391602, "learning_rate": 7.09075866293308e-06, "loss": 0.3668, "step": 7957 }, { "epoch": 0.3691094619666048, "grad_norm": 6.33567476272583, "learning_rate": 7.09009012662534e-06, "loss": 0.432, "step": 7958 }, { "epoch": 0.3691558441558442, "grad_norm": 5.560307025909424, "learning_rate": 7.089421545036901e-06, "loss": 0.34, "step": 7959 }, { "epoch": 0.3692022263450835, "grad_norm": 6.71891450881958, "learning_rate": 7.088752918182247e-06, "loss": 0.3689, "step": 7960 }, { "epoch": 0.36924860853432284, "grad_norm": 10.544991493225098, "learning_rate": 7.08808424607586e-06, "loss": 0.3666, "step": 7961 }, { "epoch": 0.36929499072356214, "grad_norm": 4.217282295227051, "learning_rate": 7.0874155287322326e-06, "loss": 0.2874, "step": 7962 }, { "epoch": 0.3693413729128015, "grad_norm": 6.98895788192749, "learning_rate": 7.086746766165846e-06, "loss": 0.3563, "step": 7963 }, { "epoch": 0.3693877551020408, "grad_norm": 6.891456127166748, "learning_rate": 7.086077958391192e-06, "loss": 0.4313, "step": 7964 }, { "epoch": 0.36943413729128016, "grad_norm": 7.102577209472656, "learning_rate": 7.08540910542276e-06, "loss": 0.3556, "step": 7965 }, { "epoch": 0.36948051948051946, "grad_norm": 7.110353469848633, "learning_rate": 7.08474020727504e-06, "loss": 0.353, "step": 7966 }, { "epoch": 0.3695269016697588, "grad_norm": 8.875396728515625, "learning_rate": 7.084071263962522e-06, "loss": 0.4703, "step": 7967 }, { "epoch": 0.3695732838589981, "grad_norm": 6.5502119064331055, "learning_rate": 7.0834022754997e-06, "loss": 0.4086, "step": 7968 }, { "epoch": 0.3696196660482375, "grad_norm": 5.62829065322876, "learning_rate": 7.082733241901067e-06, "loss": 0.3343, "step": 7969 }, { "epoch": 0.3696660482374768, "grad_norm": 8.941507339477539, "learning_rate": 7.082064163181116e-06, "loss": 0.3844, "step": 7970 }, { "epoch": 0.36971243042671614, "grad_norm": 8.55361270904541, "learning_rate": 7.081395039354343e-06, "loss": 0.413, "step": 7971 }, { "epoch": 0.36975881261595545, "grad_norm": 4.189576148986816, "learning_rate": 7.080725870435244e-06, "loss": 0.3779, "step": 7972 }, { "epoch": 0.3698051948051948, "grad_norm": 7.504044055938721, "learning_rate": 7.080056656438319e-06, "loss": 0.3638, "step": 7973 }, { "epoch": 0.36985157699443416, "grad_norm": 6.671122074127197, "learning_rate": 7.07938739737806e-06, "loss": 0.2938, "step": 7974 }, { "epoch": 0.36989795918367346, "grad_norm": 8.414786338806152, "learning_rate": 7.078718093268971e-06, "loss": 0.3809, "step": 7975 }, { "epoch": 0.3699443413729128, "grad_norm": 10.811250686645508, "learning_rate": 7.078048744125553e-06, "loss": 0.4956, "step": 7976 }, { "epoch": 0.3699907235621521, "grad_norm": 9.805933952331543, "learning_rate": 7.077379349962304e-06, "loss": 0.5142, "step": 7977 }, { "epoch": 0.3700371057513915, "grad_norm": 9.45655632019043, "learning_rate": 7.076709910793725e-06, "loss": 0.3578, "step": 7978 }, { "epoch": 0.3700834879406308, "grad_norm": 9.903451919555664, "learning_rate": 7.076040426634323e-06, "loss": 0.3526, "step": 7979 }, { "epoch": 0.37012987012987014, "grad_norm": 9.82467269897461, "learning_rate": 7.075370897498599e-06, "loss": 0.3048, "step": 7980 }, { "epoch": 0.37017625231910944, "grad_norm": 6.674144268035889, "learning_rate": 7.074701323401061e-06, "loss": 0.3514, "step": 7981 }, { "epoch": 0.3702226345083488, "grad_norm": 6.00732421875, "learning_rate": 7.074031704356212e-06, "loss": 0.3452, "step": 7982 }, { "epoch": 0.3702690166975881, "grad_norm": 5.137364864349365, "learning_rate": 7.07336204037856e-06, "loss": 0.3107, "step": 7983 }, { "epoch": 0.37031539888682746, "grad_norm": 7.020951747894287, "learning_rate": 7.072692331482613e-06, "loss": 0.2888, "step": 7984 }, { "epoch": 0.37036178107606677, "grad_norm": 6.143496513366699, "learning_rate": 7.07202257768288e-06, "loss": 0.344, "step": 7985 }, { "epoch": 0.3704081632653061, "grad_norm": 5.676090240478516, "learning_rate": 7.071352778993872e-06, "loss": 0.3112, "step": 7986 }, { "epoch": 0.3704545454545455, "grad_norm": 6.535185813903809, "learning_rate": 7.070682935430097e-06, "loss": 0.2877, "step": 7987 }, { "epoch": 0.3705009276437848, "grad_norm": 4.362610816955566, "learning_rate": 7.070013047006068e-06, "loss": 0.2854, "step": 7988 }, { "epoch": 0.37054730983302414, "grad_norm": 7.061955451965332, "learning_rate": 7.069343113736299e-06, "loss": 0.4406, "step": 7989 }, { "epoch": 0.37059369202226344, "grad_norm": 7.402166843414307, "learning_rate": 7.068673135635302e-06, "loss": 0.4307, "step": 7990 }, { "epoch": 0.3706400742115028, "grad_norm": 6.537667274475098, "learning_rate": 7.068003112717594e-06, "loss": 0.3701, "step": 7991 }, { "epoch": 0.3706864564007421, "grad_norm": 5.815281867980957, "learning_rate": 7.067333044997689e-06, "loss": 0.326, "step": 7992 }, { "epoch": 0.37073283858998146, "grad_norm": 15.509297370910645, "learning_rate": 7.066662932490103e-06, "loss": 0.3156, "step": 7993 }, { "epoch": 0.37077922077922076, "grad_norm": 5.2246994972229, "learning_rate": 7.065992775209356e-06, "loss": 0.3895, "step": 7994 }, { "epoch": 0.3708256029684601, "grad_norm": 8.4091215133667, "learning_rate": 7.0653225731699635e-06, "loss": 0.3196, "step": 7995 }, { "epoch": 0.3708719851576994, "grad_norm": 9.015115737915039, "learning_rate": 7.06465232638645e-06, "loss": 0.4498, "step": 7996 }, { "epoch": 0.3709183673469388, "grad_norm": 5.109913349151611, "learning_rate": 7.063982034873329e-06, "loss": 0.3283, "step": 7997 }, { "epoch": 0.3709647495361781, "grad_norm": 6.47412633895874, "learning_rate": 7.063311698645128e-06, "loss": 0.3024, "step": 7998 }, { "epoch": 0.37101113172541744, "grad_norm": 6.05410623550415, "learning_rate": 7.062641317716368e-06, "loss": 0.351, "step": 7999 }, { "epoch": 0.37105751391465674, "grad_norm": 14.793201446533203, "learning_rate": 7.06197089210157e-06, "loss": 0.371, "step": 8000 }, { "epoch": 0.3711038961038961, "grad_norm": 9.969866752624512, "learning_rate": 7.06130042181526e-06, "loss": 0.4171, "step": 8001 }, { "epoch": 0.37115027829313546, "grad_norm": 8.131062507629395, "learning_rate": 7.060629906871965e-06, "loss": 0.3561, "step": 8002 }, { "epoch": 0.37119666048237476, "grad_norm": 3.7121026515960693, "learning_rate": 7.059959347286209e-06, "loss": 0.225, "step": 8003 }, { "epoch": 0.3712430426716141, "grad_norm": 7.79337215423584, "learning_rate": 7.059288743072521e-06, "loss": 0.4081, "step": 8004 }, { "epoch": 0.3712894248608534, "grad_norm": 4.51740837097168, "learning_rate": 7.058618094245427e-06, "loss": 0.3179, "step": 8005 }, { "epoch": 0.3713358070500928, "grad_norm": 10.382346153259277, "learning_rate": 7.057947400819459e-06, "loss": 0.5529, "step": 8006 }, { "epoch": 0.3713821892393321, "grad_norm": 12.818584442138672, "learning_rate": 7.057276662809147e-06, "loss": 0.4599, "step": 8007 }, { "epoch": 0.37142857142857144, "grad_norm": 5.822766304016113, "learning_rate": 7.0566058802290196e-06, "loss": 0.2948, "step": 8008 }, { "epoch": 0.37147495361781074, "grad_norm": 5.057920455932617, "learning_rate": 7.055935053093612e-06, "loss": 0.3646, "step": 8009 }, { "epoch": 0.3715213358070501, "grad_norm": 5.323730945587158, "learning_rate": 7.055264181417454e-06, "loss": 0.3367, "step": 8010 }, { "epoch": 0.3715677179962894, "grad_norm": 10.577325820922852, "learning_rate": 7.054593265215083e-06, "loss": 0.4561, "step": 8011 }, { "epoch": 0.37161410018552876, "grad_norm": 5.221288681030273, "learning_rate": 7.053922304501032e-06, "loss": 0.3033, "step": 8012 }, { "epoch": 0.37166048237476806, "grad_norm": 6.5486369132995605, "learning_rate": 7.053251299289837e-06, "loss": 0.3506, "step": 8013 }, { "epoch": 0.3717068645640074, "grad_norm": 8.913369178771973, "learning_rate": 7.0525802495960375e-06, "loss": 0.3791, "step": 8014 }, { "epoch": 0.3717532467532468, "grad_norm": 7.047066688537598, "learning_rate": 7.0519091554341675e-06, "loss": 0.2564, "step": 8015 }, { "epoch": 0.3717996289424861, "grad_norm": 7.988738536834717, "learning_rate": 7.0512380168187685e-06, "loss": 0.3424, "step": 8016 }, { "epoch": 0.37184601113172544, "grad_norm": 11.348875999450684, "learning_rate": 7.0505668337643805e-06, "loss": 0.4648, "step": 8017 }, { "epoch": 0.37189239332096474, "grad_norm": 7.499236583709717, "learning_rate": 7.049895606285543e-06, "loss": 0.4119, "step": 8018 }, { "epoch": 0.3719387755102041, "grad_norm": 5.148135185241699, "learning_rate": 7.049224334396798e-06, "loss": 0.3752, "step": 8019 }, { "epoch": 0.3719851576994434, "grad_norm": 4.826082706451416, "learning_rate": 7.0485530181126895e-06, "loss": 0.3142, "step": 8020 }, { "epoch": 0.37203153988868276, "grad_norm": 10.66846752166748, "learning_rate": 7.047881657447761e-06, "loss": 0.3185, "step": 8021 }, { "epoch": 0.37207792207792206, "grad_norm": 6.097110748291016, "learning_rate": 7.047210252416555e-06, "loss": 0.3294, "step": 8022 }, { "epoch": 0.3721243042671614, "grad_norm": 6.021368026733398, "learning_rate": 7.046538803033619e-06, "loss": 0.357, "step": 8023 }, { "epoch": 0.3721706864564007, "grad_norm": 10.566349983215332, "learning_rate": 7.045867309313499e-06, "loss": 0.381, "step": 8024 }, { "epoch": 0.3722170686456401, "grad_norm": 4.618056774139404, "learning_rate": 7.045195771270744e-06, "loss": 0.3975, "step": 8025 }, { "epoch": 0.3722634508348794, "grad_norm": 13.213837623596191, "learning_rate": 7.044524188919901e-06, "loss": 0.312, "step": 8026 }, { "epoch": 0.37230983302411874, "grad_norm": 7.218941688537598, "learning_rate": 7.043852562275518e-06, "loss": 0.2488, "step": 8027 }, { "epoch": 0.37235621521335804, "grad_norm": 4.594847679138184, "learning_rate": 7.0431808913521506e-06, "loss": 0.291, "step": 8028 }, { "epoch": 0.3724025974025974, "grad_norm": 4.59533166885376, "learning_rate": 7.042509176164344e-06, "loss": 0.2891, "step": 8029 }, { "epoch": 0.37244897959183676, "grad_norm": 9.757355690002441, "learning_rate": 7.041837416726654e-06, "loss": 0.24, "step": 8030 }, { "epoch": 0.37249536178107606, "grad_norm": 11.105949401855469, "learning_rate": 7.041165613053634e-06, "loss": 0.4721, "step": 8031 }, { "epoch": 0.3725417439703154, "grad_norm": 6.598122596740723, "learning_rate": 7.040493765159837e-06, "loss": 0.3916, "step": 8032 }, { "epoch": 0.3725881261595547, "grad_norm": 5.388843059539795, "learning_rate": 7.03982187305982e-06, "loss": 0.3427, "step": 8033 }, { "epoch": 0.3726345083487941, "grad_norm": 6.575247287750244, "learning_rate": 7.039149936768137e-06, "loss": 0.3009, "step": 8034 }, { "epoch": 0.3726808905380334, "grad_norm": 7.820711135864258, "learning_rate": 7.038477956299345e-06, "loss": 0.4015, "step": 8035 }, { "epoch": 0.37272727272727274, "grad_norm": 10.828073501586914, "learning_rate": 7.037805931668006e-06, "loss": 0.4091, "step": 8036 }, { "epoch": 0.37277365491651204, "grad_norm": 5.599987030029297, "learning_rate": 7.037133862888674e-06, "loss": 0.2723, "step": 8037 }, { "epoch": 0.3728200371057514, "grad_norm": 10.60421085357666, "learning_rate": 7.036461749975913e-06, "loss": 0.4588, "step": 8038 }, { "epoch": 0.3728664192949907, "grad_norm": 7.74958610534668, "learning_rate": 7.0357895929442825e-06, "loss": 0.4059, "step": 8039 }, { "epoch": 0.37291280148423006, "grad_norm": 8.112855911254883, "learning_rate": 7.035117391808341e-06, "loss": 0.468, "step": 8040 }, { "epoch": 0.37295918367346936, "grad_norm": 5.033084869384766, "learning_rate": 7.034445146582658e-06, "loss": 0.3325, "step": 8041 }, { "epoch": 0.3730055658627087, "grad_norm": 7.5256667137146, "learning_rate": 7.033772857281793e-06, "loss": 0.3772, "step": 8042 }, { "epoch": 0.3730519480519481, "grad_norm": 5.2602081298828125, "learning_rate": 7.0331005239203106e-06, "loss": 0.3645, "step": 8043 }, { "epoch": 0.3730983302411874, "grad_norm": 12.504480361938477, "learning_rate": 7.032428146512779e-06, "loss": 0.5348, "step": 8044 }, { "epoch": 0.37314471243042674, "grad_norm": 5.7833428382873535, "learning_rate": 7.031755725073763e-06, "loss": 0.3022, "step": 8045 }, { "epoch": 0.37319109461966604, "grad_norm": 6.023545265197754, "learning_rate": 7.0310832596178325e-06, "loss": 0.3803, "step": 8046 }, { "epoch": 0.3732374768089054, "grad_norm": 9.124917984008789, "learning_rate": 7.030410750159553e-06, "loss": 0.3025, "step": 8047 }, { "epoch": 0.3732838589981447, "grad_norm": 12.820328712463379, "learning_rate": 7.0297381967134955e-06, "loss": 0.4073, "step": 8048 }, { "epoch": 0.37333024118738406, "grad_norm": 6.520030498504639, "learning_rate": 7.02906559929423e-06, "loss": 0.4503, "step": 8049 }, { "epoch": 0.37337662337662336, "grad_norm": 4.245970249176025, "learning_rate": 7.0283929579163305e-06, "loss": 0.3258, "step": 8050 }, { "epoch": 0.3734230055658627, "grad_norm": 10.116390228271484, "learning_rate": 7.027720272594365e-06, "loss": 0.4097, "step": 8051 }, { "epoch": 0.373469387755102, "grad_norm": 4.958528518676758, "learning_rate": 7.027047543342911e-06, "loss": 0.3208, "step": 8052 }, { "epoch": 0.3735157699443414, "grad_norm": 6.330326080322266, "learning_rate": 7.02637477017654e-06, "loss": 0.2813, "step": 8053 }, { "epoch": 0.3735621521335807, "grad_norm": 6.702070713043213, "learning_rate": 7.025701953109829e-06, "loss": 0.4176, "step": 8054 }, { "epoch": 0.37360853432282004, "grad_norm": 7.211516380310059, "learning_rate": 7.025029092157354e-06, "loss": 0.455, "step": 8055 }, { "epoch": 0.37365491651205934, "grad_norm": 7.641397953033447, "learning_rate": 7.024356187333692e-06, "loss": 0.3394, "step": 8056 }, { "epoch": 0.3737012987012987, "grad_norm": 6.4866414070129395, "learning_rate": 7.02368323865342e-06, "loss": 0.3351, "step": 8057 }, { "epoch": 0.37374768089053806, "grad_norm": 12.753650665283203, "learning_rate": 7.023010246131119e-06, "loss": 0.3616, "step": 8058 }, { "epoch": 0.37379406307977736, "grad_norm": 15.30871295928955, "learning_rate": 7.022337209781367e-06, "loss": 0.282, "step": 8059 }, { "epoch": 0.3738404452690167, "grad_norm": 7.894770622253418, "learning_rate": 7.021664129618746e-06, "loss": 0.2872, "step": 8060 }, { "epoch": 0.373886827458256, "grad_norm": 8.657645225524902, "learning_rate": 7.020991005657839e-06, "loss": 0.3644, "step": 8061 }, { "epoch": 0.3739332096474954, "grad_norm": 5.651839733123779, "learning_rate": 7.020317837913227e-06, "loss": 0.2988, "step": 8062 }, { "epoch": 0.3739795918367347, "grad_norm": 5.923246383666992, "learning_rate": 7.019644626399496e-06, "loss": 0.2865, "step": 8063 }, { "epoch": 0.37402597402597404, "grad_norm": 5.344334602355957, "learning_rate": 7.018971371131227e-06, "loss": 0.4031, "step": 8064 }, { "epoch": 0.37407235621521334, "grad_norm": 6.720600128173828, "learning_rate": 7.018298072123009e-06, "loss": 0.4151, "step": 8065 }, { "epoch": 0.3741187384044527, "grad_norm": 5.387638092041016, "learning_rate": 7.017624729389428e-06, "loss": 0.3777, "step": 8066 }, { "epoch": 0.374165120593692, "grad_norm": 6.019467830657959, "learning_rate": 7.016951342945072e-06, "loss": 0.3484, "step": 8067 }, { "epoch": 0.37421150278293136, "grad_norm": 8.011395454406738, "learning_rate": 7.0162779128045275e-06, "loss": 0.3625, "step": 8068 }, { "epoch": 0.37425788497217066, "grad_norm": 5.269225597381592, "learning_rate": 7.015604438982388e-06, "loss": 0.3181, "step": 8069 }, { "epoch": 0.37430426716141, "grad_norm": 4.4328999519348145, "learning_rate": 7.014930921493239e-06, "loss": 0.2873, "step": 8070 }, { "epoch": 0.3743506493506494, "grad_norm": 15.7998046875, "learning_rate": 7.0142573603516765e-06, "loss": 0.4336, "step": 8071 }, { "epoch": 0.3743970315398887, "grad_norm": 6.017899036407471, "learning_rate": 7.01358375557229e-06, "loss": 0.2951, "step": 8072 }, { "epoch": 0.37444341372912804, "grad_norm": 6.519735336303711, "learning_rate": 7.012910107169672e-06, "loss": 0.3145, "step": 8073 }, { "epoch": 0.37448979591836734, "grad_norm": 4.424607276916504, "learning_rate": 7.01223641515842e-06, "loss": 0.3923, "step": 8074 }, { "epoch": 0.3745361781076067, "grad_norm": 3.6868138313293457, "learning_rate": 7.011562679553127e-06, "loss": 0.2481, "step": 8075 }, { "epoch": 0.374582560296846, "grad_norm": 7.497649192810059, "learning_rate": 7.010888900368389e-06, "loss": 0.4215, "step": 8076 }, { "epoch": 0.37462894248608536, "grad_norm": 5.191551685333252, "learning_rate": 7.010215077618805e-06, "loss": 0.2984, "step": 8077 }, { "epoch": 0.37467532467532466, "grad_norm": 4.219010829925537, "learning_rate": 7.00954121131897e-06, "loss": 0.2275, "step": 8078 }, { "epoch": 0.374721706864564, "grad_norm": 5.460562229156494, "learning_rate": 7.008867301483484e-06, "loss": 0.399, "step": 8079 }, { "epoch": 0.3747680890538033, "grad_norm": 6.090423583984375, "learning_rate": 7.008193348126949e-06, "loss": 0.3875, "step": 8080 }, { "epoch": 0.3748144712430427, "grad_norm": 6.450511455535889, "learning_rate": 7.007519351263964e-06, "loss": 0.3696, "step": 8081 }, { "epoch": 0.374860853432282, "grad_norm": 8.624123573303223, "learning_rate": 7.006845310909131e-06, "loss": 0.3432, "step": 8082 }, { "epoch": 0.37490723562152134, "grad_norm": 5.007539749145508, "learning_rate": 7.006171227077054e-06, "loss": 0.3409, "step": 8083 }, { "epoch": 0.37495361781076064, "grad_norm": 6.991912364959717, "learning_rate": 7.005497099782332e-06, "loss": 0.3621, "step": 8084 }, { "epoch": 0.375, "grad_norm": 11.515298843383789, "learning_rate": 7.0048229290395775e-06, "loss": 0.4621, "step": 8085 }, { "epoch": 0.37504638218923936, "grad_norm": 7.587855815887451, "learning_rate": 7.004148714863389e-06, "loss": 0.3039, "step": 8086 }, { "epoch": 0.37509276437847866, "grad_norm": 7.212692737579346, "learning_rate": 7.0034744572683755e-06, "loss": 0.2543, "step": 8087 }, { "epoch": 0.375139146567718, "grad_norm": 10.06206226348877, "learning_rate": 7.0028001562691475e-06, "loss": 0.4646, "step": 8088 }, { "epoch": 0.3751855287569573, "grad_norm": 8.461616516113281, "learning_rate": 7.002125811880307e-06, "loss": 0.4032, "step": 8089 }, { "epoch": 0.3752319109461967, "grad_norm": 6.972093105316162, "learning_rate": 7.001451424116468e-06, "loss": 0.3813, "step": 8090 }, { "epoch": 0.375278293135436, "grad_norm": 8.473087310791016, "learning_rate": 7.00077699299224e-06, "loss": 0.3112, "step": 8091 }, { "epoch": 0.37532467532467534, "grad_norm": 12.01984977722168, "learning_rate": 7.000102518522234e-06, "loss": 0.4832, "step": 8092 }, { "epoch": 0.37537105751391464, "grad_norm": 12.895078659057617, "learning_rate": 6.9994280007210616e-06, "loss": 0.552, "step": 8093 }, { "epoch": 0.375417439703154, "grad_norm": 7.7261457443237305, "learning_rate": 6.998753439603335e-06, "loss": 0.4599, "step": 8094 }, { "epoch": 0.3754638218923933, "grad_norm": 10.486315727233887, "learning_rate": 6.99807883518367e-06, "loss": 0.2746, "step": 8095 }, { "epoch": 0.37551020408163266, "grad_norm": 5.807994365692139, "learning_rate": 6.9974041874766815e-06, "loss": 0.337, "step": 8096 }, { "epoch": 0.37555658627087196, "grad_norm": 12.13486385345459, "learning_rate": 6.996729496496985e-06, "loss": 0.4829, "step": 8097 }, { "epoch": 0.3756029684601113, "grad_norm": 8.284099578857422, "learning_rate": 6.996054762259197e-06, "loss": 0.3769, "step": 8098 }, { "epoch": 0.3756493506493506, "grad_norm": 5.194821357727051, "learning_rate": 6.995379984777935e-06, "loss": 0.2659, "step": 8099 }, { "epoch": 0.37569573283859, "grad_norm": 5.715334892272949, "learning_rate": 6.9947051640678175e-06, "loss": 0.3057, "step": 8100 }, { "epoch": 0.37574211502782934, "grad_norm": 5.6770710945129395, "learning_rate": 6.994030300143465e-06, "loss": 0.2414, "step": 8101 }, { "epoch": 0.37578849721706864, "grad_norm": 10.87099552154541, "learning_rate": 6.993355393019499e-06, "loss": 0.4725, "step": 8102 }, { "epoch": 0.375834879406308, "grad_norm": 6.955840587615967, "learning_rate": 6.992680442710539e-06, "loss": 0.3672, "step": 8103 }, { "epoch": 0.3758812615955473, "grad_norm": 7.259006023406982, "learning_rate": 6.9920054492312086e-06, "loss": 0.3239, "step": 8104 }, { "epoch": 0.37592764378478666, "grad_norm": 12.428549766540527, "learning_rate": 6.99133041259613e-06, "loss": 0.3994, "step": 8105 }, { "epoch": 0.37597402597402596, "grad_norm": 9.572399139404297, "learning_rate": 6.990655332819928e-06, "loss": 0.3786, "step": 8106 }, { "epoch": 0.3760204081632653, "grad_norm": 9.002312660217285, "learning_rate": 6.98998020991723e-06, "loss": 0.3623, "step": 8107 }, { "epoch": 0.3760667903525046, "grad_norm": 8.251445770263672, "learning_rate": 6.989305043902659e-06, "loss": 0.3369, "step": 8108 }, { "epoch": 0.376113172541744, "grad_norm": 4.534151077270508, "learning_rate": 6.988629834790843e-06, "loss": 0.3877, "step": 8109 }, { "epoch": 0.3761595547309833, "grad_norm": 6.432931423187256, "learning_rate": 6.987954582596412e-06, "loss": 0.3886, "step": 8110 }, { "epoch": 0.37620593692022264, "grad_norm": 4.907652854919434, "learning_rate": 6.987279287333993e-06, "loss": 0.2935, "step": 8111 }, { "epoch": 0.37625231910946194, "grad_norm": 7.128312110900879, "learning_rate": 6.9866039490182146e-06, "loss": 0.3131, "step": 8112 }, { "epoch": 0.3762987012987013, "grad_norm": 8.258403778076172, "learning_rate": 6.985928567663712e-06, "loss": 0.3487, "step": 8113 }, { "epoch": 0.37634508348794066, "grad_norm": 15.254003524780273, "learning_rate": 6.985253143285114e-06, "loss": 0.5078, "step": 8114 }, { "epoch": 0.37639146567717996, "grad_norm": 4.0662031173706055, "learning_rate": 6.984577675897053e-06, "loss": 0.321, "step": 8115 }, { "epoch": 0.3764378478664193, "grad_norm": 10.170454978942871, "learning_rate": 6.983902165514163e-06, "loss": 0.4805, "step": 8116 }, { "epoch": 0.3764842300556586, "grad_norm": 6.512055397033691, "learning_rate": 6.983226612151079e-06, "loss": 0.3485, "step": 8117 }, { "epoch": 0.376530612244898, "grad_norm": 4.8697075843811035, "learning_rate": 6.982551015822438e-06, "loss": 0.2503, "step": 8118 }, { "epoch": 0.3765769944341373, "grad_norm": 5.33427095413208, "learning_rate": 6.981875376542874e-06, "loss": 0.2682, "step": 8119 }, { "epoch": 0.37662337662337664, "grad_norm": 6.838464260101318, "learning_rate": 6.981199694327024e-06, "loss": 0.4094, "step": 8120 }, { "epoch": 0.37666975881261594, "grad_norm": 5.202762126922607, "learning_rate": 6.98052396918953e-06, "loss": 0.343, "step": 8121 }, { "epoch": 0.3767161410018553, "grad_norm": 7.076846599578857, "learning_rate": 6.979848201145028e-06, "loss": 0.3594, "step": 8122 }, { "epoch": 0.3767625231910946, "grad_norm": 9.789198875427246, "learning_rate": 6.979172390208158e-06, "loss": 0.2827, "step": 8123 }, { "epoch": 0.37680890538033396, "grad_norm": 6.468472957611084, "learning_rate": 6.978496536393564e-06, "loss": 0.2951, "step": 8124 }, { "epoch": 0.37685528756957326, "grad_norm": 4.468921661376953, "learning_rate": 6.977820639715885e-06, "loss": 0.308, "step": 8125 }, { "epoch": 0.3769016697588126, "grad_norm": 6.770009517669678, "learning_rate": 6.977144700189764e-06, "loss": 0.3163, "step": 8126 }, { "epoch": 0.3769480519480519, "grad_norm": 5.489294052124023, "learning_rate": 6.9764687178298474e-06, "loss": 0.2895, "step": 8127 }, { "epoch": 0.3769944341372913, "grad_norm": 6.702425956726074, "learning_rate": 6.975792692650778e-06, "loss": 0.2807, "step": 8128 }, { "epoch": 0.37704081632653064, "grad_norm": 4.167757511138916, "learning_rate": 6.975116624667202e-06, "loss": 0.2495, "step": 8129 }, { "epoch": 0.37708719851576994, "grad_norm": 6.885191440582275, "learning_rate": 6.974440513893765e-06, "loss": 0.3649, "step": 8130 }, { "epoch": 0.3771335807050093, "grad_norm": 6.819766998291016, "learning_rate": 6.973764360345116e-06, "loss": 0.3099, "step": 8131 }, { "epoch": 0.3771799628942486, "grad_norm": 6.443680763244629, "learning_rate": 6.973088164035906e-06, "loss": 0.3164, "step": 8132 }, { "epoch": 0.37722634508348796, "grad_norm": 5.672072410583496, "learning_rate": 6.972411924980779e-06, "loss": 0.4369, "step": 8133 }, { "epoch": 0.37727272727272726, "grad_norm": 4.70188045501709, "learning_rate": 6.9717356431943885e-06, "loss": 0.2405, "step": 8134 }, { "epoch": 0.3773191094619666, "grad_norm": 10.523701667785645, "learning_rate": 6.971059318691386e-06, "loss": 0.4462, "step": 8135 }, { "epoch": 0.3773654916512059, "grad_norm": 10.979435920715332, "learning_rate": 6.97038295148642e-06, "loss": 0.428, "step": 8136 }, { "epoch": 0.3774118738404453, "grad_norm": 7.136756896972656, "learning_rate": 6.9697065415941515e-06, "loss": 0.3722, "step": 8137 }, { "epoch": 0.3774582560296846, "grad_norm": 8.457340240478516, "learning_rate": 6.969030089029226e-06, "loss": 0.3673, "step": 8138 }, { "epoch": 0.37750463821892394, "grad_norm": 6.148313999176025, "learning_rate": 6.968353593806304e-06, "loss": 0.3435, "step": 8139 }, { "epoch": 0.37755102040816324, "grad_norm": 9.37641429901123, "learning_rate": 6.9676770559400385e-06, "loss": 0.4696, "step": 8140 }, { "epoch": 0.3775974025974026, "grad_norm": 9.81292724609375, "learning_rate": 6.967000475445087e-06, "loss": 0.3495, "step": 8141 }, { "epoch": 0.37764378478664196, "grad_norm": 5.917403697967529, "learning_rate": 6.96632385233611e-06, "loss": 0.3008, "step": 8142 }, { "epoch": 0.37769016697588126, "grad_norm": 5.28723669052124, "learning_rate": 6.965647186627762e-06, "loss": 0.3805, "step": 8143 }, { "epoch": 0.3777365491651206, "grad_norm": 7.934657573699951, "learning_rate": 6.964970478334705e-06, "loss": 0.2845, "step": 8144 }, { "epoch": 0.3777829313543599, "grad_norm": 8.58969497680664, "learning_rate": 6.9642937274715985e-06, "loss": 0.4057, "step": 8145 }, { "epoch": 0.3778293135435993, "grad_norm": 12.738970756530762, "learning_rate": 6.963616934053104e-06, "loss": 0.4259, "step": 8146 }, { "epoch": 0.3778756957328386, "grad_norm": 10.364118576049805, "learning_rate": 6.962940098093886e-06, "loss": 0.3398, "step": 8147 }, { "epoch": 0.37792207792207794, "grad_norm": 8.894451141357422, "learning_rate": 6.962263219608605e-06, "loss": 0.3164, "step": 8148 }, { "epoch": 0.37796846011131724, "grad_norm": 5.1944684982299805, "learning_rate": 6.9615862986119276e-06, "loss": 0.3226, "step": 8149 }, { "epoch": 0.3780148423005566, "grad_norm": 13.147529602050781, "learning_rate": 6.960909335118515e-06, "loss": 0.4724, "step": 8150 }, { "epoch": 0.3780612244897959, "grad_norm": 10.948877334594727, "learning_rate": 6.960232329143038e-06, "loss": 0.4692, "step": 8151 }, { "epoch": 0.37810760667903526, "grad_norm": 9.546751976013184, "learning_rate": 6.959555280700162e-06, "loss": 0.3634, "step": 8152 }, { "epoch": 0.37815398886827456, "grad_norm": 12.736432075500488, "learning_rate": 6.958878189804553e-06, "loss": 0.4036, "step": 8153 }, { "epoch": 0.3782003710575139, "grad_norm": 10.627198219299316, "learning_rate": 6.958201056470883e-06, "loss": 0.3793, "step": 8154 }, { "epoch": 0.3782467532467532, "grad_norm": 9.638023376464844, "learning_rate": 6.957523880713817e-06, "loss": 0.5258, "step": 8155 }, { "epoch": 0.3782931354359926, "grad_norm": 9.812512397766113, "learning_rate": 6.9568466625480304e-06, "loss": 0.3934, "step": 8156 }, { "epoch": 0.37833951762523194, "grad_norm": 8.240787506103516, "learning_rate": 6.956169401988193e-06, "loss": 0.4337, "step": 8157 }, { "epoch": 0.37838589981447124, "grad_norm": 6.505373001098633, "learning_rate": 6.955492099048977e-06, "loss": 0.2818, "step": 8158 }, { "epoch": 0.3784322820037106, "grad_norm": 8.526505470275879, "learning_rate": 6.954814753745057e-06, "loss": 0.3065, "step": 8159 }, { "epoch": 0.3784786641929499, "grad_norm": 5.94764518737793, "learning_rate": 6.954137366091105e-06, "loss": 0.3226, "step": 8160 }, { "epoch": 0.37852504638218926, "grad_norm": 9.415226936340332, "learning_rate": 6.953459936101799e-06, "loss": 0.3354, "step": 8161 }, { "epoch": 0.37857142857142856, "grad_norm": 5.613217353820801, "learning_rate": 6.9527824637918125e-06, "loss": 0.3305, "step": 8162 }, { "epoch": 0.3786178107606679, "grad_norm": 6.26491641998291, "learning_rate": 6.9521049491758245e-06, "loss": 0.3619, "step": 8163 }, { "epoch": 0.3786641929499072, "grad_norm": 9.10468864440918, "learning_rate": 6.951427392268512e-06, "loss": 0.4749, "step": 8164 }, { "epoch": 0.3787105751391466, "grad_norm": 6.3974103927612305, "learning_rate": 6.950749793084555e-06, "loss": 0.3555, "step": 8165 }, { "epoch": 0.3787569573283859, "grad_norm": 6.419728755950928, "learning_rate": 6.950072151638632e-06, "loss": 0.3124, "step": 8166 }, { "epoch": 0.37880333951762524, "grad_norm": 11.317704200744629, "learning_rate": 6.949394467945425e-06, "loss": 0.5289, "step": 8167 }, { "epoch": 0.37884972170686454, "grad_norm": 8.289798736572266, "learning_rate": 6.948716742019616e-06, "loss": 0.4443, "step": 8168 }, { "epoch": 0.3788961038961039, "grad_norm": 6.505293846130371, "learning_rate": 6.948038973875884e-06, "loss": 0.2605, "step": 8169 }, { "epoch": 0.37894248608534326, "grad_norm": 5.793064117431641, "learning_rate": 6.9473611635289175e-06, "loss": 0.4195, "step": 8170 }, { "epoch": 0.37898886827458256, "grad_norm": 3.4074466228485107, "learning_rate": 6.946683310993397e-06, "loss": 0.2886, "step": 8171 }, { "epoch": 0.3790352504638219, "grad_norm": 6.260814189910889, "learning_rate": 6.9460054162840095e-06, "loss": 0.3808, "step": 8172 }, { "epoch": 0.3790816326530612, "grad_norm": 10.331216812133789, "learning_rate": 6.945327479415442e-06, "loss": 0.4021, "step": 8173 }, { "epoch": 0.3791280148423006, "grad_norm": 4.818753242492676, "learning_rate": 6.94464950040238e-06, "loss": 0.3058, "step": 8174 }, { "epoch": 0.3791743970315399, "grad_norm": 6.405986785888672, "learning_rate": 6.943971479259513e-06, "loss": 0.3822, "step": 8175 }, { "epoch": 0.37922077922077924, "grad_norm": 7.460623264312744, "learning_rate": 6.943293416001529e-06, "loss": 0.4021, "step": 8176 }, { "epoch": 0.37926716141001854, "grad_norm": 4.691046714782715, "learning_rate": 6.9426153106431175e-06, "loss": 0.3192, "step": 8177 }, { "epoch": 0.3793135435992579, "grad_norm": 8.003405570983887, "learning_rate": 6.94193716319897e-06, "loss": 0.3101, "step": 8178 }, { "epoch": 0.3793599257884972, "grad_norm": 16.973329544067383, "learning_rate": 6.941258973683778e-06, "loss": 0.5671, "step": 8179 }, { "epoch": 0.37940630797773656, "grad_norm": 13.568628311157227, "learning_rate": 6.940580742112235e-06, "loss": 0.4067, "step": 8180 }, { "epoch": 0.37945269016697586, "grad_norm": 4.2499895095825195, "learning_rate": 6.939902468499033e-06, "loss": 0.296, "step": 8181 }, { "epoch": 0.3794990723562152, "grad_norm": 6.743593692779541, "learning_rate": 6.939224152858868e-06, "loss": 0.3724, "step": 8182 }, { "epoch": 0.3795454545454545, "grad_norm": 3.5908143520355225, "learning_rate": 6.938545795206434e-06, "loss": 0.2848, "step": 8183 }, { "epoch": 0.3795918367346939, "grad_norm": 5.997419834136963, "learning_rate": 6.937867395556428e-06, "loss": 0.3411, "step": 8184 }, { "epoch": 0.37963821892393323, "grad_norm": 6.359122276306152, "learning_rate": 6.9371889539235474e-06, "loss": 0.4287, "step": 8185 }, { "epoch": 0.37968460111317254, "grad_norm": 4.996589183807373, "learning_rate": 6.93651047032249e-06, "loss": 0.3526, "step": 8186 }, { "epoch": 0.3797309833024119, "grad_norm": 7.779634475708008, "learning_rate": 6.9358319447679535e-06, "loss": 0.3157, "step": 8187 }, { "epoch": 0.3797773654916512, "grad_norm": 6.655558109283447, "learning_rate": 6.935153377274639e-06, "loss": 0.2636, "step": 8188 }, { "epoch": 0.37982374768089056, "grad_norm": 7.680927753448486, "learning_rate": 6.934474767857249e-06, "loss": 0.2252, "step": 8189 }, { "epoch": 0.37987012987012986, "grad_norm": 12.08950138092041, "learning_rate": 6.933796116530481e-06, "loss": 0.3024, "step": 8190 }, { "epoch": 0.3799165120593692, "grad_norm": 8.390056610107422, "learning_rate": 6.933117423309042e-06, "loss": 0.3635, "step": 8191 }, { "epoch": 0.3799628942486085, "grad_norm": 5.927423477172852, "learning_rate": 6.932438688207634e-06, "loss": 0.2939, "step": 8192 }, { "epoch": 0.3800092764378479, "grad_norm": 5.695382595062256, "learning_rate": 6.931759911240958e-06, "loss": 0.2786, "step": 8193 }, { "epoch": 0.3800556586270872, "grad_norm": 9.03039836883545, "learning_rate": 6.931081092423725e-06, "loss": 0.3596, "step": 8194 }, { "epoch": 0.38010204081632654, "grad_norm": 7.468400955200195, "learning_rate": 6.930402231770639e-06, "loss": 0.3949, "step": 8195 }, { "epoch": 0.38014842300556584, "grad_norm": 5.172715663909912, "learning_rate": 6.929723329296405e-06, "loss": 0.3028, "step": 8196 }, { "epoch": 0.3801948051948052, "grad_norm": 10.759093284606934, "learning_rate": 6.929044385015735e-06, "loss": 0.5544, "step": 8197 }, { "epoch": 0.38024118738404455, "grad_norm": 4.596527099609375, "learning_rate": 6.928365398943334e-06, "loss": 0.3405, "step": 8198 }, { "epoch": 0.38028756957328386, "grad_norm": 8.716900825500488, "learning_rate": 6.927686371093914e-06, "loss": 0.3389, "step": 8199 }, { "epoch": 0.3803339517625232, "grad_norm": 6.539093017578125, "learning_rate": 6.927007301482187e-06, "loss": 0.4291, "step": 8200 }, { "epoch": 0.3803803339517625, "grad_norm": 5.689020156860352, "learning_rate": 6.926328190122862e-06, "loss": 0.3519, "step": 8201 }, { "epoch": 0.3804267161410019, "grad_norm": 8.664200782775879, "learning_rate": 6.9256490370306526e-06, "loss": 0.4373, "step": 8202 }, { "epoch": 0.3804730983302412, "grad_norm": 7.764151573181152, "learning_rate": 6.924969842220274e-06, "loss": 0.4479, "step": 8203 }, { "epoch": 0.38051948051948054, "grad_norm": 6.852559566497803, "learning_rate": 6.924290605706438e-06, "loss": 0.3891, "step": 8204 }, { "epoch": 0.38056586270871984, "grad_norm": 5.092979431152344, "learning_rate": 6.923611327503861e-06, "loss": 0.3085, "step": 8205 }, { "epoch": 0.3806122448979592, "grad_norm": 8.368620872497559, "learning_rate": 6.922932007627259e-06, "loss": 0.364, "step": 8206 }, { "epoch": 0.3806586270871985, "grad_norm": 14.299257278442383, "learning_rate": 6.92225264609135e-06, "loss": 0.4221, "step": 8207 }, { "epoch": 0.38070500927643786, "grad_norm": 9.531644821166992, "learning_rate": 6.921573242910851e-06, "loss": 0.3419, "step": 8208 }, { "epoch": 0.38075139146567716, "grad_norm": 6.582579612731934, "learning_rate": 6.920893798100481e-06, "loss": 0.2916, "step": 8209 }, { "epoch": 0.3807977736549165, "grad_norm": 7.1904168128967285, "learning_rate": 6.920214311674958e-06, "loss": 0.2155, "step": 8210 }, { "epoch": 0.3808441558441558, "grad_norm": 6.314632415771484, "learning_rate": 6.919534783649009e-06, "loss": 0.3245, "step": 8211 }, { "epoch": 0.3808905380333952, "grad_norm": 4.382369041442871, "learning_rate": 6.918855214037348e-06, "loss": 0.3089, "step": 8212 }, { "epoch": 0.38093692022263453, "grad_norm": 7.321004390716553, "learning_rate": 6.918175602854702e-06, "loss": 0.2603, "step": 8213 }, { "epoch": 0.38098330241187384, "grad_norm": 8.971514701843262, "learning_rate": 6.917495950115794e-06, "loss": 0.3828, "step": 8214 }, { "epoch": 0.3810296846011132, "grad_norm": 7.231348037719727, "learning_rate": 6.916816255835345e-06, "loss": 0.3421, "step": 8215 }, { "epoch": 0.3810760667903525, "grad_norm": 5.1922478675842285, "learning_rate": 6.916136520028087e-06, "loss": 0.376, "step": 8216 }, { "epoch": 0.38112244897959185, "grad_norm": 5.281991004943848, "learning_rate": 6.91545674270874e-06, "loss": 0.2415, "step": 8217 }, { "epoch": 0.38116883116883116, "grad_norm": 4.649451732635498, "learning_rate": 6.914776923892031e-06, "loss": 0.2789, "step": 8218 }, { "epoch": 0.3812152133580705, "grad_norm": 4.697080135345459, "learning_rate": 6.914097063592693e-06, "loss": 0.2874, "step": 8219 }, { "epoch": 0.3812615955473098, "grad_norm": 8.372320175170898, "learning_rate": 6.913417161825449e-06, "loss": 0.2961, "step": 8220 }, { "epoch": 0.3813079777365492, "grad_norm": 8.204771995544434, "learning_rate": 6.912737218605032e-06, "loss": 0.3592, "step": 8221 }, { "epoch": 0.3813543599257885, "grad_norm": 6.644350528717041, "learning_rate": 6.912057233946174e-06, "loss": 0.3508, "step": 8222 }, { "epoch": 0.38140074211502784, "grad_norm": 4.714345932006836, "learning_rate": 6.911377207863603e-06, "loss": 0.2611, "step": 8223 }, { "epoch": 0.38144712430426714, "grad_norm": 7.526765823364258, "learning_rate": 6.910697140372053e-06, "loss": 0.3639, "step": 8224 }, { "epoch": 0.3814935064935065, "grad_norm": 8.021310806274414, "learning_rate": 6.910017031486258e-06, "loss": 0.2833, "step": 8225 }, { "epoch": 0.3815398886827458, "grad_norm": 9.683290481567383, "learning_rate": 6.909336881220951e-06, "loss": 0.3638, "step": 8226 }, { "epoch": 0.38158627087198516, "grad_norm": 5.650367259979248, "learning_rate": 6.908656689590868e-06, "loss": 0.4031, "step": 8227 }, { "epoch": 0.3816326530612245, "grad_norm": 7.456267356872559, "learning_rate": 6.907976456610743e-06, "loss": 0.4796, "step": 8228 }, { "epoch": 0.3816790352504638, "grad_norm": 8.941631317138672, "learning_rate": 6.907296182295315e-06, "loss": 0.4306, "step": 8229 }, { "epoch": 0.3817254174397032, "grad_norm": 6.7844696044921875, "learning_rate": 6.906615866659322e-06, "loss": 0.3223, "step": 8230 }, { "epoch": 0.3817717996289425, "grad_norm": 6.999617576599121, "learning_rate": 6.9059355097175015e-06, "loss": 0.364, "step": 8231 }, { "epoch": 0.38181818181818183, "grad_norm": 8.229071617126465, "learning_rate": 6.905255111484592e-06, "loss": 0.4649, "step": 8232 }, { "epoch": 0.38186456400742114, "grad_norm": 7.020419120788574, "learning_rate": 6.904574671975338e-06, "loss": 0.3414, "step": 8233 }, { "epoch": 0.3819109461966605, "grad_norm": 7.460065841674805, "learning_rate": 6.903894191204476e-06, "loss": 0.3757, "step": 8234 }, { "epoch": 0.3819573283858998, "grad_norm": 4.942556381225586, "learning_rate": 6.903213669186753e-06, "loss": 0.2748, "step": 8235 }, { "epoch": 0.38200371057513915, "grad_norm": 6.416656017303467, "learning_rate": 6.902533105936908e-06, "loss": 0.2903, "step": 8236 }, { "epoch": 0.38205009276437846, "grad_norm": 5.096599578857422, "learning_rate": 6.901852501469687e-06, "loss": 0.2801, "step": 8237 }, { "epoch": 0.3820964749536178, "grad_norm": 15.667167663574219, "learning_rate": 6.901171855799836e-06, "loss": 0.5028, "step": 8238 }, { "epoch": 0.3821428571428571, "grad_norm": 10.201187133789062, "learning_rate": 6.9004911689420976e-06, "loss": 0.4445, "step": 8239 }, { "epoch": 0.3821892393320965, "grad_norm": 9.953624725341797, "learning_rate": 6.899810440911221e-06, "loss": 0.5004, "step": 8240 }, { "epoch": 0.38223562152133583, "grad_norm": 6.095146656036377, "learning_rate": 6.899129671721954e-06, "loss": 0.4542, "step": 8241 }, { "epoch": 0.38228200371057514, "grad_norm": 11.119933128356934, "learning_rate": 6.898448861389043e-06, "loss": 0.4605, "step": 8242 }, { "epoch": 0.3823283858998145, "grad_norm": 6.022929668426514, "learning_rate": 6.897768009927239e-06, "loss": 0.2935, "step": 8243 }, { "epoch": 0.3823747680890538, "grad_norm": 6.681565761566162, "learning_rate": 6.8970871173512935e-06, "loss": 0.2531, "step": 8244 }, { "epoch": 0.38242115027829315, "grad_norm": 5.111189365386963, "learning_rate": 6.896406183675954e-06, "loss": 0.3106, "step": 8245 }, { "epoch": 0.38246753246753246, "grad_norm": 6.278592586517334, "learning_rate": 6.895725208915978e-06, "loss": 0.3313, "step": 8246 }, { "epoch": 0.3825139146567718, "grad_norm": 7.875368118286133, "learning_rate": 6.8950441930861125e-06, "loss": 0.2609, "step": 8247 }, { "epoch": 0.3825602968460111, "grad_norm": 4.530153274536133, "learning_rate": 6.894363136201114e-06, "loss": 0.2441, "step": 8248 }, { "epoch": 0.3826066790352505, "grad_norm": 7.28264856338501, "learning_rate": 6.893682038275739e-06, "loss": 0.3615, "step": 8249 }, { "epoch": 0.3826530612244898, "grad_norm": 8.023200035095215, "learning_rate": 6.89300089932474e-06, "loss": 0.3326, "step": 8250 }, { "epoch": 0.38269944341372913, "grad_norm": 5.95427942276001, "learning_rate": 6.892319719362876e-06, "loss": 0.3389, "step": 8251 }, { "epoch": 0.38274582560296844, "grad_norm": 6.121531963348389, "learning_rate": 6.891638498404902e-06, "loss": 0.3801, "step": 8252 }, { "epoch": 0.3827922077922078, "grad_norm": 5.070008754730225, "learning_rate": 6.8909572364655785e-06, "loss": 0.3087, "step": 8253 }, { "epoch": 0.3828385899814471, "grad_norm": 7.499817848205566, "learning_rate": 6.890275933559663e-06, "loss": 0.3985, "step": 8254 }, { "epoch": 0.38288497217068646, "grad_norm": 14.14210319519043, "learning_rate": 6.889594589701919e-06, "loss": 0.4201, "step": 8255 }, { "epoch": 0.3829313543599258, "grad_norm": 9.383633613586426, "learning_rate": 6.888913204907103e-06, "loss": 0.3111, "step": 8256 }, { "epoch": 0.3829777365491651, "grad_norm": 3.689558982849121, "learning_rate": 6.88823177918998e-06, "loss": 0.2686, "step": 8257 }, { "epoch": 0.3830241187384045, "grad_norm": 10.359001159667969, "learning_rate": 6.88755031256531e-06, "loss": 0.2697, "step": 8258 }, { "epoch": 0.3830705009276438, "grad_norm": 8.589332580566406, "learning_rate": 6.886868805047857e-06, "loss": 0.3689, "step": 8259 }, { "epoch": 0.38311688311688313, "grad_norm": 7.939052581787109, "learning_rate": 6.886187256652389e-06, "loss": 0.314, "step": 8260 }, { "epoch": 0.38316326530612244, "grad_norm": 11.769670486450195, "learning_rate": 6.885505667393669e-06, "loss": 0.3913, "step": 8261 }, { "epoch": 0.3832096474953618, "grad_norm": 8.154374122619629, "learning_rate": 6.88482403728646e-06, "loss": 0.2172, "step": 8262 }, { "epoch": 0.3832560296846011, "grad_norm": 9.482492446899414, "learning_rate": 6.8841423663455365e-06, "loss": 0.4559, "step": 8263 }, { "epoch": 0.38330241187384045, "grad_norm": 6.197371959686279, "learning_rate": 6.88346065458566e-06, "loss": 0.2596, "step": 8264 }, { "epoch": 0.38334879406307976, "grad_norm": 5.679816246032715, "learning_rate": 6.8827789020216005e-06, "loss": 0.3406, "step": 8265 }, { "epoch": 0.3833951762523191, "grad_norm": 7.643085479736328, "learning_rate": 6.882097108668132e-06, "loss": 0.4942, "step": 8266 }, { "epoch": 0.3834415584415584, "grad_norm": 7.080084800720215, "learning_rate": 6.881415274540021e-06, "loss": 0.3505, "step": 8267 }, { "epoch": 0.3834879406307978, "grad_norm": 6.383350849151611, "learning_rate": 6.880733399652042e-06, "loss": 0.2746, "step": 8268 }, { "epoch": 0.38353432282003713, "grad_norm": 6.353720188140869, "learning_rate": 6.8800514840189636e-06, "loss": 0.3025, "step": 8269 }, { "epoch": 0.38358070500927643, "grad_norm": 10.74422836303711, "learning_rate": 6.879369527655562e-06, "loss": 0.2678, "step": 8270 }, { "epoch": 0.3836270871985158, "grad_norm": 5.867310047149658, "learning_rate": 6.878687530576612e-06, "loss": 0.3538, "step": 8271 }, { "epoch": 0.3836734693877551, "grad_norm": 5.507946491241455, "learning_rate": 6.878005492796886e-06, "loss": 0.338, "step": 8272 }, { "epoch": 0.38371985157699445, "grad_norm": 8.98784351348877, "learning_rate": 6.877323414331163e-06, "loss": 0.4667, "step": 8273 }, { "epoch": 0.38376623376623376, "grad_norm": 8.496969223022461, "learning_rate": 6.876641295194218e-06, "loss": 0.3353, "step": 8274 }, { "epoch": 0.3838126159554731, "grad_norm": 7.2964067459106445, "learning_rate": 6.875959135400829e-06, "loss": 0.3251, "step": 8275 }, { "epoch": 0.3838589981447124, "grad_norm": 13.121760368347168, "learning_rate": 6.875276934965773e-06, "loss": 0.4676, "step": 8276 }, { "epoch": 0.3839053803339518, "grad_norm": 5.279290199279785, "learning_rate": 6.8745946939038345e-06, "loss": 0.3002, "step": 8277 }, { "epoch": 0.3839517625231911, "grad_norm": 9.929167747497559, "learning_rate": 6.873912412229788e-06, "loss": 0.4116, "step": 8278 }, { "epoch": 0.38399814471243043, "grad_norm": 7.925321578979492, "learning_rate": 6.87323008995842e-06, "loss": 0.3639, "step": 8279 }, { "epoch": 0.38404452690166974, "grad_norm": 7.764542102813721, "learning_rate": 6.8725477271045085e-06, "loss": 0.3737, "step": 8280 }, { "epoch": 0.3840909090909091, "grad_norm": 7.363285064697266, "learning_rate": 6.871865323682838e-06, "loss": 0.3475, "step": 8281 }, { "epoch": 0.3841372912801484, "grad_norm": 6.3875627517700195, "learning_rate": 6.871182879708194e-06, "loss": 0.3715, "step": 8282 }, { "epoch": 0.38418367346938775, "grad_norm": 4.460283279418945, "learning_rate": 6.870500395195358e-06, "loss": 0.3701, "step": 8283 }, { "epoch": 0.3842300556586271, "grad_norm": 9.333629608154297, "learning_rate": 6.86981787015912e-06, "loss": 0.4881, "step": 8284 }, { "epoch": 0.3842764378478664, "grad_norm": 13.563718795776367, "learning_rate": 6.869135304614262e-06, "loss": 0.4155, "step": 8285 }, { "epoch": 0.3843228200371058, "grad_norm": 16.811748504638672, "learning_rate": 6.868452698575574e-06, "loss": 0.4025, "step": 8286 }, { "epoch": 0.3843692022263451, "grad_norm": 7.411314487457275, "learning_rate": 6.867770052057844e-06, "loss": 0.4106, "step": 8287 }, { "epoch": 0.38441558441558443, "grad_norm": 6.576017379760742, "learning_rate": 6.867087365075861e-06, "loss": 0.3499, "step": 8288 }, { "epoch": 0.38446196660482373, "grad_norm": 10.627507209777832, "learning_rate": 6.866404637644415e-06, "loss": 0.4306, "step": 8289 }, { "epoch": 0.3845083487940631, "grad_norm": 7.015232086181641, "learning_rate": 6.865721869778297e-06, "loss": 0.3015, "step": 8290 }, { "epoch": 0.3845547309833024, "grad_norm": 6.5994439125061035, "learning_rate": 6.865039061492298e-06, "loss": 0.3862, "step": 8291 }, { "epoch": 0.38460111317254175, "grad_norm": 5.321170330047607, "learning_rate": 6.864356212801212e-06, "loss": 0.3897, "step": 8292 }, { "epoch": 0.38464749536178106, "grad_norm": 7.247717380523682, "learning_rate": 6.863673323719831e-06, "loss": 0.3452, "step": 8293 }, { "epoch": 0.3846938775510204, "grad_norm": 8.356022834777832, "learning_rate": 6.862990394262953e-06, "loss": 0.3131, "step": 8294 }, { "epoch": 0.3847402597402597, "grad_norm": 10.684012413024902, "learning_rate": 6.862307424445368e-06, "loss": 0.3649, "step": 8295 }, { "epoch": 0.3847866419294991, "grad_norm": 5.679028034210205, "learning_rate": 6.861624414281875e-06, "loss": 0.34, "step": 8296 }, { "epoch": 0.38483302411873843, "grad_norm": 6.3535919189453125, "learning_rate": 6.860941363787273e-06, "loss": 0.397, "step": 8297 }, { "epoch": 0.38487940630797773, "grad_norm": 5.427221298217773, "learning_rate": 6.860258272976357e-06, "loss": 0.3143, "step": 8298 }, { "epoch": 0.3849257884972171, "grad_norm": 5.744736194610596, "learning_rate": 6.859575141863928e-06, "loss": 0.3646, "step": 8299 }, { "epoch": 0.3849721706864564, "grad_norm": 5.091468811035156, "learning_rate": 6.858891970464781e-06, "loss": 0.3094, "step": 8300 }, { "epoch": 0.38501855287569575, "grad_norm": 7.385092735290527, "learning_rate": 6.858208758793723e-06, "loss": 0.4464, "step": 8301 }, { "epoch": 0.38506493506493505, "grad_norm": 11.258821487426758, "learning_rate": 6.857525506865551e-06, "loss": 0.4896, "step": 8302 }, { "epoch": 0.3851113172541744, "grad_norm": 6.750227928161621, "learning_rate": 6.856842214695069e-06, "loss": 0.4272, "step": 8303 }, { "epoch": 0.3851576994434137, "grad_norm": 6.591118335723877, "learning_rate": 6.85615888229708e-06, "loss": 0.4043, "step": 8304 }, { "epoch": 0.3852040816326531, "grad_norm": 9.247368812561035, "learning_rate": 6.855475509686387e-06, "loss": 0.3703, "step": 8305 }, { "epoch": 0.3852504638218924, "grad_norm": 6.547163486480713, "learning_rate": 6.854792096877796e-06, "loss": 0.2574, "step": 8306 }, { "epoch": 0.38529684601113173, "grad_norm": 8.705082893371582, "learning_rate": 6.854108643886114e-06, "loss": 0.3805, "step": 8307 }, { "epoch": 0.38534322820037104, "grad_norm": 8.905548095703125, "learning_rate": 6.853425150726144e-06, "loss": 0.3938, "step": 8308 }, { "epoch": 0.3853896103896104, "grad_norm": 4.189410209655762, "learning_rate": 6.8527416174126975e-06, "loss": 0.3463, "step": 8309 }, { "epoch": 0.3854359925788497, "grad_norm": 13.533418655395508, "learning_rate": 6.852058043960579e-06, "loss": 0.3683, "step": 8310 }, { "epoch": 0.38548237476808905, "grad_norm": 5.8112688064575195, "learning_rate": 6.851374430384601e-06, "loss": 0.3684, "step": 8311 }, { "epoch": 0.3855287569573284, "grad_norm": 7.729397773742676, "learning_rate": 6.850690776699574e-06, "loss": 0.4153, "step": 8312 }, { "epoch": 0.3855751391465677, "grad_norm": 7.448493957519531, "learning_rate": 6.850007082920304e-06, "loss": 0.4664, "step": 8313 }, { "epoch": 0.38562152133580707, "grad_norm": 5.188488483428955, "learning_rate": 6.849323349061609e-06, "loss": 0.3647, "step": 8314 }, { "epoch": 0.3856679035250464, "grad_norm": 9.818428039550781, "learning_rate": 6.8486395751382985e-06, "loss": 0.4382, "step": 8315 }, { "epoch": 0.38571428571428573, "grad_norm": 8.792911529541016, "learning_rate": 6.847955761165186e-06, "loss": 0.4585, "step": 8316 }, { "epoch": 0.38576066790352503, "grad_norm": 8.46621036529541, "learning_rate": 6.847271907157087e-06, "loss": 0.2764, "step": 8317 }, { "epoch": 0.3858070500927644, "grad_norm": 6.679958343505859, "learning_rate": 6.846588013128818e-06, "loss": 0.3537, "step": 8318 }, { "epoch": 0.3858534322820037, "grad_norm": 5.066192626953125, "learning_rate": 6.845904079095191e-06, "loss": 0.3764, "step": 8319 }, { "epoch": 0.38589981447124305, "grad_norm": 6.134297847747803, "learning_rate": 6.845220105071028e-06, "loss": 0.2565, "step": 8320 }, { "epoch": 0.38594619666048235, "grad_norm": 3.2782599925994873, "learning_rate": 6.844536091071143e-06, "loss": 0.3344, "step": 8321 }, { "epoch": 0.3859925788497217, "grad_norm": 7.3040618896484375, "learning_rate": 6.843852037110357e-06, "loss": 0.3251, "step": 8322 }, { "epoch": 0.386038961038961, "grad_norm": 5.629528522491455, "learning_rate": 6.843167943203491e-06, "loss": 0.4056, "step": 8323 }, { "epoch": 0.3860853432282004, "grad_norm": 9.31749153137207, "learning_rate": 6.842483809365361e-06, "loss": 0.3455, "step": 8324 }, { "epoch": 0.38613172541743973, "grad_norm": 4.782345294952393, "learning_rate": 6.841799635610791e-06, "loss": 0.3741, "step": 8325 }, { "epoch": 0.38617810760667903, "grad_norm": 7.113462924957275, "learning_rate": 6.841115421954605e-06, "loss": 0.3229, "step": 8326 }, { "epoch": 0.3862244897959184, "grad_norm": 9.220866203308105, "learning_rate": 6.8404311684116235e-06, "loss": 0.4383, "step": 8327 }, { "epoch": 0.3862708719851577, "grad_norm": 6.515564441680908, "learning_rate": 6.8397468749966735e-06, "loss": 0.3801, "step": 8328 }, { "epoch": 0.38631725417439705, "grad_norm": 4.378541469573975, "learning_rate": 6.839062541724575e-06, "loss": 0.3174, "step": 8329 }, { "epoch": 0.38636363636363635, "grad_norm": 7.602086067199707, "learning_rate": 6.838378168610158e-06, "loss": 0.475, "step": 8330 }, { "epoch": 0.3864100185528757, "grad_norm": 4.364452838897705, "learning_rate": 6.837693755668247e-06, "loss": 0.3286, "step": 8331 }, { "epoch": 0.386456400742115, "grad_norm": 6.328337669372559, "learning_rate": 6.837009302913671e-06, "loss": 0.3587, "step": 8332 }, { "epoch": 0.38650278293135437, "grad_norm": 9.593107223510742, "learning_rate": 6.836324810361255e-06, "loss": 0.3916, "step": 8333 }, { "epoch": 0.3865491651205937, "grad_norm": 8.586213111877441, "learning_rate": 6.835640278025834e-06, "loss": 0.4846, "step": 8334 }, { "epoch": 0.38659554730983303, "grad_norm": 5.607861518859863, "learning_rate": 6.834955705922233e-06, "loss": 0.2956, "step": 8335 }, { "epoch": 0.38664192949907233, "grad_norm": 8.855949401855469, "learning_rate": 6.834271094065284e-06, "loss": 0.3851, "step": 8336 }, { "epoch": 0.3866883116883117, "grad_norm": 4.318343639373779, "learning_rate": 6.833586442469819e-06, "loss": 0.3037, "step": 8337 }, { "epoch": 0.386734693877551, "grad_norm": 4.92936897277832, "learning_rate": 6.8329017511506714e-06, "loss": 0.3474, "step": 8338 }, { "epoch": 0.38678107606679035, "grad_norm": 4.684435844421387, "learning_rate": 6.832217020122675e-06, "loss": 0.3448, "step": 8339 }, { "epoch": 0.3868274582560297, "grad_norm": 10.672905921936035, "learning_rate": 6.831532249400662e-06, "loss": 0.3655, "step": 8340 }, { "epoch": 0.386873840445269, "grad_norm": 9.551438331604004, "learning_rate": 6.830847438999469e-06, "loss": 0.4339, "step": 8341 }, { "epoch": 0.38692022263450837, "grad_norm": 7.175065517425537, "learning_rate": 6.830162588933933e-06, "loss": 0.3328, "step": 8342 }, { "epoch": 0.3869666048237477, "grad_norm": 5.896529674530029, "learning_rate": 6.829477699218889e-06, "loss": 0.1923, "step": 8343 }, { "epoch": 0.38701298701298703, "grad_norm": 10.10466480255127, "learning_rate": 6.8287927698691745e-06, "loss": 0.3958, "step": 8344 }, { "epoch": 0.38705936920222633, "grad_norm": 5.480484485626221, "learning_rate": 6.8281078008996305e-06, "loss": 0.2647, "step": 8345 }, { "epoch": 0.3871057513914657, "grad_norm": 5.775613307952881, "learning_rate": 6.827422792325094e-06, "loss": 0.3393, "step": 8346 }, { "epoch": 0.387152133580705, "grad_norm": 5.866580486297607, "learning_rate": 6.826737744160408e-06, "loss": 0.258, "step": 8347 }, { "epoch": 0.38719851576994435, "grad_norm": 7.490051746368408, "learning_rate": 6.826052656420412e-06, "loss": 0.4675, "step": 8348 }, { "epoch": 0.38724489795918365, "grad_norm": 3.328807830810547, "learning_rate": 6.8253675291199485e-06, "loss": 0.3305, "step": 8349 }, { "epoch": 0.387291280148423, "grad_norm": 8.792695045471191, "learning_rate": 6.82468236227386e-06, "loss": 0.3224, "step": 8350 }, { "epoch": 0.3873376623376623, "grad_norm": 6.064157009124756, "learning_rate": 6.823997155896991e-06, "loss": 0.3501, "step": 8351 }, { "epoch": 0.38738404452690167, "grad_norm": 16.95527458190918, "learning_rate": 6.823311910004184e-06, "loss": 0.4864, "step": 8352 }, { "epoch": 0.387430426716141, "grad_norm": 7.299379348754883, "learning_rate": 6.822626624610288e-06, "loss": 0.385, "step": 8353 }, { "epoch": 0.38747680890538033, "grad_norm": 5.712416172027588, "learning_rate": 6.821941299730146e-06, "loss": 0.3991, "step": 8354 }, { "epoch": 0.3875231910946197, "grad_norm": 6.861723899841309, "learning_rate": 6.821255935378608e-06, "loss": 0.3119, "step": 8355 }, { "epoch": 0.387569573283859, "grad_norm": 9.255704879760742, "learning_rate": 6.82057053157052e-06, "loss": 0.3711, "step": 8356 }, { "epoch": 0.38761595547309835, "grad_norm": 10.45799446105957, "learning_rate": 6.819885088320732e-06, "loss": 0.3469, "step": 8357 }, { "epoch": 0.38766233766233765, "grad_norm": 5.487917423248291, "learning_rate": 6.819199605644093e-06, "loss": 0.3861, "step": 8358 }, { "epoch": 0.387708719851577, "grad_norm": 8.907683372497559, "learning_rate": 6.818514083555455e-06, "loss": 0.4357, "step": 8359 }, { "epoch": 0.3877551020408163, "grad_norm": 3.8577520847320557, "learning_rate": 6.8178285220696686e-06, "loss": 0.2684, "step": 8360 }, { "epoch": 0.38780148423005567, "grad_norm": 5.3762359619140625, "learning_rate": 6.817142921201586e-06, "loss": 0.3481, "step": 8361 }, { "epoch": 0.387847866419295, "grad_norm": 5.800303936004639, "learning_rate": 6.8164572809660605e-06, "loss": 0.3627, "step": 8362 }, { "epoch": 0.38789424860853433, "grad_norm": 7.765194416046143, "learning_rate": 6.815771601377944e-06, "loss": 0.3545, "step": 8363 }, { "epoch": 0.38794063079777363, "grad_norm": 5.288536548614502, "learning_rate": 6.815085882452096e-06, "loss": 0.3275, "step": 8364 }, { "epoch": 0.387987012987013, "grad_norm": 6.6955060958862305, "learning_rate": 6.814400124203369e-06, "loss": 0.3038, "step": 8365 }, { "epoch": 0.3880333951762523, "grad_norm": 6.404494762420654, "learning_rate": 6.813714326646619e-06, "loss": 0.4074, "step": 8366 }, { "epoch": 0.38807977736549165, "grad_norm": 5.924474239349365, "learning_rate": 6.813028489796707e-06, "loss": 0.2793, "step": 8367 }, { "epoch": 0.388126159554731, "grad_norm": 6.922695159912109, "learning_rate": 6.812342613668486e-06, "loss": 0.3569, "step": 8368 }, { "epoch": 0.3881725417439703, "grad_norm": 4.16246223449707, "learning_rate": 6.811656698276822e-06, "loss": 0.2257, "step": 8369 }, { "epoch": 0.38821892393320967, "grad_norm": 7.871577262878418, "learning_rate": 6.810970743636569e-06, "loss": 0.3305, "step": 8370 }, { "epoch": 0.38826530612244897, "grad_norm": 5.514797687530518, "learning_rate": 6.81028474976259e-06, "loss": 0.3069, "step": 8371 }, { "epoch": 0.38831168831168833, "grad_norm": 8.053380966186523, "learning_rate": 6.809598716669747e-06, "loss": 0.3603, "step": 8372 }, { "epoch": 0.38835807050092763, "grad_norm": 7.932482719421387, "learning_rate": 6.808912644372903e-06, "loss": 0.4039, "step": 8373 }, { "epoch": 0.388404452690167, "grad_norm": 8.05090045928955, "learning_rate": 6.808226532886919e-06, "loss": 0.4663, "step": 8374 }, { "epoch": 0.3884508348794063, "grad_norm": 4.726670742034912, "learning_rate": 6.807540382226662e-06, "loss": 0.3735, "step": 8375 }, { "epoch": 0.38849721706864565, "grad_norm": 8.932908058166504, "learning_rate": 6.806854192406995e-06, "loss": 0.2837, "step": 8376 }, { "epoch": 0.38854359925788495, "grad_norm": 7.679562091827393, "learning_rate": 6.806167963442786e-06, "loss": 0.3066, "step": 8377 }, { "epoch": 0.3885899814471243, "grad_norm": 11.05367660522461, "learning_rate": 6.805481695348901e-06, "loss": 0.3231, "step": 8378 }, { "epoch": 0.3886363636363636, "grad_norm": 13.38875675201416, "learning_rate": 6.804795388140206e-06, "loss": 0.3791, "step": 8379 }, { "epoch": 0.38868274582560297, "grad_norm": 6.703845024108887, "learning_rate": 6.804109041831573e-06, "loss": 0.4471, "step": 8380 }, { "epoch": 0.3887291280148423, "grad_norm": 8.016149520874023, "learning_rate": 6.803422656437868e-06, "loss": 0.3445, "step": 8381 }, { "epoch": 0.38877551020408163, "grad_norm": 6.784495830535889, "learning_rate": 6.802736231973961e-06, "loss": 0.3751, "step": 8382 }, { "epoch": 0.388821892393321, "grad_norm": 4.831977367401123, "learning_rate": 6.802049768454726e-06, "loss": 0.2777, "step": 8383 }, { "epoch": 0.3888682745825603, "grad_norm": 10.825560569763184, "learning_rate": 6.801363265895033e-06, "loss": 0.4895, "step": 8384 }, { "epoch": 0.38891465677179965, "grad_norm": 6.317519664764404, "learning_rate": 6.800676724309754e-06, "loss": 0.3641, "step": 8385 }, { "epoch": 0.38896103896103895, "grad_norm": 3.793929100036621, "learning_rate": 6.799990143713766e-06, "loss": 0.2676, "step": 8386 }, { "epoch": 0.3890074211502783, "grad_norm": 5.8582329750061035, "learning_rate": 6.799303524121939e-06, "loss": 0.3291, "step": 8387 }, { "epoch": 0.3890538033395176, "grad_norm": 8.769356727600098, "learning_rate": 6.798616865549149e-06, "loss": 0.4652, "step": 8388 }, { "epoch": 0.38910018552875697, "grad_norm": 6.3944501876831055, "learning_rate": 6.797930168010276e-06, "loss": 0.3601, "step": 8389 }, { "epoch": 0.3891465677179963, "grad_norm": 10.399982452392578, "learning_rate": 6.797243431520193e-06, "loss": 0.4282, "step": 8390 }, { "epoch": 0.38919294990723563, "grad_norm": 5.2165045738220215, "learning_rate": 6.796556656093779e-06, "loss": 0.2153, "step": 8391 }, { "epoch": 0.38923933209647493, "grad_norm": 4.949163436889648, "learning_rate": 6.795869841745912e-06, "loss": 0.3482, "step": 8392 }, { "epoch": 0.3892857142857143, "grad_norm": 4.8290815353393555, "learning_rate": 6.7951829884914725e-06, "loss": 0.3234, "step": 8393 }, { "epoch": 0.3893320964749536, "grad_norm": 13.988826751708984, "learning_rate": 6.794496096345341e-06, "loss": 0.3503, "step": 8394 }, { "epoch": 0.38937847866419295, "grad_norm": 8.723886489868164, "learning_rate": 6.793809165322398e-06, "loss": 0.4539, "step": 8395 }, { "epoch": 0.3894248608534323, "grad_norm": 7.910956382751465, "learning_rate": 6.793122195437525e-06, "loss": 0.3633, "step": 8396 }, { "epoch": 0.3894712430426716, "grad_norm": 15.132369041442871, "learning_rate": 6.792435186705606e-06, "loss": 0.4289, "step": 8397 }, { "epoch": 0.38951762523191097, "grad_norm": 6.223965167999268, "learning_rate": 6.791748139141523e-06, "loss": 0.2263, "step": 8398 }, { "epoch": 0.38956400742115027, "grad_norm": 8.446067810058594, "learning_rate": 6.791061052760162e-06, "loss": 0.4078, "step": 8399 }, { "epoch": 0.38961038961038963, "grad_norm": 8.978629112243652, "learning_rate": 6.7903739275764095e-06, "loss": 0.457, "step": 8400 }, { "epoch": 0.38965677179962893, "grad_norm": 6.959397792816162, "learning_rate": 6.789686763605147e-06, "loss": 0.2321, "step": 8401 }, { "epoch": 0.3897031539888683, "grad_norm": 7.370920658111572, "learning_rate": 6.788999560861269e-06, "loss": 0.3354, "step": 8402 }, { "epoch": 0.3897495361781076, "grad_norm": 8.041852951049805, "learning_rate": 6.7883123193596555e-06, "loss": 0.4118, "step": 8403 }, { "epoch": 0.38979591836734695, "grad_norm": 6.152374267578125, "learning_rate": 6.7876250391152e-06, "loss": 0.4078, "step": 8404 }, { "epoch": 0.38984230055658625, "grad_norm": 4.7496185302734375, "learning_rate": 6.786937720142792e-06, "loss": 0.2753, "step": 8405 }, { "epoch": 0.3898886827458256, "grad_norm": 8.28027629852295, "learning_rate": 6.78625036245732e-06, "loss": 0.3923, "step": 8406 }, { "epoch": 0.3899350649350649, "grad_norm": 5.727871417999268, "learning_rate": 6.785562966073675e-06, "loss": 0.3101, "step": 8407 }, { "epoch": 0.38998144712430427, "grad_norm": 7.025694847106934, "learning_rate": 6.784875531006751e-06, "loss": 0.3108, "step": 8408 }, { "epoch": 0.3900278293135436, "grad_norm": 19.837682723999023, "learning_rate": 6.784188057271441e-06, "loss": 0.3778, "step": 8409 }, { "epoch": 0.39007421150278293, "grad_norm": 5.6299333572387695, "learning_rate": 6.783500544882638e-06, "loss": 0.2775, "step": 8410 }, { "epoch": 0.3901205936920223, "grad_norm": 4.752004623413086, "learning_rate": 6.7828129938552345e-06, "loss": 0.3041, "step": 8411 }, { "epoch": 0.3901669758812616, "grad_norm": 5.93059778213501, "learning_rate": 6.782125404204128e-06, "loss": 0.3048, "step": 8412 }, { "epoch": 0.39021335807050095, "grad_norm": 8.94828987121582, "learning_rate": 6.781437775944216e-06, "loss": 0.4243, "step": 8413 }, { "epoch": 0.39025974025974025, "grad_norm": 5.485703468322754, "learning_rate": 6.7807501090903925e-06, "loss": 0.3644, "step": 8414 }, { "epoch": 0.3903061224489796, "grad_norm": 13.65444278717041, "learning_rate": 6.780062403657558e-06, "loss": 0.4319, "step": 8415 }, { "epoch": 0.3903525046382189, "grad_norm": 8.741044998168945, "learning_rate": 6.7793746596606115e-06, "loss": 0.4328, "step": 8416 }, { "epoch": 0.39039888682745827, "grad_norm": 6.081540584564209, "learning_rate": 6.77868687711445e-06, "loss": 0.3616, "step": 8417 }, { "epoch": 0.39044526901669757, "grad_norm": 5.077337741851807, "learning_rate": 6.777999056033976e-06, "loss": 0.2495, "step": 8418 }, { "epoch": 0.39049165120593693, "grad_norm": 6.465485095977783, "learning_rate": 6.777311196434091e-06, "loss": 0.3798, "step": 8419 }, { "epoch": 0.39053803339517623, "grad_norm": 9.874552726745605, "learning_rate": 6.776623298329694e-06, "loss": 0.24, "step": 8420 }, { "epoch": 0.3905844155844156, "grad_norm": 9.878207206726074, "learning_rate": 6.775935361735693e-06, "loss": 0.3073, "step": 8421 }, { "epoch": 0.3906307977736549, "grad_norm": 5.006924629211426, "learning_rate": 6.775247386666987e-06, "loss": 0.3928, "step": 8422 }, { "epoch": 0.39067717996289425, "grad_norm": 8.209489822387695, "learning_rate": 6.774559373138484e-06, "loss": 0.4288, "step": 8423 }, { "epoch": 0.3907235621521336, "grad_norm": 7.013293266296387, "learning_rate": 6.7738713211650885e-06, "loss": 0.3972, "step": 8424 }, { "epoch": 0.3907699443413729, "grad_norm": 5.682033061981201, "learning_rate": 6.773183230761706e-06, "loss": 0.3291, "step": 8425 }, { "epoch": 0.39081632653061227, "grad_norm": 5.161670207977295, "learning_rate": 6.772495101943243e-06, "loss": 0.3052, "step": 8426 }, { "epoch": 0.39086270871985157, "grad_norm": 8.103262901306152, "learning_rate": 6.77180693472461e-06, "loss": 0.369, "step": 8427 }, { "epoch": 0.39090909090909093, "grad_norm": 4.838174819946289, "learning_rate": 6.771118729120714e-06, "loss": 0.3498, "step": 8428 }, { "epoch": 0.39095547309833023, "grad_norm": 13.390012741088867, "learning_rate": 6.770430485146464e-06, "loss": 0.5091, "step": 8429 }, { "epoch": 0.3910018552875696, "grad_norm": 5.2583441734313965, "learning_rate": 6.769742202816773e-06, "loss": 0.282, "step": 8430 }, { "epoch": 0.3910482374768089, "grad_norm": 6.935766220092773, "learning_rate": 6.769053882146548e-06, "loss": 0.3913, "step": 8431 }, { "epoch": 0.39109461966604825, "grad_norm": 5.624627590179443, "learning_rate": 6.7683655231507065e-06, "loss": 0.397, "step": 8432 }, { "epoch": 0.39114100185528755, "grad_norm": 8.670798301696777, "learning_rate": 6.767677125844157e-06, "loss": 0.4484, "step": 8433 }, { "epoch": 0.3911873840445269, "grad_norm": 4.528435707092285, "learning_rate": 6.766988690241816e-06, "loss": 0.2591, "step": 8434 }, { "epoch": 0.3912337662337662, "grad_norm": 4.544785499572754, "learning_rate": 6.7663002163585955e-06, "loss": 0.2593, "step": 8435 }, { "epoch": 0.39128014842300557, "grad_norm": 10.141786575317383, "learning_rate": 6.765611704209413e-06, "loss": 0.3744, "step": 8436 }, { "epoch": 0.39132653061224487, "grad_norm": 5.3458943367004395, "learning_rate": 6.764923153809186e-06, "loss": 0.2363, "step": 8437 }, { "epoch": 0.39137291280148423, "grad_norm": 7.944349765777588, "learning_rate": 6.764234565172827e-06, "loss": 0.4014, "step": 8438 }, { "epoch": 0.3914192949907236, "grad_norm": 6.548993110656738, "learning_rate": 6.763545938315259e-06, "loss": 0.2482, "step": 8439 }, { "epoch": 0.3914656771799629, "grad_norm": 7.9390177726745605, "learning_rate": 6.762857273251396e-06, "loss": 0.4721, "step": 8440 }, { "epoch": 0.39151205936920225, "grad_norm": 10.622932434082031, "learning_rate": 6.762168569996162e-06, "loss": 0.475, "step": 8441 }, { "epoch": 0.39155844155844155, "grad_norm": 8.52929973602295, "learning_rate": 6.761479828564474e-06, "loss": 0.3414, "step": 8442 }, { "epoch": 0.3916048237476809, "grad_norm": 6.472636699676514, "learning_rate": 6.760791048971256e-06, "loss": 0.3883, "step": 8443 }, { "epoch": 0.3916512059369202, "grad_norm": 7.729839324951172, "learning_rate": 6.760102231231427e-06, "loss": 0.3312, "step": 8444 }, { "epoch": 0.39169758812615957, "grad_norm": 4.795523643493652, "learning_rate": 6.7594133753599136e-06, "loss": 0.2111, "step": 8445 }, { "epoch": 0.39174397031539887, "grad_norm": 6.443260192871094, "learning_rate": 6.758724481371636e-06, "loss": 0.3507, "step": 8446 }, { "epoch": 0.39179035250463823, "grad_norm": 11.453293800354004, "learning_rate": 6.758035549281521e-06, "loss": 0.2787, "step": 8447 }, { "epoch": 0.39183673469387753, "grad_norm": 6.880367279052734, "learning_rate": 6.757346579104493e-06, "loss": 0.3985, "step": 8448 }, { "epoch": 0.3918831168831169, "grad_norm": 10.720710754394531, "learning_rate": 6.756657570855476e-06, "loss": 0.3305, "step": 8449 }, { "epoch": 0.3919294990723562, "grad_norm": 5.493137836456299, "learning_rate": 6.7559685245494025e-06, "loss": 0.3506, "step": 8450 }, { "epoch": 0.39197588126159555, "grad_norm": 5.2990312576293945, "learning_rate": 6.755279440201194e-06, "loss": 0.3228, "step": 8451 }, { "epoch": 0.3920222634508349, "grad_norm": 5.42371940612793, "learning_rate": 6.754590317825785e-06, "loss": 0.2774, "step": 8452 }, { "epoch": 0.3920686456400742, "grad_norm": 10.897735595703125, "learning_rate": 6.753901157438101e-06, "loss": 0.4348, "step": 8453 }, { "epoch": 0.39211502782931357, "grad_norm": 8.903263092041016, "learning_rate": 6.753211959053073e-06, "loss": 0.3842, "step": 8454 }, { "epoch": 0.39216141001855287, "grad_norm": 12.238972663879395, "learning_rate": 6.752522722685635e-06, "loss": 0.3554, "step": 8455 }, { "epoch": 0.3922077922077922, "grad_norm": 8.766010284423828, "learning_rate": 6.751833448350713e-06, "loss": 0.3674, "step": 8456 }, { "epoch": 0.39225417439703153, "grad_norm": 5.6746602058410645, "learning_rate": 6.751144136063247e-06, "loss": 0.3752, "step": 8457 }, { "epoch": 0.3923005565862709, "grad_norm": 11.598555564880371, "learning_rate": 6.750454785838164e-06, "loss": 0.3816, "step": 8458 }, { "epoch": 0.3923469387755102, "grad_norm": 7.9275126457214355, "learning_rate": 6.749765397690402e-06, "loss": 0.4248, "step": 8459 }, { "epoch": 0.39239332096474955, "grad_norm": 7.556860446929932, "learning_rate": 6.7490759716348974e-06, "loss": 0.2917, "step": 8460 }, { "epoch": 0.39243970315398885, "grad_norm": 11.18282413482666, "learning_rate": 6.748386507686582e-06, "loss": 0.3578, "step": 8461 }, { "epoch": 0.3924860853432282, "grad_norm": 8.745091438293457, "learning_rate": 6.747697005860396e-06, "loss": 0.3932, "step": 8462 }, { "epoch": 0.3925324675324675, "grad_norm": 5.443356513977051, "learning_rate": 6.747007466171277e-06, "loss": 0.3359, "step": 8463 }, { "epoch": 0.39257884972170687, "grad_norm": 12.781628608703613, "learning_rate": 6.7463178886341615e-06, "loss": 0.4727, "step": 8464 }, { "epoch": 0.39262523191094617, "grad_norm": 6.272031307220459, "learning_rate": 6.745628273263991e-06, "loss": 0.3779, "step": 8465 }, { "epoch": 0.39267161410018553, "grad_norm": 4.314103126525879, "learning_rate": 6.744938620075704e-06, "loss": 0.2374, "step": 8466 }, { "epoch": 0.3927179962894249, "grad_norm": 14.469854354858398, "learning_rate": 6.744248929084241e-06, "loss": 0.3832, "step": 8467 }, { "epoch": 0.3927643784786642, "grad_norm": 5.7459917068481445, "learning_rate": 6.743559200304548e-06, "loss": 0.2886, "step": 8468 }, { "epoch": 0.39281076066790355, "grad_norm": 5.634580135345459, "learning_rate": 6.742869433751562e-06, "loss": 0.2823, "step": 8469 }, { "epoch": 0.39285714285714285, "grad_norm": 3.554276943206787, "learning_rate": 6.742179629440229e-06, "loss": 0.2809, "step": 8470 }, { "epoch": 0.3929035250463822, "grad_norm": 7.496034145355225, "learning_rate": 6.741489787385496e-06, "loss": 0.2952, "step": 8471 }, { "epoch": 0.3929499072356215, "grad_norm": 5.606621265411377, "learning_rate": 6.740799907602302e-06, "loss": 0.3116, "step": 8472 }, { "epoch": 0.39299628942486087, "grad_norm": 5.724415302276611, "learning_rate": 6.740109990105599e-06, "loss": 0.3752, "step": 8473 }, { "epoch": 0.39304267161410017, "grad_norm": 10.950904846191406, "learning_rate": 6.739420034910329e-06, "loss": 0.4413, "step": 8474 }, { "epoch": 0.3930890538033395, "grad_norm": 6.204432964324951, "learning_rate": 6.738730042031441e-06, "loss": 0.3951, "step": 8475 }, { "epoch": 0.39313543599257883, "grad_norm": 11.564526557922363, "learning_rate": 6.7380400114838855e-06, "loss": 0.427, "step": 8476 }, { "epoch": 0.3931818181818182, "grad_norm": 3.5424721240997314, "learning_rate": 6.737349943282607e-06, "loss": 0.273, "step": 8477 }, { "epoch": 0.3932282003710575, "grad_norm": 12.733376502990723, "learning_rate": 6.7366598374425604e-06, "loss": 0.6161, "step": 8478 }, { "epoch": 0.39327458256029685, "grad_norm": 5.587282180786133, "learning_rate": 6.735969693978695e-06, "loss": 0.3192, "step": 8479 }, { "epoch": 0.39332096474953615, "grad_norm": 4.353832244873047, "learning_rate": 6.735279512905961e-06, "loss": 0.2942, "step": 8480 }, { "epoch": 0.3933673469387755, "grad_norm": 5.755465507507324, "learning_rate": 6.734589294239311e-06, "loss": 0.2556, "step": 8481 }, { "epoch": 0.39341372912801487, "grad_norm": 5.949000835418701, "learning_rate": 6.733899037993701e-06, "loss": 0.2698, "step": 8482 }, { "epoch": 0.39346011131725417, "grad_norm": 8.406709671020508, "learning_rate": 6.733208744184081e-06, "loss": 0.3897, "step": 8483 }, { "epoch": 0.3935064935064935, "grad_norm": 6.462149143218994, "learning_rate": 6.732518412825409e-06, "loss": 0.3433, "step": 8484 }, { "epoch": 0.39355287569573283, "grad_norm": 13.160181045532227, "learning_rate": 6.731828043932638e-06, "loss": 0.4284, "step": 8485 }, { "epoch": 0.3935992578849722, "grad_norm": 9.684764862060547, "learning_rate": 6.731137637520725e-06, "loss": 0.4279, "step": 8486 }, { "epoch": 0.3936456400742115, "grad_norm": 6.112397193908691, "learning_rate": 6.73044719360463e-06, "loss": 0.3383, "step": 8487 }, { "epoch": 0.39369202226345085, "grad_norm": 8.93121337890625, "learning_rate": 6.729756712199309e-06, "loss": 0.3502, "step": 8488 }, { "epoch": 0.39373840445269015, "grad_norm": 10.663504600524902, "learning_rate": 6.7290661933197195e-06, "loss": 0.3888, "step": 8489 }, { "epoch": 0.3937847866419295, "grad_norm": 10.485225677490234, "learning_rate": 6.728375636980826e-06, "loss": 0.3804, "step": 8490 }, { "epoch": 0.3938311688311688, "grad_norm": 11.850780487060547, "learning_rate": 6.727685043197584e-06, "loss": 0.5445, "step": 8491 }, { "epoch": 0.39387755102040817, "grad_norm": 6.153903007507324, "learning_rate": 6.726994411984957e-06, "loss": 0.2836, "step": 8492 }, { "epoch": 0.39392393320964747, "grad_norm": 6.1083526611328125, "learning_rate": 6.726303743357907e-06, "loss": 0.28, "step": 8493 }, { "epoch": 0.39397031539888683, "grad_norm": 5.864180088043213, "learning_rate": 6.725613037331398e-06, "loss": 0.2417, "step": 8494 }, { "epoch": 0.3940166975881262, "grad_norm": 5.268681049346924, "learning_rate": 6.724922293920392e-06, "loss": 0.4328, "step": 8495 }, { "epoch": 0.3940630797773655, "grad_norm": 9.90815258026123, "learning_rate": 6.724231513139853e-06, "loss": 0.3831, "step": 8496 }, { "epoch": 0.39410946196660485, "grad_norm": 6.592427730560303, "learning_rate": 6.7235406950047485e-06, "loss": 0.2874, "step": 8497 }, { "epoch": 0.39415584415584415, "grad_norm": 13.497486114501953, "learning_rate": 6.722849839530045e-06, "loss": 0.424, "step": 8498 }, { "epoch": 0.3942022263450835, "grad_norm": 7.296325206756592, "learning_rate": 6.722158946730707e-06, "loss": 0.4116, "step": 8499 }, { "epoch": 0.3942486085343228, "grad_norm": 7.339605331420898, "learning_rate": 6.7214680166217035e-06, "loss": 0.405, "step": 8500 }, { "epoch": 0.39429499072356217, "grad_norm": 5.126232147216797, "learning_rate": 6.720777049218006e-06, "loss": 0.3023, "step": 8501 }, { "epoch": 0.39434137291280147, "grad_norm": 12.01143741607666, "learning_rate": 6.720086044534579e-06, "loss": 0.5284, "step": 8502 }, { "epoch": 0.3943877551020408, "grad_norm": 5.477619647979736, "learning_rate": 6.7193950025863965e-06, "loss": 0.3, "step": 8503 }, { "epoch": 0.39443413729128013, "grad_norm": 7.114284515380859, "learning_rate": 6.718703923388427e-06, "loss": 0.3655, "step": 8504 }, { "epoch": 0.3944805194805195, "grad_norm": 4.3154988288879395, "learning_rate": 6.718012806955643e-06, "loss": 0.3128, "step": 8505 }, { "epoch": 0.3945269016697588, "grad_norm": 7.276490688323975, "learning_rate": 6.71732165330302e-06, "loss": 0.3424, "step": 8506 }, { "epoch": 0.39457328385899815, "grad_norm": 7.383325099945068, "learning_rate": 6.716630462445527e-06, "loss": 0.2944, "step": 8507 }, { "epoch": 0.39461966604823745, "grad_norm": 18.982316970825195, "learning_rate": 6.71593923439814e-06, "loss": 0.4073, "step": 8508 }, { "epoch": 0.3946660482374768, "grad_norm": 8.400872230529785, "learning_rate": 6.715247969175837e-06, "loss": 0.329, "step": 8509 }, { "epoch": 0.39471243042671617, "grad_norm": 4.289604187011719, "learning_rate": 6.7145566667935904e-06, "loss": 0.2757, "step": 8510 }, { "epoch": 0.39475881261595547, "grad_norm": 7.1393513679504395, "learning_rate": 6.713865327266377e-06, "loss": 0.3191, "step": 8511 }, { "epoch": 0.3948051948051948, "grad_norm": 9.155111312866211, "learning_rate": 6.713173950609176e-06, "loss": 0.3852, "step": 8512 }, { "epoch": 0.39485157699443413, "grad_norm": 9.081290245056152, "learning_rate": 6.712482536836965e-06, "loss": 0.3277, "step": 8513 }, { "epoch": 0.3948979591836735, "grad_norm": 9.854497909545898, "learning_rate": 6.711791085964724e-06, "loss": 0.4062, "step": 8514 }, { "epoch": 0.3949443413729128, "grad_norm": 6.786642074584961, "learning_rate": 6.71109959800743e-06, "loss": 0.3219, "step": 8515 }, { "epoch": 0.39499072356215215, "grad_norm": 7.192685127258301, "learning_rate": 6.710408072980067e-06, "loss": 0.4121, "step": 8516 }, { "epoch": 0.39503710575139145, "grad_norm": 6.40261173248291, "learning_rate": 6.709716510897615e-06, "loss": 0.2579, "step": 8517 }, { "epoch": 0.3950834879406308, "grad_norm": 5.122190475463867, "learning_rate": 6.709024911775056e-06, "loss": 0.3042, "step": 8518 }, { "epoch": 0.3951298701298701, "grad_norm": 4.803053855895996, "learning_rate": 6.708333275627375e-06, "loss": 0.1864, "step": 8519 }, { "epoch": 0.39517625231910947, "grad_norm": 15.645055770874023, "learning_rate": 6.707641602469554e-06, "loss": 0.3796, "step": 8520 }, { "epoch": 0.39522263450834877, "grad_norm": 10.913758277893066, "learning_rate": 6.706949892316578e-06, "loss": 0.3643, "step": 8521 }, { "epoch": 0.3952690166975881, "grad_norm": 17.9717960357666, "learning_rate": 6.706258145183433e-06, "loss": 0.4446, "step": 8522 }, { "epoch": 0.3953153988868275, "grad_norm": 8.416926383972168, "learning_rate": 6.705566361085105e-06, "loss": 0.3508, "step": 8523 }, { "epoch": 0.3953617810760668, "grad_norm": 3.4743001461029053, "learning_rate": 6.704874540036582e-06, "loss": 0.2522, "step": 8524 }, { "epoch": 0.39540816326530615, "grad_norm": 10.179614067077637, "learning_rate": 6.704182682052852e-06, "loss": 0.3588, "step": 8525 }, { "epoch": 0.39545454545454545, "grad_norm": 14.812073707580566, "learning_rate": 6.703490787148902e-06, "loss": 0.5484, "step": 8526 }, { "epoch": 0.3955009276437848, "grad_norm": 13.7569580078125, "learning_rate": 6.702798855339723e-06, "loss": 0.4474, "step": 8527 }, { "epoch": 0.3955473098330241, "grad_norm": 3.274685859680176, "learning_rate": 6.702106886640305e-06, "loss": 0.288, "step": 8528 }, { "epoch": 0.39559369202226347, "grad_norm": 11.130782127380371, "learning_rate": 6.7014148810656374e-06, "loss": 0.3352, "step": 8529 }, { "epoch": 0.39564007421150277, "grad_norm": 6.135876178741455, "learning_rate": 6.700722838630715e-06, "loss": 0.3994, "step": 8530 }, { "epoch": 0.3956864564007421, "grad_norm": 8.860300064086914, "learning_rate": 6.700030759350531e-06, "loss": 0.4402, "step": 8531 }, { "epoch": 0.39573283858998143, "grad_norm": 6.380034923553467, "learning_rate": 6.6993386432400755e-06, "loss": 0.3556, "step": 8532 }, { "epoch": 0.3957792207792208, "grad_norm": 6.585958003997803, "learning_rate": 6.6986464903143445e-06, "loss": 0.3607, "step": 8533 }, { "epoch": 0.3958256029684601, "grad_norm": 12.11461353302002, "learning_rate": 6.697954300588334e-06, "loss": 0.3503, "step": 8534 }, { "epoch": 0.39587198515769945, "grad_norm": 13.579034805297852, "learning_rate": 6.697262074077038e-06, "loss": 0.5442, "step": 8535 }, { "epoch": 0.39591836734693875, "grad_norm": 4.849971771240234, "learning_rate": 6.696569810795455e-06, "loss": 0.357, "step": 8536 }, { "epoch": 0.3959647495361781, "grad_norm": 6.137805938720703, "learning_rate": 6.695877510758583e-06, "loss": 0.355, "step": 8537 }, { "epoch": 0.39601113172541746, "grad_norm": 8.946050643920898, "learning_rate": 6.6951851739814175e-06, "loss": 0.2921, "step": 8538 }, { "epoch": 0.39605751391465677, "grad_norm": 7.60288143157959, "learning_rate": 6.694492800478961e-06, "loss": 0.2937, "step": 8539 }, { "epoch": 0.3961038961038961, "grad_norm": 5.126128673553467, "learning_rate": 6.693800390266211e-06, "loss": 0.3102, "step": 8540 }, { "epoch": 0.3961502782931354, "grad_norm": 6.159119129180908, "learning_rate": 6.693107943358168e-06, "loss": 0.2397, "step": 8541 }, { "epoch": 0.3961966604823748, "grad_norm": 6.438249588012695, "learning_rate": 6.692415459769835e-06, "loss": 0.3306, "step": 8542 }, { "epoch": 0.3962430426716141, "grad_norm": 6.45353364944458, "learning_rate": 6.691722939516214e-06, "loss": 0.3183, "step": 8543 }, { "epoch": 0.39628942486085345, "grad_norm": 9.3963623046875, "learning_rate": 6.691030382612309e-06, "loss": 0.4222, "step": 8544 }, { "epoch": 0.39633580705009275, "grad_norm": 6.141077041625977, "learning_rate": 6.69033778907312e-06, "loss": 0.2547, "step": 8545 }, { "epoch": 0.3963821892393321, "grad_norm": 4.142505168914795, "learning_rate": 6.6896451589136555e-06, "loss": 0.1922, "step": 8546 }, { "epoch": 0.3964285714285714, "grad_norm": 8.249682426452637, "learning_rate": 6.688952492148921e-06, "loss": 0.3542, "step": 8547 }, { "epoch": 0.39647495361781077, "grad_norm": 5.170194625854492, "learning_rate": 6.688259788793921e-06, "loss": 0.3404, "step": 8548 }, { "epoch": 0.39652133580705007, "grad_norm": 4.308437347412109, "learning_rate": 6.687567048863661e-06, "loss": 0.2836, "step": 8549 }, { "epoch": 0.3965677179962894, "grad_norm": 7.683659076690674, "learning_rate": 6.686874272373154e-06, "loss": 0.2273, "step": 8550 }, { "epoch": 0.3966141001855288, "grad_norm": 5.46278190612793, "learning_rate": 6.686181459337404e-06, "loss": 0.3149, "step": 8551 }, { "epoch": 0.3966604823747681, "grad_norm": 8.79694652557373, "learning_rate": 6.685488609771422e-06, "loss": 0.3909, "step": 8552 }, { "epoch": 0.39670686456400744, "grad_norm": 6.005486488342285, "learning_rate": 6.68479572369022e-06, "loss": 0.3505, "step": 8553 }, { "epoch": 0.39675324675324675, "grad_norm": 17.049205780029297, "learning_rate": 6.684102801108805e-06, "loss": 0.3759, "step": 8554 }, { "epoch": 0.3967996289424861, "grad_norm": 7.399929046630859, "learning_rate": 6.683409842042193e-06, "loss": 0.4316, "step": 8555 }, { "epoch": 0.3968460111317254, "grad_norm": 8.601082801818848, "learning_rate": 6.682716846505394e-06, "loss": 0.4415, "step": 8556 }, { "epoch": 0.39689239332096476, "grad_norm": 8.93825912475586, "learning_rate": 6.6820238145134205e-06, "loss": 0.3715, "step": 8557 }, { "epoch": 0.39693877551020407, "grad_norm": 7.407180309295654, "learning_rate": 6.681330746081292e-06, "loss": 0.3805, "step": 8558 }, { "epoch": 0.3969851576994434, "grad_norm": 3.690922975540161, "learning_rate": 6.680637641224016e-06, "loss": 0.2526, "step": 8559 }, { "epoch": 0.3970315398886827, "grad_norm": 21.812381744384766, "learning_rate": 6.679944499956612e-06, "loss": 0.3521, "step": 8560 }, { "epoch": 0.3970779220779221, "grad_norm": 7.351777076721191, "learning_rate": 6.679251322294099e-06, "loss": 0.3037, "step": 8561 }, { "epoch": 0.3971243042671614, "grad_norm": 7.4411773681640625, "learning_rate": 6.678558108251489e-06, "loss": 0.323, "step": 8562 }, { "epoch": 0.39717068645640075, "grad_norm": 11.34941291809082, "learning_rate": 6.677864857843806e-06, "loss": 0.45, "step": 8563 }, { "epoch": 0.39721706864564005, "grad_norm": 12.021556854248047, "learning_rate": 6.677171571086064e-06, "loss": 0.4, "step": 8564 }, { "epoch": 0.3972634508348794, "grad_norm": 4.410993576049805, "learning_rate": 6.676478247993284e-06, "loss": 0.3106, "step": 8565 }, { "epoch": 0.39730983302411876, "grad_norm": 8.395380973815918, "learning_rate": 6.6757848885804885e-06, "loss": 0.281, "step": 8566 }, { "epoch": 0.39735621521335807, "grad_norm": 7.665830612182617, "learning_rate": 6.675091492862696e-06, "loss": 0.3842, "step": 8567 }, { "epoch": 0.3974025974025974, "grad_norm": 5.968767166137695, "learning_rate": 6.674398060854931e-06, "loss": 0.3227, "step": 8568 }, { "epoch": 0.3974489795918367, "grad_norm": 7.420173645019531, "learning_rate": 6.6737045925722155e-06, "loss": 0.3514, "step": 8569 }, { "epoch": 0.3974953617810761, "grad_norm": 5.5390472412109375, "learning_rate": 6.673011088029571e-06, "loss": 0.2523, "step": 8570 }, { "epoch": 0.3975417439703154, "grad_norm": 16.47736930847168, "learning_rate": 6.672317547242024e-06, "loss": 0.4606, "step": 8571 }, { "epoch": 0.39758812615955474, "grad_norm": 6.042265892028809, "learning_rate": 6.671623970224601e-06, "loss": 0.3696, "step": 8572 }, { "epoch": 0.39763450834879405, "grad_norm": 6.929208278656006, "learning_rate": 6.670930356992325e-06, "loss": 0.3465, "step": 8573 }, { "epoch": 0.3976808905380334, "grad_norm": 7.0697150230407715, "learning_rate": 6.670236707560224e-06, "loss": 0.3376, "step": 8574 }, { "epoch": 0.3977272727272727, "grad_norm": 6.929102420806885, "learning_rate": 6.6695430219433266e-06, "loss": 0.3743, "step": 8575 }, { "epoch": 0.39777365491651206, "grad_norm": 6.467362880706787, "learning_rate": 6.668849300156659e-06, "loss": 0.2945, "step": 8576 }, { "epoch": 0.39782003710575137, "grad_norm": 4.99878454208374, "learning_rate": 6.668155542215253e-06, "loss": 0.3668, "step": 8577 }, { "epoch": 0.3978664192949907, "grad_norm": 5.163423538208008, "learning_rate": 6.667461748134136e-06, "loss": 0.3859, "step": 8578 }, { "epoch": 0.3979128014842301, "grad_norm": 8.694296836853027, "learning_rate": 6.66676791792834e-06, "loss": 0.2417, "step": 8579 }, { "epoch": 0.3979591836734694, "grad_norm": 6.361327648162842, "learning_rate": 6.666074051612897e-06, "loss": 0.3111, "step": 8580 }, { "epoch": 0.39800556586270874, "grad_norm": 8.34838581085205, "learning_rate": 6.665380149202838e-06, "loss": 0.3773, "step": 8581 }, { "epoch": 0.39805194805194805, "grad_norm": 10.805272102355957, "learning_rate": 6.664686210713196e-06, "loss": 0.3884, "step": 8582 }, { "epoch": 0.3980983302411874, "grad_norm": 6.428287506103516, "learning_rate": 6.663992236159006e-06, "loss": 0.3329, "step": 8583 }, { "epoch": 0.3981447124304267, "grad_norm": 7.036576747894287, "learning_rate": 6.6632982255553004e-06, "loss": 0.3946, "step": 8584 }, { "epoch": 0.39819109461966606, "grad_norm": 4.125823974609375, "learning_rate": 6.662604178917118e-06, "loss": 0.3466, "step": 8585 }, { "epoch": 0.39823747680890537, "grad_norm": 7.569442272186279, "learning_rate": 6.661910096259492e-06, "loss": 0.3919, "step": 8586 }, { "epoch": 0.3982838589981447, "grad_norm": 4.27598237991333, "learning_rate": 6.66121597759746e-06, "loss": 0.2847, "step": 8587 }, { "epoch": 0.398330241187384, "grad_norm": 4.657721996307373, "learning_rate": 6.66052182294606e-06, "loss": 0.3635, "step": 8588 }, { "epoch": 0.3983766233766234, "grad_norm": 9.119182586669922, "learning_rate": 6.6598276323203305e-06, "loss": 0.3136, "step": 8589 }, { "epoch": 0.3984230055658627, "grad_norm": 6.537773609161377, "learning_rate": 6.659133405735312e-06, "loss": 0.2898, "step": 8590 }, { "epoch": 0.39846938775510204, "grad_norm": 5.544378757476807, "learning_rate": 6.658439143206042e-06, "loss": 0.3285, "step": 8591 }, { "epoch": 0.39851576994434135, "grad_norm": 12.234970092773438, "learning_rate": 6.6577448447475644e-06, "loss": 0.4616, "step": 8592 }, { "epoch": 0.3985621521335807, "grad_norm": 6.1329665184021, "learning_rate": 6.6570505103749175e-06, "loss": 0.3816, "step": 8593 }, { "epoch": 0.39860853432282006, "grad_norm": 8.312920570373535, "learning_rate": 6.656356140103145e-06, "loss": 0.3376, "step": 8594 }, { "epoch": 0.39865491651205937, "grad_norm": 4.904379367828369, "learning_rate": 6.655661733947292e-06, "loss": 0.3138, "step": 8595 }, { "epoch": 0.3987012987012987, "grad_norm": 4.8638410568237305, "learning_rate": 6.654967291922399e-06, "loss": 0.2523, "step": 8596 }, { "epoch": 0.398747680890538, "grad_norm": 6.720407009124756, "learning_rate": 6.6542728140435144e-06, "loss": 0.3144, "step": 8597 }, { "epoch": 0.3987940630797774, "grad_norm": 4.230581283569336, "learning_rate": 6.6535783003256805e-06, "loss": 0.2579, "step": 8598 }, { "epoch": 0.3988404452690167, "grad_norm": 5.23923921585083, "learning_rate": 6.652883750783945e-06, "loss": 0.3248, "step": 8599 }, { "epoch": 0.39888682745825604, "grad_norm": 6.9991326332092285, "learning_rate": 6.652189165433356e-06, "loss": 0.3084, "step": 8600 }, { "epoch": 0.39893320964749535, "grad_norm": 8.326709747314453, "learning_rate": 6.651494544288959e-06, "loss": 0.4354, "step": 8601 }, { "epoch": 0.3989795918367347, "grad_norm": 11.629053115844727, "learning_rate": 6.650799887365804e-06, "loss": 0.4408, "step": 8602 }, { "epoch": 0.399025974025974, "grad_norm": 8.819903373718262, "learning_rate": 6.650105194678941e-06, "loss": 0.4583, "step": 8603 }, { "epoch": 0.39907235621521336, "grad_norm": 8.26431655883789, "learning_rate": 6.649410466243418e-06, "loss": 0.2587, "step": 8604 }, { "epoch": 0.39911873840445267, "grad_norm": 4.36594820022583, "learning_rate": 6.648715702074287e-06, "loss": 0.2937, "step": 8605 }, { "epoch": 0.399165120593692, "grad_norm": 28.105863571166992, "learning_rate": 6.648020902186601e-06, "loss": 0.3432, "step": 8606 }, { "epoch": 0.3992115027829313, "grad_norm": 10.097261428833008, "learning_rate": 6.647326066595412e-06, "loss": 0.3848, "step": 8607 }, { "epoch": 0.3992578849721707, "grad_norm": 11.820563316345215, "learning_rate": 6.646631195315771e-06, "loss": 0.5184, "step": 8608 }, { "epoch": 0.39930426716141004, "grad_norm": 6.125452995300293, "learning_rate": 6.645936288362734e-06, "loss": 0.399, "step": 8609 }, { "epoch": 0.39935064935064934, "grad_norm": 7.0115485191345215, "learning_rate": 6.645241345751356e-06, "loss": 0.3666, "step": 8610 }, { "epoch": 0.3993970315398887, "grad_norm": 6.469043731689453, "learning_rate": 6.6445463674966915e-06, "loss": 0.3281, "step": 8611 }, { "epoch": 0.399443413729128, "grad_norm": 11.201948165893555, "learning_rate": 6.643851353613797e-06, "loss": 0.3054, "step": 8612 }, { "epoch": 0.39948979591836736, "grad_norm": 11.725700378417969, "learning_rate": 6.643156304117733e-06, "loss": 0.4213, "step": 8613 }, { "epoch": 0.39953617810760667, "grad_norm": 6.121728420257568, "learning_rate": 6.6424612190235504e-06, "loss": 0.3025, "step": 8614 }, { "epoch": 0.399582560296846, "grad_norm": 6.455047607421875, "learning_rate": 6.641766098346313e-06, "loss": 0.3529, "step": 8615 }, { "epoch": 0.3996289424860853, "grad_norm": 6.0510454177856445, "learning_rate": 6.64107094210108e-06, "loss": 0.3707, "step": 8616 }, { "epoch": 0.3996753246753247, "grad_norm": 6.291382312774658, "learning_rate": 6.6403757503029095e-06, "loss": 0.2817, "step": 8617 }, { "epoch": 0.399721706864564, "grad_norm": 7.160309791564941, "learning_rate": 6.639680522966865e-06, "loss": 0.4149, "step": 8618 }, { "epoch": 0.39976808905380334, "grad_norm": 5.760693550109863, "learning_rate": 6.638985260108007e-06, "loss": 0.4833, "step": 8619 }, { "epoch": 0.39981447124304265, "grad_norm": 6.297774791717529, "learning_rate": 6.638289961741395e-06, "loss": 0.3032, "step": 8620 }, { "epoch": 0.399860853432282, "grad_norm": 6.685695171356201, "learning_rate": 6.637594627882098e-06, "loss": 0.4189, "step": 8621 }, { "epoch": 0.39990723562152136, "grad_norm": 8.071152687072754, "learning_rate": 6.636899258545175e-06, "loss": 0.3558, "step": 8622 }, { "epoch": 0.39995361781076066, "grad_norm": 9.258265495300293, "learning_rate": 6.636203853745693e-06, "loss": 0.3767, "step": 8623 }, { "epoch": 0.4, "grad_norm": 5.604525566101074, "learning_rate": 6.635508413498719e-06, "loss": 0.3464, "step": 8624 }, { "epoch": 0.4, "eval_loss": 0.34723004698753357, "eval_runtime": 38.0116, "eval_samples_per_second": 45.854, "eval_steps_per_second": 5.735, "step": 8624 }, { "epoch": 0.4000463821892393, "grad_norm": 4.382598876953125, "learning_rate": 6.634812937819317e-06, "loss": 0.3192, "step": 8625 }, { "epoch": 0.4000927643784787, "grad_norm": 9.051800727844238, "learning_rate": 6.634117426722556e-06, "loss": 0.2783, "step": 8626 }, { "epoch": 0.400139146567718, "grad_norm": 5.6784491539001465, "learning_rate": 6.633421880223502e-06, "loss": 0.3923, "step": 8627 }, { "epoch": 0.40018552875695734, "grad_norm": 6.42189884185791, "learning_rate": 6.6327262983372245e-06, "loss": 0.3134, "step": 8628 }, { "epoch": 0.40023191094619665, "grad_norm": 9.039334297180176, "learning_rate": 6.6320306810787935e-06, "loss": 0.3814, "step": 8629 }, { "epoch": 0.400278293135436, "grad_norm": 4.747528076171875, "learning_rate": 6.631335028463277e-06, "loss": 0.2905, "step": 8630 }, { "epoch": 0.4003246753246753, "grad_norm": 6.6381049156188965, "learning_rate": 6.630639340505749e-06, "loss": 0.4702, "step": 8631 }, { "epoch": 0.40037105751391466, "grad_norm": 7.851486682891846, "learning_rate": 6.62994361722128e-06, "loss": 0.4124, "step": 8632 }, { "epoch": 0.40041743970315397, "grad_norm": 8.498101234436035, "learning_rate": 6.629247858624941e-06, "loss": 0.3965, "step": 8633 }, { "epoch": 0.4004638218923933, "grad_norm": 10.78039836883545, "learning_rate": 6.628552064731807e-06, "loss": 0.3774, "step": 8634 }, { "epoch": 0.4005102040816326, "grad_norm": 6.055816173553467, "learning_rate": 6.627856235556952e-06, "loss": 0.3168, "step": 8635 }, { "epoch": 0.400556586270872, "grad_norm": 6.315883636474609, "learning_rate": 6.62716037111545e-06, "loss": 0.3072, "step": 8636 }, { "epoch": 0.40060296846011134, "grad_norm": 11.580974578857422, "learning_rate": 6.626464471422377e-06, "loss": 0.3161, "step": 8637 }, { "epoch": 0.40064935064935064, "grad_norm": 5.07454252243042, "learning_rate": 6.625768536492808e-06, "loss": 0.2647, "step": 8638 }, { "epoch": 0.40069573283859, "grad_norm": 7.999095916748047, "learning_rate": 6.625072566341821e-06, "loss": 0.3708, "step": 8639 }, { "epoch": 0.4007421150278293, "grad_norm": 11.956950187683105, "learning_rate": 6.624376560984496e-06, "loss": 0.324, "step": 8640 }, { "epoch": 0.40078849721706866, "grad_norm": 3.1435678005218506, "learning_rate": 6.623680520435909e-06, "loss": 0.2962, "step": 8641 }, { "epoch": 0.40083487940630796, "grad_norm": 7.681671619415283, "learning_rate": 6.622984444711138e-06, "loss": 0.3356, "step": 8642 }, { "epoch": 0.4008812615955473, "grad_norm": 6.744589805603027, "learning_rate": 6.622288333825268e-06, "loss": 0.4037, "step": 8643 }, { "epoch": 0.4009276437847866, "grad_norm": 8.74901294708252, "learning_rate": 6.621592187793375e-06, "loss": 0.3778, "step": 8644 }, { "epoch": 0.400974025974026, "grad_norm": 19.974149703979492, "learning_rate": 6.620896006630542e-06, "loss": 0.426, "step": 8645 }, { "epoch": 0.4010204081632653, "grad_norm": 9.584312438964844, "learning_rate": 6.620199790351853e-06, "loss": 0.354, "step": 8646 }, { "epoch": 0.40106679035250464, "grad_norm": 3.8005332946777344, "learning_rate": 6.61950353897239e-06, "loss": 0.3558, "step": 8647 }, { "epoch": 0.40111317254174395, "grad_norm": 7.891717910766602, "learning_rate": 6.618807252507238e-06, "loss": 0.3064, "step": 8648 }, { "epoch": 0.4011595547309833, "grad_norm": 8.7618408203125, "learning_rate": 6.61811093097148e-06, "loss": 0.3633, "step": 8649 }, { "epoch": 0.40120593692022266, "grad_norm": 7.2803778648376465, "learning_rate": 6.617414574380201e-06, "loss": 0.3834, "step": 8650 }, { "epoch": 0.40125231910946196, "grad_norm": 10.769095420837402, "learning_rate": 6.61671818274849e-06, "loss": 0.4015, "step": 8651 }, { "epoch": 0.4012987012987013, "grad_norm": 7.324338436126709, "learning_rate": 6.616021756091431e-06, "loss": 0.3438, "step": 8652 }, { "epoch": 0.4013450834879406, "grad_norm": 4.538473606109619, "learning_rate": 6.615325294424113e-06, "loss": 0.371, "step": 8653 }, { "epoch": 0.40139146567718, "grad_norm": 5.828506946563721, "learning_rate": 6.614628797761626e-06, "loss": 0.3072, "step": 8654 }, { "epoch": 0.4014378478664193, "grad_norm": 5.692619800567627, "learning_rate": 6.613932266119057e-06, "loss": 0.3517, "step": 8655 }, { "epoch": 0.40148423005565864, "grad_norm": 8.775533676147461, "learning_rate": 6.613235699511496e-06, "loss": 0.4903, "step": 8656 }, { "epoch": 0.40153061224489794, "grad_norm": 3.6572093963623047, "learning_rate": 6.612539097954035e-06, "loss": 0.2284, "step": 8657 }, { "epoch": 0.4015769944341373, "grad_norm": 4.799185276031494, "learning_rate": 6.611842461461764e-06, "loss": 0.3588, "step": 8658 }, { "epoch": 0.4016233766233766, "grad_norm": 6.781790256500244, "learning_rate": 6.611145790049778e-06, "loss": 0.2943, "step": 8659 }, { "epoch": 0.40166975881261596, "grad_norm": 5.739826679229736, "learning_rate": 6.610449083733167e-06, "loss": 0.4072, "step": 8660 }, { "epoch": 0.40171614100185526, "grad_norm": 6.5642290115356445, "learning_rate": 6.609752342527026e-06, "loss": 0.2334, "step": 8661 }, { "epoch": 0.4017625231910946, "grad_norm": 16.7108097076416, "learning_rate": 6.609055566446451e-06, "loss": 0.414, "step": 8662 }, { "epoch": 0.4018089053803339, "grad_norm": 11.274142265319824, "learning_rate": 6.608358755506534e-06, "loss": 0.503, "step": 8663 }, { "epoch": 0.4018552875695733, "grad_norm": 12.770524978637695, "learning_rate": 6.6076619097223735e-06, "loss": 0.2744, "step": 8664 }, { "epoch": 0.40190166975881264, "grad_norm": 5.274073123931885, "learning_rate": 6.606965029109067e-06, "loss": 0.2588, "step": 8665 }, { "epoch": 0.40194805194805194, "grad_norm": 7.399117946624756, "learning_rate": 6.606268113681709e-06, "loss": 0.357, "step": 8666 }, { "epoch": 0.4019944341372913, "grad_norm": 6.553868293762207, "learning_rate": 6.6055711634554e-06, "loss": 0.4238, "step": 8667 }, { "epoch": 0.4020408163265306, "grad_norm": 9.307147026062012, "learning_rate": 6.604874178445239e-06, "loss": 0.4426, "step": 8668 }, { "epoch": 0.40208719851576996, "grad_norm": 9.586045265197754, "learning_rate": 6.604177158666325e-06, "loss": 0.3414, "step": 8669 }, { "epoch": 0.40213358070500926, "grad_norm": 13.077072143554688, "learning_rate": 6.603480104133759e-06, "loss": 0.3357, "step": 8670 }, { "epoch": 0.4021799628942486, "grad_norm": 6.862855911254883, "learning_rate": 6.602783014862641e-06, "loss": 0.3729, "step": 8671 }, { "epoch": 0.4022263450834879, "grad_norm": 9.484014511108398, "learning_rate": 6.602085890868076e-06, "loss": 0.3442, "step": 8672 }, { "epoch": 0.4022727272727273, "grad_norm": 6.341416358947754, "learning_rate": 6.601388732165166e-06, "loss": 0.3719, "step": 8673 }, { "epoch": 0.4023191094619666, "grad_norm": 5.0969038009643555, "learning_rate": 6.6006915387690125e-06, "loss": 0.1479, "step": 8674 }, { "epoch": 0.40236549165120594, "grad_norm": 7.195320129394531, "learning_rate": 6.5999943106947206e-06, "loss": 0.3559, "step": 8675 }, { "epoch": 0.40241187384044524, "grad_norm": 7.877738952636719, "learning_rate": 6.599297047957397e-06, "loss": 0.2338, "step": 8676 }, { "epoch": 0.4024582560296846, "grad_norm": 7.011598587036133, "learning_rate": 6.598599750572145e-06, "loss": 0.3832, "step": 8677 }, { "epoch": 0.40250463821892396, "grad_norm": 3.624389886856079, "learning_rate": 6.597902418554075e-06, "loss": 0.3385, "step": 8678 }, { "epoch": 0.40255102040816326, "grad_norm": 6.790755271911621, "learning_rate": 6.5972050519182895e-06, "loss": 0.3857, "step": 8679 }, { "epoch": 0.4025974025974026, "grad_norm": 9.3739652633667, "learning_rate": 6.5965076506799e-06, "loss": 0.3538, "step": 8680 }, { "epoch": 0.4026437847866419, "grad_norm": 7.094712734222412, "learning_rate": 6.595810214854015e-06, "loss": 0.306, "step": 8681 }, { "epoch": 0.4026901669758813, "grad_norm": 16.29600715637207, "learning_rate": 6.595112744455743e-06, "loss": 0.444, "step": 8682 }, { "epoch": 0.4027365491651206, "grad_norm": 9.901082992553711, "learning_rate": 6.594415239500194e-06, "loss": 0.3571, "step": 8683 }, { "epoch": 0.40278293135435994, "grad_norm": 8.824823379516602, "learning_rate": 6.59371770000248e-06, "loss": 0.5008, "step": 8684 }, { "epoch": 0.40282931354359924, "grad_norm": 3.853761911392212, "learning_rate": 6.593020125977713e-06, "loss": 0.3899, "step": 8685 }, { "epoch": 0.4028756957328386, "grad_norm": 4.373118877410889, "learning_rate": 6.592322517441004e-06, "loss": 0.2888, "step": 8686 }, { "epoch": 0.4029220779220779, "grad_norm": 4.872921943664551, "learning_rate": 6.591624874407469e-06, "loss": 0.3685, "step": 8687 }, { "epoch": 0.40296846011131726, "grad_norm": 10.62840461730957, "learning_rate": 6.59092719689222e-06, "loss": 0.4632, "step": 8688 }, { "epoch": 0.40301484230055656, "grad_norm": 5.245361328125, "learning_rate": 6.590229484910373e-06, "loss": 0.2948, "step": 8689 }, { "epoch": 0.4030612244897959, "grad_norm": 6.897601127624512, "learning_rate": 6.589531738477042e-06, "loss": 0.3345, "step": 8690 }, { "epoch": 0.4031076066790352, "grad_norm": 6.203366756439209, "learning_rate": 6.588833957607344e-06, "loss": 0.3777, "step": 8691 }, { "epoch": 0.4031539888682746, "grad_norm": 7.855893135070801, "learning_rate": 6.588136142316398e-06, "loss": 0.4212, "step": 8692 }, { "epoch": 0.40320037105751394, "grad_norm": 14.061140060424805, "learning_rate": 6.587438292619319e-06, "loss": 0.4148, "step": 8693 }, { "epoch": 0.40324675324675324, "grad_norm": 8.217126846313477, "learning_rate": 6.586740408531227e-06, "loss": 0.4018, "step": 8694 }, { "epoch": 0.4032931354359926, "grad_norm": 4.746420860290527, "learning_rate": 6.586042490067242e-06, "loss": 0.2574, "step": 8695 }, { "epoch": 0.4033395176252319, "grad_norm": 11.245561599731445, "learning_rate": 6.5853445372424805e-06, "loss": 0.4564, "step": 8696 }, { "epoch": 0.40338589981447126, "grad_norm": 4.921083450317383, "learning_rate": 6.5846465500720665e-06, "loss": 0.3035, "step": 8697 }, { "epoch": 0.40343228200371056, "grad_norm": 8.484781265258789, "learning_rate": 6.583948528571123e-06, "loss": 0.3962, "step": 8698 }, { "epoch": 0.4034786641929499, "grad_norm": 6.050538063049316, "learning_rate": 6.583250472754767e-06, "loss": 0.3023, "step": 8699 }, { "epoch": 0.4035250463821892, "grad_norm": 4.489989280700684, "learning_rate": 6.582552382638128e-06, "loss": 0.3617, "step": 8700 }, { "epoch": 0.4035714285714286, "grad_norm": 7.102170944213867, "learning_rate": 6.581854258236324e-06, "loss": 0.2162, "step": 8701 }, { "epoch": 0.4036178107606679, "grad_norm": 6.37322998046875, "learning_rate": 6.581156099564482e-06, "loss": 0.389, "step": 8702 }, { "epoch": 0.40366419294990724, "grad_norm": 6.928817272186279, "learning_rate": 6.580457906637727e-06, "loss": 0.3804, "step": 8703 }, { "epoch": 0.40371057513914654, "grad_norm": 7.953514575958252, "learning_rate": 6.579759679471186e-06, "loss": 0.3981, "step": 8704 }, { "epoch": 0.4037569573283859, "grad_norm": 6.802182197570801, "learning_rate": 6.5790614180799835e-06, "loss": 0.3716, "step": 8705 }, { "epoch": 0.40380333951762526, "grad_norm": 8.371366500854492, "learning_rate": 6.57836312247925e-06, "loss": 0.4653, "step": 8706 }, { "epoch": 0.40384972170686456, "grad_norm": 4.251195430755615, "learning_rate": 6.5776647926841095e-06, "loss": 0.2875, "step": 8707 }, { "epoch": 0.4038961038961039, "grad_norm": 4.5550642013549805, "learning_rate": 6.5769664287096955e-06, "loss": 0.3715, "step": 8708 }, { "epoch": 0.4039424860853432, "grad_norm": 8.151805877685547, "learning_rate": 6.576268030571136e-06, "loss": 0.3188, "step": 8709 }, { "epoch": 0.4039888682745826, "grad_norm": 4.085235118865967, "learning_rate": 6.575569598283559e-06, "loss": 0.2304, "step": 8710 }, { "epoch": 0.4040352504638219, "grad_norm": 8.912487030029297, "learning_rate": 6.574871131862099e-06, "loss": 0.2933, "step": 8711 }, { "epoch": 0.40408163265306124, "grad_norm": 7.064591884613037, "learning_rate": 6.574172631321885e-06, "loss": 0.4055, "step": 8712 }, { "epoch": 0.40412801484230054, "grad_norm": 7.328124523162842, "learning_rate": 6.573474096678052e-06, "loss": 0.3459, "step": 8713 }, { "epoch": 0.4041743970315399, "grad_norm": 7.7956061363220215, "learning_rate": 6.572775527945734e-06, "loss": 0.3672, "step": 8714 }, { "epoch": 0.4042207792207792, "grad_norm": 9.086417198181152, "learning_rate": 6.572076925140063e-06, "loss": 0.3988, "step": 8715 }, { "epoch": 0.40426716141001856, "grad_norm": 8.232919692993164, "learning_rate": 6.571378288276174e-06, "loss": 0.479, "step": 8716 }, { "epoch": 0.40431354359925786, "grad_norm": 4.167235851287842, "learning_rate": 6.570679617369204e-06, "loss": 0.3864, "step": 8717 }, { "epoch": 0.4043599257884972, "grad_norm": 13.955216407775879, "learning_rate": 6.569980912434287e-06, "loss": 0.3489, "step": 8718 }, { "epoch": 0.4044063079777365, "grad_norm": 5.421062469482422, "learning_rate": 6.569282173486564e-06, "loss": 0.3971, "step": 8719 }, { "epoch": 0.4044526901669759, "grad_norm": 8.583641052246094, "learning_rate": 6.5685834005411685e-06, "loss": 0.2581, "step": 8720 }, { "epoch": 0.40449907235621524, "grad_norm": 10.772896766662598, "learning_rate": 6.567884593613241e-06, "loss": 0.2976, "step": 8721 }, { "epoch": 0.40454545454545454, "grad_norm": 6.386717796325684, "learning_rate": 6.567185752717922e-06, "loss": 0.3229, "step": 8722 }, { "epoch": 0.4045918367346939, "grad_norm": 5.285187244415283, "learning_rate": 6.5664868778703495e-06, "loss": 0.3328, "step": 8723 }, { "epoch": 0.4046382189239332, "grad_norm": 6.063871383666992, "learning_rate": 6.5657879690856644e-06, "loss": 0.3374, "step": 8724 }, { "epoch": 0.40468460111317256, "grad_norm": 5.967545986175537, "learning_rate": 6.56508902637901e-06, "loss": 0.3253, "step": 8725 }, { "epoch": 0.40473098330241186, "grad_norm": 7.339141845703125, "learning_rate": 6.564390049765528e-06, "loss": 0.3723, "step": 8726 }, { "epoch": 0.4047773654916512, "grad_norm": 4.281947612762451, "learning_rate": 6.563691039260359e-06, "loss": 0.3406, "step": 8727 }, { "epoch": 0.4048237476808905, "grad_norm": 8.33486557006836, "learning_rate": 6.562991994878649e-06, "loss": 0.3887, "step": 8728 }, { "epoch": 0.4048701298701299, "grad_norm": 4.67302131652832, "learning_rate": 6.562292916635542e-06, "loss": 0.3638, "step": 8729 }, { "epoch": 0.4049165120593692, "grad_norm": 4.769495010375977, "learning_rate": 6.561593804546182e-06, "loss": 0.3272, "step": 8730 }, { "epoch": 0.40496289424860854, "grad_norm": 9.85708999633789, "learning_rate": 6.560894658625717e-06, "loss": 0.5113, "step": 8731 }, { "epoch": 0.40500927643784784, "grad_norm": 4.97064208984375, "learning_rate": 6.560195478889293e-06, "loss": 0.2668, "step": 8732 }, { "epoch": 0.4050556586270872, "grad_norm": 6.123186111450195, "learning_rate": 6.559496265352056e-06, "loss": 0.4001, "step": 8733 }, { "epoch": 0.4051020408163265, "grad_norm": 8.066638946533203, "learning_rate": 6.558797018029155e-06, "loss": 0.3599, "step": 8734 }, { "epoch": 0.40514842300556586, "grad_norm": 6.892303466796875, "learning_rate": 6.558097736935739e-06, "loss": 0.3845, "step": 8735 }, { "epoch": 0.4051948051948052, "grad_norm": 8.397683143615723, "learning_rate": 6.557398422086957e-06, "loss": 0.3993, "step": 8736 }, { "epoch": 0.4052411873840445, "grad_norm": 11.418416976928711, "learning_rate": 6.556699073497959e-06, "loss": 0.4243, "step": 8737 }, { "epoch": 0.4052875695732839, "grad_norm": 5.525165557861328, "learning_rate": 6.555999691183899e-06, "loss": 0.3195, "step": 8738 }, { "epoch": 0.4053339517625232, "grad_norm": 6.0339035987854, "learning_rate": 6.555300275159924e-06, "loss": 0.321, "step": 8739 }, { "epoch": 0.40538033395176254, "grad_norm": 7.3022236824035645, "learning_rate": 6.55460082544119e-06, "loss": 0.369, "step": 8740 }, { "epoch": 0.40542671614100184, "grad_norm": 6.981042861938477, "learning_rate": 6.553901342042848e-06, "loss": 0.3811, "step": 8741 }, { "epoch": 0.4054730983302412, "grad_norm": 7.373937606811523, "learning_rate": 6.5532018249800544e-06, "loss": 0.2868, "step": 8742 }, { "epoch": 0.4055194805194805, "grad_norm": 4.715733051300049, "learning_rate": 6.552502274267962e-06, "loss": 0.3143, "step": 8743 }, { "epoch": 0.40556586270871986, "grad_norm": 8.472261428833008, "learning_rate": 6.551802689921726e-06, "loss": 0.2961, "step": 8744 }, { "epoch": 0.40561224489795916, "grad_norm": 13.51691722869873, "learning_rate": 6.551103071956504e-06, "loss": 0.5655, "step": 8745 }, { "epoch": 0.4056586270871985, "grad_norm": 10.528244972229004, "learning_rate": 6.5504034203874525e-06, "loss": 0.4521, "step": 8746 }, { "epoch": 0.4057050092764378, "grad_norm": 10.19239330291748, "learning_rate": 6.549703735229728e-06, "loss": 0.3163, "step": 8747 }, { "epoch": 0.4057513914656772, "grad_norm": 8.123270988464355, "learning_rate": 6.5490040164984895e-06, "loss": 0.3745, "step": 8748 }, { "epoch": 0.40579777365491654, "grad_norm": 12.182557106018066, "learning_rate": 6.548304264208894e-06, "loss": 0.3563, "step": 8749 }, { "epoch": 0.40584415584415584, "grad_norm": 5.955076694488525, "learning_rate": 6.5476044783761065e-06, "loss": 0.3554, "step": 8750 }, { "epoch": 0.4058905380333952, "grad_norm": 7.23954963684082, "learning_rate": 6.546904659015283e-06, "loss": 0.3297, "step": 8751 }, { "epoch": 0.4059369202226345, "grad_norm": 9.399154663085938, "learning_rate": 6.546204806141586e-06, "loss": 0.3438, "step": 8752 }, { "epoch": 0.40598330241187386, "grad_norm": 16.503976821899414, "learning_rate": 6.545504919770178e-06, "loss": 0.4148, "step": 8753 }, { "epoch": 0.40602968460111316, "grad_norm": 4.358973503112793, "learning_rate": 6.544804999916219e-06, "loss": 0.381, "step": 8754 }, { "epoch": 0.4060760667903525, "grad_norm": 5.871330738067627, "learning_rate": 6.544105046594878e-06, "loss": 0.3006, "step": 8755 }, { "epoch": 0.4061224489795918, "grad_norm": 8.61097526550293, "learning_rate": 6.5434050598213125e-06, "loss": 0.4407, "step": 8756 }, { "epoch": 0.4061688311688312, "grad_norm": 5.32438850402832, "learning_rate": 6.542705039610691e-06, "loss": 0.3968, "step": 8757 }, { "epoch": 0.4062152133580705, "grad_norm": 10.839798927307129, "learning_rate": 6.542004985978179e-06, "loss": 0.3684, "step": 8758 }, { "epoch": 0.40626159554730984, "grad_norm": 11.067179679870605, "learning_rate": 6.541304898938943e-06, "loss": 0.2987, "step": 8759 }, { "epoch": 0.40630797773654914, "grad_norm": 8.863683700561523, "learning_rate": 6.5406047785081485e-06, "loss": 0.3257, "step": 8760 }, { "epoch": 0.4063543599257885, "grad_norm": 7.554296970367432, "learning_rate": 6.5399046247009635e-06, "loss": 0.3462, "step": 8761 }, { "epoch": 0.4064007421150278, "grad_norm": 9.459884643554688, "learning_rate": 6.539204437532558e-06, "loss": 0.401, "step": 8762 }, { "epoch": 0.40644712430426716, "grad_norm": 7.798255443572998, "learning_rate": 6.5385042170181e-06, "loss": 0.3595, "step": 8763 }, { "epoch": 0.4064935064935065, "grad_norm": 4.264560699462891, "learning_rate": 6.5378039631727596e-06, "loss": 0.3176, "step": 8764 }, { "epoch": 0.4065398886827458, "grad_norm": 5.0575852394104, "learning_rate": 6.537103676011707e-06, "loss": 0.3087, "step": 8765 }, { "epoch": 0.4065862708719852, "grad_norm": 9.175248146057129, "learning_rate": 6.536403355550114e-06, "loss": 0.3259, "step": 8766 }, { "epoch": 0.4066326530612245, "grad_norm": 4.84983491897583, "learning_rate": 6.535703001803153e-06, "loss": 0.2802, "step": 8767 }, { "epoch": 0.40667903525046384, "grad_norm": 10.94353199005127, "learning_rate": 6.535002614785995e-06, "loss": 0.4684, "step": 8768 }, { "epoch": 0.40672541743970314, "grad_norm": 6.416972637176514, "learning_rate": 6.534302194513817e-06, "loss": 0.3671, "step": 8769 }, { "epoch": 0.4067717996289425, "grad_norm": 8.204005241394043, "learning_rate": 6.533601741001791e-06, "loss": 0.3217, "step": 8770 }, { "epoch": 0.4068181818181818, "grad_norm": 8.165360450744629, "learning_rate": 6.532901254265093e-06, "loss": 0.3391, "step": 8771 }, { "epoch": 0.40686456400742116, "grad_norm": 5.256131649017334, "learning_rate": 6.532200734318896e-06, "loss": 0.3274, "step": 8772 }, { "epoch": 0.40691094619666046, "grad_norm": 11.263802528381348, "learning_rate": 6.531500181178378e-06, "loss": 0.287, "step": 8773 }, { "epoch": 0.4069573283858998, "grad_norm": 4.0064697265625, "learning_rate": 6.530799594858719e-06, "loss": 0.346, "step": 8774 }, { "epoch": 0.4070037105751391, "grad_norm": 7.637348651885986, "learning_rate": 6.530098975375092e-06, "loss": 0.3639, "step": 8775 }, { "epoch": 0.4070500927643785, "grad_norm": 7.146393775939941, "learning_rate": 6.529398322742677e-06, "loss": 0.3392, "step": 8776 }, { "epoch": 0.40709647495361784, "grad_norm": 9.978741645812988, "learning_rate": 6.528697636976656e-06, "loss": 0.3611, "step": 8777 }, { "epoch": 0.40714285714285714, "grad_norm": 5.895116806030273, "learning_rate": 6.5279969180922055e-06, "loss": 0.4411, "step": 8778 }, { "epoch": 0.4071892393320965, "grad_norm": 8.530438423156738, "learning_rate": 6.527296166104509e-06, "loss": 0.4279, "step": 8779 }, { "epoch": 0.4072356215213358, "grad_norm": 10.695318222045898, "learning_rate": 6.526595381028746e-06, "loss": 0.4121, "step": 8780 }, { "epoch": 0.40728200371057516, "grad_norm": 7.720906734466553, "learning_rate": 6.5258945628800995e-06, "loss": 0.4443, "step": 8781 }, { "epoch": 0.40732838589981446, "grad_norm": 6.063959121704102, "learning_rate": 6.5251937116737516e-06, "loss": 0.3193, "step": 8782 }, { "epoch": 0.4073747680890538, "grad_norm": 6.711617946624756, "learning_rate": 6.5244928274248864e-06, "loss": 0.3767, "step": 8783 }, { "epoch": 0.4074211502782931, "grad_norm": 6.145958423614502, "learning_rate": 6.523791910148689e-06, "loss": 0.3078, "step": 8784 }, { "epoch": 0.4074675324675325, "grad_norm": 5.853280544281006, "learning_rate": 6.523090959860343e-06, "loss": 0.2931, "step": 8785 }, { "epoch": 0.4075139146567718, "grad_norm": 7.03034782409668, "learning_rate": 6.522389976575034e-06, "loss": 0.3473, "step": 8786 }, { "epoch": 0.40756029684601114, "grad_norm": 6.0526652336120605, "learning_rate": 6.52168896030795e-06, "loss": 0.2809, "step": 8787 }, { "epoch": 0.40760667903525044, "grad_norm": 5.949991226196289, "learning_rate": 6.520987911074278e-06, "loss": 0.3122, "step": 8788 }, { "epoch": 0.4076530612244898, "grad_norm": 4.597906112670898, "learning_rate": 6.520286828889204e-06, "loss": 0.3347, "step": 8789 }, { "epoch": 0.4076994434137291, "grad_norm": 10.913019180297852, "learning_rate": 6.519585713767919e-06, "loss": 0.3706, "step": 8790 }, { "epoch": 0.40774582560296846, "grad_norm": 4.95980167388916, "learning_rate": 6.518884565725609e-06, "loss": 0.3768, "step": 8791 }, { "epoch": 0.4077922077922078, "grad_norm": 8.041523933410645, "learning_rate": 6.518183384777468e-06, "loss": 0.3201, "step": 8792 }, { "epoch": 0.4078385899814471, "grad_norm": 10.394954681396484, "learning_rate": 6.517482170938684e-06, "loss": 0.3695, "step": 8793 }, { "epoch": 0.4078849721706865, "grad_norm": 7.653390884399414, "learning_rate": 6.516780924224448e-06, "loss": 0.4404, "step": 8794 }, { "epoch": 0.4079313543599258, "grad_norm": 4.617804527282715, "learning_rate": 6.516079644649954e-06, "loss": 0.3294, "step": 8795 }, { "epoch": 0.40797773654916514, "grad_norm": 10.210785865783691, "learning_rate": 6.515378332230396e-06, "loss": 0.3607, "step": 8796 }, { "epoch": 0.40802411873840444, "grad_norm": 5.135159969329834, "learning_rate": 6.514676986980963e-06, "loss": 0.2823, "step": 8797 }, { "epoch": 0.4080705009276438, "grad_norm": 5.557162761688232, "learning_rate": 6.5139756089168524e-06, "loss": 0.3132, "step": 8798 }, { "epoch": 0.4081168831168831, "grad_norm": 7.156826496124268, "learning_rate": 6.5132741980532595e-06, "loss": 0.3478, "step": 8799 }, { "epoch": 0.40816326530612246, "grad_norm": 9.24480152130127, "learning_rate": 6.51257275440538e-06, "loss": 0.3518, "step": 8800 }, { "epoch": 0.40820964749536176, "grad_norm": 6.184500217437744, "learning_rate": 6.511871277988409e-06, "loss": 0.3281, "step": 8801 }, { "epoch": 0.4082560296846011, "grad_norm": 7.412372589111328, "learning_rate": 6.5111697688175435e-06, "loss": 0.3172, "step": 8802 }, { "epoch": 0.4083024118738404, "grad_norm": 7.179746627807617, "learning_rate": 6.510468226907982e-06, "loss": 0.3753, "step": 8803 }, { "epoch": 0.4083487940630798, "grad_norm": 5.662372589111328, "learning_rate": 6.5097666522749245e-06, "loss": 0.36, "step": 8804 }, { "epoch": 0.40839517625231914, "grad_norm": 8.274080276489258, "learning_rate": 6.509065044933568e-06, "loss": 0.3884, "step": 8805 }, { "epoch": 0.40844155844155844, "grad_norm": 7.450026035308838, "learning_rate": 6.508363404899112e-06, "loss": 0.3378, "step": 8806 }, { "epoch": 0.4084879406307978, "grad_norm": 3.589630603790283, "learning_rate": 6.507661732186761e-06, "loss": 0.2121, "step": 8807 }, { "epoch": 0.4085343228200371, "grad_norm": 10.174092292785645, "learning_rate": 6.506960026811712e-06, "loss": 0.4437, "step": 8808 }, { "epoch": 0.40858070500927646, "grad_norm": 5.496096134185791, "learning_rate": 6.506258288789168e-06, "loss": 0.2909, "step": 8809 }, { "epoch": 0.40862708719851576, "grad_norm": 5.598491191864014, "learning_rate": 6.505556518134335e-06, "loss": 0.3456, "step": 8810 }, { "epoch": 0.4086734693877551, "grad_norm": 7.931285858154297, "learning_rate": 6.504854714862412e-06, "loss": 0.2934, "step": 8811 }, { "epoch": 0.4087198515769944, "grad_norm": 7.836981296539307, "learning_rate": 6.504152878988608e-06, "loss": 0.4037, "step": 8812 }, { "epoch": 0.4087662337662338, "grad_norm": 11.93437385559082, "learning_rate": 6.503451010528122e-06, "loss": 0.5984, "step": 8813 }, { "epoch": 0.4088126159554731, "grad_norm": 7.522767066955566, "learning_rate": 6.5027491094961645e-06, "loss": 0.3586, "step": 8814 }, { "epoch": 0.40885899814471244, "grad_norm": 11.445069313049316, "learning_rate": 6.502047175907941e-06, "loss": 0.3962, "step": 8815 }, { "epoch": 0.40890538033395174, "grad_norm": 9.00261402130127, "learning_rate": 6.501345209778655e-06, "loss": 0.4612, "step": 8816 }, { "epoch": 0.4089517625231911, "grad_norm": 8.250847816467285, "learning_rate": 6.500643211123519e-06, "loss": 0.408, "step": 8817 }, { "epoch": 0.4089981447124304, "grad_norm": 9.352019309997559, "learning_rate": 6.499941179957739e-06, "loss": 0.3755, "step": 8818 }, { "epoch": 0.40904452690166976, "grad_norm": 7.990998268127441, "learning_rate": 6.4992391162965255e-06, "loss": 0.4138, "step": 8819 }, { "epoch": 0.4090909090909091, "grad_norm": 14.790719032287598, "learning_rate": 6.498537020155085e-06, "loss": 0.5371, "step": 8820 }, { "epoch": 0.4091372912801484, "grad_norm": 4.49677848815918, "learning_rate": 6.497834891548632e-06, "loss": 0.2758, "step": 8821 }, { "epoch": 0.4091836734693878, "grad_norm": 8.514761924743652, "learning_rate": 6.497132730492375e-06, "loss": 0.3532, "step": 8822 }, { "epoch": 0.4092300556586271, "grad_norm": 6.97846794128418, "learning_rate": 6.496430537001529e-06, "loss": 0.3582, "step": 8823 }, { "epoch": 0.40927643784786644, "grad_norm": 4.729531288146973, "learning_rate": 6.495728311091303e-06, "loss": 0.2646, "step": 8824 }, { "epoch": 0.40932282003710574, "grad_norm": 6.346098899841309, "learning_rate": 6.495026052776912e-06, "loss": 0.3816, "step": 8825 }, { "epoch": 0.4093692022263451, "grad_norm": 11.080201148986816, "learning_rate": 6.494323762073571e-06, "loss": 0.4483, "step": 8826 }, { "epoch": 0.4094155844155844, "grad_norm": 10.275325775146484, "learning_rate": 6.493621438996494e-06, "loss": 0.4366, "step": 8827 }, { "epoch": 0.40946196660482376, "grad_norm": 6.414557933807373, "learning_rate": 6.492919083560894e-06, "loss": 0.3802, "step": 8828 }, { "epoch": 0.40950834879406306, "grad_norm": 8.306572914123535, "learning_rate": 6.492216695781992e-06, "loss": 0.3087, "step": 8829 }, { "epoch": 0.4095547309833024, "grad_norm": 5.16709566116333, "learning_rate": 6.491514275675001e-06, "loss": 0.3114, "step": 8830 }, { "epoch": 0.4096011131725417, "grad_norm": 4.587086200714111, "learning_rate": 6.490811823255142e-06, "loss": 0.2769, "step": 8831 }, { "epoch": 0.4096474953617811, "grad_norm": 5.569727420806885, "learning_rate": 6.490109338537629e-06, "loss": 0.3652, "step": 8832 }, { "epoch": 0.40969387755102044, "grad_norm": 4.451183319091797, "learning_rate": 6.489406821537684e-06, "loss": 0.2758, "step": 8833 }, { "epoch": 0.40974025974025974, "grad_norm": 9.391453742980957, "learning_rate": 6.488704272270526e-06, "loss": 0.4212, "step": 8834 }, { "epoch": 0.4097866419294991, "grad_norm": 7.792515754699707, "learning_rate": 6.4880016907513745e-06, "loss": 0.4082, "step": 8835 }, { "epoch": 0.4098330241187384, "grad_norm": 7.714338779449463, "learning_rate": 6.487299076995451e-06, "loss": 0.351, "step": 8836 }, { "epoch": 0.40987940630797776, "grad_norm": 8.697049140930176, "learning_rate": 6.4865964310179784e-06, "loss": 0.4333, "step": 8837 }, { "epoch": 0.40992578849721706, "grad_norm": 6.326126575469971, "learning_rate": 6.485893752834178e-06, "loss": 0.2855, "step": 8838 }, { "epoch": 0.4099721706864564, "grad_norm": 4.463895797729492, "learning_rate": 6.485191042459272e-06, "loss": 0.2664, "step": 8839 }, { "epoch": 0.4100185528756957, "grad_norm": 4.771304607391357, "learning_rate": 6.484488299908487e-06, "loss": 0.3755, "step": 8840 }, { "epoch": 0.4100649350649351, "grad_norm": 5.626181125640869, "learning_rate": 6.483785525197045e-06, "loss": 0.362, "step": 8841 }, { "epoch": 0.4101113172541744, "grad_norm": 4.507391452789307, "learning_rate": 6.4830827183401735e-06, "loss": 0.3072, "step": 8842 }, { "epoch": 0.41015769944341374, "grad_norm": 4.71872615814209, "learning_rate": 6.482379879353095e-06, "loss": 0.2979, "step": 8843 }, { "epoch": 0.41020408163265304, "grad_norm": 5.272751808166504, "learning_rate": 6.481677008251039e-06, "loss": 0.389, "step": 8844 }, { "epoch": 0.4102504638218924, "grad_norm": 6.036323547363281, "learning_rate": 6.480974105049234e-06, "loss": 0.3301, "step": 8845 }, { "epoch": 0.4102968460111317, "grad_norm": 7.298174858093262, "learning_rate": 6.480271169762904e-06, "loss": 0.3804, "step": 8846 }, { "epoch": 0.41034322820037106, "grad_norm": 5.5588178634643555, "learning_rate": 6.47956820240728e-06, "loss": 0.3485, "step": 8847 }, { "epoch": 0.4103896103896104, "grad_norm": 6.004258632659912, "learning_rate": 6.478865202997593e-06, "loss": 0.3255, "step": 8848 }, { "epoch": 0.4104359925788497, "grad_norm": 6.487975120544434, "learning_rate": 6.47816217154907e-06, "loss": 0.297, "step": 8849 }, { "epoch": 0.4104823747680891, "grad_norm": 7.899438858032227, "learning_rate": 6.477459108076943e-06, "loss": 0.3766, "step": 8850 }, { "epoch": 0.4105287569573284, "grad_norm": 3.8035058975219727, "learning_rate": 6.4767560125964455e-06, "loss": 0.342, "step": 8851 }, { "epoch": 0.41057513914656774, "grad_norm": 4.9306159019470215, "learning_rate": 6.476052885122805e-06, "loss": 0.2905, "step": 8852 }, { "epoch": 0.41062152133580704, "grad_norm": 5.549591541290283, "learning_rate": 6.47534972567126e-06, "loss": 0.3479, "step": 8853 }, { "epoch": 0.4106679035250464, "grad_norm": 5.186526775360107, "learning_rate": 6.474646534257041e-06, "loss": 0.2864, "step": 8854 }, { "epoch": 0.4107142857142857, "grad_norm": 9.701956748962402, "learning_rate": 6.4739433108953805e-06, "loss": 0.4423, "step": 8855 }, { "epoch": 0.41076066790352506, "grad_norm": 9.650307655334473, "learning_rate": 6.473240055601517e-06, "loss": 0.4247, "step": 8856 }, { "epoch": 0.41080705009276436, "grad_norm": 12.853556632995605, "learning_rate": 6.4725367683906845e-06, "loss": 0.3839, "step": 8857 }, { "epoch": 0.4108534322820037, "grad_norm": 4.981399059295654, "learning_rate": 6.47183344927812e-06, "loss": 0.1973, "step": 8858 }, { "epoch": 0.410899814471243, "grad_norm": 8.981074333190918, "learning_rate": 6.47113009827906e-06, "loss": 0.4556, "step": 8859 }, { "epoch": 0.4109461966604824, "grad_norm": 5.540948867797852, "learning_rate": 6.470426715408742e-06, "loss": 0.3371, "step": 8860 }, { "epoch": 0.4109925788497217, "grad_norm": 5.869764804840088, "learning_rate": 6.469723300682406e-06, "loss": 0.3317, "step": 8861 }, { "epoch": 0.41103896103896104, "grad_norm": 9.940411567687988, "learning_rate": 6.4690198541152884e-06, "loss": 0.3962, "step": 8862 }, { "epoch": 0.4110853432282004, "grad_norm": 7.023294448852539, "learning_rate": 6.468316375722631e-06, "loss": 0.2985, "step": 8863 }, { "epoch": 0.4111317254174397, "grad_norm": 5.12414026260376, "learning_rate": 6.467612865519674e-06, "loss": 0.334, "step": 8864 }, { "epoch": 0.41117810760667906, "grad_norm": 7.476726531982422, "learning_rate": 6.466909323521659e-06, "loss": 0.3371, "step": 8865 }, { "epoch": 0.41122448979591836, "grad_norm": 7.121767520904541, "learning_rate": 6.466205749743826e-06, "loss": 0.2927, "step": 8866 }, { "epoch": 0.4112708719851577, "grad_norm": 8.1069974899292, "learning_rate": 6.465502144201419e-06, "loss": 0.3684, "step": 8867 }, { "epoch": 0.411317254174397, "grad_norm": 7.915123462677002, "learning_rate": 6.464798506909681e-06, "loss": 0.3048, "step": 8868 }, { "epoch": 0.4113636363636364, "grad_norm": 5.36940336227417, "learning_rate": 6.464094837883856e-06, "loss": 0.3708, "step": 8869 }, { "epoch": 0.4114100185528757, "grad_norm": 5.38258171081543, "learning_rate": 6.463391137139189e-06, "loss": 0.3118, "step": 8870 }, { "epoch": 0.41145640074211504, "grad_norm": 10.674969673156738, "learning_rate": 6.4626874046909235e-06, "loss": 0.2664, "step": 8871 }, { "epoch": 0.41150278293135434, "grad_norm": 10.48469352722168, "learning_rate": 6.46198364055431e-06, "loss": 0.3295, "step": 8872 }, { "epoch": 0.4115491651205937, "grad_norm": 4.573629856109619, "learning_rate": 6.461279844744589e-06, "loss": 0.3574, "step": 8873 }, { "epoch": 0.411595547309833, "grad_norm": 8.608427047729492, "learning_rate": 6.460576017277011e-06, "loss": 0.4014, "step": 8874 }, { "epoch": 0.41164192949907236, "grad_norm": 10.513482093811035, "learning_rate": 6.459872158166826e-06, "loss": 0.5082, "step": 8875 }, { "epoch": 0.4116883116883117, "grad_norm": 6.0285444259643555, "learning_rate": 6.459168267429279e-06, "loss": 0.3453, "step": 8876 }, { "epoch": 0.411734693877551, "grad_norm": 6.838096618652344, "learning_rate": 6.458464345079623e-06, "loss": 0.3884, "step": 8877 }, { "epoch": 0.4117810760667904, "grad_norm": 8.517426490783691, "learning_rate": 6.457760391133105e-06, "loss": 0.3823, "step": 8878 }, { "epoch": 0.4118274582560297, "grad_norm": 4.944194316864014, "learning_rate": 6.457056405604975e-06, "loss": 0.3875, "step": 8879 }, { "epoch": 0.41187384044526903, "grad_norm": 6.325939178466797, "learning_rate": 6.456352388510489e-06, "loss": 0.2337, "step": 8880 }, { "epoch": 0.41192022263450834, "grad_norm": 6.292982578277588, "learning_rate": 6.455648339864896e-06, "loss": 0.2972, "step": 8881 }, { "epoch": 0.4119666048237477, "grad_norm": 9.217416763305664, "learning_rate": 6.454944259683451e-06, "loss": 0.3359, "step": 8882 }, { "epoch": 0.412012987012987, "grad_norm": 12.60333251953125, "learning_rate": 6.454240147981403e-06, "loss": 0.4316, "step": 8883 }, { "epoch": 0.41205936920222636, "grad_norm": 3.9568774700164795, "learning_rate": 6.453536004774012e-06, "loss": 0.3466, "step": 8884 }, { "epoch": 0.41210575139146566, "grad_norm": 6.0755767822265625, "learning_rate": 6.452831830076529e-06, "loss": 0.3033, "step": 8885 }, { "epoch": 0.412152133580705, "grad_norm": 6.606775283813477, "learning_rate": 6.45212762390421e-06, "loss": 0.2478, "step": 8886 }, { "epoch": 0.4121985157699443, "grad_norm": 12.678083419799805, "learning_rate": 6.451423386272312e-06, "loss": 0.3119, "step": 8887 }, { "epoch": 0.4122448979591837, "grad_norm": 9.98095989227295, "learning_rate": 6.450719117196094e-06, "loss": 0.4194, "step": 8888 }, { "epoch": 0.412291280148423, "grad_norm": 10.911978721618652, "learning_rate": 6.450014816690808e-06, "loss": 0.4936, "step": 8889 }, { "epoch": 0.41233766233766234, "grad_norm": 9.656254768371582, "learning_rate": 6.44931048477172e-06, "loss": 0.3381, "step": 8890 }, { "epoch": 0.4123840445269017, "grad_norm": 5.802029132843018, "learning_rate": 6.448606121454082e-06, "loss": 0.2982, "step": 8891 }, { "epoch": 0.412430426716141, "grad_norm": 6.437610149383545, "learning_rate": 6.4479017267531565e-06, "loss": 0.3713, "step": 8892 }, { "epoch": 0.41247680890538035, "grad_norm": 17.4952392578125, "learning_rate": 6.447197300684203e-06, "loss": 0.4805, "step": 8893 }, { "epoch": 0.41252319109461966, "grad_norm": 15.077655792236328, "learning_rate": 6.446492843262485e-06, "loss": 0.3423, "step": 8894 }, { "epoch": 0.412569573283859, "grad_norm": 7.87810754776001, "learning_rate": 6.445788354503263e-06, "loss": 0.4088, "step": 8895 }, { "epoch": 0.4126159554730983, "grad_norm": 8.481367111206055, "learning_rate": 6.445083834421797e-06, "loss": 0.431, "step": 8896 }, { "epoch": 0.4126623376623377, "grad_norm": 8.984746932983398, "learning_rate": 6.444379283033353e-06, "loss": 0.4728, "step": 8897 }, { "epoch": 0.412708719851577, "grad_norm": 6.787219047546387, "learning_rate": 6.443674700353194e-06, "loss": 0.3307, "step": 8898 }, { "epoch": 0.41275510204081634, "grad_norm": 8.829132080078125, "learning_rate": 6.442970086396583e-06, "loss": 0.384, "step": 8899 }, { "epoch": 0.41280148423005564, "grad_norm": 9.110902786254883, "learning_rate": 6.442265441178788e-06, "loss": 0.3139, "step": 8900 }, { "epoch": 0.412847866419295, "grad_norm": 9.634923934936523, "learning_rate": 6.441560764715071e-06, "loss": 0.468, "step": 8901 }, { "epoch": 0.4128942486085343, "grad_norm": 5.861632823944092, "learning_rate": 6.4408560570207e-06, "loss": 0.2149, "step": 8902 }, { "epoch": 0.41294063079777366, "grad_norm": 5.640119552612305, "learning_rate": 6.440151318110944e-06, "loss": 0.3518, "step": 8903 }, { "epoch": 0.412987012987013, "grad_norm": 9.266770362854004, "learning_rate": 6.439446548001069e-06, "loss": 0.403, "step": 8904 }, { "epoch": 0.4130333951762523, "grad_norm": 10.603333473205566, "learning_rate": 6.438741746706344e-06, "loss": 0.352, "step": 8905 }, { "epoch": 0.4130797773654917, "grad_norm": 6.9427289962768555, "learning_rate": 6.438036914242037e-06, "loss": 0.3335, "step": 8906 }, { "epoch": 0.413126159554731, "grad_norm": 8.085525512695312, "learning_rate": 6.437332050623419e-06, "loss": 0.4689, "step": 8907 }, { "epoch": 0.41317254174397033, "grad_norm": 4.760626316070557, "learning_rate": 6.4366271558657615e-06, "loss": 0.3761, "step": 8908 }, { "epoch": 0.41321892393320964, "grad_norm": 7.569082260131836, "learning_rate": 6.4359222299843325e-06, "loss": 0.456, "step": 8909 }, { "epoch": 0.413265306122449, "grad_norm": 7.3989410400390625, "learning_rate": 6.435217272994406e-06, "loss": 0.3356, "step": 8910 }, { "epoch": 0.4133116883116883, "grad_norm": 7.628670692443848, "learning_rate": 6.434512284911256e-06, "loss": 0.3169, "step": 8911 }, { "epoch": 0.41335807050092765, "grad_norm": 8.95783805847168, "learning_rate": 6.433807265750152e-06, "loss": 0.4303, "step": 8912 }, { "epoch": 0.41340445269016696, "grad_norm": 5.075624465942383, "learning_rate": 6.433102215526372e-06, "loss": 0.2636, "step": 8913 }, { "epoch": 0.4134508348794063, "grad_norm": 7.6933913230896, "learning_rate": 6.432397134255186e-06, "loss": 0.2952, "step": 8914 }, { "epoch": 0.4134972170686456, "grad_norm": 7.897127628326416, "learning_rate": 6.431692021951873e-06, "loss": 0.3537, "step": 8915 }, { "epoch": 0.413543599257885, "grad_norm": 5.1692609786987305, "learning_rate": 6.430986878631708e-06, "loss": 0.351, "step": 8916 }, { "epoch": 0.4135899814471243, "grad_norm": 5.31810998916626, "learning_rate": 6.430281704309966e-06, "loss": 0.3669, "step": 8917 }, { "epoch": 0.41363636363636364, "grad_norm": 6.854677200317383, "learning_rate": 6.429576499001925e-06, "loss": 0.3872, "step": 8918 }, { "epoch": 0.413682745825603, "grad_norm": 10.305624008178711, "learning_rate": 6.428871262722864e-06, "loss": 0.3158, "step": 8919 }, { "epoch": 0.4137291280148423, "grad_norm": 4.806748867034912, "learning_rate": 6.4281659954880605e-06, "loss": 0.2906, "step": 8920 }, { "epoch": 0.41377551020408165, "grad_norm": 9.29365062713623, "learning_rate": 6.427460697312793e-06, "loss": 0.4346, "step": 8921 }, { "epoch": 0.41382189239332096, "grad_norm": 6.106595039367676, "learning_rate": 6.426755368212344e-06, "loss": 0.3999, "step": 8922 }, { "epoch": 0.4138682745825603, "grad_norm": 11.678009986877441, "learning_rate": 6.426050008201992e-06, "loss": 0.6062, "step": 8923 }, { "epoch": 0.4139146567717996, "grad_norm": 5.433447360992432, "learning_rate": 6.425344617297019e-06, "loss": 0.3425, "step": 8924 }, { "epoch": 0.413961038961039, "grad_norm": 13.437582969665527, "learning_rate": 6.424639195512705e-06, "loss": 0.4808, "step": 8925 }, { "epoch": 0.4140074211502783, "grad_norm": 6.9131550788879395, "learning_rate": 6.4239337428643355e-06, "loss": 0.3387, "step": 8926 }, { "epoch": 0.41405380333951763, "grad_norm": 10.103343963623047, "learning_rate": 6.423228259367194e-06, "loss": 0.2765, "step": 8927 }, { "epoch": 0.41410018552875694, "grad_norm": 6.683265686035156, "learning_rate": 6.42252274503656e-06, "loss": 0.3096, "step": 8928 }, { "epoch": 0.4141465677179963, "grad_norm": 12.783940315246582, "learning_rate": 6.421817199887722e-06, "loss": 0.4252, "step": 8929 }, { "epoch": 0.4141929499072356, "grad_norm": 8.266383171081543, "learning_rate": 6.421111623935966e-06, "loss": 0.4601, "step": 8930 }, { "epoch": 0.41423933209647495, "grad_norm": 11.66601848602295, "learning_rate": 6.420406017196574e-06, "loss": 0.4037, "step": 8931 }, { "epoch": 0.4142857142857143, "grad_norm": 9.506656646728516, "learning_rate": 6.419700379684836e-06, "loss": 0.3058, "step": 8932 }, { "epoch": 0.4143320964749536, "grad_norm": 7.252905368804932, "learning_rate": 6.418994711416038e-06, "loss": 0.3658, "step": 8933 }, { "epoch": 0.414378478664193, "grad_norm": 16.036819458007812, "learning_rate": 6.4182890124054675e-06, "loss": 0.4663, "step": 8934 }, { "epoch": 0.4144248608534323, "grad_norm": 7.504977703094482, "learning_rate": 6.417583282668415e-06, "loss": 0.305, "step": 8935 }, { "epoch": 0.41447124304267163, "grad_norm": 7.615923881530762, "learning_rate": 6.416877522220167e-06, "loss": 0.3158, "step": 8936 }, { "epoch": 0.41451762523191094, "grad_norm": 11.311210632324219, "learning_rate": 6.416171731076014e-06, "loss": 0.3366, "step": 8937 }, { "epoch": 0.4145640074211503, "grad_norm": 5.27177095413208, "learning_rate": 6.415465909251249e-06, "loss": 0.3036, "step": 8938 }, { "epoch": 0.4146103896103896, "grad_norm": 4.69295597076416, "learning_rate": 6.4147600567611615e-06, "loss": 0.3135, "step": 8939 }, { "epoch": 0.41465677179962895, "grad_norm": 6.3606276512146, "learning_rate": 6.414054173621043e-06, "loss": 0.3324, "step": 8940 }, { "epoch": 0.41470315398886826, "grad_norm": 4.716183662414551, "learning_rate": 6.4133482598461875e-06, "loss": 0.2763, "step": 8941 }, { "epoch": 0.4147495361781076, "grad_norm": 8.752275466918945, "learning_rate": 6.412642315451887e-06, "loss": 0.4466, "step": 8942 }, { "epoch": 0.4147959183673469, "grad_norm": 5.612915992736816, "learning_rate": 6.411936340453435e-06, "loss": 0.262, "step": 8943 }, { "epoch": 0.4148423005565863, "grad_norm": 5.102349758148193, "learning_rate": 6.4112303348661274e-06, "loss": 0.299, "step": 8944 }, { "epoch": 0.4148886827458256, "grad_norm": 8.315260887145996, "learning_rate": 6.41052429870526e-06, "loss": 0.4387, "step": 8945 }, { "epoch": 0.41493506493506493, "grad_norm": 6.7214460372924805, "learning_rate": 6.4098182319861265e-06, "loss": 0.2975, "step": 8946 }, { "epoch": 0.4149814471243043, "grad_norm": 17.0899658203125, "learning_rate": 6.409112134724025e-06, "loss": 0.5304, "step": 8947 }, { "epoch": 0.4150278293135436, "grad_norm": 5.709596157073975, "learning_rate": 6.408406006934251e-06, "loss": 0.2987, "step": 8948 }, { "epoch": 0.41507421150278295, "grad_norm": 8.569832801818848, "learning_rate": 6.407699848632106e-06, "loss": 0.3854, "step": 8949 }, { "epoch": 0.41512059369202226, "grad_norm": 6.505866527557373, "learning_rate": 6.406993659832886e-06, "loss": 0.2907, "step": 8950 }, { "epoch": 0.4151669758812616, "grad_norm": 20.269514083862305, "learning_rate": 6.406287440551888e-06, "loss": 0.5901, "step": 8951 }, { "epoch": 0.4152133580705009, "grad_norm": 6.646124839782715, "learning_rate": 6.405581190804418e-06, "loss": 0.3899, "step": 8952 }, { "epoch": 0.4152597402597403, "grad_norm": 4.911120891571045, "learning_rate": 6.404874910605769e-06, "loss": 0.3226, "step": 8953 }, { "epoch": 0.4153061224489796, "grad_norm": 6.66895055770874, "learning_rate": 6.404168599971249e-06, "loss": 0.2929, "step": 8954 }, { "epoch": 0.41535250463821893, "grad_norm": 19.065675735473633, "learning_rate": 6.403462258916156e-06, "loss": 0.3429, "step": 8955 }, { "epoch": 0.41539888682745824, "grad_norm": 9.861812591552734, "learning_rate": 6.402755887455792e-06, "loss": 0.4108, "step": 8956 }, { "epoch": 0.4154452690166976, "grad_norm": 4.693557262420654, "learning_rate": 6.402049485605464e-06, "loss": 0.2994, "step": 8957 }, { "epoch": 0.4154916512059369, "grad_norm": 8.122928619384766, "learning_rate": 6.401343053380472e-06, "loss": 0.3654, "step": 8958 }, { "epoch": 0.41553803339517625, "grad_norm": 7.584587574005127, "learning_rate": 6.400636590796121e-06, "loss": 0.4822, "step": 8959 }, { "epoch": 0.4155844155844156, "grad_norm": 7.780361652374268, "learning_rate": 6.399930097867719e-06, "loss": 0.3601, "step": 8960 }, { "epoch": 0.4156307977736549, "grad_norm": 6.803781032562256, "learning_rate": 6.399223574610569e-06, "loss": 0.3687, "step": 8961 }, { "epoch": 0.41567717996289427, "grad_norm": 7.9498186111450195, "learning_rate": 6.398517021039977e-06, "loss": 0.3631, "step": 8962 }, { "epoch": 0.4157235621521336, "grad_norm": 6.811389923095703, "learning_rate": 6.3978104371712536e-06, "loss": 0.36, "step": 8963 }, { "epoch": 0.41576994434137293, "grad_norm": 3.7379636764526367, "learning_rate": 6.397103823019704e-06, "loss": 0.3359, "step": 8964 }, { "epoch": 0.41581632653061223, "grad_norm": 6.983560562133789, "learning_rate": 6.396397178600636e-06, "loss": 0.3896, "step": 8965 }, { "epoch": 0.4158627087198516, "grad_norm": 7.3667073249816895, "learning_rate": 6.395690503929359e-06, "loss": 0.3923, "step": 8966 }, { "epoch": 0.4159090909090909, "grad_norm": 5.563296318054199, "learning_rate": 6.394983799021185e-06, "loss": 0.335, "step": 8967 }, { "epoch": 0.41595547309833025, "grad_norm": 6.854033946990967, "learning_rate": 6.394277063891422e-06, "loss": 0.3607, "step": 8968 }, { "epoch": 0.41600185528756956, "grad_norm": 4.4043169021606445, "learning_rate": 6.393570298555381e-06, "loss": 0.3216, "step": 8969 }, { "epoch": 0.4160482374768089, "grad_norm": 11.964680671691895, "learning_rate": 6.392863503028376e-06, "loss": 0.3617, "step": 8970 }, { "epoch": 0.4160946196660482, "grad_norm": 4.428523063659668, "learning_rate": 6.392156677325718e-06, "loss": 0.3708, "step": 8971 }, { "epoch": 0.4161410018552876, "grad_norm": 10.670378684997559, "learning_rate": 6.3914498214627185e-06, "loss": 0.5073, "step": 8972 }, { "epoch": 0.4161873840445269, "grad_norm": 4.896419525146484, "learning_rate": 6.3907429354546924e-06, "loss": 0.3689, "step": 8973 }, { "epoch": 0.41623376623376623, "grad_norm": 9.481851577758789, "learning_rate": 6.3900360193169565e-06, "loss": 0.2922, "step": 8974 }, { "epoch": 0.4162801484230056, "grad_norm": 6.97524881362915, "learning_rate": 6.389329073064821e-06, "loss": 0.2676, "step": 8975 }, { "epoch": 0.4163265306122449, "grad_norm": 6.220829010009766, "learning_rate": 6.388622096713606e-06, "loss": 0.2622, "step": 8976 }, { "epoch": 0.41637291280148425, "grad_norm": 5.174604892730713, "learning_rate": 6.387915090278624e-06, "loss": 0.3658, "step": 8977 }, { "epoch": 0.41641929499072355, "grad_norm": 9.502763748168945, "learning_rate": 6.3872080537751934e-06, "loss": 0.3052, "step": 8978 }, { "epoch": 0.4164656771799629, "grad_norm": 12.677793502807617, "learning_rate": 6.386500987218634e-06, "loss": 0.4245, "step": 8979 }, { "epoch": 0.4165120593692022, "grad_norm": 6.056232452392578, "learning_rate": 6.38579389062426e-06, "loss": 0.348, "step": 8980 }, { "epoch": 0.4165584415584416, "grad_norm": 7.569056510925293, "learning_rate": 6.385086764007392e-06, "loss": 0.3689, "step": 8981 }, { "epoch": 0.4166048237476809, "grad_norm": 6.628491401672363, "learning_rate": 6.384379607383352e-06, "loss": 0.246, "step": 8982 }, { "epoch": 0.41665120593692023, "grad_norm": 4.512234210968018, "learning_rate": 6.383672420767456e-06, "loss": 0.278, "step": 8983 }, { "epoch": 0.41669758812615953, "grad_norm": 7.099079132080078, "learning_rate": 6.382965204175027e-06, "loss": 0.4482, "step": 8984 }, { "epoch": 0.4167439703153989, "grad_norm": 8.274431228637695, "learning_rate": 6.382257957621388e-06, "loss": 0.3494, "step": 8985 }, { "epoch": 0.4167903525046382, "grad_norm": 8.056544303894043, "learning_rate": 6.381550681121858e-06, "loss": 0.2406, "step": 8986 }, { "epoch": 0.41683673469387755, "grad_norm": 6.962970733642578, "learning_rate": 6.380843374691762e-06, "loss": 0.2691, "step": 8987 }, { "epoch": 0.41688311688311686, "grad_norm": 8.134103775024414, "learning_rate": 6.380136038346422e-06, "loss": 0.3291, "step": 8988 }, { "epoch": 0.4169294990723562, "grad_norm": 6.436074256896973, "learning_rate": 6.379428672101162e-06, "loss": 0.4184, "step": 8989 }, { "epoch": 0.41697588126159557, "grad_norm": 8.34714126586914, "learning_rate": 6.378721275971309e-06, "loss": 0.4794, "step": 8990 }, { "epoch": 0.4170222634508349, "grad_norm": 20.151090621948242, "learning_rate": 6.378013849972186e-06, "loss": 0.3216, "step": 8991 }, { "epoch": 0.41706864564007423, "grad_norm": 7.539283275604248, "learning_rate": 6.377306394119118e-06, "loss": 0.2388, "step": 8992 }, { "epoch": 0.41711502782931353, "grad_norm": 6.138847827911377, "learning_rate": 6.376598908427436e-06, "loss": 0.3083, "step": 8993 }, { "epoch": 0.4171614100185529, "grad_norm": 9.79458999633789, "learning_rate": 6.375891392912464e-06, "loss": 0.4774, "step": 8994 }, { "epoch": 0.4172077922077922, "grad_norm": 8.130257606506348, "learning_rate": 6.3751838475895315e-06, "loss": 0.3878, "step": 8995 }, { "epoch": 0.41725417439703155, "grad_norm": 12.471360206604004, "learning_rate": 6.374476272473964e-06, "loss": 0.6645, "step": 8996 }, { "epoch": 0.41730055658627085, "grad_norm": 9.001755714416504, "learning_rate": 6.373768667581095e-06, "loss": 0.4242, "step": 8997 }, { "epoch": 0.4173469387755102, "grad_norm": 6.248039722442627, "learning_rate": 6.373061032926252e-06, "loss": 0.3165, "step": 8998 }, { "epoch": 0.4173933209647495, "grad_norm": 4.7372260093688965, "learning_rate": 6.372353368524765e-06, "loss": 0.3139, "step": 8999 }, { "epoch": 0.4174397031539889, "grad_norm": 6.8779296875, "learning_rate": 6.371645674391967e-06, "loss": 0.3929, "step": 9000 }, { "epoch": 0.4174860853432282, "grad_norm": 6.727295398712158, "learning_rate": 6.370937950543189e-06, "loss": 0.2955, "step": 9001 }, { "epoch": 0.41753246753246753, "grad_norm": 4.474544525146484, "learning_rate": 6.370230196993763e-06, "loss": 0.2381, "step": 9002 }, { "epoch": 0.4175788497217069, "grad_norm": 10.911761283874512, "learning_rate": 6.369522413759022e-06, "loss": 0.4324, "step": 9003 }, { "epoch": 0.4176252319109462, "grad_norm": 4.759251117706299, "learning_rate": 6.368814600854302e-06, "loss": 0.3403, "step": 9004 }, { "epoch": 0.41767161410018555, "grad_norm": 7.17813777923584, "learning_rate": 6.368106758294934e-06, "loss": 0.3401, "step": 9005 }, { "epoch": 0.41771799628942485, "grad_norm": 9.892252922058105, "learning_rate": 6.367398886096256e-06, "loss": 0.3592, "step": 9006 }, { "epoch": 0.4177643784786642, "grad_norm": 7.14901876449585, "learning_rate": 6.3666909842736e-06, "loss": 0.3551, "step": 9007 }, { "epoch": 0.4178107606679035, "grad_norm": 4.887308597564697, "learning_rate": 6.365983052842306e-06, "loss": 0.2911, "step": 9008 }, { "epoch": 0.41785714285714287, "grad_norm": 4.408274173736572, "learning_rate": 6.365275091817711e-06, "loss": 0.2562, "step": 9009 }, { "epoch": 0.4179035250463822, "grad_norm": 10.031405448913574, "learning_rate": 6.36456710121515e-06, "loss": 0.4199, "step": 9010 }, { "epoch": 0.41794990723562153, "grad_norm": 4.93776273727417, "learning_rate": 6.363859081049961e-06, "loss": 0.3429, "step": 9011 }, { "epoch": 0.41799628942486083, "grad_norm": 5.270020008087158, "learning_rate": 6.363151031337488e-06, "loss": 0.3982, "step": 9012 }, { "epoch": 0.4180426716141002, "grad_norm": 3.5101613998413086, "learning_rate": 6.362442952093065e-06, "loss": 0.2235, "step": 9013 }, { "epoch": 0.4180890538033395, "grad_norm": 10.345179557800293, "learning_rate": 6.3617348433320326e-06, "loss": 0.4659, "step": 9014 }, { "epoch": 0.41813543599257885, "grad_norm": 6.929079055786133, "learning_rate": 6.361026705069735e-06, "loss": 0.343, "step": 9015 }, { "epoch": 0.41818181818181815, "grad_norm": 4.906991004943848, "learning_rate": 6.3603185373215105e-06, "loss": 0.2453, "step": 9016 }, { "epoch": 0.4182282003710575, "grad_norm": 9.330899238586426, "learning_rate": 6.359610340102704e-06, "loss": 0.3399, "step": 9017 }, { "epoch": 0.41827458256029687, "grad_norm": 4.796443939208984, "learning_rate": 6.358902113428655e-06, "loss": 0.3325, "step": 9018 }, { "epoch": 0.4183209647495362, "grad_norm": 4.198294639587402, "learning_rate": 6.358193857314709e-06, "loss": 0.2961, "step": 9019 }, { "epoch": 0.41836734693877553, "grad_norm": 11.18105411529541, "learning_rate": 6.35748557177621e-06, "loss": 0.5506, "step": 9020 }, { "epoch": 0.41841372912801483, "grad_norm": 3.885270357131958, "learning_rate": 6.356777256828501e-06, "loss": 0.3355, "step": 9021 }, { "epoch": 0.4184601113172542, "grad_norm": 8.842208862304688, "learning_rate": 6.356068912486928e-06, "loss": 0.332, "step": 9022 }, { "epoch": 0.4185064935064935, "grad_norm": 4.744085311889648, "learning_rate": 6.355360538766839e-06, "loss": 0.2865, "step": 9023 }, { "epoch": 0.41855287569573285, "grad_norm": 6.205535411834717, "learning_rate": 6.354652135683578e-06, "loss": 0.3148, "step": 9024 }, { "epoch": 0.41859925788497215, "grad_norm": 9.930604934692383, "learning_rate": 6.353943703252493e-06, "loss": 0.3722, "step": 9025 }, { "epoch": 0.4186456400742115, "grad_norm": 4.825763702392578, "learning_rate": 6.353235241488932e-06, "loss": 0.3127, "step": 9026 }, { "epoch": 0.4186920222634508, "grad_norm": 5.2697882652282715, "learning_rate": 6.352526750408242e-06, "loss": 0.353, "step": 9027 }, { "epoch": 0.41873840445269017, "grad_norm": 7.469344615936279, "learning_rate": 6.3518182300257745e-06, "loss": 0.3978, "step": 9028 }, { "epoch": 0.4187847866419295, "grad_norm": 4.026252746582031, "learning_rate": 6.351109680356878e-06, "loss": 0.2919, "step": 9029 }, { "epoch": 0.41883116883116883, "grad_norm": 9.794647216796875, "learning_rate": 6.350401101416904e-06, "loss": 0.3469, "step": 9030 }, { "epoch": 0.4188775510204082, "grad_norm": 4.691744327545166, "learning_rate": 6.3496924932212e-06, "loss": 0.3613, "step": 9031 }, { "epoch": 0.4189239332096475, "grad_norm": 5.315713405609131, "learning_rate": 6.348983855785122e-06, "loss": 0.3368, "step": 9032 }, { "epoch": 0.41897031539888685, "grad_norm": 13.038318634033203, "learning_rate": 6.34827518912402e-06, "loss": 0.3918, "step": 9033 }, { "epoch": 0.41901669758812615, "grad_norm": 5.638134479522705, "learning_rate": 6.347566493253245e-06, "loss": 0.2347, "step": 9034 }, { "epoch": 0.4190630797773655, "grad_norm": 7.315237045288086, "learning_rate": 6.346857768188156e-06, "loss": 0.3742, "step": 9035 }, { "epoch": 0.4191094619666048, "grad_norm": 10.271062850952148, "learning_rate": 6.346149013944102e-06, "loss": 0.326, "step": 9036 }, { "epoch": 0.41915584415584417, "grad_norm": 7.726357936859131, "learning_rate": 6.345440230536439e-06, "loss": 0.2723, "step": 9037 }, { "epoch": 0.4192022263450835, "grad_norm": 7.362651348114014, "learning_rate": 6.344731417980526e-06, "loss": 0.4029, "step": 9038 }, { "epoch": 0.41924860853432283, "grad_norm": 5.592629432678223, "learning_rate": 6.3440225762917125e-06, "loss": 0.2808, "step": 9039 }, { "epoch": 0.41929499072356213, "grad_norm": 12.634882926940918, "learning_rate": 6.343313705485361e-06, "loss": 0.384, "step": 9040 }, { "epoch": 0.4193413729128015, "grad_norm": 5.5565972328186035, "learning_rate": 6.342604805576826e-06, "loss": 0.3825, "step": 9041 }, { "epoch": 0.4193877551020408, "grad_norm": 7.955803394317627, "learning_rate": 6.341895876581465e-06, "loss": 0.2615, "step": 9042 }, { "epoch": 0.41943413729128015, "grad_norm": 8.077075004577637, "learning_rate": 6.341186918514639e-06, "loss": 0.3268, "step": 9043 }, { "epoch": 0.41948051948051945, "grad_norm": 8.849838256835938, "learning_rate": 6.3404779313917055e-06, "loss": 0.4949, "step": 9044 }, { "epoch": 0.4195269016697588, "grad_norm": 8.567267417907715, "learning_rate": 6.339768915228025e-06, "loss": 0.393, "step": 9045 }, { "epoch": 0.41957328385899817, "grad_norm": 8.690492630004883, "learning_rate": 6.339059870038957e-06, "loss": 0.4183, "step": 9046 }, { "epoch": 0.41961966604823747, "grad_norm": 14.872413635253906, "learning_rate": 6.3383507958398625e-06, "loss": 0.5126, "step": 9047 }, { "epoch": 0.41966604823747683, "grad_norm": 9.674386978149414, "learning_rate": 6.337641692646106e-06, "loss": 0.372, "step": 9048 }, { "epoch": 0.41971243042671613, "grad_norm": 9.365259170532227, "learning_rate": 6.336932560473046e-06, "loss": 0.3471, "step": 9049 }, { "epoch": 0.4197588126159555, "grad_norm": 11.6399564743042, "learning_rate": 6.336223399336048e-06, "loss": 0.3459, "step": 9050 }, { "epoch": 0.4198051948051948, "grad_norm": 7.161934852600098, "learning_rate": 6.335514209250474e-06, "loss": 0.3181, "step": 9051 }, { "epoch": 0.41985157699443415, "grad_norm": 11.449100494384766, "learning_rate": 6.3348049902316885e-06, "loss": 0.427, "step": 9052 }, { "epoch": 0.41989795918367345, "grad_norm": 10.107257843017578, "learning_rate": 6.334095742295059e-06, "loss": 0.2582, "step": 9053 }, { "epoch": 0.4199443413729128, "grad_norm": 6.623852729797363, "learning_rate": 6.333386465455948e-06, "loss": 0.3593, "step": 9054 }, { "epoch": 0.4199907235621521, "grad_norm": 5.912932872772217, "learning_rate": 6.3326771597297225e-06, "loss": 0.2808, "step": 9055 }, { "epoch": 0.42003710575139147, "grad_norm": 6.552662372589111, "learning_rate": 6.331967825131749e-06, "loss": 0.347, "step": 9056 }, { "epoch": 0.4200834879406308, "grad_norm": 3.961583375930786, "learning_rate": 6.331258461677395e-06, "loss": 0.345, "step": 9057 }, { "epoch": 0.42012987012987013, "grad_norm": 7.807947635650635, "learning_rate": 6.33054906938203e-06, "loss": 0.3382, "step": 9058 }, { "epoch": 0.4201762523191095, "grad_norm": 5.79533052444458, "learning_rate": 6.329839648261021e-06, "loss": 0.3023, "step": 9059 }, { "epoch": 0.4202226345083488, "grad_norm": 7.517873287200928, "learning_rate": 6.329130198329735e-06, "loss": 0.4275, "step": 9060 }, { "epoch": 0.42026901669758815, "grad_norm": 7.372195720672607, "learning_rate": 6.328420719603546e-06, "loss": 0.23, "step": 9061 }, { "epoch": 0.42031539888682745, "grad_norm": 12.337494850158691, "learning_rate": 6.327711212097822e-06, "loss": 0.3828, "step": 9062 }, { "epoch": 0.4203617810760668, "grad_norm": 5.6488823890686035, "learning_rate": 6.327001675827935e-06, "loss": 0.3685, "step": 9063 }, { "epoch": 0.4204081632653061, "grad_norm": 12.237394332885742, "learning_rate": 6.326292110809258e-06, "loss": 0.5215, "step": 9064 }, { "epoch": 0.42045454545454547, "grad_norm": 12.894755363464355, "learning_rate": 6.32558251705716e-06, "loss": 0.3527, "step": 9065 }, { "epoch": 0.42050092764378477, "grad_norm": 8.242292404174805, "learning_rate": 6.324872894587017e-06, "loss": 0.4285, "step": 9066 }, { "epoch": 0.42054730983302413, "grad_norm": 10.290838241577148, "learning_rate": 6.3241632434142035e-06, "loss": 0.4284, "step": 9067 }, { "epoch": 0.42059369202226343, "grad_norm": 8.182149887084961, "learning_rate": 6.323453563554089e-06, "loss": 0.3301, "step": 9068 }, { "epoch": 0.4206400742115028, "grad_norm": 8.70626449584961, "learning_rate": 6.3227438550220525e-06, "loss": 0.3548, "step": 9069 }, { "epoch": 0.4206864564007421, "grad_norm": 8.808038711547852, "learning_rate": 6.322034117833467e-06, "loss": 0.531, "step": 9070 }, { "epoch": 0.42073283858998145, "grad_norm": 10.007373809814453, "learning_rate": 6.32132435200371e-06, "loss": 0.3923, "step": 9071 }, { "epoch": 0.42077922077922075, "grad_norm": 6.8415727615356445, "learning_rate": 6.3206145575481585e-06, "loss": 0.3035, "step": 9072 }, { "epoch": 0.4208256029684601, "grad_norm": 11.618107795715332, "learning_rate": 6.319904734482189e-06, "loss": 0.4006, "step": 9073 }, { "epoch": 0.42087198515769947, "grad_norm": 7.6409759521484375, "learning_rate": 6.319194882821178e-06, "loss": 0.2749, "step": 9074 }, { "epoch": 0.42091836734693877, "grad_norm": 9.982111930847168, "learning_rate": 6.318485002580507e-06, "loss": 0.4354, "step": 9075 }, { "epoch": 0.42096474953617813, "grad_norm": 8.752305030822754, "learning_rate": 6.317775093775554e-06, "loss": 0.4243, "step": 9076 }, { "epoch": 0.42101113172541743, "grad_norm": 5.8921966552734375, "learning_rate": 6.317065156421699e-06, "loss": 0.2855, "step": 9077 }, { "epoch": 0.4210575139146568, "grad_norm": 7.191498756408691, "learning_rate": 6.316355190534322e-06, "loss": 0.3754, "step": 9078 }, { "epoch": 0.4211038961038961, "grad_norm": 4.439329624176025, "learning_rate": 6.3156451961288026e-06, "loss": 0.2875, "step": 9079 }, { "epoch": 0.42115027829313545, "grad_norm": 4.652339458465576, "learning_rate": 6.314935173220524e-06, "loss": 0.2989, "step": 9080 }, { "epoch": 0.42119666048237475, "grad_norm": 10.41675090789795, "learning_rate": 6.31422512182487e-06, "loss": 0.5561, "step": 9081 }, { "epoch": 0.4212430426716141, "grad_norm": 7.140581130981445, "learning_rate": 6.313515041957221e-06, "loss": 0.3766, "step": 9082 }, { "epoch": 0.4212894248608534, "grad_norm": 5.927593231201172, "learning_rate": 6.312804933632962e-06, "loss": 0.369, "step": 9083 }, { "epoch": 0.42133580705009277, "grad_norm": 5.583504676818848, "learning_rate": 6.312094796867476e-06, "loss": 0.3246, "step": 9084 }, { "epoch": 0.4213821892393321, "grad_norm": 6.856638431549072, "learning_rate": 6.3113846316761475e-06, "loss": 0.3259, "step": 9085 }, { "epoch": 0.42142857142857143, "grad_norm": 9.09190845489502, "learning_rate": 6.310674438074365e-06, "loss": 0.3557, "step": 9086 }, { "epoch": 0.4214749536178108, "grad_norm": 6.871284008026123, "learning_rate": 6.30996421607751e-06, "loss": 0.3407, "step": 9087 }, { "epoch": 0.4215213358070501, "grad_norm": 6.21288537979126, "learning_rate": 6.309253965700972e-06, "loss": 0.3582, "step": 9088 }, { "epoch": 0.42156771799628945, "grad_norm": 6.966396331787109, "learning_rate": 6.308543686960138e-06, "loss": 0.3616, "step": 9089 }, { "epoch": 0.42161410018552875, "grad_norm": 7.326623439788818, "learning_rate": 6.307833379870394e-06, "loss": 0.3504, "step": 9090 }, { "epoch": 0.4216604823747681, "grad_norm": 9.000057220458984, "learning_rate": 6.307123044447131e-06, "loss": 0.3885, "step": 9091 }, { "epoch": 0.4217068645640074, "grad_norm": 9.114723205566406, "learning_rate": 6.306412680705735e-06, "loss": 0.3423, "step": 9092 }, { "epoch": 0.42175324675324677, "grad_norm": 9.230524063110352, "learning_rate": 6.305702288661598e-06, "loss": 0.3467, "step": 9093 }, { "epoch": 0.42179962894248607, "grad_norm": 6.101755619049072, "learning_rate": 6.30499186833011e-06, "loss": 0.3852, "step": 9094 }, { "epoch": 0.42184601113172543, "grad_norm": 5.797280788421631, "learning_rate": 6.3042814197266615e-06, "loss": 0.4179, "step": 9095 }, { "epoch": 0.42189239332096473, "grad_norm": 5.338336944580078, "learning_rate": 6.303570942866643e-06, "loss": 0.3176, "step": 9096 }, { "epoch": 0.4219387755102041, "grad_norm": 5.564470291137695, "learning_rate": 6.302860437765449e-06, "loss": 0.3999, "step": 9097 }, { "epoch": 0.4219851576994434, "grad_norm": 7.229545593261719, "learning_rate": 6.302149904438469e-06, "loss": 0.3376, "step": 9098 }, { "epoch": 0.42203153988868275, "grad_norm": 4.197958469390869, "learning_rate": 6.301439342901101e-06, "loss": 0.3526, "step": 9099 }, { "epoch": 0.42207792207792205, "grad_norm": 8.855546951293945, "learning_rate": 6.300728753168733e-06, "loss": 0.2943, "step": 9100 }, { "epoch": 0.4221243042671614, "grad_norm": 5.787557125091553, "learning_rate": 6.3000181352567646e-06, "loss": 0.4176, "step": 9101 }, { "epoch": 0.42217068645640077, "grad_norm": 6.9024763107299805, "learning_rate": 6.299307489180589e-06, "loss": 0.3501, "step": 9102 }, { "epoch": 0.42221706864564007, "grad_norm": 4.593562602996826, "learning_rate": 6.2985968149555995e-06, "loss": 0.2932, "step": 9103 }, { "epoch": 0.42226345083487943, "grad_norm": 5.490993976593018, "learning_rate": 6.297886112597198e-06, "loss": 0.2887, "step": 9104 }, { "epoch": 0.42230983302411873, "grad_norm": 5.7392096519470215, "learning_rate": 6.297175382120777e-06, "loss": 0.3343, "step": 9105 }, { "epoch": 0.4223562152133581, "grad_norm": 5.614502906799316, "learning_rate": 6.296464623541736e-06, "loss": 0.351, "step": 9106 }, { "epoch": 0.4224025974025974, "grad_norm": 5.380566596984863, "learning_rate": 6.295753836875471e-06, "loss": 0.3362, "step": 9107 }, { "epoch": 0.42244897959183675, "grad_norm": 10.07261848449707, "learning_rate": 6.295043022137385e-06, "loss": 0.4122, "step": 9108 }, { "epoch": 0.42249536178107605, "grad_norm": 7.091180801391602, "learning_rate": 6.294332179342874e-06, "loss": 0.2834, "step": 9109 }, { "epoch": 0.4225417439703154, "grad_norm": 7.458658695220947, "learning_rate": 6.29362130850734e-06, "loss": 0.3543, "step": 9110 }, { "epoch": 0.4225881261595547, "grad_norm": 5.048964977264404, "learning_rate": 6.292910409646181e-06, "loss": 0.2499, "step": 9111 }, { "epoch": 0.42263450834879407, "grad_norm": 13.15789794921875, "learning_rate": 6.2921994827748e-06, "loss": 0.4396, "step": 9112 }, { "epoch": 0.42268089053803337, "grad_norm": 8.893689155578613, "learning_rate": 6.291488527908601e-06, "loss": 0.5189, "step": 9113 }, { "epoch": 0.42272727272727273, "grad_norm": 7.6755571365356445, "learning_rate": 6.290777545062981e-06, "loss": 0.401, "step": 9114 }, { "epoch": 0.42277365491651203, "grad_norm": 6.568930149078369, "learning_rate": 6.290066534253348e-06, "loss": 0.3542, "step": 9115 }, { "epoch": 0.4228200371057514, "grad_norm": 6.965602397918701, "learning_rate": 6.289355495495104e-06, "loss": 0.3158, "step": 9116 }, { "epoch": 0.42286641929499075, "grad_norm": 4.803134441375732, "learning_rate": 6.288644428803653e-06, "loss": 0.2851, "step": 9117 }, { "epoch": 0.42291280148423005, "grad_norm": 5.706399440765381, "learning_rate": 6.287933334194401e-06, "loss": 0.3336, "step": 9118 }, { "epoch": 0.4229591836734694, "grad_norm": 4.871682167053223, "learning_rate": 6.287222211682752e-06, "loss": 0.4015, "step": 9119 }, { "epoch": 0.4230055658627087, "grad_norm": 4.579196453094482, "learning_rate": 6.286511061284112e-06, "loss": 0.4219, "step": 9120 }, { "epoch": 0.42305194805194807, "grad_norm": 5.804758548736572, "learning_rate": 6.28579988301389e-06, "loss": 0.3765, "step": 9121 }, { "epoch": 0.42309833024118737, "grad_norm": 8.41622257232666, "learning_rate": 6.28508867688749e-06, "loss": 0.328, "step": 9122 }, { "epoch": 0.42314471243042673, "grad_norm": 5.760193824768066, "learning_rate": 6.284377442920322e-06, "loss": 0.2808, "step": 9123 }, { "epoch": 0.42319109461966603, "grad_norm": 5.922753810882568, "learning_rate": 6.283666181127796e-06, "loss": 0.3484, "step": 9124 }, { "epoch": 0.4232374768089054, "grad_norm": 4.706871032714844, "learning_rate": 6.282954891525317e-06, "loss": 0.2084, "step": 9125 }, { "epoch": 0.4232838589981447, "grad_norm": 9.011516571044922, "learning_rate": 6.282243574128297e-06, "loss": 0.3318, "step": 9126 }, { "epoch": 0.42333024118738405, "grad_norm": 4.42405366897583, "learning_rate": 6.2815322289521475e-06, "loss": 0.1985, "step": 9127 }, { "epoch": 0.42337662337662335, "grad_norm": 6.418895721435547, "learning_rate": 6.280820856012277e-06, "loss": 0.3745, "step": 9128 }, { "epoch": 0.4234230055658627, "grad_norm": 7.682169437408447, "learning_rate": 6.2801094553241005e-06, "loss": 0.2832, "step": 9129 }, { "epoch": 0.42346938775510207, "grad_norm": 14.234182357788086, "learning_rate": 6.279398026903025e-06, "loss": 0.43, "step": 9130 }, { "epoch": 0.42351576994434137, "grad_norm": 6.895674705505371, "learning_rate": 6.2786865707644675e-06, "loss": 0.3149, "step": 9131 }, { "epoch": 0.4235621521335807, "grad_norm": 11.200702667236328, "learning_rate": 6.27797508692384e-06, "loss": 0.4763, "step": 9132 }, { "epoch": 0.42360853432282003, "grad_norm": 5.544442176818848, "learning_rate": 6.277263575396556e-06, "loss": 0.2864, "step": 9133 }, { "epoch": 0.4236549165120594, "grad_norm": 11.675524711608887, "learning_rate": 6.276552036198031e-06, "loss": 0.5416, "step": 9134 }, { "epoch": 0.4237012987012987, "grad_norm": 6.491246700286865, "learning_rate": 6.275840469343679e-06, "loss": 0.4775, "step": 9135 }, { "epoch": 0.42374768089053805, "grad_norm": 7.001699924468994, "learning_rate": 6.275128874848915e-06, "loss": 0.3535, "step": 9136 }, { "epoch": 0.42379406307977735, "grad_norm": 11.987276077270508, "learning_rate": 6.274417252729157e-06, "loss": 0.4197, "step": 9137 }, { "epoch": 0.4238404452690167, "grad_norm": 6.212437629699707, "learning_rate": 6.273705602999822e-06, "loss": 0.2774, "step": 9138 }, { "epoch": 0.423886827458256, "grad_norm": 7.161899089813232, "learning_rate": 6.272993925676328e-06, "loss": 0.3101, "step": 9139 }, { "epoch": 0.42393320964749537, "grad_norm": 7.515358924865723, "learning_rate": 6.272282220774091e-06, "loss": 0.4298, "step": 9140 }, { "epoch": 0.42397959183673467, "grad_norm": 7.858050346374512, "learning_rate": 6.271570488308529e-06, "loss": 0.3329, "step": 9141 }, { "epoch": 0.42402597402597403, "grad_norm": 8.614729881286621, "learning_rate": 6.270858728295065e-06, "loss": 0.299, "step": 9142 }, { "epoch": 0.42407235621521333, "grad_norm": 8.16225528717041, "learning_rate": 6.2701469407491176e-06, "loss": 0.2866, "step": 9143 }, { "epoch": 0.4241187384044527, "grad_norm": 10.802350044250488, "learning_rate": 6.269435125686105e-06, "loss": 0.4121, "step": 9144 }, { "epoch": 0.42416512059369205, "grad_norm": 6.159448623657227, "learning_rate": 6.26872328312145e-06, "loss": 0.3881, "step": 9145 }, { "epoch": 0.42421150278293135, "grad_norm": 9.835417747497559, "learning_rate": 6.268011413070576e-06, "loss": 0.4677, "step": 9146 }, { "epoch": 0.4242578849721707, "grad_norm": 12.220236778259277, "learning_rate": 6.267299515548901e-06, "loss": 0.4096, "step": 9147 }, { "epoch": 0.42430426716141, "grad_norm": 7.164438247680664, "learning_rate": 6.266587590571852e-06, "loss": 0.4531, "step": 9148 }, { "epoch": 0.42435064935064937, "grad_norm": 6.186732292175293, "learning_rate": 6.2658756381548515e-06, "loss": 0.3727, "step": 9149 }, { "epoch": 0.42439703153988867, "grad_norm": 7.885580539703369, "learning_rate": 6.265163658313322e-06, "loss": 0.409, "step": 9150 }, { "epoch": 0.424443413729128, "grad_norm": 8.243277549743652, "learning_rate": 6.26445165106269e-06, "loss": 0.3877, "step": 9151 }, { "epoch": 0.42448979591836733, "grad_norm": 6.566524982452393, "learning_rate": 6.263739616418379e-06, "loss": 0.3511, "step": 9152 }, { "epoch": 0.4245361781076067, "grad_norm": 5.101933479309082, "learning_rate": 6.263027554395815e-06, "loss": 0.3244, "step": 9153 }, { "epoch": 0.424582560296846, "grad_norm": 5.40863037109375, "learning_rate": 6.262315465010427e-06, "loss": 0.2722, "step": 9154 }, { "epoch": 0.42462894248608535, "grad_norm": 8.543174743652344, "learning_rate": 6.26160334827764e-06, "loss": 0.4617, "step": 9155 }, { "epoch": 0.42467532467532465, "grad_norm": 6.392052173614502, "learning_rate": 6.26089120421288e-06, "loss": 0.3918, "step": 9156 }, { "epoch": 0.424721706864564, "grad_norm": 5.953107833862305, "learning_rate": 6.26017903283158e-06, "loss": 0.3088, "step": 9157 }, { "epoch": 0.42476808905380337, "grad_norm": 6.377779960632324, "learning_rate": 6.259466834149164e-06, "loss": 0.3442, "step": 9158 }, { "epoch": 0.42481447124304267, "grad_norm": 6.870826721191406, "learning_rate": 6.258754608181063e-06, "loss": 0.4254, "step": 9159 }, { "epoch": 0.424860853432282, "grad_norm": 9.374670028686523, "learning_rate": 6.258042354942708e-06, "loss": 0.3877, "step": 9160 }, { "epoch": 0.42490723562152133, "grad_norm": 4.476642608642578, "learning_rate": 6.2573300744495295e-06, "loss": 0.3174, "step": 9161 }, { "epoch": 0.4249536178107607, "grad_norm": 8.662856101989746, "learning_rate": 6.256617766716958e-06, "loss": 0.4247, "step": 9162 }, { "epoch": 0.425, "grad_norm": 6.011208534240723, "learning_rate": 6.255905431760425e-06, "loss": 0.3131, "step": 9163 }, { "epoch": 0.42504638218923935, "grad_norm": 7.174981594085693, "learning_rate": 6.2551930695953625e-06, "loss": 0.3798, "step": 9164 }, { "epoch": 0.42509276437847865, "grad_norm": 8.3038969039917, "learning_rate": 6.254480680237206e-06, "loss": 0.3427, "step": 9165 }, { "epoch": 0.425139146567718, "grad_norm": 10.235958099365234, "learning_rate": 6.2537682637013855e-06, "loss": 0.3304, "step": 9166 }, { "epoch": 0.4251855287569573, "grad_norm": 6.270719051361084, "learning_rate": 6.253055820003338e-06, "loss": 0.3521, "step": 9167 }, { "epoch": 0.42523191094619667, "grad_norm": 5.066012859344482, "learning_rate": 6.252343349158496e-06, "loss": 0.3213, "step": 9168 }, { "epoch": 0.42527829313543597, "grad_norm": 7.233927249908447, "learning_rate": 6.2516308511822965e-06, "loss": 0.3641, "step": 9169 }, { "epoch": 0.4253246753246753, "grad_norm": 9.241793632507324, "learning_rate": 6.2509183260901755e-06, "loss": 0.3789, "step": 9170 }, { "epoch": 0.42537105751391463, "grad_norm": 16.892295837402344, "learning_rate": 6.250205773897567e-06, "loss": 0.4296, "step": 9171 }, { "epoch": 0.425417439703154, "grad_norm": 10.176281929016113, "learning_rate": 6.249493194619911e-06, "loss": 0.3483, "step": 9172 }, { "epoch": 0.42546382189239335, "grad_norm": 5.961653232574463, "learning_rate": 6.248780588272645e-06, "loss": 0.3555, "step": 9173 }, { "epoch": 0.42551020408163265, "grad_norm": 5.566682815551758, "learning_rate": 6.248067954871204e-06, "loss": 0.3553, "step": 9174 }, { "epoch": 0.425556586270872, "grad_norm": 6.165064811706543, "learning_rate": 6.247355294431031e-06, "loss": 0.2506, "step": 9175 }, { "epoch": 0.4256029684601113, "grad_norm": 8.744292259216309, "learning_rate": 6.2466426069675626e-06, "loss": 0.4915, "step": 9176 }, { "epoch": 0.42564935064935067, "grad_norm": 15.372817993164062, "learning_rate": 6.245929892496239e-06, "loss": 0.5531, "step": 9177 }, { "epoch": 0.42569573283858997, "grad_norm": 7.327203750610352, "learning_rate": 6.245217151032502e-06, "loss": 0.3311, "step": 9178 }, { "epoch": 0.4257421150278293, "grad_norm": 10.780587196350098, "learning_rate": 6.2445043825917915e-06, "loss": 0.3782, "step": 9179 }, { "epoch": 0.42578849721706863, "grad_norm": 6.978938579559326, "learning_rate": 6.243791587189552e-06, "loss": 0.383, "step": 9180 }, { "epoch": 0.425834879406308, "grad_norm": 10.63969898223877, "learning_rate": 6.2430787648412215e-06, "loss": 0.4186, "step": 9181 }, { "epoch": 0.4258812615955473, "grad_norm": 5.480958938598633, "learning_rate": 6.242365915562246e-06, "loss": 0.3497, "step": 9182 }, { "epoch": 0.42592764378478665, "grad_norm": 7.3978800773620605, "learning_rate": 6.24165303936807e-06, "loss": 0.3838, "step": 9183 }, { "epoch": 0.42597402597402595, "grad_norm": 5.499747276306152, "learning_rate": 6.240940136274134e-06, "loss": 0.3614, "step": 9184 }, { "epoch": 0.4260204081632653, "grad_norm": 5.243515491485596, "learning_rate": 6.2402272062958845e-06, "loss": 0.3346, "step": 9185 }, { "epoch": 0.42606679035250467, "grad_norm": 6.525051116943359, "learning_rate": 6.239514249448767e-06, "loss": 0.3755, "step": 9186 }, { "epoch": 0.42611317254174397, "grad_norm": 5.2548627853393555, "learning_rate": 6.238801265748227e-06, "loss": 0.3656, "step": 9187 }, { "epoch": 0.4261595547309833, "grad_norm": 7.919923305511475, "learning_rate": 6.238088255209711e-06, "loss": 0.3328, "step": 9188 }, { "epoch": 0.42620593692022263, "grad_norm": 6.455519676208496, "learning_rate": 6.237375217848666e-06, "loss": 0.4692, "step": 9189 }, { "epoch": 0.426252319109462, "grad_norm": 8.684112548828125, "learning_rate": 6.2366621536805395e-06, "loss": 0.3365, "step": 9190 }, { "epoch": 0.4262987012987013, "grad_norm": 3.332400321960449, "learning_rate": 6.2359490627207795e-06, "loss": 0.2277, "step": 9191 }, { "epoch": 0.42634508348794065, "grad_norm": 7.0331830978393555, "learning_rate": 6.235235944984835e-06, "loss": 0.3259, "step": 9192 }, { "epoch": 0.42639146567717995, "grad_norm": 8.886930465698242, "learning_rate": 6.234522800488156e-06, "loss": 0.4109, "step": 9193 }, { "epoch": 0.4264378478664193, "grad_norm": 5.439748287200928, "learning_rate": 6.233809629246191e-06, "loss": 0.3155, "step": 9194 }, { "epoch": 0.4264842300556586, "grad_norm": 12.980230331420898, "learning_rate": 6.233096431274392e-06, "loss": 0.5485, "step": 9195 }, { "epoch": 0.42653061224489797, "grad_norm": 4.60244607925415, "learning_rate": 6.232383206588209e-06, "loss": 0.2059, "step": 9196 }, { "epoch": 0.42657699443413727, "grad_norm": 5.336793899536133, "learning_rate": 6.231669955203093e-06, "loss": 0.3343, "step": 9197 }, { "epoch": 0.4266233766233766, "grad_norm": 11.394418716430664, "learning_rate": 6.2309566771344966e-06, "loss": 0.4198, "step": 9198 }, { "epoch": 0.42666975881261593, "grad_norm": 6.319797992706299, "learning_rate": 6.230243372397874e-06, "loss": 0.3583, "step": 9199 }, { "epoch": 0.4267161410018553, "grad_norm": 9.29172420501709, "learning_rate": 6.229530041008678e-06, "loss": 0.3007, "step": 9200 }, { "epoch": 0.42676252319109464, "grad_norm": 10.466765403747559, "learning_rate": 6.2288166829823614e-06, "loss": 0.4419, "step": 9201 }, { "epoch": 0.42680890538033395, "grad_norm": 22.12759780883789, "learning_rate": 6.228103298334379e-06, "loss": 0.4238, "step": 9202 }, { "epoch": 0.4268552875695733, "grad_norm": 9.237829208374023, "learning_rate": 6.227389887080187e-06, "loss": 0.3724, "step": 9203 }, { "epoch": 0.4269016697588126, "grad_norm": 4.750052452087402, "learning_rate": 6.22667644923524e-06, "loss": 0.3247, "step": 9204 }, { "epoch": 0.42694805194805197, "grad_norm": 6.586249828338623, "learning_rate": 6.225962984814996e-06, "loss": 0.3365, "step": 9205 }, { "epoch": 0.42699443413729127, "grad_norm": 4.625326633453369, "learning_rate": 6.22524949383491e-06, "loss": 0.2971, "step": 9206 }, { "epoch": 0.4270408163265306, "grad_norm": 3.5311336517333984, "learning_rate": 6.22453597631044e-06, "loss": 0.2057, "step": 9207 }, { "epoch": 0.42708719851576993, "grad_norm": 7.315124988555908, "learning_rate": 6.223822432257043e-06, "loss": 0.4374, "step": 9208 }, { "epoch": 0.4271335807050093, "grad_norm": 11.316171646118164, "learning_rate": 6.22310886169018e-06, "loss": 0.4006, "step": 9209 }, { "epoch": 0.4271799628942486, "grad_norm": 8.307805061340332, "learning_rate": 6.222395264625308e-06, "loss": 0.466, "step": 9210 }, { "epoch": 0.42722634508348795, "grad_norm": 7.1933183670043945, "learning_rate": 6.221681641077888e-06, "loss": 0.3601, "step": 9211 }, { "epoch": 0.42727272727272725, "grad_norm": 8.277782440185547, "learning_rate": 6.220967991063378e-06, "loss": 0.237, "step": 9212 }, { "epoch": 0.4273191094619666, "grad_norm": 4.687115669250488, "learning_rate": 6.220254314597242e-06, "loss": 0.2846, "step": 9213 }, { "epoch": 0.42736549165120596, "grad_norm": 5.830384254455566, "learning_rate": 6.219540611694938e-06, "loss": 0.3214, "step": 9214 }, { "epoch": 0.42741187384044527, "grad_norm": 7.915226459503174, "learning_rate": 6.2188268823719326e-06, "loss": 0.3095, "step": 9215 }, { "epoch": 0.4274582560296846, "grad_norm": 6.759117603302002, "learning_rate": 6.218113126643683e-06, "loss": 0.2845, "step": 9216 }, { "epoch": 0.4275046382189239, "grad_norm": 6.5660400390625, "learning_rate": 6.217399344525656e-06, "loss": 0.3095, "step": 9217 }, { "epoch": 0.4275510204081633, "grad_norm": 6.112290382385254, "learning_rate": 6.216685536033316e-06, "loss": 0.4122, "step": 9218 }, { "epoch": 0.4275974025974026, "grad_norm": 6.299693584442139, "learning_rate": 6.215971701182123e-06, "loss": 0.4143, "step": 9219 }, { "epoch": 0.42764378478664195, "grad_norm": 7.263377666473389, "learning_rate": 6.215257839987546e-06, "loss": 0.4382, "step": 9220 }, { "epoch": 0.42769016697588125, "grad_norm": 6.579857349395752, "learning_rate": 6.214543952465049e-06, "loss": 0.3755, "step": 9221 }, { "epoch": 0.4277365491651206, "grad_norm": 10.539681434631348, "learning_rate": 6.213830038630097e-06, "loss": 0.3968, "step": 9222 }, { "epoch": 0.4277829313543599, "grad_norm": 6.293456554412842, "learning_rate": 6.2131160984981575e-06, "loss": 0.4771, "step": 9223 }, { "epoch": 0.42782931354359927, "grad_norm": 6.722843170166016, "learning_rate": 6.212402132084697e-06, "loss": 0.3664, "step": 9224 }, { "epoch": 0.42787569573283857, "grad_norm": 9.315796852111816, "learning_rate": 6.2116881394051854e-06, "loss": 0.42, "step": 9225 }, { "epoch": 0.4279220779220779, "grad_norm": 7.116629123687744, "learning_rate": 6.2109741204750885e-06, "loss": 0.4098, "step": 9226 }, { "epoch": 0.42796846011131723, "grad_norm": 7.192016124725342, "learning_rate": 6.210260075309875e-06, "loss": 0.3011, "step": 9227 }, { "epoch": 0.4280148423005566, "grad_norm": 4.8630452156066895, "learning_rate": 6.209546003925018e-06, "loss": 0.3469, "step": 9228 }, { "epoch": 0.42806122448979594, "grad_norm": 7.775503158569336, "learning_rate": 6.2088319063359825e-06, "loss": 0.2933, "step": 9229 }, { "epoch": 0.42810760667903525, "grad_norm": 8.183640480041504, "learning_rate": 6.2081177825582415e-06, "loss": 0.4273, "step": 9230 }, { "epoch": 0.4281539888682746, "grad_norm": 8.78329849243164, "learning_rate": 6.207403632607267e-06, "loss": 0.4127, "step": 9231 }, { "epoch": 0.4282003710575139, "grad_norm": 4.137081623077393, "learning_rate": 6.206689456498529e-06, "loss": 0.2913, "step": 9232 }, { "epoch": 0.42824675324675326, "grad_norm": 10.282356262207031, "learning_rate": 6.205975254247502e-06, "loss": 0.3831, "step": 9233 }, { "epoch": 0.42829313543599257, "grad_norm": 7.726596832275391, "learning_rate": 6.205261025869655e-06, "loss": 0.2951, "step": 9234 }, { "epoch": 0.4283395176252319, "grad_norm": 10.169140815734863, "learning_rate": 6.204546771380463e-06, "loss": 0.3895, "step": 9235 }, { "epoch": 0.4283858998144712, "grad_norm": 4.9818243980407715, "learning_rate": 6.203832490795403e-06, "loss": 0.2659, "step": 9236 }, { "epoch": 0.4284322820037106, "grad_norm": 5.55761194229126, "learning_rate": 6.203118184129946e-06, "loss": 0.3594, "step": 9237 }, { "epoch": 0.4284786641929499, "grad_norm": 6.521188735961914, "learning_rate": 6.202403851399566e-06, "loss": 0.3173, "step": 9238 }, { "epoch": 0.42852504638218925, "grad_norm": 6.9759202003479, "learning_rate": 6.201689492619744e-06, "loss": 0.3044, "step": 9239 }, { "epoch": 0.42857142857142855, "grad_norm": 6.349307060241699, "learning_rate": 6.200975107805951e-06, "loss": 0.2857, "step": 9240 }, { "epoch": 0.4286178107606679, "grad_norm": 6.733545303344727, "learning_rate": 6.200260696973666e-06, "loss": 0.2919, "step": 9241 }, { "epoch": 0.4286641929499072, "grad_norm": 6.57078742980957, "learning_rate": 6.199546260138368e-06, "loss": 0.3178, "step": 9242 }, { "epoch": 0.42871057513914657, "grad_norm": 5.644035339355469, "learning_rate": 6.198831797315531e-06, "loss": 0.3331, "step": 9243 }, { "epoch": 0.4287569573283859, "grad_norm": 5.154834270477295, "learning_rate": 6.198117308520637e-06, "loss": 0.2542, "step": 9244 }, { "epoch": 0.4288033395176252, "grad_norm": 4.80126953125, "learning_rate": 6.197402793769161e-06, "loss": 0.3401, "step": 9245 }, { "epoch": 0.4288497217068646, "grad_norm": 8.604596138000488, "learning_rate": 6.196688253076586e-06, "loss": 0.3256, "step": 9246 }, { "epoch": 0.4288961038961039, "grad_norm": 10.843692779541016, "learning_rate": 6.195973686458393e-06, "loss": 0.441, "step": 9247 }, { "epoch": 0.42894248608534324, "grad_norm": 7.012929916381836, "learning_rate": 6.1952590939300595e-06, "loss": 0.2834, "step": 9248 }, { "epoch": 0.42898886827458255, "grad_norm": 15.956268310546875, "learning_rate": 6.194544475507067e-06, "loss": 0.2613, "step": 9249 }, { "epoch": 0.4290352504638219, "grad_norm": 7.312572956085205, "learning_rate": 6.193829831204901e-06, "loss": 0.2815, "step": 9250 }, { "epoch": 0.4290816326530612, "grad_norm": 8.859569549560547, "learning_rate": 6.193115161039039e-06, "loss": 0.3657, "step": 9251 }, { "epoch": 0.42912801484230056, "grad_norm": 6.634727954864502, "learning_rate": 6.192400465024968e-06, "loss": 0.354, "step": 9252 }, { "epoch": 0.42917439703153987, "grad_norm": 7.81196403503418, "learning_rate": 6.191685743178168e-06, "loss": 0.3441, "step": 9253 }, { "epoch": 0.4292207792207792, "grad_norm": 6.688150405883789, "learning_rate": 6.1909709955141274e-06, "loss": 0.3019, "step": 9254 }, { "epoch": 0.4292671614100185, "grad_norm": 3.924039125442505, "learning_rate": 6.190256222048328e-06, "loss": 0.329, "step": 9255 }, { "epoch": 0.4293135435992579, "grad_norm": 7.419170379638672, "learning_rate": 6.189541422796254e-06, "loss": 0.3742, "step": 9256 }, { "epoch": 0.42935992578849724, "grad_norm": 9.110250473022461, "learning_rate": 6.188826597773392e-06, "loss": 0.3933, "step": 9257 }, { "epoch": 0.42940630797773655, "grad_norm": 8.45767879486084, "learning_rate": 6.18811174699523e-06, "loss": 0.3871, "step": 9258 }, { "epoch": 0.4294526901669759, "grad_norm": 9.017157554626465, "learning_rate": 6.187396870477253e-06, "loss": 0.3866, "step": 9259 }, { "epoch": 0.4294990723562152, "grad_norm": 12.401833534240723, "learning_rate": 6.1866819682349485e-06, "loss": 0.5027, "step": 9260 }, { "epoch": 0.42954545454545456, "grad_norm": 5.958046913146973, "learning_rate": 6.1859670402838065e-06, "loss": 0.311, "step": 9261 }, { "epoch": 0.42959183673469387, "grad_norm": 8.782086372375488, "learning_rate": 6.185252086639314e-06, "loss": 0.3132, "step": 9262 }, { "epoch": 0.4296382189239332, "grad_norm": 6.318640232086182, "learning_rate": 6.1845371073169604e-06, "loss": 0.3995, "step": 9263 }, { "epoch": 0.4296846011131725, "grad_norm": 4.974673748016357, "learning_rate": 6.183822102332234e-06, "loss": 0.3612, "step": 9264 }, { "epoch": 0.4297309833024119, "grad_norm": 11.076581001281738, "learning_rate": 6.183107071700627e-06, "loss": 0.3307, "step": 9265 }, { "epoch": 0.4297773654916512, "grad_norm": 12.83352279663086, "learning_rate": 6.1823920154376296e-06, "loss": 0.6292, "step": 9266 }, { "epoch": 0.42982374768089054, "grad_norm": 6.616831302642822, "learning_rate": 6.181676933558732e-06, "loss": 0.3103, "step": 9267 }, { "epoch": 0.42987012987012985, "grad_norm": 5.436938285827637, "learning_rate": 6.180961826079427e-06, "loss": 0.2823, "step": 9268 }, { "epoch": 0.4299165120593692, "grad_norm": 4.114674091339111, "learning_rate": 6.180246693015207e-06, "loss": 0.2749, "step": 9269 }, { "epoch": 0.4299628942486085, "grad_norm": 4.560534954071045, "learning_rate": 6.179531534381566e-06, "loss": 0.2719, "step": 9270 }, { "epoch": 0.43000927643784786, "grad_norm": 6.509775161743164, "learning_rate": 6.1788163501939945e-06, "loss": 0.3468, "step": 9271 }, { "epoch": 0.4300556586270872, "grad_norm": 7.805814743041992, "learning_rate": 6.1781011404679905e-06, "loss": 0.3031, "step": 9272 }, { "epoch": 0.4301020408163265, "grad_norm": 9.431385040283203, "learning_rate": 6.177385905219045e-06, "loss": 0.4876, "step": 9273 }, { "epoch": 0.4301484230055659, "grad_norm": 6.800930500030518, "learning_rate": 6.176670644462656e-06, "loss": 0.3593, "step": 9274 }, { "epoch": 0.4301948051948052, "grad_norm": 5.586193084716797, "learning_rate": 6.175955358214317e-06, "loss": 0.3107, "step": 9275 }, { "epoch": 0.43024118738404454, "grad_norm": 6.6883625984191895, "learning_rate": 6.175240046489525e-06, "loss": 0.4147, "step": 9276 }, { "epoch": 0.43028756957328385, "grad_norm": 7.027590274810791, "learning_rate": 6.1745247093037796e-06, "loss": 0.2569, "step": 9277 }, { "epoch": 0.4303339517625232, "grad_norm": 6.21635627746582, "learning_rate": 6.1738093466725745e-06, "loss": 0.2764, "step": 9278 }, { "epoch": 0.4303803339517625, "grad_norm": 4.378393173217773, "learning_rate": 6.173093958611409e-06, "loss": 0.2761, "step": 9279 }, { "epoch": 0.43042671614100186, "grad_norm": 5.72992467880249, "learning_rate": 6.172378545135782e-06, "loss": 0.3657, "step": 9280 }, { "epoch": 0.43047309833024117, "grad_norm": 11.451738357543945, "learning_rate": 6.171663106261192e-06, "loss": 0.5209, "step": 9281 }, { "epoch": 0.4305194805194805, "grad_norm": 6.385965824127197, "learning_rate": 6.1709476420031386e-06, "loss": 0.3111, "step": 9282 }, { "epoch": 0.4305658627087198, "grad_norm": 5.76000452041626, "learning_rate": 6.170232152377122e-06, "loss": 0.354, "step": 9283 }, { "epoch": 0.4306122448979592, "grad_norm": 4.415185451507568, "learning_rate": 6.1695166373986434e-06, "loss": 0.3839, "step": 9284 }, { "epoch": 0.43065862708719854, "grad_norm": 7.137063980102539, "learning_rate": 6.168801097083204e-06, "loss": 0.3506, "step": 9285 }, { "epoch": 0.43070500927643784, "grad_norm": 5.306957244873047, "learning_rate": 6.1680855314463055e-06, "loss": 0.2074, "step": 9286 }, { "epoch": 0.4307513914656772, "grad_norm": 6.294724464416504, "learning_rate": 6.1673699405034495e-06, "loss": 0.404, "step": 9287 }, { "epoch": 0.4307977736549165, "grad_norm": 6.227085113525391, "learning_rate": 6.16665432427014e-06, "loss": 0.425, "step": 9288 }, { "epoch": 0.43084415584415586, "grad_norm": 22.874399185180664, "learning_rate": 6.165938682761878e-06, "loss": 0.3764, "step": 9289 }, { "epoch": 0.43089053803339517, "grad_norm": 8.926979064941406, "learning_rate": 6.16522301599417e-06, "loss": 0.3758, "step": 9290 }, { "epoch": 0.4309369202226345, "grad_norm": 5.1304426193237305, "learning_rate": 6.164507323982522e-06, "loss": 0.3594, "step": 9291 }, { "epoch": 0.4309833024118738, "grad_norm": 10.036321640014648, "learning_rate": 6.163791606742437e-06, "loss": 0.3872, "step": 9292 }, { "epoch": 0.4310296846011132, "grad_norm": 5.797883987426758, "learning_rate": 6.163075864289419e-06, "loss": 0.345, "step": 9293 }, { "epoch": 0.4310760667903525, "grad_norm": 18.144256591796875, "learning_rate": 6.162360096638976e-06, "loss": 0.5071, "step": 9294 }, { "epoch": 0.43112244897959184, "grad_norm": 5.741994380950928, "learning_rate": 6.161644303806614e-06, "loss": 0.3097, "step": 9295 }, { "epoch": 0.43116883116883115, "grad_norm": 7.5339813232421875, "learning_rate": 6.160928485807843e-06, "loss": 0.37, "step": 9296 }, { "epoch": 0.4312152133580705, "grad_norm": 9.742684364318848, "learning_rate": 6.160212642658167e-06, "loss": 0.3678, "step": 9297 }, { "epoch": 0.4312615955473098, "grad_norm": 6.390440940856934, "learning_rate": 6.159496774373096e-06, "loss": 0.4009, "step": 9298 }, { "epoch": 0.43130797773654916, "grad_norm": 8.241273880004883, "learning_rate": 6.15878088096814e-06, "loss": 0.2446, "step": 9299 }, { "epoch": 0.4313543599257885, "grad_norm": 12.920074462890625, "learning_rate": 6.158064962458807e-06, "loss": 0.3439, "step": 9300 }, { "epoch": 0.4314007421150278, "grad_norm": 13.331513404846191, "learning_rate": 6.157349018860607e-06, "loss": 0.3364, "step": 9301 }, { "epoch": 0.4314471243042672, "grad_norm": 6.604156970977783, "learning_rate": 6.156633050189052e-06, "loss": 0.4085, "step": 9302 }, { "epoch": 0.4314935064935065, "grad_norm": 7.649875164031982, "learning_rate": 6.155917056459651e-06, "loss": 0.324, "step": 9303 }, { "epoch": 0.43153988868274584, "grad_norm": 8.626029968261719, "learning_rate": 6.155201037687917e-06, "loss": 0.3657, "step": 9304 }, { "epoch": 0.43158627087198514, "grad_norm": 6.538014888763428, "learning_rate": 6.154484993889361e-06, "loss": 0.3135, "step": 9305 }, { "epoch": 0.4316326530612245, "grad_norm": 10.378564834594727, "learning_rate": 6.1537689250794975e-06, "loss": 0.4492, "step": 9306 }, { "epoch": 0.4316790352504638, "grad_norm": 7.541552543640137, "learning_rate": 6.153052831273839e-06, "loss": 0.4008, "step": 9307 }, { "epoch": 0.43172541743970316, "grad_norm": 8.789183616638184, "learning_rate": 6.152336712487898e-06, "loss": 0.4249, "step": 9308 }, { "epoch": 0.43177179962894247, "grad_norm": 4.042330265045166, "learning_rate": 6.151620568737191e-06, "loss": 0.2807, "step": 9309 }, { "epoch": 0.4318181818181818, "grad_norm": 9.528051376342773, "learning_rate": 6.150904400037232e-06, "loss": 0.4108, "step": 9310 }, { "epoch": 0.4318645640074211, "grad_norm": 9.78270149230957, "learning_rate": 6.150188206403535e-06, "loss": 0.3845, "step": 9311 }, { "epoch": 0.4319109461966605, "grad_norm": 4.531673431396484, "learning_rate": 6.149471987851617e-06, "loss": 0.3969, "step": 9312 }, { "epoch": 0.43195732838589984, "grad_norm": 4.969569683074951, "learning_rate": 6.148755744396997e-06, "loss": 0.251, "step": 9313 }, { "epoch": 0.43200371057513914, "grad_norm": 9.28197956085205, "learning_rate": 6.1480394760551875e-06, "loss": 0.4658, "step": 9314 }, { "epoch": 0.4320500927643785, "grad_norm": 8.426372528076172, "learning_rate": 6.147323182841709e-06, "loss": 0.3514, "step": 9315 }, { "epoch": 0.4320964749536178, "grad_norm": 6.732263565063477, "learning_rate": 6.146606864772079e-06, "loss": 0.3545, "step": 9316 }, { "epoch": 0.43214285714285716, "grad_norm": 9.86572265625, "learning_rate": 6.145890521861815e-06, "loss": 0.4349, "step": 9317 }, { "epoch": 0.43218923933209646, "grad_norm": 6.750779151916504, "learning_rate": 6.145174154126438e-06, "loss": 0.3863, "step": 9318 }, { "epoch": 0.4322356215213358, "grad_norm": 8.514245986938477, "learning_rate": 6.144457761581467e-06, "loss": 0.3551, "step": 9319 }, { "epoch": 0.4322820037105751, "grad_norm": 8.94715690612793, "learning_rate": 6.1437413442424236e-06, "loss": 0.3834, "step": 9320 }, { "epoch": 0.4323283858998145, "grad_norm": 7.445716381072998, "learning_rate": 6.143024902124826e-06, "loss": 0.3407, "step": 9321 }, { "epoch": 0.4323747680890538, "grad_norm": 7.4019551277160645, "learning_rate": 6.142308435244195e-06, "loss": 0.3517, "step": 9322 }, { "epoch": 0.43242115027829314, "grad_norm": 5.57578182220459, "learning_rate": 6.141591943616056e-06, "loss": 0.3202, "step": 9323 }, { "epoch": 0.43246753246753245, "grad_norm": 10.13012981414795, "learning_rate": 6.14087542725593e-06, "loss": 0.3936, "step": 9324 }, { "epoch": 0.4325139146567718, "grad_norm": 10.997093200683594, "learning_rate": 6.140158886179341e-06, "loss": 0.2865, "step": 9325 }, { "epoch": 0.4325602968460111, "grad_norm": 5.350728988647461, "learning_rate": 6.139442320401808e-06, "loss": 0.3088, "step": 9326 }, { "epoch": 0.43260667903525046, "grad_norm": 6.165960788726807, "learning_rate": 6.138725729938859e-06, "loss": 0.3208, "step": 9327 }, { "epoch": 0.4326530612244898, "grad_norm": 6.872837543487549, "learning_rate": 6.13800911480602e-06, "loss": 0.271, "step": 9328 }, { "epoch": 0.4326994434137291, "grad_norm": 6.024953842163086, "learning_rate": 6.1372924750188105e-06, "loss": 0.3178, "step": 9329 }, { "epoch": 0.4327458256029685, "grad_norm": 5.643494129180908, "learning_rate": 6.136575810592762e-06, "loss": 0.3986, "step": 9330 }, { "epoch": 0.4327922077922078, "grad_norm": 5.467609882354736, "learning_rate": 6.1358591215433964e-06, "loss": 0.3061, "step": 9331 }, { "epoch": 0.43283858998144714, "grad_norm": 9.552077293395996, "learning_rate": 6.1351424078862405e-06, "loss": 0.2768, "step": 9332 }, { "epoch": 0.43288497217068644, "grad_norm": 12.464617729187012, "learning_rate": 6.134425669636826e-06, "loss": 0.3907, "step": 9333 }, { "epoch": 0.4329313543599258, "grad_norm": 7.655238151550293, "learning_rate": 6.133708906810677e-06, "loss": 0.4552, "step": 9334 }, { "epoch": 0.4329777365491651, "grad_norm": 5.498001575469971, "learning_rate": 6.132992119423322e-06, "loss": 0.2722, "step": 9335 }, { "epoch": 0.43302411873840446, "grad_norm": 5.367911338806152, "learning_rate": 6.132275307490291e-06, "loss": 0.3079, "step": 9336 }, { "epoch": 0.43307050092764376, "grad_norm": 8.746524810791016, "learning_rate": 6.131558471027112e-06, "loss": 0.3014, "step": 9337 }, { "epoch": 0.4331168831168831, "grad_norm": 10.262615203857422, "learning_rate": 6.1308416100493166e-06, "loss": 0.4027, "step": 9338 }, { "epoch": 0.4331632653061224, "grad_norm": 8.096357345581055, "learning_rate": 6.130124724572433e-06, "loss": 0.3187, "step": 9339 }, { "epoch": 0.4332096474953618, "grad_norm": 14.222784996032715, "learning_rate": 6.129407814611993e-06, "loss": 0.3967, "step": 9340 }, { "epoch": 0.43325602968460114, "grad_norm": 8.084956169128418, "learning_rate": 6.12869088018353e-06, "loss": 0.2709, "step": 9341 }, { "epoch": 0.43330241187384044, "grad_norm": 6.7154741287231445, "learning_rate": 6.127973921302572e-06, "loss": 0.4152, "step": 9342 }, { "epoch": 0.4333487940630798, "grad_norm": 5.759435176849365, "learning_rate": 6.127256937984657e-06, "loss": 0.2946, "step": 9343 }, { "epoch": 0.4333951762523191, "grad_norm": 6.311835289001465, "learning_rate": 6.126539930245313e-06, "loss": 0.2828, "step": 9344 }, { "epoch": 0.43344155844155846, "grad_norm": 5.689818382263184, "learning_rate": 6.125822898100076e-06, "loss": 0.3678, "step": 9345 }, { "epoch": 0.43348794063079776, "grad_norm": 11.500218391418457, "learning_rate": 6.12510584156448e-06, "loss": 0.3802, "step": 9346 }, { "epoch": 0.4335343228200371, "grad_norm": 4.275607585906982, "learning_rate": 6.124388760654059e-06, "loss": 0.257, "step": 9347 }, { "epoch": 0.4335807050092764, "grad_norm": 9.918281555175781, "learning_rate": 6.1236716553843485e-06, "loss": 0.2543, "step": 9348 }, { "epoch": 0.4336270871985158, "grad_norm": 7.098076343536377, "learning_rate": 6.1229545257708844e-06, "loss": 0.3391, "step": 9349 }, { "epoch": 0.4336734693877551, "grad_norm": 5.570041656494141, "learning_rate": 6.122237371829203e-06, "loss": 0.3067, "step": 9350 }, { "epoch": 0.43371985157699444, "grad_norm": 5.838314533233643, "learning_rate": 6.121520193574841e-06, "loss": 0.2638, "step": 9351 }, { "epoch": 0.43376623376623374, "grad_norm": 7.544806003570557, "learning_rate": 6.120802991023334e-06, "loss": 0.2882, "step": 9352 }, { "epoch": 0.4338126159554731, "grad_norm": 12.911206245422363, "learning_rate": 6.120085764190224e-06, "loss": 0.4142, "step": 9353 }, { "epoch": 0.4338589981447124, "grad_norm": 7.8156962394714355, "learning_rate": 6.119368513091045e-06, "loss": 0.2564, "step": 9354 }, { "epoch": 0.43390538033395176, "grad_norm": 12.58997917175293, "learning_rate": 6.118651237741337e-06, "loss": 0.3373, "step": 9355 }, { "epoch": 0.4339517625231911, "grad_norm": 6.4158453941345215, "learning_rate": 6.117933938156643e-06, "loss": 0.2493, "step": 9356 }, { "epoch": 0.4339981447124304, "grad_norm": 8.265585899353027, "learning_rate": 6.117216614352497e-06, "loss": 0.268, "step": 9357 }, { "epoch": 0.4340445269016698, "grad_norm": 6.746634006500244, "learning_rate": 6.116499266344442e-06, "loss": 0.3181, "step": 9358 }, { "epoch": 0.4340909090909091, "grad_norm": 6.233107566833496, "learning_rate": 6.11578189414802e-06, "loss": 0.3492, "step": 9359 }, { "epoch": 0.43413729128014844, "grad_norm": 5.92983341217041, "learning_rate": 6.1150644977787735e-06, "loss": 0.3247, "step": 9360 }, { "epoch": 0.43418367346938774, "grad_norm": 5.240621566772461, "learning_rate": 6.114347077252241e-06, "loss": 0.3091, "step": 9361 }, { "epoch": 0.4342300556586271, "grad_norm": 8.449177742004395, "learning_rate": 6.1136296325839675e-06, "loss": 0.4404, "step": 9362 }, { "epoch": 0.4342764378478664, "grad_norm": 10.650652885437012, "learning_rate": 6.112912163789495e-06, "loss": 0.4807, "step": 9363 }, { "epoch": 0.43432282003710576, "grad_norm": 7.815542697906494, "learning_rate": 6.112194670884367e-06, "loss": 0.4785, "step": 9364 }, { "epoch": 0.43436920222634506, "grad_norm": 11.327667236328125, "learning_rate": 6.111477153884129e-06, "loss": 0.3729, "step": 9365 }, { "epoch": 0.4344155844155844, "grad_norm": 4.9497175216674805, "learning_rate": 6.110759612804324e-06, "loss": 0.3469, "step": 9366 }, { "epoch": 0.4344619666048237, "grad_norm": 5.4716267585754395, "learning_rate": 6.110042047660499e-06, "loss": 0.4018, "step": 9367 }, { "epoch": 0.4345083487940631, "grad_norm": 5.815248012542725, "learning_rate": 6.109324458468198e-06, "loss": 0.3643, "step": 9368 }, { "epoch": 0.4345547309833024, "grad_norm": 4.444730281829834, "learning_rate": 6.108606845242966e-06, "loss": 0.363, "step": 9369 }, { "epoch": 0.43460111317254174, "grad_norm": 5.37401008605957, "learning_rate": 6.1078892080003535e-06, "loss": 0.3096, "step": 9370 }, { "epoch": 0.4346474953617811, "grad_norm": 8.127178192138672, "learning_rate": 6.1071715467559046e-06, "loss": 0.4041, "step": 9371 }, { "epoch": 0.4346938775510204, "grad_norm": 5.495308876037598, "learning_rate": 6.106453861525169e-06, "loss": 0.3823, "step": 9372 }, { "epoch": 0.43474025974025976, "grad_norm": 6.761168479919434, "learning_rate": 6.105736152323693e-06, "loss": 0.4321, "step": 9373 }, { "epoch": 0.43478664192949906, "grad_norm": 4.528412342071533, "learning_rate": 6.105018419167028e-06, "loss": 0.3716, "step": 9374 }, { "epoch": 0.4348330241187384, "grad_norm": 5.647359848022461, "learning_rate": 6.1043006620707215e-06, "loss": 0.3626, "step": 9375 }, { "epoch": 0.4348794063079777, "grad_norm": 7.923488140106201, "learning_rate": 6.103582881050323e-06, "loss": 0.3175, "step": 9376 }, { "epoch": 0.4349257884972171, "grad_norm": 10.541852951049805, "learning_rate": 6.102865076121383e-06, "loss": 0.4798, "step": 9377 }, { "epoch": 0.4349721706864564, "grad_norm": 6.366098880767822, "learning_rate": 6.102147247299454e-06, "loss": 0.3156, "step": 9378 }, { "epoch": 0.43501855287569574, "grad_norm": 6.715196132659912, "learning_rate": 6.101429394600085e-06, "loss": 0.2098, "step": 9379 }, { "epoch": 0.43506493506493504, "grad_norm": 7.944502830505371, "learning_rate": 6.100711518038828e-06, "loss": 0.2799, "step": 9380 }, { "epoch": 0.4351113172541744, "grad_norm": 4.923548698425293, "learning_rate": 6.099993617631239e-06, "loss": 0.2854, "step": 9381 }, { "epoch": 0.4351576994434137, "grad_norm": 14.953339576721191, "learning_rate": 6.0992756933928674e-06, "loss": 0.4623, "step": 9382 }, { "epoch": 0.43520408163265306, "grad_norm": 8.511857032775879, "learning_rate": 6.098557745339268e-06, "loss": 0.4015, "step": 9383 }, { "epoch": 0.4352504638218924, "grad_norm": 5.8006463050842285, "learning_rate": 6.097839773485995e-06, "loss": 0.3326, "step": 9384 }, { "epoch": 0.4352968460111317, "grad_norm": 6.661837577819824, "learning_rate": 6.097121777848601e-06, "loss": 0.3126, "step": 9385 }, { "epoch": 0.4353432282003711, "grad_norm": 11.079236030578613, "learning_rate": 6.096403758442644e-06, "loss": 0.313, "step": 9386 }, { "epoch": 0.4353896103896104, "grad_norm": 7.986236095428467, "learning_rate": 6.095685715283677e-06, "loss": 0.4446, "step": 9387 }, { "epoch": 0.43543599257884974, "grad_norm": 4.858828067779541, "learning_rate": 6.0949676483872555e-06, "loss": 0.2918, "step": 9388 }, { "epoch": 0.43548237476808904, "grad_norm": 8.011064529418945, "learning_rate": 6.09424955776894e-06, "loss": 0.3334, "step": 9389 }, { "epoch": 0.4355287569573284, "grad_norm": 12.504034996032715, "learning_rate": 6.093531443444282e-06, "loss": 0.4296, "step": 9390 }, { "epoch": 0.4355751391465677, "grad_norm": 5.248575687408447, "learning_rate": 6.092813305428844e-06, "loss": 0.3096, "step": 9391 }, { "epoch": 0.43562152133580706, "grad_norm": 10.250529289245605, "learning_rate": 6.092095143738184e-06, "loss": 0.4277, "step": 9392 }, { "epoch": 0.43566790352504636, "grad_norm": 7.685594081878662, "learning_rate": 6.091376958387856e-06, "loss": 0.2247, "step": 9393 }, { "epoch": 0.4357142857142857, "grad_norm": 5.779286861419678, "learning_rate": 6.090658749393422e-06, "loss": 0.2796, "step": 9394 }, { "epoch": 0.435760667903525, "grad_norm": 8.566177368164062, "learning_rate": 6.0899405167704425e-06, "loss": 0.4073, "step": 9395 }, { "epoch": 0.4358070500927644, "grad_norm": 4.086655616760254, "learning_rate": 6.089222260534475e-06, "loss": 0.3387, "step": 9396 }, { "epoch": 0.4358534322820037, "grad_norm": 9.607328414916992, "learning_rate": 6.088503980701084e-06, "loss": 0.4265, "step": 9397 }, { "epoch": 0.43589981447124304, "grad_norm": 4.411991119384766, "learning_rate": 6.087785677285827e-06, "loss": 0.3359, "step": 9398 }, { "epoch": 0.4359461966604824, "grad_norm": 3.89151930809021, "learning_rate": 6.087067350304266e-06, "loss": 0.2666, "step": 9399 }, { "epoch": 0.4359925788497217, "grad_norm": 4.232383728027344, "learning_rate": 6.086348999771967e-06, "loss": 0.4088, "step": 9400 }, { "epoch": 0.43603896103896106, "grad_norm": 12.133597373962402, "learning_rate": 6.0856306257044886e-06, "loss": 0.3841, "step": 9401 }, { "epoch": 0.43608534322820036, "grad_norm": 6.720547199249268, "learning_rate": 6.084912228117394e-06, "loss": 0.4054, "step": 9402 }, { "epoch": 0.4361317254174397, "grad_norm": 4.811410903930664, "learning_rate": 6.08419380702625e-06, "loss": 0.3224, "step": 9403 }, { "epoch": 0.436178107606679, "grad_norm": 7.878467559814453, "learning_rate": 6.083475362446618e-06, "loss": 0.3793, "step": 9404 }, { "epoch": 0.4362244897959184, "grad_norm": 9.656789779663086, "learning_rate": 6.0827568943940644e-06, "loss": 0.3876, "step": 9405 }, { "epoch": 0.4362708719851577, "grad_norm": 4.278501510620117, "learning_rate": 6.082038402884153e-06, "loss": 0.3303, "step": 9406 }, { "epoch": 0.43631725417439704, "grad_norm": 4.921424865722656, "learning_rate": 6.081319887932451e-06, "loss": 0.2109, "step": 9407 }, { "epoch": 0.43636363636363634, "grad_norm": 8.29360580444336, "learning_rate": 6.080601349554523e-06, "loss": 0.4039, "step": 9408 }, { "epoch": 0.4364100185528757, "grad_norm": 8.318549156188965, "learning_rate": 6.079882787765938e-06, "loss": 0.3464, "step": 9409 }, { "epoch": 0.436456400742115, "grad_norm": 22.318227767944336, "learning_rate": 6.07916420258226e-06, "loss": 0.4674, "step": 9410 }, { "epoch": 0.43650278293135436, "grad_norm": 7.872187614440918, "learning_rate": 6.078445594019063e-06, "loss": 0.3521, "step": 9411 }, { "epoch": 0.4365491651205937, "grad_norm": 7.629302501678467, "learning_rate": 6.077726962091907e-06, "loss": 0.3779, "step": 9412 }, { "epoch": 0.436595547309833, "grad_norm": 5.531274795532227, "learning_rate": 6.077008306816365e-06, "loss": 0.209, "step": 9413 }, { "epoch": 0.4366419294990724, "grad_norm": 22.477323532104492, "learning_rate": 6.076289628208007e-06, "loss": 0.506, "step": 9414 }, { "epoch": 0.4366883116883117, "grad_norm": 6.436123847961426, "learning_rate": 6.075570926282402e-06, "loss": 0.3604, "step": 9415 }, { "epoch": 0.43673469387755104, "grad_norm": 4.779468536376953, "learning_rate": 6.074852201055121e-06, "loss": 0.3321, "step": 9416 }, { "epoch": 0.43678107606679034, "grad_norm": 13.730236053466797, "learning_rate": 6.0741334525417324e-06, "loss": 0.5539, "step": 9417 }, { "epoch": 0.4368274582560297, "grad_norm": 5.151298999786377, "learning_rate": 6.07341468075781e-06, "loss": 0.295, "step": 9418 }, { "epoch": 0.436873840445269, "grad_norm": 5.804683685302734, "learning_rate": 6.072695885718926e-06, "loss": 0.2816, "step": 9419 }, { "epoch": 0.43692022263450836, "grad_norm": 11.860776901245117, "learning_rate": 6.071977067440649e-06, "loss": 0.5095, "step": 9420 }, { "epoch": 0.43696660482374766, "grad_norm": 5.126153945922852, "learning_rate": 6.071258225938556e-06, "loss": 0.3188, "step": 9421 }, { "epoch": 0.437012987012987, "grad_norm": 4.735175132751465, "learning_rate": 6.0705393612282175e-06, "loss": 0.1865, "step": 9422 }, { "epoch": 0.4370593692022263, "grad_norm": 7.034147262573242, "learning_rate": 6.069820473325209e-06, "loss": 0.3459, "step": 9423 }, { "epoch": 0.4371057513914657, "grad_norm": 7.115102291107178, "learning_rate": 6.069101562245103e-06, "loss": 0.2284, "step": 9424 }, { "epoch": 0.437152133580705, "grad_norm": 6.216189384460449, "learning_rate": 6.068382628003477e-06, "loss": 0.2687, "step": 9425 }, { "epoch": 0.43719851576994434, "grad_norm": 12.98752212524414, "learning_rate": 6.067663670615904e-06, "loss": 0.3871, "step": 9426 }, { "epoch": 0.4372448979591837, "grad_norm": 7.873854637145996, "learning_rate": 6.0669446900979615e-06, "loss": 0.3621, "step": 9427 }, { "epoch": 0.437291280148423, "grad_norm": 7.311558246612549, "learning_rate": 6.066225686465225e-06, "loss": 0.4283, "step": 9428 }, { "epoch": 0.43733766233766236, "grad_norm": 5.7044196128845215, "learning_rate": 6.06550665973327e-06, "loss": 0.2759, "step": 9429 }, { "epoch": 0.43738404452690166, "grad_norm": 6.939311981201172, "learning_rate": 6.064787609917677e-06, "loss": 0.3846, "step": 9430 }, { "epoch": 0.437430426716141, "grad_norm": 10.338568687438965, "learning_rate": 6.06406853703402e-06, "loss": 0.2667, "step": 9431 }, { "epoch": 0.4374768089053803, "grad_norm": 6.543144226074219, "learning_rate": 6.063349441097881e-06, "loss": 0.4099, "step": 9432 }, { "epoch": 0.4375231910946197, "grad_norm": 5.591792583465576, "learning_rate": 6.062630322124837e-06, "loss": 0.2294, "step": 9433 }, { "epoch": 0.437569573283859, "grad_norm": 9.483402252197266, "learning_rate": 6.0619111801304675e-06, "loss": 0.3636, "step": 9434 }, { "epoch": 0.43761595547309834, "grad_norm": 6.321747303009033, "learning_rate": 6.061192015130352e-06, "loss": 0.2685, "step": 9435 }, { "epoch": 0.43766233766233764, "grad_norm": 3.6873373985290527, "learning_rate": 6.060472827140072e-06, "loss": 0.2329, "step": 9436 }, { "epoch": 0.437708719851577, "grad_norm": 10.368247985839844, "learning_rate": 6.0597536161752065e-06, "loss": 0.3252, "step": 9437 }, { "epoch": 0.4377551020408163, "grad_norm": 8.834100723266602, "learning_rate": 6.059034382251339e-06, "loss": 0.3875, "step": 9438 }, { "epoch": 0.43780148423005566, "grad_norm": 20.17979621887207, "learning_rate": 6.058315125384049e-06, "loss": 0.3921, "step": 9439 }, { "epoch": 0.437847866419295, "grad_norm": 4.538966178894043, "learning_rate": 6.05759584558892e-06, "loss": 0.2892, "step": 9440 }, { "epoch": 0.4378942486085343, "grad_norm": 7.84512186050415, "learning_rate": 6.056876542881537e-06, "loss": 0.3675, "step": 9441 }, { "epoch": 0.4379406307977737, "grad_norm": 8.694683074951172, "learning_rate": 6.0561572172774785e-06, "loss": 0.2959, "step": 9442 }, { "epoch": 0.437987012987013, "grad_norm": 6.6166229248046875, "learning_rate": 6.055437868792332e-06, "loss": 0.2743, "step": 9443 }, { "epoch": 0.43803339517625234, "grad_norm": 7.457400321960449, "learning_rate": 6.0547184974416806e-06, "loss": 0.2132, "step": 9444 }, { "epoch": 0.43807977736549164, "grad_norm": 5.5765485763549805, "learning_rate": 6.053999103241109e-06, "loss": 0.388, "step": 9445 }, { "epoch": 0.438126159554731, "grad_norm": 10.025006294250488, "learning_rate": 6.053279686206204e-06, "loss": 0.3456, "step": 9446 }, { "epoch": 0.4381725417439703, "grad_norm": 4.903372764587402, "learning_rate": 6.052560246352549e-06, "loss": 0.3329, "step": 9447 }, { "epoch": 0.43821892393320966, "grad_norm": 5.218897342681885, "learning_rate": 6.051840783695731e-06, "loss": 0.3692, "step": 9448 }, { "epoch": 0.43826530612244896, "grad_norm": 4.110775470733643, "learning_rate": 6.051121298251339e-06, "loss": 0.3076, "step": 9449 }, { "epoch": 0.4383116883116883, "grad_norm": 4.15217924118042, "learning_rate": 6.0504017900349575e-06, "loss": 0.2205, "step": 9450 }, { "epoch": 0.4383580705009276, "grad_norm": 7.454255104064941, "learning_rate": 6.049682259062175e-06, "loss": 0.2841, "step": 9451 }, { "epoch": 0.438404452690167, "grad_norm": 5.511358737945557, "learning_rate": 6.04896270534858e-06, "loss": 0.2161, "step": 9452 }, { "epoch": 0.4384508348794063, "grad_norm": 8.726592063903809, "learning_rate": 6.048243128909761e-06, "loss": 0.4096, "step": 9453 }, { "epoch": 0.43849721706864564, "grad_norm": 3.761444091796875, "learning_rate": 6.0475235297613075e-06, "loss": 0.1585, "step": 9454 }, { "epoch": 0.438543599257885, "grad_norm": 9.30941390991211, "learning_rate": 6.0468039079188115e-06, "loss": 0.4045, "step": 9455 }, { "epoch": 0.4385899814471243, "grad_norm": 4.836799144744873, "learning_rate": 6.046084263397858e-06, "loss": 0.2576, "step": 9456 }, { "epoch": 0.43863636363636366, "grad_norm": 8.77051067352295, "learning_rate": 6.045364596214043e-06, "loss": 0.3859, "step": 9457 }, { "epoch": 0.43868274582560296, "grad_norm": 6.993699550628662, "learning_rate": 6.0446449063829535e-06, "loss": 0.2881, "step": 9458 }, { "epoch": 0.4387291280148423, "grad_norm": 9.076635360717773, "learning_rate": 6.043925193920184e-06, "loss": 0.3171, "step": 9459 }, { "epoch": 0.4387755102040816, "grad_norm": 21.334766387939453, "learning_rate": 6.043205458841326e-06, "loss": 0.5057, "step": 9460 }, { "epoch": 0.438821892393321, "grad_norm": 9.345141410827637, "learning_rate": 6.042485701161971e-06, "loss": 0.315, "step": 9461 }, { "epoch": 0.4388682745825603, "grad_norm": 13.002311706542969, "learning_rate": 6.041765920897713e-06, "loss": 0.3885, "step": 9462 }, { "epoch": 0.43891465677179964, "grad_norm": 8.47260856628418, "learning_rate": 6.0410461180641476e-06, "loss": 0.4758, "step": 9463 }, { "epoch": 0.43896103896103894, "grad_norm": 8.567384719848633, "learning_rate": 6.040326292676865e-06, "loss": 0.3396, "step": 9464 }, { "epoch": 0.4390074211502783, "grad_norm": 5.388567924499512, "learning_rate": 6.039606444751464e-06, "loss": 0.2856, "step": 9465 }, { "epoch": 0.4390538033395176, "grad_norm": 5.345600605010986, "learning_rate": 6.038886574303537e-06, "loss": 0.2374, "step": 9466 }, { "epoch": 0.43910018552875696, "grad_norm": 7.292624473571777, "learning_rate": 6.038166681348679e-06, "loss": 0.39, "step": 9467 }, { "epoch": 0.4391465677179963, "grad_norm": 7.538646221160889, "learning_rate": 6.037446765902489e-06, "loss": 0.2764, "step": 9468 }, { "epoch": 0.4391929499072356, "grad_norm": 11.46452808380127, "learning_rate": 6.036726827980561e-06, "loss": 0.4308, "step": 9469 }, { "epoch": 0.439239332096475, "grad_norm": 7.181945323944092, "learning_rate": 6.036006867598494e-06, "loss": 0.2638, "step": 9470 }, { "epoch": 0.4392857142857143, "grad_norm": 5.06255578994751, "learning_rate": 6.035286884771885e-06, "loss": 0.3322, "step": 9471 }, { "epoch": 0.43933209647495364, "grad_norm": 7.0917510986328125, "learning_rate": 6.0345668795163306e-06, "loss": 0.4016, "step": 9472 }, { "epoch": 0.43937847866419294, "grad_norm": 11.858421325683594, "learning_rate": 6.0338468518474314e-06, "loss": 0.4346, "step": 9473 }, { "epoch": 0.4394248608534323, "grad_norm": 13.689545631408691, "learning_rate": 6.033126801780784e-06, "loss": 0.2462, "step": 9474 }, { "epoch": 0.4394712430426716, "grad_norm": 9.529813766479492, "learning_rate": 6.03240672933199e-06, "loss": 0.4082, "step": 9475 }, { "epoch": 0.43951762523191096, "grad_norm": 9.895241737365723, "learning_rate": 6.03168663451665e-06, "loss": 0.3295, "step": 9476 }, { "epoch": 0.43956400742115026, "grad_norm": 26.73335838317871, "learning_rate": 6.0309665173503615e-06, "loss": 0.421, "step": 9477 }, { "epoch": 0.4396103896103896, "grad_norm": 5.399311065673828, "learning_rate": 6.030246377848728e-06, "loss": 0.3561, "step": 9478 }, { "epoch": 0.4396567717996289, "grad_norm": 6.584759712219238, "learning_rate": 6.0295262160273505e-06, "loss": 0.2933, "step": 9479 }, { "epoch": 0.4397031539888683, "grad_norm": 8.54840087890625, "learning_rate": 6.028806031901829e-06, "loss": 0.3109, "step": 9480 }, { "epoch": 0.4397495361781076, "grad_norm": 11.322039604187012, "learning_rate": 6.0280858254877695e-06, "loss": 0.3676, "step": 9481 }, { "epoch": 0.43979591836734694, "grad_norm": 6.863055229187012, "learning_rate": 6.027365596800772e-06, "loss": 0.3517, "step": 9482 }, { "epoch": 0.4398423005565863, "grad_norm": 8.104863166809082, "learning_rate": 6.0266453458564415e-06, "loss": 0.4006, "step": 9483 }, { "epoch": 0.4398886827458256, "grad_norm": 7.549427509307861, "learning_rate": 6.025925072670379e-06, "loss": 0.3623, "step": 9484 }, { "epoch": 0.43993506493506496, "grad_norm": 9.568757057189941, "learning_rate": 6.025204777258191e-06, "loss": 0.4126, "step": 9485 }, { "epoch": 0.43998144712430426, "grad_norm": 5.871143341064453, "learning_rate": 6.024484459635485e-06, "loss": 0.3678, "step": 9486 }, { "epoch": 0.4400278293135436, "grad_norm": 8.299795150756836, "learning_rate": 6.023764119817861e-06, "loss": 0.3654, "step": 9487 }, { "epoch": 0.4400742115027829, "grad_norm": 6.144627571105957, "learning_rate": 6.023043757820929e-06, "loss": 0.3764, "step": 9488 }, { "epoch": 0.4401205936920223, "grad_norm": 6.160510063171387, "learning_rate": 6.0223233736602926e-06, "loss": 0.2369, "step": 9489 }, { "epoch": 0.4401669758812616, "grad_norm": 4.097121238708496, "learning_rate": 6.021602967351559e-06, "loss": 0.2954, "step": 9490 }, { "epoch": 0.44021335807050094, "grad_norm": 7.1429243087768555, "learning_rate": 6.020882538910338e-06, "loss": 0.3243, "step": 9491 }, { "epoch": 0.44025974025974024, "grad_norm": 6.214995384216309, "learning_rate": 6.020162088352233e-06, "loss": 0.4553, "step": 9492 }, { "epoch": 0.4403061224489796, "grad_norm": 9.87911605834961, "learning_rate": 6.019441615692856e-06, "loss": 0.4503, "step": 9493 }, { "epoch": 0.4403525046382189, "grad_norm": 4.70366907119751, "learning_rate": 6.018721120947812e-06, "loss": 0.2698, "step": 9494 }, { "epoch": 0.44039888682745826, "grad_norm": 6.153051376342773, "learning_rate": 6.018000604132715e-06, "loss": 0.3049, "step": 9495 }, { "epoch": 0.4404452690166976, "grad_norm": 8.610726356506348, "learning_rate": 6.0172800652631706e-06, "loss": 0.3166, "step": 9496 }, { "epoch": 0.4404916512059369, "grad_norm": 5.366676330566406, "learning_rate": 6.01655950435479e-06, "loss": 0.3219, "step": 9497 }, { "epoch": 0.4405380333951763, "grad_norm": 5.9059553146362305, "learning_rate": 6.015838921423184e-06, "loss": 0.2729, "step": 9498 }, { "epoch": 0.4405844155844156, "grad_norm": 5.676061630249023, "learning_rate": 6.0151183164839635e-06, "loss": 0.2758, "step": 9499 }, { "epoch": 0.44063079777365494, "grad_norm": 5.573189735412598, "learning_rate": 6.014397689552739e-06, "loss": 0.3893, "step": 9500 }, { "epoch": 0.44067717996289424, "grad_norm": 6.909146785736084, "learning_rate": 6.013677040645126e-06, "loss": 0.3102, "step": 9501 }, { "epoch": 0.4407235621521336, "grad_norm": 6.2880754470825195, "learning_rate": 6.012956369776732e-06, "loss": 0.3539, "step": 9502 }, { "epoch": 0.4407699443413729, "grad_norm": 7.895979404449463, "learning_rate": 6.012235676963174e-06, "loss": 0.3605, "step": 9503 }, { "epoch": 0.44081632653061226, "grad_norm": 3.7576699256896973, "learning_rate": 6.011514962220064e-06, "loss": 0.2965, "step": 9504 }, { "epoch": 0.44086270871985156, "grad_norm": 9.076166152954102, "learning_rate": 6.010794225563016e-06, "loss": 0.3425, "step": 9505 }, { "epoch": 0.4409090909090909, "grad_norm": 3.759556531906128, "learning_rate": 6.0100734670076435e-06, "loss": 0.2785, "step": 9506 }, { "epoch": 0.4409554730983302, "grad_norm": 9.895013809204102, "learning_rate": 6.009352686569563e-06, "loss": 0.5443, "step": 9507 }, { "epoch": 0.4410018552875696, "grad_norm": 6.2629313468933105, "learning_rate": 6.008631884264387e-06, "loss": 0.3677, "step": 9508 }, { "epoch": 0.4410482374768089, "grad_norm": 9.993510246276855, "learning_rate": 6.007911060107736e-06, "loss": 0.4598, "step": 9509 }, { "epoch": 0.44109461966604824, "grad_norm": 6.940410137176514, "learning_rate": 6.007190214115222e-06, "loss": 0.4179, "step": 9510 }, { "epoch": 0.4411410018552876, "grad_norm": 7.695087909698486, "learning_rate": 6.006469346302462e-06, "loss": 0.3417, "step": 9511 }, { "epoch": 0.4411873840445269, "grad_norm": 5.119935512542725, "learning_rate": 6.005748456685077e-06, "loss": 0.4001, "step": 9512 }, { "epoch": 0.44123376623376626, "grad_norm": 11.25318431854248, "learning_rate": 6.005027545278681e-06, "loss": 0.3582, "step": 9513 }, { "epoch": 0.44128014842300556, "grad_norm": 6.250208854675293, "learning_rate": 6.004306612098894e-06, "loss": 0.2792, "step": 9514 }, { "epoch": 0.4413265306122449, "grad_norm": 8.846258163452148, "learning_rate": 6.003585657161333e-06, "loss": 0.3686, "step": 9515 }, { "epoch": 0.4413729128014842, "grad_norm": 8.80469036102295, "learning_rate": 6.002864680481618e-06, "loss": 0.3427, "step": 9516 }, { "epoch": 0.4414192949907236, "grad_norm": 8.206110000610352, "learning_rate": 6.0021436820753685e-06, "loss": 0.3932, "step": 9517 }, { "epoch": 0.4414656771799629, "grad_norm": 7.182553291320801, "learning_rate": 6.001422661958206e-06, "loss": 0.4475, "step": 9518 }, { "epoch": 0.44151205936920224, "grad_norm": 5.797287464141846, "learning_rate": 6.000701620145748e-06, "loss": 0.3308, "step": 9519 }, { "epoch": 0.44155844155844154, "grad_norm": 6.973384857177734, "learning_rate": 5.999980556653619e-06, "loss": 0.391, "step": 9520 }, { "epoch": 0.4416048237476809, "grad_norm": 7.035966873168945, "learning_rate": 5.999259471497438e-06, "loss": 0.4174, "step": 9521 }, { "epoch": 0.4416512059369202, "grad_norm": 9.70584487915039, "learning_rate": 5.998538364692827e-06, "loss": 0.3404, "step": 9522 }, { "epoch": 0.44169758812615956, "grad_norm": 8.747457504272461, "learning_rate": 5.99781723625541e-06, "loss": 0.3493, "step": 9523 }, { "epoch": 0.44174397031539886, "grad_norm": 5.174001693725586, "learning_rate": 5.997096086200808e-06, "loss": 0.3426, "step": 9524 }, { "epoch": 0.4417903525046382, "grad_norm": 5.796025276184082, "learning_rate": 5.996374914544645e-06, "loss": 0.3263, "step": 9525 }, { "epoch": 0.4418367346938776, "grad_norm": 8.50066089630127, "learning_rate": 5.995653721302547e-06, "loss": 0.3198, "step": 9526 }, { "epoch": 0.4418831168831169, "grad_norm": 6.628649711608887, "learning_rate": 5.994932506490134e-06, "loss": 0.4473, "step": 9527 }, { "epoch": 0.44192949907235624, "grad_norm": 7.738222599029541, "learning_rate": 5.994211270123034e-06, "loss": 0.4188, "step": 9528 }, { "epoch": 0.44197588126159554, "grad_norm": 8.340197563171387, "learning_rate": 5.99349001221687e-06, "loss": 0.3605, "step": 9529 }, { "epoch": 0.4420222634508349, "grad_norm": 5.608534336090088, "learning_rate": 5.992768732787271e-06, "loss": 0.3167, "step": 9530 }, { "epoch": 0.4420686456400742, "grad_norm": 8.046099662780762, "learning_rate": 5.99204743184986e-06, "loss": 0.3063, "step": 9531 }, { "epoch": 0.44211502782931356, "grad_norm": 7.1593475341796875, "learning_rate": 5.991326109420265e-06, "loss": 0.3746, "step": 9532 }, { "epoch": 0.44216141001855286, "grad_norm": 4.924170970916748, "learning_rate": 5.990604765514111e-06, "loss": 0.3873, "step": 9533 }, { "epoch": 0.4422077922077922, "grad_norm": 5.588879108428955, "learning_rate": 5.98988340014703e-06, "loss": 0.3182, "step": 9534 }, { "epoch": 0.4422541743970315, "grad_norm": 4.941733360290527, "learning_rate": 5.989162013334645e-06, "loss": 0.3322, "step": 9535 }, { "epoch": 0.4423005565862709, "grad_norm": 9.486037254333496, "learning_rate": 5.9884406050925866e-06, "loss": 0.3906, "step": 9536 }, { "epoch": 0.4423469387755102, "grad_norm": 4.971670150756836, "learning_rate": 5.987719175436486e-06, "loss": 0.2611, "step": 9537 }, { "epoch": 0.44239332096474954, "grad_norm": 5.5846357345581055, "learning_rate": 5.986997724381969e-06, "loss": 0.2615, "step": 9538 }, { "epoch": 0.4424397031539889, "grad_norm": 32.840816497802734, "learning_rate": 5.986276251944667e-06, "loss": 0.3178, "step": 9539 }, { "epoch": 0.4424860853432282, "grad_norm": 11.364922523498535, "learning_rate": 5.9855547581402105e-06, "loss": 0.3615, "step": 9540 }, { "epoch": 0.44253246753246755, "grad_norm": 5.077508926391602, "learning_rate": 5.984833242984229e-06, "loss": 0.3122, "step": 9541 }, { "epoch": 0.44257884972170686, "grad_norm": 21.535337448120117, "learning_rate": 5.984111706492357e-06, "loss": 0.5283, "step": 9542 }, { "epoch": 0.4426252319109462, "grad_norm": 10.48367977142334, "learning_rate": 5.983390148680221e-06, "loss": 0.4556, "step": 9543 }, { "epoch": 0.4426716141001855, "grad_norm": 10.182901382446289, "learning_rate": 5.9826685695634575e-06, "loss": 0.4479, "step": 9544 }, { "epoch": 0.4427179962894249, "grad_norm": 6.060027122497559, "learning_rate": 5.9819469691577e-06, "loss": 0.2732, "step": 9545 }, { "epoch": 0.4427643784786642, "grad_norm": 4.327716827392578, "learning_rate": 5.9812253474785755e-06, "loss": 0.2938, "step": 9546 }, { "epoch": 0.44281076066790354, "grad_norm": 5.578458309173584, "learning_rate": 5.980503704541722e-06, "loss": 0.2718, "step": 9547 }, { "epoch": 0.44285714285714284, "grad_norm": 6.302199363708496, "learning_rate": 5.979782040362776e-06, "loss": 0.2758, "step": 9548 }, { "epoch": 0.4429035250463822, "grad_norm": 4.730474948883057, "learning_rate": 5.979060354957366e-06, "loss": 0.3191, "step": 9549 }, { "epoch": 0.4429499072356215, "grad_norm": 3.6438419818878174, "learning_rate": 5.978338648341131e-06, "loss": 0.275, "step": 9550 }, { "epoch": 0.44299628942486086, "grad_norm": 5.148619174957275, "learning_rate": 5.977616920529705e-06, "loss": 0.192, "step": 9551 }, { "epoch": 0.44304267161410016, "grad_norm": 11.721107482910156, "learning_rate": 5.976895171538724e-06, "loss": 0.4447, "step": 9552 }, { "epoch": 0.4430890538033395, "grad_norm": 9.861557960510254, "learning_rate": 5.9761734013838245e-06, "loss": 0.3178, "step": 9553 }, { "epoch": 0.4431354359925789, "grad_norm": 4.730208873748779, "learning_rate": 5.975451610080643e-06, "loss": 0.296, "step": 9554 }, { "epoch": 0.4431818181818182, "grad_norm": 6.11969518661499, "learning_rate": 5.974729797644816e-06, "loss": 0.305, "step": 9555 }, { "epoch": 0.44322820037105753, "grad_norm": 4.261641025543213, "learning_rate": 5.974007964091983e-06, "loss": 0.252, "step": 9556 }, { "epoch": 0.44327458256029684, "grad_norm": 8.317693710327148, "learning_rate": 5.97328610943778e-06, "loss": 0.3058, "step": 9557 }, { "epoch": 0.4433209647495362, "grad_norm": 12.476046562194824, "learning_rate": 5.972564233697849e-06, "loss": 0.3969, "step": 9558 }, { "epoch": 0.4433673469387755, "grad_norm": 18.482955932617188, "learning_rate": 5.971842336887826e-06, "loss": 0.4978, "step": 9559 }, { "epoch": 0.44341372912801486, "grad_norm": 17.169063568115234, "learning_rate": 5.971120419023349e-06, "loss": 0.3154, "step": 9560 }, { "epoch": 0.44346011131725416, "grad_norm": 5.6290974617004395, "learning_rate": 5.970398480120064e-06, "loss": 0.287, "step": 9561 }, { "epoch": 0.4435064935064935, "grad_norm": 10.066627502441406, "learning_rate": 5.969676520193606e-06, "loss": 0.3959, "step": 9562 }, { "epoch": 0.4435528756957328, "grad_norm": 12.599459648132324, "learning_rate": 5.968954539259617e-06, "loss": 0.4978, "step": 9563 }, { "epoch": 0.4435992578849722, "grad_norm": 6.689598083496094, "learning_rate": 5.96823253733374e-06, "loss": 0.3866, "step": 9564 }, { "epoch": 0.4436456400742115, "grad_norm": 15.662325859069824, "learning_rate": 5.967510514431616e-06, "loss": 0.3629, "step": 9565 }, { "epoch": 0.44369202226345084, "grad_norm": 9.169234275817871, "learning_rate": 5.966788470568886e-06, "loss": 0.4441, "step": 9566 }, { "epoch": 0.4437384044526902, "grad_norm": 9.926090240478516, "learning_rate": 5.966066405761195e-06, "loss": 0.3785, "step": 9567 }, { "epoch": 0.4437847866419295, "grad_norm": 6.025531768798828, "learning_rate": 5.965344320024184e-06, "loss": 0.2184, "step": 9568 }, { "epoch": 0.44383116883116885, "grad_norm": 8.29602336883545, "learning_rate": 5.964622213373496e-06, "loss": 0.3771, "step": 9569 }, { "epoch": 0.44387755102040816, "grad_norm": 7.0995917320251465, "learning_rate": 5.96390008582478e-06, "loss": 0.3909, "step": 9570 }, { "epoch": 0.4439239332096475, "grad_norm": 5.205742835998535, "learning_rate": 5.963177937393674e-06, "loss": 0.2753, "step": 9571 }, { "epoch": 0.4439703153988868, "grad_norm": 7.142164707183838, "learning_rate": 5.962455768095829e-06, "loss": 0.3032, "step": 9572 }, { "epoch": 0.4440166975881262, "grad_norm": 7.731851100921631, "learning_rate": 5.961733577946885e-06, "loss": 0.2393, "step": 9573 }, { "epoch": 0.4440630797773655, "grad_norm": 7.620662689208984, "learning_rate": 5.961011366962489e-06, "loss": 0.3644, "step": 9574 }, { "epoch": 0.44410946196660483, "grad_norm": 14.596610069274902, "learning_rate": 5.960289135158291e-06, "loss": 0.3999, "step": 9575 }, { "epoch": 0.44415584415584414, "grad_norm": 3.7992746829986572, "learning_rate": 5.959566882549936e-06, "loss": 0.235, "step": 9576 }, { "epoch": 0.4442022263450835, "grad_norm": 16.32625389099121, "learning_rate": 5.958844609153068e-06, "loss": 0.3836, "step": 9577 }, { "epoch": 0.4442486085343228, "grad_norm": 6.292471408843994, "learning_rate": 5.95812231498334e-06, "loss": 0.455, "step": 9578 }, { "epoch": 0.44429499072356216, "grad_norm": 6.07246208190918, "learning_rate": 5.957400000056396e-06, "loss": 0.3571, "step": 9579 }, { "epoch": 0.44434137291280146, "grad_norm": 9.71870231628418, "learning_rate": 5.956677664387886e-06, "loss": 0.4214, "step": 9580 }, { "epoch": 0.4443877551020408, "grad_norm": 11.131448745727539, "learning_rate": 5.9559553079934595e-06, "loss": 0.5295, "step": 9581 }, { "epoch": 0.4444341372912802, "grad_norm": 8.784440040588379, "learning_rate": 5.9552329308887646e-06, "loss": 0.3973, "step": 9582 }, { "epoch": 0.4444805194805195, "grad_norm": 3.906049966812134, "learning_rate": 5.954510533089453e-06, "loss": 0.1871, "step": 9583 }, { "epoch": 0.44452690166975883, "grad_norm": 5.4721832275390625, "learning_rate": 5.953788114611173e-06, "loss": 0.2975, "step": 9584 }, { "epoch": 0.44457328385899814, "grad_norm": 6.263274669647217, "learning_rate": 5.953065675469577e-06, "loss": 0.2615, "step": 9585 }, { "epoch": 0.4446196660482375, "grad_norm": 8.829879760742188, "learning_rate": 5.952343215680318e-06, "loss": 0.4797, "step": 9586 }, { "epoch": 0.4446660482374768, "grad_norm": 5.117262363433838, "learning_rate": 5.951620735259042e-06, "loss": 0.2892, "step": 9587 }, { "epoch": 0.44471243042671615, "grad_norm": 4.909749507904053, "learning_rate": 5.950898234221406e-06, "loss": 0.303, "step": 9588 }, { "epoch": 0.44475881261595546, "grad_norm": 5.575340270996094, "learning_rate": 5.950175712583063e-06, "loss": 0.2875, "step": 9589 }, { "epoch": 0.4448051948051948, "grad_norm": 13.992971420288086, "learning_rate": 5.949453170359663e-06, "loss": 0.3825, "step": 9590 }, { "epoch": 0.4448515769944341, "grad_norm": 7.2957940101623535, "learning_rate": 5.948730607566862e-06, "loss": 0.3566, "step": 9591 }, { "epoch": 0.4448979591836735, "grad_norm": 6.009659290313721, "learning_rate": 5.948008024220311e-06, "loss": 0.3067, "step": 9592 }, { "epoch": 0.4449443413729128, "grad_norm": 7.528958797454834, "learning_rate": 5.9472854203356654e-06, "loss": 0.3364, "step": 9593 }, { "epoch": 0.44499072356215214, "grad_norm": 10.708585739135742, "learning_rate": 5.946562795928583e-06, "loss": 0.4207, "step": 9594 }, { "epoch": 0.4450371057513915, "grad_norm": 4.78007698059082, "learning_rate": 5.945840151014716e-06, "loss": 0.2929, "step": 9595 }, { "epoch": 0.4450834879406308, "grad_norm": 5.0110039710998535, "learning_rate": 5.94511748560972e-06, "loss": 0.2363, "step": 9596 }, { "epoch": 0.44512987012987015, "grad_norm": 5.762193202972412, "learning_rate": 5.944394799729255e-06, "loss": 0.3831, "step": 9597 }, { "epoch": 0.44517625231910946, "grad_norm": 6.248625755310059, "learning_rate": 5.943672093388973e-06, "loss": 0.2285, "step": 9598 }, { "epoch": 0.4452226345083488, "grad_norm": 10.429803848266602, "learning_rate": 5.9429493666045325e-06, "loss": 0.3787, "step": 9599 }, { "epoch": 0.4452690166975881, "grad_norm": 5.653371810913086, "learning_rate": 5.942226619391592e-06, "loss": 0.3726, "step": 9600 }, { "epoch": 0.4453153988868275, "grad_norm": 3.981863021850586, "learning_rate": 5.941503851765809e-06, "loss": 0.1477, "step": 9601 }, { "epoch": 0.4453617810760668, "grad_norm": 5.103370189666748, "learning_rate": 5.940781063742841e-06, "loss": 0.2654, "step": 9602 }, { "epoch": 0.44540816326530613, "grad_norm": 7.609748840332031, "learning_rate": 5.940058255338348e-06, "loss": 0.3249, "step": 9603 }, { "epoch": 0.44545454545454544, "grad_norm": 8.216069221496582, "learning_rate": 5.939335426567987e-06, "loss": 0.3595, "step": 9604 }, { "epoch": 0.4455009276437848, "grad_norm": 6.095606327056885, "learning_rate": 5.938612577447423e-06, "loss": 0.3184, "step": 9605 }, { "epoch": 0.4455473098330241, "grad_norm": 11.633426666259766, "learning_rate": 5.937889707992308e-06, "loss": 0.4597, "step": 9606 }, { "epoch": 0.44559369202226345, "grad_norm": 6.424982070922852, "learning_rate": 5.937166818218309e-06, "loss": 0.2643, "step": 9607 }, { "epoch": 0.44564007421150276, "grad_norm": 11.71726131439209, "learning_rate": 5.936443908141088e-06, "loss": 0.3592, "step": 9608 }, { "epoch": 0.4456864564007421, "grad_norm": 5.808376789093018, "learning_rate": 5.9357209777763006e-06, "loss": 0.2326, "step": 9609 }, { "epoch": 0.4457328385899815, "grad_norm": 8.235458374023438, "learning_rate": 5.934998027139611e-06, "loss": 0.4066, "step": 9610 }, { "epoch": 0.4457792207792208, "grad_norm": 8.366219520568848, "learning_rate": 5.934275056246686e-06, "loss": 0.4613, "step": 9611 }, { "epoch": 0.44582560296846013, "grad_norm": 8.739117622375488, "learning_rate": 5.933552065113182e-06, "loss": 0.3971, "step": 9612 }, { "epoch": 0.44587198515769944, "grad_norm": 8.286420822143555, "learning_rate": 5.932829053754765e-06, "loss": 0.3825, "step": 9613 }, { "epoch": 0.4459183673469388, "grad_norm": 6.392483234405518, "learning_rate": 5.932106022187099e-06, "loss": 0.3972, "step": 9614 }, { "epoch": 0.4459647495361781, "grad_norm": 8.186971664428711, "learning_rate": 5.931382970425847e-06, "loss": 0.398, "step": 9615 }, { "epoch": 0.44601113172541745, "grad_norm": 4.3421950340271, "learning_rate": 5.930659898486676e-06, "loss": 0.3132, "step": 9616 }, { "epoch": 0.44605751391465676, "grad_norm": 6.17701530456543, "learning_rate": 5.929936806385248e-06, "loss": 0.251, "step": 9617 }, { "epoch": 0.4461038961038961, "grad_norm": 9.925802230834961, "learning_rate": 5.92921369413723e-06, "loss": 0.3137, "step": 9618 }, { "epoch": 0.4461502782931354, "grad_norm": 5.991443157196045, "learning_rate": 5.928490561758287e-06, "loss": 0.3249, "step": 9619 }, { "epoch": 0.4461966604823748, "grad_norm": 7.228482246398926, "learning_rate": 5.927767409264085e-06, "loss": 0.3839, "step": 9620 }, { "epoch": 0.4462430426716141, "grad_norm": 4.9188385009765625, "learning_rate": 5.927044236670293e-06, "loss": 0.3279, "step": 9621 }, { "epoch": 0.44628942486085343, "grad_norm": 5.725007057189941, "learning_rate": 5.926321043992576e-06, "loss": 0.2639, "step": 9622 }, { "epoch": 0.4463358070500928, "grad_norm": 4.519655704498291, "learning_rate": 5.925597831246601e-06, "loss": 0.2741, "step": 9623 }, { "epoch": 0.4463821892393321, "grad_norm": 10.546955108642578, "learning_rate": 5.924874598448038e-06, "loss": 0.4416, "step": 9624 }, { "epoch": 0.44642857142857145, "grad_norm": 7.7812089920043945, "learning_rate": 5.924151345612555e-06, "loss": 0.3127, "step": 9625 }, { "epoch": 0.44647495361781075, "grad_norm": 7.368332862854004, "learning_rate": 5.92342807275582e-06, "loss": 0.2798, "step": 9626 }, { "epoch": 0.4465213358070501, "grad_norm": 5.545261859893799, "learning_rate": 5.9227047798935034e-06, "loss": 0.2605, "step": 9627 }, { "epoch": 0.4465677179962894, "grad_norm": 4.007230758666992, "learning_rate": 5.921981467041274e-06, "loss": 0.2469, "step": 9628 }, { "epoch": 0.4466141001855288, "grad_norm": 7.689237594604492, "learning_rate": 5.921258134214804e-06, "loss": 0.3046, "step": 9629 }, { "epoch": 0.4466604823747681, "grad_norm": 6.082954406738281, "learning_rate": 5.92053478142976e-06, "loss": 0.3086, "step": 9630 }, { "epoch": 0.44670686456400743, "grad_norm": 7.693235397338867, "learning_rate": 5.919811408701816e-06, "loss": 0.3244, "step": 9631 }, { "epoch": 0.44675324675324674, "grad_norm": 6.84221076965332, "learning_rate": 5.919088016046644e-06, "loss": 0.3517, "step": 9632 }, { "epoch": 0.4467996289424861, "grad_norm": 12.195953369140625, "learning_rate": 5.918364603479915e-06, "loss": 0.3991, "step": 9633 }, { "epoch": 0.4468460111317254, "grad_norm": 11.999690055847168, "learning_rate": 5.9176411710173e-06, "loss": 0.4387, "step": 9634 }, { "epoch": 0.44689239332096475, "grad_norm": 5.582695007324219, "learning_rate": 5.916917718674473e-06, "loss": 0.3184, "step": 9635 }, { "epoch": 0.44693877551020406, "grad_norm": 4.254544734954834, "learning_rate": 5.916194246467109e-06, "loss": 0.3009, "step": 9636 }, { "epoch": 0.4469851576994434, "grad_norm": 6.383319854736328, "learning_rate": 5.915470754410878e-06, "loss": 0.2717, "step": 9637 }, { "epoch": 0.44703153988868277, "grad_norm": 11.389437675476074, "learning_rate": 5.914747242521458e-06, "loss": 0.2754, "step": 9638 }, { "epoch": 0.4470779220779221, "grad_norm": 8.025284767150879, "learning_rate": 5.914023710814519e-06, "loss": 0.4792, "step": 9639 }, { "epoch": 0.44712430426716143, "grad_norm": 10.157137870788574, "learning_rate": 5.913300159305741e-06, "loss": 0.3688, "step": 9640 }, { "epoch": 0.44717068645640073, "grad_norm": 6.4869537353515625, "learning_rate": 5.9125765880107956e-06, "loss": 0.3384, "step": 9641 }, { "epoch": 0.4472170686456401, "grad_norm": 7.648287773132324, "learning_rate": 5.9118529969453585e-06, "loss": 0.3744, "step": 9642 }, { "epoch": 0.4472634508348794, "grad_norm": 7.396520137786865, "learning_rate": 5.91112938612511e-06, "loss": 0.3533, "step": 9643 }, { "epoch": 0.44730983302411875, "grad_norm": 12.498635292053223, "learning_rate": 5.91040575556572e-06, "loss": 0.5267, "step": 9644 }, { "epoch": 0.44735621521335805, "grad_norm": 9.190543174743652, "learning_rate": 5.909682105282873e-06, "loss": 0.4318, "step": 9645 }, { "epoch": 0.4474025974025974, "grad_norm": 16.803747177124023, "learning_rate": 5.908958435292241e-06, "loss": 0.3043, "step": 9646 }, { "epoch": 0.4474489795918367, "grad_norm": 6.218772888183594, "learning_rate": 5.908234745609504e-06, "loss": 0.2956, "step": 9647 }, { "epoch": 0.4474953617810761, "grad_norm": 6.604524612426758, "learning_rate": 5.907511036250341e-06, "loss": 0.4039, "step": 9648 }, { "epoch": 0.4475417439703154, "grad_norm": 4.197327613830566, "learning_rate": 5.906787307230431e-06, "loss": 0.2068, "step": 9649 }, { "epoch": 0.44758812615955473, "grad_norm": 9.701619148254395, "learning_rate": 5.906063558565451e-06, "loss": 0.368, "step": 9650 }, { "epoch": 0.44763450834879404, "grad_norm": 9.894086837768555, "learning_rate": 5.905339790271081e-06, "loss": 0.5012, "step": 9651 }, { "epoch": 0.4476808905380334, "grad_norm": 10.228436470031738, "learning_rate": 5.904616002363004e-06, "loss": 0.468, "step": 9652 }, { "epoch": 0.44772727272727275, "grad_norm": 12.196338653564453, "learning_rate": 5.903892194856897e-06, "loss": 0.5518, "step": 9653 }, { "epoch": 0.44777365491651205, "grad_norm": 4.719500541687012, "learning_rate": 5.903168367768443e-06, "loss": 0.2974, "step": 9654 }, { "epoch": 0.4478200371057514, "grad_norm": 13.470651626586914, "learning_rate": 5.902444521113323e-06, "loss": 0.4694, "step": 9655 }, { "epoch": 0.4478664192949907, "grad_norm": 12.764656066894531, "learning_rate": 5.901720654907217e-06, "loss": 0.384, "step": 9656 }, { "epoch": 0.44791280148423007, "grad_norm": 7.758193016052246, "learning_rate": 5.90099676916581e-06, "loss": 0.229, "step": 9657 }, { "epoch": 0.4479591836734694, "grad_norm": 4.67304801940918, "learning_rate": 5.900272863904782e-06, "loss": 0.3927, "step": 9658 }, { "epoch": 0.44800556586270873, "grad_norm": 6.52131462097168, "learning_rate": 5.899548939139819e-06, "loss": 0.4082, "step": 9659 }, { "epoch": 0.44805194805194803, "grad_norm": 8.292276382446289, "learning_rate": 5.898824994886601e-06, "loss": 0.3199, "step": 9660 }, { "epoch": 0.4480983302411874, "grad_norm": 6.166224002838135, "learning_rate": 5.898101031160815e-06, "loss": 0.3486, "step": 9661 }, { "epoch": 0.4481447124304267, "grad_norm": 6.398088455200195, "learning_rate": 5.897377047978143e-06, "loss": 0.3806, "step": 9662 }, { "epoch": 0.44819109461966605, "grad_norm": 6.416922569274902, "learning_rate": 5.896653045354271e-06, "loss": 0.3657, "step": 9663 }, { "epoch": 0.44823747680890536, "grad_norm": 6.806889533996582, "learning_rate": 5.895929023304882e-06, "loss": 0.3993, "step": 9664 }, { "epoch": 0.4482838589981447, "grad_norm": 7.767065525054932, "learning_rate": 5.895204981845667e-06, "loss": 0.3236, "step": 9665 }, { "epoch": 0.44833024118738407, "grad_norm": 5.762831687927246, "learning_rate": 5.8944809209923045e-06, "loss": 0.3278, "step": 9666 }, { "epoch": 0.4483766233766234, "grad_norm": 5.836478233337402, "learning_rate": 5.893756840760486e-06, "loss": 0.3477, "step": 9667 }, { "epoch": 0.44842300556586273, "grad_norm": 4.225797653198242, "learning_rate": 5.893032741165898e-06, "loss": 0.3158, "step": 9668 }, { "epoch": 0.44846938775510203, "grad_norm": 4.895598888397217, "learning_rate": 5.892308622224225e-06, "loss": 0.2646, "step": 9669 }, { "epoch": 0.4485157699443414, "grad_norm": 4.884820461273193, "learning_rate": 5.891584483951157e-06, "loss": 0.355, "step": 9670 }, { "epoch": 0.4485621521335807, "grad_norm": 15.189290046691895, "learning_rate": 5.890860326362382e-06, "loss": 0.4485, "step": 9671 }, { "epoch": 0.44860853432282005, "grad_norm": 3.7277276515960693, "learning_rate": 5.8901361494735874e-06, "loss": 0.2952, "step": 9672 }, { "epoch": 0.44865491651205935, "grad_norm": 9.386731147766113, "learning_rate": 5.889411953300463e-06, "loss": 0.3736, "step": 9673 }, { "epoch": 0.4487012987012987, "grad_norm": 6.010673999786377, "learning_rate": 5.888687737858697e-06, "loss": 0.3108, "step": 9674 }, { "epoch": 0.448747680890538, "grad_norm": 12.91665267944336, "learning_rate": 5.88796350316398e-06, "loss": 0.3478, "step": 9675 }, { "epoch": 0.44879406307977737, "grad_norm": 6.371146202087402, "learning_rate": 5.887239249232003e-06, "loss": 0.3516, "step": 9676 }, { "epoch": 0.4488404452690167, "grad_norm": 5.6607561111450195, "learning_rate": 5.886514976078454e-06, "loss": 0.274, "step": 9677 }, { "epoch": 0.44888682745825603, "grad_norm": 8.448857307434082, "learning_rate": 5.8857906837190274e-06, "loss": 0.4649, "step": 9678 }, { "epoch": 0.44893320964749533, "grad_norm": 7.994649887084961, "learning_rate": 5.885066372169413e-06, "loss": 0.3079, "step": 9679 }, { "epoch": 0.4489795918367347, "grad_norm": 9.51801586151123, "learning_rate": 5.8843420414453e-06, "loss": 0.3641, "step": 9680 }, { "epoch": 0.44902597402597405, "grad_norm": 9.624343872070312, "learning_rate": 5.883617691562385e-06, "loss": 0.3901, "step": 9681 }, { "epoch": 0.44907235621521335, "grad_norm": 8.565068244934082, "learning_rate": 5.882893322536358e-06, "loss": 0.4056, "step": 9682 }, { "epoch": 0.4491187384044527, "grad_norm": 8.75354290008545, "learning_rate": 5.8821689343829135e-06, "loss": 0.3934, "step": 9683 }, { "epoch": 0.449165120593692, "grad_norm": 4.594484329223633, "learning_rate": 5.881444527117745e-06, "loss": 0.2964, "step": 9684 }, { "epoch": 0.44921150278293137, "grad_norm": 6.638270378112793, "learning_rate": 5.880720100756544e-06, "loss": 0.2794, "step": 9685 }, { "epoch": 0.4492578849721707, "grad_norm": 10.563498497009277, "learning_rate": 5.879995655315007e-06, "loss": 0.4917, "step": 9686 }, { "epoch": 0.44930426716141003, "grad_norm": 6.4565653800964355, "learning_rate": 5.87927119080883e-06, "loss": 0.392, "step": 9687 }, { "epoch": 0.44935064935064933, "grad_norm": 9.196534156799316, "learning_rate": 5.878546707253704e-06, "loss": 0.2627, "step": 9688 }, { "epoch": 0.4493970315398887, "grad_norm": 8.309330940246582, "learning_rate": 5.877822204665328e-06, "loss": 0.2522, "step": 9689 }, { "epoch": 0.449443413729128, "grad_norm": 6.739854335784912, "learning_rate": 5.8770976830593975e-06, "loss": 0.3345, "step": 9690 }, { "epoch": 0.44948979591836735, "grad_norm": 7.248547554016113, "learning_rate": 5.876373142451607e-06, "loss": 0.4287, "step": 9691 }, { "epoch": 0.44953617810760665, "grad_norm": 6.295780658721924, "learning_rate": 5.875648582857655e-06, "loss": 0.2839, "step": 9692 }, { "epoch": 0.449582560296846, "grad_norm": 7.773111820220947, "learning_rate": 5.874924004293239e-06, "loss": 0.3275, "step": 9693 }, { "epoch": 0.44962894248608537, "grad_norm": 5.688048839569092, "learning_rate": 5.8741994067740545e-06, "loss": 0.2753, "step": 9694 }, { "epoch": 0.4496753246753247, "grad_norm": 4.543675899505615, "learning_rate": 5.873474790315804e-06, "loss": 0.321, "step": 9695 }, { "epoch": 0.44972170686456403, "grad_norm": 7.606587886810303, "learning_rate": 5.87275015493418e-06, "loss": 0.3807, "step": 9696 }, { "epoch": 0.44976808905380333, "grad_norm": 13.9006986618042, "learning_rate": 5.872025500644885e-06, "loss": 0.4834, "step": 9697 }, { "epoch": 0.4498144712430427, "grad_norm": 5.381582736968994, "learning_rate": 5.871300827463618e-06, "loss": 0.2415, "step": 9698 }, { "epoch": 0.449860853432282, "grad_norm": 5.332846164703369, "learning_rate": 5.8705761354060774e-06, "loss": 0.3006, "step": 9699 }, { "epoch": 0.44990723562152135, "grad_norm": 10.618108749389648, "learning_rate": 5.869851424487964e-06, "loss": 0.2707, "step": 9700 }, { "epoch": 0.44995361781076065, "grad_norm": 6.016632080078125, "learning_rate": 5.869126694724979e-06, "loss": 0.4083, "step": 9701 }, { "epoch": 0.45, "grad_norm": 4.904726982116699, "learning_rate": 5.868401946132822e-06, "loss": 0.2667, "step": 9702 }, { "epoch": 0.4500463821892393, "grad_norm": 10.779658317565918, "learning_rate": 5.867677178727196e-06, "loss": 0.3044, "step": 9703 }, { "epoch": 0.45009276437847867, "grad_norm": 9.98938274383545, "learning_rate": 5.8669523925238e-06, "loss": 0.4158, "step": 9704 }, { "epoch": 0.450139146567718, "grad_norm": 8.798029899597168, "learning_rate": 5.866227587538338e-06, "loss": 0.4254, "step": 9705 }, { "epoch": 0.45018552875695733, "grad_norm": 4.838164806365967, "learning_rate": 5.865502763786513e-06, "loss": 0.2558, "step": 9706 }, { "epoch": 0.45023191094619663, "grad_norm": 6.477478981018066, "learning_rate": 5.864777921284027e-06, "loss": 0.3233, "step": 9707 }, { "epoch": 0.450278293135436, "grad_norm": 5.171636581420898, "learning_rate": 5.8640530600465825e-06, "loss": 0.2885, "step": 9708 }, { "epoch": 0.45032467532467535, "grad_norm": 5.953904628753662, "learning_rate": 5.863328180089886e-06, "loss": 0.3641, "step": 9709 }, { "epoch": 0.45037105751391465, "grad_norm": 5.454794883728027, "learning_rate": 5.862603281429637e-06, "loss": 0.2673, "step": 9710 }, { "epoch": 0.450417439703154, "grad_norm": 4.933482646942139, "learning_rate": 5.861878364081546e-06, "loss": 0.3277, "step": 9711 }, { "epoch": 0.4504638218923933, "grad_norm": 5.812062740325928, "learning_rate": 5.861153428061313e-06, "loss": 0.3018, "step": 9712 }, { "epoch": 0.45051020408163267, "grad_norm": 10.136836051940918, "learning_rate": 5.860428473384645e-06, "loss": 0.4684, "step": 9713 }, { "epoch": 0.450556586270872, "grad_norm": 9.573945045471191, "learning_rate": 5.85970350006725e-06, "loss": 0.4702, "step": 9714 }, { "epoch": 0.45060296846011133, "grad_norm": 9.07927417755127, "learning_rate": 5.8589785081248295e-06, "loss": 0.3345, "step": 9715 }, { "epoch": 0.45064935064935063, "grad_norm": 8.209439277648926, "learning_rate": 5.8582534975730945e-06, "loss": 0.3569, "step": 9716 }, { "epoch": 0.45069573283859, "grad_norm": 7.00700044631958, "learning_rate": 5.85752846842775e-06, "loss": 0.3482, "step": 9717 }, { "epoch": 0.4507421150278293, "grad_norm": 8.618840217590332, "learning_rate": 5.856803420704502e-06, "loss": 0.4163, "step": 9718 }, { "epoch": 0.45078849721706865, "grad_norm": 9.108426094055176, "learning_rate": 5.85607835441906e-06, "loss": 0.3826, "step": 9719 }, { "epoch": 0.45083487940630795, "grad_norm": 14.03635311126709, "learning_rate": 5.855353269587134e-06, "loss": 0.4112, "step": 9720 }, { "epoch": 0.4508812615955473, "grad_norm": 7.857822895050049, "learning_rate": 5.8546281662244296e-06, "loss": 0.3127, "step": 9721 }, { "epoch": 0.45092764378478667, "grad_norm": 4.530730724334717, "learning_rate": 5.853903044346656e-06, "loss": 0.3753, "step": 9722 }, { "epoch": 0.45097402597402597, "grad_norm": 9.238667488098145, "learning_rate": 5.853177903969525e-06, "loss": 0.3367, "step": 9723 }, { "epoch": 0.45102040816326533, "grad_norm": 5.334561824798584, "learning_rate": 5.852452745108742e-06, "loss": 0.302, "step": 9724 }, { "epoch": 0.45106679035250463, "grad_norm": 9.356728553771973, "learning_rate": 5.8517275677800224e-06, "loss": 0.3662, "step": 9725 }, { "epoch": 0.451113172541744, "grad_norm": 7.651690483093262, "learning_rate": 5.851002371999073e-06, "loss": 0.2556, "step": 9726 }, { "epoch": 0.4511595547309833, "grad_norm": 7.588225841522217, "learning_rate": 5.850277157781606e-06, "loss": 0.3858, "step": 9727 }, { "epoch": 0.45120593692022265, "grad_norm": 6.390745639801025, "learning_rate": 5.849551925143334e-06, "loss": 0.2997, "step": 9728 }, { "epoch": 0.45125231910946195, "grad_norm": 5.848066329956055, "learning_rate": 5.848826674099967e-06, "loss": 0.331, "step": 9729 }, { "epoch": 0.4512987012987013, "grad_norm": 9.669323921203613, "learning_rate": 5.848101404667217e-06, "loss": 0.3433, "step": 9730 }, { "epoch": 0.4513450834879406, "grad_norm": 8.129354476928711, "learning_rate": 5.847376116860799e-06, "loss": 0.3621, "step": 9731 }, { "epoch": 0.45139146567717997, "grad_norm": 4.111360549926758, "learning_rate": 5.846650810696422e-06, "loss": 0.2004, "step": 9732 }, { "epoch": 0.4514378478664193, "grad_norm": 7.542908191680908, "learning_rate": 5.845925486189804e-06, "loss": 0.239, "step": 9733 }, { "epoch": 0.45148423005565863, "grad_norm": 5.7158427238464355, "learning_rate": 5.8452001433566565e-06, "loss": 0.4321, "step": 9734 }, { "epoch": 0.45153061224489793, "grad_norm": 11.844130516052246, "learning_rate": 5.844474782212692e-06, "loss": 0.3791, "step": 9735 }, { "epoch": 0.4515769944341373, "grad_norm": 5.478546142578125, "learning_rate": 5.843749402773629e-06, "loss": 0.3129, "step": 9736 }, { "epoch": 0.45162337662337665, "grad_norm": 5.2894768714904785, "learning_rate": 5.843024005055178e-06, "loss": 0.338, "step": 9737 }, { "epoch": 0.45166975881261595, "grad_norm": 3.1415905952453613, "learning_rate": 5.842298589073058e-06, "loss": 0.2573, "step": 9738 }, { "epoch": 0.4517161410018553, "grad_norm": 3.9760053157806396, "learning_rate": 5.841573154842983e-06, "loss": 0.2543, "step": 9739 }, { "epoch": 0.4517625231910946, "grad_norm": 8.010960578918457, "learning_rate": 5.840847702380669e-06, "loss": 0.4333, "step": 9740 }, { "epoch": 0.45180890538033397, "grad_norm": 8.206371307373047, "learning_rate": 5.840122231701832e-06, "loss": 0.3472, "step": 9741 }, { "epoch": 0.45185528756957327, "grad_norm": 5.732455253601074, "learning_rate": 5.8393967428221935e-06, "loss": 0.2088, "step": 9742 }, { "epoch": 0.45190166975881263, "grad_norm": 6.310111999511719, "learning_rate": 5.838671235757464e-06, "loss": 0.3994, "step": 9743 }, { "epoch": 0.45194805194805193, "grad_norm": 9.046611785888672, "learning_rate": 5.837945710523366e-06, "loss": 0.3729, "step": 9744 }, { "epoch": 0.4519944341372913, "grad_norm": 11.860074043273926, "learning_rate": 5.837220167135616e-06, "loss": 0.3787, "step": 9745 }, { "epoch": 0.4520408163265306, "grad_norm": 38.203250885009766, "learning_rate": 5.836494605609933e-06, "loss": 0.3971, "step": 9746 }, { "epoch": 0.45208719851576995, "grad_norm": 5.372424602508545, "learning_rate": 5.8357690259620345e-06, "loss": 0.2875, "step": 9747 }, { "epoch": 0.45213358070500925, "grad_norm": 5.360355377197266, "learning_rate": 5.835043428207642e-06, "loss": 0.3403, "step": 9748 }, { "epoch": 0.4521799628942486, "grad_norm": 7.3621721267700195, "learning_rate": 5.8343178123624725e-06, "loss": 0.3376, "step": 9749 }, { "epoch": 0.45222634508348797, "grad_norm": 6.038364887237549, "learning_rate": 5.833592178442249e-06, "loss": 0.3622, "step": 9750 }, { "epoch": 0.45227272727272727, "grad_norm": 4.844236373901367, "learning_rate": 5.8328665264626905e-06, "loss": 0.2832, "step": 9751 }, { "epoch": 0.45231910946196663, "grad_norm": 6.175837993621826, "learning_rate": 5.8321408564395165e-06, "loss": 0.3293, "step": 9752 }, { "epoch": 0.45236549165120593, "grad_norm": 9.310005187988281, "learning_rate": 5.8314151683884524e-06, "loss": 0.4193, "step": 9753 }, { "epoch": 0.4524118738404453, "grad_norm": 13.485588073730469, "learning_rate": 5.830689462325215e-06, "loss": 0.4691, "step": 9754 }, { "epoch": 0.4524582560296846, "grad_norm": 6.318049430847168, "learning_rate": 5.829963738265531e-06, "loss": 0.393, "step": 9755 }, { "epoch": 0.45250463821892395, "grad_norm": 7.0701823234558105, "learning_rate": 5.829237996225118e-06, "loss": 0.3307, "step": 9756 }, { "epoch": 0.45255102040816325, "grad_norm": 4.798781871795654, "learning_rate": 5.828512236219701e-06, "loss": 0.3053, "step": 9757 }, { "epoch": 0.4525974025974026, "grad_norm": 7.4827141761779785, "learning_rate": 5.827786458265005e-06, "loss": 0.3727, "step": 9758 }, { "epoch": 0.4526437847866419, "grad_norm": 7.1755571365356445, "learning_rate": 5.827060662376751e-06, "loss": 0.3187, "step": 9759 }, { "epoch": 0.45269016697588127, "grad_norm": 10.481793403625488, "learning_rate": 5.826334848570664e-06, "loss": 0.2818, "step": 9760 }, { "epoch": 0.45273654916512057, "grad_norm": 6.0297417640686035, "learning_rate": 5.825609016862469e-06, "loss": 0.4211, "step": 9761 }, { "epoch": 0.45278293135435993, "grad_norm": 4.728396415710449, "learning_rate": 5.82488316726789e-06, "loss": 0.2764, "step": 9762 }, { "epoch": 0.45282931354359923, "grad_norm": 6.016673564910889, "learning_rate": 5.824157299802653e-06, "loss": 0.3528, "step": 9763 }, { "epoch": 0.4528756957328386, "grad_norm": 4.58018684387207, "learning_rate": 5.823431414482481e-06, "loss": 0.3336, "step": 9764 }, { "epoch": 0.45292207792207795, "grad_norm": 6.664865970611572, "learning_rate": 5.822705511323103e-06, "loss": 0.3138, "step": 9765 }, { "epoch": 0.45296846011131725, "grad_norm": 8.772126197814941, "learning_rate": 5.821979590340243e-06, "loss": 0.226, "step": 9766 }, { "epoch": 0.4530148423005566, "grad_norm": 9.52496337890625, "learning_rate": 5.82125365154963e-06, "loss": 0.444, "step": 9767 }, { "epoch": 0.4530612244897959, "grad_norm": 5.473198413848877, "learning_rate": 5.820527694966988e-06, "loss": 0.3026, "step": 9768 }, { "epoch": 0.45310760667903527, "grad_norm": 7.754055023193359, "learning_rate": 5.819801720608048e-06, "loss": 0.3125, "step": 9769 }, { "epoch": 0.45315398886827457, "grad_norm": 5.732454776763916, "learning_rate": 5.819075728488535e-06, "loss": 0.3513, "step": 9770 }, { "epoch": 0.45320037105751393, "grad_norm": 14.32058334350586, "learning_rate": 5.81834971862418e-06, "loss": 0.5706, "step": 9771 }, { "epoch": 0.45324675324675323, "grad_norm": 6.221452713012695, "learning_rate": 5.817623691030708e-06, "loss": 0.3309, "step": 9772 }, { "epoch": 0.4532931354359926, "grad_norm": 14.522637367248535, "learning_rate": 5.8168976457238515e-06, "loss": 0.4894, "step": 9773 }, { "epoch": 0.4533395176252319, "grad_norm": 6.325541019439697, "learning_rate": 5.816171582719338e-06, "loss": 0.31, "step": 9774 }, { "epoch": 0.45338589981447125, "grad_norm": 9.563885688781738, "learning_rate": 5.815445502032897e-06, "loss": 0.3834, "step": 9775 }, { "epoch": 0.45343228200371055, "grad_norm": 7.403444290161133, "learning_rate": 5.81471940368026e-06, "loss": 0.4007, "step": 9776 }, { "epoch": 0.4534786641929499, "grad_norm": 10.533193588256836, "learning_rate": 5.813993287677155e-06, "loss": 0.5109, "step": 9777 }, { "epoch": 0.4535250463821892, "grad_norm": 13.34920883178711, "learning_rate": 5.813267154039317e-06, "loss": 0.3599, "step": 9778 }, { "epoch": 0.45357142857142857, "grad_norm": 12.190275192260742, "learning_rate": 5.812541002782475e-06, "loss": 0.4331, "step": 9779 }, { "epoch": 0.4536178107606679, "grad_norm": 4.797826290130615, "learning_rate": 5.811814833922359e-06, "loss": 0.3016, "step": 9780 }, { "epoch": 0.45366419294990723, "grad_norm": 8.122821807861328, "learning_rate": 5.811088647474705e-06, "loss": 0.4652, "step": 9781 }, { "epoch": 0.4537105751391466, "grad_norm": 6.287908554077148, "learning_rate": 5.810362443455242e-06, "loss": 0.2452, "step": 9782 }, { "epoch": 0.4537569573283859, "grad_norm": 17.777490615844727, "learning_rate": 5.809636221879703e-06, "loss": 0.4998, "step": 9783 }, { "epoch": 0.45380333951762525, "grad_norm": 12.05507755279541, "learning_rate": 5.808909982763825e-06, "loss": 0.2801, "step": 9784 }, { "epoch": 0.45384972170686455, "grad_norm": 14.560879707336426, "learning_rate": 5.808183726123337e-06, "loss": 0.5983, "step": 9785 }, { "epoch": 0.4538961038961039, "grad_norm": 7.058011531829834, "learning_rate": 5.807457451973976e-06, "loss": 0.3178, "step": 9786 }, { "epoch": 0.4539424860853432, "grad_norm": 6.674496173858643, "learning_rate": 5.806731160331474e-06, "loss": 0.3499, "step": 9787 }, { "epoch": 0.45398886827458257, "grad_norm": 9.730636596679688, "learning_rate": 5.806004851211567e-06, "loss": 0.3964, "step": 9788 }, { "epoch": 0.45403525046382187, "grad_norm": 6.565895080566406, "learning_rate": 5.805278524629991e-06, "loss": 0.2922, "step": 9789 }, { "epoch": 0.45408163265306123, "grad_norm": 11.900715827941895, "learning_rate": 5.8045521806024785e-06, "loss": 0.4092, "step": 9790 }, { "epoch": 0.45412801484230053, "grad_norm": 12.776496887207031, "learning_rate": 5.803825819144771e-06, "loss": 0.3254, "step": 9791 }, { "epoch": 0.4541743970315399, "grad_norm": 6.815052509307861, "learning_rate": 5.803099440272597e-06, "loss": 0.3685, "step": 9792 }, { "epoch": 0.45422077922077925, "grad_norm": 7.322391510009766, "learning_rate": 5.802373044001699e-06, "loss": 0.3296, "step": 9793 }, { "epoch": 0.45426716141001855, "grad_norm": 7.969905853271484, "learning_rate": 5.8016466303478125e-06, "loss": 0.3517, "step": 9794 }, { "epoch": 0.4543135435992579, "grad_norm": 5.954243183135986, "learning_rate": 5.800920199326674e-06, "loss": 0.2998, "step": 9795 }, { "epoch": 0.4543599257884972, "grad_norm": 9.915080070495605, "learning_rate": 5.800193750954023e-06, "loss": 0.3856, "step": 9796 }, { "epoch": 0.45440630797773657, "grad_norm": 5.299732208251953, "learning_rate": 5.7994672852455954e-06, "loss": 0.2398, "step": 9797 }, { "epoch": 0.45445269016697587, "grad_norm": 6.3591203689575195, "learning_rate": 5.798740802217131e-06, "loss": 0.3109, "step": 9798 }, { "epoch": 0.45449907235621523, "grad_norm": 8.418862342834473, "learning_rate": 5.798014301884368e-06, "loss": 0.3443, "step": 9799 }, { "epoch": 0.45454545454545453, "grad_norm": 9.926340103149414, "learning_rate": 5.797287784263047e-06, "loss": 0.3896, "step": 9800 }, { "epoch": 0.4545918367346939, "grad_norm": 5.741265773773193, "learning_rate": 5.796561249368905e-06, "loss": 0.3008, "step": 9801 }, { "epoch": 0.4546382189239332, "grad_norm": 11.864523887634277, "learning_rate": 5.795834697217686e-06, "loss": 0.5129, "step": 9802 }, { "epoch": 0.45468460111317255, "grad_norm": 9.223201751708984, "learning_rate": 5.795108127825126e-06, "loss": 0.4291, "step": 9803 }, { "epoch": 0.45473098330241185, "grad_norm": 9.114680290222168, "learning_rate": 5.794381541206968e-06, "loss": 0.4863, "step": 9804 }, { "epoch": 0.4547773654916512, "grad_norm": 8.488432884216309, "learning_rate": 5.7936549373789545e-06, "loss": 0.3143, "step": 9805 }, { "epoch": 0.4548237476808905, "grad_norm": 8.461007118225098, "learning_rate": 5.792928316356824e-06, "loss": 0.4704, "step": 9806 }, { "epoch": 0.45487012987012987, "grad_norm": 9.185466766357422, "learning_rate": 5.7922016781563205e-06, "loss": 0.3652, "step": 9807 }, { "epoch": 0.4549165120593692, "grad_norm": 7.087782859802246, "learning_rate": 5.791475022793185e-06, "loss": 0.297, "step": 9808 }, { "epoch": 0.45496289424860853, "grad_norm": 5.018857479095459, "learning_rate": 5.790748350283161e-06, "loss": 0.3279, "step": 9809 }, { "epoch": 0.4550092764378479, "grad_norm": 9.624217987060547, "learning_rate": 5.7900216606419915e-06, "loss": 0.3811, "step": 9810 }, { "epoch": 0.4550556586270872, "grad_norm": 8.26440715789795, "learning_rate": 5.789294953885418e-06, "loss": 0.3405, "step": 9811 }, { "epoch": 0.45510204081632655, "grad_norm": 5.857519149780273, "learning_rate": 5.7885682300291855e-06, "loss": 0.3499, "step": 9812 }, { "epoch": 0.45514842300556585, "grad_norm": 7.440625190734863, "learning_rate": 5.78784148908904e-06, "loss": 0.3274, "step": 9813 }, { "epoch": 0.4551948051948052, "grad_norm": 4.779292583465576, "learning_rate": 5.787114731080723e-06, "loss": 0.2915, "step": 9814 }, { "epoch": 0.4552411873840445, "grad_norm": 5.578482627868652, "learning_rate": 5.78638795601998e-06, "loss": 0.245, "step": 9815 }, { "epoch": 0.45528756957328387, "grad_norm": 6.83154296875, "learning_rate": 5.785661163922558e-06, "loss": 0.3445, "step": 9816 }, { "epoch": 0.45533395176252317, "grad_norm": 8.526013374328613, "learning_rate": 5.7849343548042e-06, "loss": 0.3506, "step": 9817 }, { "epoch": 0.45538033395176253, "grad_norm": 14.184029579162598, "learning_rate": 5.784207528680655e-06, "loss": 0.5066, "step": 9818 }, { "epoch": 0.45542671614100183, "grad_norm": 8.937438011169434, "learning_rate": 5.783480685567666e-06, "loss": 0.2779, "step": 9819 }, { "epoch": 0.4554730983302412, "grad_norm": 5.0470194816589355, "learning_rate": 5.782753825480981e-06, "loss": 0.3491, "step": 9820 }, { "epoch": 0.45551948051948055, "grad_norm": 5.4203009605407715, "learning_rate": 5.782026948436349e-06, "loss": 0.3996, "step": 9821 }, { "epoch": 0.45556586270871985, "grad_norm": 7.218466281890869, "learning_rate": 5.781300054449515e-06, "loss": 0.2969, "step": 9822 }, { "epoch": 0.4556122448979592, "grad_norm": 6.824928283691406, "learning_rate": 5.780573143536226e-06, "loss": 0.3871, "step": 9823 }, { "epoch": 0.4556586270871985, "grad_norm": 8.084512710571289, "learning_rate": 5.7798462157122325e-06, "loss": 0.3877, "step": 9824 }, { "epoch": 0.45570500927643787, "grad_norm": 7.758318901062012, "learning_rate": 5.779119270993282e-06, "loss": 0.4755, "step": 9825 }, { "epoch": 0.45575139146567717, "grad_norm": 4.7960429191589355, "learning_rate": 5.778392309395124e-06, "loss": 0.2814, "step": 9826 }, { "epoch": 0.4557977736549165, "grad_norm": 5.956376552581787, "learning_rate": 5.7776653309335065e-06, "loss": 0.3336, "step": 9827 }, { "epoch": 0.45584415584415583, "grad_norm": 11.912834167480469, "learning_rate": 5.776938335624179e-06, "loss": 0.3853, "step": 9828 }, { "epoch": 0.4558905380333952, "grad_norm": 7.614163398742676, "learning_rate": 5.776211323482894e-06, "loss": 0.3152, "step": 9829 }, { "epoch": 0.4559369202226345, "grad_norm": 6.8154826164245605, "learning_rate": 5.775484294525399e-06, "loss": 0.3981, "step": 9830 }, { "epoch": 0.45598330241187385, "grad_norm": 18.68763542175293, "learning_rate": 5.774757248767447e-06, "loss": 0.3581, "step": 9831 }, { "epoch": 0.45602968460111315, "grad_norm": 6.527846813201904, "learning_rate": 5.774030186224786e-06, "loss": 0.2818, "step": 9832 }, { "epoch": 0.4560760667903525, "grad_norm": 6.559634685516357, "learning_rate": 5.7733031069131704e-06, "loss": 0.3416, "step": 9833 }, { "epoch": 0.4561224489795918, "grad_norm": 4.654662609100342, "learning_rate": 5.77257601084835e-06, "loss": 0.3593, "step": 9834 }, { "epoch": 0.45616883116883117, "grad_norm": 4.5825605392456055, "learning_rate": 5.77184889804608e-06, "loss": 0.2941, "step": 9835 }, { "epoch": 0.4562152133580705, "grad_norm": 5.849054336547852, "learning_rate": 5.771121768522108e-06, "loss": 0.2673, "step": 9836 }, { "epoch": 0.45626159554730983, "grad_norm": 7.046089172363281, "learning_rate": 5.770394622292192e-06, "loss": 0.3704, "step": 9837 }, { "epoch": 0.4563079777365492, "grad_norm": 6.684276103973389, "learning_rate": 5.769667459372081e-06, "loss": 0.3454, "step": 9838 }, { "epoch": 0.4563543599257885, "grad_norm": 5.30251407623291, "learning_rate": 5.7689402797775306e-06, "loss": 0.3196, "step": 9839 }, { "epoch": 0.45640074211502785, "grad_norm": 5.067093849182129, "learning_rate": 5.768213083524296e-06, "loss": 0.3325, "step": 9840 }, { "epoch": 0.45644712430426715, "grad_norm": 3.547720432281494, "learning_rate": 5.767485870628129e-06, "loss": 0.246, "step": 9841 }, { "epoch": 0.4564935064935065, "grad_norm": 5.451706886291504, "learning_rate": 5.7667586411047845e-06, "loss": 0.3671, "step": 9842 }, { "epoch": 0.4565398886827458, "grad_norm": 7.489774227142334, "learning_rate": 5.766031394970021e-06, "loss": 0.2244, "step": 9843 }, { "epoch": 0.45658627087198517, "grad_norm": 6.150245666503906, "learning_rate": 5.76530413223959e-06, "loss": 0.3541, "step": 9844 }, { "epoch": 0.45663265306122447, "grad_norm": 6.665042877197266, "learning_rate": 5.764576852929248e-06, "loss": 0.341, "step": 9845 }, { "epoch": 0.4566790352504638, "grad_norm": 7.002166271209717, "learning_rate": 5.763849557054753e-06, "loss": 0.4345, "step": 9846 }, { "epoch": 0.45672541743970313, "grad_norm": 6.830463409423828, "learning_rate": 5.763122244631859e-06, "loss": 0.3685, "step": 9847 }, { "epoch": 0.4567717996289425, "grad_norm": 6.0979437828063965, "learning_rate": 5.762394915676325e-06, "loss": 0.273, "step": 9848 }, { "epoch": 0.45681818181818185, "grad_norm": 9.058904647827148, "learning_rate": 5.761667570203907e-06, "loss": 0.4337, "step": 9849 }, { "epoch": 0.45686456400742115, "grad_norm": 10.032779693603516, "learning_rate": 5.760940208230362e-06, "loss": 0.4788, "step": 9850 }, { "epoch": 0.4569109461966605, "grad_norm": 5.671078205108643, "learning_rate": 5.76021282977145e-06, "loss": 0.3624, "step": 9851 }, { "epoch": 0.4569573283858998, "grad_norm": 4.7432451248168945, "learning_rate": 5.759485434842927e-06, "loss": 0.3131, "step": 9852 }, { "epoch": 0.45700371057513917, "grad_norm": 9.662501335144043, "learning_rate": 5.7587580234605535e-06, "loss": 0.4695, "step": 9853 }, { "epoch": 0.45705009276437847, "grad_norm": 7.1359543800354, "learning_rate": 5.758030595640088e-06, "loss": 0.2132, "step": 9854 }, { "epoch": 0.4570964749536178, "grad_norm": 13.351451873779297, "learning_rate": 5.757303151397289e-06, "loss": 0.5876, "step": 9855 }, { "epoch": 0.45714285714285713, "grad_norm": 5.479215621948242, "learning_rate": 5.7565756907479155e-06, "loss": 0.3321, "step": 9856 }, { "epoch": 0.4571892393320965, "grad_norm": 7.530966281890869, "learning_rate": 5.75584821370773e-06, "loss": 0.3487, "step": 9857 }, { "epoch": 0.4572356215213358, "grad_norm": 5.565528869628906, "learning_rate": 5.755120720292491e-06, "loss": 0.3419, "step": 9858 }, { "epoch": 0.45728200371057515, "grad_norm": 3.9661149978637695, "learning_rate": 5.7543932105179616e-06, "loss": 0.2098, "step": 9859 }, { "epoch": 0.45732838589981445, "grad_norm": 14.942268371582031, "learning_rate": 5.753665684399899e-06, "loss": 0.4393, "step": 9860 }, { "epoch": 0.4573747680890538, "grad_norm": 6.828384876251221, "learning_rate": 5.752938141954066e-06, "loss": 0.3479, "step": 9861 }, { "epoch": 0.4574211502782931, "grad_norm": 6.658957004547119, "learning_rate": 5.7522105831962284e-06, "loss": 0.3888, "step": 9862 }, { "epoch": 0.45746753246753247, "grad_norm": 5.492591381072998, "learning_rate": 5.751483008142143e-06, "loss": 0.3655, "step": 9863 }, { "epoch": 0.4575139146567718, "grad_norm": 5.374382019042969, "learning_rate": 5.750755416807575e-06, "loss": 0.2919, "step": 9864 }, { "epoch": 0.4575602968460111, "grad_norm": 12.63216781616211, "learning_rate": 5.750027809208288e-06, "loss": 0.3886, "step": 9865 }, { "epoch": 0.4576066790352505, "grad_norm": 5.385286808013916, "learning_rate": 5.749300185360043e-06, "loss": 0.3416, "step": 9866 }, { "epoch": 0.4576530612244898, "grad_norm": 4.798874378204346, "learning_rate": 5.748572545278605e-06, "loss": 0.2924, "step": 9867 }, { "epoch": 0.45769944341372915, "grad_norm": 7.31914758682251, "learning_rate": 5.747844888979739e-06, "loss": 0.3564, "step": 9868 }, { "epoch": 0.45774582560296845, "grad_norm": 8.017162322998047, "learning_rate": 5.747117216479206e-06, "loss": 0.3017, "step": 9869 }, { "epoch": 0.4577922077922078, "grad_norm": 4.89124870300293, "learning_rate": 5.746389527792772e-06, "loss": 0.339, "step": 9870 }, { "epoch": 0.4578385899814471, "grad_norm": 9.088959693908691, "learning_rate": 5.745661822936205e-06, "loss": 0.4107, "step": 9871 }, { "epoch": 0.45788497217068647, "grad_norm": 8.447051048278809, "learning_rate": 5.7449341019252655e-06, "loss": 0.4231, "step": 9872 }, { "epoch": 0.45793135435992577, "grad_norm": 11.125582695007324, "learning_rate": 5.744206364775724e-06, "loss": 0.4431, "step": 9873 }, { "epoch": 0.4579777365491651, "grad_norm": 5.668640613555908, "learning_rate": 5.743478611503342e-06, "loss": 0.3186, "step": 9874 }, { "epoch": 0.45802411873840443, "grad_norm": 5.1656270027160645, "learning_rate": 5.742750842123888e-06, "loss": 0.3128, "step": 9875 }, { "epoch": 0.4580705009276438, "grad_norm": 6.691683292388916, "learning_rate": 5.742023056653131e-06, "loss": 0.2681, "step": 9876 }, { "epoch": 0.45811688311688314, "grad_norm": 5.334491729736328, "learning_rate": 5.741295255106834e-06, "loss": 0.2964, "step": 9877 }, { "epoch": 0.45816326530612245, "grad_norm": 8.021371841430664, "learning_rate": 5.740567437500768e-06, "loss": 0.3204, "step": 9878 }, { "epoch": 0.4582096474953618, "grad_norm": 8.120939254760742, "learning_rate": 5.7398396038506975e-06, "loss": 0.3745, "step": 9879 }, { "epoch": 0.4582560296846011, "grad_norm": 5.355707168579102, "learning_rate": 5.7391117541723914e-06, "loss": 0.224, "step": 9880 }, { "epoch": 0.45830241187384047, "grad_norm": 5.432421684265137, "learning_rate": 5.738383888481621e-06, "loss": 0.3433, "step": 9881 }, { "epoch": 0.45834879406307977, "grad_norm": 5.678204536437988, "learning_rate": 5.737656006794152e-06, "loss": 0.369, "step": 9882 }, { "epoch": 0.4583951762523191, "grad_norm": 9.23000431060791, "learning_rate": 5.736928109125754e-06, "loss": 0.3375, "step": 9883 }, { "epoch": 0.4584415584415584, "grad_norm": 8.254385948181152, "learning_rate": 5.7362001954922e-06, "loss": 0.3045, "step": 9884 }, { "epoch": 0.4584879406307978, "grad_norm": 13.48853588104248, "learning_rate": 5.735472265909254e-06, "loss": 0.3354, "step": 9885 }, { "epoch": 0.4585343228200371, "grad_norm": 5.673989772796631, "learning_rate": 5.734744320392691e-06, "loss": 0.3226, "step": 9886 }, { "epoch": 0.45858070500927645, "grad_norm": 8.366025924682617, "learning_rate": 5.73401635895828e-06, "loss": 0.4486, "step": 9887 }, { "epoch": 0.45862708719851575, "grad_norm": 8.763901710510254, "learning_rate": 5.733288381621791e-06, "loss": 0.4607, "step": 9888 }, { "epoch": 0.4586734693877551, "grad_norm": 7.30644416809082, "learning_rate": 5.732560388398996e-06, "loss": 0.2897, "step": 9889 }, { "epoch": 0.4587198515769944, "grad_norm": 5.4950456619262695, "learning_rate": 5.731832379305666e-06, "loss": 0.3212, "step": 9890 }, { "epoch": 0.45876623376623377, "grad_norm": 7.079452991485596, "learning_rate": 5.731104354357574e-06, "loss": 0.3656, "step": 9891 }, { "epoch": 0.4588126159554731, "grad_norm": 4.384294509887695, "learning_rate": 5.730376313570493e-06, "loss": 0.3615, "step": 9892 }, { "epoch": 0.4588589981447124, "grad_norm": 6.169825077056885, "learning_rate": 5.729648256960193e-06, "loss": 0.2566, "step": 9893 }, { "epoch": 0.4589053803339518, "grad_norm": 12.66852855682373, "learning_rate": 5.728920184542448e-06, "loss": 0.3412, "step": 9894 }, { "epoch": 0.4589517625231911, "grad_norm": 5.010693550109863, "learning_rate": 5.728192096333034e-06, "loss": 0.2155, "step": 9895 }, { "epoch": 0.45899814471243044, "grad_norm": 5.541238307952881, "learning_rate": 5.727463992347719e-06, "loss": 0.345, "step": 9896 }, { "epoch": 0.45904452690166975, "grad_norm": 4.408865928649902, "learning_rate": 5.726735872602282e-06, "loss": 0.2532, "step": 9897 }, { "epoch": 0.4590909090909091, "grad_norm": 8.95044994354248, "learning_rate": 5.7260077371124965e-06, "loss": 0.4447, "step": 9898 }, { "epoch": 0.4591372912801484, "grad_norm": 12.227337837219238, "learning_rate": 5.725279585894135e-06, "loss": 0.3604, "step": 9899 }, { "epoch": 0.45918367346938777, "grad_norm": 6.077524662017822, "learning_rate": 5.724551418962975e-06, "loss": 0.262, "step": 9900 }, { "epoch": 0.45923005565862707, "grad_norm": 8.555816650390625, "learning_rate": 5.723823236334789e-06, "loss": 0.4255, "step": 9901 }, { "epoch": 0.4592764378478664, "grad_norm": 5.888967990875244, "learning_rate": 5.723095038025355e-06, "loss": 0.2496, "step": 9902 }, { "epoch": 0.45932282003710573, "grad_norm": 5.179490566253662, "learning_rate": 5.72236682405045e-06, "loss": 0.3529, "step": 9903 }, { "epoch": 0.4593692022263451, "grad_norm": 10.71804141998291, "learning_rate": 5.721638594425847e-06, "loss": 0.3986, "step": 9904 }, { "epoch": 0.4594155844155844, "grad_norm": 5.216842174530029, "learning_rate": 5.7209103491673245e-06, "loss": 0.327, "step": 9905 }, { "epoch": 0.45946196660482375, "grad_norm": 5.20975923538208, "learning_rate": 5.72018208829066e-06, "loss": 0.429, "step": 9906 }, { "epoch": 0.4595083487940631, "grad_norm": 7.2617268562316895, "learning_rate": 5.719453811811631e-06, "loss": 0.3487, "step": 9907 }, { "epoch": 0.4595547309833024, "grad_norm": 8.165132522583008, "learning_rate": 5.718725519746013e-06, "loss": 0.366, "step": 9908 }, { "epoch": 0.45960111317254176, "grad_norm": 6.97238302230835, "learning_rate": 5.717997212109587e-06, "loss": 0.3215, "step": 9909 }, { "epoch": 0.45964749536178107, "grad_norm": 4.507284641265869, "learning_rate": 5.71726888891813e-06, "loss": 0.3278, "step": 9910 }, { "epoch": 0.4596938775510204, "grad_norm": 12.032959938049316, "learning_rate": 5.7165405501874214e-06, "loss": 0.3473, "step": 9911 }, { "epoch": 0.4597402597402597, "grad_norm": 9.253578186035156, "learning_rate": 5.715812195933238e-06, "loss": 0.2624, "step": 9912 }, { "epoch": 0.4597866419294991, "grad_norm": 10.08372688293457, "learning_rate": 5.715083826171362e-06, "loss": 0.5083, "step": 9913 }, { "epoch": 0.4598330241187384, "grad_norm": 4.061610221862793, "learning_rate": 5.714355440917573e-06, "loss": 0.294, "step": 9914 }, { "epoch": 0.45987940630797774, "grad_norm": 10.001189231872559, "learning_rate": 5.713627040187648e-06, "loss": 0.4603, "step": 9915 }, { "epoch": 0.45992578849721705, "grad_norm": 8.38458251953125, "learning_rate": 5.712898623997372e-06, "loss": 0.2906, "step": 9916 }, { "epoch": 0.4599721706864564, "grad_norm": 6.49351167678833, "learning_rate": 5.71217019236252e-06, "loss": 0.323, "step": 9917 }, { "epoch": 0.4600185528756957, "grad_norm": 8.237008094787598, "learning_rate": 5.711441745298879e-06, "loss": 0.4254, "step": 9918 }, { "epoch": 0.46006493506493507, "grad_norm": 5.917082786560059, "learning_rate": 5.7107132828222275e-06, "loss": 0.3061, "step": 9919 }, { "epoch": 0.4601113172541744, "grad_norm": 8.991689682006836, "learning_rate": 5.709984804948347e-06, "loss": 0.4444, "step": 9920 }, { "epoch": 0.4601576994434137, "grad_norm": 11.277907371520996, "learning_rate": 5.709256311693021e-06, "loss": 0.5097, "step": 9921 }, { "epoch": 0.4602040816326531, "grad_norm": 8.121297836303711, "learning_rate": 5.708527803072031e-06, "loss": 0.3097, "step": 9922 }, { "epoch": 0.4602504638218924, "grad_norm": 7.563335418701172, "learning_rate": 5.70779927910116e-06, "loss": 0.3369, "step": 9923 }, { "epoch": 0.46029684601113174, "grad_norm": 8.000629425048828, "learning_rate": 5.70707073979619e-06, "loss": 0.3831, "step": 9924 }, { "epoch": 0.46034322820037105, "grad_norm": 15.066222190856934, "learning_rate": 5.706342185172905e-06, "loss": 0.4898, "step": 9925 }, { "epoch": 0.4603896103896104, "grad_norm": 8.014359474182129, "learning_rate": 5.7056136152470905e-06, "loss": 0.3582, "step": 9926 }, { "epoch": 0.4604359925788497, "grad_norm": 5.797489643096924, "learning_rate": 5.704885030034528e-06, "loss": 0.3698, "step": 9927 }, { "epoch": 0.46048237476808906, "grad_norm": 5.112602233886719, "learning_rate": 5.704156429551004e-06, "loss": 0.328, "step": 9928 }, { "epoch": 0.46052875695732837, "grad_norm": 4.626737594604492, "learning_rate": 5.703427813812303e-06, "loss": 0.3027, "step": 9929 }, { "epoch": 0.4605751391465677, "grad_norm": 6.512014389038086, "learning_rate": 5.702699182834208e-06, "loss": 0.2591, "step": 9930 }, { "epoch": 0.460621521335807, "grad_norm": 5.813811302185059, "learning_rate": 5.701970536632507e-06, "loss": 0.3579, "step": 9931 }, { "epoch": 0.4606679035250464, "grad_norm": 11.202513694763184, "learning_rate": 5.701241875222984e-06, "loss": 0.3133, "step": 9932 }, { "epoch": 0.4607142857142857, "grad_norm": 3.7500851154327393, "learning_rate": 5.7005131986214246e-06, "loss": 0.2871, "step": 9933 }, { "epoch": 0.46076066790352505, "grad_norm": 8.709957122802734, "learning_rate": 5.699784506843617e-06, "loss": 0.2718, "step": 9934 }, { "epoch": 0.4608070500927644, "grad_norm": 5.43434476852417, "learning_rate": 5.699055799905347e-06, "loss": 0.3457, "step": 9935 }, { "epoch": 0.4608534322820037, "grad_norm": 13.037679672241211, "learning_rate": 5.698327077822403e-06, "loss": 0.4762, "step": 9936 }, { "epoch": 0.46089981447124306, "grad_norm": 4.099255084991455, "learning_rate": 5.697598340610571e-06, "loss": 0.2324, "step": 9937 }, { "epoch": 0.46094619666048237, "grad_norm": 5.316153526306152, "learning_rate": 5.696869588285637e-06, "loss": 0.3249, "step": 9938 }, { "epoch": 0.4609925788497217, "grad_norm": 6.421211242675781, "learning_rate": 5.696140820863393e-06, "loss": 0.3295, "step": 9939 }, { "epoch": 0.461038961038961, "grad_norm": 7.885784149169922, "learning_rate": 5.695412038359624e-06, "loss": 0.311, "step": 9940 }, { "epoch": 0.4610853432282004, "grad_norm": 6.348247051239014, "learning_rate": 5.694683240790122e-06, "loss": 0.2811, "step": 9941 }, { "epoch": 0.4611317254174397, "grad_norm": 5.831299781799316, "learning_rate": 5.6939544281706715e-06, "loss": 0.3531, "step": 9942 }, { "epoch": 0.46117810760667904, "grad_norm": 8.856803894042969, "learning_rate": 5.693225600517065e-06, "loss": 0.4019, "step": 9943 }, { "epoch": 0.46122448979591835, "grad_norm": 4.701732635498047, "learning_rate": 5.692496757845092e-06, "loss": 0.2358, "step": 9944 }, { "epoch": 0.4612708719851577, "grad_norm": 9.301155090332031, "learning_rate": 5.691767900170542e-06, "loss": 0.3405, "step": 9945 }, { "epoch": 0.461317254174397, "grad_norm": 6.600200176239014, "learning_rate": 5.6910390275092045e-06, "loss": 0.38, "step": 9946 }, { "epoch": 0.46136363636363636, "grad_norm": 5.3737053871154785, "learning_rate": 5.690310139876872e-06, "loss": 0.336, "step": 9947 }, { "epoch": 0.4614100185528757, "grad_norm": 6.02134895324707, "learning_rate": 5.689581237289333e-06, "loss": 0.2785, "step": 9948 }, { "epoch": 0.461456400742115, "grad_norm": 9.470102310180664, "learning_rate": 5.6888523197623815e-06, "loss": 0.3582, "step": 9949 }, { "epoch": 0.4615027829313544, "grad_norm": 6.039533615112305, "learning_rate": 5.688123387311808e-06, "loss": 0.3282, "step": 9950 }, { "epoch": 0.4615491651205937, "grad_norm": 6.212177753448486, "learning_rate": 5.687394439953403e-06, "loss": 0.2579, "step": 9951 }, { "epoch": 0.46159554730983304, "grad_norm": 6.995209217071533, "learning_rate": 5.686665477702962e-06, "loss": 0.3211, "step": 9952 }, { "epoch": 0.46164192949907235, "grad_norm": 7.579984188079834, "learning_rate": 5.6859365005762745e-06, "loss": 0.3261, "step": 9953 }, { "epoch": 0.4616883116883117, "grad_norm": 5.525579929351807, "learning_rate": 5.685207508589133e-06, "loss": 0.3494, "step": 9954 }, { "epoch": 0.461734693877551, "grad_norm": 8.169459342956543, "learning_rate": 5.684478501757335e-06, "loss": 0.3501, "step": 9955 }, { "epoch": 0.46178107606679036, "grad_norm": 9.540288925170898, "learning_rate": 5.6837494800966696e-06, "loss": 0.4016, "step": 9956 }, { "epoch": 0.46182745825602967, "grad_norm": 4.779168605804443, "learning_rate": 5.683020443622932e-06, "loss": 0.3018, "step": 9957 }, { "epoch": 0.461873840445269, "grad_norm": 6.721207618713379, "learning_rate": 5.6822913923519175e-06, "loss": 0.3656, "step": 9958 }, { "epoch": 0.4619202226345083, "grad_norm": 8.613273620605469, "learning_rate": 5.68156232629942e-06, "loss": 0.2514, "step": 9959 }, { "epoch": 0.4619666048237477, "grad_norm": 6.921201229095459, "learning_rate": 5.680833245481234e-06, "loss": 0.3521, "step": 9960 }, { "epoch": 0.462012987012987, "grad_norm": 4.714303016662598, "learning_rate": 5.680104149913155e-06, "loss": 0.3559, "step": 9961 }, { "epoch": 0.46205936920222634, "grad_norm": 9.004270553588867, "learning_rate": 5.679375039610977e-06, "loss": 0.442, "step": 9962 }, { "epoch": 0.4621057513914657, "grad_norm": 10.221651077270508, "learning_rate": 5.678645914590499e-06, "loss": 0.3393, "step": 9963 }, { "epoch": 0.462152133580705, "grad_norm": 5.027672290802002, "learning_rate": 5.677916774867515e-06, "loss": 0.282, "step": 9964 }, { "epoch": 0.46219851576994436, "grad_norm": 7.629848480224609, "learning_rate": 5.6771876204578205e-06, "loss": 0.4066, "step": 9965 }, { "epoch": 0.46224489795918366, "grad_norm": 9.467367172241211, "learning_rate": 5.676458451377213e-06, "loss": 0.3771, "step": 9966 }, { "epoch": 0.462291280148423, "grad_norm": 7.7141923904418945, "learning_rate": 5.675729267641492e-06, "loss": 0.412, "step": 9967 }, { "epoch": 0.4623376623376623, "grad_norm": 5.880846977233887, "learning_rate": 5.675000069266451e-06, "loss": 0.4109, "step": 9968 }, { "epoch": 0.4623840445269017, "grad_norm": 4.837208271026611, "learning_rate": 5.6742708562678905e-06, "loss": 0.3141, "step": 9969 }, { "epoch": 0.462430426716141, "grad_norm": 7.395363807678223, "learning_rate": 5.673541628661607e-06, "loss": 0.3899, "step": 9970 }, { "epoch": 0.46247680890538034, "grad_norm": 9.072654724121094, "learning_rate": 5.672812386463401e-06, "loss": 0.3864, "step": 9971 }, { "epoch": 0.46252319109461965, "grad_norm": 3.909050703048706, "learning_rate": 5.672083129689068e-06, "loss": 0.2447, "step": 9972 }, { "epoch": 0.462569573283859, "grad_norm": 6.968302249908447, "learning_rate": 5.671353858354407e-06, "loss": 0.272, "step": 9973 }, { "epoch": 0.4626159554730983, "grad_norm": 4.93218994140625, "learning_rate": 5.670624572475222e-06, "loss": 0.2775, "step": 9974 }, { "epoch": 0.46266233766233766, "grad_norm": 4.73742151260376, "learning_rate": 5.669895272067307e-06, "loss": 0.3316, "step": 9975 }, { "epoch": 0.462708719851577, "grad_norm": 8.785720825195312, "learning_rate": 5.6691659571464655e-06, "loss": 0.4344, "step": 9976 }, { "epoch": 0.4627551020408163, "grad_norm": 7.72938871383667, "learning_rate": 5.6684366277284956e-06, "loss": 0.3995, "step": 9977 }, { "epoch": 0.4628014842300557, "grad_norm": 6.011159896850586, "learning_rate": 5.667707283829199e-06, "loss": 0.3494, "step": 9978 }, { "epoch": 0.462847866419295, "grad_norm": 15.581602096557617, "learning_rate": 5.666977925464376e-06, "loss": 0.3967, "step": 9979 }, { "epoch": 0.46289424860853434, "grad_norm": 8.71884536743164, "learning_rate": 5.666248552649829e-06, "loss": 0.4527, "step": 9980 }, { "epoch": 0.46294063079777364, "grad_norm": 5.598988056182861, "learning_rate": 5.6655191654013574e-06, "loss": 0.3171, "step": 9981 }, { "epoch": 0.462987012987013, "grad_norm": 9.340434074401855, "learning_rate": 5.664789763734766e-06, "loss": 0.4851, "step": 9982 }, { "epoch": 0.4630333951762523, "grad_norm": 6.220190525054932, "learning_rate": 5.6640603476658526e-06, "loss": 0.3702, "step": 9983 }, { "epoch": 0.46307977736549166, "grad_norm": 6.775638580322266, "learning_rate": 5.663330917210423e-06, "loss": 0.4261, "step": 9984 }, { "epoch": 0.46312615955473097, "grad_norm": 7.908145427703857, "learning_rate": 5.662601472384279e-06, "loss": 0.4076, "step": 9985 }, { "epoch": 0.4631725417439703, "grad_norm": 5.333899974822998, "learning_rate": 5.6618720132032235e-06, "loss": 0.3592, "step": 9986 }, { "epoch": 0.4632189239332096, "grad_norm": 6.836452007293701, "learning_rate": 5.66114253968306e-06, "loss": 0.2475, "step": 9987 }, { "epoch": 0.463265306122449, "grad_norm": 7.3564629554748535, "learning_rate": 5.660413051839593e-06, "loss": 0.3309, "step": 9988 }, { "epoch": 0.4633116883116883, "grad_norm": 6.698549270629883, "learning_rate": 5.659683549688624e-06, "loss": 0.3432, "step": 9989 }, { "epoch": 0.46335807050092764, "grad_norm": 6.453343868255615, "learning_rate": 5.658954033245959e-06, "loss": 0.3452, "step": 9990 }, { "epoch": 0.463404452690167, "grad_norm": 7.028084754943848, "learning_rate": 5.658224502527404e-06, "loss": 0.2753, "step": 9991 }, { "epoch": 0.4634508348794063, "grad_norm": 3.4807424545288086, "learning_rate": 5.657494957548761e-06, "loss": 0.1885, "step": 9992 }, { "epoch": 0.46349721706864566, "grad_norm": 8.980080604553223, "learning_rate": 5.656765398325837e-06, "loss": 0.3404, "step": 9993 }, { "epoch": 0.46354359925788496, "grad_norm": 6.913337707519531, "learning_rate": 5.656035824874437e-06, "loss": 0.4372, "step": 9994 }, { "epoch": 0.4635899814471243, "grad_norm": 5.445587635040283, "learning_rate": 5.655306237210366e-06, "loss": 0.333, "step": 9995 }, { "epoch": 0.4636363636363636, "grad_norm": 14.941944122314453, "learning_rate": 5.6545766353494325e-06, "loss": 0.521, "step": 9996 }, { "epoch": 0.463682745825603, "grad_norm": 11.498124122619629, "learning_rate": 5.653847019307441e-06, "loss": 0.3549, "step": 9997 }, { "epoch": 0.4637291280148423, "grad_norm": 7.225383281707764, "learning_rate": 5.653117389100198e-06, "loss": 0.3973, "step": 9998 }, { "epoch": 0.46377551020408164, "grad_norm": 4.228610515594482, "learning_rate": 5.652387744743513e-06, "loss": 0.2106, "step": 9999 }, { "epoch": 0.46382189239332094, "grad_norm": 5.512524604797363, "learning_rate": 5.6516580862531886e-06, "loss": 0.3574, "step": 10000 }, { "epoch": 0.4638682745825603, "grad_norm": 6.124767780303955, "learning_rate": 5.6509284136450385e-06, "loss": 0.2357, "step": 10001 }, { "epoch": 0.4639146567717996, "grad_norm": 5.678493976593018, "learning_rate": 5.6501987269348656e-06, "loss": 0.2764, "step": 10002 }, { "epoch": 0.46396103896103896, "grad_norm": 5.117108345031738, "learning_rate": 5.649469026138481e-06, "loss": 0.3038, "step": 10003 }, { "epoch": 0.4640074211502783, "grad_norm": 8.500683784484863, "learning_rate": 5.648739311271692e-06, "loss": 0.3668, "step": 10004 }, { "epoch": 0.4640538033395176, "grad_norm": 6.046450138092041, "learning_rate": 5.648009582350309e-06, "loss": 0.3881, "step": 10005 }, { "epoch": 0.464100185528757, "grad_norm": 8.618335723876953, "learning_rate": 5.6472798393901395e-06, "loss": 0.372, "step": 10006 }, { "epoch": 0.4641465677179963, "grad_norm": 8.65085220336914, "learning_rate": 5.6465500824069945e-06, "loss": 0.3253, "step": 10007 }, { "epoch": 0.46419294990723564, "grad_norm": 7.798030376434326, "learning_rate": 5.645820311416681e-06, "loss": 0.27, "step": 10008 }, { "epoch": 0.46423933209647494, "grad_norm": 11.50062084197998, "learning_rate": 5.645090526435013e-06, "loss": 0.3326, "step": 10009 }, { "epoch": 0.4642857142857143, "grad_norm": 6.075462341308594, "learning_rate": 5.644360727477799e-06, "loss": 0.3603, "step": 10010 }, { "epoch": 0.4643320964749536, "grad_norm": 4.982481002807617, "learning_rate": 5.643630914560848e-06, "loss": 0.3038, "step": 10011 }, { "epoch": 0.46437847866419296, "grad_norm": 7.248159885406494, "learning_rate": 5.6429010876999745e-06, "loss": 0.2414, "step": 10012 }, { "epoch": 0.46442486085343226, "grad_norm": 14.454265594482422, "learning_rate": 5.6421712469109865e-06, "loss": 0.5113, "step": 10013 }, { "epoch": 0.4644712430426716, "grad_norm": 11.51051139831543, "learning_rate": 5.641441392209699e-06, "loss": 0.3407, "step": 10014 }, { "epoch": 0.4645176252319109, "grad_norm": 5.327162265777588, "learning_rate": 5.640711523611922e-06, "loss": 0.3758, "step": 10015 }, { "epoch": 0.4645640074211503, "grad_norm": 8.79235553741455, "learning_rate": 5.639981641133465e-06, "loss": 0.3638, "step": 10016 }, { "epoch": 0.4646103896103896, "grad_norm": 5.86126708984375, "learning_rate": 5.639251744790145e-06, "loss": 0.3217, "step": 10017 }, { "epoch": 0.46465677179962894, "grad_norm": 6.224485874176025, "learning_rate": 5.6385218345977745e-06, "loss": 0.3753, "step": 10018 }, { "epoch": 0.4647031539888683, "grad_norm": 5.489767074584961, "learning_rate": 5.637791910572163e-06, "loss": 0.3289, "step": 10019 }, { "epoch": 0.4647495361781076, "grad_norm": 9.058157920837402, "learning_rate": 5.637061972729128e-06, "loss": 0.3481, "step": 10020 }, { "epoch": 0.46479591836734696, "grad_norm": 5.408961772918701, "learning_rate": 5.636332021084481e-06, "loss": 0.2219, "step": 10021 }, { "epoch": 0.46484230055658626, "grad_norm": 4.7798027992248535, "learning_rate": 5.635602055654035e-06, "loss": 0.2421, "step": 10022 }, { "epoch": 0.4648886827458256, "grad_norm": 8.805458068847656, "learning_rate": 5.634872076453607e-06, "loss": 0.3413, "step": 10023 }, { "epoch": 0.4649350649350649, "grad_norm": 7.062859535217285, "learning_rate": 5.63414208349901e-06, "loss": 0.3575, "step": 10024 }, { "epoch": 0.4649814471243043, "grad_norm": 5.450164794921875, "learning_rate": 5.633412076806059e-06, "loss": 0.3407, "step": 10025 }, { "epoch": 0.4650278293135436, "grad_norm": 8.988728523254395, "learning_rate": 5.632682056390569e-06, "loss": 0.3596, "step": 10026 }, { "epoch": 0.46507421150278294, "grad_norm": 6.423664093017578, "learning_rate": 5.631952022268356e-06, "loss": 0.3232, "step": 10027 }, { "epoch": 0.46512059369202224, "grad_norm": 6.095117092132568, "learning_rate": 5.631221974455237e-06, "loss": 0.3853, "step": 10028 }, { "epoch": 0.4651669758812616, "grad_norm": 4.900246620178223, "learning_rate": 5.630491912967026e-06, "loss": 0.318, "step": 10029 }, { "epoch": 0.4652133580705009, "grad_norm": 7.228067874908447, "learning_rate": 5.629761837819541e-06, "loss": 0.351, "step": 10030 }, { "epoch": 0.46525974025974026, "grad_norm": 11.06018352508545, "learning_rate": 5.629031749028597e-06, "loss": 0.4252, "step": 10031 }, { "epoch": 0.46530612244897956, "grad_norm": 9.465496063232422, "learning_rate": 5.628301646610013e-06, "loss": 0.3981, "step": 10032 }, { "epoch": 0.4653525046382189, "grad_norm": 5.3292236328125, "learning_rate": 5.627571530579604e-06, "loss": 0.2838, "step": 10033 }, { "epoch": 0.4653988868274583, "grad_norm": 5.639602184295654, "learning_rate": 5.62684140095319e-06, "loss": 0.3671, "step": 10034 }, { "epoch": 0.4654452690166976, "grad_norm": 7.461705207824707, "learning_rate": 5.626111257746587e-06, "loss": 0.3636, "step": 10035 }, { "epoch": 0.46549165120593694, "grad_norm": 4.546947002410889, "learning_rate": 5.625381100975613e-06, "loss": 0.2396, "step": 10036 }, { "epoch": 0.46553803339517624, "grad_norm": 4.791583061218262, "learning_rate": 5.624650930656089e-06, "loss": 0.2941, "step": 10037 }, { "epoch": 0.4655844155844156, "grad_norm": 6.578858375549316, "learning_rate": 5.62392074680383e-06, "loss": 0.3625, "step": 10038 }, { "epoch": 0.4656307977736549, "grad_norm": 4.65015172958374, "learning_rate": 5.6231905494346585e-06, "loss": 0.2739, "step": 10039 }, { "epoch": 0.46567717996289426, "grad_norm": 7.573145866394043, "learning_rate": 5.622460338564393e-06, "loss": 0.3086, "step": 10040 }, { "epoch": 0.46572356215213356, "grad_norm": 5.337679862976074, "learning_rate": 5.621730114208852e-06, "loss": 0.2836, "step": 10041 }, { "epoch": 0.4657699443413729, "grad_norm": 15.272102355957031, "learning_rate": 5.620999876383856e-06, "loss": 0.5016, "step": 10042 }, { "epoch": 0.4658163265306122, "grad_norm": 4.881204128265381, "learning_rate": 5.620269625105225e-06, "loss": 0.3091, "step": 10043 }, { "epoch": 0.4658627087198516, "grad_norm": 10.322041511535645, "learning_rate": 5.6195393603887794e-06, "loss": 0.3513, "step": 10044 }, { "epoch": 0.4659090909090909, "grad_norm": 11.609895706176758, "learning_rate": 5.6188090822503414e-06, "loss": 0.4294, "step": 10045 }, { "epoch": 0.46595547309833024, "grad_norm": 9.344776153564453, "learning_rate": 5.61807879070573e-06, "loss": 0.3272, "step": 10046 }, { "epoch": 0.4660018552875696, "grad_norm": 24.24268913269043, "learning_rate": 5.617348485770767e-06, "loss": 0.6635, "step": 10047 }, { "epoch": 0.4660482374768089, "grad_norm": 8.476849555969238, "learning_rate": 5.6166181674612764e-06, "loss": 0.3203, "step": 10048 }, { "epoch": 0.46609461966604826, "grad_norm": 4.935840129852295, "learning_rate": 5.615887835793078e-06, "loss": 0.3311, "step": 10049 }, { "epoch": 0.46614100185528756, "grad_norm": 18.056886672973633, "learning_rate": 5.6151574907819925e-06, "loss": 0.3951, "step": 10050 }, { "epoch": 0.4661873840445269, "grad_norm": 4.684406757354736, "learning_rate": 5.614427132443847e-06, "loss": 0.423, "step": 10051 }, { "epoch": 0.4662337662337662, "grad_norm": 12.878049850463867, "learning_rate": 5.613696760794461e-06, "loss": 0.446, "step": 10052 }, { "epoch": 0.4662801484230056, "grad_norm": 4.8076863288879395, "learning_rate": 5.612966375849659e-06, "loss": 0.4043, "step": 10053 }, { "epoch": 0.4663265306122449, "grad_norm": 5.026290416717529, "learning_rate": 5.6122359776252626e-06, "loss": 0.2686, "step": 10054 }, { "epoch": 0.46637291280148424, "grad_norm": 7.405416011810303, "learning_rate": 5.611505566137096e-06, "loss": 0.2895, "step": 10055 }, { "epoch": 0.46641929499072354, "grad_norm": 7.060703754425049, "learning_rate": 5.610775141400986e-06, "loss": 0.2995, "step": 10056 }, { "epoch": 0.4664656771799629, "grad_norm": 5.377931118011475, "learning_rate": 5.610044703432753e-06, "loss": 0.3089, "step": 10057 }, { "epoch": 0.4665120593692022, "grad_norm": 3.971605062484741, "learning_rate": 5.609314252248223e-06, "loss": 0.2721, "step": 10058 }, { "epoch": 0.46655844155844156, "grad_norm": 3.3725757598876953, "learning_rate": 5.608583787863223e-06, "loss": 0.26, "step": 10059 }, { "epoch": 0.46660482374768086, "grad_norm": 5.076639175415039, "learning_rate": 5.607853310293575e-06, "loss": 0.3486, "step": 10060 }, { "epoch": 0.4666512059369202, "grad_norm": 5.113400459289551, "learning_rate": 5.607122819555106e-06, "loss": 0.3179, "step": 10061 }, { "epoch": 0.4666975881261596, "grad_norm": 6.899444580078125, "learning_rate": 5.606392315663641e-06, "loss": 0.4138, "step": 10062 }, { "epoch": 0.4667439703153989, "grad_norm": 4.872127056121826, "learning_rate": 5.605661798635007e-06, "loss": 0.3876, "step": 10063 }, { "epoch": 0.46679035250463824, "grad_norm": 5.774303436279297, "learning_rate": 5.6049312684850295e-06, "loss": 0.3935, "step": 10064 }, { "epoch": 0.46683673469387754, "grad_norm": 7.540722846984863, "learning_rate": 5.604200725229534e-06, "loss": 0.4535, "step": 10065 }, { "epoch": 0.4668831168831169, "grad_norm": 9.780597686767578, "learning_rate": 5.603470168884351e-06, "loss": 0.4062, "step": 10066 }, { "epoch": 0.4669294990723562, "grad_norm": 6.475736618041992, "learning_rate": 5.602739599465302e-06, "loss": 0.2832, "step": 10067 }, { "epoch": 0.46697588126159556, "grad_norm": 8.924582481384277, "learning_rate": 5.602009016988218e-06, "loss": 0.4513, "step": 10068 }, { "epoch": 0.46702226345083486, "grad_norm": 7.013042449951172, "learning_rate": 5.601278421468927e-06, "loss": 0.3964, "step": 10069 }, { "epoch": 0.4670686456400742, "grad_norm": 6.378941059112549, "learning_rate": 5.600547812923257e-06, "loss": 0.3962, "step": 10070 }, { "epoch": 0.4671150278293135, "grad_norm": 7.403030872344971, "learning_rate": 5.599817191367035e-06, "loss": 0.3762, "step": 10071 }, { "epoch": 0.4671614100185529, "grad_norm": 8.420703887939453, "learning_rate": 5.599086556816089e-06, "loss": 0.4145, "step": 10072 }, { "epoch": 0.4672077922077922, "grad_norm": 4.965128421783447, "learning_rate": 5.5983559092862485e-06, "loss": 0.3592, "step": 10073 }, { "epoch": 0.46725417439703154, "grad_norm": 13.630216598510742, "learning_rate": 5.597625248793344e-06, "loss": 0.4024, "step": 10074 }, { "epoch": 0.4673005565862709, "grad_norm": 6.0349955558776855, "learning_rate": 5.5968945753532035e-06, "loss": 0.3171, "step": 10075 }, { "epoch": 0.4673469387755102, "grad_norm": 13.4246187210083, "learning_rate": 5.596163888981656e-06, "loss": 0.4318, "step": 10076 }, { "epoch": 0.46739332096474956, "grad_norm": 15.413155555725098, "learning_rate": 5.595433189694534e-06, "loss": 0.3273, "step": 10077 }, { "epoch": 0.46743970315398886, "grad_norm": 4.683738708496094, "learning_rate": 5.594702477507663e-06, "loss": 0.3164, "step": 10078 }, { "epoch": 0.4674860853432282, "grad_norm": 6.724193096160889, "learning_rate": 5.5939717524368794e-06, "loss": 0.431, "step": 10079 }, { "epoch": 0.4675324675324675, "grad_norm": 5.880517482757568, "learning_rate": 5.59324101449801e-06, "loss": 0.3621, "step": 10080 }, { "epoch": 0.4675788497217069, "grad_norm": 7.702938556671143, "learning_rate": 5.592510263706887e-06, "loss": 0.3928, "step": 10081 }, { "epoch": 0.4676252319109462, "grad_norm": 4.581567764282227, "learning_rate": 5.591779500079342e-06, "loss": 0.2302, "step": 10082 }, { "epoch": 0.46767161410018554, "grad_norm": 8.66763973236084, "learning_rate": 5.591048723631205e-06, "loss": 0.4282, "step": 10083 }, { "epoch": 0.46771799628942484, "grad_norm": 8.40073013305664, "learning_rate": 5.59031793437831e-06, "loss": 0.4025, "step": 10084 }, { "epoch": 0.4677643784786642, "grad_norm": 5.131199836730957, "learning_rate": 5.589587132336488e-06, "loss": 0.2998, "step": 10085 }, { "epoch": 0.4678107606679035, "grad_norm": 8.974501609802246, "learning_rate": 5.588856317521573e-06, "loss": 0.2374, "step": 10086 }, { "epoch": 0.46785714285714286, "grad_norm": 4.5589165687561035, "learning_rate": 5.5881254899493955e-06, "loss": 0.3045, "step": 10087 }, { "epoch": 0.46790352504638216, "grad_norm": 6.654292106628418, "learning_rate": 5.587394649635789e-06, "loss": 0.3051, "step": 10088 }, { "epoch": 0.4679499072356215, "grad_norm": 4.92049503326416, "learning_rate": 5.586663796596588e-06, "loss": 0.3224, "step": 10089 }, { "epoch": 0.4679962894248609, "grad_norm": 14.386678695678711, "learning_rate": 5.585932930847624e-06, "loss": 0.2775, "step": 10090 }, { "epoch": 0.4680426716141002, "grad_norm": 4.040532112121582, "learning_rate": 5.585202052404733e-06, "loss": 0.2512, "step": 10091 }, { "epoch": 0.46808905380333954, "grad_norm": 10.561749458312988, "learning_rate": 5.584471161283749e-06, "loss": 0.3181, "step": 10092 }, { "epoch": 0.46813543599257884, "grad_norm": 6.579338550567627, "learning_rate": 5.583740257500504e-06, "loss": 0.3858, "step": 10093 }, { "epoch": 0.4681818181818182, "grad_norm": 6.457286834716797, "learning_rate": 5.583009341070836e-06, "loss": 0.2445, "step": 10094 }, { "epoch": 0.4682282003710575, "grad_norm": 6.325576305389404, "learning_rate": 5.582278412010577e-06, "loss": 0.3558, "step": 10095 }, { "epoch": 0.46827458256029686, "grad_norm": 9.704129219055176, "learning_rate": 5.581547470335563e-06, "loss": 0.358, "step": 10096 }, { "epoch": 0.46832096474953616, "grad_norm": 8.073065757751465, "learning_rate": 5.580816516061631e-06, "loss": 0.359, "step": 10097 }, { "epoch": 0.4683673469387755, "grad_norm": 9.520577430725098, "learning_rate": 5.5800855492046145e-06, "loss": 0.4249, "step": 10098 }, { "epoch": 0.4684137291280148, "grad_norm": 2.7877869606018066, "learning_rate": 5.5793545697803504e-06, "loss": 0.2111, "step": 10099 }, { "epoch": 0.4684601113172542, "grad_norm": 4.919610977172852, "learning_rate": 5.578623577804676e-06, "loss": 0.3134, "step": 10100 }, { "epoch": 0.4685064935064935, "grad_norm": 4.279700756072998, "learning_rate": 5.577892573293426e-06, "loss": 0.2092, "step": 10101 }, { "epoch": 0.46855287569573284, "grad_norm": 11.345271110534668, "learning_rate": 5.577161556262438e-06, "loss": 0.3571, "step": 10102 }, { "epoch": 0.4685992578849722, "grad_norm": 6.830028057098389, "learning_rate": 5.576430526727552e-06, "loss": 0.2792, "step": 10103 }, { "epoch": 0.4686456400742115, "grad_norm": 5.112950325012207, "learning_rate": 5.575699484704599e-06, "loss": 0.2494, "step": 10104 }, { "epoch": 0.46869202226345086, "grad_norm": 9.109612464904785, "learning_rate": 5.574968430209423e-06, "loss": 0.3241, "step": 10105 }, { "epoch": 0.46873840445269016, "grad_norm": 12.224842071533203, "learning_rate": 5.574237363257858e-06, "loss": 0.4073, "step": 10106 }, { "epoch": 0.4687847866419295, "grad_norm": 6.578021049499512, "learning_rate": 5.573506283865744e-06, "loss": 0.2603, "step": 10107 }, { "epoch": 0.4688311688311688, "grad_norm": 9.670623779296875, "learning_rate": 5.57277519204892e-06, "loss": 0.3954, "step": 10108 }, { "epoch": 0.4688775510204082, "grad_norm": 11.652910232543945, "learning_rate": 5.572044087823221e-06, "loss": 0.4977, "step": 10109 }, { "epoch": 0.4689239332096475, "grad_norm": 7.404344081878662, "learning_rate": 5.571312971204489e-06, "loss": 0.3188, "step": 10110 }, { "epoch": 0.46897031539888684, "grad_norm": 12.321073532104492, "learning_rate": 5.5705818422085646e-06, "loss": 0.4004, "step": 10111 }, { "epoch": 0.46901669758812614, "grad_norm": 6.866294860839844, "learning_rate": 5.5698507008512835e-06, "loss": 0.2902, "step": 10112 }, { "epoch": 0.4690630797773655, "grad_norm": 6.593748092651367, "learning_rate": 5.569119547148488e-06, "loss": 0.3182, "step": 10113 }, { "epoch": 0.4691094619666048, "grad_norm": 9.979667663574219, "learning_rate": 5.568388381116019e-06, "loss": 0.5224, "step": 10114 }, { "epoch": 0.46915584415584416, "grad_norm": 6.1327433586120605, "learning_rate": 5.567657202769714e-06, "loss": 0.2871, "step": 10115 }, { "epoch": 0.46920222634508346, "grad_norm": 26.158815383911133, "learning_rate": 5.566926012125417e-06, "loss": 0.4284, "step": 10116 }, { "epoch": 0.4692486085343228, "grad_norm": 8.11934757232666, "learning_rate": 5.566194809198965e-06, "loss": 0.3713, "step": 10117 }, { "epoch": 0.4692949907235622, "grad_norm": 9.926565170288086, "learning_rate": 5.565463594006202e-06, "loss": 0.4091, "step": 10118 }, { "epoch": 0.4693413729128015, "grad_norm": 7.83332633972168, "learning_rate": 5.564732366562968e-06, "loss": 0.3324, "step": 10119 }, { "epoch": 0.46938775510204084, "grad_norm": 6.070408821105957, "learning_rate": 5.564001126885106e-06, "loss": 0.2763, "step": 10120 }, { "epoch": 0.46943413729128014, "grad_norm": 6.197207450866699, "learning_rate": 5.563269874988455e-06, "loss": 0.407, "step": 10121 }, { "epoch": 0.4694805194805195, "grad_norm": 5.3732194900512695, "learning_rate": 5.562538610888863e-06, "loss": 0.3241, "step": 10122 }, { "epoch": 0.4695269016697588, "grad_norm": 8.029936790466309, "learning_rate": 5.5618073346021654e-06, "loss": 0.3525, "step": 10123 }, { "epoch": 0.46957328385899816, "grad_norm": 7.0652055740356445, "learning_rate": 5.56107604614421e-06, "loss": 0.3504, "step": 10124 }, { "epoch": 0.46961966604823746, "grad_norm": 12.912274360656738, "learning_rate": 5.560344745530837e-06, "loss": 0.3873, "step": 10125 }, { "epoch": 0.4696660482374768, "grad_norm": 11.204816818237305, "learning_rate": 5.55961343277789e-06, "loss": 0.3789, "step": 10126 }, { "epoch": 0.4697124304267161, "grad_norm": 10.340009689331055, "learning_rate": 5.558882107901215e-06, "loss": 0.4256, "step": 10127 }, { "epoch": 0.4697588126159555, "grad_norm": 5.75016975402832, "learning_rate": 5.558150770916653e-06, "loss": 0.3278, "step": 10128 }, { "epoch": 0.4698051948051948, "grad_norm": 17.579959869384766, "learning_rate": 5.557419421840048e-06, "loss": 0.5164, "step": 10129 }, { "epoch": 0.46985157699443414, "grad_norm": 8.175740242004395, "learning_rate": 5.556688060687246e-06, "loss": 0.2841, "step": 10130 }, { "epoch": 0.4698979591836735, "grad_norm": 7.846322536468506, "learning_rate": 5.555956687474091e-06, "loss": 0.3152, "step": 10131 }, { "epoch": 0.4699443413729128, "grad_norm": 6.252247333526611, "learning_rate": 5.555225302216427e-06, "loss": 0.2818, "step": 10132 }, { "epoch": 0.46999072356215216, "grad_norm": 4.626301288604736, "learning_rate": 5.554493904930101e-06, "loss": 0.2736, "step": 10133 }, { "epoch": 0.47003710575139146, "grad_norm": 9.773303031921387, "learning_rate": 5.553762495630957e-06, "loss": 0.4128, "step": 10134 }, { "epoch": 0.4700834879406308, "grad_norm": 12.570039749145508, "learning_rate": 5.5530310743348405e-06, "loss": 0.4974, "step": 10135 }, { "epoch": 0.4701298701298701, "grad_norm": 6.51754903793335, "learning_rate": 5.552299641057596e-06, "loss": 0.3396, "step": 10136 }, { "epoch": 0.4701762523191095, "grad_norm": 6.323230266571045, "learning_rate": 5.551568195815071e-06, "loss": 0.3472, "step": 10137 }, { "epoch": 0.4702226345083488, "grad_norm": 6.451212406158447, "learning_rate": 5.550836738623113e-06, "loss": 0.3448, "step": 10138 }, { "epoch": 0.47026901669758814, "grad_norm": 6.040367126464844, "learning_rate": 5.5501052694975675e-06, "loss": 0.4166, "step": 10139 }, { "epoch": 0.47031539888682744, "grad_norm": 10.903100967407227, "learning_rate": 5.549373788454281e-06, "loss": 0.4241, "step": 10140 }, { "epoch": 0.4703617810760668, "grad_norm": 4.93312931060791, "learning_rate": 5.548642295509103e-06, "loss": 0.2504, "step": 10141 }, { "epoch": 0.4704081632653061, "grad_norm": 6.590129852294922, "learning_rate": 5.547910790677877e-06, "loss": 0.3043, "step": 10142 }, { "epoch": 0.47045454545454546, "grad_norm": 6.389602184295654, "learning_rate": 5.547179273976453e-06, "loss": 0.3761, "step": 10143 }, { "epoch": 0.47050092764378476, "grad_norm": 37.128875732421875, "learning_rate": 5.54644774542068e-06, "loss": 0.3763, "step": 10144 }, { "epoch": 0.4705473098330241, "grad_norm": 5.62313175201416, "learning_rate": 5.545716205026403e-06, "loss": 0.3111, "step": 10145 }, { "epoch": 0.4705936920222635, "grad_norm": 10.796850204467773, "learning_rate": 5.544984652809474e-06, "loss": 0.3189, "step": 10146 }, { "epoch": 0.4706400742115028, "grad_norm": 8.213512420654297, "learning_rate": 5.544253088785738e-06, "loss": 0.3371, "step": 10147 }, { "epoch": 0.47068645640074214, "grad_norm": 5.383725643157959, "learning_rate": 5.543521512971046e-06, "loss": 0.2853, "step": 10148 }, { "epoch": 0.47073283858998144, "grad_norm": 9.834891319274902, "learning_rate": 5.542789925381249e-06, "loss": 0.3148, "step": 10149 }, { "epoch": 0.4707792207792208, "grad_norm": 18.63982582092285, "learning_rate": 5.542058326032194e-06, "loss": 0.4329, "step": 10150 }, { "epoch": 0.4708256029684601, "grad_norm": 6.129896640777588, "learning_rate": 5.54132671493973e-06, "loss": 0.2865, "step": 10151 }, { "epoch": 0.47087198515769946, "grad_norm": 8.517314910888672, "learning_rate": 5.540595092119709e-06, "loss": 0.2647, "step": 10152 }, { "epoch": 0.47091836734693876, "grad_norm": 5.854842662811279, "learning_rate": 5.539863457587981e-06, "loss": 0.366, "step": 10153 }, { "epoch": 0.4709647495361781, "grad_norm": 6.872878074645996, "learning_rate": 5.539131811360395e-06, "loss": 0.2777, "step": 10154 }, { "epoch": 0.4710111317254174, "grad_norm": 4.872988700866699, "learning_rate": 5.5384001534528046e-06, "loss": 0.3056, "step": 10155 }, { "epoch": 0.4710575139146568, "grad_norm": 8.495606422424316, "learning_rate": 5.537668483881055e-06, "loss": 0.3922, "step": 10156 }, { "epoch": 0.4711038961038961, "grad_norm": 9.553244590759277, "learning_rate": 5.536936802661006e-06, "loss": 0.2943, "step": 10157 }, { "epoch": 0.47115027829313544, "grad_norm": 9.76364517211914, "learning_rate": 5.536205109808501e-06, "loss": 0.4237, "step": 10158 }, { "epoch": 0.47119666048237474, "grad_norm": 6.165077209472656, "learning_rate": 5.535473405339396e-06, "loss": 0.37, "step": 10159 }, { "epoch": 0.4712430426716141, "grad_norm": 5.871565818786621, "learning_rate": 5.5347416892695425e-06, "loss": 0.2248, "step": 10160 }, { "epoch": 0.47128942486085346, "grad_norm": 5.392212390899658, "learning_rate": 5.5340099616147925e-06, "loss": 0.1605, "step": 10161 }, { "epoch": 0.47133580705009276, "grad_norm": 5.171445846557617, "learning_rate": 5.533278222390997e-06, "loss": 0.3427, "step": 10162 }, { "epoch": 0.4713821892393321, "grad_norm": 8.101616859436035, "learning_rate": 5.532546471614012e-06, "loss": 0.4063, "step": 10163 }, { "epoch": 0.4714285714285714, "grad_norm": 5.365435600280762, "learning_rate": 5.531814709299688e-06, "loss": 0.316, "step": 10164 }, { "epoch": 0.4714749536178108, "grad_norm": 6.882833003997803, "learning_rate": 5.531082935463878e-06, "loss": 0.2277, "step": 10165 }, { "epoch": 0.4715213358070501, "grad_norm": 7.523099899291992, "learning_rate": 5.530351150122437e-06, "loss": 0.2977, "step": 10166 }, { "epoch": 0.47156771799628944, "grad_norm": 8.488030433654785, "learning_rate": 5.5296193532912165e-06, "loss": 0.2785, "step": 10167 }, { "epoch": 0.47161410018552874, "grad_norm": 5.394816875457764, "learning_rate": 5.5288875449860745e-06, "loss": 0.3628, "step": 10168 }, { "epoch": 0.4716604823747681, "grad_norm": 7.76540470123291, "learning_rate": 5.528155725222861e-06, "loss": 0.2998, "step": 10169 }, { "epoch": 0.4717068645640074, "grad_norm": 5.931037425994873, "learning_rate": 5.527423894017433e-06, "loss": 0.3763, "step": 10170 }, { "epoch": 0.47175324675324676, "grad_norm": 12.259928703308105, "learning_rate": 5.526692051385645e-06, "loss": 0.606, "step": 10171 }, { "epoch": 0.47179962894248606, "grad_norm": 5.250259876251221, "learning_rate": 5.52596019734335e-06, "loss": 0.2739, "step": 10172 }, { "epoch": 0.4718460111317254, "grad_norm": 5.269914150238037, "learning_rate": 5.525228331906406e-06, "loss": 0.2788, "step": 10173 }, { "epoch": 0.4718923933209648, "grad_norm": 4.5268096923828125, "learning_rate": 5.524496455090668e-06, "loss": 0.3023, "step": 10174 }, { "epoch": 0.4719387755102041, "grad_norm": 3.5835154056549072, "learning_rate": 5.52376456691199e-06, "loss": 0.3255, "step": 10175 }, { "epoch": 0.47198515769944344, "grad_norm": 10.767200469970703, "learning_rate": 5.523032667386229e-06, "loss": 0.5128, "step": 10176 }, { "epoch": 0.47203153988868274, "grad_norm": 6.3745198249816895, "learning_rate": 5.522300756529241e-06, "loss": 0.3155, "step": 10177 }, { "epoch": 0.4720779220779221, "grad_norm": 6.7884626388549805, "learning_rate": 5.521568834356882e-06, "loss": 0.2672, "step": 10178 }, { "epoch": 0.4721243042671614, "grad_norm": 8.94383430480957, "learning_rate": 5.520836900885011e-06, "loss": 0.2954, "step": 10179 }, { "epoch": 0.47217068645640076, "grad_norm": 9.909832954406738, "learning_rate": 5.5201049561294805e-06, "loss": 0.1961, "step": 10180 }, { "epoch": 0.47221706864564006, "grad_norm": 7.148628234863281, "learning_rate": 5.519373000106152e-06, "loss": 0.3681, "step": 10181 }, { "epoch": 0.4722634508348794, "grad_norm": 6.774263858795166, "learning_rate": 5.518641032830882e-06, "loss": 0.3515, "step": 10182 }, { "epoch": 0.4723098330241187, "grad_norm": 11.813531875610352, "learning_rate": 5.517909054319527e-06, "loss": 0.3828, "step": 10183 }, { "epoch": 0.4723562152133581, "grad_norm": 4.438084602355957, "learning_rate": 5.517177064587945e-06, "loss": 0.4288, "step": 10184 }, { "epoch": 0.4724025974025974, "grad_norm": 6.989276885986328, "learning_rate": 5.516445063651996e-06, "loss": 0.3711, "step": 10185 }, { "epoch": 0.47244897959183674, "grad_norm": 7.409591197967529, "learning_rate": 5.515713051527536e-06, "loss": 0.2052, "step": 10186 }, { "epoch": 0.47249536178107604, "grad_norm": 8.176222801208496, "learning_rate": 5.514981028230426e-06, "loss": 0.4178, "step": 10187 }, { "epoch": 0.4725417439703154, "grad_norm": 8.920129776000977, "learning_rate": 5.514248993776522e-06, "loss": 0.3371, "step": 10188 }, { "epoch": 0.47258812615955476, "grad_norm": 7.8939738273620605, "learning_rate": 5.513516948181685e-06, "loss": 0.3643, "step": 10189 }, { "epoch": 0.47263450834879406, "grad_norm": 6.786016464233398, "learning_rate": 5.512784891461776e-06, "loss": 0.1548, "step": 10190 }, { "epoch": 0.4726808905380334, "grad_norm": 10.027708053588867, "learning_rate": 5.512052823632651e-06, "loss": 0.2926, "step": 10191 }, { "epoch": 0.4727272727272727, "grad_norm": 5.056833744049072, "learning_rate": 5.511320744710171e-06, "loss": 0.3882, "step": 10192 }, { "epoch": 0.4727736549165121, "grad_norm": 17.93586540222168, "learning_rate": 5.510588654710198e-06, "loss": 0.275, "step": 10193 }, { "epoch": 0.4728200371057514, "grad_norm": 9.342743873596191, "learning_rate": 5.5098565536485916e-06, "loss": 0.3603, "step": 10194 }, { "epoch": 0.47286641929499074, "grad_norm": 12.839018821716309, "learning_rate": 5.50912444154121e-06, "loss": 0.4629, "step": 10195 }, { "epoch": 0.47291280148423004, "grad_norm": 8.884112358093262, "learning_rate": 5.508392318403919e-06, "loss": 0.4064, "step": 10196 }, { "epoch": 0.4729591836734694, "grad_norm": 10.55245590209961, "learning_rate": 5.507660184252574e-06, "loss": 0.4544, "step": 10197 }, { "epoch": 0.4730055658627087, "grad_norm": 11.791658401489258, "learning_rate": 5.50692803910304e-06, "loss": 0.4768, "step": 10198 }, { "epoch": 0.47305194805194806, "grad_norm": 21.269746780395508, "learning_rate": 5.506195882971177e-06, "loss": 0.316, "step": 10199 }, { "epoch": 0.47309833024118736, "grad_norm": 5.870211601257324, "learning_rate": 5.505463715872846e-06, "loss": 0.3211, "step": 10200 }, { "epoch": 0.4731447124304267, "grad_norm": 7.149345874786377, "learning_rate": 5.504731537823913e-06, "loss": 0.2633, "step": 10201 }, { "epoch": 0.4731910946196661, "grad_norm": 6.165882110595703, "learning_rate": 5.503999348840235e-06, "loss": 0.2782, "step": 10202 }, { "epoch": 0.4732374768089054, "grad_norm": 7.843670845031738, "learning_rate": 5.5032671489376775e-06, "loss": 0.1727, "step": 10203 }, { "epoch": 0.47328385899814474, "grad_norm": 5.386988162994385, "learning_rate": 5.502534938132104e-06, "loss": 0.2805, "step": 10204 }, { "epoch": 0.47333024118738404, "grad_norm": 5.103599548339844, "learning_rate": 5.501802716439374e-06, "loss": 0.3467, "step": 10205 }, { "epoch": 0.4733766233766234, "grad_norm": 5.622665882110596, "learning_rate": 5.501070483875355e-06, "loss": 0.3034, "step": 10206 }, { "epoch": 0.4734230055658627, "grad_norm": 6.990610122680664, "learning_rate": 5.500338240455907e-06, "loss": 0.3694, "step": 10207 }, { "epoch": 0.47346938775510206, "grad_norm": 4.782339096069336, "learning_rate": 5.499605986196894e-06, "loss": 0.2627, "step": 10208 }, { "epoch": 0.47351576994434136, "grad_norm": 4.56612491607666, "learning_rate": 5.498873721114183e-06, "loss": 0.31, "step": 10209 }, { "epoch": 0.4735621521335807, "grad_norm": 6.696455478668213, "learning_rate": 5.498141445223635e-06, "loss": 0.3159, "step": 10210 }, { "epoch": 0.47360853432282, "grad_norm": 9.669540405273438, "learning_rate": 5.497409158541115e-06, "loss": 0.3456, "step": 10211 }, { "epoch": 0.4736549165120594, "grad_norm": 19.093700408935547, "learning_rate": 5.496676861082488e-06, "loss": 0.3948, "step": 10212 }, { "epoch": 0.4737012987012987, "grad_norm": 7.284026145935059, "learning_rate": 5.495944552863619e-06, "loss": 0.2903, "step": 10213 }, { "epoch": 0.47374768089053804, "grad_norm": 7.64670991897583, "learning_rate": 5.495212233900373e-06, "loss": 0.2787, "step": 10214 }, { "epoch": 0.47379406307977734, "grad_norm": 8.605316162109375, "learning_rate": 5.494479904208615e-06, "loss": 0.4076, "step": 10215 }, { "epoch": 0.4738404452690167, "grad_norm": 6.015340328216553, "learning_rate": 5.493747563804211e-06, "loss": 0.3119, "step": 10216 }, { "epoch": 0.47388682745825605, "grad_norm": 6.6778244972229, "learning_rate": 5.493015212703026e-06, "loss": 0.4818, "step": 10217 }, { "epoch": 0.47393320964749536, "grad_norm": 4.208168029785156, "learning_rate": 5.492282850920926e-06, "loss": 0.4057, "step": 10218 }, { "epoch": 0.4739795918367347, "grad_norm": 14.232242584228516, "learning_rate": 5.491550478473778e-06, "loss": 0.3273, "step": 10219 }, { "epoch": 0.474025974025974, "grad_norm": 16.86747169494629, "learning_rate": 5.490818095377448e-06, "loss": 0.4827, "step": 10220 }, { "epoch": 0.4740723562152134, "grad_norm": 7.461638450622559, "learning_rate": 5.490085701647805e-06, "loss": 0.3242, "step": 10221 }, { "epoch": 0.4741187384044527, "grad_norm": 4.202620983123779, "learning_rate": 5.489353297300712e-06, "loss": 0.2531, "step": 10222 }, { "epoch": 0.47416512059369204, "grad_norm": 7.985359191894531, "learning_rate": 5.488620882352036e-06, "loss": 0.3974, "step": 10223 }, { "epoch": 0.47421150278293134, "grad_norm": 8.999578475952148, "learning_rate": 5.4878884568176494e-06, "loss": 0.3083, "step": 10224 }, { "epoch": 0.4742578849721707, "grad_norm": 7.086114883422852, "learning_rate": 5.487156020713414e-06, "loss": 0.4418, "step": 10225 }, { "epoch": 0.47430426716141, "grad_norm": 12.066324234008789, "learning_rate": 5.486423574055202e-06, "loss": 0.3431, "step": 10226 }, { "epoch": 0.47435064935064936, "grad_norm": 9.851766586303711, "learning_rate": 5.4856911168588815e-06, "loss": 0.3944, "step": 10227 }, { "epoch": 0.47439703153988866, "grad_norm": 11.527342796325684, "learning_rate": 5.484958649140316e-06, "loss": 0.4076, "step": 10228 }, { "epoch": 0.474443413729128, "grad_norm": 5.482172012329102, "learning_rate": 5.484226170915379e-06, "loss": 0.3863, "step": 10229 }, { "epoch": 0.4744897959183674, "grad_norm": 9.961771011352539, "learning_rate": 5.483493682199936e-06, "loss": 0.4343, "step": 10230 }, { "epoch": 0.4745361781076067, "grad_norm": 12.096077919006348, "learning_rate": 5.482761183009858e-06, "loss": 0.4295, "step": 10231 }, { "epoch": 0.47458256029684603, "grad_norm": 8.195754051208496, "learning_rate": 5.482028673361015e-06, "loss": 0.4972, "step": 10232 }, { "epoch": 0.47462894248608534, "grad_norm": 10.94210147857666, "learning_rate": 5.481296153269273e-06, "loss": 0.4222, "step": 10233 }, { "epoch": 0.4746753246753247, "grad_norm": 8.264429092407227, "learning_rate": 5.480563622750505e-06, "loss": 0.2777, "step": 10234 }, { "epoch": 0.474721706864564, "grad_norm": 12.598152160644531, "learning_rate": 5.47983108182058e-06, "loss": 0.4426, "step": 10235 }, { "epoch": 0.47476808905380335, "grad_norm": 4.531144618988037, "learning_rate": 5.479098530495366e-06, "loss": 0.3068, "step": 10236 }, { "epoch": 0.47481447124304266, "grad_norm": 6.755772113800049, "learning_rate": 5.478365968790737e-06, "loss": 0.3774, "step": 10237 }, { "epoch": 0.474860853432282, "grad_norm": 5.294691562652588, "learning_rate": 5.47763339672256e-06, "loss": 0.3097, "step": 10238 }, { "epoch": 0.4749072356215213, "grad_norm": 10.108640670776367, "learning_rate": 5.476900814306708e-06, "loss": 0.3158, "step": 10239 }, { "epoch": 0.4749536178107607, "grad_norm": 3.8573410511016846, "learning_rate": 5.476168221559052e-06, "loss": 0.2561, "step": 10240 }, { "epoch": 0.475, "grad_norm": 5.896828651428223, "learning_rate": 5.4754356184954604e-06, "loss": 0.2238, "step": 10241 }, { "epoch": 0.47504638218923934, "grad_norm": 3.9511232376098633, "learning_rate": 5.474703005131809e-06, "loss": 0.2999, "step": 10242 }, { "epoch": 0.47509276437847864, "grad_norm": 10.479676246643066, "learning_rate": 5.473970381483967e-06, "loss": 0.4837, "step": 10243 }, { "epoch": 0.475139146567718, "grad_norm": 5.576125144958496, "learning_rate": 5.473237747567805e-06, "loss": 0.2524, "step": 10244 }, { "epoch": 0.47518552875695735, "grad_norm": 7.167482376098633, "learning_rate": 5.472505103399201e-06, "loss": 0.3347, "step": 10245 }, { "epoch": 0.47523191094619666, "grad_norm": 6.963109493255615, "learning_rate": 5.471772448994019e-06, "loss": 0.3985, "step": 10246 }, { "epoch": 0.475278293135436, "grad_norm": 4.43503999710083, "learning_rate": 5.471039784368138e-06, "loss": 0.3759, "step": 10247 }, { "epoch": 0.4753246753246753, "grad_norm": 5.696291446685791, "learning_rate": 5.470307109537427e-06, "loss": 0.3561, "step": 10248 }, { "epoch": 0.4753710575139147, "grad_norm": 9.099211692810059, "learning_rate": 5.46957442451776e-06, "loss": 0.4382, "step": 10249 }, { "epoch": 0.475417439703154, "grad_norm": 4.077214241027832, "learning_rate": 5.468841729325014e-06, "loss": 0.3351, "step": 10250 }, { "epoch": 0.47546382189239333, "grad_norm": 8.869339942932129, "learning_rate": 5.4681090239750565e-06, "loss": 0.4491, "step": 10251 }, { "epoch": 0.47551020408163264, "grad_norm": 7.538790225982666, "learning_rate": 5.467376308483764e-06, "loss": 0.3704, "step": 10252 }, { "epoch": 0.475556586270872, "grad_norm": 6.7142462730407715, "learning_rate": 5.466643582867011e-06, "loss": 0.3683, "step": 10253 }, { "epoch": 0.4756029684601113, "grad_norm": 8.657110214233398, "learning_rate": 5.465910847140671e-06, "loss": 0.3335, "step": 10254 }, { "epoch": 0.47564935064935066, "grad_norm": 5.804176330566406, "learning_rate": 5.465178101320619e-06, "loss": 0.201, "step": 10255 }, { "epoch": 0.47569573283858996, "grad_norm": 10.887130737304688, "learning_rate": 5.464445345422727e-06, "loss": 0.4212, "step": 10256 }, { "epoch": 0.4757421150278293, "grad_norm": 9.332157135009766, "learning_rate": 5.463712579462872e-06, "loss": 0.4375, "step": 10257 }, { "epoch": 0.4757884972170687, "grad_norm": 3.463789224624634, "learning_rate": 5.46297980345693e-06, "loss": 0.2652, "step": 10258 }, { "epoch": 0.475834879406308, "grad_norm": 8.293112754821777, "learning_rate": 5.4622470174207735e-06, "loss": 0.4546, "step": 10259 }, { "epoch": 0.47588126159554733, "grad_norm": 6.583811283111572, "learning_rate": 5.461514221370279e-06, "loss": 0.3923, "step": 10260 }, { "epoch": 0.47592764378478664, "grad_norm": 5.922987937927246, "learning_rate": 5.460781415321323e-06, "loss": 0.3886, "step": 10261 }, { "epoch": 0.475974025974026, "grad_norm": 8.861677169799805, "learning_rate": 5.46004859928978e-06, "loss": 0.4158, "step": 10262 }, { "epoch": 0.4760204081632653, "grad_norm": 10.368117332458496, "learning_rate": 5.459315773291527e-06, "loss": 0.4471, "step": 10263 }, { "epoch": 0.47606679035250465, "grad_norm": 7.71075439453125, "learning_rate": 5.45858293734244e-06, "loss": 0.3308, "step": 10264 }, { "epoch": 0.47611317254174396, "grad_norm": 7.475613117218018, "learning_rate": 5.457850091458397e-06, "loss": 0.3126, "step": 10265 }, { "epoch": 0.4761595547309833, "grad_norm": 4.878756523132324, "learning_rate": 5.45711723565527e-06, "loss": 0.2943, "step": 10266 }, { "epoch": 0.4762059369202226, "grad_norm": 6.10081672668457, "learning_rate": 5.456384369948942e-06, "loss": 0.2552, "step": 10267 }, { "epoch": 0.476252319109462, "grad_norm": 5.606805801391602, "learning_rate": 5.455651494355286e-06, "loss": 0.3807, "step": 10268 }, { "epoch": 0.4762987012987013, "grad_norm": 7.424797058105469, "learning_rate": 5.4549186088901815e-06, "loss": 0.4203, "step": 10269 }, { "epoch": 0.47634508348794063, "grad_norm": 11.32441520690918, "learning_rate": 5.4541857135695055e-06, "loss": 0.421, "step": 10270 }, { "epoch": 0.47639146567717994, "grad_norm": 4.462982177734375, "learning_rate": 5.453452808409134e-06, "loss": 0.319, "step": 10271 }, { "epoch": 0.4764378478664193, "grad_norm": 6.07730770111084, "learning_rate": 5.452719893424949e-06, "loss": 0.3884, "step": 10272 }, { "epoch": 0.47648423005565865, "grad_norm": 10.547846794128418, "learning_rate": 5.451986968632824e-06, "loss": 0.4607, "step": 10273 }, { "epoch": 0.47653061224489796, "grad_norm": 5.625880241394043, "learning_rate": 5.451254034048641e-06, "loss": 0.3006, "step": 10274 }, { "epoch": 0.4765769944341373, "grad_norm": 5.027763843536377, "learning_rate": 5.450521089688277e-06, "loss": 0.3163, "step": 10275 }, { "epoch": 0.4766233766233766, "grad_norm": 6.3689446449279785, "learning_rate": 5.449788135567611e-06, "loss": 0.4102, "step": 10276 }, { "epoch": 0.476669758812616, "grad_norm": 10.472615242004395, "learning_rate": 5.449055171702522e-06, "loss": 0.4242, "step": 10277 }, { "epoch": 0.4767161410018553, "grad_norm": 7.461386203765869, "learning_rate": 5.448322198108891e-06, "loss": 0.3397, "step": 10278 }, { "epoch": 0.47676252319109463, "grad_norm": 4.729733943939209, "learning_rate": 5.447589214802594e-06, "loss": 0.2978, "step": 10279 }, { "epoch": 0.47680890538033394, "grad_norm": 10.918665885925293, "learning_rate": 5.446856221799515e-06, "loss": 0.4027, "step": 10280 }, { "epoch": 0.4768552875695733, "grad_norm": 6.630922317504883, "learning_rate": 5.44612321911553e-06, "loss": 0.3108, "step": 10281 }, { "epoch": 0.4769016697588126, "grad_norm": 6.974567890167236, "learning_rate": 5.445390206766521e-06, "loss": 0.3103, "step": 10282 }, { "epoch": 0.47694805194805195, "grad_norm": 9.395153999328613, "learning_rate": 5.444657184768369e-06, "loss": 0.3648, "step": 10283 }, { "epoch": 0.47699443413729126, "grad_norm": 12.17719841003418, "learning_rate": 5.443924153136952e-06, "loss": 0.3929, "step": 10284 }, { "epoch": 0.4770408163265306, "grad_norm": 18.589427947998047, "learning_rate": 5.443191111888153e-06, "loss": 0.6757, "step": 10285 }, { "epoch": 0.4770871985157699, "grad_norm": 8.341684341430664, "learning_rate": 5.442458061037854e-06, "loss": 0.352, "step": 10286 }, { "epoch": 0.4771335807050093, "grad_norm": 7.216604709625244, "learning_rate": 5.441725000601933e-06, "loss": 0.3348, "step": 10287 }, { "epoch": 0.47717996289424863, "grad_norm": 8.01081657409668, "learning_rate": 5.440991930596273e-06, "loss": 0.3862, "step": 10288 }, { "epoch": 0.47722634508348794, "grad_norm": 15.064566612243652, "learning_rate": 5.440258851036755e-06, "loss": 0.4984, "step": 10289 }, { "epoch": 0.4772727272727273, "grad_norm": 6.307171821594238, "learning_rate": 5.439525761939261e-06, "loss": 0.4461, "step": 10290 }, { "epoch": 0.4773191094619666, "grad_norm": 5.661396503448486, "learning_rate": 5.438792663319675e-06, "loss": 0.265, "step": 10291 }, { "epoch": 0.47736549165120595, "grad_norm": 5.271273136138916, "learning_rate": 5.438059555193875e-06, "loss": 0.2537, "step": 10292 }, { "epoch": 0.47741187384044526, "grad_norm": 6.234238147735596, "learning_rate": 5.4373264375777464e-06, "loss": 0.3718, "step": 10293 }, { "epoch": 0.4774582560296846, "grad_norm": 6.080933094024658, "learning_rate": 5.436593310487174e-06, "loss": 0.3607, "step": 10294 }, { "epoch": 0.4775046382189239, "grad_norm": 8.244632720947266, "learning_rate": 5.435860173938035e-06, "loss": 0.2986, "step": 10295 }, { "epoch": 0.4775510204081633, "grad_norm": 5.397369384765625, "learning_rate": 5.435127027946215e-06, "loss": 0.2912, "step": 10296 }, { "epoch": 0.4775974025974026, "grad_norm": 8.20649242401123, "learning_rate": 5.434393872527599e-06, "loss": 0.3989, "step": 10297 }, { "epoch": 0.47764378478664193, "grad_norm": 7.244223117828369, "learning_rate": 5.433660707698067e-06, "loss": 0.3424, "step": 10298 }, { "epoch": 0.47769016697588124, "grad_norm": 7.624274730682373, "learning_rate": 5.432927533473507e-06, "loss": 0.3749, "step": 10299 }, { "epoch": 0.4777365491651206, "grad_norm": 5.671207904815674, "learning_rate": 5.432194349869798e-06, "loss": 0.2896, "step": 10300 }, { "epoch": 0.47778293135435995, "grad_norm": 9.89541244506836, "learning_rate": 5.4314611569028276e-06, "loss": 0.324, "step": 10301 }, { "epoch": 0.47782931354359925, "grad_norm": 4.453249454498291, "learning_rate": 5.4307279545884794e-06, "loss": 0.3634, "step": 10302 }, { "epoch": 0.4778756957328386, "grad_norm": 5.814757823944092, "learning_rate": 5.429994742942636e-06, "loss": 0.3521, "step": 10303 }, { "epoch": 0.4779220779220779, "grad_norm": 11.515525817871094, "learning_rate": 5.429261521981183e-06, "loss": 0.3091, "step": 10304 }, { "epoch": 0.4779684601113173, "grad_norm": 10.045958518981934, "learning_rate": 5.428528291720007e-06, "loss": 0.4955, "step": 10305 }, { "epoch": 0.4780148423005566, "grad_norm": 10.104114532470703, "learning_rate": 5.4277950521749904e-06, "loss": 0.2914, "step": 10306 }, { "epoch": 0.47806122448979593, "grad_norm": 4.602095127105713, "learning_rate": 5.427061803362019e-06, "loss": 0.3938, "step": 10307 }, { "epoch": 0.47810760667903524, "grad_norm": 11.30599308013916, "learning_rate": 5.4263285452969805e-06, "loss": 0.3633, "step": 10308 }, { "epoch": 0.4781539888682746, "grad_norm": 11.521448135375977, "learning_rate": 5.425595277995759e-06, "loss": 0.3721, "step": 10309 }, { "epoch": 0.4782003710575139, "grad_norm": 6.196352481842041, "learning_rate": 5.424862001474239e-06, "loss": 0.2818, "step": 10310 }, { "epoch": 0.47824675324675325, "grad_norm": 5.1957244873046875, "learning_rate": 5.424128715748309e-06, "loss": 0.2554, "step": 10311 }, { "epoch": 0.47829313543599256, "grad_norm": 6.436439514160156, "learning_rate": 5.423395420833853e-06, "loss": 0.3416, "step": 10312 }, { "epoch": 0.4783395176252319, "grad_norm": 6.460702419281006, "learning_rate": 5.4226621167467595e-06, "loss": 0.3217, "step": 10313 }, { "epoch": 0.4783858998144712, "grad_norm": 6.217263698577881, "learning_rate": 5.421928803502914e-06, "loss": 0.3717, "step": 10314 }, { "epoch": 0.4784322820037106, "grad_norm": 5.854287624359131, "learning_rate": 5.4211954811182025e-06, "loss": 0.2376, "step": 10315 }, { "epoch": 0.47847866419294993, "grad_norm": 6.711682319641113, "learning_rate": 5.420462149608514e-06, "loss": 0.3967, "step": 10316 }, { "epoch": 0.47852504638218923, "grad_norm": 6.669814586639404, "learning_rate": 5.4197288089897346e-06, "loss": 0.315, "step": 10317 }, { "epoch": 0.4785714285714286, "grad_norm": 13.695051193237305, "learning_rate": 5.418995459277751e-06, "loss": 0.3323, "step": 10318 }, { "epoch": 0.4786178107606679, "grad_norm": 6.057754039764404, "learning_rate": 5.418262100488454e-06, "loss": 0.3975, "step": 10319 }, { "epoch": 0.47866419294990725, "grad_norm": 13.519203186035156, "learning_rate": 5.417528732637727e-06, "loss": 0.4747, "step": 10320 }, { "epoch": 0.47871057513914655, "grad_norm": 14.649846076965332, "learning_rate": 5.416795355741462e-06, "loss": 0.4936, "step": 10321 }, { "epoch": 0.4787569573283859, "grad_norm": 3.096437931060791, "learning_rate": 5.416061969815544e-06, "loss": 0.1953, "step": 10322 }, { "epoch": 0.4788033395176252, "grad_norm": 4.635781764984131, "learning_rate": 5.4153285748758636e-06, "loss": 0.2724, "step": 10323 }, { "epoch": 0.4788497217068646, "grad_norm": 6.492856502532959, "learning_rate": 5.414595170938309e-06, "loss": 0.212, "step": 10324 }, { "epoch": 0.4788961038961039, "grad_norm": 4.358199119567871, "learning_rate": 5.413861758018768e-06, "loss": 0.3444, "step": 10325 }, { "epoch": 0.47894248608534323, "grad_norm": 5.637645244598389, "learning_rate": 5.41312833613313e-06, "loss": 0.3138, "step": 10326 }, { "epoch": 0.47898886827458254, "grad_norm": 5.086592674255371, "learning_rate": 5.412394905297285e-06, "loss": 0.3562, "step": 10327 }, { "epoch": 0.4790352504638219, "grad_norm": 9.877510070800781, "learning_rate": 5.411661465527123e-06, "loss": 0.3717, "step": 10328 }, { "epoch": 0.47908163265306125, "grad_norm": 6.783180236816406, "learning_rate": 5.410928016838531e-06, "loss": 0.3105, "step": 10329 }, { "epoch": 0.47912801484230055, "grad_norm": 6.513617992401123, "learning_rate": 5.4101945592474e-06, "loss": 0.3489, "step": 10330 }, { "epoch": 0.4791743970315399, "grad_norm": 5.812580108642578, "learning_rate": 5.409461092769621e-06, "loss": 0.225, "step": 10331 }, { "epoch": 0.4792207792207792, "grad_norm": 8.725728034973145, "learning_rate": 5.408727617421085e-06, "loss": 0.3692, "step": 10332 }, { "epoch": 0.47926716141001857, "grad_norm": 5.008176803588867, "learning_rate": 5.407994133217678e-06, "loss": 0.2348, "step": 10333 }, { "epoch": 0.4793135435992579, "grad_norm": 8.876189231872559, "learning_rate": 5.407260640175294e-06, "loss": 0.2657, "step": 10334 }, { "epoch": 0.47935992578849723, "grad_norm": 7.412419319152832, "learning_rate": 5.406527138309824e-06, "loss": 0.3549, "step": 10335 }, { "epoch": 0.47940630797773653, "grad_norm": 7.143765926361084, "learning_rate": 5.405793627637157e-06, "loss": 0.4146, "step": 10336 }, { "epoch": 0.4794526901669759, "grad_norm": 8.985736846923828, "learning_rate": 5.405060108173184e-06, "loss": 0.3991, "step": 10337 }, { "epoch": 0.4794990723562152, "grad_norm": 7.685865879058838, "learning_rate": 5.4043265799338005e-06, "loss": 0.3789, "step": 10338 }, { "epoch": 0.47954545454545455, "grad_norm": 8.000649452209473, "learning_rate": 5.4035930429348925e-06, "loss": 0.3161, "step": 10339 }, { "epoch": 0.47959183673469385, "grad_norm": 7.810375690460205, "learning_rate": 5.402859497192356e-06, "loss": 0.3099, "step": 10340 }, { "epoch": 0.4796382189239332, "grad_norm": 11.986532211303711, "learning_rate": 5.402125942722079e-06, "loss": 0.3547, "step": 10341 }, { "epoch": 0.4796846011131725, "grad_norm": 7.062534809112549, "learning_rate": 5.401392379539956e-06, "loss": 0.3887, "step": 10342 }, { "epoch": 0.4797309833024119, "grad_norm": 5.202923774719238, "learning_rate": 5.400658807661881e-06, "loss": 0.2953, "step": 10343 }, { "epoch": 0.47977736549165123, "grad_norm": 8.613252639770508, "learning_rate": 5.39992522710374e-06, "loss": 0.3408, "step": 10344 }, { "epoch": 0.47982374768089053, "grad_norm": 9.66677474975586, "learning_rate": 5.3991916378814335e-06, "loss": 0.3836, "step": 10345 }, { "epoch": 0.4798701298701299, "grad_norm": 20.64157485961914, "learning_rate": 5.39845804001085e-06, "loss": 0.6423, "step": 10346 }, { "epoch": 0.4799165120593692, "grad_norm": 8.78254508972168, "learning_rate": 5.397724433507883e-06, "loss": 0.3721, "step": 10347 }, { "epoch": 0.47996289424860855, "grad_norm": 10.545602798461914, "learning_rate": 5.3969908183884244e-06, "loss": 0.4921, "step": 10348 }, { "epoch": 0.48000927643784785, "grad_norm": 9.682891845703125, "learning_rate": 5.396257194668371e-06, "loss": 0.4203, "step": 10349 }, { "epoch": 0.4800556586270872, "grad_norm": 5.886151313781738, "learning_rate": 5.3955235623636134e-06, "loss": 0.2607, "step": 10350 }, { "epoch": 0.4801020408163265, "grad_norm": 6.65281867980957, "learning_rate": 5.394789921490048e-06, "loss": 0.2569, "step": 10351 }, { "epoch": 0.48014842300556587, "grad_norm": 9.35261058807373, "learning_rate": 5.394056272063566e-06, "loss": 0.2969, "step": 10352 }, { "epoch": 0.4801948051948052, "grad_norm": 5.315400123596191, "learning_rate": 5.3933226141000615e-06, "loss": 0.3066, "step": 10353 }, { "epoch": 0.48024118738404453, "grad_norm": 6.112271308898926, "learning_rate": 5.392588947615433e-06, "loss": 0.3582, "step": 10354 }, { "epoch": 0.48028756957328383, "grad_norm": 5.582724571228027, "learning_rate": 5.39185527262557e-06, "loss": 0.3543, "step": 10355 }, { "epoch": 0.4803339517625232, "grad_norm": 12.469563484191895, "learning_rate": 5.391121589146368e-06, "loss": 0.4113, "step": 10356 }, { "epoch": 0.48038033395176255, "grad_norm": 13.394937515258789, "learning_rate": 5.390387897193726e-06, "loss": 0.5402, "step": 10357 }, { "epoch": 0.48042671614100185, "grad_norm": 14.937427520751953, "learning_rate": 5.389654196783535e-06, "loss": 0.3432, "step": 10358 }, { "epoch": 0.4804730983302412, "grad_norm": 5.739044189453125, "learning_rate": 5.388920487931691e-06, "loss": 0.3905, "step": 10359 }, { "epoch": 0.4805194805194805, "grad_norm": 9.19394588470459, "learning_rate": 5.38818677065409e-06, "loss": 0.3197, "step": 10360 }, { "epoch": 0.48056586270871987, "grad_norm": 9.163705825805664, "learning_rate": 5.387453044966626e-06, "loss": 0.4351, "step": 10361 }, { "epoch": 0.4806122448979592, "grad_norm": 5.82880163192749, "learning_rate": 5.386719310885199e-06, "loss": 0.3052, "step": 10362 }, { "epoch": 0.48065862708719853, "grad_norm": 4.981091499328613, "learning_rate": 5.3859855684257e-06, "loss": 0.2229, "step": 10363 }, { "epoch": 0.48070500927643783, "grad_norm": 5.833980083465576, "learning_rate": 5.3852518176040285e-06, "loss": 0.2292, "step": 10364 }, { "epoch": 0.4807513914656772, "grad_norm": 10.294685363769531, "learning_rate": 5.384518058436078e-06, "loss": 0.3628, "step": 10365 }, { "epoch": 0.4807977736549165, "grad_norm": 5.6255693435668945, "learning_rate": 5.383784290937747e-06, "loss": 0.2784, "step": 10366 }, { "epoch": 0.48084415584415585, "grad_norm": 13.638849258422852, "learning_rate": 5.383050515124933e-06, "loss": 0.4043, "step": 10367 }, { "epoch": 0.48089053803339515, "grad_norm": 4.151729583740234, "learning_rate": 5.3823167310135304e-06, "loss": 0.3444, "step": 10368 }, { "epoch": 0.4809369202226345, "grad_norm": 10.89651107788086, "learning_rate": 5.381582938619439e-06, "loss": 0.402, "step": 10369 }, { "epoch": 0.4809833024118738, "grad_norm": 6.327680587768555, "learning_rate": 5.3808491379585525e-06, "loss": 0.2756, "step": 10370 }, { "epoch": 0.48102968460111317, "grad_norm": 5.388116836547852, "learning_rate": 5.38011532904677e-06, "loss": 0.2994, "step": 10371 }, { "epoch": 0.48107606679035253, "grad_norm": 5.151648998260498, "learning_rate": 5.379381511899991e-06, "loss": 0.3026, "step": 10372 }, { "epoch": 0.48112244897959183, "grad_norm": 9.369340896606445, "learning_rate": 5.378647686534112e-06, "loss": 0.2739, "step": 10373 }, { "epoch": 0.4811688311688312, "grad_norm": 12.109457969665527, "learning_rate": 5.37791385296503e-06, "loss": 0.4282, "step": 10374 }, { "epoch": 0.4812152133580705, "grad_norm": 6.687482833862305, "learning_rate": 5.377180011208642e-06, "loss": 0.3851, "step": 10375 }, { "epoch": 0.48126159554730985, "grad_norm": 3.427779197692871, "learning_rate": 5.376446161280851e-06, "loss": 0.1935, "step": 10376 }, { "epoch": 0.48130797773654915, "grad_norm": 14.145052909851074, "learning_rate": 5.375712303197551e-06, "loss": 0.3155, "step": 10377 }, { "epoch": 0.4813543599257885, "grad_norm": 10.76494026184082, "learning_rate": 5.374978436974641e-06, "loss": 0.3841, "step": 10378 }, { "epoch": 0.4814007421150278, "grad_norm": 5.9467949867248535, "learning_rate": 5.374244562628022e-06, "loss": 0.3481, "step": 10379 }, { "epoch": 0.48144712430426717, "grad_norm": 8.397335052490234, "learning_rate": 5.373510680173593e-06, "loss": 0.2312, "step": 10380 }, { "epoch": 0.4814935064935065, "grad_norm": 5.073099136352539, "learning_rate": 5.37277678962725e-06, "loss": 0.3711, "step": 10381 }, { "epoch": 0.48153988868274583, "grad_norm": 13.999577522277832, "learning_rate": 5.372042891004896e-06, "loss": 0.4021, "step": 10382 }, { "epoch": 0.48158627087198513, "grad_norm": 9.004717826843262, "learning_rate": 5.371308984322428e-06, "loss": 0.3669, "step": 10383 }, { "epoch": 0.4816326530612245, "grad_norm": 18.043582916259766, "learning_rate": 5.370575069595748e-06, "loss": 0.5534, "step": 10384 }, { "epoch": 0.48167903525046385, "grad_norm": 6.717107772827148, "learning_rate": 5.369841146840754e-06, "loss": 0.2939, "step": 10385 }, { "epoch": 0.48172541743970315, "grad_norm": 5.246161937713623, "learning_rate": 5.369107216073346e-06, "loss": 0.3093, "step": 10386 }, { "epoch": 0.4817717996289425, "grad_norm": 5.369143486022949, "learning_rate": 5.368373277309426e-06, "loss": 0.3153, "step": 10387 }, { "epoch": 0.4818181818181818, "grad_norm": 6.214001655578613, "learning_rate": 5.3676393305648924e-06, "loss": 0.3776, "step": 10388 }, { "epoch": 0.48186456400742117, "grad_norm": 6.901288986206055, "learning_rate": 5.3669053758556455e-06, "loss": 0.361, "step": 10389 }, { "epoch": 0.4819109461966605, "grad_norm": 4.700535774230957, "learning_rate": 5.36617141319759e-06, "loss": 0.2458, "step": 10390 }, { "epoch": 0.48195732838589983, "grad_norm": 5.949446201324463, "learning_rate": 5.365437442606622e-06, "loss": 0.2701, "step": 10391 }, { "epoch": 0.48200371057513913, "grad_norm": 7.421104907989502, "learning_rate": 5.364703464098645e-06, "loss": 0.3596, "step": 10392 }, { "epoch": 0.4820500927643785, "grad_norm": 14.723848342895508, "learning_rate": 5.363969477689559e-06, "loss": 0.6389, "step": 10393 }, { "epoch": 0.4820964749536178, "grad_norm": 4.5610032081604, "learning_rate": 5.363235483395267e-06, "loss": 0.2692, "step": 10394 }, { "epoch": 0.48214285714285715, "grad_norm": 5.821201324462891, "learning_rate": 5.36250148123167e-06, "loss": 0.3326, "step": 10395 }, { "epoch": 0.48218923933209645, "grad_norm": 5.81520414352417, "learning_rate": 5.36176747121467e-06, "loss": 0.3231, "step": 10396 }, { "epoch": 0.4822356215213358, "grad_norm": 9.880691528320312, "learning_rate": 5.361033453360167e-06, "loss": 0.3496, "step": 10397 }, { "epoch": 0.4822820037105751, "grad_norm": 7.7319016456604, "learning_rate": 5.360299427684066e-06, "loss": 0.335, "step": 10398 }, { "epoch": 0.48232838589981447, "grad_norm": 6.453728675842285, "learning_rate": 5.359565394202267e-06, "loss": 0.3672, "step": 10399 }, { "epoch": 0.48237476808905383, "grad_norm": 9.997055053710938, "learning_rate": 5.358831352930674e-06, "loss": 0.4199, "step": 10400 }, { "epoch": 0.48242115027829313, "grad_norm": 64.9843978881836, "learning_rate": 5.358097303885189e-06, "loss": 0.354, "step": 10401 }, { "epoch": 0.4824675324675325, "grad_norm": 9.79759407043457, "learning_rate": 5.357363247081715e-06, "loss": 0.2731, "step": 10402 }, { "epoch": 0.4825139146567718, "grad_norm": 19.522912979125977, "learning_rate": 5.356629182536155e-06, "loss": 0.4639, "step": 10403 }, { "epoch": 0.48256029684601115, "grad_norm": 7.493007659912109, "learning_rate": 5.355895110264411e-06, "loss": 0.2962, "step": 10404 }, { "epoch": 0.48260667903525045, "grad_norm": 4.9997429847717285, "learning_rate": 5.3551610302823875e-06, "loss": 0.2126, "step": 10405 }, { "epoch": 0.4826530612244898, "grad_norm": 7.118654727935791, "learning_rate": 5.354426942605988e-06, "loss": 0.2261, "step": 10406 }, { "epoch": 0.4826994434137291, "grad_norm": 7.623879432678223, "learning_rate": 5.353692847251115e-06, "loss": 0.3047, "step": 10407 }, { "epoch": 0.48274582560296847, "grad_norm": 4.863831996917725, "learning_rate": 5.352958744233673e-06, "loss": 0.2629, "step": 10408 }, { "epoch": 0.4827922077922078, "grad_norm": 3.543752670288086, "learning_rate": 5.352224633569567e-06, "loss": 0.2945, "step": 10409 }, { "epoch": 0.48283858998144713, "grad_norm": 11.274904251098633, "learning_rate": 5.351490515274699e-06, "loss": 0.4015, "step": 10410 }, { "epoch": 0.48288497217068643, "grad_norm": 10.68828296661377, "learning_rate": 5.350756389364975e-06, "loss": 0.2452, "step": 10411 }, { "epoch": 0.4829313543599258, "grad_norm": 8.17108154296875, "learning_rate": 5.350022255856299e-06, "loss": 0.3089, "step": 10412 }, { "epoch": 0.4829777365491651, "grad_norm": 6.538491249084473, "learning_rate": 5.349288114764574e-06, "loss": 0.3735, "step": 10413 }, { "epoch": 0.48302411873840445, "grad_norm": 9.053376197814941, "learning_rate": 5.348553966105707e-06, "loss": 0.401, "step": 10414 }, { "epoch": 0.4830705009276438, "grad_norm": 6.6364874839782715, "learning_rate": 5.347819809895603e-06, "loss": 0.2606, "step": 10415 }, { "epoch": 0.4831168831168831, "grad_norm": 8.846577644348145, "learning_rate": 5.347085646150165e-06, "loss": 0.2943, "step": 10416 }, { "epoch": 0.48316326530612247, "grad_norm": 6.424978256225586, "learning_rate": 5.3463514748853e-06, "loss": 0.3301, "step": 10417 }, { "epoch": 0.48320964749536177, "grad_norm": 9.277684211730957, "learning_rate": 5.345617296116914e-06, "loss": 0.3146, "step": 10418 }, { "epoch": 0.48325602968460113, "grad_norm": 5.8467512130737305, "learning_rate": 5.34488310986091e-06, "loss": 0.3671, "step": 10419 }, { "epoch": 0.48330241187384043, "grad_norm": 5.218952178955078, "learning_rate": 5.344148916133197e-06, "loss": 0.307, "step": 10420 }, { "epoch": 0.4833487940630798, "grad_norm": 8.9132080078125, "learning_rate": 5.343414714949678e-06, "loss": 0.3169, "step": 10421 }, { "epoch": 0.4833951762523191, "grad_norm": 7.66283655166626, "learning_rate": 5.342680506326262e-06, "loss": 0.3948, "step": 10422 }, { "epoch": 0.48344155844155845, "grad_norm": 9.597329139709473, "learning_rate": 5.341946290278852e-06, "loss": 0.5043, "step": 10423 }, { "epoch": 0.48348794063079775, "grad_norm": 8.430267333984375, "learning_rate": 5.341212066823356e-06, "loss": 0.4202, "step": 10424 }, { "epoch": 0.4835343228200371, "grad_norm": 9.880188941955566, "learning_rate": 5.340477835975681e-06, "loss": 0.4042, "step": 10425 }, { "epoch": 0.4835807050092764, "grad_norm": 8.353979110717773, "learning_rate": 5.339743597751733e-06, "loss": 0.4517, "step": 10426 }, { "epoch": 0.48362708719851577, "grad_norm": 6.293680191040039, "learning_rate": 5.339009352167418e-06, "loss": 0.4047, "step": 10427 }, { "epoch": 0.48367346938775513, "grad_norm": 9.946651458740234, "learning_rate": 5.338275099238647e-06, "loss": 0.3926, "step": 10428 }, { "epoch": 0.48371985157699443, "grad_norm": 4.784663200378418, "learning_rate": 5.337540838981322e-06, "loss": 0.2832, "step": 10429 }, { "epoch": 0.4837662337662338, "grad_norm": 7.9107489585876465, "learning_rate": 5.336806571411354e-06, "loss": 0.3421, "step": 10430 }, { "epoch": 0.4838126159554731, "grad_norm": 5.840352535247803, "learning_rate": 5.336072296544649e-06, "loss": 0.2898, "step": 10431 }, { "epoch": 0.48385899814471245, "grad_norm": 7.179194450378418, "learning_rate": 5.335338014397115e-06, "loss": 0.3427, "step": 10432 }, { "epoch": 0.48390538033395175, "grad_norm": 4.678577423095703, "learning_rate": 5.33460372498466e-06, "loss": 0.3626, "step": 10433 }, { "epoch": 0.4839517625231911, "grad_norm": 5.285607814788818, "learning_rate": 5.333869428323191e-06, "loss": 0.4562, "step": 10434 }, { "epoch": 0.4839981447124304, "grad_norm": 13.517592430114746, "learning_rate": 5.333135124428617e-06, "loss": 0.3027, "step": 10435 }, { "epoch": 0.48404452690166977, "grad_norm": 9.169888496398926, "learning_rate": 5.332400813316847e-06, "loss": 0.344, "step": 10436 }, { "epoch": 0.48409090909090907, "grad_norm": 8.038164138793945, "learning_rate": 5.331666495003787e-06, "loss": 0.2701, "step": 10437 }, { "epoch": 0.48413729128014843, "grad_norm": 5.688312530517578, "learning_rate": 5.3309321695053485e-06, "loss": 0.3541, "step": 10438 }, { "epoch": 0.48418367346938773, "grad_norm": 5.647048473358154, "learning_rate": 5.330197836837441e-06, "loss": 0.3668, "step": 10439 }, { "epoch": 0.4842300556586271, "grad_norm": 6.006656646728516, "learning_rate": 5.329463497015969e-06, "loss": 0.3057, "step": 10440 }, { "epoch": 0.4842764378478664, "grad_norm": 9.17021656036377, "learning_rate": 5.328729150056844e-06, "loss": 0.4148, "step": 10441 }, { "epoch": 0.48432282003710575, "grad_norm": 5.2447509765625, "learning_rate": 5.327994795975977e-06, "loss": 0.3837, "step": 10442 }, { "epoch": 0.4843692022263451, "grad_norm": 7.517115116119385, "learning_rate": 5.327260434789274e-06, "loss": 0.2769, "step": 10443 }, { "epoch": 0.4844155844155844, "grad_norm": 6.656427383422852, "learning_rate": 5.326526066512648e-06, "loss": 0.3405, "step": 10444 }, { "epoch": 0.48446196660482377, "grad_norm": 5.164605617523193, "learning_rate": 5.3257916911620056e-06, "loss": 0.2824, "step": 10445 }, { "epoch": 0.48450834879406307, "grad_norm": 8.298179626464844, "learning_rate": 5.3250573087532586e-06, "loss": 0.3951, "step": 10446 }, { "epoch": 0.48455473098330243, "grad_norm": 10.374004364013672, "learning_rate": 5.324322919302317e-06, "loss": 0.3873, "step": 10447 }, { "epoch": 0.48460111317254173, "grad_norm": 4.968533039093018, "learning_rate": 5.3235885228250895e-06, "loss": 0.3087, "step": 10448 }, { "epoch": 0.4846474953617811, "grad_norm": 5.562399864196777, "learning_rate": 5.322854119337487e-06, "loss": 0.3526, "step": 10449 }, { "epoch": 0.4846938775510204, "grad_norm": 7.054422378540039, "learning_rate": 5.322119708855422e-06, "loss": 0.3661, "step": 10450 }, { "epoch": 0.48474025974025975, "grad_norm": 9.246566772460938, "learning_rate": 5.321385291394803e-06, "loss": 0.3981, "step": 10451 }, { "epoch": 0.48478664192949905, "grad_norm": 7.465031147003174, "learning_rate": 5.320650866971541e-06, "loss": 0.3164, "step": 10452 }, { "epoch": 0.4848330241187384, "grad_norm": 12.7913236618042, "learning_rate": 5.319916435601547e-06, "loss": 0.4835, "step": 10453 }, { "epoch": 0.4848794063079777, "grad_norm": 5.62317419052124, "learning_rate": 5.319181997300731e-06, "loss": 0.361, "step": 10454 }, { "epoch": 0.48492578849721707, "grad_norm": 9.036234855651855, "learning_rate": 5.318447552085007e-06, "loss": 0.3607, "step": 10455 }, { "epoch": 0.4849721706864564, "grad_norm": 3.8798983097076416, "learning_rate": 5.317713099970283e-06, "loss": 0.2496, "step": 10456 }, { "epoch": 0.48501855287569573, "grad_norm": 3.8373680114746094, "learning_rate": 5.316978640972473e-06, "loss": 0.2784, "step": 10457 }, { "epoch": 0.4850649350649351, "grad_norm": 12.825928688049316, "learning_rate": 5.316244175107489e-06, "loss": 0.3195, "step": 10458 }, { "epoch": 0.4851113172541744, "grad_norm": 6.767540454864502, "learning_rate": 5.31550970239124e-06, "loss": 0.2798, "step": 10459 }, { "epoch": 0.48515769944341375, "grad_norm": 5.884958267211914, "learning_rate": 5.314775222839641e-06, "loss": 0.3107, "step": 10460 }, { "epoch": 0.48520408163265305, "grad_norm": 11.571855545043945, "learning_rate": 5.314040736468603e-06, "loss": 0.3546, "step": 10461 }, { "epoch": 0.4852504638218924, "grad_norm": 15.572853088378906, "learning_rate": 5.313306243294038e-06, "loss": 0.4726, "step": 10462 }, { "epoch": 0.4852968460111317, "grad_norm": 4.946329593658447, "learning_rate": 5.312571743331858e-06, "loss": 0.2619, "step": 10463 }, { "epoch": 0.48534322820037107, "grad_norm": 10.106372833251953, "learning_rate": 5.311837236597975e-06, "loss": 0.3967, "step": 10464 }, { "epoch": 0.48538961038961037, "grad_norm": 10.863039016723633, "learning_rate": 5.311102723108305e-06, "loss": 0.3473, "step": 10465 }, { "epoch": 0.48543599257884973, "grad_norm": 3.669950246810913, "learning_rate": 5.310368202878757e-06, "loss": 0.3301, "step": 10466 }, { "epoch": 0.48548237476808903, "grad_norm": 12.13451099395752, "learning_rate": 5.309633675925246e-06, "loss": 0.4673, "step": 10467 }, { "epoch": 0.4855287569573284, "grad_norm": 5.248655319213867, "learning_rate": 5.308899142263684e-06, "loss": 0.1989, "step": 10468 }, { "epoch": 0.4855751391465677, "grad_norm": 6.886784076690674, "learning_rate": 5.308164601909986e-06, "loss": 0.3471, "step": 10469 }, { "epoch": 0.48562152133580705, "grad_norm": 6.818755626678467, "learning_rate": 5.307430054880065e-06, "loss": 0.3001, "step": 10470 }, { "epoch": 0.4856679035250464, "grad_norm": 9.878119468688965, "learning_rate": 5.306695501189833e-06, "loss": 0.3893, "step": 10471 }, { "epoch": 0.4857142857142857, "grad_norm": 7.194273948669434, "learning_rate": 5.305960940855205e-06, "loss": 0.3897, "step": 10472 }, { "epoch": 0.48576066790352507, "grad_norm": 5.884335041046143, "learning_rate": 5.305226373892094e-06, "loss": 0.3035, "step": 10473 }, { "epoch": 0.48580705009276437, "grad_norm": 3.6428205966949463, "learning_rate": 5.304491800316416e-06, "loss": 0.2296, "step": 10474 }, { "epoch": 0.4858534322820037, "grad_norm": 5.931723117828369, "learning_rate": 5.303757220144082e-06, "loss": 0.2853, "step": 10475 }, { "epoch": 0.48589981447124303, "grad_norm": 4.2211995124816895, "learning_rate": 5.303022633391009e-06, "loss": 0.2862, "step": 10476 }, { "epoch": 0.4859461966604824, "grad_norm": 21.669607162475586, "learning_rate": 5.3022880400731115e-06, "loss": 0.5035, "step": 10477 }, { "epoch": 0.4859925788497217, "grad_norm": 6.324013710021973, "learning_rate": 5.301553440206301e-06, "loss": 0.3225, "step": 10478 }, { "epoch": 0.48603896103896105, "grad_norm": 3.804356575012207, "learning_rate": 5.300818833806495e-06, "loss": 0.2569, "step": 10479 }, { "epoch": 0.48608534322820035, "grad_norm": 5.317108154296875, "learning_rate": 5.3000842208896084e-06, "loss": 0.2517, "step": 10480 }, { "epoch": 0.4861317254174397, "grad_norm": 7.572360515594482, "learning_rate": 5.299349601471555e-06, "loss": 0.2857, "step": 10481 }, { "epoch": 0.486178107606679, "grad_norm": 5.387430191040039, "learning_rate": 5.298614975568249e-06, "loss": 0.2958, "step": 10482 }, { "epoch": 0.48622448979591837, "grad_norm": 7.203165054321289, "learning_rate": 5.29788034319561e-06, "loss": 0.3684, "step": 10483 }, { "epoch": 0.4862708719851577, "grad_norm": 8.79767894744873, "learning_rate": 5.297145704369548e-06, "loss": 0.4045, "step": 10484 }, { "epoch": 0.48631725417439703, "grad_norm": 11.725838661193848, "learning_rate": 5.296411059105983e-06, "loss": 0.5107, "step": 10485 }, { "epoch": 0.4863636363636364, "grad_norm": 6.541001796722412, "learning_rate": 5.295676407420827e-06, "loss": 0.3186, "step": 10486 }, { "epoch": 0.4864100185528757, "grad_norm": 6.433159351348877, "learning_rate": 5.294941749329999e-06, "loss": 0.3028, "step": 10487 }, { "epoch": 0.48645640074211505, "grad_norm": 14.176634788513184, "learning_rate": 5.294207084849412e-06, "loss": 0.4164, "step": 10488 }, { "epoch": 0.48650278293135435, "grad_norm": 5.62616491317749, "learning_rate": 5.293472413994984e-06, "loss": 0.2466, "step": 10489 }, { "epoch": 0.4865491651205937, "grad_norm": 11.820632934570312, "learning_rate": 5.292737736782631e-06, "loss": 0.4293, "step": 10490 }, { "epoch": 0.486595547309833, "grad_norm": 5.284363746643066, "learning_rate": 5.292003053228271e-06, "loss": 0.3235, "step": 10491 }, { "epoch": 0.48664192949907237, "grad_norm": 10.168612480163574, "learning_rate": 5.2912683633478165e-06, "loss": 0.3496, "step": 10492 }, { "epoch": 0.48668831168831167, "grad_norm": 5.289061069488525, "learning_rate": 5.290533667157188e-06, "loss": 0.3725, "step": 10493 }, { "epoch": 0.48673469387755103, "grad_norm": 4.800616264343262, "learning_rate": 5.2897989646723e-06, "loss": 0.2545, "step": 10494 }, { "epoch": 0.48678107606679033, "grad_norm": 8.872220039367676, "learning_rate": 5.289064255909071e-06, "loss": 0.4469, "step": 10495 }, { "epoch": 0.4868274582560297, "grad_norm": 9.384565353393555, "learning_rate": 5.288329540883418e-06, "loss": 0.3703, "step": 10496 }, { "epoch": 0.486873840445269, "grad_norm": 5.32009220123291, "learning_rate": 5.287594819611256e-06, "loss": 0.2945, "step": 10497 }, { "epoch": 0.48692022263450835, "grad_norm": 5.626843452453613, "learning_rate": 5.286860092108505e-06, "loss": 0.3126, "step": 10498 }, { "epoch": 0.4869666048237477, "grad_norm": 5.4932942390441895, "learning_rate": 5.286125358391081e-06, "loss": 0.2402, "step": 10499 }, { "epoch": 0.487012987012987, "grad_norm": 5.76055908203125, "learning_rate": 5.285390618474902e-06, "loss": 0.2465, "step": 10500 }, { "epoch": 0.48705936920222637, "grad_norm": 3.636091470718384, "learning_rate": 5.284655872375885e-06, "loss": 0.2455, "step": 10501 }, { "epoch": 0.48710575139146567, "grad_norm": 6.135373592376709, "learning_rate": 5.283921120109951e-06, "loss": 0.3214, "step": 10502 }, { "epoch": 0.487152133580705, "grad_norm": 8.105289459228516, "learning_rate": 5.283186361693012e-06, "loss": 0.304, "step": 10503 }, { "epoch": 0.48719851576994433, "grad_norm": 11.834856033325195, "learning_rate": 5.282451597140994e-06, "loss": 0.3771, "step": 10504 }, { "epoch": 0.4872448979591837, "grad_norm": 3.8199636936187744, "learning_rate": 5.281716826469808e-06, "loss": 0.311, "step": 10505 }, { "epoch": 0.487291280148423, "grad_norm": 6.65638542175293, "learning_rate": 5.280982049695377e-06, "loss": 0.2879, "step": 10506 }, { "epoch": 0.48733766233766235, "grad_norm": 8.08557415008545, "learning_rate": 5.280247266833618e-06, "loss": 0.4075, "step": 10507 }, { "epoch": 0.48738404452690165, "grad_norm": 15.889761924743652, "learning_rate": 5.279512477900449e-06, "loss": 0.4386, "step": 10508 }, { "epoch": 0.487430426716141, "grad_norm": 6.947535514831543, "learning_rate": 5.278777682911791e-06, "loss": 0.3698, "step": 10509 }, { "epoch": 0.4874768089053803, "grad_norm": 10.233922958374023, "learning_rate": 5.2780428818835605e-06, "loss": 0.3457, "step": 10510 }, { "epoch": 0.48752319109461967, "grad_norm": 7.672443389892578, "learning_rate": 5.277308074831678e-06, "loss": 0.2426, "step": 10511 }, { "epoch": 0.487569573283859, "grad_norm": 6.037177085876465, "learning_rate": 5.276573261772062e-06, "loss": 0.2997, "step": 10512 }, { "epoch": 0.48761595547309833, "grad_norm": 16.616117477416992, "learning_rate": 5.275838442720632e-06, "loss": 0.5838, "step": 10513 }, { "epoch": 0.4876623376623377, "grad_norm": 5.133094310760498, "learning_rate": 5.275103617693309e-06, "loss": 0.3166, "step": 10514 }, { "epoch": 0.487708719851577, "grad_norm": 8.272905349731445, "learning_rate": 5.274368786706009e-06, "loss": 0.3343, "step": 10515 }, { "epoch": 0.48775510204081635, "grad_norm": 8.88779354095459, "learning_rate": 5.273633949774655e-06, "loss": 0.391, "step": 10516 }, { "epoch": 0.48780148423005565, "grad_norm": 5.288866996765137, "learning_rate": 5.272899106915167e-06, "loss": 0.3514, "step": 10517 }, { "epoch": 0.487847866419295, "grad_norm": 5.745665550231934, "learning_rate": 5.272164258143462e-06, "loss": 0.2437, "step": 10518 }, { "epoch": 0.4878942486085343, "grad_norm": 9.553348541259766, "learning_rate": 5.271429403475463e-06, "loss": 0.3903, "step": 10519 }, { "epoch": 0.48794063079777367, "grad_norm": 4.980389595031738, "learning_rate": 5.270694542927089e-06, "loss": 0.3113, "step": 10520 }, { "epoch": 0.48798701298701297, "grad_norm": 7.712735652923584, "learning_rate": 5.269959676514259e-06, "loss": 0.3322, "step": 10521 }, { "epoch": 0.4880333951762523, "grad_norm": 8.000648498535156, "learning_rate": 5.269224804252897e-06, "loss": 0.3605, "step": 10522 }, { "epoch": 0.48807977736549163, "grad_norm": 5.366959571838379, "learning_rate": 5.268489926158921e-06, "loss": 0.3561, "step": 10523 }, { "epoch": 0.488126159554731, "grad_norm": 7.084439277648926, "learning_rate": 5.267755042248253e-06, "loss": 0.378, "step": 10524 }, { "epoch": 0.4881725417439703, "grad_norm": 11.125675201416016, "learning_rate": 5.267020152536811e-06, "loss": 0.3906, "step": 10525 }, { "epoch": 0.48821892393320965, "grad_norm": 7.867873191833496, "learning_rate": 5.26628525704052e-06, "loss": 0.4627, "step": 10526 }, { "epoch": 0.488265306122449, "grad_norm": 6.229604244232178, "learning_rate": 5.2655503557753e-06, "loss": 0.3315, "step": 10527 }, { "epoch": 0.4883116883116883, "grad_norm": 5.533975124359131, "learning_rate": 5.2648154487570705e-06, "loss": 0.3231, "step": 10528 }, { "epoch": 0.48835807050092767, "grad_norm": 4.611822128295898, "learning_rate": 5.264080536001754e-06, "loss": 0.2689, "step": 10529 }, { "epoch": 0.48840445269016697, "grad_norm": 11.721144676208496, "learning_rate": 5.263345617525272e-06, "loss": 0.3065, "step": 10530 }, { "epoch": 0.4884508348794063, "grad_norm": 5.943988800048828, "learning_rate": 5.262610693343546e-06, "loss": 0.3315, "step": 10531 }, { "epoch": 0.48849721706864563, "grad_norm": 4.523459434509277, "learning_rate": 5.2618757634724985e-06, "loss": 0.2437, "step": 10532 }, { "epoch": 0.488543599257885, "grad_norm": 4.803267478942871, "learning_rate": 5.261140827928049e-06, "loss": 0.3471, "step": 10533 }, { "epoch": 0.4885899814471243, "grad_norm": 11.536774635314941, "learning_rate": 5.260405886726124e-06, "loss": 0.3563, "step": 10534 }, { "epoch": 0.48863636363636365, "grad_norm": 11.057238578796387, "learning_rate": 5.259670939882641e-06, "loss": 0.3201, "step": 10535 }, { "epoch": 0.48868274582560295, "grad_norm": 3.3255038261413574, "learning_rate": 5.258935987413524e-06, "loss": 0.3206, "step": 10536 }, { "epoch": 0.4887291280148423, "grad_norm": 7.210343360900879, "learning_rate": 5.258201029334697e-06, "loss": 0.2378, "step": 10537 }, { "epoch": 0.4887755102040816, "grad_norm": 6.250364780426025, "learning_rate": 5.257466065662078e-06, "loss": 0.3245, "step": 10538 }, { "epoch": 0.48882189239332097, "grad_norm": 5.901383876800537, "learning_rate": 5.256731096411594e-06, "loss": 0.2675, "step": 10539 }, { "epoch": 0.48886827458256027, "grad_norm": 8.038846015930176, "learning_rate": 5.255996121599167e-06, "loss": 0.4071, "step": 10540 }, { "epoch": 0.4889146567717996, "grad_norm": 5.25605583190918, "learning_rate": 5.255261141240717e-06, "loss": 0.3012, "step": 10541 }, { "epoch": 0.488961038961039, "grad_norm": 5.977451324462891, "learning_rate": 5.254526155352171e-06, "loss": 0.3086, "step": 10542 }, { "epoch": 0.4890074211502783, "grad_norm": 12.295449256896973, "learning_rate": 5.25379116394945e-06, "loss": 0.5651, "step": 10543 }, { "epoch": 0.48905380333951765, "grad_norm": 8.424210548400879, "learning_rate": 5.253056167048476e-06, "loss": 0.3564, "step": 10544 }, { "epoch": 0.48910018552875695, "grad_norm": 9.063698768615723, "learning_rate": 5.252321164665175e-06, "loss": 0.3535, "step": 10545 }, { "epoch": 0.4891465677179963, "grad_norm": 7.637523651123047, "learning_rate": 5.251586156815468e-06, "loss": 0.3268, "step": 10546 }, { "epoch": 0.4891929499072356, "grad_norm": 7.635298728942871, "learning_rate": 5.25085114351528e-06, "loss": 0.4212, "step": 10547 }, { "epoch": 0.48923933209647497, "grad_norm": 12.482098579406738, "learning_rate": 5.2501161247805345e-06, "loss": 0.4314, "step": 10548 }, { "epoch": 0.48928571428571427, "grad_norm": 8.76145076751709, "learning_rate": 5.249381100627155e-06, "loss": 0.38, "step": 10549 }, { "epoch": 0.4893320964749536, "grad_norm": 4.701962471008301, "learning_rate": 5.248646071071065e-06, "loss": 0.3539, "step": 10550 }, { "epoch": 0.48937847866419293, "grad_norm": 4.29490852355957, "learning_rate": 5.247911036128189e-06, "loss": 0.3584, "step": 10551 }, { "epoch": 0.4894248608534323, "grad_norm": 5.070096492767334, "learning_rate": 5.247175995814452e-06, "loss": 0.3201, "step": 10552 }, { "epoch": 0.4894712430426716, "grad_norm": 6.799983501434326, "learning_rate": 5.246440950145777e-06, "loss": 0.2779, "step": 10553 }, { "epoch": 0.48951762523191095, "grad_norm": 8.129118919372559, "learning_rate": 5.245705899138089e-06, "loss": 0.2974, "step": 10554 }, { "epoch": 0.4895640074211503, "grad_norm": 8.394877433776855, "learning_rate": 5.244970842807312e-06, "loss": 0.3829, "step": 10555 }, { "epoch": 0.4896103896103896, "grad_norm": 3.9633588790893555, "learning_rate": 5.244235781169371e-06, "loss": 0.3697, "step": 10556 }, { "epoch": 0.48965677179962896, "grad_norm": 7.371473789215088, "learning_rate": 5.24350071424019e-06, "loss": 0.2927, "step": 10557 }, { "epoch": 0.48970315398886827, "grad_norm": 13.339027404785156, "learning_rate": 5.242765642035695e-06, "loss": 0.3581, "step": 10558 }, { "epoch": 0.4897495361781076, "grad_norm": 4.799707412719727, "learning_rate": 5.242030564571812e-06, "loss": 0.3559, "step": 10559 }, { "epoch": 0.4897959183673469, "grad_norm": 7.1051506996154785, "learning_rate": 5.241295481864462e-06, "loss": 0.262, "step": 10560 }, { "epoch": 0.4898423005565863, "grad_norm": 7.293661117553711, "learning_rate": 5.240560393929572e-06, "loss": 0.3727, "step": 10561 }, { "epoch": 0.4898886827458256, "grad_norm": 5.118342876434326, "learning_rate": 5.239825300783071e-06, "loss": 0.231, "step": 10562 }, { "epoch": 0.48993506493506495, "grad_norm": 7.965873718261719, "learning_rate": 5.23909020244088e-06, "loss": 0.2369, "step": 10563 }, { "epoch": 0.48998144712430425, "grad_norm": 9.59589672088623, "learning_rate": 5.238355098918925e-06, "loss": 0.4696, "step": 10564 }, { "epoch": 0.4900278293135436, "grad_norm": 6.926909923553467, "learning_rate": 5.2376199902331335e-06, "loss": 0.3384, "step": 10565 }, { "epoch": 0.4900742115027829, "grad_norm": 4.967306137084961, "learning_rate": 5.23688487639943e-06, "loss": 0.3927, "step": 10566 }, { "epoch": 0.49012059369202227, "grad_norm": 11.157358169555664, "learning_rate": 5.2361497574337415e-06, "loss": 0.3976, "step": 10567 }, { "epoch": 0.49016697588126157, "grad_norm": 5.705856800079346, "learning_rate": 5.235414633351992e-06, "loss": 0.2675, "step": 10568 }, { "epoch": 0.4902133580705009, "grad_norm": 13.954939842224121, "learning_rate": 5.234679504170108e-06, "loss": 0.2593, "step": 10569 }, { "epoch": 0.4902597402597403, "grad_norm": 8.449472427368164, "learning_rate": 5.233944369904018e-06, "loss": 0.418, "step": 10570 }, { "epoch": 0.4903061224489796, "grad_norm": 7.728620529174805, "learning_rate": 5.233209230569645e-06, "loss": 0.3204, "step": 10571 }, { "epoch": 0.49035250463821894, "grad_norm": 11.34589958190918, "learning_rate": 5.232474086182917e-06, "loss": 0.2925, "step": 10572 }, { "epoch": 0.49039888682745825, "grad_norm": 6.527573108673096, "learning_rate": 5.231738936759762e-06, "loss": 0.3006, "step": 10573 }, { "epoch": 0.4904452690166976, "grad_norm": 6.403321743011475, "learning_rate": 5.231003782316104e-06, "loss": 0.313, "step": 10574 }, { "epoch": 0.4904916512059369, "grad_norm": 4.782180309295654, "learning_rate": 5.230268622867871e-06, "loss": 0.2993, "step": 10575 }, { "epoch": 0.49053803339517627, "grad_norm": 6.5840325355529785, "learning_rate": 5.229533458430991e-06, "loss": 0.2449, "step": 10576 }, { "epoch": 0.49058441558441557, "grad_norm": 8.286362648010254, "learning_rate": 5.228798289021388e-06, "loss": 0.436, "step": 10577 }, { "epoch": 0.4906307977736549, "grad_norm": 5.163583755493164, "learning_rate": 5.228063114654993e-06, "loss": 0.2744, "step": 10578 }, { "epoch": 0.4906771799628942, "grad_norm": 7.31833553314209, "learning_rate": 5.227327935347729e-06, "loss": 0.3789, "step": 10579 }, { "epoch": 0.4907235621521336, "grad_norm": 5.379721641540527, "learning_rate": 5.226592751115526e-06, "loss": 0.2728, "step": 10580 }, { "epoch": 0.4907699443413729, "grad_norm": 5.737435817718506, "learning_rate": 5.2258575619743106e-06, "loss": 0.3347, "step": 10581 }, { "epoch": 0.49081632653061225, "grad_norm": 6.970633029937744, "learning_rate": 5.225122367940009e-06, "loss": 0.3469, "step": 10582 }, { "epoch": 0.4908627087198516, "grad_norm": 9.190145492553711, "learning_rate": 5.224387169028551e-06, "loss": 0.3449, "step": 10583 }, { "epoch": 0.4909090909090909, "grad_norm": 13.770268440246582, "learning_rate": 5.223651965255864e-06, "loss": 0.3957, "step": 10584 }, { "epoch": 0.49095547309833026, "grad_norm": 16.181730270385742, "learning_rate": 5.222916756637873e-06, "loss": 0.5206, "step": 10585 }, { "epoch": 0.49100185528756957, "grad_norm": 5.840346813201904, "learning_rate": 5.22218154319051e-06, "loss": 0.3475, "step": 10586 }, { "epoch": 0.4910482374768089, "grad_norm": 4.919003486633301, "learning_rate": 5.2214463249296996e-06, "loss": 0.3809, "step": 10587 }, { "epoch": 0.4910946196660482, "grad_norm": 6.969326019287109, "learning_rate": 5.220711101871373e-06, "loss": 0.3526, "step": 10588 }, { "epoch": 0.4911410018552876, "grad_norm": 7.420775413513184, "learning_rate": 5.219975874031457e-06, "loss": 0.2825, "step": 10589 }, { "epoch": 0.4911873840445269, "grad_norm": 12.99083423614502, "learning_rate": 5.2192406414258774e-06, "loss": 0.5073, "step": 10590 }, { "epoch": 0.49123376623376624, "grad_norm": 10.687655448913574, "learning_rate": 5.218505404070566e-06, "loss": 0.4931, "step": 10591 }, { "epoch": 0.49128014842300555, "grad_norm": 4.5870161056518555, "learning_rate": 5.217770161981451e-06, "loss": 0.3241, "step": 10592 }, { "epoch": 0.4913265306122449, "grad_norm": 6.188785552978516, "learning_rate": 5.217034915174459e-06, "loss": 0.2975, "step": 10593 }, { "epoch": 0.4913729128014842, "grad_norm": 23.745243072509766, "learning_rate": 5.216299663665522e-06, "loss": 0.2731, "step": 10594 }, { "epoch": 0.49141929499072357, "grad_norm": 7.194429397583008, "learning_rate": 5.215564407470566e-06, "loss": 0.3602, "step": 10595 }, { "epoch": 0.49146567717996287, "grad_norm": 24.544654846191406, "learning_rate": 5.21482914660552e-06, "loss": 0.5246, "step": 10596 }, { "epoch": 0.4915120593692022, "grad_norm": 6.876531600952148, "learning_rate": 5.214093881086315e-06, "loss": 0.3531, "step": 10597 }, { "epoch": 0.4915584415584416, "grad_norm": 6.668091773986816, "learning_rate": 5.213358610928878e-06, "loss": 0.3666, "step": 10598 }, { "epoch": 0.4916048237476809, "grad_norm": 14.193743705749512, "learning_rate": 5.2126233361491394e-06, "loss": 0.3741, "step": 10599 }, { "epoch": 0.49165120593692024, "grad_norm": 7.189566612243652, "learning_rate": 5.211888056763029e-06, "loss": 0.3452, "step": 10600 }, { "epoch": 0.49169758812615955, "grad_norm": 5.798274040222168, "learning_rate": 5.2111527727864756e-06, "loss": 0.2124, "step": 10601 }, { "epoch": 0.4917439703153989, "grad_norm": 8.226151466369629, "learning_rate": 5.210417484235407e-06, "loss": 0.397, "step": 10602 }, { "epoch": 0.4917903525046382, "grad_norm": 5.446727275848389, "learning_rate": 5.209682191125757e-06, "loss": 0.2886, "step": 10603 }, { "epoch": 0.49183673469387756, "grad_norm": 23.75002670288086, "learning_rate": 5.208946893473451e-06, "loss": 0.5084, "step": 10604 }, { "epoch": 0.49188311688311687, "grad_norm": 10.819497108459473, "learning_rate": 5.208211591294422e-06, "loss": 0.3816, "step": 10605 }, { "epoch": 0.4919294990723562, "grad_norm": 6.3563618659973145, "learning_rate": 5.207476284604599e-06, "loss": 0.3101, "step": 10606 }, { "epoch": 0.4919758812615955, "grad_norm": 6.299557685852051, "learning_rate": 5.206740973419911e-06, "loss": 0.3101, "step": 10607 }, { "epoch": 0.4920222634508349, "grad_norm": 7.734307765960693, "learning_rate": 5.206005657756289e-06, "loss": 0.4296, "step": 10608 }, { "epoch": 0.4920686456400742, "grad_norm": 8.691176414489746, "learning_rate": 5.205270337629662e-06, "loss": 0.3962, "step": 10609 }, { "epoch": 0.49211502782931354, "grad_norm": 4.862698554992676, "learning_rate": 5.204535013055962e-06, "loss": 0.3644, "step": 10610 }, { "epoch": 0.4921614100185529, "grad_norm": 4.361595153808594, "learning_rate": 5.20379968405112e-06, "loss": 0.2706, "step": 10611 }, { "epoch": 0.4922077922077922, "grad_norm": 8.072837829589844, "learning_rate": 5.203064350631064e-06, "loss": 0.3873, "step": 10612 }, { "epoch": 0.49225417439703156, "grad_norm": 9.174817085266113, "learning_rate": 5.202329012811727e-06, "loss": 0.2955, "step": 10613 }, { "epoch": 0.49230055658627087, "grad_norm": 6.6896071434021, "learning_rate": 5.201593670609038e-06, "loss": 0.2932, "step": 10614 }, { "epoch": 0.4923469387755102, "grad_norm": 8.42446231842041, "learning_rate": 5.2008583240389284e-06, "loss": 0.2039, "step": 10615 }, { "epoch": 0.4923933209647495, "grad_norm": 9.228106498718262, "learning_rate": 5.20012297311733e-06, "loss": 0.4534, "step": 10616 }, { "epoch": 0.4924397031539889, "grad_norm": 7.6200151443481445, "learning_rate": 5.199387617860172e-06, "loss": 0.3241, "step": 10617 }, { "epoch": 0.4924860853432282, "grad_norm": 6.6673126220703125, "learning_rate": 5.198652258283387e-06, "loss": 0.2429, "step": 10618 }, { "epoch": 0.49253246753246754, "grad_norm": 5.470515251159668, "learning_rate": 5.197916894402906e-06, "loss": 0.348, "step": 10619 }, { "epoch": 0.49257884972170685, "grad_norm": 9.3923921585083, "learning_rate": 5.197181526234658e-06, "loss": 0.3525, "step": 10620 }, { "epoch": 0.4926252319109462, "grad_norm": 13.232734680175781, "learning_rate": 5.196446153794578e-06, "loss": 0.3246, "step": 10621 }, { "epoch": 0.4926716141001855, "grad_norm": 9.87554931640625, "learning_rate": 5.195710777098595e-06, "loss": 0.2863, "step": 10622 }, { "epoch": 0.49271799628942486, "grad_norm": 4.808680057525635, "learning_rate": 5.194975396162642e-06, "loss": 0.3129, "step": 10623 }, { "epoch": 0.49276437847866417, "grad_norm": 8.652093887329102, "learning_rate": 5.194240011002648e-06, "loss": 0.3959, "step": 10624 }, { "epoch": 0.4928107606679035, "grad_norm": 12.087813377380371, "learning_rate": 5.193504621634549e-06, "loss": 0.3769, "step": 10625 }, { "epoch": 0.4928571428571429, "grad_norm": 5.437726020812988, "learning_rate": 5.192769228074272e-06, "loss": 0.3503, "step": 10626 }, { "epoch": 0.4929035250463822, "grad_norm": 7.059523105621338, "learning_rate": 5.192033830337754e-06, "loss": 0.3852, "step": 10627 }, { "epoch": 0.49294990723562154, "grad_norm": 4.994348049163818, "learning_rate": 5.1912984284409226e-06, "loss": 0.2582, "step": 10628 }, { "epoch": 0.49299628942486085, "grad_norm": 5.749332427978516, "learning_rate": 5.190563022399711e-06, "loss": 0.349, "step": 10629 }, { "epoch": 0.4930426716141002, "grad_norm": 10.520391464233398, "learning_rate": 5.189827612230054e-06, "loss": 0.3999, "step": 10630 }, { "epoch": 0.4930890538033395, "grad_norm": 6.382053375244141, "learning_rate": 5.18909219794788e-06, "loss": 0.3656, "step": 10631 }, { "epoch": 0.49313543599257886, "grad_norm": 7.065734386444092, "learning_rate": 5.188356779569125e-06, "loss": 0.3465, "step": 10632 }, { "epoch": 0.49318181818181817, "grad_norm": 6.828261852264404, "learning_rate": 5.187621357109719e-06, "loss": 0.3329, "step": 10633 }, { "epoch": 0.4932282003710575, "grad_norm": 6.466620922088623, "learning_rate": 5.186885930585596e-06, "loss": 0.3426, "step": 10634 }, { "epoch": 0.4932745825602968, "grad_norm": 15.435037612915039, "learning_rate": 5.186150500012685e-06, "loss": 0.3986, "step": 10635 }, { "epoch": 0.4933209647495362, "grad_norm": 9.642315864562988, "learning_rate": 5.185415065406926e-06, "loss": 0.4139, "step": 10636 }, { "epoch": 0.4933673469387755, "grad_norm": 7.5967488288879395, "learning_rate": 5.184679626784243e-06, "loss": 0.2931, "step": 10637 }, { "epoch": 0.49341372912801484, "grad_norm": 8.515213012695312, "learning_rate": 5.183944184160578e-06, "loss": 0.2652, "step": 10638 }, { "epoch": 0.4934601113172542, "grad_norm": 9.603530883789062, "learning_rate": 5.183208737551856e-06, "loss": 0.203, "step": 10639 }, { "epoch": 0.4935064935064935, "grad_norm": 7.0543212890625, "learning_rate": 5.182473286974013e-06, "loss": 0.3373, "step": 10640 }, { "epoch": 0.49355287569573286, "grad_norm": 8.982004165649414, "learning_rate": 5.1817378324429846e-06, "loss": 0.4865, "step": 10641 }, { "epoch": 0.49359925788497216, "grad_norm": 8.343533515930176, "learning_rate": 5.1810023739747e-06, "loss": 0.417, "step": 10642 }, { "epoch": 0.4936456400742115, "grad_norm": 5.507200717926025, "learning_rate": 5.180266911585095e-06, "loss": 0.3255, "step": 10643 }, { "epoch": 0.4936920222634508, "grad_norm": 10.651240348815918, "learning_rate": 5.1795314452901034e-06, "loss": 0.4303, "step": 10644 }, { "epoch": 0.4937384044526902, "grad_norm": 7.059760570526123, "learning_rate": 5.178795975105657e-06, "loss": 0.3486, "step": 10645 }, { "epoch": 0.4937847866419295, "grad_norm": 4.764956474304199, "learning_rate": 5.178060501047689e-06, "loss": 0.2215, "step": 10646 }, { "epoch": 0.49383116883116884, "grad_norm": 7.354402542114258, "learning_rate": 5.177325023132136e-06, "loss": 0.3349, "step": 10647 }, { "epoch": 0.49387755102040815, "grad_norm": 8.761603355407715, "learning_rate": 5.176589541374929e-06, "loss": 0.3156, "step": 10648 }, { "epoch": 0.4939239332096475, "grad_norm": 8.946213722229004, "learning_rate": 5.1758540557920035e-06, "loss": 0.3877, "step": 10649 }, { "epoch": 0.4939703153988868, "grad_norm": 7.972519397735596, "learning_rate": 5.175118566399292e-06, "loss": 0.2591, "step": 10650 }, { "epoch": 0.49401669758812616, "grad_norm": 8.962424278259277, "learning_rate": 5.174383073212729e-06, "loss": 0.3686, "step": 10651 }, { "epoch": 0.49406307977736547, "grad_norm": 3.7691023349761963, "learning_rate": 5.17364757624825e-06, "loss": 0.2545, "step": 10652 }, { "epoch": 0.4941094619666048, "grad_norm": 6.546536445617676, "learning_rate": 5.172912075521786e-06, "loss": 0.2896, "step": 10653 }, { "epoch": 0.4941558441558442, "grad_norm": 10.281350135803223, "learning_rate": 5.1721765710492745e-06, "loss": 0.4288, "step": 10654 }, { "epoch": 0.4942022263450835, "grad_norm": 7.388161659240723, "learning_rate": 5.171441062846647e-06, "loss": 0.3118, "step": 10655 }, { "epoch": 0.49424860853432284, "grad_norm": 7.447529315948486, "learning_rate": 5.17070555092984e-06, "loss": 0.3616, "step": 10656 }, { "epoch": 0.49429499072356214, "grad_norm": 7.739843845367432, "learning_rate": 5.169970035314787e-06, "loss": 0.3793, "step": 10657 }, { "epoch": 0.4943413729128015, "grad_norm": 3.419926643371582, "learning_rate": 5.1692345160174225e-06, "loss": 0.2221, "step": 10658 }, { "epoch": 0.4943877551020408, "grad_norm": 8.74610424041748, "learning_rate": 5.168498993053683e-06, "loss": 0.5156, "step": 10659 }, { "epoch": 0.49443413729128016, "grad_norm": 7.348206996917725, "learning_rate": 5.167763466439502e-06, "loss": 0.4453, "step": 10660 }, { "epoch": 0.49448051948051946, "grad_norm": 6.377999305725098, "learning_rate": 5.1670279361908115e-06, "loss": 0.4241, "step": 10661 }, { "epoch": 0.4945269016697588, "grad_norm": 11.639628410339355, "learning_rate": 5.166292402323551e-06, "loss": 0.5054, "step": 10662 }, { "epoch": 0.4945732838589981, "grad_norm": 3.7302591800689697, "learning_rate": 5.165556864853651e-06, "loss": 0.2161, "step": 10663 }, { "epoch": 0.4946196660482375, "grad_norm": 8.066683769226074, "learning_rate": 5.164821323797051e-06, "loss": 0.3854, "step": 10664 }, { "epoch": 0.4946660482374768, "grad_norm": 6.120792865753174, "learning_rate": 5.164085779169684e-06, "loss": 0.2749, "step": 10665 }, { "epoch": 0.49471243042671614, "grad_norm": 5.502170085906982, "learning_rate": 5.163350230987484e-06, "loss": 0.3405, "step": 10666 }, { "epoch": 0.49475881261595545, "grad_norm": 4.841678619384766, "learning_rate": 5.1626146792663875e-06, "loss": 0.2857, "step": 10667 }, { "epoch": 0.4948051948051948, "grad_norm": 5.64905309677124, "learning_rate": 5.16187912402233e-06, "loss": 0.3837, "step": 10668 }, { "epoch": 0.49485157699443416, "grad_norm": 7.193426609039307, "learning_rate": 5.161143565271246e-06, "loss": 0.3732, "step": 10669 }, { "epoch": 0.49489795918367346, "grad_norm": 6.553719520568848, "learning_rate": 5.160408003029074e-06, "loss": 0.3443, "step": 10670 }, { "epoch": 0.4949443413729128, "grad_norm": 5.5974955558776855, "learning_rate": 5.159672437311744e-06, "loss": 0.2562, "step": 10671 }, { "epoch": 0.4949907235621521, "grad_norm": 11.849843978881836, "learning_rate": 5.1589368681351985e-06, "loss": 0.4172, "step": 10672 }, { "epoch": 0.4950371057513915, "grad_norm": 7.020448684692383, "learning_rate": 5.158201295515366e-06, "loss": 0.356, "step": 10673 }, { "epoch": 0.4950834879406308, "grad_norm": 8.833585739135742, "learning_rate": 5.157465719468187e-06, "loss": 0.3329, "step": 10674 }, { "epoch": 0.49512987012987014, "grad_norm": 13.575679779052734, "learning_rate": 5.156730140009598e-06, "loss": 0.5087, "step": 10675 }, { "epoch": 0.49517625231910944, "grad_norm": 5.743983745574951, "learning_rate": 5.155994557155531e-06, "loss": 0.3033, "step": 10676 }, { "epoch": 0.4952226345083488, "grad_norm": 4.130568981170654, "learning_rate": 5.1552589709219255e-06, "loss": 0.2209, "step": 10677 }, { "epoch": 0.4952690166975881, "grad_norm": 9.968851089477539, "learning_rate": 5.154523381324716e-06, "loss": 0.2731, "step": 10678 }, { "epoch": 0.49531539888682746, "grad_norm": 4.037479400634766, "learning_rate": 5.153787788379838e-06, "loss": 0.2547, "step": 10679 }, { "epoch": 0.49536178107606677, "grad_norm": 6.756636142730713, "learning_rate": 5.1530521921032305e-06, "loss": 0.3775, "step": 10680 }, { "epoch": 0.4954081632653061, "grad_norm": 6.734188556671143, "learning_rate": 5.152316592510827e-06, "loss": 0.3444, "step": 10681 }, { "epoch": 0.4954545454545455, "grad_norm": 8.777446746826172, "learning_rate": 5.151580989618566e-06, "loss": 0.4036, "step": 10682 }, { "epoch": 0.4955009276437848, "grad_norm": 4.407012462615967, "learning_rate": 5.15084538344238e-06, "loss": 0.2826, "step": 10683 }, { "epoch": 0.49554730983302414, "grad_norm": 8.76557445526123, "learning_rate": 5.1501097739982095e-06, "loss": 0.3732, "step": 10684 }, { "epoch": 0.49559369202226344, "grad_norm": 4.737846851348877, "learning_rate": 5.149374161301991e-06, "loss": 0.231, "step": 10685 }, { "epoch": 0.4956400742115028, "grad_norm": 9.289581298828125, "learning_rate": 5.148638545369658e-06, "loss": 0.3668, "step": 10686 }, { "epoch": 0.4956864564007421, "grad_norm": 5.830291271209717, "learning_rate": 5.1479029262171506e-06, "loss": 0.3191, "step": 10687 }, { "epoch": 0.49573283858998146, "grad_norm": 26.49019432067871, "learning_rate": 5.147167303860406e-06, "loss": 0.38, "step": 10688 }, { "epoch": 0.49577922077922076, "grad_norm": 5.070931434631348, "learning_rate": 5.1464316783153575e-06, "loss": 0.3881, "step": 10689 }, { "epoch": 0.4958256029684601, "grad_norm": 7.313035011291504, "learning_rate": 5.145696049597944e-06, "loss": 0.2992, "step": 10690 }, { "epoch": 0.4958719851576994, "grad_norm": 4.8207783699035645, "learning_rate": 5.144960417724103e-06, "loss": 0.2598, "step": 10691 }, { "epoch": 0.4959183673469388, "grad_norm": 5.954864978790283, "learning_rate": 5.1442247827097705e-06, "loss": 0.3148, "step": 10692 }, { "epoch": 0.4959647495361781, "grad_norm": 6.158790588378906, "learning_rate": 5.1434891445708845e-06, "loss": 0.3576, "step": 10693 }, { "epoch": 0.49601113172541744, "grad_norm": 10.72689437866211, "learning_rate": 5.1427535033233825e-06, "loss": 0.4579, "step": 10694 }, { "epoch": 0.49605751391465674, "grad_norm": 6.6822309494018555, "learning_rate": 5.1420178589832e-06, "loss": 0.2829, "step": 10695 }, { "epoch": 0.4961038961038961, "grad_norm": 5.887020587921143, "learning_rate": 5.141282211566276e-06, "loss": 0.2564, "step": 10696 }, { "epoch": 0.49615027829313546, "grad_norm": 3.892683744430542, "learning_rate": 5.140546561088546e-06, "loss": 0.29, "step": 10697 }, { "epoch": 0.49619666048237476, "grad_norm": 9.61817455291748, "learning_rate": 5.139810907565951e-06, "loss": 0.4042, "step": 10698 }, { "epoch": 0.4962430426716141, "grad_norm": 5.1148295402526855, "learning_rate": 5.1390752510144256e-06, "loss": 0.2999, "step": 10699 }, { "epoch": 0.4962894248608534, "grad_norm": 12.492558479309082, "learning_rate": 5.138339591449908e-06, "loss": 0.5229, "step": 10700 }, { "epoch": 0.4963358070500928, "grad_norm": 11.410684585571289, "learning_rate": 5.137603928888337e-06, "loss": 0.477, "step": 10701 }, { "epoch": 0.4963821892393321, "grad_norm": 12.342041969299316, "learning_rate": 5.1368682633456475e-06, "loss": 0.6617, "step": 10702 }, { "epoch": 0.49642857142857144, "grad_norm": 13.28519344329834, "learning_rate": 5.13613259483778e-06, "loss": 0.5437, "step": 10703 }, { "epoch": 0.49647495361781074, "grad_norm": 4.935927391052246, "learning_rate": 5.1353969233806735e-06, "loss": 0.2375, "step": 10704 }, { "epoch": 0.4965213358070501, "grad_norm": 5.856004238128662, "learning_rate": 5.134661248990262e-06, "loss": 0.1776, "step": 10705 }, { "epoch": 0.4965677179962894, "grad_norm": 9.685254096984863, "learning_rate": 5.133925571682486e-06, "loss": 0.3824, "step": 10706 }, { "epoch": 0.49661410018552876, "grad_norm": 7.55087423324585, "learning_rate": 5.133189891473285e-06, "loss": 0.3528, "step": 10707 }, { "epoch": 0.49666048237476806, "grad_norm": 4.882852077484131, "learning_rate": 5.1324542083785936e-06, "loss": 0.3473, "step": 10708 }, { "epoch": 0.4967068645640074, "grad_norm": 9.856694221496582, "learning_rate": 5.131718522414352e-06, "loss": 0.2643, "step": 10709 }, { "epoch": 0.4967532467532468, "grad_norm": 6.097508430480957, "learning_rate": 5.130982833596497e-06, "loss": 0.3342, "step": 10710 }, { "epoch": 0.4967996289424861, "grad_norm": 7.135842323303223, "learning_rate": 5.130247141940968e-06, "loss": 0.3087, "step": 10711 }, { "epoch": 0.49684601113172544, "grad_norm": 4.695164203643799, "learning_rate": 5.129511447463705e-06, "loss": 0.315, "step": 10712 }, { "epoch": 0.49689239332096474, "grad_norm": 8.99064826965332, "learning_rate": 5.1287757501806444e-06, "loss": 0.4952, "step": 10713 }, { "epoch": 0.4969387755102041, "grad_norm": 7.842475891113281, "learning_rate": 5.128040050107724e-06, "loss": 0.4215, "step": 10714 }, { "epoch": 0.4969851576994434, "grad_norm": 7.08864688873291, "learning_rate": 5.127304347260885e-06, "loss": 0.4233, "step": 10715 }, { "epoch": 0.49703153988868276, "grad_norm": 6.828114032745361, "learning_rate": 5.126568641656064e-06, "loss": 0.3818, "step": 10716 }, { "epoch": 0.49707792207792206, "grad_norm": 4.479256629943848, "learning_rate": 5.125832933309198e-06, "loss": 0.287, "step": 10717 }, { "epoch": 0.4971243042671614, "grad_norm": 9.40207576751709, "learning_rate": 5.1250972222362304e-06, "loss": 0.3963, "step": 10718 }, { "epoch": 0.4971706864564007, "grad_norm": 5.647382736206055, "learning_rate": 5.124361508453096e-06, "loss": 0.2619, "step": 10719 }, { "epoch": 0.4972170686456401, "grad_norm": 7.786584377288818, "learning_rate": 5.123625791975737e-06, "loss": 0.3714, "step": 10720 }, { "epoch": 0.4972634508348794, "grad_norm": 9.376117706298828, "learning_rate": 5.122890072820089e-06, "loss": 0.4096, "step": 10721 }, { "epoch": 0.49730983302411874, "grad_norm": 6.223372459411621, "learning_rate": 5.122154351002092e-06, "loss": 0.2755, "step": 10722 }, { "epoch": 0.49735621521335804, "grad_norm": 11.379890441894531, "learning_rate": 5.121418626537687e-06, "loss": 0.4288, "step": 10723 }, { "epoch": 0.4974025974025974, "grad_norm": 8.51409912109375, "learning_rate": 5.12068289944281e-06, "loss": 0.469, "step": 10724 }, { "epoch": 0.49744897959183676, "grad_norm": 15.033987045288086, "learning_rate": 5.119947169733401e-06, "loss": 0.4337, "step": 10725 }, { "epoch": 0.49749536178107606, "grad_norm": 6.344771385192871, "learning_rate": 5.119211437425402e-06, "loss": 0.4471, "step": 10726 }, { "epoch": 0.4975417439703154, "grad_norm": 10.908634185791016, "learning_rate": 5.118475702534748e-06, "loss": 0.4051, "step": 10727 }, { "epoch": 0.4975881261595547, "grad_norm": 6.10556697845459, "learning_rate": 5.117739965077382e-06, "loss": 0.3201, "step": 10728 }, { "epoch": 0.4976345083487941, "grad_norm": 12.257080078125, "learning_rate": 5.117004225069242e-06, "loss": 0.4902, "step": 10729 }, { "epoch": 0.4976808905380334, "grad_norm": 3.0058844089508057, "learning_rate": 5.116268482526267e-06, "loss": 0.1612, "step": 10730 }, { "epoch": 0.49772727272727274, "grad_norm": 4.212371349334717, "learning_rate": 5.115532737464396e-06, "loss": 0.273, "step": 10731 }, { "epoch": 0.49777365491651204, "grad_norm": 6.174208164215088, "learning_rate": 5.114796989899569e-06, "loss": 0.3783, "step": 10732 }, { "epoch": 0.4978200371057514, "grad_norm": 6.678610801696777, "learning_rate": 5.114061239847725e-06, "loss": 0.3439, "step": 10733 }, { "epoch": 0.4978664192949907, "grad_norm": 13.282049179077148, "learning_rate": 5.113325487324806e-06, "loss": 0.4048, "step": 10734 }, { "epoch": 0.49791280148423006, "grad_norm": 7.841185569763184, "learning_rate": 5.112589732346749e-06, "loss": 0.416, "step": 10735 }, { "epoch": 0.49795918367346936, "grad_norm": 4.634997367858887, "learning_rate": 5.111853974929495e-06, "loss": 0.3605, "step": 10736 }, { "epoch": 0.4980055658627087, "grad_norm": 4.295985698699951, "learning_rate": 5.111118215088985e-06, "loss": 0.2464, "step": 10737 }, { "epoch": 0.4980519480519481, "grad_norm": 5.314948081970215, "learning_rate": 5.110382452841156e-06, "loss": 0.3773, "step": 10738 }, { "epoch": 0.4980983302411874, "grad_norm": 3.339841365814209, "learning_rate": 5.109646688201949e-06, "loss": 0.2455, "step": 10739 }, { "epoch": 0.49814471243042674, "grad_norm": 7.1529541015625, "learning_rate": 5.108910921187306e-06, "loss": 0.3335, "step": 10740 }, { "epoch": 0.49819109461966604, "grad_norm": 8.454510688781738, "learning_rate": 5.108175151813164e-06, "loss": 0.3369, "step": 10741 }, { "epoch": 0.4982374768089054, "grad_norm": 8.190652847290039, "learning_rate": 5.107439380095465e-06, "loss": 0.387, "step": 10742 }, { "epoch": 0.4982838589981447, "grad_norm": 14.373132705688477, "learning_rate": 5.106703606050148e-06, "loss": 0.3404, "step": 10743 }, { "epoch": 0.49833024118738406, "grad_norm": 4.313149929046631, "learning_rate": 5.105967829693155e-06, "loss": 0.3609, "step": 10744 }, { "epoch": 0.49837662337662336, "grad_norm": 4.069979190826416, "learning_rate": 5.105232051040424e-06, "loss": 0.2589, "step": 10745 }, { "epoch": 0.4984230055658627, "grad_norm": 5.446477890014648, "learning_rate": 5.104496270107896e-06, "loss": 0.3967, "step": 10746 }, { "epoch": 0.498469387755102, "grad_norm": 7.6487884521484375, "learning_rate": 5.103760486911511e-06, "loss": 0.4526, "step": 10747 }, { "epoch": 0.4985157699443414, "grad_norm": 3.972641706466675, "learning_rate": 5.1030247014672105e-06, "loss": 0.2805, "step": 10748 }, { "epoch": 0.4985621521335807, "grad_norm": 7.121078014373779, "learning_rate": 5.102288913790934e-06, "loss": 0.3971, "step": 10749 }, { "epoch": 0.49860853432282004, "grad_norm": 9.5167818069458, "learning_rate": 5.101553123898621e-06, "loss": 0.4734, "step": 10750 }, { "epoch": 0.49865491651205934, "grad_norm": 5.667703628540039, "learning_rate": 5.100817331806215e-06, "loss": 0.2548, "step": 10751 }, { "epoch": 0.4987012987012987, "grad_norm": 6.825207710266113, "learning_rate": 5.100081537529653e-06, "loss": 0.5025, "step": 10752 }, { "epoch": 0.49874768089053806, "grad_norm": 9.421553611755371, "learning_rate": 5.099345741084879e-06, "loss": 0.4456, "step": 10753 }, { "epoch": 0.49879406307977736, "grad_norm": 7.851845741271973, "learning_rate": 5.098609942487831e-06, "loss": 0.3273, "step": 10754 }, { "epoch": 0.4988404452690167, "grad_norm": 5.833885192871094, "learning_rate": 5.09787414175445e-06, "loss": 0.3425, "step": 10755 }, { "epoch": 0.498886827458256, "grad_norm": 6.759583473205566, "learning_rate": 5.097138338900679e-06, "loss": 0.3853, "step": 10756 }, { "epoch": 0.4989332096474954, "grad_norm": 4.979320049285889, "learning_rate": 5.096402533942455e-06, "loss": 0.3204, "step": 10757 }, { "epoch": 0.4989795918367347, "grad_norm": 6.346184730529785, "learning_rate": 5.095666726895721e-06, "loss": 0.3085, "step": 10758 }, { "epoch": 0.49902597402597404, "grad_norm": 6.335411071777344, "learning_rate": 5.09493091777642e-06, "loss": 0.4054, "step": 10759 }, { "epoch": 0.49907235621521334, "grad_norm": 10.333483695983887, "learning_rate": 5.0941951066004906e-06, "loss": 0.3569, "step": 10760 }, { "epoch": 0.4991187384044527, "grad_norm": 7.751437187194824, "learning_rate": 5.093459293383872e-06, "loss": 0.3613, "step": 10761 }, { "epoch": 0.499165120593692, "grad_norm": 4.075148582458496, "learning_rate": 5.0927234781425084e-06, "loss": 0.3234, "step": 10762 }, { "epoch": 0.49921150278293136, "grad_norm": 5.875036239624023, "learning_rate": 5.091987660892339e-06, "loss": 0.3977, "step": 10763 }, { "epoch": 0.49925788497217066, "grad_norm": 8.037946701049805, "learning_rate": 5.091251841649306e-06, "loss": 0.4169, "step": 10764 }, { "epoch": 0.49930426716141, "grad_norm": 7.645385265350342, "learning_rate": 5.090516020429349e-06, "loss": 0.3894, "step": 10765 }, { "epoch": 0.4993506493506494, "grad_norm": 5.920991897583008, "learning_rate": 5.089780197248411e-06, "loss": 0.3477, "step": 10766 }, { "epoch": 0.4993970315398887, "grad_norm": 9.949776649475098, "learning_rate": 5.089044372122432e-06, "loss": 0.4206, "step": 10767 }, { "epoch": 0.49944341372912804, "grad_norm": 12.255973815917969, "learning_rate": 5.088308545067354e-06, "loss": 0.4269, "step": 10768 }, { "epoch": 0.49948979591836734, "grad_norm": 10.013941764831543, "learning_rate": 5.087572716099117e-06, "loss": 0.3378, "step": 10769 }, { "epoch": 0.4995361781076067, "grad_norm": 3.4623310565948486, "learning_rate": 5.086836885233665e-06, "loss": 0.225, "step": 10770 }, { "epoch": 0.499582560296846, "grad_norm": 6.21796178817749, "learning_rate": 5.086101052486936e-06, "loss": 0.339, "step": 10771 }, { "epoch": 0.49962894248608536, "grad_norm": 9.340045928955078, "learning_rate": 5.085365217874875e-06, "loss": 0.2945, "step": 10772 }, { "epoch": 0.49967532467532466, "grad_norm": 8.239259719848633, "learning_rate": 5.084629381413419e-06, "loss": 0.4138, "step": 10773 }, { "epoch": 0.499721706864564, "grad_norm": 7.483577728271484, "learning_rate": 5.083893543118513e-06, "loss": 0.3444, "step": 10774 }, { "epoch": 0.4997680890538033, "grad_norm": 10.09752082824707, "learning_rate": 5.0831577030061e-06, "loss": 0.4329, "step": 10775 }, { "epoch": 0.4998144712430427, "grad_norm": 4.166254043579102, "learning_rate": 5.082421861092116e-06, "loss": 0.3176, "step": 10776 }, { "epoch": 0.499860853432282, "grad_norm": 5.570476531982422, "learning_rate": 5.081686017392508e-06, "loss": 0.2888, "step": 10777 }, { "epoch": 0.49990723562152134, "grad_norm": 9.356586456298828, "learning_rate": 5.080950171923215e-06, "loss": 0.4052, "step": 10778 }, { "epoch": 0.49995361781076064, "grad_norm": 4.547125339508057, "learning_rate": 5.080214324700178e-06, "loss": 0.406, "step": 10779 }, { "epoch": 0.5, "grad_norm": 5.0294270515441895, "learning_rate": 5.079478475739341e-06, "loss": 0.3598, "step": 10780 }, { "epoch": 0.5, "eval_loss": 0.34115365147590637, "eval_runtime": 38.0301, "eval_samples_per_second": 45.832, "eval_steps_per_second": 5.732, "step": 10780 }, { "epoch": 0.5000463821892394, "grad_norm": 5.088116645812988, "learning_rate": 5.0787426250566454e-06, "loss": 0.3604, "step": 10781 }, { "epoch": 0.5000927643784787, "grad_norm": 3.331374406814575, "learning_rate": 5.07800677266803e-06, "loss": 0.2662, "step": 10782 }, { "epoch": 0.500139146567718, "grad_norm": 6.916153430938721, "learning_rate": 5.077270918589442e-06, "loss": 0.3451, "step": 10783 }, { "epoch": 0.5001855287569573, "grad_norm": 10.949832916259766, "learning_rate": 5.0765350628368185e-06, "loss": 0.4284, "step": 10784 }, { "epoch": 0.5002319109461967, "grad_norm": 9.032220840454102, "learning_rate": 5.075799205426103e-06, "loss": 0.381, "step": 10785 }, { "epoch": 0.500278293135436, "grad_norm": 6.529811382293701, "learning_rate": 5.075063346373239e-06, "loss": 0.299, "step": 10786 }, { "epoch": 0.5003246753246753, "grad_norm": 5.704183578491211, "learning_rate": 5.074327485694166e-06, "loss": 0.2816, "step": 10787 }, { "epoch": 0.5003710575139146, "grad_norm": 5.1531901359558105, "learning_rate": 5.073591623404827e-06, "loss": 0.3622, "step": 10788 }, { "epoch": 0.500417439703154, "grad_norm": 8.63512134552002, "learning_rate": 5.072855759521167e-06, "loss": 0.3547, "step": 10789 }, { "epoch": 0.5004638218923934, "grad_norm": 14.72978401184082, "learning_rate": 5.072119894059123e-06, "loss": 0.4416, "step": 10790 }, { "epoch": 0.5005102040816326, "grad_norm": 7.116847038269043, "learning_rate": 5.07138402703464e-06, "loss": 0.3253, "step": 10791 }, { "epoch": 0.500556586270872, "grad_norm": 11.322092056274414, "learning_rate": 5.0706481584636605e-06, "loss": 0.3859, "step": 10792 }, { "epoch": 0.5006029684601113, "grad_norm": 7.5131354331970215, "learning_rate": 5.069912288362124e-06, "loss": 0.3399, "step": 10793 }, { "epoch": 0.5006493506493507, "grad_norm": 4.263430118560791, "learning_rate": 5.0691764167459765e-06, "loss": 0.3118, "step": 10794 }, { "epoch": 0.50069573283859, "grad_norm": 9.83611011505127, "learning_rate": 5.068440543631157e-06, "loss": 0.3711, "step": 10795 }, { "epoch": 0.5007421150278293, "grad_norm": 4.528404712677002, "learning_rate": 5.06770466903361e-06, "loss": 0.3779, "step": 10796 }, { "epoch": 0.5007884972170686, "grad_norm": 3.6978795528411865, "learning_rate": 5.066968792969277e-06, "loss": 0.3369, "step": 10797 }, { "epoch": 0.500834879406308, "grad_norm": 6.33404016494751, "learning_rate": 5.0662329154541e-06, "loss": 0.3334, "step": 10798 }, { "epoch": 0.5008812615955474, "grad_norm": 6.0141119956970215, "learning_rate": 5.065497036504023e-06, "loss": 0.3417, "step": 10799 }, { "epoch": 0.5009276437847866, "grad_norm": 9.947463989257812, "learning_rate": 5.064761156134986e-06, "loss": 0.4257, "step": 10800 }, { "epoch": 0.500974025974026, "grad_norm": 10.760629653930664, "learning_rate": 5.0640252743629326e-06, "loss": 0.35, "step": 10801 }, { "epoch": 0.5010204081632653, "grad_norm": 4.151649475097656, "learning_rate": 5.063289391203808e-06, "loss": 0.2997, "step": 10802 }, { "epoch": 0.5010667903525047, "grad_norm": 4.4348039627075195, "learning_rate": 5.062553506673549e-06, "loss": 0.2354, "step": 10803 }, { "epoch": 0.5011131725417439, "grad_norm": 8.155670166015625, "learning_rate": 5.061817620788102e-06, "loss": 0.4617, "step": 10804 }, { "epoch": 0.5011595547309833, "grad_norm": 6.833518981933594, "learning_rate": 5.06108173356341e-06, "loss": 0.4145, "step": 10805 }, { "epoch": 0.5012059369202226, "grad_norm": 6.565609931945801, "learning_rate": 5.060345845015413e-06, "loss": 0.2444, "step": 10806 }, { "epoch": 0.501252319109462, "grad_norm": 10.237786293029785, "learning_rate": 5.059609955160057e-06, "loss": 0.4252, "step": 10807 }, { "epoch": 0.5012987012987012, "grad_norm": 6.174278736114502, "learning_rate": 5.0588740640132805e-06, "loss": 0.2562, "step": 10808 }, { "epoch": 0.5013450834879406, "grad_norm": 14.865206718444824, "learning_rate": 5.0581381715910295e-06, "loss": 0.4322, "step": 10809 }, { "epoch": 0.50139146567718, "grad_norm": 9.97364330291748, "learning_rate": 5.0574022779092456e-06, "loss": 0.2871, "step": 10810 }, { "epoch": 0.5014378478664193, "grad_norm": 4.963634967803955, "learning_rate": 5.056666382983872e-06, "loss": 0.3397, "step": 10811 }, { "epoch": 0.5014842300556587, "grad_norm": 8.199739456176758, "learning_rate": 5.055930486830851e-06, "loss": 0.3526, "step": 10812 }, { "epoch": 0.5015306122448979, "grad_norm": 7.755131721496582, "learning_rate": 5.055194589466125e-06, "loss": 0.3307, "step": 10813 }, { "epoch": 0.5015769944341373, "grad_norm": 10.028253555297852, "learning_rate": 5.054458690905637e-06, "loss": 0.3774, "step": 10814 }, { "epoch": 0.5016233766233766, "grad_norm": 10.547505378723145, "learning_rate": 5.053722791165332e-06, "loss": 0.3597, "step": 10815 }, { "epoch": 0.501669758812616, "grad_norm": 7.416969299316406, "learning_rate": 5.052986890261149e-06, "loss": 0.3431, "step": 10816 }, { "epoch": 0.5017161410018552, "grad_norm": 8.302802085876465, "learning_rate": 5.052250988209035e-06, "loss": 0.3542, "step": 10817 }, { "epoch": 0.5017625231910946, "grad_norm": 11.016948699951172, "learning_rate": 5.051515085024928e-06, "loss": 0.4382, "step": 10818 }, { "epoch": 0.501808905380334, "grad_norm": 5.531054496765137, "learning_rate": 5.050779180724777e-06, "loss": 0.341, "step": 10819 }, { "epoch": 0.5018552875695733, "grad_norm": 4.48560905456543, "learning_rate": 5.05004327532452e-06, "loss": 0.2637, "step": 10820 }, { "epoch": 0.5019016697588126, "grad_norm": 6.973857402801514, "learning_rate": 5.049307368840101e-06, "loss": 0.3167, "step": 10821 }, { "epoch": 0.5019480519480519, "grad_norm": 6.231462478637695, "learning_rate": 5.048571461287467e-06, "loss": 0.2884, "step": 10822 }, { "epoch": 0.5019944341372913, "grad_norm": 7.111805438995361, "learning_rate": 5.047835552682555e-06, "loss": 0.3908, "step": 10823 }, { "epoch": 0.5020408163265306, "grad_norm": 20.456079483032227, "learning_rate": 5.047099643041312e-06, "loss": 0.4021, "step": 10824 }, { "epoch": 0.50208719851577, "grad_norm": 8.959856986999512, "learning_rate": 5.046363732379681e-06, "loss": 0.316, "step": 10825 }, { "epoch": 0.5021335807050092, "grad_norm": 7.845141410827637, "learning_rate": 5.0456278207136025e-06, "loss": 0.3618, "step": 10826 }, { "epoch": 0.5021799628942486, "grad_norm": 3.878861665725708, "learning_rate": 5.0448919080590234e-06, "loss": 0.2985, "step": 10827 }, { "epoch": 0.502226345083488, "grad_norm": 8.775757789611816, "learning_rate": 5.044155994431883e-06, "loss": 0.4518, "step": 10828 }, { "epoch": 0.5022727272727273, "grad_norm": 6.232418060302734, "learning_rate": 5.043420079848126e-06, "loss": 0.4072, "step": 10829 }, { "epoch": 0.5023191094619666, "grad_norm": 5.575283050537109, "learning_rate": 5.042684164323698e-06, "loss": 0.2597, "step": 10830 }, { "epoch": 0.5023654916512059, "grad_norm": 8.379898071289062, "learning_rate": 5.041948247874537e-06, "loss": 0.4503, "step": 10831 }, { "epoch": 0.5024118738404453, "grad_norm": 11.323997497558594, "learning_rate": 5.0412123305165924e-06, "loss": 0.4709, "step": 10832 }, { "epoch": 0.5024582560296846, "grad_norm": 6.287606716156006, "learning_rate": 5.040476412265802e-06, "loss": 0.292, "step": 10833 }, { "epoch": 0.5025046382189239, "grad_norm": 12.063923835754395, "learning_rate": 5.039740493138113e-06, "loss": 0.4406, "step": 10834 }, { "epoch": 0.5025510204081632, "grad_norm": 5.597596168518066, "learning_rate": 5.039004573149467e-06, "loss": 0.3381, "step": 10835 }, { "epoch": 0.5025974025974026, "grad_norm": 9.021228790283203, "learning_rate": 5.038268652315808e-06, "loss": 0.3571, "step": 10836 }, { "epoch": 0.502643784786642, "grad_norm": 8.531638145446777, "learning_rate": 5.037532730653076e-06, "loss": 0.3663, "step": 10837 }, { "epoch": 0.5026901669758813, "grad_norm": 4.8469109535217285, "learning_rate": 5.036796808177221e-06, "loss": 0.3096, "step": 10838 }, { "epoch": 0.5027365491651206, "grad_norm": 6.886699676513672, "learning_rate": 5.0360608849041805e-06, "loss": 0.234, "step": 10839 }, { "epoch": 0.5027829313543599, "grad_norm": 7.521595478057861, "learning_rate": 5.0353249608499e-06, "loss": 0.375, "step": 10840 }, { "epoch": 0.5028293135435993, "grad_norm": 4.547664165496826, "learning_rate": 5.034589036030323e-06, "loss": 0.3169, "step": 10841 }, { "epoch": 0.5028756957328386, "grad_norm": 5.75548791885376, "learning_rate": 5.033853110461393e-06, "loss": 0.2707, "step": 10842 }, { "epoch": 0.5029220779220779, "grad_norm": 5.492951393127441, "learning_rate": 5.033117184159053e-06, "loss": 0.3704, "step": 10843 }, { "epoch": 0.5029684601113172, "grad_norm": 5.109784126281738, "learning_rate": 5.0323812571392475e-06, "loss": 0.2855, "step": 10844 }, { "epoch": 0.5030148423005566, "grad_norm": 8.079687118530273, "learning_rate": 5.031645329417918e-06, "loss": 0.349, "step": 10845 }, { "epoch": 0.503061224489796, "grad_norm": 7.964157581329346, "learning_rate": 5.030909401011009e-06, "loss": 0.2553, "step": 10846 }, { "epoch": 0.5031076066790352, "grad_norm": 11.937621116638184, "learning_rate": 5.030173471934465e-06, "loss": 0.3593, "step": 10847 }, { "epoch": 0.5031539888682746, "grad_norm": 10.411423683166504, "learning_rate": 5.029437542204228e-06, "loss": 0.4949, "step": 10848 }, { "epoch": 0.5032003710575139, "grad_norm": 7.607686519622803, "learning_rate": 5.028701611836243e-06, "loss": 0.3883, "step": 10849 }, { "epoch": 0.5032467532467533, "grad_norm": 5.316047668457031, "learning_rate": 5.027965680846452e-06, "loss": 0.33, "step": 10850 }, { "epoch": 0.5032931354359926, "grad_norm": 9.2288236618042, "learning_rate": 5.027229749250799e-06, "loss": 0.4159, "step": 10851 }, { "epoch": 0.5033395176252319, "grad_norm": 8.818437576293945, "learning_rate": 5.026493817065229e-06, "loss": 0.3967, "step": 10852 }, { "epoch": 0.5033858998144712, "grad_norm": 9.375265121459961, "learning_rate": 5.0257578843056835e-06, "loss": 0.3149, "step": 10853 }, { "epoch": 0.5034322820037106, "grad_norm": 10.52382755279541, "learning_rate": 5.025021950988108e-06, "loss": 0.3581, "step": 10854 }, { "epoch": 0.50347866419295, "grad_norm": 6.514389991760254, "learning_rate": 5.024286017128443e-06, "loss": 0.3956, "step": 10855 }, { "epoch": 0.5035250463821892, "grad_norm": 14.16124153137207, "learning_rate": 5.023550082742637e-06, "loss": 0.4276, "step": 10856 }, { "epoch": 0.5035714285714286, "grad_norm": 6.704988956451416, "learning_rate": 5.02281414784663e-06, "loss": 0.3612, "step": 10857 }, { "epoch": 0.5036178107606679, "grad_norm": 4.940714359283447, "learning_rate": 5.022078212456365e-06, "loss": 0.2116, "step": 10858 }, { "epoch": 0.5036641929499073, "grad_norm": 7.23270320892334, "learning_rate": 5.021342276587788e-06, "loss": 0.3827, "step": 10859 }, { "epoch": 0.5037105751391465, "grad_norm": 5.2334208488464355, "learning_rate": 5.020606340256843e-06, "loss": 0.3459, "step": 10860 }, { "epoch": 0.5037569573283859, "grad_norm": 4.597510814666748, "learning_rate": 5.019870403479471e-06, "loss": 0.3434, "step": 10861 }, { "epoch": 0.5038033395176252, "grad_norm": 5.916145324707031, "learning_rate": 5.019134466271617e-06, "loss": 0.3678, "step": 10862 }, { "epoch": 0.5038497217068646, "grad_norm": 5.3952813148498535, "learning_rate": 5.018398528649227e-06, "loss": 0.3828, "step": 10863 }, { "epoch": 0.5038961038961038, "grad_norm": 4.4761271476745605, "learning_rate": 5.01766259062824e-06, "loss": 0.4161, "step": 10864 }, { "epoch": 0.5039424860853432, "grad_norm": 6.895099639892578, "learning_rate": 5.016926652224604e-06, "loss": 0.4242, "step": 10865 }, { "epoch": 0.5039888682745826, "grad_norm": 7.477218151092529, "learning_rate": 5.016190713454261e-06, "loss": 0.3427, "step": 10866 }, { "epoch": 0.5040352504638219, "grad_norm": 5.705673694610596, "learning_rate": 5.0154547743331535e-06, "loss": 0.2498, "step": 10867 }, { "epoch": 0.5040816326530613, "grad_norm": 8.78946304321289, "learning_rate": 5.014718834877228e-06, "loss": 0.3945, "step": 10868 }, { "epoch": 0.5041280148423005, "grad_norm": 6.997143745422363, "learning_rate": 5.013982895102425e-06, "loss": 0.3835, "step": 10869 }, { "epoch": 0.5041743970315399, "grad_norm": 8.073007583618164, "learning_rate": 5.013246955024691e-06, "loss": 0.3914, "step": 10870 }, { "epoch": 0.5042207792207792, "grad_norm": 6.396225929260254, "learning_rate": 5.0125110146599685e-06, "loss": 0.3294, "step": 10871 }, { "epoch": 0.5042671614100186, "grad_norm": 6.40411376953125, "learning_rate": 5.011775074024202e-06, "loss": 0.2531, "step": 10872 }, { "epoch": 0.5043135435992578, "grad_norm": 9.535917282104492, "learning_rate": 5.011039133133334e-06, "loss": 0.3898, "step": 10873 }, { "epoch": 0.5043599257884972, "grad_norm": 9.421650886535645, "learning_rate": 5.010303192003309e-06, "loss": 0.3536, "step": 10874 }, { "epoch": 0.5044063079777366, "grad_norm": 14.450648307800293, "learning_rate": 5.009567250650071e-06, "loss": 0.3989, "step": 10875 }, { "epoch": 0.5044526901669759, "grad_norm": 6.850481033325195, "learning_rate": 5.008831309089565e-06, "loss": 0.233, "step": 10876 }, { "epoch": 0.5044990723562152, "grad_norm": 5.647840976715088, "learning_rate": 5.008095367337731e-06, "loss": 0.2888, "step": 10877 }, { "epoch": 0.5045454545454545, "grad_norm": 8.14204216003418, "learning_rate": 5.007359425410516e-06, "loss": 0.3068, "step": 10878 }, { "epoch": 0.5045918367346939, "grad_norm": 8.594659805297852, "learning_rate": 5.006623483323863e-06, "loss": 0.3768, "step": 10879 }, { "epoch": 0.5046382189239332, "grad_norm": 8.563811302185059, "learning_rate": 5.005887541093716e-06, "loss": 0.3731, "step": 10880 }, { "epoch": 0.5046846011131726, "grad_norm": 5.3875508308410645, "learning_rate": 5.005151598736017e-06, "loss": 0.3228, "step": 10881 }, { "epoch": 0.5047309833024118, "grad_norm": 7.72625207901001, "learning_rate": 5.0044156562667145e-06, "loss": 0.3509, "step": 10882 }, { "epoch": 0.5047773654916512, "grad_norm": 2.9534502029418945, "learning_rate": 5.003679713701747e-06, "loss": 0.2519, "step": 10883 }, { "epoch": 0.5048237476808906, "grad_norm": 8.764243125915527, "learning_rate": 5.002943771057061e-06, "loss": 0.3679, "step": 10884 }, { "epoch": 0.5048701298701299, "grad_norm": 11.2798433303833, "learning_rate": 5.002207828348598e-06, "loss": 0.3546, "step": 10885 }, { "epoch": 0.5049165120593692, "grad_norm": 8.017149925231934, "learning_rate": 5.001471885592305e-06, "loss": 0.2902, "step": 10886 }, { "epoch": 0.5049628942486085, "grad_norm": 5.70421028137207, "learning_rate": 5.000735942804126e-06, "loss": 0.3323, "step": 10887 }, { "epoch": 0.5050092764378479, "grad_norm": 13.4288330078125, "learning_rate": 5e-06, "loss": 0.3154, "step": 10888 }, { "epoch": 0.5050556586270872, "grad_norm": 5.848767280578613, "learning_rate": 4.9992640571958765e-06, "loss": 0.3874, "step": 10889 }, { "epoch": 0.5051020408163265, "grad_norm": 7.74229621887207, "learning_rate": 4.9985281144076945e-06, "loss": 0.3993, "step": 10890 }, { "epoch": 0.5051484230055658, "grad_norm": 6.523682117462158, "learning_rate": 4.997792171651404e-06, "loss": 0.3428, "step": 10891 }, { "epoch": 0.5051948051948052, "grad_norm": 5.971771717071533, "learning_rate": 4.997056228942942e-06, "loss": 0.2516, "step": 10892 }, { "epoch": 0.5052411873840446, "grad_norm": 6.901121139526367, "learning_rate": 4.9963202862982556e-06, "loss": 0.4372, "step": 10893 }, { "epoch": 0.5052875695732839, "grad_norm": 8.63766860961914, "learning_rate": 4.995584343733287e-06, "loss": 0.3175, "step": 10894 }, { "epoch": 0.5053339517625232, "grad_norm": 3.744856595993042, "learning_rate": 4.994848401263983e-06, "loss": 0.1746, "step": 10895 }, { "epoch": 0.5053803339517625, "grad_norm": 6.605525493621826, "learning_rate": 4.994112458906285e-06, "loss": 0.3555, "step": 10896 }, { "epoch": 0.5054267161410019, "grad_norm": 7.523900032043457, "learning_rate": 4.993376516676139e-06, "loss": 0.2314, "step": 10897 }, { "epoch": 0.5054730983302412, "grad_norm": 4.911028861999512, "learning_rate": 4.992640574589486e-06, "loss": 0.2318, "step": 10898 }, { "epoch": 0.5055194805194805, "grad_norm": 5.560793876647949, "learning_rate": 4.991904632662271e-06, "loss": 0.3691, "step": 10899 }, { "epoch": 0.5055658627087198, "grad_norm": 6.921875476837158, "learning_rate": 4.991168690910437e-06, "loss": 0.243, "step": 10900 }, { "epoch": 0.5056122448979592, "grad_norm": 4.629385948181152, "learning_rate": 4.9904327493499294e-06, "loss": 0.4111, "step": 10901 }, { "epoch": 0.5056586270871986, "grad_norm": 6.253500938415527, "learning_rate": 4.9896968079966925e-06, "loss": 0.2802, "step": 10902 }, { "epoch": 0.5057050092764378, "grad_norm": 8.654508590698242, "learning_rate": 4.988960866866668e-06, "loss": 0.4861, "step": 10903 }, { "epoch": 0.5057513914656772, "grad_norm": 5.969454288482666, "learning_rate": 4.988224925975799e-06, "loss": 0.2653, "step": 10904 }, { "epoch": 0.5057977736549165, "grad_norm": 6.379394054412842, "learning_rate": 4.987488985340032e-06, "loss": 0.2406, "step": 10905 }, { "epoch": 0.5058441558441559, "grad_norm": 5.656774044036865, "learning_rate": 4.98675304497531e-06, "loss": 0.4449, "step": 10906 }, { "epoch": 0.5058905380333952, "grad_norm": 9.856046676635742, "learning_rate": 4.986017104897575e-06, "loss": 0.4127, "step": 10907 }, { "epoch": 0.5059369202226345, "grad_norm": 7.959039211273193, "learning_rate": 4.9852811651227754e-06, "loss": 0.3577, "step": 10908 }, { "epoch": 0.5059833024118738, "grad_norm": 9.351698875427246, "learning_rate": 4.984545225666848e-06, "loss": 0.489, "step": 10909 }, { "epoch": 0.5060296846011132, "grad_norm": 4.6031904220581055, "learning_rate": 4.983809286545741e-06, "loss": 0.371, "step": 10910 }, { "epoch": 0.5060760667903526, "grad_norm": 5.327841758728027, "learning_rate": 4.9830733477753974e-06, "loss": 0.3184, "step": 10911 }, { "epoch": 0.5061224489795918, "grad_norm": 5.794271945953369, "learning_rate": 4.9823374093717604e-06, "loss": 0.3827, "step": 10912 }, { "epoch": 0.5061688311688312, "grad_norm": 13.478523254394531, "learning_rate": 4.981601471350776e-06, "loss": 0.4159, "step": 10913 }, { "epoch": 0.5062152133580705, "grad_norm": 5.786704063415527, "learning_rate": 4.980865533728384e-06, "loss": 0.3061, "step": 10914 }, { "epoch": 0.5062615955473099, "grad_norm": 4.146669864654541, "learning_rate": 4.980129596520531e-06, "loss": 0.239, "step": 10915 }, { "epoch": 0.5063079777365491, "grad_norm": 10.92531681060791, "learning_rate": 4.979393659743159e-06, "loss": 0.599, "step": 10916 }, { "epoch": 0.5063543599257885, "grad_norm": 5.923361778259277, "learning_rate": 4.978657723412212e-06, "loss": 0.3331, "step": 10917 }, { "epoch": 0.5064007421150278, "grad_norm": 6.281412601470947, "learning_rate": 4.977921787543636e-06, "loss": 0.3037, "step": 10918 }, { "epoch": 0.5064471243042672, "grad_norm": 9.209565162658691, "learning_rate": 4.977185852153373e-06, "loss": 0.2969, "step": 10919 }, { "epoch": 0.5064935064935064, "grad_norm": 8.827865600585938, "learning_rate": 4.976449917257365e-06, "loss": 0.3028, "step": 10920 }, { "epoch": 0.5065398886827458, "grad_norm": 4.509819507598877, "learning_rate": 4.975713982871558e-06, "loss": 0.2602, "step": 10921 }, { "epoch": 0.5065862708719852, "grad_norm": 9.168662071228027, "learning_rate": 4.974978049011894e-06, "loss": 0.3028, "step": 10922 }, { "epoch": 0.5066326530612245, "grad_norm": 6.099436283111572, "learning_rate": 4.974242115694317e-06, "loss": 0.3376, "step": 10923 }, { "epoch": 0.5066790352504639, "grad_norm": 4.387308597564697, "learning_rate": 4.973506182934773e-06, "loss": 0.1898, "step": 10924 }, { "epoch": 0.5067254174397031, "grad_norm": 5.012239933013916, "learning_rate": 4.972770250749203e-06, "loss": 0.2904, "step": 10925 }, { "epoch": 0.5067717996289425, "grad_norm": 7.552067756652832, "learning_rate": 4.97203431915355e-06, "loss": 0.3317, "step": 10926 }, { "epoch": 0.5068181818181818, "grad_norm": 8.204325675964355, "learning_rate": 4.971298388163758e-06, "loss": 0.3983, "step": 10927 }, { "epoch": 0.5068645640074212, "grad_norm": 5.650095462799072, "learning_rate": 4.970562457795772e-06, "loss": 0.293, "step": 10928 }, { "epoch": 0.5069109461966604, "grad_norm": 5.637993812561035, "learning_rate": 4.969826528065536e-06, "loss": 0.2678, "step": 10929 }, { "epoch": 0.5069573283858998, "grad_norm": 6.295041561126709, "learning_rate": 4.969090598988993e-06, "loss": 0.289, "step": 10930 }, { "epoch": 0.5070037105751392, "grad_norm": 9.915241241455078, "learning_rate": 4.968354670582084e-06, "loss": 0.3506, "step": 10931 }, { "epoch": 0.5070500927643785, "grad_norm": 6.205341339111328, "learning_rate": 4.967618742860755e-06, "loss": 0.2437, "step": 10932 }, { "epoch": 0.5070964749536178, "grad_norm": 8.013250350952148, "learning_rate": 4.966882815840948e-06, "loss": 0.3758, "step": 10933 }, { "epoch": 0.5071428571428571, "grad_norm": 6.511595249176025, "learning_rate": 4.966146889538608e-06, "loss": 0.2646, "step": 10934 }, { "epoch": 0.5071892393320965, "grad_norm": 5.88001012802124, "learning_rate": 4.965410963969679e-06, "loss": 0.3663, "step": 10935 }, { "epoch": 0.5072356215213358, "grad_norm": 7.7945051193237305, "learning_rate": 4.964675039150102e-06, "loss": 0.3957, "step": 10936 }, { "epoch": 0.5072820037105752, "grad_norm": 6.184424877166748, "learning_rate": 4.963939115095821e-06, "loss": 0.3719, "step": 10937 }, { "epoch": 0.5073283858998144, "grad_norm": 5.519623756408691, "learning_rate": 4.963203191822781e-06, "loss": 0.369, "step": 10938 }, { "epoch": 0.5073747680890538, "grad_norm": 7.950486660003662, "learning_rate": 4.962467269346924e-06, "loss": 0.3113, "step": 10939 }, { "epoch": 0.5074211502782932, "grad_norm": 12.994677543640137, "learning_rate": 4.961731347684196e-06, "loss": 0.4004, "step": 10940 }, { "epoch": 0.5074675324675325, "grad_norm": 7.799816608428955, "learning_rate": 4.960995426850535e-06, "loss": 0.3127, "step": 10941 }, { "epoch": 0.5075139146567718, "grad_norm": 9.621438026428223, "learning_rate": 4.960259506861888e-06, "loss": 0.2995, "step": 10942 }, { "epoch": 0.5075602968460111, "grad_norm": 8.542698860168457, "learning_rate": 4.959523587734199e-06, "loss": 0.3065, "step": 10943 }, { "epoch": 0.5076066790352505, "grad_norm": 13.281461715698242, "learning_rate": 4.958787669483408e-06, "loss": 0.4436, "step": 10944 }, { "epoch": 0.5076530612244898, "grad_norm": 6.117120265960693, "learning_rate": 4.958051752125464e-06, "loss": 0.2975, "step": 10945 }, { "epoch": 0.5076994434137291, "grad_norm": 5.588451385498047, "learning_rate": 4.957315835676305e-06, "loss": 0.3209, "step": 10946 }, { "epoch": 0.5077458256029684, "grad_norm": 9.567619323730469, "learning_rate": 4.956579920151876e-06, "loss": 0.4032, "step": 10947 }, { "epoch": 0.5077922077922078, "grad_norm": 8.436141014099121, "learning_rate": 4.955844005568119e-06, "loss": 0.3298, "step": 10948 }, { "epoch": 0.5078385899814472, "grad_norm": 10.40993595123291, "learning_rate": 4.955108091940978e-06, "loss": 0.4502, "step": 10949 }, { "epoch": 0.5078849721706865, "grad_norm": 8.983742713928223, "learning_rate": 4.954372179286398e-06, "loss": 0.3073, "step": 10950 }, { "epoch": 0.5079313543599258, "grad_norm": 6.907447338104248, "learning_rate": 4.953636267620322e-06, "loss": 0.2844, "step": 10951 }, { "epoch": 0.5079777365491651, "grad_norm": 5.971335411071777, "learning_rate": 4.952900356958689e-06, "loss": 0.3419, "step": 10952 }, { "epoch": 0.5080241187384045, "grad_norm": 4.8363261222839355, "learning_rate": 4.952164447317446e-06, "loss": 0.3257, "step": 10953 }, { "epoch": 0.5080705009276438, "grad_norm": 7.492402076721191, "learning_rate": 4.951428538712534e-06, "loss": 0.361, "step": 10954 }, { "epoch": 0.5081168831168831, "grad_norm": 12.240377426147461, "learning_rate": 4.9506926311598994e-06, "loss": 0.4002, "step": 10955 }, { "epoch": 0.5081632653061224, "grad_norm": 10.4551362991333, "learning_rate": 4.949956724675482e-06, "loss": 0.3697, "step": 10956 }, { "epoch": 0.5082096474953618, "grad_norm": 7.18187141418457, "learning_rate": 4.949220819275226e-06, "loss": 0.3699, "step": 10957 }, { "epoch": 0.5082560296846012, "grad_norm": 8.57063102722168, "learning_rate": 4.948484914975072e-06, "loss": 0.3735, "step": 10958 }, { "epoch": 0.5083024118738404, "grad_norm": 8.859442710876465, "learning_rate": 4.947749011790967e-06, "loss": 0.3095, "step": 10959 }, { "epoch": 0.5083487940630798, "grad_norm": 4.685128211975098, "learning_rate": 4.947013109738853e-06, "loss": 0.2867, "step": 10960 }, { "epoch": 0.5083951762523191, "grad_norm": 9.634003639221191, "learning_rate": 4.94627720883467e-06, "loss": 0.4061, "step": 10961 }, { "epoch": 0.5084415584415585, "grad_norm": 8.138075828552246, "learning_rate": 4.945541309094365e-06, "loss": 0.3846, "step": 10962 }, { "epoch": 0.5084879406307977, "grad_norm": 12.150280952453613, "learning_rate": 4.944805410533877e-06, "loss": 0.3485, "step": 10963 }, { "epoch": 0.5085343228200371, "grad_norm": 6.987547397613525, "learning_rate": 4.9440695131691504e-06, "loss": 0.2972, "step": 10964 }, { "epoch": 0.5085807050092764, "grad_norm": 7.071789741516113, "learning_rate": 4.94333361701613e-06, "loss": 0.3562, "step": 10965 }, { "epoch": 0.5086270871985158, "grad_norm": 5.659125328063965, "learning_rate": 4.942597722090755e-06, "loss": 0.3054, "step": 10966 }, { "epoch": 0.5086734693877552, "grad_norm": 7.249014854431152, "learning_rate": 4.941861828408971e-06, "loss": 0.4173, "step": 10967 }, { "epoch": 0.5087198515769944, "grad_norm": 8.28429889678955, "learning_rate": 4.941125935986721e-06, "loss": 0.4144, "step": 10968 }, { "epoch": 0.5087662337662338, "grad_norm": 7.447789192199707, "learning_rate": 4.940390044839945e-06, "loss": 0.3925, "step": 10969 }, { "epoch": 0.5088126159554731, "grad_norm": 6.497064590454102, "learning_rate": 4.939654154984589e-06, "loss": 0.4076, "step": 10970 }, { "epoch": 0.5088589981447125, "grad_norm": 5.7593092918396, "learning_rate": 4.938918266436592e-06, "loss": 0.3877, "step": 10971 }, { "epoch": 0.5089053803339517, "grad_norm": 7.444198131561279, "learning_rate": 4.938182379211899e-06, "loss": 0.3228, "step": 10972 }, { "epoch": 0.5089517625231911, "grad_norm": 6.650213718414307, "learning_rate": 4.937446493326453e-06, "loss": 0.3734, "step": 10973 }, { "epoch": 0.5089981447124304, "grad_norm": 6.8925886154174805, "learning_rate": 4.936710608796195e-06, "loss": 0.3219, "step": 10974 }, { "epoch": 0.5090445269016698, "grad_norm": 8.432585716247559, "learning_rate": 4.935974725637068e-06, "loss": 0.2973, "step": 10975 }, { "epoch": 0.509090909090909, "grad_norm": 6.903285026550293, "learning_rate": 4.935238843865015e-06, "loss": 0.3467, "step": 10976 }, { "epoch": 0.5091372912801484, "grad_norm": 7.704698085784912, "learning_rate": 4.934502963495978e-06, "loss": 0.462, "step": 10977 }, { "epoch": 0.5091836734693878, "grad_norm": 7.845920562744141, "learning_rate": 4.9337670845459e-06, "loss": 0.3183, "step": 10978 }, { "epoch": 0.5092300556586271, "grad_norm": 4.312039375305176, "learning_rate": 4.933031207030724e-06, "loss": 0.2942, "step": 10979 }, { "epoch": 0.5092764378478665, "grad_norm": 8.772906303405762, "learning_rate": 4.932295330966392e-06, "loss": 0.3965, "step": 10980 }, { "epoch": 0.5093228200371057, "grad_norm": 8.612126350402832, "learning_rate": 4.931559456368844e-06, "loss": 0.3755, "step": 10981 }, { "epoch": 0.5093692022263451, "grad_norm": 6.952656269073486, "learning_rate": 4.930823583254025e-06, "loss": 0.3584, "step": 10982 }, { "epoch": 0.5094155844155844, "grad_norm": 9.109954833984375, "learning_rate": 4.930087711637876e-06, "loss": 0.3587, "step": 10983 }, { "epoch": 0.5094619666048238, "grad_norm": 4.827108860015869, "learning_rate": 4.929351841536342e-06, "loss": 0.2053, "step": 10984 }, { "epoch": 0.509508348794063, "grad_norm": 7.542735576629639, "learning_rate": 4.928615972965362e-06, "loss": 0.3315, "step": 10985 }, { "epoch": 0.5095547309833024, "grad_norm": 9.091203689575195, "learning_rate": 4.927880105940879e-06, "loss": 0.3995, "step": 10986 }, { "epoch": 0.5096011131725418, "grad_norm": 4.966659069061279, "learning_rate": 4.927144240478835e-06, "loss": 0.2596, "step": 10987 }, { "epoch": 0.5096474953617811, "grad_norm": 6.731932163238525, "learning_rate": 4.926408376595173e-06, "loss": 0.3842, "step": 10988 }, { "epoch": 0.5096938775510204, "grad_norm": 9.78364372253418, "learning_rate": 4.925672514305834e-06, "loss": 0.3555, "step": 10989 }, { "epoch": 0.5097402597402597, "grad_norm": 6.271210193634033, "learning_rate": 4.924936653626763e-06, "loss": 0.3821, "step": 10990 }, { "epoch": 0.5097866419294991, "grad_norm": 12.324037551879883, "learning_rate": 4.924200794573898e-06, "loss": 0.3638, "step": 10991 }, { "epoch": 0.5098330241187384, "grad_norm": 10.102566719055176, "learning_rate": 4.923464937163182e-06, "loss": 0.459, "step": 10992 }, { "epoch": 0.5098794063079778, "grad_norm": 9.848587036132812, "learning_rate": 4.92272908141056e-06, "loss": 0.5863, "step": 10993 }, { "epoch": 0.509925788497217, "grad_norm": 4.020741939544678, "learning_rate": 4.92199322733197e-06, "loss": 0.3105, "step": 10994 }, { "epoch": 0.5099721706864564, "grad_norm": 5.263456344604492, "learning_rate": 4.921257374943358e-06, "loss": 0.2561, "step": 10995 }, { "epoch": 0.5100185528756958, "grad_norm": 4.547765731811523, "learning_rate": 4.920521524260661e-06, "loss": 0.3438, "step": 10996 }, { "epoch": 0.5100649350649351, "grad_norm": 8.437373161315918, "learning_rate": 4.919785675299824e-06, "loss": 0.2769, "step": 10997 }, { "epoch": 0.5101113172541744, "grad_norm": 5.946854591369629, "learning_rate": 4.919049828076787e-06, "loss": 0.2882, "step": 10998 }, { "epoch": 0.5101576994434137, "grad_norm": 13.344595909118652, "learning_rate": 4.918313982607493e-06, "loss": 0.3295, "step": 10999 }, { "epoch": 0.5102040816326531, "grad_norm": 28.136993408203125, "learning_rate": 4.917578138907884e-06, "loss": 0.3808, "step": 11000 }, { "epoch": 0.5102504638218924, "grad_norm": 6.9837517738342285, "learning_rate": 4.916842296993904e-06, "loss": 0.3106, "step": 11001 }, { "epoch": 0.5102968460111317, "grad_norm": 5.709279537200928, "learning_rate": 4.916106456881488e-06, "loss": 0.2575, "step": 11002 }, { "epoch": 0.510343228200371, "grad_norm": 7.3227925300598145, "learning_rate": 4.915370618586582e-06, "loss": 0.341, "step": 11003 }, { "epoch": 0.5103896103896104, "grad_norm": 5.431521892547607, "learning_rate": 4.914634782125127e-06, "loss": 0.3502, "step": 11004 }, { "epoch": 0.5104359925788498, "grad_norm": 7.936300277709961, "learning_rate": 4.913898947513064e-06, "loss": 0.3758, "step": 11005 }, { "epoch": 0.5104823747680891, "grad_norm": 9.419219970703125, "learning_rate": 4.9131631147663374e-06, "loss": 0.3756, "step": 11006 }, { "epoch": 0.5105287569573284, "grad_norm": 4.893269062042236, "learning_rate": 4.9124272839008845e-06, "loss": 0.2935, "step": 11007 }, { "epoch": 0.5105751391465677, "grad_norm": 5.140554904937744, "learning_rate": 4.911691454932648e-06, "loss": 0.3067, "step": 11008 }, { "epoch": 0.5106215213358071, "grad_norm": 10.87037467956543, "learning_rate": 4.910955627877569e-06, "loss": 0.3353, "step": 11009 }, { "epoch": 0.5106679035250464, "grad_norm": 12.603129386901855, "learning_rate": 4.91021980275159e-06, "loss": 0.5554, "step": 11010 }, { "epoch": 0.5107142857142857, "grad_norm": 5.661171913146973, "learning_rate": 4.909483979570651e-06, "loss": 0.3315, "step": 11011 }, { "epoch": 0.510760667903525, "grad_norm": 6.250280380249023, "learning_rate": 4.908748158350696e-06, "loss": 0.2551, "step": 11012 }, { "epoch": 0.5108070500927644, "grad_norm": 5.034974575042725, "learning_rate": 4.908012339107663e-06, "loss": 0.2756, "step": 11013 }, { "epoch": 0.5108534322820037, "grad_norm": 9.548596382141113, "learning_rate": 4.907276521857493e-06, "loss": 0.527, "step": 11014 }, { "epoch": 0.510899814471243, "grad_norm": 7.143672943115234, "learning_rate": 4.906540706616129e-06, "loss": 0.3876, "step": 11015 }, { "epoch": 0.5109461966604824, "grad_norm": 9.138630867004395, "learning_rate": 4.90580489339951e-06, "loss": 0.3396, "step": 11016 }, { "epoch": 0.5109925788497217, "grad_norm": 10.238555908203125, "learning_rate": 4.905069082223582e-06, "loss": 0.2872, "step": 11017 }, { "epoch": 0.5110389610389611, "grad_norm": 5.000711917877197, "learning_rate": 4.90433327310428e-06, "loss": 0.2998, "step": 11018 }, { "epoch": 0.5110853432282003, "grad_norm": 11.44978141784668, "learning_rate": 4.9035974660575464e-06, "loss": 0.4304, "step": 11019 }, { "epoch": 0.5111317254174397, "grad_norm": 6.09005069732666, "learning_rate": 4.902861661099323e-06, "loss": 0.2993, "step": 11020 }, { "epoch": 0.511178107606679, "grad_norm": 8.595449447631836, "learning_rate": 4.90212585824555e-06, "loss": 0.3725, "step": 11021 }, { "epoch": 0.5112244897959184, "grad_norm": 6.368968963623047, "learning_rate": 4.901390057512172e-06, "loss": 0.402, "step": 11022 }, { "epoch": 0.5112708719851577, "grad_norm": 7.749078273773193, "learning_rate": 4.900654258915124e-06, "loss": 0.4525, "step": 11023 }, { "epoch": 0.511317254174397, "grad_norm": 5.459985733032227, "learning_rate": 4.899918462470349e-06, "loss": 0.3404, "step": 11024 }, { "epoch": 0.5113636363636364, "grad_norm": 4.388143539428711, "learning_rate": 4.899182668193787e-06, "loss": 0.3868, "step": 11025 }, { "epoch": 0.5114100185528757, "grad_norm": 11.949085235595703, "learning_rate": 4.8984468761013794e-06, "loss": 0.4491, "step": 11026 }, { "epoch": 0.5114564007421151, "grad_norm": 5.53985595703125, "learning_rate": 4.897711086209067e-06, "loss": 0.2741, "step": 11027 }, { "epoch": 0.5115027829313543, "grad_norm": 13.14823055267334, "learning_rate": 4.896975298532792e-06, "loss": 0.3223, "step": 11028 }, { "epoch": 0.5115491651205937, "grad_norm": 7.2252278327941895, "learning_rate": 4.896239513088491e-06, "loss": 0.3946, "step": 11029 }, { "epoch": 0.511595547309833, "grad_norm": 7.171745777130127, "learning_rate": 4.895503729892106e-06, "loss": 0.3857, "step": 11030 }, { "epoch": 0.5116419294990724, "grad_norm": 10.050294876098633, "learning_rate": 4.894767948959578e-06, "loss": 0.5394, "step": 11031 }, { "epoch": 0.5116883116883116, "grad_norm": 5.163519382476807, "learning_rate": 4.894032170306846e-06, "loss": 0.2981, "step": 11032 }, { "epoch": 0.511734693877551, "grad_norm": 4.733736038208008, "learning_rate": 4.893296393949854e-06, "loss": 0.1677, "step": 11033 }, { "epoch": 0.5117810760667904, "grad_norm": 5.433709621429443, "learning_rate": 4.892560619904536e-06, "loss": 0.335, "step": 11034 }, { "epoch": 0.5118274582560297, "grad_norm": 5.807110786437988, "learning_rate": 4.8918248481868375e-06, "loss": 0.2024, "step": 11035 }, { "epoch": 0.5118738404452691, "grad_norm": 9.500214576721191, "learning_rate": 4.891089078812695e-06, "loss": 0.4047, "step": 11036 }, { "epoch": 0.5119202226345083, "grad_norm": 8.85529613494873, "learning_rate": 4.890353311798051e-06, "loss": 0.313, "step": 11037 }, { "epoch": 0.5119666048237477, "grad_norm": 6.459522724151611, "learning_rate": 4.889617547158845e-06, "loss": 0.2856, "step": 11038 }, { "epoch": 0.512012987012987, "grad_norm": 11.699116706848145, "learning_rate": 4.888881784911018e-06, "loss": 0.5296, "step": 11039 }, { "epoch": 0.5120593692022264, "grad_norm": 7.092662334442139, "learning_rate": 4.8881460250705056e-06, "loss": 0.2898, "step": 11040 }, { "epoch": 0.5121057513914656, "grad_norm": 4.2887725830078125, "learning_rate": 4.887410267653252e-06, "loss": 0.2463, "step": 11041 }, { "epoch": 0.512152133580705, "grad_norm": 6.719224452972412, "learning_rate": 4.886674512675195e-06, "loss": 0.3115, "step": 11042 }, { "epoch": 0.5121985157699444, "grad_norm": 5.973560333251953, "learning_rate": 4.8859387601522754e-06, "loss": 0.3239, "step": 11043 }, { "epoch": 0.5122448979591837, "grad_norm": 6.972423076629639, "learning_rate": 4.8852030101004335e-06, "loss": 0.4901, "step": 11044 }, { "epoch": 0.512291280148423, "grad_norm": 9.340890884399414, "learning_rate": 4.8844672625356065e-06, "loss": 0.3974, "step": 11045 }, { "epoch": 0.5123376623376623, "grad_norm": 9.08682918548584, "learning_rate": 4.883731517473736e-06, "loss": 0.4982, "step": 11046 }, { "epoch": 0.5123840445269017, "grad_norm": 6.96514892578125, "learning_rate": 4.882995774930759e-06, "loss": 0.3418, "step": 11047 }, { "epoch": 0.512430426716141, "grad_norm": 4.686809062957764, "learning_rate": 4.882260034922618e-06, "loss": 0.2763, "step": 11048 }, { "epoch": 0.5124768089053804, "grad_norm": 8.259970664978027, "learning_rate": 4.881524297465251e-06, "loss": 0.4491, "step": 11049 }, { "epoch": 0.5125231910946196, "grad_norm": 5.966945648193359, "learning_rate": 4.880788562574601e-06, "loss": 0.2796, "step": 11050 }, { "epoch": 0.512569573283859, "grad_norm": 7.935151100158691, "learning_rate": 4.8800528302666e-06, "loss": 0.3195, "step": 11051 }, { "epoch": 0.5126159554730984, "grad_norm": 9.982693672180176, "learning_rate": 4.879317100557192e-06, "loss": 0.4411, "step": 11052 }, { "epoch": 0.5126623376623377, "grad_norm": 6.246025562286377, "learning_rate": 4.878581373462314e-06, "loss": 0.2571, "step": 11053 }, { "epoch": 0.512708719851577, "grad_norm": 5.926452159881592, "learning_rate": 4.877845648997908e-06, "loss": 0.3497, "step": 11054 }, { "epoch": 0.5127551020408163, "grad_norm": 8.336089134216309, "learning_rate": 4.877109927179914e-06, "loss": 0.4284, "step": 11055 }, { "epoch": 0.5128014842300557, "grad_norm": 7.030084609985352, "learning_rate": 4.876374208024265e-06, "loss": 0.3217, "step": 11056 }, { "epoch": 0.512847866419295, "grad_norm": 5.990720748901367, "learning_rate": 4.875638491546905e-06, "loss": 0.3496, "step": 11057 }, { "epoch": 0.5128942486085343, "grad_norm": 5.037125110626221, "learning_rate": 4.874902777763771e-06, "loss": 0.3587, "step": 11058 }, { "epoch": 0.5129406307977736, "grad_norm": 16.618743896484375, "learning_rate": 4.8741670666908016e-06, "loss": 0.4697, "step": 11059 }, { "epoch": 0.512987012987013, "grad_norm": 7.612311840057373, "learning_rate": 4.873431358343937e-06, "loss": 0.402, "step": 11060 }, { "epoch": 0.5130333951762523, "grad_norm": 6.634537696838379, "learning_rate": 4.872695652739117e-06, "loss": 0.3373, "step": 11061 }, { "epoch": 0.5130797773654916, "grad_norm": 7.129085540771484, "learning_rate": 4.871959949892278e-06, "loss": 0.2534, "step": 11062 }, { "epoch": 0.513126159554731, "grad_norm": 10.216707229614258, "learning_rate": 4.871224249819357e-06, "loss": 0.3807, "step": 11063 }, { "epoch": 0.5131725417439703, "grad_norm": 5.128978252410889, "learning_rate": 4.870488552536296e-06, "loss": 0.2835, "step": 11064 }, { "epoch": 0.5132189239332097, "grad_norm": 9.669872283935547, "learning_rate": 4.869752858059032e-06, "loss": 0.3414, "step": 11065 }, { "epoch": 0.513265306122449, "grad_norm": 3.3201143741607666, "learning_rate": 4.8690171664035054e-06, "loss": 0.1802, "step": 11066 }, { "epoch": 0.5133116883116883, "grad_norm": 5.442423343658447, "learning_rate": 4.8682814775856505e-06, "loss": 0.3057, "step": 11067 }, { "epoch": 0.5133580705009276, "grad_norm": 6.383619785308838, "learning_rate": 4.867545791621409e-06, "loss": 0.3595, "step": 11068 }, { "epoch": 0.513404452690167, "grad_norm": 4.490920066833496, "learning_rate": 4.866810108526717e-06, "loss": 0.2778, "step": 11069 }, { "epoch": 0.5134508348794063, "grad_norm": 6.090451717376709, "learning_rate": 4.8660744283175136e-06, "loss": 0.3134, "step": 11070 }, { "epoch": 0.5134972170686456, "grad_norm": 6.345036506652832, "learning_rate": 4.865338751009738e-06, "loss": 0.3017, "step": 11071 }, { "epoch": 0.513543599257885, "grad_norm": 9.380762100219727, "learning_rate": 4.864603076619329e-06, "loss": 0.337, "step": 11072 }, { "epoch": 0.5135899814471243, "grad_norm": 7.898662090301514, "learning_rate": 4.863867405162221e-06, "loss": 0.349, "step": 11073 }, { "epoch": 0.5136363636363637, "grad_norm": 6.299795150756836, "learning_rate": 4.863131736654353e-06, "loss": 0.4679, "step": 11074 }, { "epoch": 0.5136827458256029, "grad_norm": 7.162832736968994, "learning_rate": 4.862396071111664e-06, "loss": 0.2954, "step": 11075 }, { "epoch": 0.5137291280148423, "grad_norm": 9.080694198608398, "learning_rate": 4.861660408550093e-06, "loss": 0.3705, "step": 11076 }, { "epoch": 0.5137755102040816, "grad_norm": 6.523865222930908, "learning_rate": 4.860924748985577e-06, "loss": 0.3353, "step": 11077 }, { "epoch": 0.513821892393321, "grad_norm": 5.875390529632568, "learning_rate": 4.860189092434051e-06, "loss": 0.3629, "step": 11078 }, { "epoch": 0.5138682745825603, "grad_norm": 8.107083320617676, "learning_rate": 4.859453438911455e-06, "loss": 0.3706, "step": 11079 }, { "epoch": 0.5139146567717996, "grad_norm": 10.134346008300781, "learning_rate": 4.858717788433725e-06, "loss": 0.426, "step": 11080 }, { "epoch": 0.513961038961039, "grad_norm": 3.1635282039642334, "learning_rate": 4.857982141016801e-06, "loss": 0.2474, "step": 11081 }, { "epoch": 0.5140074211502783, "grad_norm": 7.883790969848633, "learning_rate": 4.85724649667662e-06, "loss": 0.3904, "step": 11082 }, { "epoch": 0.5140538033395177, "grad_norm": 5.3256425857543945, "learning_rate": 4.856510855429117e-06, "loss": 0.3305, "step": 11083 }, { "epoch": 0.5141001855287569, "grad_norm": 7.239561557769775, "learning_rate": 4.855775217290231e-06, "loss": 0.2925, "step": 11084 }, { "epoch": 0.5141465677179963, "grad_norm": 8.109971046447754, "learning_rate": 4.855039582275898e-06, "loss": 0.322, "step": 11085 }, { "epoch": 0.5141929499072356, "grad_norm": 4.734618186950684, "learning_rate": 4.8543039504020565e-06, "loss": 0.2698, "step": 11086 }, { "epoch": 0.514239332096475, "grad_norm": 15.002569198608398, "learning_rate": 4.853568321684644e-06, "loss": 0.5179, "step": 11087 }, { "epoch": 0.5142857142857142, "grad_norm": 7.648076057434082, "learning_rate": 4.8528326961395965e-06, "loss": 0.354, "step": 11088 }, { "epoch": 0.5143320964749536, "grad_norm": 5.980896949768066, "learning_rate": 4.85209707378285e-06, "loss": 0.3556, "step": 11089 }, { "epoch": 0.514378478664193, "grad_norm": 5.04152250289917, "learning_rate": 4.851361454630342e-06, "loss": 0.3298, "step": 11090 }, { "epoch": 0.5144248608534323, "grad_norm": 5.785058498382568, "learning_rate": 4.850625838698011e-06, "loss": 0.38, "step": 11091 }, { "epoch": 0.5144712430426717, "grad_norm": 7.469895362854004, "learning_rate": 4.849890226001792e-06, "loss": 0.2477, "step": 11092 }, { "epoch": 0.5145176252319109, "grad_norm": 9.326254844665527, "learning_rate": 4.849154616557621e-06, "loss": 0.3759, "step": 11093 }, { "epoch": 0.5145640074211503, "grad_norm": 8.463550567626953, "learning_rate": 4.848419010381438e-06, "loss": 0.3014, "step": 11094 }, { "epoch": 0.5146103896103896, "grad_norm": 8.41947078704834, "learning_rate": 4.847683407489175e-06, "loss": 0.3505, "step": 11095 }, { "epoch": 0.514656771799629, "grad_norm": 5.548577785491943, "learning_rate": 4.846947807896771e-06, "loss": 0.3227, "step": 11096 }, { "epoch": 0.5147031539888682, "grad_norm": 8.544755935668945, "learning_rate": 4.8462122116201634e-06, "loss": 0.3846, "step": 11097 }, { "epoch": 0.5147495361781076, "grad_norm": 7.206167221069336, "learning_rate": 4.845476618675285e-06, "loss": 0.3749, "step": 11098 }, { "epoch": 0.514795918367347, "grad_norm": 4.961340427398682, "learning_rate": 4.844741029078077e-06, "loss": 0.2845, "step": 11099 }, { "epoch": 0.5148423005565863, "grad_norm": 4.654370307922363, "learning_rate": 4.8440054428444696e-06, "loss": 0.2824, "step": 11100 }, { "epoch": 0.5148886827458256, "grad_norm": 5.749419689178467, "learning_rate": 4.843269859990404e-06, "loss": 0.2308, "step": 11101 }, { "epoch": 0.5149350649350649, "grad_norm": 7.460590839385986, "learning_rate": 4.8425342805318135e-06, "loss": 0.2663, "step": 11102 }, { "epoch": 0.5149814471243043, "grad_norm": 5.139838695526123, "learning_rate": 4.8417987044846345e-06, "loss": 0.3264, "step": 11103 }, { "epoch": 0.5150278293135436, "grad_norm": 4.858319282531738, "learning_rate": 4.841063131864805e-06, "loss": 0.3127, "step": 11104 }, { "epoch": 0.515074211502783, "grad_norm": 8.062719345092773, "learning_rate": 4.8403275626882565e-06, "loss": 0.3285, "step": 11105 }, { "epoch": 0.5151205936920222, "grad_norm": 9.31043815612793, "learning_rate": 4.839591996970928e-06, "loss": 0.3215, "step": 11106 }, { "epoch": 0.5151669758812616, "grad_norm": 6.863370418548584, "learning_rate": 4.838856434728755e-06, "loss": 0.299, "step": 11107 }, { "epoch": 0.515213358070501, "grad_norm": 5.778435230255127, "learning_rate": 4.838120875977671e-06, "loss": 0.3474, "step": 11108 }, { "epoch": 0.5152597402597403, "grad_norm": 10.147789001464844, "learning_rate": 4.837385320733613e-06, "loss": 0.4526, "step": 11109 }, { "epoch": 0.5153061224489796, "grad_norm": 6.268983840942383, "learning_rate": 4.836649769012518e-06, "loss": 0.3775, "step": 11110 }, { "epoch": 0.5153525046382189, "grad_norm": 5.725107669830322, "learning_rate": 4.835914220830318e-06, "loss": 0.2774, "step": 11111 }, { "epoch": 0.5153988868274583, "grad_norm": 4.816274166107178, "learning_rate": 4.83517867620295e-06, "loss": 0.3119, "step": 11112 }, { "epoch": 0.5154452690166976, "grad_norm": 8.848739624023438, "learning_rate": 4.83444313514635e-06, "loss": 0.3367, "step": 11113 }, { "epoch": 0.5154916512059369, "grad_norm": 10.946556091308594, "learning_rate": 4.833707597676451e-06, "loss": 0.3317, "step": 11114 }, { "epoch": 0.5155380333951762, "grad_norm": 6.831146240234375, "learning_rate": 4.83297206380919e-06, "loss": 0.3607, "step": 11115 }, { "epoch": 0.5155844155844156, "grad_norm": 7.1794328689575195, "learning_rate": 4.832236533560501e-06, "loss": 0.296, "step": 11116 }, { "epoch": 0.515630797773655, "grad_norm": 6.910607814788818, "learning_rate": 4.831501006946318e-06, "loss": 0.3873, "step": 11117 }, { "epoch": 0.5156771799628942, "grad_norm": 10.299813270568848, "learning_rate": 4.830765483982578e-06, "loss": 0.3121, "step": 11118 }, { "epoch": 0.5157235621521336, "grad_norm": 4.568993091583252, "learning_rate": 4.830029964685214e-06, "loss": 0.2538, "step": 11119 }, { "epoch": 0.5157699443413729, "grad_norm": 9.66350269317627, "learning_rate": 4.829294449070161e-06, "loss": 0.386, "step": 11120 }, { "epoch": 0.5158163265306123, "grad_norm": 7.588958740234375, "learning_rate": 4.828558937153354e-06, "loss": 0.3015, "step": 11121 }, { "epoch": 0.5158627087198516, "grad_norm": 11.80990219116211, "learning_rate": 4.827823428950727e-06, "loss": 0.4528, "step": 11122 }, { "epoch": 0.5159090909090909, "grad_norm": 7.997014999389648, "learning_rate": 4.827087924478216e-06, "loss": 0.3289, "step": 11123 }, { "epoch": 0.5159554730983302, "grad_norm": 5.519796848297119, "learning_rate": 4.826352423751752e-06, "loss": 0.4033, "step": 11124 }, { "epoch": 0.5160018552875696, "grad_norm": 4.160207748413086, "learning_rate": 4.825616926787271e-06, "loss": 0.3727, "step": 11125 }, { "epoch": 0.516048237476809, "grad_norm": 7.345163345336914, "learning_rate": 4.824881433600709e-06, "loss": 0.4057, "step": 11126 }, { "epoch": 0.5160946196660482, "grad_norm": 10.781208992004395, "learning_rate": 4.824145944207997e-06, "loss": 0.3646, "step": 11127 }, { "epoch": 0.5161410018552876, "grad_norm": 8.24697208404541, "learning_rate": 4.823410458625072e-06, "loss": 0.2843, "step": 11128 }, { "epoch": 0.5161873840445269, "grad_norm": 6.5272393226623535, "learning_rate": 4.822674976867865e-06, "loss": 0.2858, "step": 11129 }, { "epoch": 0.5162337662337663, "grad_norm": 6.458503723144531, "learning_rate": 4.821939498952311e-06, "loss": 0.3124, "step": 11130 }, { "epoch": 0.5162801484230055, "grad_norm": 9.631763458251953, "learning_rate": 4.821204024894344e-06, "loss": 0.3899, "step": 11131 }, { "epoch": 0.5163265306122449, "grad_norm": 5.084385871887207, "learning_rate": 4.820468554709898e-06, "loss": 0.2597, "step": 11132 }, { "epoch": 0.5163729128014842, "grad_norm": 11.691451072692871, "learning_rate": 4.819733088414906e-06, "loss": 0.3811, "step": 11133 }, { "epoch": 0.5164192949907236, "grad_norm": 6.009705066680908, "learning_rate": 4.818997626025301e-06, "loss": 0.3775, "step": 11134 }, { "epoch": 0.516465677179963, "grad_norm": 9.665498733520508, "learning_rate": 4.818262167557017e-06, "loss": 0.5171, "step": 11135 }, { "epoch": 0.5165120593692022, "grad_norm": 7.189038276672363, "learning_rate": 4.817526713025988e-06, "loss": 0.2615, "step": 11136 }, { "epoch": 0.5165584415584416, "grad_norm": 6.318288803100586, "learning_rate": 4.816791262448146e-06, "loss": 0.2888, "step": 11137 }, { "epoch": 0.5166048237476809, "grad_norm": 7.390328884124756, "learning_rate": 4.816055815839426e-06, "loss": 0.3245, "step": 11138 }, { "epoch": 0.5166512059369203, "grad_norm": 5.198609352111816, "learning_rate": 4.8153203732157575e-06, "loss": 0.3709, "step": 11139 }, { "epoch": 0.5166975881261595, "grad_norm": 11.762598991394043, "learning_rate": 4.814584934593077e-06, "loss": 0.4926, "step": 11140 }, { "epoch": 0.5167439703153989, "grad_norm": 4.028590202331543, "learning_rate": 4.813849499987314e-06, "loss": 0.1157, "step": 11141 }, { "epoch": 0.5167903525046382, "grad_norm": 9.972884178161621, "learning_rate": 4.813114069414405e-06, "loss": 0.2878, "step": 11142 }, { "epoch": 0.5168367346938776, "grad_norm": 5.714095592498779, "learning_rate": 4.812378642890283e-06, "loss": 0.4046, "step": 11143 }, { "epoch": 0.5168831168831168, "grad_norm": 5.468334197998047, "learning_rate": 4.811643220430877e-06, "loss": 0.2984, "step": 11144 }, { "epoch": 0.5169294990723562, "grad_norm": 6.019372940063477, "learning_rate": 4.810907802052121e-06, "loss": 0.3436, "step": 11145 }, { "epoch": 0.5169758812615955, "grad_norm": 5.781430721282959, "learning_rate": 4.810172387769947e-06, "loss": 0.2448, "step": 11146 }, { "epoch": 0.5170222634508349, "grad_norm": 13.74007797241211, "learning_rate": 4.809436977600289e-06, "loss": 0.3686, "step": 11147 }, { "epoch": 0.5170686456400743, "grad_norm": 7.076021194458008, "learning_rate": 4.80870157155908e-06, "loss": 0.3946, "step": 11148 }, { "epoch": 0.5171150278293135, "grad_norm": 8.738678932189941, "learning_rate": 4.8079661696622484e-06, "loss": 0.3657, "step": 11149 }, { "epoch": 0.5171614100185529, "grad_norm": 6.549681663513184, "learning_rate": 4.8072307719257285e-06, "loss": 0.2477, "step": 11150 }, { "epoch": 0.5172077922077922, "grad_norm": 11.977296829223633, "learning_rate": 4.806495378365453e-06, "loss": 0.3814, "step": 11151 }, { "epoch": 0.5172541743970316, "grad_norm": 6.137965202331543, "learning_rate": 4.805759988997352e-06, "loss": 0.3384, "step": 11152 }, { "epoch": 0.5173005565862708, "grad_norm": 5.106934547424316, "learning_rate": 4.805024603837359e-06, "loss": 0.2983, "step": 11153 }, { "epoch": 0.5173469387755102, "grad_norm": 11.627685546875, "learning_rate": 4.804289222901407e-06, "loss": 0.5018, "step": 11154 }, { "epoch": 0.5173933209647495, "grad_norm": 6.9266886711120605, "learning_rate": 4.803553846205425e-06, "loss": 0.3641, "step": 11155 }, { "epoch": 0.5174397031539889, "grad_norm": 5.446415424346924, "learning_rate": 4.802818473765343e-06, "loss": 0.2614, "step": 11156 }, { "epoch": 0.5174860853432282, "grad_norm": 13.555249214172363, "learning_rate": 4.802083105597096e-06, "loss": 0.5165, "step": 11157 }, { "epoch": 0.5175324675324675, "grad_norm": 3.912879467010498, "learning_rate": 4.801347741716614e-06, "loss": 0.2705, "step": 11158 }, { "epoch": 0.5175788497217069, "grad_norm": 5.554925441741943, "learning_rate": 4.80061238213983e-06, "loss": 0.3404, "step": 11159 }, { "epoch": 0.5176252319109462, "grad_norm": 3.8960378170013428, "learning_rate": 4.7998770268826726e-06, "loss": 0.2786, "step": 11160 }, { "epoch": 0.5176716141001856, "grad_norm": 20.647127151489258, "learning_rate": 4.799141675961072e-06, "loss": 0.2, "step": 11161 }, { "epoch": 0.5177179962894248, "grad_norm": 6.850368976593018, "learning_rate": 4.798406329390963e-06, "loss": 0.4574, "step": 11162 }, { "epoch": 0.5177643784786642, "grad_norm": 10.038220405578613, "learning_rate": 4.797670987188274e-06, "loss": 0.4799, "step": 11163 }, { "epoch": 0.5178107606679035, "grad_norm": 4.684632301330566, "learning_rate": 4.796935649368936e-06, "loss": 0.3406, "step": 11164 }, { "epoch": 0.5178571428571429, "grad_norm": 5.683228492736816, "learning_rate": 4.796200315948882e-06, "loss": 0.2306, "step": 11165 }, { "epoch": 0.5179035250463822, "grad_norm": 7.735657691955566, "learning_rate": 4.795464986944039e-06, "loss": 0.4046, "step": 11166 }, { "epoch": 0.5179499072356215, "grad_norm": 7.220638275146484, "learning_rate": 4.794729662370339e-06, "loss": 0.3547, "step": 11167 }, { "epoch": 0.5179962894248609, "grad_norm": 6.788036823272705, "learning_rate": 4.793994342243713e-06, "loss": 0.2747, "step": 11168 }, { "epoch": 0.5180426716141002, "grad_norm": 13.654409408569336, "learning_rate": 4.79325902658009e-06, "loss": 0.3924, "step": 11169 }, { "epoch": 0.5180890538033395, "grad_norm": 8.053827285766602, "learning_rate": 4.792523715395404e-06, "loss": 0.4704, "step": 11170 }, { "epoch": 0.5181354359925788, "grad_norm": 5.277516841888428, "learning_rate": 4.79178840870558e-06, "loss": 0.2826, "step": 11171 }, { "epoch": 0.5181818181818182, "grad_norm": 9.362831115722656, "learning_rate": 4.79105310652655e-06, "loss": 0.3037, "step": 11172 }, { "epoch": 0.5182282003710575, "grad_norm": 8.896949768066406, "learning_rate": 4.790317808874245e-06, "loss": 0.3006, "step": 11173 }, { "epoch": 0.5182745825602968, "grad_norm": 7.289228439331055, "learning_rate": 4.789582515764593e-06, "loss": 0.2362, "step": 11174 }, { "epoch": 0.5183209647495362, "grad_norm": 6.295163154602051, "learning_rate": 4.788847227213525e-06, "loss": 0.288, "step": 11175 }, { "epoch": 0.5183673469387755, "grad_norm": 4.818123817443848, "learning_rate": 4.788111943236973e-06, "loss": 0.3246, "step": 11176 }, { "epoch": 0.5184137291280149, "grad_norm": 6.88942289352417, "learning_rate": 4.787376663850862e-06, "loss": 0.3123, "step": 11177 }, { "epoch": 0.5184601113172542, "grad_norm": 12.561038970947266, "learning_rate": 4.786641389071123e-06, "loss": 0.4964, "step": 11178 }, { "epoch": 0.5185064935064935, "grad_norm": 4.964073181152344, "learning_rate": 4.7859061189136866e-06, "loss": 0.3073, "step": 11179 }, { "epoch": 0.5185528756957328, "grad_norm": 5.0001044273376465, "learning_rate": 4.785170853394481e-06, "loss": 0.2392, "step": 11180 }, { "epoch": 0.5185992578849722, "grad_norm": 10.736104965209961, "learning_rate": 4.784435592529437e-06, "loss": 0.3616, "step": 11181 }, { "epoch": 0.5186456400742115, "grad_norm": 7.2414164543151855, "learning_rate": 4.783700336334481e-06, "loss": 0.4122, "step": 11182 }, { "epoch": 0.5186920222634508, "grad_norm": 6.782679080963135, "learning_rate": 4.782965084825542e-06, "loss": 0.2486, "step": 11183 }, { "epoch": 0.5187384044526901, "grad_norm": 8.367021560668945, "learning_rate": 4.78222983801855e-06, "loss": 0.3758, "step": 11184 }, { "epoch": 0.5187847866419295, "grad_norm": 4.746926307678223, "learning_rate": 4.781494595929434e-06, "loss": 0.3033, "step": 11185 }, { "epoch": 0.5188311688311689, "grad_norm": 5.119609355926514, "learning_rate": 4.780759358574125e-06, "loss": 0.3479, "step": 11186 }, { "epoch": 0.5188775510204081, "grad_norm": 5.3564043045043945, "learning_rate": 4.780024125968547e-06, "loss": 0.3543, "step": 11187 }, { "epoch": 0.5189239332096475, "grad_norm": 13.65581226348877, "learning_rate": 4.779288898128629e-06, "loss": 0.4064, "step": 11188 }, { "epoch": 0.5189703153988868, "grad_norm": 9.628053665161133, "learning_rate": 4.778553675070301e-06, "loss": 0.3148, "step": 11189 }, { "epoch": 0.5190166975881262, "grad_norm": 5.644028186798096, "learning_rate": 4.777818456809491e-06, "loss": 0.2825, "step": 11190 }, { "epoch": 0.5190630797773655, "grad_norm": 6.091836452484131, "learning_rate": 4.777083243362128e-06, "loss": 0.3501, "step": 11191 }, { "epoch": 0.5191094619666048, "grad_norm": 6.520181655883789, "learning_rate": 4.7763480347441395e-06, "loss": 0.278, "step": 11192 }, { "epoch": 0.5191558441558441, "grad_norm": 4.746337413787842, "learning_rate": 4.775612830971451e-06, "loss": 0.3312, "step": 11193 }, { "epoch": 0.5192022263450835, "grad_norm": 7.682112216949463, "learning_rate": 4.774877632059993e-06, "loss": 0.3484, "step": 11194 }, { "epoch": 0.5192486085343229, "grad_norm": 4.486934661865234, "learning_rate": 4.774142438025691e-06, "loss": 0.2842, "step": 11195 }, { "epoch": 0.5192949907235621, "grad_norm": 5.900598526000977, "learning_rate": 4.773407248884475e-06, "loss": 0.377, "step": 11196 }, { "epoch": 0.5193413729128015, "grad_norm": 11.694101333618164, "learning_rate": 4.772672064652273e-06, "loss": 0.445, "step": 11197 }, { "epoch": 0.5193877551020408, "grad_norm": 6.856278419494629, "learning_rate": 4.77193688534501e-06, "loss": 0.3954, "step": 11198 }, { "epoch": 0.5194341372912802, "grad_norm": 10.724647521972656, "learning_rate": 4.7712017109786125e-06, "loss": 0.3135, "step": 11199 }, { "epoch": 0.5194805194805194, "grad_norm": 9.231500625610352, "learning_rate": 4.77046654156901e-06, "loss": 0.3525, "step": 11200 }, { "epoch": 0.5195269016697588, "grad_norm": 7.777554512023926, "learning_rate": 4.769731377132129e-06, "loss": 0.2816, "step": 11201 }, { "epoch": 0.5195732838589981, "grad_norm": 6.927903175354004, "learning_rate": 4.768996217683896e-06, "loss": 0.2878, "step": 11202 }, { "epoch": 0.5196196660482375, "grad_norm": 14.342582702636719, "learning_rate": 4.76826106324024e-06, "loss": 0.3151, "step": 11203 }, { "epoch": 0.5196660482374769, "grad_norm": 13.007651329040527, "learning_rate": 4.7675259138170835e-06, "loss": 0.3969, "step": 11204 }, { "epoch": 0.5197124304267161, "grad_norm": 4.508031845092773, "learning_rate": 4.766790769430356e-06, "loss": 0.228, "step": 11205 }, { "epoch": 0.5197588126159555, "grad_norm": 7.1318206787109375, "learning_rate": 4.766055630095983e-06, "loss": 0.3705, "step": 11206 }, { "epoch": 0.5198051948051948, "grad_norm": 8.164022445678711, "learning_rate": 4.765320495829893e-06, "loss": 0.2885, "step": 11207 }, { "epoch": 0.5198515769944342, "grad_norm": 8.5134916305542, "learning_rate": 4.7645853666480104e-06, "loss": 0.3115, "step": 11208 }, { "epoch": 0.5198979591836734, "grad_norm": 13.723536491394043, "learning_rate": 4.763850242566261e-06, "loss": 0.3636, "step": 11209 }, { "epoch": 0.5199443413729128, "grad_norm": 10.376386642456055, "learning_rate": 4.763115123600571e-06, "loss": 0.3411, "step": 11210 }, { "epoch": 0.5199907235621521, "grad_norm": 4.93022346496582, "learning_rate": 4.762380009766867e-06, "loss": 0.2953, "step": 11211 }, { "epoch": 0.5200371057513915, "grad_norm": 5.768429756164551, "learning_rate": 4.761644901081076e-06, "loss": 0.3981, "step": 11212 }, { "epoch": 0.5200834879406308, "grad_norm": 4.174184322357178, "learning_rate": 4.760909797559121e-06, "loss": 0.3185, "step": 11213 }, { "epoch": 0.5201298701298701, "grad_norm": 6.918315410614014, "learning_rate": 4.7601746992169315e-06, "loss": 0.3919, "step": 11214 }, { "epoch": 0.5201762523191095, "grad_norm": 10.307353973388672, "learning_rate": 4.759439606070429e-06, "loss": 0.4456, "step": 11215 }, { "epoch": 0.5202226345083488, "grad_norm": 8.209185600280762, "learning_rate": 4.758704518135539e-06, "loss": 0.3445, "step": 11216 }, { "epoch": 0.5202690166975881, "grad_norm": 9.199116706848145, "learning_rate": 4.757969435428191e-06, "loss": 0.3097, "step": 11217 }, { "epoch": 0.5203153988868274, "grad_norm": 8.949066162109375, "learning_rate": 4.7572343579643055e-06, "loss": 0.4438, "step": 11218 }, { "epoch": 0.5203617810760668, "grad_norm": 9.253928184509277, "learning_rate": 4.756499285759812e-06, "loss": 0.2742, "step": 11219 }, { "epoch": 0.5204081632653061, "grad_norm": 5.07941198348999, "learning_rate": 4.755764218830632e-06, "loss": 0.2861, "step": 11220 }, { "epoch": 0.5204545454545455, "grad_norm": 6.837398052215576, "learning_rate": 4.75502915719269e-06, "loss": 0.4464, "step": 11221 }, { "epoch": 0.5205009276437847, "grad_norm": 4.297272205352783, "learning_rate": 4.7542941008619125e-06, "loss": 0.2314, "step": 11222 }, { "epoch": 0.5205473098330241, "grad_norm": 11.030235290527344, "learning_rate": 4.753559049854224e-06, "loss": 0.3823, "step": 11223 }, { "epoch": 0.5205936920222635, "grad_norm": 5.76345157623291, "learning_rate": 4.752824004185548e-06, "loss": 0.4376, "step": 11224 }, { "epoch": 0.5206400742115028, "grad_norm": 6.980015277862549, "learning_rate": 4.7520889638718126e-06, "loss": 0.2468, "step": 11225 }, { "epoch": 0.5206864564007421, "grad_norm": 8.324831008911133, "learning_rate": 4.7513539289289365e-06, "loss": 0.433, "step": 11226 }, { "epoch": 0.5207328385899814, "grad_norm": 10.427051544189453, "learning_rate": 4.750618899372847e-06, "loss": 0.4863, "step": 11227 }, { "epoch": 0.5207792207792208, "grad_norm": 8.323637962341309, "learning_rate": 4.749883875219466e-06, "loss": 0.3348, "step": 11228 }, { "epoch": 0.5208256029684601, "grad_norm": 5.648025989532471, "learning_rate": 4.749148856484721e-06, "loss": 0.3543, "step": 11229 }, { "epoch": 0.5208719851576994, "grad_norm": 6.46122407913208, "learning_rate": 4.748413843184534e-06, "loss": 0.368, "step": 11230 }, { "epoch": 0.5209183673469387, "grad_norm": 8.082855224609375, "learning_rate": 4.747678835334828e-06, "loss": 0.3764, "step": 11231 }, { "epoch": 0.5209647495361781, "grad_norm": 10.34762191772461, "learning_rate": 4.7469438329515255e-06, "loss": 0.4007, "step": 11232 }, { "epoch": 0.5210111317254175, "grad_norm": 5.192196846008301, "learning_rate": 4.746208836050552e-06, "loss": 0.3078, "step": 11233 }, { "epoch": 0.5210575139146568, "grad_norm": 9.366507530212402, "learning_rate": 4.7454738446478296e-06, "loss": 0.3508, "step": 11234 }, { "epoch": 0.5211038961038961, "grad_norm": 7.913295269012451, "learning_rate": 4.7447388587592835e-06, "loss": 0.3019, "step": 11235 }, { "epoch": 0.5211502782931354, "grad_norm": 9.262201309204102, "learning_rate": 4.744003878400836e-06, "loss": 0.43, "step": 11236 }, { "epoch": 0.5211966604823748, "grad_norm": 5.503742218017578, "learning_rate": 4.743268903588408e-06, "loss": 0.2969, "step": 11237 }, { "epoch": 0.5212430426716141, "grad_norm": 6.113919734954834, "learning_rate": 4.742533934337923e-06, "loss": 0.4131, "step": 11238 }, { "epoch": 0.5212894248608534, "grad_norm": 6.022350788116455, "learning_rate": 4.741798970665306e-06, "loss": 0.3812, "step": 11239 }, { "epoch": 0.5213358070500927, "grad_norm": 7.830813884735107, "learning_rate": 4.7410640125864785e-06, "loss": 0.2648, "step": 11240 }, { "epoch": 0.5213821892393321, "grad_norm": 7.166112899780273, "learning_rate": 4.740329060117362e-06, "loss": 0.3476, "step": 11241 }, { "epoch": 0.5214285714285715, "grad_norm": 7.914587020874023, "learning_rate": 4.739594113273878e-06, "loss": 0.3942, "step": 11242 }, { "epoch": 0.5214749536178107, "grad_norm": 4.5612969398498535, "learning_rate": 4.738859172071951e-06, "loss": 0.295, "step": 11243 }, { "epoch": 0.5215213358070501, "grad_norm": 9.856168746948242, "learning_rate": 4.738124236527503e-06, "loss": 0.4243, "step": 11244 }, { "epoch": 0.5215677179962894, "grad_norm": 10.347533226013184, "learning_rate": 4.737389306656456e-06, "loss": 0.3924, "step": 11245 }, { "epoch": 0.5216141001855288, "grad_norm": 11.356095314025879, "learning_rate": 4.73665438247473e-06, "loss": 0.3684, "step": 11246 }, { "epoch": 0.5216604823747681, "grad_norm": 6.160856246948242, "learning_rate": 4.7359194639982485e-06, "loss": 0.3763, "step": 11247 }, { "epoch": 0.5217068645640074, "grad_norm": 5.848038673400879, "learning_rate": 4.735184551242932e-06, "loss": 0.3673, "step": 11248 }, { "epoch": 0.5217532467532467, "grad_norm": 7.656598091125488, "learning_rate": 4.734449644224702e-06, "loss": 0.3067, "step": 11249 }, { "epoch": 0.5217996289424861, "grad_norm": 5.140279293060303, "learning_rate": 4.733714742959481e-06, "loss": 0.2982, "step": 11250 }, { "epoch": 0.5218460111317255, "grad_norm": 6.676870346069336, "learning_rate": 4.73297984746319e-06, "loss": 0.3176, "step": 11251 }, { "epoch": 0.5218923933209647, "grad_norm": 5.691915035247803, "learning_rate": 4.73224495775175e-06, "loss": 0.39, "step": 11252 }, { "epoch": 0.5219387755102041, "grad_norm": 5.225522518157959, "learning_rate": 4.731510073841081e-06, "loss": 0.3693, "step": 11253 }, { "epoch": 0.5219851576994434, "grad_norm": 8.352499961853027, "learning_rate": 4.730775195747105e-06, "loss": 0.4218, "step": 11254 }, { "epoch": 0.5220315398886828, "grad_norm": 9.125086784362793, "learning_rate": 4.730040323485742e-06, "loss": 0.3009, "step": 11255 }, { "epoch": 0.522077922077922, "grad_norm": 4.90213680267334, "learning_rate": 4.729305457072913e-06, "loss": 0.2893, "step": 11256 }, { "epoch": 0.5221243042671614, "grad_norm": 4.239329814910889, "learning_rate": 4.728570596524538e-06, "loss": 0.3313, "step": 11257 }, { "epoch": 0.5221706864564007, "grad_norm": 10.205581665039062, "learning_rate": 4.72783574185654e-06, "loss": 0.4372, "step": 11258 }, { "epoch": 0.5222170686456401, "grad_norm": 5.594981670379639, "learning_rate": 4.727100893084835e-06, "loss": 0.3421, "step": 11259 }, { "epoch": 0.5222634508348795, "grad_norm": 9.714555740356445, "learning_rate": 4.726366050225347e-06, "loss": 0.4257, "step": 11260 }, { "epoch": 0.5223098330241187, "grad_norm": 6.7704758644104, "learning_rate": 4.725631213293992e-06, "loss": 0.2115, "step": 11261 }, { "epoch": 0.5223562152133581, "grad_norm": 5.568168640136719, "learning_rate": 4.724896382306693e-06, "loss": 0.3205, "step": 11262 }, { "epoch": 0.5224025974025974, "grad_norm": 5.082559585571289, "learning_rate": 4.72416155727937e-06, "loss": 0.2929, "step": 11263 }, { "epoch": 0.5224489795918368, "grad_norm": 8.867253303527832, "learning_rate": 4.723426738227939e-06, "loss": 0.3829, "step": 11264 }, { "epoch": 0.522495361781076, "grad_norm": 9.893020629882812, "learning_rate": 4.722691925168324e-06, "loss": 0.4033, "step": 11265 }, { "epoch": 0.5225417439703154, "grad_norm": 5.763365268707275, "learning_rate": 4.721957118116441e-06, "loss": 0.3202, "step": 11266 }, { "epoch": 0.5225881261595547, "grad_norm": 9.879724502563477, "learning_rate": 4.72122231708821e-06, "loss": 0.3605, "step": 11267 }, { "epoch": 0.5226345083487941, "grad_norm": 3.6771585941314697, "learning_rate": 4.720487522099552e-06, "loss": 0.2488, "step": 11268 }, { "epoch": 0.5226808905380333, "grad_norm": 7.719079971313477, "learning_rate": 4.719752733166383e-06, "loss": 0.2346, "step": 11269 }, { "epoch": 0.5227272727272727, "grad_norm": 5.075811862945557, "learning_rate": 4.7190179503046245e-06, "loss": 0.2725, "step": 11270 }, { "epoch": 0.5227736549165121, "grad_norm": 5.061723709106445, "learning_rate": 4.7182831735301935e-06, "loss": 0.2636, "step": 11271 }, { "epoch": 0.5228200371057514, "grad_norm": 8.259871482849121, "learning_rate": 4.717548402859008e-06, "loss": 0.3119, "step": 11272 }, { "epoch": 0.5228664192949907, "grad_norm": 5.305237770080566, "learning_rate": 4.716813638306988e-06, "loss": 0.3286, "step": 11273 }, { "epoch": 0.52291280148423, "grad_norm": 4.913468360900879, "learning_rate": 4.716078879890052e-06, "loss": 0.3027, "step": 11274 }, { "epoch": 0.5229591836734694, "grad_norm": 7.7881269454956055, "learning_rate": 4.715344127624116e-06, "loss": 0.2838, "step": 11275 }, { "epoch": 0.5230055658627087, "grad_norm": 10.446605682373047, "learning_rate": 4.7146093815251e-06, "loss": 0.4953, "step": 11276 }, { "epoch": 0.5230519480519481, "grad_norm": 6.558069229125977, "learning_rate": 4.713874641608921e-06, "loss": 0.3844, "step": 11277 }, { "epoch": 0.5230983302411873, "grad_norm": 11.608528137207031, "learning_rate": 4.713139907891496e-06, "loss": 0.499, "step": 11278 }, { "epoch": 0.5231447124304267, "grad_norm": 7.145328998565674, "learning_rate": 4.7124051803887455e-06, "loss": 0.1862, "step": 11279 }, { "epoch": 0.5231910946196661, "grad_norm": 5.904412269592285, "learning_rate": 4.711670459116585e-06, "loss": 0.2965, "step": 11280 }, { "epoch": 0.5232374768089054, "grad_norm": 6.1967997550964355, "learning_rate": 4.71093574409093e-06, "loss": 0.2891, "step": 11281 }, { "epoch": 0.5232838589981447, "grad_norm": 11.50398063659668, "learning_rate": 4.710201035327701e-06, "loss": 0.4133, "step": 11282 }, { "epoch": 0.523330241187384, "grad_norm": 9.26080322265625, "learning_rate": 4.709466332842813e-06, "loss": 0.3274, "step": 11283 }, { "epoch": 0.5233766233766234, "grad_norm": 9.617079734802246, "learning_rate": 4.7087316366521835e-06, "loss": 0.4394, "step": 11284 }, { "epoch": 0.5234230055658627, "grad_norm": 10.248934745788574, "learning_rate": 4.707996946771732e-06, "loss": 0.2813, "step": 11285 }, { "epoch": 0.523469387755102, "grad_norm": 4.143904209136963, "learning_rate": 4.7072622632173705e-06, "loss": 0.3001, "step": 11286 }, { "epoch": 0.5235157699443413, "grad_norm": 7.392839431762695, "learning_rate": 4.7065275860050175e-06, "loss": 0.3978, "step": 11287 }, { "epoch": 0.5235621521335807, "grad_norm": 3.953824758529663, "learning_rate": 4.7057929151505895e-06, "loss": 0.2924, "step": 11288 }, { "epoch": 0.5236085343228201, "grad_norm": 5.799459457397461, "learning_rate": 4.705058250670002e-06, "loss": 0.267, "step": 11289 }, { "epoch": 0.5236549165120594, "grad_norm": 9.576833724975586, "learning_rate": 4.704323592579176e-06, "loss": 0.4168, "step": 11290 }, { "epoch": 0.5237012987012987, "grad_norm": 5.056286334991455, "learning_rate": 4.70358894089402e-06, "loss": 0.3262, "step": 11291 }, { "epoch": 0.523747680890538, "grad_norm": 4.229702472686768, "learning_rate": 4.702854295630454e-06, "loss": 0.3223, "step": 11292 }, { "epoch": 0.5237940630797774, "grad_norm": 8.447357177734375, "learning_rate": 4.702119656804392e-06, "loss": 0.3231, "step": 11293 }, { "epoch": 0.5238404452690167, "grad_norm": 5.450985431671143, "learning_rate": 4.701385024431751e-06, "loss": 0.3038, "step": 11294 }, { "epoch": 0.523886827458256, "grad_norm": 10.923563957214355, "learning_rate": 4.700650398528446e-06, "loss": 0.3354, "step": 11295 }, { "epoch": 0.5239332096474953, "grad_norm": 8.406569480895996, "learning_rate": 4.699915779110394e-06, "loss": 0.2753, "step": 11296 }, { "epoch": 0.5239795918367347, "grad_norm": 7.473621368408203, "learning_rate": 4.699181166193507e-06, "loss": 0.3719, "step": 11297 }, { "epoch": 0.5240259740259741, "grad_norm": 9.34131145477295, "learning_rate": 4.6984465597937e-06, "loss": 0.3452, "step": 11298 }, { "epoch": 0.5240723562152133, "grad_norm": 10.10732650756836, "learning_rate": 4.69771195992689e-06, "loss": 0.3044, "step": 11299 }, { "epoch": 0.5241187384044527, "grad_norm": 7.8826518058776855, "learning_rate": 4.696977366608992e-06, "loss": 0.4548, "step": 11300 }, { "epoch": 0.524165120593692, "grad_norm": 10.631486892700195, "learning_rate": 4.69624277985592e-06, "loss": 0.377, "step": 11301 }, { "epoch": 0.5242115027829314, "grad_norm": 7.663463115692139, "learning_rate": 4.695508199683587e-06, "loss": 0.5026, "step": 11302 }, { "epoch": 0.5242578849721707, "grad_norm": 8.755499839782715, "learning_rate": 4.694773626107907e-06, "loss": 0.4607, "step": 11303 }, { "epoch": 0.52430426716141, "grad_norm": 5.795032501220703, "learning_rate": 4.694039059144797e-06, "loss": 0.3215, "step": 11304 }, { "epoch": 0.5243506493506493, "grad_norm": 5.028707027435303, "learning_rate": 4.693304498810168e-06, "loss": 0.2461, "step": 11305 }, { "epoch": 0.5243970315398887, "grad_norm": 19.598922729492188, "learning_rate": 4.692569945119936e-06, "loss": 0.3216, "step": 11306 }, { "epoch": 0.5244434137291281, "grad_norm": 5.549128532409668, "learning_rate": 4.6918353980900155e-06, "loss": 0.3725, "step": 11307 }, { "epoch": 0.5244897959183673, "grad_norm": 4.611893177032471, "learning_rate": 4.691100857736317e-06, "loss": 0.3055, "step": 11308 }, { "epoch": 0.5245361781076067, "grad_norm": 7.277071475982666, "learning_rate": 4.690366324074755e-06, "loss": 0.3467, "step": 11309 }, { "epoch": 0.524582560296846, "grad_norm": 5.242237091064453, "learning_rate": 4.689631797121244e-06, "loss": 0.2959, "step": 11310 }, { "epoch": 0.5246289424860854, "grad_norm": 8.9474458694458, "learning_rate": 4.688897276891696e-06, "loss": 0.3825, "step": 11311 }, { "epoch": 0.5246753246753246, "grad_norm": 6.4597578048706055, "learning_rate": 4.688162763402027e-06, "loss": 0.3578, "step": 11312 }, { "epoch": 0.524721706864564, "grad_norm": 5.593569278717041, "learning_rate": 4.687428256668145e-06, "loss": 0.266, "step": 11313 }, { "epoch": 0.5247680890538033, "grad_norm": 8.977812767028809, "learning_rate": 4.686693756705965e-06, "loss": 0.4211, "step": 11314 }, { "epoch": 0.5248144712430427, "grad_norm": 5.311792373657227, "learning_rate": 4.685959263531399e-06, "loss": 0.2977, "step": 11315 }, { "epoch": 0.524860853432282, "grad_norm": 7.035727024078369, "learning_rate": 4.6852247771603595e-06, "loss": 0.3985, "step": 11316 }, { "epoch": 0.5249072356215213, "grad_norm": 5.361891269683838, "learning_rate": 4.68449029760876e-06, "loss": 0.2941, "step": 11317 }, { "epoch": 0.5249536178107607, "grad_norm": 5.018589973449707, "learning_rate": 4.683755824892513e-06, "loss": 0.3627, "step": 11318 }, { "epoch": 0.525, "grad_norm": 6.851678371429443, "learning_rate": 4.6830213590275285e-06, "loss": 0.4261, "step": 11319 }, { "epoch": 0.5250463821892394, "grad_norm": 6.3734612464904785, "learning_rate": 4.6822869000297185e-06, "loss": 0.2558, "step": 11320 }, { "epoch": 0.5250927643784786, "grad_norm": 10.37331485748291, "learning_rate": 4.681552447914995e-06, "loss": 0.3898, "step": 11321 }, { "epoch": 0.525139146567718, "grad_norm": 7.123495578765869, "learning_rate": 4.68081800269927e-06, "loss": 0.2745, "step": 11322 }, { "epoch": 0.5251855287569573, "grad_norm": 5.955526351928711, "learning_rate": 4.6800835643984566e-06, "loss": 0.3552, "step": 11323 }, { "epoch": 0.5252319109461967, "grad_norm": 13.012006759643555, "learning_rate": 4.679349133028462e-06, "loss": 0.5277, "step": 11324 }, { "epoch": 0.525278293135436, "grad_norm": 10.056920051574707, "learning_rate": 4.678614708605199e-06, "loss": 0.3493, "step": 11325 }, { "epoch": 0.5253246753246753, "grad_norm": 6.5694580078125, "learning_rate": 4.6778802911445795e-06, "loss": 0.3098, "step": 11326 }, { "epoch": 0.5253710575139147, "grad_norm": 4.7121052742004395, "learning_rate": 4.677145880662513e-06, "loss": 0.299, "step": 11327 }, { "epoch": 0.525417439703154, "grad_norm": 9.383885383605957, "learning_rate": 4.6764114771749104e-06, "loss": 0.3666, "step": 11328 }, { "epoch": 0.5254638218923933, "grad_norm": 8.071959495544434, "learning_rate": 4.675677080697686e-06, "loss": 0.3314, "step": 11329 }, { "epoch": 0.5255102040816326, "grad_norm": 4.684971809387207, "learning_rate": 4.674942691246743e-06, "loss": 0.2984, "step": 11330 }, { "epoch": 0.525556586270872, "grad_norm": 9.011134147644043, "learning_rate": 4.674208308837995e-06, "loss": 0.4336, "step": 11331 }, { "epoch": 0.5256029684601113, "grad_norm": 12.648370742797852, "learning_rate": 4.673473933487353e-06, "loss": 0.4089, "step": 11332 }, { "epoch": 0.5256493506493507, "grad_norm": 8.173735618591309, "learning_rate": 4.672739565210726e-06, "loss": 0.3358, "step": 11333 }, { "epoch": 0.52569573283859, "grad_norm": 5.94811487197876, "learning_rate": 4.672005204024026e-06, "loss": 0.3187, "step": 11334 }, { "epoch": 0.5257421150278293, "grad_norm": 5.353978633880615, "learning_rate": 4.671270849943158e-06, "loss": 0.2789, "step": 11335 }, { "epoch": 0.5257884972170687, "grad_norm": 4.87199068069458, "learning_rate": 4.670536502984033e-06, "loss": 0.2947, "step": 11336 }, { "epoch": 0.525834879406308, "grad_norm": 10.112371444702148, "learning_rate": 4.669802163162561e-06, "loss": 0.4199, "step": 11337 }, { "epoch": 0.5258812615955473, "grad_norm": 6.194569110870361, "learning_rate": 4.6690678304946515e-06, "loss": 0.3517, "step": 11338 }, { "epoch": 0.5259276437847866, "grad_norm": 5.6450700759887695, "learning_rate": 4.668333504996213e-06, "loss": 0.3029, "step": 11339 }, { "epoch": 0.525974025974026, "grad_norm": 9.131340026855469, "learning_rate": 4.667599186683156e-06, "loss": 0.3615, "step": 11340 }, { "epoch": 0.5260204081632653, "grad_norm": 4.940448760986328, "learning_rate": 4.6668648755713855e-06, "loss": 0.3629, "step": 11341 }, { "epoch": 0.5260667903525046, "grad_norm": 5.737760543823242, "learning_rate": 4.666130571676811e-06, "loss": 0.3402, "step": 11342 }, { "epoch": 0.526113172541744, "grad_norm": 11.565411567687988, "learning_rate": 4.665396275015342e-06, "loss": 0.3563, "step": 11343 }, { "epoch": 0.5261595547309833, "grad_norm": 5.671788215637207, "learning_rate": 4.664661985602886e-06, "loss": 0.4055, "step": 11344 }, { "epoch": 0.5262059369202227, "grad_norm": 6.115366458892822, "learning_rate": 4.663927703455354e-06, "loss": 0.3149, "step": 11345 }, { "epoch": 0.526252319109462, "grad_norm": 9.45545768737793, "learning_rate": 4.663193428588648e-06, "loss": 0.4332, "step": 11346 }, { "epoch": 0.5262987012987013, "grad_norm": 4.231679439544678, "learning_rate": 4.662459161018679e-06, "loss": 0.1548, "step": 11347 }, { "epoch": 0.5263450834879406, "grad_norm": 5.893855094909668, "learning_rate": 4.661724900761355e-06, "loss": 0.3622, "step": 11348 }, { "epoch": 0.52639146567718, "grad_norm": 11.094069480895996, "learning_rate": 4.660990647832582e-06, "loss": 0.3798, "step": 11349 }, { "epoch": 0.5264378478664193, "grad_norm": 6.941086292266846, "learning_rate": 4.66025640224827e-06, "loss": 0.3358, "step": 11350 }, { "epoch": 0.5264842300556586, "grad_norm": 11.382591247558594, "learning_rate": 4.659522164024321e-06, "loss": 0.3563, "step": 11351 }, { "epoch": 0.5265306122448979, "grad_norm": 12.372068405151367, "learning_rate": 4.6587879331766465e-06, "loss": 0.3945, "step": 11352 }, { "epoch": 0.5265769944341373, "grad_norm": 4.8268537521362305, "learning_rate": 4.6580537097211495e-06, "loss": 0.2864, "step": 11353 }, { "epoch": 0.5266233766233767, "grad_norm": 9.636872291564941, "learning_rate": 4.65731949367374e-06, "loss": 0.3452, "step": 11354 }, { "epoch": 0.5266697588126159, "grad_norm": 7.582803249359131, "learning_rate": 4.6565852850503226e-06, "loss": 0.3361, "step": 11355 }, { "epoch": 0.5267161410018553, "grad_norm": 6.666016578674316, "learning_rate": 4.6558510838668055e-06, "loss": 0.2989, "step": 11356 }, { "epoch": 0.5267625231910946, "grad_norm": 16.95670509338379, "learning_rate": 4.655116890139091e-06, "loss": 0.5633, "step": 11357 }, { "epoch": 0.526808905380334, "grad_norm": 11.323593139648438, "learning_rate": 4.654382703883087e-06, "loss": 0.4751, "step": 11358 }, { "epoch": 0.5268552875695733, "grad_norm": 7.993303298950195, "learning_rate": 4.6536485251147005e-06, "loss": 0.3799, "step": 11359 }, { "epoch": 0.5269016697588126, "grad_norm": 6.197962760925293, "learning_rate": 4.652914353849835e-06, "loss": 0.2732, "step": 11360 }, { "epoch": 0.5269480519480519, "grad_norm": 6.995621681213379, "learning_rate": 4.6521801901044e-06, "loss": 0.4335, "step": 11361 }, { "epoch": 0.5269944341372913, "grad_norm": 5.4906816482543945, "learning_rate": 4.6514460338942945e-06, "loss": 0.1957, "step": 11362 }, { "epoch": 0.5270408163265307, "grad_norm": 12.194058418273926, "learning_rate": 4.650711885235428e-06, "loss": 0.4545, "step": 11363 }, { "epoch": 0.5270871985157699, "grad_norm": 10.112933158874512, "learning_rate": 4.6499777441437035e-06, "loss": 0.41, "step": 11364 }, { "epoch": 0.5271335807050093, "grad_norm": 7.6184868812561035, "learning_rate": 4.6492436106350264e-06, "loss": 0.4117, "step": 11365 }, { "epoch": 0.5271799628942486, "grad_norm": 5.204394340515137, "learning_rate": 4.648509484725301e-06, "loss": 0.1478, "step": 11366 }, { "epoch": 0.527226345083488, "grad_norm": 8.348388671875, "learning_rate": 4.647775366430436e-06, "loss": 0.3688, "step": 11367 }, { "epoch": 0.5272727272727272, "grad_norm": 5.862185478210449, "learning_rate": 4.647041255766329e-06, "loss": 0.3038, "step": 11368 }, { "epoch": 0.5273191094619666, "grad_norm": 9.742901802062988, "learning_rate": 4.646307152748887e-06, "loss": 0.3885, "step": 11369 }, { "epoch": 0.5273654916512059, "grad_norm": 7.56704568862915, "learning_rate": 4.6455730573940135e-06, "loss": 0.3758, "step": 11370 }, { "epoch": 0.5274118738404453, "grad_norm": 7.863638877868652, "learning_rate": 4.644838969717613e-06, "loss": 0.3296, "step": 11371 }, { "epoch": 0.5274582560296845, "grad_norm": 7.442454814910889, "learning_rate": 4.644104889735592e-06, "loss": 0.4012, "step": 11372 }, { "epoch": 0.5275046382189239, "grad_norm": 4.784409999847412, "learning_rate": 4.643370817463848e-06, "loss": 0.3422, "step": 11373 }, { "epoch": 0.5275510204081633, "grad_norm": 7.032221794128418, "learning_rate": 4.642636752918287e-06, "loss": 0.3612, "step": 11374 }, { "epoch": 0.5275974025974026, "grad_norm": 4.567867279052734, "learning_rate": 4.641902696114812e-06, "loss": 0.2367, "step": 11375 }, { "epoch": 0.527643784786642, "grad_norm": 9.369637489318848, "learning_rate": 4.641168647069326e-06, "loss": 0.3965, "step": 11376 }, { "epoch": 0.5276901669758812, "grad_norm": 9.433588981628418, "learning_rate": 4.640434605797733e-06, "loss": 0.3099, "step": 11377 }, { "epoch": 0.5277365491651206, "grad_norm": 5.935145378112793, "learning_rate": 4.639700572315936e-06, "loss": 0.3087, "step": 11378 }, { "epoch": 0.5277829313543599, "grad_norm": 9.942688941955566, "learning_rate": 4.638966546639835e-06, "loss": 0.4273, "step": 11379 }, { "epoch": 0.5278293135435993, "grad_norm": 7.022232532501221, "learning_rate": 4.638232528785332e-06, "loss": 0.3264, "step": 11380 }, { "epoch": 0.5278756957328385, "grad_norm": 3.21990966796875, "learning_rate": 4.637498518768332e-06, "loss": 0.2646, "step": 11381 }, { "epoch": 0.5279220779220779, "grad_norm": 10.245906829833984, "learning_rate": 4.636764516604734e-06, "loss": 0.4891, "step": 11382 }, { "epoch": 0.5279684601113173, "grad_norm": 11.79719352722168, "learning_rate": 4.636030522310443e-06, "loss": 0.3936, "step": 11383 }, { "epoch": 0.5280148423005566, "grad_norm": 9.294723510742188, "learning_rate": 4.6352965359013576e-06, "loss": 0.4036, "step": 11384 }, { "epoch": 0.5280612244897959, "grad_norm": 8.222482681274414, "learning_rate": 4.63456255739338e-06, "loss": 0.3653, "step": 11385 }, { "epoch": 0.5281076066790352, "grad_norm": 12.399945259094238, "learning_rate": 4.633828586802412e-06, "loss": 0.3702, "step": 11386 }, { "epoch": 0.5281539888682746, "grad_norm": 6.518171310424805, "learning_rate": 4.6330946241443545e-06, "loss": 0.4166, "step": 11387 }, { "epoch": 0.5282003710575139, "grad_norm": 6.950176239013672, "learning_rate": 4.63236066943511e-06, "loss": 0.368, "step": 11388 }, { "epoch": 0.5282467532467533, "grad_norm": 6.550858020782471, "learning_rate": 4.631626722690577e-06, "loss": 0.2873, "step": 11389 }, { "epoch": 0.5282931354359925, "grad_norm": 7.476912021636963, "learning_rate": 4.6308927839266555e-06, "loss": 0.4435, "step": 11390 }, { "epoch": 0.5283395176252319, "grad_norm": 5.837035655975342, "learning_rate": 4.630158853159248e-06, "loss": 0.3306, "step": 11391 }, { "epoch": 0.5283858998144713, "grad_norm": 13.127070426940918, "learning_rate": 4.629424930404253e-06, "loss": 0.3878, "step": 11392 }, { "epoch": 0.5284322820037106, "grad_norm": 4.709054470062256, "learning_rate": 4.6286910156775725e-06, "loss": 0.3142, "step": 11393 }, { "epoch": 0.5284786641929499, "grad_norm": 4.520664691925049, "learning_rate": 4.6279571089951056e-06, "loss": 0.3409, "step": 11394 }, { "epoch": 0.5285250463821892, "grad_norm": 6.77787971496582, "learning_rate": 4.627223210372751e-06, "loss": 0.2878, "step": 11395 }, { "epoch": 0.5285714285714286, "grad_norm": 4.63456392288208, "learning_rate": 4.626489319826409e-06, "loss": 0.2677, "step": 11396 }, { "epoch": 0.5286178107606679, "grad_norm": 3.848907709121704, "learning_rate": 4.625755437371979e-06, "loss": 0.2564, "step": 11397 }, { "epoch": 0.5286641929499072, "grad_norm": 10.591673851013184, "learning_rate": 4.62502156302536e-06, "loss": 0.4284, "step": 11398 }, { "epoch": 0.5287105751391465, "grad_norm": 5.561920642852783, "learning_rate": 4.62428769680245e-06, "loss": 0.276, "step": 11399 }, { "epoch": 0.5287569573283859, "grad_norm": 8.499095916748047, "learning_rate": 4.623553838719151e-06, "loss": 0.3844, "step": 11400 }, { "epoch": 0.5288033395176253, "grad_norm": 4.500316143035889, "learning_rate": 4.6228199887913584e-06, "loss": 0.2741, "step": 11401 }, { "epoch": 0.5288497217068646, "grad_norm": 5.556673526763916, "learning_rate": 4.6220861470349715e-06, "loss": 0.3878, "step": 11402 }, { "epoch": 0.5288961038961039, "grad_norm": 4.292382717132568, "learning_rate": 4.62135231346589e-06, "loss": 0.2426, "step": 11403 }, { "epoch": 0.5289424860853432, "grad_norm": 4.9512739181518555, "learning_rate": 4.62061848810001e-06, "loss": 0.3013, "step": 11404 }, { "epoch": 0.5289888682745826, "grad_norm": 5.267258167266846, "learning_rate": 4.619884670953231e-06, "loss": 0.2822, "step": 11405 }, { "epoch": 0.5290352504638219, "grad_norm": 6.0096564292907715, "learning_rate": 4.619150862041449e-06, "loss": 0.3554, "step": 11406 }, { "epoch": 0.5290816326530612, "grad_norm": 9.16894817352295, "learning_rate": 4.618417061380563e-06, "loss": 0.4162, "step": 11407 }, { "epoch": 0.5291280148423005, "grad_norm": 5.767107963562012, "learning_rate": 4.617683268986471e-06, "loss": 0.3756, "step": 11408 }, { "epoch": 0.5291743970315399, "grad_norm": 7.954423427581787, "learning_rate": 4.616949484875068e-06, "loss": 0.4224, "step": 11409 }, { "epoch": 0.5292207792207793, "grad_norm": 6.074289321899414, "learning_rate": 4.616215709062254e-06, "loss": 0.3256, "step": 11410 }, { "epoch": 0.5292671614100185, "grad_norm": 10.897950172424316, "learning_rate": 4.615481941563924e-06, "loss": 0.4058, "step": 11411 }, { "epoch": 0.5293135435992579, "grad_norm": 8.135734558105469, "learning_rate": 4.614748182395973e-06, "loss": 0.3495, "step": 11412 }, { "epoch": 0.5293599257884972, "grad_norm": 7.240486145019531, "learning_rate": 4.614014431574302e-06, "loss": 0.3964, "step": 11413 }, { "epoch": 0.5294063079777366, "grad_norm": 3.646570920944214, "learning_rate": 4.613280689114803e-06, "loss": 0.2436, "step": 11414 }, { "epoch": 0.5294526901669759, "grad_norm": 6.16953706741333, "learning_rate": 4.612546955033374e-06, "loss": 0.309, "step": 11415 }, { "epoch": 0.5294990723562152, "grad_norm": 4.936371803283691, "learning_rate": 4.611813229345911e-06, "loss": 0.3653, "step": 11416 }, { "epoch": 0.5295454545454545, "grad_norm": 8.078665733337402, "learning_rate": 4.61107951206831e-06, "loss": 0.3147, "step": 11417 }, { "epoch": 0.5295918367346939, "grad_norm": 8.43624496459961, "learning_rate": 4.610345803216467e-06, "loss": 0.2725, "step": 11418 }, { "epoch": 0.5296382189239333, "grad_norm": 7.414863586425781, "learning_rate": 4.609612102806275e-06, "loss": 0.222, "step": 11419 }, { "epoch": 0.5296846011131725, "grad_norm": 4.105344772338867, "learning_rate": 4.608878410853632e-06, "loss": 0.276, "step": 11420 }, { "epoch": 0.5297309833024119, "grad_norm": 9.129900932312012, "learning_rate": 4.608144727374431e-06, "loss": 0.2881, "step": 11421 }, { "epoch": 0.5297773654916512, "grad_norm": 9.49295425415039, "learning_rate": 4.607411052384569e-06, "loss": 0.4402, "step": 11422 }, { "epoch": 0.5298237476808906, "grad_norm": 6.095017910003662, "learning_rate": 4.606677385899939e-06, "loss": 0.3515, "step": 11423 }, { "epoch": 0.5298701298701298, "grad_norm": 6.024087905883789, "learning_rate": 4.605943727936436e-06, "loss": 0.3205, "step": 11424 }, { "epoch": 0.5299165120593692, "grad_norm": 7.320588111877441, "learning_rate": 4.6052100785099535e-06, "loss": 0.3223, "step": 11425 }, { "epoch": 0.5299628942486085, "grad_norm": 6.097350597381592, "learning_rate": 4.6044764376363865e-06, "loss": 0.3321, "step": 11426 }, { "epoch": 0.5300092764378479, "grad_norm": 6.826016902923584, "learning_rate": 4.60374280533163e-06, "loss": 0.198, "step": 11427 }, { "epoch": 0.5300556586270871, "grad_norm": 9.841717720031738, "learning_rate": 4.603009181611577e-06, "loss": 0.3594, "step": 11428 }, { "epoch": 0.5301020408163265, "grad_norm": 4.9240946769714355, "learning_rate": 4.602275566492119e-06, "loss": 0.2681, "step": 11429 }, { "epoch": 0.5301484230055659, "grad_norm": 3.898885726928711, "learning_rate": 4.601541959989152e-06, "loss": 0.348, "step": 11430 }, { "epoch": 0.5301948051948052, "grad_norm": 7.001819610595703, "learning_rate": 4.600808362118567e-06, "loss": 0.3969, "step": 11431 }, { "epoch": 0.5302411873840446, "grad_norm": 7.3728227615356445, "learning_rate": 4.6000747728962606e-06, "loss": 0.3992, "step": 11432 }, { "epoch": 0.5302875695732838, "grad_norm": 8.616974830627441, "learning_rate": 4.5993411923381225e-06, "loss": 0.267, "step": 11433 }, { "epoch": 0.5303339517625232, "grad_norm": 4.533473968505859, "learning_rate": 4.598607620460045e-06, "loss": 0.2862, "step": 11434 }, { "epoch": 0.5303803339517625, "grad_norm": 11.452495574951172, "learning_rate": 4.597874057277922e-06, "loss": 0.2783, "step": 11435 }, { "epoch": 0.5304267161410019, "grad_norm": 7.8825602531433105, "learning_rate": 4.597140502807645e-06, "loss": 0.3466, "step": 11436 }, { "epoch": 0.5304730983302411, "grad_norm": 5.971795558929443, "learning_rate": 4.5964069570651075e-06, "loss": 0.2079, "step": 11437 }, { "epoch": 0.5305194805194805, "grad_norm": 7.634641647338867, "learning_rate": 4.595673420066202e-06, "loss": 0.231, "step": 11438 }, { "epoch": 0.5305658627087199, "grad_norm": 4.86898136138916, "learning_rate": 4.594939891826817e-06, "loss": 0.288, "step": 11439 }, { "epoch": 0.5306122448979592, "grad_norm": 12.35666561126709, "learning_rate": 4.594206372362845e-06, "loss": 0.5572, "step": 11440 }, { "epoch": 0.5306586270871985, "grad_norm": 8.119251251220703, "learning_rate": 4.5934728616901775e-06, "loss": 0.2977, "step": 11441 }, { "epoch": 0.5307050092764378, "grad_norm": 8.671103477478027, "learning_rate": 4.592739359824706e-06, "loss": 0.3306, "step": 11442 }, { "epoch": 0.5307513914656772, "grad_norm": 9.02955436706543, "learning_rate": 4.592005866782325e-06, "loss": 0.3961, "step": 11443 }, { "epoch": 0.5307977736549165, "grad_norm": 8.604580879211426, "learning_rate": 4.591272382578919e-06, "loss": 0.3187, "step": 11444 }, { "epoch": 0.5308441558441559, "grad_norm": 4.097945213317871, "learning_rate": 4.59053890723038e-06, "loss": 0.2952, "step": 11445 }, { "epoch": 0.5308905380333951, "grad_norm": 5.776023864746094, "learning_rate": 4.589805440752601e-06, "loss": 0.3599, "step": 11446 }, { "epoch": 0.5309369202226345, "grad_norm": 14.645354270935059, "learning_rate": 4.58907198316147e-06, "loss": 0.36, "step": 11447 }, { "epoch": 0.5309833024118739, "grad_norm": 10.024518013000488, "learning_rate": 4.588338534472878e-06, "loss": 0.3298, "step": 11448 }, { "epoch": 0.5310296846011132, "grad_norm": 7.689783096313477, "learning_rate": 4.587605094702717e-06, "loss": 0.3595, "step": 11449 }, { "epoch": 0.5310760667903525, "grad_norm": 6.246420860290527, "learning_rate": 4.586871663866872e-06, "loss": 0.2809, "step": 11450 }, { "epoch": 0.5311224489795918, "grad_norm": 13.707444190979004, "learning_rate": 4.586138241981234e-06, "loss": 0.5733, "step": 11451 }, { "epoch": 0.5311688311688312, "grad_norm": 5.93230676651001, "learning_rate": 4.585404829061693e-06, "loss": 0.282, "step": 11452 }, { "epoch": 0.5312152133580705, "grad_norm": 7.456127166748047, "learning_rate": 4.584671425124137e-06, "loss": 0.3879, "step": 11453 }, { "epoch": 0.5312615955473098, "grad_norm": 6.372276782989502, "learning_rate": 4.583938030184458e-06, "loss": 0.368, "step": 11454 }, { "epoch": 0.5313079777365491, "grad_norm": 6.600505828857422, "learning_rate": 4.583204644258541e-06, "loss": 0.3328, "step": 11455 }, { "epoch": 0.5313543599257885, "grad_norm": 5.735397815704346, "learning_rate": 4.582471267362274e-06, "loss": 0.4007, "step": 11456 }, { "epoch": 0.5314007421150279, "grad_norm": 8.676767349243164, "learning_rate": 4.581737899511548e-06, "loss": 0.4077, "step": 11457 }, { "epoch": 0.5314471243042672, "grad_norm": 7.208469867706299, "learning_rate": 4.5810045407222495e-06, "loss": 0.3151, "step": 11458 }, { "epoch": 0.5314935064935065, "grad_norm": 6.16398286819458, "learning_rate": 4.580271191010266e-06, "loss": 0.3226, "step": 11459 }, { "epoch": 0.5315398886827458, "grad_norm": 5.702208042144775, "learning_rate": 4.579537850391488e-06, "loss": 0.3071, "step": 11460 }, { "epoch": 0.5315862708719852, "grad_norm": 7.005751609802246, "learning_rate": 4.578804518881799e-06, "loss": 0.4151, "step": 11461 }, { "epoch": 0.5316326530612245, "grad_norm": 5.794546604156494, "learning_rate": 4.5780711964970884e-06, "loss": 0.2886, "step": 11462 }, { "epoch": 0.5316790352504638, "grad_norm": 5.521021842956543, "learning_rate": 4.577337883253241e-06, "loss": 0.3911, "step": 11463 }, { "epoch": 0.5317254174397031, "grad_norm": 4.644576072692871, "learning_rate": 4.576604579166147e-06, "loss": 0.2618, "step": 11464 }, { "epoch": 0.5317717996289425, "grad_norm": 8.168082237243652, "learning_rate": 4.575871284251694e-06, "loss": 0.342, "step": 11465 }, { "epoch": 0.5318181818181819, "grad_norm": 10.036723136901855, "learning_rate": 4.575137998525762e-06, "loss": 0.371, "step": 11466 }, { "epoch": 0.5318645640074211, "grad_norm": 5.5530900955200195, "learning_rate": 4.574404722004243e-06, "loss": 0.2952, "step": 11467 }, { "epoch": 0.5319109461966605, "grad_norm": 7.10026741027832, "learning_rate": 4.57367145470302e-06, "loss": 0.3224, "step": 11468 }, { "epoch": 0.5319573283858998, "grad_norm": 5.548215866088867, "learning_rate": 4.572938196637981e-06, "loss": 0.3316, "step": 11469 }, { "epoch": 0.5320037105751392, "grad_norm": 10.290980339050293, "learning_rate": 4.57220494782501e-06, "loss": 0.3899, "step": 11470 }, { "epoch": 0.5320500927643784, "grad_norm": 3.4026777744293213, "learning_rate": 4.571471708279996e-06, "loss": 0.2896, "step": 11471 }, { "epoch": 0.5320964749536178, "grad_norm": 10.192174911499023, "learning_rate": 4.570738478018819e-06, "loss": 0.4211, "step": 11472 }, { "epoch": 0.5321428571428571, "grad_norm": 4.020881652832031, "learning_rate": 4.570005257057366e-06, "loss": 0.2323, "step": 11473 }, { "epoch": 0.5321892393320965, "grad_norm": 3.5608623027801514, "learning_rate": 4.569272045411522e-06, "loss": 0.3214, "step": 11474 }, { "epoch": 0.5322356215213359, "grad_norm": 13.408358573913574, "learning_rate": 4.568538843097173e-06, "loss": 0.368, "step": 11475 }, { "epoch": 0.5322820037105751, "grad_norm": 5.5098958015441895, "learning_rate": 4.567805650130204e-06, "loss": 0.3173, "step": 11476 }, { "epoch": 0.5323283858998145, "grad_norm": 6.1766228675842285, "learning_rate": 4.567072466526495e-06, "loss": 0.3055, "step": 11477 }, { "epoch": 0.5323747680890538, "grad_norm": 6.778407096862793, "learning_rate": 4.566339292301934e-06, "loss": 0.3863, "step": 11478 }, { "epoch": 0.5324211502782932, "grad_norm": 4.625077247619629, "learning_rate": 4.565606127472403e-06, "loss": 0.2899, "step": 11479 }, { "epoch": 0.5324675324675324, "grad_norm": 3.5787203311920166, "learning_rate": 4.564872972053786e-06, "loss": 0.2846, "step": 11480 }, { "epoch": 0.5325139146567718, "grad_norm": 7.396979331970215, "learning_rate": 4.564139826061966e-06, "loss": 0.3142, "step": 11481 }, { "epoch": 0.5325602968460111, "grad_norm": 47.51814651489258, "learning_rate": 4.5634066895128296e-06, "loss": 0.6368, "step": 11482 }, { "epoch": 0.5326066790352505, "grad_norm": 6.189911842346191, "learning_rate": 4.562673562422254e-06, "loss": 0.3469, "step": 11483 }, { "epoch": 0.5326530612244897, "grad_norm": 6.434017658233643, "learning_rate": 4.5619404448061256e-06, "loss": 0.3453, "step": 11484 }, { "epoch": 0.5326994434137291, "grad_norm": 7.927070140838623, "learning_rate": 4.561207336680327e-06, "loss": 0.3663, "step": 11485 }, { "epoch": 0.5327458256029685, "grad_norm": 10.06368637084961, "learning_rate": 4.56047423806074e-06, "loss": 0.4386, "step": 11486 }, { "epoch": 0.5327922077922078, "grad_norm": 4.177414417266846, "learning_rate": 4.559741148963247e-06, "loss": 0.2988, "step": 11487 }, { "epoch": 0.5328385899814472, "grad_norm": 5.753730773925781, "learning_rate": 4.559008069403729e-06, "loss": 0.3495, "step": 11488 }, { "epoch": 0.5328849721706864, "grad_norm": 4.887124061584473, "learning_rate": 4.558274999398069e-06, "loss": 0.3955, "step": 11489 }, { "epoch": 0.5329313543599258, "grad_norm": 13.025373458862305, "learning_rate": 4.5575419389621476e-06, "loss": 0.5512, "step": 11490 }, { "epoch": 0.5329777365491651, "grad_norm": 16.951202392578125, "learning_rate": 4.556808888111847e-06, "loss": 0.4346, "step": 11491 }, { "epoch": 0.5330241187384045, "grad_norm": 11.275769233703613, "learning_rate": 4.556075846863048e-06, "loss": 0.475, "step": 11492 }, { "epoch": 0.5330705009276437, "grad_norm": 8.198712348937988, "learning_rate": 4.555342815231634e-06, "loss": 0.2947, "step": 11493 }, { "epoch": 0.5331168831168831, "grad_norm": 9.988476753234863, "learning_rate": 4.55460979323348e-06, "loss": 0.3462, "step": 11494 }, { "epoch": 0.5331632653061225, "grad_norm": 8.24510669708252, "learning_rate": 4.5538767808844716e-06, "loss": 0.4091, "step": 11495 }, { "epoch": 0.5332096474953618, "grad_norm": 5.754305839538574, "learning_rate": 4.553143778200486e-06, "loss": 0.353, "step": 11496 }, { "epoch": 0.5332560296846011, "grad_norm": 5.941777229309082, "learning_rate": 4.5524107851974056e-06, "loss": 0.3332, "step": 11497 }, { "epoch": 0.5333024118738404, "grad_norm": 3.8781700134277344, "learning_rate": 4.551677801891112e-06, "loss": 0.2643, "step": 11498 }, { "epoch": 0.5333487940630798, "grad_norm": 5.022068977355957, "learning_rate": 4.550944828297479e-06, "loss": 0.3671, "step": 11499 }, { "epoch": 0.5333951762523191, "grad_norm": 5.079200267791748, "learning_rate": 4.550211864432391e-06, "loss": 0.2371, "step": 11500 }, { "epoch": 0.5334415584415585, "grad_norm": 4.221078872680664, "learning_rate": 4.549478910311724e-06, "loss": 0.2505, "step": 11501 }, { "epoch": 0.5334879406307977, "grad_norm": 10.68259048461914, "learning_rate": 4.54874596595136e-06, "loss": 0.5219, "step": 11502 }, { "epoch": 0.5335343228200371, "grad_norm": 9.207058906555176, "learning_rate": 4.548013031367176e-06, "loss": 0.2869, "step": 11503 }, { "epoch": 0.5335807050092765, "grad_norm": 9.4950532913208, "learning_rate": 4.547280106575054e-06, "loss": 0.5259, "step": 11504 }, { "epoch": 0.5336270871985158, "grad_norm": 9.520333290100098, "learning_rate": 4.5465471915908675e-06, "loss": 0.482, "step": 11505 }, { "epoch": 0.5336734693877551, "grad_norm": 6.5665388107299805, "learning_rate": 4.545814286430496e-06, "loss": 0.3297, "step": 11506 }, { "epoch": 0.5337198515769944, "grad_norm": 13.811695098876953, "learning_rate": 4.545081391109819e-06, "loss": 0.4406, "step": 11507 }, { "epoch": 0.5337662337662338, "grad_norm": 5.487722873687744, "learning_rate": 4.544348505644714e-06, "loss": 0.214, "step": 11508 }, { "epoch": 0.5338126159554731, "grad_norm": 6.174779415130615, "learning_rate": 4.54361563005106e-06, "loss": 0.3545, "step": 11509 }, { "epoch": 0.5338589981447124, "grad_norm": 6.337594032287598, "learning_rate": 4.542882764344731e-06, "loss": 0.3883, "step": 11510 }, { "epoch": 0.5339053803339517, "grad_norm": 4.470818519592285, "learning_rate": 4.542149908541605e-06, "loss": 0.328, "step": 11511 }, { "epoch": 0.5339517625231911, "grad_norm": 5.234076023101807, "learning_rate": 4.541417062657561e-06, "loss": 0.3341, "step": 11512 }, { "epoch": 0.5339981447124305, "grad_norm": 4.822751045227051, "learning_rate": 4.540684226708473e-06, "loss": 0.3749, "step": 11513 }, { "epoch": 0.5340445269016698, "grad_norm": 8.716106414794922, "learning_rate": 4.539951400710222e-06, "loss": 0.43, "step": 11514 }, { "epoch": 0.5340909090909091, "grad_norm": 13.364599227905273, "learning_rate": 4.539218584678679e-06, "loss": 0.3617, "step": 11515 }, { "epoch": 0.5341372912801484, "grad_norm": 5.020819187164307, "learning_rate": 4.538485778629723e-06, "loss": 0.3472, "step": 11516 }, { "epoch": 0.5341836734693878, "grad_norm": 4.614786148071289, "learning_rate": 4.537752982579228e-06, "loss": 0.2966, "step": 11517 }, { "epoch": 0.5342300556586271, "grad_norm": 4.8124237060546875, "learning_rate": 4.537020196543072e-06, "loss": 0.2981, "step": 11518 }, { "epoch": 0.5342764378478664, "grad_norm": 5.118951797485352, "learning_rate": 4.536287420537128e-06, "loss": 0.3274, "step": 11519 }, { "epoch": 0.5343228200371057, "grad_norm": 5.445694446563721, "learning_rate": 4.535554654577275e-06, "loss": 0.3183, "step": 11520 }, { "epoch": 0.5343692022263451, "grad_norm": 13.970538139343262, "learning_rate": 4.534821898679384e-06, "loss": 0.3655, "step": 11521 }, { "epoch": 0.5344155844155845, "grad_norm": 5.105695724487305, "learning_rate": 4.53408915285933e-06, "loss": 0.2588, "step": 11522 }, { "epoch": 0.5344619666048237, "grad_norm": 11.255090713500977, "learning_rate": 4.5333564171329895e-06, "loss": 0.3703, "step": 11523 }, { "epoch": 0.5345083487940631, "grad_norm": 3.8546366691589355, "learning_rate": 4.532623691516236e-06, "loss": 0.3042, "step": 11524 }, { "epoch": 0.5345547309833024, "grad_norm": 3.265059471130371, "learning_rate": 4.531890976024946e-06, "loss": 0.1499, "step": 11525 }, { "epoch": 0.5346011131725418, "grad_norm": 7.877665996551514, "learning_rate": 4.531158270674989e-06, "loss": 0.3422, "step": 11526 }, { "epoch": 0.534647495361781, "grad_norm": 7.083466053009033, "learning_rate": 4.53042557548224e-06, "loss": 0.3318, "step": 11527 }, { "epoch": 0.5346938775510204, "grad_norm": 8.418011665344238, "learning_rate": 4.529692890462574e-06, "loss": 0.3855, "step": 11528 }, { "epoch": 0.5347402597402597, "grad_norm": 8.23547649383545, "learning_rate": 4.528960215631863e-06, "loss": 0.415, "step": 11529 }, { "epoch": 0.5347866419294991, "grad_norm": 4.777781963348389, "learning_rate": 4.528227551005982e-06, "loss": 0.323, "step": 11530 }, { "epoch": 0.5348330241187385, "grad_norm": 7.312155723571777, "learning_rate": 4.5274948966008026e-06, "loss": 0.4421, "step": 11531 }, { "epoch": 0.5348794063079777, "grad_norm": 4.988697052001953, "learning_rate": 4.5267622524321955e-06, "loss": 0.1568, "step": 11532 }, { "epoch": 0.5349257884972171, "grad_norm": 9.81054973602295, "learning_rate": 4.5260296185160345e-06, "loss": 0.3248, "step": 11533 }, { "epoch": 0.5349721706864564, "grad_norm": 8.00184440612793, "learning_rate": 4.525296994868192e-06, "loss": 0.4052, "step": 11534 }, { "epoch": 0.5350185528756958, "grad_norm": 5.870844841003418, "learning_rate": 4.52456438150454e-06, "loss": 0.3421, "step": 11535 }, { "epoch": 0.535064935064935, "grad_norm": 9.981466293334961, "learning_rate": 4.5238317784409515e-06, "loss": 0.3643, "step": 11536 }, { "epoch": 0.5351113172541744, "grad_norm": 5.012681007385254, "learning_rate": 4.523099185693294e-06, "loss": 0.3122, "step": 11537 }, { "epoch": 0.5351576994434137, "grad_norm": 5.018214702606201, "learning_rate": 4.522366603277442e-06, "loss": 0.3291, "step": 11538 }, { "epoch": 0.5352040816326531, "grad_norm": 4.582148551940918, "learning_rate": 4.521634031209264e-06, "loss": 0.2658, "step": 11539 }, { "epoch": 0.5352504638218923, "grad_norm": 5.838944911956787, "learning_rate": 4.520901469504635e-06, "loss": 0.2395, "step": 11540 }, { "epoch": 0.5352968460111317, "grad_norm": 4.0354533195495605, "learning_rate": 4.5201689181794215e-06, "loss": 0.3327, "step": 11541 }, { "epoch": 0.5353432282003711, "grad_norm": 10.558032989501953, "learning_rate": 4.5194363772494965e-06, "loss": 0.4596, "step": 11542 }, { "epoch": 0.5353896103896104, "grad_norm": 24.771194458007812, "learning_rate": 4.518703846730728e-06, "loss": 0.399, "step": 11543 }, { "epoch": 0.5354359925788498, "grad_norm": 6.818282127380371, "learning_rate": 4.5179713266389866e-06, "loss": 0.2431, "step": 11544 }, { "epoch": 0.535482374768089, "grad_norm": 7.2781081199646, "learning_rate": 4.5172388169901425e-06, "loss": 0.386, "step": 11545 }, { "epoch": 0.5355287569573284, "grad_norm": 7.82642126083374, "learning_rate": 4.516506317800065e-06, "loss": 0.3306, "step": 11546 }, { "epoch": 0.5355751391465677, "grad_norm": 6.671266555786133, "learning_rate": 4.5157738290846235e-06, "loss": 0.3627, "step": 11547 }, { "epoch": 0.5356215213358071, "grad_norm": 4.777729034423828, "learning_rate": 4.515041350859685e-06, "loss": 0.2078, "step": 11548 }, { "epoch": 0.5356679035250463, "grad_norm": 6.330145835876465, "learning_rate": 4.514308883141121e-06, "loss": 0.3236, "step": 11549 }, { "epoch": 0.5357142857142857, "grad_norm": 3.8101887702941895, "learning_rate": 4.513576425944799e-06, "loss": 0.3338, "step": 11550 }, { "epoch": 0.5357606679035251, "grad_norm": 11.826919555664062, "learning_rate": 4.512843979286587e-06, "loss": 0.3632, "step": 11551 }, { "epoch": 0.5358070500927644, "grad_norm": 7.462097644805908, "learning_rate": 4.512111543182352e-06, "loss": 0.392, "step": 11552 }, { "epoch": 0.5358534322820037, "grad_norm": 10.027626037597656, "learning_rate": 4.511379117647965e-06, "loss": 0.3815, "step": 11553 }, { "epoch": 0.535899814471243, "grad_norm": 7.124020576477051, "learning_rate": 4.510646702699291e-06, "loss": 0.3884, "step": 11554 }, { "epoch": 0.5359461966604824, "grad_norm": 7.344118118286133, "learning_rate": 4.509914298352197e-06, "loss": 0.2725, "step": 11555 }, { "epoch": 0.5359925788497217, "grad_norm": 8.028446197509766, "learning_rate": 4.5091819046225535e-06, "loss": 0.3885, "step": 11556 }, { "epoch": 0.5360389610389611, "grad_norm": 5.718881607055664, "learning_rate": 4.508449521526223e-06, "loss": 0.2516, "step": 11557 }, { "epoch": 0.5360853432282003, "grad_norm": 9.4295015335083, "learning_rate": 4.507717149079076e-06, "loss": 0.437, "step": 11558 }, { "epoch": 0.5361317254174397, "grad_norm": 5.679393768310547, "learning_rate": 4.506984787296976e-06, "loss": 0.2667, "step": 11559 }, { "epoch": 0.536178107606679, "grad_norm": 5.23067045211792, "learning_rate": 4.50625243619579e-06, "loss": 0.2767, "step": 11560 }, { "epoch": 0.5362244897959184, "grad_norm": 13.584991455078125, "learning_rate": 4.505520095791387e-06, "loss": 0.4237, "step": 11561 }, { "epoch": 0.5362708719851577, "grad_norm": 8.78115463256836, "learning_rate": 4.504787766099628e-06, "loss": 0.3606, "step": 11562 }, { "epoch": 0.536317254174397, "grad_norm": 7.42584228515625, "learning_rate": 4.504055447136382e-06, "loss": 0.4044, "step": 11563 }, { "epoch": 0.5363636363636364, "grad_norm": 7.611568927764893, "learning_rate": 4.503323138917513e-06, "loss": 0.3279, "step": 11564 }, { "epoch": 0.5364100185528757, "grad_norm": 4.333650588989258, "learning_rate": 4.502590841458886e-06, "loss": 0.2631, "step": 11565 }, { "epoch": 0.536456400742115, "grad_norm": 16.39525604248047, "learning_rate": 4.501858554776367e-06, "loss": 0.4944, "step": 11566 }, { "epoch": 0.5365027829313543, "grad_norm": 8.79653263092041, "learning_rate": 4.501126278885818e-06, "loss": 0.4193, "step": 11567 }, { "epoch": 0.5365491651205937, "grad_norm": 7.745771884918213, "learning_rate": 4.500394013803106e-06, "loss": 0.332, "step": 11568 }, { "epoch": 0.536595547309833, "grad_norm": 5.409905910491943, "learning_rate": 4.499661759544095e-06, "loss": 0.1389, "step": 11569 }, { "epoch": 0.5366419294990723, "grad_norm": 10.857666969299316, "learning_rate": 4.498929516124647e-06, "loss": 0.3909, "step": 11570 }, { "epoch": 0.5366883116883117, "grad_norm": 5.82893180847168, "learning_rate": 4.4981972835606265e-06, "loss": 0.3208, "step": 11571 }, { "epoch": 0.536734693877551, "grad_norm": 8.736326217651367, "learning_rate": 4.497465061867898e-06, "loss": 0.2606, "step": 11572 }, { "epoch": 0.5367810760667904, "grad_norm": 6.77497673034668, "learning_rate": 4.496732851062323e-06, "loss": 0.3591, "step": 11573 }, { "epoch": 0.5368274582560297, "grad_norm": 11.619986534118652, "learning_rate": 4.496000651159765e-06, "loss": 0.4719, "step": 11574 }, { "epoch": 0.536873840445269, "grad_norm": 9.198591232299805, "learning_rate": 4.495268462176089e-06, "loss": 0.3951, "step": 11575 }, { "epoch": 0.5369202226345083, "grad_norm": 9.1116361618042, "learning_rate": 4.494536284127155e-06, "loss": 0.3533, "step": 11576 }, { "epoch": 0.5369666048237477, "grad_norm": 5.6669087409973145, "learning_rate": 4.493804117028825e-06, "loss": 0.4035, "step": 11577 }, { "epoch": 0.537012987012987, "grad_norm": 7.847836971282959, "learning_rate": 4.4930719608969615e-06, "loss": 0.3523, "step": 11578 }, { "epoch": 0.5370593692022263, "grad_norm": 4.60811710357666, "learning_rate": 4.492339815747426e-06, "loss": 0.2148, "step": 11579 }, { "epoch": 0.5371057513914657, "grad_norm": 12.993103981018066, "learning_rate": 4.491607681596083e-06, "loss": 0.4368, "step": 11580 }, { "epoch": 0.537152133580705, "grad_norm": 16.79097557067871, "learning_rate": 4.490875558458791e-06, "loss": 0.5868, "step": 11581 }, { "epoch": 0.5371985157699444, "grad_norm": 7.327170372009277, "learning_rate": 4.49014344635141e-06, "loss": 0.3249, "step": 11582 }, { "epoch": 0.5372448979591836, "grad_norm": 8.697405815124512, "learning_rate": 4.489411345289803e-06, "loss": 0.3115, "step": 11583 }, { "epoch": 0.537291280148423, "grad_norm": 7.856729030609131, "learning_rate": 4.488679255289829e-06, "loss": 0.373, "step": 11584 }, { "epoch": 0.5373376623376623, "grad_norm": 5.585690498352051, "learning_rate": 4.48794717636735e-06, "loss": 0.2152, "step": 11585 }, { "epoch": 0.5373840445269017, "grad_norm": 5.103446006774902, "learning_rate": 4.487215108538227e-06, "loss": 0.335, "step": 11586 }, { "epoch": 0.537430426716141, "grad_norm": 9.242181777954102, "learning_rate": 4.486483051818317e-06, "loss": 0.4099, "step": 11587 }, { "epoch": 0.5374768089053803, "grad_norm": 7.09694242477417, "learning_rate": 4.485751006223479e-06, "loss": 0.3713, "step": 11588 }, { "epoch": 0.5375231910946197, "grad_norm": 6.957326889038086, "learning_rate": 4.485018971769576e-06, "loss": 0.353, "step": 11589 }, { "epoch": 0.537569573283859, "grad_norm": 4.6035990715026855, "learning_rate": 4.484286948472464e-06, "loss": 0.2555, "step": 11590 }, { "epoch": 0.5376159554730984, "grad_norm": 6.377965927124023, "learning_rate": 4.483554936348007e-06, "loss": 0.3898, "step": 11591 }, { "epoch": 0.5376623376623376, "grad_norm": 6.71108865737915, "learning_rate": 4.4828229354120565e-06, "loss": 0.3245, "step": 11592 }, { "epoch": 0.537708719851577, "grad_norm": 7.037043571472168, "learning_rate": 4.482090945680474e-06, "loss": 0.3072, "step": 11593 }, { "epoch": 0.5377551020408163, "grad_norm": 9.170878410339355, "learning_rate": 4.481358967169119e-06, "loss": 0.3533, "step": 11594 }, { "epoch": 0.5378014842300557, "grad_norm": 7.647291660308838, "learning_rate": 4.480626999893849e-06, "loss": 0.2535, "step": 11595 }, { "epoch": 0.5378478664192949, "grad_norm": 4.830093860626221, "learning_rate": 4.4798950438705195e-06, "loss": 0.2542, "step": 11596 }, { "epoch": 0.5378942486085343, "grad_norm": 5.137953758239746, "learning_rate": 4.479163099114993e-06, "loss": 0.3244, "step": 11597 }, { "epoch": 0.5379406307977737, "grad_norm": 8.926156997680664, "learning_rate": 4.4784311656431196e-06, "loss": 0.3692, "step": 11598 }, { "epoch": 0.537987012987013, "grad_norm": 5.672416687011719, "learning_rate": 4.477699243470761e-06, "loss": 0.3374, "step": 11599 }, { "epoch": 0.5380333951762524, "grad_norm": 11.514223098754883, "learning_rate": 4.476967332613772e-06, "loss": 0.3305, "step": 11600 }, { "epoch": 0.5380797773654916, "grad_norm": 5.032078266143799, "learning_rate": 4.476235433088011e-06, "loss": 0.2436, "step": 11601 }, { "epoch": 0.538126159554731, "grad_norm": 7.600808143615723, "learning_rate": 4.475503544909335e-06, "loss": 0.3433, "step": 11602 }, { "epoch": 0.5381725417439703, "grad_norm": 10.73292064666748, "learning_rate": 4.474771668093595e-06, "loss": 0.4291, "step": 11603 }, { "epoch": 0.5382189239332097, "grad_norm": 7.351929187774658, "learning_rate": 4.4740398026566505e-06, "loss": 0.3067, "step": 11604 }, { "epoch": 0.5382653061224489, "grad_norm": 6.322714328765869, "learning_rate": 4.473307948614356e-06, "loss": 0.3564, "step": 11605 }, { "epoch": 0.5383116883116883, "grad_norm": 8.05502700805664, "learning_rate": 4.472576105982567e-06, "loss": 0.2158, "step": 11606 }, { "epoch": 0.5383580705009277, "grad_norm": 12.276368141174316, "learning_rate": 4.471844274777141e-06, "loss": 0.4747, "step": 11607 }, { "epoch": 0.538404452690167, "grad_norm": 7.121078968048096, "learning_rate": 4.471112455013928e-06, "loss": 0.3403, "step": 11608 }, { "epoch": 0.5384508348794063, "grad_norm": 5.065573692321777, "learning_rate": 4.470380646708784e-06, "loss": 0.3618, "step": 11609 }, { "epoch": 0.5384972170686456, "grad_norm": 6.668463706970215, "learning_rate": 4.469648849877565e-06, "loss": 0.2717, "step": 11610 }, { "epoch": 0.538543599257885, "grad_norm": 10.476202011108398, "learning_rate": 4.468917064536123e-06, "loss": 0.4018, "step": 11611 }, { "epoch": 0.5385899814471243, "grad_norm": 7.816845417022705, "learning_rate": 4.468185290700313e-06, "loss": 0.4287, "step": 11612 }, { "epoch": 0.5386363636363637, "grad_norm": 4.338938236236572, "learning_rate": 4.46745352838599e-06, "loss": 0.3468, "step": 11613 }, { "epoch": 0.5386827458256029, "grad_norm": 6.125213146209717, "learning_rate": 4.4667217776090045e-06, "loss": 0.3148, "step": 11614 }, { "epoch": 0.5387291280148423, "grad_norm": 4.641346454620361, "learning_rate": 4.465990038385209e-06, "loss": 0.2437, "step": 11615 }, { "epoch": 0.5387755102040817, "grad_norm": 4.955599308013916, "learning_rate": 4.465258310730458e-06, "loss": 0.237, "step": 11616 }, { "epoch": 0.538821892393321, "grad_norm": 9.800522804260254, "learning_rate": 4.464526594660604e-06, "loss": 0.361, "step": 11617 }, { "epoch": 0.5388682745825603, "grad_norm": 6.863806247711182, "learning_rate": 4.463794890191501e-06, "loss": 0.317, "step": 11618 }, { "epoch": 0.5389146567717996, "grad_norm": 7.102696895599365, "learning_rate": 4.463063197338997e-06, "loss": 0.3685, "step": 11619 }, { "epoch": 0.538961038961039, "grad_norm": 7.123597621917725, "learning_rate": 4.4623315161189455e-06, "loss": 0.3307, "step": 11620 }, { "epoch": 0.5390074211502783, "grad_norm": 6.43215274810791, "learning_rate": 4.461599846547198e-06, "loss": 0.3046, "step": 11621 }, { "epoch": 0.5390538033395176, "grad_norm": 10.425183296203613, "learning_rate": 4.460868188639606e-06, "loss": 0.3474, "step": 11622 }, { "epoch": 0.5391001855287569, "grad_norm": 7.871120929718018, "learning_rate": 4.460136542412019e-06, "loss": 0.3623, "step": 11623 }, { "epoch": 0.5391465677179963, "grad_norm": 14.434967041015625, "learning_rate": 4.459404907880293e-06, "loss": 0.4308, "step": 11624 }, { "epoch": 0.5391929499072357, "grad_norm": 8.058944702148438, "learning_rate": 4.458673285060271e-06, "loss": 0.3817, "step": 11625 }, { "epoch": 0.5392393320964749, "grad_norm": 6.144131183624268, "learning_rate": 4.457941673967808e-06, "loss": 0.3809, "step": 11626 }, { "epoch": 0.5392857142857143, "grad_norm": 6.801644802093506, "learning_rate": 4.457210074618752e-06, "loss": 0.3783, "step": 11627 }, { "epoch": 0.5393320964749536, "grad_norm": 27.15560531616211, "learning_rate": 4.456478487028953e-06, "loss": 0.4591, "step": 11628 }, { "epoch": 0.539378478664193, "grad_norm": 5.745023250579834, "learning_rate": 4.455746911214265e-06, "loss": 0.331, "step": 11629 }, { "epoch": 0.5394248608534323, "grad_norm": 9.648571968078613, "learning_rate": 4.455015347190529e-06, "loss": 0.299, "step": 11630 }, { "epoch": 0.5394712430426716, "grad_norm": 5.988007068634033, "learning_rate": 4.4542837949735985e-06, "loss": 0.3048, "step": 11631 }, { "epoch": 0.5395176252319109, "grad_norm": 6.20056676864624, "learning_rate": 4.453552254579322e-06, "loss": 0.2631, "step": 11632 }, { "epoch": 0.5395640074211503, "grad_norm": 7.25980281829834, "learning_rate": 4.4528207260235475e-06, "loss": 0.4153, "step": 11633 }, { "epoch": 0.5396103896103897, "grad_norm": 5.854457855224609, "learning_rate": 4.4520892093221235e-06, "loss": 0.311, "step": 11634 }, { "epoch": 0.5396567717996289, "grad_norm": 5.336204528808594, "learning_rate": 4.4513577044909e-06, "loss": 0.3227, "step": 11635 }, { "epoch": 0.5397031539888683, "grad_norm": 5.119237422943115, "learning_rate": 4.45062621154572e-06, "loss": 0.2304, "step": 11636 }, { "epoch": 0.5397495361781076, "grad_norm": 7.430087566375732, "learning_rate": 4.449894730502433e-06, "loss": 0.3818, "step": 11637 }, { "epoch": 0.539795918367347, "grad_norm": 8.905550956726074, "learning_rate": 4.4491632613768875e-06, "loss": 0.4098, "step": 11638 }, { "epoch": 0.5398423005565862, "grad_norm": 5.1772565841674805, "learning_rate": 4.448431804184929e-06, "loss": 0.2376, "step": 11639 }, { "epoch": 0.5398886827458256, "grad_norm": 12.30666732788086, "learning_rate": 4.447700358942407e-06, "loss": 0.3286, "step": 11640 }, { "epoch": 0.5399350649350649, "grad_norm": 8.685835838317871, "learning_rate": 4.446968925665163e-06, "loss": 0.4394, "step": 11641 }, { "epoch": 0.5399814471243043, "grad_norm": 4.169548034667969, "learning_rate": 4.446237504369045e-06, "loss": 0.2853, "step": 11642 }, { "epoch": 0.5400278293135437, "grad_norm": 9.672802925109863, "learning_rate": 4.4455060950699e-06, "loss": 0.3385, "step": 11643 }, { "epoch": 0.5400742115027829, "grad_norm": 4.205300331115723, "learning_rate": 4.444774697783573e-06, "loss": 0.2665, "step": 11644 }, { "epoch": 0.5401205936920223, "grad_norm": 14.128491401672363, "learning_rate": 4.444043312525909e-06, "loss": 0.3384, "step": 11645 }, { "epoch": 0.5401669758812616, "grad_norm": 6.840637683868408, "learning_rate": 4.4433119393127556e-06, "loss": 0.2903, "step": 11646 }, { "epoch": 0.540213358070501, "grad_norm": 10.348298072814941, "learning_rate": 4.442580578159954e-06, "loss": 0.3092, "step": 11647 }, { "epoch": 0.5402597402597402, "grad_norm": 9.21043872833252, "learning_rate": 4.441849229083349e-06, "loss": 0.3968, "step": 11648 }, { "epoch": 0.5403061224489796, "grad_norm": 14.174054145812988, "learning_rate": 4.441117892098787e-06, "loss": 0.3394, "step": 11649 }, { "epoch": 0.5403525046382189, "grad_norm": 9.253487586975098, "learning_rate": 4.440386567222111e-06, "loss": 0.4516, "step": 11650 }, { "epoch": 0.5403988868274583, "grad_norm": 14.374201774597168, "learning_rate": 4.439655254469166e-06, "loss": 0.3903, "step": 11651 }, { "epoch": 0.5404452690166975, "grad_norm": 8.543604850769043, "learning_rate": 4.438923953855793e-06, "loss": 0.3111, "step": 11652 }, { "epoch": 0.5404916512059369, "grad_norm": 7.542212009429932, "learning_rate": 4.438192665397836e-06, "loss": 0.2876, "step": 11653 }, { "epoch": 0.5405380333951763, "grad_norm": 8.288546562194824, "learning_rate": 4.43746138911114e-06, "loss": 0.3369, "step": 11654 }, { "epoch": 0.5405844155844156, "grad_norm": 9.757736206054688, "learning_rate": 4.436730125011544e-06, "loss": 0.363, "step": 11655 }, { "epoch": 0.540630797773655, "grad_norm": 6.386998176574707, "learning_rate": 4.435998873114895e-06, "loss": 0.3945, "step": 11656 }, { "epoch": 0.5406771799628942, "grad_norm": 11.750008583068848, "learning_rate": 4.435267633437035e-06, "loss": 0.4115, "step": 11657 }, { "epoch": 0.5407235621521336, "grad_norm": 7.3832502365112305, "learning_rate": 4.4345364059938e-06, "loss": 0.323, "step": 11658 }, { "epoch": 0.5407699443413729, "grad_norm": 9.21725082397461, "learning_rate": 4.433805190801037e-06, "loss": 0.4182, "step": 11659 }, { "epoch": 0.5408163265306123, "grad_norm": 11.198007583618164, "learning_rate": 4.433073987874585e-06, "loss": 0.3683, "step": 11660 }, { "epoch": 0.5408627087198515, "grad_norm": 14.770872116088867, "learning_rate": 4.432342797230286e-06, "loss": 0.3649, "step": 11661 }, { "epoch": 0.5409090909090909, "grad_norm": 10.234984397888184, "learning_rate": 4.4316116188839836e-06, "loss": 0.3748, "step": 11662 }, { "epoch": 0.5409554730983303, "grad_norm": 5.449249267578125, "learning_rate": 4.430880452851514e-06, "loss": 0.3687, "step": 11663 }, { "epoch": 0.5410018552875696, "grad_norm": 12.398455619812012, "learning_rate": 4.430149299148718e-06, "loss": 0.3631, "step": 11664 }, { "epoch": 0.5410482374768089, "grad_norm": 4.661995887756348, "learning_rate": 4.429418157791437e-06, "loss": 0.2715, "step": 11665 }, { "epoch": 0.5410946196660482, "grad_norm": 7.752193450927734, "learning_rate": 4.428687028795512e-06, "loss": 0.4659, "step": 11666 }, { "epoch": 0.5411410018552876, "grad_norm": 5.22218132019043, "learning_rate": 4.42795591217678e-06, "loss": 0.3101, "step": 11667 }, { "epoch": 0.5411873840445269, "grad_norm": 4.0585036277771, "learning_rate": 4.427224807951084e-06, "loss": 0.2793, "step": 11668 }, { "epoch": 0.5412337662337663, "grad_norm": 7.292824745178223, "learning_rate": 4.426493716134258e-06, "loss": 0.3703, "step": 11669 }, { "epoch": 0.5412801484230055, "grad_norm": 12.00813102722168, "learning_rate": 4.425762636742143e-06, "loss": 0.3684, "step": 11670 }, { "epoch": 0.5413265306122449, "grad_norm": 6.103795528411865, "learning_rate": 4.425031569790578e-06, "loss": 0.3141, "step": 11671 }, { "epoch": 0.5413729128014843, "grad_norm": 7.366080284118652, "learning_rate": 4.424300515295401e-06, "loss": 0.3355, "step": 11672 }, { "epoch": 0.5414192949907236, "grad_norm": 7.3223042488098145, "learning_rate": 4.423569473272451e-06, "loss": 0.3466, "step": 11673 }, { "epoch": 0.5414656771799629, "grad_norm": 6.145711421966553, "learning_rate": 4.4228384437375624e-06, "loss": 0.3397, "step": 11674 }, { "epoch": 0.5415120593692022, "grad_norm": 15.835298538208008, "learning_rate": 4.422107426706575e-06, "loss": 0.4117, "step": 11675 }, { "epoch": 0.5415584415584416, "grad_norm": 9.215710639953613, "learning_rate": 4.421376422195326e-06, "loss": 0.3934, "step": 11676 }, { "epoch": 0.5416048237476809, "grad_norm": 7.855014324188232, "learning_rate": 4.42064543021965e-06, "loss": 0.3202, "step": 11677 }, { "epoch": 0.5416512059369202, "grad_norm": 8.132028579711914, "learning_rate": 4.419914450795387e-06, "loss": 0.2927, "step": 11678 }, { "epoch": 0.5416975881261595, "grad_norm": 7.107288360595703, "learning_rate": 4.419183483938372e-06, "loss": 0.3908, "step": 11679 }, { "epoch": 0.5417439703153989, "grad_norm": 9.619800567626953, "learning_rate": 4.418452529664438e-06, "loss": 0.4846, "step": 11680 }, { "epoch": 0.5417903525046383, "grad_norm": 5.735834121704102, "learning_rate": 4.417721587989424e-06, "loss": 0.2263, "step": 11681 }, { "epoch": 0.5418367346938775, "grad_norm": 5.9712395668029785, "learning_rate": 4.416990658929165e-06, "loss": 0.3674, "step": 11682 }, { "epoch": 0.5418831168831169, "grad_norm": 7.447758197784424, "learning_rate": 4.4162597424994964e-06, "loss": 0.2981, "step": 11683 }, { "epoch": 0.5419294990723562, "grad_norm": 11.428080558776855, "learning_rate": 4.415528838716254e-06, "loss": 0.3461, "step": 11684 }, { "epoch": 0.5419758812615956, "grad_norm": 9.202043533325195, "learning_rate": 4.414797947595268e-06, "loss": 0.2963, "step": 11685 }, { "epoch": 0.5420222634508349, "grad_norm": 6.320420742034912, "learning_rate": 4.414067069152377e-06, "loss": 0.4156, "step": 11686 }, { "epoch": 0.5420686456400742, "grad_norm": 6.391010761260986, "learning_rate": 4.413336203403413e-06, "loss": 0.325, "step": 11687 }, { "epoch": 0.5421150278293135, "grad_norm": 6.752875328063965, "learning_rate": 4.412605350364213e-06, "loss": 0.3548, "step": 11688 }, { "epoch": 0.5421614100185529, "grad_norm": 9.228582382202148, "learning_rate": 4.411874510050608e-06, "loss": 0.4131, "step": 11689 }, { "epoch": 0.5422077922077922, "grad_norm": 7.018484592437744, "learning_rate": 4.4111436824784295e-06, "loss": 0.351, "step": 11690 }, { "epoch": 0.5422541743970315, "grad_norm": 7.253994941711426, "learning_rate": 4.4104128676635134e-06, "loss": 0.342, "step": 11691 }, { "epoch": 0.5423005565862709, "grad_norm": 5.649639129638672, "learning_rate": 4.409682065621691e-06, "loss": 0.2483, "step": 11692 }, { "epoch": 0.5423469387755102, "grad_norm": 11.347341537475586, "learning_rate": 4.408951276368797e-06, "loss": 0.3806, "step": 11693 }, { "epoch": 0.5423933209647496, "grad_norm": 6.803506851196289, "learning_rate": 4.40822049992066e-06, "loss": 0.4232, "step": 11694 }, { "epoch": 0.5424397031539888, "grad_norm": 6.120491981506348, "learning_rate": 4.4074897362931155e-06, "loss": 0.3249, "step": 11695 }, { "epoch": 0.5424860853432282, "grad_norm": 9.556222915649414, "learning_rate": 4.406758985501992e-06, "loss": 0.413, "step": 11696 }, { "epoch": 0.5425324675324675, "grad_norm": 7.7252607345581055, "learning_rate": 4.406028247563122e-06, "loss": 0.3908, "step": 11697 }, { "epoch": 0.5425788497217069, "grad_norm": 12.432934761047363, "learning_rate": 4.405297522492338e-06, "loss": 0.3868, "step": 11698 }, { "epoch": 0.5426252319109462, "grad_norm": 8.495465278625488, "learning_rate": 4.404566810305469e-06, "loss": 0.3084, "step": 11699 }, { "epoch": 0.5426716141001855, "grad_norm": 4.907452583312988, "learning_rate": 4.403836111018346e-06, "loss": 0.3791, "step": 11700 }, { "epoch": 0.5427179962894249, "grad_norm": 9.122200965881348, "learning_rate": 4.403105424646798e-06, "loss": 0.406, "step": 11701 }, { "epoch": 0.5427643784786642, "grad_norm": 9.58328628540039, "learning_rate": 4.402374751206657e-06, "loss": 0.3644, "step": 11702 }, { "epoch": 0.5428107606679036, "grad_norm": 6.614284992218018, "learning_rate": 4.401644090713753e-06, "loss": 0.4026, "step": 11703 }, { "epoch": 0.5428571428571428, "grad_norm": 8.612471580505371, "learning_rate": 4.400913443183913e-06, "loss": 0.4223, "step": 11704 }, { "epoch": 0.5429035250463822, "grad_norm": 9.536420822143555, "learning_rate": 4.400182808632967e-06, "loss": 0.3037, "step": 11705 }, { "epoch": 0.5429499072356215, "grad_norm": 6.17555046081543, "learning_rate": 4.399452187076745e-06, "loss": 0.3129, "step": 11706 }, { "epoch": 0.5429962894248609, "grad_norm": 6.474221229553223, "learning_rate": 4.398721578531074e-06, "loss": 0.3443, "step": 11707 }, { "epoch": 0.5430426716141001, "grad_norm": 3.8299481868743896, "learning_rate": 4.397990983011783e-06, "loss": 0.2473, "step": 11708 }, { "epoch": 0.5430890538033395, "grad_norm": 7.2790632247924805, "learning_rate": 4.3972604005346994e-06, "loss": 0.334, "step": 11709 }, { "epoch": 0.5431354359925789, "grad_norm": 11.111518859863281, "learning_rate": 4.396529831115651e-06, "loss": 0.3638, "step": 11710 }, { "epoch": 0.5431818181818182, "grad_norm": 5.461465358734131, "learning_rate": 4.395799274770467e-06, "loss": 0.3717, "step": 11711 }, { "epoch": 0.5432282003710576, "grad_norm": 5.215308666229248, "learning_rate": 4.395068731514972e-06, "loss": 0.3041, "step": 11712 }, { "epoch": 0.5432745825602968, "grad_norm": 5.375904083251953, "learning_rate": 4.3943382013649945e-06, "loss": 0.2916, "step": 11713 }, { "epoch": 0.5433209647495362, "grad_norm": 7.5227179527282715, "learning_rate": 4.39360768433636e-06, "loss": 0.1989, "step": 11714 }, { "epoch": 0.5433673469387755, "grad_norm": 7.031224727630615, "learning_rate": 4.3928771804448944e-06, "loss": 0.3365, "step": 11715 }, { "epoch": 0.5434137291280149, "grad_norm": 4.046036720275879, "learning_rate": 4.392146689706426e-06, "loss": 0.2657, "step": 11716 }, { "epoch": 0.5434601113172541, "grad_norm": 8.729351043701172, "learning_rate": 4.391416212136778e-06, "loss": 0.3283, "step": 11717 }, { "epoch": 0.5435064935064935, "grad_norm": 6.586097717285156, "learning_rate": 4.3906857477517775e-06, "loss": 0.2921, "step": 11718 }, { "epoch": 0.5435528756957329, "grad_norm": 9.813673973083496, "learning_rate": 4.389955296567249e-06, "loss": 0.3776, "step": 11719 }, { "epoch": 0.5435992578849722, "grad_norm": 7.633115768432617, "learning_rate": 4.389224858599015e-06, "loss": 0.3458, "step": 11720 }, { "epoch": 0.5436456400742115, "grad_norm": 7.618404865264893, "learning_rate": 4.388494433862904e-06, "loss": 0.4063, "step": 11721 }, { "epoch": 0.5436920222634508, "grad_norm": 11.33427619934082, "learning_rate": 4.387764022374738e-06, "loss": 0.3474, "step": 11722 }, { "epoch": 0.5437384044526902, "grad_norm": 7.226956844329834, "learning_rate": 4.387033624150344e-06, "loss": 0.3083, "step": 11723 }, { "epoch": 0.5437847866419295, "grad_norm": 7.986715793609619, "learning_rate": 4.38630323920554e-06, "loss": 0.3159, "step": 11724 }, { "epoch": 0.5438311688311688, "grad_norm": 5.65322732925415, "learning_rate": 4.385572867556154e-06, "loss": 0.2175, "step": 11725 }, { "epoch": 0.5438775510204081, "grad_norm": 4.616697788238525, "learning_rate": 4.384842509218007e-06, "loss": 0.295, "step": 11726 }, { "epoch": 0.5439239332096475, "grad_norm": 7.042037010192871, "learning_rate": 4.384112164206923e-06, "loss": 0.312, "step": 11727 }, { "epoch": 0.5439703153988869, "grad_norm": 6.478254318237305, "learning_rate": 4.383381832538725e-06, "loss": 0.3367, "step": 11728 }, { "epoch": 0.5440166975881262, "grad_norm": 3.6055490970611572, "learning_rate": 4.382651514229234e-06, "loss": 0.1671, "step": 11729 }, { "epoch": 0.5440630797773655, "grad_norm": 21.917882919311523, "learning_rate": 4.381921209294272e-06, "loss": 0.4269, "step": 11730 }, { "epoch": 0.5441094619666048, "grad_norm": 7.572453498840332, "learning_rate": 4.38119091774966e-06, "loss": 0.2803, "step": 11731 }, { "epoch": 0.5441558441558442, "grad_norm": 8.346776962280273, "learning_rate": 4.3804606396112205e-06, "loss": 0.3155, "step": 11732 }, { "epoch": 0.5442022263450835, "grad_norm": 7.731590270996094, "learning_rate": 4.379730374894778e-06, "loss": 0.3673, "step": 11733 }, { "epoch": 0.5442486085343228, "grad_norm": 4.8657636642456055, "learning_rate": 4.379000123616146e-06, "loss": 0.2675, "step": 11734 }, { "epoch": 0.5442949907235621, "grad_norm": 11.802885055541992, "learning_rate": 4.37826988579115e-06, "loss": 0.5089, "step": 11735 }, { "epoch": 0.5443413729128015, "grad_norm": 6.975360870361328, "learning_rate": 4.377539661435608e-06, "loss": 0.3624, "step": 11736 }, { "epoch": 0.5443877551020408, "grad_norm": 10.114826202392578, "learning_rate": 4.3768094505653415e-06, "loss": 0.4181, "step": 11737 }, { "epoch": 0.5444341372912801, "grad_norm": 5.799485683441162, "learning_rate": 4.376079253196169e-06, "loss": 0.3521, "step": 11738 }, { "epoch": 0.5444805194805195, "grad_norm": 5.060729503631592, "learning_rate": 4.3753490693439135e-06, "loss": 0.3213, "step": 11739 }, { "epoch": 0.5445269016697588, "grad_norm": 6.084654808044434, "learning_rate": 4.374618899024388e-06, "loss": 0.3444, "step": 11740 }, { "epoch": 0.5445732838589982, "grad_norm": 6.236048221588135, "learning_rate": 4.373888742253415e-06, "loss": 0.3354, "step": 11741 }, { "epoch": 0.5446196660482375, "grad_norm": 9.240015983581543, "learning_rate": 4.3731585990468115e-06, "loss": 0.3862, "step": 11742 }, { "epoch": 0.5446660482374768, "grad_norm": 11.852189064025879, "learning_rate": 4.3724284694203965e-06, "loss": 0.425, "step": 11743 }, { "epoch": 0.5447124304267161, "grad_norm": 8.449570655822754, "learning_rate": 4.3716983533899895e-06, "loss": 0.4584, "step": 11744 }, { "epoch": 0.5447588126159555, "grad_norm": 7.5031256675720215, "learning_rate": 4.3709682509714045e-06, "loss": 0.3964, "step": 11745 }, { "epoch": 0.5448051948051948, "grad_norm": 11.736684799194336, "learning_rate": 4.370238162180461e-06, "loss": 0.3941, "step": 11746 }, { "epoch": 0.5448515769944341, "grad_norm": 3.2446837425231934, "learning_rate": 4.369508087032975e-06, "loss": 0.259, "step": 11747 }, { "epoch": 0.5448979591836735, "grad_norm": 8.485610008239746, "learning_rate": 4.368778025544764e-06, "loss": 0.3596, "step": 11748 }, { "epoch": 0.5449443413729128, "grad_norm": 9.761469841003418, "learning_rate": 4.368047977731643e-06, "loss": 0.5063, "step": 11749 }, { "epoch": 0.5449907235621522, "grad_norm": 8.317632675170898, "learning_rate": 4.367317943609433e-06, "loss": 0.365, "step": 11750 }, { "epoch": 0.5450371057513914, "grad_norm": 18.16606330871582, "learning_rate": 4.366587923193944e-06, "loss": 0.5516, "step": 11751 }, { "epoch": 0.5450834879406308, "grad_norm": 5.952025413513184, "learning_rate": 4.365857916500991e-06, "loss": 0.3481, "step": 11752 }, { "epoch": 0.5451298701298701, "grad_norm": 5.131367206573486, "learning_rate": 4.365127923546394e-06, "loss": 0.3199, "step": 11753 }, { "epoch": 0.5451762523191095, "grad_norm": 5.831107139587402, "learning_rate": 4.364397944345966e-06, "loss": 0.4212, "step": 11754 }, { "epoch": 0.5452226345083488, "grad_norm": 8.855587005615234, "learning_rate": 4.363667978915522e-06, "loss": 0.3686, "step": 11755 }, { "epoch": 0.5452690166975881, "grad_norm": 6.760662078857422, "learning_rate": 4.3629380272708745e-06, "loss": 0.2922, "step": 11756 }, { "epoch": 0.5453153988868275, "grad_norm": 8.846327781677246, "learning_rate": 4.362208089427838e-06, "loss": 0.49, "step": 11757 }, { "epoch": 0.5453617810760668, "grad_norm": 6.015178680419922, "learning_rate": 4.361478165402228e-06, "loss": 0.3318, "step": 11758 }, { "epoch": 0.5454081632653062, "grad_norm": 10.674324035644531, "learning_rate": 4.360748255209855e-06, "loss": 0.4421, "step": 11759 }, { "epoch": 0.5454545454545454, "grad_norm": 8.49997329711914, "learning_rate": 4.360018358866535e-06, "loss": 0.3898, "step": 11760 }, { "epoch": 0.5455009276437848, "grad_norm": 6.14243745803833, "learning_rate": 4.359288476388082e-06, "loss": 0.4646, "step": 11761 }, { "epoch": 0.5455473098330241, "grad_norm": 12.7821683883667, "learning_rate": 4.358558607790303e-06, "loss": 0.5363, "step": 11762 }, { "epoch": 0.5455936920222635, "grad_norm": 10.587946891784668, "learning_rate": 4.357828753089014e-06, "loss": 0.4872, "step": 11763 }, { "epoch": 0.5456400742115027, "grad_norm": 11.710021018981934, "learning_rate": 4.357098912300027e-06, "loss": 0.2891, "step": 11764 }, { "epoch": 0.5456864564007421, "grad_norm": 4.930970191955566, "learning_rate": 4.356369085439152e-06, "loss": 0.288, "step": 11765 }, { "epoch": 0.5457328385899815, "grad_norm": 3.844278335571289, "learning_rate": 4.355639272522204e-06, "loss": 0.3294, "step": 11766 }, { "epoch": 0.5457792207792208, "grad_norm": 10.638769149780273, "learning_rate": 4.354909473564989e-06, "loss": 0.4746, "step": 11767 }, { "epoch": 0.5458256029684602, "grad_norm": 10.308232307434082, "learning_rate": 4.35417968858332e-06, "loss": 0.3232, "step": 11768 }, { "epoch": 0.5458719851576994, "grad_norm": 7.561581611633301, "learning_rate": 4.353449917593007e-06, "loss": 0.3608, "step": 11769 }, { "epoch": 0.5459183673469388, "grad_norm": 5.232361793518066, "learning_rate": 4.352720160609861e-06, "loss": 0.3965, "step": 11770 }, { "epoch": 0.5459647495361781, "grad_norm": 13.474828720092773, "learning_rate": 4.351990417649693e-06, "loss": 0.4188, "step": 11771 }, { "epoch": 0.5460111317254175, "grad_norm": 7.220280647277832, "learning_rate": 4.35126068872831e-06, "loss": 0.4148, "step": 11772 }, { "epoch": 0.5460575139146567, "grad_norm": 4.978055953979492, "learning_rate": 4.350530973861521e-06, "loss": 0.3269, "step": 11773 }, { "epoch": 0.5461038961038961, "grad_norm": 6.461361408233643, "learning_rate": 4.349801273065135e-06, "loss": 0.3764, "step": 11774 }, { "epoch": 0.5461502782931354, "grad_norm": 6.489820957183838, "learning_rate": 4.349071586354963e-06, "loss": 0.3486, "step": 11775 }, { "epoch": 0.5461966604823748, "grad_norm": 7.113893985748291, "learning_rate": 4.348341913746811e-06, "loss": 0.2993, "step": 11776 }, { "epoch": 0.546243042671614, "grad_norm": 8.237465858459473, "learning_rate": 4.347612255256491e-06, "loss": 0.3508, "step": 11777 }, { "epoch": 0.5462894248608534, "grad_norm": 4.256946563720703, "learning_rate": 4.346882610899803e-06, "loss": 0.3271, "step": 11778 }, { "epoch": 0.5463358070500928, "grad_norm": 7.101321220397949, "learning_rate": 4.3461529806925605e-06, "loss": 0.2468, "step": 11779 }, { "epoch": 0.5463821892393321, "grad_norm": 8.76024341583252, "learning_rate": 4.345423364650568e-06, "loss": 0.3099, "step": 11780 }, { "epoch": 0.5464285714285714, "grad_norm": 6.604143142700195, "learning_rate": 4.344693762789634e-06, "loss": 0.3613, "step": 11781 }, { "epoch": 0.5464749536178107, "grad_norm": 8.726526260375977, "learning_rate": 4.3439641751255654e-06, "loss": 0.2893, "step": 11782 }, { "epoch": 0.5465213358070501, "grad_norm": 23.588502883911133, "learning_rate": 4.343234601674165e-06, "loss": 0.4707, "step": 11783 }, { "epoch": 0.5465677179962894, "grad_norm": 7.9045491218566895, "learning_rate": 4.3425050424512405e-06, "loss": 0.3102, "step": 11784 }, { "epoch": 0.5466141001855288, "grad_norm": 6.176347255706787, "learning_rate": 4.341775497472597e-06, "loss": 0.3486, "step": 11785 }, { "epoch": 0.546660482374768, "grad_norm": 6.852515697479248, "learning_rate": 4.341045966754041e-06, "loss": 0.4097, "step": 11786 }, { "epoch": 0.5467068645640074, "grad_norm": 5.340117454528809, "learning_rate": 4.340316450311376e-06, "loss": 0.3644, "step": 11787 }, { "epoch": 0.5467532467532468, "grad_norm": 9.728178024291992, "learning_rate": 4.33958694816041e-06, "loss": 0.366, "step": 11788 }, { "epoch": 0.5467996289424861, "grad_norm": 6.336413860321045, "learning_rate": 4.3388574603169414e-06, "loss": 0.3596, "step": 11789 }, { "epoch": 0.5468460111317254, "grad_norm": 5.552407741546631, "learning_rate": 4.338127986796777e-06, "loss": 0.2886, "step": 11790 }, { "epoch": 0.5468923933209647, "grad_norm": 10.25564193725586, "learning_rate": 4.337398527615721e-06, "loss": 0.3792, "step": 11791 }, { "epoch": 0.5469387755102041, "grad_norm": 7.281099796295166, "learning_rate": 4.336669082789578e-06, "loss": 0.3274, "step": 11792 }, { "epoch": 0.5469851576994434, "grad_norm": 4.562536239624023, "learning_rate": 4.33593965233415e-06, "loss": 0.329, "step": 11793 }, { "epoch": 0.5470315398886827, "grad_norm": 4.811446666717529, "learning_rate": 4.335210236265237e-06, "loss": 0.2354, "step": 11794 }, { "epoch": 0.547077922077922, "grad_norm": 11.088626861572266, "learning_rate": 4.334480834598644e-06, "loss": 0.4608, "step": 11795 }, { "epoch": 0.5471243042671614, "grad_norm": 9.348367691040039, "learning_rate": 4.333751447350172e-06, "loss": 0.4698, "step": 11796 }, { "epoch": 0.5471706864564008, "grad_norm": 8.593512535095215, "learning_rate": 4.333022074535625e-06, "loss": 0.4347, "step": 11797 }, { "epoch": 0.5472170686456401, "grad_norm": 6.981627941131592, "learning_rate": 4.332292716170802e-06, "loss": 0.3608, "step": 11798 }, { "epoch": 0.5472634508348794, "grad_norm": 5.765411853790283, "learning_rate": 4.331563372271507e-06, "loss": 0.2445, "step": 11799 }, { "epoch": 0.5473098330241187, "grad_norm": 9.352533340454102, "learning_rate": 4.330834042853537e-06, "loss": 0.4008, "step": 11800 }, { "epoch": 0.5473562152133581, "grad_norm": 6.63431978225708, "learning_rate": 4.330104727932694e-06, "loss": 0.3626, "step": 11801 }, { "epoch": 0.5474025974025974, "grad_norm": 7.538980007171631, "learning_rate": 4.32937542752478e-06, "loss": 0.2783, "step": 11802 }, { "epoch": 0.5474489795918367, "grad_norm": 7.396819114685059, "learning_rate": 4.328646141645592e-06, "loss": 0.2813, "step": 11803 }, { "epoch": 0.547495361781076, "grad_norm": 6.185489654541016, "learning_rate": 4.327916870310936e-06, "loss": 0.3446, "step": 11804 }, { "epoch": 0.5475417439703154, "grad_norm": 12.608917236328125, "learning_rate": 4.327187613536602e-06, "loss": 0.3339, "step": 11805 }, { "epoch": 0.5475881261595548, "grad_norm": 6.17542839050293, "learning_rate": 4.326458371338394e-06, "loss": 0.3425, "step": 11806 }, { "epoch": 0.547634508348794, "grad_norm": 7.190519332885742, "learning_rate": 4.32572914373211e-06, "loss": 0.2653, "step": 11807 }, { "epoch": 0.5476808905380334, "grad_norm": 7.762778282165527, "learning_rate": 4.32499993073355e-06, "loss": 0.3899, "step": 11808 }, { "epoch": 0.5477272727272727, "grad_norm": 5.839966773986816, "learning_rate": 4.324270732358509e-06, "loss": 0.2823, "step": 11809 }, { "epoch": 0.5477736549165121, "grad_norm": 5.767648696899414, "learning_rate": 4.323541548622788e-06, "loss": 0.3248, "step": 11810 }, { "epoch": 0.5478200371057514, "grad_norm": 4.924894332885742, "learning_rate": 4.322812379542181e-06, "loss": 0.3499, "step": 11811 }, { "epoch": 0.5478664192949907, "grad_norm": 5.3750457763671875, "learning_rate": 4.322083225132487e-06, "loss": 0.3043, "step": 11812 }, { "epoch": 0.54791280148423, "grad_norm": 7.292492389678955, "learning_rate": 4.321354085409502e-06, "loss": 0.4033, "step": 11813 }, { "epoch": 0.5479591836734694, "grad_norm": 10.235556602478027, "learning_rate": 4.3206249603890224e-06, "loss": 0.3818, "step": 11814 }, { "epoch": 0.5480055658627088, "grad_norm": 10.83981990814209, "learning_rate": 4.319895850086848e-06, "loss": 0.4154, "step": 11815 }, { "epoch": 0.548051948051948, "grad_norm": 8.019943237304688, "learning_rate": 4.319166754518768e-06, "loss": 0.4099, "step": 11816 }, { "epoch": 0.5480983302411874, "grad_norm": 11.018827438354492, "learning_rate": 4.318437673700582e-06, "loss": 0.4492, "step": 11817 }, { "epoch": 0.5481447124304267, "grad_norm": 8.711544036865234, "learning_rate": 4.317708607648083e-06, "loss": 0.3568, "step": 11818 }, { "epoch": 0.5481910946196661, "grad_norm": 12.611093521118164, "learning_rate": 4.316979556377068e-06, "loss": 0.2826, "step": 11819 }, { "epoch": 0.5482374768089053, "grad_norm": 7.762873649597168, "learning_rate": 4.316250519903331e-06, "loss": 0.3858, "step": 11820 }, { "epoch": 0.5482838589981447, "grad_norm": 5.544831275939941, "learning_rate": 4.315521498242668e-06, "loss": 0.3155, "step": 11821 }, { "epoch": 0.548330241187384, "grad_norm": 5.822750568389893, "learning_rate": 4.314792491410868e-06, "loss": 0.3623, "step": 11822 }, { "epoch": 0.5483766233766234, "grad_norm": 9.996923446655273, "learning_rate": 4.314063499423727e-06, "loss": 0.4162, "step": 11823 }, { "epoch": 0.5484230055658627, "grad_norm": 12.61876106262207, "learning_rate": 4.3133345222970396e-06, "loss": 0.2384, "step": 11824 }, { "epoch": 0.548469387755102, "grad_norm": 6.4827117919921875, "learning_rate": 4.312605560046597e-06, "loss": 0.293, "step": 11825 }, { "epoch": 0.5485157699443414, "grad_norm": 6.8119707107543945, "learning_rate": 4.311876612688194e-06, "loss": 0.3192, "step": 11826 }, { "epoch": 0.5485621521335807, "grad_norm": 9.995896339416504, "learning_rate": 4.31114768023762e-06, "loss": 0.2446, "step": 11827 }, { "epoch": 0.5486085343228201, "grad_norm": 5.173025131225586, "learning_rate": 4.310418762710668e-06, "loss": 0.3075, "step": 11828 }, { "epoch": 0.5486549165120593, "grad_norm": 5.290469646453857, "learning_rate": 4.3096898601231294e-06, "loss": 0.3605, "step": 11829 }, { "epoch": 0.5487012987012987, "grad_norm": 8.214362144470215, "learning_rate": 4.308960972490796e-06, "loss": 0.4522, "step": 11830 }, { "epoch": 0.548747680890538, "grad_norm": 8.532734870910645, "learning_rate": 4.30823209982946e-06, "loss": 0.2696, "step": 11831 }, { "epoch": 0.5487940630797774, "grad_norm": 10.125093460083008, "learning_rate": 4.30750324215491e-06, "loss": 0.4023, "step": 11832 }, { "epoch": 0.5488404452690167, "grad_norm": 8.1996488571167, "learning_rate": 4.306774399482937e-06, "loss": 0.4279, "step": 11833 }, { "epoch": 0.548886827458256, "grad_norm": 7.471070766448975, "learning_rate": 4.30604557182933e-06, "loss": 0.3292, "step": 11834 }, { "epoch": 0.5489332096474954, "grad_norm": 11.4345064163208, "learning_rate": 4.30531675920988e-06, "loss": 0.3174, "step": 11835 }, { "epoch": 0.5489795918367347, "grad_norm": 8.039327621459961, "learning_rate": 4.304587961640377e-06, "loss": 0.3537, "step": 11836 }, { "epoch": 0.549025974025974, "grad_norm": 11.002742767333984, "learning_rate": 4.303859179136609e-06, "loss": 0.4246, "step": 11837 }, { "epoch": 0.5490723562152133, "grad_norm": 8.042288780212402, "learning_rate": 4.303130411714364e-06, "loss": 0.3927, "step": 11838 }, { "epoch": 0.5491187384044527, "grad_norm": 5.48499870300293, "learning_rate": 4.302401659389431e-06, "loss": 0.3206, "step": 11839 }, { "epoch": 0.549165120593692, "grad_norm": 5.606866836547852, "learning_rate": 4.301672922177598e-06, "loss": 0.3092, "step": 11840 }, { "epoch": 0.5492115027829314, "grad_norm": 7.190717697143555, "learning_rate": 4.300944200094653e-06, "loss": 0.4222, "step": 11841 }, { "epoch": 0.5492578849721707, "grad_norm": 8.819432258605957, "learning_rate": 4.3002154931563836e-06, "loss": 0.3759, "step": 11842 }, { "epoch": 0.54930426716141, "grad_norm": 10.525961875915527, "learning_rate": 4.299486801378577e-06, "loss": 0.4575, "step": 11843 }, { "epoch": 0.5493506493506494, "grad_norm": 5.431840896606445, "learning_rate": 4.2987581247770184e-06, "loss": 0.3603, "step": 11844 }, { "epoch": 0.5493970315398887, "grad_norm": 6.3961992263793945, "learning_rate": 4.298029463367495e-06, "loss": 0.287, "step": 11845 }, { "epoch": 0.549443413729128, "grad_norm": 8.164351463317871, "learning_rate": 4.297300817165793e-06, "loss": 0.2916, "step": 11846 }, { "epoch": 0.5494897959183673, "grad_norm": 4.967926025390625, "learning_rate": 4.2965721861876985e-06, "loss": 0.3251, "step": 11847 }, { "epoch": 0.5495361781076067, "grad_norm": 5.791559219360352, "learning_rate": 4.295843570448998e-06, "loss": 0.3336, "step": 11848 }, { "epoch": 0.549582560296846, "grad_norm": 6.870720863342285, "learning_rate": 4.295114969965473e-06, "loss": 0.2658, "step": 11849 }, { "epoch": 0.5496289424860853, "grad_norm": 9.602548599243164, "learning_rate": 4.29438638475291e-06, "loss": 0.3241, "step": 11850 }, { "epoch": 0.5496753246753247, "grad_norm": 7.56412410736084, "learning_rate": 4.293657814827096e-06, "loss": 0.3471, "step": 11851 }, { "epoch": 0.549721706864564, "grad_norm": 5.3263936042785645, "learning_rate": 4.292929260203811e-06, "loss": 0.3532, "step": 11852 }, { "epoch": 0.5497680890538034, "grad_norm": 6.807029724121094, "learning_rate": 4.292200720898843e-06, "loss": 0.3138, "step": 11853 }, { "epoch": 0.5498144712430427, "grad_norm": 6.260309219360352, "learning_rate": 4.29147219692797e-06, "loss": 0.3422, "step": 11854 }, { "epoch": 0.549860853432282, "grad_norm": 8.268567085266113, "learning_rate": 4.2907436883069795e-06, "loss": 0.3342, "step": 11855 }, { "epoch": 0.5499072356215213, "grad_norm": 7.000997066497803, "learning_rate": 4.2900151950516535e-06, "loss": 0.312, "step": 11856 }, { "epoch": 0.5499536178107607, "grad_norm": 8.794271469116211, "learning_rate": 4.289286717177773e-06, "loss": 0.3846, "step": 11857 }, { "epoch": 0.55, "grad_norm": 9.890865325927734, "learning_rate": 4.288558254701121e-06, "loss": 0.3595, "step": 11858 }, { "epoch": 0.5500463821892393, "grad_norm": 5.205052375793457, "learning_rate": 4.2878298076374805e-06, "loss": 0.3122, "step": 11859 }, { "epoch": 0.5500927643784786, "grad_norm": 5.37627649307251, "learning_rate": 4.28710137600263e-06, "loss": 0.2726, "step": 11860 }, { "epoch": 0.550139146567718, "grad_norm": 8.261967658996582, "learning_rate": 4.286372959812353e-06, "loss": 0.2767, "step": 11861 }, { "epoch": 0.5501855287569574, "grad_norm": 5.6183648109436035, "learning_rate": 4.2856445590824295e-06, "loss": 0.3015, "step": 11862 }, { "epoch": 0.5502319109461966, "grad_norm": 9.075316429138184, "learning_rate": 4.284916173828639e-06, "loss": 0.3894, "step": 11863 }, { "epoch": 0.550278293135436, "grad_norm": 7.291398525238037, "learning_rate": 4.284187804066764e-06, "loss": 0.3091, "step": 11864 }, { "epoch": 0.5503246753246753, "grad_norm": 6.55552864074707, "learning_rate": 4.28345944981258e-06, "loss": 0.3221, "step": 11865 }, { "epoch": 0.5503710575139147, "grad_norm": 6.805427074432373, "learning_rate": 4.282731111081871e-06, "loss": 0.3599, "step": 11866 }, { "epoch": 0.550417439703154, "grad_norm": 8.13656234741211, "learning_rate": 4.2820027878904145e-06, "loss": 0.3308, "step": 11867 }, { "epoch": 0.5504638218923933, "grad_norm": 5.389711380004883, "learning_rate": 4.281274480253988e-06, "loss": 0.3397, "step": 11868 }, { "epoch": 0.5505102040816326, "grad_norm": 6.252411365509033, "learning_rate": 4.28054618818837e-06, "loss": 0.342, "step": 11869 }, { "epoch": 0.550556586270872, "grad_norm": 5.904742240905762, "learning_rate": 4.2798179117093405e-06, "loss": 0.2909, "step": 11870 }, { "epoch": 0.5506029684601114, "grad_norm": 7.140801906585693, "learning_rate": 4.279089650832677e-06, "loss": 0.3145, "step": 11871 }, { "epoch": 0.5506493506493506, "grad_norm": 5.39538049697876, "learning_rate": 4.2783614055741546e-06, "loss": 0.3075, "step": 11872 }, { "epoch": 0.55069573283859, "grad_norm": 6.0387349128723145, "learning_rate": 4.277633175949551e-06, "loss": 0.3388, "step": 11873 }, { "epoch": 0.5507421150278293, "grad_norm": 6.079965591430664, "learning_rate": 4.276904961974645e-06, "loss": 0.3089, "step": 11874 }, { "epoch": 0.5507884972170687, "grad_norm": 10.034967422485352, "learning_rate": 4.276176763665212e-06, "loss": 0.3982, "step": 11875 }, { "epoch": 0.5508348794063079, "grad_norm": 9.799120903015137, "learning_rate": 4.275448581037027e-06, "loss": 0.339, "step": 11876 }, { "epoch": 0.5508812615955473, "grad_norm": 9.009114265441895, "learning_rate": 4.274720414105866e-06, "loss": 0.4485, "step": 11877 }, { "epoch": 0.5509276437847866, "grad_norm": 6.923614501953125, "learning_rate": 4.273992262887505e-06, "loss": 0.3379, "step": 11878 }, { "epoch": 0.550974025974026, "grad_norm": 5.53321647644043, "learning_rate": 4.273264127397719e-06, "loss": 0.2786, "step": 11879 }, { "epoch": 0.5510204081632653, "grad_norm": 3.3736765384674072, "learning_rate": 4.272536007652281e-06, "loss": 0.2904, "step": 11880 }, { "epoch": 0.5510667903525046, "grad_norm": 6.946990013122559, "learning_rate": 4.271807903666969e-06, "loss": 0.3608, "step": 11881 }, { "epoch": 0.551113172541744, "grad_norm": 9.868332862854004, "learning_rate": 4.2710798154575534e-06, "loss": 0.3914, "step": 11882 }, { "epoch": 0.5511595547309833, "grad_norm": 5.615159511566162, "learning_rate": 4.270351743039809e-06, "loss": 0.2797, "step": 11883 }, { "epoch": 0.5512059369202227, "grad_norm": 9.332642555236816, "learning_rate": 4.269623686429508e-06, "loss": 0.354, "step": 11884 }, { "epoch": 0.5512523191094619, "grad_norm": 9.853012084960938, "learning_rate": 4.2688956456424255e-06, "loss": 0.3871, "step": 11885 }, { "epoch": 0.5512987012987013, "grad_norm": 4.7041335105896, "learning_rate": 4.268167620694336e-06, "loss": 0.3531, "step": 11886 }, { "epoch": 0.5513450834879406, "grad_norm": 7.845337390899658, "learning_rate": 4.267439611601006e-06, "loss": 0.4164, "step": 11887 }, { "epoch": 0.55139146567718, "grad_norm": 8.740238189697266, "learning_rate": 4.266711618378211e-06, "loss": 0.3325, "step": 11888 }, { "epoch": 0.5514378478664193, "grad_norm": 5.657632350921631, "learning_rate": 4.265983641041721e-06, "loss": 0.3583, "step": 11889 }, { "epoch": 0.5514842300556586, "grad_norm": 8.07823657989502, "learning_rate": 4.265255679607309e-06, "loss": 0.3879, "step": 11890 }, { "epoch": 0.551530612244898, "grad_norm": 4.751091480255127, "learning_rate": 4.264527734090746e-06, "loss": 0.2629, "step": 11891 }, { "epoch": 0.5515769944341373, "grad_norm": 7.557496070861816, "learning_rate": 4.2637998045078025e-06, "loss": 0.441, "step": 11892 }, { "epoch": 0.5516233766233766, "grad_norm": 7.838526725769043, "learning_rate": 4.263071890874247e-06, "loss": 0.4798, "step": 11893 }, { "epoch": 0.5516697588126159, "grad_norm": 8.798323631286621, "learning_rate": 4.262343993205849e-06, "loss": 0.4471, "step": 11894 }, { "epoch": 0.5517161410018553, "grad_norm": 7.7939982414245605, "learning_rate": 4.26161611151838e-06, "loss": 0.2973, "step": 11895 }, { "epoch": 0.5517625231910946, "grad_norm": 7.747059345245361, "learning_rate": 4.260888245827608e-06, "loss": 0.3274, "step": 11896 }, { "epoch": 0.551808905380334, "grad_norm": 6.809452056884766, "learning_rate": 4.260160396149305e-06, "loss": 0.3047, "step": 11897 }, { "epoch": 0.5518552875695732, "grad_norm": 12.875360488891602, "learning_rate": 4.259432562499235e-06, "loss": 0.4007, "step": 11898 }, { "epoch": 0.5519016697588126, "grad_norm": 9.587135314941406, "learning_rate": 4.258704744893167e-06, "loss": 0.3237, "step": 11899 }, { "epoch": 0.551948051948052, "grad_norm": 4.4648051261901855, "learning_rate": 4.25797694334687e-06, "loss": 0.3395, "step": 11900 }, { "epoch": 0.5519944341372913, "grad_norm": 7.253445148468018, "learning_rate": 4.257249157876112e-06, "loss": 0.303, "step": 11901 }, { "epoch": 0.5520408163265306, "grad_norm": 4.692515850067139, "learning_rate": 4.256521388496658e-06, "loss": 0.2498, "step": 11902 }, { "epoch": 0.5520871985157699, "grad_norm": 6.455620288848877, "learning_rate": 4.255793635224278e-06, "loss": 0.3173, "step": 11903 }, { "epoch": 0.5521335807050093, "grad_norm": 6.312569618225098, "learning_rate": 4.255065898074735e-06, "loss": 0.2279, "step": 11904 }, { "epoch": 0.5521799628942486, "grad_norm": 7.701912879943848, "learning_rate": 4.254338177063797e-06, "loss": 0.2853, "step": 11905 }, { "epoch": 0.5522263450834879, "grad_norm": 8.497344970703125, "learning_rate": 4.253610472207228e-06, "loss": 0.3728, "step": 11906 }, { "epoch": 0.5522727272727272, "grad_norm": 6.018688201904297, "learning_rate": 4.252882783520795e-06, "loss": 0.395, "step": 11907 }, { "epoch": 0.5523191094619666, "grad_norm": 5.834787368774414, "learning_rate": 4.2521551110202644e-06, "loss": 0.3512, "step": 11908 }, { "epoch": 0.552365491651206, "grad_norm": 5.8611321449279785, "learning_rate": 4.251427454721396e-06, "loss": 0.2569, "step": 11909 }, { "epoch": 0.5524118738404453, "grad_norm": 7.187432289123535, "learning_rate": 4.250699814639958e-06, "loss": 0.2866, "step": 11910 }, { "epoch": 0.5524582560296846, "grad_norm": 5.021469593048096, "learning_rate": 4.249972190791713e-06, "loss": 0.2642, "step": 11911 }, { "epoch": 0.5525046382189239, "grad_norm": 10.1504545211792, "learning_rate": 4.249244583192425e-06, "loss": 0.4159, "step": 11912 }, { "epoch": 0.5525510204081633, "grad_norm": 6.256620407104492, "learning_rate": 4.248516991857857e-06, "loss": 0.2508, "step": 11913 }, { "epoch": 0.5525974025974026, "grad_norm": 6.71640682220459, "learning_rate": 4.247789416803774e-06, "loss": 0.3445, "step": 11914 }, { "epoch": 0.5526437847866419, "grad_norm": 7.539754867553711, "learning_rate": 4.2470618580459345e-06, "loss": 0.2996, "step": 11915 }, { "epoch": 0.5526901669758812, "grad_norm": 8.556070327758789, "learning_rate": 4.246334315600102e-06, "loss": 0.3996, "step": 11916 }, { "epoch": 0.5527365491651206, "grad_norm": 5.862459659576416, "learning_rate": 4.245606789482041e-06, "loss": 0.3137, "step": 11917 }, { "epoch": 0.55278293135436, "grad_norm": 5.585517883300781, "learning_rate": 4.244879279707509e-06, "loss": 0.2581, "step": 11918 }, { "epoch": 0.5528293135435992, "grad_norm": 10.720494270324707, "learning_rate": 4.244151786292272e-06, "loss": 0.4275, "step": 11919 }, { "epoch": 0.5528756957328386, "grad_norm": 7.2190375328063965, "learning_rate": 4.243424309252086e-06, "loss": 0.4096, "step": 11920 }, { "epoch": 0.5529220779220779, "grad_norm": 10.738628387451172, "learning_rate": 4.2426968486027135e-06, "loss": 0.4362, "step": 11921 }, { "epoch": 0.5529684601113173, "grad_norm": 9.905537605285645, "learning_rate": 4.241969404359913e-06, "loss": 0.3758, "step": 11922 }, { "epoch": 0.5530148423005566, "grad_norm": 4.992735385894775, "learning_rate": 4.241241976539447e-06, "loss": 0.2888, "step": 11923 }, { "epoch": 0.5530612244897959, "grad_norm": 5.760536193847656, "learning_rate": 4.240514565157073e-06, "loss": 0.269, "step": 11924 }, { "epoch": 0.5531076066790352, "grad_norm": 4.978508949279785, "learning_rate": 4.239787170228552e-06, "loss": 0.3389, "step": 11925 }, { "epoch": 0.5531539888682746, "grad_norm": 16.174325942993164, "learning_rate": 4.239059791769639e-06, "loss": 0.5534, "step": 11926 }, { "epoch": 0.553200371057514, "grad_norm": 4.083807945251465, "learning_rate": 4.2383324297960944e-06, "loss": 0.3244, "step": 11927 }, { "epoch": 0.5532467532467532, "grad_norm": 9.59633731842041, "learning_rate": 4.237605084323676e-06, "loss": 0.4286, "step": 11928 }, { "epoch": 0.5532931354359926, "grad_norm": 5.229447364807129, "learning_rate": 4.236877755368142e-06, "loss": 0.2858, "step": 11929 }, { "epoch": 0.5533395176252319, "grad_norm": 4.726528167724609, "learning_rate": 4.23615044294525e-06, "loss": 0.3447, "step": 11930 }, { "epoch": 0.5533858998144713, "grad_norm": 5.54998254776001, "learning_rate": 4.235423147070754e-06, "loss": 0.3203, "step": 11931 }, { "epoch": 0.5534322820037105, "grad_norm": 8.817134857177734, "learning_rate": 4.234695867760412e-06, "loss": 0.5042, "step": 11932 }, { "epoch": 0.5534786641929499, "grad_norm": 11.547518730163574, "learning_rate": 4.233968605029981e-06, "loss": 0.3296, "step": 11933 }, { "epoch": 0.5535250463821892, "grad_norm": 5.563616752624512, "learning_rate": 4.2332413588952154e-06, "loss": 0.3328, "step": 11934 }, { "epoch": 0.5535714285714286, "grad_norm": 5.898846626281738, "learning_rate": 4.232514129371874e-06, "loss": 0.3785, "step": 11935 }, { "epoch": 0.5536178107606679, "grad_norm": 15.05384349822998, "learning_rate": 4.231786916475707e-06, "loss": 0.3981, "step": 11936 }, { "epoch": 0.5536641929499072, "grad_norm": 4.49844217300415, "learning_rate": 4.231059720222471e-06, "loss": 0.3161, "step": 11937 }, { "epoch": 0.5537105751391466, "grad_norm": 5.200204849243164, "learning_rate": 4.230332540627921e-06, "loss": 0.3098, "step": 11938 }, { "epoch": 0.5537569573283859, "grad_norm": 4.298946380615234, "learning_rate": 4.22960537770781e-06, "loss": 0.3447, "step": 11939 }, { "epoch": 0.5538033395176253, "grad_norm": 6.98234748840332, "learning_rate": 4.228878231477892e-06, "loss": 0.3517, "step": 11940 }, { "epoch": 0.5538497217068645, "grad_norm": 9.666154861450195, "learning_rate": 4.228151101953923e-06, "loss": 0.4357, "step": 11941 }, { "epoch": 0.5538961038961039, "grad_norm": 7.05631160736084, "learning_rate": 4.227423989151651e-06, "loss": 0.4465, "step": 11942 }, { "epoch": 0.5539424860853432, "grad_norm": 6.446477890014648, "learning_rate": 4.226696893086831e-06, "loss": 0.3277, "step": 11943 }, { "epoch": 0.5539888682745826, "grad_norm": 6.718061447143555, "learning_rate": 4.225969813775215e-06, "loss": 0.3271, "step": 11944 }, { "epoch": 0.5540352504638218, "grad_norm": 4.337401866912842, "learning_rate": 4.225242751232554e-06, "loss": 0.3422, "step": 11945 }, { "epoch": 0.5540816326530612, "grad_norm": 4.657951831817627, "learning_rate": 4.224515705474603e-06, "loss": 0.2181, "step": 11946 }, { "epoch": 0.5541280148423006, "grad_norm": 5.9993062019348145, "learning_rate": 4.223788676517108e-06, "loss": 0.3829, "step": 11947 }, { "epoch": 0.5541743970315399, "grad_norm": 6.852616786956787, "learning_rate": 4.223061664375822e-06, "loss": 0.451, "step": 11948 }, { "epoch": 0.5542207792207792, "grad_norm": 7.497274875640869, "learning_rate": 4.222334669066495e-06, "loss": 0.2101, "step": 11949 }, { "epoch": 0.5542671614100185, "grad_norm": 6.516932010650635, "learning_rate": 4.221607690604877e-06, "loss": 0.3148, "step": 11950 }, { "epoch": 0.5543135435992579, "grad_norm": 8.390247344970703, "learning_rate": 4.220880729006718e-06, "loss": 0.3326, "step": 11951 }, { "epoch": 0.5543599257884972, "grad_norm": 9.563157081604004, "learning_rate": 4.220153784287769e-06, "loss": 0.3307, "step": 11952 }, { "epoch": 0.5544063079777366, "grad_norm": 4.923946857452393, "learning_rate": 4.219426856463776e-06, "loss": 0.2835, "step": 11953 }, { "epoch": 0.5544526901669758, "grad_norm": 9.217437744140625, "learning_rate": 4.218699945550488e-06, "loss": 0.325, "step": 11954 }, { "epoch": 0.5544990723562152, "grad_norm": 7.01704740524292, "learning_rate": 4.217973051563653e-06, "loss": 0.2655, "step": 11955 }, { "epoch": 0.5545454545454546, "grad_norm": 3.196377992630005, "learning_rate": 4.2172461745190195e-06, "loss": 0.2924, "step": 11956 }, { "epoch": 0.5545918367346939, "grad_norm": 3.9068517684936523, "learning_rate": 4.216519314432336e-06, "loss": 0.2978, "step": 11957 }, { "epoch": 0.5546382189239332, "grad_norm": 7.532114028930664, "learning_rate": 4.215792471319347e-06, "loss": 0.2966, "step": 11958 }, { "epoch": 0.5546846011131725, "grad_norm": 12.524422645568848, "learning_rate": 4.215065645195801e-06, "loss": 0.451, "step": 11959 }, { "epoch": 0.5547309833024119, "grad_norm": 8.938982963562012, "learning_rate": 4.214338836077444e-06, "loss": 0.3624, "step": 11960 }, { "epoch": 0.5547773654916512, "grad_norm": 7.667806148529053, "learning_rate": 4.21361204398002e-06, "loss": 0.3111, "step": 11961 }, { "epoch": 0.5548237476808905, "grad_norm": 6.498837471008301, "learning_rate": 4.2128852689192775e-06, "loss": 0.3707, "step": 11962 }, { "epoch": 0.5548701298701298, "grad_norm": 5.451961040496826, "learning_rate": 4.2121585109109625e-06, "loss": 0.2183, "step": 11963 }, { "epoch": 0.5549165120593692, "grad_norm": 18.17027473449707, "learning_rate": 4.211431769970815e-06, "loss": 0.411, "step": 11964 }, { "epoch": 0.5549628942486086, "grad_norm": 9.266127586364746, "learning_rate": 4.210705046114584e-06, "loss": 0.3745, "step": 11965 }, { "epoch": 0.5550092764378479, "grad_norm": 6.131808280944824, "learning_rate": 4.209978339358011e-06, "loss": 0.3236, "step": 11966 }, { "epoch": 0.5550556586270872, "grad_norm": 5.693836212158203, "learning_rate": 4.20925164971684e-06, "loss": 0.3805, "step": 11967 }, { "epoch": 0.5551020408163265, "grad_norm": 5.728332996368408, "learning_rate": 4.208524977206817e-06, "loss": 0.3121, "step": 11968 }, { "epoch": 0.5551484230055659, "grad_norm": 4.8781538009643555, "learning_rate": 4.207798321843681e-06, "loss": 0.3027, "step": 11969 }, { "epoch": 0.5551948051948052, "grad_norm": 6.507880210876465, "learning_rate": 4.207071683643177e-06, "loss": 0.2116, "step": 11970 }, { "epoch": 0.5552411873840445, "grad_norm": 5.718632698059082, "learning_rate": 4.206345062621046e-06, "loss": 0.3697, "step": 11971 }, { "epoch": 0.5552875695732838, "grad_norm": 4.894662857055664, "learning_rate": 4.205618458793032e-06, "loss": 0.3057, "step": 11972 }, { "epoch": 0.5553339517625232, "grad_norm": 10.723535537719727, "learning_rate": 4.204891872174875e-06, "loss": 0.3743, "step": 11973 }, { "epoch": 0.5553803339517626, "grad_norm": 4.810549259185791, "learning_rate": 4.204165302782317e-06, "loss": 0.3572, "step": 11974 }, { "epoch": 0.5554267161410018, "grad_norm": 9.341582298278809, "learning_rate": 4.203438750631096e-06, "loss": 0.3396, "step": 11975 }, { "epoch": 0.5554730983302412, "grad_norm": 7.033518314361572, "learning_rate": 4.202712215736955e-06, "loss": 0.3421, "step": 11976 }, { "epoch": 0.5555194805194805, "grad_norm": 9.438925743103027, "learning_rate": 4.201985698115633e-06, "loss": 0.3578, "step": 11977 }, { "epoch": 0.5555658627087199, "grad_norm": 5.643624305725098, "learning_rate": 4.201259197782871e-06, "loss": 0.3976, "step": 11978 }, { "epoch": 0.5556122448979591, "grad_norm": 9.932184219360352, "learning_rate": 4.200532714754407e-06, "loss": 0.3377, "step": 11979 }, { "epoch": 0.5556586270871985, "grad_norm": 9.318997383117676, "learning_rate": 4.199806249045979e-06, "loss": 0.4354, "step": 11980 }, { "epoch": 0.5557050092764378, "grad_norm": 4.920464038848877, "learning_rate": 4.1990798006733266e-06, "loss": 0.2681, "step": 11981 }, { "epoch": 0.5557513914656772, "grad_norm": 4.198134899139404, "learning_rate": 4.198353369652188e-06, "loss": 0.3928, "step": 11982 }, { "epoch": 0.5557977736549166, "grad_norm": 8.318805694580078, "learning_rate": 4.197626955998303e-06, "loss": 0.4622, "step": 11983 }, { "epoch": 0.5558441558441558, "grad_norm": 6.471461772918701, "learning_rate": 4.1969005597274035e-06, "loss": 0.3073, "step": 11984 }, { "epoch": 0.5558905380333952, "grad_norm": 8.803491592407227, "learning_rate": 4.196174180855233e-06, "loss": 0.3654, "step": 11985 }, { "epoch": 0.5559369202226345, "grad_norm": 7.114102363586426, "learning_rate": 4.195447819397522e-06, "loss": 0.3693, "step": 11986 }, { "epoch": 0.5559833024118739, "grad_norm": 13.604921340942383, "learning_rate": 4.19472147537001e-06, "loss": 0.2296, "step": 11987 }, { "epoch": 0.5560296846011131, "grad_norm": 4.40607213973999, "learning_rate": 4.193995148788435e-06, "loss": 0.3574, "step": 11988 }, { "epoch": 0.5560760667903525, "grad_norm": 7.4951910972595215, "learning_rate": 4.193268839668528e-06, "loss": 0.3543, "step": 11989 }, { "epoch": 0.5561224489795918, "grad_norm": 9.19835090637207, "learning_rate": 4.192542548026027e-06, "loss": 0.3705, "step": 11990 }, { "epoch": 0.5561688311688312, "grad_norm": 8.952263832092285, "learning_rate": 4.191816273876665e-06, "loss": 0.3262, "step": 11991 }, { "epoch": 0.5562152133580704, "grad_norm": 8.395135879516602, "learning_rate": 4.191090017236177e-06, "loss": 0.3941, "step": 11992 }, { "epoch": 0.5562615955473098, "grad_norm": 8.11231517791748, "learning_rate": 4.1903637781202976e-06, "loss": 0.2838, "step": 11993 }, { "epoch": 0.5563079777365492, "grad_norm": 6.2490434646606445, "learning_rate": 4.18963755654476e-06, "loss": 0.3824, "step": 11994 }, { "epoch": 0.5563543599257885, "grad_norm": 7.387030124664307, "learning_rate": 4.188911352525296e-06, "loss": 0.3665, "step": 11995 }, { "epoch": 0.5564007421150279, "grad_norm": 5.481512546539307, "learning_rate": 4.188185166077642e-06, "loss": 0.3898, "step": 11996 }, { "epoch": 0.5564471243042671, "grad_norm": 11.743456840515137, "learning_rate": 4.187458997217527e-06, "loss": 0.3688, "step": 11997 }, { "epoch": 0.5564935064935065, "grad_norm": 7.947423934936523, "learning_rate": 4.186732845960685e-06, "loss": 0.3624, "step": 11998 }, { "epoch": 0.5565398886827458, "grad_norm": 4.399300575256348, "learning_rate": 4.186006712322846e-06, "loss": 0.2487, "step": 11999 }, { "epoch": 0.5565862708719852, "grad_norm": 6.475158214569092, "learning_rate": 4.185280596319742e-06, "loss": 0.4536, "step": 12000 }, { "epoch": 0.5566326530612244, "grad_norm": 7.363617420196533, "learning_rate": 4.184554497967105e-06, "loss": 0.3336, "step": 12001 }, { "epoch": 0.5566790352504638, "grad_norm": 7.7656755447387695, "learning_rate": 4.183828417280664e-06, "loss": 0.2896, "step": 12002 }, { "epoch": 0.5567254174397032, "grad_norm": 10.827869415283203, "learning_rate": 4.183102354276149e-06, "loss": 0.5066, "step": 12003 }, { "epoch": 0.5567717996289425, "grad_norm": 5.611002445220947, "learning_rate": 4.182376308969293e-06, "loss": 0.4148, "step": 12004 }, { "epoch": 0.5568181818181818, "grad_norm": 4.073221683502197, "learning_rate": 4.181650281375822e-06, "loss": 0.2792, "step": 12005 }, { "epoch": 0.5568645640074211, "grad_norm": 6.616870880126953, "learning_rate": 4.180924271511465e-06, "loss": 0.3759, "step": 12006 }, { "epoch": 0.5569109461966605, "grad_norm": 8.928037643432617, "learning_rate": 4.180198279391953e-06, "loss": 0.3364, "step": 12007 }, { "epoch": 0.5569573283858998, "grad_norm": 7.043945789337158, "learning_rate": 4.1794723050330125e-06, "loss": 0.2801, "step": 12008 }, { "epoch": 0.5570037105751392, "grad_norm": 9.491706848144531, "learning_rate": 4.178746348450372e-06, "loss": 0.4314, "step": 12009 }, { "epoch": 0.5570500927643784, "grad_norm": 5.312502384185791, "learning_rate": 4.178020409659758e-06, "loss": 0.3595, "step": 12010 }, { "epoch": 0.5570964749536178, "grad_norm": 7.502797603607178, "learning_rate": 4.177294488676898e-06, "loss": 0.2534, "step": 12011 }, { "epoch": 0.5571428571428572, "grad_norm": 7.968080997467041, "learning_rate": 4.17656858551752e-06, "loss": 0.4905, "step": 12012 }, { "epoch": 0.5571892393320965, "grad_norm": 4.0309858322143555, "learning_rate": 4.175842700197349e-06, "loss": 0.3566, "step": 12013 }, { "epoch": 0.5572356215213358, "grad_norm": 9.49499797821045, "learning_rate": 4.175116832732111e-06, "loss": 0.4122, "step": 12014 }, { "epoch": 0.5572820037105751, "grad_norm": 6.351633071899414, "learning_rate": 4.174390983137532e-06, "loss": 0.3961, "step": 12015 }, { "epoch": 0.5573283858998145, "grad_norm": 9.201141357421875, "learning_rate": 4.173665151429336e-06, "loss": 0.2884, "step": 12016 }, { "epoch": 0.5573747680890538, "grad_norm": 4.936942100524902, "learning_rate": 4.17293933762325e-06, "loss": 0.2743, "step": 12017 }, { "epoch": 0.5574211502782931, "grad_norm": 6.815789699554443, "learning_rate": 4.172213541734996e-06, "loss": 0.4065, "step": 12018 }, { "epoch": 0.5574675324675324, "grad_norm": 6.312410354614258, "learning_rate": 4.171487763780301e-06, "loss": 0.3727, "step": 12019 }, { "epoch": 0.5575139146567718, "grad_norm": 7.136153697967529, "learning_rate": 4.170762003774884e-06, "loss": 0.3889, "step": 12020 }, { "epoch": 0.5575602968460112, "grad_norm": 11.96309757232666, "learning_rate": 4.170036261734471e-06, "loss": 0.362, "step": 12021 }, { "epoch": 0.5576066790352505, "grad_norm": 7.6693501472473145, "learning_rate": 4.169310537674785e-06, "loss": 0.2584, "step": 12022 }, { "epoch": 0.5576530612244898, "grad_norm": 11.480171203613281, "learning_rate": 4.168584831611549e-06, "loss": 0.3238, "step": 12023 }, { "epoch": 0.5576994434137291, "grad_norm": 8.825337409973145, "learning_rate": 4.167859143560484e-06, "loss": 0.314, "step": 12024 }, { "epoch": 0.5577458256029685, "grad_norm": 5.519758224487305, "learning_rate": 4.167133473537311e-06, "loss": 0.2068, "step": 12025 }, { "epoch": 0.5577922077922078, "grad_norm": 6.7660231590271, "learning_rate": 4.166407821557752e-06, "loss": 0.3248, "step": 12026 }, { "epoch": 0.5578385899814471, "grad_norm": 6.348531723022461, "learning_rate": 4.1656821876375275e-06, "loss": 0.3057, "step": 12027 }, { "epoch": 0.5578849721706864, "grad_norm": 6.702381610870361, "learning_rate": 4.16495657179236e-06, "loss": 0.3535, "step": 12028 }, { "epoch": 0.5579313543599258, "grad_norm": 10.919323921203613, "learning_rate": 4.164230974037968e-06, "loss": 0.5479, "step": 12029 }, { "epoch": 0.5579777365491652, "grad_norm": 9.80453872680664, "learning_rate": 4.16350539439007e-06, "loss": 0.3191, "step": 12030 }, { "epoch": 0.5580241187384044, "grad_norm": 5.600841999053955, "learning_rate": 4.162779832864385e-06, "loss": 0.3103, "step": 12031 }, { "epoch": 0.5580705009276438, "grad_norm": 8.700071334838867, "learning_rate": 4.162054289476635e-06, "loss": 0.2664, "step": 12032 }, { "epoch": 0.5581168831168831, "grad_norm": 8.59233283996582, "learning_rate": 4.161328764242536e-06, "loss": 0.3488, "step": 12033 }, { "epoch": 0.5581632653061225, "grad_norm": 8.578848838806152, "learning_rate": 4.16060325717781e-06, "loss": 0.4001, "step": 12034 }, { "epoch": 0.5582096474953617, "grad_norm": 5.921712398529053, "learning_rate": 4.159877768298169e-06, "loss": 0.3487, "step": 12035 }, { "epoch": 0.5582560296846011, "grad_norm": 9.765987396240234, "learning_rate": 4.159152297619332e-06, "loss": 0.4643, "step": 12036 }, { "epoch": 0.5583024118738404, "grad_norm": 11.38457202911377, "learning_rate": 4.1584268451570185e-06, "loss": 0.4824, "step": 12037 }, { "epoch": 0.5583487940630798, "grad_norm": 5.144696235656738, "learning_rate": 4.157701410926943e-06, "loss": 0.2773, "step": 12038 }, { "epoch": 0.5583951762523192, "grad_norm": 7.079259395599365, "learning_rate": 4.156975994944824e-06, "loss": 0.3815, "step": 12039 }, { "epoch": 0.5584415584415584, "grad_norm": 11.843189239501953, "learning_rate": 4.1562505972263735e-06, "loss": 0.4619, "step": 12040 }, { "epoch": 0.5584879406307978, "grad_norm": 11.35352897644043, "learning_rate": 4.155525217787309e-06, "loss": 0.4067, "step": 12041 }, { "epoch": 0.5585343228200371, "grad_norm": 12.084360122680664, "learning_rate": 4.154799856643345e-06, "loss": 0.3752, "step": 12042 }, { "epoch": 0.5585807050092765, "grad_norm": 6.191104888916016, "learning_rate": 4.154074513810197e-06, "loss": 0.2721, "step": 12043 }, { "epoch": 0.5586270871985157, "grad_norm": 5.0677666664123535, "learning_rate": 4.153349189303577e-06, "loss": 0.3197, "step": 12044 }, { "epoch": 0.5586734693877551, "grad_norm": 9.364431381225586, "learning_rate": 4.152623883139203e-06, "loss": 0.3158, "step": 12045 }, { "epoch": 0.5587198515769944, "grad_norm": 5.41422700881958, "learning_rate": 4.151898595332784e-06, "loss": 0.2435, "step": 12046 }, { "epoch": 0.5587662337662338, "grad_norm": 8.51885986328125, "learning_rate": 4.151173325900035e-06, "loss": 0.3928, "step": 12047 }, { "epoch": 0.558812615955473, "grad_norm": 5.780728816986084, "learning_rate": 4.150448074856667e-06, "loss": 0.35, "step": 12048 }, { "epoch": 0.5588589981447124, "grad_norm": 5.094027042388916, "learning_rate": 4.149722842218395e-06, "loss": 0.3522, "step": 12049 }, { "epoch": 0.5589053803339518, "grad_norm": 6.460902214050293, "learning_rate": 4.148997628000929e-06, "loss": 0.3521, "step": 12050 }, { "epoch": 0.5589517625231911, "grad_norm": 7.148072242736816, "learning_rate": 4.14827243221998e-06, "loss": 0.35, "step": 12051 }, { "epoch": 0.5589981447124305, "grad_norm": 6.544924259185791, "learning_rate": 4.1475472548912584e-06, "loss": 0.4096, "step": 12052 }, { "epoch": 0.5590445269016697, "grad_norm": 12.640796661376953, "learning_rate": 4.1468220960304775e-06, "loss": 0.2696, "step": 12053 }, { "epoch": 0.5590909090909091, "grad_norm": 6.149261474609375, "learning_rate": 4.1460969556533445e-06, "loss": 0.418, "step": 12054 }, { "epoch": 0.5591372912801484, "grad_norm": 6.565664768218994, "learning_rate": 4.145371833775571e-06, "loss": 0.3209, "step": 12055 }, { "epoch": 0.5591836734693878, "grad_norm": 4.821878433227539, "learning_rate": 4.144646730412868e-06, "loss": 0.3513, "step": 12056 }, { "epoch": 0.559230055658627, "grad_norm": 4.313647747039795, "learning_rate": 4.143921645580941e-06, "loss": 0.3722, "step": 12057 }, { "epoch": 0.5592764378478664, "grad_norm": 5.696144104003906, "learning_rate": 4.143196579295499e-06, "loss": 0.3179, "step": 12058 }, { "epoch": 0.5593228200371058, "grad_norm": 9.648616790771484, "learning_rate": 4.142471531572252e-06, "loss": 0.373, "step": 12059 }, { "epoch": 0.5593692022263451, "grad_norm": 9.35014820098877, "learning_rate": 4.141746502426906e-06, "loss": 0.3843, "step": 12060 }, { "epoch": 0.5594155844155844, "grad_norm": 5.640340805053711, "learning_rate": 4.141021491875172e-06, "loss": 0.2484, "step": 12061 }, { "epoch": 0.5594619666048237, "grad_norm": 9.974699974060059, "learning_rate": 4.140296499932753e-06, "loss": 0.3424, "step": 12062 }, { "epoch": 0.5595083487940631, "grad_norm": 6.931221961975098, "learning_rate": 4.1395715266153554e-06, "loss": 0.4003, "step": 12063 }, { "epoch": 0.5595547309833024, "grad_norm": 7.431144714355469, "learning_rate": 4.138846571938688e-06, "loss": 0.3057, "step": 12064 }, { "epoch": 0.5596011131725418, "grad_norm": 6.393836498260498, "learning_rate": 4.138121635918455e-06, "loss": 0.3742, "step": 12065 }, { "epoch": 0.559647495361781, "grad_norm": 7.819530963897705, "learning_rate": 4.137396718570362e-06, "loss": 0.3129, "step": 12066 }, { "epoch": 0.5596938775510204, "grad_norm": 7.800872802734375, "learning_rate": 4.136671819910117e-06, "loss": 0.2579, "step": 12067 }, { "epoch": 0.5597402597402598, "grad_norm": 4.832711696624756, "learning_rate": 4.135946939953418e-06, "loss": 0.3093, "step": 12068 }, { "epoch": 0.5597866419294991, "grad_norm": 10.005717277526855, "learning_rate": 4.1352220787159746e-06, "loss": 0.4072, "step": 12069 }, { "epoch": 0.5598330241187384, "grad_norm": 8.154256820678711, "learning_rate": 4.1344972362134876e-06, "loss": 0.3348, "step": 12070 }, { "epoch": 0.5598794063079777, "grad_norm": 5.412471771240234, "learning_rate": 4.133772412461662e-06, "loss": 0.1782, "step": 12071 }, { "epoch": 0.5599257884972171, "grad_norm": 3.6076431274414062, "learning_rate": 4.133047607476202e-06, "loss": 0.2742, "step": 12072 }, { "epoch": 0.5599721706864564, "grad_norm": 8.060385704040527, "learning_rate": 4.132322821272806e-06, "loss": 0.4468, "step": 12073 }, { "epoch": 0.5600185528756957, "grad_norm": 5.105400562286377, "learning_rate": 4.131598053867179e-06, "loss": 0.3077, "step": 12074 }, { "epoch": 0.560064935064935, "grad_norm": 9.848172187805176, "learning_rate": 4.130873305275022e-06, "loss": 0.4374, "step": 12075 }, { "epoch": 0.5601113172541744, "grad_norm": 7.740090370178223, "learning_rate": 4.130148575512037e-06, "loss": 0.3998, "step": 12076 }, { "epoch": 0.5601576994434138, "grad_norm": 5.286024570465088, "learning_rate": 4.1294238645939225e-06, "loss": 0.3566, "step": 12077 }, { "epoch": 0.560204081632653, "grad_norm": 4.138004779815674, "learning_rate": 4.128699172536384e-06, "loss": 0.3004, "step": 12078 }, { "epoch": 0.5602504638218924, "grad_norm": 6.773299694061279, "learning_rate": 4.127974499355116e-06, "loss": 0.367, "step": 12079 }, { "epoch": 0.5602968460111317, "grad_norm": 7.8208465576171875, "learning_rate": 4.127249845065821e-06, "loss": 0.2799, "step": 12080 }, { "epoch": 0.5603432282003711, "grad_norm": 12.59213924407959, "learning_rate": 4.126525209684198e-06, "loss": 0.3835, "step": 12081 }, { "epoch": 0.5603896103896104, "grad_norm": 6.5995378494262695, "learning_rate": 4.125800593225945e-06, "loss": 0.3641, "step": 12082 }, { "epoch": 0.5604359925788497, "grad_norm": 13.786754608154297, "learning_rate": 4.125075995706763e-06, "loss": 0.3295, "step": 12083 }, { "epoch": 0.560482374768089, "grad_norm": 6.299685478210449, "learning_rate": 4.1243514171423465e-06, "loss": 0.3362, "step": 12084 }, { "epoch": 0.5605287569573284, "grad_norm": 7.6328654289245605, "learning_rate": 4.123626857548395e-06, "loss": 0.3758, "step": 12085 }, { "epoch": 0.5605751391465678, "grad_norm": 9.098918914794922, "learning_rate": 4.122902316940605e-06, "loss": 0.3416, "step": 12086 }, { "epoch": 0.560621521335807, "grad_norm": 6.532246112823486, "learning_rate": 4.122177795334673e-06, "loss": 0.3777, "step": 12087 }, { "epoch": 0.5606679035250464, "grad_norm": 5.576096534729004, "learning_rate": 4.121453292746297e-06, "loss": 0.2983, "step": 12088 }, { "epoch": 0.5607142857142857, "grad_norm": 10.813316345214844, "learning_rate": 4.120728809191174e-06, "loss": 0.4332, "step": 12089 }, { "epoch": 0.5607606679035251, "grad_norm": 4.233736515045166, "learning_rate": 4.120004344684995e-06, "loss": 0.2771, "step": 12090 }, { "epoch": 0.5608070500927643, "grad_norm": 8.51284122467041, "learning_rate": 4.119279899243457e-06, "loss": 0.4238, "step": 12091 }, { "epoch": 0.5608534322820037, "grad_norm": 5.354913234710693, "learning_rate": 4.118555472882257e-06, "loss": 0.2837, "step": 12092 }, { "epoch": 0.560899814471243, "grad_norm": 9.72423267364502, "learning_rate": 4.117831065617087e-06, "loss": 0.3519, "step": 12093 }, { "epoch": 0.5609461966604824, "grad_norm": 9.538215637207031, "learning_rate": 4.117106677463644e-06, "loss": 0.4165, "step": 12094 }, { "epoch": 0.5609925788497218, "grad_norm": 12.207806587219238, "learning_rate": 4.116382308437617e-06, "loss": 0.4333, "step": 12095 }, { "epoch": 0.561038961038961, "grad_norm": 8.143924713134766, "learning_rate": 4.115657958554701e-06, "loss": 0.4037, "step": 12096 }, { "epoch": 0.5610853432282004, "grad_norm": 6.8640594482421875, "learning_rate": 4.11493362783059e-06, "loss": 0.4058, "step": 12097 }, { "epoch": 0.5611317254174397, "grad_norm": 4.410050868988037, "learning_rate": 4.114209316280973e-06, "loss": 0.3108, "step": 12098 }, { "epoch": 0.5611781076066791, "grad_norm": 6.89107608795166, "learning_rate": 4.113485023921548e-06, "loss": 0.3332, "step": 12099 }, { "epoch": 0.5612244897959183, "grad_norm": 6.049981594085693, "learning_rate": 4.112760750768e-06, "loss": 0.2799, "step": 12100 }, { "epoch": 0.5612708719851577, "grad_norm": 11.447712898254395, "learning_rate": 4.1120364968360215e-06, "loss": 0.4029, "step": 12101 }, { "epoch": 0.561317254174397, "grad_norm": 5.2044677734375, "learning_rate": 4.111312262141304e-06, "loss": 0.2939, "step": 12102 }, { "epoch": 0.5613636363636364, "grad_norm": 8.962560653686523, "learning_rate": 4.110588046699538e-06, "loss": 0.2654, "step": 12103 }, { "epoch": 0.5614100185528756, "grad_norm": 7.751077651977539, "learning_rate": 4.109863850526413e-06, "loss": 0.4164, "step": 12104 }, { "epoch": 0.561456400742115, "grad_norm": 4.900480270385742, "learning_rate": 4.1091396736376206e-06, "loss": 0.3414, "step": 12105 }, { "epoch": 0.5615027829313544, "grad_norm": 6.363481044769287, "learning_rate": 4.1084155160488445e-06, "loss": 0.2717, "step": 12106 }, { "epoch": 0.5615491651205937, "grad_norm": 5.350171089172363, "learning_rate": 4.107691377775776e-06, "loss": 0.2508, "step": 12107 }, { "epoch": 0.5615955473098331, "grad_norm": 6.761512756347656, "learning_rate": 4.106967258834103e-06, "loss": 0.3873, "step": 12108 }, { "epoch": 0.5616419294990723, "grad_norm": 7.0951762199401855, "learning_rate": 4.106243159239514e-06, "loss": 0.4091, "step": 12109 }, { "epoch": 0.5616883116883117, "grad_norm": 6.904284954071045, "learning_rate": 4.105519079007698e-06, "loss": 0.3324, "step": 12110 }, { "epoch": 0.561734693877551, "grad_norm": 8.137848854064941, "learning_rate": 4.104795018154336e-06, "loss": 0.2937, "step": 12111 }, { "epoch": 0.5617810760667904, "grad_norm": 9.126643180847168, "learning_rate": 4.104070976695118e-06, "loss": 0.3579, "step": 12112 }, { "epoch": 0.5618274582560296, "grad_norm": 3.7877113819122314, "learning_rate": 4.1033469546457305e-06, "loss": 0.2755, "step": 12113 }, { "epoch": 0.561873840445269, "grad_norm": 10.13050651550293, "learning_rate": 4.102622952021857e-06, "loss": 0.3407, "step": 12114 }, { "epoch": 0.5619202226345084, "grad_norm": 14.50367259979248, "learning_rate": 4.101898968839185e-06, "loss": 0.4443, "step": 12115 }, { "epoch": 0.5619666048237477, "grad_norm": 5.892427444458008, "learning_rate": 4.1011750051134004e-06, "loss": 0.3509, "step": 12116 }, { "epoch": 0.562012987012987, "grad_norm": 8.640948295593262, "learning_rate": 4.100451060860183e-06, "loss": 0.2984, "step": 12117 }, { "epoch": 0.5620593692022263, "grad_norm": 4.313810348510742, "learning_rate": 4.0997271360952184e-06, "loss": 0.2871, "step": 12118 }, { "epoch": 0.5621057513914657, "grad_norm": 5.451752662658691, "learning_rate": 4.099003230834191e-06, "loss": 0.3237, "step": 12119 }, { "epoch": 0.562152133580705, "grad_norm": 7.816930294036865, "learning_rate": 4.098279345092783e-06, "loss": 0.4067, "step": 12120 }, { "epoch": 0.5621985157699444, "grad_norm": 8.776874542236328, "learning_rate": 4.09755547888668e-06, "loss": 0.3401, "step": 12121 }, { "epoch": 0.5622448979591836, "grad_norm": 10.2127046585083, "learning_rate": 4.096831632231559e-06, "loss": 0.3432, "step": 12122 }, { "epoch": 0.562291280148423, "grad_norm": 6.799379825592041, "learning_rate": 4.096107805143104e-06, "loss": 0.3366, "step": 12123 }, { "epoch": 0.5623376623376624, "grad_norm": 9.83154582977295, "learning_rate": 4.095383997636998e-06, "loss": 0.4682, "step": 12124 }, { "epoch": 0.5623840445269017, "grad_norm": 8.30795669555664, "learning_rate": 4.094660209728919e-06, "loss": 0.4086, "step": 12125 }, { "epoch": 0.562430426716141, "grad_norm": 4.408987998962402, "learning_rate": 4.093936441434551e-06, "loss": 0.3697, "step": 12126 }, { "epoch": 0.5624768089053803, "grad_norm": 12.800165176391602, "learning_rate": 4.093212692769572e-06, "loss": 0.2574, "step": 12127 }, { "epoch": 0.5625231910946197, "grad_norm": 5.025338172912598, "learning_rate": 4.092488963749661e-06, "loss": 0.2964, "step": 12128 }, { "epoch": 0.562569573283859, "grad_norm": 11.458560943603516, "learning_rate": 4.0917652543904965e-06, "loss": 0.3525, "step": 12129 }, { "epoch": 0.5626159554730983, "grad_norm": 5.832163333892822, "learning_rate": 4.09104156470776e-06, "loss": 0.3632, "step": 12130 }, { "epoch": 0.5626623376623376, "grad_norm": 11.24590015411377, "learning_rate": 4.09031789471713e-06, "loss": 0.4073, "step": 12131 }, { "epoch": 0.562708719851577, "grad_norm": 11.971159934997559, "learning_rate": 4.089594244434282e-06, "loss": 0.3548, "step": 12132 }, { "epoch": 0.5627551020408164, "grad_norm": 12.027998924255371, "learning_rate": 4.0888706138748935e-06, "loss": 0.2589, "step": 12133 }, { "epoch": 0.5628014842300556, "grad_norm": 23.888301849365234, "learning_rate": 4.088147003054642e-06, "loss": 0.5764, "step": 12134 }, { "epoch": 0.562847866419295, "grad_norm": 8.205949783325195, "learning_rate": 4.087423411989206e-06, "loss": 0.3268, "step": 12135 }, { "epoch": 0.5628942486085343, "grad_norm": 5.795368194580078, "learning_rate": 4.086699840694262e-06, "loss": 0.212, "step": 12136 }, { "epoch": 0.5629406307977737, "grad_norm": 7.3385210037231445, "learning_rate": 4.085976289185482e-06, "loss": 0.3719, "step": 12137 }, { "epoch": 0.562987012987013, "grad_norm": 7.377379894256592, "learning_rate": 4.085252757478545e-06, "loss": 0.3209, "step": 12138 }, { "epoch": 0.5630333951762523, "grad_norm": 6.591641426086426, "learning_rate": 4.084529245589123e-06, "loss": 0.4178, "step": 12139 }, { "epoch": 0.5630797773654916, "grad_norm": 8.927738189697266, "learning_rate": 4.083805753532892e-06, "loss": 0.1915, "step": 12140 }, { "epoch": 0.563126159554731, "grad_norm": 7.975833415985107, "learning_rate": 4.083082281325528e-06, "loss": 0.3117, "step": 12141 }, { "epoch": 0.5631725417439704, "grad_norm": 9.184391975402832, "learning_rate": 4.082358828982701e-06, "loss": 0.4573, "step": 12142 }, { "epoch": 0.5632189239332096, "grad_norm": 4.915102958679199, "learning_rate": 4.081635396520088e-06, "loss": 0.3913, "step": 12143 }, { "epoch": 0.563265306122449, "grad_norm": 6.2739176750183105, "learning_rate": 4.080911983953357e-06, "loss": 0.2942, "step": 12144 }, { "epoch": 0.5633116883116883, "grad_norm": 7.6368584632873535, "learning_rate": 4.0801885912981845e-06, "loss": 0.3557, "step": 12145 }, { "epoch": 0.5633580705009277, "grad_norm": 8.360283851623535, "learning_rate": 4.079465218570242e-06, "loss": 0.3429, "step": 12146 }, { "epoch": 0.5634044526901669, "grad_norm": 6.527413845062256, "learning_rate": 4.078741865785199e-06, "loss": 0.3652, "step": 12147 }, { "epoch": 0.5634508348794063, "grad_norm": 12.73056411743164, "learning_rate": 4.078018532958727e-06, "loss": 0.2881, "step": 12148 }, { "epoch": 0.5634972170686456, "grad_norm": 11.803243637084961, "learning_rate": 4.077295220106498e-06, "loss": 0.2785, "step": 12149 }, { "epoch": 0.563543599257885, "grad_norm": 6.1375250816345215, "learning_rate": 4.0765719272441805e-06, "loss": 0.3082, "step": 12150 }, { "epoch": 0.5635899814471244, "grad_norm": 7.45479679107666, "learning_rate": 4.075848654387446e-06, "loss": 0.2805, "step": 12151 }, { "epoch": 0.5636363636363636, "grad_norm": 8.602218627929688, "learning_rate": 4.075125401551963e-06, "loss": 0.399, "step": 12152 }, { "epoch": 0.563682745825603, "grad_norm": 8.302763938903809, "learning_rate": 4.0744021687534e-06, "loss": 0.3055, "step": 12153 }, { "epoch": 0.5637291280148423, "grad_norm": 13.224207878112793, "learning_rate": 4.073678956007427e-06, "loss": 0.4063, "step": 12154 }, { "epoch": 0.5637755102040817, "grad_norm": 5.5618181228637695, "learning_rate": 4.072955763329709e-06, "loss": 0.361, "step": 12155 }, { "epoch": 0.5638218923933209, "grad_norm": 11.359270095825195, "learning_rate": 4.072232590735917e-06, "loss": 0.4034, "step": 12156 }, { "epoch": 0.5638682745825603, "grad_norm": 7.003890037536621, "learning_rate": 4.071509438241715e-06, "loss": 0.3562, "step": 12157 }, { "epoch": 0.5639146567717996, "grad_norm": 6.8567352294921875, "learning_rate": 4.0707863058627715e-06, "loss": 0.3314, "step": 12158 }, { "epoch": 0.563961038961039, "grad_norm": 4.577086448669434, "learning_rate": 4.070063193614753e-06, "loss": 0.3416, "step": 12159 }, { "epoch": 0.5640074211502782, "grad_norm": 9.634232521057129, "learning_rate": 4.0693401015133255e-06, "loss": 0.4157, "step": 12160 }, { "epoch": 0.5640538033395176, "grad_norm": 8.927802085876465, "learning_rate": 4.068617029574154e-06, "loss": 0.4465, "step": 12161 }, { "epoch": 0.564100185528757, "grad_norm": 7.139503002166748, "learning_rate": 4.067893977812902e-06, "loss": 0.3672, "step": 12162 }, { "epoch": 0.5641465677179963, "grad_norm": 4.466521263122559, "learning_rate": 4.067170946245236e-06, "loss": 0.2161, "step": 12163 }, { "epoch": 0.5641929499072357, "grad_norm": 6.613531112670898, "learning_rate": 4.066447934886819e-06, "loss": 0.3826, "step": 12164 }, { "epoch": 0.5642393320964749, "grad_norm": 4.930033206939697, "learning_rate": 4.065724943753317e-06, "loss": 0.2387, "step": 12165 }, { "epoch": 0.5642857142857143, "grad_norm": 9.15380859375, "learning_rate": 4.06500197286039e-06, "loss": 0.3419, "step": 12166 }, { "epoch": 0.5643320964749536, "grad_norm": 5.355799674987793, "learning_rate": 4.064279022223701e-06, "loss": 0.4439, "step": 12167 }, { "epoch": 0.564378478664193, "grad_norm": 16.452987670898438, "learning_rate": 4.063556091858914e-06, "loss": 0.6179, "step": 12168 }, { "epoch": 0.5644248608534322, "grad_norm": 8.573139190673828, "learning_rate": 4.0628331817816906e-06, "loss": 0.3435, "step": 12169 }, { "epoch": 0.5644712430426716, "grad_norm": 6.772412300109863, "learning_rate": 4.062110292007691e-06, "loss": 0.3106, "step": 12170 }, { "epoch": 0.564517625231911, "grad_norm": 9.380501747131348, "learning_rate": 4.061387422552581e-06, "loss": 0.3365, "step": 12171 }, { "epoch": 0.5645640074211503, "grad_norm": 8.092621803283691, "learning_rate": 4.060664573432014e-06, "loss": 0.329, "step": 12172 }, { "epoch": 0.5646103896103896, "grad_norm": 7.542166233062744, "learning_rate": 4.059941744661654e-06, "loss": 0.3652, "step": 12173 }, { "epoch": 0.5646567717996289, "grad_norm": 11.80312442779541, "learning_rate": 4.05921893625716e-06, "loss": 0.2731, "step": 12174 }, { "epoch": 0.5647031539888683, "grad_norm": 7.014031887054443, "learning_rate": 4.058496148234192e-06, "loss": 0.3782, "step": 12175 }, { "epoch": 0.5647495361781076, "grad_norm": 5.786084175109863, "learning_rate": 4.057773380608411e-06, "loss": 0.3427, "step": 12176 }, { "epoch": 0.564795918367347, "grad_norm": 6.552250385284424, "learning_rate": 4.057050633395469e-06, "loss": 0.2614, "step": 12177 }, { "epoch": 0.5648423005565862, "grad_norm": 9.056147575378418, "learning_rate": 4.056327906611029e-06, "loss": 0.3712, "step": 12178 }, { "epoch": 0.5648886827458256, "grad_norm": 6.061500549316406, "learning_rate": 4.055605200270747e-06, "loss": 0.3793, "step": 12179 }, { "epoch": 0.564935064935065, "grad_norm": 7.969015598297119, "learning_rate": 4.0548825143902796e-06, "loss": 0.2505, "step": 12180 }, { "epoch": 0.5649814471243043, "grad_norm": 6.782576560974121, "learning_rate": 4.054159848985287e-06, "loss": 0.3459, "step": 12181 }, { "epoch": 0.5650278293135436, "grad_norm": 6.832304954528809, "learning_rate": 4.053437204071419e-06, "loss": 0.4192, "step": 12182 }, { "epoch": 0.5650742115027829, "grad_norm": 8.143269538879395, "learning_rate": 4.052714579664335e-06, "loss": 0.3804, "step": 12183 }, { "epoch": 0.5651205936920223, "grad_norm": 8.52076244354248, "learning_rate": 4.051991975779691e-06, "loss": 0.3824, "step": 12184 }, { "epoch": 0.5651669758812616, "grad_norm": 5.242172718048096, "learning_rate": 4.05126939243314e-06, "loss": 0.342, "step": 12185 }, { "epoch": 0.5652133580705009, "grad_norm": 5.245943546295166, "learning_rate": 4.050546829640337e-06, "loss": 0.308, "step": 12186 }, { "epoch": 0.5652597402597402, "grad_norm": 6.471330165863037, "learning_rate": 4.0498242874169396e-06, "loss": 0.2746, "step": 12187 }, { "epoch": 0.5653061224489796, "grad_norm": 4.50266695022583, "learning_rate": 4.049101765778595e-06, "loss": 0.3073, "step": 12188 }, { "epoch": 0.565352504638219, "grad_norm": 5.355275630950928, "learning_rate": 4.048379264740959e-06, "loss": 0.3364, "step": 12189 }, { "epoch": 0.5653988868274582, "grad_norm": 10.046815872192383, "learning_rate": 4.047656784319685e-06, "loss": 0.4034, "step": 12190 }, { "epoch": 0.5654452690166976, "grad_norm": 5.7776641845703125, "learning_rate": 4.046934324530423e-06, "loss": 0.3029, "step": 12191 }, { "epoch": 0.5654916512059369, "grad_norm": 6.446334362030029, "learning_rate": 4.046211885388829e-06, "loss": 0.3338, "step": 12192 }, { "epoch": 0.5655380333951763, "grad_norm": 4.870159149169922, "learning_rate": 4.04548946691055e-06, "loss": 0.3929, "step": 12193 }, { "epoch": 0.5655844155844156, "grad_norm": 6.95938777923584, "learning_rate": 4.044767069111237e-06, "loss": 0.2983, "step": 12194 }, { "epoch": 0.5656307977736549, "grad_norm": 4.422769546508789, "learning_rate": 4.044044692006542e-06, "loss": 0.2765, "step": 12195 }, { "epoch": 0.5656771799628942, "grad_norm": 8.111364364624023, "learning_rate": 4.043322335612115e-06, "loss": 0.3019, "step": 12196 }, { "epoch": 0.5657235621521336, "grad_norm": 8.014581680297852, "learning_rate": 4.042599999943605e-06, "loss": 0.2954, "step": 12197 }, { "epoch": 0.565769944341373, "grad_norm": 5.237637519836426, "learning_rate": 4.041877685016663e-06, "loss": 0.3021, "step": 12198 }, { "epoch": 0.5658163265306122, "grad_norm": 7.462674617767334, "learning_rate": 4.041155390846933e-06, "loss": 0.2857, "step": 12199 }, { "epoch": 0.5658627087198516, "grad_norm": 8.94225788116455, "learning_rate": 4.040433117450066e-06, "loss": 0.3891, "step": 12200 }, { "epoch": 0.5659090909090909, "grad_norm": 10.276843070983887, "learning_rate": 4.039710864841709e-06, "loss": 0.3132, "step": 12201 }, { "epoch": 0.5659554730983303, "grad_norm": 11.513096809387207, "learning_rate": 4.03898863303751e-06, "loss": 0.4053, "step": 12202 }, { "epoch": 0.5660018552875695, "grad_norm": 4.46525764465332, "learning_rate": 4.038266422053119e-06, "loss": 0.3264, "step": 12203 }, { "epoch": 0.5660482374768089, "grad_norm": 7.236816883087158, "learning_rate": 4.0375442319041746e-06, "loss": 0.3437, "step": 12204 }, { "epoch": 0.5660946196660482, "grad_norm": 5.600715637207031, "learning_rate": 4.036822062606327e-06, "loss": 0.2356, "step": 12205 }, { "epoch": 0.5661410018552876, "grad_norm": 9.728476524353027, "learning_rate": 4.036099914175222e-06, "loss": 0.4676, "step": 12206 }, { "epoch": 0.566187384044527, "grad_norm": 10.165969848632812, "learning_rate": 4.035377786626504e-06, "loss": 0.3716, "step": 12207 }, { "epoch": 0.5662337662337662, "grad_norm": 18.148193359375, "learning_rate": 4.034655679975817e-06, "loss": 0.437, "step": 12208 }, { "epoch": 0.5662801484230056, "grad_norm": 8.934198379516602, "learning_rate": 4.033933594238808e-06, "loss": 0.4292, "step": 12209 }, { "epoch": 0.5663265306122449, "grad_norm": 8.704218864440918, "learning_rate": 4.033211529431116e-06, "loss": 0.3507, "step": 12210 }, { "epoch": 0.5663729128014843, "grad_norm": 5.628757476806641, "learning_rate": 4.032489485568386e-06, "loss": 0.3109, "step": 12211 }, { "epoch": 0.5664192949907235, "grad_norm": 15.60460090637207, "learning_rate": 4.031767462666261e-06, "loss": 0.3449, "step": 12212 }, { "epoch": 0.5664656771799629, "grad_norm": 7.052353382110596, "learning_rate": 4.031045460740383e-06, "loss": 0.3257, "step": 12213 }, { "epoch": 0.5665120593692022, "grad_norm": 7.992983341217041, "learning_rate": 4.030323479806397e-06, "loss": 0.2235, "step": 12214 }, { "epoch": 0.5665584415584416, "grad_norm": 6.666418552398682, "learning_rate": 4.029601519879939e-06, "loss": 0.2717, "step": 12215 }, { "epoch": 0.5666048237476808, "grad_norm": 7.2321648597717285, "learning_rate": 4.0288795809766516e-06, "loss": 0.3202, "step": 12216 }, { "epoch": 0.5666512059369202, "grad_norm": 5.433565616607666, "learning_rate": 4.028157663112176e-06, "loss": 0.3753, "step": 12217 }, { "epoch": 0.5666975881261596, "grad_norm": 7.745369911193848, "learning_rate": 4.027435766302152e-06, "loss": 0.4407, "step": 12218 }, { "epoch": 0.5667439703153989, "grad_norm": 7.126113414764404, "learning_rate": 4.026713890562219e-06, "loss": 0.3769, "step": 12219 }, { "epoch": 0.5667903525046383, "grad_norm": 6.212181568145752, "learning_rate": 4.025992035908019e-06, "loss": 0.3452, "step": 12220 }, { "epoch": 0.5668367346938775, "grad_norm": 7.744356155395508, "learning_rate": 4.025270202355186e-06, "loss": 0.3565, "step": 12221 }, { "epoch": 0.5668831168831169, "grad_norm": 3.296438217163086, "learning_rate": 4.02454838991936e-06, "loss": 0.2741, "step": 12222 }, { "epoch": 0.5669294990723562, "grad_norm": 4.2140421867370605, "learning_rate": 4.023826598616177e-06, "loss": 0.3179, "step": 12223 }, { "epoch": 0.5669758812615956, "grad_norm": 10.766582489013672, "learning_rate": 4.023104828461277e-06, "loss": 0.4443, "step": 12224 }, { "epoch": 0.5670222634508348, "grad_norm": 7.687602519989014, "learning_rate": 4.022383079470298e-06, "loss": 0.3447, "step": 12225 }, { "epoch": 0.5670686456400742, "grad_norm": 17.31383514404297, "learning_rate": 4.021661351658871e-06, "loss": 0.4084, "step": 12226 }, { "epoch": 0.5671150278293136, "grad_norm": 8.893095970153809, "learning_rate": 4.020939645042636e-06, "loss": 0.4536, "step": 12227 }, { "epoch": 0.5671614100185529, "grad_norm": 5.2210283279418945, "learning_rate": 4.020217959637226e-06, "loss": 0.3061, "step": 12228 }, { "epoch": 0.5672077922077922, "grad_norm": 5.928301811218262, "learning_rate": 4.019496295458277e-06, "loss": 0.2593, "step": 12229 }, { "epoch": 0.5672541743970315, "grad_norm": 8.793688774108887, "learning_rate": 4.0187746525214245e-06, "loss": 0.4501, "step": 12230 }, { "epoch": 0.5673005565862709, "grad_norm": 7.211468696594238, "learning_rate": 4.018053030842304e-06, "loss": 0.407, "step": 12231 }, { "epoch": 0.5673469387755102, "grad_norm": 8.32942008972168, "learning_rate": 4.017331430436543e-06, "loss": 0.4837, "step": 12232 }, { "epoch": 0.5673933209647495, "grad_norm": 5.005883693695068, "learning_rate": 4.0166098513197796e-06, "loss": 0.2809, "step": 12233 }, { "epoch": 0.5674397031539888, "grad_norm": 7.65743350982666, "learning_rate": 4.015888293507645e-06, "loss": 0.4112, "step": 12234 }, { "epoch": 0.5674860853432282, "grad_norm": 6.970331192016602, "learning_rate": 4.015166757015771e-06, "loss": 0.2582, "step": 12235 }, { "epoch": 0.5675324675324676, "grad_norm": 9.816208839416504, "learning_rate": 4.014445241859792e-06, "loss": 0.3657, "step": 12236 }, { "epoch": 0.5675788497217069, "grad_norm": 4.671932220458984, "learning_rate": 4.0137237480553345e-06, "loss": 0.2492, "step": 12237 }, { "epoch": 0.5676252319109462, "grad_norm": 4.993814945220947, "learning_rate": 4.013002275618033e-06, "loss": 0.3261, "step": 12238 }, { "epoch": 0.5676716141001855, "grad_norm": 8.55656623840332, "learning_rate": 4.012280824563515e-06, "loss": 0.2821, "step": 12239 }, { "epoch": 0.5677179962894249, "grad_norm": 9.638242721557617, "learning_rate": 4.011559394907413e-06, "loss": 0.4541, "step": 12240 }, { "epoch": 0.5677643784786642, "grad_norm": 7.081305980682373, "learning_rate": 4.0108379866653555e-06, "loss": 0.3335, "step": 12241 }, { "epoch": 0.5678107606679035, "grad_norm": 7.4745001792907715, "learning_rate": 4.010116599852973e-06, "loss": 0.1719, "step": 12242 }, { "epoch": 0.5678571428571428, "grad_norm": 10.980302810668945, "learning_rate": 4.00939523448589e-06, "loss": 0.3813, "step": 12243 }, { "epoch": 0.5679035250463822, "grad_norm": 4.59005069732666, "learning_rate": 4.0086738905797375e-06, "loss": 0.2682, "step": 12244 }, { "epoch": 0.5679499072356216, "grad_norm": 4.611824035644531, "learning_rate": 4.007952568150141e-06, "loss": 0.267, "step": 12245 }, { "epoch": 0.5679962894248608, "grad_norm": 19.90460205078125, "learning_rate": 4.00723126721273e-06, "loss": 0.4041, "step": 12246 }, { "epoch": 0.5680426716141002, "grad_norm": 10.74841594696045, "learning_rate": 4.0065099877831315e-06, "loss": 0.384, "step": 12247 }, { "epoch": 0.5680890538033395, "grad_norm": 8.664133071899414, "learning_rate": 4.005788729876968e-06, "loss": 0.3689, "step": 12248 }, { "epoch": 0.5681354359925789, "grad_norm": 10.861837387084961, "learning_rate": 4.005067493509868e-06, "loss": 0.4481, "step": 12249 }, { "epoch": 0.5681818181818182, "grad_norm": 7.327361583709717, "learning_rate": 4.004346278697455e-06, "loss": 0.3516, "step": 12250 }, { "epoch": 0.5682282003710575, "grad_norm": 7.360426425933838, "learning_rate": 4.003625085455355e-06, "loss": 0.3229, "step": 12251 }, { "epoch": 0.5682745825602968, "grad_norm": 23.818811416625977, "learning_rate": 4.002903913799192e-06, "loss": 0.2665, "step": 12252 }, { "epoch": 0.5683209647495362, "grad_norm": 4.1535868644714355, "learning_rate": 4.002182763744593e-06, "loss": 0.305, "step": 12253 }, { "epoch": 0.5683673469387756, "grad_norm": 5.781949520111084, "learning_rate": 4.001461635307174e-06, "loss": 0.3613, "step": 12254 }, { "epoch": 0.5684137291280148, "grad_norm": 11.33907699584961, "learning_rate": 4.000740528502563e-06, "loss": 0.5074, "step": 12255 }, { "epoch": 0.5684601113172542, "grad_norm": 5.799802780151367, "learning_rate": 4.0000194433463816e-06, "loss": 0.3743, "step": 12256 }, { "epoch": 0.5685064935064935, "grad_norm": 6.848455905914307, "learning_rate": 3.9992983798542515e-06, "loss": 0.3361, "step": 12257 }, { "epoch": 0.5685528756957329, "grad_norm": 6.568971157073975, "learning_rate": 3.998577338041797e-06, "loss": 0.3251, "step": 12258 }, { "epoch": 0.5685992578849721, "grad_norm": 7.9396586418151855, "learning_rate": 3.997856317924632e-06, "loss": 0.3019, "step": 12259 }, { "epoch": 0.5686456400742115, "grad_norm": 5.3723955154418945, "learning_rate": 3.9971353195183835e-06, "loss": 0.3177, "step": 12260 }, { "epoch": 0.5686920222634508, "grad_norm": 8.777559280395508, "learning_rate": 3.996414342838668e-06, "loss": 0.2682, "step": 12261 }, { "epoch": 0.5687384044526902, "grad_norm": 4.8237128257751465, "learning_rate": 3.995693387901107e-06, "loss": 0.3134, "step": 12262 }, { "epoch": 0.5687847866419296, "grad_norm": 4.92454719543457, "learning_rate": 3.994972454721319e-06, "loss": 0.3761, "step": 12263 }, { "epoch": 0.5688311688311688, "grad_norm": 10.422200202941895, "learning_rate": 3.994251543314925e-06, "loss": 0.2866, "step": 12264 }, { "epoch": 0.5688775510204082, "grad_norm": 4.6045050621032715, "learning_rate": 3.9935306536975385e-06, "loss": 0.2882, "step": 12265 }, { "epoch": 0.5689239332096475, "grad_norm": 6.943708896636963, "learning_rate": 3.992809785884779e-06, "loss": 0.3175, "step": 12266 }, { "epoch": 0.5689703153988869, "grad_norm": 11.002976417541504, "learning_rate": 3.9920889398922654e-06, "loss": 0.4316, "step": 12267 }, { "epoch": 0.5690166975881261, "grad_norm": 9.427177429199219, "learning_rate": 3.991368115735612e-06, "loss": 0.4031, "step": 12268 }, { "epoch": 0.5690630797773655, "grad_norm": 7.378321647644043, "learning_rate": 3.99064731343044e-06, "loss": 0.1753, "step": 12269 }, { "epoch": 0.5691094619666048, "grad_norm": 6.983360290527344, "learning_rate": 3.989926532992358e-06, "loss": 0.3662, "step": 12270 }, { "epoch": 0.5691558441558442, "grad_norm": 11.00367546081543, "learning_rate": 3.989205774436986e-06, "loss": 0.3941, "step": 12271 }, { "epoch": 0.5692022263450834, "grad_norm": 7.5564680099487305, "learning_rate": 3.988485037779937e-06, "loss": 0.3295, "step": 12272 }, { "epoch": 0.5692486085343228, "grad_norm": 6.0610504150390625, "learning_rate": 3.987764323036826e-06, "loss": 0.3347, "step": 12273 }, { "epoch": 0.5692949907235622, "grad_norm": 5.327116966247559, "learning_rate": 3.98704363022327e-06, "loss": 0.3443, "step": 12274 }, { "epoch": 0.5693413729128015, "grad_norm": 4.000931262969971, "learning_rate": 3.9863229593548765e-06, "loss": 0.262, "step": 12275 }, { "epoch": 0.5693877551020409, "grad_norm": 13.615527153015137, "learning_rate": 3.985602310447262e-06, "loss": 0.2956, "step": 12276 }, { "epoch": 0.5694341372912801, "grad_norm": 7.4686970710754395, "learning_rate": 3.984881683516038e-06, "loss": 0.3497, "step": 12277 }, { "epoch": 0.5694805194805195, "grad_norm": 4.142819404602051, "learning_rate": 3.984161078576818e-06, "loss": 0.1734, "step": 12278 }, { "epoch": 0.5695269016697588, "grad_norm": 5.4434967041015625, "learning_rate": 3.983440495645212e-06, "loss": 0.3168, "step": 12279 }, { "epoch": 0.5695732838589982, "grad_norm": 6.424036979675293, "learning_rate": 3.982719934736832e-06, "loss": 0.309, "step": 12280 }, { "epoch": 0.5696196660482374, "grad_norm": 16.973474502563477, "learning_rate": 3.981999395867288e-06, "loss": 0.3614, "step": 12281 }, { "epoch": 0.5696660482374768, "grad_norm": 11.5812406539917, "learning_rate": 3.981278879052189e-06, "loss": 0.3859, "step": 12282 }, { "epoch": 0.5697124304267162, "grad_norm": 7.701573848724365, "learning_rate": 3.9805583843071454e-06, "loss": 0.397, "step": 12283 }, { "epoch": 0.5697588126159555, "grad_norm": 6.5974860191345215, "learning_rate": 3.979837911647769e-06, "loss": 0.2784, "step": 12284 }, { "epoch": 0.5698051948051948, "grad_norm": 14.738247871398926, "learning_rate": 3.979117461089665e-06, "loss": 0.4083, "step": 12285 }, { "epoch": 0.5698515769944341, "grad_norm": 8.044318199157715, "learning_rate": 3.978397032648442e-06, "loss": 0.3402, "step": 12286 }, { "epoch": 0.5698979591836735, "grad_norm": 4.869203567504883, "learning_rate": 3.977676626339709e-06, "loss": 0.3821, "step": 12287 }, { "epoch": 0.5699443413729128, "grad_norm": 7.7059712409973145, "learning_rate": 3.976956242179073e-06, "loss": 0.2706, "step": 12288 }, { "epoch": 0.5699907235621521, "grad_norm": 8.195199966430664, "learning_rate": 3.97623588018214e-06, "loss": 0.3473, "step": 12289 }, { "epoch": 0.5700371057513914, "grad_norm": 6.327586650848389, "learning_rate": 3.975515540364517e-06, "loss": 0.3424, "step": 12290 }, { "epoch": 0.5700834879406308, "grad_norm": 5.512065410614014, "learning_rate": 3.97479522274181e-06, "loss": 0.3067, "step": 12291 }, { "epoch": 0.5701298701298702, "grad_norm": 9.210979461669922, "learning_rate": 3.974074927329622e-06, "loss": 0.2788, "step": 12292 }, { "epoch": 0.5701762523191095, "grad_norm": 5.763336181640625, "learning_rate": 3.973354654143561e-06, "loss": 0.301, "step": 12293 }, { "epoch": 0.5702226345083488, "grad_norm": 5.729100704193115, "learning_rate": 3.97263440319923e-06, "loss": 0.218, "step": 12294 }, { "epoch": 0.5702690166975881, "grad_norm": 6.098367691040039, "learning_rate": 3.971914174512231e-06, "loss": 0.3077, "step": 12295 }, { "epoch": 0.5703153988868275, "grad_norm": 10.404767990112305, "learning_rate": 3.971193968098172e-06, "loss": 0.3194, "step": 12296 }, { "epoch": 0.5703617810760668, "grad_norm": 5.724658012390137, "learning_rate": 3.970473783972651e-06, "loss": 0.337, "step": 12297 }, { "epoch": 0.5704081632653061, "grad_norm": 13.463641166687012, "learning_rate": 3.969753622151273e-06, "loss": 0.6112, "step": 12298 }, { "epoch": 0.5704545454545454, "grad_norm": 5.896609783172607, "learning_rate": 3.969033482649639e-06, "loss": 0.2454, "step": 12299 }, { "epoch": 0.5705009276437848, "grad_norm": 6.5128583908081055, "learning_rate": 3.968313365483352e-06, "loss": 0.3162, "step": 12300 }, { "epoch": 0.5705473098330242, "grad_norm": 13.903120040893555, "learning_rate": 3.9675932706680094e-06, "loss": 0.461, "step": 12301 }, { "epoch": 0.5705936920222634, "grad_norm": 5.174735069274902, "learning_rate": 3.966873198219217e-06, "loss": 0.3007, "step": 12302 }, { "epoch": 0.5706400742115028, "grad_norm": 11.398218154907227, "learning_rate": 3.96615314815257e-06, "loss": 0.4406, "step": 12303 }, { "epoch": 0.5706864564007421, "grad_norm": 5.031327247619629, "learning_rate": 3.96543312048367e-06, "loss": 0.3463, "step": 12304 }, { "epoch": 0.5707328385899815, "grad_norm": 9.87920093536377, "learning_rate": 3.964713115228117e-06, "loss": 0.3347, "step": 12305 }, { "epoch": 0.5707792207792208, "grad_norm": 6.992615222930908, "learning_rate": 3.963993132401506e-06, "loss": 0.357, "step": 12306 }, { "epoch": 0.5708256029684601, "grad_norm": 7.92143440246582, "learning_rate": 3.96327317201944e-06, "loss": 0.3444, "step": 12307 }, { "epoch": 0.5708719851576994, "grad_norm": 5.287234783172607, "learning_rate": 3.962553234097512e-06, "loss": 0.3267, "step": 12308 }, { "epoch": 0.5709183673469388, "grad_norm": 14.001903533935547, "learning_rate": 3.961833318651322e-06, "loss": 0.5194, "step": 12309 }, { "epoch": 0.5709647495361782, "grad_norm": 5.937697410583496, "learning_rate": 3.961113425696464e-06, "loss": 0.3531, "step": 12310 }, { "epoch": 0.5710111317254174, "grad_norm": 6.396944999694824, "learning_rate": 3.960393555248537e-06, "loss": 0.2689, "step": 12311 }, { "epoch": 0.5710575139146568, "grad_norm": 5.845190048217773, "learning_rate": 3.959673707323135e-06, "loss": 0.3612, "step": 12312 }, { "epoch": 0.5711038961038961, "grad_norm": 6.393714427947998, "learning_rate": 3.958953881935853e-06, "loss": 0.2273, "step": 12313 }, { "epoch": 0.5711502782931355, "grad_norm": 9.359980583190918, "learning_rate": 3.958234079102288e-06, "loss": 0.4649, "step": 12314 }, { "epoch": 0.5711966604823747, "grad_norm": 7.952116012573242, "learning_rate": 3.9575142988380305e-06, "loss": 0.3833, "step": 12315 }, { "epoch": 0.5712430426716141, "grad_norm": 6.628368377685547, "learning_rate": 3.956794541158676e-06, "loss": 0.2437, "step": 12316 }, { "epoch": 0.5712894248608534, "grad_norm": 5.2958984375, "learning_rate": 3.956074806079816e-06, "loss": 0.3278, "step": 12317 }, { "epoch": 0.5713358070500928, "grad_norm": 7.15139102935791, "learning_rate": 3.955355093617047e-06, "loss": 0.3458, "step": 12318 }, { "epoch": 0.5713821892393321, "grad_norm": 11.955852508544922, "learning_rate": 3.954635403785959e-06, "loss": 0.259, "step": 12319 }, { "epoch": 0.5714285714285714, "grad_norm": 7.095259666442871, "learning_rate": 3.953915736602143e-06, "loss": 0.2628, "step": 12320 }, { "epoch": 0.5714749536178108, "grad_norm": 6.49948263168335, "learning_rate": 3.953196092081191e-06, "loss": 0.3794, "step": 12321 }, { "epoch": 0.5715213358070501, "grad_norm": 6.65559196472168, "learning_rate": 3.952476470238692e-06, "loss": 0.4231, "step": 12322 }, { "epoch": 0.5715677179962895, "grad_norm": 7.608933448791504, "learning_rate": 3.951756871090239e-06, "loss": 0.3182, "step": 12323 }, { "epoch": 0.5716141001855287, "grad_norm": 6.134026050567627, "learning_rate": 3.951037294651422e-06, "loss": 0.3715, "step": 12324 }, { "epoch": 0.5716604823747681, "grad_norm": 9.516128540039062, "learning_rate": 3.950317740937827e-06, "loss": 0.4796, "step": 12325 }, { "epoch": 0.5717068645640074, "grad_norm": 7.916926383972168, "learning_rate": 3.949598209965044e-06, "loss": 0.3314, "step": 12326 }, { "epoch": 0.5717532467532468, "grad_norm": 7.069466590881348, "learning_rate": 3.948878701748662e-06, "loss": 0.4217, "step": 12327 }, { "epoch": 0.571799628942486, "grad_norm": 6.5863752365112305, "learning_rate": 3.948159216304269e-06, "loss": 0.3589, "step": 12328 }, { "epoch": 0.5718460111317254, "grad_norm": 5.579805374145508, "learning_rate": 3.947439753647453e-06, "loss": 0.3644, "step": 12329 }, { "epoch": 0.5718923933209648, "grad_norm": 7.281184673309326, "learning_rate": 3.946720313793798e-06, "loss": 0.335, "step": 12330 }, { "epoch": 0.5719387755102041, "grad_norm": 10.558783531188965, "learning_rate": 3.946000896758892e-06, "loss": 0.3737, "step": 12331 }, { "epoch": 0.5719851576994434, "grad_norm": 8.55649471282959, "learning_rate": 3.94528150255832e-06, "loss": 0.2991, "step": 12332 }, { "epoch": 0.5720315398886827, "grad_norm": 2.9548487663269043, "learning_rate": 3.944562131207669e-06, "loss": 0.194, "step": 12333 }, { "epoch": 0.5720779220779221, "grad_norm": 7.925278186798096, "learning_rate": 3.9438427827225214e-06, "loss": 0.309, "step": 12334 }, { "epoch": 0.5721243042671614, "grad_norm": 7.105108737945557, "learning_rate": 3.943123457118466e-06, "loss": 0.3178, "step": 12335 }, { "epoch": 0.5721706864564008, "grad_norm": 25.925939559936523, "learning_rate": 3.942404154411081e-06, "loss": 0.4118, "step": 12336 }, { "epoch": 0.57221706864564, "grad_norm": 6.748818397521973, "learning_rate": 3.941684874615952e-06, "loss": 0.3727, "step": 12337 }, { "epoch": 0.5722634508348794, "grad_norm": 5.6606059074401855, "learning_rate": 3.940965617748662e-06, "loss": 0.3562, "step": 12338 }, { "epoch": 0.5723098330241188, "grad_norm": 6.928160190582275, "learning_rate": 3.940246383824794e-06, "loss": 0.3211, "step": 12339 }, { "epoch": 0.5723562152133581, "grad_norm": 5.353978633880615, "learning_rate": 3.939527172859931e-06, "loss": 0.291, "step": 12340 }, { "epoch": 0.5724025974025974, "grad_norm": 6.5695953369140625, "learning_rate": 3.93880798486965e-06, "loss": 0.2602, "step": 12341 }, { "epoch": 0.5724489795918367, "grad_norm": 7.40031623840332, "learning_rate": 3.938088819869534e-06, "loss": 0.3557, "step": 12342 }, { "epoch": 0.5724953617810761, "grad_norm": 8.242568969726562, "learning_rate": 3.937369677875164e-06, "loss": 0.4919, "step": 12343 }, { "epoch": 0.5725417439703154, "grad_norm": 7.55444860458374, "learning_rate": 3.93665055890212e-06, "loss": 0.3452, "step": 12344 }, { "epoch": 0.5725881261595547, "grad_norm": 7.339192867279053, "learning_rate": 3.935931462965979e-06, "loss": 0.3722, "step": 12345 }, { "epoch": 0.572634508348794, "grad_norm": 7.628688335418701, "learning_rate": 3.935212390082326e-06, "loss": 0.4292, "step": 12346 }, { "epoch": 0.5726808905380334, "grad_norm": 12.108328819274902, "learning_rate": 3.934493340266732e-06, "loss": 0.3671, "step": 12347 }, { "epoch": 0.5727272727272728, "grad_norm": 5.8896002769470215, "learning_rate": 3.9337743135347775e-06, "loss": 0.3018, "step": 12348 }, { "epoch": 0.5727736549165121, "grad_norm": 6.490663051605225, "learning_rate": 3.933055309902039e-06, "loss": 0.2833, "step": 12349 }, { "epoch": 0.5728200371057514, "grad_norm": 9.364981651306152, "learning_rate": 3.932336329384097e-06, "loss": 0.3013, "step": 12350 }, { "epoch": 0.5728664192949907, "grad_norm": 5.887807369232178, "learning_rate": 3.931617371996526e-06, "loss": 0.3108, "step": 12351 }, { "epoch": 0.5729128014842301, "grad_norm": 5.0006561279296875, "learning_rate": 3.930898437754899e-06, "loss": 0.4209, "step": 12352 }, { "epoch": 0.5729591836734694, "grad_norm": 9.281672477722168, "learning_rate": 3.930179526674794e-06, "loss": 0.3966, "step": 12353 }, { "epoch": 0.5730055658627087, "grad_norm": 3.358952760696411, "learning_rate": 3.929460638771783e-06, "loss": 0.2832, "step": 12354 }, { "epoch": 0.573051948051948, "grad_norm": 9.272893905639648, "learning_rate": 3.928741774061445e-06, "loss": 0.4804, "step": 12355 }, { "epoch": 0.5730983302411874, "grad_norm": 7.215510845184326, "learning_rate": 3.9280229325593535e-06, "loss": 0.3415, "step": 12356 }, { "epoch": 0.5731447124304268, "grad_norm": 6.109920024871826, "learning_rate": 3.927304114281077e-06, "loss": 0.2793, "step": 12357 }, { "epoch": 0.573191094619666, "grad_norm": 11.001754760742188, "learning_rate": 3.926585319242191e-06, "loss": 0.5234, "step": 12358 }, { "epoch": 0.5732374768089054, "grad_norm": 7.135953426361084, "learning_rate": 3.925866547458268e-06, "loss": 0.3245, "step": 12359 }, { "epoch": 0.5732838589981447, "grad_norm": 8.436677932739258, "learning_rate": 3.92514779894488e-06, "loss": 0.2225, "step": 12360 }, { "epoch": 0.5733302411873841, "grad_norm": 5.553075313568115, "learning_rate": 3.924429073717598e-06, "loss": 0.3304, "step": 12361 }, { "epoch": 0.5733766233766234, "grad_norm": 5.803536891937256, "learning_rate": 3.923710371791994e-06, "loss": 0.3432, "step": 12362 }, { "epoch": 0.5734230055658627, "grad_norm": 6.464534282684326, "learning_rate": 3.922991693183636e-06, "loss": 0.3183, "step": 12363 }, { "epoch": 0.573469387755102, "grad_norm": 6.5937819480896, "learning_rate": 3.922273037908095e-06, "loss": 0.2618, "step": 12364 }, { "epoch": 0.5735157699443414, "grad_norm": 9.36491584777832, "learning_rate": 3.92155440598094e-06, "loss": 0.3714, "step": 12365 }, { "epoch": 0.5735621521335807, "grad_norm": 8.115498542785645, "learning_rate": 3.920835797417739e-06, "loss": 0.3488, "step": 12366 }, { "epoch": 0.57360853432282, "grad_norm": 4.911067962646484, "learning_rate": 3.920117212234065e-06, "loss": 0.3228, "step": 12367 }, { "epoch": 0.5736549165120594, "grad_norm": 5.492706298828125, "learning_rate": 3.9193986504454784e-06, "loss": 0.3239, "step": 12368 }, { "epoch": 0.5737012987012987, "grad_norm": 7.54638671875, "learning_rate": 3.918680112067551e-06, "loss": 0.3469, "step": 12369 }, { "epoch": 0.5737476808905381, "grad_norm": 5.067221164703369, "learning_rate": 3.917961597115848e-06, "loss": 0.357, "step": 12370 }, { "epoch": 0.5737940630797773, "grad_norm": 4.5515546798706055, "learning_rate": 3.917243105605936e-06, "loss": 0.3797, "step": 12371 }, { "epoch": 0.5738404452690167, "grad_norm": 8.233460426330566, "learning_rate": 3.916524637553383e-06, "loss": 0.3769, "step": 12372 }, { "epoch": 0.573886827458256, "grad_norm": 6.7436747550964355, "learning_rate": 3.915806192973752e-06, "loss": 0.3201, "step": 12373 }, { "epoch": 0.5739332096474954, "grad_norm": 6.201341152191162, "learning_rate": 3.915087771882607e-06, "loss": 0.3225, "step": 12374 }, { "epoch": 0.5739795918367347, "grad_norm": 7.495726108551025, "learning_rate": 3.914369374295514e-06, "loss": 0.2899, "step": 12375 }, { "epoch": 0.574025974025974, "grad_norm": 13.679107666015625, "learning_rate": 3.9136510002280344e-06, "loss": 0.334, "step": 12376 }, { "epoch": 0.5740723562152134, "grad_norm": 13.922158241271973, "learning_rate": 3.912932649695734e-06, "loss": 0.5009, "step": 12377 }, { "epoch": 0.5741187384044527, "grad_norm": 7.365459442138672, "learning_rate": 3.9122143227141755e-06, "loss": 0.4103, "step": 12378 }, { "epoch": 0.5741651205936921, "grad_norm": 6.877885818481445, "learning_rate": 3.9114960192989186e-06, "loss": 0.3586, "step": 12379 }, { "epoch": 0.5742115027829313, "grad_norm": 6.680383682250977, "learning_rate": 3.910777739465526e-06, "loss": 0.3326, "step": 12380 }, { "epoch": 0.5742578849721707, "grad_norm": 5.554231643676758, "learning_rate": 3.910059483229559e-06, "loss": 0.2902, "step": 12381 }, { "epoch": 0.57430426716141, "grad_norm": 4.625906944274902, "learning_rate": 3.909341250606578e-06, "loss": 0.3403, "step": 12382 }, { "epoch": 0.5743506493506494, "grad_norm": 11.332825660705566, "learning_rate": 3.908623041612145e-06, "loss": 0.4181, "step": 12383 }, { "epoch": 0.5743970315398886, "grad_norm": 6.208878517150879, "learning_rate": 3.90790485626182e-06, "loss": 0.3773, "step": 12384 }, { "epoch": 0.574443413729128, "grad_norm": 5.519937992095947, "learning_rate": 3.907186694571157e-06, "loss": 0.3532, "step": 12385 }, { "epoch": 0.5744897959183674, "grad_norm": 6.447519779205322, "learning_rate": 3.9064685565557185e-06, "loss": 0.2985, "step": 12386 }, { "epoch": 0.5745361781076067, "grad_norm": 38.70393371582031, "learning_rate": 3.905750442231062e-06, "loss": 0.6273, "step": 12387 }, { "epoch": 0.574582560296846, "grad_norm": 4.330963134765625, "learning_rate": 3.9050323516127445e-06, "loss": 0.3503, "step": 12388 }, { "epoch": 0.5746289424860853, "grad_norm": 8.751848220825195, "learning_rate": 3.904314284716326e-06, "loss": 0.3548, "step": 12389 }, { "epoch": 0.5746753246753247, "grad_norm": 3.7504866123199463, "learning_rate": 3.903596241557358e-06, "loss": 0.1674, "step": 12390 }, { "epoch": 0.574721706864564, "grad_norm": 6.309416770935059, "learning_rate": 3.902878222151401e-06, "loss": 0.3861, "step": 12391 }, { "epoch": 0.5747680890538034, "grad_norm": 6.754518032073975, "learning_rate": 3.902160226514007e-06, "loss": 0.3512, "step": 12392 }, { "epoch": 0.5748144712430426, "grad_norm": 6.845743179321289, "learning_rate": 3.901442254660733e-06, "loss": 0.3604, "step": 12393 }, { "epoch": 0.574860853432282, "grad_norm": 7.922157287597656, "learning_rate": 3.9007243066071325e-06, "loss": 0.465, "step": 12394 }, { "epoch": 0.5749072356215214, "grad_norm": 6.875747203826904, "learning_rate": 3.900006382368763e-06, "loss": 0.3034, "step": 12395 }, { "epoch": 0.5749536178107607, "grad_norm": 6.814456939697266, "learning_rate": 3.899288481961173e-06, "loss": 0.3202, "step": 12396 }, { "epoch": 0.575, "grad_norm": 8.137056350708008, "learning_rate": 3.8985706053999175e-06, "loss": 0.3764, "step": 12397 }, { "epoch": 0.5750463821892393, "grad_norm": 7.237949371337891, "learning_rate": 3.897852752700548e-06, "loss": 0.3171, "step": 12398 }, { "epoch": 0.5750927643784787, "grad_norm": 4.971520900726318, "learning_rate": 3.897134923878618e-06, "loss": 0.3922, "step": 12399 }, { "epoch": 0.575139146567718, "grad_norm": 7.45949649810791, "learning_rate": 3.89641711894968e-06, "loss": 0.3165, "step": 12400 }, { "epoch": 0.5751855287569573, "grad_norm": 7.291889667510986, "learning_rate": 3.895699337929281e-06, "loss": 0.3906, "step": 12401 }, { "epoch": 0.5752319109461966, "grad_norm": 6.042549133300781, "learning_rate": 3.8949815808329735e-06, "loss": 0.3372, "step": 12402 }, { "epoch": 0.575278293135436, "grad_norm": 7.7223968505859375, "learning_rate": 3.8942638476763075e-06, "loss": 0.406, "step": 12403 }, { "epoch": 0.5753246753246753, "grad_norm": 5.9253458976745605, "learning_rate": 3.893546138474832e-06, "loss": 0.3601, "step": 12404 }, { "epoch": 0.5753710575139147, "grad_norm": 4.996538162231445, "learning_rate": 3.892828453244095e-06, "loss": 0.4775, "step": 12405 }, { "epoch": 0.575417439703154, "grad_norm": 8.976539611816406, "learning_rate": 3.892110791999649e-06, "loss": 0.439, "step": 12406 }, { "epoch": 0.5754638218923933, "grad_norm": 5.4028401374816895, "learning_rate": 3.891393154757035e-06, "loss": 0.2848, "step": 12407 }, { "epoch": 0.5755102040816327, "grad_norm": 12.975516319274902, "learning_rate": 3.8906755415318045e-06, "loss": 0.3835, "step": 12408 }, { "epoch": 0.575556586270872, "grad_norm": 6.20745325088501, "learning_rate": 3.889957952339503e-06, "loss": 0.3536, "step": 12409 }, { "epoch": 0.5756029684601113, "grad_norm": 12.08310317993164, "learning_rate": 3.8892403871956765e-06, "loss": 0.4401, "step": 12410 }, { "epoch": 0.5756493506493506, "grad_norm": 7.5550761222839355, "learning_rate": 3.8885228461158735e-06, "loss": 0.3474, "step": 12411 }, { "epoch": 0.57569573283859, "grad_norm": 7.51668119430542, "learning_rate": 3.887805329115635e-06, "loss": 0.2722, "step": 12412 }, { "epoch": 0.5757421150278293, "grad_norm": 9.407624244689941, "learning_rate": 3.887087836210507e-06, "loss": 0.3687, "step": 12413 }, { "epoch": 0.5757884972170686, "grad_norm": 5.157167434692383, "learning_rate": 3.886370367416034e-06, "loss": 0.1751, "step": 12414 }, { "epoch": 0.575834879406308, "grad_norm": 4.653213024139404, "learning_rate": 3.88565292274776e-06, "loss": 0.314, "step": 12415 }, { "epoch": 0.5758812615955473, "grad_norm": 12.98370361328125, "learning_rate": 3.884935502221229e-06, "loss": 0.3929, "step": 12416 }, { "epoch": 0.5759276437847867, "grad_norm": 8.568690299987793, "learning_rate": 3.884218105851981e-06, "loss": 0.2768, "step": 12417 }, { "epoch": 0.575974025974026, "grad_norm": 8.810643196105957, "learning_rate": 3.883500733655559e-06, "loss": 0.4884, "step": 12418 }, { "epoch": 0.5760204081632653, "grad_norm": 7.854263782501221, "learning_rate": 3.882783385647505e-06, "loss": 0.3042, "step": 12419 }, { "epoch": 0.5760667903525046, "grad_norm": 5.6282057762146, "learning_rate": 3.8820660618433595e-06, "loss": 0.3235, "step": 12420 }, { "epoch": 0.576113172541744, "grad_norm": 7.067296504974365, "learning_rate": 3.881348762258664e-06, "loss": 0.3572, "step": 12421 }, { "epoch": 0.5761595547309833, "grad_norm": 4.542050361633301, "learning_rate": 3.880631486908958e-06, "loss": 0.2208, "step": 12422 }, { "epoch": 0.5762059369202226, "grad_norm": 7.344119548797607, "learning_rate": 3.879914235809778e-06, "loss": 0.293, "step": 12423 }, { "epoch": 0.576252319109462, "grad_norm": 5.595691204071045, "learning_rate": 3.8791970089766665e-06, "loss": 0.2456, "step": 12424 }, { "epoch": 0.5762987012987013, "grad_norm": 6.196579933166504, "learning_rate": 3.87847980642516e-06, "loss": 0.3915, "step": 12425 }, { "epoch": 0.5763450834879407, "grad_norm": 9.594114303588867, "learning_rate": 3.877762628170799e-06, "loss": 0.3384, "step": 12426 }, { "epoch": 0.5763914656771799, "grad_norm": 11.239749908447266, "learning_rate": 3.877045474229116e-06, "loss": 0.2992, "step": 12427 }, { "epoch": 0.5764378478664193, "grad_norm": 10.665119171142578, "learning_rate": 3.876328344615653e-06, "loss": 0.4383, "step": 12428 }, { "epoch": 0.5764842300556586, "grad_norm": 7.59539794921875, "learning_rate": 3.875611239345942e-06, "loss": 0.3802, "step": 12429 }, { "epoch": 0.576530612244898, "grad_norm": 5.4131317138671875, "learning_rate": 3.874894158435522e-06, "loss": 0.3032, "step": 12430 }, { "epoch": 0.5765769944341373, "grad_norm": 6.353158950805664, "learning_rate": 3.874177101899926e-06, "loss": 0.3325, "step": 12431 }, { "epoch": 0.5766233766233766, "grad_norm": 7.830877780914307, "learning_rate": 3.873460069754688e-06, "loss": 0.3499, "step": 12432 }, { "epoch": 0.576669758812616, "grad_norm": 7.128381729125977, "learning_rate": 3.872743062015346e-06, "loss": 0.4499, "step": 12433 }, { "epoch": 0.5767161410018553, "grad_norm": 6.5505475997924805, "learning_rate": 3.872026078697429e-06, "loss": 0.3145, "step": 12434 }, { "epoch": 0.5767625231910947, "grad_norm": 5.445227146148682, "learning_rate": 3.871309119816471e-06, "loss": 0.2785, "step": 12435 }, { "epoch": 0.5768089053803339, "grad_norm": 4.714645862579346, "learning_rate": 3.870592185388008e-06, "loss": 0.2289, "step": 12436 }, { "epoch": 0.5768552875695733, "grad_norm": 4.351012706756592, "learning_rate": 3.869875275427569e-06, "loss": 0.2921, "step": 12437 }, { "epoch": 0.5769016697588126, "grad_norm": 9.431163787841797, "learning_rate": 3.869158389950686e-06, "loss": 0.2795, "step": 12438 }, { "epoch": 0.576948051948052, "grad_norm": 7.189319133758545, "learning_rate": 3.868441528972889e-06, "loss": 0.3114, "step": 12439 }, { "epoch": 0.5769944341372912, "grad_norm": 7.080638885498047, "learning_rate": 3.86772469250971e-06, "loss": 0.3884, "step": 12440 }, { "epoch": 0.5770408163265306, "grad_norm": 5.554019451141357, "learning_rate": 3.86700788057668e-06, "loss": 0.3359, "step": 12441 }, { "epoch": 0.57708719851577, "grad_norm": 7.7757463455200195, "learning_rate": 3.866291093189324e-06, "loss": 0.3023, "step": 12442 }, { "epoch": 0.5771335807050093, "grad_norm": 6.967491626739502, "learning_rate": 3.865574330363175e-06, "loss": 0.2586, "step": 12443 }, { "epoch": 0.5771799628942486, "grad_norm": 8.588028907775879, "learning_rate": 3.86485759211376e-06, "loss": 0.3981, "step": 12444 }, { "epoch": 0.5772263450834879, "grad_norm": 7.437984466552734, "learning_rate": 3.864140878456605e-06, "loss": 0.3211, "step": 12445 }, { "epoch": 0.5772727272727273, "grad_norm": 6.03035831451416, "learning_rate": 3.86342418940724e-06, "loss": 0.3141, "step": 12446 }, { "epoch": 0.5773191094619666, "grad_norm": 8.244473457336426, "learning_rate": 3.86270752498119e-06, "loss": 0.3808, "step": 12447 }, { "epoch": 0.577365491651206, "grad_norm": 8.472044944763184, "learning_rate": 3.861990885193983e-06, "loss": 0.3118, "step": 12448 }, { "epoch": 0.5774118738404452, "grad_norm": 10.860024452209473, "learning_rate": 3.861274270061142e-06, "loss": 0.3935, "step": 12449 }, { "epoch": 0.5774582560296846, "grad_norm": 6.238738536834717, "learning_rate": 3.8605576795981936e-06, "loss": 0.3441, "step": 12450 }, { "epoch": 0.577504638218924, "grad_norm": 7.5851969718933105, "learning_rate": 3.859841113820662e-06, "loss": 0.353, "step": 12451 }, { "epoch": 0.5775510204081633, "grad_norm": 10.377373695373535, "learning_rate": 3.859124572744072e-06, "loss": 0.4331, "step": 12452 }, { "epoch": 0.5775974025974026, "grad_norm": 7.604257106781006, "learning_rate": 3.8584080563839446e-06, "loss": 0.327, "step": 12453 }, { "epoch": 0.5776437847866419, "grad_norm": 3.835364818572998, "learning_rate": 3.857691564755805e-06, "loss": 0.3379, "step": 12454 }, { "epoch": 0.5776901669758813, "grad_norm": 13.793906211853027, "learning_rate": 3.856975097875177e-06, "loss": 0.534, "step": 12455 }, { "epoch": 0.5777365491651206, "grad_norm": 4.75562047958374, "learning_rate": 3.856258655757578e-06, "loss": 0.2438, "step": 12456 }, { "epoch": 0.5777829313543599, "grad_norm": 5.057002544403076, "learning_rate": 3.855542238418534e-06, "loss": 0.3476, "step": 12457 }, { "epoch": 0.5778293135435992, "grad_norm": 7.4504570960998535, "learning_rate": 3.854825845873562e-06, "loss": 0.3763, "step": 12458 }, { "epoch": 0.5778756957328386, "grad_norm": 5.109736442565918, "learning_rate": 3.854109478138186e-06, "loss": 0.2087, "step": 12459 }, { "epoch": 0.577922077922078, "grad_norm": 5.1963605880737305, "learning_rate": 3.8533931352279234e-06, "loss": 0.2895, "step": 12460 }, { "epoch": 0.5779684601113173, "grad_norm": 8.149958610534668, "learning_rate": 3.852676817158292e-06, "loss": 0.4372, "step": 12461 }, { "epoch": 0.5780148423005566, "grad_norm": 6.131230354309082, "learning_rate": 3.851960523944814e-06, "loss": 0.3823, "step": 12462 }, { "epoch": 0.5780612244897959, "grad_norm": 8.078202247619629, "learning_rate": 3.851244255603005e-06, "loss": 0.2161, "step": 12463 }, { "epoch": 0.5781076066790353, "grad_norm": 5.508965492248535, "learning_rate": 3.850528012148383e-06, "loss": 0.2911, "step": 12464 }, { "epoch": 0.5781539888682746, "grad_norm": 9.876903533935547, "learning_rate": 3.849811793596465e-06, "loss": 0.3351, "step": 12465 }, { "epoch": 0.5782003710575139, "grad_norm": 7.542329788208008, "learning_rate": 3.84909559996277e-06, "loss": 0.3505, "step": 12466 }, { "epoch": 0.5782467532467532, "grad_norm": 4.051523208618164, "learning_rate": 3.8483794312628105e-06, "loss": 0.2371, "step": 12467 }, { "epoch": 0.5782931354359926, "grad_norm": 5.340789794921875, "learning_rate": 3.8476632875121025e-06, "loss": 0.3241, "step": 12468 }, { "epoch": 0.578339517625232, "grad_norm": 4.5267415046691895, "learning_rate": 3.846947168726163e-06, "loss": 0.309, "step": 12469 }, { "epoch": 0.5783858998144712, "grad_norm": 9.252001762390137, "learning_rate": 3.8462310749205024e-06, "loss": 0.386, "step": 12470 }, { "epoch": 0.5784322820037106, "grad_norm": 7.305608749389648, "learning_rate": 3.84551500611064e-06, "loss": 0.3595, "step": 12471 }, { "epoch": 0.5784786641929499, "grad_norm": 3.9616572856903076, "learning_rate": 3.844798962312085e-06, "loss": 0.32, "step": 12472 }, { "epoch": 0.5785250463821893, "grad_norm": 6.686038494110107, "learning_rate": 3.84408294354035e-06, "loss": 0.3853, "step": 12473 }, { "epoch": 0.5785714285714286, "grad_norm": 5.677772521972656, "learning_rate": 3.843366949810949e-06, "loss": 0.296, "step": 12474 }, { "epoch": 0.5786178107606679, "grad_norm": 5.387065887451172, "learning_rate": 3.842650981139393e-06, "loss": 0.333, "step": 12475 }, { "epoch": 0.5786641929499072, "grad_norm": 4.482337951660156, "learning_rate": 3.8419350375411934e-06, "loss": 0.3599, "step": 12476 }, { "epoch": 0.5787105751391466, "grad_norm": 13.654534339904785, "learning_rate": 3.841219119031862e-06, "loss": 0.3769, "step": 12477 }, { "epoch": 0.578756957328386, "grad_norm": 13.189486503601074, "learning_rate": 3.840503225626904e-06, "loss": 0.4008, "step": 12478 }, { "epoch": 0.5788033395176252, "grad_norm": 8.928150177001953, "learning_rate": 3.839787357341834e-06, "loss": 0.3249, "step": 12479 }, { "epoch": 0.5788497217068646, "grad_norm": 7.734890937805176, "learning_rate": 3.839071514192158e-06, "loss": 0.3209, "step": 12480 }, { "epoch": 0.5788961038961039, "grad_norm": 6.8643717765808105, "learning_rate": 3.838355696193386e-06, "loss": 0.3767, "step": 12481 }, { "epoch": 0.5789424860853433, "grad_norm": 8.920424461364746, "learning_rate": 3.8376399033610265e-06, "loss": 0.3033, "step": 12482 }, { "epoch": 0.5789888682745825, "grad_norm": 12.541627883911133, "learning_rate": 3.836924135710583e-06, "loss": 0.4844, "step": 12483 }, { "epoch": 0.5790352504638219, "grad_norm": 6.188072681427002, "learning_rate": 3.836208393257566e-06, "loss": 0.3364, "step": 12484 }, { "epoch": 0.5790816326530612, "grad_norm": 7.626730918884277, "learning_rate": 3.835492676017479e-06, "loss": 0.3693, "step": 12485 }, { "epoch": 0.5791280148423006, "grad_norm": 8.196757316589355, "learning_rate": 3.834776984005829e-06, "loss": 0.2567, "step": 12486 }, { "epoch": 0.5791743970315398, "grad_norm": 6.563895225524902, "learning_rate": 3.834061317238122e-06, "loss": 0.4093, "step": 12487 }, { "epoch": 0.5792207792207792, "grad_norm": 10.636505126953125, "learning_rate": 3.833345675729863e-06, "loss": 0.3917, "step": 12488 }, { "epoch": 0.5792671614100185, "grad_norm": 9.315305709838867, "learning_rate": 3.832630059496553e-06, "loss": 0.421, "step": 12489 }, { "epoch": 0.5793135435992579, "grad_norm": 9.410296440124512, "learning_rate": 3.831914468553695e-06, "loss": 0.336, "step": 12490 }, { "epoch": 0.5793599257884973, "grad_norm": 4.584338665008545, "learning_rate": 3.831198902916797e-06, "loss": 0.2048, "step": 12491 }, { "epoch": 0.5794063079777365, "grad_norm": 7.409265995025635, "learning_rate": 3.8304833626013565e-06, "loss": 0.2331, "step": 12492 }, { "epoch": 0.5794526901669759, "grad_norm": 12.639522552490234, "learning_rate": 3.829767847622879e-06, "loss": 0.3996, "step": 12493 }, { "epoch": 0.5794990723562152, "grad_norm": 10.086808204650879, "learning_rate": 3.829052357996863e-06, "loss": 0.3603, "step": 12494 }, { "epoch": 0.5795454545454546, "grad_norm": 9.669646263122559, "learning_rate": 3.828336893738809e-06, "loss": 0.3634, "step": 12495 }, { "epoch": 0.5795918367346938, "grad_norm": 4.9714860916137695, "learning_rate": 3.827621454864219e-06, "loss": 0.3823, "step": 12496 }, { "epoch": 0.5796382189239332, "grad_norm": 5.9347662925720215, "learning_rate": 3.826906041388591e-06, "loss": 0.287, "step": 12497 }, { "epoch": 0.5796846011131725, "grad_norm": 17.100934982299805, "learning_rate": 3.8261906533274254e-06, "loss": 0.461, "step": 12498 }, { "epoch": 0.5797309833024119, "grad_norm": 4.742152690887451, "learning_rate": 3.825475290696222e-06, "loss": 0.3577, "step": 12499 }, { "epoch": 0.5797773654916512, "grad_norm": 10.02562141418457, "learning_rate": 3.8247599535104755e-06, "loss": 0.3078, "step": 12500 }, { "epoch": 0.5798237476808905, "grad_norm": 7.730583190917969, "learning_rate": 3.824044641785684e-06, "loss": 0.3361, "step": 12501 }, { "epoch": 0.5798701298701299, "grad_norm": 8.042909622192383, "learning_rate": 3.823329355537345e-06, "loss": 0.3236, "step": 12502 }, { "epoch": 0.5799165120593692, "grad_norm": 11.060189247131348, "learning_rate": 3.822614094780955e-06, "loss": 0.3259, "step": 12503 }, { "epoch": 0.5799628942486086, "grad_norm": 6.650015354156494, "learning_rate": 3.821898859532013e-06, "loss": 0.3297, "step": 12504 }, { "epoch": 0.5800092764378478, "grad_norm": 6.015474796295166, "learning_rate": 3.821183649806007e-06, "loss": 0.4115, "step": 12505 }, { "epoch": 0.5800556586270872, "grad_norm": 8.295560836791992, "learning_rate": 3.820468465618437e-06, "loss": 0.3797, "step": 12506 }, { "epoch": 0.5801020408163265, "grad_norm": 6.485301971435547, "learning_rate": 3.819753306984794e-06, "loss": 0.4172, "step": 12507 }, { "epoch": 0.5801484230055659, "grad_norm": 18.701826095581055, "learning_rate": 3.819038173920574e-06, "loss": 0.4225, "step": 12508 }, { "epoch": 0.5801948051948052, "grad_norm": 6.621581077575684, "learning_rate": 3.8183230664412685e-06, "loss": 0.4405, "step": 12509 }, { "epoch": 0.5802411873840445, "grad_norm": 6.440378665924072, "learning_rate": 3.817607984562373e-06, "loss": 0.3458, "step": 12510 }, { "epoch": 0.5802875695732839, "grad_norm": 7.788339138031006, "learning_rate": 3.816892928299374e-06, "loss": 0.3536, "step": 12511 }, { "epoch": 0.5803339517625232, "grad_norm": 5.444510459899902, "learning_rate": 3.816177897667767e-06, "loss": 0.298, "step": 12512 }, { "epoch": 0.5803803339517625, "grad_norm": 8.271385192871094, "learning_rate": 3.81546289268304e-06, "loss": 0.3499, "step": 12513 }, { "epoch": 0.5804267161410018, "grad_norm": 6.438950538635254, "learning_rate": 3.814747913360687e-06, "loss": 0.4128, "step": 12514 }, { "epoch": 0.5804730983302412, "grad_norm": 4.576785564422607, "learning_rate": 3.814032959716195e-06, "loss": 0.2701, "step": 12515 }, { "epoch": 0.5805194805194805, "grad_norm": 9.430153846740723, "learning_rate": 3.813318031765052e-06, "loss": 0.2709, "step": 12516 }, { "epoch": 0.5805658627087199, "grad_norm": 6.038059234619141, "learning_rate": 3.8126031295227484e-06, "loss": 0.3917, "step": 12517 }, { "epoch": 0.5806122448979592, "grad_norm": 15.84240436553955, "learning_rate": 3.8118882530047713e-06, "loss": 0.2486, "step": 12518 }, { "epoch": 0.5806586270871985, "grad_norm": 8.030080795288086, "learning_rate": 3.8111734022266087e-06, "loss": 0.3715, "step": 12519 }, { "epoch": 0.5807050092764379, "grad_norm": 9.350557327270508, "learning_rate": 3.8104585772037493e-06, "loss": 0.3606, "step": 12520 }, { "epoch": 0.5807513914656772, "grad_norm": 3.1105544567108154, "learning_rate": 3.8097437779516754e-06, "loss": 0.2449, "step": 12521 }, { "epoch": 0.5807977736549165, "grad_norm": 4.009233474731445, "learning_rate": 3.8090290044858747e-06, "loss": 0.2848, "step": 12522 }, { "epoch": 0.5808441558441558, "grad_norm": 7.618555545806885, "learning_rate": 3.8083142568218325e-06, "loss": 0.4253, "step": 12523 }, { "epoch": 0.5808905380333952, "grad_norm": 5.723200798034668, "learning_rate": 3.807599534975033e-06, "loss": 0.4067, "step": 12524 }, { "epoch": 0.5809369202226345, "grad_norm": 6.613861083984375, "learning_rate": 3.806884838960961e-06, "loss": 0.3297, "step": 12525 }, { "epoch": 0.5809833024118738, "grad_norm": 5.803737163543701, "learning_rate": 3.806170168795102e-06, "loss": 0.2907, "step": 12526 }, { "epoch": 0.5810296846011131, "grad_norm": 4.449652194976807, "learning_rate": 3.805455524492934e-06, "loss": 0.2362, "step": 12527 }, { "epoch": 0.5810760667903525, "grad_norm": 12.37855339050293, "learning_rate": 3.8047409060699426e-06, "loss": 0.4318, "step": 12528 }, { "epoch": 0.5811224489795919, "grad_norm": 4.1883955001831055, "learning_rate": 3.804026313541609e-06, "loss": 0.2787, "step": 12529 }, { "epoch": 0.5811688311688312, "grad_norm": 5.605139255523682, "learning_rate": 3.803311746923414e-06, "loss": 0.3229, "step": 12530 }, { "epoch": 0.5812152133580705, "grad_norm": 9.250308990478516, "learning_rate": 3.8025972062308407e-06, "loss": 0.3892, "step": 12531 }, { "epoch": 0.5812615955473098, "grad_norm": 5.183818340301514, "learning_rate": 3.801882691479366e-06, "loss": 0.2442, "step": 12532 }, { "epoch": 0.5813079777365492, "grad_norm": 9.311567306518555, "learning_rate": 3.8011682026844706e-06, "loss": 0.4678, "step": 12533 }, { "epoch": 0.5813543599257885, "grad_norm": 4.409888744354248, "learning_rate": 3.8004537398616336e-06, "loss": 0.2539, "step": 12534 }, { "epoch": 0.5814007421150278, "grad_norm": 7.844252586364746, "learning_rate": 3.7997393030263336e-06, "loss": 0.3382, "step": 12535 }, { "epoch": 0.5814471243042671, "grad_norm": 9.316183090209961, "learning_rate": 3.7990248921940485e-06, "loss": 0.3746, "step": 12536 }, { "epoch": 0.5814935064935065, "grad_norm": 5.358190059661865, "learning_rate": 3.7983105073802583e-06, "loss": 0.3403, "step": 12537 }, { "epoch": 0.5815398886827459, "grad_norm": 8.138801574707031, "learning_rate": 3.7975961486004344e-06, "loss": 0.2683, "step": 12538 }, { "epoch": 0.5815862708719851, "grad_norm": 9.022405624389648, "learning_rate": 3.796881815870056e-06, "loss": 0.4532, "step": 12539 }, { "epoch": 0.5816326530612245, "grad_norm": 10.473015785217285, "learning_rate": 3.7961675092045986e-06, "loss": 0.3131, "step": 12540 }, { "epoch": 0.5816790352504638, "grad_norm": 11.546151161193848, "learning_rate": 3.7954532286195365e-06, "loss": 0.3747, "step": 12541 }, { "epoch": 0.5817254174397032, "grad_norm": 5.445003032684326, "learning_rate": 3.794738974130348e-06, "loss": 0.3432, "step": 12542 }, { "epoch": 0.5817717996289424, "grad_norm": 7.61956787109375, "learning_rate": 3.7940247457525014e-06, "loss": 0.3501, "step": 12543 }, { "epoch": 0.5818181818181818, "grad_norm": 9.323055267333984, "learning_rate": 3.7933105435014727e-06, "loss": 0.434, "step": 12544 }, { "epoch": 0.5818645640074211, "grad_norm": 8.301440238952637, "learning_rate": 3.792596367392734e-06, "loss": 0.3362, "step": 12545 }, { "epoch": 0.5819109461966605, "grad_norm": 17.39828109741211, "learning_rate": 3.7918822174417584e-06, "loss": 0.3298, "step": 12546 }, { "epoch": 0.5819573283858999, "grad_norm": 6.369910717010498, "learning_rate": 3.791168093664018e-06, "loss": 0.3516, "step": 12547 }, { "epoch": 0.5820037105751391, "grad_norm": 3.9202582836151123, "learning_rate": 3.790453996074985e-06, "loss": 0.293, "step": 12548 }, { "epoch": 0.5820500927643785, "grad_norm": 5.4066901206970215, "learning_rate": 3.7897399246901255e-06, "loss": 0.3845, "step": 12549 }, { "epoch": 0.5820964749536178, "grad_norm": 11.97877025604248, "learning_rate": 3.789025879524913e-06, "loss": 0.2928, "step": 12550 }, { "epoch": 0.5821428571428572, "grad_norm": 6.370843887329102, "learning_rate": 3.7883118605948154e-06, "loss": 0.2788, "step": 12551 }, { "epoch": 0.5821892393320964, "grad_norm": 7.242530345916748, "learning_rate": 3.787597867915303e-06, "loss": 0.3732, "step": 12552 }, { "epoch": 0.5822356215213358, "grad_norm": 11.390172004699707, "learning_rate": 3.786883901501845e-06, "loss": 0.2592, "step": 12553 }, { "epoch": 0.5822820037105751, "grad_norm": 10.763396263122559, "learning_rate": 3.7861699613699053e-06, "loss": 0.2101, "step": 12554 }, { "epoch": 0.5823283858998145, "grad_norm": 5.224686622619629, "learning_rate": 3.7854560475349533e-06, "loss": 0.2571, "step": 12555 }, { "epoch": 0.5823747680890538, "grad_norm": 7.973050594329834, "learning_rate": 3.7847421600124556e-06, "loss": 0.3615, "step": 12556 }, { "epoch": 0.5824211502782931, "grad_norm": 9.057783126831055, "learning_rate": 3.7840282988178777e-06, "loss": 0.381, "step": 12557 }, { "epoch": 0.5824675324675325, "grad_norm": 9.575138092041016, "learning_rate": 3.783314463966685e-06, "loss": 0.4228, "step": 12558 }, { "epoch": 0.5825139146567718, "grad_norm": 8.835593223571777, "learning_rate": 3.7826006554743456e-06, "loss": 0.4632, "step": 12559 }, { "epoch": 0.5825602968460112, "grad_norm": 5.836326599121094, "learning_rate": 3.7818868733563187e-06, "loss": 0.3684, "step": 12560 }, { "epoch": 0.5826066790352504, "grad_norm": 9.829950332641602, "learning_rate": 3.7811731176280695e-06, "loss": 0.3732, "step": 12561 }, { "epoch": 0.5826530612244898, "grad_norm": 4.958055019378662, "learning_rate": 3.780459388305062e-06, "loss": 0.3629, "step": 12562 }, { "epoch": 0.5826994434137291, "grad_norm": 8.590991973876953, "learning_rate": 3.779745685402759e-06, "loss": 0.3188, "step": 12563 }, { "epoch": 0.5827458256029685, "grad_norm": 6.500023365020752, "learning_rate": 3.779032008936624e-06, "loss": 0.2909, "step": 12564 }, { "epoch": 0.5827922077922078, "grad_norm": 7.387854099273682, "learning_rate": 3.7783183589221146e-06, "loss": 0.2765, "step": 12565 }, { "epoch": 0.5828385899814471, "grad_norm": 5.819685459136963, "learning_rate": 3.7776047353746935e-06, "loss": 0.3688, "step": 12566 }, { "epoch": 0.5828849721706865, "grad_norm": 5.199089527130127, "learning_rate": 3.776891138309821e-06, "loss": 0.3608, "step": 12567 }, { "epoch": 0.5829313543599258, "grad_norm": 6.134703636169434, "learning_rate": 3.7761775677429567e-06, "loss": 0.2819, "step": 12568 }, { "epoch": 0.5829777365491651, "grad_norm": 5.259434700012207, "learning_rate": 3.775464023689561e-06, "loss": 0.2951, "step": 12569 }, { "epoch": 0.5830241187384044, "grad_norm": 8.779350280761719, "learning_rate": 3.774750506165092e-06, "loss": 0.3059, "step": 12570 }, { "epoch": 0.5830705009276438, "grad_norm": 24.283798217773438, "learning_rate": 3.7740370151850055e-06, "loss": 0.635, "step": 12571 }, { "epoch": 0.5831168831168831, "grad_norm": 6.757794380187988, "learning_rate": 3.7733235507647602e-06, "loss": 0.2983, "step": 12572 }, { "epoch": 0.5831632653061225, "grad_norm": 6.2834978103637695, "learning_rate": 3.7726101129198133e-06, "loss": 0.1709, "step": 12573 }, { "epoch": 0.5832096474953617, "grad_norm": 5.292276859283447, "learning_rate": 3.7718967016656227e-06, "loss": 0.3434, "step": 12574 }, { "epoch": 0.5832560296846011, "grad_norm": 6.253766059875488, "learning_rate": 3.7711833170176415e-06, "loss": 0.4012, "step": 12575 }, { "epoch": 0.5833024118738405, "grad_norm": 5.583893775939941, "learning_rate": 3.770469958991325e-06, "loss": 0.4289, "step": 12576 }, { "epoch": 0.5833487940630798, "grad_norm": 6.53902006149292, "learning_rate": 3.769756627602128e-06, "loss": 0.3735, "step": 12577 }, { "epoch": 0.5833951762523191, "grad_norm": 15.454971313476562, "learning_rate": 3.7690433228655043e-06, "loss": 0.4115, "step": 12578 }, { "epoch": 0.5834415584415584, "grad_norm": 8.291454315185547, "learning_rate": 3.7683300447969095e-06, "loss": 0.3349, "step": 12579 }, { "epoch": 0.5834879406307978, "grad_norm": 6.494968414306641, "learning_rate": 3.7676167934117935e-06, "loss": 0.2379, "step": 12580 }, { "epoch": 0.5835343228200371, "grad_norm": 6.215189456939697, "learning_rate": 3.7669035687256105e-06, "loss": 0.2791, "step": 12581 }, { "epoch": 0.5835807050092764, "grad_norm": 5.007765769958496, "learning_rate": 3.7661903707538107e-06, "loss": 0.2855, "step": 12582 }, { "epoch": 0.5836270871985157, "grad_norm": 6.895212650299072, "learning_rate": 3.7654771995118457e-06, "loss": 0.3039, "step": 12583 }, { "epoch": 0.5836734693877551, "grad_norm": 11.266886711120605, "learning_rate": 3.7647640550151666e-06, "loss": 0.3745, "step": 12584 }, { "epoch": 0.5837198515769945, "grad_norm": 8.027288436889648, "learning_rate": 3.7640509372792218e-06, "loss": 0.324, "step": 12585 }, { "epoch": 0.5837662337662337, "grad_norm": 7.153608322143555, "learning_rate": 3.7633378463194626e-06, "loss": 0.2533, "step": 12586 }, { "epoch": 0.5838126159554731, "grad_norm": 11.634356498718262, "learning_rate": 3.7626247821513357e-06, "loss": 0.4773, "step": 12587 }, { "epoch": 0.5838589981447124, "grad_norm": 6.761167526245117, "learning_rate": 3.7619117447902904e-06, "loss": 0.2822, "step": 12588 }, { "epoch": 0.5839053803339518, "grad_norm": 6.172952651977539, "learning_rate": 3.7611987342517746e-06, "loss": 0.3299, "step": 12589 }, { "epoch": 0.5839517625231911, "grad_norm": 6.3032121658325195, "learning_rate": 3.7604857505512342e-06, "loss": 0.3246, "step": 12590 }, { "epoch": 0.5839981447124304, "grad_norm": 9.38470458984375, "learning_rate": 3.7597727937041163e-06, "loss": 0.4268, "step": 12591 }, { "epoch": 0.5840445269016697, "grad_norm": 6.008424282073975, "learning_rate": 3.7590598637258678e-06, "loss": 0.2652, "step": 12592 }, { "epoch": 0.5840909090909091, "grad_norm": 8.13676643371582, "learning_rate": 3.7583469606319316e-06, "loss": 0.318, "step": 12593 }, { "epoch": 0.5841372912801485, "grad_norm": 3.809612512588501, "learning_rate": 3.7576340844377545e-06, "loss": 0.2364, "step": 12594 }, { "epoch": 0.5841836734693877, "grad_norm": 12.378022193908691, "learning_rate": 3.7569212351587793e-06, "loss": 0.3962, "step": 12595 }, { "epoch": 0.5842300556586271, "grad_norm": 3.5981037616729736, "learning_rate": 3.7562084128104493e-06, "loss": 0.2659, "step": 12596 }, { "epoch": 0.5842764378478664, "grad_norm": 13.945569038391113, "learning_rate": 3.7554956174082093e-06, "loss": 0.4606, "step": 12597 }, { "epoch": 0.5843228200371058, "grad_norm": 7.493610382080078, "learning_rate": 3.7547828489674993e-06, "loss": 0.3685, "step": 12598 }, { "epoch": 0.584369202226345, "grad_norm": 7.138926982879639, "learning_rate": 3.754070107503763e-06, "loss": 0.3527, "step": 12599 }, { "epoch": 0.5844155844155844, "grad_norm": 5.718319892883301, "learning_rate": 3.7533573930324395e-06, "loss": 0.3799, "step": 12600 }, { "epoch": 0.5844619666048237, "grad_norm": 11.324110984802246, "learning_rate": 3.752644705568971e-06, "loss": 0.4124, "step": 12601 }, { "epoch": 0.5845083487940631, "grad_norm": 5.907349586486816, "learning_rate": 3.751932045128797e-06, "loss": 0.4065, "step": 12602 }, { "epoch": 0.5845547309833025, "grad_norm": 5.705287456512451, "learning_rate": 3.751219411727357e-06, "loss": 0.2597, "step": 12603 }, { "epoch": 0.5846011131725417, "grad_norm": 13.599071502685547, "learning_rate": 3.7505068053800903e-06, "loss": 0.4263, "step": 12604 }, { "epoch": 0.5846474953617811, "grad_norm": 4.202611446380615, "learning_rate": 3.749794226102434e-06, "loss": 0.3203, "step": 12605 }, { "epoch": 0.5846938775510204, "grad_norm": 15.026982307434082, "learning_rate": 3.7490816739098258e-06, "loss": 0.5321, "step": 12606 }, { "epoch": 0.5847402597402598, "grad_norm": 7.232632637023926, "learning_rate": 3.748369148817704e-06, "loss": 0.2916, "step": 12607 }, { "epoch": 0.584786641929499, "grad_norm": 10.464866638183594, "learning_rate": 3.7476566508415046e-06, "loss": 0.4916, "step": 12608 }, { "epoch": 0.5848330241187384, "grad_norm": 8.833833694458008, "learning_rate": 3.746944179996664e-06, "loss": 0.2644, "step": 12609 }, { "epoch": 0.5848794063079777, "grad_norm": 5.678837299346924, "learning_rate": 3.7462317362986157e-06, "loss": 0.3092, "step": 12610 }, { "epoch": 0.5849257884972171, "grad_norm": 5.63087797164917, "learning_rate": 3.745519319762796e-06, "loss": 0.2463, "step": 12611 }, { "epoch": 0.5849721706864563, "grad_norm": 5.163650035858154, "learning_rate": 3.7448069304046374e-06, "loss": 0.346, "step": 12612 }, { "epoch": 0.5850185528756957, "grad_norm": 5.197350978851318, "learning_rate": 3.744094568239577e-06, "loss": 0.1929, "step": 12613 }, { "epoch": 0.5850649350649351, "grad_norm": 8.836849212646484, "learning_rate": 3.7433822332830444e-06, "loss": 0.3755, "step": 12614 }, { "epoch": 0.5851113172541744, "grad_norm": 5.175995349884033, "learning_rate": 3.742669925550472e-06, "loss": 0.1997, "step": 12615 }, { "epoch": 0.5851576994434138, "grad_norm": 9.745543479919434, "learning_rate": 3.7419576450572924e-06, "loss": 0.2629, "step": 12616 }, { "epoch": 0.585204081632653, "grad_norm": 9.684351921081543, "learning_rate": 3.741245391818937e-06, "loss": 0.3398, "step": 12617 }, { "epoch": 0.5852504638218924, "grad_norm": 4.820413112640381, "learning_rate": 3.740533165850837e-06, "loss": 0.3566, "step": 12618 }, { "epoch": 0.5852968460111317, "grad_norm": 6.349776268005371, "learning_rate": 3.739820967168423e-06, "loss": 0.3425, "step": 12619 }, { "epoch": 0.5853432282003711, "grad_norm": 13.632655143737793, "learning_rate": 3.7391087957871208e-06, "loss": 0.4778, "step": 12620 }, { "epoch": 0.5853896103896103, "grad_norm": 9.219706535339355, "learning_rate": 3.7383966517223623e-06, "loss": 0.3204, "step": 12621 }, { "epoch": 0.5854359925788497, "grad_norm": 7.07530403137207, "learning_rate": 3.737684534989574e-06, "loss": 0.3252, "step": 12622 }, { "epoch": 0.5854823747680891, "grad_norm": 5.5012969970703125, "learning_rate": 3.736972445604185e-06, "loss": 0.3349, "step": 12623 }, { "epoch": 0.5855287569573284, "grad_norm": 7.903543949127197, "learning_rate": 3.7362603835816237e-06, "loss": 0.2964, "step": 12624 }, { "epoch": 0.5855751391465677, "grad_norm": 4.993573188781738, "learning_rate": 3.7355483489373124e-06, "loss": 0.2687, "step": 12625 }, { "epoch": 0.585621521335807, "grad_norm": 11.566495895385742, "learning_rate": 3.7348363416866797e-06, "loss": 0.4422, "step": 12626 }, { "epoch": 0.5856679035250464, "grad_norm": 10.259177207946777, "learning_rate": 3.73412436184515e-06, "loss": 0.4442, "step": 12627 }, { "epoch": 0.5857142857142857, "grad_norm": 7.430202007293701, "learning_rate": 3.733412409428148e-06, "loss": 0.2496, "step": 12628 }, { "epoch": 0.5857606679035251, "grad_norm": 7.687473297119141, "learning_rate": 3.7327004844510983e-06, "loss": 0.2764, "step": 12629 }, { "epoch": 0.5858070500927643, "grad_norm": 11.866494178771973, "learning_rate": 3.731988586929427e-06, "loss": 0.3858, "step": 12630 }, { "epoch": 0.5858534322820037, "grad_norm": 4.921431064605713, "learning_rate": 3.7312767168785515e-06, "loss": 0.3323, "step": 12631 }, { "epoch": 0.5858998144712431, "grad_norm": 10.264994621276855, "learning_rate": 3.7305648743138966e-06, "loss": 0.2265, "step": 12632 }, { "epoch": 0.5859461966604824, "grad_norm": 15.174274444580078, "learning_rate": 3.729853059250884e-06, "loss": 0.4648, "step": 12633 }, { "epoch": 0.5859925788497217, "grad_norm": 5.2703471183776855, "learning_rate": 3.729141271704935e-06, "loss": 0.3939, "step": 12634 }, { "epoch": 0.586038961038961, "grad_norm": 3.8591606616973877, "learning_rate": 3.7284295116914726e-06, "loss": 0.2674, "step": 12635 }, { "epoch": 0.5860853432282004, "grad_norm": 4.289517879486084, "learning_rate": 3.727717779225912e-06, "loss": 0.2962, "step": 12636 }, { "epoch": 0.5861317254174397, "grad_norm": 7.594594955444336, "learning_rate": 3.727006074323675e-06, "loss": 0.3966, "step": 12637 }, { "epoch": 0.586178107606679, "grad_norm": 12.77639389038086, "learning_rate": 3.7262943970001784e-06, "loss": 0.437, "step": 12638 }, { "epoch": 0.5862244897959183, "grad_norm": 8.600037574768066, "learning_rate": 3.7255827472708428e-06, "loss": 0.3457, "step": 12639 }, { "epoch": 0.5862708719851577, "grad_norm": 9.360306739807129, "learning_rate": 3.724871125151085e-06, "loss": 0.3199, "step": 12640 }, { "epoch": 0.5863172541743971, "grad_norm": 6.935787677764893, "learning_rate": 3.7241595306563237e-06, "loss": 0.3203, "step": 12641 }, { "epoch": 0.5863636363636363, "grad_norm": 5.628810405731201, "learning_rate": 3.7234479638019715e-06, "loss": 0.3296, "step": 12642 }, { "epoch": 0.5864100185528757, "grad_norm": 6.775420188903809, "learning_rate": 3.7227364246034453e-06, "loss": 0.4052, "step": 12643 }, { "epoch": 0.586456400742115, "grad_norm": 5.628598690032959, "learning_rate": 3.722024913076161e-06, "loss": 0.3485, "step": 12644 }, { "epoch": 0.5865027829313544, "grad_norm": 9.623680114746094, "learning_rate": 3.7213134292355325e-06, "loss": 0.353, "step": 12645 }, { "epoch": 0.5865491651205937, "grad_norm": 8.418286323547363, "learning_rate": 3.7206019730969767e-06, "loss": 0.2857, "step": 12646 }, { "epoch": 0.586595547309833, "grad_norm": 6.82661247253418, "learning_rate": 3.719890544675903e-06, "loss": 0.2526, "step": 12647 }, { "epoch": 0.5866419294990723, "grad_norm": 8.78150749206543, "learning_rate": 3.7191791439877236e-06, "loss": 0.277, "step": 12648 }, { "epoch": 0.5866883116883117, "grad_norm": 5.111889839172363, "learning_rate": 3.7184677710478533e-06, "loss": 0.3101, "step": 12649 }, { "epoch": 0.5867346938775511, "grad_norm": 6.983563423156738, "learning_rate": 3.7177564258717036e-06, "loss": 0.3331, "step": 12650 }, { "epoch": 0.5867810760667903, "grad_norm": 12.598597526550293, "learning_rate": 3.717045108474684e-06, "loss": 0.3254, "step": 12651 }, { "epoch": 0.5868274582560297, "grad_norm": 4.452366828918457, "learning_rate": 3.7163338188722076e-06, "loss": 0.3183, "step": 12652 }, { "epoch": 0.586873840445269, "grad_norm": 5.748491287231445, "learning_rate": 3.7156225570796796e-06, "loss": 0.3436, "step": 12653 }, { "epoch": 0.5869202226345084, "grad_norm": 12.48790454864502, "learning_rate": 3.7149113231125113e-06, "loss": 0.2754, "step": 12654 }, { "epoch": 0.5869666048237476, "grad_norm": 7.046149730682373, "learning_rate": 3.714200116986112e-06, "loss": 0.2731, "step": 12655 }, { "epoch": 0.587012987012987, "grad_norm": 11.942630767822266, "learning_rate": 3.7134889387158885e-06, "loss": 0.3838, "step": 12656 }, { "epoch": 0.5870593692022263, "grad_norm": 9.886469841003418, "learning_rate": 3.7127777883172513e-06, "loss": 0.3615, "step": 12657 }, { "epoch": 0.5871057513914657, "grad_norm": 10.885676383972168, "learning_rate": 3.7120666658056017e-06, "loss": 0.3471, "step": 12658 }, { "epoch": 0.5871521335807051, "grad_norm": 5.402642726898193, "learning_rate": 3.7113555711963478e-06, "loss": 0.2806, "step": 12659 }, { "epoch": 0.5871985157699443, "grad_norm": 5.039618015289307, "learning_rate": 3.7106445045048966e-06, "loss": 0.2242, "step": 12660 }, { "epoch": 0.5872448979591837, "grad_norm": 5.967844486236572, "learning_rate": 3.7099334657466523e-06, "loss": 0.3377, "step": 12661 }, { "epoch": 0.587291280148423, "grad_norm": 8.147621154785156, "learning_rate": 3.709222454937019e-06, "loss": 0.3252, "step": 12662 }, { "epoch": 0.5873376623376624, "grad_norm": 9.29577350616455, "learning_rate": 3.7085114720914023e-06, "loss": 0.3275, "step": 12663 }, { "epoch": 0.5873840445269016, "grad_norm": 11.692876815795898, "learning_rate": 3.7078005172252015e-06, "loss": 0.4296, "step": 12664 }, { "epoch": 0.587430426716141, "grad_norm": 7.62300968170166, "learning_rate": 3.7070895903538207e-06, "loss": 0.3898, "step": 12665 }, { "epoch": 0.5874768089053803, "grad_norm": 4.5931501388549805, "learning_rate": 3.706378691492662e-06, "loss": 0.1716, "step": 12666 }, { "epoch": 0.5875231910946197, "grad_norm": 9.738934516906738, "learning_rate": 3.705667820657126e-06, "loss": 0.4349, "step": 12667 }, { "epoch": 0.587569573283859, "grad_norm": 5.616711139678955, "learning_rate": 3.7049569778626173e-06, "loss": 0.2543, "step": 12668 }, { "epoch": 0.5876159554730983, "grad_norm": 6.956509590148926, "learning_rate": 3.70424616312453e-06, "loss": 0.3023, "step": 12669 }, { "epoch": 0.5876623376623377, "grad_norm": 8.363505363464355, "learning_rate": 3.703535376458266e-06, "loss": 0.3315, "step": 12670 }, { "epoch": 0.587708719851577, "grad_norm": 5.302210330963135, "learning_rate": 3.702824617879224e-06, "loss": 0.19, "step": 12671 }, { "epoch": 0.5877551020408164, "grad_norm": 12.040165901184082, "learning_rate": 3.702113887402803e-06, "loss": 0.3472, "step": 12672 }, { "epoch": 0.5878014842300556, "grad_norm": 6.0011444091796875, "learning_rate": 3.7014031850444e-06, "loss": 0.3709, "step": 12673 }, { "epoch": 0.587847866419295, "grad_norm": 7.525554656982422, "learning_rate": 3.7006925108194135e-06, "loss": 0.3523, "step": 12674 }, { "epoch": 0.5878942486085343, "grad_norm": 10.187596321105957, "learning_rate": 3.699981864743237e-06, "loss": 0.4781, "step": 12675 }, { "epoch": 0.5879406307977737, "grad_norm": 4.728002548217773, "learning_rate": 3.6992712468312675e-06, "loss": 0.3185, "step": 12676 }, { "epoch": 0.587987012987013, "grad_norm": 16.507545471191406, "learning_rate": 3.6985606570989008e-06, "loss": 0.3116, "step": 12677 }, { "epoch": 0.5880333951762523, "grad_norm": 9.081028938293457, "learning_rate": 3.6978500955615304e-06, "loss": 0.3081, "step": 12678 }, { "epoch": 0.5880797773654917, "grad_norm": 4.933302879333496, "learning_rate": 3.6971395622345535e-06, "loss": 0.2647, "step": 12679 }, { "epoch": 0.588126159554731, "grad_norm": 4.958203315734863, "learning_rate": 3.6964290571333583e-06, "loss": 0.3842, "step": 12680 }, { "epoch": 0.5881725417439703, "grad_norm": 9.855600357055664, "learning_rate": 3.69571858027334e-06, "loss": 0.4403, "step": 12681 }, { "epoch": 0.5882189239332096, "grad_norm": 6.513413429260254, "learning_rate": 3.695008131669891e-06, "loss": 0.2887, "step": 12682 }, { "epoch": 0.588265306122449, "grad_norm": 9.300354957580566, "learning_rate": 3.6942977113384026e-06, "loss": 0.3822, "step": 12683 }, { "epoch": 0.5883116883116883, "grad_norm": 7.119069576263428, "learning_rate": 3.6935873192942672e-06, "loss": 0.4049, "step": 12684 }, { "epoch": 0.5883580705009277, "grad_norm": 7.172749996185303, "learning_rate": 3.692876955552872e-06, "loss": 0.294, "step": 12685 }, { "epoch": 0.588404452690167, "grad_norm": 5.847766399383545, "learning_rate": 3.692166620129608e-06, "loss": 0.3983, "step": 12686 }, { "epoch": 0.5884508348794063, "grad_norm": 6.39346981048584, "learning_rate": 3.691456313039864e-06, "loss": 0.276, "step": 12687 }, { "epoch": 0.5884972170686457, "grad_norm": 6.549526691436768, "learning_rate": 3.6907460342990285e-06, "loss": 0.3566, "step": 12688 }, { "epoch": 0.588543599257885, "grad_norm": 6.015239238739014, "learning_rate": 3.6900357839224897e-06, "loss": 0.3746, "step": 12689 }, { "epoch": 0.5885899814471243, "grad_norm": 9.284345626831055, "learning_rate": 3.6893255619256375e-06, "loss": 0.2052, "step": 12690 }, { "epoch": 0.5886363636363636, "grad_norm": 6.6556596755981445, "learning_rate": 3.6886153683238533e-06, "loss": 0.3813, "step": 12691 }, { "epoch": 0.588682745825603, "grad_norm": 7.592014789581299, "learning_rate": 3.6879052031325257e-06, "loss": 0.4768, "step": 12692 }, { "epoch": 0.5887291280148423, "grad_norm": 10.038022994995117, "learning_rate": 3.68719506636704e-06, "loss": 0.2888, "step": 12693 }, { "epoch": 0.5887755102040816, "grad_norm": 12.508050918579102, "learning_rate": 3.68648495804278e-06, "loss": 0.3063, "step": 12694 }, { "epoch": 0.588821892393321, "grad_norm": 7.4241461753845215, "learning_rate": 3.685774878175132e-06, "loss": 0.3959, "step": 12695 }, { "epoch": 0.5888682745825603, "grad_norm": 11.22848892211914, "learning_rate": 3.6850648267794776e-06, "loss": 0.3894, "step": 12696 }, { "epoch": 0.5889146567717997, "grad_norm": 5.764286518096924, "learning_rate": 3.6843548038711995e-06, "loss": 0.2953, "step": 12697 }, { "epoch": 0.5889610389610389, "grad_norm": 7.9112443923950195, "learning_rate": 3.6836448094656805e-06, "loss": 0.4185, "step": 12698 }, { "epoch": 0.5890074211502783, "grad_norm": 8.933000564575195, "learning_rate": 3.682934843578303e-06, "loss": 0.3068, "step": 12699 }, { "epoch": 0.5890538033395176, "grad_norm": 5.88519811630249, "learning_rate": 3.6822249062244464e-06, "loss": 0.2921, "step": 12700 }, { "epoch": 0.589100185528757, "grad_norm": 9.095196723937988, "learning_rate": 3.681514997419495e-06, "loss": 0.3696, "step": 12701 }, { "epoch": 0.5891465677179963, "grad_norm": 6.774830341339111, "learning_rate": 3.680805117178824e-06, "loss": 0.3253, "step": 12702 }, { "epoch": 0.5891929499072356, "grad_norm": 7.150265216827393, "learning_rate": 3.6800952655178135e-06, "loss": 0.3107, "step": 12703 }, { "epoch": 0.5892393320964749, "grad_norm": 6.820828914642334, "learning_rate": 3.6793854424518427e-06, "loss": 0.3135, "step": 12704 }, { "epoch": 0.5892857142857143, "grad_norm": 13.079140663146973, "learning_rate": 3.67867564799629e-06, "loss": 0.3366, "step": 12705 }, { "epoch": 0.5893320964749537, "grad_norm": 6.667013645172119, "learning_rate": 3.6779658821665355e-06, "loss": 0.312, "step": 12706 }, { "epoch": 0.5893784786641929, "grad_norm": 10.648210525512695, "learning_rate": 3.6772561449779496e-06, "loss": 0.3242, "step": 12707 }, { "epoch": 0.5894248608534323, "grad_norm": 5.602684497833252, "learning_rate": 3.6765464364459126e-06, "loss": 0.245, "step": 12708 }, { "epoch": 0.5894712430426716, "grad_norm": 10.280998229980469, "learning_rate": 3.675836756585798e-06, "loss": 0.4264, "step": 12709 }, { "epoch": 0.589517625231911, "grad_norm": 6.524367332458496, "learning_rate": 3.675127105412982e-06, "loss": 0.3161, "step": 12710 }, { "epoch": 0.5895640074211502, "grad_norm": 6.480325222015381, "learning_rate": 3.674417482942839e-06, "loss": 0.3199, "step": 12711 }, { "epoch": 0.5896103896103896, "grad_norm": 9.556782722473145, "learning_rate": 3.673707889190744e-06, "loss": 0.2284, "step": 12712 }, { "epoch": 0.5896567717996289, "grad_norm": 6.8018107414245605, "learning_rate": 3.6729983241720655e-06, "loss": 0.3124, "step": 12713 }, { "epoch": 0.5897031539888683, "grad_norm": 8.96289348602295, "learning_rate": 3.672288787902179e-06, "loss": 0.3671, "step": 12714 }, { "epoch": 0.5897495361781077, "grad_norm": 6.829593658447266, "learning_rate": 3.671579280396455e-06, "loss": 0.3002, "step": 12715 }, { "epoch": 0.5897959183673469, "grad_norm": 4.26863956451416, "learning_rate": 3.670869801670266e-06, "loss": 0.2643, "step": 12716 }, { "epoch": 0.5898423005565863, "grad_norm": 7.587739944458008, "learning_rate": 3.6701603517389828e-06, "loss": 0.3581, "step": 12717 }, { "epoch": 0.5898886827458256, "grad_norm": 7.425901889801025, "learning_rate": 3.6694509306179726e-06, "loss": 0.3335, "step": 12718 }, { "epoch": 0.589935064935065, "grad_norm": 11.146224021911621, "learning_rate": 3.668741538322606e-06, "loss": 0.4226, "step": 12719 }, { "epoch": 0.5899814471243042, "grad_norm": 9.768006324768066, "learning_rate": 3.6680321748682523e-06, "loss": 0.3591, "step": 12720 }, { "epoch": 0.5900278293135436, "grad_norm": 7.377236843109131, "learning_rate": 3.6673228402702783e-06, "loss": 0.2657, "step": 12721 }, { "epoch": 0.5900742115027829, "grad_norm": 7.746480941772461, "learning_rate": 3.6666135345440534e-06, "loss": 0.4129, "step": 12722 }, { "epoch": 0.5901205936920223, "grad_norm": 7.650644779205322, "learning_rate": 3.665904257704943e-06, "loss": 0.3752, "step": 12723 }, { "epoch": 0.5901669758812615, "grad_norm": 6.08989953994751, "learning_rate": 3.665195009768312e-06, "loss": 0.3686, "step": 12724 }, { "epoch": 0.5902133580705009, "grad_norm": 7.488487243652344, "learning_rate": 3.6644857907495273e-06, "loss": 0.455, "step": 12725 }, { "epoch": 0.5902597402597403, "grad_norm": 7.13405704498291, "learning_rate": 3.6637766006639538e-06, "loss": 0.3362, "step": 12726 }, { "epoch": 0.5903061224489796, "grad_norm": 9.18161678314209, "learning_rate": 3.663067439526956e-06, "loss": 0.2873, "step": 12727 }, { "epoch": 0.590352504638219, "grad_norm": 6.151217937469482, "learning_rate": 3.662358307353897e-06, "loss": 0.336, "step": 12728 }, { "epoch": 0.5903988868274582, "grad_norm": 8.17047119140625, "learning_rate": 3.661649204160139e-06, "loss": 0.2982, "step": 12729 }, { "epoch": 0.5904452690166976, "grad_norm": 5.741003036499023, "learning_rate": 3.6609401299610446e-06, "loss": 0.3159, "step": 12730 }, { "epoch": 0.5904916512059369, "grad_norm": 10.05479621887207, "learning_rate": 3.6602310847719767e-06, "loss": 0.3984, "step": 12731 }, { "epoch": 0.5905380333951763, "grad_norm": 6.73046350479126, "learning_rate": 3.659522068608296e-06, "loss": 0.3208, "step": 12732 }, { "epoch": 0.5905844155844155, "grad_norm": 11.653792381286621, "learning_rate": 3.6588130814853617e-06, "loss": 0.3068, "step": 12733 }, { "epoch": 0.5906307977736549, "grad_norm": 7.358588218688965, "learning_rate": 3.6581041234185368e-06, "loss": 0.3409, "step": 12734 }, { "epoch": 0.5906771799628943, "grad_norm": 6.533332347869873, "learning_rate": 3.6573951944231756e-06, "loss": 0.2607, "step": 12735 }, { "epoch": 0.5907235621521336, "grad_norm": 13.734550476074219, "learning_rate": 3.6566862945146404e-06, "loss": 0.3855, "step": 12736 }, { "epoch": 0.5907699443413729, "grad_norm": 5.003542900085449, "learning_rate": 3.655977423708289e-06, "loss": 0.2719, "step": 12737 }, { "epoch": 0.5908163265306122, "grad_norm": 9.381041526794434, "learning_rate": 3.655268582019477e-06, "loss": 0.5467, "step": 12738 }, { "epoch": 0.5908627087198516, "grad_norm": 7.939599990844727, "learning_rate": 3.654559769463562e-06, "loss": 0.2884, "step": 12739 }, { "epoch": 0.5909090909090909, "grad_norm": 4.789253234863281, "learning_rate": 3.6538509860558992e-06, "loss": 0.3742, "step": 12740 }, { "epoch": 0.5909554730983302, "grad_norm": 4.507318496704102, "learning_rate": 3.6531422318118453e-06, "loss": 0.2744, "step": 12741 }, { "epoch": 0.5910018552875695, "grad_norm": 5.085471153259277, "learning_rate": 3.6524335067467554e-06, "loss": 0.3013, "step": 12742 }, { "epoch": 0.5910482374768089, "grad_norm": 6.1363630294799805, "learning_rate": 3.6517248108759817e-06, "loss": 0.3255, "step": 12743 }, { "epoch": 0.5910946196660483, "grad_norm": 5.175107002258301, "learning_rate": 3.6510161442148783e-06, "loss": 0.2963, "step": 12744 }, { "epoch": 0.5911410018552876, "grad_norm": 5.116032600402832, "learning_rate": 3.6503075067788006e-06, "loss": 0.261, "step": 12745 }, { "epoch": 0.5911873840445269, "grad_norm": 6.43735408782959, "learning_rate": 3.6495988985830977e-06, "loss": 0.371, "step": 12746 }, { "epoch": 0.5912337662337662, "grad_norm": 9.0219144821167, "learning_rate": 3.648890319643123e-06, "loss": 0.3282, "step": 12747 }, { "epoch": 0.5912801484230056, "grad_norm": 5.864360809326172, "learning_rate": 3.6481817699742263e-06, "loss": 0.3235, "step": 12748 }, { "epoch": 0.5913265306122449, "grad_norm": 5.439386367797852, "learning_rate": 3.647473249591758e-06, "loss": 0.2441, "step": 12749 }, { "epoch": 0.5913729128014842, "grad_norm": 4.08070707321167, "learning_rate": 3.64676475851107e-06, "loss": 0.3073, "step": 12750 }, { "epoch": 0.5914192949907235, "grad_norm": 5.533884048461914, "learning_rate": 3.646056296747508e-06, "loss": 0.346, "step": 12751 }, { "epoch": 0.5914656771799629, "grad_norm": 6.243208408355713, "learning_rate": 3.6453478643164236e-06, "loss": 0.2548, "step": 12752 }, { "epoch": 0.5915120593692023, "grad_norm": 5.361963748931885, "learning_rate": 3.6446394612331624e-06, "loss": 0.2081, "step": 12753 }, { "epoch": 0.5915584415584415, "grad_norm": 6.746853351593018, "learning_rate": 3.6439310875130718e-06, "loss": 0.3589, "step": 12754 }, { "epoch": 0.5916048237476809, "grad_norm": 6.83914041519165, "learning_rate": 3.6432227431714993e-06, "loss": 0.3262, "step": 12755 }, { "epoch": 0.5916512059369202, "grad_norm": 9.76819133758545, "learning_rate": 3.6425144282237917e-06, "loss": 0.3572, "step": 12756 }, { "epoch": 0.5916975881261596, "grad_norm": 6.100450038909912, "learning_rate": 3.641806142685293e-06, "loss": 0.3653, "step": 12757 }, { "epoch": 0.5917439703153989, "grad_norm": 7.400729656219482, "learning_rate": 3.6410978865713465e-06, "loss": 0.3121, "step": 12758 }, { "epoch": 0.5917903525046382, "grad_norm": 10.93236255645752, "learning_rate": 3.6403896598972978e-06, "loss": 0.3939, "step": 12759 }, { "epoch": 0.5918367346938775, "grad_norm": 6.083707809448242, "learning_rate": 3.63968146267849e-06, "loss": 0.3809, "step": 12760 }, { "epoch": 0.5918831168831169, "grad_norm": 6.689781665802002, "learning_rate": 3.6389732949302663e-06, "loss": 0.3607, "step": 12761 }, { "epoch": 0.5919294990723563, "grad_norm": 6.285101413726807, "learning_rate": 3.6382651566679687e-06, "loss": 0.3733, "step": 12762 }, { "epoch": 0.5919758812615955, "grad_norm": 4.369192600250244, "learning_rate": 3.637557047906938e-06, "loss": 0.2143, "step": 12763 }, { "epoch": 0.5920222634508349, "grad_norm": 8.95821475982666, "learning_rate": 3.6368489686625143e-06, "loss": 0.378, "step": 12764 }, { "epoch": 0.5920686456400742, "grad_norm": 5.134654521942139, "learning_rate": 3.6361409189500387e-06, "loss": 0.2995, "step": 12765 }, { "epoch": 0.5921150278293136, "grad_norm": 6.792786598205566, "learning_rate": 3.635432898784852e-06, "loss": 0.4263, "step": 12766 }, { "epoch": 0.5921614100185528, "grad_norm": 5.909323215484619, "learning_rate": 3.6347249081822915e-06, "loss": 0.1935, "step": 12767 }, { "epoch": 0.5922077922077922, "grad_norm": 6.669737339019775, "learning_rate": 3.6340169471576947e-06, "loss": 0.3284, "step": 12768 }, { "epoch": 0.5922541743970315, "grad_norm": 5.804118633270264, "learning_rate": 3.6333090157264004e-06, "loss": 0.3488, "step": 12769 }, { "epoch": 0.5923005565862709, "grad_norm": 9.614982604980469, "learning_rate": 3.632601113903746e-06, "loss": 0.3968, "step": 12770 }, { "epoch": 0.5923469387755103, "grad_norm": 6.792783260345459, "learning_rate": 3.6318932417050667e-06, "loss": 0.3659, "step": 12771 }, { "epoch": 0.5923933209647495, "grad_norm": 10.063961029052734, "learning_rate": 3.6311853991457012e-06, "loss": 0.4479, "step": 12772 }, { "epoch": 0.5924397031539889, "grad_norm": 9.885788917541504, "learning_rate": 3.6304775862409796e-06, "loss": 0.4251, "step": 12773 }, { "epoch": 0.5924860853432282, "grad_norm": 6.9492058753967285, "learning_rate": 3.629769803006239e-06, "loss": 0.3599, "step": 12774 }, { "epoch": 0.5925324675324676, "grad_norm": 6.392050266265869, "learning_rate": 3.6290620494568125e-06, "loss": 0.2725, "step": 12775 }, { "epoch": 0.5925788497217068, "grad_norm": 7.956516265869141, "learning_rate": 3.6283543256080334e-06, "loss": 0.4906, "step": 12776 }, { "epoch": 0.5926252319109462, "grad_norm": 5.6432318687438965, "learning_rate": 3.6276466314752372e-06, "loss": 0.3232, "step": 12777 }, { "epoch": 0.5926716141001855, "grad_norm": 6.9193644523620605, "learning_rate": 3.626938967073751e-06, "loss": 0.44, "step": 12778 }, { "epoch": 0.5927179962894249, "grad_norm": 6.397618770599365, "learning_rate": 3.6262313324189068e-06, "loss": 0.362, "step": 12779 }, { "epoch": 0.5927643784786641, "grad_norm": 6.581752777099609, "learning_rate": 3.6255237275260367e-06, "loss": 0.331, "step": 12780 }, { "epoch": 0.5928107606679035, "grad_norm": 4.658331394195557, "learning_rate": 3.62481615241047e-06, "loss": 0.2842, "step": 12781 }, { "epoch": 0.5928571428571429, "grad_norm": 7.893682479858398, "learning_rate": 3.6241086070875363e-06, "loss": 0.3056, "step": 12782 }, { "epoch": 0.5929035250463822, "grad_norm": 11.146328926086426, "learning_rate": 3.623401091572566e-06, "loss": 0.3758, "step": 12783 }, { "epoch": 0.5929499072356216, "grad_norm": 4.647357940673828, "learning_rate": 3.6226936058808826e-06, "loss": 0.3836, "step": 12784 }, { "epoch": 0.5929962894248608, "grad_norm": 5.01377534866333, "learning_rate": 3.621986150027816e-06, "loss": 0.2614, "step": 12785 }, { "epoch": 0.5930426716141002, "grad_norm": 4.593358993530273, "learning_rate": 3.6212787240286927e-06, "loss": 0.349, "step": 12786 }, { "epoch": 0.5930890538033395, "grad_norm": 9.84482479095459, "learning_rate": 3.6205713278988387e-06, "loss": 0.3926, "step": 12787 }, { "epoch": 0.5931354359925789, "grad_norm": 6.269189834594727, "learning_rate": 3.6198639616535807e-06, "loss": 0.386, "step": 12788 }, { "epoch": 0.5931818181818181, "grad_norm": 7.563000202178955, "learning_rate": 3.6191566253082405e-06, "loss": 0.2654, "step": 12789 }, { "epoch": 0.5932282003710575, "grad_norm": 10.884880065917969, "learning_rate": 3.6184493188781435e-06, "loss": 0.2976, "step": 12790 }, { "epoch": 0.5932745825602969, "grad_norm": 4.692930698394775, "learning_rate": 3.6177420423786135e-06, "loss": 0.2981, "step": 12791 }, { "epoch": 0.5933209647495362, "grad_norm": 4.5549421310424805, "learning_rate": 3.6170347958249728e-06, "loss": 0.3491, "step": 12792 }, { "epoch": 0.5933673469387755, "grad_norm": 5.926843643188477, "learning_rate": 3.616327579232544e-06, "loss": 0.3736, "step": 12793 }, { "epoch": 0.5934137291280148, "grad_norm": 5.394496440887451, "learning_rate": 3.615620392616651e-06, "loss": 0.3493, "step": 12794 }, { "epoch": 0.5934601113172542, "grad_norm": 3.89817214012146, "learning_rate": 3.6149132359926093e-06, "loss": 0.2689, "step": 12795 }, { "epoch": 0.5935064935064935, "grad_norm": 8.928092956542969, "learning_rate": 3.6142061093757418e-06, "loss": 0.4281, "step": 12796 }, { "epoch": 0.5935528756957328, "grad_norm": 10.846790313720703, "learning_rate": 3.613499012781368e-06, "loss": 0.3595, "step": 12797 }, { "epoch": 0.5935992578849721, "grad_norm": 9.071425437927246, "learning_rate": 3.6127919462248074e-06, "loss": 0.3881, "step": 12798 }, { "epoch": 0.5936456400742115, "grad_norm": 5.902713298797607, "learning_rate": 3.6120849097213785e-06, "loss": 0.2274, "step": 12799 }, { "epoch": 0.5936920222634509, "grad_norm": 9.522027015686035, "learning_rate": 3.6113779032863973e-06, "loss": 0.3466, "step": 12800 }, { "epoch": 0.5937384044526902, "grad_norm": 8.343527793884277, "learning_rate": 3.6106709269351805e-06, "loss": 0.3636, "step": 12801 }, { "epoch": 0.5937847866419295, "grad_norm": 5.484549045562744, "learning_rate": 3.6099639806830456e-06, "loss": 0.3332, "step": 12802 }, { "epoch": 0.5938311688311688, "grad_norm": 5.162949085235596, "learning_rate": 3.6092570645453075e-06, "loss": 0.3929, "step": 12803 }, { "epoch": 0.5938775510204082, "grad_norm": 5.190388202667236, "learning_rate": 3.608550178537282e-06, "loss": 0.3003, "step": 12804 }, { "epoch": 0.5939239332096475, "grad_norm": 4.070064067840576, "learning_rate": 3.6078433226742846e-06, "loss": 0.293, "step": 12805 }, { "epoch": 0.5939703153988868, "grad_norm": 10.965941429138184, "learning_rate": 3.6071364969716256e-06, "loss": 0.2987, "step": 12806 }, { "epoch": 0.5940166975881261, "grad_norm": 10.435728073120117, "learning_rate": 3.6064297014446194e-06, "loss": 0.2635, "step": 12807 }, { "epoch": 0.5940630797773655, "grad_norm": 8.697601318359375, "learning_rate": 3.605722936108579e-06, "loss": 0.4588, "step": 12808 }, { "epoch": 0.5941094619666049, "grad_norm": 6.458643436431885, "learning_rate": 3.6050162009788157e-06, "loss": 0.3886, "step": 12809 }, { "epoch": 0.5941558441558441, "grad_norm": 5.208071231842041, "learning_rate": 3.6043094960706426e-06, "loss": 0.2574, "step": 12810 }, { "epoch": 0.5942022263450835, "grad_norm": 33.823875427246094, "learning_rate": 3.6036028213993664e-06, "loss": 0.3815, "step": 12811 }, { "epoch": 0.5942486085343228, "grad_norm": 6.368638515472412, "learning_rate": 3.6028961769802983e-06, "loss": 0.304, "step": 12812 }, { "epoch": 0.5942949907235622, "grad_norm": 7.6677350997924805, "learning_rate": 3.6021895628287472e-06, "loss": 0.3186, "step": 12813 }, { "epoch": 0.5943413729128015, "grad_norm": 10.21586799621582, "learning_rate": 3.6014829789600225e-06, "loss": 0.3284, "step": 12814 }, { "epoch": 0.5943877551020408, "grad_norm": 7.891831398010254, "learning_rate": 3.6007764253894317e-06, "loss": 0.3143, "step": 12815 }, { "epoch": 0.5944341372912801, "grad_norm": 4.722958087921143, "learning_rate": 3.600069902132283e-06, "loss": 0.3342, "step": 12816 }, { "epoch": 0.5944805194805195, "grad_norm": 6.491623878479004, "learning_rate": 3.59936340920388e-06, "loss": 0.3037, "step": 12817 }, { "epoch": 0.5945269016697589, "grad_norm": 15.693923950195312, "learning_rate": 3.59865694661953e-06, "loss": 0.3897, "step": 12818 }, { "epoch": 0.5945732838589981, "grad_norm": 8.515018463134766, "learning_rate": 3.5979505143945377e-06, "loss": 0.412, "step": 12819 }, { "epoch": 0.5946196660482375, "grad_norm": 9.425675392150879, "learning_rate": 3.597244112544208e-06, "loss": 0.3115, "step": 12820 }, { "epoch": 0.5946660482374768, "grad_norm": 6.8782057762146, "learning_rate": 3.596537741083847e-06, "loss": 0.3718, "step": 12821 }, { "epoch": 0.5947124304267162, "grad_norm": 12.557555198669434, "learning_rate": 3.595831400028753e-06, "loss": 0.3496, "step": 12822 }, { "epoch": 0.5947588126159554, "grad_norm": 4.863479137420654, "learning_rate": 3.595125089394232e-06, "loss": 0.2786, "step": 12823 }, { "epoch": 0.5948051948051948, "grad_norm": 4.699811935424805, "learning_rate": 3.5944188091955843e-06, "loss": 0.3738, "step": 12824 }, { "epoch": 0.5948515769944341, "grad_norm": 8.07310676574707, "learning_rate": 3.593712559448112e-06, "loss": 0.4112, "step": 12825 }, { "epoch": 0.5948979591836735, "grad_norm": 6.764310359954834, "learning_rate": 3.5930063401671157e-06, "loss": 0.3211, "step": 12826 }, { "epoch": 0.5949443413729129, "grad_norm": 9.798918724060059, "learning_rate": 3.592300151367896e-06, "loss": 0.3341, "step": 12827 }, { "epoch": 0.5949907235621521, "grad_norm": 12.857759475708008, "learning_rate": 3.5915939930657494e-06, "loss": 0.329, "step": 12828 }, { "epoch": 0.5950371057513915, "grad_norm": 14.709786415100098, "learning_rate": 3.5908878652759766e-06, "loss": 0.3166, "step": 12829 }, { "epoch": 0.5950834879406308, "grad_norm": 7.110976696014404, "learning_rate": 3.5901817680138747e-06, "loss": 0.3016, "step": 12830 }, { "epoch": 0.5951298701298702, "grad_norm": 8.1783447265625, "learning_rate": 3.589475701294741e-06, "loss": 0.2592, "step": 12831 }, { "epoch": 0.5951762523191094, "grad_norm": 5.321571350097656, "learning_rate": 3.5887696651338742e-06, "loss": 0.3473, "step": 12832 }, { "epoch": 0.5952226345083488, "grad_norm": 8.527384757995605, "learning_rate": 3.5880636595465666e-06, "loss": 0.3313, "step": 12833 }, { "epoch": 0.5952690166975881, "grad_norm": 4.216038227081299, "learning_rate": 3.587357684548115e-06, "loss": 0.3486, "step": 12834 }, { "epoch": 0.5953153988868275, "grad_norm": 4.745180130004883, "learning_rate": 3.5866517401538137e-06, "loss": 0.369, "step": 12835 }, { "epoch": 0.5953617810760667, "grad_norm": 4.900727272033691, "learning_rate": 3.5859458263789577e-06, "loss": 0.1772, "step": 12836 }, { "epoch": 0.5954081632653061, "grad_norm": 13.202669143676758, "learning_rate": 3.5852399432388385e-06, "loss": 0.4044, "step": 12837 }, { "epoch": 0.5954545454545455, "grad_norm": 9.145291328430176, "learning_rate": 3.5845340907487525e-06, "loss": 0.3428, "step": 12838 }, { "epoch": 0.5955009276437848, "grad_norm": 11.034791946411133, "learning_rate": 3.5838282689239867e-06, "loss": 0.356, "step": 12839 }, { "epoch": 0.5955473098330241, "grad_norm": 5.517939567565918, "learning_rate": 3.5831224777798346e-06, "loss": 0.2773, "step": 12840 }, { "epoch": 0.5955936920222634, "grad_norm": 9.91836929321289, "learning_rate": 3.5824167173315865e-06, "loss": 0.4408, "step": 12841 }, { "epoch": 0.5956400742115028, "grad_norm": 8.562738418579102, "learning_rate": 3.5817109875945325e-06, "loss": 0.3852, "step": 12842 }, { "epoch": 0.5956864564007421, "grad_norm": 13.267415046691895, "learning_rate": 3.5810052885839642e-06, "loss": 0.4914, "step": 12843 }, { "epoch": 0.5957328385899815, "grad_norm": 8.936687469482422, "learning_rate": 3.580299620315166e-06, "loss": 0.2999, "step": 12844 }, { "epoch": 0.5957792207792207, "grad_norm": 9.5257568359375, "learning_rate": 3.5795939828034272e-06, "loss": 0.4737, "step": 12845 }, { "epoch": 0.5958256029684601, "grad_norm": 6.246013641357422, "learning_rate": 3.578888376064036e-06, "loss": 0.3798, "step": 12846 }, { "epoch": 0.5958719851576995, "grad_norm": 5.507312297821045, "learning_rate": 3.578182800112278e-06, "loss": 0.3637, "step": 12847 }, { "epoch": 0.5959183673469388, "grad_norm": 5.001802444458008, "learning_rate": 3.577477254963442e-06, "loss": 0.3009, "step": 12848 }, { "epoch": 0.5959647495361781, "grad_norm": 12.424325942993164, "learning_rate": 3.5767717406328096e-06, "loss": 0.3985, "step": 12849 }, { "epoch": 0.5960111317254174, "grad_norm": 9.242598533630371, "learning_rate": 3.5760662571356657e-06, "loss": 0.4598, "step": 12850 }, { "epoch": 0.5960575139146568, "grad_norm": 7.876141548156738, "learning_rate": 3.5753608044872957e-06, "loss": 0.3538, "step": 12851 }, { "epoch": 0.5961038961038961, "grad_norm": 7.632684230804443, "learning_rate": 3.5746553827029827e-06, "loss": 0.3512, "step": 12852 }, { "epoch": 0.5961502782931354, "grad_norm": 6.875000476837158, "learning_rate": 3.5739499917980088e-06, "loss": 0.3361, "step": 12853 }, { "epoch": 0.5961966604823747, "grad_norm": 5.433263301849365, "learning_rate": 3.573244631787658e-06, "loss": 0.3633, "step": 12854 }, { "epoch": 0.5962430426716141, "grad_norm": 3.8622705936431885, "learning_rate": 3.572539302687208e-06, "loss": 0.2896, "step": 12855 }, { "epoch": 0.5962894248608535, "grad_norm": 5.302243232727051, "learning_rate": 3.5718340045119416e-06, "loss": 0.3544, "step": 12856 }, { "epoch": 0.5963358070500928, "grad_norm": 4.673247814178467, "learning_rate": 3.5711287372771377e-06, "loss": 0.223, "step": 12857 }, { "epoch": 0.5963821892393321, "grad_norm": 6.456725120544434, "learning_rate": 3.5704235009980753e-06, "loss": 0.329, "step": 12858 }, { "epoch": 0.5964285714285714, "grad_norm": 4.981780052185059, "learning_rate": 3.5697182956900365e-06, "loss": 0.2255, "step": 12859 }, { "epoch": 0.5964749536178108, "grad_norm": 6.839829444885254, "learning_rate": 3.5690131213682943e-06, "loss": 0.3149, "step": 12860 }, { "epoch": 0.5965213358070501, "grad_norm": 8.03166389465332, "learning_rate": 3.5683079780481285e-06, "loss": 0.3059, "step": 12861 }, { "epoch": 0.5965677179962894, "grad_norm": 8.892279624938965, "learning_rate": 3.5676028657448148e-06, "loss": 0.2963, "step": 12862 }, { "epoch": 0.5966141001855287, "grad_norm": 13.607680320739746, "learning_rate": 3.5668977844736296e-06, "loss": 0.4074, "step": 12863 }, { "epoch": 0.5966604823747681, "grad_norm": 6.548185348510742, "learning_rate": 3.566192734249849e-06, "loss": 0.3049, "step": 12864 }, { "epoch": 0.5967068645640075, "grad_norm": 5.507153511047363, "learning_rate": 3.565487715088746e-06, "loss": 0.3227, "step": 12865 }, { "epoch": 0.5967532467532467, "grad_norm": 6.770227909088135, "learning_rate": 3.5647827270055945e-06, "loss": 0.3693, "step": 12866 }, { "epoch": 0.5967996289424861, "grad_norm": 11.573467254638672, "learning_rate": 3.5640777700156688e-06, "loss": 0.4257, "step": 12867 }, { "epoch": 0.5968460111317254, "grad_norm": 8.844582557678223, "learning_rate": 3.56337284413424e-06, "loss": 0.2667, "step": 12868 }, { "epoch": 0.5968923933209648, "grad_norm": 12.151248931884766, "learning_rate": 3.5626679493765827e-06, "loss": 0.4664, "step": 12869 }, { "epoch": 0.5969387755102041, "grad_norm": 6.866849422454834, "learning_rate": 3.561963085757965e-06, "loss": 0.3154, "step": 12870 }, { "epoch": 0.5969851576994434, "grad_norm": 6.470084190368652, "learning_rate": 3.561258253293658e-06, "loss": 0.3596, "step": 12871 }, { "epoch": 0.5970315398886827, "grad_norm": 12.070119857788086, "learning_rate": 3.5605534519989327e-06, "loss": 0.5059, "step": 12872 }, { "epoch": 0.5970779220779221, "grad_norm": 9.365345001220703, "learning_rate": 3.5598486818890565e-06, "loss": 0.3337, "step": 12873 }, { "epoch": 0.5971243042671615, "grad_norm": 5.900075435638428, "learning_rate": 3.559143942979301e-06, "loss": 0.224, "step": 12874 }, { "epoch": 0.5971706864564007, "grad_norm": 5.2490129470825195, "learning_rate": 3.5584392352849307e-06, "loss": 0.2918, "step": 12875 }, { "epoch": 0.5972170686456401, "grad_norm": 5.6460723876953125, "learning_rate": 3.5577345588212154e-06, "loss": 0.3692, "step": 12876 }, { "epoch": 0.5972634508348794, "grad_norm": 6.435543060302734, "learning_rate": 3.557029913603418e-06, "loss": 0.2564, "step": 12877 }, { "epoch": 0.5973098330241188, "grad_norm": 7.848567485809326, "learning_rate": 3.556325299646808e-06, "loss": 0.3682, "step": 12878 }, { "epoch": 0.597356215213358, "grad_norm": 8.71522331237793, "learning_rate": 3.555620716966649e-06, "loss": 0.3824, "step": 12879 }, { "epoch": 0.5974025974025974, "grad_norm": 7.72974157333374, "learning_rate": 3.554916165578204e-06, "loss": 0.3046, "step": 12880 }, { "epoch": 0.5974489795918367, "grad_norm": 8.005998611450195, "learning_rate": 3.5542116454967395e-06, "loss": 0.2565, "step": 12881 }, { "epoch": 0.5974953617810761, "grad_norm": 4.845513820648193, "learning_rate": 3.5535071567375163e-06, "loss": 0.286, "step": 12882 }, { "epoch": 0.5975417439703155, "grad_norm": 8.643112182617188, "learning_rate": 3.552802699315797e-06, "loss": 0.3519, "step": 12883 }, { "epoch": 0.5975881261595547, "grad_norm": 8.834832191467285, "learning_rate": 3.5520982732468456e-06, "loss": 0.4, "step": 12884 }, { "epoch": 0.5976345083487941, "grad_norm": 4.873838901519775, "learning_rate": 3.55139387854592e-06, "loss": 0.3775, "step": 12885 }, { "epoch": 0.5976808905380334, "grad_norm": 5.91655969619751, "learning_rate": 3.550689515228282e-06, "loss": 0.3961, "step": 12886 }, { "epoch": 0.5977272727272728, "grad_norm": 19.069496154785156, "learning_rate": 3.5499851833091925e-06, "loss": 0.6134, "step": 12887 }, { "epoch": 0.597773654916512, "grad_norm": 5.309941291809082, "learning_rate": 3.5492808828039083e-06, "loss": 0.2728, "step": 12888 }, { "epoch": 0.5978200371057514, "grad_norm": 4.394758701324463, "learning_rate": 3.5485766137276894e-06, "loss": 0.2736, "step": 12889 }, { "epoch": 0.5978664192949907, "grad_norm": 8.819472312927246, "learning_rate": 3.547872376095791e-06, "loss": 0.3725, "step": 12890 }, { "epoch": 0.5979128014842301, "grad_norm": 6.220523834228516, "learning_rate": 3.5471681699234727e-06, "loss": 0.3232, "step": 12891 }, { "epoch": 0.5979591836734693, "grad_norm": 6.7328619956970215, "learning_rate": 3.5464639952259903e-06, "loss": 0.3119, "step": 12892 }, { "epoch": 0.5980055658627087, "grad_norm": 3.8413639068603516, "learning_rate": 3.5457598520185975e-06, "loss": 0.2863, "step": 12893 }, { "epoch": 0.5980519480519481, "grad_norm": 5.846630573272705, "learning_rate": 3.5450557403165507e-06, "loss": 0.2565, "step": 12894 }, { "epoch": 0.5980983302411874, "grad_norm": 7.768920421600342, "learning_rate": 3.5443516601351046e-06, "loss": 0.3276, "step": 12895 }, { "epoch": 0.5981447124304267, "grad_norm": 7.567666053771973, "learning_rate": 3.543647611489511e-06, "loss": 0.2614, "step": 12896 }, { "epoch": 0.598191094619666, "grad_norm": 7.305520057678223, "learning_rate": 3.5429435943950248e-06, "loss": 0.2967, "step": 12897 }, { "epoch": 0.5982374768089054, "grad_norm": 7.078761577606201, "learning_rate": 3.5422396088668974e-06, "loss": 0.3825, "step": 12898 }, { "epoch": 0.5982838589981447, "grad_norm": 13.798609733581543, "learning_rate": 3.541535654920379e-06, "loss": 0.4003, "step": 12899 }, { "epoch": 0.5983302411873841, "grad_norm": 13.105968475341797, "learning_rate": 3.540831732570722e-06, "loss": 0.3442, "step": 12900 }, { "epoch": 0.5983766233766233, "grad_norm": 14.172499656677246, "learning_rate": 3.5401278418331754e-06, "loss": 0.5438, "step": 12901 }, { "epoch": 0.5984230055658627, "grad_norm": 4.253088474273682, "learning_rate": 3.5394239827229887e-06, "loss": 0.3161, "step": 12902 }, { "epoch": 0.5984693877551021, "grad_norm": 6.725678443908691, "learning_rate": 3.5387201552554122e-06, "loss": 0.399, "step": 12903 }, { "epoch": 0.5985157699443414, "grad_norm": 4.654791355133057, "learning_rate": 3.538016359445692e-06, "loss": 0.2492, "step": 12904 }, { "epoch": 0.5985621521335807, "grad_norm": 12.542503356933594, "learning_rate": 3.537312595309077e-06, "loss": 0.4567, "step": 12905 }, { "epoch": 0.59860853432282, "grad_norm": 6.668501853942871, "learning_rate": 3.536608862860812e-06, "loss": 0.301, "step": 12906 }, { "epoch": 0.5986549165120594, "grad_norm": 11.044503211975098, "learning_rate": 3.5359051621161445e-06, "loss": 0.4674, "step": 12907 }, { "epoch": 0.5987012987012987, "grad_norm": 5.762604713439941, "learning_rate": 3.535201493090319e-06, "loss": 0.3266, "step": 12908 }, { "epoch": 0.598747680890538, "grad_norm": 5.091622829437256, "learning_rate": 3.534497855798582e-06, "loss": 0.2697, "step": 12909 }, { "epoch": 0.5987940630797773, "grad_norm": 5.2917375564575195, "learning_rate": 3.5337942502561763e-06, "loss": 0.3632, "step": 12910 }, { "epoch": 0.5988404452690167, "grad_norm": 9.924443244934082, "learning_rate": 3.533090676478343e-06, "loss": 0.4841, "step": 12911 }, { "epoch": 0.598886827458256, "grad_norm": 4.336188316345215, "learning_rate": 3.532387134480327e-06, "loss": 0.2553, "step": 12912 }, { "epoch": 0.5989332096474954, "grad_norm": 10.955007553100586, "learning_rate": 3.531683624277369e-06, "loss": 0.3945, "step": 12913 }, { "epoch": 0.5989795918367347, "grad_norm": 3.932218074798584, "learning_rate": 3.530980145884713e-06, "loss": 0.2036, "step": 12914 }, { "epoch": 0.599025974025974, "grad_norm": 6.610738277435303, "learning_rate": 3.5302766993175965e-06, "loss": 0.3091, "step": 12915 }, { "epoch": 0.5990723562152134, "grad_norm": 8.525064468383789, "learning_rate": 3.5295732845912588e-06, "loss": 0.325, "step": 12916 }, { "epoch": 0.5991187384044527, "grad_norm": 7.82963752746582, "learning_rate": 3.5288699017209404e-06, "loss": 0.3829, "step": 12917 }, { "epoch": 0.599165120593692, "grad_norm": 7.362812042236328, "learning_rate": 3.5281665507218804e-06, "loss": 0.3583, "step": 12918 }, { "epoch": 0.5992115027829313, "grad_norm": 11.774784088134766, "learning_rate": 3.527463231609315e-06, "loss": 0.3696, "step": 12919 }, { "epoch": 0.5992578849721707, "grad_norm": 5.155043601989746, "learning_rate": 3.5267599443984848e-06, "loss": 0.2832, "step": 12920 }, { "epoch": 0.59930426716141, "grad_norm": 11.460067749023438, "learning_rate": 3.526056689104621e-06, "loss": 0.3945, "step": 12921 }, { "epoch": 0.5993506493506493, "grad_norm": 5.878636837005615, "learning_rate": 3.525353465742961e-06, "loss": 0.3044, "step": 12922 }, { "epoch": 0.5993970315398887, "grad_norm": 6.0950775146484375, "learning_rate": 3.5246502743287413e-06, "loss": 0.258, "step": 12923 }, { "epoch": 0.599443413729128, "grad_norm": 7.328537464141846, "learning_rate": 3.5239471148771944e-06, "loss": 0.2876, "step": 12924 }, { "epoch": 0.5994897959183674, "grad_norm": 10.251395225524902, "learning_rate": 3.5232439874035574e-06, "loss": 0.4516, "step": 12925 }, { "epoch": 0.5995361781076067, "grad_norm": 5.953382968902588, "learning_rate": 3.5225408919230588e-06, "loss": 0.3224, "step": 12926 }, { "epoch": 0.599582560296846, "grad_norm": 7.077336311340332, "learning_rate": 3.5218378284509314e-06, "loss": 0.3918, "step": 12927 }, { "epoch": 0.5996289424860853, "grad_norm": 5.8665385246276855, "learning_rate": 3.521134797002408e-06, "loss": 0.3304, "step": 12928 }, { "epoch": 0.5996753246753247, "grad_norm": 7.991796016693115, "learning_rate": 3.520431797592719e-06, "loss": 0.315, "step": 12929 }, { "epoch": 0.599721706864564, "grad_norm": 5.4180989265441895, "learning_rate": 3.5197288302370957e-06, "loss": 0.269, "step": 12930 }, { "epoch": 0.5997680890538033, "grad_norm": 5.814249515533447, "learning_rate": 3.5190258949507685e-06, "loss": 0.3398, "step": 12931 }, { "epoch": 0.5998144712430427, "grad_norm": 6.380027770996094, "learning_rate": 3.5183229917489613e-06, "loss": 0.3424, "step": 12932 }, { "epoch": 0.599860853432282, "grad_norm": 5.309239387512207, "learning_rate": 3.517620120646906e-06, "loss": 0.3788, "step": 12933 }, { "epoch": 0.5999072356215214, "grad_norm": 16.608991622924805, "learning_rate": 3.516917281659828e-06, "loss": 0.3721, "step": 12934 }, { "epoch": 0.5999536178107606, "grad_norm": 9.419398307800293, "learning_rate": 3.516214474802955e-06, "loss": 0.3729, "step": 12935 }, { "epoch": 0.6, "grad_norm": 8.710193634033203, "learning_rate": 3.5155117000915153e-06, "loss": 0.3377, "step": 12936 }, { "epoch": 0.6, "eval_loss": 0.33409908413887024, "eval_runtime": 38.0184, "eval_samples_per_second": 45.846, "eval_steps_per_second": 5.734, "step": 12936 }, { "epoch": 0.6000463821892393, "grad_norm": 9.12767219543457, "learning_rate": 3.5148089575407297e-06, "loss": 0.413, "step": 12937 }, { "epoch": 0.6000927643784787, "grad_norm": 4.922755241394043, "learning_rate": 3.514106247165824e-06, "loss": 0.4007, "step": 12938 }, { "epoch": 0.600139146567718, "grad_norm": 8.286236763000488, "learning_rate": 3.513403568982023e-06, "loss": 0.3576, "step": 12939 }, { "epoch": 0.6001855287569573, "grad_norm": 9.940082550048828, "learning_rate": 3.5127009230045493e-06, "loss": 0.3463, "step": 12940 }, { "epoch": 0.6002319109461967, "grad_norm": 10.546319007873535, "learning_rate": 3.511998309248628e-06, "loss": 0.3799, "step": 12941 }, { "epoch": 0.600278293135436, "grad_norm": 5.530889987945557, "learning_rate": 3.5112957277294766e-06, "loss": 0.3218, "step": 12942 }, { "epoch": 0.6003246753246754, "grad_norm": 4.372391223907471, "learning_rate": 3.5105931784623177e-06, "loss": 0.2684, "step": 12943 }, { "epoch": 0.6003710575139146, "grad_norm": 6.752861976623535, "learning_rate": 3.509890661462372e-06, "loss": 0.298, "step": 12944 }, { "epoch": 0.600417439703154, "grad_norm": 15.050274848937988, "learning_rate": 3.5091881767448593e-06, "loss": 0.538, "step": 12945 }, { "epoch": 0.6004638218923933, "grad_norm": 8.39859390258789, "learning_rate": 3.5084857243249988e-06, "loss": 0.2885, "step": 12946 }, { "epoch": 0.6005102040816327, "grad_norm": 4.925930976867676, "learning_rate": 3.50778330421801e-06, "loss": 0.3274, "step": 12947 }, { "epoch": 0.6005565862708719, "grad_norm": 8.250022888183594, "learning_rate": 3.5070809164391066e-06, "loss": 0.3876, "step": 12948 }, { "epoch": 0.6006029684601113, "grad_norm": 4.49753999710083, "learning_rate": 3.5063785610035085e-06, "loss": 0.2443, "step": 12949 }, { "epoch": 0.6006493506493507, "grad_norm": 7.4069013595581055, "learning_rate": 3.5056762379264303e-06, "loss": 0.3867, "step": 12950 }, { "epoch": 0.60069573283859, "grad_norm": 4.944419860839844, "learning_rate": 3.504973947223088e-06, "loss": 0.2495, "step": 12951 }, { "epoch": 0.6007421150278293, "grad_norm": 5.794146537780762, "learning_rate": 3.5042716889086998e-06, "loss": 0.3646, "step": 12952 }, { "epoch": 0.6007884972170686, "grad_norm": 9.147153854370117, "learning_rate": 3.503569462998474e-06, "loss": 0.4135, "step": 12953 }, { "epoch": 0.600834879406308, "grad_norm": 5.709409713745117, "learning_rate": 3.502867269507626e-06, "loss": 0.2228, "step": 12954 }, { "epoch": 0.6008812615955473, "grad_norm": 11.256365776062012, "learning_rate": 3.5021651084513696e-06, "loss": 0.363, "step": 12955 }, { "epoch": 0.6009276437847867, "grad_norm": 5.574272155761719, "learning_rate": 3.5014629798449153e-06, "loss": 0.3601, "step": 12956 }, { "epoch": 0.6009740259740259, "grad_norm": 4.279094219207764, "learning_rate": 3.5007608837034758e-06, "loss": 0.2989, "step": 12957 }, { "epoch": 0.6010204081632653, "grad_norm": 12.011669158935547, "learning_rate": 3.500058820042263e-06, "loss": 0.5702, "step": 12958 }, { "epoch": 0.6010667903525047, "grad_norm": 5.16204309463501, "learning_rate": 3.4993567888764824e-06, "loss": 0.3734, "step": 12959 }, { "epoch": 0.601113172541744, "grad_norm": 6.938200950622559, "learning_rate": 3.498654790221346e-06, "loss": 0.2994, "step": 12960 }, { "epoch": 0.6011595547309833, "grad_norm": 6.4526262283325195, "learning_rate": 3.4979528240920612e-06, "loss": 0.3718, "step": 12961 }, { "epoch": 0.6012059369202226, "grad_norm": 4.565816402435303, "learning_rate": 3.497250890503836e-06, "loss": 0.2549, "step": 12962 }, { "epoch": 0.601252319109462, "grad_norm": 5.702054977416992, "learning_rate": 3.49654898947188e-06, "loss": 0.2782, "step": 12963 }, { "epoch": 0.6012987012987013, "grad_norm": 5.051711559295654, "learning_rate": 3.4958471210113953e-06, "loss": 0.3546, "step": 12964 }, { "epoch": 0.6013450834879406, "grad_norm": 6.470999717712402, "learning_rate": 3.4951452851375887e-06, "loss": 0.3465, "step": 12965 }, { "epoch": 0.6013914656771799, "grad_norm": 14.251642227172852, "learning_rate": 3.494443481865667e-06, "loss": 0.4929, "step": 12966 }, { "epoch": 0.6014378478664193, "grad_norm": 30.325429916381836, "learning_rate": 3.493741711210832e-06, "loss": 0.5947, "step": 12967 }, { "epoch": 0.6014842300556587, "grad_norm": 8.352150917053223, "learning_rate": 3.493039973188289e-06, "loss": 0.3939, "step": 12968 }, { "epoch": 0.601530612244898, "grad_norm": 5.8410186767578125, "learning_rate": 3.4923382678132423e-06, "loss": 0.3142, "step": 12969 }, { "epoch": 0.6015769944341373, "grad_norm": 7.246777057647705, "learning_rate": 3.491636595100889e-06, "loss": 0.3596, "step": 12970 }, { "epoch": 0.6016233766233766, "grad_norm": 6.63883113861084, "learning_rate": 3.490934955066434e-06, "loss": 0.3385, "step": 12971 }, { "epoch": 0.601669758812616, "grad_norm": 7.187559604644775, "learning_rate": 3.490233347725077e-06, "loss": 0.2676, "step": 12972 }, { "epoch": 0.6017161410018553, "grad_norm": 4.221256256103516, "learning_rate": 3.489531773092018e-06, "loss": 0.2627, "step": 12973 }, { "epoch": 0.6017625231910946, "grad_norm": 10.491744995117188, "learning_rate": 3.4888302311824586e-06, "loss": 0.2779, "step": 12974 }, { "epoch": 0.6018089053803339, "grad_norm": 5.94327974319458, "learning_rate": 3.4881287220115932e-06, "loss": 0.3265, "step": 12975 }, { "epoch": 0.6018552875695733, "grad_norm": 8.732928276062012, "learning_rate": 3.4874272455946217e-06, "loss": 0.3791, "step": 12976 }, { "epoch": 0.6019016697588127, "grad_norm": 4.282742023468018, "learning_rate": 3.486725801946741e-06, "loss": 0.2866, "step": 12977 }, { "epoch": 0.6019480519480519, "grad_norm": 6.837844371795654, "learning_rate": 3.486024391083148e-06, "loss": 0.2676, "step": 12978 }, { "epoch": 0.6019944341372913, "grad_norm": 7.698390007019043, "learning_rate": 3.4853230130190373e-06, "loss": 0.3292, "step": 12979 }, { "epoch": 0.6020408163265306, "grad_norm": 8.74758243560791, "learning_rate": 3.4846216677696078e-06, "loss": 0.4311, "step": 12980 }, { "epoch": 0.60208719851577, "grad_norm": 5.4000725746154785, "learning_rate": 3.4839203553500474e-06, "loss": 0.3757, "step": 12981 }, { "epoch": 0.6021335807050093, "grad_norm": 8.136507987976074, "learning_rate": 3.4832190757755533e-06, "loss": 0.475, "step": 12982 }, { "epoch": 0.6021799628942486, "grad_norm": 6.626524925231934, "learning_rate": 3.4825178290613183e-06, "loss": 0.3486, "step": 12983 }, { "epoch": 0.6022263450834879, "grad_norm": 7.360348224639893, "learning_rate": 3.481816615222533e-06, "loss": 0.3531, "step": 12984 }, { "epoch": 0.6022727272727273, "grad_norm": 7.280293941497803, "learning_rate": 3.481115434274393e-06, "loss": 0.3652, "step": 12985 }, { "epoch": 0.6023191094619667, "grad_norm": 10.884310722351074, "learning_rate": 3.4804142862320833e-06, "loss": 0.3835, "step": 12986 }, { "epoch": 0.6023654916512059, "grad_norm": 7.379481792449951, "learning_rate": 3.4797131711107973e-06, "loss": 0.389, "step": 12987 }, { "epoch": 0.6024118738404453, "grad_norm": 3.713155746459961, "learning_rate": 3.4790120889257234e-06, "loss": 0.3148, "step": 12988 }, { "epoch": 0.6024582560296846, "grad_norm": 3.694044589996338, "learning_rate": 3.4783110396920498e-06, "loss": 0.2394, "step": 12989 }, { "epoch": 0.602504638218924, "grad_norm": 6.838767051696777, "learning_rate": 3.4776100234249653e-06, "loss": 0.3751, "step": 12990 }, { "epoch": 0.6025510204081632, "grad_norm": 15.471692085266113, "learning_rate": 3.4769090401396587e-06, "loss": 0.4254, "step": 12991 }, { "epoch": 0.6025974025974026, "grad_norm": 8.076910018920898, "learning_rate": 3.476208089851313e-06, "loss": 0.3894, "step": 12992 }, { "epoch": 0.6026437847866419, "grad_norm": 12.710766792297363, "learning_rate": 3.4755071725751144e-06, "loss": 0.4227, "step": 12993 }, { "epoch": 0.6026901669758813, "grad_norm": 3.674628973007202, "learning_rate": 3.474806288326249e-06, "loss": 0.2148, "step": 12994 }, { "epoch": 0.6027365491651205, "grad_norm": 6.2116804122924805, "learning_rate": 3.4741054371199014e-06, "loss": 0.3634, "step": 12995 }, { "epoch": 0.6027829313543599, "grad_norm": 10.202897071838379, "learning_rate": 3.4734046189712563e-06, "loss": 0.3701, "step": 12996 }, { "epoch": 0.6028293135435993, "grad_norm": 5.2946858406066895, "learning_rate": 3.4727038338954934e-06, "loss": 0.2634, "step": 12997 }, { "epoch": 0.6028756957328386, "grad_norm": 12.313374519348145, "learning_rate": 3.4720030819077953e-06, "loss": 0.497, "step": 12998 }, { "epoch": 0.602922077922078, "grad_norm": 5.213276386260986, "learning_rate": 3.4713023630233454e-06, "loss": 0.4266, "step": 12999 }, { "epoch": 0.6029684601113172, "grad_norm": 9.526849746704102, "learning_rate": 3.470601677257323e-06, "loss": 0.3852, "step": 13000 }, { "epoch": 0.6030148423005566, "grad_norm": 6.42879056930542, "learning_rate": 3.4699010246249086e-06, "loss": 0.2601, "step": 13001 }, { "epoch": 0.6030612244897959, "grad_norm": 5.040937423706055, "learning_rate": 3.469200405141284e-06, "loss": 0.3163, "step": 13002 }, { "epoch": 0.6031076066790353, "grad_norm": 7.104703426361084, "learning_rate": 3.4684998188216227e-06, "loss": 0.295, "step": 13003 }, { "epoch": 0.6031539888682745, "grad_norm": 6.868095874786377, "learning_rate": 3.4677992656811054e-06, "loss": 0.2899, "step": 13004 }, { "epoch": 0.6032003710575139, "grad_norm": 8.511218070983887, "learning_rate": 3.467098745734909e-06, "loss": 0.2193, "step": 13005 }, { "epoch": 0.6032467532467533, "grad_norm": 6.486359119415283, "learning_rate": 3.466398258998209e-06, "loss": 0.2796, "step": 13006 }, { "epoch": 0.6032931354359926, "grad_norm": 5.520013332366943, "learning_rate": 3.4656978054861843e-06, "loss": 0.2929, "step": 13007 }, { "epoch": 0.6033395176252319, "grad_norm": 6.134889602661133, "learning_rate": 3.464997385214005e-06, "loss": 0.3686, "step": 13008 }, { "epoch": 0.6033858998144712, "grad_norm": 5.220885276794434, "learning_rate": 3.4642969981968487e-06, "loss": 0.4195, "step": 13009 }, { "epoch": 0.6034322820037106, "grad_norm": 5.396878719329834, "learning_rate": 3.463596644449887e-06, "loss": 0.2172, "step": 13010 }, { "epoch": 0.6034786641929499, "grad_norm": 7.390817165374756, "learning_rate": 3.462896323988294e-06, "loss": 0.3327, "step": 13011 }, { "epoch": 0.6035250463821893, "grad_norm": 5.443044662475586, "learning_rate": 3.462196036827242e-06, "loss": 0.1776, "step": 13012 }, { "epoch": 0.6035714285714285, "grad_norm": 8.299874305725098, "learning_rate": 3.461495782981903e-06, "loss": 0.3928, "step": 13013 }, { "epoch": 0.6036178107606679, "grad_norm": 9.30019474029541, "learning_rate": 3.4607955624674438e-06, "loss": 0.3643, "step": 13014 }, { "epoch": 0.6036641929499073, "grad_norm": 8.073884963989258, "learning_rate": 3.4600953752990373e-06, "loss": 0.4617, "step": 13015 }, { "epoch": 0.6037105751391466, "grad_norm": 6.208885192871094, "learning_rate": 3.459395221491853e-06, "loss": 0.3323, "step": 13016 }, { "epoch": 0.6037569573283859, "grad_norm": 8.956159591674805, "learning_rate": 3.458695101061059e-06, "loss": 0.3902, "step": 13017 }, { "epoch": 0.6038033395176252, "grad_norm": 9.512307167053223, "learning_rate": 3.457995014021823e-06, "loss": 0.4171, "step": 13018 }, { "epoch": 0.6038497217068646, "grad_norm": 6.600151062011719, "learning_rate": 3.4572949603893106e-06, "loss": 0.3579, "step": 13019 }, { "epoch": 0.6038961038961039, "grad_norm": 9.54042911529541, "learning_rate": 3.456594940178689e-06, "loss": 0.2823, "step": 13020 }, { "epoch": 0.6039424860853432, "grad_norm": 6.2379469871521, "learning_rate": 3.4558949534051245e-06, "loss": 0.3433, "step": 13021 }, { "epoch": 0.6039888682745825, "grad_norm": 9.729146003723145, "learning_rate": 3.4551950000837818e-06, "loss": 0.3185, "step": 13022 }, { "epoch": 0.6040352504638219, "grad_norm": 6.573065280914307, "learning_rate": 3.454495080229825e-06, "loss": 0.33, "step": 13023 }, { "epoch": 0.6040816326530613, "grad_norm": 9.00787353515625, "learning_rate": 3.453795193858416e-06, "loss": 0.3663, "step": 13024 }, { "epoch": 0.6041280148423006, "grad_norm": 8.108055114746094, "learning_rate": 3.4530953409847186e-06, "loss": 0.32, "step": 13025 }, { "epoch": 0.6041743970315399, "grad_norm": 5.230971813201904, "learning_rate": 3.4523955216238947e-06, "loss": 0.3128, "step": 13026 }, { "epoch": 0.6042207792207792, "grad_norm": 5.403048992156982, "learning_rate": 3.451695735791106e-06, "loss": 0.2911, "step": 13027 }, { "epoch": 0.6042671614100186, "grad_norm": 10.990272521972656, "learning_rate": 3.450995983501513e-06, "loss": 0.4098, "step": 13028 }, { "epoch": 0.6043135435992579, "grad_norm": 20.775814056396484, "learning_rate": 3.450296264770274e-06, "loss": 0.3976, "step": 13029 }, { "epoch": 0.6043599257884972, "grad_norm": 10.043671607971191, "learning_rate": 3.4495965796125496e-06, "loss": 0.5239, "step": 13030 }, { "epoch": 0.6044063079777365, "grad_norm": 6.535023212432861, "learning_rate": 3.448896928043497e-06, "loss": 0.3404, "step": 13031 }, { "epoch": 0.6044526901669759, "grad_norm": 12.156468391418457, "learning_rate": 3.4481973100782756e-06, "loss": 0.428, "step": 13032 }, { "epoch": 0.6044990723562153, "grad_norm": 7.695651531219482, "learning_rate": 3.4474977257320395e-06, "loss": 0.3506, "step": 13033 }, { "epoch": 0.6045454545454545, "grad_norm": 4.4893975257873535, "learning_rate": 3.446798175019948e-06, "loss": 0.281, "step": 13034 }, { "epoch": 0.6045918367346939, "grad_norm": 6.678851127624512, "learning_rate": 3.446098657957153e-06, "loss": 0.3217, "step": 13035 }, { "epoch": 0.6046382189239332, "grad_norm": 8.5882568359375, "learning_rate": 3.4453991745588116e-06, "loss": 0.3079, "step": 13036 }, { "epoch": 0.6046846011131726, "grad_norm": 8.237589836120605, "learning_rate": 3.444699724840078e-06, "loss": 0.4159, "step": 13037 }, { "epoch": 0.6047309833024119, "grad_norm": 6.011612892150879, "learning_rate": 3.4440003088161034e-06, "loss": 0.3291, "step": 13038 }, { "epoch": 0.6047773654916512, "grad_norm": 4.141557216644287, "learning_rate": 3.443300926502041e-06, "loss": 0.2168, "step": 13039 }, { "epoch": 0.6048237476808905, "grad_norm": 7.548096179962158, "learning_rate": 3.442601577913045e-06, "loss": 0.3414, "step": 13040 }, { "epoch": 0.6048701298701299, "grad_norm": 7.742949962615967, "learning_rate": 3.441902263064263e-06, "loss": 0.3137, "step": 13041 }, { "epoch": 0.6049165120593692, "grad_norm": 8.082318305969238, "learning_rate": 3.441202981970847e-06, "loss": 0.3668, "step": 13042 }, { "epoch": 0.6049628942486085, "grad_norm": 11.121709823608398, "learning_rate": 3.440503734647946e-06, "loss": 0.4143, "step": 13043 }, { "epoch": 0.6050092764378479, "grad_norm": 5.675877094268799, "learning_rate": 3.4398045211107083e-06, "loss": 0.2343, "step": 13044 }, { "epoch": 0.6050556586270872, "grad_norm": 6.346436023712158, "learning_rate": 3.4391053413742844e-06, "loss": 0.2487, "step": 13045 }, { "epoch": 0.6051020408163266, "grad_norm": 10.455169677734375, "learning_rate": 3.4384061954538183e-06, "loss": 0.4321, "step": 13046 }, { "epoch": 0.6051484230055658, "grad_norm": 6.325353145599365, "learning_rate": 3.4377070833644597e-06, "loss": 0.337, "step": 13047 }, { "epoch": 0.6051948051948052, "grad_norm": 8.334894180297852, "learning_rate": 3.4370080051213527e-06, "loss": 0.4619, "step": 13048 }, { "epoch": 0.6052411873840445, "grad_norm": 4.73803186416626, "learning_rate": 3.4363089607396422e-06, "loss": 0.3696, "step": 13049 }, { "epoch": 0.6052875695732839, "grad_norm": 4.5391845703125, "learning_rate": 3.435609950234473e-06, "loss": 0.192, "step": 13050 }, { "epoch": 0.6053339517625231, "grad_norm": 4.218226432800293, "learning_rate": 3.4349109736209906e-06, "loss": 0.3042, "step": 13051 }, { "epoch": 0.6053803339517625, "grad_norm": 8.41595458984375, "learning_rate": 3.434212030914337e-06, "loss": 0.3528, "step": 13052 }, { "epoch": 0.6054267161410019, "grad_norm": 5.422752857208252, "learning_rate": 3.4335131221296517e-06, "loss": 0.3228, "step": 13053 }, { "epoch": 0.6054730983302412, "grad_norm": 8.838882446289062, "learning_rate": 3.432814247282079e-06, "loss": 0.4506, "step": 13054 }, { "epoch": 0.6055194805194806, "grad_norm": 5.725833892822266, "learning_rate": 3.432115406386759e-06, "loss": 0.2808, "step": 13055 }, { "epoch": 0.6055658627087198, "grad_norm": 9.188819885253906, "learning_rate": 3.4314165994588327e-06, "loss": 0.3097, "step": 13056 }, { "epoch": 0.6056122448979592, "grad_norm": 4.3823747634887695, "learning_rate": 3.430717826513439e-06, "loss": 0.2669, "step": 13057 }, { "epoch": 0.6056586270871985, "grad_norm": 3.6217010021209717, "learning_rate": 3.4300190875657135e-06, "loss": 0.2757, "step": 13058 }, { "epoch": 0.6057050092764379, "grad_norm": 5.699826240539551, "learning_rate": 3.4293203826307977e-06, "loss": 0.3424, "step": 13059 }, { "epoch": 0.6057513914656771, "grad_norm": 5.69721794128418, "learning_rate": 3.4286217117238267e-06, "loss": 0.2331, "step": 13060 }, { "epoch": 0.6057977736549165, "grad_norm": 10.59990119934082, "learning_rate": 3.427923074859938e-06, "loss": 0.2979, "step": 13061 }, { "epoch": 0.6058441558441559, "grad_norm": 5.259859561920166, "learning_rate": 3.427224472054268e-06, "loss": 0.3641, "step": 13062 }, { "epoch": 0.6058905380333952, "grad_norm": 5.371934413909912, "learning_rate": 3.426525903321949e-06, "loss": 0.299, "step": 13063 }, { "epoch": 0.6059369202226345, "grad_norm": 7.800501823425293, "learning_rate": 3.4258273686781156e-06, "loss": 0.3306, "step": 13064 }, { "epoch": 0.6059833024118738, "grad_norm": 5.7113776206970215, "learning_rate": 3.4251288681379026e-06, "loss": 0.3037, "step": 13065 }, { "epoch": 0.6060296846011132, "grad_norm": 14.901684761047363, "learning_rate": 3.424430401716442e-06, "loss": 0.3561, "step": 13066 }, { "epoch": 0.6060760667903525, "grad_norm": 12.539118766784668, "learning_rate": 3.4237319694288674e-06, "loss": 0.4275, "step": 13067 }, { "epoch": 0.6061224489795919, "grad_norm": 6.2349653244018555, "learning_rate": 3.423033571290306e-06, "loss": 0.3656, "step": 13068 }, { "epoch": 0.6061688311688311, "grad_norm": 4.690145969390869, "learning_rate": 3.422335207315891e-06, "loss": 0.2911, "step": 13069 }, { "epoch": 0.6062152133580705, "grad_norm": 8.67432975769043, "learning_rate": 3.421636877520752e-06, "loss": 0.4106, "step": 13070 }, { "epoch": 0.6062615955473099, "grad_norm": 12.598091125488281, "learning_rate": 3.420938581920017e-06, "loss": 0.5319, "step": 13071 }, { "epoch": 0.6063079777365492, "grad_norm": 10.281116485595703, "learning_rate": 3.420240320528815e-06, "loss": 0.3462, "step": 13072 }, { "epoch": 0.6063543599257885, "grad_norm": 8.89946460723877, "learning_rate": 3.419542093362275e-06, "loss": 0.3818, "step": 13073 }, { "epoch": 0.6064007421150278, "grad_norm": 8.277530670166016, "learning_rate": 3.41884390043552e-06, "loss": 0.3554, "step": 13074 }, { "epoch": 0.6064471243042672, "grad_norm": 12.210362434387207, "learning_rate": 3.4181457417636774e-06, "loss": 0.4933, "step": 13075 }, { "epoch": 0.6064935064935065, "grad_norm": 7.239794731140137, "learning_rate": 3.4174476173618736e-06, "loss": 0.396, "step": 13076 }, { "epoch": 0.6065398886827458, "grad_norm": 6.7927350997924805, "learning_rate": 3.416749527245232e-06, "loss": 0.2953, "step": 13077 }, { "epoch": 0.6065862708719851, "grad_norm": 8.839234352111816, "learning_rate": 3.4160514714288795e-06, "loss": 0.2949, "step": 13078 }, { "epoch": 0.6066326530612245, "grad_norm": 5.464972019195557, "learning_rate": 3.415353449927934e-06, "loss": 0.3569, "step": 13079 }, { "epoch": 0.6066790352504638, "grad_norm": 8.275596618652344, "learning_rate": 3.4146554627575207e-06, "loss": 0.3404, "step": 13080 }, { "epoch": 0.6067254174397032, "grad_norm": 6.862821102142334, "learning_rate": 3.4139575099327605e-06, "loss": 0.2919, "step": 13081 }, { "epoch": 0.6067717996289425, "grad_norm": 6.173007011413574, "learning_rate": 3.413259591468774e-06, "loss": 0.3725, "step": 13082 }, { "epoch": 0.6068181818181818, "grad_norm": 11.319206237792969, "learning_rate": 3.412561707380681e-06, "loss": 0.4321, "step": 13083 }, { "epoch": 0.6068645640074212, "grad_norm": 6.812318801879883, "learning_rate": 3.411863857683604e-06, "loss": 0.3511, "step": 13084 }, { "epoch": 0.6069109461966605, "grad_norm": 3.880749225616455, "learning_rate": 3.4111660423926564e-06, "loss": 0.3136, "step": 13085 }, { "epoch": 0.6069573283858998, "grad_norm": 4.657279968261719, "learning_rate": 3.410468261522959e-06, "loss": 0.2658, "step": 13086 }, { "epoch": 0.6070037105751391, "grad_norm": 8.815763473510742, "learning_rate": 3.409770515089628e-06, "loss": 0.3527, "step": 13087 }, { "epoch": 0.6070500927643785, "grad_norm": 7.3207011222839355, "learning_rate": 3.40907280310778e-06, "loss": 0.4065, "step": 13088 }, { "epoch": 0.6070964749536178, "grad_norm": 6.272680282592773, "learning_rate": 3.4083751255925336e-06, "loss": 0.3208, "step": 13089 }, { "epoch": 0.6071428571428571, "grad_norm": 6.60359001159668, "learning_rate": 3.4076774825589975e-06, "loss": 0.2764, "step": 13090 }, { "epoch": 0.6071892393320965, "grad_norm": 16.480730056762695, "learning_rate": 3.4069798740222886e-06, "loss": 0.335, "step": 13091 }, { "epoch": 0.6072356215213358, "grad_norm": 5.2555389404296875, "learning_rate": 3.406282299997521e-06, "loss": 0.3335, "step": 13092 }, { "epoch": 0.6072820037105752, "grad_norm": 4.6715922355651855, "learning_rate": 3.4055847604998073e-06, "loss": 0.333, "step": 13093 }, { "epoch": 0.6073283858998144, "grad_norm": 13.306699752807617, "learning_rate": 3.404887255544258e-06, "loss": 0.5011, "step": 13094 }, { "epoch": 0.6073747680890538, "grad_norm": 6.840017318725586, "learning_rate": 3.404189785145987e-06, "loss": 0.2798, "step": 13095 }, { "epoch": 0.6074211502782931, "grad_norm": 5.875580787658691, "learning_rate": 3.403492349320101e-06, "loss": 0.3565, "step": 13096 }, { "epoch": 0.6074675324675325, "grad_norm": 4.659041881561279, "learning_rate": 3.4027949480817113e-06, "loss": 0.308, "step": 13097 }, { "epoch": 0.6075139146567718, "grad_norm": 4.7202863693237305, "learning_rate": 3.402097581445926e-06, "loss": 0.3388, "step": 13098 }, { "epoch": 0.6075602968460111, "grad_norm": 5.0940680503845215, "learning_rate": 3.4014002494278542e-06, "loss": 0.3318, "step": 13099 }, { "epoch": 0.6076066790352505, "grad_norm": 7.5531816482543945, "learning_rate": 3.400702952042605e-06, "loss": 0.3349, "step": 13100 }, { "epoch": 0.6076530612244898, "grad_norm": 5.5634636878967285, "learning_rate": 3.4000056893052807e-06, "loss": 0.2677, "step": 13101 }, { "epoch": 0.6076994434137292, "grad_norm": 5.960272789001465, "learning_rate": 3.399308461230989e-06, "loss": 0.3234, "step": 13102 }, { "epoch": 0.6077458256029684, "grad_norm": 8.14973258972168, "learning_rate": 3.3986112678348355e-06, "loss": 0.3099, "step": 13103 }, { "epoch": 0.6077922077922078, "grad_norm": 7.5223236083984375, "learning_rate": 3.397914109131924e-06, "loss": 0.3755, "step": 13104 }, { "epoch": 0.6078385899814471, "grad_norm": 10.976507186889648, "learning_rate": 3.3972169851373603e-06, "loss": 0.4847, "step": 13105 }, { "epoch": 0.6078849721706865, "grad_norm": 6.4541707038879395, "learning_rate": 3.396519895866243e-06, "loss": 0.2734, "step": 13106 }, { "epoch": 0.6079313543599257, "grad_norm": 9.250991821289062, "learning_rate": 3.395822841333677e-06, "loss": 0.4288, "step": 13107 }, { "epoch": 0.6079777365491651, "grad_norm": 7.963510990142822, "learning_rate": 3.395125821554763e-06, "loss": 0.411, "step": 13108 }, { "epoch": 0.6080241187384045, "grad_norm": 4.514041423797607, "learning_rate": 3.394428836544601e-06, "loss": 0.3269, "step": 13109 }, { "epoch": 0.6080705009276438, "grad_norm": 4.704777717590332, "learning_rate": 3.393731886318292e-06, "loss": 0.2718, "step": 13110 }, { "epoch": 0.6081168831168832, "grad_norm": 7.722800254821777, "learning_rate": 3.393034970890936e-06, "loss": 0.3446, "step": 13111 }, { "epoch": 0.6081632653061224, "grad_norm": 13.02091121673584, "learning_rate": 3.392338090277628e-06, "loss": 0.4966, "step": 13112 }, { "epoch": 0.6082096474953618, "grad_norm": 5.368030548095703, "learning_rate": 3.391641244493468e-06, "loss": 0.3129, "step": 13113 }, { "epoch": 0.6082560296846011, "grad_norm": 15.341392517089844, "learning_rate": 3.3909444335535506e-06, "loss": 0.4482, "step": 13114 }, { "epoch": 0.6083024118738405, "grad_norm": 8.58441162109375, "learning_rate": 3.3902476574729744e-06, "loss": 0.3073, "step": 13115 }, { "epoch": 0.6083487940630797, "grad_norm": 10.785262107849121, "learning_rate": 3.3895509162668357e-06, "loss": 0.3891, "step": 13116 }, { "epoch": 0.6083951762523191, "grad_norm": 7.028096675872803, "learning_rate": 3.388854209950224e-06, "loss": 0.3544, "step": 13117 }, { "epoch": 0.6084415584415584, "grad_norm": 9.436888694763184, "learning_rate": 3.388157538538237e-06, "loss": 0.4502, "step": 13118 }, { "epoch": 0.6084879406307978, "grad_norm": 8.00256633758545, "learning_rate": 3.387460902045967e-06, "loss": 0.3062, "step": 13119 }, { "epoch": 0.608534322820037, "grad_norm": 5.93073844909668, "learning_rate": 3.3867643004885055e-06, "loss": 0.2727, "step": 13120 }, { "epoch": 0.6085807050092764, "grad_norm": 34.58613586425781, "learning_rate": 3.3860677338809446e-06, "loss": 0.4368, "step": 13121 }, { "epoch": 0.6086270871985158, "grad_norm": 10.016317367553711, "learning_rate": 3.385371202238377e-06, "loss": 0.3918, "step": 13122 }, { "epoch": 0.6086734693877551, "grad_norm": 5.403956413269043, "learning_rate": 3.3846747055758884e-06, "loss": 0.3176, "step": 13123 }, { "epoch": 0.6087198515769945, "grad_norm": 5.564053535461426, "learning_rate": 3.3839782439085704e-06, "loss": 0.2776, "step": 13124 }, { "epoch": 0.6087662337662337, "grad_norm": 6.543228626251221, "learning_rate": 3.3832818172515115e-06, "loss": 0.3544, "step": 13125 }, { "epoch": 0.6088126159554731, "grad_norm": 4.5395050048828125, "learning_rate": 3.3825854256197994e-06, "loss": 0.2292, "step": 13126 }, { "epoch": 0.6088589981447124, "grad_norm": 6.963812828063965, "learning_rate": 3.3818890690285232e-06, "loss": 0.3196, "step": 13127 }, { "epoch": 0.6089053803339518, "grad_norm": 6.771130084991455, "learning_rate": 3.3811927474927644e-06, "loss": 0.3068, "step": 13128 }, { "epoch": 0.608951762523191, "grad_norm": 6.160604953765869, "learning_rate": 3.380496461027611e-06, "loss": 0.3283, "step": 13129 }, { "epoch": 0.6089981447124304, "grad_norm": 7.540902614593506, "learning_rate": 3.3798002096481475e-06, "loss": 0.4087, "step": 13130 }, { "epoch": 0.6090445269016698, "grad_norm": 8.933931350708008, "learning_rate": 3.379103993369458e-06, "loss": 0.3241, "step": 13131 }, { "epoch": 0.6090909090909091, "grad_norm": 7.730639934539795, "learning_rate": 3.3784078122066253e-06, "loss": 0.3353, "step": 13132 }, { "epoch": 0.6091372912801484, "grad_norm": 9.057631492614746, "learning_rate": 3.3777116661747345e-06, "loss": 0.3626, "step": 13133 }, { "epoch": 0.6091836734693877, "grad_norm": 8.923294067382812, "learning_rate": 3.377015555288863e-06, "loss": 0.3814, "step": 13134 }, { "epoch": 0.6092300556586271, "grad_norm": 5.243546962738037, "learning_rate": 3.376319479564093e-06, "loss": 0.3059, "step": 13135 }, { "epoch": 0.6092764378478664, "grad_norm": 5.024157524108887, "learning_rate": 3.3756234390155053e-06, "loss": 0.2843, "step": 13136 }, { "epoch": 0.6093228200371058, "grad_norm": 4.143901348114014, "learning_rate": 3.3749274336581784e-06, "loss": 0.277, "step": 13137 }, { "epoch": 0.609369202226345, "grad_norm": 4.953906536102295, "learning_rate": 3.3742314635071937e-06, "loss": 0.3045, "step": 13138 }, { "epoch": 0.6094155844155844, "grad_norm": 7.3666768074035645, "learning_rate": 3.3735355285776257e-06, "loss": 0.3956, "step": 13139 }, { "epoch": 0.6094619666048238, "grad_norm": 7.3448381423950195, "learning_rate": 3.372839628884552e-06, "loss": 0.3182, "step": 13140 }, { "epoch": 0.6095083487940631, "grad_norm": 11.9797945022583, "learning_rate": 3.3721437644430493e-06, "loss": 0.4109, "step": 13141 }, { "epoch": 0.6095547309833024, "grad_norm": 7.324395656585693, "learning_rate": 3.371447935268194e-06, "loss": 0.3277, "step": 13142 }, { "epoch": 0.6096011131725417, "grad_norm": 11.41053295135498, "learning_rate": 3.3707521413750593e-06, "loss": 0.2576, "step": 13143 }, { "epoch": 0.6096474953617811, "grad_norm": 3.8179357051849365, "learning_rate": 3.3700563827787224e-06, "loss": 0.2969, "step": 13144 }, { "epoch": 0.6096938775510204, "grad_norm": 5.902023792266846, "learning_rate": 3.369360659494253e-06, "loss": 0.3801, "step": 13145 }, { "epoch": 0.6097402597402597, "grad_norm": 4.829202175140381, "learning_rate": 3.368664971536724e-06, "loss": 0.2222, "step": 13146 }, { "epoch": 0.609786641929499, "grad_norm": 8.934510231018066, "learning_rate": 3.367969318921208e-06, "loss": 0.5133, "step": 13147 }, { "epoch": 0.6098330241187384, "grad_norm": 8.763993263244629, "learning_rate": 3.3672737016627767e-06, "loss": 0.3897, "step": 13148 }, { "epoch": 0.6098794063079778, "grad_norm": 6.014430046081543, "learning_rate": 3.3665781197765e-06, "loss": 0.2863, "step": 13149 }, { "epoch": 0.609925788497217, "grad_norm": 8.711052894592285, "learning_rate": 3.3658825732774457e-06, "loss": 0.3882, "step": 13150 }, { "epoch": 0.6099721706864564, "grad_norm": 6.783475875854492, "learning_rate": 3.365187062180684e-06, "loss": 0.2947, "step": 13151 }, { "epoch": 0.6100185528756957, "grad_norm": 5.1005659103393555, "learning_rate": 3.3644915865012816e-06, "loss": 0.2022, "step": 13152 }, { "epoch": 0.6100649350649351, "grad_norm": 13.11625862121582, "learning_rate": 3.363796146254307e-06, "loss": 0.3404, "step": 13153 }, { "epoch": 0.6101113172541744, "grad_norm": 5.487527847290039, "learning_rate": 3.3631007414548254e-06, "loss": 0.2616, "step": 13154 }, { "epoch": 0.6101576994434137, "grad_norm": 8.1826171875, "learning_rate": 3.362405372117905e-06, "loss": 0.4531, "step": 13155 }, { "epoch": 0.610204081632653, "grad_norm": 5.674679756164551, "learning_rate": 3.3617100382586065e-06, "loss": 0.2727, "step": 13156 }, { "epoch": 0.6102504638218924, "grad_norm": 3.968698501586914, "learning_rate": 3.361014739891996e-06, "loss": 0.2771, "step": 13157 }, { "epoch": 0.6102968460111318, "grad_norm": 9.34423542022705, "learning_rate": 3.360319477033136e-06, "loss": 0.287, "step": 13158 }, { "epoch": 0.610343228200371, "grad_norm": 8.17790412902832, "learning_rate": 3.3596242496970905e-06, "loss": 0.292, "step": 13159 }, { "epoch": 0.6103896103896104, "grad_norm": 4.539457321166992, "learning_rate": 3.358929057898922e-06, "loss": 0.2787, "step": 13160 }, { "epoch": 0.6104359925788497, "grad_norm": 9.598152160644531, "learning_rate": 3.358233901653688e-06, "loss": 0.3434, "step": 13161 }, { "epoch": 0.6104823747680891, "grad_norm": 7.028369426727295, "learning_rate": 3.3575387809764504e-06, "loss": 0.4112, "step": 13162 }, { "epoch": 0.6105287569573283, "grad_norm": 7.600263595581055, "learning_rate": 3.356843695882269e-06, "loss": 0.4038, "step": 13163 }, { "epoch": 0.6105751391465677, "grad_norm": 9.677569389343262, "learning_rate": 3.3561486463862026e-06, "loss": 0.4296, "step": 13164 }, { "epoch": 0.610621521335807, "grad_norm": 7.650371551513672, "learning_rate": 3.3554536325033093e-06, "loss": 0.357, "step": 13165 }, { "epoch": 0.6106679035250464, "grad_norm": 5.542078971862793, "learning_rate": 3.354758654248646e-06, "loss": 0.2648, "step": 13166 }, { "epoch": 0.6107142857142858, "grad_norm": 6.90955114364624, "learning_rate": 3.3540637116372677e-06, "loss": 0.2934, "step": 13167 }, { "epoch": 0.610760667903525, "grad_norm": 8.859430313110352, "learning_rate": 3.3533688046842305e-06, "loss": 0.2995, "step": 13168 }, { "epoch": 0.6108070500927644, "grad_norm": 10.118812561035156, "learning_rate": 3.35267393340459e-06, "loss": 0.3853, "step": 13169 }, { "epoch": 0.6108534322820037, "grad_norm": 7.565817832946777, "learning_rate": 3.3519790978134004e-06, "loss": 0.3293, "step": 13170 }, { "epoch": 0.6108998144712431, "grad_norm": 5.570667266845703, "learning_rate": 3.3512842979257144e-06, "loss": 0.3379, "step": 13171 }, { "epoch": 0.6109461966604823, "grad_norm": 5.377929210662842, "learning_rate": 3.350589533756584e-06, "loss": 0.1923, "step": 13172 }, { "epoch": 0.6109925788497217, "grad_norm": 10.341257095336914, "learning_rate": 3.349894805321061e-06, "loss": 0.3422, "step": 13173 }, { "epoch": 0.611038961038961, "grad_norm": 10.12379264831543, "learning_rate": 3.3492001126341965e-06, "loss": 0.4726, "step": 13174 }, { "epoch": 0.6110853432282004, "grad_norm": 5.642992973327637, "learning_rate": 3.3485054557110424e-06, "loss": 0.2987, "step": 13175 }, { "epoch": 0.6111317254174397, "grad_norm": 7.258047103881836, "learning_rate": 3.3478108345666456e-06, "loss": 0.4538, "step": 13176 }, { "epoch": 0.611178107606679, "grad_norm": 5.797384738922119, "learning_rate": 3.3471162492160557e-06, "loss": 0.3781, "step": 13177 }, { "epoch": 0.6112244897959184, "grad_norm": 7.008035659790039, "learning_rate": 3.3464216996743203e-06, "loss": 0.328, "step": 13178 }, { "epoch": 0.6112708719851577, "grad_norm": 8.199530601501465, "learning_rate": 3.345727185956487e-06, "loss": 0.3605, "step": 13179 }, { "epoch": 0.6113172541743971, "grad_norm": 8.044638633728027, "learning_rate": 3.345032708077602e-06, "loss": 0.3256, "step": 13180 }, { "epoch": 0.6113636363636363, "grad_norm": 11.970528602600098, "learning_rate": 3.344338266052709e-06, "loss": 0.3157, "step": 13181 }, { "epoch": 0.6114100185528757, "grad_norm": 10.356345176696777, "learning_rate": 3.3436438598968563e-06, "loss": 0.2743, "step": 13182 }, { "epoch": 0.611456400742115, "grad_norm": 10.40312385559082, "learning_rate": 3.3429494896250837e-06, "loss": 0.4068, "step": 13183 }, { "epoch": 0.6115027829313544, "grad_norm": 6.355099678039551, "learning_rate": 3.3422551552524372e-06, "loss": 0.2605, "step": 13184 }, { "epoch": 0.6115491651205937, "grad_norm": 4.906591892242432, "learning_rate": 3.3415608567939593e-06, "loss": 0.288, "step": 13185 }, { "epoch": 0.611595547309833, "grad_norm": 8.730217933654785, "learning_rate": 3.340866594264689e-06, "loss": 0.3756, "step": 13186 }, { "epoch": 0.6116419294990724, "grad_norm": 9.54538631439209, "learning_rate": 3.3401723676796703e-06, "loss": 0.3013, "step": 13187 }, { "epoch": 0.6116883116883117, "grad_norm": 7.3992600440979, "learning_rate": 3.3394781770539406e-06, "loss": 0.3805, "step": 13188 }, { "epoch": 0.611734693877551, "grad_norm": 8.36016845703125, "learning_rate": 3.3387840224025414e-06, "loss": 0.4546, "step": 13189 }, { "epoch": 0.6117810760667903, "grad_norm": 8.687457084655762, "learning_rate": 3.3380899037405103e-06, "loss": 0.3835, "step": 13190 }, { "epoch": 0.6118274582560297, "grad_norm": 8.873551368713379, "learning_rate": 3.337395821082884e-06, "loss": 0.3262, "step": 13191 }, { "epoch": 0.611873840445269, "grad_norm": 4.161442279815674, "learning_rate": 3.3367017744446995e-06, "loss": 0.3031, "step": 13192 }, { "epoch": 0.6119202226345084, "grad_norm": 10.69223403930664, "learning_rate": 3.336007763840996e-06, "loss": 0.4227, "step": 13193 }, { "epoch": 0.6119666048237477, "grad_norm": 6.897312641143799, "learning_rate": 3.335313789286805e-06, "loss": 0.4378, "step": 13194 }, { "epoch": 0.612012987012987, "grad_norm": 6.198975086212158, "learning_rate": 3.334619850797164e-06, "loss": 0.3197, "step": 13195 }, { "epoch": 0.6120593692022264, "grad_norm": 11.063921928405762, "learning_rate": 3.333925948387104e-06, "loss": 0.5245, "step": 13196 }, { "epoch": 0.6121057513914657, "grad_norm": 10.388757705688477, "learning_rate": 3.33323208207166e-06, "loss": 0.3426, "step": 13197 }, { "epoch": 0.612152133580705, "grad_norm": 5.866208553314209, "learning_rate": 3.3325382518658644e-06, "loss": 0.3769, "step": 13198 }, { "epoch": 0.6121985157699443, "grad_norm": 8.491291046142578, "learning_rate": 3.3318444577847475e-06, "loss": 0.3263, "step": 13199 }, { "epoch": 0.6122448979591837, "grad_norm": 3.710993528366089, "learning_rate": 3.331150699843342e-06, "loss": 0.2645, "step": 13200 }, { "epoch": 0.612291280148423, "grad_norm": 7.007137298583984, "learning_rate": 3.3304569780566743e-06, "loss": 0.313, "step": 13201 }, { "epoch": 0.6123376623376623, "grad_norm": 6.594139099121094, "learning_rate": 3.3297632924397762e-06, "loss": 0.3941, "step": 13202 }, { "epoch": 0.6123840445269016, "grad_norm": 19.23833465576172, "learning_rate": 3.329069643007675e-06, "loss": 0.4009, "step": 13203 }, { "epoch": 0.612430426716141, "grad_norm": 17.299104690551758, "learning_rate": 3.328376029775401e-06, "loss": 0.4338, "step": 13204 }, { "epoch": 0.6124768089053804, "grad_norm": 6.0370893478393555, "learning_rate": 3.327682452757977e-06, "loss": 0.2692, "step": 13205 }, { "epoch": 0.6125231910946196, "grad_norm": 5.014091968536377, "learning_rate": 3.32698891197043e-06, "loss": 0.2826, "step": 13206 }, { "epoch": 0.612569573283859, "grad_norm": 8.712574005126953, "learning_rate": 3.3262954074277866e-06, "loss": 0.407, "step": 13207 }, { "epoch": 0.6126159554730983, "grad_norm": 5.024024963378906, "learning_rate": 3.3256019391450696e-06, "loss": 0.3703, "step": 13208 }, { "epoch": 0.6126623376623377, "grad_norm": 4.097498416900635, "learning_rate": 3.3249085071373042e-06, "loss": 0.2741, "step": 13209 }, { "epoch": 0.612708719851577, "grad_norm": 5.709216117858887, "learning_rate": 3.324215111419513e-06, "loss": 0.2438, "step": 13210 }, { "epoch": 0.6127551020408163, "grad_norm": 4.1682610511779785, "learning_rate": 3.3235217520067163e-06, "loss": 0.2941, "step": 13211 }, { "epoch": 0.6128014842300556, "grad_norm": 5.3433990478515625, "learning_rate": 3.3228284289139375e-06, "loss": 0.3813, "step": 13212 }, { "epoch": 0.612847866419295, "grad_norm": 4.262756824493408, "learning_rate": 3.3221351421561956e-06, "loss": 0.302, "step": 13213 }, { "epoch": 0.6128942486085344, "grad_norm": 6.263112545013428, "learning_rate": 3.3214418917485104e-06, "loss": 0.28, "step": 13214 }, { "epoch": 0.6129406307977736, "grad_norm": 7.830286979675293, "learning_rate": 3.3207486777059034e-06, "loss": 0.2993, "step": 13215 }, { "epoch": 0.612987012987013, "grad_norm": 6.275429725646973, "learning_rate": 3.3200555000433885e-06, "loss": 0.383, "step": 13216 }, { "epoch": 0.6130333951762523, "grad_norm": 9.508199691772461, "learning_rate": 3.319362358775986e-06, "loss": 0.5102, "step": 13217 }, { "epoch": 0.6130797773654917, "grad_norm": 3.9261858463287354, "learning_rate": 3.3186692539187104e-06, "loss": 0.233, "step": 13218 }, { "epoch": 0.6131261595547309, "grad_norm": 5.075737476348877, "learning_rate": 3.3179761854865787e-06, "loss": 0.3531, "step": 13219 }, { "epoch": 0.6131725417439703, "grad_norm": 17.349382400512695, "learning_rate": 3.3172831534946085e-06, "loss": 0.3417, "step": 13220 }, { "epoch": 0.6132189239332096, "grad_norm": 6.609018802642822, "learning_rate": 3.316590157957809e-06, "loss": 0.305, "step": 13221 }, { "epoch": 0.613265306122449, "grad_norm": 8.530080795288086, "learning_rate": 3.315897198891196e-06, "loss": 0.3298, "step": 13222 }, { "epoch": 0.6133116883116884, "grad_norm": 7.233323097229004, "learning_rate": 3.3152042763097814e-06, "loss": 0.3532, "step": 13223 }, { "epoch": 0.6133580705009276, "grad_norm": 10.348653793334961, "learning_rate": 3.314511390228578e-06, "loss": 0.4988, "step": 13224 }, { "epoch": 0.613404452690167, "grad_norm": 8.055191993713379, "learning_rate": 3.313818540662596e-06, "loss": 0.3709, "step": 13225 }, { "epoch": 0.6134508348794063, "grad_norm": 7.682555198669434, "learning_rate": 3.313125727626848e-06, "loss": 0.3757, "step": 13226 }, { "epoch": 0.6134972170686457, "grad_norm": 8.156807899475098, "learning_rate": 3.3124329511363397e-06, "loss": 0.2563, "step": 13227 }, { "epoch": 0.6135435992578849, "grad_norm": 6.573216915130615, "learning_rate": 3.311740211206081e-06, "loss": 0.2402, "step": 13228 }, { "epoch": 0.6135899814471243, "grad_norm": 6.957387447357178, "learning_rate": 3.3110475078510807e-06, "loss": 0.3512, "step": 13229 }, { "epoch": 0.6136363636363636, "grad_norm": 8.456646919250488, "learning_rate": 3.3103548410863445e-06, "loss": 0.3253, "step": 13230 }, { "epoch": 0.613682745825603, "grad_norm": 8.287455558776855, "learning_rate": 3.3096622109268817e-06, "loss": 0.3807, "step": 13231 }, { "epoch": 0.6137291280148423, "grad_norm": 6.033447265625, "learning_rate": 3.308969617387694e-06, "loss": 0.2963, "step": 13232 }, { "epoch": 0.6137755102040816, "grad_norm": 8.2649564743042, "learning_rate": 3.308277060483787e-06, "loss": 0.3036, "step": 13233 }, { "epoch": 0.613821892393321, "grad_norm": 5.617944717407227, "learning_rate": 3.3075845402301652e-06, "loss": 0.2975, "step": 13234 }, { "epoch": 0.6138682745825603, "grad_norm": 5.6157684326171875, "learning_rate": 3.306892056641833e-06, "loss": 0.3316, "step": 13235 }, { "epoch": 0.6139146567717997, "grad_norm": 16.323253631591797, "learning_rate": 3.30619960973379e-06, "loss": 0.4325, "step": 13236 }, { "epoch": 0.6139610389610389, "grad_norm": 7.037364482879639, "learning_rate": 3.3055071995210412e-06, "loss": 0.3871, "step": 13237 }, { "epoch": 0.6140074211502783, "grad_norm": 5.54096794128418, "learning_rate": 3.304814826018584e-06, "loss": 0.3675, "step": 13238 }, { "epoch": 0.6140538033395176, "grad_norm": 5.123814582824707, "learning_rate": 3.304122489241419e-06, "loss": 0.4134, "step": 13239 }, { "epoch": 0.614100185528757, "grad_norm": 12.25764274597168, "learning_rate": 3.303430189204545e-06, "loss": 0.3011, "step": 13240 }, { "epoch": 0.6141465677179963, "grad_norm": 8.051478385925293, "learning_rate": 3.302737925922962e-06, "loss": 0.3959, "step": 13241 }, { "epoch": 0.6141929499072356, "grad_norm": 10.39344596862793, "learning_rate": 3.3020456994116688e-06, "loss": 0.3905, "step": 13242 }, { "epoch": 0.614239332096475, "grad_norm": 9.521618843078613, "learning_rate": 3.3013535096856576e-06, "loss": 0.2898, "step": 13243 }, { "epoch": 0.6142857142857143, "grad_norm": 7.966215133666992, "learning_rate": 3.3006613567599266e-06, "loss": 0.4033, "step": 13244 }, { "epoch": 0.6143320964749536, "grad_norm": 6.96950101852417, "learning_rate": 3.299969240649471e-06, "loss": 0.3871, "step": 13245 }, { "epoch": 0.6143784786641929, "grad_norm": 8.973722457885742, "learning_rate": 3.2992771613692847e-06, "loss": 0.4675, "step": 13246 }, { "epoch": 0.6144248608534323, "grad_norm": 11.735882759094238, "learning_rate": 3.298585118934362e-06, "loss": 0.4899, "step": 13247 }, { "epoch": 0.6144712430426716, "grad_norm": 7.904469966888428, "learning_rate": 3.297893113359698e-06, "loss": 0.3725, "step": 13248 }, { "epoch": 0.6145176252319109, "grad_norm": 5.833374500274658, "learning_rate": 3.297201144660279e-06, "loss": 0.3264, "step": 13249 }, { "epoch": 0.6145640074211502, "grad_norm": 7.89967679977417, "learning_rate": 3.2965092128510996e-06, "loss": 0.3575, "step": 13250 }, { "epoch": 0.6146103896103896, "grad_norm": 6.677567958831787, "learning_rate": 3.2958173179471496e-06, "loss": 0.3552, "step": 13251 }, { "epoch": 0.614656771799629, "grad_norm": 11.271219253540039, "learning_rate": 3.295125459963418e-06, "loss": 0.3997, "step": 13252 }, { "epoch": 0.6147031539888683, "grad_norm": 8.64830207824707, "learning_rate": 3.294433638914897e-06, "loss": 0.3746, "step": 13253 }, { "epoch": 0.6147495361781076, "grad_norm": 6.813366889953613, "learning_rate": 3.293741854816569e-06, "loss": 0.2519, "step": 13254 }, { "epoch": 0.6147959183673469, "grad_norm": 14.03470516204834, "learning_rate": 3.2930501076834233e-06, "loss": 0.4527, "step": 13255 }, { "epoch": 0.6148423005565863, "grad_norm": 13.401819229125977, "learning_rate": 3.2923583975304474e-06, "loss": 0.3448, "step": 13256 }, { "epoch": 0.6148886827458256, "grad_norm": 7.8691277503967285, "learning_rate": 3.2916667243726265e-06, "loss": 0.3412, "step": 13257 }, { "epoch": 0.6149350649350649, "grad_norm": 7.736851692199707, "learning_rate": 3.2909750882249438e-06, "loss": 0.339, "step": 13258 }, { "epoch": 0.6149814471243042, "grad_norm": 8.545804023742676, "learning_rate": 3.290283489102387e-06, "loss": 0.3321, "step": 13259 }, { "epoch": 0.6150278293135436, "grad_norm": 7.436276435852051, "learning_rate": 3.289591927019935e-06, "loss": 0.3239, "step": 13260 }, { "epoch": 0.615074211502783, "grad_norm": 9.395181655883789, "learning_rate": 3.2889004019925706e-06, "loss": 0.3479, "step": 13261 }, { "epoch": 0.6151205936920222, "grad_norm": 8.737886428833008, "learning_rate": 3.288208914035278e-06, "loss": 0.1667, "step": 13262 }, { "epoch": 0.6151669758812616, "grad_norm": 4.738509654998779, "learning_rate": 3.287517463163036e-06, "loss": 0.3043, "step": 13263 }, { "epoch": 0.6152133580705009, "grad_norm": 18.102310180664062, "learning_rate": 3.286826049390826e-06, "loss": 0.3024, "step": 13264 }, { "epoch": 0.6152597402597403, "grad_norm": 4.047186374664307, "learning_rate": 3.286134672733625e-06, "loss": 0.3196, "step": 13265 }, { "epoch": 0.6153061224489796, "grad_norm": 4.961609363555908, "learning_rate": 3.2854433332064116e-06, "loss": 0.2791, "step": 13266 }, { "epoch": 0.6153525046382189, "grad_norm": 12.827729225158691, "learning_rate": 3.2847520308241645e-06, "loss": 0.3978, "step": 13267 }, { "epoch": 0.6153988868274582, "grad_norm": 4.962608814239502, "learning_rate": 3.2840607656018596e-06, "loss": 0.2621, "step": 13268 }, { "epoch": 0.6154452690166976, "grad_norm": 3.4432694911956787, "learning_rate": 3.283369537554476e-06, "loss": 0.2325, "step": 13269 }, { "epoch": 0.615491651205937, "grad_norm": 9.983844757080078, "learning_rate": 3.2826783466969835e-06, "loss": 0.4523, "step": 13270 }, { "epoch": 0.6155380333951762, "grad_norm": 4.230182647705078, "learning_rate": 3.281987193044358e-06, "loss": 0.2574, "step": 13271 }, { "epoch": 0.6155844155844156, "grad_norm": 6.419891834259033, "learning_rate": 3.2812960766115747e-06, "loss": 0.356, "step": 13272 }, { "epoch": 0.6156307977736549, "grad_norm": 6.885072231292725, "learning_rate": 3.2806049974136056e-06, "loss": 0.3637, "step": 13273 }, { "epoch": 0.6156771799628943, "grad_norm": 11.04282283782959, "learning_rate": 3.2799139554654214e-06, "loss": 0.4424, "step": 13274 }, { "epoch": 0.6157235621521335, "grad_norm": 13.421549797058105, "learning_rate": 3.2792229507819972e-06, "loss": 0.3327, "step": 13275 }, { "epoch": 0.6157699443413729, "grad_norm": 9.705730438232422, "learning_rate": 3.2785319833782973e-06, "loss": 0.2836, "step": 13276 }, { "epoch": 0.6158163265306122, "grad_norm": 8.102256774902344, "learning_rate": 3.2778410532692946e-06, "loss": 0.3625, "step": 13277 }, { "epoch": 0.6158627087198516, "grad_norm": 6.028767108917236, "learning_rate": 3.277150160469956e-06, "loss": 0.2475, "step": 13278 }, { "epoch": 0.615909090909091, "grad_norm": 4.617042064666748, "learning_rate": 3.276459304995252e-06, "loss": 0.3278, "step": 13279 }, { "epoch": 0.6159554730983302, "grad_norm": 8.742852210998535, "learning_rate": 3.275768486860149e-06, "loss": 0.4022, "step": 13280 }, { "epoch": 0.6160018552875696, "grad_norm": 6.8626532554626465, "learning_rate": 3.2750777060796114e-06, "loss": 0.4143, "step": 13281 }, { "epoch": 0.6160482374768089, "grad_norm": 8.595701217651367, "learning_rate": 3.274386962668604e-06, "loss": 0.3707, "step": 13282 }, { "epoch": 0.6160946196660483, "grad_norm": 20.086795806884766, "learning_rate": 3.273696256642094e-06, "loss": 0.3224, "step": 13283 }, { "epoch": 0.6161410018552875, "grad_norm": 7.100000858306885, "learning_rate": 3.2730055880150435e-06, "loss": 0.3392, "step": 13284 }, { "epoch": 0.6161873840445269, "grad_norm": 7.5058417320251465, "learning_rate": 3.2723149568024164e-06, "loss": 0.3438, "step": 13285 }, { "epoch": 0.6162337662337662, "grad_norm": 6.8636698722839355, "learning_rate": 3.271624363019177e-06, "loss": 0.3101, "step": 13286 }, { "epoch": 0.6162801484230056, "grad_norm": 5.071112632751465, "learning_rate": 3.270933806680281e-06, "loss": 0.3587, "step": 13287 }, { "epoch": 0.6163265306122448, "grad_norm": 9.9874906539917, "learning_rate": 3.270243287800693e-06, "loss": 0.4595, "step": 13288 }, { "epoch": 0.6163729128014842, "grad_norm": 5.229462623596191, "learning_rate": 3.269552806395371e-06, "loss": 0.3416, "step": 13289 }, { "epoch": 0.6164192949907236, "grad_norm": 8.735560417175293, "learning_rate": 3.2688623624792747e-06, "loss": 0.273, "step": 13290 }, { "epoch": 0.6164656771799629, "grad_norm": 7.715559959411621, "learning_rate": 3.268171956067365e-06, "loss": 0.4176, "step": 13291 }, { "epoch": 0.6165120593692023, "grad_norm": 4.935143947601318, "learning_rate": 3.2674815871745945e-06, "loss": 0.2945, "step": 13292 }, { "epoch": 0.6165584415584415, "grad_norm": 5.412637710571289, "learning_rate": 3.266791255815921e-06, "loss": 0.3588, "step": 13293 }, { "epoch": 0.6166048237476809, "grad_norm": 8.710565567016602, "learning_rate": 3.266100962006301e-06, "loss": 0.3398, "step": 13294 }, { "epoch": 0.6166512059369202, "grad_norm": 5.877878665924072, "learning_rate": 3.2654107057606892e-06, "loss": 0.3518, "step": 13295 }, { "epoch": 0.6166975881261596, "grad_norm": 6.394583225250244, "learning_rate": 3.26472048709404e-06, "loss": 0.3425, "step": 13296 }, { "epoch": 0.6167439703153988, "grad_norm": 5.43604040145874, "learning_rate": 3.2640303060213076e-06, "loss": 0.2797, "step": 13297 }, { "epoch": 0.6167903525046382, "grad_norm": 4.785632133483887, "learning_rate": 3.263340162557441e-06, "loss": 0.1926, "step": 13298 }, { "epoch": 0.6168367346938776, "grad_norm": 5.670657634735107, "learning_rate": 3.2626500567173935e-06, "loss": 0.3287, "step": 13299 }, { "epoch": 0.6168831168831169, "grad_norm": 9.343486785888672, "learning_rate": 3.2619599885161166e-06, "loss": 0.4607, "step": 13300 }, { "epoch": 0.6169294990723562, "grad_norm": 7.553284168243408, "learning_rate": 3.26126995796856e-06, "loss": 0.4164, "step": 13301 }, { "epoch": 0.6169758812615955, "grad_norm": 6.082647800445557, "learning_rate": 3.260579965089674e-06, "loss": 0.3171, "step": 13302 }, { "epoch": 0.6170222634508349, "grad_norm": 5.5162177085876465, "learning_rate": 3.259890009894404e-06, "loss": 0.2715, "step": 13303 }, { "epoch": 0.6170686456400742, "grad_norm": 8.587478637695312, "learning_rate": 3.2592000923976997e-06, "loss": 0.312, "step": 13304 }, { "epoch": 0.6171150278293135, "grad_norm": 11.163479804992676, "learning_rate": 3.2585102126145063e-06, "loss": 0.3891, "step": 13305 }, { "epoch": 0.6171614100185528, "grad_norm": 6.274506092071533, "learning_rate": 3.257820370559771e-06, "loss": 0.2931, "step": 13306 }, { "epoch": 0.6172077922077922, "grad_norm": 6.353775501251221, "learning_rate": 3.2571305662484392e-06, "loss": 0.3187, "step": 13307 }, { "epoch": 0.6172541743970316, "grad_norm": 6.895289897918701, "learning_rate": 3.256440799695455e-06, "loss": 0.3001, "step": 13308 }, { "epoch": 0.6173005565862709, "grad_norm": 6.987936973571777, "learning_rate": 3.25575107091576e-06, "loss": 0.3429, "step": 13309 }, { "epoch": 0.6173469387755102, "grad_norm": 6.266780376434326, "learning_rate": 3.255061379924298e-06, "loss": 0.3352, "step": 13310 }, { "epoch": 0.6173933209647495, "grad_norm": 5.613714694976807, "learning_rate": 3.2543717267360105e-06, "loss": 0.3432, "step": 13311 }, { "epoch": 0.6174397031539889, "grad_norm": 7.525647163391113, "learning_rate": 3.25368211136584e-06, "loss": 0.3349, "step": 13312 }, { "epoch": 0.6174860853432282, "grad_norm": 7.812268257141113, "learning_rate": 3.252992533828726e-06, "loss": 0.3123, "step": 13313 }, { "epoch": 0.6175324675324675, "grad_norm": 4.146610736846924, "learning_rate": 3.2523029941396057e-06, "loss": 0.3214, "step": 13314 }, { "epoch": 0.6175788497217068, "grad_norm": 6.340737342834473, "learning_rate": 3.251613492313419e-06, "loss": 0.3408, "step": 13315 }, { "epoch": 0.6176252319109462, "grad_norm": 7.64471960067749, "learning_rate": 3.2509240283651047e-06, "loss": 0.3766, "step": 13316 }, { "epoch": 0.6176716141001856, "grad_norm": 11.494034767150879, "learning_rate": 3.2502346023095988e-06, "loss": 0.3482, "step": 13317 }, { "epoch": 0.6177179962894248, "grad_norm": 6.1057634353637695, "learning_rate": 3.2495452141618374e-06, "loss": 0.2927, "step": 13318 }, { "epoch": 0.6177643784786642, "grad_norm": 11.369545936584473, "learning_rate": 3.248855863936756e-06, "loss": 0.3642, "step": 13319 }, { "epoch": 0.6178107606679035, "grad_norm": 5.515601634979248, "learning_rate": 3.2481665516492876e-06, "loss": 0.2898, "step": 13320 }, { "epoch": 0.6178571428571429, "grad_norm": 5.9006195068359375, "learning_rate": 3.247477277314367e-06, "loss": 0.36, "step": 13321 }, { "epoch": 0.6179035250463822, "grad_norm": 7.476559638977051, "learning_rate": 3.2467880409469276e-06, "loss": 0.2559, "step": 13322 }, { "epoch": 0.6179499072356215, "grad_norm": 4.503598213195801, "learning_rate": 3.2460988425619e-06, "loss": 0.3568, "step": 13323 }, { "epoch": 0.6179962894248608, "grad_norm": 11.160470962524414, "learning_rate": 3.245409682174217e-06, "loss": 0.3826, "step": 13324 }, { "epoch": 0.6180426716141002, "grad_norm": 5.198797225952148, "learning_rate": 3.2447205597988065e-06, "loss": 0.2562, "step": 13325 }, { "epoch": 0.6180890538033396, "grad_norm": 5.844056129455566, "learning_rate": 3.244031475450599e-06, "loss": 0.3216, "step": 13326 }, { "epoch": 0.6181354359925788, "grad_norm": 4.653972625732422, "learning_rate": 3.243342429144525e-06, "loss": 0.3958, "step": 13327 }, { "epoch": 0.6181818181818182, "grad_norm": 7.462268352508545, "learning_rate": 3.24265342089551e-06, "loss": 0.3413, "step": 13328 }, { "epoch": 0.6182282003710575, "grad_norm": 5.225761890411377, "learning_rate": 3.2419644507184805e-06, "loss": 0.3271, "step": 13329 }, { "epoch": 0.6182745825602969, "grad_norm": 4.673242092132568, "learning_rate": 3.2412755186283656e-06, "loss": 0.247, "step": 13330 }, { "epoch": 0.6183209647495361, "grad_norm": 5.839203357696533, "learning_rate": 3.240586624640088e-06, "loss": 0.1978, "step": 13331 }, { "epoch": 0.6183673469387755, "grad_norm": 11.526618003845215, "learning_rate": 3.239897768768574e-06, "loss": 0.3294, "step": 13332 }, { "epoch": 0.6184137291280148, "grad_norm": 7.528426170349121, "learning_rate": 3.2392089510287456e-06, "loss": 0.3099, "step": 13333 }, { "epoch": 0.6184601113172542, "grad_norm": 5.628584861755371, "learning_rate": 3.238520171435526e-06, "loss": 0.3044, "step": 13334 }, { "epoch": 0.6185064935064936, "grad_norm": 4.878310203552246, "learning_rate": 3.23783143000384e-06, "loss": 0.202, "step": 13335 }, { "epoch": 0.6185528756957328, "grad_norm": 5.237517833709717, "learning_rate": 3.2371427267486044e-06, "loss": 0.273, "step": 13336 }, { "epoch": 0.6185992578849722, "grad_norm": 7.907690048217773, "learning_rate": 3.2364540616847427e-06, "loss": 0.398, "step": 13337 }, { "epoch": 0.6186456400742115, "grad_norm": 4.0814361572265625, "learning_rate": 3.2357654348271744e-06, "loss": 0.2203, "step": 13338 }, { "epoch": 0.6186920222634509, "grad_norm": 8.026561737060547, "learning_rate": 3.2350768461908164e-06, "loss": 0.4302, "step": 13339 }, { "epoch": 0.6187384044526901, "grad_norm": 8.895963668823242, "learning_rate": 3.234388295790587e-06, "loss": 0.3327, "step": 13340 }, { "epoch": 0.6187847866419295, "grad_norm": 4.842934608459473, "learning_rate": 3.2336997836414058e-06, "loss": 0.3167, "step": 13341 }, { "epoch": 0.6188311688311688, "grad_norm": 6.125999450683594, "learning_rate": 3.233011309758186e-06, "loss": 0.3569, "step": 13342 }, { "epoch": 0.6188775510204082, "grad_norm": 8.08406925201416, "learning_rate": 3.2323228741558445e-06, "loss": 0.3043, "step": 13343 }, { "epoch": 0.6189239332096474, "grad_norm": 9.538084983825684, "learning_rate": 3.231634476849295e-06, "loss": 0.2689, "step": 13344 }, { "epoch": 0.6189703153988868, "grad_norm": 5.163466453552246, "learning_rate": 3.230946117853452e-06, "loss": 0.2573, "step": 13345 }, { "epoch": 0.6190166975881262, "grad_norm": 6.5482635498046875, "learning_rate": 3.2302577971832292e-06, "loss": 0.4171, "step": 13346 }, { "epoch": 0.6190630797773655, "grad_norm": 8.260809898376465, "learning_rate": 3.2295695148535366e-06, "loss": 0.1303, "step": 13347 }, { "epoch": 0.6191094619666048, "grad_norm": 6.020454406738281, "learning_rate": 3.228881270879288e-06, "loss": 0.3256, "step": 13348 }, { "epoch": 0.6191558441558441, "grad_norm": 8.141413688659668, "learning_rate": 3.228193065275391e-06, "loss": 0.2347, "step": 13349 }, { "epoch": 0.6192022263450835, "grad_norm": 6.281895160675049, "learning_rate": 3.227504898056757e-06, "loss": 0.3275, "step": 13350 }, { "epoch": 0.6192486085343228, "grad_norm": 11.298114776611328, "learning_rate": 3.2268167692382957e-06, "loss": 0.472, "step": 13351 }, { "epoch": 0.6192949907235622, "grad_norm": 7.424582004547119, "learning_rate": 3.2261286788349127e-06, "loss": 0.2981, "step": 13352 }, { "epoch": 0.6193413729128014, "grad_norm": 13.280461311340332, "learning_rate": 3.2254406268615175e-06, "loss": 0.4171, "step": 13353 }, { "epoch": 0.6193877551020408, "grad_norm": 7.132042407989502, "learning_rate": 3.2247526133330135e-06, "loss": 0.2677, "step": 13354 }, { "epoch": 0.6194341372912802, "grad_norm": 5.138113498687744, "learning_rate": 3.224064638264308e-06, "loss": 0.3426, "step": 13355 }, { "epoch": 0.6194805194805195, "grad_norm": 7.418939113616943, "learning_rate": 3.2233767016703055e-06, "loss": 0.3095, "step": 13356 }, { "epoch": 0.6195269016697588, "grad_norm": 7.972845077514648, "learning_rate": 3.222688803565911e-06, "loss": 0.3475, "step": 13357 }, { "epoch": 0.6195732838589981, "grad_norm": 5.5315775871276855, "learning_rate": 3.222000943966026e-06, "loss": 0.4192, "step": 13358 }, { "epoch": 0.6196196660482375, "grad_norm": 9.714696884155273, "learning_rate": 3.221313122885551e-06, "loss": 0.2561, "step": 13359 }, { "epoch": 0.6196660482374768, "grad_norm": 8.119294166564941, "learning_rate": 3.22062534033939e-06, "loss": 0.3711, "step": 13360 }, { "epoch": 0.6197124304267161, "grad_norm": 7.346150875091553, "learning_rate": 3.2199375963424417e-06, "loss": 0.3653, "step": 13361 }, { "epoch": 0.6197588126159554, "grad_norm": 7.763154983520508, "learning_rate": 3.2192498909096083e-06, "loss": 0.4353, "step": 13362 }, { "epoch": 0.6198051948051948, "grad_norm": 5.283623218536377, "learning_rate": 3.218562224055786e-06, "loss": 0.2984, "step": 13363 }, { "epoch": 0.6198515769944342, "grad_norm": 4.356685638427734, "learning_rate": 3.2178745957958734e-06, "loss": 0.3275, "step": 13364 }, { "epoch": 0.6198979591836735, "grad_norm": 5.321403503417969, "learning_rate": 3.2171870061447668e-06, "loss": 0.3097, "step": 13365 }, { "epoch": 0.6199443413729128, "grad_norm": 9.111971855163574, "learning_rate": 3.216499455117364e-06, "loss": 0.3488, "step": 13366 }, { "epoch": 0.6199907235621521, "grad_norm": 5.679532527923584, "learning_rate": 3.2158119427285594e-06, "loss": 0.3425, "step": 13367 }, { "epoch": 0.6200371057513915, "grad_norm": 6.779538154602051, "learning_rate": 3.2151244689932505e-06, "loss": 0.306, "step": 13368 }, { "epoch": 0.6200834879406308, "grad_norm": 12.761025428771973, "learning_rate": 3.214437033926326e-06, "loss": 0.367, "step": 13369 }, { "epoch": 0.6201298701298701, "grad_norm": 9.827469825744629, "learning_rate": 3.2137496375426815e-06, "loss": 0.3747, "step": 13370 }, { "epoch": 0.6201762523191094, "grad_norm": 9.602255821228027, "learning_rate": 3.213062279857209e-06, "loss": 0.3128, "step": 13371 }, { "epoch": 0.6202226345083488, "grad_norm": 8.897933959960938, "learning_rate": 3.2123749608847998e-06, "loss": 0.2158, "step": 13372 }, { "epoch": 0.6202690166975882, "grad_norm": 9.125386238098145, "learning_rate": 3.211687680640346e-06, "loss": 0.4111, "step": 13373 }, { "epoch": 0.6203153988868274, "grad_norm": 5.197535037994385, "learning_rate": 3.211000439138734e-06, "loss": 0.4322, "step": 13374 }, { "epoch": 0.6203617810760668, "grad_norm": 7.332177639007568, "learning_rate": 3.210313236394853e-06, "loss": 0.3791, "step": 13375 }, { "epoch": 0.6204081632653061, "grad_norm": 8.29601764678955, "learning_rate": 3.209626072423592e-06, "loss": 0.3563, "step": 13376 }, { "epoch": 0.6204545454545455, "grad_norm": 16.01169776916504, "learning_rate": 3.208938947239838e-06, "loss": 0.4823, "step": 13377 }, { "epoch": 0.6205009276437848, "grad_norm": 6.185399055480957, "learning_rate": 3.2082518608584773e-06, "loss": 0.2555, "step": 13378 }, { "epoch": 0.6205473098330241, "grad_norm": 6.711388111114502, "learning_rate": 3.2075648132943966e-06, "loss": 0.399, "step": 13379 }, { "epoch": 0.6205936920222634, "grad_norm": 7.478482723236084, "learning_rate": 3.206877804562477e-06, "loss": 0.2518, "step": 13380 }, { "epoch": 0.6206400742115028, "grad_norm": 10.741450309753418, "learning_rate": 3.206190834677604e-06, "loss": 0.4652, "step": 13381 }, { "epoch": 0.6206864564007422, "grad_norm": 14.807498931884766, "learning_rate": 3.20550390365466e-06, "loss": 0.5545, "step": 13382 }, { "epoch": 0.6207328385899814, "grad_norm": 10.130739212036133, "learning_rate": 3.2048170115085274e-06, "loss": 0.4023, "step": 13383 }, { "epoch": 0.6207792207792208, "grad_norm": 9.595431327819824, "learning_rate": 3.2041301582540903e-06, "loss": 0.3786, "step": 13384 }, { "epoch": 0.6208256029684601, "grad_norm": 8.323016166687012, "learning_rate": 3.2034433439062235e-06, "loss": 0.3074, "step": 13385 }, { "epoch": 0.6208719851576995, "grad_norm": 4.188810348510742, "learning_rate": 3.202756568479809e-06, "loss": 0.3006, "step": 13386 }, { "epoch": 0.6209183673469387, "grad_norm": 10.144027709960938, "learning_rate": 3.202069831989726e-06, "loss": 0.3175, "step": 13387 }, { "epoch": 0.6209647495361781, "grad_norm": 11.449100494384766, "learning_rate": 3.2013831344508506e-06, "loss": 0.3716, "step": 13388 }, { "epoch": 0.6210111317254174, "grad_norm": 5.065433979034424, "learning_rate": 3.2006964758780624e-06, "loss": 0.3298, "step": 13389 }, { "epoch": 0.6210575139146568, "grad_norm": 8.240955352783203, "learning_rate": 3.200009856286237e-06, "loss": 0.325, "step": 13390 }, { "epoch": 0.6211038961038962, "grad_norm": 38.27627182006836, "learning_rate": 3.199323275690247e-06, "loss": 0.4846, "step": 13391 }, { "epoch": 0.6211502782931354, "grad_norm": 5.475429058074951, "learning_rate": 3.1986367341049684e-06, "loss": 0.2904, "step": 13392 }, { "epoch": 0.6211966604823748, "grad_norm": 10.553801536560059, "learning_rate": 3.197950231545275e-06, "loss": 0.3948, "step": 13393 }, { "epoch": 0.6212430426716141, "grad_norm": 6.094082832336426, "learning_rate": 3.1972637680260388e-06, "loss": 0.3129, "step": 13394 }, { "epoch": 0.6212894248608535, "grad_norm": 9.13333511352539, "learning_rate": 3.1965773435621352e-06, "loss": 0.3416, "step": 13395 }, { "epoch": 0.6213358070500927, "grad_norm": 13.54903793334961, "learning_rate": 3.1958909581684293e-06, "loss": 0.3419, "step": 13396 }, { "epoch": 0.6213821892393321, "grad_norm": 6.796904563903809, "learning_rate": 3.1952046118597946e-06, "loss": 0.3, "step": 13397 }, { "epoch": 0.6214285714285714, "grad_norm": 9.026638984680176, "learning_rate": 3.1945183046511002e-06, "loss": 0.385, "step": 13398 }, { "epoch": 0.6214749536178108, "grad_norm": 5.443242073059082, "learning_rate": 3.1938320365572147e-06, "loss": 0.3391, "step": 13399 }, { "epoch": 0.62152133580705, "grad_norm": 9.996586799621582, "learning_rate": 3.1931458075930046e-06, "loss": 0.3406, "step": 13400 }, { "epoch": 0.6215677179962894, "grad_norm": 7.400210857391357, "learning_rate": 3.19245961777334e-06, "loss": 0.284, "step": 13401 }, { "epoch": 0.6216141001855288, "grad_norm": 6.076174259185791, "learning_rate": 3.191773467113083e-06, "loss": 0.3155, "step": 13402 }, { "epoch": 0.6216604823747681, "grad_norm": 8.65312671661377, "learning_rate": 3.1910873556270994e-06, "loss": 0.4145, "step": 13403 }, { "epoch": 0.6217068645640074, "grad_norm": 5.192117691040039, "learning_rate": 3.1904012833302544e-06, "loss": 0.3158, "step": 13404 }, { "epoch": 0.6217532467532467, "grad_norm": 7.581177711486816, "learning_rate": 3.189715250237411e-06, "loss": 0.4245, "step": 13405 }, { "epoch": 0.6217996289424861, "grad_norm": 9.951995849609375, "learning_rate": 3.189029256363434e-06, "loss": 0.3052, "step": 13406 }, { "epoch": 0.6218460111317254, "grad_norm": 7.275083065032959, "learning_rate": 3.188343301723181e-06, "loss": 0.2939, "step": 13407 }, { "epoch": 0.6218923933209648, "grad_norm": 7.689898490905762, "learning_rate": 3.187657386331514e-06, "loss": 0.2957, "step": 13408 }, { "epoch": 0.621938775510204, "grad_norm": 15.250853538513184, "learning_rate": 3.186971510203295e-06, "loss": 0.6495, "step": 13409 }, { "epoch": 0.6219851576994434, "grad_norm": 8.70382308959961, "learning_rate": 3.1862856733533808e-06, "loss": 0.4012, "step": 13410 }, { "epoch": 0.6220315398886828, "grad_norm": 5.145735263824463, "learning_rate": 3.185599875796632e-06, "loss": 0.2646, "step": 13411 }, { "epoch": 0.6220779220779221, "grad_norm": 4.611215114593506, "learning_rate": 3.1849141175479063e-06, "loss": 0.2766, "step": 13412 }, { "epoch": 0.6221243042671614, "grad_norm": 6.1982808113098145, "learning_rate": 3.1842283986220566e-06, "loss": 0.2988, "step": 13413 }, { "epoch": 0.6221706864564007, "grad_norm": 9.028573036193848, "learning_rate": 3.183542719033942e-06, "loss": 0.2682, "step": 13414 }, { "epoch": 0.6222170686456401, "grad_norm": 12.060515403747559, "learning_rate": 3.182857078798416e-06, "loss": 0.4036, "step": 13415 }, { "epoch": 0.6222634508348794, "grad_norm": 12.160856246948242, "learning_rate": 3.182171477930332e-06, "loss": 0.3914, "step": 13416 }, { "epoch": 0.6223098330241187, "grad_norm": 5.635605335235596, "learning_rate": 3.1814859164445467e-06, "loss": 0.3113, "step": 13417 }, { "epoch": 0.622356215213358, "grad_norm": 10.053163528442383, "learning_rate": 3.180800394355908e-06, "loss": 0.3263, "step": 13418 }, { "epoch": 0.6224025974025974, "grad_norm": 9.232698440551758, "learning_rate": 3.180114911679269e-06, "loss": 0.4494, "step": 13419 }, { "epoch": 0.6224489795918368, "grad_norm": 5.357819557189941, "learning_rate": 3.1794294684294806e-06, "loss": 0.3307, "step": 13420 }, { "epoch": 0.6224953617810761, "grad_norm": 4.679882526397705, "learning_rate": 3.178744064621393e-06, "loss": 0.3717, "step": 13421 }, { "epoch": 0.6225417439703154, "grad_norm": 10.966022491455078, "learning_rate": 3.178058700269854e-06, "loss": 0.2896, "step": 13422 }, { "epoch": 0.6225881261595547, "grad_norm": 19.824195861816406, "learning_rate": 3.177373375389714e-06, "loss": 0.4565, "step": 13423 }, { "epoch": 0.6226345083487941, "grad_norm": 9.123810768127441, "learning_rate": 3.1766880899958176e-06, "loss": 0.3434, "step": 13424 }, { "epoch": 0.6226808905380334, "grad_norm": 4.048943042755127, "learning_rate": 3.176002844103011e-06, "loss": 0.25, "step": 13425 }, { "epoch": 0.6227272727272727, "grad_norm": 4.213907718658447, "learning_rate": 3.175317637726141e-06, "loss": 0.2562, "step": 13426 }, { "epoch": 0.622773654916512, "grad_norm": 6.002880096435547, "learning_rate": 3.1746324708800523e-06, "loss": 0.2919, "step": 13427 }, { "epoch": 0.6228200371057514, "grad_norm": 7.283458232879639, "learning_rate": 3.1739473435795896e-06, "loss": 0.4479, "step": 13428 }, { "epoch": 0.6228664192949908, "grad_norm": 6.098235607147217, "learning_rate": 3.1732622558395936e-06, "loss": 0.311, "step": 13429 }, { "epoch": 0.62291280148423, "grad_norm": 5.532379150390625, "learning_rate": 3.172577207674907e-06, "loss": 0.3908, "step": 13430 }, { "epoch": 0.6229591836734694, "grad_norm": 7.171616554260254, "learning_rate": 3.171892199100371e-06, "loss": 0.3315, "step": 13431 }, { "epoch": 0.6230055658627087, "grad_norm": 5.791990280151367, "learning_rate": 3.171207230130826e-06, "loss": 0.3579, "step": 13432 }, { "epoch": 0.6230519480519481, "grad_norm": 9.900988578796387, "learning_rate": 3.1705223007811147e-06, "loss": 0.3285, "step": 13433 }, { "epoch": 0.6230983302411874, "grad_norm": 10.923258781433105, "learning_rate": 3.1698374110660703e-06, "loss": 0.3939, "step": 13434 }, { "epoch": 0.6231447124304267, "grad_norm": 5.989734649658203, "learning_rate": 3.1691525610005326e-06, "loss": 0.3853, "step": 13435 }, { "epoch": 0.623191094619666, "grad_norm": 6.5614237785339355, "learning_rate": 3.16846775059934e-06, "loss": 0.3033, "step": 13436 }, { "epoch": 0.6232374768089054, "grad_norm": 5.759059429168701, "learning_rate": 3.1677829798773264e-06, "loss": 0.343, "step": 13437 }, { "epoch": 0.6232838589981448, "grad_norm": 11.019227027893066, "learning_rate": 3.167098248849329e-06, "loss": 0.3759, "step": 13438 }, { "epoch": 0.623330241187384, "grad_norm": 4.878030300140381, "learning_rate": 3.166413557530183e-06, "loss": 0.2885, "step": 13439 }, { "epoch": 0.6233766233766234, "grad_norm": 8.326549530029297, "learning_rate": 3.1657289059347184e-06, "loss": 0.3444, "step": 13440 }, { "epoch": 0.6234230055658627, "grad_norm": 14.635662078857422, "learning_rate": 3.165044294077769e-06, "loss": 0.3267, "step": 13441 }, { "epoch": 0.6234693877551021, "grad_norm": 8.54928970336914, "learning_rate": 3.164359721974167e-06, "loss": 0.3192, "step": 13442 }, { "epoch": 0.6235157699443413, "grad_norm": 7.529666900634766, "learning_rate": 3.1636751896387446e-06, "loss": 0.344, "step": 13443 }, { "epoch": 0.6235621521335807, "grad_norm": 11.730223655700684, "learning_rate": 3.162990697086332e-06, "loss": 0.2977, "step": 13444 }, { "epoch": 0.62360853432282, "grad_norm": 10.645376205444336, "learning_rate": 3.1623062443317553e-06, "loss": 0.3932, "step": 13445 }, { "epoch": 0.6236549165120594, "grad_norm": 6.424900054931641, "learning_rate": 3.161621831389844e-06, "loss": 0.3423, "step": 13446 }, { "epoch": 0.6237012987012988, "grad_norm": 6.030002117156982, "learning_rate": 3.160937458275426e-06, "loss": 0.3156, "step": 13447 }, { "epoch": 0.623747680890538, "grad_norm": 5.168900012969971, "learning_rate": 3.1602531250033286e-06, "loss": 0.2501, "step": 13448 }, { "epoch": 0.6237940630797774, "grad_norm": 5.607947826385498, "learning_rate": 3.1595688315883765e-06, "loss": 0.3835, "step": 13449 }, { "epoch": 0.6238404452690167, "grad_norm": 6.482878684997559, "learning_rate": 3.158884578045397e-06, "loss": 0.3483, "step": 13450 }, { "epoch": 0.6238868274582561, "grad_norm": 6.745988845825195, "learning_rate": 3.15820036438921e-06, "loss": 0.3304, "step": 13451 }, { "epoch": 0.6239332096474953, "grad_norm": 7.768876075744629, "learning_rate": 3.1575161906346406e-06, "loss": 0.3722, "step": 13452 }, { "epoch": 0.6239795918367347, "grad_norm": 4.8099164962768555, "learning_rate": 3.1568320567965115e-06, "loss": 0.25, "step": 13453 }, { "epoch": 0.624025974025974, "grad_norm": 6.448056697845459, "learning_rate": 3.156147962889643e-06, "loss": 0.2546, "step": 13454 }, { "epoch": 0.6240723562152134, "grad_norm": 8.550522804260254, "learning_rate": 3.155463908928859e-06, "loss": 0.2813, "step": 13455 }, { "epoch": 0.6241187384044526, "grad_norm": 6.02726411819458, "learning_rate": 3.1547798949289745e-06, "loss": 0.2994, "step": 13456 }, { "epoch": 0.624165120593692, "grad_norm": 11.377705574035645, "learning_rate": 3.15409592090481e-06, "loss": 0.3029, "step": 13457 }, { "epoch": 0.6242115027829314, "grad_norm": 6.386152267456055, "learning_rate": 3.153411986871184e-06, "loss": 0.3832, "step": 13458 }, { "epoch": 0.6242578849721707, "grad_norm": 5.363919258117676, "learning_rate": 3.152728092842913e-06, "loss": 0.3073, "step": 13459 }, { "epoch": 0.62430426716141, "grad_norm": 7.930418014526367, "learning_rate": 3.1520442388348147e-06, "loss": 0.2968, "step": 13460 }, { "epoch": 0.6243506493506493, "grad_norm": 8.610095977783203, "learning_rate": 3.1513604248617036e-06, "loss": 0.3986, "step": 13461 }, { "epoch": 0.6243970315398887, "grad_norm": 4.539948463439941, "learning_rate": 3.150676650938392e-06, "loss": 0.2435, "step": 13462 }, { "epoch": 0.624443413729128, "grad_norm": 8.116302490234375, "learning_rate": 3.149992917079696e-06, "loss": 0.2298, "step": 13463 }, { "epoch": 0.6244897959183674, "grad_norm": 6.071873188018799, "learning_rate": 3.149309223300428e-06, "loss": 0.2042, "step": 13464 }, { "epoch": 0.6245361781076066, "grad_norm": 4.512791156768799, "learning_rate": 3.1486255696154e-06, "loss": 0.3105, "step": 13465 }, { "epoch": 0.624582560296846, "grad_norm": 6.950628280639648, "learning_rate": 3.147941956039423e-06, "loss": 0.2994, "step": 13466 }, { "epoch": 0.6246289424860854, "grad_norm": 6.0511274337768555, "learning_rate": 3.147258382587305e-06, "loss": 0.3932, "step": 13467 }, { "epoch": 0.6246753246753247, "grad_norm": 8.991850852966309, "learning_rate": 3.1465748492738574e-06, "loss": 0.3077, "step": 13468 }, { "epoch": 0.624721706864564, "grad_norm": 11.586185455322266, "learning_rate": 3.145891356113888e-06, "loss": 0.4131, "step": 13469 }, { "epoch": 0.6247680890538033, "grad_norm": 6.3385419845581055, "learning_rate": 3.1452079031222056e-06, "loss": 0.3486, "step": 13470 }, { "epoch": 0.6248144712430427, "grad_norm": 5.930183410644531, "learning_rate": 3.144524490313614e-06, "loss": 0.3296, "step": 13471 }, { "epoch": 0.624860853432282, "grad_norm": 8.428139686584473, "learning_rate": 3.143841117702923e-06, "loss": 0.3946, "step": 13472 }, { "epoch": 0.6249072356215213, "grad_norm": 5.360245704650879, "learning_rate": 3.1431577853049326e-06, "loss": 0.31, "step": 13473 }, { "epoch": 0.6249536178107606, "grad_norm": 5.789858341217041, "learning_rate": 3.1424744931344505e-06, "loss": 0.3292, "step": 13474 }, { "epoch": 0.625, "grad_norm": 6.860095500946045, "learning_rate": 3.141791241206279e-06, "loss": 0.4222, "step": 13475 }, { "epoch": 0.6250463821892394, "grad_norm": 5.7855753898620605, "learning_rate": 3.141108029535219e-06, "loss": 0.2926, "step": 13476 }, { "epoch": 0.6250927643784787, "grad_norm": 4.423130512237549, "learning_rate": 3.140424858136075e-06, "loss": 0.2198, "step": 13477 }, { "epoch": 0.625139146567718, "grad_norm": 5.330297470092773, "learning_rate": 3.139741727023644e-06, "loss": 0.2742, "step": 13478 }, { "epoch": 0.6251855287569573, "grad_norm": 6.722419261932373, "learning_rate": 3.1390586362127277e-06, "loss": 0.3036, "step": 13479 }, { "epoch": 0.6252319109461967, "grad_norm": 5.229659080505371, "learning_rate": 3.1383755857181253e-06, "loss": 0.2966, "step": 13480 }, { "epoch": 0.625278293135436, "grad_norm": 8.197096824645996, "learning_rate": 3.137692575554633e-06, "loss": 0.2762, "step": 13481 }, { "epoch": 0.6253246753246753, "grad_norm": 5.51536750793457, "learning_rate": 3.1370096057370493e-06, "loss": 0.3232, "step": 13482 }, { "epoch": 0.6253710575139146, "grad_norm": 5.6091694831848145, "learning_rate": 3.13632667628017e-06, "loss": 0.2248, "step": 13483 }, { "epoch": 0.625417439703154, "grad_norm": 3.8249731063842773, "learning_rate": 3.1356437871987895e-06, "loss": 0.2706, "step": 13484 }, { "epoch": 0.6254638218923934, "grad_norm": 8.189119338989258, "learning_rate": 3.134960938507704e-06, "loss": 0.3271, "step": 13485 }, { "epoch": 0.6255102040816326, "grad_norm": 7.7311553955078125, "learning_rate": 3.1342781302217052e-06, "loss": 0.1843, "step": 13486 }, { "epoch": 0.625556586270872, "grad_norm": 8.223088264465332, "learning_rate": 3.1335953623555864e-06, "loss": 0.428, "step": 13487 }, { "epoch": 0.6256029684601113, "grad_norm": 7.1690568923950195, "learning_rate": 3.1329126349241413e-06, "loss": 0.3615, "step": 13488 }, { "epoch": 0.6256493506493507, "grad_norm": 17.133726119995117, "learning_rate": 3.132229947942157e-06, "loss": 0.566, "step": 13489 }, { "epoch": 0.62569573283859, "grad_norm": 11.774079322814941, "learning_rate": 3.131547301424428e-06, "loss": 0.3531, "step": 13490 }, { "epoch": 0.6257421150278293, "grad_norm": 5.796926975250244, "learning_rate": 3.130864695385739e-06, "loss": 0.278, "step": 13491 }, { "epoch": 0.6257884972170686, "grad_norm": 4.90806770324707, "learning_rate": 3.1301821298408818e-06, "loss": 0.253, "step": 13492 }, { "epoch": 0.625834879406308, "grad_norm": 4.20335054397583, "learning_rate": 3.1294996048046416e-06, "loss": 0.2774, "step": 13493 }, { "epoch": 0.6258812615955474, "grad_norm": 5.608394145965576, "learning_rate": 3.1288171202918074e-06, "loss": 0.3156, "step": 13494 }, { "epoch": 0.6259276437847866, "grad_norm": 5.016088008880615, "learning_rate": 3.1281346763171628e-06, "loss": 0.2706, "step": 13495 }, { "epoch": 0.625974025974026, "grad_norm": 3.876396417617798, "learning_rate": 3.1274522728954928e-06, "loss": 0.2032, "step": 13496 }, { "epoch": 0.6260204081632653, "grad_norm": 4.881544589996338, "learning_rate": 3.126769910041581e-06, "loss": 0.3119, "step": 13497 }, { "epoch": 0.6260667903525047, "grad_norm": 14.634652137756348, "learning_rate": 3.1260875877702113e-06, "loss": 0.5211, "step": 13498 }, { "epoch": 0.6261131725417439, "grad_norm": 6.268440246582031, "learning_rate": 3.1254053060961663e-06, "loss": 0.3218, "step": 13499 }, { "epoch": 0.6261595547309833, "grad_norm": 6.881512641906738, "learning_rate": 3.1247230650342274e-06, "loss": 0.3559, "step": 13500 }, { "epoch": 0.6262059369202226, "grad_norm": 8.038400650024414, "learning_rate": 3.1240408645991725e-06, "loss": 0.3488, "step": 13501 }, { "epoch": 0.626252319109462, "grad_norm": 9.207075119018555, "learning_rate": 3.1233587048057834e-06, "loss": 0.3251, "step": 13502 }, { "epoch": 0.6262987012987012, "grad_norm": 8.403470039367676, "learning_rate": 3.122676585668838e-06, "loss": 0.3718, "step": 13503 }, { "epoch": 0.6263450834879406, "grad_norm": 4.30244255065918, "learning_rate": 3.121994507203114e-06, "loss": 0.2856, "step": 13504 }, { "epoch": 0.62639146567718, "grad_norm": 14.224043846130371, "learning_rate": 3.1213124694233897e-06, "loss": 0.4025, "step": 13505 }, { "epoch": 0.6264378478664193, "grad_norm": 13.151189804077148, "learning_rate": 3.120630472344439e-06, "loss": 0.4256, "step": 13506 }, { "epoch": 0.6264842300556587, "grad_norm": 15.098325729370117, "learning_rate": 3.1199485159810373e-06, "loss": 0.5571, "step": 13507 }, { "epoch": 0.6265306122448979, "grad_norm": 9.631964683532715, "learning_rate": 3.1192666003479598e-06, "loss": 0.4187, "step": 13508 }, { "epoch": 0.6265769944341373, "grad_norm": 6.3635759353637695, "learning_rate": 3.1185847254599788e-06, "loss": 0.2446, "step": 13509 }, { "epoch": 0.6266233766233766, "grad_norm": 9.624556541442871, "learning_rate": 3.11790289133187e-06, "loss": 0.3467, "step": 13510 }, { "epoch": 0.626669758812616, "grad_norm": 9.026348114013672, "learning_rate": 3.1172210979784003e-06, "loss": 0.3512, "step": 13511 }, { "epoch": 0.6267161410018552, "grad_norm": 7.3410258293151855, "learning_rate": 3.1165393454143423e-06, "loss": 0.3679, "step": 13512 }, { "epoch": 0.6267625231910946, "grad_norm": 6.987468242645264, "learning_rate": 3.1158576336544656e-06, "loss": 0.2838, "step": 13513 }, { "epoch": 0.626808905380334, "grad_norm": 6.195602893829346, "learning_rate": 3.11517596271354e-06, "loss": 0.2939, "step": 13514 }, { "epoch": 0.6268552875695733, "grad_norm": 6.128613471984863, "learning_rate": 3.1144943326063347e-06, "loss": 0.3152, "step": 13515 }, { "epoch": 0.6269016697588126, "grad_norm": 6.4048261642456055, "learning_rate": 3.113812743347613e-06, "loss": 0.3382, "step": 13516 }, { "epoch": 0.6269480519480519, "grad_norm": 8.668622016906738, "learning_rate": 3.1131311949521436e-06, "loss": 0.4309, "step": 13517 }, { "epoch": 0.6269944341372913, "grad_norm": 12.524065971374512, "learning_rate": 3.112449687434692e-06, "loss": 0.596, "step": 13518 }, { "epoch": 0.6270408163265306, "grad_norm": 5.752049446105957, "learning_rate": 3.1117682208100218e-06, "loss": 0.3395, "step": 13519 }, { "epoch": 0.62708719851577, "grad_norm": 9.356871604919434, "learning_rate": 3.1110867950928973e-06, "loss": 0.3613, "step": 13520 }, { "epoch": 0.6271335807050092, "grad_norm": 5.242266654968262, "learning_rate": 3.110405410298083e-06, "loss": 0.3212, "step": 13521 }, { "epoch": 0.6271799628942486, "grad_norm": 9.708072662353516, "learning_rate": 3.109724066440337e-06, "loss": 0.2455, "step": 13522 }, { "epoch": 0.627226345083488, "grad_norm": 7.388514041900635, "learning_rate": 3.1090427635344223e-06, "loss": 0.2106, "step": 13523 }, { "epoch": 0.6272727272727273, "grad_norm": 6.081435203552246, "learning_rate": 3.1083615015950984e-06, "loss": 0.2577, "step": 13524 }, { "epoch": 0.6273191094619666, "grad_norm": 14.535621643066406, "learning_rate": 3.1076802806371254e-06, "loss": 0.3842, "step": 13525 }, { "epoch": 0.6273654916512059, "grad_norm": 10.917293548583984, "learning_rate": 3.1069991006752625e-06, "loss": 0.4084, "step": 13526 }, { "epoch": 0.6274118738404453, "grad_norm": 7.648880958557129, "learning_rate": 3.106317961724263e-06, "loss": 0.3614, "step": 13527 }, { "epoch": 0.6274582560296846, "grad_norm": 7.57143497467041, "learning_rate": 3.1056368637988876e-06, "loss": 0.2414, "step": 13528 }, { "epoch": 0.6275046382189239, "grad_norm": 15.926494598388672, "learning_rate": 3.1049558069138896e-06, "loss": 0.3744, "step": 13529 }, { "epoch": 0.6275510204081632, "grad_norm": 5.954583644866943, "learning_rate": 3.104274791084024e-06, "loss": 0.4303, "step": 13530 }, { "epoch": 0.6275974025974026, "grad_norm": 12.131382942199707, "learning_rate": 3.103593816324046e-06, "loss": 0.4225, "step": 13531 }, { "epoch": 0.627643784786642, "grad_norm": 7.593510150909424, "learning_rate": 3.1029128826487094e-06, "loss": 0.3727, "step": 13532 }, { "epoch": 0.6276901669758813, "grad_norm": 5.161386489868164, "learning_rate": 3.1022319900727626e-06, "loss": 0.3353, "step": 13533 }, { "epoch": 0.6277365491651206, "grad_norm": 8.609965324401855, "learning_rate": 3.1015511386109586e-06, "loss": 0.3482, "step": 13534 }, { "epoch": 0.6277829313543599, "grad_norm": 8.75363540649414, "learning_rate": 3.1008703282780476e-06, "loss": 0.4353, "step": 13535 }, { "epoch": 0.6278293135435993, "grad_norm": 15.966389656066895, "learning_rate": 3.10018955908878e-06, "loss": 0.3614, "step": 13536 }, { "epoch": 0.6278756957328386, "grad_norm": 6.426632881164551, "learning_rate": 3.0995088310579045e-06, "loss": 0.3121, "step": 13537 }, { "epoch": 0.6279220779220779, "grad_norm": 5.889336585998535, "learning_rate": 3.0988281442001665e-06, "loss": 0.3048, "step": 13538 }, { "epoch": 0.6279684601113172, "grad_norm": 4.139446258544922, "learning_rate": 3.098147498530314e-06, "loss": 0.1421, "step": 13539 }, { "epoch": 0.6280148423005566, "grad_norm": 8.376023292541504, "learning_rate": 3.0974668940630925e-06, "loss": 0.3203, "step": 13540 }, { "epoch": 0.628061224489796, "grad_norm": 12.185281753540039, "learning_rate": 3.0967863308132477e-06, "loss": 0.4022, "step": 13541 }, { "epoch": 0.6281076066790352, "grad_norm": 4.602285385131836, "learning_rate": 3.096105808795523e-06, "loss": 0.2691, "step": 13542 }, { "epoch": 0.6281539888682746, "grad_norm": 5.517385959625244, "learning_rate": 3.095425328024664e-06, "loss": 0.2305, "step": 13543 }, { "epoch": 0.6282003710575139, "grad_norm": 5.735599040985107, "learning_rate": 3.0947448885154085e-06, "loss": 0.2816, "step": 13544 }, { "epoch": 0.6282467532467533, "grad_norm": 16.354482650756836, "learning_rate": 3.0940644902825e-06, "loss": 0.3642, "step": 13545 }, { "epoch": 0.6282931354359926, "grad_norm": 6.382072925567627, "learning_rate": 3.093384133340679e-06, "loss": 0.1896, "step": 13546 }, { "epoch": 0.6283395176252319, "grad_norm": 7.653629302978516, "learning_rate": 3.0927038177046855e-06, "loss": 0.3416, "step": 13547 }, { "epoch": 0.6283858998144712, "grad_norm": 10.554866790771484, "learning_rate": 3.0920235433892592e-06, "loss": 0.286, "step": 13548 }, { "epoch": 0.6284322820037106, "grad_norm": 9.075966835021973, "learning_rate": 3.091343310409135e-06, "loss": 0.2917, "step": 13549 }, { "epoch": 0.62847866419295, "grad_norm": 11.012900352478027, "learning_rate": 3.090663118779051e-06, "loss": 0.2169, "step": 13550 }, { "epoch": 0.6285250463821892, "grad_norm": 9.997647285461426, "learning_rate": 3.0899829685137434e-06, "loss": 0.3859, "step": 13551 }, { "epoch": 0.6285714285714286, "grad_norm": 8.924542427062988, "learning_rate": 3.089302859627947e-06, "loss": 0.4051, "step": 13552 }, { "epoch": 0.6286178107606679, "grad_norm": 6.385928630828857, "learning_rate": 3.088622792136397e-06, "loss": 0.305, "step": 13553 }, { "epoch": 0.6286641929499073, "grad_norm": 9.56020450592041, "learning_rate": 3.087942766053828e-06, "loss": 0.4063, "step": 13554 }, { "epoch": 0.6287105751391465, "grad_norm": 7.454442501068115, "learning_rate": 3.0872627813949686e-06, "loss": 0.3314, "step": 13555 }, { "epoch": 0.6287569573283859, "grad_norm": 6.82836389541626, "learning_rate": 3.0865828381745515e-06, "loss": 0.2445, "step": 13556 }, { "epoch": 0.6288033395176252, "grad_norm": 21.89723777770996, "learning_rate": 3.085902936407309e-06, "loss": 0.4034, "step": 13557 }, { "epoch": 0.6288497217068646, "grad_norm": 14.005602836608887, "learning_rate": 3.0852230761079687e-06, "loss": 0.3689, "step": 13558 }, { "epoch": 0.6288961038961038, "grad_norm": 9.666080474853516, "learning_rate": 3.0845432572912633e-06, "loss": 0.3784, "step": 13559 }, { "epoch": 0.6289424860853432, "grad_norm": 9.524667739868164, "learning_rate": 3.0838634799719157e-06, "loss": 0.3674, "step": 13560 }, { "epoch": 0.6289888682745826, "grad_norm": 14.543841361999512, "learning_rate": 3.083183744164655e-06, "loss": 0.5193, "step": 13561 }, { "epoch": 0.6290352504638219, "grad_norm": 11.58675479888916, "learning_rate": 3.0825040498842074e-06, "loss": 0.3917, "step": 13562 }, { "epoch": 0.6290816326530613, "grad_norm": 5.143747329711914, "learning_rate": 3.081824397145299e-06, "loss": 0.2717, "step": 13563 }, { "epoch": 0.6291280148423005, "grad_norm": 9.791046142578125, "learning_rate": 3.081144785962652e-06, "loss": 0.3652, "step": 13564 }, { "epoch": 0.6291743970315399, "grad_norm": 8.468279838562012, "learning_rate": 3.080465216350994e-06, "loss": 0.3427, "step": 13565 }, { "epoch": 0.6292207792207792, "grad_norm": 7.2049031257629395, "learning_rate": 3.079785688325042e-06, "loss": 0.29, "step": 13566 }, { "epoch": 0.6292671614100186, "grad_norm": 6.483786582946777, "learning_rate": 3.079106201899521e-06, "loss": 0.3305, "step": 13567 }, { "epoch": 0.6293135435992578, "grad_norm": 6.826335430145264, "learning_rate": 3.0784267570891506e-06, "loss": 0.3148, "step": 13568 }, { "epoch": 0.6293599257884972, "grad_norm": 4.778143405914307, "learning_rate": 3.077747353908651e-06, "loss": 0.2665, "step": 13569 }, { "epoch": 0.6294063079777366, "grad_norm": 6.247906684875488, "learning_rate": 3.0770679923727433e-06, "loss": 0.3324, "step": 13570 }, { "epoch": 0.6294526901669759, "grad_norm": 5.652071952819824, "learning_rate": 3.076388672496141e-06, "loss": 0.3768, "step": 13571 }, { "epoch": 0.6294990723562152, "grad_norm": 10.876900672912598, "learning_rate": 3.0757093942935634e-06, "loss": 0.3593, "step": 13572 }, { "epoch": 0.6295454545454545, "grad_norm": 7.363704681396484, "learning_rate": 3.0750301577797275e-06, "loss": 0.3155, "step": 13573 }, { "epoch": 0.6295918367346939, "grad_norm": 6.272221565246582, "learning_rate": 3.0743509629693474e-06, "loss": 0.3293, "step": 13574 }, { "epoch": 0.6296382189239332, "grad_norm": 5.715141296386719, "learning_rate": 3.0736718098771378e-06, "loss": 0.3734, "step": 13575 }, { "epoch": 0.6296846011131726, "grad_norm": 9.874998092651367, "learning_rate": 3.072992698517815e-06, "loss": 0.3204, "step": 13576 }, { "epoch": 0.6297309833024118, "grad_norm": 10.459464073181152, "learning_rate": 3.072313628906087e-06, "loss": 0.5577, "step": 13577 }, { "epoch": 0.6297773654916512, "grad_norm": 7.670817852020264, "learning_rate": 3.0716346010566665e-06, "loss": 0.3205, "step": 13578 }, { "epoch": 0.6298237476808906, "grad_norm": 6.691510200500488, "learning_rate": 3.0709556149842667e-06, "loss": 0.3003, "step": 13579 }, { "epoch": 0.6298701298701299, "grad_norm": 9.520377159118652, "learning_rate": 3.0702766707035946e-06, "loss": 0.3489, "step": 13580 }, { "epoch": 0.6299165120593692, "grad_norm": 5.762459754943848, "learning_rate": 3.0695977682293636e-06, "loss": 0.4109, "step": 13581 }, { "epoch": 0.6299628942486085, "grad_norm": 9.848872184753418, "learning_rate": 3.0689189075762764e-06, "loss": 0.4363, "step": 13582 }, { "epoch": 0.6300092764378479, "grad_norm": 5.157888412475586, "learning_rate": 3.0682400887590426e-06, "loss": 0.3097, "step": 13583 }, { "epoch": 0.6300556586270872, "grad_norm": 6.707597255706787, "learning_rate": 3.0675613117923684e-06, "loss": 0.232, "step": 13584 }, { "epoch": 0.6301020408163265, "grad_norm": 5.192707061767578, "learning_rate": 3.066882576690959e-06, "loss": 0.1596, "step": 13585 }, { "epoch": 0.6301484230055658, "grad_norm": 9.800952911376953, "learning_rate": 3.066203883469519e-06, "loss": 0.246, "step": 13586 }, { "epoch": 0.6301948051948052, "grad_norm": 8.07017993927002, "learning_rate": 3.065525232142754e-06, "loss": 0.394, "step": 13587 }, { "epoch": 0.6302411873840446, "grad_norm": 7.507970333099365, "learning_rate": 3.064846622725363e-06, "loss": 0.3634, "step": 13588 }, { "epoch": 0.6302875695732839, "grad_norm": 8.487092971801758, "learning_rate": 3.0641680552320486e-06, "loss": 0.348, "step": 13589 }, { "epoch": 0.6303339517625232, "grad_norm": 6.100660800933838, "learning_rate": 3.063489529677512e-06, "loss": 0.2715, "step": 13590 }, { "epoch": 0.6303803339517625, "grad_norm": 4.562592029571533, "learning_rate": 3.062811046076454e-06, "loss": 0.2738, "step": 13591 }, { "epoch": 0.6304267161410019, "grad_norm": 4.629899978637695, "learning_rate": 3.0621326044435738e-06, "loss": 0.3377, "step": 13592 }, { "epoch": 0.6304730983302412, "grad_norm": 8.66323471069336, "learning_rate": 3.061454204793568e-06, "loss": 0.298, "step": 13593 }, { "epoch": 0.6305194805194805, "grad_norm": 7.9605817794799805, "learning_rate": 3.0607758471411336e-06, "loss": 0.3738, "step": 13594 }, { "epoch": 0.6305658627087198, "grad_norm": 6.209512233734131, "learning_rate": 3.060097531500968e-06, "loss": 0.3362, "step": 13595 }, { "epoch": 0.6306122448979592, "grad_norm": 10.43160343170166, "learning_rate": 3.0594192578877657e-06, "loss": 0.3805, "step": 13596 }, { "epoch": 0.6306586270871986, "grad_norm": 12.122703552246094, "learning_rate": 3.058741026316222e-06, "loss": 0.5263, "step": 13597 }, { "epoch": 0.6307050092764378, "grad_norm": 4.718976974487305, "learning_rate": 3.058062836801032e-06, "loss": 0.2636, "step": 13598 }, { "epoch": 0.6307513914656772, "grad_norm": 5.762325286865234, "learning_rate": 3.0573846893568846e-06, "loss": 0.3233, "step": 13599 }, { "epoch": 0.6307977736549165, "grad_norm": 6.444000244140625, "learning_rate": 3.056706583998473e-06, "loss": 0.3107, "step": 13600 }, { "epoch": 0.6308441558441559, "grad_norm": 8.144747734069824, "learning_rate": 3.0560285207404882e-06, "loss": 0.4296, "step": 13601 }, { "epoch": 0.6308905380333952, "grad_norm": 7.545938968658447, "learning_rate": 3.0553504995976204e-06, "loss": 0.3901, "step": 13602 }, { "epoch": 0.6309369202226345, "grad_norm": 9.33309268951416, "learning_rate": 3.05467252058456e-06, "loss": 0.3094, "step": 13603 }, { "epoch": 0.6309833024118738, "grad_norm": 12.420942306518555, "learning_rate": 3.0539945837159917e-06, "loss": 0.4487, "step": 13604 }, { "epoch": 0.6310296846011132, "grad_norm": 13.258563995361328, "learning_rate": 3.0533166890066047e-06, "loss": 0.2303, "step": 13605 }, { "epoch": 0.6310760667903526, "grad_norm": 5.6378889083862305, "learning_rate": 3.0526388364710842e-06, "loss": 0.2053, "step": 13606 }, { "epoch": 0.6311224489795918, "grad_norm": 7.794966220855713, "learning_rate": 3.051961026124116e-06, "loss": 0.3172, "step": 13607 }, { "epoch": 0.6311688311688312, "grad_norm": 8.638425827026367, "learning_rate": 3.0512832579803873e-06, "loss": 0.3621, "step": 13608 }, { "epoch": 0.6312152133580705, "grad_norm": 9.925620079040527, "learning_rate": 3.0506055320545767e-06, "loss": 0.3516, "step": 13609 }, { "epoch": 0.6312615955473099, "grad_norm": 9.082775115966797, "learning_rate": 3.0499278483613693e-06, "loss": 0.3168, "step": 13610 }, { "epoch": 0.6313079777365491, "grad_norm": 5.000645637512207, "learning_rate": 3.0492502069154463e-06, "loss": 0.2453, "step": 13611 }, { "epoch": 0.6313543599257885, "grad_norm": 8.538771629333496, "learning_rate": 3.048572607731488e-06, "loss": 0.4824, "step": 13612 }, { "epoch": 0.6314007421150278, "grad_norm": 5.97099494934082, "learning_rate": 3.0478950508241767e-06, "loss": 0.2549, "step": 13613 }, { "epoch": 0.6314471243042672, "grad_norm": 7.118514060974121, "learning_rate": 3.0472175362081896e-06, "loss": 0.3962, "step": 13614 }, { "epoch": 0.6314935064935064, "grad_norm": 7.404092311859131, "learning_rate": 3.0465400638982036e-06, "loss": 0.3394, "step": 13615 }, { "epoch": 0.6315398886827458, "grad_norm": 13.516554832458496, "learning_rate": 3.045862633908896e-06, "loss": 0.3943, "step": 13616 }, { "epoch": 0.6315862708719852, "grad_norm": 3.9821743965148926, "learning_rate": 3.0451852462549448e-06, "loss": 0.2318, "step": 13617 }, { "epoch": 0.6316326530612245, "grad_norm": 6.960875988006592, "learning_rate": 3.044507900951024e-06, "loss": 0.2539, "step": 13618 }, { "epoch": 0.6316790352504639, "grad_norm": 7.745149612426758, "learning_rate": 3.0438305980118087e-06, "loss": 0.352, "step": 13619 }, { "epoch": 0.6317254174397031, "grad_norm": 8.769434928894043, "learning_rate": 3.0431533374519712e-06, "loss": 0.471, "step": 13620 }, { "epoch": 0.6317717996289425, "grad_norm": 6.097030162811279, "learning_rate": 3.042476119286184e-06, "loss": 0.3521, "step": 13621 }, { "epoch": 0.6318181818181818, "grad_norm": 5.667389869689941, "learning_rate": 3.0417989435291194e-06, "loss": 0.2608, "step": 13622 }, { "epoch": 0.6318645640074212, "grad_norm": 10.837559700012207, "learning_rate": 3.0411218101954487e-06, "loss": 0.3578, "step": 13623 }, { "epoch": 0.6319109461966604, "grad_norm": 6.208981990814209, "learning_rate": 3.0404447192998398e-06, "loss": 0.2781, "step": 13624 }, { "epoch": 0.6319573283858998, "grad_norm": 8.132433891296387, "learning_rate": 3.0397676708569635e-06, "loss": 0.4321, "step": 13625 }, { "epoch": 0.6320037105751392, "grad_norm": 9.881938934326172, "learning_rate": 3.0390906648814854e-06, "loss": 0.3253, "step": 13626 }, { "epoch": 0.6320500927643785, "grad_norm": 12.859552383422852, "learning_rate": 3.0384137013880745e-06, "loss": 0.3964, "step": 13627 }, { "epoch": 0.6320964749536178, "grad_norm": 6.607645511627197, "learning_rate": 3.037736780391396e-06, "loss": 0.2548, "step": 13628 }, { "epoch": 0.6321428571428571, "grad_norm": 7.729804992675781, "learning_rate": 3.0370599019061154e-06, "loss": 0.3621, "step": 13629 }, { "epoch": 0.6321892393320965, "grad_norm": 7.803532600402832, "learning_rate": 3.036383065946897e-06, "loss": 0.2384, "step": 13630 }, { "epoch": 0.6322356215213358, "grad_norm": 5.689188480377197, "learning_rate": 3.0357062725284028e-06, "loss": 0.2772, "step": 13631 }, { "epoch": 0.6322820037105752, "grad_norm": 4.885796546936035, "learning_rate": 3.0350295216652957e-06, "loss": 0.1872, "step": 13632 }, { "epoch": 0.6323283858998144, "grad_norm": 8.965057373046875, "learning_rate": 3.03435281337224e-06, "loss": 0.3108, "step": 13633 }, { "epoch": 0.6323747680890538, "grad_norm": 10.976717948913574, "learning_rate": 3.033676147663892e-06, "loss": 0.444, "step": 13634 }, { "epoch": 0.6324211502782932, "grad_norm": 7.896671772003174, "learning_rate": 3.0329995245549124e-06, "loss": 0.3809, "step": 13635 }, { "epoch": 0.6324675324675325, "grad_norm": 6.282296657562256, "learning_rate": 3.0323229440599627e-06, "loss": 0.3098, "step": 13636 }, { "epoch": 0.6325139146567718, "grad_norm": 6.219454288482666, "learning_rate": 3.0316464061936973e-06, "loss": 0.3293, "step": 13637 }, { "epoch": 0.6325602968460111, "grad_norm": 11.410618782043457, "learning_rate": 3.0309699109707753e-06, "loss": 0.3603, "step": 13638 }, { "epoch": 0.6326066790352505, "grad_norm": 8.619215965270996, "learning_rate": 3.0302934584058506e-06, "loss": 0.3261, "step": 13639 }, { "epoch": 0.6326530612244898, "grad_norm": 9.7528076171875, "learning_rate": 3.029617048513579e-06, "loss": 0.4963, "step": 13640 }, { "epoch": 0.6326994434137291, "grad_norm": 11.543359756469727, "learning_rate": 3.028940681308616e-06, "loss": 0.4185, "step": 13641 }, { "epoch": 0.6327458256029684, "grad_norm": 8.92319393157959, "learning_rate": 3.0282643568056124e-06, "loss": 0.3125, "step": 13642 }, { "epoch": 0.6327922077922078, "grad_norm": 7.387612342834473, "learning_rate": 3.0275880750192226e-06, "loss": 0.362, "step": 13643 }, { "epoch": 0.6328385899814472, "grad_norm": 11.06419849395752, "learning_rate": 3.026911835964096e-06, "loss": 0.3416, "step": 13644 }, { "epoch": 0.6328849721706865, "grad_norm": 9.911247253417969, "learning_rate": 3.0262356396548832e-06, "loss": 0.4184, "step": 13645 }, { "epoch": 0.6329313543599258, "grad_norm": 10.848471641540527, "learning_rate": 3.025559486106235e-06, "loss": 0.3577, "step": 13646 }, { "epoch": 0.6329777365491651, "grad_norm": 6.456480026245117, "learning_rate": 3.0248833753327992e-06, "loss": 0.4378, "step": 13647 }, { "epoch": 0.6330241187384045, "grad_norm": 7.705058574676514, "learning_rate": 3.0242073073492238e-06, "loss": 0.3235, "step": 13648 }, { "epoch": 0.6330705009276438, "grad_norm": 6.224052429199219, "learning_rate": 3.0235312821701546e-06, "loss": 0.3577, "step": 13649 }, { "epoch": 0.6331168831168831, "grad_norm": 6.0129194259643555, "learning_rate": 3.0228552998102366e-06, "loss": 0.3633, "step": 13650 }, { "epoch": 0.6331632653061224, "grad_norm": 7.028080940246582, "learning_rate": 3.0221793602841166e-06, "loss": 0.3515, "step": 13651 }, { "epoch": 0.6332096474953618, "grad_norm": 5.56195068359375, "learning_rate": 3.0215034636064376e-06, "loss": 0.3129, "step": 13652 }, { "epoch": 0.6332560296846012, "grad_norm": 5.811412334442139, "learning_rate": 3.0208276097918433e-06, "loss": 0.3616, "step": 13653 }, { "epoch": 0.6333024118738404, "grad_norm": 5.658174991607666, "learning_rate": 3.020151798854973e-06, "loss": 0.2837, "step": 13654 }, { "epoch": 0.6333487940630798, "grad_norm": 7.529165267944336, "learning_rate": 3.019476030810471e-06, "loss": 0.2282, "step": 13655 }, { "epoch": 0.6333951762523191, "grad_norm": 6.055397033691406, "learning_rate": 3.0188003056729752e-06, "loss": 0.4347, "step": 13656 }, { "epoch": 0.6334415584415585, "grad_norm": 8.512982368469238, "learning_rate": 3.0181246234571263e-06, "loss": 0.3889, "step": 13657 }, { "epoch": 0.6334879406307977, "grad_norm": 8.093061447143555, "learning_rate": 3.0174489841775643e-06, "loss": 0.4512, "step": 13658 }, { "epoch": 0.6335343228200371, "grad_norm": 5.120431900024414, "learning_rate": 3.016773387848922e-06, "loss": 0.3372, "step": 13659 }, { "epoch": 0.6335807050092764, "grad_norm": 10.016190528869629, "learning_rate": 3.0160978344858384e-06, "loss": 0.3386, "step": 13660 }, { "epoch": 0.6336270871985158, "grad_norm": 8.889204025268555, "learning_rate": 3.0154223241029484e-06, "loss": 0.4372, "step": 13661 }, { "epoch": 0.6336734693877552, "grad_norm": 21.61698341369629, "learning_rate": 3.0147468567148876e-06, "loss": 0.4828, "step": 13662 }, { "epoch": 0.6337198515769944, "grad_norm": 10.975382804870605, "learning_rate": 3.014071432336291e-06, "loss": 0.3179, "step": 13663 }, { "epoch": 0.6337662337662338, "grad_norm": 6.768829822540283, "learning_rate": 3.0133960509817867e-06, "loss": 0.3187, "step": 13664 }, { "epoch": 0.6338126159554731, "grad_norm": 9.687050819396973, "learning_rate": 3.0127207126660094e-06, "loss": 0.321, "step": 13665 }, { "epoch": 0.6338589981447125, "grad_norm": 7.1162238121032715, "learning_rate": 3.0120454174035896e-06, "loss": 0.3375, "step": 13666 }, { "epoch": 0.6339053803339517, "grad_norm": 7.597057342529297, "learning_rate": 3.0113701652091576e-06, "loss": 0.4027, "step": 13667 }, { "epoch": 0.6339517625231911, "grad_norm": 5.4801344871521, "learning_rate": 3.010694956097342e-06, "loss": 0.3301, "step": 13668 }, { "epoch": 0.6339981447124304, "grad_norm": 11.059654235839844, "learning_rate": 3.0100197900827733e-06, "loss": 0.4344, "step": 13669 }, { "epoch": 0.6340445269016698, "grad_norm": 8.187740325927734, "learning_rate": 3.009344667180073e-06, "loss": 0.3454, "step": 13670 }, { "epoch": 0.634090909090909, "grad_norm": 5.399507522583008, "learning_rate": 3.0086695874038717e-06, "loss": 0.4171, "step": 13671 }, { "epoch": 0.6341372912801484, "grad_norm": 8.421503067016602, "learning_rate": 3.007994550768793e-06, "loss": 0.3428, "step": 13672 }, { "epoch": 0.6341836734693878, "grad_norm": 4.791064262390137, "learning_rate": 3.007319557289462e-06, "loss": 0.3967, "step": 13673 }, { "epoch": 0.6342300556586271, "grad_norm": 5.237918376922607, "learning_rate": 3.0066446069805033e-06, "loss": 0.3347, "step": 13674 }, { "epoch": 0.6342764378478665, "grad_norm": 4.361824989318848, "learning_rate": 3.0059696998565362e-06, "loss": 0.3211, "step": 13675 }, { "epoch": 0.6343228200371057, "grad_norm": 4.876838684082031, "learning_rate": 3.0052948359321837e-06, "loss": 0.3323, "step": 13676 }, { "epoch": 0.6343692022263451, "grad_norm": 9.302071571350098, "learning_rate": 3.004620015222066e-06, "loss": 0.4105, "step": 13677 }, { "epoch": 0.6344155844155844, "grad_norm": 7.915829181671143, "learning_rate": 3.0039452377408043e-06, "loss": 0.318, "step": 13678 }, { "epoch": 0.6344619666048238, "grad_norm": 8.534481048583984, "learning_rate": 3.003270503503015e-06, "loss": 0.3777, "step": 13679 }, { "epoch": 0.634508348794063, "grad_norm": 5.438872337341309, "learning_rate": 3.00259581252332e-06, "loss": 0.2881, "step": 13680 }, { "epoch": 0.6345547309833024, "grad_norm": 8.24992847442627, "learning_rate": 3.001921164816331e-06, "loss": 0.3688, "step": 13681 }, { "epoch": 0.6346011131725418, "grad_norm": 7.545971393585205, "learning_rate": 3.0012465603966656e-06, "loss": 0.2792, "step": 13682 }, { "epoch": 0.6346474953617811, "grad_norm": 8.501598358154297, "learning_rate": 3.0005719992789397e-06, "loss": 0.4606, "step": 13683 }, { "epoch": 0.6346938775510204, "grad_norm": 8.387585639953613, "learning_rate": 2.999897481477767e-06, "loss": 0.389, "step": 13684 }, { "epoch": 0.6347402597402597, "grad_norm": 5.312418460845947, "learning_rate": 2.999223007007762e-06, "loss": 0.3413, "step": 13685 }, { "epoch": 0.6347866419294991, "grad_norm": 8.36142349243164, "learning_rate": 2.998548575883533e-06, "loss": 0.3086, "step": 13686 }, { "epoch": 0.6348330241187384, "grad_norm": 7.1333489418029785, "learning_rate": 2.9978741881196943e-06, "loss": 0.3519, "step": 13687 }, { "epoch": 0.6348794063079778, "grad_norm": 6.5029144287109375, "learning_rate": 2.9971998437308546e-06, "loss": 0.3225, "step": 13688 }, { "epoch": 0.634925788497217, "grad_norm": 9.114572525024414, "learning_rate": 2.9965255427316244e-06, "loss": 0.3821, "step": 13689 }, { "epoch": 0.6349721706864564, "grad_norm": 4.884158134460449, "learning_rate": 2.9958512851366135e-06, "loss": 0.3254, "step": 13690 }, { "epoch": 0.6350185528756958, "grad_norm": 6.229441165924072, "learning_rate": 2.9951770709604254e-06, "loss": 0.2789, "step": 13691 }, { "epoch": 0.6350649350649351, "grad_norm": 4.955905914306641, "learning_rate": 2.9945029002176686e-06, "loss": 0.2953, "step": 13692 }, { "epoch": 0.6351113172541744, "grad_norm": 5.466950416564941, "learning_rate": 2.993828772922949e-06, "loss": 0.32, "step": 13693 }, { "epoch": 0.6351576994434137, "grad_norm": 7.861527919769287, "learning_rate": 2.9931546890908695e-06, "loss": 0.3349, "step": 13694 }, { "epoch": 0.6352040816326531, "grad_norm": 8.50297737121582, "learning_rate": 2.9924806487360363e-06, "loss": 0.3609, "step": 13695 }, { "epoch": 0.6352504638218924, "grad_norm": 7.59672212600708, "learning_rate": 2.9918066518730525e-06, "loss": 0.4101, "step": 13696 }, { "epoch": 0.6352968460111317, "grad_norm": 8.645313262939453, "learning_rate": 2.991132698516517e-06, "loss": 0.4109, "step": 13697 }, { "epoch": 0.635343228200371, "grad_norm": 11.794721603393555, "learning_rate": 2.9904587886810323e-06, "loss": 0.3778, "step": 13698 }, { "epoch": 0.6353896103896104, "grad_norm": 12.667717933654785, "learning_rate": 2.989784922381197e-06, "loss": 0.3083, "step": 13699 }, { "epoch": 0.6354359925788498, "grad_norm": 4.333507537841797, "learning_rate": 2.9891110996316117e-06, "loss": 0.2188, "step": 13700 }, { "epoch": 0.6354823747680891, "grad_norm": 5.498922348022461, "learning_rate": 2.9884373204468754e-06, "loss": 0.2821, "step": 13701 }, { "epoch": 0.6355287569573284, "grad_norm": 13.31588077545166, "learning_rate": 2.987763584841582e-06, "loss": 0.3284, "step": 13702 }, { "epoch": 0.6355751391465677, "grad_norm": 13.365900039672852, "learning_rate": 2.9870898928303294e-06, "loss": 0.462, "step": 13703 }, { "epoch": 0.6356215213358071, "grad_norm": 10.82983112335205, "learning_rate": 2.9864162444277118e-06, "loss": 0.2738, "step": 13704 }, { "epoch": 0.6356679035250464, "grad_norm": 8.016825675964355, "learning_rate": 2.9857426396483248e-06, "loss": 0.2429, "step": 13705 }, { "epoch": 0.6357142857142857, "grad_norm": 5.874632835388184, "learning_rate": 2.9850690785067606e-06, "loss": 0.3003, "step": 13706 }, { "epoch": 0.635760667903525, "grad_norm": 5.120481014251709, "learning_rate": 2.9843955610176145e-06, "loss": 0.2717, "step": 13707 }, { "epoch": 0.6358070500927644, "grad_norm": 5.734194278717041, "learning_rate": 2.9837220871954733e-06, "loss": 0.2908, "step": 13708 }, { "epoch": 0.6358534322820037, "grad_norm": 11.680521965026855, "learning_rate": 2.9830486570549295e-06, "loss": 0.3722, "step": 13709 }, { "epoch": 0.635899814471243, "grad_norm": 5.554213047027588, "learning_rate": 2.9823752706105726e-06, "loss": 0.2072, "step": 13710 }, { "epoch": 0.6359461966604824, "grad_norm": 9.739261627197266, "learning_rate": 2.9817019278769917e-06, "loss": 0.3979, "step": 13711 }, { "epoch": 0.6359925788497217, "grad_norm": 7.359442234039307, "learning_rate": 2.9810286288687754e-06, "loss": 0.3006, "step": 13712 }, { "epoch": 0.6360389610389611, "grad_norm": 9.1817045211792, "learning_rate": 2.9803553736005075e-06, "loss": 0.3646, "step": 13713 }, { "epoch": 0.6360853432282003, "grad_norm": 5.24649715423584, "learning_rate": 2.979682162086775e-06, "loss": 0.3687, "step": 13714 }, { "epoch": 0.6361317254174397, "grad_norm": 7.557576656341553, "learning_rate": 2.9790089943421628e-06, "loss": 0.4147, "step": 13715 }, { "epoch": 0.636178107606679, "grad_norm": 6.779378414154053, "learning_rate": 2.978335870381255e-06, "loss": 0.294, "step": 13716 }, { "epoch": 0.6362244897959184, "grad_norm": 10.071906089782715, "learning_rate": 2.977662790218634e-06, "loss": 0.3883, "step": 13717 }, { "epoch": 0.6362708719851577, "grad_norm": 6.661693096160889, "learning_rate": 2.9769897538688843e-06, "loss": 0.2755, "step": 13718 }, { "epoch": 0.636317254174397, "grad_norm": 5.0776047706604, "learning_rate": 2.9763167613465817e-06, "loss": 0.3512, "step": 13719 }, { "epoch": 0.6363636363636364, "grad_norm": 6.761713027954102, "learning_rate": 2.97564381266631e-06, "loss": 0.4107, "step": 13720 }, { "epoch": 0.6364100185528757, "grad_norm": 6.4754791259765625, "learning_rate": 2.974970907842647e-06, "loss": 0.2899, "step": 13721 }, { "epoch": 0.6364564007421151, "grad_norm": 7.218099117279053, "learning_rate": 2.974298046890171e-06, "loss": 0.3388, "step": 13722 }, { "epoch": 0.6365027829313543, "grad_norm": 10.568634986877441, "learning_rate": 2.973625229823462e-06, "loss": 0.3679, "step": 13723 }, { "epoch": 0.6365491651205937, "grad_norm": 11.288476943969727, "learning_rate": 2.972952456657091e-06, "loss": 0.3837, "step": 13724 }, { "epoch": 0.636595547309833, "grad_norm": 9.338778495788574, "learning_rate": 2.9722797274056358e-06, "loss": 0.4932, "step": 13725 }, { "epoch": 0.6366419294990724, "grad_norm": 9.593027114868164, "learning_rate": 2.9716070420836716e-06, "loss": 0.2185, "step": 13726 }, { "epoch": 0.6366883116883116, "grad_norm": 12.016024589538574, "learning_rate": 2.9709344007057704e-06, "loss": 0.4212, "step": 13727 }, { "epoch": 0.636734693877551, "grad_norm": 8.328632354736328, "learning_rate": 2.970261803286505e-06, "loss": 0.4055, "step": 13728 }, { "epoch": 0.6367810760667904, "grad_norm": 9.803665161132812, "learning_rate": 2.9695892498404496e-06, "loss": 0.2851, "step": 13729 }, { "epoch": 0.6368274582560297, "grad_norm": 6.767007827758789, "learning_rate": 2.9689167403821696e-06, "loss": 0.3052, "step": 13730 }, { "epoch": 0.6368738404452691, "grad_norm": 6.81101131439209, "learning_rate": 2.9682442749262374e-06, "loss": 0.3804, "step": 13731 }, { "epoch": 0.6369202226345083, "grad_norm": 4.553088188171387, "learning_rate": 2.967571853487221e-06, "loss": 0.2323, "step": 13732 }, { "epoch": 0.6369666048237477, "grad_norm": 11.947001457214355, "learning_rate": 2.9668994760796886e-06, "loss": 0.4017, "step": 13733 }, { "epoch": 0.637012987012987, "grad_norm": 10.986388206481934, "learning_rate": 2.9662271427182095e-06, "loss": 0.4672, "step": 13734 }, { "epoch": 0.6370593692022264, "grad_norm": 5.5748772621154785, "learning_rate": 2.9655548534173438e-06, "loss": 0.1919, "step": 13735 }, { "epoch": 0.6371057513914656, "grad_norm": 10.474845886230469, "learning_rate": 2.964882608191659e-06, "loss": 0.3878, "step": 13736 }, { "epoch": 0.637152133580705, "grad_norm": 6.073649883270264, "learning_rate": 2.96421040705572e-06, "loss": 0.2104, "step": 13737 }, { "epoch": 0.6371985157699444, "grad_norm": 4.500268936157227, "learning_rate": 2.9635382500240876e-06, "loss": 0.2904, "step": 13738 }, { "epoch": 0.6372448979591837, "grad_norm": 5.236766338348389, "learning_rate": 2.9628661371113255e-06, "loss": 0.3347, "step": 13739 }, { "epoch": 0.637291280148423, "grad_norm": 7.732250690460205, "learning_rate": 2.962194068331996e-06, "loss": 0.3679, "step": 13740 }, { "epoch": 0.6373376623376623, "grad_norm": 5.235434532165527, "learning_rate": 2.9615220437006554e-06, "loss": 0.1901, "step": 13741 }, { "epoch": 0.6373840445269017, "grad_norm": 6.5937652587890625, "learning_rate": 2.960850063231865e-06, "loss": 0.2923, "step": 13742 }, { "epoch": 0.637430426716141, "grad_norm": 7.377085208892822, "learning_rate": 2.9601781269401813e-06, "loss": 0.4052, "step": 13743 }, { "epoch": 0.6374768089053804, "grad_norm": 7.1520891189575195, "learning_rate": 2.959506234840163e-06, "loss": 0.2849, "step": 13744 }, { "epoch": 0.6375231910946196, "grad_norm": 4.34614896774292, "learning_rate": 2.9588343869463674e-06, "loss": 0.3091, "step": 13745 }, { "epoch": 0.637569573283859, "grad_norm": 6.1033220291137695, "learning_rate": 2.9581625832733475e-06, "loss": 0.3411, "step": 13746 }, { "epoch": 0.6376159554730984, "grad_norm": 7.381175994873047, "learning_rate": 2.957490823835657e-06, "loss": 0.2785, "step": 13747 }, { "epoch": 0.6376623376623377, "grad_norm": 7.899409770965576, "learning_rate": 2.9568191086478515e-06, "loss": 0.3539, "step": 13748 }, { "epoch": 0.637708719851577, "grad_norm": 5.910125255584717, "learning_rate": 2.9561474377244812e-06, "loss": 0.2465, "step": 13749 }, { "epoch": 0.6377551020408163, "grad_norm": 5.302832126617432, "learning_rate": 2.955475811080101e-06, "loss": 0.4135, "step": 13750 }, { "epoch": 0.6378014842300557, "grad_norm": 9.107847213745117, "learning_rate": 2.954804228729258e-06, "loss": 0.3366, "step": 13751 }, { "epoch": 0.637847866419295, "grad_norm": 10.531556129455566, "learning_rate": 2.954132690686502e-06, "loss": 0.3836, "step": 13752 }, { "epoch": 0.6378942486085343, "grad_norm": 8.449418067932129, "learning_rate": 2.9534611969663825e-06, "loss": 0.2761, "step": 13753 }, { "epoch": 0.6379406307977736, "grad_norm": 4.922489643096924, "learning_rate": 2.952789747583446e-06, "loss": 0.2528, "step": 13754 }, { "epoch": 0.637987012987013, "grad_norm": 10.693119049072266, "learning_rate": 2.9521183425522417e-06, "loss": 0.4639, "step": 13755 }, { "epoch": 0.6380333951762523, "grad_norm": 7.896361827850342, "learning_rate": 2.9514469818873126e-06, "loss": 0.363, "step": 13756 }, { "epoch": 0.6380797773654916, "grad_norm": 7.8651533126831055, "learning_rate": 2.9507756656032034e-06, "loss": 0.2797, "step": 13757 }, { "epoch": 0.638126159554731, "grad_norm": 3.899885654449463, "learning_rate": 2.950104393714458e-06, "loss": 0.2019, "step": 13758 }, { "epoch": 0.6381725417439703, "grad_norm": 17.18586540222168, "learning_rate": 2.9494331662356203e-06, "loss": 0.5718, "step": 13759 }, { "epoch": 0.6382189239332097, "grad_norm": 9.344618797302246, "learning_rate": 2.9487619831812323e-06, "loss": 0.3328, "step": 13760 }, { "epoch": 0.638265306122449, "grad_norm": 9.701315879821777, "learning_rate": 2.9480908445658337e-06, "loss": 0.4157, "step": 13761 }, { "epoch": 0.6383116883116883, "grad_norm": 13.57254409790039, "learning_rate": 2.9474197504039646e-06, "loss": 0.2535, "step": 13762 }, { "epoch": 0.6383580705009276, "grad_norm": 10.859701156616211, "learning_rate": 2.9467487007101636e-06, "loss": 0.4128, "step": 13763 }, { "epoch": 0.638404452690167, "grad_norm": 8.361639976501465, "learning_rate": 2.946077695498969e-06, "loss": 0.39, "step": 13764 }, { "epoch": 0.6384508348794063, "grad_norm": 9.091897010803223, "learning_rate": 2.945406734784919e-06, "loss": 0.3626, "step": 13765 }, { "epoch": 0.6384972170686456, "grad_norm": 7.611630439758301, "learning_rate": 2.944735818582547e-06, "loss": 0.3578, "step": 13766 }, { "epoch": 0.638543599257885, "grad_norm": 7.921306610107422, "learning_rate": 2.944064946906391e-06, "loss": 0.259, "step": 13767 }, { "epoch": 0.6385899814471243, "grad_norm": 5.627228260040283, "learning_rate": 2.9433941197709813e-06, "loss": 0.3346, "step": 13768 }, { "epoch": 0.6386363636363637, "grad_norm": 10.472651481628418, "learning_rate": 2.942723337190855e-06, "loss": 0.3764, "step": 13769 }, { "epoch": 0.6386827458256029, "grad_norm": 9.86347484588623, "learning_rate": 2.942052599180542e-06, "loss": 0.4611, "step": 13770 }, { "epoch": 0.6387291280148423, "grad_norm": 5.519401550292969, "learning_rate": 2.941381905754573e-06, "loss": 0.2784, "step": 13771 }, { "epoch": 0.6387755102040816, "grad_norm": 9.53243350982666, "learning_rate": 2.9407112569274814e-06, "loss": 0.318, "step": 13772 }, { "epoch": 0.638821892393321, "grad_norm": 17.473695755004883, "learning_rate": 2.9400406527137924e-06, "loss": 0.3167, "step": 13773 }, { "epoch": 0.6388682745825603, "grad_norm": 6.640315055847168, "learning_rate": 2.9393700931280367e-06, "loss": 0.4319, "step": 13774 }, { "epoch": 0.6389146567717996, "grad_norm": 6.180603504180908, "learning_rate": 2.9386995781847416e-06, "loss": 0.3387, "step": 13775 }, { "epoch": 0.638961038961039, "grad_norm": 7.314316272735596, "learning_rate": 2.9380291078984325e-06, "loss": 0.3159, "step": 13776 }, { "epoch": 0.6390074211502783, "grad_norm": 9.574249267578125, "learning_rate": 2.9373586822836344e-06, "loss": 0.523, "step": 13777 }, { "epoch": 0.6390538033395177, "grad_norm": 6.199421405792236, "learning_rate": 2.936688301354874e-06, "loss": 0.3685, "step": 13778 }, { "epoch": 0.6391001855287569, "grad_norm": 7.388599395751953, "learning_rate": 2.936017965126672e-06, "loss": 0.3957, "step": 13779 }, { "epoch": 0.6391465677179963, "grad_norm": 3.58689546585083, "learning_rate": 2.9353476736135522e-06, "loss": 0.2544, "step": 13780 }, { "epoch": 0.6391929499072356, "grad_norm": 10.719348907470703, "learning_rate": 2.934677426830037e-06, "loss": 0.3425, "step": 13781 }, { "epoch": 0.639239332096475, "grad_norm": 7.28242301940918, "learning_rate": 2.934007224790645e-06, "loss": 0.3011, "step": 13782 }, { "epoch": 0.6392857142857142, "grad_norm": 7.0955305099487305, "learning_rate": 2.9333370675098984e-06, "loss": 0.2807, "step": 13783 }, { "epoch": 0.6393320964749536, "grad_norm": 18.65644073486328, "learning_rate": 2.9326669550023124e-06, "loss": 0.3987, "step": 13784 }, { "epoch": 0.639378478664193, "grad_norm": 6.6065216064453125, "learning_rate": 2.9319968872824066e-06, "loss": 0.2421, "step": 13785 }, { "epoch": 0.6394248608534323, "grad_norm": 6.829897403717041, "learning_rate": 2.9313268643646988e-06, "loss": 0.3119, "step": 13786 }, { "epoch": 0.6394712430426717, "grad_norm": 5.866652965545654, "learning_rate": 2.930656886263702e-06, "loss": 0.3116, "step": 13787 }, { "epoch": 0.6395176252319109, "grad_norm": 4.8921098709106445, "learning_rate": 2.929986952993933e-06, "loss": 0.3134, "step": 13788 }, { "epoch": 0.6395640074211503, "grad_norm": 6.392723560333252, "learning_rate": 2.9293170645699046e-06, "loss": 0.3262, "step": 13789 }, { "epoch": 0.6396103896103896, "grad_norm": 8.213953971862793, "learning_rate": 2.9286472210061303e-06, "loss": 0.3271, "step": 13790 }, { "epoch": 0.639656771799629, "grad_norm": 4.914583206176758, "learning_rate": 2.927977422317121e-06, "loss": 0.345, "step": 13791 }, { "epoch": 0.6397031539888682, "grad_norm": 13.659531593322754, "learning_rate": 2.927307668517388e-06, "loss": 0.4236, "step": 13792 }, { "epoch": 0.6397495361781076, "grad_norm": 5.730243682861328, "learning_rate": 2.9266379596214408e-06, "loss": 0.2382, "step": 13793 }, { "epoch": 0.639795918367347, "grad_norm": 6.0686726570129395, "learning_rate": 2.9259682956437894e-06, "loss": 0.2811, "step": 13794 }, { "epoch": 0.6398423005565863, "grad_norm": 9.535555839538574, "learning_rate": 2.92529867659894e-06, "loss": 0.3871, "step": 13795 }, { "epoch": 0.6398886827458256, "grad_norm": 8.391730308532715, "learning_rate": 2.924629102501402e-06, "loss": 0.2729, "step": 13796 }, { "epoch": 0.6399350649350649, "grad_norm": 7.08459997177124, "learning_rate": 2.9239595733656778e-06, "loss": 0.2729, "step": 13797 }, { "epoch": 0.6399814471243043, "grad_norm": 8.351900100708008, "learning_rate": 2.9232900892062754e-06, "loss": 0.3241, "step": 13798 }, { "epoch": 0.6400278293135436, "grad_norm": 15.124448776245117, "learning_rate": 2.9226206500376975e-06, "loss": 0.2628, "step": 13799 }, { "epoch": 0.640074211502783, "grad_norm": 8.543436050415039, "learning_rate": 2.921951255874449e-06, "loss": 0.3182, "step": 13800 }, { "epoch": 0.6401205936920222, "grad_norm": 4.5669941902160645, "learning_rate": 2.9212819067310293e-06, "loss": 0.2095, "step": 13801 }, { "epoch": 0.6401669758812616, "grad_norm": 8.962668418884277, "learning_rate": 2.9206126026219405e-06, "loss": 0.3749, "step": 13802 }, { "epoch": 0.640213358070501, "grad_norm": 5.295323371887207, "learning_rate": 2.919943343561683e-06, "loss": 0.2804, "step": 13803 }, { "epoch": 0.6402597402597403, "grad_norm": 6.635021686553955, "learning_rate": 2.919274129564756e-06, "loss": 0.3508, "step": 13804 }, { "epoch": 0.6403061224489796, "grad_norm": 6.152994632720947, "learning_rate": 2.918604960645659e-06, "loss": 0.3536, "step": 13805 }, { "epoch": 0.6403525046382189, "grad_norm": 4.56571102142334, "learning_rate": 2.917935836818886e-06, "loss": 0.2508, "step": 13806 }, { "epoch": 0.6403988868274583, "grad_norm": 7.797415256500244, "learning_rate": 2.9172667580989346e-06, "loss": 0.2368, "step": 13807 }, { "epoch": 0.6404452690166976, "grad_norm": 5.535276412963867, "learning_rate": 2.916597724500301e-06, "loss": 0.3121, "step": 13808 }, { "epoch": 0.6404916512059369, "grad_norm": 6.115728855133057, "learning_rate": 2.915928736037478e-06, "loss": 0.3188, "step": 13809 }, { "epoch": 0.6405380333951762, "grad_norm": 8.992051124572754, "learning_rate": 2.9152597927249604e-06, "loss": 0.4328, "step": 13810 }, { "epoch": 0.6405844155844156, "grad_norm": 11.419238090515137, "learning_rate": 2.9145908945772417e-06, "loss": 0.3859, "step": 13811 }, { "epoch": 0.640630797773655, "grad_norm": 5.824103355407715, "learning_rate": 2.9139220416088087e-06, "loss": 0.3546, "step": 13812 }, { "epoch": 0.6406771799628942, "grad_norm": 5.524756908416748, "learning_rate": 2.9132532338341553e-06, "loss": 0.3098, "step": 13813 }, { "epoch": 0.6407235621521336, "grad_norm": 13.794307708740234, "learning_rate": 2.912584471267769e-06, "loss": 0.5008, "step": 13814 }, { "epoch": 0.6407699443413729, "grad_norm": 8.09387493133545, "learning_rate": 2.9119157539241395e-06, "loss": 0.3766, "step": 13815 }, { "epoch": 0.6408163265306123, "grad_norm": 9.779718399047852, "learning_rate": 2.9112470818177563e-06, "loss": 0.3471, "step": 13816 }, { "epoch": 0.6408627087198516, "grad_norm": 7.0577239990234375, "learning_rate": 2.910578454963101e-06, "loss": 0.3242, "step": 13817 }, { "epoch": 0.6409090909090909, "grad_norm": 10.96973705291748, "learning_rate": 2.9099098733746613e-06, "loss": 0.3599, "step": 13818 }, { "epoch": 0.6409554730983302, "grad_norm": 4.808198928833008, "learning_rate": 2.909241337066922e-06, "loss": 0.2491, "step": 13819 }, { "epoch": 0.6410018552875696, "grad_norm": 5.557531833648682, "learning_rate": 2.908572846054366e-06, "loss": 0.2824, "step": 13820 }, { "epoch": 0.641048237476809, "grad_norm": 8.20149040222168, "learning_rate": 2.9079044003514768e-06, "loss": 0.4745, "step": 13821 }, { "epoch": 0.6410946196660482, "grad_norm": 10.956846237182617, "learning_rate": 2.9072359999727374e-06, "loss": 0.343, "step": 13822 }, { "epoch": 0.6411410018552876, "grad_norm": 5.181678771972656, "learning_rate": 2.906567644932623e-06, "loss": 0.2982, "step": 13823 }, { "epoch": 0.6411873840445269, "grad_norm": 8.762506484985352, "learning_rate": 2.9058993352456178e-06, "loss": 0.3617, "step": 13824 }, { "epoch": 0.6412337662337663, "grad_norm": 5.638703346252441, "learning_rate": 2.9052310709261983e-06, "loss": 0.2677, "step": 13825 }, { "epoch": 0.6412801484230055, "grad_norm": 6.423529148101807, "learning_rate": 2.9045628519888425e-06, "loss": 0.3421, "step": 13826 }, { "epoch": 0.6413265306122449, "grad_norm": 9.664193153381348, "learning_rate": 2.90389467844803e-06, "loss": 0.3866, "step": 13827 }, { "epoch": 0.6413729128014842, "grad_norm": 7.476991653442383, "learning_rate": 2.903226550318231e-06, "loss": 0.2622, "step": 13828 }, { "epoch": 0.6414192949907236, "grad_norm": 5.891458988189697, "learning_rate": 2.9025584676139233e-06, "loss": 0.2749, "step": 13829 }, { "epoch": 0.641465677179963, "grad_norm": 4.501150131225586, "learning_rate": 2.9018904303495797e-06, "loss": 0.2851, "step": 13830 }, { "epoch": 0.6415120593692022, "grad_norm": 5.816624641418457, "learning_rate": 2.9012224385396736e-06, "loss": 0.3106, "step": 13831 }, { "epoch": 0.6415584415584416, "grad_norm": 11.365707397460938, "learning_rate": 2.9005544921986774e-06, "loss": 0.3832, "step": 13832 }, { "epoch": 0.6416048237476809, "grad_norm": 5.620909214019775, "learning_rate": 2.8998865913410613e-06, "loss": 0.3663, "step": 13833 }, { "epoch": 0.6416512059369203, "grad_norm": 8.510509490966797, "learning_rate": 2.8992187359812928e-06, "loss": 0.402, "step": 13834 }, { "epoch": 0.6416975881261595, "grad_norm": 4.584951877593994, "learning_rate": 2.898550926133843e-06, "loss": 0.3221, "step": 13835 }, { "epoch": 0.6417439703153989, "grad_norm": 8.592096328735352, "learning_rate": 2.897883161813178e-06, "loss": 0.3128, "step": 13836 }, { "epoch": 0.6417903525046382, "grad_norm": 5.613687515258789, "learning_rate": 2.8972154430337663e-06, "loss": 0.2613, "step": 13837 }, { "epoch": 0.6418367346938776, "grad_norm": 7.68800687789917, "learning_rate": 2.896547769810075e-06, "loss": 0.3821, "step": 13838 }, { "epoch": 0.6418831168831168, "grad_norm": 9.727856636047363, "learning_rate": 2.895880142156564e-06, "loss": 0.4072, "step": 13839 }, { "epoch": 0.6419294990723562, "grad_norm": 5.17311429977417, "learning_rate": 2.8952125600877e-06, "loss": 0.3648, "step": 13840 }, { "epoch": 0.6419758812615955, "grad_norm": 15.971720695495605, "learning_rate": 2.894545023617947e-06, "loss": 0.3437, "step": 13841 }, { "epoch": 0.6420222634508349, "grad_norm": 8.025069236755371, "learning_rate": 2.893877532761764e-06, "loss": 0.3319, "step": 13842 }, { "epoch": 0.6420686456400743, "grad_norm": 10.098618507385254, "learning_rate": 2.8932100875336146e-06, "loss": 0.3961, "step": 13843 }, { "epoch": 0.6421150278293135, "grad_norm": 5.820672035217285, "learning_rate": 2.892542687947959e-06, "loss": 0.2454, "step": 13844 }, { "epoch": 0.6421614100185529, "grad_norm": 8.028371810913086, "learning_rate": 2.8918753340192517e-06, "loss": 0.214, "step": 13845 }, { "epoch": 0.6422077922077922, "grad_norm": 5.258509159088135, "learning_rate": 2.8912080257619546e-06, "loss": 0.263, "step": 13846 }, { "epoch": 0.6422541743970316, "grad_norm": 4.751507759094238, "learning_rate": 2.8905407631905225e-06, "loss": 0.3009, "step": 13847 }, { "epoch": 0.6423005565862708, "grad_norm": 4.352591514587402, "learning_rate": 2.8898735463194128e-06, "loss": 0.3232, "step": 13848 }, { "epoch": 0.6423469387755102, "grad_norm": 11.292811393737793, "learning_rate": 2.8892063751630815e-06, "loss": 0.5096, "step": 13849 }, { "epoch": 0.6423933209647495, "grad_norm": 6.407843112945557, "learning_rate": 2.888539249735979e-06, "loss": 0.3252, "step": 13850 }, { "epoch": 0.6424397031539889, "grad_norm": 14.975881576538086, "learning_rate": 2.887872170052559e-06, "loss": 0.4948, "step": 13851 }, { "epoch": 0.6424860853432282, "grad_norm": 4.631537914276123, "learning_rate": 2.8872051361272752e-06, "loss": 0.2855, "step": 13852 }, { "epoch": 0.6425324675324675, "grad_norm": 12.104453086853027, "learning_rate": 2.886538147974578e-06, "loss": 0.4311, "step": 13853 }, { "epoch": 0.6425788497217069, "grad_norm": 9.29488754272461, "learning_rate": 2.8858712056089182e-06, "loss": 0.4217, "step": 13854 }, { "epoch": 0.6426252319109462, "grad_norm": 4.935227870941162, "learning_rate": 2.885204309044742e-06, "loss": 0.335, "step": 13855 }, { "epoch": 0.6426716141001856, "grad_norm": 6.8441386222839355, "learning_rate": 2.884537458296499e-06, "loss": 0.3015, "step": 13856 }, { "epoch": 0.6427179962894248, "grad_norm": 8.36867618560791, "learning_rate": 2.8838706533786366e-06, "loss": 0.4139, "step": 13857 }, { "epoch": 0.6427643784786642, "grad_norm": 7.126663684844971, "learning_rate": 2.8832038943056003e-06, "loss": 0.2665, "step": 13858 }, { "epoch": 0.6428107606679035, "grad_norm": 5.756007671356201, "learning_rate": 2.8825371810918345e-06, "loss": 0.3469, "step": 13859 }, { "epoch": 0.6428571428571429, "grad_norm": 8.115785598754883, "learning_rate": 2.8818705137517866e-06, "loss": 0.3277, "step": 13860 }, { "epoch": 0.6429035250463822, "grad_norm": 20.137378692626953, "learning_rate": 2.8812038922998943e-06, "loss": 0.5343, "step": 13861 }, { "epoch": 0.6429499072356215, "grad_norm": 7.086613655090332, "learning_rate": 2.8805373167506025e-06, "loss": 0.2821, "step": 13862 }, { "epoch": 0.6429962894248609, "grad_norm": 9.869523048400879, "learning_rate": 2.879870787118352e-06, "loss": 0.3122, "step": 13863 }, { "epoch": 0.6430426716141002, "grad_norm": 6.3629150390625, "learning_rate": 2.8792043034175817e-06, "loss": 0.3073, "step": 13864 }, { "epoch": 0.6430890538033395, "grad_norm": 8.147886276245117, "learning_rate": 2.878537865662734e-06, "loss": 0.4143, "step": 13865 }, { "epoch": 0.6431354359925788, "grad_norm": 5.968081951141357, "learning_rate": 2.8778714738682424e-06, "loss": 0.3816, "step": 13866 }, { "epoch": 0.6431818181818182, "grad_norm": 8.928130149841309, "learning_rate": 2.8772051280485466e-06, "loss": 0.4025, "step": 13867 }, { "epoch": 0.6432282003710575, "grad_norm": 8.683429718017578, "learning_rate": 2.876538828218082e-06, "loss": 0.3132, "step": 13868 }, { "epoch": 0.6432745825602968, "grad_norm": 7.853170871734619, "learning_rate": 2.875872574391283e-06, "loss": 0.3541, "step": 13869 }, { "epoch": 0.6433209647495362, "grad_norm": 5.114429950714111, "learning_rate": 2.8752063665825846e-06, "loss": 0.3375, "step": 13870 }, { "epoch": 0.6433673469387755, "grad_norm": 5.740644454956055, "learning_rate": 2.8745402048064214e-06, "loss": 0.2774, "step": 13871 }, { "epoch": 0.6434137291280149, "grad_norm": 7.7442450523376465, "learning_rate": 2.873874089077222e-06, "loss": 0.3592, "step": 13872 }, { "epoch": 0.6434601113172542, "grad_norm": 6.754759788513184, "learning_rate": 2.8732080194094187e-06, "loss": 0.3429, "step": 13873 }, { "epoch": 0.6435064935064935, "grad_norm": 5.819265365600586, "learning_rate": 2.8725419958174423e-06, "loss": 0.2563, "step": 13874 }, { "epoch": 0.6435528756957328, "grad_norm": 8.383973121643066, "learning_rate": 2.8718760183157214e-06, "loss": 0.2921, "step": 13875 }, { "epoch": 0.6435992578849722, "grad_norm": 6.291990756988525, "learning_rate": 2.8712100869186853e-06, "loss": 0.338, "step": 13876 }, { "epoch": 0.6436456400742115, "grad_norm": 7.272324085235596, "learning_rate": 2.870544201640759e-06, "loss": 0.3385, "step": 13877 }, { "epoch": 0.6436920222634508, "grad_norm": 6.365324020385742, "learning_rate": 2.8698783624963684e-06, "loss": 0.2591, "step": 13878 }, { "epoch": 0.6437384044526901, "grad_norm": 3.951772451400757, "learning_rate": 2.86921256949994e-06, "loss": 0.2662, "step": 13879 }, { "epoch": 0.6437847866419295, "grad_norm": 6.683987617492676, "learning_rate": 2.8685468226658974e-06, "loss": 0.2345, "step": 13880 }, { "epoch": 0.6438311688311689, "grad_norm": 7.982043743133545, "learning_rate": 2.8678811220086635e-06, "loss": 0.3686, "step": 13881 }, { "epoch": 0.6438775510204081, "grad_norm": 16.172191619873047, "learning_rate": 2.867215467542663e-06, "loss": 0.5191, "step": 13882 }, { "epoch": 0.6439239332096475, "grad_norm": 5.544219493865967, "learning_rate": 2.866549859282312e-06, "loss": 0.4044, "step": 13883 }, { "epoch": 0.6439703153988868, "grad_norm": 7.985701560974121, "learning_rate": 2.865884297242033e-06, "loss": 0.3818, "step": 13884 }, { "epoch": 0.6440166975881262, "grad_norm": 4.981565475463867, "learning_rate": 2.865218781436245e-06, "loss": 0.2172, "step": 13885 }, { "epoch": 0.6440630797773655, "grad_norm": 7.565272808074951, "learning_rate": 2.8645533118793665e-06, "loss": 0.2453, "step": 13886 }, { "epoch": 0.6441094619666048, "grad_norm": 10.390853881835938, "learning_rate": 2.8638878885858164e-06, "loss": 0.4583, "step": 13887 }, { "epoch": 0.6441558441558441, "grad_norm": 9.108330726623535, "learning_rate": 2.863222511570006e-06, "loss": 0.3683, "step": 13888 }, { "epoch": 0.6442022263450835, "grad_norm": 5.514027118682861, "learning_rate": 2.862557180846353e-06, "loss": 0.2963, "step": 13889 }, { "epoch": 0.6442486085343229, "grad_norm": 3.896735906600952, "learning_rate": 2.8618918964292715e-06, "loss": 0.2158, "step": 13890 }, { "epoch": 0.6442949907235621, "grad_norm": 7.4467692375183105, "learning_rate": 2.861226658333174e-06, "loss": 0.4012, "step": 13891 }, { "epoch": 0.6443413729128015, "grad_norm": 13.976725578308105, "learning_rate": 2.860561466572473e-06, "loss": 0.3415, "step": 13892 }, { "epoch": 0.6443877551020408, "grad_norm": 8.617774963378906, "learning_rate": 2.8598963211615817e-06, "loss": 0.3884, "step": 13893 }, { "epoch": 0.6444341372912802, "grad_norm": 4.899238586425781, "learning_rate": 2.8592312221149047e-06, "loss": 0.2855, "step": 13894 }, { "epoch": 0.6444805194805194, "grad_norm": 5.132415294647217, "learning_rate": 2.8585661694468552e-06, "loss": 0.3558, "step": 13895 }, { "epoch": 0.6445269016697588, "grad_norm": 5.702539920806885, "learning_rate": 2.85790116317184e-06, "loss": 0.2648, "step": 13896 }, { "epoch": 0.6445732838589981, "grad_norm": 6.223459720611572, "learning_rate": 2.857236203304266e-06, "loss": 0.2203, "step": 13897 }, { "epoch": 0.6446196660482375, "grad_norm": 3.894904613494873, "learning_rate": 2.856571289858542e-06, "loss": 0.2324, "step": 13898 }, { "epoch": 0.6446660482374769, "grad_norm": 5.37938117980957, "learning_rate": 2.8559064228490674e-06, "loss": 0.3068, "step": 13899 }, { "epoch": 0.6447124304267161, "grad_norm": 9.239943504333496, "learning_rate": 2.85524160229025e-06, "loss": 0.2991, "step": 13900 }, { "epoch": 0.6447588126159555, "grad_norm": 5.795215129852295, "learning_rate": 2.8545768281964925e-06, "loss": 0.4154, "step": 13901 }, { "epoch": 0.6448051948051948, "grad_norm": 6.032095432281494, "learning_rate": 2.8539121005821956e-06, "loss": 0.3141, "step": 13902 }, { "epoch": 0.6448515769944342, "grad_norm": 8.079449653625488, "learning_rate": 2.853247419461761e-06, "loss": 0.4132, "step": 13903 }, { "epoch": 0.6448979591836734, "grad_norm": 7.032954216003418, "learning_rate": 2.8525827848495912e-06, "loss": 0.3099, "step": 13904 }, { "epoch": 0.6449443413729128, "grad_norm": 7.233211994171143, "learning_rate": 2.8519181967600805e-06, "loss": 0.3588, "step": 13905 }, { "epoch": 0.6449907235621521, "grad_norm": 4.405383110046387, "learning_rate": 2.8512536552076296e-06, "loss": 0.3587, "step": 13906 }, { "epoch": 0.6450371057513915, "grad_norm": 15.150571823120117, "learning_rate": 2.850589160206635e-06, "loss": 0.4311, "step": 13907 }, { "epoch": 0.6450834879406308, "grad_norm": 4.7337727546691895, "learning_rate": 2.8499247117714916e-06, "loss": 0.2653, "step": 13908 }, { "epoch": 0.6451298701298701, "grad_norm": 8.770156860351562, "learning_rate": 2.8492603099165975e-06, "loss": 0.3848, "step": 13909 }, { "epoch": 0.6451762523191095, "grad_norm": 9.07262897491455, "learning_rate": 2.848595954656343e-06, "loss": 0.5187, "step": 13910 }, { "epoch": 0.6452226345083488, "grad_norm": 10.182894706726074, "learning_rate": 2.847931646005122e-06, "loss": 0.3123, "step": 13911 }, { "epoch": 0.6452690166975881, "grad_norm": 11.954291343688965, "learning_rate": 2.8472673839773267e-06, "loss": 0.3726, "step": 13912 }, { "epoch": 0.6453153988868274, "grad_norm": 8.328031539916992, "learning_rate": 2.8466031685873484e-06, "loss": 0.2687, "step": 13913 }, { "epoch": 0.6453617810760668, "grad_norm": 13.292718887329102, "learning_rate": 2.845938999849577e-06, "loss": 0.4399, "step": 13914 }, { "epoch": 0.6454081632653061, "grad_norm": 6.765796184539795, "learning_rate": 2.8452748777784024e-06, "loss": 0.209, "step": 13915 }, { "epoch": 0.6454545454545455, "grad_norm": 4.531651496887207, "learning_rate": 2.844610802388209e-06, "loss": 0.3154, "step": 13916 }, { "epoch": 0.6455009276437847, "grad_norm": 5.403502464294434, "learning_rate": 2.8439467736933856e-06, "loss": 0.2952, "step": 13917 }, { "epoch": 0.6455473098330241, "grad_norm": 8.87768268585205, "learning_rate": 2.8432827917083185e-06, "loss": 0.2696, "step": 13918 }, { "epoch": 0.6455936920222635, "grad_norm": 18.81793212890625, "learning_rate": 2.842618856447392e-06, "loss": 0.4194, "step": 13919 }, { "epoch": 0.6456400742115028, "grad_norm": 5.259524345397949, "learning_rate": 2.8419549679249925e-06, "loss": 0.2585, "step": 13920 }, { "epoch": 0.6456864564007421, "grad_norm": 5.7638726234436035, "learning_rate": 2.8412911261554975e-06, "loss": 0.353, "step": 13921 }, { "epoch": 0.6457328385899814, "grad_norm": 8.377336502075195, "learning_rate": 2.8406273311532927e-06, "loss": 0.333, "step": 13922 }, { "epoch": 0.6457792207792208, "grad_norm": 4.360887050628662, "learning_rate": 2.8399635829327575e-06, "loss": 0.2604, "step": 13923 }, { "epoch": 0.6458256029684601, "grad_norm": 6.894368648529053, "learning_rate": 2.839299881508272e-06, "loss": 0.3629, "step": 13924 }, { "epoch": 0.6458719851576994, "grad_norm": 5.615636825561523, "learning_rate": 2.838636226894214e-06, "loss": 0.2868, "step": 13925 }, { "epoch": 0.6459183673469387, "grad_norm": 8.073036193847656, "learning_rate": 2.837972619104965e-06, "loss": 0.4175, "step": 13926 }, { "epoch": 0.6459647495361781, "grad_norm": 6.297210216522217, "learning_rate": 2.837309058154897e-06, "loss": 0.3964, "step": 13927 }, { "epoch": 0.6460111317254175, "grad_norm": 4.127694606781006, "learning_rate": 2.8366455440583874e-06, "loss": 0.2218, "step": 13928 }, { "epoch": 0.6460575139146568, "grad_norm": 8.609342575073242, "learning_rate": 2.835982076829811e-06, "loss": 0.1481, "step": 13929 }, { "epoch": 0.6461038961038961, "grad_norm": 6.712489128112793, "learning_rate": 2.835318656483541e-06, "loss": 0.4077, "step": 13930 }, { "epoch": 0.6461502782931354, "grad_norm": 10.84827709197998, "learning_rate": 2.834655283033953e-06, "loss": 0.4447, "step": 13931 }, { "epoch": 0.6461966604823748, "grad_norm": 6.717282772064209, "learning_rate": 2.8339919564954143e-06, "loss": 0.2982, "step": 13932 }, { "epoch": 0.6462430426716141, "grad_norm": 7.065238952636719, "learning_rate": 2.833328676882297e-06, "loss": 0.3364, "step": 13933 }, { "epoch": 0.6462894248608534, "grad_norm": 5.284727573394775, "learning_rate": 2.832665444208972e-06, "loss": 0.2855, "step": 13934 }, { "epoch": 0.6463358070500927, "grad_norm": 8.31189250946045, "learning_rate": 2.832002258489806e-06, "loss": 0.3058, "step": 13935 }, { "epoch": 0.6463821892393321, "grad_norm": 5.139360427856445, "learning_rate": 2.8313391197391697e-06, "loss": 0.2733, "step": 13936 }, { "epoch": 0.6464285714285715, "grad_norm": 8.82583236694336, "learning_rate": 2.8306760279714263e-06, "loss": 0.3889, "step": 13937 }, { "epoch": 0.6464749536178107, "grad_norm": 11.134004592895508, "learning_rate": 2.8300129832009414e-06, "loss": 0.3811, "step": 13938 }, { "epoch": 0.6465213358070501, "grad_norm": 6.418094635009766, "learning_rate": 2.8293499854420814e-06, "loss": 0.3424, "step": 13939 }, { "epoch": 0.6465677179962894, "grad_norm": 7.264105796813965, "learning_rate": 2.8286870347092096e-06, "loss": 0.4516, "step": 13940 }, { "epoch": 0.6466141001855288, "grad_norm": 4.662789344787598, "learning_rate": 2.828024131016687e-06, "loss": 0.2475, "step": 13941 }, { "epoch": 0.6466604823747681, "grad_norm": 6.2934675216674805, "learning_rate": 2.827361274378878e-06, "loss": 0.2804, "step": 13942 }, { "epoch": 0.6467068645640074, "grad_norm": 5.759808540344238, "learning_rate": 2.82669846481014e-06, "loss": 0.2572, "step": 13943 }, { "epoch": 0.6467532467532467, "grad_norm": 9.237288475036621, "learning_rate": 2.8260357023248323e-06, "loss": 0.2907, "step": 13944 }, { "epoch": 0.6467996289424861, "grad_norm": 10.976408004760742, "learning_rate": 2.8253729869373154e-06, "loss": 0.4266, "step": 13945 }, { "epoch": 0.6468460111317255, "grad_norm": 5.2448320388793945, "learning_rate": 2.824710318661945e-06, "loss": 0.2017, "step": 13946 }, { "epoch": 0.6468923933209647, "grad_norm": 8.40871810913086, "learning_rate": 2.8240476975130806e-06, "loss": 0.3368, "step": 13947 }, { "epoch": 0.6469387755102041, "grad_norm": 6.628762245178223, "learning_rate": 2.8233851235050738e-06, "loss": 0.2972, "step": 13948 }, { "epoch": 0.6469851576994434, "grad_norm": 9.664361000061035, "learning_rate": 2.822722596652279e-06, "loss": 0.3061, "step": 13949 }, { "epoch": 0.6470315398886828, "grad_norm": 6.78383207321167, "learning_rate": 2.822060116969051e-06, "loss": 0.3153, "step": 13950 }, { "epoch": 0.647077922077922, "grad_norm": 12.846128463745117, "learning_rate": 2.8213976844697423e-06, "loss": 0.3533, "step": 13951 }, { "epoch": 0.6471243042671614, "grad_norm": 12.105666160583496, "learning_rate": 2.8207352991687033e-06, "loss": 0.4149, "step": 13952 }, { "epoch": 0.6471706864564007, "grad_norm": 22.882923126220703, "learning_rate": 2.820072961080287e-06, "loss": 0.4421, "step": 13953 }, { "epoch": 0.6472170686456401, "grad_norm": 6.989753723144531, "learning_rate": 2.8194106702188378e-06, "loss": 0.4232, "step": 13954 }, { "epoch": 0.6472634508348795, "grad_norm": 14.047938346862793, "learning_rate": 2.8187484265987065e-06, "loss": 0.5072, "step": 13955 }, { "epoch": 0.6473098330241187, "grad_norm": 6.290904521942139, "learning_rate": 2.8180862302342406e-06, "loss": 0.3165, "step": 13956 }, { "epoch": 0.6473562152133581, "grad_norm": 5.512563705444336, "learning_rate": 2.8174240811397845e-06, "loss": 0.2378, "step": 13957 }, { "epoch": 0.6474025974025974, "grad_norm": 7.389566898345947, "learning_rate": 2.8167619793296874e-06, "loss": 0.3028, "step": 13958 }, { "epoch": 0.6474489795918368, "grad_norm": 8.423900604248047, "learning_rate": 2.8160999248182878e-06, "loss": 0.4148, "step": 13959 }, { "epoch": 0.647495361781076, "grad_norm": 4.995166778564453, "learning_rate": 2.815437917619932e-06, "loss": 0.2298, "step": 13960 }, { "epoch": 0.6475417439703154, "grad_norm": 6.6319122314453125, "learning_rate": 2.8147759577489614e-06, "loss": 0.2929, "step": 13961 }, { "epoch": 0.6475881261595547, "grad_norm": 5.303680419921875, "learning_rate": 2.814114045219717e-06, "loss": 0.2631, "step": 13962 }, { "epoch": 0.6476345083487941, "grad_norm": 8.445881843566895, "learning_rate": 2.8134521800465385e-06, "loss": 0.351, "step": 13963 }, { "epoch": 0.6476808905380333, "grad_norm": 7.971834659576416, "learning_rate": 2.812790362243768e-06, "loss": 0.3999, "step": 13964 }, { "epoch": 0.6477272727272727, "grad_norm": 5.678519248962402, "learning_rate": 2.812128591825738e-06, "loss": 0.3469, "step": 13965 }, { "epoch": 0.6477736549165121, "grad_norm": 7.658514022827148, "learning_rate": 2.8114668688067885e-06, "loss": 0.3313, "step": 13966 }, { "epoch": 0.6478200371057514, "grad_norm": 14.32646656036377, "learning_rate": 2.810805193201255e-06, "loss": 0.2966, "step": 13967 }, { "epoch": 0.6478664192949907, "grad_norm": 7.846221446990967, "learning_rate": 2.810143565023472e-06, "loss": 0.345, "step": 13968 }, { "epoch": 0.64791280148423, "grad_norm": 4.27241849899292, "learning_rate": 2.809481984287776e-06, "loss": 0.2246, "step": 13969 }, { "epoch": 0.6479591836734694, "grad_norm": 10.996740341186523, "learning_rate": 2.8088204510084948e-06, "loss": 0.4507, "step": 13970 }, { "epoch": 0.6480055658627087, "grad_norm": 5.362973213195801, "learning_rate": 2.808158965199963e-06, "loss": 0.3479, "step": 13971 }, { "epoch": 0.6480519480519481, "grad_norm": 12.8063383102417, "learning_rate": 2.8074975268765113e-06, "loss": 0.5203, "step": 13972 }, { "epoch": 0.6480983302411873, "grad_norm": 6.843174934387207, "learning_rate": 2.80683613605247e-06, "loss": 0.2701, "step": 13973 }, { "epoch": 0.6481447124304267, "grad_norm": 7.320475101470947, "learning_rate": 2.8061747927421677e-06, "loss": 0.2458, "step": 13974 }, { "epoch": 0.6481910946196661, "grad_norm": 6.172387599945068, "learning_rate": 2.80551349695993e-06, "loss": 0.3887, "step": 13975 }, { "epoch": 0.6482374768089054, "grad_norm": 7.568587303161621, "learning_rate": 2.804852248720085e-06, "loss": 0.35, "step": 13976 }, { "epoch": 0.6482838589981447, "grad_norm": 7.224253177642822, "learning_rate": 2.8041910480369585e-06, "loss": 0.3025, "step": 13977 }, { "epoch": 0.648330241187384, "grad_norm": 10.113652229309082, "learning_rate": 2.8035298949248746e-06, "loss": 0.3214, "step": 13978 }, { "epoch": 0.6483766233766234, "grad_norm": 9.581156730651855, "learning_rate": 2.802868789398159e-06, "loss": 0.3139, "step": 13979 }, { "epoch": 0.6484230055658627, "grad_norm": 9.492445945739746, "learning_rate": 2.8022077314711293e-06, "loss": 0.4084, "step": 13980 }, { "epoch": 0.648469387755102, "grad_norm": 9.494174003601074, "learning_rate": 2.8015467211581103e-06, "loss": 0.2927, "step": 13981 }, { "epoch": 0.6485157699443413, "grad_norm": 14.169743537902832, "learning_rate": 2.8008857584734228e-06, "loss": 0.4114, "step": 13982 }, { "epoch": 0.6485621521335807, "grad_norm": 5.229282379150391, "learning_rate": 2.800224843431385e-06, "loss": 0.2827, "step": 13983 }, { "epoch": 0.6486085343228201, "grad_norm": 7.594305992126465, "learning_rate": 2.799563976046318e-06, "loss": 0.3714, "step": 13984 }, { "epoch": 0.6486549165120594, "grad_norm": 9.947454452514648, "learning_rate": 2.798903156332534e-06, "loss": 0.5358, "step": 13985 }, { "epoch": 0.6487012987012987, "grad_norm": 9.43764877319336, "learning_rate": 2.7982423843043527e-06, "loss": 0.2957, "step": 13986 }, { "epoch": 0.648747680890538, "grad_norm": 4.215501308441162, "learning_rate": 2.797581659976089e-06, "loss": 0.2846, "step": 13987 }, { "epoch": 0.6487940630797774, "grad_norm": 17.531944274902344, "learning_rate": 2.796920983362057e-06, "loss": 0.5011, "step": 13988 }, { "epoch": 0.6488404452690167, "grad_norm": 10.59081745147705, "learning_rate": 2.7962603544765714e-06, "loss": 0.4369, "step": 13989 }, { "epoch": 0.648886827458256, "grad_norm": 5.862066745758057, "learning_rate": 2.7955997733339414e-06, "loss": 0.3305, "step": 13990 }, { "epoch": 0.6489332096474953, "grad_norm": 5.833305835723877, "learning_rate": 2.7949392399484794e-06, "loss": 0.3576, "step": 13991 }, { "epoch": 0.6489795918367347, "grad_norm": 7.845105171203613, "learning_rate": 2.7942787543344957e-06, "loss": 0.3842, "step": 13992 }, { "epoch": 0.6490259740259741, "grad_norm": 8.313861846923828, "learning_rate": 2.7936183165062993e-06, "loss": 0.2823, "step": 13993 }, { "epoch": 0.6490723562152133, "grad_norm": 10.739141464233398, "learning_rate": 2.7929579264782003e-06, "loss": 0.4786, "step": 13994 }, { "epoch": 0.6491187384044527, "grad_norm": 5.727503776550293, "learning_rate": 2.7922975842645018e-06, "loss": 0.3285, "step": 13995 }, { "epoch": 0.649165120593692, "grad_norm": 7.070171356201172, "learning_rate": 2.7916372898795117e-06, "loss": 0.3896, "step": 13996 }, { "epoch": 0.6492115027829314, "grad_norm": 4.538773059844971, "learning_rate": 2.790977043337535e-06, "loss": 0.3186, "step": 13997 }, { "epoch": 0.6492578849721707, "grad_norm": 10.108256340026855, "learning_rate": 2.790316844652876e-06, "loss": 0.3323, "step": 13998 }, { "epoch": 0.64930426716141, "grad_norm": 10.63245964050293, "learning_rate": 2.7896566938398383e-06, "loss": 0.305, "step": 13999 }, { "epoch": 0.6493506493506493, "grad_norm": 8.266761779785156, "learning_rate": 2.788996590912721e-06, "loss": 0.4469, "step": 14000 }, { "epoch": 0.6493970315398887, "grad_norm": 12.455975532531738, "learning_rate": 2.788336535885826e-06, "loss": 0.4181, "step": 14001 }, { "epoch": 0.6494434137291281, "grad_norm": 6.0126872062683105, "learning_rate": 2.787676528773453e-06, "loss": 0.3178, "step": 14002 }, { "epoch": 0.6494897959183673, "grad_norm": 5.579313278198242, "learning_rate": 2.787016569589902e-06, "loss": 0.3023, "step": 14003 }, { "epoch": 0.6495361781076067, "grad_norm": 7.538239479064941, "learning_rate": 2.7863566583494695e-06, "loss": 0.286, "step": 14004 }, { "epoch": 0.649582560296846, "grad_norm": 6.436320781707764, "learning_rate": 2.785696795066455e-06, "loss": 0.3788, "step": 14005 }, { "epoch": 0.6496289424860854, "grad_norm": 7.710939407348633, "learning_rate": 2.7850369797551485e-06, "loss": 0.3476, "step": 14006 }, { "epoch": 0.6496753246753246, "grad_norm": 7.463405132293701, "learning_rate": 2.784377212429848e-06, "loss": 0.4045, "step": 14007 }, { "epoch": 0.649721706864564, "grad_norm": 5.582157611846924, "learning_rate": 2.783717493104846e-06, "loss": 0.2887, "step": 14008 }, { "epoch": 0.6497680890538033, "grad_norm": 8.776602745056152, "learning_rate": 2.7830578217944363e-06, "loss": 0.3942, "step": 14009 }, { "epoch": 0.6498144712430427, "grad_norm": 7.970439910888672, "learning_rate": 2.7823981985129118e-06, "loss": 0.4141, "step": 14010 }, { "epoch": 0.649860853432282, "grad_norm": 6.715378761291504, "learning_rate": 2.781738623274558e-06, "loss": 0.3508, "step": 14011 }, { "epoch": 0.6499072356215213, "grad_norm": 8.031084060668945, "learning_rate": 2.7810790960936674e-06, "loss": 0.2968, "step": 14012 }, { "epoch": 0.6499536178107607, "grad_norm": 13.276078224182129, "learning_rate": 2.7804196169845277e-06, "loss": 0.3461, "step": 14013 }, { "epoch": 0.65, "grad_norm": 10.27828598022461, "learning_rate": 2.7797601859614255e-06, "loss": 0.3058, "step": 14014 }, { "epoch": 0.6500463821892394, "grad_norm": 5.529260635375977, "learning_rate": 2.779100803038651e-06, "loss": 0.3387, "step": 14015 }, { "epoch": 0.6500927643784786, "grad_norm": 6.806757926940918, "learning_rate": 2.778441468230483e-06, "loss": 0.3231, "step": 14016 }, { "epoch": 0.650139146567718, "grad_norm": 7.819612979888916, "learning_rate": 2.7777821815512096e-06, "loss": 0.3739, "step": 14017 }, { "epoch": 0.6501855287569573, "grad_norm": 8.205482482910156, "learning_rate": 2.7771229430151136e-06, "loss": 0.4345, "step": 14018 }, { "epoch": 0.6502319109461967, "grad_norm": 7.8965654373168945, "learning_rate": 2.7764637526364756e-06, "loss": 0.3068, "step": 14019 }, { "epoch": 0.650278293135436, "grad_norm": 9.126784324645996, "learning_rate": 2.77580461042958e-06, "loss": 0.4842, "step": 14020 }, { "epoch": 0.6503246753246753, "grad_norm": 7.482913970947266, "learning_rate": 2.7751455164087026e-06, "loss": 0.339, "step": 14021 }, { "epoch": 0.6503710575139147, "grad_norm": 12.875816345214844, "learning_rate": 2.7744864705881243e-06, "loss": 0.3178, "step": 14022 }, { "epoch": 0.650417439703154, "grad_norm": 7.191051483154297, "learning_rate": 2.7738274729821226e-06, "loss": 0.3189, "step": 14023 }, { "epoch": 0.6504638218923933, "grad_norm": 4.964025497436523, "learning_rate": 2.7731685236049745e-06, "loss": 0.2925, "step": 14024 }, { "epoch": 0.6505102040816326, "grad_norm": 3.5485785007476807, "learning_rate": 2.7725096224709568e-06, "loss": 0.2416, "step": 14025 }, { "epoch": 0.650556586270872, "grad_norm": 7.17240571975708, "learning_rate": 2.771850769594342e-06, "loss": 0.3325, "step": 14026 }, { "epoch": 0.6506029684601113, "grad_norm": 8.647174835205078, "learning_rate": 2.7711919649894055e-06, "loss": 0.3653, "step": 14027 }, { "epoch": 0.6506493506493507, "grad_norm": 26.019121170043945, "learning_rate": 2.7705332086704195e-06, "loss": 0.5313, "step": 14028 }, { "epoch": 0.65069573283859, "grad_norm": 6.2486724853515625, "learning_rate": 2.769874500651655e-06, "loss": 0.2536, "step": 14029 }, { "epoch": 0.6507421150278293, "grad_norm": 8.807994842529297, "learning_rate": 2.769215840947385e-06, "loss": 0.2538, "step": 14030 }, { "epoch": 0.6507884972170687, "grad_norm": 11.310821533203125, "learning_rate": 2.768557229571876e-06, "loss": 0.2817, "step": 14031 }, { "epoch": 0.650834879406308, "grad_norm": 5.83695125579834, "learning_rate": 2.767898666539397e-06, "loss": 0.3979, "step": 14032 }, { "epoch": 0.6508812615955473, "grad_norm": 5.679426670074463, "learning_rate": 2.7672401518642167e-06, "loss": 0.1956, "step": 14033 }, { "epoch": 0.6509276437847866, "grad_norm": 12.147375106811523, "learning_rate": 2.7665816855606006e-06, "loss": 0.501, "step": 14034 }, { "epoch": 0.650974025974026, "grad_norm": 5.892493724822998, "learning_rate": 2.765923267642816e-06, "loss": 0.2622, "step": 14035 }, { "epoch": 0.6510204081632653, "grad_norm": 6.57792854309082, "learning_rate": 2.7652648981251232e-06, "loss": 0.3184, "step": 14036 }, { "epoch": 0.6510667903525046, "grad_norm": 7.138522624969482, "learning_rate": 2.7646065770217884e-06, "loss": 0.3946, "step": 14037 }, { "epoch": 0.651113172541744, "grad_norm": 10.226832389831543, "learning_rate": 2.763948304347073e-06, "loss": 0.3466, "step": 14038 }, { "epoch": 0.6511595547309833, "grad_norm": 4.332156658172607, "learning_rate": 2.763290080115238e-06, "loss": 0.2786, "step": 14039 }, { "epoch": 0.6512059369202227, "grad_norm": 5.814312934875488, "learning_rate": 2.762631904340546e-06, "loss": 0.295, "step": 14040 }, { "epoch": 0.651252319109462, "grad_norm": 6.434474945068359, "learning_rate": 2.7619737770372513e-06, "loss": 0.3065, "step": 14041 }, { "epoch": 0.6512987012987013, "grad_norm": 4.718070983886719, "learning_rate": 2.7613156982196137e-06, "loss": 0.3734, "step": 14042 }, { "epoch": 0.6513450834879406, "grad_norm": 6.034677982330322, "learning_rate": 2.7606576679018914e-06, "loss": 0.3331, "step": 14043 }, { "epoch": 0.65139146567718, "grad_norm": 6.3278703689575195, "learning_rate": 2.75999968609834e-06, "loss": 0.2232, "step": 14044 }, { "epoch": 0.6514378478664193, "grad_norm": 9.004118919372559, "learning_rate": 2.759341752823213e-06, "loss": 0.3644, "step": 14045 }, { "epoch": 0.6514842300556586, "grad_norm": 9.131444931030273, "learning_rate": 2.758683868090768e-06, "loss": 0.4206, "step": 14046 }, { "epoch": 0.6515306122448979, "grad_norm": 9.969435691833496, "learning_rate": 2.758026031915252e-06, "loss": 0.2689, "step": 14047 }, { "epoch": 0.6515769944341373, "grad_norm": 5.3903727531433105, "learning_rate": 2.75736824431092e-06, "loss": 0.2404, "step": 14048 }, { "epoch": 0.6516233766233767, "grad_norm": 8.958666801452637, "learning_rate": 2.756710505292022e-06, "loss": 0.312, "step": 14049 }, { "epoch": 0.6516697588126159, "grad_norm": 7.725616455078125, "learning_rate": 2.7560528148728076e-06, "loss": 0.3996, "step": 14050 }, { "epoch": 0.6517161410018553, "grad_norm": 5.891481876373291, "learning_rate": 2.7553951730675277e-06, "loss": 0.2022, "step": 14051 }, { "epoch": 0.6517625231910946, "grad_norm": 5.2461137771606445, "learning_rate": 2.7547375798904253e-06, "loss": 0.2161, "step": 14052 }, { "epoch": 0.651808905380334, "grad_norm": 6.519900798797607, "learning_rate": 2.754080035355749e-06, "loss": 0.3354, "step": 14053 }, { "epoch": 0.6518552875695733, "grad_norm": 6.910414218902588, "learning_rate": 2.753422539477744e-06, "loss": 0.3815, "step": 14054 }, { "epoch": 0.6519016697588126, "grad_norm": 8.06731128692627, "learning_rate": 2.752765092270655e-06, "loss": 0.3772, "step": 14055 }, { "epoch": 0.6519480519480519, "grad_norm": 8.747210502624512, "learning_rate": 2.7521076937487248e-06, "loss": 0.3867, "step": 14056 }, { "epoch": 0.6519944341372913, "grad_norm": 10.297996520996094, "learning_rate": 2.7514503439261973e-06, "loss": 0.2945, "step": 14057 }, { "epoch": 0.6520408163265307, "grad_norm": 5.470174312591553, "learning_rate": 2.7507930428173114e-06, "loss": 0.2558, "step": 14058 }, { "epoch": 0.6520871985157699, "grad_norm": 9.03874683380127, "learning_rate": 2.750135790436307e-06, "loss": 0.1402, "step": 14059 }, { "epoch": 0.6521335807050093, "grad_norm": 7.724194526672363, "learning_rate": 2.749478586797424e-06, "loss": 0.3624, "step": 14060 }, { "epoch": 0.6521799628942486, "grad_norm": 9.311460494995117, "learning_rate": 2.7488214319149004e-06, "loss": 0.3651, "step": 14061 }, { "epoch": 0.652226345083488, "grad_norm": 9.38868522644043, "learning_rate": 2.748164325802975e-06, "loss": 0.5325, "step": 14062 }, { "epoch": 0.6522727272727272, "grad_norm": 4.660679817199707, "learning_rate": 2.7475072684758797e-06, "loss": 0.2547, "step": 14063 }, { "epoch": 0.6523191094619666, "grad_norm": 11.409648895263672, "learning_rate": 2.746850259947852e-06, "loss": 0.4889, "step": 14064 }, { "epoch": 0.6523654916512059, "grad_norm": 4.32988166809082, "learning_rate": 2.746193300233124e-06, "loss": 0.1294, "step": 14065 }, { "epoch": 0.6524118738404453, "grad_norm": 8.487271308898926, "learning_rate": 2.7455363893459308e-06, "loss": 0.2758, "step": 14066 }, { "epoch": 0.6524582560296845, "grad_norm": 8.133291244506836, "learning_rate": 2.744879527300502e-06, "loss": 0.3723, "step": 14067 }, { "epoch": 0.6525046382189239, "grad_norm": 4.61142635345459, "learning_rate": 2.74422271411107e-06, "loss": 0.3594, "step": 14068 }, { "epoch": 0.6525510204081633, "grad_norm": 10.586861610412598, "learning_rate": 2.7435659497918622e-06, "loss": 0.4579, "step": 14069 }, { "epoch": 0.6525974025974026, "grad_norm": 9.346712112426758, "learning_rate": 2.7429092343571073e-06, "loss": 0.3815, "step": 14070 }, { "epoch": 0.652643784786642, "grad_norm": 5.912858009338379, "learning_rate": 2.7422525678210337e-06, "loss": 0.3158, "step": 14071 }, { "epoch": 0.6526901669758812, "grad_norm": 5.398243427276611, "learning_rate": 2.7415959501978674e-06, "loss": 0.3586, "step": 14072 }, { "epoch": 0.6527365491651206, "grad_norm": 6.683903217315674, "learning_rate": 2.740939381501835e-06, "loss": 0.3455, "step": 14073 }, { "epoch": 0.6527829313543599, "grad_norm": 4.16174840927124, "learning_rate": 2.7402828617471583e-06, "loss": 0.2755, "step": 14074 }, { "epoch": 0.6528293135435993, "grad_norm": 9.12069320678711, "learning_rate": 2.739626390948061e-06, "loss": 0.3304, "step": 14075 }, { "epoch": 0.6528756957328385, "grad_norm": 7.963809013366699, "learning_rate": 2.7389699691187656e-06, "loss": 0.3117, "step": 14076 }, { "epoch": 0.6529220779220779, "grad_norm": 4.703042984008789, "learning_rate": 2.7383135962734937e-06, "loss": 0.3135, "step": 14077 }, { "epoch": 0.6529684601113173, "grad_norm": 7.338946342468262, "learning_rate": 2.7376572724264642e-06, "loss": 0.3694, "step": 14078 }, { "epoch": 0.6530148423005566, "grad_norm": 10.850561141967773, "learning_rate": 2.7370009975918993e-06, "loss": 0.3831, "step": 14079 }, { "epoch": 0.6530612244897959, "grad_norm": 14.386273384094238, "learning_rate": 2.736344771784012e-06, "loss": 0.5005, "step": 14080 }, { "epoch": 0.6531076066790352, "grad_norm": 7.819552421569824, "learning_rate": 2.735688595017021e-06, "loss": 0.2494, "step": 14081 }, { "epoch": 0.6531539888682746, "grad_norm": 9.38748836517334, "learning_rate": 2.735032467305142e-06, "loss": 0.3908, "step": 14082 }, { "epoch": 0.6532003710575139, "grad_norm": 5.404147624969482, "learning_rate": 2.7343763886625906e-06, "loss": 0.3651, "step": 14083 }, { "epoch": 0.6532467532467533, "grad_norm": 5.266266345977783, "learning_rate": 2.733720359103582e-06, "loss": 0.3006, "step": 14084 }, { "epoch": 0.6532931354359925, "grad_norm": 8.172521591186523, "learning_rate": 2.733064378642324e-06, "loss": 0.4164, "step": 14085 }, { "epoch": 0.6533395176252319, "grad_norm": 12.483386993408203, "learning_rate": 2.73240844729303e-06, "loss": 0.3385, "step": 14086 }, { "epoch": 0.6533858998144713, "grad_norm": 10.383990287780762, "learning_rate": 2.7317525650699116e-06, "loss": 0.4496, "step": 14087 }, { "epoch": 0.6534322820037106, "grad_norm": 10.552847862243652, "learning_rate": 2.731096731987177e-06, "loss": 0.5023, "step": 14088 }, { "epoch": 0.6534786641929499, "grad_norm": 4.712457180023193, "learning_rate": 2.730440948059036e-06, "loss": 0.3247, "step": 14089 }, { "epoch": 0.6535250463821892, "grad_norm": 7.649205684661865, "learning_rate": 2.729785213299696e-06, "loss": 0.4156, "step": 14090 }, { "epoch": 0.6535714285714286, "grad_norm": 6.666251182556152, "learning_rate": 2.7291295277233605e-06, "loss": 0.3545, "step": 14091 }, { "epoch": 0.6536178107606679, "grad_norm": 10.48644733428955, "learning_rate": 2.7284738913442355e-06, "loss": 0.3461, "step": 14092 }, { "epoch": 0.6536641929499072, "grad_norm": 8.873798370361328, "learning_rate": 2.727818304176526e-06, "loss": 0.4334, "step": 14093 }, { "epoch": 0.6537105751391465, "grad_norm": 5.3116583824157715, "learning_rate": 2.727162766234434e-06, "loss": 0.3292, "step": 14094 }, { "epoch": 0.6537569573283859, "grad_norm": 11.751433372497559, "learning_rate": 2.7265072775321642e-06, "loss": 0.4479, "step": 14095 }, { "epoch": 0.6538033395176253, "grad_norm": 5.273531436920166, "learning_rate": 2.7258518380839126e-06, "loss": 0.35, "step": 14096 }, { "epoch": 0.6538497217068646, "grad_norm": 10.029773712158203, "learning_rate": 2.7251964479038817e-06, "loss": 0.36, "step": 14097 }, { "epoch": 0.6538961038961039, "grad_norm": 7.909688472747803, "learning_rate": 2.72454110700627e-06, "loss": 0.3244, "step": 14098 }, { "epoch": 0.6539424860853432, "grad_norm": 6.681088447570801, "learning_rate": 2.7238858154052754e-06, "loss": 0.3361, "step": 14099 }, { "epoch": 0.6539888682745826, "grad_norm": 7.872330665588379, "learning_rate": 2.7232305731150956e-06, "loss": 0.3134, "step": 14100 }, { "epoch": 0.6540352504638219, "grad_norm": 11.408066749572754, "learning_rate": 2.7225753801499228e-06, "loss": 0.4228, "step": 14101 }, { "epoch": 0.6540816326530612, "grad_norm": 7.737499237060547, "learning_rate": 2.721920236523953e-06, "loss": 0.3163, "step": 14102 }, { "epoch": 0.6541280148423005, "grad_norm": 7.079484462738037, "learning_rate": 2.7212651422513804e-06, "loss": 0.3419, "step": 14103 }, { "epoch": 0.6541743970315399, "grad_norm": 4.509731292724609, "learning_rate": 2.7206100973463958e-06, "loss": 0.2827, "step": 14104 }, { "epoch": 0.6542207792207793, "grad_norm": 5.387872219085693, "learning_rate": 2.7199551018231905e-06, "loss": 0.2866, "step": 14105 }, { "epoch": 0.6542671614100185, "grad_norm": 6.6375603675842285, "learning_rate": 2.7193001556959585e-06, "loss": 0.3761, "step": 14106 }, { "epoch": 0.6543135435992579, "grad_norm": 11.644797325134277, "learning_rate": 2.7186452589788835e-06, "loss": 0.3171, "step": 14107 }, { "epoch": 0.6543599257884972, "grad_norm": 6.787998676300049, "learning_rate": 2.7179904116861557e-06, "loss": 0.308, "step": 14108 }, { "epoch": 0.6544063079777366, "grad_norm": 4.135776042938232, "learning_rate": 2.717335613831962e-06, "loss": 0.3132, "step": 14109 }, { "epoch": 0.6544526901669759, "grad_norm": 5.999507427215576, "learning_rate": 2.7166808654304877e-06, "loss": 0.2872, "step": 14110 }, { "epoch": 0.6544990723562152, "grad_norm": 5.564859867095947, "learning_rate": 2.716026166495921e-06, "loss": 0.3022, "step": 14111 }, { "epoch": 0.6545454545454545, "grad_norm": 7.370481491088867, "learning_rate": 2.71537151704244e-06, "loss": 0.2698, "step": 14112 }, { "epoch": 0.6545918367346939, "grad_norm": 5.042761325836182, "learning_rate": 2.71471691708423e-06, "loss": 0.3383, "step": 14113 }, { "epoch": 0.6546382189239333, "grad_norm": 9.324593544006348, "learning_rate": 2.714062366635473e-06, "loss": 0.4431, "step": 14114 }, { "epoch": 0.6546846011131725, "grad_norm": 5.029290199279785, "learning_rate": 2.713407865710349e-06, "loss": 0.3253, "step": 14115 }, { "epoch": 0.6547309833024119, "grad_norm": 4.756640911102295, "learning_rate": 2.712753414323038e-06, "loss": 0.3855, "step": 14116 }, { "epoch": 0.6547773654916512, "grad_norm": 6.6450395584106445, "learning_rate": 2.712099012487719e-06, "loss": 0.2756, "step": 14117 }, { "epoch": 0.6548237476808906, "grad_norm": 5.953205108642578, "learning_rate": 2.7114446602185673e-06, "loss": 0.3728, "step": 14118 }, { "epoch": 0.6548701298701298, "grad_norm": 12.764699935913086, "learning_rate": 2.710790357529759e-06, "loss": 0.3589, "step": 14119 }, { "epoch": 0.6549165120593692, "grad_norm": 7.416140079498291, "learning_rate": 2.71013610443547e-06, "loss": 0.3943, "step": 14120 }, { "epoch": 0.6549628942486085, "grad_norm": 4.8269219398498535, "learning_rate": 2.709481900949875e-06, "loss": 0.3685, "step": 14121 }, { "epoch": 0.6550092764378479, "grad_norm": 8.912930488586426, "learning_rate": 2.708827747087149e-06, "loss": 0.3309, "step": 14122 }, { "epoch": 0.6550556586270871, "grad_norm": 8.041023254394531, "learning_rate": 2.7081736428614584e-06, "loss": 0.3512, "step": 14123 }, { "epoch": 0.6551020408163265, "grad_norm": 6.097361087799072, "learning_rate": 2.7075195882869775e-06, "loss": 0.3162, "step": 14124 }, { "epoch": 0.6551484230055659, "grad_norm": 5.834473609924316, "learning_rate": 2.7068655833778754e-06, "loss": 0.3079, "step": 14125 }, { "epoch": 0.6551948051948052, "grad_norm": 11.290973663330078, "learning_rate": 2.7062116281483213e-06, "loss": 0.4778, "step": 14126 }, { "epoch": 0.6552411873840446, "grad_norm": 4.641373157501221, "learning_rate": 2.7055577226124825e-06, "loss": 0.2819, "step": 14127 }, { "epoch": 0.6552875695732838, "grad_norm": 5.573410511016846, "learning_rate": 2.7049038667845274e-06, "loss": 0.2783, "step": 14128 }, { "epoch": 0.6553339517625232, "grad_norm": 3.9612271785736084, "learning_rate": 2.704250060678617e-06, "loss": 0.2977, "step": 14129 }, { "epoch": 0.6553803339517625, "grad_norm": 7.699024200439453, "learning_rate": 2.703596304308919e-06, "loss": 0.3701, "step": 14130 }, { "epoch": 0.6554267161410019, "grad_norm": 5.921867370605469, "learning_rate": 2.7029425976895948e-06, "loss": 0.2383, "step": 14131 }, { "epoch": 0.6554730983302411, "grad_norm": 6.0618815422058105, "learning_rate": 2.7022889408348074e-06, "loss": 0.3429, "step": 14132 }, { "epoch": 0.6555194805194805, "grad_norm": 4.683163166046143, "learning_rate": 2.7016353337587207e-06, "loss": 0.3056, "step": 14133 }, { "epoch": 0.6555658627087199, "grad_norm": 8.93028450012207, "learning_rate": 2.700981776475491e-06, "loss": 0.2463, "step": 14134 }, { "epoch": 0.6556122448979592, "grad_norm": 13.53999137878418, "learning_rate": 2.700328268999278e-06, "loss": 0.4861, "step": 14135 }, { "epoch": 0.6556586270871985, "grad_norm": 8.559591293334961, "learning_rate": 2.6996748113442397e-06, "loss": 0.3502, "step": 14136 }, { "epoch": 0.6557050092764378, "grad_norm": 7.1058173179626465, "learning_rate": 2.6990214035245338e-06, "loss": 0.3437, "step": 14137 }, { "epoch": 0.6557513914656772, "grad_norm": 8.96120834350586, "learning_rate": 2.6983680455543156e-06, "loss": 0.3552, "step": 14138 }, { "epoch": 0.6557977736549165, "grad_norm": 6.530324935913086, "learning_rate": 2.6977147374477408e-06, "loss": 0.3005, "step": 14139 }, { "epoch": 0.6558441558441559, "grad_norm": 9.008413314819336, "learning_rate": 2.697061479218961e-06, "loss": 0.5288, "step": 14140 }, { "epoch": 0.6558905380333951, "grad_norm": 4.918094158172607, "learning_rate": 2.696408270882129e-06, "loss": 0.3923, "step": 14141 }, { "epoch": 0.6559369202226345, "grad_norm": 8.042171478271484, "learning_rate": 2.695755112451397e-06, "loss": 0.3267, "step": 14142 }, { "epoch": 0.6559833024118739, "grad_norm": 4.424245357513428, "learning_rate": 2.6951020039409147e-06, "loss": 0.2098, "step": 14143 }, { "epoch": 0.6560296846011132, "grad_norm": 11.028064727783203, "learning_rate": 2.694448945364835e-06, "loss": 0.3221, "step": 14144 }, { "epoch": 0.6560760667903525, "grad_norm": 8.507535934448242, "learning_rate": 2.6937959367372994e-06, "loss": 0.3214, "step": 14145 }, { "epoch": 0.6561224489795918, "grad_norm": 7.883124351501465, "learning_rate": 2.6931429780724594e-06, "loss": 0.397, "step": 14146 }, { "epoch": 0.6561688311688312, "grad_norm": 10.578227996826172, "learning_rate": 2.6924900693844593e-06, "loss": 0.4601, "step": 14147 }, { "epoch": 0.6562152133580705, "grad_norm": 6.460509300231934, "learning_rate": 2.691837210687446e-06, "loss": 0.3976, "step": 14148 }, { "epoch": 0.6562615955473098, "grad_norm": 8.717939376831055, "learning_rate": 2.69118440199556e-06, "loss": 0.3967, "step": 14149 }, { "epoch": 0.6563079777365491, "grad_norm": 5.161614894866943, "learning_rate": 2.69053164332295e-06, "loss": 0.2661, "step": 14150 }, { "epoch": 0.6563543599257885, "grad_norm": 5.345635414123535, "learning_rate": 2.689878934683751e-06, "loss": 0.2761, "step": 14151 }, { "epoch": 0.6564007421150279, "grad_norm": 6.481848239898682, "learning_rate": 2.689226276092107e-06, "loss": 0.3449, "step": 14152 }, { "epoch": 0.6564471243042672, "grad_norm": 6.694865703582764, "learning_rate": 2.6885736675621566e-06, "loss": 0.2961, "step": 14153 }, { "epoch": 0.6564935064935065, "grad_norm": 8.510649681091309, "learning_rate": 2.687921109108038e-06, "loss": 0.3993, "step": 14154 }, { "epoch": 0.6565398886827458, "grad_norm": 7.257002353668213, "learning_rate": 2.687268600743892e-06, "loss": 0.3351, "step": 14155 }, { "epoch": 0.6565862708719852, "grad_norm": 10.68419075012207, "learning_rate": 2.6866161424838495e-06, "loss": 0.4133, "step": 14156 }, { "epoch": 0.6566326530612245, "grad_norm": 5.377121925354004, "learning_rate": 2.685963734342048e-06, "loss": 0.2781, "step": 14157 }, { "epoch": 0.6566790352504638, "grad_norm": 6.079565048217773, "learning_rate": 2.6853113763326215e-06, "loss": 0.2843, "step": 14158 }, { "epoch": 0.6567254174397031, "grad_norm": 8.899977684020996, "learning_rate": 2.684659068469704e-06, "loss": 0.4286, "step": 14159 }, { "epoch": 0.6567717996289425, "grad_norm": 4.676049709320068, "learning_rate": 2.6840068107674254e-06, "loss": 0.3179, "step": 14160 }, { "epoch": 0.6568181818181819, "grad_norm": 5.723434925079346, "learning_rate": 2.6833546032399206e-06, "loss": 0.3178, "step": 14161 }, { "epoch": 0.6568645640074211, "grad_norm": 6.521945953369141, "learning_rate": 2.682702445901314e-06, "loss": 0.333, "step": 14162 }, { "epoch": 0.6569109461966605, "grad_norm": 4.156228065490723, "learning_rate": 2.682050338765737e-06, "loss": 0.229, "step": 14163 }, { "epoch": 0.6569573283858998, "grad_norm": 8.218707084655762, "learning_rate": 2.681398281847316e-06, "loss": 0.3108, "step": 14164 }, { "epoch": 0.6570037105751392, "grad_norm": 6.943292140960693, "learning_rate": 2.6807462751601793e-06, "loss": 0.3193, "step": 14165 }, { "epoch": 0.6570500927643784, "grad_norm": 4.749430179595947, "learning_rate": 2.6800943187184524e-06, "loss": 0.2948, "step": 14166 }, { "epoch": 0.6570964749536178, "grad_norm": 6.010622978210449, "learning_rate": 2.6794424125362572e-06, "loss": 0.4284, "step": 14167 }, { "epoch": 0.6571428571428571, "grad_norm": 5.58195686340332, "learning_rate": 2.6787905566277185e-06, "loss": 0.4032, "step": 14168 }, { "epoch": 0.6571892393320965, "grad_norm": 10.241503715515137, "learning_rate": 2.6781387510069566e-06, "loss": 0.4136, "step": 14169 }, { "epoch": 0.6572356215213359, "grad_norm": 8.23129653930664, "learning_rate": 2.6774869956880954e-06, "loss": 0.3104, "step": 14170 }, { "epoch": 0.6572820037105751, "grad_norm": 5.836864948272705, "learning_rate": 2.6768352906852525e-06, "loss": 0.2535, "step": 14171 }, { "epoch": 0.6573283858998145, "grad_norm": 7.9654645919799805, "learning_rate": 2.6761836360125503e-06, "loss": 0.3505, "step": 14172 }, { "epoch": 0.6573747680890538, "grad_norm": 10.275289535522461, "learning_rate": 2.6755320316841014e-06, "loss": 0.3615, "step": 14173 }, { "epoch": 0.6574211502782932, "grad_norm": 7.708193302154541, "learning_rate": 2.6748804777140257e-06, "loss": 0.2614, "step": 14174 }, { "epoch": 0.6574675324675324, "grad_norm": 7.481733798980713, "learning_rate": 2.6742289741164374e-06, "loss": 0.2422, "step": 14175 }, { "epoch": 0.6575139146567718, "grad_norm": 6.286895275115967, "learning_rate": 2.6735775209054523e-06, "loss": 0.306, "step": 14176 }, { "epoch": 0.6575602968460111, "grad_norm": 7.493982791900635, "learning_rate": 2.672926118095185e-06, "loss": 0.2849, "step": 14177 }, { "epoch": 0.6576066790352505, "grad_norm": 10.300548553466797, "learning_rate": 2.6722747656997433e-06, "loss": 0.3819, "step": 14178 }, { "epoch": 0.6576530612244897, "grad_norm": 13.054642677307129, "learning_rate": 2.6716234637332418e-06, "loss": 0.3236, "step": 14179 }, { "epoch": 0.6576994434137291, "grad_norm": 6.6982011795043945, "learning_rate": 2.6709722122097892e-06, "loss": 0.2475, "step": 14180 }, { "epoch": 0.6577458256029685, "grad_norm": 8.660898208618164, "learning_rate": 2.6703210111434956e-06, "loss": 0.4556, "step": 14181 }, { "epoch": 0.6577922077922078, "grad_norm": 7.543100833892822, "learning_rate": 2.6696698605484707e-06, "loss": 0.3715, "step": 14182 }, { "epoch": 0.6578385899814472, "grad_norm": 8.402637481689453, "learning_rate": 2.669018760438817e-06, "loss": 0.3471, "step": 14183 }, { "epoch": 0.6578849721706864, "grad_norm": 9.20900821685791, "learning_rate": 2.6683677108286423e-06, "loss": 0.3559, "step": 14184 }, { "epoch": 0.6579313543599258, "grad_norm": 6.452553749084473, "learning_rate": 2.6677167117320512e-06, "loss": 0.2864, "step": 14185 }, { "epoch": 0.6579777365491651, "grad_norm": 4.0671610832214355, "learning_rate": 2.667065763163148e-06, "loss": 0.2402, "step": 14186 }, { "epoch": 0.6580241187384045, "grad_norm": 5.261993408203125, "learning_rate": 2.666414865136034e-06, "loss": 0.3136, "step": 14187 }, { "epoch": 0.6580705009276437, "grad_norm": 4.528625965118408, "learning_rate": 2.665764017664813e-06, "loss": 0.3371, "step": 14188 }, { "epoch": 0.6581168831168831, "grad_norm": 6.28399133682251, "learning_rate": 2.6651132207635818e-06, "loss": 0.3629, "step": 14189 }, { "epoch": 0.6581632653061225, "grad_norm": 9.508833885192871, "learning_rate": 2.664462474446441e-06, "loss": 0.365, "step": 14190 }, { "epoch": 0.6582096474953618, "grad_norm": 7.389835834503174, "learning_rate": 2.6638117787274886e-06, "loss": 0.3188, "step": 14191 }, { "epoch": 0.6582560296846011, "grad_norm": 4.297121524810791, "learning_rate": 2.663161133620822e-06, "loss": 0.1888, "step": 14192 }, { "epoch": 0.6583024118738404, "grad_norm": 11.220560073852539, "learning_rate": 2.6625105391405386e-06, "loss": 0.4064, "step": 14193 }, { "epoch": 0.6583487940630798, "grad_norm": 7.467321872711182, "learning_rate": 2.6618599953007294e-06, "loss": 0.36, "step": 14194 }, { "epoch": 0.6583951762523191, "grad_norm": 11.099662780761719, "learning_rate": 2.6612095021154904e-06, "loss": 0.4526, "step": 14195 }, { "epoch": 0.6584415584415585, "grad_norm": 6.626489162445068, "learning_rate": 2.6605590595989144e-06, "loss": 0.3438, "step": 14196 }, { "epoch": 0.6584879406307977, "grad_norm": 6.917331218719482, "learning_rate": 2.659908667765092e-06, "loss": 0.3586, "step": 14197 }, { "epoch": 0.6585343228200371, "grad_norm": 12.269163131713867, "learning_rate": 2.6592583266281147e-06, "loss": 0.4723, "step": 14198 }, { "epoch": 0.6585807050092765, "grad_norm": 8.230278015136719, "learning_rate": 2.658608036202072e-06, "loss": 0.3181, "step": 14199 }, { "epoch": 0.6586270871985158, "grad_norm": 6.272952556610107, "learning_rate": 2.65795779650105e-06, "loss": 0.2882, "step": 14200 }, { "epoch": 0.6586734693877551, "grad_norm": 14.801813125610352, "learning_rate": 2.657307607539137e-06, "loss": 0.4076, "step": 14201 }, { "epoch": 0.6587198515769944, "grad_norm": 6.536038398742676, "learning_rate": 2.6566574693304188e-06, "loss": 0.3058, "step": 14202 }, { "epoch": 0.6587662337662338, "grad_norm": 5.771846294403076, "learning_rate": 2.656007381888981e-06, "loss": 0.3627, "step": 14203 }, { "epoch": 0.6588126159554731, "grad_norm": 4.963444232940674, "learning_rate": 2.6553573452289083e-06, "loss": 0.3545, "step": 14204 }, { "epoch": 0.6588589981447124, "grad_norm": 10.108478546142578, "learning_rate": 2.6547073593642804e-06, "loss": 0.3352, "step": 14205 }, { "epoch": 0.6589053803339517, "grad_norm": 5.33295202255249, "learning_rate": 2.65405742430918e-06, "loss": 0.3373, "step": 14206 }, { "epoch": 0.6589517625231911, "grad_norm": 10.72826099395752, "learning_rate": 2.6534075400776887e-06, "loss": 0.3854, "step": 14207 }, { "epoch": 0.6589981447124305, "grad_norm": 6.038944244384766, "learning_rate": 2.652757706683885e-06, "loss": 0.388, "step": 14208 }, { "epoch": 0.6590445269016698, "grad_norm": 8.75693416595459, "learning_rate": 2.652107924141848e-06, "loss": 0.3688, "step": 14209 }, { "epoch": 0.6590909090909091, "grad_norm": 10.941427230834961, "learning_rate": 2.6514581924656563e-06, "loss": 0.3576, "step": 14210 }, { "epoch": 0.6591372912801484, "grad_norm": 9.713278770446777, "learning_rate": 2.650808511669382e-06, "loss": 0.3204, "step": 14211 }, { "epoch": 0.6591836734693878, "grad_norm": 10.073545455932617, "learning_rate": 2.6501588817671026e-06, "loss": 0.4443, "step": 14212 }, { "epoch": 0.6592300556586271, "grad_norm": 6.015741348266602, "learning_rate": 2.649509302772891e-06, "loss": 0.2784, "step": 14213 }, { "epoch": 0.6592764378478664, "grad_norm": 7.400091171264648, "learning_rate": 2.648859774700821e-06, "loss": 0.3848, "step": 14214 }, { "epoch": 0.6593228200371057, "grad_norm": 5.020979404449463, "learning_rate": 2.6482102975649658e-06, "loss": 0.3608, "step": 14215 }, { "epoch": 0.6593692022263451, "grad_norm": 6.964836597442627, "learning_rate": 2.6475608713793923e-06, "loss": 0.3077, "step": 14216 }, { "epoch": 0.6594155844155845, "grad_norm": 9.634353637695312, "learning_rate": 2.646911496158171e-06, "loss": 0.3855, "step": 14217 }, { "epoch": 0.6594619666048237, "grad_norm": 5.176292896270752, "learning_rate": 2.646262171915372e-06, "loss": 0.2808, "step": 14218 }, { "epoch": 0.6595083487940631, "grad_norm": 8.136734962463379, "learning_rate": 2.6456128986650607e-06, "loss": 0.4748, "step": 14219 }, { "epoch": 0.6595547309833024, "grad_norm": 4.008033752441406, "learning_rate": 2.644963676421305e-06, "loss": 0.3232, "step": 14220 }, { "epoch": 0.6596011131725418, "grad_norm": 8.53663444519043, "learning_rate": 2.6443145051981704e-06, "loss": 0.279, "step": 14221 }, { "epoch": 0.659647495361781, "grad_norm": 5.747851848602295, "learning_rate": 2.6436653850097183e-06, "loss": 0.3218, "step": 14222 }, { "epoch": 0.6596938775510204, "grad_norm": 8.891827583312988, "learning_rate": 2.6430163158700116e-06, "loss": 0.357, "step": 14223 }, { "epoch": 0.6597402597402597, "grad_norm": 9.270768165588379, "learning_rate": 2.6423672977931144e-06, "loss": 0.3532, "step": 14224 }, { "epoch": 0.6597866419294991, "grad_norm": 7.5870466232299805, "learning_rate": 2.6417183307930857e-06, "loss": 0.3006, "step": 14225 }, { "epoch": 0.6598330241187385, "grad_norm": 4.268792152404785, "learning_rate": 2.641069414883987e-06, "loss": 0.3103, "step": 14226 }, { "epoch": 0.6598794063079777, "grad_norm": 6.4292449951171875, "learning_rate": 2.640420550079874e-06, "loss": 0.3541, "step": 14227 }, { "epoch": 0.6599257884972171, "grad_norm": 9.859138488769531, "learning_rate": 2.639771736394805e-06, "loss": 0.4625, "step": 14228 }, { "epoch": 0.6599721706864564, "grad_norm": 6.531306266784668, "learning_rate": 2.639122973842836e-06, "loss": 0.3779, "step": 14229 }, { "epoch": 0.6600185528756958, "grad_norm": 8.763772964477539, "learning_rate": 2.6384742624380234e-06, "loss": 0.334, "step": 14230 }, { "epoch": 0.660064935064935, "grad_norm": 5.273422718048096, "learning_rate": 2.6378256021944194e-06, "loss": 0.2602, "step": 14231 }, { "epoch": 0.6601113172541744, "grad_norm": 3.875659704208374, "learning_rate": 2.6371769931260806e-06, "loss": 0.3087, "step": 14232 }, { "epoch": 0.6601576994434137, "grad_norm": 21.15012550354004, "learning_rate": 2.6365284352470533e-06, "loss": 0.5009, "step": 14233 }, { "epoch": 0.6602040816326531, "grad_norm": 6.857182025909424, "learning_rate": 2.6358799285713916e-06, "loss": 0.2967, "step": 14234 }, { "epoch": 0.6602504638218923, "grad_norm": 6.4685139656066895, "learning_rate": 2.6352314731131436e-06, "loss": 0.2427, "step": 14235 }, { "epoch": 0.6602968460111317, "grad_norm": 4.447453498840332, "learning_rate": 2.6345830688863587e-06, "loss": 0.2375, "step": 14236 }, { "epoch": 0.6603432282003711, "grad_norm": 11.59376335144043, "learning_rate": 2.6339347159050866e-06, "loss": 0.4, "step": 14237 }, { "epoch": 0.6603896103896104, "grad_norm": 5.635776996612549, "learning_rate": 2.633286414183368e-06, "loss": 0.2623, "step": 14238 }, { "epoch": 0.6604359925788498, "grad_norm": 8.021828651428223, "learning_rate": 2.6326381637352518e-06, "loss": 0.3556, "step": 14239 }, { "epoch": 0.660482374768089, "grad_norm": 4.586589336395264, "learning_rate": 2.6319899645747804e-06, "loss": 0.1812, "step": 14240 }, { "epoch": 0.6605287569573284, "grad_norm": 5.547011852264404, "learning_rate": 2.6313418167159976e-06, "loss": 0.2658, "step": 14241 }, { "epoch": 0.6605751391465677, "grad_norm": 5.812854290008545, "learning_rate": 2.630693720172945e-06, "loss": 0.3412, "step": 14242 }, { "epoch": 0.6606215213358071, "grad_norm": 7.363091945648193, "learning_rate": 2.6300456749596647e-06, "loss": 0.3072, "step": 14243 }, { "epoch": 0.6606679035250463, "grad_norm": 7.496375560760498, "learning_rate": 2.629397681090194e-06, "loss": 0.3005, "step": 14244 }, { "epoch": 0.6607142857142857, "grad_norm": 4.3552680015563965, "learning_rate": 2.6287497385785715e-06, "loss": 0.263, "step": 14245 }, { "epoch": 0.6607606679035251, "grad_norm": 10.137657165527344, "learning_rate": 2.6281018474388354e-06, "loss": 0.4359, "step": 14246 }, { "epoch": 0.6608070500927644, "grad_norm": 5.198362827301025, "learning_rate": 2.6274540076850218e-06, "loss": 0.2812, "step": 14247 }, { "epoch": 0.6608534322820037, "grad_norm": 4.064183712005615, "learning_rate": 2.6268062193311672e-06, "loss": 0.3413, "step": 14248 }, { "epoch": 0.660899814471243, "grad_norm": 4.6407694816589355, "learning_rate": 2.626158482391302e-06, "loss": 0.3141, "step": 14249 }, { "epoch": 0.6609461966604824, "grad_norm": 4.579573631286621, "learning_rate": 2.6255107968794623e-06, "loss": 0.3558, "step": 14250 }, { "epoch": 0.6609925788497217, "grad_norm": 9.547316551208496, "learning_rate": 2.6248631628096784e-06, "loss": 0.4554, "step": 14251 }, { "epoch": 0.6610389610389611, "grad_norm": 6.752137660980225, "learning_rate": 2.6242155801959814e-06, "loss": 0.3949, "step": 14252 }, { "epoch": 0.6610853432282003, "grad_norm": 7.134527683258057, "learning_rate": 2.6235680490524e-06, "loss": 0.4348, "step": 14253 }, { "epoch": 0.6611317254174397, "grad_norm": 6.751020908355713, "learning_rate": 2.622920569392966e-06, "loss": 0.3346, "step": 14254 }, { "epoch": 0.661178107606679, "grad_norm": 6.262439250946045, "learning_rate": 2.6222731412317014e-06, "loss": 0.3084, "step": 14255 }, { "epoch": 0.6612244897959184, "grad_norm": 6.616937160491943, "learning_rate": 2.621625764582636e-06, "loss": 0.3823, "step": 14256 }, { "epoch": 0.6612708719851577, "grad_norm": 8.02452564239502, "learning_rate": 2.620978439459794e-06, "loss": 0.1991, "step": 14257 }, { "epoch": 0.661317254174397, "grad_norm": 5.609311103820801, "learning_rate": 2.6203311658771983e-06, "loss": 0.2899, "step": 14258 }, { "epoch": 0.6613636363636364, "grad_norm": 12.714405059814453, "learning_rate": 2.6196839438488752e-06, "loss": 0.5353, "step": 14259 }, { "epoch": 0.6614100185528757, "grad_norm": 7.65125036239624, "learning_rate": 2.619036773388842e-06, "loss": 0.3978, "step": 14260 }, { "epoch": 0.661456400742115, "grad_norm": 5.991678714752197, "learning_rate": 2.618389654511122e-06, "loss": 0.3228, "step": 14261 }, { "epoch": 0.6615027829313543, "grad_norm": 5.679821014404297, "learning_rate": 2.6177425872297335e-06, "loss": 0.3722, "step": 14262 }, { "epoch": 0.6615491651205937, "grad_norm": 5.131181240081787, "learning_rate": 2.617095571558695e-06, "loss": 0.2684, "step": 14263 }, { "epoch": 0.661595547309833, "grad_norm": 3.708937168121338, "learning_rate": 2.6164486075120245e-06, "loss": 0.2735, "step": 14264 }, { "epoch": 0.6616419294990723, "grad_norm": 8.581748008728027, "learning_rate": 2.615801695103739e-06, "loss": 0.348, "step": 14265 }, { "epoch": 0.6616883116883117, "grad_norm": 4.8890252113342285, "learning_rate": 2.6151548343478504e-06, "loss": 0.1821, "step": 14266 }, { "epoch": 0.661734693877551, "grad_norm": 4.711525917053223, "learning_rate": 2.6145080252583754e-06, "loss": 0.3519, "step": 14267 }, { "epoch": 0.6617810760667904, "grad_norm": 7.152507781982422, "learning_rate": 2.613861267849325e-06, "loss": 0.4106, "step": 14268 }, { "epoch": 0.6618274582560297, "grad_norm": 9.919306755065918, "learning_rate": 2.6132145621347116e-06, "loss": 0.3927, "step": 14269 }, { "epoch": 0.661873840445269, "grad_norm": 5.09959077835083, "learning_rate": 2.612567908128548e-06, "loss": 0.338, "step": 14270 }, { "epoch": 0.6619202226345083, "grad_norm": 4.465810775756836, "learning_rate": 2.61192130584484e-06, "loss": 0.2885, "step": 14271 }, { "epoch": 0.6619666048237477, "grad_norm": 12.271475791931152, "learning_rate": 2.611274755297597e-06, "loss": 0.4354, "step": 14272 }, { "epoch": 0.662012987012987, "grad_norm": 5.214939594268799, "learning_rate": 2.610628256500826e-06, "loss": 0.3068, "step": 14273 }, { "epoch": 0.6620593692022263, "grad_norm": 5.180337905883789, "learning_rate": 2.6099818094685337e-06, "loss": 0.3244, "step": 14274 }, { "epoch": 0.6621057513914657, "grad_norm": 9.375441551208496, "learning_rate": 2.6093354142147276e-06, "loss": 0.2605, "step": 14275 }, { "epoch": 0.662152133580705, "grad_norm": 10.177779197692871, "learning_rate": 2.6086890707534062e-06, "loss": 0.3722, "step": 14276 }, { "epoch": 0.6621985157699444, "grad_norm": 3.7841131687164307, "learning_rate": 2.6080427790985753e-06, "loss": 0.2739, "step": 14277 }, { "epoch": 0.6622448979591836, "grad_norm": 5.110115051269531, "learning_rate": 2.6073965392642354e-06, "loss": 0.4001, "step": 14278 }, { "epoch": 0.662291280148423, "grad_norm": 10.763102531433105, "learning_rate": 2.6067503512643875e-06, "loss": 0.3193, "step": 14279 }, { "epoch": 0.6623376623376623, "grad_norm": 6.255650043487549, "learning_rate": 2.606104215113033e-06, "loss": 0.315, "step": 14280 }, { "epoch": 0.6623840445269017, "grad_norm": 20.867773056030273, "learning_rate": 2.6054581308241663e-06, "loss": 0.38, "step": 14281 }, { "epoch": 0.662430426716141, "grad_norm": 5.356381893157959, "learning_rate": 2.604812098411786e-06, "loss": 0.2284, "step": 14282 }, { "epoch": 0.6624768089053803, "grad_norm": 6.417219638824463, "learning_rate": 2.604166117889888e-06, "loss": 0.2954, "step": 14283 }, { "epoch": 0.6625231910946197, "grad_norm": 12.416361808776855, "learning_rate": 2.603520189272468e-06, "loss": 0.4104, "step": 14284 }, { "epoch": 0.662569573283859, "grad_norm": 7.225841999053955, "learning_rate": 2.6028743125735206e-06, "loss": 0.3547, "step": 14285 }, { "epoch": 0.6626159554730984, "grad_norm": 7.631541728973389, "learning_rate": 2.6022284878070347e-06, "loss": 0.3194, "step": 14286 }, { "epoch": 0.6626623376623376, "grad_norm": 6.0068254470825195, "learning_rate": 2.6015827149870043e-06, "loss": 0.3624, "step": 14287 }, { "epoch": 0.662708719851577, "grad_norm": 4.579699993133545, "learning_rate": 2.6009369941274197e-06, "loss": 0.2734, "step": 14288 }, { "epoch": 0.6627551020408163, "grad_norm": 11.846930503845215, "learning_rate": 2.600291325242269e-06, "loss": 0.4203, "step": 14289 }, { "epoch": 0.6628014842300557, "grad_norm": 5.965967655181885, "learning_rate": 2.599645708345544e-06, "loss": 0.283, "step": 14290 }, { "epoch": 0.6628478664192949, "grad_norm": 5.897785186767578, "learning_rate": 2.5990001434512257e-06, "loss": 0.3275, "step": 14291 }, { "epoch": 0.6628942486085343, "grad_norm": 6.39042329788208, "learning_rate": 2.598354630573303e-06, "loss": 0.3468, "step": 14292 }, { "epoch": 0.6629406307977737, "grad_norm": 4.267775058746338, "learning_rate": 2.597709169725761e-06, "loss": 0.245, "step": 14293 }, { "epoch": 0.662987012987013, "grad_norm": 6.572515964508057, "learning_rate": 2.5970637609225823e-06, "loss": 0.3086, "step": 14294 }, { "epoch": 0.6630333951762524, "grad_norm": 6.188106060028076, "learning_rate": 2.5964184041777525e-06, "loss": 0.2744, "step": 14295 }, { "epoch": 0.6630797773654916, "grad_norm": 12.203327178955078, "learning_rate": 2.5957730995052477e-06, "loss": 0.3189, "step": 14296 }, { "epoch": 0.663126159554731, "grad_norm": 7.470992565155029, "learning_rate": 2.5951278469190506e-06, "loss": 0.3105, "step": 14297 }, { "epoch": 0.6631725417439703, "grad_norm": 5.316638946533203, "learning_rate": 2.5944826464331403e-06, "loss": 0.3014, "step": 14298 }, { "epoch": 0.6632189239332097, "grad_norm": 11.270648002624512, "learning_rate": 2.5938374980614945e-06, "loss": 0.4212, "step": 14299 }, { "epoch": 0.6632653061224489, "grad_norm": 7.664426803588867, "learning_rate": 2.5931924018180922e-06, "loss": 0.4201, "step": 14300 }, { "epoch": 0.6633116883116883, "grad_norm": 13.696535110473633, "learning_rate": 2.5925473577169057e-06, "loss": 0.4508, "step": 14301 }, { "epoch": 0.6633580705009277, "grad_norm": 6.983067035675049, "learning_rate": 2.59190236577191e-06, "loss": 0.2991, "step": 14302 }, { "epoch": 0.663404452690167, "grad_norm": 5.629432201385498, "learning_rate": 2.5912574259970803e-06, "loss": 0.2763, "step": 14303 }, { "epoch": 0.6634508348794063, "grad_norm": 11.051965713500977, "learning_rate": 2.5906125384063875e-06, "loss": 0.4357, "step": 14304 }, { "epoch": 0.6634972170686456, "grad_norm": 10.015544891357422, "learning_rate": 2.589967703013805e-06, "loss": 0.3439, "step": 14305 }, { "epoch": 0.663543599257885, "grad_norm": 9.07097339630127, "learning_rate": 2.5893229198332993e-06, "loss": 0.4985, "step": 14306 }, { "epoch": 0.6635899814471243, "grad_norm": 4.6069416999816895, "learning_rate": 2.588678188878841e-06, "loss": 0.3021, "step": 14307 }, { "epoch": 0.6636363636363637, "grad_norm": 9.45888900756836, "learning_rate": 2.5880335101643983e-06, "loss": 0.3776, "step": 14308 }, { "epoch": 0.6636827458256029, "grad_norm": 5.5083112716674805, "learning_rate": 2.587388883703937e-06, "loss": 0.3099, "step": 14309 }, { "epoch": 0.6637291280148423, "grad_norm": 5.95810079574585, "learning_rate": 2.5867443095114248e-06, "loss": 0.3237, "step": 14310 }, { "epoch": 0.6637755102040817, "grad_norm": 5.874077320098877, "learning_rate": 2.5860997876008228e-06, "loss": 0.3057, "step": 14311 }, { "epoch": 0.663821892393321, "grad_norm": 4.974861145019531, "learning_rate": 2.585455317986095e-06, "loss": 0.2096, "step": 14312 }, { "epoch": 0.6638682745825603, "grad_norm": 5.24918270111084, "learning_rate": 2.584810900681205e-06, "loss": 0.3582, "step": 14313 }, { "epoch": 0.6639146567717996, "grad_norm": 6.5711283683776855, "learning_rate": 2.584166535700112e-06, "loss": 0.271, "step": 14314 }, { "epoch": 0.663961038961039, "grad_norm": 11.132063865661621, "learning_rate": 2.58352222305678e-06, "loss": 0.366, "step": 14315 }, { "epoch": 0.6640074211502783, "grad_norm": 4.597537517547607, "learning_rate": 2.582877962765162e-06, "loss": 0.2998, "step": 14316 }, { "epoch": 0.6640538033395176, "grad_norm": 16.0507869720459, "learning_rate": 2.5822337548392172e-06, "loss": 0.406, "step": 14317 }, { "epoch": 0.6641001855287569, "grad_norm": 8.674629211425781, "learning_rate": 2.581589599292904e-06, "loss": 0.2476, "step": 14318 }, { "epoch": 0.6641465677179963, "grad_norm": 4.686324596405029, "learning_rate": 2.580945496140176e-06, "loss": 0.3268, "step": 14319 }, { "epoch": 0.6641929499072357, "grad_norm": 4.636105060577393, "learning_rate": 2.5803014453949902e-06, "loss": 0.2155, "step": 14320 }, { "epoch": 0.6642393320964749, "grad_norm": 5.001861572265625, "learning_rate": 2.5796574470712956e-06, "loss": 0.2517, "step": 14321 }, { "epoch": 0.6642857142857143, "grad_norm": 8.497953414916992, "learning_rate": 2.5790135011830455e-06, "loss": 0.3987, "step": 14322 }, { "epoch": 0.6643320964749536, "grad_norm": 8.835537910461426, "learning_rate": 2.57836960774419e-06, "loss": 0.309, "step": 14323 }, { "epoch": 0.664378478664193, "grad_norm": 4.733601093292236, "learning_rate": 2.5777257667686807e-06, "loss": 0.2845, "step": 14324 }, { "epoch": 0.6644248608534323, "grad_norm": 7.491174697875977, "learning_rate": 2.577081978270467e-06, "loss": 0.4108, "step": 14325 }, { "epoch": 0.6644712430426716, "grad_norm": 11.018838882446289, "learning_rate": 2.576438242263492e-06, "loss": 0.444, "step": 14326 }, { "epoch": 0.6645176252319109, "grad_norm": 6.281508445739746, "learning_rate": 2.5757945587617045e-06, "loss": 0.3738, "step": 14327 }, { "epoch": 0.6645640074211503, "grad_norm": 3.7889163494110107, "learning_rate": 2.5751509277790487e-06, "loss": 0.231, "step": 14328 }, { "epoch": 0.6646103896103897, "grad_norm": 8.685281753540039, "learning_rate": 2.57450734932947e-06, "loss": 0.4868, "step": 14329 }, { "epoch": 0.6646567717996289, "grad_norm": 6.160512447357178, "learning_rate": 2.5738638234269116e-06, "loss": 0.3085, "step": 14330 }, { "epoch": 0.6647031539888683, "grad_norm": 9.224160194396973, "learning_rate": 2.5732203500853116e-06, "loss": 0.3294, "step": 14331 }, { "epoch": 0.6647495361781076, "grad_norm": 6.581737041473389, "learning_rate": 2.572576929318613e-06, "loss": 0.3191, "step": 14332 }, { "epoch": 0.664795918367347, "grad_norm": 4.8129730224609375, "learning_rate": 2.5719335611407553e-06, "loss": 0.3131, "step": 14333 }, { "epoch": 0.6648423005565862, "grad_norm": 4.8241095542907715, "learning_rate": 2.5712902455656765e-06, "loss": 0.336, "step": 14334 }, { "epoch": 0.6648886827458256, "grad_norm": 8.241530418395996, "learning_rate": 2.570646982607312e-06, "loss": 0.477, "step": 14335 }, { "epoch": 0.6649350649350649, "grad_norm": 12.156087875366211, "learning_rate": 2.5700037722796023e-06, "loss": 0.3858, "step": 14336 }, { "epoch": 0.6649814471243043, "grad_norm": 5.435599327087402, "learning_rate": 2.5693606145964776e-06, "loss": 0.3809, "step": 14337 }, { "epoch": 0.6650278293135437, "grad_norm": 7.717168807983398, "learning_rate": 2.5687175095718726e-06, "loss": 0.3823, "step": 14338 }, { "epoch": 0.6650742115027829, "grad_norm": 6.353362083435059, "learning_rate": 2.5680744572197204e-06, "loss": 0.4308, "step": 14339 }, { "epoch": 0.6651205936920223, "grad_norm": 7.474123954772949, "learning_rate": 2.5674314575539526e-06, "loss": 0.3234, "step": 14340 }, { "epoch": 0.6651669758812616, "grad_norm": 4.655210018157959, "learning_rate": 2.566788510588501e-06, "loss": 0.2831, "step": 14341 }, { "epoch": 0.665213358070501, "grad_norm": 7.481792449951172, "learning_rate": 2.5661456163372912e-06, "loss": 0.386, "step": 14342 }, { "epoch": 0.6652597402597402, "grad_norm": 12.53759765625, "learning_rate": 2.5655027748142523e-06, "loss": 0.3628, "step": 14343 }, { "epoch": 0.6653061224489796, "grad_norm": 6.83443546295166, "learning_rate": 2.5648599860333122e-06, "loss": 0.374, "step": 14344 }, { "epoch": 0.6653525046382189, "grad_norm": 5.512913703918457, "learning_rate": 2.5642172500083963e-06, "loss": 0.411, "step": 14345 }, { "epoch": 0.6653988868274583, "grad_norm": 9.67315673828125, "learning_rate": 2.5635745667534284e-06, "loss": 0.3888, "step": 14346 }, { "epoch": 0.6654452690166975, "grad_norm": 5.879298686981201, "learning_rate": 2.5629319362823346e-06, "loss": 0.2685, "step": 14347 }, { "epoch": 0.6654916512059369, "grad_norm": 10.740671157836914, "learning_rate": 2.562289358609034e-06, "loss": 0.3954, "step": 14348 }, { "epoch": 0.6655380333951763, "grad_norm": 6.677968502044678, "learning_rate": 2.5616468337474474e-06, "loss": 0.2148, "step": 14349 }, { "epoch": 0.6655844155844156, "grad_norm": 7.793241024017334, "learning_rate": 2.5610043617114964e-06, "loss": 0.3505, "step": 14350 }, { "epoch": 0.665630797773655, "grad_norm": 5.825631141662598, "learning_rate": 2.5603619425150993e-06, "loss": 0.303, "step": 14351 }, { "epoch": 0.6656771799628942, "grad_norm": 5.070807933807373, "learning_rate": 2.559719576172176e-06, "loss": 0.3361, "step": 14352 }, { "epoch": 0.6657235621521336, "grad_norm": 9.898782730102539, "learning_rate": 2.5590772626966394e-06, "loss": 0.6479, "step": 14353 }, { "epoch": 0.6657699443413729, "grad_norm": 5.580790042877197, "learning_rate": 2.5584350021024063e-06, "loss": 0.2569, "step": 14354 }, { "epoch": 0.6658163265306123, "grad_norm": 6.979902744293213, "learning_rate": 2.5577927944033908e-06, "loss": 0.2921, "step": 14355 }, { "epoch": 0.6658627087198515, "grad_norm": 4.7190165519714355, "learning_rate": 2.5571506396135064e-06, "loss": 0.3075, "step": 14356 }, { "epoch": 0.6659090909090909, "grad_norm": 7.767345905303955, "learning_rate": 2.556508537746667e-06, "loss": 0.3835, "step": 14357 }, { "epoch": 0.6659554730983303, "grad_norm": 8.015338897705078, "learning_rate": 2.5558664888167794e-06, "loss": 0.3057, "step": 14358 }, { "epoch": 0.6660018552875696, "grad_norm": 5.275300025939941, "learning_rate": 2.5552244928377546e-06, "loss": 0.3198, "step": 14359 }, { "epoch": 0.6660482374768089, "grad_norm": 4.803347110748291, "learning_rate": 2.554582549823502e-06, "loss": 0.322, "step": 14360 }, { "epoch": 0.6660946196660482, "grad_norm": 5.753846645355225, "learning_rate": 2.553940659787929e-06, "loss": 0.2495, "step": 14361 }, { "epoch": 0.6661410018552876, "grad_norm": 9.461882591247559, "learning_rate": 2.5532988227449413e-06, "loss": 0.3655, "step": 14362 }, { "epoch": 0.6661873840445269, "grad_norm": 5.187089443206787, "learning_rate": 2.552657038708446e-06, "loss": 0.285, "step": 14363 }, { "epoch": 0.6662337662337663, "grad_norm": 4.404447555541992, "learning_rate": 2.552015307692343e-06, "loss": 0.2733, "step": 14364 }, { "epoch": 0.6662801484230055, "grad_norm": 5.247655391693115, "learning_rate": 2.551373629710537e-06, "loss": 0.3063, "step": 14365 }, { "epoch": 0.6663265306122449, "grad_norm": 9.207850456237793, "learning_rate": 2.550732004776931e-06, "loss": 0.2502, "step": 14366 }, { "epoch": 0.6663729128014843, "grad_norm": 6.1891913414001465, "learning_rate": 2.550090432905423e-06, "loss": 0.31, "step": 14367 }, { "epoch": 0.6664192949907236, "grad_norm": 7.57297420501709, "learning_rate": 2.5494489141099155e-06, "loss": 0.3361, "step": 14368 }, { "epoch": 0.6664656771799629, "grad_norm": 7.284214496612549, "learning_rate": 2.5488074484043036e-06, "loss": 0.4136, "step": 14369 }, { "epoch": 0.6665120593692022, "grad_norm": 7.857572078704834, "learning_rate": 2.548166035802485e-06, "loss": 0.4009, "step": 14370 }, { "epoch": 0.6665584415584416, "grad_norm": 4.176025390625, "learning_rate": 2.547524676318356e-06, "loss": 0.3231, "step": 14371 }, { "epoch": 0.6666048237476809, "grad_norm": 4.347628116607666, "learning_rate": 2.5468833699658114e-06, "loss": 0.311, "step": 14372 }, { "epoch": 0.6666512059369202, "grad_norm": 7.319922924041748, "learning_rate": 2.5462421167587448e-06, "loss": 0.37, "step": 14373 }, { "epoch": 0.6666975881261595, "grad_norm": 16.160598754882812, "learning_rate": 2.545600916711051e-06, "loss": 0.4617, "step": 14374 }, { "epoch": 0.6667439703153989, "grad_norm": 9.763288497924805, "learning_rate": 2.5449597698366156e-06, "loss": 0.3193, "step": 14375 }, { "epoch": 0.6667903525046383, "grad_norm": 5.9741926193237305, "learning_rate": 2.5443186761493327e-06, "loss": 0.2972, "step": 14376 }, { "epoch": 0.6668367346938775, "grad_norm": 9.65493392944336, "learning_rate": 2.5436776356630898e-06, "loss": 0.2931, "step": 14377 }, { "epoch": 0.6668831168831169, "grad_norm": 8.462495803833008, "learning_rate": 2.543036648391776e-06, "loss": 0.3924, "step": 14378 }, { "epoch": 0.6669294990723562, "grad_norm": 3.844139337539673, "learning_rate": 2.5423957143492795e-06, "loss": 0.2733, "step": 14379 }, { "epoch": 0.6669758812615956, "grad_norm": 8.769512176513672, "learning_rate": 2.541754833549481e-06, "loss": 0.3172, "step": 14380 }, { "epoch": 0.6670222634508349, "grad_norm": 4.793242931365967, "learning_rate": 2.541114006006268e-06, "loss": 0.3102, "step": 14381 }, { "epoch": 0.6670686456400742, "grad_norm": 9.472418785095215, "learning_rate": 2.5404732317335225e-06, "loss": 0.3287, "step": 14382 }, { "epoch": 0.6671150278293135, "grad_norm": 6.6781697273254395, "learning_rate": 2.539832510745127e-06, "loss": 0.254, "step": 14383 }, { "epoch": 0.6671614100185529, "grad_norm": 6.0859150886535645, "learning_rate": 2.5391918430549635e-06, "loss": 0.3399, "step": 14384 }, { "epoch": 0.6672077922077922, "grad_norm": 5.724212646484375, "learning_rate": 2.538551228676912e-06, "loss": 0.256, "step": 14385 }, { "epoch": 0.6672541743970315, "grad_norm": 5.894890308380127, "learning_rate": 2.5379106676248483e-06, "loss": 0.2617, "step": 14386 }, { "epoch": 0.6673005565862709, "grad_norm": 5.531307697296143, "learning_rate": 2.5372701599126514e-06, "loss": 0.2843, "step": 14387 }, { "epoch": 0.6673469387755102, "grad_norm": 9.951153755187988, "learning_rate": 2.536629705554197e-06, "loss": 0.3359, "step": 14388 }, { "epoch": 0.6673933209647496, "grad_norm": 9.178340911865234, "learning_rate": 2.535989304563361e-06, "loss": 0.4717, "step": 14389 }, { "epoch": 0.6674397031539888, "grad_norm": 5.964295864105225, "learning_rate": 2.535348956954019e-06, "loss": 0.3203, "step": 14390 }, { "epoch": 0.6674860853432282, "grad_norm": 9.109430313110352, "learning_rate": 2.5347086627400403e-06, "loss": 0.3494, "step": 14391 }, { "epoch": 0.6675324675324675, "grad_norm": 13.2893648147583, "learning_rate": 2.5340684219352977e-06, "loss": 0.486, "step": 14392 }, { "epoch": 0.6675788497217069, "grad_norm": 8.907829284667969, "learning_rate": 2.5334282345536626e-06, "loss": 0.2752, "step": 14393 }, { "epoch": 0.6676252319109462, "grad_norm": 5.448023319244385, "learning_rate": 2.5327881006090033e-06, "loss": 0.2969, "step": 14394 }, { "epoch": 0.6676716141001855, "grad_norm": 10.926155090332031, "learning_rate": 2.532148020115189e-06, "loss": 0.407, "step": 14395 }, { "epoch": 0.6677179962894249, "grad_norm": 6.8450927734375, "learning_rate": 2.5315079930860877e-06, "loss": 0.3123, "step": 14396 }, { "epoch": 0.6677643784786642, "grad_norm": 7.855081558227539, "learning_rate": 2.530868019535562e-06, "loss": 0.3588, "step": 14397 }, { "epoch": 0.6678107606679036, "grad_norm": 6.089636325836182, "learning_rate": 2.5302280994774775e-06, "loss": 0.2613, "step": 14398 }, { "epoch": 0.6678571428571428, "grad_norm": 5.156866550445557, "learning_rate": 2.5295882329256994e-06, "loss": 0.3241, "step": 14399 }, { "epoch": 0.6679035250463822, "grad_norm": 8.230297088623047, "learning_rate": 2.5289484198940895e-06, "loss": 0.3601, "step": 14400 }, { "epoch": 0.6679499072356215, "grad_norm": 5.108260154724121, "learning_rate": 2.52830866039651e-06, "loss": 0.2786, "step": 14401 }, { "epoch": 0.6679962894248609, "grad_norm": 5.022837162017822, "learning_rate": 2.527668954446818e-06, "loss": 0.268, "step": 14402 }, { "epoch": 0.6680426716141001, "grad_norm": 8.716835021972656, "learning_rate": 2.527029302058874e-06, "loss": 0.35, "step": 14403 }, { "epoch": 0.6680890538033395, "grad_norm": 6.7025346755981445, "learning_rate": 2.5263897032465362e-06, "loss": 0.3243, "step": 14404 }, { "epoch": 0.6681354359925789, "grad_norm": 7.838736057281494, "learning_rate": 2.5257501580236604e-06, "loss": 0.3045, "step": 14405 }, { "epoch": 0.6681818181818182, "grad_norm": 7.359192848205566, "learning_rate": 2.5251106664041026e-06, "loss": 0.3868, "step": 14406 }, { "epoch": 0.6682282003710576, "grad_norm": 6.660599708557129, "learning_rate": 2.5244712284017185e-06, "loss": 0.3429, "step": 14407 }, { "epoch": 0.6682745825602968, "grad_norm": 6.918447017669678, "learning_rate": 2.523831844030358e-06, "loss": 0.2835, "step": 14408 }, { "epoch": 0.6683209647495362, "grad_norm": 4.298977375030518, "learning_rate": 2.5231925133038744e-06, "loss": 0.3137, "step": 14409 }, { "epoch": 0.6683673469387755, "grad_norm": 7.234298229217529, "learning_rate": 2.5225532362361185e-06, "loss": 0.2836, "step": 14410 }, { "epoch": 0.6684137291280149, "grad_norm": 8.223220825195312, "learning_rate": 2.52191401284094e-06, "loss": 0.3573, "step": 14411 }, { "epoch": 0.6684601113172541, "grad_norm": 3.844494342803955, "learning_rate": 2.5212748431321895e-06, "loss": 0.1997, "step": 14412 }, { "epoch": 0.6685064935064935, "grad_norm": 5.0214385986328125, "learning_rate": 2.52063572712371e-06, "loss": 0.2329, "step": 14413 }, { "epoch": 0.6685528756957329, "grad_norm": 5.88079833984375, "learning_rate": 2.5199966648293496e-06, "loss": 0.2883, "step": 14414 }, { "epoch": 0.6685992578849722, "grad_norm": 5.902207374572754, "learning_rate": 2.519357656262954e-06, "loss": 0.2691, "step": 14415 }, { "epoch": 0.6686456400742115, "grad_norm": 7.818711280822754, "learning_rate": 2.518718701438366e-06, "loss": 0.402, "step": 14416 }, { "epoch": 0.6686920222634508, "grad_norm": 6.28980016708374, "learning_rate": 2.5180798003694284e-06, "loss": 0.2918, "step": 14417 }, { "epoch": 0.6687384044526902, "grad_norm": 6.184958457946777, "learning_rate": 2.517440953069985e-06, "loss": 0.266, "step": 14418 }, { "epoch": 0.6687847866419295, "grad_norm": 7.0156683921813965, "learning_rate": 2.5168021595538715e-06, "loss": 0.3206, "step": 14419 }, { "epoch": 0.6688311688311688, "grad_norm": 8.880091667175293, "learning_rate": 2.5161634198349307e-06, "loss": 0.3874, "step": 14420 }, { "epoch": 0.6688775510204081, "grad_norm": 7.56922721862793, "learning_rate": 2.5155247339269985e-06, "loss": 0.3185, "step": 14421 }, { "epoch": 0.6689239332096475, "grad_norm": 7.315232276916504, "learning_rate": 2.5148861018439118e-06, "loss": 0.3584, "step": 14422 }, { "epoch": 0.6689703153988869, "grad_norm": 13.616538047790527, "learning_rate": 2.51424752359951e-06, "loss": 0.3799, "step": 14423 }, { "epoch": 0.6690166975881262, "grad_norm": 4.549986362457275, "learning_rate": 2.513608999207622e-06, "loss": 0.2622, "step": 14424 }, { "epoch": 0.6690630797773655, "grad_norm": 10.85916805267334, "learning_rate": 2.5129705286820836e-06, "loss": 0.4885, "step": 14425 }, { "epoch": 0.6691094619666048, "grad_norm": 10.694114685058594, "learning_rate": 2.512332112036727e-06, "loss": 0.3845, "step": 14426 }, { "epoch": 0.6691558441558442, "grad_norm": 6.312005043029785, "learning_rate": 2.5116937492853833e-06, "loss": 0.3821, "step": 14427 }, { "epoch": 0.6692022263450835, "grad_norm": 8.279834747314453, "learning_rate": 2.5110554404418824e-06, "loss": 0.3609, "step": 14428 }, { "epoch": 0.6692486085343228, "grad_norm": 6.300690650939941, "learning_rate": 2.510417185520054e-06, "loss": 0.2815, "step": 14429 }, { "epoch": 0.6692949907235621, "grad_norm": 4.699378967285156, "learning_rate": 2.5097789845337223e-06, "loss": 0.2687, "step": 14430 }, { "epoch": 0.6693413729128015, "grad_norm": 8.060648918151855, "learning_rate": 2.5091408374967154e-06, "loss": 0.301, "step": 14431 }, { "epoch": 0.6693877551020408, "grad_norm": 16.550554275512695, "learning_rate": 2.5085027444228587e-06, "loss": 0.4942, "step": 14432 }, { "epoch": 0.6694341372912801, "grad_norm": 9.188380241394043, "learning_rate": 2.507864705325976e-06, "loss": 0.3125, "step": 14433 }, { "epoch": 0.6694805194805195, "grad_norm": 8.467216491699219, "learning_rate": 2.5072267202198917e-06, "loss": 0.1996, "step": 14434 }, { "epoch": 0.6695269016697588, "grad_norm": 6.897353172302246, "learning_rate": 2.506588789118424e-06, "loss": 0.3381, "step": 14435 }, { "epoch": 0.6695732838589982, "grad_norm": 9.050418853759766, "learning_rate": 2.505950912035394e-06, "loss": 0.3396, "step": 14436 }, { "epoch": 0.6696196660482375, "grad_norm": 10.201906204223633, "learning_rate": 2.5053130889846233e-06, "loss": 0.3057, "step": 14437 }, { "epoch": 0.6696660482374768, "grad_norm": 12.970597267150879, "learning_rate": 2.504675319979928e-06, "loss": 0.4246, "step": 14438 }, { "epoch": 0.6697124304267161, "grad_norm": 11.503351211547852, "learning_rate": 2.5040376050351277e-06, "loss": 0.3186, "step": 14439 }, { "epoch": 0.6697588126159555, "grad_norm": 8.809319496154785, "learning_rate": 2.503399944164035e-06, "loss": 0.5858, "step": 14440 }, { "epoch": 0.6698051948051948, "grad_norm": 7.987275123596191, "learning_rate": 2.5027623373804657e-06, "loss": 0.3341, "step": 14441 }, { "epoch": 0.6698515769944341, "grad_norm": 14.354890823364258, "learning_rate": 2.502124784698232e-06, "loss": 0.3594, "step": 14442 }, { "epoch": 0.6698979591836735, "grad_norm": 3.5117297172546387, "learning_rate": 2.5014872861311477e-06, "loss": 0.2682, "step": 14443 }, { "epoch": 0.6699443413729128, "grad_norm": 6.388352870941162, "learning_rate": 2.5008498416930243e-06, "loss": 0.3168, "step": 14444 }, { "epoch": 0.6699907235621522, "grad_norm": 4.314191818237305, "learning_rate": 2.5002124513976724e-06, "loss": 0.2733, "step": 14445 }, { "epoch": 0.6700371057513914, "grad_norm": 3.526962995529175, "learning_rate": 2.499575115258897e-06, "loss": 0.2756, "step": 14446 }, { "epoch": 0.6700834879406308, "grad_norm": 6.929091453552246, "learning_rate": 2.4989378332905084e-06, "loss": 0.3368, "step": 14447 }, { "epoch": 0.6701298701298701, "grad_norm": 6.374969482421875, "learning_rate": 2.4983006055063125e-06, "loss": 0.3221, "step": 14448 }, { "epoch": 0.6701762523191095, "grad_norm": 8.168429374694824, "learning_rate": 2.4976634319201144e-06, "loss": 0.3523, "step": 14449 }, { "epoch": 0.6702226345083488, "grad_norm": 6.867447853088379, "learning_rate": 2.49702631254572e-06, "loss": 0.2217, "step": 14450 }, { "epoch": 0.6702690166975881, "grad_norm": 6.8790106773376465, "learning_rate": 2.496389247396928e-06, "loss": 0.3411, "step": 14451 }, { "epoch": 0.6703153988868275, "grad_norm": 8.479533195495605, "learning_rate": 2.495752236487543e-06, "loss": 0.4079, "step": 14452 }, { "epoch": 0.6703617810760668, "grad_norm": 11.991364479064941, "learning_rate": 2.495115279831365e-06, "loss": 0.4543, "step": 14453 }, { "epoch": 0.6704081632653062, "grad_norm": 7.430718898773193, "learning_rate": 2.4944783774421925e-06, "loss": 0.3179, "step": 14454 }, { "epoch": 0.6704545454545454, "grad_norm": 5.215274810791016, "learning_rate": 2.4938415293338254e-06, "loss": 0.2643, "step": 14455 }, { "epoch": 0.6705009276437848, "grad_norm": 5.605620861053467, "learning_rate": 2.4932047355200613e-06, "loss": 0.3402, "step": 14456 }, { "epoch": 0.6705473098330241, "grad_norm": 4.584602355957031, "learning_rate": 2.4925679960146927e-06, "loss": 0.3336, "step": 14457 }, { "epoch": 0.6705936920222635, "grad_norm": 4.5403151512146, "learning_rate": 2.491931310831515e-06, "loss": 0.2563, "step": 14458 }, { "epoch": 0.6706400742115027, "grad_norm": 6.730420112609863, "learning_rate": 2.4912946799843235e-06, "loss": 0.3065, "step": 14459 }, { "epoch": 0.6706864564007421, "grad_norm": 6.57067346572876, "learning_rate": 2.490658103486909e-06, "loss": 0.3385, "step": 14460 }, { "epoch": 0.6707328385899815, "grad_norm": 4.61746883392334, "learning_rate": 2.4900215813530655e-06, "loss": 0.391, "step": 14461 }, { "epoch": 0.6707792207792208, "grad_norm": 10.695754051208496, "learning_rate": 2.4893851135965784e-06, "loss": 0.5028, "step": 14462 }, { "epoch": 0.6708256029684602, "grad_norm": 4.064692974090576, "learning_rate": 2.4887487002312388e-06, "loss": 0.3096, "step": 14463 }, { "epoch": 0.6708719851576994, "grad_norm": 7.415788650512695, "learning_rate": 2.4881123412708343e-06, "loss": 0.3766, "step": 14464 }, { "epoch": 0.6709183673469388, "grad_norm": 10.441977500915527, "learning_rate": 2.4874760367291507e-06, "loss": 0.3263, "step": 14465 }, { "epoch": 0.6709647495361781, "grad_norm": 8.25174617767334, "learning_rate": 2.4868397866199732e-06, "loss": 0.3566, "step": 14466 }, { "epoch": 0.6710111317254175, "grad_norm": 6.380529880523682, "learning_rate": 2.4862035909570885e-06, "loss": 0.2364, "step": 14467 }, { "epoch": 0.6710575139146567, "grad_norm": 11.434452056884766, "learning_rate": 2.485567449754275e-06, "loss": 0.3764, "step": 14468 }, { "epoch": 0.6711038961038961, "grad_norm": 9.238934516906738, "learning_rate": 2.484931363025317e-06, "loss": 0.3182, "step": 14469 }, { "epoch": 0.6711502782931354, "grad_norm": 4.621452808380127, "learning_rate": 2.484295330783994e-06, "loss": 0.3151, "step": 14470 }, { "epoch": 0.6711966604823748, "grad_norm": 9.59118366241455, "learning_rate": 2.4836593530440854e-06, "loss": 0.3838, "step": 14471 }, { "epoch": 0.671243042671614, "grad_norm": 8.6536283493042, "learning_rate": 2.483023429819372e-06, "loss": 0.299, "step": 14472 }, { "epoch": 0.6712894248608534, "grad_norm": 6.169593334197998, "learning_rate": 2.482387561123627e-06, "loss": 0.3553, "step": 14473 }, { "epoch": 0.6713358070500928, "grad_norm": 9.058113098144531, "learning_rate": 2.481751746970627e-06, "loss": 0.4035, "step": 14474 }, { "epoch": 0.6713821892393321, "grad_norm": 14.030413627624512, "learning_rate": 2.4811159873741475e-06, "loss": 0.4224, "step": 14475 }, { "epoch": 0.6714285714285714, "grad_norm": 6.948525428771973, "learning_rate": 2.480480282347961e-06, "loss": 0.3475, "step": 14476 }, { "epoch": 0.6714749536178107, "grad_norm": 10.135106086730957, "learning_rate": 2.479844631905841e-06, "loss": 0.2828, "step": 14477 }, { "epoch": 0.6715213358070501, "grad_norm": 4.526399612426758, "learning_rate": 2.4792090360615595e-06, "loss": 0.304, "step": 14478 }, { "epoch": 0.6715677179962894, "grad_norm": 7.1907877922058105, "learning_rate": 2.4785734948288824e-06, "loss": 0.3543, "step": 14479 }, { "epoch": 0.6716141001855288, "grad_norm": 4.664966106414795, "learning_rate": 2.477938008221581e-06, "loss": 0.3598, "step": 14480 }, { "epoch": 0.671660482374768, "grad_norm": 9.294482231140137, "learning_rate": 2.477302576253422e-06, "loss": 0.2868, "step": 14481 }, { "epoch": 0.6717068645640074, "grad_norm": 5.5127739906311035, "learning_rate": 2.4766671989381726e-06, "loss": 0.3225, "step": 14482 }, { "epoch": 0.6717532467532468, "grad_norm": 6.867374897003174, "learning_rate": 2.4760318762895986e-06, "loss": 0.3045, "step": 14483 }, { "epoch": 0.6717996289424861, "grad_norm": 9.3636474609375, "learning_rate": 2.4753966083214613e-06, "loss": 0.4076, "step": 14484 }, { "epoch": 0.6718460111317254, "grad_norm": 10.375207901000977, "learning_rate": 2.474761395047525e-06, "loss": 0.3674, "step": 14485 }, { "epoch": 0.6718923933209647, "grad_norm": 6.4272780418396, "learning_rate": 2.474126236481551e-06, "loss": 0.4363, "step": 14486 }, { "epoch": 0.6719387755102041, "grad_norm": 6.59732723236084, "learning_rate": 2.4734911326373e-06, "loss": 0.2576, "step": 14487 }, { "epoch": 0.6719851576994434, "grad_norm": 7.264339923858643, "learning_rate": 2.472856083528531e-06, "loss": 0.3277, "step": 14488 }, { "epoch": 0.6720315398886827, "grad_norm": 5.304035663604736, "learning_rate": 2.4722210891690034e-06, "loss": 0.3073, "step": 14489 }, { "epoch": 0.672077922077922, "grad_norm": 4.841818809509277, "learning_rate": 2.4715861495724713e-06, "loss": 0.3285, "step": 14490 }, { "epoch": 0.6721243042671614, "grad_norm": 6.451257228851318, "learning_rate": 2.4709512647526912e-06, "loss": 0.2652, "step": 14491 }, { "epoch": 0.6721706864564008, "grad_norm": 9.859657287597656, "learning_rate": 2.4703164347234186e-06, "loss": 0.4642, "step": 14492 }, { "epoch": 0.6722170686456401, "grad_norm": 10.58871841430664, "learning_rate": 2.469681659498406e-06, "loss": 0.5053, "step": 14493 }, { "epoch": 0.6722634508348794, "grad_norm": 7.329678535461426, "learning_rate": 2.4690469390914073e-06, "loss": 0.3423, "step": 14494 }, { "epoch": 0.6723098330241187, "grad_norm": 12.768050193786621, "learning_rate": 2.46841227351617e-06, "loss": 0.3736, "step": 14495 }, { "epoch": 0.6723562152133581, "grad_norm": 4.7089972496032715, "learning_rate": 2.467777662786445e-06, "loss": 0.2469, "step": 14496 }, { "epoch": 0.6724025974025974, "grad_norm": 14.419076919555664, "learning_rate": 2.4671431069159822e-06, "loss": 0.4182, "step": 14497 }, { "epoch": 0.6724489795918367, "grad_norm": 6.597879886627197, "learning_rate": 2.466508605918528e-06, "loss": 0.2546, "step": 14498 }, { "epoch": 0.672495361781076, "grad_norm": 7.462676525115967, "learning_rate": 2.465874159807828e-06, "loss": 0.3708, "step": 14499 }, { "epoch": 0.6725417439703154, "grad_norm": 4.994518280029297, "learning_rate": 2.46523976859763e-06, "loss": 0.2686, "step": 14500 }, { "epoch": 0.6725881261595548, "grad_norm": 7.803918838500977, "learning_rate": 2.464605432301673e-06, "loss": 0.2865, "step": 14501 }, { "epoch": 0.672634508348794, "grad_norm": 5.883589267730713, "learning_rate": 2.463971150933703e-06, "loss": 0.3101, "step": 14502 }, { "epoch": 0.6726808905380334, "grad_norm": 7.724501609802246, "learning_rate": 2.4633369245074594e-06, "loss": 0.2659, "step": 14503 }, { "epoch": 0.6727272727272727, "grad_norm": 7.471136569976807, "learning_rate": 2.4627027530366836e-06, "loss": 0.4689, "step": 14504 }, { "epoch": 0.6727736549165121, "grad_norm": 7.026425838470459, "learning_rate": 2.4620686365351167e-06, "loss": 0.3797, "step": 14505 }, { "epoch": 0.6728200371057514, "grad_norm": 5.616731643676758, "learning_rate": 2.461434575016492e-06, "loss": 0.3097, "step": 14506 }, { "epoch": 0.6728664192949907, "grad_norm": 6.283447265625, "learning_rate": 2.4608005684945484e-06, "loss": 0.3181, "step": 14507 }, { "epoch": 0.67291280148423, "grad_norm": 5.558286190032959, "learning_rate": 2.4601666169830213e-06, "loss": 0.2928, "step": 14508 }, { "epoch": 0.6729591836734694, "grad_norm": 7.38856315612793, "learning_rate": 2.4595327204956443e-06, "loss": 0.3024, "step": 14509 }, { "epoch": 0.6730055658627088, "grad_norm": 13.869243621826172, "learning_rate": 2.4588988790461516e-06, "loss": 0.248, "step": 14510 }, { "epoch": 0.673051948051948, "grad_norm": 3.967735767364502, "learning_rate": 2.458265092648276e-06, "loss": 0.3331, "step": 14511 }, { "epoch": 0.6730983302411874, "grad_norm": 6.599421977996826, "learning_rate": 2.457631361315745e-06, "loss": 0.3481, "step": 14512 }, { "epoch": 0.6731447124304267, "grad_norm": 9.153261184692383, "learning_rate": 2.456997685062289e-06, "loss": 0.3904, "step": 14513 }, { "epoch": 0.6731910946196661, "grad_norm": 5.7213029861450195, "learning_rate": 2.4563640639016366e-06, "loss": 0.3627, "step": 14514 }, { "epoch": 0.6732374768089053, "grad_norm": 5.260754108428955, "learning_rate": 2.4557304978475162e-06, "loss": 0.3549, "step": 14515 }, { "epoch": 0.6732838589981447, "grad_norm": 4.411330699920654, "learning_rate": 2.4550969869136537e-06, "loss": 0.341, "step": 14516 }, { "epoch": 0.673330241187384, "grad_norm": 6.151564121246338, "learning_rate": 2.454463531113771e-06, "loss": 0.3395, "step": 14517 }, { "epoch": 0.6733766233766234, "grad_norm": 15.905674934387207, "learning_rate": 2.4538301304615935e-06, "loss": 0.3387, "step": 14518 }, { "epoch": 0.6734230055658627, "grad_norm": 8.37407112121582, "learning_rate": 2.4531967849708428e-06, "loss": 0.3275, "step": 14519 }, { "epoch": 0.673469387755102, "grad_norm": 14.678008079528809, "learning_rate": 2.4525634946552405e-06, "loss": 0.3589, "step": 14520 }, { "epoch": 0.6735157699443414, "grad_norm": 9.132099151611328, "learning_rate": 2.4519302595285087e-06, "loss": 0.4665, "step": 14521 }, { "epoch": 0.6735621521335807, "grad_norm": 6.794356346130371, "learning_rate": 2.4512970796043616e-06, "loss": 0.3609, "step": 14522 }, { "epoch": 0.6736085343228201, "grad_norm": 6.4343767166137695, "learning_rate": 2.4506639548965196e-06, "loss": 0.3451, "step": 14523 }, { "epoch": 0.6736549165120593, "grad_norm": 5.455394744873047, "learning_rate": 2.4500308854186977e-06, "loss": 0.3156, "step": 14524 }, { "epoch": 0.6737012987012987, "grad_norm": 10.5834321975708, "learning_rate": 2.4493978711846123e-06, "loss": 0.2328, "step": 14525 }, { "epoch": 0.673747680890538, "grad_norm": 11.17776107788086, "learning_rate": 2.4487649122079766e-06, "loss": 0.4518, "step": 14526 }, { "epoch": 0.6737940630797774, "grad_norm": 6.729375839233398, "learning_rate": 2.4481320085025044e-06, "loss": 0.1724, "step": 14527 }, { "epoch": 0.6738404452690167, "grad_norm": 13.43620491027832, "learning_rate": 2.4474991600819053e-06, "loss": 0.3968, "step": 14528 }, { "epoch": 0.673886827458256, "grad_norm": 6.308038711547852, "learning_rate": 2.4468663669598908e-06, "loss": 0.2436, "step": 14529 }, { "epoch": 0.6739332096474954, "grad_norm": 11.888213157653809, "learning_rate": 2.446233629150169e-06, "loss": 0.2922, "step": 14530 }, { "epoch": 0.6739795918367347, "grad_norm": 9.625842094421387, "learning_rate": 2.4456009466664495e-06, "loss": 0.2715, "step": 14531 }, { "epoch": 0.674025974025974, "grad_norm": 8.474486351013184, "learning_rate": 2.4449683195224396e-06, "loss": 0.3166, "step": 14532 }, { "epoch": 0.6740723562152133, "grad_norm": 5.751650333404541, "learning_rate": 2.4443357477318414e-06, "loss": 0.2257, "step": 14533 }, { "epoch": 0.6741187384044527, "grad_norm": 6.867137432098389, "learning_rate": 2.4437032313083615e-06, "loss": 0.3143, "step": 14534 }, { "epoch": 0.674165120593692, "grad_norm": 8.454453468322754, "learning_rate": 2.4430707702657024e-06, "loss": 0.4186, "step": 14535 }, { "epoch": 0.6742115027829314, "grad_norm": 10.447408676147461, "learning_rate": 2.442438364617567e-06, "loss": 0.4621, "step": 14536 }, { "epoch": 0.6742578849721707, "grad_norm": 10.149255752563477, "learning_rate": 2.4418060143776552e-06, "loss": 0.3028, "step": 14537 }, { "epoch": 0.67430426716141, "grad_norm": 6.654030799865723, "learning_rate": 2.4411737195596686e-06, "loss": 0.3161, "step": 14538 }, { "epoch": 0.6743506493506494, "grad_norm": 9.622848510742188, "learning_rate": 2.440541480177302e-06, "loss": 0.4499, "step": 14539 }, { "epoch": 0.6743970315398887, "grad_norm": 10.738558769226074, "learning_rate": 2.4399092962442538e-06, "loss": 0.4222, "step": 14540 }, { "epoch": 0.674443413729128, "grad_norm": 9.65213680267334, "learning_rate": 2.439277167774221e-06, "loss": 0.3501, "step": 14541 }, { "epoch": 0.6744897959183673, "grad_norm": 4.971065998077393, "learning_rate": 2.4386450947808976e-06, "loss": 0.344, "step": 14542 }, { "epoch": 0.6745361781076067, "grad_norm": 5.467591285705566, "learning_rate": 2.4380130772779785e-06, "loss": 0.3599, "step": 14543 }, { "epoch": 0.674582560296846, "grad_norm": 8.983834266662598, "learning_rate": 2.4373811152791536e-06, "loss": 0.3753, "step": 14544 }, { "epoch": 0.6746289424860853, "grad_norm": 10.722978591918945, "learning_rate": 2.436749208798115e-06, "loss": 0.4241, "step": 14545 }, { "epoch": 0.6746753246753247, "grad_norm": 7.249460697174072, "learning_rate": 2.436117357848553e-06, "loss": 0.2696, "step": 14546 }, { "epoch": 0.674721706864564, "grad_norm": 4.934178829193115, "learning_rate": 2.435485562444156e-06, "loss": 0.3182, "step": 14547 }, { "epoch": 0.6747680890538034, "grad_norm": 7.211650848388672, "learning_rate": 2.434853822598611e-06, "loss": 0.4037, "step": 14548 }, { "epoch": 0.6748144712430427, "grad_norm": 6.8203840255737305, "learning_rate": 2.4342221383256083e-06, "loss": 0.3186, "step": 14549 }, { "epoch": 0.674860853432282, "grad_norm": 11.251848220825195, "learning_rate": 2.433590509638827e-06, "loss": 0.3651, "step": 14550 }, { "epoch": 0.6749072356215213, "grad_norm": 7.80197811126709, "learning_rate": 2.432958936551953e-06, "loss": 0.3659, "step": 14551 }, { "epoch": 0.6749536178107607, "grad_norm": 6.052604675292969, "learning_rate": 2.4323274190786703e-06, "loss": 0.3732, "step": 14552 }, { "epoch": 0.675, "grad_norm": 9.493667602539062, "learning_rate": 2.43169595723266e-06, "loss": 0.4846, "step": 14553 }, { "epoch": 0.6750463821892393, "grad_norm": 11.366291046142578, "learning_rate": 2.4310645510276037e-06, "loss": 0.3546, "step": 14554 }, { "epoch": 0.6750927643784786, "grad_norm": 13.02292537689209, "learning_rate": 2.430433200477177e-06, "loss": 0.3786, "step": 14555 }, { "epoch": 0.675139146567718, "grad_norm": 4.0588698387146, "learning_rate": 2.42980190559506e-06, "loss": 0.288, "step": 14556 }, { "epoch": 0.6751855287569574, "grad_norm": 7.697267055511475, "learning_rate": 2.4291706663949288e-06, "loss": 0.2325, "step": 14557 }, { "epoch": 0.6752319109461966, "grad_norm": 15.876781463623047, "learning_rate": 2.428539482890459e-06, "loss": 0.4432, "step": 14558 }, { "epoch": 0.675278293135436, "grad_norm": 4.116755485534668, "learning_rate": 2.427908355095325e-06, "loss": 0.2472, "step": 14559 }, { "epoch": 0.6753246753246753, "grad_norm": 4.532259464263916, "learning_rate": 2.4272772830232022e-06, "loss": 0.2615, "step": 14560 }, { "epoch": 0.6753710575139147, "grad_norm": 8.155740737915039, "learning_rate": 2.4266462666877582e-06, "loss": 0.362, "step": 14561 }, { "epoch": 0.675417439703154, "grad_norm": 5.893330097198486, "learning_rate": 2.4260153061026655e-06, "loss": 0.2347, "step": 14562 }, { "epoch": 0.6754638218923933, "grad_norm": 7.436267852783203, "learning_rate": 2.425384401281593e-06, "loss": 0.3645, "step": 14563 }, { "epoch": 0.6755102040816326, "grad_norm": 5.462649822235107, "learning_rate": 2.42475355223821e-06, "loss": 0.2972, "step": 14564 }, { "epoch": 0.675556586270872, "grad_norm": 4.0255327224731445, "learning_rate": 2.4241227589861843e-06, "loss": 0.2312, "step": 14565 }, { "epoch": 0.6756029684601114, "grad_norm": 7.992486476898193, "learning_rate": 2.4234920215391787e-06, "loss": 0.3202, "step": 14566 }, { "epoch": 0.6756493506493506, "grad_norm": 7.137789726257324, "learning_rate": 2.4228613399108604e-06, "loss": 0.368, "step": 14567 }, { "epoch": 0.67569573283859, "grad_norm": 17.925817489624023, "learning_rate": 2.422230714114891e-06, "loss": 0.4689, "step": 14568 }, { "epoch": 0.6757421150278293, "grad_norm": 5.523570537567139, "learning_rate": 2.4216001441649333e-06, "loss": 0.3271, "step": 14569 }, { "epoch": 0.6757884972170687, "grad_norm": 5.536259174346924, "learning_rate": 2.420969630074651e-06, "loss": 0.156, "step": 14570 }, { "epoch": 0.6758348794063079, "grad_norm": 10.728970527648926, "learning_rate": 2.4203391718576995e-06, "loss": 0.4303, "step": 14571 }, { "epoch": 0.6758812615955473, "grad_norm": 11.796093940734863, "learning_rate": 2.4197087695277386e-06, "loss": 0.3268, "step": 14572 }, { "epoch": 0.6759276437847866, "grad_norm": 13.239849090576172, "learning_rate": 2.419078423098427e-06, "loss": 0.4637, "step": 14573 }, { "epoch": 0.675974025974026, "grad_norm": 6.606042385101318, "learning_rate": 2.41844813258342e-06, "loss": 0.2925, "step": 14574 }, { "epoch": 0.6760204081632653, "grad_norm": 4.652603626251221, "learning_rate": 2.417817897996374e-06, "loss": 0.3148, "step": 14575 }, { "epoch": 0.6760667903525046, "grad_norm": 6.416119575500488, "learning_rate": 2.4171877193509397e-06, "loss": 0.3381, "step": 14576 }, { "epoch": 0.676113172541744, "grad_norm": 7.555934429168701, "learning_rate": 2.4165575966607706e-06, "loss": 0.3639, "step": 14577 }, { "epoch": 0.6761595547309833, "grad_norm": 9.686531066894531, "learning_rate": 2.415927529939519e-06, "loss": 0.3893, "step": 14578 }, { "epoch": 0.6762059369202227, "grad_norm": 7.615756511688232, "learning_rate": 2.415297519200835e-06, "loss": 0.3252, "step": 14579 }, { "epoch": 0.6762523191094619, "grad_norm": 6.2827863693237305, "learning_rate": 2.4146675644583685e-06, "loss": 0.3013, "step": 14580 }, { "epoch": 0.6762987012987013, "grad_norm": 6.5086283683776855, "learning_rate": 2.414037665725763e-06, "loss": 0.2609, "step": 14581 }, { "epoch": 0.6763450834879406, "grad_norm": 5.460644721984863, "learning_rate": 2.4134078230166685e-06, "loss": 0.3414, "step": 14582 }, { "epoch": 0.67639146567718, "grad_norm": 5.159332752227783, "learning_rate": 2.4127780363447284e-06, "loss": 0.2495, "step": 14583 }, { "epoch": 0.6764378478664193, "grad_norm": 12.222639083862305, "learning_rate": 2.4121483057235884e-06, "loss": 0.3976, "step": 14584 }, { "epoch": 0.6764842300556586, "grad_norm": 5.631401062011719, "learning_rate": 2.4115186311668914e-06, "loss": 0.2873, "step": 14585 }, { "epoch": 0.676530612244898, "grad_norm": 10.203368186950684, "learning_rate": 2.4108890126882766e-06, "loss": 0.3931, "step": 14586 }, { "epoch": 0.6765769944341373, "grad_norm": 7.3367695808410645, "learning_rate": 2.410259450301386e-06, "loss": 0.3475, "step": 14587 }, { "epoch": 0.6766233766233766, "grad_norm": 7.280767440795898, "learning_rate": 2.409629944019858e-06, "loss": 0.2325, "step": 14588 }, { "epoch": 0.6766697588126159, "grad_norm": 5.550899982452393, "learning_rate": 2.409000493857331e-06, "loss": 0.255, "step": 14589 }, { "epoch": 0.6767161410018553, "grad_norm": 6.964236259460449, "learning_rate": 2.4083710998274436e-06, "loss": 0.3049, "step": 14590 }, { "epoch": 0.6767625231910946, "grad_norm": 6.6163249015808105, "learning_rate": 2.4077417619438277e-06, "loss": 0.3488, "step": 14591 }, { "epoch": 0.676808905380334, "grad_norm": 7.385301113128662, "learning_rate": 2.407112480220119e-06, "loss": 0.3599, "step": 14592 }, { "epoch": 0.6768552875695732, "grad_norm": 6.396330833435059, "learning_rate": 2.4064832546699507e-06, "loss": 0.3143, "step": 14593 }, { "epoch": 0.6769016697588126, "grad_norm": 4.246237754821777, "learning_rate": 2.405854085306955e-06, "loss": 0.2605, "step": 14594 }, { "epoch": 0.676948051948052, "grad_norm": 6.214117050170898, "learning_rate": 2.4052249721447645e-06, "loss": 0.3579, "step": 14595 }, { "epoch": 0.6769944341372913, "grad_norm": 6.020373821258545, "learning_rate": 2.4045959151970037e-06, "loss": 0.2771, "step": 14596 }, { "epoch": 0.6770408163265306, "grad_norm": 7.026499271392822, "learning_rate": 2.403966914477304e-06, "loss": 0.323, "step": 14597 }, { "epoch": 0.6770871985157699, "grad_norm": 9.856963157653809, "learning_rate": 2.403337969999292e-06, "loss": 0.3959, "step": 14598 }, { "epoch": 0.6771335807050093, "grad_norm": 6.5536675453186035, "learning_rate": 2.402709081776593e-06, "loss": 0.357, "step": 14599 }, { "epoch": 0.6771799628942486, "grad_norm": 9.685995101928711, "learning_rate": 2.4020802498228333e-06, "loss": 0.3374, "step": 14600 }, { "epoch": 0.6772263450834879, "grad_norm": 8.462532043457031, "learning_rate": 2.401451474151634e-06, "loss": 0.3776, "step": 14601 }, { "epoch": 0.6772727272727272, "grad_norm": 8.99256420135498, "learning_rate": 2.4008227547766167e-06, "loss": 0.2592, "step": 14602 }, { "epoch": 0.6773191094619666, "grad_norm": 10.292276382446289, "learning_rate": 2.400194091711404e-06, "loss": 0.4242, "step": 14603 }, { "epoch": 0.677365491651206, "grad_norm": 5.248485565185547, "learning_rate": 2.3995654849696147e-06, "loss": 0.2387, "step": 14604 }, { "epoch": 0.6774118738404453, "grad_norm": 12.136983871459961, "learning_rate": 2.39893693456487e-06, "loss": 0.3937, "step": 14605 }, { "epoch": 0.6774582560296846, "grad_norm": 7.148245811462402, "learning_rate": 2.3983084405107826e-06, "loss": 0.3116, "step": 14606 }, { "epoch": 0.6775046382189239, "grad_norm": 4.864574909210205, "learning_rate": 2.3976800028209706e-06, "loss": 0.3417, "step": 14607 }, { "epoch": 0.6775510204081633, "grad_norm": 9.046762466430664, "learning_rate": 2.397051621509049e-06, "loss": 0.3497, "step": 14608 }, { "epoch": 0.6775974025974026, "grad_norm": 8.132129669189453, "learning_rate": 2.396423296588631e-06, "loss": 0.317, "step": 14609 }, { "epoch": 0.6776437847866419, "grad_norm": 13.140863418579102, "learning_rate": 2.3957950280733304e-06, "loss": 0.461, "step": 14610 }, { "epoch": 0.6776901669758812, "grad_norm": 6.153916358947754, "learning_rate": 2.3951668159767556e-06, "loss": 0.2933, "step": 14611 }, { "epoch": 0.6777365491651206, "grad_norm": 7.543120384216309, "learning_rate": 2.3945386603125176e-06, "loss": 0.2718, "step": 14612 }, { "epoch": 0.67778293135436, "grad_norm": 6.287403106689453, "learning_rate": 2.3939105610942254e-06, "loss": 0.3152, "step": 14613 }, { "epoch": 0.6778293135435992, "grad_norm": 6.660655975341797, "learning_rate": 2.3932825183354864e-06, "loss": 0.2745, "step": 14614 }, { "epoch": 0.6778756957328386, "grad_norm": 7.680825710296631, "learning_rate": 2.3926545320499082e-06, "loss": 0.2885, "step": 14615 }, { "epoch": 0.6779220779220779, "grad_norm": 5.584966659545898, "learning_rate": 2.392026602251093e-06, "loss": 0.3007, "step": 14616 }, { "epoch": 0.6779684601113173, "grad_norm": 10.35240364074707, "learning_rate": 2.3913987289526454e-06, "loss": 0.2831, "step": 14617 }, { "epoch": 0.6780148423005566, "grad_norm": 11.932154655456543, "learning_rate": 2.3907709121681686e-06, "loss": 0.2567, "step": 14618 }, { "epoch": 0.6780612244897959, "grad_norm": 6.4123921394348145, "learning_rate": 2.390143151911264e-06, "loss": 0.3019, "step": 14619 }, { "epoch": 0.6781076066790352, "grad_norm": 14.369016647338867, "learning_rate": 2.3895154481955328e-06, "loss": 0.4962, "step": 14620 }, { "epoch": 0.6781539888682746, "grad_norm": 12.171956062316895, "learning_rate": 2.388887801034571e-06, "loss": 0.4176, "step": 14621 }, { "epoch": 0.678200371057514, "grad_norm": 7.767702579498291, "learning_rate": 2.3882602104419783e-06, "loss": 0.3336, "step": 14622 }, { "epoch": 0.6782467532467532, "grad_norm": 11.315563201904297, "learning_rate": 2.38763267643135e-06, "loss": 0.3299, "step": 14623 }, { "epoch": 0.6782931354359926, "grad_norm": 4.844155311584473, "learning_rate": 2.3870051990162818e-06, "loss": 0.3597, "step": 14624 }, { "epoch": 0.6783395176252319, "grad_norm": 8.319406509399414, "learning_rate": 2.38637777821037e-06, "loss": 0.3817, "step": 14625 }, { "epoch": 0.6783858998144713, "grad_norm": 5.421435832977295, "learning_rate": 2.385750414027203e-06, "loss": 0.2904, "step": 14626 }, { "epoch": 0.6784322820037105, "grad_norm": 4.298797130584717, "learning_rate": 2.3851231064803744e-06, "loss": 0.256, "step": 14627 }, { "epoch": 0.6784786641929499, "grad_norm": 6.772572040557861, "learning_rate": 2.3844958555834745e-06, "loss": 0.3937, "step": 14628 }, { "epoch": 0.6785250463821892, "grad_norm": 6.11616849899292, "learning_rate": 2.3838686613500926e-06, "loss": 0.2755, "step": 14629 }, { "epoch": 0.6785714285714286, "grad_norm": 5.228402137756348, "learning_rate": 2.3832415237938163e-06, "loss": 0.3012, "step": 14630 }, { "epoch": 0.6786178107606679, "grad_norm": 10.677828788757324, "learning_rate": 2.3826144429282333e-06, "loss": 0.3696, "step": 14631 }, { "epoch": 0.6786641929499072, "grad_norm": 11.502568244934082, "learning_rate": 2.3819874187669266e-06, "loss": 0.5769, "step": 14632 }, { "epoch": 0.6787105751391466, "grad_norm": 7.990043640136719, "learning_rate": 2.381360451323482e-06, "loss": 0.2836, "step": 14633 }, { "epoch": 0.6787569573283859, "grad_norm": 6.891809940338135, "learning_rate": 2.380733540611481e-06, "loss": 0.3112, "step": 14634 }, { "epoch": 0.6788033395176253, "grad_norm": 6.671943664550781, "learning_rate": 2.380106686644507e-06, "loss": 0.3407, "step": 14635 }, { "epoch": 0.6788497217068645, "grad_norm": 4.834091663360596, "learning_rate": 2.3794798894361415e-06, "loss": 0.3458, "step": 14636 }, { "epoch": 0.6788961038961039, "grad_norm": 8.86471176147461, "learning_rate": 2.3788531489999606e-06, "loss": 0.3525, "step": 14637 }, { "epoch": 0.6789424860853432, "grad_norm": 13.626294136047363, "learning_rate": 2.378226465349543e-06, "loss": 0.6226, "step": 14638 }, { "epoch": 0.6789888682745826, "grad_norm": 8.508326530456543, "learning_rate": 2.3775998384984666e-06, "loss": 0.2909, "step": 14639 }, { "epoch": 0.6790352504638218, "grad_norm": 6.294663906097412, "learning_rate": 2.3769732684603064e-06, "loss": 0.3973, "step": 14640 }, { "epoch": 0.6790816326530612, "grad_norm": 7.8109917640686035, "learning_rate": 2.376346755248637e-06, "loss": 0.3292, "step": 14641 }, { "epoch": 0.6791280148423006, "grad_norm": 10.114370346069336, "learning_rate": 2.375720298877033e-06, "loss": 0.5552, "step": 14642 }, { "epoch": 0.6791743970315399, "grad_norm": 13.024556159973145, "learning_rate": 2.3750938993590627e-06, "loss": 0.3667, "step": 14643 }, { "epoch": 0.6792207792207792, "grad_norm": 9.434804916381836, "learning_rate": 2.3744675567082986e-06, "loss": 0.4201, "step": 14644 }, { "epoch": 0.6792671614100185, "grad_norm": 11.867246627807617, "learning_rate": 2.37384127093831e-06, "loss": 0.3985, "step": 14645 }, { "epoch": 0.6793135435992579, "grad_norm": 7.034730911254883, "learning_rate": 2.3732150420626653e-06, "loss": 0.3275, "step": 14646 }, { "epoch": 0.6793599257884972, "grad_norm": 6.4501872062683105, "learning_rate": 2.372588870094933e-06, "loss": 0.2733, "step": 14647 }, { "epoch": 0.6794063079777366, "grad_norm": 5.630244255065918, "learning_rate": 2.371962755048675e-06, "loss": 0.2995, "step": 14648 }, { "epoch": 0.6794526901669758, "grad_norm": 5.835719585418701, "learning_rate": 2.371336696937458e-06, "loss": 0.315, "step": 14649 }, { "epoch": 0.6794990723562152, "grad_norm": 5.450470447540283, "learning_rate": 2.370710695774845e-06, "loss": 0.3501, "step": 14650 }, { "epoch": 0.6795454545454546, "grad_norm": 13.220015525817871, "learning_rate": 2.370084751574398e-06, "loss": 0.4151, "step": 14651 }, { "epoch": 0.6795918367346939, "grad_norm": 9.101374626159668, "learning_rate": 2.3694588643496774e-06, "loss": 0.3404, "step": 14652 }, { "epoch": 0.6796382189239332, "grad_norm": 5.792309761047363, "learning_rate": 2.368833034114245e-06, "loss": 0.3335, "step": 14653 }, { "epoch": 0.6796846011131725, "grad_norm": 8.636518478393555, "learning_rate": 2.368207260881655e-06, "loss": 0.3007, "step": 14654 }, { "epoch": 0.6797309833024119, "grad_norm": 6.465022087097168, "learning_rate": 2.3675815446654676e-06, "loss": 0.3163, "step": 14655 }, { "epoch": 0.6797773654916512, "grad_norm": 8.022985458374023, "learning_rate": 2.3669558854792368e-06, "loss": 0.402, "step": 14656 }, { "epoch": 0.6798237476808905, "grad_norm": 8.510049819946289, "learning_rate": 2.366330283336518e-06, "loss": 0.4159, "step": 14657 }, { "epoch": 0.6798701298701298, "grad_norm": 7.010003566741943, "learning_rate": 2.3657047382508663e-06, "loss": 0.5076, "step": 14658 }, { "epoch": 0.6799165120593692, "grad_norm": 5.185482025146484, "learning_rate": 2.3650792502358304e-06, "loss": 0.3667, "step": 14659 }, { "epoch": 0.6799628942486086, "grad_norm": 7.232972621917725, "learning_rate": 2.3644538193049626e-06, "loss": 0.2502, "step": 14660 }, { "epoch": 0.6800092764378479, "grad_norm": 5.8610663414001465, "learning_rate": 2.3638284454718125e-06, "loss": 0.2579, "step": 14661 }, { "epoch": 0.6800556586270872, "grad_norm": 7.02105712890625, "learning_rate": 2.363203128749929e-06, "loss": 0.2443, "step": 14662 }, { "epoch": 0.6801020408163265, "grad_norm": 6.742513179779053, "learning_rate": 2.362577869152859e-06, "loss": 0.3483, "step": 14663 }, { "epoch": 0.6801484230055659, "grad_norm": 12.976160049438477, "learning_rate": 2.36195266669415e-06, "loss": 0.3466, "step": 14664 }, { "epoch": 0.6801948051948052, "grad_norm": 8.257779121398926, "learning_rate": 2.361327521387343e-06, "loss": 0.2471, "step": 14665 }, { "epoch": 0.6802411873840445, "grad_norm": 10.303535461425781, "learning_rate": 2.360702433245984e-06, "loss": 0.496, "step": 14666 }, { "epoch": 0.6802875695732838, "grad_norm": 6.659785747528076, "learning_rate": 2.360077402283614e-06, "loss": 0.2468, "step": 14667 }, { "epoch": 0.6803339517625232, "grad_norm": 6.113109588623047, "learning_rate": 2.3594524285137753e-06, "loss": 0.3699, "step": 14668 }, { "epoch": 0.6803803339517626, "grad_norm": 4.823902130126953, "learning_rate": 2.3588275119500086e-06, "loss": 0.2782, "step": 14669 }, { "epoch": 0.6804267161410018, "grad_norm": 7.664185523986816, "learning_rate": 2.3582026526058492e-06, "loss": 0.3384, "step": 14670 }, { "epoch": 0.6804730983302412, "grad_norm": 6.359391689300537, "learning_rate": 2.357577850494836e-06, "loss": 0.3362, "step": 14671 }, { "epoch": 0.6805194805194805, "grad_norm": 5.349801540374756, "learning_rate": 2.3569531056305043e-06, "loss": 0.3476, "step": 14672 }, { "epoch": 0.6805658627087199, "grad_norm": 7.215829372406006, "learning_rate": 2.3563284180263903e-06, "loss": 0.2766, "step": 14673 }, { "epoch": 0.6806122448979591, "grad_norm": 8.524601936340332, "learning_rate": 2.355703787696026e-06, "loss": 0.3649, "step": 14674 }, { "epoch": 0.6806586270871985, "grad_norm": 3.7695183753967285, "learning_rate": 2.3550792146529467e-06, "loss": 0.1417, "step": 14675 }, { "epoch": 0.6807050092764378, "grad_norm": 5.183448791503906, "learning_rate": 2.3544546989106793e-06, "loss": 0.3467, "step": 14676 }, { "epoch": 0.6807513914656772, "grad_norm": 6.918517589569092, "learning_rate": 2.3538302404827552e-06, "loss": 0.4711, "step": 14677 }, { "epoch": 0.6807977736549166, "grad_norm": 10.479010581970215, "learning_rate": 2.3532058393827033e-06, "loss": 0.3761, "step": 14678 }, { "epoch": 0.6808441558441558, "grad_norm": 7.220691680908203, "learning_rate": 2.352581495624051e-06, "loss": 0.2561, "step": 14679 }, { "epoch": 0.6808905380333952, "grad_norm": 4.565499305725098, "learning_rate": 2.351957209220326e-06, "loss": 0.1764, "step": 14680 }, { "epoch": 0.6809369202226345, "grad_norm": 8.448430061340332, "learning_rate": 2.35133298018505e-06, "loss": 0.3723, "step": 14681 }, { "epoch": 0.6809833024118739, "grad_norm": 9.562148094177246, "learning_rate": 2.350708808531747e-06, "loss": 0.4313, "step": 14682 }, { "epoch": 0.6810296846011131, "grad_norm": 5.971139430999756, "learning_rate": 2.350084694273941e-06, "loss": 0.2996, "step": 14683 }, { "epoch": 0.6810760667903525, "grad_norm": 6.0141921043396, "learning_rate": 2.349460637425152e-06, "loss": 0.2917, "step": 14684 }, { "epoch": 0.6811224489795918, "grad_norm": 5.371613502502441, "learning_rate": 2.3488366379989027e-06, "loss": 0.2803, "step": 14685 }, { "epoch": 0.6811688311688312, "grad_norm": 8.878410339355469, "learning_rate": 2.348212696008707e-06, "loss": 0.4165, "step": 14686 }, { "epoch": 0.6812152133580704, "grad_norm": 7.215484619140625, "learning_rate": 2.3475888114680843e-06, "loss": 0.3395, "step": 14687 }, { "epoch": 0.6812615955473098, "grad_norm": 4.320163726806641, "learning_rate": 2.346964984390552e-06, "loss": 0.2393, "step": 14688 }, { "epoch": 0.6813079777365492, "grad_norm": 10.605085372924805, "learning_rate": 2.3463412147896233e-06, "loss": 0.3254, "step": 14689 }, { "epoch": 0.6813543599257885, "grad_norm": 6.160386562347412, "learning_rate": 2.3457175026788122e-06, "loss": 0.2666, "step": 14690 }, { "epoch": 0.6814007421150279, "grad_norm": 14.406854629516602, "learning_rate": 2.345093848071634e-06, "loss": 0.4319, "step": 14691 }, { "epoch": 0.6814471243042671, "grad_norm": 7.250202178955078, "learning_rate": 2.3444702509815948e-06, "loss": 0.3573, "step": 14692 }, { "epoch": 0.6814935064935065, "grad_norm": 4.98521089553833, "learning_rate": 2.3438467114222074e-06, "loss": 0.2648, "step": 14693 }, { "epoch": 0.6815398886827458, "grad_norm": 8.347640037536621, "learning_rate": 2.3432232294069796e-06, "loss": 0.3144, "step": 14694 }, { "epoch": 0.6815862708719852, "grad_norm": 6.864507675170898, "learning_rate": 2.3425998049494193e-06, "loss": 0.3911, "step": 14695 }, { "epoch": 0.6816326530612244, "grad_norm": 6.159975051879883, "learning_rate": 2.341976438063035e-06, "loss": 0.4227, "step": 14696 }, { "epoch": 0.6816790352504638, "grad_norm": 5.847843647003174, "learning_rate": 2.3413531287613267e-06, "loss": 0.3142, "step": 14697 }, { "epoch": 0.6817254174397032, "grad_norm": 7.812591075897217, "learning_rate": 2.340729877057801e-06, "loss": 0.2918, "step": 14698 }, { "epoch": 0.6817717996289425, "grad_norm": 4.545709133148193, "learning_rate": 2.3401066829659595e-06, "loss": 0.3118, "step": 14699 }, { "epoch": 0.6818181818181818, "grad_norm": 4.372573375701904, "learning_rate": 2.3394835464993044e-06, "loss": 0.242, "step": 14700 }, { "epoch": 0.6818645640074211, "grad_norm": 5.022110939025879, "learning_rate": 2.3388604676713346e-06, "loss": 0.3805, "step": 14701 }, { "epoch": 0.6819109461966605, "grad_norm": 4.337222576141357, "learning_rate": 2.338237446495551e-06, "loss": 0.2751, "step": 14702 }, { "epoch": 0.6819573283858998, "grad_norm": 11.136974334716797, "learning_rate": 2.337614482985447e-06, "loss": 0.322, "step": 14703 }, { "epoch": 0.6820037105751392, "grad_norm": 6.672796726226807, "learning_rate": 2.3369915771545213e-06, "loss": 0.3298, "step": 14704 }, { "epoch": 0.6820500927643784, "grad_norm": 5.439123153686523, "learning_rate": 2.3363687290162683e-06, "loss": 0.2878, "step": 14705 }, { "epoch": 0.6820964749536178, "grad_norm": 9.433377265930176, "learning_rate": 2.3357459385841824e-06, "loss": 0.3943, "step": 14706 }, { "epoch": 0.6821428571428572, "grad_norm": 11.766443252563477, "learning_rate": 2.335123205871757e-06, "loss": 0.3595, "step": 14707 }, { "epoch": 0.6821892393320965, "grad_norm": 6.2646636962890625, "learning_rate": 2.33450053089248e-06, "loss": 0.3383, "step": 14708 }, { "epoch": 0.6822356215213358, "grad_norm": 8.347519874572754, "learning_rate": 2.333877913659843e-06, "loss": 0.3246, "step": 14709 }, { "epoch": 0.6822820037105751, "grad_norm": 7.93349027633667, "learning_rate": 2.3332553541873347e-06, "loss": 0.166, "step": 14710 }, { "epoch": 0.6823283858998145, "grad_norm": 5.704465389251709, "learning_rate": 2.332632852488443e-06, "loss": 0.3129, "step": 14711 }, { "epoch": 0.6823747680890538, "grad_norm": 3.873955726623535, "learning_rate": 2.332010408576653e-06, "loss": 0.2691, "step": 14712 }, { "epoch": 0.6824211502782931, "grad_norm": 5.369195461273193, "learning_rate": 2.3313880224654524e-06, "loss": 0.3574, "step": 14713 }, { "epoch": 0.6824675324675324, "grad_norm": 10.683399200439453, "learning_rate": 2.330765694168321e-06, "loss": 0.5343, "step": 14714 }, { "epoch": 0.6825139146567718, "grad_norm": 6.460085868835449, "learning_rate": 2.330143423698743e-06, "loss": 0.3027, "step": 14715 }, { "epoch": 0.6825602968460112, "grad_norm": 14.15268611907959, "learning_rate": 2.3295212110701994e-06, "loss": 0.5049, "step": 14716 }, { "epoch": 0.6826066790352505, "grad_norm": 5.1844162940979, "learning_rate": 2.3288990562961704e-06, "loss": 0.3042, "step": 14717 }, { "epoch": 0.6826530612244898, "grad_norm": 6.393123149871826, "learning_rate": 2.328276959390136e-06, "loss": 0.2086, "step": 14718 }, { "epoch": 0.6826994434137291, "grad_norm": 5.0128912925720215, "learning_rate": 2.3276549203655707e-06, "loss": 0.2732, "step": 14719 }, { "epoch": 0.6827458256029685, "grad_norm": 5.519514083862305, "learning_rate": 2.327032939235952e-06, "loss": 0.268, "step": 14720 }, { "epoch": 0.6827922077922078, "grad_norm": 4.793482303619385, "learning_rate": 2.3264110160147545e-06, "loss": 0.3159, "step": 14721 }, { "epoch": 0.6828385899814471, "grad_norm": 9.03431510925293, "learning_rate": 2.3257891507154524e-06, "loss": 0.4068, "step": 14722 }, { "epoch": 0.6828849721706864, "grad_norm": 4.8277058601379395, "learning_rate": 2.3251673433515174e-06, "loss": 0.3787, "step": 14723 }, { "epoch": 0.6829313543599258, "grad_norm": 11.936549186706543, "learning_rate": 2.3245455939364238e-06, "loss": 0.3279, "step": 14724 }, { "epoch": 0.6829777365491652, "grad_norm": 5.458961486816406, "learning_rate": 2.323923902483636e-06, "loss": 0.3298, "step": 14725 }, { "epoch": 0.6830241187384044, "grad_norm": 10.029707908630371, "learning_rate": 2.323302269006626e-06, "loss": 0.3705, "step": 14726 }, { "epoch": 0.6830705009276438, "grad_norm": 5.3729448318481445, "learning_rate": 2.32268069351886e-06, "loss": 0.2673, "step": 14727 }, { "epoch": 0.6831168831168831, "grad_norm": 4.551580429077148, "learning_rate": 2.3220591760338046e-06, "loss": 0.2775, "step": 14728 }, { "epoch": 0.6831632653061225, "grad_norm": 9.657328605651855, "learning_rate": 2.3214377165649273e-06, "loss": 0.3508, "step": 14729 }, { "epoch": 0.6832096474953617, "grad_norm": 8.94884967803955, "learning_rate": 2.320816315125687e-06, "loss": 0.3456, "step": 14730 }, { "epoch": 0.6832560296846011, "grad_norm": 6.3416523933410645, "learning_rate": 2.3201949717295473e-06, "loss": 0.3024, "step": 14731 }, { "epoch": 0.6833024118738404, "grad_norm": 5.9146575927734375, "learning_rate": 2.319573686389971e-06, "loss": 0.2331, "step": 14732 }, { "epoch": 0.6833487940630798, "grad_norm": 5.232816696166992, "learning_rate": 2.3189524591204165e-06, "loss": 0.328, "step": 14733 }, { "epoch": 0.6833951762523192, "grad_norm": 7.0919623374938965, "learning_rate": 2.3183312899343428e-06, "loss": 0.2122, "step": 14734 }, { "epoch": 0.6834415584415584, "grad_norm": 5.095644950866699, "learning_rate": 2.3177101788452094e-06, "loss": 0.319, "step": 14735 }, { "epoch": 0.6834879406307978, "grad_norm": 10.340785026550293, "learning_rate": 2.3170891258664686e-06, "loss": 0.4698, "step": 14736 }, { "epoch": 0.6835343228200371, "grad_norm": 7.950554847717285, "learning_rate": 2.3164681310115767e-06, "loss": 0.4105, "step": 14737 }, { "epoch": 0.6835807050092765, "grad_norm": 4.397334575653076, "learning_rate": 2.315847194293987e-06, "loss": 0.2712, "step": 14738 }, { "epoch": 0.6836270871985157, "grad_norm": 9.614992141723633, "learning_rate": 2.315226315727152e-06, "loss": 0.3683, "step": 14739 }, { "epoch": 0.6836734693877551, "grad_norm": 7.234879016876221, "learning_rate": 2.314605495324525e-06, "loss": 0.3495, "step": 14740 }, { "epoch": 0.6837198515769944, "grad_norm": 8.61962890625, "learning_rate": 2.313984733099552e-06, "loss": 0.4683, "step": 14741 }, { "epoch": 0.6837662337662338, "grad_norm": 12.223197937011719, "learning_rate": 2.3133640290656823e-06, "loss": 0.4503, "step": 14742 }, { "epoch": 0.683812615955473, "grad_norm": 8.805253028869629, "learning_rate": 2.3127433832363644e-06, "loss": 0.3609, "step": 14743 }, { "epoch": 0.6838589981447124, "grad_norm": 5.485641956329346, "learning_rate": 2.3121227956250435e-06, "loss": 0.3083, "step": 14744 }, { "epoch": 0.6839053803339518, "grad_norm": 4.611009120941162, "learning_rate": 2.311502266245164e-06, "loss": 0.266, "step": 14745 }, { "epoch": 0.6839517625231911, "grad_norm": 5.561392784118652, "learning_rate": 2.3108817951101726e-06, "loss": 0.344, "step": 14746 }, { "epoch": 0.6839981447124305, "grad_norm": 11.010708808898926, "learning_rate": 2.3102613822335067e-06, "loss": 0.3943, "step": 14747 }, { "epoch": 0.6840445269016697, "grad_norm": 11.479812622070312, "learning_rate": 2.309641027628609e-06, "loss": 0.3604, "step": 14748 }, { "epoch": 0.6840909090909091, "grad_norm": 13.7427396774292, "learning_rate": 2.30902073130892e-06, "loss": 0.4054, "step": 14749 }, { "epoch": 0.6841372912801484, "grad_norm": 10.60861587524414, "learning_rate": 2.3084004932878766e-06, "loss": 0.3056, "step": 14750 }, { "epoch": 0.6841836734693878, "grad_norm": 7.141334533691406, "learning_rate": 2.307780313578919e-06, "loss": 0.4117, "step": 14751 }, { "epoch": 0.684230055658627, "grad_norm": 7.2893757820129395, "learning_rate": 2.3071601921954797e-06, "loss": 0.2404, "step": 14752 }, { "epoch": 0.6842764378478664, "grad_norm": 7.993531703948975, "learning_rate": 2.306540129150994e-06, "loss": 0.2361, "step": 14753 }, { "epoch": 0.6843228200371058, "grad_norm": 7.965747356414795, "learning_rate": 2.3059201244588953e-06, "loss": 0.3713, "step": 14754 }, { "epoch": 0.6843692022263451, "grad_norm": 6.772041320800781, "learning_rate": 2.3053001781326168e-06, "loss": 0.2839, "step": 14755 }, { "epoch": 0.6844155844155844, "grad_norm": 6.2774200439453125, "learning_rate": 2.304680290185588e-06, "loss": 0.3272, "step": 14756 }, { "epoch": 0.6844619666048237, "grad_norm": 4.664498805999756, "learning_rate": 2.304060460631242e-06, "loss": 0.2565, "step": 14757 }, { "epoch": 0.6845083487940631, "grad_norm": 7.659746170043945, "learning_rate": 2.3034406894830014e-06, "loss": 0.3596, "step": 14758 }, { "epoch": 0.6845547309833024, "grad_norm": 14.4430513381958, "learning_rate": 2.3028209767542965e-06, "loss": 0.4045, "step": 14759 }, { "epoch": 0.6846011131725418, "grad_norm": 14.255593299865723, "learning_rate": 2.302201322458552e-06, "loss": 0.4748, "step": 14760 }, { "epoch": 0.684647495361781, "grad_norm": 12.091797828674316, "learning_rate": 2.3015817266091934e-06, "loss": 0.3881, "step": 14761 }, { "epoch": 0.6846938775510204, "grad_norm": 6.4863786697387695, "learning_rate": 2.300962189219645e-06, "loss": 0.3507, "step": 14762 }, { "epoch": 0.6847402597402598, "grad_norm": 16.6887149810791, "learning_rate": 2.300342710303325e-06, "loss": 0.3679, "step": 14763 }, { "epoch": 0.6847866419294991, "grad_norm": 10.439476013183594, "learning_rate": 2.2997232898736567e-06, "loss": 0.3826, "step": 14764 }, { "epoch": 0.6848330241187384, "grad_norm": 9.189319610595703, "learning_rate": 2.299103927944059e-06, "loss": 0.4429, "step": 14765 }, { "epoch": 0.6848794063079777, "grad_norm": 11.657502174377441, "learning_rate": 2.2984846245279504e-06, "loss": 0.4228, "step": 14766 }, { "epoch": 0.6849257884972171, "grad_norm": 5.259485721588135, "learning_rate": 2.297865379638749e-06, "loss": 0.3002, "step": 14767 }, { "epoch": 0.6849721706864564, "grad_norm": 9.297090530395508, "learning_rate": 2.297246193289867e-06, "loss": 0.2981, "step": 14768 }, { "epoch": 0.6850185528756957, "grad_norm": 6.424492359161377, "learning_rate": 2.296627065494721e-06, "loss": 0.3037, "step": 14769 }, { "epoch": 0.685064935064935, "grad_norm": 5.475498199462891, "learning_rate": 2.296007996266724e-06, "loss": 0.2549, "step": 14770 }, { "epoch": 0.6851113172541744, "grad_norm": 8.11726188659668, "learning_rate": 2.295388985619287e-06, "loss": 0.3285, "step": 14771 }, { "epoch": 0.6851576994434138, "grad_norm": 10.57805347442627, "learning_rate": 2.2947700335658206e-06, "loss": 0.4311, "step": 14772 }, { "epoch": 0.685204081632653, "grad_norm": 6.611851215362549, "learning_rate": 2.2941511401197376e-06, "loss": 0.2809, "step": 14773 }, { "epoch": 0.6852504638218924, "grad_norm": 14.935501098632812, "learning_rate": 2.2935323052944404e-06, "loss": 0.6698, "step": 14774 }, { "epoch": 0.6852968460111317, "grad_norm": 5.657480239868164, "learning_rate": 2.2929135291033384e-06, "loss": 0.294, "step": 14775 }, { "epoch": 0.6853432282003711, "grad_norm": 9.5563325881958, "learning_rate": 2.292294811559837e-06, "loss": 0.3258, "step": 14776 }, { "epoch": 0.6853896103896104, "grad_norm": 4.519175052642822, "learning_rate": 2.29167615267734e-06, "loss": 0.2766, "step": 14777 }, { "epoch": 0.6854359925788497, "grad_norm": 4.726627349853516, "learning_rate": 2.291057552469253e-06, "loss": 0.2223, "step": 14778 }, { "epoch": 0.685482374768089, "grad_norm": 5.822312355041504, "learning_rate": 2.2904390109489733e-06, "loss": 0.3611, "step": 14779 }, { "epoch": 0.6855287569573284, "grad_norm": 4.962779998779297, "learning_rate": 2.2898205281299035e-06, "loss": 0.2604, "step": 14780 }, { "epoch": 0.6855751391465678, "grad_norm": 6.573885440826416, "learning_rate": 2.289202104025442e-06, "loss": 0.3703, "step": 14781 }, { "epoch": 0.685621521335807, "grad_norm": 4.152446746826172, "learning_rate": 2.288583738648988e-06, "loss": 0.2859, "step": 14782 }, { "epoch": 0.6856679035250464, "grad_norm": 8.158768653869629, "learning_rate": 2.287965432013936e-06, "loss": 0.3344, "step": 14783 }, { "epoch": 0.6857142857142857, "grad_norm": 10.668242454528809, "learning_rate": 2.2873471841336853e-06, "loss": 0.3309, "step": 14784 }, { "epoch": 0.6857606679035251, "grad_norm": 6.427052021026611, "learning_rate": 2.2867289950216246e-06, "loss": 0.2315, "step": 14785 }, { "epoch": 0.6858070500927643, "grad_norm": 4.883123874664307, "learning_rate": 2.2861108646911497e-06, "loss": 0.2472, "step": 14786 }, { "epoch": 0.6858534322820037, "grad_norm": 9.464883804321289, "learning_rate": 2.2854927931556508e-06, "loss": 0.4153, "step": 14787 }, { "epoch": 0.685899814471243, "grad_norm": 8.393285751342773, "learning_rate": 2.2848747804285194e-06, "loss": 0.3466, "step": 14788 }, { "epoch": 0.6859461966604824, "grad_norm": 18.820758819580078, "learning_rate": 2.284256826523145e-06, "loss": 0.509, "step": 14789 }, { "epoch": 0.6859925788497218, "grad_norm": 6.6916279792785645, "learning_rate": 2.2836389314529127e-06, "loss": 0.3056, "step": 14790 }, { "epoch": 0.686038961038961, "grad_norm": 5.986746311187744, "learning_rate": 2.28302109523121e-06, "loss": 0.2758, "step": 14791 }, { "epoch": 0.6860853432282004, "grad_norm": 10.153148651123047, "learning_rate": 2.282403317871422e-06, "loss": 0.3512, "step": 14792 }, { "epoch": 0.6861317254174397, "grad_norm": 7.4493608474731445, "learning_rate": 2.281785599386934e-06, "loss": 0.3606, "step": 14793 }, { "epoch": 0.6861781076066791, "grad_norm": 6.498610973358154, "learning_rate": 2.2811679397911255e-06, "loss": 0.3025, "step": 14794 }, { "epoch": 0.6862244897959183, "grad_norm": 6.946633338928223, "learning_rate": 2.2805503390973827e-06, "loss": 0.3399, "step": 14795 }, { "epoch": 0.6862708719851577, "grad_norm": 11.513962745666504, "learning_rate": 2.27993279731908e-06, "loss": 0.3959, "step": 14796 }, { "epoch": 0.686317254174397, "grad_norm": 8.896835327148438, "learning_rate": 2.2793153144695982e-06, "loss": 0.4233, "step": 14797 }, { "epoch": 0.6863636363636364, "grad_norm": 7.613856315612793, "learning_rate": 2.278697890562316e-06, "loss": 0.3082, "step": 14798 }, { "epoch": 0.6864100185528756, "grad_norm": 4.427759647369385, "learning_rate": 2.278080525610608e-06, "loss": 0.295, "step": 14799 }, { "epoch": 0.686456400742115, "grad_norm": 4.6392822265625, "learning_rate": 2.2774632196278525e-06, "loss": 0.2098, "step": 14800 }, { "epoch": 0.6865027829313544, "grad_norm": 4.925384998321533, "learning_rate": 2.276845972627417e-06, "loss": 0.3276, "step": 14801 }, { "epoch": 0.6865491651205937, "grad_norm": 10.689565658569336, "learning_rate": 2.276228784622678e-06, "loss": 0.3673, "step": 14802 }, { "epoch": 0.6865955473098331, "grad_norm": 9.729095458984375, "learning_rate": 2.2756116556270063e-06, "loss": 0.4252, "step": 14803 }, { "epoch": 0.6866419294990723, "grad_norm": 10.229727745056152, "learning_rate": 2.2749945856537705e-06, "loss": 0.3258, "step": 14804 }, { "epoch": 0.6866883116883117, "grad_norm": 4.279293537139893, "learning_rate": 2.27437757471634e-06, "loss": 0.2996, "step": 14805 }, { "epoch": 0.686734693877551, "grad_norm": 7.922767639160156, "learning_rate": 2.2737606228280834e-06, "loss": 0.3269, "step": 14806 }, { "epoch": 0.6867810760667904, "grad_norm": 5.305405616760254, "learning_rate": 2.2731437300023627e-06, "loss": 0.358, "step": 14807 }, { "epoch": 0.6868274582560296, "grad_norm": 6.027668476104736, "learning_rate": 2.2725268962525454e-06, "loss": 0.2855, "step": 14808 }, { "epoch": 0.686873840445269, "grad_norm": 3.6180872917175293, "learning_rate": 2.2719101215919943e-06, "loss": 0.1362, "step": 14809 }, { "epoch": 0.6869202226345084, "grad_norm": 8.66897201538086, "learning_rate": 2.2712934060340712e-06, "loss": 0.3826, "step": 14810 }, { "epoch": 0.6869666048237477, "grad_norm": 6.864563941955566, "learning_rate": 2.2706767495921395e-06, "loss": 0.2957, "step": 14811 }, { "epoch": 0.687012987012987, "grad_norm": 12.558623313903809, "learning_rate": 2.270060152279554e-06, "loss": 0.4998, "step": 14812 }, { "epoch": 0.6870593692022263, "grad_norm": 5.41527795791626, "learning_rate": 2.269443614109676e-06, "loss": 0.3198, "step": 14813 }, { "epoch": 0.6871057513914657, "grad_norm": 11.851675987243652, "learning_rate": 2.2688271350958613e-06, "loss": 0.4189, "step": 14814 }, { "epoch": 0.687152133580705, "grad_norm": 6.5584025382995605, "learning_rate": 2.2682107152514665e-06, "loss": 0.3812, "step": 14815 }, { "epoch": 0.6871985157699444, "grad_norm": 7.692390441894531, "learning_rate": 2.2675943545898455e-06, "loss": 0.3436, "step": 14816 }, { "epoch": 0.6872448979591836, "grad_norm": 5.092217445373535, "learning_rate": 2.2669780531243533e-06, "loss": 0.2818, "step": 14817 }, { "epoch": 0.687291280148423, "grad_norm": 8.591890335083008, "learning_rate": 2.266361810868339e-06, "loss": 0.3705, "step": 14818 }, { "epoch": 0.6873376623376624, "grad_norm": 4.928102970123291, "learning_rate": 2.2657456278351535e-06, "loss": 0.3437, "step": 14819 }, { "epoch": 0.6873840445269017, "grad_norm": 9.71430492401123, "learning_rate": 2.265129504038147e-06, "loss": 0.4449, "step": 14820 }, { "epoch": 0.687430426716141, "grad_norm": 4.416027069091797, "learning_rate": 2.264513439490668e-06, "loss": 0.2984, "step": 14821 }, { "epoch": 0.6874768089053803, "grad_norm": 11.389745712280273, "learning_rate": 2.263897434206063e-06, "loss": 0.4395, "step": 14822 }, { "epoch": 0.6875231910946197, "grad_norm": 6.4402594566345215, "learning_rate": 2.263281488197676e-06, "loss": 0.2676, "step": 14823 }, { "epoch": 0.687569573283859, "grad_norm": 5.160314559936523, "learning_rate": 2.262665601478852e-06, "loss": 0.2836, "step": 14824 }, { "epoch": 0.6876159554730983, "grad_norm": 7.128627300262451, "learning_rate": 2.2620497740629337e-06, "loss": 0.2809, "step": 14825 }, { "epoch": 0.6876623376623376, "grad_norm": 5.832573413848877, "learning_rate": 2.2614340059632633e-06, "loss": 0.3127, "step": 14826 }, { "epoch": 0.687708719851577, "grad_norm": 8.987319946289062, "learning_rate": 2.26081829719318e-06, "loss": 0.4055, "step": 14827 }, { "epoch": 0.6877551020408164, "grad_norm": 8.082541465759277, "learning_rate": 2.260202647766026e-06, "loss": 0.324, "step": 14828 }, { "epoch": 0.6878014842300556, "grad_norm": 6.834960460662842, "learning_rate": 2.2595870576951353e-06, "loss": 0.3415, "step": 14829 }, { "epoch": 0.687847866419295, "grad_norm": 6.56669282913208, "learning_rate": 2.2589715269938447e-06, "loss": 0.4384, "step": 14830 }, { "epoch": 0.6878942486085343, "grad_norm": 4.622048854827881, "learning_rate": 2.2583560556754903e-06, "loss": 0.3342, "step": 14831 }, { "epoch": 0.6879406307977737, "grad_norm": 5.610946178436279, "learning_rate": 2.2577406437534055e-06, "loss": 0.3731, "step": 14832 }, { "epoch": 0.687987012987013, "grad_norm": 11.963418960571289, "learning_rate": 2.2571252912409257e-06, "loss": 0.4235, "step": 14833 }, { "epoch": 0.6880333951762523, "grad_norm": 4.20382833480835, "learning_rate": 2.256509998151378e-06, "loss": 0.3223, "step": 14834 }, { "epoch": 0.6880797773654916, "grad_norm": 8.787505149841309, "learning_rate": 2.255894764498094e-06, "loss": 0.3081, "step": 14835 }, { "epoch": 0.688126159554731, "grad_norm": 6.1234869956970215, "learning_rate": 2.2552795902944025e-06, "loss": 0.2753, "step": 14836 }, { "epoch": 0.6881725417439704, "grad_norm": 6.477189540863037, "learning_rate": 2.2546644755536307e-06, "loss": 0.3259, "step": 14837 }, { "epoch": 0.6882189239332096, "grad_norm": 5.268805980682373, "learning_rate": 2.254049420289105e-06, "loss": 0.3465, "step": 14838 }, { "epoch": 0.688265306122449, "grad_norm": 5.926504611968994, "learning_rate": 2.2534344245141526e-06, "loss": 0.3374, "step": 14839 }, { "epoch": 0.6883116883116883, "grad_norm": 5.540092468261719, "learning_rate": 2.252819488242093e-06, "loss": 0.3395, "step": 14840 }, { "epoch": 0.6883580705009277, "grad_norm": 8.36345100402832, "learning_rate": 2.2522046114862495e-06, "loss": 0.39, "step": 14841 }, { "epoch": 0.6884044526901669, "grad_norm": 19.574342727661133, "learning_rate": 2.251589794259944e-06, "loss": 0.4432, "step": 14842 }, { "epoch": 0.6884508348794063, "grad_norm": 7.981081962585449, "learning_rate": 2.2509750365764963e-06, "loss": 0.3417, "step": 14843 }, { "epoch": 0.6884972170686456, "grad_norm": 7.2908101081848145, "learning_rate": 2.250360338449226e-06, "loss": 0.2811, "step": 14844 }, { "epoch": 0.688543599257885, "grad_norm": 9.263284683227539, "learning_rate": 2.2497456998914472e-06, "loss": 0.3375, "step": 14845 }, { "epoch": 0.6885899814471244, "grad_norm": 5.6848344802856445, "learning_rate": 2.2491311209164774e-06, "loss": 0.3802, "step": 14846 }, { "epoch": 0.6886363636363636, "grad_norm": 6.6665873527526855, "learning_rate": 2.24851660153763e-06, "loss": 0.3435, "step": 14847 }, { "epoch": 0.688682745825603, "grad_norm": 9.221139907836914, "learning_rate": 2.24790214176822e-06, "loss": 0.4173, "step": 14848 }, { "epoch": 0.6887291280148423, "grad_norm": 14.44981861114502, "learning_rate": 2.24728774162156e-06, "loss": 0.5376, "step": 14849 }, { "epoch": 0.6887755102040817, "grad_norm": 6.623838424682617, "learning_rate": 2.2466734011109575e-06, "loss": 0.2978, "step": 14850 }, { "epoch": 0.6888218923933209, "grad_norm": 11.353865623474121, "learning_rate": 2.2460591202497234e-06, "loss": 0.3661, "step": 14851 }, { "epoch": 0.6888682745825603, "grad_norm": 13.82660961151123, "learning_rate": 2.2454448990511655e-06, "loss": 0.4778, "step": 14852 }, { "epoch": 0.6889146567717996, "grad_norm": 6.835427761077881, "learning_rate": 2.2448307375285923e-06, "loss": 0.3207, "step": 14853 }, { "epoch": 0.688961038961039, "grad_norm": 8.314531326293945, "learning_rate": 2.2442166356953064e-06, "loss": 0.3989, "step": 14854 }, { "epoch": 0.6890074211502782, "grad_norm": 7.795554161071777, "learning_rate": 2.2436025935646165e-06, "loss": 0.3577, "step": 14855 }, { "epoch": 0.6890538033395176, "grad_norm": 7.248748779296875, "learning_rate": 2.24298861114982e-06, "loss": 0.3321, "step": 14856 }, { "epoch": 0.689100185528757, "grad_norm": 7.699543476104736, "learning_rate": 2.2423746884642215e-06, "loss": 0.2893, "step": 14857 }, { "epoch": 0.6891465677179963, "grad_norm": 12.405604362487793, "learning_rate": 2.241760825521121e-06, "loss": 0.4549, "step": 14858 }, { "epoch": 0.6891929499072357, "grad_norm": 11.635124206542969, "learning_rate": 2.241147022333817e-06, "loss": 0.3792, "step": 14859 }, { "epoch": 0.6892393320964749, "grad_norm": 3.7639124393463135, "learning_rate": 2.2405332789156098e-06, "loss": 0.3129, "step": 14860 }, { "epoch": 0.6892857142857143, "grad_norm": 7.672171115875244, "learning_rate": 2.2399195952797915e-06, "loss": 0.3319, "step": 14861 }, { "epoch": 0.6893320964749536, "grad_norm": 9.195556640625, "learning_rate": 2.2393059714396597e-06, "loss": 0.3515, "step": 14862 }, { "epoch": 0.689378478664193, "grad_norm": 6.759893417358398, "learning_rate": 2.2386924074085074e-06, "loss": 0.3487, "step": 14863 }, { "epoch": 0.6894248608534322, "grad_norm": 10.68297290802002, "learning_rate": 2.238078903199628e-06, "loss": 0.3548, "step": 14864 }, { "epoch": 0.6894712430426716, "grad_norm": 6.0675787925720215, "learning_rate": 2.2374654588263145e-06, "loss": 0.2681, "step": 14865 }, { "epoch": 0.689517625231911, "grad_norm": 14.538558006286621, "learning_rate": 2.236852074301852e-06, "loss": 0.3185, "step": 14866 }, { "epoch": 0.6895640074211503, "grad_norm": 7.036942481994629, "learning_rate": 2.2362387496395333e-06, "loss": 0.1945, "step": 14867 }, { "epoch": 0.6896103896103896, "grad_norm": 7.006154537200928, "learning_rate": 2.235625484852643e-06, "loss": 0.2592, "step": 14868 }, { "epoch": 0.6896567717996289, "grad_norm": 7.35872745513916, "learning_rate": 2.2350122799544693e-06, "loss": 0.2772, "step": 14869 }, { "epoch": 0.6897031539888683, "grad_norm": 4.145562648773193, "learning_rate": 2.234399134958298e-06, "loss": 0.2432, "step": 14870 }, { "epoch": 0.6897495361781076, "grad_norm": 6.273867607116699, "learning_rate": 2.233786049877409e-06, "loss": 0.3489, "step": 14871 }, { "epoch": 0.689795918367347, "grad_norm": 13.407095909118652, "learning_rate": 2.2331730247250857e-06, "loss": 0.3709, "step": 14872 }, { "epoch": 0.6898423005565862, "grad_norm": 5.82363748550415, "learning_rate": 2.2325600595146094e-06, "loss": 0.2748, "step": 14873 }, { "epoch": 0.6898886827458256, "grad_norm": 7.685760974884033, "learning_rate": 2.23194715425926e-06, "loss": 0.2691, "step": 14874 }, { "epoch": 0.689935064935065, "grad_norm": 7.420376300811768, "learning_rate": 2.2313343089723177e-06, "loss": 0.3291, "step": 14875 }, { "epoch": 0.6899814471243043, "grad_norm": 6.817675590515137, "learning_rate": 2.2307215236670553e-06, "loss": 0.2988, "step": 14876 }, { "epoch": 0.6900278293135436, "grad_norm": 6.449349880218506, "learning_rate": 2.23010879835675e-06, "loss": 0.3524, "step": 14877 }, { "epoch": 0.6900742115027829, "grad_norm": 6.583075046539307, "learning_rate": 2.229496133054677e-06, "loss": 0.3456, "step": 14878 }, { "epoch": 0.6901205936920223, "grad_norm": 10.560988426208496, "learning_rate": 2.228883527774109e-06, "loss": 0.4205, "step": 14879 }, { "epoch": 0.6901669758812616, "grad_norm": 4.975880146026611, "learning_rate": 2.2282709825283193e-06, "loss": 0.3139, "step": 14880 }, { "epoch": 0.6902133580705009, "grad_norm": 11.349811553955078, "learning_rate": 2.2276584973305753e-06, "loss": 0.4365, "step": 14881 }, { "epoch": 0.6902597402597402, "grad_norm": 13.90151309967041, "learning_rate": 2.2270460721941474e-06, "loss": 0.4952, "step": 14882 }, { "epoch": 0.6903061224489796, "grad_norm": 4.486303806304932, "learning_rate": 2.226433707132304e-06, "loss": 0.1796, "step": 14883 }, { "epoch": 0.690352504638219, "grad_norm": 4.084699630737305, "learning_rate": 2.225821402158311e-06, "loss": 0.2596, "step": 14884 }, { "epoch": 0.6903988868274582, "grad_norm": 5.201681137084961, "learning_rate": 2.225209157285437e-06, "loss": 0.2955, "step": 14885 }, { "epoch": 0.6904452690166976, "grad_norm": 5.64768123626709, "learning_rate": 2.22459697252694e-06, "loss": 0.2717, "step": 14886 }, { "epoch": 0.6904916512059369, "grad_norm": 7.279439449310303, "learning_rate": 2.2239848478960863e-06, "loss": 0.366, "step": 14887 }, { "epoch": 0.6905380333951763, "grad_norm": 5.793691158294678, "learning_rate": 2.223372783406137e-06, "loss": 0.3421, "step": 14888 }, { "epoch": 0.6905844155844156, "grad_norm": 10.233487129211426, "learning_rate": 2.222760779070351e-06, "loss": 0.3436, "step": 14889 }, { "epoch": 0.6906307977736549, "grad_norm": 6.788992404937744, "learning_rate": 2.2221488349019903e-06, "loss": 0.3106, "step": 14890 }, { "epoch": 0.6906771799628942, "grad_norm": 12.401358604431152, "learning_rate": 2.221536950914308e-06, "loss": 0.356, "step": 14891 }, { "epoch": 0.6907235621521336, "grad_norm": 4.589908599853516, "learning_rate": 2.2209251271205624e-06, "loss": 0.3127, "step": 14892 }, { "epoch": 0.690769944341373, "grad_norm": 7.315242290496826, "learning_rate": 2.2203133635340073e-06, "loss": 0.3365, "step": 14893 }, { "epoch": 0.6908163265306122, "grad_norm": 5.247128963470459, "learning_rate": 2.219701660167898e-06, "loss": 0.3557, "step": 14894 }, { "epoch": 0.6908627087198516, "grad_norm": 16.76534652709961, "learning_rate": 2.219090017035485e-06, "loss": 0.3906, "step": 14895 }, { "epoch": 0.6909090909090909, "grad_norm": 7.14544153213501, "learning_rate": 2.2184784341500226e-06, "loss": 0.3274, "step": 14896 }, { "epoch": 0.6909554730983303, "grad_norm": 4.991472244262695, "learning_rate": 2.2178669115247555e-06, "loss": 0.2624, "step": 14897 }, { "epoch": 0.6910018552875695, "grad_norm": 6.816911220550537, "learning_rate": 2.217255449172934e-06, "loss": 0.3826, "step": 14898 }, { "epoch": 0.6910482374768089, "grad_norm": 10.363510131835938, "learning_rate": 2.216644047107806e-06, "loss": 0.4496, "step": 14899 }, { "epoch": 0.6910946196660482, "grad_norm": 5.783421516418457, "learning_rate": 2.2160327053426168e-06, "loss": 0.319, "step": 14900 }, { "epoch": 0.6911410018552876, "grad_norm": 4.688385009765625, "learning_rate": 2.215421423890612e-06, "loss": 0.2169, "step": 14901 }, { "epoch": 0.691187384044527, "grad_norm": 4.890621662139893, "learning_rate": 2.214810202765032e-06, "loss": 0.209, "step": 14902 }, { "epoch": 0.6912337662337662, "grad_norm": 10.294697761535645, "learning_rate": 2.2141990419791194e-06, "loss": 0.396, "step": 14903 }, { "epoch": 0.6912801484230056, "grad_norm": 5.194399356842041, "learning_rate": 2.2135879415461152e-06, "loss": 0.3433, "step": 14904 }, { "epoch": 0.6913265306122449, "grad_norm": 10.642932891845703, "learning_rate": 2.212976901479259e-06, "loss": 0.3855, "step": 14905 }, { "epoch": 0.6913729128014843, "grad_norm": 10.749670028686523, "learning_rate": 2.2123659217917896e-06, "loss": 0.4988, "step": 14906 }, { "epoch": 0.6914192949907235, "grad_norm": 9.419107437133789, "learning_rate": 2.211755002496941e-06, "loss": 0.3889, "step": 14907 }, { "epoch": 0.6914656771799629, "grad_norm": 6.119392395019531, "learning_rate": 2.211144143607949e-06, "loss": 0.3109, "step": 14908 }, { "epoch": 0.6915120593692022, "grad_norm": 5.984154224395752, "learning_rate": 2.2105333451380486e-06, "loss": 0.3355, "step": 14909 }, { "epoch": 0.6915584415584416, "grad_norm": 9.056218147277832, "learning_rate": 2.209922607100472e-06, "loss": 0.252, "step": 14910 }, { "epoch": 0.6916048237476808, "grad_norm": 5.693154335021973, "learning_rate": 2.209311929508452e-06, "loss": 0.287, "step": 14911 }, { "epoch": 0.6916512059369202, "grad_norm": 6.484076023101807, "learning_rate": 2.2087013123752155e-06, "loss": 0.2909, "step": 14912 }, { "epoch": 0.6916975881261596, "grad_norm": 6.244047164916992, "learning_rate": 2.2080907557139926e-06, "loss": 0.3465, "step": 14913 }, { "epoch": 0.6917439703153989, "grad_norm": 7.785481929779053, "learning_rate": 2.207480259538011e-06, "loss": 0.5148, "step": 14914 }, { "epoch": 0.6917903525046383, "grad_norm": 5.39111852645874, "learning_rate": 2.206869823860497e-06, "loss": 0.2488, "step": 14915 }, { "epoch": 0.6918367346938775, "grad_norm": 10.209233283996582, "learning_rate": 2.2062594486946765e-06, "loss": 0.3232, "step": 14916 }, { "epoch": 0.6918831168831169, "grad_norm": 12.587681770324707, "learning_rate": 2.2056491340537695e-06, "loss": 0.4825, "step": 14917 }, { "epoch": 0.6919294990723562, "grad_norm": 5.158040523529053, "learning_rate": 2.2050388799510005e-06, "loss": 0.3716, "step": 14918 }, { "epoch": 0.6919758812615956, "grad_norm": 7.081808567047119, "learning_rate": 2.2044286863995896e-06, "loss": 0.4026, "step": 14919 }, { "epoch": 0.6920222634508348, "grad_norm": 8.0337553024292, "learning_rate": 2.203818553412757e-06, "loss": 0.4667, "step": 14920 }, { "epoch": 0.6920686456400742, "grad_norm": 4.946702480316162, "learning_rate": 2.2032084810037224e-06, "loss": 0.2149, "step": 14921 }, { "epoch": 0.6921150278293136, "grad_norm": 5.004302978515625, "learning_rate": 2.202598469185699e-06, "loss": 0.3555, "step": 14922 }, { "epoch": 0.6921614100185529, "grad_norm": 8.389008522033691, "learning_rate": 2.2019885179719047e-06, "loss": 0.3184, "step": 14923 }, { "epoch": 0.6922077922077922, "grad_norm": 9.800889015197754, "learning_rate": 2.2013786273755527e-06, "loss": 0.3509, "step": 14924 }, { "epoch": 0.6922541743970315, "grad_norm": 8.575690269470215, "learning_rate": 2.2007687974098568e-06, "loss": 0.4404, "step": 14925 }, { "epoch": 0.6923005565862709, "grad_norm": 6.962328910827637, "learning_rate": 2.200159028088031e-06, "loss": 0.3916, "step": 14926 }, { "epoch": 0.6923469387755102, "grad_norm": 7.224905490875244, "learning_rate": 2.1995493194232803e-06, "loss": 0.3156, "step": 14927 }, { "epoch": 0.6923933209647495, "grad_norm": 6.429095268249512, "learning_rate": 2.198939671428817e-06, "loss": 0.2644, "step": 14928 }, { "epoch": 0.6924397031539888, "grad_norm": 4.396102428436279, "learning_rate": 2.1983300841178486e-06, "loss": 0.268, "step": 14929 }, { "epoch": 0.6924860853432282, "grad_norm": 7.164990425109863, "learning_rate": 2.1977205575035802e-06, "loss": 0.3044, "step": 14930 }, { "epoch": 0.6925324675324676, "grad_norm": 11.255924224853516, "learning_rate": 2.197111091599219e-06, "loss": 0.3618, "step": 14931 }, { "epoch": 0.6925788497217069, "grad_norm": 11.606179237365723, "learning_rate": 2.196501686417969e-06, "loss": 0.4141, "step": 14932 }, { "epoch": 0.6926252319109462, "grad_norm": 8.323868751525879, "learning_rate": 2.1958923419730288e-06, "loss": 0.3412, "step": 14933 }, { "epoch": 0.6926716141001855, "grad_norm": 7.543543815612793, "learning_rate": 2.195283058277603e-06, "loss": 0.3197, "step": 14934 }, { "epoch": 0.6927179962894249, "grad_norm": 9.127276420593262, "learning_rate": 2.1946738353448898e-06, "loss": 0.3523, "step": 14935 }, { "epoch": 0.6927643784786642, "grad_norm": 7.138632297515869, "learning_rate": 2.1940646731880887e-06, "loss": 0.2992, "step": 14936 }, { "epoch": 0.6928107606679035, "grad_norm": 5.964010715484619, "learning_rate": 2.193455571820398e-06, "loss": 0.2207, "step": 14937 }, { "epoch": 0.6928571428571428, "grad_norm": 8.429532051086426, "learning_rate": 2.19284653125501e-06, "loss": 0.2188, "step": 14938 }, { "epoch": 0.6929035250463822, "grad_norm": 5.835363388061523, "learning_rate": 2.192237551505122e-06, "loss": 0.2954, "step": 14939 }, { "epoch": 0.6929499072356216, "grad_norm": 7.419338226318359, "learning_rate": 2.191628632583926e-06, "loss": 0.3953, "step": 14940 }, { "epoch": 0.6929962894248608, "grad_norm": 5.864649772644043, "learning_rate": 2.1910197745046146e-06, "loss": 0.3159, "step": 14941 }, { "epoch": 0.6930426716141002, "grad_norm": 9.271812438964844, "learning_rate": 2.19041097728038e-06, "loss": 0.3089, "step": 14942 }, { "epoch": 0.6930890538033395, "grad_norm": 4.908215522766113, "learning_rate": 2.189802240924408e-06, "loss": 0.2124, "step": 14943 }, { "epoch": 0.6931354359925789, "grad_norm": 5.730676651000977, "learning_rate": 2.1891935654498876e-06, "loss": 0.3186, "step": 14944 }, { "epoch": 0.6931818181818182, "grad_norm": 6.599915981292725, "learning_rate": 2.188584950870007e-06, "loss": 0.299, "step": 14945 }, { "epoch": 0.6932282003710575, "grad_norm": 6.089258670806885, "learning_rate": 2.1879763971979494e-06, "loss": 0.2785, "step": 14946 }, { "epoch": 0.6932745825602968, "grad_norm": 5.3429412841796875, "learning_rate": 2.187367904446901e-06, "loss": 0.3383, "step": 14947 }, { "epoch": 0.6933209647495362, "grad_norm": 3.7805659770965576, "learning_rate": 2.186759472630045e-06, "loss": 0.2439, "step": 14948 }, { "epoch": 0.6933673469387756, "grad_norm": 16.038328170776367, "learning_rate": 2.1861511017605592e-06, "loss": 0.4609, "step": 14949 }, { "epoch": 0.6934137291280148, "grad_norm": 7.612983703613281, "learning_rate": 2.185542791851626e-06, "loss": 0.3249, "step": 14950 }, { "epoch": 0.6934601113172542, "grad_norm": 15.920195579528809, "learning_rate": 2.1849345429164233e-06, "loss": 0.4872, "step": 14951 }, { "epoch": 0.6935064935064935, "grad_norm": 9.820034980773926, "learning_rate": 2.1843263549681287e-06, "loss": 0.3413, "step": 14952 }, { "epoch": 0.6935528756957329, "grad_norm": 4.917206764221191, "learning_rate": 2.183718228019921e-06, "loss": 0.3422, "step": 14953 }, { "epoch": 0.6935992578849721, "grad_norm": 12.339792251586914, "learning_rate": 2.1831101620849702e-06, "loss": 0.4923, "step": 14954 }, { "epoch": 0.6936456400742115, "grad_norm": 6.041735649108887, "learning_rate": 2.182502157176452e-06, "loss": 0.2141, "step": 14955 }, { "epoch": 0.6936920222634508, "grad_norm": 4.661062717437744, "learning_rate": 2.181894213307539e-06, "loss": 0.2959, "step": 14956 }, { "epoch": 0.6937384044526902, "grad_norm": 5.721469402313232, "learning_rate": 2.1812863304914006e-06, "loss": 0.2973, "step": 14957 }, { "epoch": 0.6937847866419296, "grad_norm": 5.714017391204834, "learning_rate": 2.180678508741208e-06, "loss": 0.3043, "step": 14958 }, { "epoch": 0.6938311688311688, "grad_norm": 8.766088485717773, "learning_rate": 2.18007074807013e-06, "loss": 0.3102, "step": 14959 }, { "epoch": 0.6938775510204082, "grad_norm": 5.33111572265625, "learning_rate": 2.1794630484913298e-06, "loss": 0.3255, "step": 14960 }, { "epoch": 0.6939239332096475, "grad_norm": 7.2147040367126465, "learning_rate": 2.1788554100179747e-06, "loss": 0.3534, "step": 14961 }, { "epoch": 0.6939703153988869, "grad_norm": 9.87073802947998, "learning_rate": 2.1782478326632302e-06, "loss": 0.431, "step": 14962 }, { "epoch": 0.6940166975881261, "grad_norm": 17.433881759643555, "learning_rate": 2.177640316440257e-06, "loss": 0.2808, "step": 14963 }, { "epoch": 0.6940630797773655, "grad_norm": 5.659150123596191, "learning_rate": 2.1770328613622207e-06, "loss": 0.1988, "step": 14964 }, { "epoch": 0.6941094619666048, "grad_norm": 8.137856483459473, "learning_rate": 2.176425467442276e-06, "loss": 0.3142, "step": 14965 }, { "epoch": 0.6941558441558442, "grad_norm": 5.058468818664551, "learning_rate": 2.1758181346935835e-06, "loss": 0.2539, "step": 14966 }, { "epoch": 0.6942022263450834, "grad_norm": 16.271045684814453, "learning_rate": 2.175210863129302e-06, "loss": 0.6001, "step": 14967 }, { "epoch": 0.6942486085343228, "grad_norm": 5.457371711730957, "learning_rate": 2.174603652762588e-06, "loss": 0.309, "step": 14968 }, { "epoch": 0.6942949907235622, "grad_norm": 5.248530864715576, "learning_rate": 2.173996503606595e-06, "loss": 0.2478, "step": 14969 }, { "epoch": 0.6943413729128015, "grad_norm": 5.711875915527344, "learning_rate": 2.1733894156744793e-06, "loss": 0.3363, "step": 14970 }, { "epoch": 0.6943877551020409, "grad_norm": 3.7318050861358643, "learning_rate": 2.1727823889793893e-06, "loss": 0.2872, "step": 14971 }, { "epoch": 0.6944341372912801, "grad_norm": 5.983497142791748, "learning_rate": 2.1721754235344774e-06, "loss": 0.3324, "step": 14972 }, { "epoch": 0.6944805194805195, "grad_norm": 11.061359405517578, "learning_rate": 2.171568519352893e-06, "loss": 0.2928, "step": 14973 }, { "epoch": 0.6945269016697588, "grad_norm": 13.782073020935059, "learning_rate": 2.170961676447785e-06, "loss": 0.441, "step": 14974 }, { "epoch": 0.6945732838589982, "grad_norm": 4.5023980140686035, "learning_rate": 2.1703548948323023e-06, "loss": 0.2479, "step": 14975 }, { "epoch": 0.6946196660482374, "grad_norm": 7.521969795227051, "learning_rate": 2.1697481745195863e-06, "loss": 0.3094, "step": 14976 }, { "epoch": 0.6946660482374768, "grad_norm": 10.820197105407715, "learning_rate": 2.1691415155227837e-06, "loss": 0.2402, "step": 14977 }, { "epoch": 0.6947124304267162, "grad_norm": 6.639062404632568, "learning_rate": 2.168534917855036e-06, "loss": 0.322, "step": 14978 }, { "epoch": 0.6947588126159555, "grad_norm": 4.986992835998535, "learning_rate": 2.167928381529487e-06, "loss": 0.3021, "step": 14979 }, { "epoch": 0.6948051948051948, "grad_norm": 4.906428813934326, "learning_rate": 2.1673219065592756e-06, "loss": 0.3095, "step": 14980 }, { "epoch": 0.6948515769944341, "grad_norm": 8.941344261169434, "learning_rate": 2.166715492957542e-06, "loss": 0.2435, "step": 14981 }, { "epoch": 0.6948979591836735, "grad_norm": 11.008983612060547, "learning_rate": 2.166109140737422e-06, "loss": 0.2496, "step": 14982 }, { "epoch": 0.6949443413729128, "grad_norm": 10.989859580993652, "learning_rate": 2.1655028499120524e-06, "loss": 0.4028, "step": 14983 }, { "epoch": 0.6949907235621521, "grad_norm": 6.3651204109191895, "learning_rate": 2.164896620494569e-06, "loss": 0.3407, "step": 14984 }, { "epoch": 0.6950371057513914, "grad_norm": 12.308645248413086, "learning_rate": 2.164290452498104e-06, "loss": 0.3503, "step": 14985 }, { "epoch": 0.6950834879406308, "grad_norm": 5.922626495361328, "learning_rate": 2.163684345935793e-06, "loss": 0.3785, "step": 14986 }, { "epoch": 0.6951298701298702, "grad_norm": 7.764420986175537, "learning_rate": 2.163078300820763e-06, "loss": 0.3208, "step": 14987 }, { "epoch": 0.6951762523191095, "grad_norm": 6.2411980628967285, "learning_rate": 2.162472317166145e-06, "loss": 0.3489, "step": 14988 }, { "epoch": 0.6952226345083488, "grad_norm": 5.608913898468018, "learning_rate": 2.161866394985068e-06, "loss": 0.1967, "step": 14989 }, { "epoch": 0.6952690166975881, "grad_norm": 4.021618366241455, "learning_rate": 2.161260534290658e-06, "loss": 0.3135, "step": 14990 }, { "epoch": 0.6953153988868275, "grad_norm": 7.7684197425842285, "learning_rate": 2.160654735096041e-06, "loss": 0.2805, "step": 14991 }, { "epoch": 0.6953617810760668, "grad_norm": 7.03948974609375, "learning_rate": 2.160048997414344e-06, "loss": 0.3227, "step": 14992 }, { "epoch": 0.6954081632653061, "grad_norm": 8.318486213684082, "learning_rate": 2.159443321258686e-06, "loss": 0.2966, "step": 14993 }, { "epoch": 0.6954545454545454, "grad_norm": 8.049596786499023, "learning_rate": 2.1588377066421896e-06, "loss": 0.3668, "step": 14994 }, { "epoch": 0.6955009276437848, "grad_norm": 5.099193096160889, "learning_rate": 2.158232153577976e-06, "loss": 0.3795, "step": 14995 }, { "epoch": 0.6955473098330242, "grad_norm": 5.43348503112793, "learning_rate": 2.1576266620791635e-06, "loss": 0.3024, "step": 14996 }, { "epoch": 0.6955936920222634, "grad_norm": 6.778983116149902, "learning_rate": 2.157021232158873e-06, "loss": 0.2381, "step": 14997 }, { "epoch": 0.6956400742115028, "grad_norm": 10.953625679016113, "learning_rate": 2.1564158638302154e-06, "loss": 0.3622, "step": 14998 }, { "epoch": 0.6956864564007421, "grad_norm": 7.367138862609863, "learning_rate": 2.155810557106308e-06, "loss": 0.3818, "step": 14999 }, { "epoch": 0.6957328385899815, "grad_norm": 12.111078262329102, "learning_rate": 2.1552053120002655e-06, "loss": 0.4863, "step": 15000 }, { "epoch": 0.6957792207792208, "grad_norm": 5.843446731567383, "learning_rate": 2.1546001285251987e-06, "loss": 0.3193, "step": 15001 }, { "epoch": 0.6958256029684601, "grad_norm": 6.406569957733154, "learning_rate": 2.15399500669422e-06, "loss": 0.3054, "step": 15002 }, { "epoch": 0.6958719851576994, "grad_norm": 8.511725425720215, "learning_rate": 2.15338994652044e-06, "loss": 0.3002, "step": 15003 }, { "epoch": 0.6959183673469388, "grad_norm": 10.271242141723633, "learning_rate": 2.152784948016963e-06, "loss": 0.414, "step": 15004 }, { "epoch": 0.6959647495361782, "grad_norm": 12.51993179321289, "learning_rate": 2.1521800111968995e-06, "loss": 0.3853, "step": 15005 }, { "epoch": 0.6960111317254174, "grad_norm": 7.745620250701904, "learning_rate": 2.151575136073353e-06, "loss": 0.2892, "step": 15006 }, { "epoch": 0.6960575139146568, "grad_norm": 14.8989839553833, "learning_rate": 2.150970322659429e-06, "loss": 0.3948, "step": 15007 }, { "epoch": 0.6961038961038961, "grad_norm": 13.327798843383789, "learning_rate": 2.150365570968232e-06, "loss": 0.4496, "step": 15008 }, { "epoch": 0.6961502782931355, "grad_norm": 5.151654243469238, "learning_rate": 2.1497608810128605e-06, "loss": 0.2825, "step": 15009 }, { "epoch": 0.6961966604823747, "grad_norm": 5.885918617248535, "learning_rate": 2.149156252806416e-06, "loss": 0.3622, "step": 15010 }, { "epoch": 0.6962430426716141, "grad_norm": 7.168769836425781, "learning_rate": 2.1485516863619972e-06, "loss": 0.2681, "step": 15011 }, { "epoch": 0.6962894248608534, "grad_norm": 6.611225605010986, "learning_rate": 2.147947181692703e-06, "loss": 0.3217, "step": 15012 }, { "epoch": 0.6963358070500928, "grad_norm": 6.066380977630615, "learning_rate": 2.1473427388116282e-06, "loss": 0.3349, "step": 15013 }, { "epoch": 0.6963821892393321, "grad_norm": 8.838888168334961, "learning_rate": 2.1467383577318698e-06, "loss": 0.3244, "step": 15014 }, { "epoch": 0.6964285714285714, "grad_norm": 6.8742780685424805, "learning_rate": 2.1461340384665187e-06, "loss": 0.3854, "step": 15015 }, { "epoch": 0.6964749536178108, "grad_norm": 7.969156742095947, "learning_rate": 2.145529781028668e-06, "loss": 0.2403, "step": 15016 }, { "epoch": 0.6965213358070501, "grad_norm": 5.737495422363281, "learning_rate": 2.1449255854314095e-06, "loss": 0.3816, "step": 15017 }, { "epoch": 0.6965677179962895, "grad_norm": 8.670658111572266, "learning_rate": 2.1443214516878318e-06, "loss": 0.3404, "step": 15018 }, { "epoch": 0.6966141001855287, "grad_norm": 6.625118732452393, "learning_rate": 2.1437173798110255e-06, "loss": 0.2989, "step": 15019 }, { "epoch": 0.6966604823747681, "grad_norm": 9.613431930541992, "learning_rate": 2.1431133698140744e-06, "loss": 0.4228, "step": 15020 }, { "epoch": 0.6967068645640074, "grad_norm": 7.034599304199219, "learning_rate": 2.142509421710065e-06, "loss": 0.324, "step": 15021 }, { "epoch": 0.6967532467532468, "grad_norm": 6.633779048919678, "learning_rate": 2.1419055355120814e-06, "loss": 0.2969, "step": 15022 }, { "epoch": 0.696799628942486, "grad_norm": 5.901429653167725, "learning_rate": 2.1413017112332073e-06, "loss": 0.2741, "step": 15023 }, { "epoch": 0.6968460111317254, "grad_norm": 7.618439197540283, "learning_rate": 2.1406979488865253e-06, "loss": 0.2474, "step": 15024 }, { "epoch": 0.6968923933209648, "grad_norm": 6.702054500579834, "learning_rate": 2.1400942484851127e-06, "loss": 0.2912, "step": 15025 }, { "epoch": 0.6969387755102041, "grad_norm": 7.231140613555908, "learning_rate": 2.1394906100420497e-06, "loss": 0.318, "step": 15026 }, { "epoch": 0.6969851576994434, "grad_norm": 4.891744136810303, "learning_rate": 2.138887033570414e-06, "loss": 0.336, "step": 15027 }, { "epoch": 0.6970315398886827, "grad_norm": 4.426613807678223, "learning_rate": 2.138283519083281e-06, "loss": 0.3064, "step": 15028 }, { "epoch": 0.6970779220779221, "grad_norm": 10.5264253616333, "learning_rate": 2.137680066593727e-06, "loss": 0.3439, "step": 15029 }, { "epoch": 0.6971243042671614, "grad_norm": 4.840390682220459, "learning_rate": 2.137076676114826e-06, "loss": 0.2851, "step": 15030 }, { "epoch": 0.6971706864564008, "grad_norm": 4.169025897979736, "learning_rate": 2.1364733476596466e-06, "loss": 0.2938, "step": 15031 }, { "epoch": 0.69721706864564, "grad_norm": 5.750879764556885, "learning_rate": 2.1358700812412625e-06, "loss": 0.2291, "step": 15032 }, { "epoch": 0.6972634508348794, "grad_norm": 7.541476726531982, "learning_rate": 2.135266876872742e-06, "loss": 0.4119, "step": 15033 }, { "epoch": 0.6973098330241188, "grad_norm": 13.476365089416504, "learning_rate": 2.134663734567154e-06, "loss": 0.3031, "step": 15034 }, { "epoch": 0.6973562152133581, "grad_norm": 9.72947883605957, "learning_rate": 2.134060654337566e-06, "loss": 0.3772, "step": 15035 }, { "epoch": 0.6974025974025974, "grad_norm": 5.345025539398193, "learning_rate": 2.133457636197041e-06, "loss": 0.3066, "step": 15036 }, { "epoch": 0.6974489795918367, "grad_norm": 7.317227840423584, "learning_rate": 2.132854680158644e-06, "loss": 0.3447, "step": 15037 }, { "epoch": 0.6974953617810761, "grad_norm": 14.687474250793457, "learning_rate": 2.1322517862354384e-06, "loss": 0.3984, "step": 15038 }, { "epoch": 0.6975417439703154, "grad_norm": 9.160552024841309, "learning_rate": 2.1316489544404855e-06, "loss": 0.3934, "step": 15039 }, { "epoch": 0.6975881261595547, "grad_norm": 6.250207901000977, "learning_rate": 2.1310461847868447e-06, "loss": 0.4415, "step": 15040 }, { "epoch": 0.697634508348794, "grad_norm": 9.547982215881348, "learning_rate": 2.130443477287577e-06, "loss": 0.2337, "step": 15041 }, { "epoch": 0.6976808905380334, "grad_norm": 8.680249214172363, "learning_rate": 2.1298408319557364e-06, "loss": 0.4085, "step": 15042 }, { "epoch": 0.6977272727272728, "grad_norm": 5.9162421226501465, "learning_rate": 2.12923824880438e-06, "loss": 0.3389, "step": 15043 }, { "epoch": 0.6977736549165121, "grad_norm": 7.983931541442871, "learning_rate": 2.1286357278465624e-06, "loss": 0.3004, "step": 15044 }, { "epoch": 0.6978200371057514, "grad_norm": 4.831305503845215, "learning_rate": 2.1280332690953375e-06, "loss": 0.3352, "step": 15045 }, { "epoch": 0.6978664192949907, "grad_norm": 8.27353572845459, "learning_rate": 2.1274308725637592e-06, "loss": 0.3269, "step": 15046 }, { "epoch": 0.6979128014842301, "grad_norm": 18.386323928833008, "learning_rate": 2.126828538264874e-06, "loss": 0.3789, "step": 15047 }, { "epoch": 0.6979591836734694, "grad_norm": 9.443363189697266, "learning_rate": 2.1262262662117327e-06, "loss": 0.3567, "step": 15048 }, { "epoch": 0.6980055658627087, "grad_norm": 13.299994468688965, "learning_rate": 2.125624056417384e-06, "loss": 0.3656, "step": 15049 }, { "epoch": 0.698051948051948, "grad_norm": 5.700273036956787, "learning_rate": 2.1250219088948736e-06, "loss": 0.3094, "step": 15050 }, { "epoch": 0.6980983302411874, "grad_norm": 7.98371696472168, "learning_rate": 2.1244198236572475e-06, "loss": 0.3543, "step": 15051 }, { "epoch": 0.6981447124304268, "grad_norm": 6.697298526763916, "learning_rate": 2.123817800717551e-06, "loss": 0.2446, "step": 15052 }, { "epoch": 0.698191094619666, "grad_norm": 16.783010482788086, "learning_rate": 2.123215840088823e-06, "loss": 0.3599, "step": 15053 }, { "epoch": 0.6982374768089054, "grad_norm": 10.34475326538086, "learning_rate": 2.1226139417841063e-06, "loss": 0.3814, "step": 15054 }, { "epoch": 0.6982838589981447, "grad_norm": 5.792728900909424, "learning_rate": 2.122012105816441e-06, "loss": 0.3455, "step": 15055 }, { "epoch": 0.6983302411873841, "grad_norm": 9.55601692199707, "learning_rate": 2.1214103321988655e-06, "loss": 0.2959, "step": 15056 }, { "epoch": 0.6983766233766234, "grad_norm": 7.184834957122803, "learning_rate": 2.120808620944419e-06, "loss": 0.4086, "step": 15057 }, { "epoch": 0.6984230055658627, "grad_norm": 8.755029678344727, "learning_rate": 2.120206972066133e-06, "loss": 0.4811, "step": 15058 }, { "epoch": 0.698469387755102, "grad_norm": 10.110706329345703, "learning_rate": 2.1196053855770437e-06, "loss": 0.3463, "step": 15059 }, { "epoch": 0.6985157699443414, "grad_norm": 7.136960983276367, "learning_rate": 2.1190038614901846e-06, "loss": 0.3699, "step": 15060 }, { "epoch": 0.6985621521335807, "grad_norm": 4.464425086975098, "learning_rate": 2.1184023998185877e-06, "loss": 0.2796, "step": 15061 }, { "epoch": 0.69860853432282, "grad_norm": 11.466463088989258, "learning_rate": 2.1178010005752824e-06, "loss": 0.3929, "step": 15062 }, { "epoch": 0.6986549165120594, "grad_norm": 7.48895788192749, "learning_rate": 2.1171996637733e-06, "loss": 0.2579, "step": 15063 }, { "epoch": 0.6987012987012987, "grad_norm": 5.3137125968933105, "learning_rate": 2.1165983894256647e-06, "loss": 0.3099, "step": 15064 }, { "epoch": 0.6987476808905381, "grad_norm": 11.14134407043457, "learning_rate": 2.1159971775454045e-06, "loss": 0.4656, "step": 15065 }, { "epoch": 0.6987940630797773, "grad_norm": 7.088536262512207, "learning_rate": 2.115396028145545e-06, "loss": 0.3728, "step": 15066 }, { "epoch": 0.6988404452690167, "grad_norm": 8.740303993225098, "learning_rate": 2.1147949412391083e-06, "loss": 0.2949, "step": 15067 }, { "epoch": 0.698886827458256, "grad_norm": 6.479061603546143, "learning_rate": 2.114193916839119e-06, "loss": 0.3117, "step": 15068 }, { "epoch": 0.6989332096474954, "grad_norm": 3.8036000728607178, "learning_rate": 2.1135929549585953e-06, "loss": 0.2487, "step": 15069 }, { "epoch": 0.6989795918367347, "grad_norm": 7.713623523712158, "learning_rate": 2.112992055610557e-06, "loss": 0.3805, "step": 15070 }, { "epoch": 0.699025974025974, "grad_norm": 12.174840927124023, "learning_rate": 2.1123912188080237e-06, "loss": 0.4712, "step": 15071 }, { "epoch": 0.6990723562152134, "grad_norm": 5.6994781494140625, "learning_rate": 2.111790444564011e-06, "loss": 0.2637, "step": 15072 }, { "epoch": 0.6991187384044527, "grad_norm": 10.619009017944336, "learning_rate": 2.111189732891536e-06, "loss": 0.4938, "step": 15073 }, { "epoch": 0.6991651205936921, "grad_norm": 9.933065414428711, "learning_rate": 2.1105890838036133e-06, "loss": 0.2835, "step": 15074 }, { "epoch": 0.6992115027829313, "grad_norm": 6.740609645843506, "learning_rate": 2.1099884973132512e-06, "loss": 0.2953, "step": 15075 }, { "epoch": 0.6992578849721707, "grad_norm": 6.019870758056641, "learning_rate": 2.109387973433465e-06, "loss": 0.3191, "step": 15076 }, { "epoch": 0.69930426716141, "grad_norm": 9.34262752532959, "learning_rate": 2.1087875121772633e-06, "loss": 0.4018, "step": 15077 }, { "epoch": 0.6993506493506494, "grad_norm": 7.857912540435791, "learning_rate": 2.1081871135576553e-06, "loss": 0.4258, "step": 15078 }, { "epoch": 0.6993970315398886, "grad_norm": 5.805241584777832, "learning_rate": 2.10758677758765e-06, "loss": 0.2555, "step": 15079 }, { "epoch": 0.699443413729128, "grad_norm": 9.888971328735352, "learning_rate": 2.1069865042802502e-06, "loss": 0.4661, "step": 15080 }, { "epoch": 0.6994897959183674, "grad_norm": 9.445862770080566, "learning_rate": 2.106386293648461e-06, "loss": 0.2715, "step": 15081 }, { "epoch": 0.6995361781076067, "grad_norm": 5.5182318687438965, "learning_rate": 2.105786145705287e-06, "loss": 0.2734, "step": 15082 }, { "epoch": 0.699582560296846, "grad_norm": 5.2792582511901855, "learning_rate": 2.10518606046373e-06, "loss": 0.3073, "step": 15083 }, { "epoch": 0.6996289424860853, "grad_norm": 5.412256240844727, "learning_rate": 2.1045860379367894e-06, "loss": 0.3532, "step": 15084 }, { "epoch": 0.6996753246753247, "grad_norm": 6.216710567474365, "learning_rate": 2.1039860781374673e-06, "loss": 0.257, "step": 15085 }, { "epoch": 0.699721706864564, "grad_norm": 9.944575309753418, "learning_rate": 2.1033861810787578e-06, "loss": 0.4522, "step": 15086 }, { "epoch": 0.6997680890538034, "grad_norm": 7.321124076843262, "learning_rate": 2.102786346773658e-06, "loss": 0.2866, "step": 15087 }, { "epoch": 0.6998144712430426, "grad_norm": 9.12020492553711, "learning_rate": 2.102186575235165e-06, "loss": 0.4649, "step": 15088 }, { "epoch": 0.699860853432282, "grad_norm": 7.987740516662598, "learning_rate": 2.1015868664762705e-06, "loss": 0.2668, "step": 15089 }, { "epoch": 0.6999072356215214, "grad_norm": 12.832442283630371, "learning_rate": 2.1009872205099703e-06, "loss": 0.3385, "step": 15090 }, { "epoch": 0.6999536178107607, "grad_norm": 4.1697235107421875, "learning_rate": 2.1003876373492505e-06, "loss": 0.2728, "step": 15091 }, { "epoch": 0.7, "grad_norm": 10.794854164123535, "learning_rate": 2.0997881170071034e-06, "loss": 0.4547, "step": 15092 }, { "epoch": 0.7, "eval_loss": 0.32578200101852417, "eval_runtime": 38.0653, "eval_samples_per_second": 45.79, "eval_steps_per_second": 5.727, "step": 15092 }, { "epoch": 0.7000463821892393, "grad_norm": 5.775805473327637, "learning_rate": 2.0991886594965174e-06, "loss": 0.2757, "step": 15093 }, { "epoch": 0.7000927643784787, "grad_norm": 6.759728908538818, "learning_rate": 2.0985892648304782e-06, "loss": 0.2982, "step": 15094 }, { "epoch": 0.700139146567718, "grad_norm": 5.562281131744385, "learning_rate": 2.0979899330219728e-06, "loss": 0.2759, "step": 15095 }, { "epoch": 0.7001855287569573, "grad_norm": 5.5349321365356445, "learning_rate": 2.0973906640839867e-06, "loss": 0.3032, "step": 15096 }, { "epoch": 0.7002319109461966, "grad_norm": 4.898031711578369, "learning_rate": 2.096791458029499e-06, "loss": 0.3485, "step": 15097 }, { "epoch": 0.700278293135436, "grad_norm": 5.7494120597839355, "learning_rate": 2.096192314871493e-06, "loss": 0.3043, "step": 15098 }, { "epoch": 0.7003246753246753, "grad_norm": 7.271758079528809, "learning_rate": 2.0955932346229492e-06, "loss": 0.4244, "step": 15099 }, { "epoch": 0.7003710575139147, "grad_norm": 10.386529922485352, "learning_rate": 2.0949942172968456e-06, "loss": 0.3645, "step": 15100 }, { "epoch": 0.700417439703154, "grad_norm": 5.938282489776611, "learning_rate": 2.094395262906162e-06, "loss": 0.2874, "step": 15101 }, { "epoch": 0.7004638218923933, "grad_norm": 6.338263511657715, "learning_rate": 2.093796371463871e-06, "loss": 0.3045, "step": 15102 }, { "epoch": 0.7005102040816327, "grad_norm": 6.910443305969238, "learning_rate": 2.0931975429829486e-06, "loss": 0.2508, "step": 15103 }, { "epoch": 0.700556586270872, "grad_norm": 6.573345184326172, "learning_rate": 2.0925987774763686e-06, "loss": 0.4624, "step": 15104 }, { "epoch": 0.7006029684601113, "grad_norm": 4.545742034912109, "learning_rate": 2.0920000749571016e-06, "loss": 0.2482, "step": 15105 }, { "epoch": 0.7006493506493506, "grad_norm": 4.794674396514893, "learning_rate": 2.0914014354381217e-06, "loss": 0.3025, "step": 15106 }, { "epoch": 0.70069573283859, "grad_norm": 5.62257719039917, "learning_rate": 2.0908028589323935e-06, "loss": 0.238, "step": 15107 }, { "epoch": 0.7007421150278293, "grad_norm": 9.60283088684082, "learning_rate": 2.090204345452887e-06, "loss": 0.2695, "step": 15108 }, { "epoch": 0.7007884972170686, "grad_norm": 9.46701431274414, "learning_rate": 2.0896058950125685e-06, "loss": 0.3372, "step": 15109 }, { "epoch": 0.700834879406308, "grad_norm": 8.860227584838867, "learning_rate": 2.089007507624404e-06, "loss": 0.3649, "step": 15110 }, { "epoch": 0.7008812615955473, "grad_norm": 4.364299774169922, "learning_rate": 2.0884091833013554e-06, "loss": 0.2901, "step": 15111 }, { "epoch": 0.7009276437847867, "grad_norm": 16.22081184387207, "learning_rate": 2.0878109220563884e-06, "loss": 0.4045, "step": 15112 }, { "epoch": 0.700974025974026, "grad_norm": 10.318313598632812, "learning_rate": 2.0872127239024596e-06, "loss": 0.4288, "step": 15113 }, { "epoch": 0.7010204081632653, "grad_norm": 8.498482704162598, "learning_rate": 2.0866145888525315e-06, "loss": 0.2955, "step": 15114 }, { "epoch": 0.7010667903525046, "grad_norm": 4.727163791656494, "learning_rate": 2.086016516919561e-06, "loss": 0.3772, "step": 15115 }, { "epoch": 0.701113172541744, "grad_norm": 6.410853385925293, "learning_rate": 2.085418508116506e-06, "loss": 0.2625, "step": 15116 }, { "epoch": 0.7011595547309833, "grad_norm": 8.93984317779541, "learning_rate": 2.0848205624563235e-06, "loss": 0.3178, "step": 15117 }, { "epoch": 0.7012059369202226, "grad_norm": 4.220444679260254, "learning_rate": 2.0842226799519643e-06, "loss": 0.2242, "step": 15118 }, { "epoch": 0.701252319109462, "grad_norm": 12.679359436035156, "learning_rate": 2.0836248606163827e-06, "loss": 0.4818, "step": 15119 }, { "epoch": 0.7012987012987013, "grad_norm": 5.010056495666504, "learning_rate": 2.08302710446253e-06, "loss": 0.3201, "step": 15120 }, { "epoch": 0.7013450834879407, "grad_norm": 8.153700828552246, "learning_rate": 2.082429411503357e-06, "loss": 0.3282, "step": 15121 }, { "epoch": 0.7013914656771799, "grad_norm": 8.887011528015137, "learning_rate": 2.0818317817518117e-06, "loss": 0.381, "step": 15122 }, { "epoch": 0.7014378478664193, "grad_norm": 12.496533393859863, "learning_rate": 2.0812342152208437e-06, "loss": 0.3698, "step": 15123 }, { "epoch": 0.7014842300556586, "grad_norm": 9.108036994934082, "learning_rate": 2.0806367119233946e-06, "loss": 0.3463, "step": 15124 }, { "epoch": 0.701530612244898, "grad_norm": 6.063813209533691, "learning_rate": 2.0800392718724116e-06, "loss": 0.4102, "step": 15125 }, { "epoch": 0.7015769944341373, "grad_norm": 8.471138000488281, "learning_rate": 2.0794418950808374e-06, "loss": 0.3835, "step": 15126 }, { "epoch": 0.7016233766233766, "grad_norm": 6.7191162109375, "learning_rate": 2.0788445815616148e-06, "loss": 0.3969, "step": 15127 }, { "epoch": 0.701669758812616, "grad_norm": 25.305496215820312, "learning_rate": 2.078247331327685e-06, "loss": 0.2601, "step": 15128 }, { "epoch": 0.7017161410018553, "grad_norm": 4.419483661651611, "learning_rate": 2.077650144391984e-06, "loss": 0.2971, "step": 15129 }, { "epoch": 0.7017625231910947, "grad_norm": 4.627358913421631, "learning_rate": 2.077053020767451e-06, "loss": 0.2522, "step": 15130 }, { "epoch": 0.7018089053803339, "grad_norm": 7.8985819816589355, "learning_rate": 2.0764559604670233e-06, "loss": 0.3212, "step": 15131 }, { "epoch": 0.7018552875695733, "grad_norm": 8.944242477416992, "learning_rate": 2.075858963503634e-06, "loss": 0.3459, "step": 15132 }, { "epoch": 0.7019016697588126, "grad_norm": 5.510110378265381, "learning_rate": 2.0752620298902186e-06, "loss": 0.2858, "step": 15133 }, { "epoch": 0.701948051948052, "grad_norm": 3.826934576034546, "learning_rate": 2.074665159639711e-06, "loss": 0.1961, "step": 15134 }, { "epoch": 0.7019944341372912, "grad_norm": 7.655484199523926, "learning_rate": 2.0740683527650373e-06, "loss": 0.3811, "step": 15135 }, { "epoch": 0.7020408163265306, "grad_norm": 9.669157981872559, "learning_rate": 2.0734716092791295e-06, "loss": 0.3909, "step": 15136 }, { "epoch": 0.70208719851577, "grad_norm": 10.770233154296875, "learning_rate": 2.072874929194916e-06, "loss": 0.4117, "step": 15137 }, { "epoch": 0.7021335807050093, "grad_norm": 8.152288436889648, "learning_rate": 2.072278312525323e-06, "loss": 0.3193, "step": 15138 }, { "epoch": 0.7021799628942486, "grad_norm": 5.060300827026367, "learning_rate": 2.0716817592832783e-06, "loss": 0.1926, "step": 15139 }, { "epoch": 0.7022263450834879, "grad_norm": 8.34271240234375, "learning_rate": 2.0710852694817015e-06, "loss": 0.3346, "step": 15140 }, { "epoch": 0.7022727272727273, "grad_norm": 11.206650733947754, "learning_rate": 2.0704888431335176e-06, "loss": 0.3665, "step": 15141 }, { "epoch": 0.7023191094619666, "grad_norm": 8.785696029663086, "learning_rate": 2.069892480251648e-06, "loss": 0.2989, "step": 15142 }, { "epoch": 0.702365491651206, "grad_norm": 9.79373550415039, "learning_rate": 2.069296180849012e-06, "loss": 0.2695, "step": 15143 }, { "epoch": 0.7024118738404452, "grad_norm": 5.454314708709717, "learning_rate": 2.0686999449385286e-06, "loss": 0.2528, "step": 15144 }, { "epoch": 0.7024582560296846, "grad_norm": 5.612460613250732, "learning_rate": 2.0681037725331165e-06, "loss": 0.3211, "step": 15145 }, { "epoch": 0.702504638218924, "grad_norm": 6.617987632751465, "learning_rate": 2.067507663645688e-06, "loss": 0.3462, "step": 15146 }, { "epoch": 0.7025510204081633, "grad_norm": 7.625784397125244, "learning_rate": 2.0669116182891598e-06, "loss": 0.3097, "step": 15147 }, { "epoch": 0.7025974025974026, "grad_norm": 8.600049018859863, "learning_rate": 2.0663156364764437e-06, "loss": 0.3738, "step": 15148 }, { "epoch": 0.7026437847866419, "grad_norm": 7.114474773406982, "learning_rate": 2.0657197182204523e-06, "loss": 0.3091, "step": 15149 }, { "epoch": 0.7026901669758813, "grad_norm": 10.662993431091309, "learning_rate": 2.065123863534097e-06, "loss": 0.3529, "step": 15150 }, { "epoch": 0.7027365491651206, "grad_norm": 5.965420722961426, "learning_rate": 2.0645280724302835e-06, "loss": 0.3476, "step": 15151 }, { "epoch": 0.7027829313543599, "grad_norm": 9.374727249145508, "learning_rate": 2.063932344921921e-06, "loss": 0.3089, "step": 15152 }, { "epoch": 0.7028293135435992, "grad_norm": 3.873075485229492, "learning_rate": 2.0633366810219163e-06, "loss": 0.1932, "step": 15153 }, { "epoch": 0.7028756957328386, "grad_norm": 6.866640090942383, "learning_rate": 2.062741080743173e-06, "loss": 0.3622, "step": 15154 }, { "epoch": 0.702922077922078, "grad_norm": 6.517841339111328, "learning_rate": 2.062145544098595e-06, "loss": 0.3987, "step": 15155 }, { "epoch": 0.7029684601113173, "grad_norm": 7.18791389465332, "learning_rate": 2.0615500711010855e-06, "loss": 0.3346, "step": 15156 }, { "epoch": 0.7030148423005566, "grad_norm": 5.688205718994141, "learning_rate": 2.060954661763542e-06, "loss": 0.3239, "step": 15157 }, { "epoch": 0.7030612244897959, "grad_norm": 11.175077438354492, "learning_rate": 2.0603593160988665e-06, "loss": 0.3859, "step": 15158 }, { "epoch": 0.7031076066790353, "grad_norm": 5.837080478668213, "learning_rate": 2.0597640341199555e-06, "loss": 0.2512, "step": 15159 }, { "epoch": 0.7031539888682746, "grad_norm": 6.118684768676758, "learning_rate": 2.0591688158397054e-06, "loss": 0.3416, "step": 15160 }, { "epoch": 0.7032003710575139, "grad_norm": 8.62279987335205, "learning_rate": 2.0585736612710143e-06, "loss": 0.4445, "step": 15161 }, { "epoch": 0.7032467532467532, "grad_norm": 12.74315357208252, "learning_rate": 2.057978570426771e-06, "loss": 0.4874, "step": 15162 }, { "epoch": 0.7032931354359926, "grad_norm": 4.297421932220459, "learning_rate": 2.0573835433198707e-06, "loss": 0.2318, "step": 15163 }, { "epoch": 0.703339517625232, "grad_norm": 14.431619644165039, "learning_rate": 2.0567885799632036e-06, "loss": 0.4639, "step": 15164 }, { "epoch": 0.7033858998144712, "grad_norm": 8.106328010559082, "learning_rate": 2.05619368036966e-06, "loss": 0.3006, "step": 15165 }, { "epoch": 0.7034322820037106, "grad_norm": 8.150174140930176, "learning_rate": 2.055598844552129e-06, "loss": 0.3209, "step": 15166 }, { "epoch": 0.7034786641929499, "grad_norm": 5.667752265930176, "learning_rate": 2.0550040725234945e-06, "loss": 0.3935, "step": 15167 }, { "epoch": 0.7035250463821893, "grad_norm": 5.148085117340088, "learning_rate": 2.0544093642966426e-06, "loss": 0.2597, "step": 15168 }, { "epoch": 0.7035714285714286, "grad_norm": 8.32140827178955, "learning_rate": 2.053814719884459e-06, "loss": 0.3591, "step": 15169 }, { "epoch": 0.7036178107606679, "grad_norm": 6.720555305480957, "learning_rate": 2.053220139299825e-06, "loss": 0.3543, "step": 15170 }, { "epoch": 0.7036641929499072, "grad_norm": 6.183554172515869, "learning_rate": 2.0526256225556244e-06, "loss": 0.2418, "step": 15171 }, { "epoch": 0.7037105751391466, "grad_norm": 4.653054237365723, "learning_rate": 2.052031169664734e-06, "loss": 0.3341, "step": 15172 }, { "epoch": 0.703756957328386, "grad_norm": 6.101203441619873, "learning_rate": 2.0514367806400325e-06, "loss": 0.3178, "step": 15173 }, { "epoch": 0.7038033395176252, "grad_norm": 6.086727619171143, "learning_rate": 2.0508424554943986e-06, "loss": 0.3583, "step": 15174 }, { "epoch": 0.7038497217068646, "grad_norm": 8.400321006774902, "learning_rate": 2.0502481942407073e-06, "loss": 0.2779, "step": 15175 }, { "epoch": 0.7038961038961039, "grad_norm": 8.336291313171387, "learning_rate": 2.0496539968918342e-06, "loss": 0.3839, "step": 15176 }, { "epoch": 0.7039424860853433, "grad_norm": 6.107328414916992, "learning_rate": 2.04905986346065e-06, "loss": 0.2838, "step": 15177 }, { "epoch": 0.7039888682745825, "grad_norm": 5.9993205070495605, "learning_rate": 2.048465793960027e-06, "loss": 0.2964, "step": 15178 }, { "epoch": 0.7040352504638219, "grad_norm": 7.424407482147217, "learning_rate": 2.047871788402836e-06, "loss": 0.3841, "step": 15179 }, { "epoch": 0.7040816326530612, "grad_norm": 8.359416961669922, "learning_rate": 2.0472778468019456e-06, "loss": 0.3673, "step": 15180 }, { "epoch": 0.7041280148423006, "grad_norm": 6.6828413009643555, "learning_rate": 2.0466839691702253e-06, "loss": 0.3663, "step": 15181 }, { "epoch": 0.7041743970315398, "grad_norm": 4.605432987213135, "learning_rate": 2.0460901555205366e-06, "loss": 0.2517, "step": 15182 }, { "epoch": 0.7042207792207792, "grad_norm": 6.4312639236450195, "learning_rate": 2.0454964058657474e-06, "loss": 0.3269, "step": 15183 }, { "epoch": 0.7042671614100185, "grad_norm": 12.096744537353516, "learning_rate": 2.0449027202187194e-06, "loss": 0.3402, "step": 15184 }, { "epoch": 0.7043135435992579, "grad_norm": 27.118528366088867, "learning_rate": 2.044309098592316e-06, "loss": 0.4931, "step": 15185 }, { "epoch": 0.7043599257884973, "grad_norm": 4.85795259475708, "learning_rate": 2.0437155409993986e-06, "loss": 0.2727, "step": 15186 }, { "epoch": 0.7044063079777365, "grad_norm": 9.26595687866211, "learning_rate": 2.043122047452823e-06, "loss": 0.348, "step": 15187 }, { "epoch": 0.7044526901669759, "grad_norm": 8.198217391967773, "learning_rate": 2.0425286179654484e-06, "loss": 0.2764, "step": 15188 }, { "epoch": 0.7044990723562152, "grad_norm": 5.768886566162109, "learning_rate": 2.0419352525501317e-06, "loss": 0.2881, "step": 15189 }, { "epoch": 0.7045454545454546, "grad_norm": 6.572909832000732, "learning_rate": 2.0413419512197276e-06, "loss": 0.3989, "step": 15190 }, { "epoch": 0.7045918367346938, "grad_norm": 13.90479850769043, "learning_rate": 2.0407487139870913e-06, "loss": 0.4273, "step": 15191 }, { "epoch": 0.7046382189239332, "grad_norm": 6.105273723602295, "learning_rate": 2.0401555408650714e-06, "loss": 0.2694, "step": 15192 }, { "epoch": 0.7046846011131725, "grad_norm": 11.337594985961914, "learning_rate": 2.0395624318665203e-06, "loss": 0.3934, "step": 15193 }, { "epoch": 0.7047309833024119, "grad_norm": 11.668718338012695, "learning_rate": 2.038969387004288e-06, "loss": 0.4174, "step": 15194 }, { "epoch": 0.7047773654916512, "grad_norm": 6.202223777770996, "learning_rate": 2.0383764062912222e-06, "loss": 0.2446, "step": 15195 }, { "epoch": 0.7048237476808905, "grad_norm": 6.136172771453857, "learning_rate": 2.037783489740171e-06, "loss": 0.2492, "step": 15196 }, { "epoch": 0.7048701298701299, "grad_norm": 6.45392370223999, "learning_rate": 2.037190637363976e-06, "loss": 0.2864, "step": 15197 }, { "epoch": 0.7049165120593692, "grad_norm": 7.086357593536377, "learning_rate": 2.036597849175484e-06, "loss": 0.3148, "step": 15198 }, { "epoch": 0.7049628942486086, "grad_norm": 8.693438529968262, "learning_rate": 2.0360051251875356e-06, "loss": 0.3078, "step": 15199 }, { "epoch": 0.7050092764378478, "grad_norm": 5.705362796783447, "learning_rate": 2.0354124654129737e-06, "loss": 0.2809, "step": 15200 }, { "epoch": 0.7050556586270872, "grad_norm": 14.687272071838379, "learning_rate": 2.034819869864638e-06, "loss": 0.5532, "step": 15201 }, { "epoch": 0.7051020408163265, "grad_norm": 4.9296979904174805, "learning_rate": 2.034227338555365e-06, "loss": 0.3005, "step": 15202 }, { "epoch": 0.7051484230055659, "grad_norm": 8.870640754699707, "learning_rate": 2.033634871497992e-06, "loss": 0.3478, "step": 15203 }, { "epoch": 0.7051948051948052, "grad_norm": 4.960409641265869, "learning_rate": 2.0330424687053546e-06, "loss": 0.2927, "step": 15204 }, { "epoch": 0.7052411873840445, "grad_norm": 8.955534934997559, "learning_rate": 2.032450130190288e-06, "loss": 0.2891, "step": 15205 }, { "epoch": 0.7052875695732839, "grad_norm": 8.36596965789795, "learning_rate": 2.031857855965625e-06, "loss": 0.2609, "step": 15206 }, { "epoch": 0.7053339517625232, "grad_norm": 4.242132186889648, "learning_rate": 2.031265646044195e-06, "loss": 0.2506, "step": 15207 }, { "epoch": 0.7053803339517625, "grad_norm": 5.797396659851074, "learning_rate": 2.030673500438828e-06, "loss": 0.3392, "step": 15208 }, { "epoch": 0.7054267161410018, "grad_norm": 14.273491859436035, "learning_rate": 2.030081419162354e-06, "loss": 0.3304, "step": 15209 }, { "epoch": 0.7054730983302412, "grad_norm": 7.245806694030762, "learning_rate": 2.0294894022275996e-06, "loss": 0.2047, "step": 15210 }, { "epoch": 0.7055194805194805, "grad_norm": 12.916365623474121, "learning_rate": 2.0288974496473925e-06, "loss": 0.3579, "step": 15211 }, { "epoch": 0.7055658627087199, "grad_norm": 5.861364364624023, "learning_rate": 2.028305561434553e-06, "loss": 0.2915, "step": 15212 }, { "epoch": 0.7056122448979592, "grad_norm": 6.5181989669799805, "learning_rate": 2.027713737601906e-06, "loss": 0.3663, "step": 15213 }, { "epoch": 0.7056586270871985, "grad_norm": 6.425895690917969, "learning_rate": 2.0271219781622737e-06, "loss": 0.3142, "step": 15214 }, { "epoch": 0.7057050092764379, "grad_norm": 5.305899620056152, "learning_rate": 2.026530283128475e-06, "loss": 0.2584, "step": 15215 }, { "epoch": 0.7057513914656772, "grad_norm": 7.998850345611572, "learning_rate": 2.0259386525133316e-06, "loss": 0.2852, "step": 15216 }, { "epoch": 0.7057977736549165, "grad_norm": 8.737682342529297, "learning_rate": 2.025347086329657e-06, "loss": 0.4381, "step": 15217 }, { "epoch": 0.7058441558441558, "grad_norm": 13.626930236816406, "learning_rate": 2.0247555845902684e-06, "loss": 0.3096, "step": 15218 }, { "epoch": 0.7058905380333952, "grad_norm": 9.854142189025879, "learning_rate": 2.024164147307981e-06, "loss": 0.2694, "step": 15219 }, { "epoch": 0.7059369202226345, "grad_norm": 4.8182172775268555, "learning_rate": 2.023572774495608e-06, "loss": 0.2966, "step": 15220 }, { "epoch": 0.7059833024118738, "grad_norm": 5.745126247406006, "learning_rate": 2.022981466165962e-06, "loss": 0.3044, "step": 15221 }, { "epoch": 0.7060296846011131, "grad_norm": 7.4143452644348145, "learning_rate": 2.0223902223318508e-06, "loss": 0.3483, "step": 15222 }, { "epoch": 0.7060760667903525, "grad_norm": 5.018831729888916, "learning_rate": 2.0217990430060854e-06, "loss": 0.2782, "step": 15223 }, { "epoch": 0.7061224489795919, "grad_norm": 7.293615341186523, "learning_rate": 2.0212079282014725e-06, "loss": 0.3995, "step": 15224 }, { "epoch": 0.7061688311688312, "grad_norm": 6.491857051849365, "learning_rate": 2.020616877930819e-06, "loss": 0.3316, "step": 15225 }, { "epoch": 0.7062152133580705, "grad_norm": 4.038021564483643, "learning_rate": 2.020025892206929e-06, "loss": 0.2341, "step": 15226 }, { "epoch": 0.7062615955473098, "grad_norm": 5.681384086608887, "learning_rate": 2.0194349710426083e-06, "loss": 0.2489, "step": 15227 }, { "epoch": 0.7063079777365492, "grad_norm": 6.888150691986084, "learning_rate": 2.0188441144506548e-06, "loss": 0.2423, "step": 15228 }, { "epoch": 0.7063543599257885, "grad_norm": 7.5568413734436035, "learning_rate": 2.0182533224438716e-06, "loss": 0.3496, "step": 15229 }, { "epoch": 0.7064007421150278, "grad_norm": 7.005921840667725, "learning_rate": 2.017662595035057e-06, "loss": 0.2895, "step": 15230 }, { "epoch": 0.7064471243042671, "grad_norm": 5.5238356590271, "learning_rate": 2.0170719322370095e-06, "loss": 0.2684, "step": 15231 }, { "epoch": 0.7064935064935065, "grad_norm": 10.397012710571289, "learning_rate": 2.016481334062527e-06, "loss": 0.273, "step": 15232 }, { "epoch": 0.7065398886827459, "grad_norm": 11.384908676147461, "learning_rate": 2.015890800524401e-06, "loss": 0.3454, "step": 15233 }, { "epoch": 0.7065862708719851, "grad_norm": 10.222142219543457, "learning_rate": 2.015300331635427e-06, "loss": 0.4149, "step": 15234 }, { "epoch": 0.7066326530612245, "grad_norm": 5.536592483520508, "learning_rate": 2.014709927408397e-06, "loss": 0.2581, "step": 15235 }, { "epoch": 0.7066790352504638, "grad_norm": 6.021576404571533, "learning_rate": 2.0141195878561016e-06, "loss": 0.2883, "step": 15236 }, { "epoch": 0.7067254174397032, "grad_norm": 5.164721965789795, "learning_rate": 2.0135293129913304e-06, "loss": 0.199, "step": 15237 }, { "epoch": 0.7067717996289424, "grad_norm": 7.72465705871582, "learning_rate": 2.0129391028268736e-06, "loss": 0.3614, "step": 15238 }, { "epoch": 0.7068181818181818, "grad_norm": 5.951416969299316, "learning_rate": 2.0123489573755132e-06, "loss": 0.3158, "step": 15239 }, { "epoch": 0.7068645640074211, "grad_norm": 6.197658538818359, "learning_rate": 2.0117588766500375e-06, "loss": 0.3342, "step": 15240 }, { "epoch": 0.7069109461966605, "grad_norm": 4.992285251617432, "learning_rate": 2.0111688606632298e-06, "loss": 0.3547, "step": 15241 }, { "epoch": 0.7069573283858999, "grad_norm": 15.412041664123535, "learning_rate": 2.0105789094278714e-06, "loss": 0.2818, "step": 15242 }, { "epoch": 0.7070037105751391, "grad_norm": 8.397058486938477, "learning_rate": 2.0099890229567466e-06, "loss": 0.3167, "step": 15243 }, { "epoch": 0.7070500927643785, "grad_norm": 5.982978343963623, "learning_rate": 2.0093992012626308e-06, "loss": 0.2479, "step": 15244 }, { "epoch": 0.7070964749536178, "grad_norm": 10.772472381591797, "learning_rate": 2.0088094443583035e-06, "loss": 0.3402, "step": 15245 }, { "epoch": 0.7071428571428572, "grad_norm": 12.158406257629395, "learning_rate": 2.0082197522565423e-06, "loss": 0.3321, "step": 15246 }, { "epoch": 0.7071892393320964, "grad_norm": 10.49577808380127, "learning_rate": 2.0076301249701217e-06, "loss": 0.4248, "step": 15247 }, { "epoch": 0.7072356215213358, "grad_norm": 10.948915481567383, "learning_rate": 2.0070405625118162e-06, "loss": 0.3447, "step": 15248 }, { "epoch": 0.7072820037105751, "grad_norm": 4.433846950531006, "learning_rate": 2.0064510648944004e-06, "loss": 0.3644, "step": 15249 }, { "epoch": 0.7073283858998145, "grad_norm": 9.940762519836426, "learning_rate": 2.005861632130641e-06, "loss": 0.1892, "step": 15250 }, { "epoch": 0.7073747680890538, "grad_norm": 9.455199241638184, "learning_rate": 2.0052722642333113e-06, "loss": 0.2902, "step": 15251 }, { "epoch": 0.7074211502782931, "grad_norm": 12.930466651916504, "learning_rate": 2.0046829612151777e-06, "loss": 0.3595, "step": 15252 }, { "epoch": 0.7074675324675325, "grad_norm": 8.939598083496094, "learning_rate": 2.0040937230890073e-06, "loss": 0.3568, "step": 15253 }, { "epoch": 0.7075139146567718, "grad_norm": 4.456233501434326, "learning_rate": 2.0035045498675687e-06, "loss": 0.2792, "step": 15254 }, { "epoch": 0.7075602968460112, "grad_norm": 9.221426010131836, "learning_rate": 2.0029154415636216e-06, "loss": 0.4259, "step": 15255 }, { "epoch": 0.7076066790352504, "grad_norm": 5.3291015625, "learning_rate": 2.002326398189931e-06, "loss": 0.3089, "step": 15256 }, { "epoch": 0.7076530612244898, "grad_norm": 17.273347854614258, "learning_rate": 2.001737419759257e-06, "loss": 0.3675, "step": 15257 }, { "epoch": 0.7076994434137291, "grad_norm": 6.790149688720703, "learning_rate": 2.001148506284361e-06, "loss": 0.3147, "step": 15258 }, { "epoch": 0.7077458256029685, "grad_norm": 6.308183670043945, "learning_rate": 2.000559657778001e-06, "loss": 0.3426, "step": 15259 }, { "epoch": 0.7077922077922078, "grad_norm": 4.98654842376709, "learning_rate": 1.9999708742529354e-06, "loss": 0.2967, "step": 15260 }, { "epoch": 0.7078385899814471, "grad_norm": 5.766470909118652, "learning_rate": 1.9993821557219173e-06, "loss": 0.3057, "step": 15261 }, { "epoch": 0.7078849721706865, "grad_norm": 4.9100823402404785, "learning_rate": 1.998793502197701e-06, "loss": 0.2262, "step": 15262 }, { "epoch": 0.7079313543599258, "grad_norm": 8.905472755432129, "learning_rate": 1.9982049136930416e-06, "loss": 0.382, "step": 15263 }, { "epoch": 0.7079777365491651, "grad_norm": 3.9582579135894775, "learning_rate": 1.997616390220689e-06, "loss": 0.2603, "step": 15264 }, { "epoch": 0.7080241187384044, "grad_norm": 5.75437593460083, "learning_rate": 1.9970279317933953e-06, "loss": 0.2293, "step": 15265 }, { "epoch": 0.7080705009276438, "grad_norm": 7.8631696701049805, "learning_rate": 1.9964395384239067e-06, "loss": 0.3332, "step": 15266 }, { "epoch": 0.7081168831168831, "grad_norm": 7.222097873687744, "learning_rate": 1.9958512101249705e-06, "loss": 0.2435, "step": 15267 }, { "epoch": 0.7081632653061225, "grad_norm": 5.122314453125, "learning_rate": 1.9952629469093333e-06, "loss": 0.2568, "step": 15268 }, { "epoch": 0.7082096474953617, "grad_norm": 6.296548366546631, "learning_rate": 1.9946747487897392e-06, "loss": 0.2361, "step": 15269 }, { "epoch": 0.7082560296846011, "grad_norm": 6.117075443267822, "learning_rate": 1.994086615778934e-06, "loss": 0.3084, "step": 15270 }, { "epoch": 0.7083024118738405, "grad_norm": 10.130828857421875, "learning_rate": 1.9934985478896548e-06, "loss": 0.3392, "step": 15271 }, { "epoch": 0.7083487940630798, "grad_norm": 5.388071537017822, "learning_rate": 1.9929105451346436e-06, "loss": 0.2435, "step": 15272 }, { "epoch": 0.7083951762523191, "grad_norm": 9.616180419921875, "learning_rate": 1.99232260752664e-06, "loss": 0.3912, "step": 15273 }, { "epoch": 0.7084415584415584, "grad_norm": 7.642026901245117, "learning_rate": 1.99173473507838e-06, "loss": 0.3717, "step": 15274 }, { "epoch": 0.7084879406307978, "grad_norm": 5.481226444244385, "learning_rate": 1.991146927802601e-06, "loss": 0.2666, "step": 15275 }, { "epoch": 0.7085343228200371, "grad_norm": 5.818635940551758, "learning_rate": 1.9905591857120376e-06, "loss": 0.331, "step": 15276 }, { "epoch": 0.7085807050092764, "grad_norm": 7.559150695800781, "learning_rate": 1.989971508819421e-06, "loss": 0.2839, "step": 15277 }, { "epoch": 0.7086270871985157, "grad_norm": 12.785374641418457, "learning_rate": 1.989383897137484e-06, "loss": 0.2813, "step": 15278 }, { "epoch": 0.7086734693877551, "grad_norm": 4.412975311279297, "learning_rate": 1.9887963506789566e-06, "loss": 0.2233, "step": 15279 }, { "epoch": 0.7087198515769945, "grad_norm": 6.224394798278809, "learning_rate": 1.9882088694565683e-06, "loss": 0.1516, "step": 15280 }, { "epoch": 0.7087662337662337, "grad_norm": 6.946048736572266, "learning_rate": 1.987621453483048e-06, "loss": 0.339, "step": 15281 }, { "epoch": 0.7088126159554731, "grad_norm": 6.906554222106934, "learning_rate": 1.987034102771118e-06, "loss": 0.3363, "step": 15282 }, { "epoch": 0.7088589981447124, "grad_norm": 6.589277744293213, "learning_rate": 1.9864468173335057e-06, "loss": 0.3445, "step": 15283 }, { "epoch": 0.7089053803339518, "grad_norm": 6.45619535446167, "learning_rate": 1.985859597182933e-06, "loss": 0.2907, "step": 15284 }, { "epoch": 0.7089517625231911, "grad_norm": 10.505861282348633, "learning_rate": 1.985272442332123e-06, "loss": 0.4657, "step": 15285 }, { "epoch": 0.7089981447124304, "grad_norm": 5.962786674499512, "learning_rate": 1.984685352793795e-06, "loss": 0.3464, "step": 15286 }, { "epoch": 0.7090445269016697, "grad_norm": 15.395394325256348, "learning_rate": 1.98409832858067e-06, "loss": 0.2823, "step": 15287 }, { "epoch": 0.7090909090909091, "grad_norm": 7.416203022003174, "learning_rate": 1.983511369705462e-06, "loss": 0.3026, "step": 15288 }, { "epoch": 0.7091372912801485, "grad_norm": 13.735390663146973, "learning_rate": 1.9829244761808898e-06, "loss": 0.4814, "step": 15289 }, { "epoch": 0.7091836734693877, "grad_norm": 9.806570053100586, "learning_rate": 1.982337648019667e-06, "loss": 0.3581, "step": 15290 }, { "epoch": 0.7092300556586271, "grad_norm": 9.035317420959473, "learning_rate": 1.9817508852345073e-06, "loss": 0.4344, "step": 15291 }, { "epoch": 0.7092764378478664, "grad_norm": 7.0567426681518555, "learning_rate": 1.981164187838125e-06, "loss": 0.3464, "step": 15292 }, { "epoch": 0.7093228200371058, "grad_norm": 5.100978851318359, "learning_rate": 1.9805775558432265e-06, "loss": 0.2646, "step": 15293 }, { "epoch": 0.709369202226345, "grad_norm": 9.820916175842285, "learning_rate": 1.9799909892625225e-06, "loss": 0.2473, "step": 15294 }, { "epoch": 0.7094155844155844, "grad_norm": 5.422181606292725, "learning_rate": 1.979404488108721e-06, "loss": 0.2705, "step": 15295 }, { "epoch": 0.7094619666048237, "grad_norm": 7.133869171142578, "learning_rate": 1.978818052394528e-06, "loss": 0.3127, "step": 15296 }, { "epoch": 0.7095083487940631, "grad_norm": 7.201056480407715, "learning_rate": 1.9782316821326482e-06, "loss": 0.266, "step": 15297 }, { "epoch": 0.7095547309833025, "grad_norm": 4.406808376312256, "learning_rate": 1.977645377335787e-06, "loss": 0.2011, "step": 15298 }, { "epoch": 0.7096011131725417, "grad_norm": 11.76026725769043, "learning_rate": 1.977059138016643e-06, "loss": 0.4764, "step": 15299 }, { "epoch": 0.7096474953617811, "grad_norm": 6.59285831451416, "learning_rate": 1.976472964187918e-06, "loss": 0.2727, "step": 15300 }, { "epoch": 0.7096938775510204, "grad_norm": 8.7269926071167, "learning_rate": 1.975886855862311e-06, "loss": 0.3409, "step": 15301 }, { "epoch": 0.7097402597402598, "grad_norm": 7.096630096435547, "learning_rate": 1.9753008130525214e-06, "loss": 0.2778, "step": 15302 }, { "epoch": 0.709786641929499, "grad_norm": 5.614644527435303, "learning_rate": 1.974714835771245e-06, "loss": 0.3241, "step": 15303 }, { "epoch": 0.7098330241187384, "grad_norm": 11.222362518310547, "learning_rate": 1.9741289240311757e-06, "loss": 0.2867, "step": 15304 }, { "epoch": 0.7098794063079777, "grad_norm": 4.133719444274902, "learning_rate": 1.973543077845006e-06, "loss": 0.2553, "step": 15305 }, { "epoch": 0.7099257884972171, "grad_norm": 8.446269035339355, "learning_rate": 1.9729572972254298e-06, "loss": 0.3135, "step": 15306 }, { "epoch": 0.7099721706864563, "grad_norm": 3.873816967010498, "learning_rate": 1.972371582185137e-06, "loss": 0.197, "step": 15307 }, { "epoch": 0.7100185528756957, "grad_norm": 8.138599395751953, "learning_rate": 1.9717859327368167e-06, "loss": 0.3135, "step": 15308 }, { "epoch": 0.7100649350649351, "grad_norm": 7.011111736297607, "learning_rate": 1.9712003488931593e-06, "loss": 0.3592, "step": 15309 }, { "epoch": 0.7101113172541744, "grad_norm": 7.143978118896484, "learning_rate": 1.9706148306668466e-06, "loss": 0.2905, "step": 15310 }, { "epoch": 0.7101576994434138, "grad_norm": 6.520504951477051, "learning_rate": 1.970029378070566e-06, "loss": 0.2572, "step": 15311 }, { "epoch": 0.710204081632653, "grad_norm": 5.566850185394287, "learning_rate": 1.9694439911170004e-06, "loss": 0.3629, "step": 15312 }, { "epoch": 0.7102504638218924, "grad_norm": 8.874297142028809, "learning_rate": 1.968858669818833e-06, "loss": 0.252, "step": 15313 }, { "epoch": 0.7102968460111317, "grad_norm": 9.038893699645996, "learning_rate": 1.9682734141887445e-06, "loss": 0.3202, "step": 15314 }, { "epoch": 0.7103432282003711, "grad_norm": 6.630188465118408, "learning_rate": 1.9676882242394125e-06, "loss": 0.378, "step": 15315 }, { "epoch": 0.7103896103896103, "grad_norm": 9.342208862304688, "learning_rate": 1.967103099983515e-06, "loss": 0.3819, "step": 15316 }, { "epoch": 0.7104359925788497, "grad_norm": 15.096817970275879, "learning_rate": 1.96651804143373e-06, "loss": 0.3462, "step": 15317 }, { "epoch": 0.7104823747680891, "grad_norm": 8.491125106811523, "learning_rate": 1.965933048602731e-06, "loss": 0.2479, "step": 15318 }, { "epoch": 0.7105287569573284, "grad_norm": 5.580245494842529, "learning_rate": 1.9653481215031926e-06, "loss": 0.2212, "step": 15319 }, { "epoch": 0.7105751391465677, "grad_norm": 9.292791366577148, "learning_rate": 1.9647632601477877e-06, "loss": 0.4437, "step": 15320 }, { "epoch": 0.710621521335807, "grad_norm": 7.769918918609619, "learning_rate": 1.9641784645491852e-06, "loss": 0.3753, "step": 15321 }, { "epoch": 0.7106679035250464, "grad_norm": 8.550896644592285, "learning_rate": 1.963593734720054e-06, "loss": 0.2752, "step": 15322 }, { "epoch": 0.7107142857142857, "grad_norm": 7.185947418212891, "learning_rate": 1.9630090706730636e-06, "loss": 0.2994, "step": 15323 }, { "epoch": 0.7107606679035251, "grad_norm": 4.913969993591309, "learning_rate": 1.9624244724208795e-06, "loss": 0.2768, "step": 15324 }, { "epoch": 0.7108070500927643, "grad_norm": 7.1674275398254395, "learning_rate": 1.9618399399761688e-06, "loss": 0.2793, "step": 15325 }, { "epoch": 0.7108534322820037, "grad_norm": 6.001863956451416, "learning_rate": 1.9612554733515914e-06, "loss": 0.3354, "step": 15326 }, { "epoch": 0.7108998144712431, "grad_norm": 10.669561386108398, "learning_rate": 1.9606710725598124e-06, "loss": 0.3597, "step": 15327 }, { "epoch": 0.7109461966604824, "grad_norm": 8.40686321258545, "learning_rate": 1.96008673761349e-06, "loss": 0.4157, "step": 15328 }, { "epoch": 0.7109925788497217, "grad_norm": 6.161930084228516, "learning_rate": 1.9595024685252863e-06, "loss": 0.3119, "step": 15329 }, { "epoch": 0.711038961038961, "grad_norm": 6.7364678382873535, "learning_rate": 1.9589182653078576e-06, "loss": 0.3069, "step": 15330 }, { "epoch": 0.7110853432282004, "grad_norm": 10.14411449432373, "learning_rate": 1.9583341279738623e-06, "loss": 0.4134, "step": 15331 }, { "epoch": 0.7111317254174397, "grad_norm": 4.917131423950195, "learning_rate": 1.957750056535952e-06, "loss": 0.2902, "step": 15332 }, { "epoch": 0.711178107606679, "grad_norm": 7.918565273284912, "learning_rate": 1.9571660510067825e-06, "loss": 0.3703, "step": 15333 }, { "epoch": 0.7112244897959183, "grad_norm": 6.0013909339904785, "learning_rate": 1.956582111399005e-06, "loss": 0.2598, "step": 15334 }, { "epoch": 0.7112708719851577, "grad_norm": 8.058666229248047, "learning_rate": 1.9559982377252714e-06, "loss": 0.4033, "step": 15335 }, { "epoch": 0.7113172541743971, "grad_norm": 5.213623523712158, "learning_rate": 1.9554144299982314e-06, "loss": 0.2773, "step": 15336 }, { "epoch": 0.7113636363636363, "grad_norm": 11.708312034606934, "learning_rate": 1.954830688230531e-06, "loss": 0.3183, "step": 15337 }, { "epoch": 0.7114100185528757, "grad_norm": 8.326841354370117, "learning_rate": 1.9542470124348173e-06, "loss": 0.3736, "step": 15338 }, { "epoch": 0.711456400742115, "grad_norm": 5.702556610107422, "learning_rate": 1.953663402623735e-06, "loss": 0.2009, "step": 15339 }, { "epoch": 0.7115027829313544, "grad_norm": 4.556715488433838, "learning_rate": 1.953079858809929e-06, "loss": 0.2832, "step": 15340 }, { "epoch": 0.7115491651205937, "grad_norm": 5.343597888946533, "learning_rate": 1.9524963810060405e-06, "loss": 0.349, "step": 15341 }, { "epoch": 0.711595547309833, "grad_norm": 8.157959938049316, "learning_rate": 1.9519129692247126e-06, "loss": 0.3284, "step": 15342 }, { "epoch": 0.7116419294990723, "grad_norm": 12.26318645477295, "learning_rate": 1.9513296234785807e-06, "loss": 0.3344, "step": 15343 }, { "epoch": 0.7116883116883117, "grad_norm": 4.932008743286133, "learning_rate": 1.950746343780284e-06, "loss": 0.3101, "step": 15344 }, { "epoch": 0.7117346938775511, "grad_norm": 8.145286560058594, "learning_rate": 1.95016313014246e-06, "loss": 0.355, "step": 15345 }, { "epoch": 0.7117810760667903, "grad_norm": 10.905672073364258, "learning_rate": 1.949579982577743e-06, "loss": 0.403, "step": 15346 }, { "epoch": 0.7118274582560297, "grad_norm": 9.106733322143555, "learning_rate": 1.9489969010987687e-06, "loss": 0.3629, "step": 15347 }, { "epoch": 0.711873840445269, "grad_norm": 5.382136821746826, "learning_rate": 1.948413885718165e-06, "loss": 0.2682, "step": 15348 }, { "epoch": 0.7119202226345084, "grad_norm": 7.447577476501465, "learning_rate": 1.9478309364485656e-06, "loss": 0.2753, "step": 15349 }, { "epoch": 0.7119666048237476, "grad_norm": 9.110587120056152, "learning_rate": 1.947248053302598e-06, "loss": 0.2871, "step": 15350 }, { "epoch": 0.712012987012987, "grad_norm": 5.184500694274902, "learning_rate": 1.946665236292892e-06, "loss": 0.3264, "step": 15351 }, { "epoch": 0.7120593692022263, "grad_norm": 5.5349812507629395, "learning_rate": 1.9460824854320755e-06, "loss": 0.3415, "step": 15352 }, { "epoch": 0.7121057513914657, "grad_norm": 9.002121925354004, "learning_rate": 1.9454998007327686e-06, "loss": 0.3882, "step": 15353 }, { "epoch": 0.7121521335807051, "grad_norm": 12.163407325744629, "learning_rate": 1.9449171822075973e-06, "loss": 0.5081, "step": 15354 }, { "epoch": 0.7121985157699443, "grad_norm": 6.407333850860596, "learning_rate": 1.9443346298691845e-06, "loss": 0.4133, "step": 15355 }, { "epoch": 0.7122448979591837, "grad_norm": 15.951995849609375, "learning_rate": 1.9437521437301497e-06, "loss": 0.3201, "step": 15356 }, { "epoch": 0.712291280148423, "grad_norm": 7.365970134735107, "learning_rate": 1.9431697238031133e-06, "loss": 0.3866, "step": 15357 }, { "epoch": 0.7123376623376624, "grad_norm": 6.201054573059082, "learning_rate": 1.9425873701006936e-06, "loss": 0.3356, "step": 15358 }, { "epoch": 0.7123840445269016, "grad_norm": 9.207755088806152, "learning_rate": 1.9420050826355045e-06, "loss": 0.3665, "step": 15359 }, { "epoch": 0.712430426716141, "grad_norm": 13.843910217285156, "learning_rate": 1.9414228614201626e-06, "loss": 0.367, "step": 15360 }, { "epoch": 0.7124768089053803, "grad_norm": 5.456550598144531, "learning_rate": 1.9408407064672807e-06, "loss": 0.2611, "step": 15361 }, { "epoch": 0.7125231910946197, "grad_norm": 9.39140510559082, "learning_rate": 1.9402586177894715e-06, "loss": 0.3502, "step": 15362 }, { "epoch": 0.712569573283859, "grad_norm": 7.930438995361328, "learning_rate": 1.9396765953993474e-06, "loss": 0.3176, "step": 15363 }, { "epoch": 0.7126159554730983, "grad_norm": 6.059325218200684, "learning_rate": 1.939094639309514e-06, "loss": 0.2489, "step": 15364 }, { "epoch": 0.7126623376623377, "grad_norm": 19.332862854003906, "learning_rate": 1.9385127495325806e-06, "loss": 0.4646, "step": 15365 }, { "epoch": 0.712708719851577, "grad_norm": 5.710547924041748, "learning_rate": 1.9379309260811537e-06, "loss": 0.3144, "step": 15366 }, { "epoch": 0.7127551020408164, "grad_norm": 5.753734588623047, "learning_rate": 1.937349168967838e-06, "loss": 0.3419, "step": 15367 }, { "epoch": 0.7128014842300556, "grad_norm": 6.031022071838379, "learning_rate": 1.9367674782052376e-06, "loss": 0.2982, "step": 15368 }, { "epoch": 0.712847866419295, "grad_norm": 10.965659141540527, "learning_rate": 1.9361858538059557e-06, "loss": 0.3869, "step": 15369 }, { "epoch": 0.7128942486085343, "grad_norm": 10.404810905456543, "learning_rate": 1.935604295782589e-06, "loss": 0.4768, "step": 15370 }, { "epoch": 0.7129406307977737, "grad_norm": 8.964715003967285, "learning_rate": 1.93502280414774e-06, "loss": 0.3672, "step": 15371 }, { "epoch": 0.712987012987013, "grad_norm": 6.966900825500488, "learning_rate": 1.934441378914005e-06, "loss": 0.2722, "step": 15372 }, { "epoch": 0.7130333951762523, "grad_norm": 5.506345748901367, "learning_rate": 1.9338600200939805e-06, "loss": 0.3182, "step": 15373 }, { "epoch": 0.7130797773654917, "grad_norm": 5.117801666259766, "learning_rate": 1.933278727700263e-06, "loss": 0.2624, "step": 15374 }, { "epoch": 0.713126159554731, "grad_norm": 8.015024185180664, "learning_rate": 1.9326975017454427e-06, "loss": 0.3146, "step": 15375 }, { "epoch": 0.7131725417439703, "grad_norm": 6.213071346282959, "learning_rate": 1.932116342242113e-06, "loss": 0.2687, "step": 15376 }, { "epoch": 0.7132189239332096, "grad_norm": 7.964413642883301, "learning_rate": 1.931535249202865e-06, "loss": 0.332, "step": 15377 }, { "epoch": 0.713265306122449, "grad_norm": 10.0925931930542, "learning_rate": 1.930954222640287e-06, "loss": 0.4466, "step": 15378 }, { "epoch": 0.7133116883116883, "grad_norm": 10.563632011413574, "learning_rate": 1.9303732625669674e-06, "loss": 0.4009, "step": 15379 }, { "epoch": 0.7133580705009277, "grad_norm": 7.960164546966553, "learning_rate": 1.9297923689954932e-06, "loss": 0.3278, "step": 15380 }, { "epoch": 0.713404452690167, "grad_norm": 6.230878829956055, "learning_rate": 1.9292115419384468e-06, "loss": 0.3019, "step": 15381 }, { "epoch": 0.7134508348794063, "grad_norm": 7.8159356117248535, "learning_rate": 1.928630781408413e-06, "loss": 0.2926, "step": 15382 }, { "epoch": 0.7134972170686457, "grad_norm": 4.000674247741699, "learning_rate": 1.9280500874179723e-06, "loss": 0.3041, "step": 15383 }, { "epoch": 0.713543599257885, "grad_norm": 5.247339248657227, "learning_rate": 1.9274694599797067e-06, "loss": 0.249, "step": 15384 }, { "epoch": 0.7135899814471243, "grad_norm": 8.466743469238281, "learning_rate": 1.926888899106196e-06, "loss": 0.3385, "step": 15385 }, { "epoch": 0.7136363636363636, "grad_norm": 10.423837661743164, "learning_rate": 1.926308404810015e-06, "loss": 0.4182, "step": 15386 }, { "epoch": 0.713682745825603, "grad_norm": 10.062897682189941, "learning_rate": 1.9257279771037414e-06, "loss": 0.3703, "step": 15387 }, { "epoch": 0.7137291280148423, "grad_norm": 3.8112735748291016, "learning_rate": 1.9251476159999495e-06, "loss": 0.2506, "step": 15388 }, { "epoch": 0.7137755102040816, "grad_norm": 4.34059476852417, "learning_rate": 1.9245673215112124e-06, "loss": 0.2725, "step": 15389 }, { "epoch": 0.713821892393321, "grad_norm": 5.41550874710083, "learning_rate": 1.9239870936501027e-06, "loss": 0.3078, "step": 15390 }, { "epoch": 0.7138682745825603, "grad_norm": 4.155821800231934, "learning_rate": 1.9234069324291914e-06, "loss": 0.2357, "step": 15391 }, { "epoch": 0.7139146567717997, "grad_norm": 4.906888484954834, "learning_rate": 1.922826837861045e-06, "loss": 0.2901, "step": 15392 }, { "epoch": 0.7139610389610389, "grad_norm": 6.581816673278809, "learning_rate": 1.922246809958232e-06, "loss": 0.3103, "step": 15393 }, { "epoch": 0.7140074211502783, "grad_norm": 6.828880786895752, "learning_rate": 1.9216668487333177e-06, "loss": 0.2843, "step": 15394 }, { "epoch": 0.7140538033395176, "grad_norm": 7.297598361968994, "learning_rate": 1.9210869541988685e-06, "loss": 0.3759, "step": 15395 }, { "epoch": 0.714100185528757, "grad_norm": 3.432521104812622, "learning_rate": 1.920507126367448e-06, "loss": 0.1736, "step": 15396 }, { "epoch": 0.7141465677179963, "grad_norm": 9.634258270263672, "learning_rate": 1.919927365251614e-06, "loss": 0.2793, "step": 15397 }, { "epoch": 0.7141929499072356, "grad_norm": 8.994161605834961, "learning_rate": 1.91934767086393e-06, "loss": 0.4448, "step": 15398 }, { "epoch": 0.7142393320964749, "grad_norm": 5.371576309204102, "learning_rate": 1.918768043216953e-06, "loss": 0.2171, "step": 15399 }, { "epoch": 0.7142857142857143, "grad_norm": 4.604323863983154, "learning_rate": 1.918188482323242e-06, "loss": 0.2877, "step": 15400 }, { "epoch": 0.7143320964749537, "grad_norm": 5.808309555053711, "learning_rate": 1.9176089881953513e-06, "loss": 0.2929, "step": 15401 }, { "epoch": 0.7143784786641929, "grad_norm": 7.533295154571533, "learning_rate": 1.917029560845838e-06, "loss": 0.2642, "step": 15402 }, { "epoch": 0.7144248608534323, "grad_norm": 5.371109962463379, "learning_rate": 1.9164502002872513e-06, "loss": 0.288, "step": 15403 }, { "epoch": 0.7144712430426716, "grad_norm": 5.660982131958008, "learning_rate": 1.9158709065321445e-06, "loss": 0.216, "step": 15404 }, { "epoch": 0.714517625231911, "grad_norm": 8.179988861083984, "learning_rate": 1.915291679593068e-06, "loss": 0.478, "step": 15405 }, { "epoch": 0.7145640074211502, "grad_norm": 8.059043884277344, "learning_rate": 1.9147125194825706e-06, "loss": 0.3797, "step": 15406 }, { "epoch": 0.7146103896103896, "grad_norm": 9.725671768188477, "learning_rate": 1.9141334262132007e-06, "loss": 0.3384, "step": 15407 }, { "epoch": 0.7146567717996289, "grad_norm": 6.026042461395264, "learning_rate": 1.913554399797501e-06, "loss": 0.3912, "step": 15408 }, { "epoch": 0.7147031539888683, "grad_norm": 4.807184219360352, "learning_rate": 1.9129754402480167e-06, "loss": 0.2117, "step": 15409 }, { "epoch": 0.7147495361781077, "grad_norm": 5.274288654327393, "learning_rate": 1.9123965475772915e-06, "loss": 0.2996, "step": 15410 }, { "epoch": 0.7147959183673469, "grad_norm": 9.567492485046387, "learning_rate": 1.9118177217978668e-06, "loss": 0.421, "step": 15411 }, { "epoch": 0.7148423005565863, "grad_norm": 5.417550563812256, "learning_rate": 1.911238962922282e-06, "loss": 0.2739, "step": 15412 }, { "epoch": 0.7148886827458256, "grad_norm": 22.601497650146484, "learning_rate": 1.910660270963078e-06, "loss": 0.4497, "step": 15413 }, { "epoch": 0.714935064935065, "grad_norm": 6.626099586486816, "learning_rate": 1.9100816459327874e-06, "loss": 0.2781, "step": 15414 }, { "epoch": 0.7149814471243042, "grad_norm": 12.441990852355957, "learning_rate": 1.9095030878439484e-06, "loss": 0.3372, "step": 15415 }, { "epoch": 0.7150278293135436, "grad_norm": 11.906569480895996, "learning_rate": 1.9089245967090952e-06, "loss": 0.4226, "step": 15416 }, { "epoch": 0.7150742115027829, "grad_norm": 8.4203519821167, "learning_rate": 1.9083461725407604e-06, "loss": 0.3281, "step": 15417 }, { "epoch": 0.7151205936920223, "grad_norm": 7.511417388916016, "learning_rate": 1.9077678153514763e-06, "loss": 0.3099, "step": 15418 }, { "epoch": 0.7151669758812615, "grad_norm": 11.144211769104004, "learning_rate": 1.9071895251537703e-06, "loss": 0.3569, "step": 15419 }, { "epoch": 0.7152133580705009, "grad_norm": 5.480036735534668, "learning_rate": 1.9066113019601717e-06, "loss": 0.2616, "step": 15420 }, { "epoch": 0.7152597402597403, "grad_norm": 11.758804321289062, "learning_rate": 1.9060331457832076e-06, "loss": 0.511, "step": 15421 }, { "epoch": 0.7153061224489796, "grad_norm": 5.9771013259887695, "learning_rate": 1.9054550566354035e-06, "loss": 0.1865, "step": 15422 }, { "epoch": 0.715352504638219, "grad_norm": 11.080832481384277, "learning_rate": 1.9048770345292832e-06, "loss": 0.323, "step": 15423 }, { "epoch": 0.7153988868274582, "grad_norm": 5.118948936462402, "learning_rate": 1.9042990794773713e-06, "loss": 0.2805, "step": 15424 }, { "epoch": 0.7154452690166976, "grad_norm": 9.238343238830566, "learning_rate": 1.9037211914921854e-06, "loss": 0.3644, "step": 15425 }, { "epoch": 0.7154916512059369, "grad_norm": 4.549191951751709, "learning_rate": 1.9031433705862468e-06, "loss": 0.3393, "step": 15426 }, { "epoch": 0.7155380333951763, "grad_norm": 5.580140113830566, "learning_rate": 1.9025656167720735e-06, "loss": 0.2129, "step": 15427 }, { "epoch": 0.7155844155844155, "grad_norm": 10.259093284606934, "learning_rate": 1.901987930062183e-06, "loss": 0.4327, "step": 15428 }, { "epoch": 0.7156307977736549, "grad_norm": 5.451864242553711, "learning_rate": 1.9014103104690911e-06, "loss": 0.2764, "step": 15429 }, { "epoch": 0.7156771799628943, "grad_norm": 7.614006042480469, "learning_rate": 1.9008327580053087e-06, "loss": 0.2615, "step": 15430 }, { "epoch": 0.7157235621521336, "grad_norm": 5.178450584411621, "learning_rate": 1.9002552726833502e-06, "loss": 0.2161, "step": 15431 }, { "epoch": 0.7157699443413729, "grad_norm": 4.661564826965332, "learning_rate": 1.8996778545157263e-06, "loss": 0.2694, "step": 15432 }, { "epoch": 0.7158163265306122, "grad_norm": 4.611180305480957, "learning_rate": 1.8991005035149462e-06, "loss": 0.3248, "step": 15433 }, { "epoch": 0.7158627087198516, "grad_norm": 6.926891326904297, "learning_rate": 1.8985232196935198e-06, "loss": 0.3221, "step": 15434 }, { "epoch": 0.7159090909090909, "grad_norm": 3.4709694385528564, "learning_rate": 1.8979460030639508e-06, "loss": 0.2927, "step": 15435 }, { "epoch": 0.7159554730983302, "grad_norm": 12.847028732299805, "learning_rate": 1.8973688536387446e-06, "loss": 0.3605, "step": 15436 }, { "epoch": 0.7160018552875695, "grad_norm": 4.411762714385986, "learning_rate": 1.8967917714304068e-06, "loss": 0.3024, "step": 15437 }, { "epoch": 0.7160482374768089, "grad_norm": 5.967800617218018, "learning_rate": 1.8962147564514378e-06, "loss": 0.3621, "step": 15438 }, { "epoch": 0.7160946196660483, "grad_norm": 4.606771945953369, "learning_rate": 1.895637808714339e-06, "loss": 0.3089, "step": 15439 }, { "epoch": 0.7161410018552876, "grad_norm": 4.473019599914551, "learning_rate": 1.8950609282316119e-06, "loss": 0.3024, "step": 15440 }, { "epoch": 0.7161873840445269, "grad_norm": 6.822780609130859, "learning_rate": 1.89448411501575e-06, "loss": 0.3791, "step": 15441 }, { "epoch": 0.7162337662337662, "grad_norm": 9.331214904785156, "learning_rate": 1.893907369079252e-06, "loss": 0.3141, "step": 15442 }, { "epoch": 0.7162801484230056, "grad_norm": 11.984057426452637, "learning_rate": 1.893330690434612e-06, "loss": 0.3616, "step": 15443 }, { "epoch": 0.7163265306122449, "grad_norm": 6.6118621826171875, "learning_rate": 1.8927540790943244e-06, "loss": 0.3155, "step": 15444 }, { "epoch": 0.7163729128014842, "grad_norm": 9.091466903686523, "learning_rate": 1.8921775350708827e-06, "loss": 0.3278, "step": 15445 }, { "epoch": 0.7164192949907235, "grad_norm": 5.56758975982666, "learning_rate": 1.8916010583767736e-06, "loss": 0.3275, "step": 15446 }, { "epoch": 0.7164656771799629, "grad_norm": 4.357637405395508, "learning_rate": 1.8910246490244876e-06, "loss": 0.3091, "step": 15447 }, { "epoch": 0.7165120593692023, "grad_norm": 11.402286529541016, "learning_rate": 1.8904483070265133e-06, "loss": 0.3357, "step": 15448 }, { "epoch": 0.7165584415584415, "grad_norm": 10.239620208740234, "learning_rate": 1.8898720323953362e-06, "loss": 0.323, "step": 15449 }, { "epoch": 0.7166048237476809, "grad_norm": 11.642058372497559, "learning_rate": 1.8892958251434413e-06, "loss": 0.5503, "step": 15450 }, { "epoch": 0.7166512059369202, "grad_norm": 9.037622451782227, "learning_rate": 1.8887196852833128e-06, "loss": 0.4212, "step": 15451 }, { "epoch": 0.7166975881261596, "grad_norm": 6.591086387634277, "learning_rate": 1.88814361282743e-06, "loss": 0.3223, "step": 15452 }, { "epoch": 0.7167439703153989, "grad_norm": 7.601011753082275, "learning_rate": 1.8875676077882748e-06, "loss": 0.4728, "step": 15453 }, { "epoch": 0.7167903525046382, "grad_norm": 12.097243309020996, "learning_rate": 1.8869916701783254e-06, "loss": 0.3463, "step": 15454 }, { "epoch": 0.7168367346938775, "grad_norm": 8.579242706298828, "learning_rate": 1.8864158000100591e-06, "loss": 0.2857, "step": 15455 }, { "epoch": 0.7168831168831169, "grad_norm": 8.897868156433105, "learning_rate": 1.8858399972959545e-06, "loss": 0.3404, "step": 15456 }, { "epoch": 0.7169294990723563, "grad_norm": 6.627651691436768, "learning_rate": 1.8852642620484818e-06, "loss": 0.3258, "step": 15457 }, { "epoch": 0.7169758812615955, "grad_norm": 7.573047161102295, "learning_rate": 1.8846885942801164e-06, "loss": 0.2555, "step": 15458 }, { "epoch": 0.7170222634508349, "grad_norm": 13.543741226196289, "learning_rate": 1.8841129940033286e-06, "loss": 0.3672, "step": 15459 }, { "epoch": 0.7170686456400742, "grad_norm": 4.773664474487305, "learning_rate": 1.8835374612305902e-06, "loss": 0.293, "step": 15460 }, { "epoch": 0.7171150278293136, "grad_norm": 4.849971771240234, "learning_rate": 1.8829619959743705e-06, "loss": 0.3109, "step": 15461 }, { "epoch": 0.7171614100185528, "grad_norm": 9.477259635925293, "learning_rate": 1.8823865982471334e-06, "loss": 0.2549, "step": 15462 }, { "epoch": 0.7172077922077922, "grad_norm": 7.0007195472717285, "learning_rate": 1.8818112680613459e-06, "loss": 0.3039, "step": 15463 }, { "epoch": 0.7172541743970315, "grad_norm": 6.197604656219482, "learning_rate": 1.8812360054294725e-06, "loss": 0.3586, "step": 15464 }, { "epoch": 0.7173005565862709, "grad_norm": 12.138469696044922, "learning_rate": 1.8806608103639768e-06, "loss": 0.2952, "step": 15465 }, { "epoch": 0.7173469387755103, "grad_norm": 6.437589168548584, "learning_rate": 1.8800856828773207e-06, "loss": 0.2486, "step": 15466 }, { "epoch": 0.7173933209647495, "grad_norm": 6.52716064453125, "learning_rate": 1.879510622981961e-06, "loss": 0.3159, "step": 15467 }, { "epoch": 0.7174397031539889, "grad_norm": 5.451722621917725, "learning_rate": 1.8789356306903577e-06, "loss": 0.2706, "step": 15468 }, { "epoch": 0.7174860853432282, "grad_norm": 16.107088088989258, "learning_rate": 1.8783607060149678e-06, "loss": 0.4018, "step": 15469 }, { "epoch": 0.7175324675324676, "grad_norm": 10.571369171142578, "learning_rate": 1.8777858489682465e-06, "loss": 0.3906, "step": 15470 }, { "epoch": 0.7175788497217068, "grad_norm": 11.16736888885498, "learning_rate": 1.8772110595626503e-06, "loss": 0.5035, "step": 15471 }, { "epoch": 0.7176252319109462, "grad_norm": 5.485394477844238, "learning_rate": 1.8766363378106268e-06, "loss": 0.2864, "step": 15472 }, { "epoch": 0.7176716141001855, "grad_norm": 11.115092277526855, "learning_rate": 1.8760616837246303e-06, "loss": 0.3225, "step": 15473 }, { "epoch": 0.7177179962894249, "grad_norm": 6.3757429122924805, "learning_rate": 1.8754870973171096e-06, "loss": 0.2948, "step": 15474 }, { "epoch": 0.7177643784786641, "grad_norm": 8.964095115661621, "learning_rate": 1.8749125786005128e-06, "loss": 0.4046, "step": 15475 }, { "epoch": 0.7178107606679035, "grad_norm": 10.6954345703125, "learning_rate": 1.8743381275872885e-06, "loss": 0.5091, "step": 15476 }, { "epoch": 0.7178571428571429, "grad_norm": 4.990763187408447, "learning_rate": 1.8737637442898781e-06, "loss": 0.3637, "step": 15477 }, { "epoch": 0.7179035250463822, "grad_norm": 6.44413423538208, "learning_rate": 1.8731894287207275e-06, "loss": 0.3082, "step": 15478 }, { "epoch": 0.7179499072356216, "grad_norm": 5.165318012237549, "learning_rate": 1.8726151808922788e-06, "loss": 0.3108, "step": 15479 }, { "epoch": 0.7179962894248608, "grad_norm": 15.437202453613281, "learning_rate": 1.8720410008169727e-06, "loss": 0.2079, "step": 15480 }, { "epoch": 0.7180426716141002, "grad_norm": 4.592528820037842, "learning_rate": 1.87146688850725e-06, "loss": 0.199, "step": 15481 }, { "epoch": 0.7180890538033395, "grad_norm": 6.457483291625977, "learning_rate": 1.8708928439755454e-06, "loss": 0.3748, "step": 15482 }, { "epoch": 0.7181354359925789, "grad_norm": 10.89716911315918, "learning_rate": 1.8703188672342964e-06, "loss": 0.4575, "step": 15483 }, { "epoch": 0.7181818181818181, "grad_norm": 8.154093742370605, "learning_rate": 1.8697449582959392e-06, "loss": 0.3655, "step": 15484 }, { "epoch": 0.7182282003710575, "grad_norm": 7.544655799865723, "learning_rate": 1.869171117172906e-06, "loss": 0.2217, "step": 15485 }, { "epoch": 0.7182745825602969, "grad_norm": 21.97269058227539, "learning_rate": 1.8685973438776312e-06, "loss": 0.3802, "step": 15486 }, { "epoch": 0.7183209647495362, "grad_norm": 5.065865993499756, "learning_rate": 1.8680236384225415e-06, "loss": 0.3848, "step": 15487 }, { "epoch": 0.7183673469387755, "grad_norm": 9.258283615112305, "learning_rate": 1.8674500008200675e-06, "loss": 0.3253, "step": 15488 }, { "epoch": 0.7184137291280148, "grad_norm": 8.584562301635742, "learning_rate": 1.866876431082637e-06, "loss": 0.3234, "step": 15489 }, { "epoch": 0.7184601113172542, "grad_norm": 7.925050735473633, "learning_rate": 1.8663029292226765e-06, "loss": 0.3149, "step": 15490 }, { "epoch": 0.7185064935064935, "grad_norm": 6.007377624511719, "learning_rate": 1.8657294952526116e-06, "loss": 0.3826, "step": 15491 }, { "epoch": 0.7185528756957328, "grad_norm": 7.978658676147461, "learning_rate": 1.8651561291848623e-06, "loss": 0.3529, "step": 15492 }, { "epoch": 0.7185992578849721, "grad_norm": 6.032240867614746, "learning_rate": 1.864582831031852e-06, "loss": 0.2906, "step": 15493 }, { "epoch": 0.7186456400742115, "grad_norm": 6.938551425933838, "learning_rate": 1.864009600806001e-06, "loss": 0.3096, "step": 15494 }, { "epoch": 0.7186920222634509, "grad_norm": 9.292811393737793, "learning_rate": 1.8634364385197278e-06, "loss": 0.2865, "step": 15495 }, { "epoch": 0.7187384044526902, "grad_norm": 7.179138660430908, "learning_rate": 1.8628633441854515e-06, "loss": 0.4509, "step": 15496 }, { "epoch": 0.7187847866419295, "grad_norm": 12.499919891357422, "learning_rate": 1.8622903178155844e-06, "loss": 0.2394, "step": 15497 }, { "epoch": 0.7188311688311688, "grad_norm": 8.356718063354492, "learning_rate": 1.8617173594225429e-06, "loss": 0.3591, "step": 15498 }, { "epoch": 0.7188775510204082, "grad_norm": 5.7184929847717285, "learning_rate": 1.861144469018739e-06, "loss": 0.3395, "step": 15499 }, { "epoch": 0.7189239332096475, "grad_norm": 5.579331874847412, "learning_rate": 1.8605716466165852e-06, "loss": 0.2585, "step": 15500 }, { "epoch": 0.7189703153988868, "grad_norm": 5.275450229644775, "learning_rate": 1.8599988922284922e-06, "loss": 0.2726, "step": 15501 }, { "epoch": 0.7190166975881261, "grad_norm": 8.554790496826172, "learning_rate": 1.8594262058668656e-06, "loss": 0.328, "step": 15502 }, { "epoch": 0.7190630797773655, "grad_norm": 5.46584415435791, "learning_rate": 1.8588535875441138e-06, "loss": 0.3464, "step": 15503 }, { "epoch": 0.7191094619666049, "grad_norm": 14.477690696716309, "learning_rate": 1.8582810372726417e-06, "loss": 0.2946, "step": 15504 }, { "epoch": 0.7191558441558441, "grad_norm": 5.695080757141113, "learning_rate": 1.8577085550648544e-06, "loss": 0.3581, "step": 15505 }, { "epoch": 0.7192022263450835, "grad_norm": 10.140928268432617, "learning_rate": 1.857136140933155e-06, "loss": 0.3599, "step": 15506 }, { "epoch": 0.7192486085343228, "grad_norm": 10.084843635559082, "learning_rate": 1.8565637948899423e-06, "loss": 0.4623, "step": 15507 }, { "epoch": 0.7192949907235622, "grad_norm": 4.452348232269287, "learning_rate": 1.8559915169476167e-06, "loss": 0.3003, "step": 15508 }, { "epoch": 0.7193413729128015, "grad_norm": 7.957992076873779, "learning_rate": 1.8554193071185767e-06, "loss": 0.4042, "step": 15509 }, { "epoch": 0.7193877551020408, "grad_norm": 4.138017177581787, "learning_rate": 1.8548471654152188e-06, "loss": 0.3257, "step": 15510 }, { "epoch": 0.7194341372912801, "grad_norm": 8.46510124206543, "learning_rate": 1.8542750918499397e-06, "loss": 0.2907, "step": 15511 }, { "epoch": 0.7194805194805195, "grad_norm": 6.6450300216674805, "learning_rate": 1.8537030864351303e-06, "loss": 0.3484, "step": 15512 }, { "epoch": 0.7195269016697589, "grad_norm": 4.691097259521484, "learning_rate": 1.8531311491831833e-06, "loss": 0.2521, "step": 15513 }, { "epoch": 0.7195732838589981, "grad_norm": 7.484070777893066, "learning_rate": 1.8525592801064906e-06, "loss": 0.2148, "step": 15514 }, { "epoch": 0.7196196660482375, "grad_norm": 5.303770542144775, "learning_rate": 1.8519874792174414e-06, "loss": 0.3378, "step": 15515 }, { "epoch": 0.7196660482374768, "grad_norm": 6.461546421051025, "learning_rate": 1.8514157465284237e-06, "loss": 0.3032, "step": 15516 }, { "epoch": 0.7197124304267162, "grad_norm": 12.93535327911377, "learning_rate": 1.8508440820518225e-06, "loss": 0.492, "step": 15517 }, { "epoch": 0.7197588126159554, "grad_norm": 22.28597640991211, "learning_rate": 1.8502724858000225e-06, "loss": 0.6924, "step": 15518 }, { "epoch": 0.7198051948051948, "grad_norm": 8.520306587219238, "learning_rate": 1.849700957785408e-06, "loss": 0.4643, "step": 15519 }, { "epoch": 0.7198515769944341, "grad_norm": 9.419963836669922, "learning_rate": 1.849129498020361e-06, "loss": 0.3924, "step": 15520 }, { "epoch": 0.7198979591836735, "grad_norm": 6.47764253616333, "learning_rate": 1.8485581065172615e-06, "loss": 0.3835, "step": 15521 }, { "epoch": 0.7199443413729129, "grad_norm": 9.372663497924805, "learning_rate": 1.8479867832884896e-06, "loss": 0.3601, "step": 15522 }, { "epoch": 0.7199907235621521, "grad_norm": 11.796380043029785, "learning_rate": 1.84741552834642e-06, "loss": 0.3728, "step": 15523 }, { "epoch": 0.7200371057513915, "grad_norm": 8.832341194152832, "learning_rate": 1.8468443417034304e-06, "loss": 0.3475, "step": 15524 }, { "epoch": 0.7200834879406308, "grad_norm": 8.336589813232422, "learning_rate": 1.8462732233718949e-06, "loss": 0.2573, "step": 15525 }, { "epoch": 0.7201298701298702, "grad_norm": 9.657405853271484, "learning_rate": 1.8457021733641866e-06, "loss": 0.3279, "step": 15526 }, { "epoch": 0.7201762523191094, "grad_norm": 5.521917819976807, "learning_rate": 1.8451311916926784e-06, "loss": 0.2714, "step": 15527 }, { "epoch": 0.7202226345083488, "grad_norm": 6.308144569396973, "learning_rate": 1.8445602783697375e-06, "loss": 0.2468, "step": 15528 }, { "epoch": 0.7202690166975881, "grad_norm": 6.6653361320495605, "learning_rate": 1.843989433407734e-06, "loss": 0.3686, "step": 15529 }, { "epoch": 0.7203153988868275, "grad_norm": 5.397015571594238, "learning_rate": 1.8434186568190348e-06, "loss": 0.3858, "step": 15530 }, { "epoch": 0.7203617810760667, "grad_norm": 5.52993106842041, "learning_rate": 1.842847948616005e-06, "loss": 0.295, "step": 15531 }, { "epoch": 0.7204081632653061, "grad_norm": 6.505004405975342, "learning_rate": 1.8422773088110097e-06, "loss": 0.3483, "step": 15532 }, { "epoch": 0.7204545454545455, "grad_norm": 8.902600288391113, "learning_rate": 1.841706737416412e-06, "loss": 0.3289, "step": 15533 }, { "epoch": 0.7205009276437848, "grad_norm": 11.042451858520508, "learning_rate": 1.841136234444571e-06, "loss": 0.4543, "step": 15534 }, { "epoch": 0.7205473098330241, "grad_norm": 8.980745315551758, "learning_rate": 1.8405657999078475e-06, "loss": 0.4314, "step": 15535 }, { "epoch": 0.7205936920222634, "grad_norm": 6.152975559234619, "learning_rate": 1.8399954338185987e-06, "loss": 0.2902, "step": 15536 }, { "epoch": 0.7206400742115028, "grad_norm": 23.92815589904785, "learning_rate": 1.839425136189183e-06, "loss": 0.6139, "step": 15537 }, { "epoch": 0.7206864564007421, "grad_norm": 5.949151992797852, "learning_rate": 1.8388549070319555e-06, "loss": 0.3238, "step": 15538 }, { "epoch": 0.7207328385899815, "grad_norm": 6.730526924133301, "learning_rate": 1.838284746359268e-06, "loss": 0.3189, "step": 15539 }, { "epoch": 0.7207792207792207, "grad_norm": 6.331218719482422, "learning_rate": 1.8377146541834735e-06, "loss": 0.1896, "step": 15540 }, { "epoch": 0.7208256029684601, "grad_norm": 28.710458755493164, "learning_rate": 1.8371446305169233e-06, "loss": 0.4202, "step": 15541 }, { "epoch": 0.7208719851576995, "grad_norm": 9.452768325805664, "learning_rate": 1.8365746753719665e-06, "loss": 0.2594, "step": 15542 }, { "epoch": 0.7209183673469388, "grad_norm": 6.5264716148376465, "learning_rate": 1.8360047887609506e-06, "loss": 0.3916, "step": 15543 }, { "epoch": 0.7209647495361781, "grad_norm": 11.01360034942627, "learning_rate": 1.8354349706962243e-06, "loss": 0.328, "step": 15544 }, { "epoch": 0.7210111317254174, "grad_norm": 5.686924934387207, "learning_rate": 1.834865221190128e-06, "loss": 0.3186, "step": 15545 }, { "epoch": 0.7210575139146568, "grad_norm": 10.929314613342285, "learning_rate": 1.8342955402550072e-06, "loss": 0.4534, "step": 15546 }, { "epoch": 0.7211038961038961, "grad_norm": 6.332452774047852, "learning_rate": 1.8337259279032044e-06, "loss": 0.273, "step": 15547 }, { "epoch": 0.7211502782931354, "grad_norm": 8.381034851074219, "learning_rate": 1.833156384147059e-06, "loss": 0.4178, "step": 15548 }, { "epoch": 0.7211966604823747, "grad_norm": 11.434403419494629, "learning_rate": 1.8325869089989119e-06, "loss": 0.417, "step": 15549 }, { "epoch": 0.7212430426716141, "grad_norm": 5.79867696762085, "learning_rate": 1.8320175024710968e-06, "loss": 0.2056, "step": 15550 }, { "epoch": 0.7212894248608535, "grad_norm": 10.372900009155273, "learning_rate": 1.8314481645759519e-06, "loss": 0.2975, "step": 15551 }, { "epoch": 0.7213358070500928, "grad_norm": 8.131453514099121, "learning_rate": 1.8308788953258111e-06, "loss": 0.3589, "step": 15552 }, { "epoch": 0.7213821892393321, "grad_norm": 7.377096652984619, "learning_rate": 1.8303096947330074e-06, "loss": 0.3637, "step": 15553 }, { "epoch": 0.7214285714285714, "grad_norm": 8.699271202087402, "learning_rate": 1.8297405628098723e-06, "loss": 0.3447, "step": 15554 }, { "epoch": 0.7214749536178108, "grad_norm": 5.083894729614258, "learning_rate": 1.8291714995687382e-06, "loss": 0.343, "step": 15555 }, { "epoch": 0.7215213358070501, "grad_norm": 8.51197338104248, "learning_rate": 1.8286025050219287e-06, "loss": 0.3049, "step": 15556 }, { "epoch": 0.7215677179962894, "grad_norm": 8.099692344665527, "learning_rate": 1.8280335791817733e-06, "loss": 0.4119, "step": 15557 }, { "epoch": 0.7216141001855287, "grad_norm": 5.537050724029541, "learning_rate": 1.8274647220605974e-06, "loss": 0.3529, "step": 15558 }, { "epoch": 0.7216604823747681, "grad_norm": 9.782573699951172, "learning_rate": 1.8268959336707254e-06, "loss": 0.4382, "step": 15559 }, { "epoch": 0.7217068645640075, "grad_norm": 9.036273956298828, "learning_rate": 1.8263272140244803e-06, "loss": 0.3373, "step": 15560 }, { "epoch": 0.7217532467532467, "grad_norm": 4.866591453552246, "learning_rate": 1.8257585631341811e-06, "loss": 0.2807, "step": 15561 }, { "epoch": 0.7217996289424861, "grad_norm": 3.05743145942688, "learning_rate": 1.8251899810121482e-06, "loss": 0.2184, "step": 15562 }, { "epoch": 0.7218460111317254, "grad_norm": 7.565967082977295, "learning_rate": 1.8246214676706997e-06, "loss": 0.3852, "step": 15563 }, { "epoch": 0.7218923933209648, "grad_norm": 5.165468692779541, "learning_rate": 1.824053023122152e-06, "loss": 0.3172, "step": 15564 }, { "epoch": 0.7219387755102041, "grad_norm": 6.337084770202637, "learning_rate": 1.823484647378821e-06, "loss": 0.3093, "step": 15565 }, { "epoch": 0.7219851576994434, "grad_norm": 10.883992195129395, "learning_rate": 1.8229163404530209e-06, "loss": 0.2916, "step": 15566 }, { "epoch": 0.7220315398886827, "grad_norm": 9.213605880737305, "learning_rate": 1.8223481023570611e-06, "loss": 0.2541, "step": 15567 }, { "epoch": 0.7220779220779221, "grad_norm": 16.458162307739258, "learning_rate": 1.8217799331032538e-06, "loss": 0.401, "step": 15568 }, { "epoch": 0.7221243042671615, "grad_norm": 10.947325706481934, "learning_rate": 1.8212118327039074e-06, "loss": 0.4024, "step": 15569 }, { "epoch": 0.7221706864564007, "grad_norm": 8.017016410827637, "learning_rate": 1.8206438011713306e-06, "loss": 0.2429, "step": 15570 }, { "epoch": 0.7222170686456401, "grad_norm": 6.8703718185424805, "learning_rate": 1.8200758385178302e-06, "loss": 0.2207, "step": 15571 }, { "epoch": 0.7222634508348794, "grad_norm": 6.62150239944458, "learning_rate": 1.8195079447557079e-06, "loss": 0.3519, "step": 15572 }, { "epoch": 0.7223098330241188, "grad_norm": 8.221989631652832, "learning_rate": 1.818940119897269e-06, "loss": 0.2422, "step": 15573 }, { "epoch": 0.722356215213358, "grad_norm": 10.46448802947998, "learning_rate": 1.818372363954814e-06, "loss": 0.4229, "step": 15574 }, { "epoch": 0.7224025974025974, "grad_norm": 8.8622407913208, "learning_rate": 1.8178046769406432e-06, "loss": 0.4101, "step": 15575 }, { "epoch": 0.7224489795918367, "grad_norm": 13.164634704589844, "learning_rate": 1.8172370588670563e-06, "loss": 0.3553, "step": 15576 }, { "epoch": 0.7224953617810761, "grad_norm": 5.389538764953613, "learning_rate": 1.8166695097463516e-06, "loss": 0.2468, "step": 15577 }, { "epoch": 0.7225417439703155, "grad_norm": 8.6482515335083, "learning_rate": 1.8161020295908204e-06, "loss": 0.3473, "step": 15578 }, { "epoch": 0.7225881261595547, "grad_norm": 10.110532760620117, "learning_rate": 1.8155346184127605e-06, "loss": 0.4522, "step": 15579 }, { "epoch": 0.7226345083487941, "grad_norm": 4.85394811630249, "learning_rate": 1.8149672762244625e-06, "loss": 0.2785, "step": 15580 }, { "epoch": 0.7226808905380334, "grad_norm": 6.136628150939941, "learning_rate": 1.8144000030382192e-06, "loss": 0.3621, "step": 15581 }, { "epoch": 0.7227272727272728, "grad_norm": 12.671956062316895, "learning_rate": 1.8138327988663206e-06, "loss": 0.396, "step": 15582 }, { "epoch": 0.722773654916512, "grad_norm": 3.553783893585205, "learning_rate": 1.8132656637210528e-06, "loss": 0.2073, "step": 15583 }, { "epoch": 0.7228200371057514, "grad_norm": 9.80632495880127, "learning_rate": 1.8126985976147032e-06, "loss": 0.3726, "step": 15584 }, { "epoch": 0.7228664192949907, "grad_norm": 5.687907695770264, "learning_rate": 1.8121316005595578e-06, "loss": 0.192, "step": 15585 }, { "epoch": 0.7229128014842301, "grad_norm": 11.897246360778809, "learning_rate": 1.8115646725678997e-06, "loss": 0.4089, "step": 15586 }, { "epoch": 0.7229591836734693, "grad_norm": 12.977392196655273, "learning_rate": 1.8109978136520111e-06, "loss": 0.3659, "step": 15587 }, { "epoch": 0.7230055658627087, "grad_norm": 4.95552921295166, "learning_rate": 1.8104310238241745e-06, "loss": 0.3358, "step": 15588 }, { "epoch": 0.7230519480519481, "grad_norm": 6.6291351318359375, "learning_rate": 1.8098643030966655e-06, "loss": 0.3107, "step": 15589 }, { "epoch": 0.7230983302411874, "grad_norm": 8.902260780334473, "learning_rate": 1.8092976514817644e-06, "loss": 0.3714, "step": 15590 }, { "epoch": 0.7231447124304267, "grad_norm": 7.750734806060791, "learning_rate": 1.8087310689917464e-06, "loss": 0.3694, "step": 15591 }, { "epoch": 0.723191094619666, "grad_norm": 6.091590404510498, "learning_rate": 1.8081645556388866e-06, "loss": 0.2889, "step": 15592 }, { "epoch": 0.7232374768089054, "grad_norm": 8.466551780700684, "learning_rate": 1.8075981114354602e-06, "loss": 0.3951, "step": 15593 }, { "epoch": 0.7232838589981447, "grad_norm": 11.014586448669434, "learning_rate": 1.8070317363937345e-06, "loss": 0.3773, "step": 15594 }, { "epoch": 0.7233302411873841, "grad_norm": 7.664741039276123, "learning_rate": 1.8064654305259826e-06, "loss": 0.2718, "step": 15595 }, { "epoch": 0.7233766233766233, "grad_norm": 10.411130905151367, "learning_rate": 1.8058991938444724e-06, "loss": 0.2556, "step": 15596 }, { "epoch": 0.7234230055658627, "grad_norm": 8.502039909362793, "learning_rate": 1.8053330263614722e-06, "loss": 0.4118, "step": 15597 }, { "epoch": 0.7234693877551021, "grad_norm": 7.2308669090271, "learning_rate": 1.804766928089246e-06, "loss": 0.3157, "step": 15598 }, { "epoch": 0.7235157699443414, "grad_norm": 9.499828338623047, "learning_rate": 1.8042008990400617e-06, "loss": 0.4091, "step": 15599 }, { "epoch": 0.7235621521335807, "grad_norm": 7.366281986236572, "learning_rate": 1.803634939226177e-06, "loss": 0.221, "step": 15600 }, { "epoch": 0.72360853432282, "grad_norm": 14.014993667602539, "learning_rate": 1.803069048659855e-06, "loss": 0.3946, "step": 15601 }, { "epoch": 0.7236549165120594, "grad_norm": 5.632761001586914, "learning_rate": 1.8025032273533566e-06, "loss": 0.3126, "step": 15602 }, { "epoch": 0.7237012987012987, "grad_norm": 10.712450981140137, "learning_rate": 1.801937475318939e-06, "loss": 0.2736, "step": 15603 }, { "epoch": 0.723747680890538, "grad_norm": 5.45952844619751, "learning_rate": 1.8013717925688612e-06, "loss": 0.2072, "step": 15604 }, { "epoch": 0.7237940630797773, "grad_norm": 4.800183296203613, "learning_rate": 1.8008061791153742e-06, "loss": 0.3784, "step": 15605 }, { "epoch": 0.7238404452690167, "grad_norm": 6.75683069229126, "learning_rate": 1.800240634970734e-06, "loss": 0.2992, "step": 15606 }, { "epoch": 0.723886827458256, "grad_norm": 10.55321979522705, "learning_rate": 1.7996751601471934e-06, "loss": 0.5603, "step": 15607 }, { "epoch": 0.7239332096474954, "grad_norm": 5.345810413360596, "learning_rate": 1.7991097546570018e-06, "loss": 0.3083, "step": 15608 }, { "epoch": 0.7239795918367347, "grad_norm": 10.672707557678223, "learning_rate": 1.7985444185124113e-06, "loss": 0.3714, "step": 15609 }, { "epoch": 0.724025974025974, "grad_norm": 5.384544372558594, "learning_rate": 1.797979151725665e-06, "loss": 0.2477, "step": 15610 }, { "epoch": 0.7240723562152134, "grad_norm": 5.147636413574219, "learning_rate": 1.797413954309012e-06, "loss": 0.2259, "step": 15611 }, { "epoch": 0.7241187384044527, "grad_norm": 10.354798316955566, "learning_rate": 1.7968488262746964e-06, "loss": 0.1715, "step": 15612 }, { "epoch": 0.724165120593692, "grad_norm": 7.268392562866211, "learning_rate": 1.7962837676349615e-06, "loss": 0.3877, "step": 15613 }, { "epoch": 0.7242115027829313, "grad_norm": 3.5660431385040283, "learning_rate": 1.7957187784020486e-06, "loss": 0.2565, "step": 15614 }, { "epoch": 0.7242578849721707, "grad_norm": 10.191032409667969, "learning_rate": 1.7951538585882005e-06, "loss": 0.279, "step": 15615 }, { "epoch": 0.72430426716141, "grad_norm": 4.919708251953125, "learning_rate": 1.7945890082056516e-06, "loss": 0.3162, "step": 15616 }, { "epoch": 0.7243506493506493, "grad_norm": 7.217459201812744, "learning_rate": 1.7940242272666413e-06, "loss": 0.2977, "step": 15617 }, { "epoch": 0.7243970315398887, "grad_norm": 9.573990821838379, "learning_rate": 1.793459515783405e-06, "loss": 0.2358, "step": 15618 }, { "epoch": 0.724443413729128, "grad_norm": 4.917972564697266, "learning_rate": 1.7928948737681773e-06, "loss": 0.326, "step": 15619 }, { "epoch": 0.7244897959183674, "grad_norm": 6.847766876220703, "learning_rate": 1.792330301233192e-06, "loss": 0.2607, "step": 15620 }, { "epoch": 0.7245361781076067, "grad_norm": 9.308998107910156, "learning_rate": 1.7917657981906777e-06, "loss": 0.346, "step": 15621 }, { "epoch": 0.724582560296846, "grad_norm": 9.696940422058105, "learning_rate": 1.7912013646528648e-06, "loss": 0.3198, "step": 15622 }, { "epoch": 0.7246289424860853, "grad_norm": 12.709665298461914, "learning_rate": 1.7906370006319817e-06, "loss": 0.5522, "step": 15623 }, { "epoch": 0.7246753246753247, "grad_norm": 9.259010314941406, "learning_rate": 1.7900727061402556e-06, "loss": 0.361, "step": 15624 }, { "epoch": 0.724721706864564, "grad_norm": 5.901492595672607, "learning_rate": 1.789508481189911e-06, "loss": 0.278, "step": 15625 }, { "epoch": 0.7247680890538033, "grad_norm": 4.951532363891602, "learning_rate": 1.7889443257931738e-06, "loss": 0.2576, "step": 15626 }, { "epoch": 0.7248144712430427, "grad_norm": 9.51097583770752, "learning_rate": 1.788380239962262e-06, "loss": 0.3471, "step": 15627 }, { "epoch": 0.724860853432282, "grad_norm": 5.492284774780273, "learning_rate": 1.7878162237093987e-06, "loss": 0.2944, "step": 15628 }, { "epoch": 0.7249072356215214, "grad_norm": 9.71325397491455, "learning_rate": 1.7872522770468026e-06, "loss": 0.2582, "step": 15629 }, { "epoch": 0.7249536178107606, "grad_norm": 9.069293975830078, "learning_rate": 1.7866883999866912e-06, "loss": 0.3545, "step": 15630 }, { "epoch": 0.725, "grad_norm": 3.947244167327881, "learning_rate": 1.7861245925412824e-06, "loss": 0.245, "step": 15631 }, { "epoch": 0.7250463821892393, "grad_norm": 9.624407768249512, "learning_rate": 1.7855608547227876e-06, "loss": 0.416, "step": 15632 }, { "epoch": 0.7250927643784787, "grad_norm": 8.807662010192871, "learning_rate": 1.7849971865434218e-06, "loss": 0.4084, "step": 15633 }, { "epoch": 0.725139146567718, "grad_norm": 14.686287879943848, "learning_rate": 1.7844335880153958e-06, "loss": 0.4523, "step": 15634 }, { "epoch": 0.7251855287569573, "grad_norm": 13.518287658691406, "learning_rate": 1.78387005915092e-06, "loss": 0.347, "step": 15635 }, { "epoch": 0.7252319109461967, "grad_norm": 7.115544319152832, "learning_rate": 1.7833065999622034e-06, "loss": 0.3282, "step": 15636 }, { "epoch": 0.725278293135436, "grad_norm": 8.493890762329102, "learning_rate": 1.782743210461454e-06, "loss": 0.3701, "step": 15637 }, { "epoch": 0.7253246753246754, "grad_norm": 5.448791980743408, "learning_rate": 1.7821798906608745e-06, "loss": 0.3102, "step": 15638 }, { "epoch": 0.7253710575139146, "grad_norm": 15.305130004882812, "learning_rate": 1.7816166405726703e-06, "loss": 0.4157, "step": 15639 }, { "epoch": 0.725417439703154, "grad_norm": 7.197269439697266, "learning_rate": 1.7810534602090445e-06, "loss": 0.3755, "step": 15640 }, { "epoch": 0.7254638218923933, "grad_norm": 5.452016830444336, "learning_rate": 1.7804903495821973e-06, "loss": 0.3408, "step": 15641 }, { "epoch": 0.7255102040816327, "grad_norm": 5.441062927246094, "learning_rate": 1.7799273087043306e-06, "loss": 0.2023, "step": 15642 }, { "epoch": 0.7255565862708719, "grad_norm": 6.933948993682861, "learning_rate": 1.779364337587638e-06, "loss": 0.2561, "step": 15643 }, { "epoch": 0.7256029684601113, "grad_norm": 20.28278350830078, "learning_rate": 1.778801436244319e-06, "loss": 0.3441, "step": 15644 }, { "epoch": 0.7256493506493507, "grad_norm": 5.43867826461792, "learning_rate": 1.7782386046865675e-06, "loss": 0.3357, "step": 15645 }, { "epoch": 0.72569573283859, "grad_norm": 10.235082626342773, "learning_rate": 1.7776758429265771e-06, "loss": 0.37, "step": 15646 }, { "epoch": 0.7257421150278293, "grad_norm": 5.461511611938477, "learning_rate": 1.7771131509765404e-06, "loss": 0.3352, "step": 15647 }, { "epoch": 0.7257884972170686, "grad_norm": 9.366119384765625, "learning_rate": 1.776550528848649e-06, "loss": 0.3451, "step": 15648 }, { "epoch": 0.725834879406308, "grad_norm": 5.119400978088379, "learning_rate": 1.7759879765550887e-06, "loss": 0.2843, "step": 15649 }, { "epoch": 0.7258812615955473, "grad_norm": 7.442642688751221, "learning_rate": 1.7754254941080479e-06, "loss": 0.3058, "step": 15650 }, { "epoch": 0.7259276437847867, "grad_norm": 5.850509166717529, "learning_rate": 1.7748630815197131e-06, "loss": 0.2888, "step": 15651 }, { "epoch": 0.7259740259740259, "grad_norm": 10.684423446655273, "learning_rate": 1.7743007388022688e-06, "loss": 0.4556, "step": 15652 }, { "epoch": 0.7260204081632653, "grad_norm": 6.399075031280518, "learning_rate": 1.7737384659678996e-06, "loss": 0.3645, "step": 15653 }, { "epoch": 0.7260667903525047, "grad_norm": 12.543280601501465, "learning_rate": 1.7731762630287824e-06, "loss": 0.2993, "step": 15654 }, { "epoch": 0.726113172541744, "grad_norm": 14.016158103942871, "learning_rate": 1.7726141299971e-06, "loss": 0.4083, "step": 15655 }, { "epoch": 0.7261595547309833, "grad_norm": 12.262904167175293, "learning_rate": 1.77205206688503e-06, "loss": 0.4724, "step": 15656 }, { "epoch": 0.7262059369202226, "grad_norm": 13.573291778564453, "learning_rate": 1.7714900737047496e-06, "loss": 0.3827, "step": 15657 }, { "epoch": 0.726252319109462, "grad_norm": 12.708863258361816, "learning_rate": 1.7709281504684339e-06, "loss": 0.3828, "step": 15658 }, { "epoch": 0.7262987012987013, "grad_norm": 12.623579025268555, "learning_rate": 1.770366297188258e-06, "loss": 0.2895, "step": 15659 }, { "epoch": 0.7263450834879406, "grad_norm": 6.953887939453125, "learning_rate": 1.7698045138763915e-06, "loss": 0.2455, "step": 15660 }, { "epoch": 0.7263914656771799, "grad_norm": 5.179389953613281, "learning_rate": 1.7692428005450064e-06, "loss": 0.2444, "step": 15661 }, { "epoch": 0.7264378478664193, "grad_norm": 9.095499038696289, "learning_rate": 1.7686811572062718e-06, "loss": 0.4437, "step": 15662 }, { "epoch": 0.7264842300556587, "grad_norm": 13.071208953857422, "learning_rate": 1.768119583872356e-06, "loss": 0.3628, "step": 15663 }, { "epoch": 0.726530612244898, "grad_norm": 7.542766094207764, "learning_rate": 1.767558080555426e-06, "loss": 0.2576, "step": 15664 }, { "epoch": 0.7265769944341373, "grad_norm": 4.897854804992676, "learning_rate": 1.7669966472676436e-06, "loss": 0.3205, "step": 15665 }, { "epoch": 0.7266233766233766, "grad_norm": 6.053969383239746, "learning_rate": 1.7664352840211735e-06, "loss": 0.3783, "step": 15666 }, { "epoch": 0.726669758812616, "grad_norm": 12.086968421936035, "learning_rate": 1.765873990828178e-06, "loss": 0.4083, "step": 15667 }, { "epoch": 0.7267161410018553, "grad_norm": 7.214951992034912, "learning_rate": 1.765312767700816e-06, "loss": 0.2073, "step": 15668 }, { "epoch": 0.7267625231910946, "grad_norm": 4.26162052154541, "learning_rate": 1.7647516146512473e-06, "loss": 0.1899, "step": 15669 }, { "epoch": 0.7268089053803339, "grad_norm": 11.341959953308105, "learning_rate": 1.7641905316916298e-06, "loss": 0.3794, "step": 15670 }, { "epoch": 0.7268552875695733, "grad_norm": 7.864655494689941, "learning_rate": 1.7636295188341162e-06, "loss": 0.3473, "step": 15671 }, { "epoch": 0.7269016697588127, "grad_norm": 6.807493686676025, "learning_rate": 1.7630685760908623e-06, "loss": 0.2867, "step": 15672 }, { "epoch": 0.7269480519480519, "grad_norm": 11.99947452545166, "learning_rate": 1.7625077034740195e-06, "loss": 0.396, "step": 15673 }, { "epoch": 0.7269944341372913, "grad_norm": 7.061351776123047, "learning_rate": 1.7619469009957407e-06, "loss": 0.3067, "step": 15674 }, { "epoch": 0.7270408163265306, "grad_norm": 5.930948257446289, "learning_rate": 1.7613861686681754e-06, "loss": 0.2837, "step": 15675 }, { "epoch": 0.72708719851577, "grad_norm": 5.849279403686523, "learning_rate": 1.7608255065034695e-06, "loss": 0.2622, "step": 15676 }, { "epoch": 0.7271335807050093, "grad_norm": 10.059711456298828, "learning_rate": 1.76026491451377e-06, "loss": 0.3779, "step": 15677 }, { "epoch": 0.7271799628942486, "grad_norm": 8.793526649475098, "learning_rate": 1.7597043927112228e-06, "loss": 0.297, "step": 15678 }, { "epoch": 0.7272263450834879, "grad_norm": 12.27017593383789, "learning_rate": 1.7591439411079703e-06, "loss": 0.4436, "step": 15679 }, { "epoch": 0.7272727272727273, "grad_norm": 7.409290790557861, "learning_rate": 1.7585835597161555e-06, "loss": 0.3587, "step": 15680 }, { "epoch": 0.7273191094619667, "grad_norm": 7.509750843048096, "learning_rate": 1.7580232485479198e-06, "loss": 0.2357, "step": 15681 }, { "epoch": 0.7273654916512059, "grad_norm": 7.038815498352051, "learning_rate": 1.7574630076153987e-06, "loss": 0.3341, "step": 15682 }, { "epoch": 0.7274118738404453, "grad_norm": 8.118945121765137, "learning_rate": 1.7569028369307312e-06, "loss": 0.3144, "step": 15683 }, { "epoch": 0.7274582560296846, "grad_norm": 7.452887535095215, "learning_rate": 1.756342736506053e-06, "loss": 0.4468, "step": 15684 }, { "epoch": 0.727504638218924, "grad_norm": 14.64331340789795, "learning_rate": 1.7557827063534988e-06, "loss": 0.4392, "step": 15685 }, { "epoch": 0.7275510204081632, "grad_norm": 6.919075012207031, "learning_rate": 1.7552227464852024e-06, "loss": 0.2928, "step": 15686 }, { "epoch": 0.7275974025974026, "grad_norm": 4.61735725402832, "learning_rate": 1.7546628569132928e-06, "loss": 0.3567, "step": 15687 }, { "epoch": 0.7276437847866419, "grad_norm": 7.224102020263672, "learning_rate": 1.7541030376499002e-06, "loss": 0.3503, "step": 15688 }, { "epoch": 0.7276901669758813, "grad_norm": 11.816410064697266, "learning_rate": 1.753543288707153e-06, "loss": 0.296, "step": 15689 }, { "epoch": 0.7277365491651205, "grad_norm": 10.246170043945312, "learning_rate": 1.7529836100971786e-06, "loss": 0.398, "step": 15690 }, { "epoch": 0.7277829313543599, "grad_norm": 6.529738903045654, "learning_rate": 1.7524240018321032e-06, "loss": 0.3488, "step": 15691 }, { "epoch": 0.7278293135435993, "grad_norm": 6.228453159332275, "learning_rate": 1.7518644639240474e-06, "loss": 0.2749, "step": 15692 }, { "epoch": 0.7278756957328386, "grad_norm": 6.794240474700928, "learning_rate": 1.7513049963851346e-06, "loss": 0.3346, "step": 15693 }, { "epoch": 0.727922077922078, "grad_norm": 8.696603775024414, "learning_rate": 1.7507455992274853e-06, "loss": 0.3301, "step": 15694 }, { "epoch": 0.7279684601113172, "grad_norm": 23.480857849121094, "learning_rate": 1.750186272463219e-06, "loss": 0.3041, "step": 15695 }, { "epoch": 0.7280148423005566, "grad_norm": 8.094867706298828, "learning_rate": 1.7496270161044533e-06, "loss": 0.3448, "step": 15696 }, { "epoch": 0.7280612244897959, "grad_norm": 5.091702461242676, "learning_rate": 1.749067830163305e-06, "loss": 0.2863, "step": 15697 }, { "epoch": 0.7281076066790353, "grad_norm": 4.803749084472656, "learning_rate": 1.7485087146518864e-06, "loss": 0.2732, "step": 15698 }, { "epoch": 0.7281539888682745, "grad_norm": 7.463318824768066, "learning_rate": 1.7479496695823111e-06, "loss": 0.2299, "step": 15699 }, { "epoch": 0.7282003710575139, "grad_norm": 4.353376865386963, "learning_rate": 1.7473906949666913e-06, "loss": 0.3273, "step": 15700 }, { "epoch": 0.7282467532467533, "grad_norm": 7.366912841796875, "learning_rate": 1.7468317908171361e-06, "loss": 0.3254, "step": 15701 }, { "epoch": 0.7282931354359926, "grad_norm": 6.66554594039917, "learning_rate": 1.746272957145756e-06, "loss": 0.2868, "step": 15702 }, { "epoch": 0.7283395176252319, "grad_norm": 6.992616653442383, "learning_rate": 1.745714193964655e-06, "loss": 0.2908, "step": 15703 }, { "epoch": 0.7283858998144712, "grad_norm": 6.447331428527832, "learning_rate": 1.745155501285939e-06, "loss": 0.3607, "step": 15704 }, { "epoch": 0.7284322820037106, "grad_norm": 5.478789329528809, "learning_rate": 1.744596879121712e-06, "loss": 0.2272, "step": 15705 }, { "epoch": 0.7284786641929499, "grad_norm": 6.938353538513184, "learning_rate": 1.7440383274840772e-06, "loss": 0.3347, "step": 15706 }, { "epoch": 0.7285250463821893, "grad_norm": 8.759062767028809, "learning_rate": 1.7434798463851343e-06, "loss": 0.3319, "step": 15707 }, { "epoch": 0.7285714285714285, "grad_norm": 7.126399993896484, "learning_rate": 1.742921435836985e-06, "loss": 0.2918, "step": 15708 }, { "epoch": 0.7286178107606679, "grad_norm": 12.15723705291748, "learning_rate": 1.7423630958517229e-06, "loss": 0.3215, "step": 15709 }, { "epoch": 0.7286641929499073, "grad_norm": 5.0735673904418945, "learning_rate": 1.7418048264414462e-06, "loss": 0.2505, "step": 15710 }, { "epoch": 0.7287105751391466, "grad_norm": 5.966620445251465, "learning_rate": 1.7412466276182493e-06, "loss": 0.315, "step": 15711 }, { "epoch": 0.7287569573283859, "grad_norm": 3.6189961433410645, "learning_rate": 1.7406884993942252e-06, "loss": 0.1975, "step": 15712 }, { "epoch": 0.7288033395176252, "grad_norm": 7.457306861877441, "learning_rate": 1.740130441781468e-06, "loss": 0.4053, "step": 15713 }, { "epoch": 0.7288497217068646, "grad_norm": 11.557914733886719, "learning_rate": 1.7395724547920633e-06, "loss": 0.4899, "step": 15714 }, { "epoch": 0.7288961038961039, "grad_norm": 7.488382339477539, "learning_rate": 1.7390145384381013e-06, "loss": 0.2549, "step": 15715 }, { "epoch": 0.7289424860853432, "grad_norm": 6.709848403930664, "learning_rate": 1.7384566927316698e-06, "loss": 0.4124, "step": 15716 }, { "epoch": 0.7289888682745825, "grad_norm": 11.803317070007324, "learning_rate": 1.7378989176848538e-06, "loss": 0.4079, "step": 15717 }, { "epoch": 0.7290352504638219, "grad_norm": 7.2374162673950195, "learning_rate": 1.7373412133097373e-06, "loss": 0.3931, "step": 15718 }, { "epoch": 0.7290816326530613, "grad_norm": 13.040473937988281, "learning_rate": 1.7367835796184036e-06, "loss": 0.3484, "step": 15719 }, { "epoch": 0.7291280148423006, "grad_norm": 5.100753307342529, "learning_rate": 1.736226016622931e-06, "loss": 0.2281, "step": 15720 }, { "epoch": 0.7291743970315399, "grad_norm": 7.165133953094482, "learning_rate": 1.7356685243354004e-06, "loss": 0.3575, "step": 15721 }, { "epoch": 0.7292207792207792, "grad_norm": 3.7621335983276367, "learning_rate": 1.7351111027678895e-06, "loss": 0.3005, "step": 15722 }, { "epoch": 0.7292671614100186, "grad_norm": 6.943041801452637, "learning_rate": 1.7345537519324752e-06, "loss": 0.3898, "step": 15723 }, { "epoch": 0.7293135435992579, "grad_norm": 4.83054780960083, "learning_rate": 1.7339964718412323e-06, "loss": 0.2808, "step": 15724 }, { "epoch": 0.7293599257884972, "grad_norm": 9.851290702819824, "learning_rate": 1.733439262506232e-06, "loss": 0.4107, "step": 15725 }, { "epoch": 0.7294063079777365, "grad_norm": 3.809113025665283, "learning_rate": 1.7328821239395465e-06, "loss": 0.2355, "step": 15726 }, { "epoch": 0.7294526901669759, "grad_norm": 10.24079418182373, "learning_rate": 1.7323250561532469e-06, "loss": 0.3826, "step": 15727 }, { "epoch": 0.7294990723562153, "grad_norm": 6.404372692108154, "learning_rate": 1.7317680591594016e-06, "loss": 0.2962, "step": 15728 }, { "epoch": 0.7295454545454545, "grad_norm": 9.542034149169922, "learning_rate": 1.731211132970077e-06, "loss": 0.3342, "step": 15729 }, { "epoch": 0.7295918367346939, "grad_norm": 6.1127495765686035, "learning_rate": 1.7306542775973417e-06, "loss": 0.3248, "step": 15730 }, { "epoch": 0.7296382189239332, "grad_norm": 10.108583450317383, "learning_rate": 1.7300974930532543e-06, "loss": 0.3315, "step": 15731 }, { "epoch": 0.7296846011131726, "grad_norm": 4.242634296417236, "learning_rate": 1.7295407793498809e-06, "loss": 0.3914, "step": 15732 }, { "epoch": 0.7297309833024119, "grad_norm": 7.580663681030273, "learning_rate": 1.728984136499281e-06, "loss": 0.3317, "step": 15733 }, { "epoch": 0.7297773654916512, "grad_norm": 4.465056896209717, "learning_rate": 1.728427564513515e-06, "loss": 0.2648, "step": 15734 }, { "epoch": 0.7298237476808905, "grad_norm": 7.57496452331543, "learning_rate": 1.7278710634046414e-06, "loss": 0.2289, "step": 15735 }, { "epoch": 0.7298701298701299, "grad_norm": 10.836626052856445, "learning_rate": 1.727314633184714e-06, "loss": 0.4231, "step": 15736 }, { "epoch": 0.7299165120593692, "grad_norm": 14.145469665527344, "learning_rate": 1.726758273865789e-06, "loss": 0.3798, "step": 15737 }, { "epoch": 0.7299628942486085, "grad_norm": 12.512166023254395, "learning_rate": 1.7262019854599194e-06, "loss": 0.5377, "step": 15738 }, { "epoch": 0.7300092764378479, "grad_norm": 4.507266521453857, "learning_rate": 1.7256457679791567e-06, "loss": 0.3349, "step": 15739 }, { "epoch": 0.7300556586270872, "grad_norm": 8.955608367919922, "learning_rate": 1.7250896214355517e-06, "loss": 0.3662, "step": 15740 }, { "epoch": 0.7301020408163266, "grad_norm": 6.059275150299072, "learning_rate": 1.7245335458411543e-06, "loss": 0.2287, "step": 15741 }, { "epoch": 0.7301484230055658, "grad_norm": 5.706604957580566, "learning_rate": 1.7239775412080085e-06, "loss": 0.3411, "step": 15742 }, { "epoch": 0.7301948051948052, "grad_norm": 6.917747974395752, "learning_rate": 1.7234216075481614e-06, "loss": 0.3937, "step": 15743 }, { "epoch": 0.7302411873840445, "grad_norm": 7.227278709411621, "learning_rate": 1.7228657448736563e-06, "loss": 0.3357, "step": 15744 }, { "epoch": 0.7302875695732839, "grad_norm": 11.026193618774414, "learning_rate": 1.722309953196537e-06, "loss": 0.3512, "step": 15745 }, { "epoch": 0.7303339517625231, "grad_norm": 6.012134075164795, "learning_rate": 1.7217542325288456e-06, "loss": 0.2453, "step": 15746 }, { "epoch": 0.7303803339517625, "grad_norm": 7.427779674530029, "learning_rate": 1.7211985828826173e-06, "loss": 0.3416, "step": 15747 }, { "epoch": 0.7304267161410019, "grad_norm": 15.258082389831543, "learning_rate": 1.7206430042698929e-06, "loss": 0.5176, "step": 15748 }, { "epoch": 0.7304730983302412, "grad_norm": 10.081422805786133, "learning_rate": 1.7200874967027077e-06, "loss": 0.3896, "step": 15749 }, { "epoch": 0.7305194805194806, "grad_norm": 10.838689804077148, "learning_rate": 1.719532060193097e-06, "loss": 0.3683, "step": 15750 }, { "epoch": 0.7305658627087198, "grad_norm": 10.560004234313965, "learning_rate": 1.7189766947530956e-06, "loss": 0.4541, "step": 15751 }, { "epoch": 0.7306122448979592, "grad_norm": 10.202157020568848, "learning_rate": 1.718421400394732e-06, "loss": 0.3881, "step": 15752 }, { "epoch": 0.7306586270871985, "grad_norm": 10.731575012207031, "learning_rate": 1.717866177130038e-06, "loss": 0.351, "step": 15753 }, { "epoch": 0.7307050092764379, "grad_norm": 3.9812862873077393, "learning_rate": 1.7173110249710422e-06, "loss": 0.3056, "step": 15754 }, { "epoch": 0.7307513914656771, "grad_norm": 8.935600280761719, "learning_rate": 1.7167559439297715e-06, "loss": 0.3565, "step": 15755 }, { "epoch": 0.7307977736549165, "grad_norm": 5.5819244384765625, "learning_rate": 1.716200934018253e-06, "loss": 0.2784, "step": 15756 }, { "epoch": 0.7308441558441559, "grad_norm": 10.07736587524414, "learning_rate": 1.7156459952485076e-06, "loss": 0.3112, "step": 15757 }, { "epoch": 0.7308905380333952, "grad_norm": 6.16192626953125, "learning_rate": 1.71509112763256e-06, "loss": 0.3975, "step": 15758 }, { "epoch": 0.7309369202226345, "grad_norm": 8.044673919677734, "learning_rate": 1.7145363311824298e-06, "loss": 0.302, "step": 15759 }, { "epoch": 0.7309833024118738, "grad_norm": 6.850918769836426, "learning_rate": 1.7139816059101372e-06, "loss": 0.2762, "step": 15760 }, { "epoch": 0.7310296846011132, "grad_norm": 8.095385551452637, "learning_rate": 1.713426951827702e-06, "loss": 0.3108, "step": 15761 }, { "epoch": 0.7310760667903525, "grad_norm": 5.319077968597412, "learning_rate": 1.712872368947136e-06, "loss": 0.2765, "step": 15762 }, { "epoch": 0.7311224489795919, "grad_norm": 7.38959264755249, "learning_rate": 1.7123178572804572e-06, "loss": 0.4091, "step": 15763 }, { "epoch": 0.7311688311688311, "grad_norm": 7.6973042488098145, "learning_rate": 1.7117634168396774e-06, "loss": 0.3152, "step": 15764 }, { "epoch": 0.7312152133580705, "grad_norm": 11.537092208862305, "learning_rate": 1.7112090476368088e-06, "loss": 0.3058, "step": 15765 }, { "epoch": 0.7312615955473099, "grad_norm": 12.031434059143066, "learning_rate": 1.7106547496838633e-06, "loss": 0.3717, "step": 15766 }, { "epoch": 0.7313079777365492, "grad_norm": 10.358441352844238, "learning_rate": 1.7101005229928464e-06, "loss": 0.4252, "step": 15767 }, { "epoch": 0.7313543599257885, "grad_norm": 10.19854736328125, "learning_rate": 1.7095463675757656e-06, "loss": 0.3361, "step": 15768 }, { "epoch": 0.7314007421150278, "grad_norm": 6.47571325302124, "learning_rate": 1.7089922834446283e-06, "loss": 0.3283, "step": 15769 }, { "epoch": 0.7314471243042672, "grad_norm": 6.427689075469971, "learning_rate": 1.7084382706114366e-06, "loss": 0.3466, "step": 15770 }, { "epoch": 0.7314935064935065, "grad_norm": 8.916808128356934, "learning_rate": 1.7078843290881952e-06, "loss": 0.3465, "step": 15771 }, { "epoch": 0.7315398886827458, "grad_norm": 4.816589832305908, "learning_rate": 1.7073304588869023e-06, "loss": 0.3475, "step": 15772 }, { "epoch": 0.7315862708719851, "grad_norm": 3.736348867416382, "learning_rate": 1.706776660019558e-06, "loss": 0.235, "step": 15773 }, { "epoch": 0.7316326530612245, "grad_norm": 5.909224987030029, "learning_rate": 1.70622293249816e-06, "loss": 0.2842, "step": 15774 }, { "epoch": 0.7316790352504638, "grad_norm": 9.142831802368164, "learning_rate": 1.7056692763347055e-06, "loss": 0.4579, "step": 15775 }, { "epoch": 0.7317254174397032, "grad_norm": 9.151667594909668, "learning_rate": 1.7051156915411897e-06, "loss": 0.3524, "step": 15776 }, { "epoch": 0.7317717996289425, "grad_norm": 10.851436614990234, "learning_rate": 1.7045621781296034e-06, "loss": 0.3095, "step": 15777 }, { "epoch": 0.7318181818181818, "grad_norm": 8.27274227142334, "learning_rate": 1.704008736111939e-06, "loss": 0.3307, "step": 15778 }, { "epoch": 0.7318645640074212, "grad_norm": 6.4785027503967285, "learning_rate": 1.7034553655001874e-06, "loss": 0.3498, "step": 15779 }, { "epoch": 0.7319109461966605, "grad_norm": 4.510815143585205, "learning_rate": 1.7029020663063361e-06, "loss": 0.2732, "step": 15780 }, { "epoch": 0.7319573283858998, "grad_norm": 5.803790092468262, "learning_rate": 1.7023488385423725e-06, "loss": 0.3566, "step": 15781 }, { "epoch": 0.7320037105751391, "grad_norm": 6.186988353729248, "learning_rate": 1.7017956822202836e-06, "loss": 0.3301, "step": 15782 }, { "epoch": 0.7320500927643785, "grad_norm": 8.269857406616211, "learning_rate": 1.7012425973520501e-06, "loss": 0.3547, "step": 15783 }, { "epoch": 0.7320964749536178, "grad_norm": 5.6302103996276855, "learning_rate": 1.7006895839496557e-06, "loss": 0.3506, "step": 15784 }, { "epoch": 0.7321428571428571, "grad_norm": 11.554031372070312, "learning_rate": 1.7001366420250815e-06, "loss": 0.3856, "step": 15785 }, { "epoch": 0.7321892393320965, "grad_norm": 12.970489501953125, "learning_rate": 1.6995837715903063e-06, "loss": 0.4161, "step": 15786 }, { "epoch": 0.7322356215213358, "grad_norm": 5.377418518066406, "learning_rate": 1.6990309726573095e-06, "loss": 0.3263, "step": 15787 }, { "epoch": 0.7322820037105752, "grad_norm": 5.184220790863037, "learning_rate": 1.6984782452380644e-06, "loss": 0.3579, "step": 15788 }, { "epoch": 0.7323283858998144, "grad_norm": 10.38497257232666, "learning_rate": 1.6979255893445462e-06, "loss": 0.4493, "step": 15789 }, { "epoch": 0.7323747680890538, "grad_norm": 6.184042453765869, "learning_rate": 1.6973730049887282e-06, "loss": 0.3144, "step": 15790 }, { "epoch": 0.7324211502782931, "grad_norm": 6.078895092010498, "learning_rate": 1.6968204921825826e-06, "loss": 0.2362, "step": 15791 }, { "epoch": 0.7324675324675325, "grad_norm": 10.198945999145508, "learning_rate": 1.6962680509380808e-06, "loss": 0.3533, "step": 15792 }, { "epoch": 0.7325139146567718, "grad_norm": 6.562265872955322, "learning_rate": 1.6957156812671865e-06, "loss": 0.2462, "step": 15793 }, { "epoch": 0.7325602968460111, "grad_norm": 11.569278717041016, "learning_rate": 1.6951633831818698e-06, "loss": 0.3832, "step": 15794 }, { "epoch": 0.7326066790352505, "grad_norm": 8.186466217041016, "learning_rate": 1.6946111566940953e-06, "loss": 0.3704, "step": 15795 }, { "epoch": 0.7326530612244898, "grad_norm": 5.961100101470947, "learning_rate": 1.6940590018158264e-06, "loss": 0.2552, "step": 15796 }, { "epoch": 0.7326994434137292, "grad_norm": 5.97376823425293, "learning_rate": 1.6935069185590274e-06, "loss": 0.3276, "step": 15797 }, { "epoch": 0.7327458256029684, "grad_norm": 11.160530090332031, "learning_rate": 1.6929549069356555e-06, "loss": 0.4572, "step": 15798 }, { "epoch": 0.7327922077922078, "grad_norm": 6.310128688812256, "learning_rate": 1.6924029669576707e-06, "loss": 0.3548, "step": 15799 }, { "epoch": 0.7328385899814471, "grad_norm": 5.144104957580566, "learning_rate": 1.6918510986370312e-06, "loss": 0.279, "step": 15800 }, { "epoch": 0.7328849721706865, "grad_norm": 6.078070163726807, "learning_rate": 1.6912993019856932e-06, "loss": 0.2165, "step": 15801 }, { "epoch": 0.7329313543599257, "grad_norm": 12.982288360595703, "learning_rate": 1.6907475770156124e-06, "loss": 0.3025, "step": 15802 }, { "epoch": 0.7329777365491651, "grad_norm": 7.236029148101807, "learning_rate": 1.6901959237387377e-06, "loss": 0.3062, "step": 15803 }, { "epoch": 0.7330241187384045, "grad_norm": 13.06286334991455, "learning_rate": 1.6896443421670228e-06, "loss": 0.2222, "step": 15804 }, { "epoch": 0.7330705009276438, "grad_norm": 4.916723251342773, "learning_rate": 1.6890928323124172e-06, "loss": 0.2824, "step": 15805 }, { "epoch": 0.7331168831168832, "grad_norm": 14.718070030212402, "learning_rate": 1.6885413941868688e-06, "loss": 0.2812, "step": 15806 }, { "epoch": 0.7331632653061224, "grad_norm": 7.416478633880615, "learning_rate": 1.6879900278023265e-06, "loss": 0.2385, "step": 15807 }, { "epoch": 0.7332096474953618, "grad_norm": 6.146012306213379, "learning_rate": 1.6874387331707315e-06, "loss": 0.2991, "step": 15808 }, { "epoch": 0.7332560296846011, "grad_norm": 6.257762908935547, "learning_rate": 1.6868875103040295e-06, "loss": 0.3355, "step": 15809 }, { "epoch": 0.7333024118738405, "grad_norm": 6.392212390899658, "learning_rate": 1.6863363592141618e-06, "loss": 0.391, "step": 15810 }, { "epoch": 0.7333487940630797, "grad_norm": 9.632699012756348, "learning_rate": 1.6857852799130692e-06, "loss": 0.3195, "step": 15811 }, { "epoch": 0.7333951762523191, "grad_norm": 6.310225009918213, "learning_rate": 1.6852342724126918e-06, "loss": 0.2729, "step": 15812 }, { "epoch": 0.7334415584415584, "grad_norm": 10.077559471130371, "learning_rate": 1.6846833367249643e-06, "loss": 0.3249, "step": 15813 }, { "epoch": 0.7334879406307978, "grad_norm": 5.805395603179932, "learning_rate": 1.684132472861823e-06, "loss": 0.1652, "step": 15814 }, { "epoch": 0.733534322820037, "grad_norm": 4.268463611602783, "learning_rate": 1.6835816808352034e-06, "loss": 0.3053, "step": 15815 }, { "epoch": 0.7335807050092764, "grad_norm": 11.652458190917969, "learning_rate": 1.6830309606570372e-06, "loss": 0.4331, "step": 15816 }, { "epoch": 0.7336270871985158, "grad_norm": 9.749664306640625, "learning_rate": 1.6824803123392574e-06, "loss": 0.3755, "step": 15817 }, { "epoch": 0.7336734693877551, "grad_norm": 8.218283653259277, "learning_rate": 1.6819297358937903e-06, "loss": 0.4061, "step": 15818 }, { "epoch": 0.7337198515769945, "grad_norm": 8.848753929138184, "learning_rate": 1.6813792313325655e-06, "loss": 0.285, "step": 15819 }, { "epoch": 0.7337662337662337, "grad_norm": 7.090798854827881, "learning_rate": 1.6808287986675087e-06, "loss": 0.3404, "step": 15820 }, { "epoch": 0.7338126159554731, "grad_norm": 4.503943920135498, "learning_rate": 1.6802784379105459e-06, "loss": 0.2383, "step": 15821 }, { "epoch": 0.7338589981447124, "grad_norm": 8.624853134155273, "learning_rate": 1.6797281490735995e-06, "loss": 0.2404, "step": 15822 }, { "epoch": 0.7339053803339518, "grad_norm": 6.711641311645508, "learning_rate": 1.679177932168593e-06, "loss": 0.3556, "step": 15823 }, { "epoch": 0.733951762523191, "grad_norm": 11.861875534057617, "learning_rate": 1.6786277872074437e-06, "loss": 0.3698, "step": 15824 }, { "epoch": 0.7339981447124304, "grad_norm": 6.205081939697266, "learning_rate": 1.6780777142020715e-06, "loss": 0.3397, "step": 15825 }, { "epoch": 0.7340445269016698, "grad_norm": 15.087646484375, "learning_rate": 1.6775277131643935e-06, "loss": 0.4977, "step": 15826 }, { "epoch": 0.7340909090909091, "grad_norm": 5.039133548736572, "learning_rate": 1.676977784106325e-06, "loss": 0.3172, "step": 15827 }, { "epoch": 0.7341372912801484, "grad_norm": 8.806367874145508, "learning_rate": 1.6764279270397815e-06, "loss": 0.3756, "step": 15828 }, { "epoch": 0.7341836734693877, "grad_norm": 4.272566795349121, "learning_rate": 1.675878141976673e-06, "loss": 0.2957, "step": 15829 }, { "epoch": 0.7342300556586271, "grad_norm": 13.95727825164795, "learning_rate": 1.6753284289289106e-06, "loss": 0.3016, "step": 15830 }, { "epoch": 0.7342764378478664, "grad_norm": 8.718847274780273, "learning_rate": 1.6747787879084044e-06, "loss": 0.2845, "step": 15831 }, { "epoch": 0.7343228200371058, "grad_norm": 3.6517672538757324, "learning_rate": 1.674229218927062e-06, "loss": 0.2338, "step": 15832 }, { "epoch": 0.734369202226345, "grad_norm": 3.9488320350646973, "learning_rate": 1.6736797219967892e-06, "loss": 0.2728, "step": 15833 }, { "epoch": 0.7344155844155844, "grad_norm": 11.63826847076416, "learning_rate": 1.6731302971294927e-06, "loss": 0.3353, "step": 15834 }, { "epoch": 0.7344619666048238, "grad_norm": 16.154951095581055, "learning_rate": 1.672580944337072e-06, "loss": 0.4019, "step": 15835 }, { "epoch": 0.7345083487940631, "grad_norm": 8.365323066711426, "learning_rate": 1.6720316636314298e-06, "loss": 0.2858, "step": 15836 }, { "epoch": 0.7345547309833024, "grad_norm": 4.995889186859131, "learning_rate": 1.6714824550244668e-06, "loss": 0.1984, "step": 15837 }, { "epoch": 0.7346011131725417, "grad_norm": 7.491534233093262, "learning_rate": 1.67093331852808e-06, "loss": 0.4019, "step": 15838 }, { "epoch": 0.7346474953617811, "grad_norm": 5.43312931060791, "learning_rate": 1.6703842541541693e-06, "loss": 0.2941, "step": 15839 }, { "epoch": 0.7346938775510204, "grad_norm": 21.677072525024414, "learning_rate": 1.6698352619146257e-06, "loss": 0.3844, "step": 15840 }, { "epoch": 0.7347402597402597, "grad_norm": 5.0809712409973145, "learning_rate": 1.6692863418213445e-06, "loss": 0.2651, "step": 15841 }, { "epoch": 0.734786641929499, "grad_norm": 5.984213829040527, "learning_rate": 1.6687374938862183e-06, "loss": 0.3111, "step": 15842 }, { "epoch": 0.7348330241187384, "grad_norm": 6.566982746124268, "learning_rate": 1.668188718121137e-06, "loss": 0.2335, "step": 15843 }, { "epoch": 0.7348794063079778, "grad_norm": 5.5034260749816895, "learning_rate": 1.6676400145379896e-06, "loss": 0.2654, "step": 15844 }, { "epoch": 0.734925788497217, "grad_norm": 6.345966339111328, "learning_rate": 1.6670913831486652e-06, "loss": 0.1678, "step": 15845 }, { "epoch": 0.7349721706864564, "grad_norm": 12.771505355834961, "learning_rate": 1.6665428239650467e-06, "loss": 0.3932, "step": 15846 }, { "epoch": 0.7350185528756957, "grad_norm": 6.690110683441162, "learning_rate": 1.66599433699902e-06, "loss": 0.2935, "step": 15847 }, { "epoch": 0.7350649350649351, "grad_norm": 4.948976993560791, "learning_rate": 1.665445922262467e-06, "loss": 0.2376, "step": 15848 }, { "epoch": 0.7351113172541744, "grad_norm": 14.191096305847168, "learning_rate": 1.6648975797672695e-06, "loss": 0.4495, "step": 15849 }, { "epoch": 0.7351576994434137, "grad_norm": 5.718350887298584, "learning_rate": 1.6643493095253082e-06, "loss": 0.3409, "step": 15850 }, { "epoch": 0.735204081632653, "grad_norm": 6.747403144836426, "learning_rate": 1.6638011115484587e-06, "loss": 0.3249, "step": 15851 }, { "epoch": 0.7352504638218924, "grad_norm": 4.8740386962890625, "learning_rate": 1.6632529858485979e-06, "loss": 0.2753, "step": 15852 }, { "epoch": 0.7352968460111318, "grad_norm": 10.153761863708496, "learning_rate": 1.6627049324376017e-06, "loss": 0.3359, "step": 15853 }, { "epoch": 0.735343228200371, "grad_norm": 8.370930671691895, "learning_rate": 1.6621569513273422e-06, "loss": 0.4365, "step": 15854 }, { "epoch": 0.7353896103896104, "grad_norm": 8.65590763092041, "learning_rate": 1.6616090425296938e-06, "loss": 0.384, "step": 15855 }, { "epoch": 0.7354359925788497, "grad_norm": 7.1164445877075195, "learning_rate": 1.6610612060565235e-06, "loss": 0.3557, "step": 15856 }, { "epoch": 0.7354823747680891, "grad_norm": 12.228798866271973, "learning_rate": 1.6605134419197005e-06, "loss": 0.3601, "step": 15857 }, { "epoch": 0.7355287569573283, "grad_norm": 6.826075553894043, "learning_rate": 1.6599657501310924e-06, "loss": 0.3685, "step": 15858 }, { "epoch": 0.7355751391465677, "grad_norm": 6.772283554077148, "learning_rate": 1.6594181307025648e-06, "loss": 0.3163, "step": 15859 }, { "epoch": 0.735621521335807, "grad_norm": 3.9247374534606934, "learning_rate": 1.6588705836459817e-06, "loss": 0.1463, "step": 15860 }, { "epoch": 0.7356679035250464, "grad_norm": 8.185108184814453, "learning_rate": 1.6583231089732066e-06, "loss": 0.3619, "step": 15861 }, { "epoch": 0.7357142857142858, "grad_norm": 4.757994174957275, "learning_rate": 1.657775706696097e-06, "loss": 0.3586, "step": 15862 }, { "epoch": 0.735760667903525, "grad_norm": 4.712655067443848, "learning_rate": 1.6572283768265136e-06, "loss": 0.2728, "step": 15863 }, { "epoch": 0.7358070500927644, "grad_norm": 4.117938995361328, "learning_rate": 1.6566811193763149e-06, "loss": 0.2206, "step": 15864 }, { "epoch": 0.7358534322820037, "grad_norm": 6.668024063110352, "learning_rate": 1.6561339343573558e-06, "loss": 0.2898, "step": 15865 }, { "epoch": 0.7358998144712431, "grad_norm": 7.172345161437988, "learning_rate": 1.6555868217814936e-06, "loss": 0.3043, "step": 15866 }, { "epoch": 0.7359461966604823, "grad_norm": 4.6954755783081055, "learning_rate": 1.6550397816605767e-06, "loss": 0.3044, "step": 15867 }, { "epoch": 0.7359925788497217, "grad_norm": 6.08615255355835, "learning_rate": 1.6544928140064586e-06, "loss": 0.2946, "step": 15868 }, { "epoch": 0.736038961038961, "grad_norm": 6.755830764770508, "learning_rate": 1.6539459188309892e-06, "loss": 0.2963, "step": 15869 }, { "epoch": 0.7360853432282004, "grad_norm": 7.78520393371582, "learning_rate": 1.6533990961460166e-06, "loss": 0.2672, "step": 15870 }, { "epoch": 0.7361317254174397, "grad_norm": 12.25139045715332, "learning_rate": 1.6528523459633877e-06, "loss": 0.4206, "step": 15871 }, { "epoch": 0.736178107606679, "grad_norm": 5.40480375289917, "learning_rate": 1.652305668294949e-06, "loss": 0.2267, "step": 15872 }, { "epoch": 0.7362244897959184, "grad_norm": 11.015548706054688, "learning_rate": 1.6517590631525403e-06, "loss": 0.3567, "step": 15873 }, { "epoch": 0.7362708719851577, "grad_norm": 7.334138870239258, "learning_rate": 1.6512125305480059e-06, "loss": 0.3723, "step": 15874 }, { "epoch": 0.7363172541743971, "grad_norm": 12.043066024780273, "learning_rate": 1.6506660704931854e-06, "loss": 0.3798, "step": 15875 }, { "epoch": 0.7363636363636363, "grad_norm": 8.338418960571289, "learning_rate": 1.6501196829999179e-06, "loss": 0.4258, "step": 15876 }, { "epoch": 0.7364100185528757, "grad_norm": 6.38321590423584, "learning_rate": 1.6495733680800425e-06, "loss": 0.2615, "step": 15877 }, { "epoch": 0.736456400742115, "grad_norm": 5.716351509094238, "learning_rate": 1.649027125745391e-06, "loss": 0.2867, "step": 15878 }, { "epoch": 0.7365027829313544, "grad_norm": 9.667384147644043, "learning_rate": 1.6484809560077992e-06, "loss": 0.347, "step": 15879 }, { "epoch": 0.7365491651205937, "grad_norm": 9.96623420715332, "learning_rate": 1.6479348588791e-06, "loss": 0.4521, "step": 15880 }, { "epoch": 0.736595547309833, "grad_norm": 5.740350246429443, "learning_rate": 1.6473888343711241e-06, "loss": 0.2948, "step": 15881 }, { "epoch": 0.7366419294990724, "grad_norm": 8.028773307800293, "learning_rate": 1.6468428824957006e-06, "loss": 0.2962, "step": 15882 }, { "epoch": 0.7366883116883117, "grad_norm": 6.796474456787109, "learning_rate": 1.646297003264659e-06, "loss": 0.2735, "step": 15883 }, { "epoch": 0.736734693877551, "grad_norm": 11.283792495727539, "learning_rate": 1.6457511966898225e-06, "loss": 0.4592, "step": 15884 }, { "epoch": 0.7367810760667903, "grad_norm": 9.603778839111328, "learning_rate": 1.6452054627830172e-06, "loss": 0.346, "step": 15885 }, { "epoch": 0.7368274582560297, "grad_norm": 6.6133646965026855, "learning_rate": 1.644659801556066e-06, "loss": 0.2803, "step": 15886 }, { "epoch": 0.736873840445269, "grad_norm": 6.802614212036133, "learning_rate": 1.6441142130207905e-06, "loss": 0.3774, "step": 15887 }, { "epoch": 0.7369202226345084, "grad_norm": 7.231820106506348, "learning_rate": 1.6435686971890119e-06, "loss": 0.3862, "step": 15888 }, { "epoch": 0.7369666048237477, "grad_norm": 5.125155448913574, "learning_rate": 1.643023254072546e-06, "loss": 0.3318, "step": 15889 }, { "epoch": 0.737012987012987, "grad_norm": 6.6062188148498535, "learning_rate": 1.64247788368321e-06, "loss": 0.3347, "step": 15890 }, { "epoch": 0.7370593692022264, "grad_norm": 6.809544086456299, "learning_rate": 1.6419325860328206e-06, "loss": 0.3646, "step": 15891 }, { "epoch": 0.7371057513914657, "grad_norm": 14.543106079101562, "learning_rate": 1.6413873611331899e-06, "loss": 0.358, "step": 15892 }, { "epoch": 0.737152133580705, "grad_norm": 8.519120216369629, "learning_rate": 1.6408422089961307e-06, "loss": 0.3032, "step": 15893 }, { "epoch": 0.7371985157699443, "grad_norm": 4.3431315422058105, "learning_rate": 1.6402971296334547e-06, "loss": 0.3299, "step": 15894 }, { "epoch": 0.7372448979591837, "grad_norm": 8.809181213378906, "learning_rate": 1.6397521230569679e-06, "loss": 0.3035, "step": 15895 }, { "epoch": 0.737291280148423, "grad_norm": 16.62639045715332, "learning_rate": 1.6392071892784789e-06, "loss": 0.3881, "step": 15896 }, { "epoch": 0.7373376623376623, "grad_norm": 8.108697891235352, "learning_rate": 1.6386623283097936e-06, "loss": 0.1377, "step": 15897 }, { "epoch": 0.7373840445269016, "grad_norm": 5.01264762878418, "learning_rate": 1.6381175401627164e-06, "loss": 0.2726, "step": 15898 }, { "epoch": 0.737430426716141, "grad_norm": 4.957647323608398, "learning_rate": 1.6375728248490507e-06, "loss": 0.2915, "step": 15899 }, { "epoch": 0.7374768089053804, "grad_norm": 6.2625861167907715, "learning_rate": 1.6370281823805945e-06, "loss": 0.3053, "step": 15900 }, { "epoch": 0.7375231910946196, "grad_norm": 7.333845615386963, "learning_rate": 1.636483612769149e-06, "loss": 0.288, "step": 15901 }, { "epoch": 0.737569573283859, "grad_norm": 5.3158674240112305, "learning_rate": 1.6359391160265127e-06, "loss": 0.2903, "step": 15902 }, { "epoch": 0.7376159554730983, "grad_norm": 10.266297340393066, "learning_rate": 1.6353946921644803e-06, "loss": 0.2777, "step": 15903 }, { "epoch": 0.7376623376623377, "grad_norm": 11.92724323272705, "learning_rate": 1.6348503411948474e-06, "loss": 0.4679, "step": 15904 }, { "epoch": 0.737708719851577, "grad_norm": 6.91645622253418, "learning_rate": 1.6343060631294083e-06, "loss": 0.3205, "step": 15905 }, { "epoch": 0.7377551020408163, "grad_norm": 5.783622741699219, "learning_rate": 1.6337618579799518e-06, "loss": 0.2807, "step": 15906 }, { "epoch": 0.7378014842300556, "grad_norm": 8.758933067321777, "learning_rate": 1.6332177257582693e-06, "loss": 0.2789, "step": 15907 }, { "epoch": 0.737847866419295, "grad_norm": 9.663614273071289, "learning_rate": 1.6326736664761488e-06, "loss": 0.347, "step": 15908 }, { "epoch": 0.7378942486085344, "grad_norm": 7.614235877990723, "learning_rate": 1.6321296801453772e-06, "loss": 0.244, "step": 15909 }, { "epoch": 0.7379406307977736, "grad_norm": 6.198366165161133, "learning_rate": 1.6315857667777412e-06, "loss": 0.3071, "step": 15910 }, { "epoch": 0.737987012987013, "grad_norm": 8.488960266113281, "learning_rate": 1.631041926385022e-06, "loss": 0.3195, "step": 15911 }, { "epoch": 0.7380333951762523, "grad_norm": 5.766088962554932, "learning_rate": 1.6304981589790015e-06, "loss": 0.2468, "step": 15912 }, { "epoch": 0.7380797773654917, "grad_norm": 8.91429328918457, "learning_rate": 1.6299544645714616e-06, "loss": 0.4255, "step": 15913 }, { "epoch": 0.7381261595547309, "grad_norm": 5.870405197143555, "learning_rate": 1.6294108431741812e-06, "loss": 0.3641, "step": 15914 }, { "epoch": 0.7381725417439703, "grad_norm": 9.049544334411621, "learning_rate": 1.6288672947989364e-06, "loss": 0.3671, "step": 15915 }, { "epoch": 0.7382189239332096, "grad_norm": 4.9377899169921875, "learning_rate": 1.6283238194575056e-06, "loss": 0.2511, "step": 15916 }, { "epoch": 0.738265306122449, "grad_norm": 8.416406631469727, "learning_rate": 1.6277804171616591e-06, "loss": 0.3683, "step": 15917 }, { "epoch": 0.7383116883116884, "grad_norm": 6.643168926239014, "learning_rate": 1.6272370879231709e-06, "loss": 0.3072, "step": 15918 }, { "epoch": 0.7383580705009276, "grad_norm": 5.754731178283691, "learning_rate": 1.6266938317538127e-06, "loss": 0.3359, "step": 15919 }, { "epoch": 0.738404452690167, "grad_norm": 8.02757740020752, "learning_rate": 1.6261506486653534e-06, "loss": 0.3829, "step": 15920 }, { "epoch": 0.7384508348794063, "grad_norm": 5.70985746383667, "learning_rate": 1.625607538669562e-06, "loss": 0.339, "step": 15921 }, { "epoch": 0.7384972170686457, "grad_norm": 4.91707181930542, "learning_rate": 1.6250645017782024e-06, "loss": 0.2625, "step": 15922 }, { "epoch": 0.7385435992578849, "grad_norm": 6.099146366119385, "learning_rate": 1.6245215380030399e-06, "loss": 0.3928, "step": 15923 }, { "epoch": 0.7385899814471243, "grad_norm": 7.276057720184326, "learning_rate": 1.6239786473558377e-06, "loss": 0.3534, "step": 15924 }, { "epoch": 0.7386363636363636, "grad_norm": 12.028719902038574, "learning_rate": 1.6234358298483578e-06, "loss": 0.3788, "step": 15925 }, { "epoch": 0.738682745825603, "grad_norm": 7.401732921600342, "learning_rate": 1.6228930854923597e-06, "loss": 0.3416, "step": 15926 }, { "epoch": 0.7387291280148423, "grad_norm": 7.770900249481201, "learning_rate": 1.6223504142996032e-06, "loss": 0.2891, "step": 15927 }, { "epoch": 0.7387755102040816, "grad_norm": 5.034055709838867, "learning_rate": 1.6218078162818418e-06, "loss": 0.2577, "step": 15928 }, { "epoch": 0.738821892393321, "grad_norm": 6.03687047958374, "learning_rate": 1.621265291450832e-06, "loss": 0.243, "step": 15929 }, { "epoch": 0.7388682745825603, "grad_norm": 5.853480815887451, "learning_rate": 1.6207228398183278e-06, "loss": 0.2884, "step": 15930 }, { "epoch": 0.7389146567717997, "grad_norm": 12.620205879211426, "learning_rate": 1.6201804613960808e-06, "loss": 0.3639, "step": 15931 }, { "epoch": 0.7389610389610389, "grad_norm": 5.909432411193848, "learning_rate": 1.6196381561958436e-06, "loss": 0.3716, "step": 15932 }, { "epoch": 0.7390074211502783, "grad_norm": 8.889660835266113, "learning_rate": 1.6190959242293608e-06, "loss": 0.3638, "step": 15933 }, { "epoch": 0.7390538033395176, "grad_norm": 9.504194259643555, "learning_rate": 1.6185537655083817e-06, "loss": 0.3737, "step": 15934 }, { "epoch": 0.739100185528757, "grad_norm": 4.763086795806885, "learning_rate": 1.618011680044651e-06, "loss": 0.2866, "step": 15935 }, { "epoch": 0.7391465677179963, "grad_norm": 5.687345027923584, "learning_rate": 1.617469667849914e-06, "loss": 0.2882, "step": 15936 }, { "epoch": 0.7391929499072356, "grad_norm": 8.852476119995117, "learning_rate": 1.6169277289359142e-06, "loss": 0.3839, "step": 15937 }, { "epoch": 0.739239332096475, "grad_norm": 9.561975479125977, "learning_rate": 1.6163858633143892e-06, "loss": 0.3465, "step": 15938 }, { "epoch": 0.7392857142857143, "grad_norm": 11.315506935119629, "learning_rate": 1.6158440709970796e-06, "loss": 0.3937, "step": 15939 }, { "epoch": 0.7393320964749536, "grad_norm": 5.971685886383057, "learning_rate": 1.6153023519957234e-06, "loss": 0.268, "step": 15940 }, { "epoch": 0.7393784786641929, "grad_norm": 8.056876182556152, "learning_rate": 1.6147607063220567e-06, "loss": 0.3449, "step": 15941 }, { "epoch": 0.7394248608534323, "grad_norm": 5.260931015014648, "learning_rate": 1.6142191339878132e-06, "loss": 0.287, "step": 15942 }, { "epoch": 0.7394712430426716, "grad_norm": 7.878138065338135, "learning_rate": 1.6136776350047283e-06, "loss": 0.3404, "step": 15943 }, { "epoch": 0.7395176252319109, "grad_norm": 18.018672943115234, "learning_rate": 1.6131362093845299e-06, "loss": 0.3997, "step": 15944 }, { "epoch": 0.7395640074211502, "grad_norm": 5.940571308135986, "learning_rate": 1.6125948571389489e-06, "loss": 0.2869, "step": 15945 }, { "epoch": 0.7396103896103896, "grad_norm": 6.72491455078125, "learning_rate": 1.6120535782797137e-06, "loss": 0.3317, "step": 15946 }, { "epoch": 0.739656771799629, "grad_norm": 9.119181632995605, "learning_rate": 1.6115123728185512e-06, "loss": 0.2964, "step": 15947 }, { "epoch": 0.7397031539888683, "grad_norm": 11.917197227478027, "learning_rate": 1.6109712407671867e-06, "loss": 0.3188, "step": 15948 }, { "epoch": 0.7397495361781076, "grad_norm": 8.874134063720703, "learning_rate": 1.6104301821373414e-06, "loss": 0.2862, "step": 15949 }, { "epoch": 0.7397959183673469, "grad_norm": 9.871623039245605, "learning_rate": 1.6098891969407387e-06, "loss": 0.2269, "step": 15950 }, { "epoch": 0.7398423005565863, "grad_norm": 7.708014011383057, "learning_rate": 1.6093482851890985e-06, "loss": 0.2369, "step": 15951 }, { "epoch": 0.7398886827458256, "grad_norm": 5.286078929901123, "learning_rate": 1.6088074468941388e-06, "loss": 0.2795, "step": 15952 }, { "epoch": 0.7399350649350649, "grad_norm": 7.053030490875244, "learning_rate": 1.6082666820675773e-06, "loss": 0.2959, "step": 15953 }, { "epoch": 0.7399814471243042, "grad_norm": 12.397361755371094, "learning_rate": 1.6077259907211312e-06, "loss": 0.3061, "step": 15954 }, { "epoch": 0.7400278293135436, "grad_norm": 6.287917137145996, "learning_rate": 1.60718537286651e-06, "loss": 0.2943, "step": 15955 }, { "epoch": 0.740074211502783, "grad_norm": 5.018837928771973, "learning_rate": 1.6066448285154284e-06, "loss": 0.294, "step": 15956 }, { "epoch": 0.7401205936920222, "grad_norm": 8.856204986572266, "learning_rate": 1.606104357679597e-06, "loss": 0.3437, "step": 15957 }, { "epoch": 0.7401669758812616, "grad_norm": 7.96539306640625, "learning_rate": 1.6055639603707246e-06, "loss": 0.405, "step": 15958 }, { "epoch": 0.7402133580705009, "grad_norm": 4.872636795043945, "learning_rate": 1.6050236366005202e-06, "loss": 0.2226, "step": 15959 }, { "epoch": 0.7402597402597403, "grad_norm": 10.475170135498047, "learning_rate": 1.6044833863806864e-06, "loss": 0.2818, "step": 15960 }, { "epoch": 0.7403061224489796, "grad_norm": 9.943902969360352, "learning_rate": 1.603943209722929e-06, "loss": 0.3669, "step": 15961 }, { "epoch": 0.7403525046382189, "grad_norm": 7.050788879394531, "learning_rate": 1.6034031066389504e-06, "loss": 0.3088, "step": 15962 }, { "epoch": 0.7403988868274582, "grad_norm": 8.919761657714844, "learning_rate": 1.6028630771404524e-06, "loss": 0.3326, "step": 15963 }, { "epoch": 0.7404452690166976, "grad_norm": 6.68282413482666, "learning_rate": 1.6023231212391344e-06, "loss": 0.3376, "step": 15964 }, { "epoch": 0.740491651205937, "grad_norm": 8.281858444213867, "learning_rate": 1.6017832389466947e-06, "loss": 0.4586, "step": 15965 }, { "epoch": 0.7405380333951762, "grad_norm": 4.691678524017334, "learning_rate": 1.6012434302748275e-06, "loss": 0.2482, "step": 15966 }, { "epoch": 0.7405844155844156, "grad_norm": 6.515200614929199, "learning_rate": 1.600703695235229e-06, "loss": 0.3101, "step": 15967 }, { "epoch": 0.7406307977736549, "grad_norm": 6.102892875671387, "learning_rate": 1.6001640338395918e-06, "loss": 0.2967, "step": 15968 }, { "epoch": 0.7406771799628943, "grad_norm": 8.794112205505371, "learning_rate": 1.5996244460996075e-06, "loss": 0.4103, "step": 15969 }, { "epoch": 0.7407235621521335, "grad_norm": 7.825911045074463, "learning_rate": 1.5990849320269675e-06, "loss": 0.3607, "step": 15970 }, { "epoch": 0.7407699443413729, "grad_norm": 8.184603691101074, "learning_rate": 1.5985454916333576e-06, "loss": 0.3085, "step": 15971 }, { "epoch": 0.7408163265306122, "grad_norm": 4.882091522216797, "learning_rate": 1.5980061249304652e-06, "loss": 0.2867, "step": 15972 }, { "epoch": 0.7408627087198516, "grad_norm": 9.025298118591309, "learning_rate": 1.5974668319299757e-06, "loss": 0.4187, "step": 15973 }, { "epoch": 0.740909090909091, "grad_norm": 8.975695610046387, "learning_rate": 1.5969276126435724e-06, "loss": 0.36, "step": 15974 }, { "epoch": 0.7409554730983302, "grad_norm": 5.380104064941406, "learning_rate": 1.5963884670829377e-06, "loss": 0.3144, "step": 15975 }, { "epoch": 0.7410018552875696, "grad_norm": 8.616042137145996, "learning_rate": 1.5958493952597536e-06, "loss": 0.3256, "step": 15976 }, { "epoch": 0.7410482374768089, "grad_norm": 5.91636848449707, "learning_rate": 1.5953103971856947e-06, "loss": 0.2451, "step": 15977 }, { "epoch": 0.7410946196660483, "grad_norm": 13.557506561279297, "learning_rate": 1.5947714728724406e-06, "loss": 0.417, "step": 15978 }, { "epoch": 0.7411410018552875, "grad_norm": 4.8850908279418945, "learning_rate": 1.5942326223316662e-06, "loss": 0.2397, "step": 15979 }, { "epoch": 0.7411873840445269, "grad_norm": 9.41298770904541, "learning_rate": 1.5936938455750457e-06, "loss": 0.3557, "step": 15980 }, { "epoch": 0.7412337662337662, "grad_norm": 8.96844482421875, "learning_rate": 1.5931551426142532e-06, "loss": 0.2868, "step": 15981 }, { "epoch": 0.7412801484230056, "grad_norm": 9.252370834350586, "learning_rate": 1.592616513460956e-06, "loss": 0.4164, "step": 15982 }, { "epoch": 0.7413265306122448, "grad_norm": 7.609886169433594, "learning_rate": 1.5920779581268248e-06, "loss": 0.3073, "step": 15983 }, { "epoch": 0.7413729128014842, "grad_norm": 7.542266845703125, "learning_rate": 1.591539476623527e-06, "loss": 0.364, "step": 15984 }, { "epoch": 0.7414192949907236, "grad_norm": 6.75490140914917, "learning_rate": 1.5910010689627287e-06, "loss": 0.3877, "step": 15985 }, { "epoch": 0.7414656771799629, "grad_norm": 5.202127456665039, "learning_rate": 1.590462735156094e-06, "loss": 0.2616, "step": 15986 }, { "epoch": 0.7415120593692023, "grad_norm": 11.676618576049805, "learning_rate": 1.5899244752152882e-06, "loss": 0.3406, "step": 15987 }, { "epoch": 0.7415584415584415, "grad_norm": 6.169849872589111, "learning_rate": 1.589386289151968e-06, "loss": 0.3125, "step": 15988 }, { "epoch": 0.7416048237476809, "grad_norm": 10.682243347167969, "learning_rate": 1.5888481769777947e-06, "loss": 0.4301, "step": 15989 }, { "epoch": 0.7416512059369202, "grad_norm": 9.241864204406738, "learning_rate": 1.5883101387044263e-06, "loss": 0.37, "step": 15990 }, { "epoch": 0.7416975881261596, "grad_norm": 11.12331485748291, "learning_rate": 1.5877721743435192e-06, "loss": 0.4825, "step": 15991 }, { "epoch": 0.7417439703153988, "grad_norm": 8.312155723571777, "learning_rate": 1.5872342839067305e-06, "loss": 0.2438, "step": 15992 }, { "epoch": 0.7417903525046382, "grad_norm": 9.304909706115723, "learning_rate": 1.5866964674057089e-06, "loss": 0.3341, "step": 15993 }, { "epoch": 0.7418367346938776, "grad_norm": 6.715232849121094, "learning_rate": 1.5861587248521083e-06, "loss": 0.3335, "step": 15994 }, { "epoch": 0.7418831168831169, "grad_norm": 12.094268798828125, "learning_rate": 1.585621056257578e-06, "loss": 0.3793, "step": 15995 }, { "epoch": 0.7419294990723562, "grad_norm": 16.03969955444336, "learning_rate": 1.585083461633767e-06, "loss": 0.4991, "step": 15996 }, { "epoch": 0.7419758812615955, "grad_norm": 6.82885217666626, "learning_rate": 1.584545940992321e-06, "loss": 0.3867, "step": 15997 }, { "epoch": 0.7420222634508349, "grad_norm": 11.31033992767334, "learning_rate": 1.5840084943448874e-06, "loss": 0.4721, "step": 15998 }, { "epoch": 0.7420686456400742, "grad_norm": 4.104026794433594, "learning_rate": 1.5834711217031067e-06, "loss": 0.2999, "step": 15999 }, { "epoch": 0.7421150278293135, "grad_norm": 6.4007110595703125, "learning_rate": 1.5829338230786223e-06, "loss": 0.3883, "step": 16000 }, { "epoch": 0.7421614100185528, "grad_norm": 7.290597915649414, "learning_rate": 1.5823965984830741e-06, "loss": 0.3799, "step": 16001 }, { "epoch": 0.7422077922077922, "grad_norm": 4.232919692993164, "learning_rate": 1.5818594479281008e-06, "loss": 0.312, "step": 16002 }, { "epoch": 0.7422541743970316, "grad_norm": 5.090105056762695, "learning_rate": 1.5813223714253418e-06, "loss": 0.3495, "step": 16003 }, { "epoch": 0.7423005565862709, "grad_norm": 5.251026630401611, "learning_rate": 1.5807853689864284e-06, "loss": 0.2868, "step": 16004 }, { "epoch": 0.7423469387755102, "grad_norm": 4.714338302612305, "learning_rate": 1.5802484406229968e-06, "loss": 0.3029, "step": 16005 }, { "epoch": 0.7423933209647495, "grad_norm": 12.33938980102539, "learning_rate": 1.579711586346679e-06, "loss": 0.4379, "step": 16006 }, { "epoch": 0.7424397031539889, "grad_norm": 10.664671897888184, "learning_rate": 1.5791748061691054e-06, "loss": 0.3688, "step": 16007 }, { "epoch": 0.7424860853432282, "grad_norm": 5.25559663772583, "learning_rate": 1.5786381001019052e-06, "loss": 0.3542, "step": 16008 }, { "epoch": 0.7425324675324675, "grad_norm": 7.172380447387695, "learning_rate": 1.578101468156708e-06, "loss": 0.3384, "step": 16009 }, { "epoch": 0.7425788497217068, "grad_norm": 6.695189952850342, "learning_rate": 1.5775649103451362e-06, "loss": 0.3479, "step": 16010 }, { "epoch": 0.7426252319109462, "grad_norm": 7.018637180328369, "learning_rate": 1.577028426678815e-06, "loss": 0.3845, "step": 16011 }, { "epoch": 0.7426716141001856, "grad_norm": 4.9488325119018555, "learning_rate": 1.5764920171693676e-06, "loss": 0.3355, "step": 16012 }, { "epoch": 0.7427179962894248, "grad_norm": 7.722365379333496, "learning_rate": 1.575955681828415e-06, "loss": 0.3858, "step": 16013 }, { "epoch": 0.7427643784786642, "grad_norm": 10.066731452941895, "learning_rate": 1.5754194206675783e-06, "loss": 0.3666, "step": 16014 }, { "epoch": 0.7428107606679035, "grad_norm": 6.631546974182129, "learning_rate": 1.574883233698472e-06, "loss": 0.3816, "step": 16015 }, { "epoch": 0.7428571428571429, "grad_norm": 7.669765949249268, "learning_rate": 1.5743471209327132e-06, "loss": 0.2857, "step": 16016 }, { "epoch": 0.7429035250463822, "grad_norm": 9.16629695892334, "learning_rate": 1.573811082381918e-06, "loss": 0.3202, "step": 16017 }, { "epoch": 0.7429499072356215, "grad_norm": 13.899392127990723, "learning_rate": 1.5732751180576982e-06, "loss": 0.4434, "step": 16018 }, { "epoch": 0.7429962894248608, "grad_norm": 4.769471645355225, "learning_rate": 1.5727392279716674e-06, "loss": 0.2492, "step": 16019 }, { "epoch": 0.7430426716141002, "grad_norm": 10.446074485778809, "learning_rate": 1.5722034121354317e-06, "loss": 0.2935, "step": 16020 }, { "epoch": 0.7430890538033396, "grad_norm": 7.381304740905762, "learning_rate": 1.571667670560601e-06, "loss": 0.3542, "step": 16021 }, { "epoch": 0.7431354359925788, "grad_norm": 8.153448104858398, "learning_rate": 1.5711320032587824e-06, "loss": 0.2942, "step": 16022 }, { "epoch": 0.7431818181818182, "grad_norm": 5.919966220855713, "learning_rate": 1.5705964102415799e-06, "loss": 0.3171, "step": 16023 }, { "epoch": 0.7432282003710575, "grad_norm": 6.271772861480713, "learning_rate": 1.5700608915205978e-06, "loss": 0.2479, "step": 16024 }, { "epoch": 0.7432745825602969, "grad_norm": 8.678763389587402, "learning_rate": 1.569525447107439e-06, "loss": 0.2218, "step": 16025 }, { "epoch": 0.7433209647495361, "grad_norm": 7.403059005737305, "learning_rate": 1.5689900770137002e-06, "loss": 0.3532, "step": 16026 }, { "epoch": 0.7433673469387755, "grad_norm": 4.9088921546936035, "learning_rate": 1.5684547812509814e-06, "loss": 0.2512, "step": 16027 }, { "epoch": 0.7434137291280148, "grad_norm": 6.117295265197754, "learning_rate": 1.56791955983088e-06, "loss": 0.3058, "step": 16028 }, { "epoch": 0.7434601113172542, "grad_norm": 7.431445598602295, "learning_rate": 1.5673844127649913e-06, "loss": 0.2957, "step": 16029 }, { "epoch": 0.7435064935064936, "grad_norm": 37.01104736328125, "learning_rate": 1.5668493400649105e-06, "loss": 0.4368, "step": 16030 }, { "epoch": 0.7435528756957328, "grad_norm": 9.650932312011719, "learning_rate": 1.5663143417422261e-06, "loss": 0.3355, "step": 16031 }, { "epoch": 0.7435992578849722, "grad_norm": 5.113925457000732, "learning_rate": 1.56577941780853e-06, "loss": 0.3185, "step": 16032 }, { "epoch": 0.7436456400742115, "grad_norm": 4.254446983337402, "learning_rate": 1.5652445682754119e-06, "loss": 0.2595, "step": 16033 }, { "epoch": 0.7436920222634509, "grad_norm": 5.988715648651123, "learning_rate": 1.5647097931544587e-06, "loss": 0.4193, "step": 16034 }, { "epoch": 0.7437384044526901, "grad_norm": 8.449236869812012, "learning_rate": 1.5641750924572558e-06, "loss": 0.3712, "step": 16035 }, { "epoch": 0.7437847866419295, "grad_norm": 5.625718593597412, "learning_rate": 1.563640466195389e-06, "loss": 0.3543, "step": 16036 }, { "epoch": 0.7438311688311688, "grad_norm": 6.5426506996154785, "learning_rate": 1.5631059143804372e-06, "loss": 0.3431, "step": 16037 }, { "epoch": 0.7438775510204082, "grad_norm": 5.170525550842285, "learning_rate": 1.562571437023983e-06, "loss": 0.3466, "step": 16038 }, { "epoch": 0.7439239332096474, "grad_norm": 5.478084564208984, "learning_rate": 1.5620370341376057e-06, "loss": 0.2936, "step": 16039 }, { "epoch": 0.7439703153988868, "grad_norm": 4.236057758331299, "learning_rate": 1.561502705732883e-06, "loss": 0.2283, "step": 16040 }, { "epoch": 0.7440166975881262, "grad_norm": 3.3016159534454346, "learning_rate": 1.560968451821392e-06, "loss": 0.1666, "step": 16041 }, { "epoch": 0.7440630797773655, "grad_norm": 13.844131469726562, "learning_rate": 1.5604342724147037e-06, "loss": 0.3549, "step": 16042 }, { "epoch": 0.7441094619666048, "grad_norm": 6.049601078033447, "learning_rate": 1.5599001675243923e-06, "loss": 0.3347, "step": 16043 }, { "epoch": 0.7441558441558441, "grad_norm": 11.833928108215332, "learning_rate": 1.55936613716203e-06, "loss": 0.6132, "step": 16044 }, { "epoch": 0.7442022263450835, "grad_norm": 9.364726066589355, "learning_rate": 1.5588321813391854e-06, "loss": 0.3828, "step": 16045 }, { "epoch": 0.7442486085343228, "grad_norm": 6.087822437286377, "learning_rate": 1.5582983000674267e-06, "loss": 0.2802, "step": 16046 }, { "epoch": 0.7442949907235622, "grad_norm": 6.787117958068848, "learning_rate": 1.5577644933583218e-06, "loss": 0.4223, "step": 16047 }, { "epoch": 0.7443413729128014, "grad_norm": 7.850216865539551, "learning_rate": 1.5572307612234316e-06, "loss": 0.3296, "step": 16048 }, { "epoch": 0.7443877551020408, "grad_norm": 8.910086631774902, "learning_rate": 1.556697103674321e-06, "loss": 0.3576, "step": 16049 }, { "epoch": 0.7444341372912802, "grad_norm": 7.156993389129639, "learning_rate": 1.5561635207225518e-06, "loss": 0.3178, "step": 16050 }, { "epoch": 0.7444805194805195, "grad_norm": 5.793269157409668, "learning_rate": 1.5556300123796836e-06, "loss": 0.1515, "step": 16051 }, { "epoch": 0.7445269016697588, "grad_norm": 13.037405014038086, "learning_rate": 1.5550965786572758e-06, "loss": 0.4272, "step": 16052 }, { "epoch": 0.7445732838589981, "grad_norm": 5.162050724029541, "learning_rate": 1.5545632195668819e-06, "loss": 0.2465, "step": 16053 }, { "epoch": 0.7446196660482375, "grad_norm": 8.786874771118164, "learning_rate": 1.5540299351200588e-06, "loss": 0.3476, "step": 16054 }, { "epoch": 0.7446660482374768, "grad_norm": 19.660310745239258, "learning_rate": 1.5534967253283594e-06, "loss": 0.2925, "step": 16055 }, { "epoch": 0.7447124304267161, "grad_norm": 11.993256568908691, "learning_rate": 1.5529635902033358e-06, "loss": 0.511, "step": 16056 }, { "epoch": 0.7447588126159554, "grad_norm": 8.661980628967285, "learning_rate": 1.5524305297565396e-06, "loss": 0.3382, "step": 16057 }, { "epoch": 0.7448051948051948, "grad_norm": 7.554565906524658, "learning_rate": 1.5518975439995154e-06, "loss": 0.3322, "step": 16058 }, { "epoch": 0.7448515769944342, "grad_norm": 15.37641429901123, "learning_rate": 1.5513646329438125e-06, "loss": 0.4409, "step": 16059 }, { "epoch": 0.7448979591836735, "grad_norm": 14.525775909423828, "learning_rate": 1.5508317966009762e-06, "loss": 0.4783, "step": 16060 }, { "epoch": 0.7449443413729128, "grad_norm": 10.204345703125, "learning_rate": 1.5502990349825498e-06, "loss": 0.4681, "step": 16061 }, { "epoch": 0.7449907235621521, "grad_norm": 7.702067852020264, "learning_rate": 1.5497663481000764e-06, "loss": 0.4218, "step": 16062 }, { "epoch": 0.7450371057513915, "grad_norm": 5.5843329429626465, "learning_rate": 1.5492337359650939e-06, "loss": 0.2922, "step": 16063 }, { "epoch": 0.7450834879406308, "grad_norm": 11.313249588012695, "learning_rate": 1.5487011985891425e-06, "loss": 0.3982, "step": 16064 }, { "epoch": 0.7451298701298701, "grad_norm": 5.351332664489746, "learning_rate": 1.548168735983759e-06, "loss": 0.328, "step": 16065 }, { "epoch": 0.7451762523191094, "grad_norm": 9.534218788146973, "learning_rate": 1.5476363481604794e-06, "loss": 0.3744, "step": 16066 }, { "epoch": 0.7452226345083488, "grad_norm": 7.861250877380371, "learning_rate": 1.5471040351308392e-06, "loss": 0.3563, "step": 16067 }, { "epoch": 0.7452690166975882, "grad_norm": 5.134917259216309, "learning_rate": 1.5465717969063665e-06, "loss": 0.3213, "step": 16068 }, { "epoch": 0.7453153988868274, "grad_norm": 9.592799186706543, "learning_rate": 1.546039633498595e-06, "loss": 0.3281, "step": 16069 }, { "epoch": 0.7453617810760668, "grad_norm": 10.713543891906738, "learning_rate": 1.545507544919053e-06, "loss": 0.3328, "step": 16070 }, { "epoch": 0.7454081632653061, "grad_norm": 6.990283012390137, "learning_rate": 1.544975531179268e-06, "loss": 0.3495, "step": 16071 }, { "epoch": 0.7454545454545455, "grad_norm": 10.49656867980957, "learning_rate": 1.5444435922907669e-06, "loss": 0.296, "step": 16072 }, { "epoch": 0.7455009276437848, "grad_norm": 6.121609210968018, "learning_rate": 1.5439117282650717e-06, "loss": 0.2943, "step": 16073 }, { "epoch": 0.7455473098330241, "grad_norm": 10.51643180847168, "learning_rate": 1.543379939113706e-06, "loss": 0.3734, "step": 16074 }, { "epoch": 0.7455936920222634, "grad_norm": 7.982515811920166, "learning_rate": 1.5428482248481907e-06, "loss": 0.4213, "step": 16075 }, { "epoch": 0.7456400742115028, "grad_norm": 7.336598873138428, "learning_rate": 1.5423165854800449e-06, "loss": 0.3091, "step": 16076 }, { "epoch": 0.7456864564007422, "grad_norm": 3.6314897537231445, "learning_rate": 1.541785021020788e-06, "loss": 0.302, "step": 16077 }, { "epoch": 0.7457328385899814, "grad_norm": 7.071107387542725, "learning_rate": 1.5412535314819332e-06, "loss": 0.3555, "step": 16078 }, { "epoch": 0.7457792207792208, "grad_norm": 7.1695756912231445, "learning_rate": 1.540722116874997e-06, "loss": 0.3375, "step": 16079 }, { "epoch": 0.7458256029684601, "grad_norm": 10.55467414855957, "learning_rate": 1.5401907772114906e-06, "loss": 0.3879, "step": 16080 }, { "epoch": 0.7458719851576995, "grad_norm": 6.7541117668151855, "learning_rate": 1.5396595125029268e-06, "loss": 0.3475, "step": 16081 }, { "epoch": 0.7459183673469387, "grad_norm": 7.379032611846924, "learning_rate": 1.5391283227608156e-06, "loss": 0.431, "step": 16082 }, { "epoch": 0.7459647495361781, "grad_norm": 13.392474174499512, "learning_rate": 1.5385972079966627e-06, "loss": 0.298, "step": 16083 }, { "epoch": 0.7460111317254174, "grad_norm": 11.335290908813477, "learning_rate": 1.5380661682219756e-06, "loss": 0.3021, "step": 16084 }, { "epoch": 0.7460575139146568, "grad_norm": 7.798476696014404, "learning_rate": 1.5375352034482583e-06, "loss": 0.3726, "step": 16085 }, { "epoch": 0.7461038961038962, "grad_norm": 7.805659294128418, "learning_rate": 1.537004313687015e-06, "loss": 0.2571, "step": 16086 }, { "epoch": 0.7461502782931354, "grad_norm": 5.860493183135986, "learning_rate": 1.5364734989497486e-06, "loss": 0.3513, "step": 16087 }, { "epoch": 0.7461966604823748, "grad_norm": 4.771552562713623, "learning_rate": 1.5359427592479553e-06, "loss": 0.3007, "step": 16088 }, { "epoch": 0.7462430426716141, "grad_norm": 6.275014400482178, "learning_rate": 1.535412094593135e-06, "loss": 0.2742, "step": 16089 }, { "epoch": 0.7462894248608535, "grad_norm": 8.368678092956543, "learning_rate": 1.534881504996784e-06, "loss": 0.2744, "step": 16090 }, { "epoch": 0.7463358070500927, "grad_norm": 15.315863609313965, "learning_rate": 1.534350990470398e-06, "loss": 0.4445, "step": 16091 }, { "epoch": 0.7463821892393321, "grad_norm": 7.9268999099731445, "learning_rate": 1.533820551025471e-06, "loss": 0.3621, "step": 16092 }, { "epoch": 0.7464285714285714, "grad_norm": 15.877212524414062, "learning_rate": 1.5332901866734922e-06, "loss": 0.4897, "step": 16093 }, { "epoch": 0.7464749536178108, "grad_norm": 8.289528846740723, "learning_rate": 1.5327598974259528e-06, "loss": 0.3038, "step": 16094 }, { "epoch": 0.74652133580705, "grad_norm": 12.71425724029541, "learning_rate": 1.5322296832943417e-06, "loss": 0.3377, "step": 16095 }, { "epoch": 0.7465677179962894, "grad_norm": 7.756003379821777, "learning_rate": 1.531699544290145e-06, "loss": 0.3629, "step": 16096 }, { "epoch": 0.7466141001855288, "grad_norm": 5.7147908210754395, "learning_rate": 1.5311694804248506e-06, "loss": 0.37, "step": 16097 }, { "epoch": 0.7466604823747681, "grad_norm": 7.885610103607178, "learning_rate": 1.5306394917099377e-06, "loss": 0.3545, "step": 16098 }, { "epoch": 0.7467068645640074, "grad_norm": 9.37878131866455, "learning_rate": 1.5301095781568904e-06, "loss": 0.458, "step": 16099 }, { "epoch": 0.7467532467532467, "grad_norm": 5.627737998962402, "learning_rate": 1.529579739777189e-06, "loss": 0.3004, "step": 16100 }, { "epoch": 0.7467996289424861, "grad_norm": 12.438268661499023, "learning_rate": 1.529049976582312e-06, "loss": 0.3276, "step": 16101 }, { "epoch": 0.7468460111317254, "grad_norm": 6.0521697998046875, "learning_rate": 1.528520288583738e-06, "loss": 0.1737, "step": 16102 }, { "epoch": 0.7468923933209648, "grad_norm": 6.820705890655518, "learning_rate": 1.5279906757929397e-06, "loss": 0.2943, "step": 16103 }, { "epoch": 0.746938775510204, "grad_norm": 9.745841979980469, "learning_rate": 1.5274611382213922e-06, "loss": 0.3797, "step": 16104 }, { "epoch": 0.7469851576994434, "grad_norm": 5.939493656158447, "learning_rate": 1.526931675880567e-06, "loss": 0.2952, "step": 16105 }, { "epoch": 0.7470315398886828, "grad_norm": 11.126537322998047, "learning_rate": 1.5264022887819357e-06, "loss": 0.4561, "step": 16106 }, { "epoch": 0.7470779220779221, "grad_norm": 6.13828182220459, "learning_rate": 1.5258729769369678e-06, "loss": 0.2872, "step": 16107 }, { "epoch": 0.7471243042671614, "grad_norm": 10.618786811828613, "learning_rate": 1.525343740357128e-06, "loss": 0.2921, "step": 16108 }, { "epoch": 0.7471706864564007, "grad_norm": 6.945199012756348, "learning_rate": 1.5248145790538838e-06, "loss": 0.2865, "step": 16109 }, { "epoch": 0.7472170686456401, "grad_norm": 5.352663040161133, "learning_rate": 1.5242854930386986e-06, "loss": 0.3163, "step": 16110 }, { "epoch": 0.7472634508348794, "grad_norm": 10.8623046875, "learning_rate": 1.5237564823230345e-06, "loss": 0.4239, "step": 16111 }, { "epoch": 0.7473098330241187, "grad_norm": 16.23731231689453, "learning_rate": 1.5232275469183549e-06, "loss": 0.4469, "step": 16112 }, { "epoch": 0.747356215213358, "grad_norm": 8.262227058410645, "learning_rate": 1.5226986868361148e-06, "loss": 0.2958, "step": 16113 }, { "epoch": 0.7474025974025974, "grad_norm": 6.642194747924805, "learning_rate": 1.5221699020877735e-06, "loss": 0.2624, "step": 16114 }, { "epoch": 0.7474489795918368, "grad_norm": 14.573140144348145, "learning_rate": 1.5216411926847869e-06, "loss": 0.354, "step": 16115 }, { "epoch": 0.7474953617810761, "grad_norm": 9.486635208129883, "learning_rate": 1.5211125586386094e-06, "loss": 0.314, "step": 16116 }, { "epoch": 0.7475417439703154, "grad_norm": 6.733719348907471, "learning_rate": 1.520583999960693e-06, "loss": 0.3133, "step": 16117 }, { "epoch": 0.7475881261595547, "grad_norm": 9.484049797058105, "learning_rate": 1.5200555166624908e-06, "loss": 0.3635, "step": 16118 }, { "epoch": 0.7476345083487941, "grad_norm": 4.671146392822266, "learning_rate": 1.519527108755449e-06, "loss": 0.3461, "step": 16119 }, { "epoch": 0.7476808905380334, "grad_norm": 5.844361782073975, "learning_rate": 1.5189987762510167e-06, "loss": 0.2017, "step": 16120 }, { "epoch": 0.7477272727272727, "grad_norm": 11.750391960144043, "learning_rate": 1.5184705191606396e-06, "loss": 0.3052, "step": 16121 }, { "epoch": 0.747773654916512, "grad_norm": 5.082418441772461, "learning_rate": 1.5179423374957625e-06, "loss": 0.3024, "step": 16122 }, { "epoch": 0.7478200371057514, "grad_norm": 10.90063762664795, "learning_rate": 1.5174142312678296e-06, "loss": 0.3637, "step": 16123 }, { "epoch": 0.7478664192949908, "grad_norm": 5.351265907287598, "learning_rate": 1.5168862004882789e-06, "loss": 0.2784, "step": 16124 }, { "epoch": 0.74791280148423, "grad_norm": 5.5152692794799805, "learning_rate": 1.5163582451685516e-06, "loss": 0.33, "step": 16125 }, { "epoch": 0.7479591836734694, "grad_norm": 7.463006496429443, "learning_rate": 1.5158303653200852e-06, "loss": 0.308, "step": 16126 }, { "epoch": 0.7480055658627087, "grad_norm": 8.305582046508789, "learning_rate": 1.5153025609543165e-06, "loss": 0.3739, "step": 16127 }, { "epoch": 0.7480519480519481, "grad_norm": 8.207822799682617, "learning_rate": 1.51477483208268e-06, "loss": 0.3464, "step": 16128 }, { "epoch": 0.7480983302411874, "grad_norm": 8.237739562988281, "learning_rate": 1.5142471787166097e-06, "loss": 0.3271, "step": 16129 }, { "epoch": 0.7481447124304267, "grad_norm": 12.518924713134766, "learning_rate": 1.5137196008675343e-06, "loss": 0.4228, "step": 16130 }, { "epoch": 0.748191094619666, "grad_norm": 5.5841474533081055, "learning_rate": 1.513192098546885e-06, "loss": 0.356, "step": 16131 }, { "epoch": 0.7482374768089054, "grad_norm": 10.750137329101562, "learning_rate": 1.5126646717660898e-06, "loss": 0.3143, "step": 16132 }, { "epoch": 0.7482838589981448, "grad_norm": 6.938471794128418, "learning_rate": 1.5121373205365752e-06, "loss": 0.2369, "step": 16133 }, { "epoch": 0.748330241187384, "grad_norm": 4.932369232177734, "learning_rate": 1.5116100448697674e-06, "loss": 0.365, "step": 16134 }, { "epoch": 0.7483766233766234, "grad_norm": 10.357246398925781, "learning_rate": 1.5110828447770865e-06, "loss": 0.384, "step": 16135 }, { "epoch": 0.7484230055658627, "grad_norm": 7.81122350692749, "learning_rate": 1.510555720269955e-06, "loss": 0.2772, "step": 16136 }, { "epoch": 0.7484693877551021, "grad_norm": 10.737099647521973, "learning_rate": 1.5100286713597938e-06, "loss": 0.3688, "step": 16137 }, { "epoch": 0.7485157699443413, "grad_norm": 5.137218952178955, "learning_rate": 1.5095016980580206e-06, "loss": 0.3454, "step": 16138 }, { "epoch": 0.7485621521335807, "grad_norm": 5.73159646987915, "learning_rate": 1.5089748003760523e-06, "loss": 0.3135, "step": 16139 }, { "epoch": 0.74860853432282, "grad_norm": 6.5597920417785645, "learning_rate": 1.5084479783253047e-06, "loss": 0.2537, "step": 16140 }, { "epoch": 0.7486549165120594, "grad_norm": 11.125333786010742, "learning_rate": 1.5079212319171887e-06, "loss": 0.368, "step": 16141 }, { "epoch": 0.7487012987012988, "grad_norm": 10.943915367126465, "learning_rate": 1.507394561163118e-06, "loss": 0.3641, "step": 16142 }, { "epoch": 0.748747680890538, "grad_norm": 8.594012260437012, "learning_rate": 1.5068679660745012e-06, "loss": 0.3725, "step": 16143 }, { "epoch": 0.7487940630797774, "grad_norm": 14.319497108459473, "learning_rate": 1.5063414466627475e-06, "loss": 0.4813, "step": 16144 }, { "epoch": 0.7488404452690167, "grad_norm": 7.88875150680542, "learning_rate": 1.5058150029392654e-06, "loss": 0.366, "step": 16145 }, { "epoch": 0.7488868274582561, "grad_norm": 7.46537971496582, "learning_rate": 1.5052886349154566e-06, "loss": 0.3446, "step": 16146 }, { "epoch": 0.7489332096474953, "grad_norm": 5.500491619110107, "learning_rate": 1.5047623426027264e-06, "loss": 0.3032, "step": 16147 }, { "epoch": 0.7489795918367347, "grad_norm": 8.608563423156738, "learning_rate": 1.5042361260124767e-06, "loss": 0.3822, "step": 16148 }, { "epoch": 0.749025974025974, "grad_norm": 7.073931694030762, "learning_rate": 1.5037099851561071e-06, "loss": 0.3114, "step": 16149 }, { "epoch": 0.7490723562152134, "grad_norm": 15.356565475463867, "learning_rate": 1.5031839200450166e-06, "loss": 0.5944, "step": 16150 }, { "epoch": 0.7491187384044526, "grad_norm": 5.433272361755371, "learning_rate": 1.5026579306906036e-06, "loss": 0.2661, "step": 16151 }, { "epoch": 0.749165120593692, "grad_norm": 8.413525581359863, "learning_rate": 1.5021320171042608e-06, "loss": 0.3338, "step": 16152 }, { "epoch": 0.7492115027829314, "grad_norm": 7.85790491104126, "learning_rate": 1.5016061792973825e-06, "loss": 0.3138, "step": 16153 }, { "epoch": 0.7492578849721707, "grad_norm": 8.650532722473145, "learning_rate": 1.5010804172813614e-06, "loss": 0.318, "step": 16154 }, { "epoch": 0.74930426716141, "grad_norm": 4.7686967849731445, "learning_rate": 1.5005547310675872e-06, "loss": 0.2616, "step": 16155 }, { "epoch": 0.7493506493506493, "grad_norm": 7.456780910491943, "learning_rate": 1.5000291206674512e-06, "loss": 0.3923, "step": 16156 }, { "epoch": 0.7493970315398887, "grad_norm": 7.236611843109131, "learning_rate": 1.4995035860923358e-06, "loss": 0.3492, "step": 16157 }, { "epoch": 0.749443413729128, "grad_norm": 7.688713550567627, "learning_rate": 1.4989781273536296e-06, "loss": 0.3771, "step": 16158 }, { "epoch": 0.7494897959183674, "grad_norm": 7.007968902587891, "learning_rate": 1.498452744462715e-06, "loss": 0.342, "step": 16159 }, { "epoch": 0.7495361781076066, "grad_norm": 9.177534103393555, "learning_rate": 1.4979274374309754e-06, "loss": 0.4494, "step": 16160 }, { "epoch": 0.749582560296846, "grad_norm": 10.23956298828125, "learning_rate": 1.4974022062697908e-06, "loss": 0.3922, "step": 16161 }, { "epoch": 0.7496289424860854, "grad_norm": 4.63613748550415, "learning_rate": 1.496877050990541e-06, "loss": 0.341, "step": 16162 }, { "epoch": 0.7496753246753247, "grad_norm": 7.153128147125244, "learning_rate": 1.496351971604601e-06, "loss": 0.373, "step": 16163 }, { "epoch": 0.749721706864564, "grad_norm": 26.04082679748535, "learning_rate": 1.4958269681233472e-06, "loss": 0.4336, "step": 16164 }, { "epoch": 0.7497680890538033, "grad_norm": 7.1920647621154785, "learning_rate": 1.4953020405581542e-06, "loss": 0.3597, "step": 16165 }, { "epoch": 0.7498144712430427, "grad_norm": 7.824403762817383, "learning_rate": 1.4947771889203937e-06, "loss": 0.275, "step": 16166 }, { "epoch": 0.749860853432282, "grad_norm": 7.190554618835449, "learning_rate": 1.4942524132214387e-06, "loss": 0.3836, "step": 16167 }, { "epoch": 0.7499072356215213, "grad_norm": 6.506980895996094, "learning_rate": 1.4937277134726542e-06, "loss": 0.3671, "step": 16168 }, { "epoch": 0.7499536178107606, "grad_norm": 5.445619106292725, "learning_rate": 1.4932030896854094e-06, "loss": 0.4466, "step": 16169 }, { "epoch": 0.75, "grad_norm": 10.20026683807373, "learning_rate": 1.49267854187107e-06, "loss": 0.3815, "step": 16170 }, { "epoch": 0.7500463821892394, "grad_norm": 13.901036262512207, "learning_rate": 1.4921540700409997e-06, "loss": 0.4473, "step": 16171 }, { "epoch": 0.7500927643784787, "grad_norm": 4.808570861816406, "learning_rate": 1.4916296742065616e-06, "loss": 0.2505, "step": 16172 }, { "epoch": 0.750139146567718, "grad_norm": 5.336522579193115, "learning_rate": 1.4911053543791181e-06, "loss": 0.2741, "step": 16173 }, { "epoch": 0.7501855287569573, "grad_norm": 5.6869401931762695, "learning_rate": 1.4905811105700246e-06, "loss": 0.3512, "step": 16174 }, { "epoch": 0.7502319109461967, "grad_norm": 7.309281349182129, "learning_rate": 1.4900569427906396e-06, "loss": 0.2312, "step": 16175 }, { "epoch": 0.750278293135436, "grad_norm": 10.160917282104492, "learning_rate": 1.4895328510523205e-06, "loss": 0.4682, "step": 16176 }, { "epoch": 0.7503246753246753, "grad_norm": 6.421428203582764, "learning_rate": 1.4890088353664201e-06, "loss": 0.2733, "step": 16177 }, { "epoch": 0.7503710575139146, "grad_norm": 6.133551120758057, "learning_rate": 1.4884848957442933e-06, "loss": 0.281, "step": 16178 }, { "epoch": 0.750417439703154, "grad_norm": 7.007096290588379, "learning_rate": 1.4879610321972876e-06, "loss": 0.2372, "step": 16179 }, { "epoch": 0.7504638218923934, "grad_norm": 4.586915016174316, "learning_rate": 1.4874372447367536e-06, "loss": 0.3177, "step": 16180 }, { "epoch": 0.7505102040816326, "grad_norm": 7.793444633483887, "learning_rate": 1.4869135333740387e-06, "loss": 0.2928, "step": 16181 }, { "epoch": 0.750556586270872, "grad_norm": 7.57494592666626, "learning_rate": 1.4863898981204894e-06, "loss": 0.2935, "step": 16182 }, { "epoch": 0.7506029684601113, "grad_norm": 5.309382438659668, "learning_rate": 1.4858663389874512e-06, "loss": 0.2513, "step": 16183 }, { "epoch": 0.7506493506493507, "grad_norm": 7.119933605194092, "learning_rate": 1.4853428559862637e-06, "loss": 0.2988, "step": 16184 }, { "epoch": 0.75069573283859, "grad_norm": 7.1892547607421875, "learning_rate": 1.4848194491282697e-06, "loss": 0.3174, "step": 16185 }, { "epoch": 0.7507421150278293, "grad_norm": 7.446829319000244, "learning_rate": 1.4842961184248078e-06, "loss": 0.3339, "step": 16186 }, { "epoch": 0.7507884972170686, "grad_norm": 9.470573425292969, "learning_rate": 1.4837728638872168e-06, "loss": 0.3664, "step": 16187 }, { "epoch": 0.750834879406308, "grad_norm": 5.398582935333252, "learning_rate": 1.4832496855268314e-06, "loss": 0.1736, "step": 16188 }, { "epoch": 0.7508812615955474, "grad_norm": 6.166609764099121, "learning_rate": 1.4827265833549887e-06, "loss": 0.3715, "step": 16189 }, { "epoch": 0.7509276437847866, "grad_norm": 6.927923679351807, "learning_rate": 1.482203557383018e-06, "loss": 0.298, "step": 16190 }, { "epoch": 0.750974025974026, "grad_norm": 5.747769832611084, "learning_rate": 1.4816806076222512e-06, "loss": 0.3645, "step": 16191 }, { "epoch": 0.7510204081632653, "grad_norm": 7.100261688232422, "learning_rate": 1.4811577340840188e-06, "loss": 0.3037, "step": 16192 }, { "epoch": 0.7510667903525047, "grad_norm": 9.140618324279785, "learning_rate": 1.4806349367796474e-06, "loss": 0.3594, "step": 16193 }, { "epoch": 0.7511131725417439, "grad_norm": 8.597038269042969, "learning_rate": 1.480112215720466e-06, "loss": 0.3846, "step": 16194 }, { "epoch": 0.7511595547309833, "grad_norm": 9.362665176391602, "learning_rate": 1.4795895709177954e-06, "loss": 0.368, "step": 16195 }, { "epoch": 0.7512059369202226, "grad_norm": 6.418790817260742, "learning_rate": 1.4790670023829596e-06, "loss": 0.3316, "step": 16196 }, { "epoch": 0.751252319109462, "grad_norm": 4.054913520812988, "learning_rate": 1.4785445101272805e-06, "loss": 0.281, "step": 16197 }, { "epoch": 0.7512987012987012, "grad_norm": 8.526872634887695, "learning_rate": 1.4780220941620765e-06, "loss": 0.3612, "step": 16198 }, { "epoch": 0.7513450834879406, "grad_norm": 8.09709358215332, "learning_rate": 1.477499754498667e-06, "loss": 0.4071, "step": 16199 }, { "epoch": 0.75139146567718, "grad_norm": 9.155434608459473, "learning_rate": 1.4769774911483686e-06, "loss": 0.3247, "step": 16200 }, { "epoch": 0.7514378478664193, "grad_norm": 9.933859825134277, "learning_rate": 1.4764553041224926e-06, "loss": 0.2803, "step": 16201 }, { "epoch": 0.7514842300556587, "grad_norm": 12.668100357055664, "learning_rate": 1.4759331934323546e-06, "loss": 0.2685, "step": 16202 }, { "epoch": 0.7515306122448979, "grad_norm": 5.078458309173584, "learning_rate": 1.475411159089265e-06, "loss": 0.3119, "step": 16203 }, { "epoch": 0.7515769944341373, "grad_norm": 11.495583534240723, "learning_rate": 1.474889201104534e-06, "loss": 0.333, "step": 16204 }, { "epoch": 0.7516233766233766, "grad_norm": 14.146890640258789, "learning_rate": 1.4743673194894702e-06, "loss": 0.4939, "step": 16205 }, { "epoch": 0.751669758812616, "grad_norm": 4.518834590911865, "learning_rate": 1.4738455142553776e-06, "loss": 0.2149, "step": 16206 }, { "epoch": 0.7517161410018552, "grad_norm": 6.716654300689697, "learning_rate": 1.4733237854135619e-06, "loss": 0.3647, "step": 16207 }, { "epoch": 0.7517625231910946, "grad_norm": 11.67017936706543, "learning_rate": 1.472802132975326e-06, "loss": 0.4311, "step": 16208 }, { "epoch": 0.751808905380334, "grad_norm": 6.2358503341674805, "learning_rate": 1.4722805569519721e-06, "loss": 0.3501, "step": 16209 }, { "epoch": 0.7518552875695733, "grad_norm": 7.389636993408203, "learning_rate": 1.471759057354799e-06, "loss": 0.3529, "step": 16210 }, { "epoch": 0.7519016697588126, "grad_norm": 5.7051100730896, "learning_rate": 1.4712376341951061e-06, "loss": 0.3628, "step": 16211 }, { "epoch": 0.7519480519480519, "grad_norm": 6.63035774230957, "learning_rate": 1.4707162874841874e-06, "loss": 0.2712, "step": 16212 }, { "epoch": 0.7519944341372913, "grad_norm": 9.903483390808105, "learning_rate": 1.470195017233339e-06, "loss": 0.4726, "step": 16213 }, { "epoch": 0.7520408163265306, "grad_norm": 13.034546852111816, "learning_rate": 1.4696738234538537e-06, "loss": 0.3914, "step": 16214 }, { "epoch": 0.75208719851577, "grad_norm": 6.593905448913574, "learning_rate": 1.4691527061570226e-06, "loss": 0.2499, "step": 16215 }, { "epoch": 0.7521335807050092, "grad_norm": 6.005300521850586, "learning_rate": 1.4686316653541377e-06, "loss": 0.2329, "step": 16216 }, { "epoch": 0.7521799628942486, "grad_norm": 15.663982391357422, "learning_rate": 1.4681107010564837e-06, "loss": 0.4167, "step": 16217 }, { "epoch": 0.752226345083488, "grad_norm": 6.377286434173584, "learning_rate": 1.4675898132753485e-06, "loss": 0.3293, "step": 16218 }, { "epoch": 0.7522727272727273, "grad_norm": 5.9382829666137695, "learning_rate": 1.4670690020220168e-06, "loss": 0.4, "step": 16219 }, { "epoch": 0.7523191094619666, "grad_norm": 7.442427635192871, "learning_rate": 1.4665482673077712e-06, "loss": 0.2189, "step": 16220 }, { "epoch": 0.7523654916512059, "grad_norm": 9.881628036499023, "learning_rate": 1.4660276091438947e-06, "loss": 0.3549, "step": 16221 }, { "epoch": 0.7524118738404453, "grad_norm": 7.996914863586426, "learning_rate": 1.4655070275416667e-06, "loss": 0.3681, "step": 16222 }, { "epoch": 0.7524582560296846, "grad_norm": 6.9032416343688965, "learning_rate": 1.4649865225123639e-06, "loss": 0.2451, "step": 16223 }, { "epoch": 0.7525046382189239, "grad_norm": 6.974484443664551, "learning_rate": 1.4644660940672628e-06, "loss": 0.2069, "step": 16224 }, { "epoch": 0.7525510204081632, "grad_norm": 4.982539653778076, "learning_rate": 1.4639457422176396e-06, "loss": 0.2802, "step": 16225 }, { "epoch": 0.7525974025974026, "grad_norm": 5.755831241607666, "learning_rate": 1.463425466974766e-06, "loss": 0.3673, "step": 16226 }, { "epoch": 0.752643784786642, "grad_norm": 5.412654876708984, "learning_rate": 1.4629052683499168e-06, "loss": 0.3272, "step": 16227 }, { "epoch": 0.7526901669758813, "grad_norm": 6.96217679977417, "learning_rate": 1.4623851463543576e-06, "loss": 0.3895, "step": 16228 }, { "epoch": 0.7527365491651206, "grad_norm": 8.871870040893555, "learning_rate": 1.4618651009993578e-06, "loss": 0.3511, "step": 16229 }, { "epoch": 0.7527829313543599, "grad_norm": 4.642805099487305, "learning_rate": 1.4613451322961847e-06, "loss": 0.3241, "step": 16230 }, { "epoch": 0.7528293135435993, "grad_norm": 9.180688858032227, "learning_rate": 1.4608252402561029e-06, "loss": 0.3814, "step": 16231 }, { "epoch": 0.7528756957328386, "grad_norm": 17.723175048828125, "learning_rate": 1.4603054248903752e-06, "loss": 0.5133, "step": 16232 }, { "epoch": 0.7529220779220779, "grad_norm": 12.003628730773926, "learning_rate": 1.4597856862102655e-06, "loss": 0.4295, "step": 16233 }, { "epoch": 0.7529684601113172, "grad_norm": 9.709342002868652, "learning_rate": 1.4592660242270302e-06, "loss": 0.4, "step": 16234 }, { "epoch": 0.7530148423005566, "grad_norm": 3.9824531078338623, "learning_rate": 1.4587464389519285e-06, "loss": 0.3244, "step": 16235 }, { "epoch": 0.753061224489796, "grad_norm": 5.099942207336426, "learning_rate": 1.4582269303962176e-06, "loss": 0.2407, "step": 16236 }, { "epoch": 0.7531076066790352, "grad_norm": 6.280825138092041, "learning_rate": 1.4577074985711521e-06, "loss": 0.3712, "step": 16237 }, { "epoch": 0.7531539888682746, "grad_norm": 5.944682598114014, "learning_rate": 1.457188143487987e-06, "loss": 0.2479, "step": 16238 }, { "epoch": 0.7532003710575139, "grad_norm": 5.718667507171631, "learning_rate": 1.4566688651579702e-06, "loss": 0.3735, "step": 16239 }, { "epoch": 0.7532467532467533, "grad_norm": 7.837695598602295, "learning_rate": 1.4561496635923538e-06, "loss": 0.3528, "step": 16240 }, { "epoch": 0.7532931354359926, "grad_norm": 5.612842559814453, "learning_rate": 1.4556305388023862e-06, "loss": 0.2924, "step": 16241 }, { "epoch": 0.7533395176252319, "grad_norm": 5.623172283172607, "learning_rate": 1.455111490799313e-06, "loss": 0.2395, "step": 16242 }, { "epoch": 0.7533858998144712, "grad_norm": 8.527421951293945, "learning_rate": 1.4545925195943794e-06, "loss": 0.2969, "step": 16243 }, { "epoch": 0.7534322820037106, "grad_norm": 7.342255115509033, "learning_rate": 1.4540736251988307e-06, "loss": 0.3528, "step": 16244 }, { "epoch": 0.75347866419295, "grad_norm": 6.19268274307251, "learning_rate": 1.4535548076239054e-06, "loss": 0.4323, "step": 16245 }, { "epoch": 0.7535250463821892, "grad_norm": 12.798996925354004, "learning_rate": 1.453036066880844e-06, "loss": 0.1612, "step": 16246 }, { "epoch": 0.7535714285714286, "grad_norm": 3.707110643386841, "learning_rate": 1.4525174029808858e-06, "loss": 0.2469, "step": 16247 }, { "epoch": 0.7536178107606679, "grad_norm": 4.788053512573242, "learning_rate": 1.4519988159352665e-06, "loss": 0.2645, "step": 16248 }, { "epoch": 0.7536641929499073, "grad_norm": 3.5301671028137207, "learning_rate": 1.4514803057552235e-06, "loss": 0.1975, "step": 16249 }, { "epoch": 0.7537105751391465, "grad_norm": 5.839006423950195, "learning_rate": 1.4509618724519864e-06, "loss": 0.3179, "step": 16250 }, { "epoch": 0.7537569573283859, "grad_norm": 20.287630081176758, "learning_rate": 1.4504435160367886e-06, "loss": 0.3561, "step": 16251 }, { "epoch": 0.7538033395176252, "grad_norm": 22.18220329284668, "learning_rate": 1.4499252365208593e-06, "loss": 0.4681, "step": 16252 }, { "epoch": 0.7538497217068646, "grad_norm": 4.836007595062256, "learning_rate": 1.4494070339154275e-06, "loss": 0.3159, "step": 16253 }, { "epoch": 0.7538961038961038, "grad_norm": 9.495627403259277, "learning_rate": 1.4488889082317198e-06, "loss": 0.3842, "step": 16254 }, { "epoch": 0.7539424860853432, "grad_norm": 9.033285140991211, "learning_rate": 1.448370859480962e-06, "loss": 0.4231, "step": 16255 }, { "epoch": 0.7539888682745826, "grad_norm": 6.335140228271484, "learning_rate": 1.4478528876743753e-06, "loss": 0.3191, "step": 16256 }, { "epoch": 0.7540352504638219, "grad_norm": 7.397958278656006, "learning_rate": 1.4473349928231818e-06, "loss": 0.3145, "step": 16257 }, { "epoch": 0.7540816326530613, "grad_norm": 6.183131217956543, "learning_rate": 1.4468171749386018e-06, "loss": 0.2803, "step": 16258 }, { "epoch": 0.7541280148423005, "grad_norm": 5.9509172439575195, "learning_rate": 1.4462994340318536e-06, "loss": 0.2998, "step": 16259 }, { "epoch": 0.7541743970315399, "grad_norm": 3.4022045135498047, "learning_rate": 1.4457817701141558e-06, "loss": 0.2552, "step": 16260 }, { "epoch": 0.7542207792207792, "grad_norm": 4.265206336975098, "learning_rate": 1.4452641831967191e-06, "loss": 0.2194, "step": 16261 }, { "epoch": 0.7542671614100186, "grad_norm": 4.95969820022583, "learning_rate": 1.4447466732907594e-06, "loss": 0.284, "step": 16262 }, { "epoch": 0.7543135435992578, "grad_norm": 6.596691131591797, "learning_rate": 1.4442292404074875e-06, "loss": 0.3144, "step": 16263 }, { "epoch": 0.7543599257884972, "grad_norm": 9.584611892700195, "learning_rate": 1.4437118845581138e-06, "loss": 0.4241, "step": 16264 }, { "epoch": 0.7544063079777366, "grad_norm": 5.586363792419434, "learning_rate": 1.4431946057538464e-06, "loss": 0.2651, "step": 16265 }, { "epoch": 0.7544526901669759, "grad_norm": 4.810415267944336, "learning_rate": 1.4426774040058932e-06, "loss": 0.3431, "step": 16266 }, { "epoch": 0.7544990723562152, "grad_norm": 10.345351219177246, "learning_rate": 1.442160279325457e-06, "loss": 0.4057, "step": 16267 }, { "epoch": 0.7545454545454545, "grad_norm": 9.635937690734863, "learning_rate": 1.4416432317237406e-06, "loss": 0.2704, "step": 16268 }, { "epoch": 0.7545918367346939, "grad_norm": 6.404027462005615, "learning_rate": 1.4411262612119475e-06, "loss": 0.2702, "step": 16269 }, { "epoch": 0.7546382189239332, "grad_norm": 6.611382007598877, "learning_rate": 1.4406093678012767e-06, "loss": 0.3245, "step": 16270 }, { "epoch": 0.7546846011131726, "grad_norm": 6.446726322174072, "learning_rate": 1.4400925515029279e-06, "loss": 0.3093, "step": 16271 }, { "epoch": 0.7547309833024118, "grad_norm": 11.319543838500977, "learning_rate": 1.4395758123280951e-06, "loss": 0.3997, "step": 16272 }, { "epoch": 0.7547773654916512, "grad_norm": 10.889981269836426, "learning_rate": 1.4390591502879742e-06, "loss": 0.4742, "step": 16273 }, { "epoch": 0.7548237476808906, "grad_norm": 5.746304988861084, "learning_rate": 1.4385425653937585e-06, "loss": 0.3037, "step": 16274 }, { "epoch": 0.7548701298701299, "grad_norm": 7.444263458251953, "learning_rate": 1.4380260576566396e-06, "loss": 0.3368, "step": 16275 }, { "epoch": 0.7549165120593692, "grad_norm": 8.55510139465332, "learning_rate": 1.4375096270878092e-06, "loss": 0.3007, "step": 16276 }, { "epoch": 0.7549628942486085, "grad_norm": 8.745448112487793, "learning_rate": 1.4369932736984522e-06, "loss": 0.3727, "step": 16277 }, { "epoch": 0.7550092764378479, "grad_norm": 8.590798377990723, "learning_rate": 1.4364769974997567e-06, "loss": 0.3957, "step": 16278 }, { "epoch": 0.7550556586270872, "grad_norm": 7.339339256286621, "learning_rate": 1.4359607985029072e-06, "loss": 0.2549, "step": 16279 }, { "epoch": 0.7551020408163265, "grad_norm": 6.368767261505127, "learning_rate": 1.4354446767190873e-06, "loss": 0.3374, "step": 16280 }, { "epoch": 0.7551484230055658, "grad_norm": 6.527986526489258, "learning_rate": 1.4349286321594786e-06, "loss": 0.3916, "step": 16281 }, { "epoch": 0.7551948051948052, "grad_norm": 4.729674816131592, "learning_rate": 1.434412664835262e-06, "loss": 0.3951, "step": 16282 }, { "epoch": 0.7552411873840446, "grad_norm": 4.997734069824219, "learning_rate": 1.4338967747576126e-06, "loss": 0.2654, "step": 16283 }, { "epoch": 0.7552875695732839, "grad_norm": 5.045416831970215, "learning_rate": 1.4333809619377093e-06, "loss": 0.2806, "step": 16284 }, { "epoch": 0.7553339517625232, "grad_norm": 7.250051975250244, "learning_rate": 1.4328652263867255e-06, "loss": 0.3439, "step": 16285 }, { "epoch": 0.7553803339517625, "grad_norm": 7.258068084716797, "learning_rate": 1.4323495681158356e-06, "loss": 0.3017, "step": 16286 }, { "epoch": 0.7554267161410019, "grad_norm": 5.566572666168213, "learning_rate": 1.431833987136212e-06, "loss": 0.1668, "step": 16287 }, { "epoch": 0.7554730983302412, "grad_norm": 10.932184219360352, "learning_rate": 1.4313184834590215e-06, "loss": 0.4827, "step": 16288 }, { "epoch": 0.7555194805194805, "grad_norm": 7.962629795074463, "learning_rate": 1.4308030570954334e-06, "loss": 0.2384, "step": 16289 }, { "epoch": 0.7555658627087198, "grad_norm": 3.21364688873291, "learning_rate": 1.4302877080566152e-06, "loss": 0.1572, "step": 16290 }, { "epoch": 0.7556122448979592, "grad_norm": 6.5357866287231445, "learning_rate": 1.4297724363537307e-06, "loss": 0.285, "step": 16291 }, { "epoch": 0.7556586270871986, "grad_norm": 5.34498405456543, "learning_rate": 1.429257241997943e-06, "loss": 0.3702, "step": 16292 }, { "epoch": 0.7557050092764378, "grad_norm": 7.484835624694824, "learning_rate": 1.4287421250004157e-06, "loss": 0.2993, "step": 16293 }, { "epoch": 0.7557513914656772, "grad_norm": 9.339872360229492, "learning_rate": 1.4282270853723047e-06, "loss": 0.4438, "step": 16294 }, { "epoch": 0.7557977736549165, "grad_norm": 10.689891815185547, "learning_rate": 1.4277121231247704e-06, "loss": 0.4174, "step": 16295 }, { "epoch": 0.7558441558441559, "grad_norm": 9.96930980682373, "learning_rate": 1.4271972382689685e-06, "loss": 0.4315, "step": 16296 }, { "epoch": 0.7558905380333952, "grad_norm": 8.078261375427246, "learning_rate": 1.4266824308160538e-06, "loss": 0.3534, "step": 16297 }, { "epoch": 0.7559369202226345, "grad_norm": 10.607032775878906, "learning_rate": 1.4261677007771812e-06, "loss": 0.3859, "step": 16298 }, { "epoch": 0.7559833024118738, "grad_norm": 10.750306129455566, "learning_rate": 1.4256530481634989e-06, "loss": 0.2787, "step": 16299 }, { "epoch": 0.7560296846011132, "grad_norm": 8.993721961975098, "learning_rate": 1.4251384729861585e-06, "loss": 0.3655, "step": 16300 }, { "epoch": 0.7560760667903526, "grad_norm": 7.3576154708862305, "learning_rate": 1.4246239752563068e-06, "loss": 0.3364, "step": 16301 }, { "epoch": 0.7561224489795918, "grad_norm": 7.564400672912598, "learning_rate": 1.4241095549850915e-06, "loss": 0.3286, "step": 16302 }, { "epoch": 0.7561688311688312, "grad_norm": 6.955830097198486, "learning_rate": 1.423595212183656e-06, "loss": 0.2819, "step": 16303 }, { "epoch": 0.7562152133580705, "grad_norm": 4.528944969177246, "learning_rate": 1.4230809468631457e-06, "loss": 0.224, "step": 16304 }, { "epoch": 0.7562615955473099, "grad_norm": 12.725581169128418, "learning_rate": 1.4225667590346987e-06, "loss": 0.3912, "step": 16305 }, { "epoch": 0.7563079777365491, "grad_norm": 5.984273433685303, "learning_rate": 1.4220526487094554e-06, "loss": 0.2852, "step": 16306 }, { "epoch": 0.7563543599257885, "grad_norm": 10.03052043914795, "learning_rate": 1.421538615898555e-06, "loss": 0.4238, "step": 16307 }, { "epoch": 0.7564007421150278, "grad_norm": 4.073867321014404, "learning_rate": 1.4210246606131323e-06, "loss": 0.2013, "step": 16308 }, { "epoch": 0.7564471243042672, "grad_norm": 7.1909589767456055, "learning_rate": 1.420510782864325e-06, "loss": 0.2454, "step": 16309 }, { "epoch": 0.7564935064935064, "grad_norm": 5.863779067993164, "learning_rate": 1.4199969826632615e-06, "loss": 0.245, "step": 16310 }, { "epoch": 0.7565398886827458, "grad_norm": 4.081313133239746, "learning_rate": 1.419483260021075e-06, "loss": 0.2503, "step": 16311 }, { "epoch": 0.7565862708719852, "grad_norm": 5.769671440124512, "learning_rate": 1.4189696149488956e-06, "loss": 0.2578, "step": 16312 }, { "epoch": 0.7566326530612245, "grad_norm": 11.093809127807617, "learning_rate": 1.41845604745785e-06, "loss": 0.33, "step": 16313 }, { "epoch": 0.7566790352504639, "grad_norm": 14.662325859069824, "learning_rate": 1.417942557559066e-06, "loss": 0.3247, "step": 16314 }, { "epoch": 0.7567254174397031, "grad_norm": 9.680082321166992, "learning_rate": 1.417429145263668e-06, "loss": 0.447, "step": 16315 }, { "epoch": 0.7567717996289425, "grad_norm": 16.207584381103516, "learning_rate": 1.4169158105827768e-06, "loss": 0.3214, "step": 16316 }, { "epoch": 0.7568181818181818, "grad_norm": 3.9542129039764404, "learning_rate": 1.4164025535275145e-06, "loss": 0.2788, "step": 16317 }, { "epoch": 0.7568645640074212, "grad_norm": 10.47020435333252, "learning_rate": 1.4158893741090007e-06, "loss": 0.4274, "step": 16318 }, { "epoch": 0.7569109461966604, "grad_norm": 4.721057891845703, "learning_rate": 1.4153762723383536e-06, "loss": 0.2979, "step": 16319 }, { "epoch": 0.7569573283858998, "grad_norm": 6.300755500793457, "learning_rate": 1.41486324822669e-06, "loss": 0.309, "step": 16320 }, { "epoch": 0.7570037105751392, "grad_norm": 7.6283111572265625, "learning_rate": 1.4143503017851212e-06, "loss": 0.2979, "step": 16321 }, { "epoch": 0.7570500927643785, "grad_norm": 6.803372383117676, "learning_rate": 1.4138374330247629e-06, "loss": 0.4089, "step": 16322 }, { "epoch": 0.7570964749536178, "grad_norm": 16.78164291381836, "learning_rate": 1.4133246419567244e-06, "loss": 0.4076, "step": 16323 }, { "epoch": 0.7571428571428571, "grad_norm": 12.323932647705078, "learning_rate": 1.412811928592116e-06, "loss": 0.5298, "step": 16324 }, { "epoch": 0.7571892393320965, "grad_norm": 6.45967960357666, "learning_rate": 1.4122992929420448e-06, "loss": 0.3102, "step": 16325 }, { "epoch": 0.7572356215213358, "grad_norm": 10.665183067321777, "learning_rate": 1.411786735017619e-06, "loss": 0.4628, "step": 16326 }, { "epoch": 0.7572820037105752, "grad_norm": 10.648723602294922, "learning_rate": 1.4112742548299391e-06, "loss": 0.4176, "step": 16327 }, { "epoch": 0.7573283858998144, "grad_norm": 6.1963934898376465, "learning_rate": 1.4107618523901101e-06, "loss": 0.3455, "step": 16328 }, { "epoch": 0.7573747680890538, "grad_norm": 8.974750518798828, "learning_rate": 1.4102495277092322e-06, "loss": 0.3375, "step": 16329 }, { "epoch": 0.7574211502782932, "grad_norm": 7.081140041351318, "learning_rate": 1.409737280798405e-06, "loss": 0.3675, "step": 16330 }, { "epoch": 0.7574675324675325, "grad_norm": 8.158561706542969, "learning_rate": 1.4092251116687271e-06, "loss": 0.3338, "step": 16331 }, { "epoch": 0.7575139146567718, "grad_norm": 11.80811595916748, "learning_rate": 1.4087130203312921e-06, "loss": 0.3866, "step": 16332 }, { "epoch": 0.7575602968460111, "grad_norm": 6.304422855377197, "learning_rate": 1.4082010067971952e-06, "loss": 0.3136, "step": 16333 }, { "epoch": 0.7576066790352505, "grad_norm": 7.04285192489624, "learning_rate": 1.4076890710775293e-06, "loss": 0.3615, "step": 16334 }, { "epoch": 0.7576530612244898, "grad_norm": 8.81680965423584, "learning_rate": 1.4071772131833844e-06, "loss": 0.3588, "step": 16335 }, { "epoch": 0.7576994434137291, "grad_norm": 4.437684535980225, "learning_rate": 1.4066654331258506e-06, "loss": 0.3255, "step": 16336 }, { "epoch": 0.7577458256029684, "grad_norm": 7.9105072021484375, "learning_rate": 1.4061537309160162e-06, "loss": 0.2845, "step": 16337 }, { "epoch": 0.7577922077922078, "grad_norm": 9.447031021118164, "learning_rate": 1.405642106564964e-06, "loss": 0.416, "step": 16338 }, { "epoch": 0.7578385899814472, "grad_norm": 6.266603946685791, "learning_rate": 1.40513056008378e-06, "loss": 0.2787, "step": 16339 }, { "epoch": 0.7578849721706865, "grad_norm": 4.906790733337402, "learning_rate": 1.404619091483546e-06, "loss": 0.2086, "step": 16340 }, { "epoch": 0.7579313543599258, "grad_norm": 10.346796035766602, "learning_rate": 1.4041077007753435e-06, "loss": 0.3788, "step": 16341 }, { "epoch": 0.7579777365491651, "grad_norm": 10.357369422912598, "learning_rate": 1.4035963879702518e-06, "loss": 0.3882, "step": 16342 }, { "epoch": 0.7580241187384045, "grad_norm": 10.713719367980957, "learning_rate": 1.4030851530793467e-06, "loss": 0.3985, "step": 16343 }, { "epoch": 0.7580705009276438, "grad_norm": 5.105518341064453, "learning_rate": 1.4025739961137043e-06, "loss": 0.3574, "step": 16344 }, { "epoch": 0.7581168831168831, "grad_norm": 12.184645652770996, "learning_rate": 1.4020629170843985e-06, "loss": 0.4371, "step": 16345 }, { "epoch": 0.7581632653061224, "grad_norm": 12.073789596557617, "learning_rate": 1.4015519160025027e-06, "loss": 0.4017, "step": 16346 }, { "epoch": 0.7582096474953618, "grad_norm": 22.512950897216797, "learning_rate": 1.4010409928790875e-06, "loss": 0.389, "step": 16347 }, { "epoch": 0.7582560296846012, "grad_norm": 9.180355072021484, "learning_rate": 1.4005301477252192e-06, "loss": 0.5042, "step": 16348 }, { "epoch": 0.7583024118738404, "grad_norm": 6.154958724975586, "learning_rate": 1.4000193805519675e-06, "loss": 0.371, "step": 16349 }, { "epoch": 0.7583487940630798, "grad_norm": 9.849931716918945, "learning_rate": 1.3995086913703965e-06, "loss": 0.3133, "step": 16350 }, { "epoch": 0.7583951762523191, "grad_norm": 7.277621269226074, "learning_rate": 1.398998080191571e-06, "loss": 0.3991, "step": 16351 }, { "epoch": 0.7584415584415585, "grad_norm": 10.209748268127441, "learning_rate": 1.3984875470265541e-06, "loss": 0.3077, "step": 16352 }, { "epoch": 0.7584879406307977, "grad_norm": 7.257966041564941, "learning_rate": 1.3979770918864033e-06, "loss": 0.2636, "step": 16353 }, { "epoch": 0.7585343228200371, "grad_norm": 12.493010520935059, "learning_rate": 1.3974667147821792e-06, "loss": 0.3231, "step": 16354 }, { "epoch": 0.7585807050092764, "grad_norm": 6.251348972320557, "learning_rate": 1.3969564157249388e-06, "loss": 0.2682, "step": 16355 }, { "epoch": 0.7586270871985158, "grad_norm": 7.291704177856445, "learning_rate": 1.396446194725737e-06, "loss": 0.2582, "step": 16356 }, { "epoch": 0.7586734693877552, "grad_norm": 3.9421982765197754, "learning_rate": 1.395936051795629e-06, "loss": 0.2767, "step": 16357 }, { "epoch": 0.7587198515769944, "grad_norm": 4.802383899688721, "learning_rate": 1.3954259869456638e-06, "loss": 0.273, "step": 16358 }, { "epoch": 0.7587662337662338, "grad_norm": 6.910102367401123, "learning_rate": 1.3949160001868938e-06, "loss": 0.3353, "step": 16359 }, { "epoch": 0.7588126159554731, "grad_norm": 6.54440975189209, "learning_rate": 1.394406091530367e-06, "loss": 0.2675, "step": 16360 }, { "epoch": 0.7588589981447125, "grad_norm": 10.55240249633789, "learning_rate": 1.3938962609871304e-06, "loss": 0.3881, "step": 16361 }, { "epoch": 0.7589053803339517, "grad_norm": 6.224403381347656, "learning_rate": 1.3933865085682313e-06, "loss": 0.3393, "step": 16362 }, { "epoch": 0.7589517625231911, "grad_norm": 4.928727626800537, "learning_rate": 1.3928768342847092e-06, "loss": 0.3188, "step": 16363 }, { "epoch": 0.7589981447124304, "grad_norm": 8.437251091003418, "learning_rate": 1.3923672381476084e-06, "loss": 0.368, "step": 16364 }, { "epoch": 0.7590445269016698, "grad_norm": 6.901442527770996, "learning_rate": 1.3918577201679678e-06, "loss": 0.3416, "step": 16365 }, { "epoch": 0.759090909090909, "grad_norm": 7.206338405609131, "learning_rate": 1.3913482803568274e-06, "loss": 0.3611, "step": 16366 }, { "epoch": 0.7591372912801484, "grad_norm": 12.98531723022461, "learning_rate": 1.3908389187252241e-06, "loss": 0.4642, "step": 16367 }, { "epoch": 0.7591836734693878, "grad_norm": 8.811942100524902, "learning_rate": 1.3903296352841904e-06, "loss": 0.3594, "step": 16368 }, { "epoch": 0.7592300556586271, "grad_norm": 6.251497745513916, "learning_rate": 1.3898204300447615e-06, "loss": 0.3908, "step": 16369 }, { "epoch": 0.7592764378478665, "grad_norm": 6.393045425415039, "learning_rate": 1.389311303017969e-06, "loss": 0.302, "step": 16370 }, { "epoch": 0.7593228200371057, "grad_norm": 6.401488780975342, "learning_rate": 1.3888022542148421e-06, "loss": 0.3277, "step": 16371 }, { "epoch": 0.7593692022263451, "grad_norm": 7.648542881011963, "learning_rate": 1.388293283646412e-06, "loss": 0.4065, "step": 16372 }, { "epoch": 0.7594155844155844, "grad_norm": 6.334853172302246, "learning_rate": 1.387784391323701e-06, "loss": 0.2465, "step": 16373 }, { "epoch": 0.7594619666048238, "grad_norm": 5.416463375091553, "learning_rate": 1.3872755772577357e-06, "loss": 0.2363, "step": 16374 }, { "epoch": 0.759508348794063, "grad_norm": 13.055220603942871, "learning_rate": 1.3867668414595398e-06, "loss": 0.3157, "step": 16375 }, { "epoch": 0.7595547309833024, "grad_norm": 4.509467601776123, "learning_rate": 1.3862581839401346e-06, "loss": 0.3454, "step": 16376 }, { "epoch": 0.7596011131725418, "grad_norm": 6.213496208190918, "learning_rate": 1.3857496047105407e-06, "loss": 0.4491, "step": 16377 }, { "epoch": 0.7596474953617811, "grad_norm": 5.692246913909912, "learning_rate": 1.3852411037817742e-06, "loss": 0.2903, "step": 16378 }, { "epoch": 0.7596938775510204, "grad_norm": 12.314461708068848, "learning_rate": 1.3847326811648526e-06, "loss": 0.4394, "step": 16379 }, { "epoch": 0.7597402597402597, "grad_norm": 9.97888469696045, "learning_rate": 1.3842243368707907e-06, "loss": 0.2355, "step": 16380 }, { "epoch": 0.7597866419294991, "grad_norm": 6.792247772216797, "learning_rate": 1.3837160709106013e-06, "loss": 0.3445, "step": 16381 }, { "epoch": 0.7598330241187384, "grad_norm": 3.3957455158233643, "learning_rate": 1.3832078832952971e-06, "loss": 0.2048, "step": 16382 }, { "epoch": 0.7598794063079778, "grad_norm": 7.5463032722473145, "learning_rate": 1.3826997740358855e-06, "loss": 0.3312, "step": 16383 }, { "epoch": 0.759925788497217, "grad_norm": 11.893856048583984, "learning_rate": 1.382191743143375e-06, "loss": 0.376, "step": 16384 }, { "epoch": 0.7599721706864564, "grad_norm": 8.481342315673828, "learning_rate": 1.3816837906287721e-06, "loss": 0.4576, "step": 16385 }, { "epoch": 0.7600185528756958, "grad_norm": 5.071561336517334, "learning_rate": 1.381175916503082e-06, "loss": 0.1898, "step": 16386 }, { "epoch": 0.7600649350649351, "grad_norm": 7.108280181884766, "learning_rate": 1.3806681207773077e-06, "loss": 0.3555, "step": 16387 }, { "epoch": 0.7601113172541744, "grad_norm": 6.128922939300537, "learning_rate": 1.3801604034624482e-06, "loss": 0.3681, "step": 16388 }, { "epoch": 0.7601576994434137, "grad_norm": 7.2305426597595215, "learning_rate": 1.3796527645695046e-06, "loss": 0.3137, "step": 16389 }, { "epoch": 0.7602040816326531, "grad_norm": 13.452878952026367, "learning_rate": 1.3791452041094745e-06, "loss": 0.388, "step": 16390 }, { "epoch": 0.7602504638218924, "grad_norm": 14.002766609191895, "learning_rate": 1.3786377220933533e-06, "loss": 0.4539, "step": 16391 }, { "epoch": 0.7602968460111317, "grad_norm": 6.23130464553833, "learning_rate": 1.3781303185321377e-06, "loss": 0.3066, "step": 16392 }, { "epoch": 0.760343228200371, "grad_norm": 9.181336402893066, "learning_rate": 1.3776229934368162e-06, "loss": 0.3949, "step": 16393 }, { "epoch": 0.7603896103896104, "grad_norm": 6.243204593658447, "learning_rate": 1.3771157468183828e-06, "loss": 0.3481, "step": 16394 }, { "epoch": 0.7604359925788498, "grad_norm": 16.800695419311523, "learning_rate": 1.3766085786878253e-06, "loss": 0.3547, "step": 16395 }, { "epoch": 0.7604823747680891, "grad_norm": 7.335345268249512, "learning_rate": 1.3761014890561319e-06, "loss": 0.3705, "step": 16396 }, { "epoch": 0.7605287569573284, "grad_norm": 4.462526321411133, "learning_rate": 1.37559447793429e-06, "loss": 0.3423, "step": 16397 }, { "epoch": 0.7605751391465677, "grad_norm": 6.7347798347473145, "learning_rate": 1.3750875453332801e-06, "loss": 0.2689, "step": 16398 }, { "epoch": 0.7606215213358071, "grad_norm": 5.6454596519470215, "learning_rate": 1.374580691264087e-06, "loss": 0.3211, "step": 16399 }, { "epoch": 0.7606679035250464, "grad_norm": 6.8661017417907715, "learning_rate": 1.374073915737691e-06, "loss": 0.2608, "step": 16400 }, { "epoch": 0.7607142857142857, "grad_norm": 5.337140083312988, "learning_rate": 1.3735672187650712e-06, "loss": 0.3515, "step": 16401 }, { "epoch": 0.760760667903525, "grad_norm": 5.578515529632568, "learning_rate": 1.3730606003572061e-06, "loss": 0.3555, "step": 16402 }, { "epoch": 0.7608070500927644, "grad_norm": 10.826485633850098, "learning_rate": 1.3725540605250687e-06, "loss": 0.416, "step": 16403 }, { "epoch": 0.7608534322820037, "grad_norm": 7.3645429611206055, "learning_rate": 1.3720475992796345e-06, "loss": 0.3702, "step": 16404 }, { "epoch": 0.760899814471243, "grad_norm": 5.831966400146484, "learning_rate": 1.3715412166318753e-06, "loss": 0.2924, "step": 16405 }, { "epoch": 0.7609461966604824, "grad_norm": 14.23167896270752, "learning_rate": 1.3710349125927618e-06, "loss": 0.5076, "step": 16406 }, { "epoch": 0.7609925788497217, "grad_norm": 6.525540351867676, "learning_rate": 1.3705286871732632e-06, "loss": 0.3712, "step": 16407 }, { "epoch": 0.7610389610389611, "grad_norm": 5.326727867126465, "learning_rate": 1.370022540384347e-06, "loss": 0.3368, "step": 16408 }, { "epoch": 0.7610853432282003, "grad_norm": 5.808660507202148, "learning_rate": 1.369516472236977e-06, "loss": 0.2485, "step": 16409 }, { "epoch": 0.7611317254174397, "grad_norm": 7.549509525299072, "learning_rate": 1.3690104827421174e-06, "loss": 0.2966, "step": 16410 }, { "epoch": 0.761178107606679, "grad_norm": 6.379505634307861, "learning_rate": 1.368504571910731e-06, "loss": 0.3106, "step": 16411 }, { "epoch": 0.7612244897959184, "grad_norm": 8.69798469543457, "learning_rate": 1.3679987397537774e-06, "loss": 0.2317, "step": 16412 }, { "epoch": 0.7612708719851577, "grad_norm": 5.883768081665039, "learning_rate": 1.3674929862822168e-06, "loss": 0.2793, "step": 16413 }, { "epoch": 0.761317254174397, "grad_norm": 3.9872567653656006, "learning_rate": 1.3669873115070032e-06, "loss": 0.3571, "step": 16414 }, { "epoch": 0.7613636363636364, "grad_norm": 7.8135271072387695, "learning_rate": 1.3664817154390936e-06, "loss": 0.3204, "step": 16415 }, { "epoch": 0.7614100185528757, "grad_norm": 6.2924299240112305, "learning_rate": 1.3659761980894403e-06, "loss": 0.373, "step": 16416 }, { "epoch": 0.7614564007421151, "grad_norm": 7.937592506408691, "learning_rate": 1.365470759468997e-06, "loss": 0.3104, "step": 16417 }, { "epoch": 0.7615027829313543, "grad_norm": 9.914173126220703, "learning_rate": 1.3649653995887118e-06, "loss": 0.3763, "step": 16418 }, { "epoch": 0.7615491651205937, "grad_norm": 4.92324161529541, "learning_rate": 1.3644601184595363e-06, "loss": 0.2639, "step": 16419 }, { "epoch": 0.761595547309833, "grad_norm": 8.933442115783691, "learning_rate": 1.3639549160924127e-06, "loss": 0.4861, "step": 16420 }, { "epoch": 0.7616419294990724, "grad_norm": 6.257683753967285, "learning_rate": 1.363449792498288e-06, "loss": 0.3475, "step": 16421 }, { "epoch": 0.7616883116883116, "grad_norm": 5.865964889526367, "learning_rate": 1.3629447476881058e-06, "loss": 0.2955, "step": 16422 }, { "epoch": 0.761734693877551, "grad_norm": 6.194489002227783, "learning_rate": 1.3624397816728068e-06, "loss": 0.3247, "step": 16423 }, { "epoch": 0.7617810760667904, "grad_norm": 7.108725070953369, "learning_rate": 1.3619348944633331e-06, "loss": 0.3184, "step": 16424 }, { "epoch": 0.7618274582560297, "grad_norm": 9.052736282348633, "learning_rate": 1.3614300860706198e-06, "loss": 0.3882, "step": 16425 }, { "epoch": 0.7618738404452691, "grad_norm": 12.085851669311523, "learning_rate": 1.3609253565056046e-06, "loss": 0.3525, "step": 16426 }, { "epoch": 0.7619202226345083, "grad_norm": 5.0073628425598145, "learning_rate": 1.3604207057792218e-06, "loss": 0.288, "step": 16427 }, { "epoch": 0.7619666048237477, "grad_norm": 4.3759236335754395, "learning_rate": 1.3599161339024048e-06, "loss": 0.2537, "step": 16428 }, { "epoch": 0.762012987012987, "grad_norm": 6.685702800750732, "learning_rate": 1.3594116408860847e-06, "loss": 0.2851, "step": 16429 }, { "epoch": 0.7620593692022264, "grad_norm": 8.116375923156738, "learning_rate": 1.358907226741193e-06, "loss": 0.2654, "step": 16430 }, { "epoch": 0.7621057513914656, "grad_norm": 8.033857345581055, "learning_rate": 1.358402891478654e-06, "loss": 0.3607, "step": 16431 }, { "epoch": 0.762152133580705, "grad_norm": 9.662713050842285, "learning_rate": 1.3578986351093953e-06, "loss": 0.3539, "step": 16432 }, { "epoch": 0.7621985157699444, "grad_norm": 5.109281063079834, "learning_rate": 1.357394457644342e-06, "loss": 0.2388, "step": 16433 }, { "epoch": 0.7622448979591837, "grad_norm": 6.4841694831848145, "learning_rate": 1.3568903590944166e-06, "loss": 0.3474, "step": 16434 }, { "epoch": 0.762291280148423, "grad_norm": 8.628387451171875, "learning_rate": 1.3563863394705418e-06, "loss": 0.4241, "step": 16435 }, { "epoch": 0.7623376623376623, "grad_norm": 9.421931266784668, "learning_rate": 1.3558823987836329e-06, "loss": 0.239, "step": 16436 }, { "epoch": 0.7623840445269017, "grad_norm": 11.618457794189453, "learning_rate": 1.3553785370446104e-06, "loss": 0.3767, "step": 16437 }, { "epoch": 0.762430426716141, "grad_norm": 4.658081531524658, "learning_rate": 1.354874754264389e-06, "loss": 0.348, "step": 16438 }, { "epoch": 0.7624768089053804, "grad_norm": 4.471307754516602, "learning_rate": 1.3543710504538838e-06, "loss": 0.3591, "step": 16439 }, { "epoch": 0.7625231910946196, "grad_norm": 6.248414993286133, "learning_rate": 1.3538674256240087e-06, "loss": 0.2884, "step": 16440 }, { "epoch": 0.762569573283859, "grad_norm": 7.97761344909668, "learning_rate": 1.3533638797856707e-06, "loss": 0.3075, "step": 16441 }, { "epoch": 0.7626159554730984, "grad_norm": 5.872779846191406, "learning_rate": 1.3528604129497814e-06, "loss": 0.3422, "step": 16442 }, { "epoch": 0.7626623376623377, "grad_norm": 7.399216651916504, "learning_rate": 1.3523570251272466e-06, "loss": 0.3889, "step": 16443 }, { "epoch": 0.762708719851577, "grad_norm": 6.034520626068115, "learning_rate": 1.3518537163289736e-06, "loss": 0.3474, "step": 16444 }, { "epoch": 0.7627551020408163, "grad_norm": 7.880866050720215, "learning_rate": 1.3513504865658655e-06, "loss": 0.3245, "step": 16445 }, { "epoch": 0.7628014842300557, "grad_norm": 9.435074806213379, "learning_rate": 1.3508473358488267e-06, "loss": 0.4661, "step": 16446 }, { "epoch": 0.762847866419295, "grad_norm": 6.4286885261535645, "learning_rate": 1.350344264188753e-06, "loss": 0.3507, "step": 16447 }, { "epoch": 0.7628942486085343, "grad_norm": 6.225111484527588, "learning_rate": 1.349841271596547e-06, "loss": 0.302, "step": 16448 }, { "epoch": 0.7629406307977736, "grad_norm": 8.124954223632812, "learning_rate": 1.3493383580831038e-06, "loss": 0.4195, "step": 16449 }, { "epoch": 0.762987012987013, "grad_norm": 4.8670430183410645, "learning_rate": 1.3488355236593198e-06, "loss": 0.2634, "step": 16450 }, { "epoch": 0.7630333951762523, "grad_norm": 11.29440689086914, "learning_rate": 1.3483327683360903e-06, "loss": 0.4794, "step": 16451 }, { "epoch": 0.7630797773654916, "grad_norm": 9.01826286315918, "learning_rate": 1.347830092124303e-06, "loss": 0.4394, "step": 16452 }, { "epoch": 0.763126159554731, "grad_norm": 6.991360664367676, "learning_rate": 1.3473274950348515e-06, "loss": 0.3021, "step": 16453 }, { "epoch": 0.7631725417439703, "grad_norm": 10.996015548706055, "learning_rate": 1.3468249770786223e-06, "loss": 0.4157, "step": 16454 }, { "epoch": 0.7632189239332097, "grad_norm": 7.097580432891846, "learning_rate": 1.3463225382665034e-06, "loss": 0.3589, "step": 16455 }, { "epoch": 0.763265306122449, "grad_norm": 7.273719787597656, "learning_rate": 1.3458201786093795e-06, "loss": 0.2579, "step": 16456 }, { "epoch": 0.7633116883116883, "grad_norm": 8.135692596435547, "learning_rate": 1.3453178981181359e-06, "loss": 0.3171, "step": 16457 }, { "epoch": 0.7633580705009276, "grad_norm": 8.644432067871094, "learning_rate": 1.3448156968036507e-06, "loss": 0.3623, "step": 16458 }, { "epoch": 0.763404452690167, "grad_norm": 5.447494983673096, "learning_rate": 1.3443135746768055e-06, "loss": 0.2849, "step": 16459 }, { "epoch": 0.7634508348794063, "grad_norm": 8.943921089172363, "learning_rate": 1.3438115317484785e-06, "loss": 0.3505, "step": 16460 }, { "epoch": 0.7634972170686456, "grad_norm": 8.6267728805542, "learning_rate": 1.343309568029546e-06, "loss": 0.2818, "step": 16461 }, { "epoch": 0.763543599257885, "grad_norm": 4.568799018859863, "learning_rate": 1.342807683530885e-06, "loss": 0.2069, "step": 16462 }, { "epoch": 0.7635899814471243, "grad_norm": 5.313150405883789, "learning_rate": 1.3423058782633648e-06, "loss": 0.2605, "step": 16463 }, { "epoch": 0.7636363636363637, "grad_norm": 10.505204200744629, "learning_rate": 1.3418041522378583e-06, "loss": 0.4354, "step": 16464 }, { "epoch": 0.7636827458256029, "grad_norm": 5.398509979248047, "learning_rate": 1.3413025054652357e-06, "loss": 0.3129, "step": 16465 }, { "epoch": 0.7637291280148423, "grad_norm": 11.165886878967285, "learning_rate": 1.3408009379563646e-06, "loss": 0.4612, "step": 16466 }, { "epoch": 0.7637755102040816, "grad_norm": 9.305529594421387, "learning_rate": 1.3402994497221105e-06, "loss": 0.3944, "step": 16467 }, { "epoch": 0.763821892393321, "grad_norm": 5.14309549331665, "learning_rate": 1.3397980407733408e-06, "loss": 0.2692, "step": 16468 }, { "epoch": 0.7638682745825603, "grad_norm": 8.479372024536133, "learning_rate": 1.3392967111209142e-06, "loss": 0.2748, "step": 16469 }, { "epoch": 0.7639146567717996, "grad_norm": 8.2680082321167, "learning_rate": 1.3387954607756938e-06, "loss": 0.3783, "step": 16470 }, { "epoch": 0.763961038961039, "grad_norm": 4.707921504974365, "learning_rate": 1.3382942897485386e-06, "loss": 0.2793, "step": 16471 }, { "epoch": 0.7640074211502783, "grad_norm": 6.458069324493408, "learning_rate": 1.3377931980503055e-06, "loss": 0.3697, "step": 16472 }, { "epoch": 0.7640538033395177, "grad_norm": 7.48427677154541, "learning_rate": 1.3372921856918536e-06, "loss": 0.3395, "step": 16473 }, { "epoch": 0.7641001855287569, "grad_norm": 4.357601642608643, "learning_rate": 1.3367912526840326e-06, "loss": 0.2667, "step": 16474 }, { "epoch": 0.7641465677179963, "grad_norm": 6.644272804260254, "learning_rate": 1.3362903990376968e-06, "loss": 0.3458, "step": 16475 }, { "epoch": 0.7641929499072356, "grad_norm": 8.504304885864258, "learning_rate": 1.3357896247636976e-06, "loss": 0.3147, "step": 16476 }, { "epoch": 0.764239332096475, "grad_norm": 7.906774997711182, "learning_rate": 1.3352889298728832e-06, "loss": 0.3493, "step": 16477 }, { "epoch": 0.7642857142857142, "grad_norm": 7.408168315887451, "learning_rate": 1.3347883143761016e-06, "loss": 0.3619, "step": 16478 }, { "epoch": 0.7643320964749536, "grad_norm": 5.307458877563477, "learning_rate": 1.334287778284199e-06, "loss": 0.2458, "step": 16479 }, { "epoch": 0.764378478664193, "grad_norm": 8.075204849243164, "learning_rate": 1.333787321608017e-06, "loss": 0.3202, "step": 16480 }, { "epoch": 0.7644248608534323, "grad_norm": 10.823748588562012, "learning_rate": 1.333286944358399e-06, "loss": 0.4507, "step": 16481 }, { "epoch": 0.7644712430426717, "grad_norm": 4.572762966156006, "learning_rate": 1.3327866465461853e-06, "loss": 0.2443, "step": 16482 }, { "epoch": 0.7645176252319109, "grad_norm": 7.820464134216309, "learning_rate": 1.3322864281822146e-06, "loss": 0.3766, "step": 16483 }, { "epoch": 0.7645640074211503, "grad_norm": 3.802414655685425, "learning_rate": 1.3317862892773254e-06, "loss": 0.2703, "step": 16484 }, { "epoch": 0.7646103896103896, "grad_norm": 5.555211544036865, "learning_rate": 1.3312862298423495e-06, "loss": 0.2296, "step": 16485 }, { "epoch": 0.764656771799629, "grad_norm": 13.176399230957031, "learning_rate": 1.330786249888123e-06, "loss": 0.4776, "step": 16486 }, { "epoch": 0.7647031539888682, "grad_norm": 13.19925308227539, "learning_rate": 1.330286349425477e-06, "loss": 0.3143, "step": 16487 }, { "epoch": 0.7647495361781076, "grad_norm": 5.9059224128723145, "learning_rate": 1.3297865284652417e-06, "loss": 0.2943, "step": 16488 }, { "epoch": 0.764795918367347, "grad_norm": 6.224067211151123, "learning_rate": 1.3292867870182453e-06, "loss": 0.2562, "step": 16489 }, { "epoch": 0.7648423005565863, "grad_norm": 4.726921558380127, "learning_rate": 1.328787125095316e-06, "loss": 0.3091, "step": 16490 }, { "epoch": 0.7648886827458256, "grad_norm": 10.658649444580078, "learning_rate": 1.328287542707276e-06, "loss": 0.3402, "step": 16491 }, { "epoch": 0.7649350649350649, "grad_norm": 9.303418159484863, "learning_rate": 1.3277880398649495e-06, "loss": 0.3476, "step": 16492 }, { "epoch": 0.7649814471243043, "grad_norm": 7.363343238830566, "learning_rate": 1.3272886165791581e-06, "loss": 0.3087, "step": 16493 }, { "epoch": 0.7650278293135436, "grad_norm": 7.745631217956543, "learning_rate": 1.3267892728607223e-06, "loss": 0.4366, "step": 16494 }, { "epoch": 0.765074211502783, "grad_norm": 15.01292610168457, "learning_rate": 1.3262900087204605e-06, "loss": 0.3315, "step": 16495 }, { "epoch": 0.7651205936920222, "grad_norm": 6.609401702880859, "learning_rate": 1.3257908241691864e-06, "loss": 0.3588, "step": 16496 }, { "epoch": 0.7651669758812616, "grad_norm": 9.018172264099121, "learning_rate": 1.3252917192177167e-06, "loss": 0.3567, "step": 16497 }, { "epoch": 0.765213358070501, "grad_norm": 11.042466163635254, "learning_rate": 1.324792693876863e-06, "loss": 0.3582, "step": 16498 }, { "epoch": 0.7652597402597403, "grad_norm": 5.043414115905762, "learning_rate": 1.3242937481574375e-06, "loss": 0.3195, "step": 16499 }, { "epoch": 0.7653061224489796, "grad_norm": 5.2054924964904785, "learning_rate": 1.3237948820702495e-06, "loss": 0.2948, "step": 16500 }, { "epoch": 0.7653525046382189, "grad_norm": 5.037291049957275, "learning_rate": 1.3232960956261077e-06, "loss": 0.3392, "step": 16501 }, { "epoch": 0.7653988868274583, "grad_norm": 5.292471408843994, "learning_rate": 1.3227973888358148e-06, "loss": 0.2132, "step": 16502 }, { "epoch": 0.7654452690166976, "grad_norm": 6.713890075683594, "learning_rate": 1.3222987617101773e-06, "loss": 0.3481, "step": 16503 }, { "epoch": 0.7654916512059369, "grad_norm": 6.677769660949707, "learning_rate": 1.3218002142599973e-06, "loss": 0.3736, "step": 16504 }, { "epoch": 0.7655380333951762, "grad_norm": 5.45307731628418, "learning_rate": 1.3213017464960754e-06, "loss": 0.2337, "step": 16505 }, { "epoch": 0.7655844155844156, "grad_norm": 6.977902889251709, "learning_rate": 1.3208033584292124e-06, "loss": 0.315, "step": 16506 }, { "epoch": 0.765630797773655, "grad_norm": 7.751257419586182, "learning_rate": 1.3203050500702031e-06, "loss": 0.3216, "step": 16507 }, { "epoch": 0.7656771799628942, "grad_norm": 8.965856552124023, "learning_rate": 1.319806821429843e-06, "loss": 0.3351, "step": 16508 }, { "epoch": 0.7657235621521336, "grad_norm": 8.669015884399414, "learning_rate": 1.319308672518928e-06, "loss": 0.3041, "step": 16509 }, { "epoch": 0.7657699443413729, "grad_norm": 8.095276832580566, "learning_rate": 1.318810603348249e-06, "loss": 0.2731, "step": 16510 }, { "epoch": 0.7658163265306123, "grad_norm": 7.5869035720825195, "learning_rate": 1.3183126139285961e-06, "loss": 0.3829, "step": 16511 }, { "epoch": 0.7658627087198516, "grad_norm": 8.381200790405273, "learning_rate": 1.3178147042707607e-06, "loss": 0.2693, "step": 16512 }, { "epoch": 0.7659090909090909, "grad_norm": 6.172766208648682, "learning_rate": 1.3173168743855259e-06, "loss": 0.2949, "step": 16513 }, { "epoch": 0.7659554730983302, "grad_norm": 11.654176712036133, "learning_rate": 1.3168191242836787e-06, "loss": 0.4357, "step": 16514 }, { "epoch": 0.7660018552875696, "grad_norm": 7.338356018066406, "learning_rate": 1.3163214539760022e-06, "loss": 0.272, "step": 16515 }, { "epoch": 0.766048237476809, "grad_norm": 6.033973693847656, "learning_rate": 1.315823863473279e-06, "loss": 0.3094, "step": 16516 }, { "epoch": 0.7660946196660482, "grad_norm": 3.977529525756836, "learning_rate": 1.3153263527862897e-06, "loss": 0.2316, "step": 16517 }, { "epoch": 0.7661410018552876, "grad_norm": 5.992269039154053, "learning_rate": 1.3148289219258098e-06, "loss": 0.371, "step": 16518 }, { "epoch": 0.7661873840445269, "grad_norm": 7.003007888793945, "learning_rate": 1.3143315709026178e-06, "loss": 0.311, "step": 16519 }, { "epoch": 0.7662337662337663, "grad_norm": 7.260914325714111, "learning_rate": 1.3138342997274883e-06, "loss": 0.2961, "step": 16520 }, { "epoch": 0.7662801484230055, "grad_norm": 21.720901489257812, "learning_rate": 1.3133371084111946e-06, "loss": 0.7416, "step": 16521 }, { "epoch": 0.7663265306122449, "grad_norm": 9.073426246643066, "learning_rate": 1.3128399969645094e-06, "loss": 0.3508, "step": 16522 }, { "epoch": 0.7663729128014842, "grad_norm": 9.048417091369629, "learning_rate": 1.3123429653981995e-06, "loss": 0.3198, "step": 16523 }, { "epoch": 0.7664192949907236, "grad_norm": 5.522308826446533, "learning_rate": 1.3118460137230337e-06, "loss": 0.2381, "step": 16524 }, { "epoch": 0.766465677179963, "grad_norm": 5.836811542510986, "learning_rate": 1.3113491419497792e-06, "loss": 0.337, "step": 16525 }, { "epoch": 0.7665120593692022, "grad_norm": 4.532570838928223, "learning_rate": 1.3108523500892002e-06, "loss": 0.235, "step": 16526 }, { "epoch": 0.7665584415584416, "grad_norm": 5.324185371398926, "learning_rate": 1.3103556381520587e-06, "loss": 0.2897, "step": 16527 }, { "epoch": 0.7666048237476809, "grad_norm": 5.352100372314453, "learning_rate": 1.309859006149118e-06, "loss": 0.2983, "step": 16528 }, { "epoch": 0.7666512059369203, "grad_norm": 8.822308540344238, "learning_rate": 1.3093624540911336e-06, "loss": 0.2488, "step": 16529 }, { "epoch": 0.7666975881261595, "grad_norm": 6.701045036315918, "learning_rate": 1.3088659819888655e-06, "loss": 0.2894, "step": 16530 }, { "epoch": 0.7667439703153989, "grad_norm": 5.258854389190674, "learning_rate": 1.308369589853069e-06, "loss": 0.29, "step": 16531 }, { "epoch": 0.7667903525046382, "grad_norm": 5.396036148071289, "learning_rate": 1.307873277694498e-06, "loss": 0.2453, "step": 16532 }, { "epoch": 0.7668367346938776, "grad_norm": 5.678133487701416, "learning_rate": 1.3073770455239066e-06, "loss": 0.3343, "step": 16533 }, { "epoch": 0.7668831168831168, "grad_norm": 10.652751922607422, "learning_rate": 1.3068808933520427e-06, "loss": 0.39, "step": 16534 }, { "epoch": 0.7669294990723562, "grad_norm": 7.241847991943359, "learning_rate": 1.3063848211896558e-06, "loss": 0.3118, "step": 16535 }, { "epoch": 0.7669758812615955, "grad_norm": 10.14074420928955, "learning_rate": 1.3058888290474937e-06, "loss": 0.3785, "step": 16536 }, { "epoch": 0.7670222634508349, "grad_norm": 11.868610382080078, "learning_rate": 1.3053929169363017e-06, "loss": 0.3546, "step": 16537 }, { "epoch": 0.7670686456400743, "grad_norm": 7.737049102783203, "learning_rate": 1.3048970848668235e-06, "loss": 0.2931, "step": 16538 }, { "epoch": 0.7671150278293135, "grad_norm": 6.8838958740234375, "learning_rate": 1.3044013328498029e-06, "loss": 0.2396, "step": 16539 }, { "epoch": 0.7671614100185529, "grad_norm": 4.362683296203613, "learning_rate": 1.3039056608959761e-06, "loss": 0.2413, "step": 16540 }, { "epoch": 0.7672077922077922, "grad_norm": 5.745250701904297, "learning_rate": 1.303410069016084e-06, "loss": 0.3361, "step": 16541 }, { "epoch": 0.7672541743970316, "grad_norm": 11.526554107666016, "learning_rate": 1.3029145572208628e-06, "loss": 0.3529, "step": 16542 }, { "epoch": 0.7673005565862708, "grad_norm": 6.616433620452881, "learning_rate": 1.3024191255210477e-06, "loss": 0.3131, "step": 16543 }, { "epoch": 0.7673469387755102, "grad_norm": 8.717630386352539, "learning_rate": 1.3019237739273737e-06, "loss": 0.2087, "step": 16544 }, { "epoch": 0.7673933209647495, "grad_norm": 28.41387176513672, "learning_rate": 1.301428502450569e-06, "loss": 0.5009, "step": 16545 }, { "epoch": 0.7674397031539889, "grad_norm": 4.954368591308594, "learning_rate": 1.300933311101365e-06, "loss": 0.3423, "step": 16546 }, { "epoch": 0.7674860853432282, "grad_norm": 5.064313888549805, "learning_rate": 1.3004381998904896e-06, "loss": 0.1846, "step": 16547 }, { "epoch": 0.7675324675324675, "grad_norm": 7.350973606109619, "learning_rate": 1.2999431688286696e-06, "loss": 0.3037, "step": 16548 }, { "epoch": 0.7675788497217069, "grad_norm": 5.820855617523193, "learning_rate": 1.2994482179266294e-06, "loss": 0.3052, "step": 16549 }, { "epoch": 0.7676252319109462, "grad_norm": 9.070974349975586, "learning_rate": 1.2989533471950927e-06, "loss": 0.4155, "step": 16550 }, { "epoch": 0.7676716141001856, "grad_norm": 8.468402862548828, "learning_rate": 1.2984585566447784e-06, "loss": 0.2945, "step": 16551 }, { "epoch": 0.7677179962894248, "grad_norm": 9.705792427062988, "learning_rate": 1.2979638462864069e-06, "loss": 0.2858, "step": 16552 }, { "epoch": 0.7677643784786642, "grad_norm": 5.738431930541992, "learning_rate": 1.2974692161306961e-06, "loss": 0.2281, "step": 16553 }, { "epoch": 0.7678107606679035, "grad_norm": 5.154110908508301, "learning_rate": 1.2969746661883626e-06, "loss": 0.3616, "step": 16554 }, { "epoch": 0.7678571428571429, "grad_norm": 8.7430419921875, "learning_rate": 1.2964801964701202e-06, "loss": 0.3247, "step": 16555 }, { "epoch": 0.7679035250463822, "grad_norm": 5.680927276611328, "learning_rate": 1.2959858069866798e-06, "loss": 0.2316, "step": 16556 }, { "epoch": 0.7679499072356215, "grad_norm": 8.70217514038086, "learning_rate": 1.2954914977487537e-06, "loss": 0.3015, "step": 16557 }, { "epoch": 0.7679962894248609, "grad_norm": 6.87067174911499, "learning_rate": 1.2949972687670499e-06, "loss": 0.2217, "step": 16558 }, { "epoch": 0.7680426716141002, "grad_norm": 8.439931869506836, "learning_rate": 1.2945031200522763e-06, "loss": 0.2511, "step": 16559 }, { "epoch": 0.7680890538033395, "grad_norm": 6.622231960296631, "learning_rate": 1.2940090516151383e-06, "loss": 0.3057, "step": 16560 }, { "epoch": 0.7681354359925788, "grad_norm": 13.452033042907715, "learning_rate": 1.2935150634663406e-06, "loss": 0.334, "step": 16561 }, { "epoch": 0.7681818181818182, "grad_norm": 7.4351582527160645, "learning_rate": 1.2930211556165827e-06, "loss": 0.3636, "step": 16562 }, { "epoch": 0.7682282003710575, "grad_norm": 5.469398498535156, "learning_rate": 1.2925273280765666e-06, "loss": 0.3146, "step": 16563 }, { "epoch": 0.7682745825602968, "grad_norm": 10.480494499206543, "learning_rate": 1.29203358085699e-06, "loss": 0.313, "step": 16564 }, { "epoch": 0.7683209647495362, "grad_norm": 8.91455078125, "learning_rate": 1.2915399139685503e-06, "loss": 0.3914, "step": 16565 }, { "epoch": 0.7683673469387755, "grad_norm": 10.484124183654785, "learning_rate": 1.2910463274219438e-06, "loss": 0.3399, "step": 16566 }, { "epoch": 0.7684137291280149, "grad_norm": 9.033807754516602, "learning_rate": 1.2905528212278606e-06, "loss": 0.3016, "step": 16567 }, { "epoch": 0.7684601113172542, "grad_norm": 4.977589130401611, "learning_rate": 1.2900593953969947e-06, "loss": 0.1949, "step": 16568 }, { "epoch": 0.7685064935064935, "grad_norm": 5.2485504150390625, "learning_rate": 1.2895660499400347e-06, "loss": 0.2627, "step": 16569 }, { "epoch": 0.7685528756957328, "grad_norm": 11.020914077758789, "learning_rate": 1.2890727848676692e-06, "loss": 0.4404, "step": 16570 }, { "epoch": 0.7685992578849722, "grad_norm": 5.787806987762451, "learning_rate": 1.2885796001905843e-06, "loss": 0.2966, "step": 16571 }, { "epoch": 0.7686456400742115, "grad_norm": 9.714761734008789, "learning_rate": 1.2880864959194666e-06, "loss": 0.4846, "step": 16572 }, { "epoch": 0.7686920222634508, "grad_norm": 5.602039813995361, "learning_rate": 1.2875934720649958e-06, "loss": 0.3201, "step": 16573 }, { "epoch": 0.7687384044526901, "grad_norm": 7.06518030166626, "learning_rate": 1.287100528637854e-06, "loss": 0.3612, "step": 16574 }, { "epoch": 0.7687847866419295, "grad_norm": 6.670289039611816, "learning_rate": 1.2866076656487208e-06, "loss": 0.2398, "step": 16575 }, { "epoch": 0.7688311688311689, "grad_norm": 13.0601806640625, "learning_rate": 1.2861148831082743e-06, "loss": 0.3356, "step": 16576 }, { "epoch": 0.7688775510204081, "grad_norm": 10.317557334899902, "learning_rate": 1.2856221810271914e-06, "loss": 0.409, "step": 16577 }, { "epoch": 0.7689239332096475, "grad_norm": 6.636383056640625, "learning_rate": 1.2851295594161434e-06, "loss": 0.3299, "step": 16578 }, { "epoch": 0.7689703153988868, "grad_norm": 8.195113182067871, "learning_rate": 1.2846370182858037e-06, "loss": 0.2902, "step": 16579 }, { "epoch": 0.7690166975881262, "grad_norm": 7.811182022094727, "learning_rate": 1.2841445576468436e-06, "loss": 0.3249, "step": 16580 }, { "epoch": 0.7690630797773655, "grad_norm": 4.965402126312256, "learning_rate": 1.283652177509932e-06, "loss": 0.2799, "step": 16581 }, { "epoch": 0.7691094619666048, "grad_norm": 5.388775825500488, "learning_rate": 1.2831598778857357e-06, "loss": 0.3301, "step": 16582 }, { "epoch": 0.7691558441558441, "grad_norm": 10.694249153137207, "learning_rate": 1.2826676587849213e-06, "loss": 0.4932, "step": 16583 }, { "epoch": 0.7692022263450835, "grad_norm": 7.676989555358887, "learning_rate": 1.2821755202181503e-06, "loss": 0.3303, "step": 16584 }, { "epoch": 0.7692486085343229, "grad_norm": 7.1723833084106445, "learning_rate": 1.2816834621960855e-06, "loss": 0.2974, "step": 16585 }, { "epoch": 0.7692949907235621, "grad_norm": 4.5814313888549805, "learning_rate": 1.2811914847293872e-06, "loss": 0.2977, "step": 16586 }, { "epoch": 0.7693413729128015, "grad_norm": 7.460324764251709, "learning_rate": 1.2806995878287143e-06, "loss": 0.3849, "step": 16587 }, { "epoch": 0.7693877551020408, "grad_norm": 5.10228157043457, "learning_rate": 1.2802077715047244e-06, "loss": 0.3232, "step": 16588 }, { "epoch": 0.7694341372912802, "grad_norm": 7.908091068267822, "learning_rate": 1.2797160357680694e-06, "loss": 0.3858, "step": 16589 }, { "epoch": 0.7694805194805194, "grad_norm": 4.839300155639648, "learning_rate": 1.2792243806294047e-06, "loss": 0.3696, "step": 16590 }, { "epoch": 0.7695269016697588, "grad_norm": 6.622528076171875, "learning_rate": 1.2787328060993808e-06, "loss": 0.3101, "step": 16591 }, { "epoch": 0.7695732838589981, "grad_norm": 8.912084579467773, "learning_rate": 1.2782413121886483e-06, "loss": 0.3454, "step": 16592 }, { "epoch": 0.7696196660482375, "grad_norm": 4.181732654571533, "learning_rate": 1.2777498989078546e-06, "loss": 0.2713, "step": 16593 }, { "epoch": 0.7696660482374769, "grad_norm": 9.022987365722656, "learning_rate": 1.2772585662676472e-06, "loss": 0.3758, "step": 16594 }, { "epoch": 0.7697124304267161, "grad_norm": 6.065186977386475, "learning_rate": 1.2767673142786686e-06, "loss": 0.2772, "step": 16595 }, { "epoch": 0.7697588126159555, "grad_norm": 6.38019323348999, "learning_rate": 1.276276142951562e-06, "loss": 0.3138, "step": 16596 }, { "epoch": 0.7698051948051948, "grad_norm": 9.588237762451172, "learning_rate": 1.2757850522969684e-06, "loss": 0.5416, "step": 16597 }, { "epoch": 0.7698515769944342, "grad_norm": 8.634106636047363, "learning_rate": 1.2752940423255278e-06, "loss": 0.3848, "step": 16598 }, { "epoch": 0.7698979591836734, "grad_norm": 8.29776382446289, "learning_rate": 1.2748031130478783e-06, "loss": 0.2268, "step": 16599 }, { "epoch": 0.7699443413729128, "grad_norm": 10.360177040100098, "learning_rate": 1.2743122644746536e-06, "loss": 0.4708, "step": 16600 }, { "epoch": 0.7699907235621521, "grad_norm": 7.62346887588501, "learning_rate": 1.273821496616488e-06, "loss": 0.3346, "step": 16601 }, { "epoch": 0.7700371057513915, "grad_norm": 7.539588928222656, "learning_rate": 1.2733308094840147e-06, "loss": 0.2919, "step": 16602 }, { "epoch": 0.7700834879406308, "grad_norm": 7.306943893432617, "learning_rate": 1.2728402030878633e-06, "loss": 0.353, "step": 16603 }, { "epoch": 0.7701298701298701, "grad_norm": 9.679484367370605, "learning_rate": 1.2723496774386652e-06, "loss": 0.2612, "step": 16604 }, { "epoch": 0.7701762523191095, "grad_norm": 6.936054706573486, "learning_rate": 1.2718592325470435e-06, "loss": 0.4385, "step": 16605 }, { "epoch": 0.7702226345083488, "grad_norm": 6.857784748077393, "learning_rate": 1.2713688684236246e-06, "loss": 0.2418, "step": 16606 }, { "epoch": 0.7702690166975881, "grad_norm": 13.489990234375, "learning_rate": 1.2708785850790334e-06, "loss": 0.2711, "step": 16607 }, { "epoch": 0.7703153988868274, "grad_norm": 6.226769924163818, "learning_rate": 1.27038838252389e-06, "loss": 0.2548, "step": 16608 }, { "epoch": 0.7703617810760668, "grad_norm": 6.1273884773254395, "learning_rate": 1.2698982607688153e-06, "loss": 0.2652, "step": 16609 }, { "epoch": 0.7704081632653061, "grad_norm": 8.630946159362793, "learning_rate": 1.2694082198244296e-06, "loss": 0.2718, "step": 16610 }, { "epoch": 0.7704545454545455, "grad_norm": 4.284735202789307, "learning_rate": 1.2689182597013449e-06, "loss": 0.2589, "step": 16611 }, { "epoch": 0.7705009276437847, "grad_norm": 3.881324291229248, "learning_rate": 1.2684283804101783e-06, "loss": 0.3447, "step": 16612 }, { "epoch": 0.7705473098330241, "grad_norm": 5.973596096038818, "learning_rate": 1.2679385819615425e-06, "loss": 0.3638, "step": 16613 }, { "epoch": 0.7705936920222635, "grad_norm": 8.98857593536377, "learning_rate": 1.2674488643660488e-06, "loss": 0.4256, "step": 16614 }, { "epoch": 0.7706400742115028, "grad_norm": 5.056803226470947, "learning_rate": 1.2669592276343084e-06, "loss": 0.2885, "step": 16615 }, { "epoch": 0.7706864564007421, "grad_norm": 7.259415149688721, "learning_rate": 1.266469671776926e-06, "loss": 0.3616, "step": 16616 }, { "epoch": 0.7707328385899814, "grad_norm": 12.301963806152344, "learning_rate": 1.2659801968045087e-06, "loss": 0.3851, "step": 16617 }, { "epoch": 0.7707792207792208, "grad_norm": 7.230834484100342, "learning_rate": 1.2654908027276613e-06, "loss": 0.3691, "step": 16618 }, { "epoch": 0.7708256029684601, "grad_norm": 11.582490921020508, "learning_rate": 1.2650014895569857e-06, "loss": 0.3872, "step": 16619 }, { "epoch": 0.7708719851576994, "grad_norm": 8.35947036743164, "learning_rate": 1.2645122573030826e-06, "loss": 0.4416, "step": 16620 }, { "epoch": 0.7709183673469387, "grad_norm": 4.258821964263916, "learning_rate": 1.264023105976553e-06, "loss": 0.2336, "step": 16621 }, { "epoch": 0.7709647495361781, "grad_norm": 10.478557586669922, "learning_rate": 1.2635340355879905e-06, "loss": 0.4783, "step": 16622 }, { "epoch": 0.7710111317254175, "grad_norm": 9.878886222839355, "learning_rate": 1.2630450461479927e-06, "loss": 0.5407, "step": 16623 }, { "epoch": 0.7710575139146568, "grad_norm": 4.28226900100708, "learning_rate": 1.2625561376671524e-06, "loss": 0.3012, "step": 16624 }, { "epoch": 0.7711038961038961, "grad_norm": 14.940849304199219, "learning_rate": 1.2620673101560626e-06, "loss": 0.4416, "step": 16625 }, { "epoch": 0.7711502782931354, "grad_norm": 6.710912704467773, "learning_rate": 1.2615785636253142e-06, "loss": 0.2346, "step": 16626 }, { "epoch": 0.7711966604823748, "grad_norm": 6.805055141448975, "learning_rate": 1.261089898085493e-06, "loss": 0.2288, "step": 16627 }, { "epoch": 0.7712430426716141, "grad_norm": 6.825353622436523, "learning_rate": 1.2606013135471874e-06, "loss": 0.247, "step": 16628 }, { "epoch": 0.7712894248608534, "grad_norm": 5.539163589477539, "learning_rate": 1.2601128100209813e-06, "loss": 0.2464, "step": 16629 }, { "epoch": 0.7713358070500927, "grad_norm": 6.250139236450195, "learning_rate": 1.259624387517459e-06, "loss": 0.2552, "step": 16630 }, { "epoch": 0.7713821892393321, "grad_norm": 5.541454315185547, "learning_rate": 1.2591360460472018e-06, "loss": 0.3381, "step": 16631 }, { "epoch": 0.7714285714285715, "grad_norm": 11.700517654418945, "learning_rate": 1.2586477856207902e-06, "loss": 0.3067, "step": 16632 }, { "epoch": 0.7714749536178107, "grad_norm": 5.317495822906494, "learning_rate": 1.2581596062487995e-06, "loss": 0.1622, "step": 16633 }, { "epoch": 0.7715213358070501, "grad_norm": 6.86393928527832, "learning_rate": 1.2576715079418072e-06, "loss": 0.2744, "step": 16634 }, { "epoch": 0.7715677179962894, "grad_norm": 10.726702690124512, "learning_rate": 1.257183490710388e-06, "loss": 0.3347, "step": 16635 }, { "epoch": 0.7716141001855288, "grad_norm": 7.889936923980713, "learning_rate": 1.2566955545651145e-06, "loss": 0.3125, "step": 16636 }, { "epoch": 0.7716604823747681, "grad_norm": 8.078956604003906, "learning_rate": 1.2562076995165584e-06, "loss": 0.3289, "step": 16637 }, { "epoch": 0.7717068645640074, "grad_norm": 4.609035491943359, "learning_rate": 1.2557199255752866e-06, "loss": 0.2856, "step": 16638 }, { "epoch": 0.7717532467532467, "grad_norm": 4.90346622467041, "learning_rate": 1.2552322327518678e-06, "loss": 0.2586, "step": 16639 }, { "epoch": 0.7717996289424861, "grad_norm": 6.491262435913086, "learning_rate": 1.2547446210568675e-06, "loss": 0.2656, "step": 16640 }, { "epoch": 0.7718460111317255, "grad_norm": 8.40677261352539, "learning_rate": 1.2542570905008495e-06, "loss": 0.3211, "step": 16641 }, { "epoch": 0.7718923933209647, "grad_norm": 8.887165069580078, "learning_rate": 1.2537696410943777e-06, "loss": 0.3436, "step": 16642 }, { "epoch": 0.7719387755102041, "grad_norm": 7.123946189880371, "learning_rate": 1.2532822728480087e-06, "loss": 0.4144, "step": 16643 }, { "epoch": 0.7719851576994434, "grad_norm": 8.511222839355469, "learning_rate": 1.2527949857723037e-06, "loss": 0.2644, "step": 16644 }, { "epoch": 0.7720315398886828, "grad_norm": 11.705009460449219, "learning_rate": 1.2523077798778188e-06, "loss": 0.3188, "step": 16645 }, { "epoch": 0.772077922077922, "grad_norm": 9.140949249267578, "learning_rate": 1.251820655175109e-06, "loss": 0.3553, "step": 16646 }, { "epoch": 0.7721243042671614, "grad_norm": 7.3969526290893555, "learning_rate": 1.2513336116747287e-06, "loss": 0.2765, "step": 16647 }, { "epoch": 0.7721706864564007, "grad_norm": 8.89952278137207, "learning_rate": 1.2508466493872273e-06, "loss": 0.3259, "step": 16648 }, { "epoch": 0.7722170686456401, "grad_norm": 7.891798496246338, "learning_rate": 1.250359768323156e-06, "loss": 0.3132, "step": 16649 }, { "epoch": 0.7722634508348795, "grad_norm": 7.184804439544678, "learning_rate": 1.2498729684930626e-06, "loss": 0.3381, "step": 16650 }, { "epoch": 0.7723098330241187, "grad_norm": 11.554718971252441, "learning_rate": 1.2493862499074931e-06, "loss": 0.3771, "step": 16651 }, { "epoch": 0.7723562152133581, "grad_norm": 10.150965690612793, "learning_rate": 1.2488996125769943e-06, "loss": 0.3537, "step": 16652 }, { "epoch": 0.7724025974025974, "grad_norm": 10.534984588623047, "learning_rate": 1.248413056512105e-06, "loss": 0.4788, "step": 16653 }, { "epoch": 0.7724489795918368, "grad_norm": 6.889892101287842, "learning_rate": 1.2479265817233682e-06, "loss": 0.3482, "step": 16654 }, { "epoch": 0.772495361781076, "grad_norm": 4.870189189910889, "learning_rate": 1.2474401882213227e-06, "loss": 0.3032, "step": 16655 }, { "epoch": 0.7725417439703154, "grad_norm": 12.590248107910156, "learning_rate": 1.2469538760165062e-06, "loss": 0.4509, "step": 16656 }, { "epoch": 0.7725881261595547, "grad_norm": 5.74739408493042, "learning_rate": 1.2464676451194563e-06, "loss": 0.2022, "step": 16657 }, { "epoch": 0.7726345083487941, "grad_norm": 11.424164772033691, "learning_rate": 1.2459814955407034e-06, "loss": 0.3925, "step": 16658 }, { "epoch": 0.7726808905380333, "grad_norm": 8.27003002166748, "learning_rate": 1.2454954272907816e-06, "loss": 0.3871, "step": 16659 }, { "epoch": 0.7727272727272727, "grad_norm": 8.519051551818848, "learning_rate": 1.245009440380221e-06, "loss": 0.3799, "step": 16660 }, { "epoch": 0.7727736549165121, "grad_norm": 9.75299072265625, "learning_rate": 1.24452353481955e-06, "loss": 0.3173, "step": 16661 }, { "epoch": 0.7728200371057514, "grad_norm": 5.072320938110352, "learning_rate": 1.2440377106192974e-06, "loss": 0.2647, "step": 16662 }, { "epoch": 0.7728664192949907, "grad_norm": 11.699718475341797, "learning_rate": 1.2435519677899855e-06, "loss": 0.3393, "step": 16663 }, { "epoch": 0.77291280148423, "grad_norm": 8.462347030639648, "learning_rate": 1.2430663063421388e-06, "loss": 0.3554, "step": 16664 }, { "epoch": 0.7729591836734694, "grad_norm": 6.67138147354126, "learning_rate": 1.2425807262862793e-06, "loss": 0.3211, "step": 16665 }, { "epoch": 0.7730055658627087, "grad_norm": 4.778335094451904, "learning_rate": 1.2420952276329262e-06, "loss": 0.2799, "step": 16666 }, { "epoch": 0.7730519480519481, "grad_norm": 6.030011177062988, "learning_rate": 1.2416098103925995e-06, "loss": 0.251, "step": 16667 }, { "epoch": 0.7730983302411873, "grad_norm": 7.43488073348999, "learning_rate": 1.2411244745758123e-06, "loss": 0.3431, "step": 16668 }, { "epoch": 0.7731447124304267, "grad_norm": 4.240732192993164, "learning_rate": 1.2406392201930805e-06, "loss": 0.2559, "step": 16669 }, { "epoch": 0.7731910946196661, "grad_norm": 7.9672322273254395, "learning_rate": 1.2401540472549174e-06, "loss": 0.2709, "step": 16670 }, { "epoch": 0.7732374768089054, "grad_norm": 10.241787910461426, "learning_rate": 1.239668955771834e-06, "loss": 0.3818, "step": 16671 }, { "epoch": 0.7732838589981447, "grad_norm": 4.135161399841309, "learning_rate": 1.2391839457543392e-06, "loss": 0.371, "step": 16672 }, { "epoch": 0.773330241187384, "grad_norm": 9.919737815856934, "learning_rate": 1.2386990172129416e-06, "loss": 0.4006, "step": 16673 }, { "epoch": 0.7733766233766234, "grad_norm": 7.956252098083496, "learning_rate": 1.238214170158145e-06, "loss": 0.4191, "step": 16674 }, { "epoch": 0.7734230055658627, "grad_norm": 7.445956230163574, "learning_rate": 1.237729404600454e-06, "loss": 0.3003, "step": 16675 }, { "epoch": 0.773469387755102, "grad_norm": 4.716812610626221, "learning_rate": 1.2372447205503713e-06, "loss": 0.2465, "step": 16676 }, { "epoch": 0.7735157699443413, "grad_norm": 4.105802059173584, "learning_rate": 1.2367601180183968e-06, "loss": 0.2843, "step": 16677 }, { "epoch": 0.7735621521335807, "grad_norm": 6.826245307922363, "learning_rate": 1.2362755970150308e-06, "loss": 0.3174, "step": 16678 }, { "epoch": 0.7736085343228201, "grad_norm": 14.902454376220703, "learning_rate": 1.2357911575507676e-06, "loss": 0.4451, "step": 16679 }, { "epoch": 0.7736549165120594, "grad_norm": 5.169410228729248, "learning_rate": 1.2353067996361034e-06, "loss": 0.3259, "step": 16680 }, { "epoch": 0.7737012987012987, "grad_norm": 6.764974594116211, "learning_rate": 1.234822523281532e-06, "loss": 0.3076, "step": 16681 }, { "epoch": 0.773747680890538, "grad_norm": 6.821197986602783, "learning_rate": 1.2343383284975447e-06, "loss": 0.4125, "step": 16682 }, { "epoch": 0.7737940630797774, "grad_norm": 6.60980224609375, "learning_rate": 1.2338542152946326e-06, "loss": 0.3767, "step": 16683 }, { "epoch": 0.7738404452690167, "grad_norm": 4.603551864624023, "learning_rate": 1.2333701836832812e-06, "loss": 0.2326, "step": 16684 }, { "epoch": 0.773886827458256, "grad_norm": 5.4557881355285645, "learning_rate": 1.2328862336739778e-06, "loss": 0.3846, "step": 16685 }, { "epoch": 0.7739332096474953, "grad_norm": 7.0841145515441895, "learning_rate": 1.232402365277207e-06, "loss": 0.3396, "step": 16686 }, { "epoch": 0.7739795918367347, "grad_norm": 5.90203332901001, "learning_rate": 1.2319185785034527e-06, "loss": 0.2957, "step": 16687 }, { "epoch": 0.7740259740259741, "grad_norm": 9.070398330688477, "learning_rate": 1.2314348733631958e-06, "loss": 0.3582, "step": 16688 }, { "epoch": 0.7740723562152133, "grad_norm": 11.552786827087402, "learning_rate": 1.230951249866913e-06, "loss": 0.4704, "step": 16689 }, { "epoch": 0.7741187384044527, "grad_norm": 6.273526668548584, "learning_rate": 1.2304677080250837e-06, "loss": 0.2235, "step": 16690 }, { "epoch": 0.774165120593692, "grad_norm": 13.345059394836426, "learning_rate": 1.2299842478481832e-06, "loss": 0.3304, "step": 16691 }, { "epoch": 0.7742115027829314, "grad_norm": 6.944644451141357, "learning_rate": 1.2295008693466854e-06, "loss": 0.3394, "step": 16692 }, { "epoch": 0.7742578849721707, "grad_norm": 7.192424774169922, "learning_rate": 1.2290175725310643e-06, "loss": 0.3377, "step": 16693 }, { "epoch": 0.77430426716141, "grad_norm": 5.574396133422852, "learning_rate": 1.228534357411787e-06, "loss": 0.2686, "step": 16694 }, { "epoch": 0.7743506493506493, "grad_norm": 5.360097408294678, "learning_rate": 1.228051223999323e-06, "loss": 0.3048, "step": 16695 }, { "epoch": 0.7743970315398887, "grad_norm": 4.675765514373779, "learning_rate": 1.2275681723041406e-06, "loss": 0.3434, "step": 16696 }, { "epoch": 0.7744434137291281, "grad_norm": 6.612997531890869, "learning_rate": 1.2270852023367036e-06, "loss": 0.2835, "step": 16697 }, { "epoch": 0.7744897959183673, "grad_norm": 8.309085845947266, "learning_rate": 1.2266023141074768e-06, "loss": 0.3895, "step": 16698 }, { "epoch": 0.7745361781076067, "grad_norm": 12.492711067199707, "learning_rate": 1.2261195076269195e-06, "loss": 0.2946, "step": 16699 }, { "epoch": 0.774582560296846, "grad_norm": 4.61773157119751, "learning_rate": 1.2256367829054926e-06, "loss": 0.3359, "step": 16700 }, { "epoch": 0.7746289424860854, "grad_norm": 6.6531147956848145, "learning_rate": 1.2251541399536542e-06, "loss": 0.4114, "step": 16701 }, { "epoch": 0.7746753246753246, "grad_norm": 4.833830833435059, "learning_rate": 1.2246715787818596e-06, "loss": 0.3436, "step": 16702 }, { "epoch": 0.774721706864564, "grad_norm": 8.768560409545898, "learning_rate": 1.2241890994005661e-06, "loss": 0.3344, "step": 16703 }, { "epoch": 0.7747680890538033, "grad_norm": 10.128771781921387, "learning_rate": 1.2237067018202225e-06, "loss": 0.2908, "step": 16704 }, { "epoch": 0.7748144712430427, "grad_norm": 6.658135890960693, "learning_rate": 1.2232243860512816e-06, "loss": 0.3311, "step": 16705 }, { "epoch": 0.774860853432282, "grad_norm": 7.103257179260254, "learning_rate": 1.2227421521041926e-06, "loss": 0.3587, "step": 16706 }, { "epoch": 0.7749072356215213, "grad_norm": 8.630475044250488, "learning_rate": 1.222259999989402e-06, "loss": 0.3261, "step": 16707 }, { "epoch": 0.7749536178107607, "grad_norm": 3.3797903060913086, "learning_rate": 1.2217779297173577e-06, "loss": 0.2183, "step": 16708 }, { "epoch": 0.775, "grad_norm": 8.062724113464355, "learning_rate": 1.2212959412985004e-06, "loss": 0.2604, "step": 16709 }, { "epoch": 0.7750463821892394, "grad_norm": 7.996496200561523, "learning_rate": 1.2208140347432734e-06, "loss": 0.4387, "step": 16710 }, { "epoch": 0.7750927643784786, "grad_norm": 7.372075080871582, "learning_rate": 1.2203322100621167e-06, "loss": 0.3098, "step": 16711 }, { "epoch": 0.775139146567718, "grad_norm": 14.989319801330566, "learning_rate": 1.2198504672654694e-06, "loss": 0.5016, "step": 16712 }, { "epoch": 0.7751855287569573, "grad_norm": 4.348986625671387, "learning_rate": 1.2193688063637677e-06, "loss": 0.3333, "step": 16713 }, { "epoch": 0.7752319109461967, "grad_norm": 12.401355743408203, "learning_rate": 1.2188872273674484e-06, "loss": 0.4108, "step": 16714 }, { "epoch": 0.775278293135436, "grad_norm": 9.948476791381836, "learning_rate": 1.2184057302869412e-06, "loss": 0.4554, "step": 16715 }, { "epoch": 0.7753246753246753, "grad_norm": 5.3213090896606445, "learning_rate": 1.2179243151326797e-06, "loss": 0.29, "step": 16716 }, { "epoch": 0.7753710575139147, "grad_norm": 4.444154739379883, "learning_rate": 1.2174429819150928e-06, "loss": 0.3279, "step": 16717 }, { "epoch": 0.775417439703154, "grad_norm": 6.591198921203613, "learning_rate": 1.216961730644609e-06, "loss": 0.2786, "step": 16718 }, { "epoch": 0.7754638218923933, "grad_norm": 9.806111335754395, "learning_rate": 1.216480561331655e-06, "loss": 0.3675, "step": 16719 }, { "epoch": 0.7755102040816326, "grad_norm": 14.187800407409668, "learning_rate": 1.2159994739866525e-06, "loss": 0.348, "step": 16720 }, { "epoch": 0.775556586270872, "grad_norm": 6.764986991882324, "learning_rate": 1.2155184686200261e-06, "loss": 0.289, "step": 16721 }, { "epoch": 0.7756029684601113, "grad_norm": 10.528505325317383, "learning_rate": 1.2150375452421952e-06, "loss": 0.3209, "step": 16722 }, { "epoch": 0.7756493506493507, "grad_norm": 10.73316478729248, "learning_rate": 1.2145567038635803e-06, "loss": 0.3059, "step": 16723 }, { "epoch": 0.77569573283859, "grad_norm": 6.519837856292725, "learning_rate": 1.2140759444945977e-06, "loss": 0.3737, "step": 16724 }, { "epoch": 0.7757421150278293, "grad_norm": 6.127213478088379, "learning_rate": 1.213595267145664e-06, "loss": 0.2612, "step": 16725 }, { "epoch": 0.7757884972170687, "grad_norm": 6.165529251098633, "learning_rate": 1.2131146718271902e-06, "loss": 0.2448, "step": 16726 }, { "epoch": 0.775834879406308, "grad_norm": 5.436216354370117, "learning_rate": 1.2126341585495898e-06, "loss": 0.2387, "step": 16727 }, { "epoch": 0.7758812615955473, "grad_norm": 6.844618797302246, "learning_rate": 1.212153727323273e-06, "loss": 0.3874, "step": 16728 }, { "epoch": 0.7759276437847866, "grad_norm": 5.554354667663574, "learning_rate": 1.2116733781586475e-06, "loss": 0.3232, "step": 16729 }, { "epoch": 0.775974025974026, "grad_norm": 9.590981483459473, "learning_rate": 1.2111931110661213e-06, "loss": 0.3713, "step": 16730 }, { "epoch": 0.7760204081632653, "grad_norm": 6.230071544647217, "learning_rate": 1.2107129260560973e-06, "loss": 0.2696, "step": 16731 }, { "epoch": 0.7760667903525046, "grad_norm": 6.238188743591309, "learning_rate": 1.2102328231389787e-06, "loss": 0.3089, "step": 16732 }, { "epoch": 0.776113172541744, "grad_norm": 12.273880958557129, "learning_rate": 1.209752802325167e-06, "loss": 0.4739, "step": 16733 }, { "epoch": 0.7761595547309833, "grad_norm": 9.245728492736816, "learning_rate": 1.2092728636250618e-06, "loss": 0.3556, "step": 16734 }, { "epoch": 0.7762059369202227, "grad_norm": 9.071739196777344, "learning_rate": 1.2087930070490605e-06, "loss": 0.3719, "step": 16735 }, { "epoch": 0.776252319109462, "grad_norm": 4.5403733253479, "learning_rate": 1.2083132326075603e-06, "loss": 0.2609, "step": 16736 }, { "epoch": 0.7762987012987013, "grad_norm": 7.4067487716674805, "learning_rate": 1.2078335403109532e-06, "loss": 0.3158, "step": 16737 }, { "epoch": 0.7763450834879406, "grad_norm": 6.148570537567139, "learning_rate": 1.207353930169632e-06, "loss": 0.282, "step": 16738 }, { "epoch": 0.77639146567718, "grad_norm": 8.154955863952637, "learning_rate": 1.2068744021939872e-06, "loss": 0.401, "step": 16739 }, { "epoch": 0.7764378478664193, "grad_norm": 6.122918128967285, "learning_rate": 1.2063949563944077e-06, "loss": 0.3537, "step": 16740 }, { "epoch": 0.7764842300556586, "grad_norm": 5.31959342956543, "learning_rate": 1.2059155927812826e-06, "loss": 0.2153, "step": 16741 }, { "epoch": 0.7765306122448979, "grad_norm": 4.600619792938232, "learning_rate": 1.205436311364993e-06, "loss": 0.2748, "step": 16742 }, { "epoch": 0.7765769944341373, "grad_norm": 14.458212852478027, "learning_rate": 1.204957112155925e-06, "loss": 0.5413, "step": 16743 }, { "epoch": 0.7766233766233767, "grad_norm": 6.769741535186768, "learning_rate": 1.2044779951644586e-06, "loss": 0.3189, "step": 16744 }, { "epoch": 0.7766697588126159, "grad_norm": 11.940652847290039, "learning_rate": 1.2039989604009755e-06, "loss": 0.5372, "step": 16745 }, { "epoch": 0.7767161410018553, "grad_norm": 6.2052178382873535, "learning_rate": 1.2035200078758518e-06, "loss": 0.3515, "step": 16746 }, { "epoch": 0.7767625231910946, "grad_norm": 8.09423828125, "learning_rate": 1.2030411375994666e-06, "loss": 0.4839, "step": 16747 }, { "epoch": 0.776808905380334, "grad_norm": 14.498860359191895, "learning_rate": 1.2025623495821908e-06, "loss": 0.473, "step": 16748 }, { "epoch": 0.7768552875695733, "grad_norm": 7.263105392456055, "learning_rate": 1.2020836438343992e-06, "loss": 0.318, "step": 16749 }, { "epoch": 0.7769016697588126, "grad_norm": 9.8270845413208, "learning_rate": 1.2016050203664619e-06, "loss": 0.3053, "step": 16750 }, { "epoch": 0.7769480519480519, "grad_norm": 10.914932250976562, "learning_rate": 1.2011264791887484e-06, "loss": 0.3153, "step": 16751 }, { "epoch": 0.7769944341372913, "grad_norm": 5.502514839172363, "learning_rate": 1.2006480203116278e-06, "loss": 0.3745, "step": 16752 }, { "epoch": 0.7770408163265307, "grad_norm": 12.609424591064453, "learning_rate": 1.2001696437454623e-06, "loss": 0.4208, "step": 16753 }, { "epoch": 0.7770871985157699, "grad_norm": 10.374403953552246, "learning_rate": 1.1996913495006168e-06, "loss": 0.3973, "step": 16754 }, { "epoch": 0.7771335807050093, "grad_norm": 4.699737071990967, "learning_rate": 1.1992131375874544e-06, "loss": 0.3111, "step": 16755 }, { "epoch": 0.7771799628942486, "grad_norm": 4.993213653564453, "learning_rate": 1.1987350080163346e-06, "loss": 0.2868, "step": 16756 }, { "epoch": 0.777226345083488, "grad_norm": 7.558723449707031, "learning_rate": 1.1982569607976157e-06, "loss": 0.3023, "step": 16757 }, { "epoch": 0.7772727272727272, "grad_norm": 12.052549362182617, "learning_rate": 1.1977789959416558e-06, "loss": 0.3716, "step": 16758 }, { "epoch": 0.7773191094619666, "grad_norm": 4.3126678466796875, "learning_rate": 1.197301113458807e-06, "loss": 0.3124, "step": 16759 }, { "epoch": 0.7773654916512059, "grad_norm": 9.746685028076172, "learning_rate": 1.1968233133594243e-06, "loss": 0.3719, "step": 16760 }, { "epoch": 0.7774118738404453, "grad_norm": 6.9887871742248535, "learning_rate": 1.1963455956538584e-06, "loss": 0.2858, "step": 16761 }, { "epoch": 0.7774582560296845, "grad_norm": 9.622062683105469, "learning_rate": 1.1958679603524588e-06, "loss": 0.4445, "step": 16762 }, { "epoch": 0.7775046382189239, "grad_norm": 6.8464813232421875, "learning_rate": 1.1953904074655747e-06, "loss": 0.3085, "step": 16763 }, { "epoch": 0.7775510204081633, "grad_norm": 6.57352876663208, "learning_rate": 1.1949129370035495e-06, "loss": 0.3312, "step": 16764 }, { "epoch": 0.7775974025974026, "grad_norm": 6.837917804718018, "learning_rate": 1.1944355489767285e-06, "loss": 0.251, "step": 16765 }, { "epoch": 0.777643784786642, "grad_norm": 6.722517967224121, "learning_rate": 1.1939582433954544e-06, "loss": 0.2892, "step": 16766 }, { "epoch": 0.7776901669758812, "grad_norm": 7.051758289337158, "learning_rate": 1.193481020270067e-06, "loss": 0.2608, "step": 16767 }, { "epoch": 0.7777365491651206, "grad_norm": 6.3297295570373535, "learning_rate": 1.1930038796109073e-06, "loss": 0.2845, "step": 16768 }, { "epoch": 0.7777829313543599, "grad_norm": 12.848782539367676, "learning_rate": 1.1925268214283092e-06, "loss": 0.4003, "step": 16769 }, { "epoch": 0.7778293135435993, "grad_norm": 11.38128662109375, "learning_rate": 1.192049845732609e-06, "loss": 0.4414, "step": 16770 }, { "epoch": 0.7778756957328385, "grad_norm": 10.815421104431152, "learning_rate": 1.1915729525341401e-06, "loss": 0.2367, "step": 16771 }, { "epoch": 0.7779220779220779, "grad_norm": 6.093074321746826, "learning_rate": 1.1910961418432355e-06, "loss": 0.3279, "step": 16772 }, { "epoch": 0.7779684601113173, "grad_norm": 8.710209846496582, "learning_rate": 1.1906194136702232e-06, "loss": 0.4086, "step": 16773 }, { "epoch": 0.7780148423005566, "grad_norm": 5.323132514953613, "learning_rate": 1.1901427680254334e-06, "loss": 0.3689, "step": 16774 }, { "epoch": 0.7780612244897959, "grad_norm": 8.337545394897461, "learning_rate": 1.1896662049191898e-06, "loss": 0.2995, "step": 16775 }, { "epoch": 0.7781076066790352, "grad_norm": 6.429002285003662, "learning_rate": 1.1891897243618184e-06, "loss": 0.3727, "step": 16776 }, { "epoch": 0.7781539888682746, "grad_norm": 4.859106540679932, "learning_rate": 1.1887133263636414e-06, "loss": 0.283, "step": 16777 }, { "epoch": 0.7782003710575139, "grad_norm": 6.58425235748291, "learning_rate": 1.18823701093498e-06, "loss": 0.2429, "step": 16778 }, { "epoch": 0.7782467532467533, "grad_norm": 7.732376575469971, "learning_rate": 1.1877607780861544e-06, "loss": 0.344, "step": 16779 }, { "epoch": 0.7782931354359925, "grad_norm": 4.36270809173584, "learning_rate": 1.1872846278274797e-06, "loss": 0.2828, "step": 16780 }, { "epoch": 0.7783395176252319, "grad_norm": 4.4185590744018555, "learning_rate": 1.1868085601692725e-06, "loss": 0.253, "step": 16781 }, { "epoch": 0.7783858998144713, "grad_norm": 7.064274311065674, "learning_rate": 1.1863325751218468e-06, "loss": 0.2419, "step": 16782 }, { "epoch": 0.7784322820037106, "grad_norm": 6.332122802734375, "learning_rate": 1.185856672695514e-06, "loss": 0.3909, "step": 16783 }, { "epoch": 0.7784786641929499, "grad_norm": 11.023350715637207, "learning_rate": 1.1853808529005844e-06, "loss": 0.3955, "step": 16784 }, { "epoch": 0.7785250463821892, "grad_norm": 6.3513617515563965, "learning_rate": 1.1849051157473684e-06, "loss": 0.3083, "step": 16785 }, { "epoch": 0.7785714285714286, "grad_norm": 11.099102020263672, "learning_rate": 1.1844294612461699e-06, "loss": 0.3657, "step": 16786 }, { "epoch": 0.7786178107606679, "grad_norm": 9.000910758972168, "learning_rate": 1.1839538894072938e-06, "loss": 0.2996, "step": 16787 }, { "epoch": 0.7786641929499072, "grad_norm": 5.321194648742676, "learning_rate": 1.1834784002410444e-06, "loss": 0.369, "step": 16788 }, { "epoch": 0.7787105751391465, "grad_norm": 10.269047737121582, "learning_rate": 1.183002993757722e-06, "loss": 0.4606, "step": 16789 }, { "epoch": 0.7787569573283859, "grad_norm": 6.3622260093688965, "learning_rate": 1.1825276699676286e-06, "loss": 0.2403, "step": 16790 }, { "epoch": 0.7788033395176253, "grad_norm": 5.566903591156006, "learning_rate": 1.1820524288810575e-06, "loss": 0.3385, "step": 16791 }, { "epoch": 0.7788497217068646, "grad_norm": 5.937619686126709, "learning_rate": 1.1815772705083072e-06, "loss": 0.3378, "step": 16792 }, { "epoch": 0.7788961038961039, "grad_norm": 12.906754493713379, "learning_rate": 1.1811021948596712e-06, "loss": 0.4445, "step": 16793 }, { "epoch": 0.7789424860853432, "grad_norm": 10.342639923095703, "learning_rate": 1.1806272019454424e-06, "loss": 0.4482, "step": 16794 }, { "epoch": 0.7789888682745826, "grad_norm": 9.429065704345703, "learning_rate": 1.1801522917759101e-06, "loss": 0.3508, "step": 16795 }, { "epoch": 0.7790352504638219, "grad_norm": 5.182860374450684, "learning_rate": 1.1796774643613657e-06, "loss": 0.3089, "step": 16796 }, { "epoch": 0.7790816326530612, "grad_norm": 5.927778244018555, "learning_rate": 1.179202719712092e-06, "loss": 0.2624, "step": 16797 }, { "epoch": 0.7791280148423005, "grad_norm": 6.965412139892578, "learning_rate": 1.1787280578383763e-06, "loss": 0.3094, "step": 16798 }, { "epoch": 0.7791743970315399, "grad_norm": 4.625664234161377, "learning_rate": 1.1782534787505017e-06, "loss": 0.3366, "step": 16799 }, { "epoch": 0.7792207792207793, "grad_norm": 8.816572189331055, "learning_rate": 1.1777789824587498e-06, "loss": 0.3514, "step": 16800 }, { "epoch": 0.7792671614100185, "grad_norm": 7.696358680725098, "learning_rate": 1.1773045689734014e-06, "loss": 0.2592, "step": 16801 }, { "epoch": 0.7793135435992579, "grad_norm": 7.888433456420898, "learning_rate": 1.1768302383047325e-06, "loss": 0.2762, "step": 16802 }, { "epoch": 0.7793599257884972, "grad_norm": 7.645496845245361, "learning_rate": 1.1763559904630194e-06, "loss": 0.3809, "step": 16803 }, { "epoch": 0.7794063079777366, "grad_norm": 5.237127304077148, "learning_rate": 1.175881825458537e-06, "loss": 0.2718, "step": 16804 }, { "epoch": 0.7794526901669759, "grad_norm": 6.695702075958252, "learning_rate": 1.1754077433015582e-06, "loss": 0.2915, "step": 16805 }, { "epoch": 0.7794990723562152, "grad_norm": 6.42854642868042, "learning_rate": 1.1749337440023528e-06, "loss": 0.2358, "step": 16806 }, { "epoch": 0.7795454545454545, "grad_norm": 8.869132041931152, "learning_rate": 1.1744598275711922e-06, "loss": 0.332, "step": 16807 }, { "epoch": 0.7795918367346939, "grad_norm": 7.266677379608154, "learning_rate": 1.17398599401834e-06, "loss": 0.2614, "step": 16808 }, { "epoch": 0.7796382189239333, "grad_norm": 8.276910781860352, "learning_rate": 1.1735122433540635e-06, "loss": 0.3666, "step": 16809 }, { "epoch": 0.7796846011131725, "grad_norm": 8.024425506591797, "learning_rate": 1.173038575588626e-06, "loss": 0.4574, "step": 16810 }, { "epoch": 0.7797309833024119, "grad_norm": 10.741179466247559, "learning_rate": 1.1725649907322888e-06, "loss": 0.4364, "step": 16811 }, { "epoch": 0.7797773654916512, "grad_norm": 4.428831577301025, "learning_rate": 1.1720914887953144e-06, "loss": 0.3516, "step": 16812 }, { "epoch": 0.7798237476808906, "grad_norm": 5.407410621643066, "learning_rate": 1.171618069787957e-06, "loss": 0.2956, "step": 16813 }, { "epoch": 0.7798701298701298, "grad_norm": 6.100576400756836, "learning_rate": 1.171144733720475e-06, "loss": 0.2654, "step": 16814 }, { "epoch": 0.7799165120593692, "grad_norm": 9.636749267578125, "learning_rate": 1.1706714806031228e-06, "loss": 0.3219, "step": 16815 }, { "epoch": 0.7799628942486085, "grad_norm": 8.12435531616211, "learning_rate": 1.1701983104461535e-06, "loss": 0.355, "step": 16816 }, { "epoch": 0.7800092764378479, "grad_norm": 6.903215408325195, "learning_rate": 1.169725223259817e-06, "loss": 0.329, "step": 16817 }, { "epoch": 0.7800556586270871, "grad_norm": 7.090977668762207, "learning_rate": 1.1692522190543653e-06, "loss": 0.2972, "step": 16818 }, { "epoch": 0.7801020408163265, "grad_norm": 9.561989784240723, "learning_rate": 1.1687792978400425e-06, "loss": 0.3856, "step": 16819 }, { "epoch": 0.7801484230055659, "grad_norm": 9.221105575561523, "learning_rate": 1.1683064596270955e-06, "loss": 0.3038, "step": 16820 }, { "epoch": 0.7801948051948052, "grad_norm": 13.1904878616333, "learning_rate": 1.1678337044257676e-06, "loss": 0.3671, "step": 16821 }, { "epoch": 0.7802411873840446, "grad_norm": 7.501346111297607, "learning_rate": 1.1673610322463014e-06, "loss": 0.3147, "step": 16822 }, { "epoch": 0.7802875695732838, "grad_norm": 5.03059196472168, "learning_rate": 1.166888443098938e-06, "loss": 0.3216, "step": 16823 }, { "epoch": 0.7803339517625232, "grad_norm": 6.466979503631592, "learning_rate": 1.1664159369939137e-06, "loss": 0.2901, "step": 16824 }, { "epoch": 0.7803803339517625, "grad_norm": 6.657119274139404, "learning_rate": 1.165943513941466e-06, "loss": 0.313, "step": 16825 }, { "epoch": 0.7804267161410019, "grad_norm": 6.163316249847412, "learning_rate": 1.1654711739518298e-06, "loss": 0.3136, "step": 16826 }, { "epoch": 0.7804730983302411, "grad_norm": 7.111032962799072, "learning_rate": 1.1649989170352383e-06, "loss": 0.4834, "step": 16827 }, { "epoch": 0.7805194805194805, "grad_norm": 9.12658977508545, "learning_rate": 1.1645267432019224e-06, "loss": 0.5655, "step": 16828 }, { "epoch": 0.7805658627087199, "grad_norm": 11.148634910583496, "learning_rate": 1.1640546524621133e-06, "loss": 0.2694, "step": 16829 }, { "epoch": 0.7806122448979592, "grad_norm": 10.475601196289062, "learning_rate": 1.1635826448260351e-06, "loss": 0.3171, "step": 16830 }, { "epoch": 0.7806586270871985, "grad_norm": 5.544609069824219, "learning_rate": 1.1631107203039155e-06, "loss": 0.3013, "step": 16831 }, { "epoch": 0.7807050092764378, "grad_norm": 5.488629341125488, "learning_rate": 1.1626388789059783e-06, "loss": 0.2789, "step": 16832 }, { "epoch": 0.7807513914656772, "grad_norm": 4.382674217224121, "learning_rate": 1.1621671206424461e-06, "loss": 0.3085, "step": 16833 }, { "epoch": 0.7807977736549165, "grad_norm": 9.36635971069336, "learning_rate": 1.1616954455235402e-06, "loss": 0.4162, "step": 16834 }, { "epoch": 0.7808441558441559, "grad_norm": 9.900145530700684, "learning_rate": 1.1612238535594767e-06, "loss": 0.3627, "step": 16835 }, { "epoch": 0.7808905380333951, "grad_norm": 17.84332275390625, "learning_rate": 1.1607523447604735e-06, "loss": 0.3458, "step": 16836 }, { "epoch": 0.7809369202226345, "grad_norm": 8.368319511413574, "learning_rate": 1.160280919136746e-06, "loss": 0.3022, "step": 16837 }, { "epoch": 0.7809833024118739, "grad_norm": 5.928800582885742, "learning_rate": 1.1598095766985069e-06, "loss": 0.2119, "step": 16838 }, { "epoch": 0.7810296846011132, "grad_norm": 9.056618690490723, "learning_rate": 1.1593383174559674e-06, "loss": 0.4074, "step": 16839 }, { "epoch": 0.7810760667903525, "grad_norm": 7.7067999839782715, "learning_rate": 1.1588671414193397e-06, "loss": 0.3031, "step": 16840 }, { "epoch": 0.7811224489795918, "grad_norm": 5.372117042541504, "learning_rate": 1.1583960485988278e-06, "loss": 0.2021, "step": 16841 }, { "epoch": 0.7811688311688312, "grad_norm": 17.721355438232422, "learning_rate": 1.1579250390046392e-06, "loss": 0.3105, "step": 16842 }, { "epoch": 0.7812152133580705, "grad_norm": 8.037720680236816, "learning_rate": 1.1574541126469779e-06, "loss": 0.4417, "step": 16843 }, { "epoch": 0.7812615955473098, "grad_norm": 6.294075012207031, "learning_rate": 1.1569832695360466e-06, "loss": 0.2682, "step": 16844 }, { "epoch": 0.7813079777365491, "grad_norm": 23.936283111572266, "learning_rate": 1.1565125096820473e-06, "loss": 0.3183, "step": 16845 }, { "epoch": 0.7813543599257885, "grad_norm": 4.628954887390137, "learning_rate": 1.156041833095176e-06, "loss": 0.3065, "step": 16846 }, { "epoch": 0.7814007421150279, "grad_norm": 6.6472015380859375, "learning_rate": 1.1555712397856306e-06, "loss": 0.3407, "step": 16847 }, { "epoch": 0.7814471243042672, "grad_norm": 14.414196968078613, "learning_rate": 1.155100729763607e-06, "loss": 0.4536, "step": 16848 }, { "epoch": 0.7814935064935065, "grad_norm": 10.717597007751465, "learning_rate": 1.154630303039298e-06, "loss": 0.4693, "step": 16849 }, { "epoch": 0.7815398886827458, "grad_norm": 4.2278828620910645, "learning_rate": 1.1541599596228964e-06, "loss": 0.3168, "step": 16850 }, { "epoch": 0.7815862708719852, "grad_norm": 8.396462440490723, "learning_rate": 1.1536896995245894e-06, "loss": 0.3245, "step": 16851 }, { "epoch": 0.7816326530612245, "grad_norm": 11.081660270690918, "learning_rate": 1.1532195227545667e-06, "loss": 0.2918, "step": 16852 }, { "epoch": 0.7816790352504638, "grad_norm": 11.77114486694336, "learning_rate": 1.1527494293230135e-06, "loss": 0.4368, "step": 16853 }, { "epoch": 0.7817254174397031, "grad_norm": 5.918369293212891, "learning_rate": 1.1522794192401154e-06, "loss": 0.2954, "step": 16854 }, { "epoch": 0.7817717996289425, "grad_norm": 8.664925575256348, "learning_rate": 1.1518094925160538e-06, "loss": 0.3329, "step": 16855 }, { "epoch": 0.7818181818181819, "grad_norm": 7.452586650848389, "learning_rate": 1.1513396491610113e-06, "loss": 0.3205, "step": 16856 }, { "epoch": 0.7818645640074211, "grad_norm": 4.90958309173584, "learning_rate": 1.150869889185164e-06, "loss": 0.3266, "step": 16857 }, { "epoch": 0.7819109461966605, "grad_norm": 6.877789497375488, "learning_rate": 1.1504002125986903e-06, "loss": 0.2879, "step": 16858 }, { "epoch": 0.7819573283858998, "grad_norm": 10.420333862304688, "learning_rate": 1.1499306194117654e-06, "loss": 0.3738, "step": 16859 }, { "epoch": 0.7820037105751392, "grad_norm": 8.173105239868164, "learning_rate": 1.1494611096345632e-06, "loss": 0.2804, "step": 16860 }, { "epoch": 0.7820500927643784, "grad_norm": 9.221915245056152, "learning_rate": 1.1489916832772563e-06, "loss": 0.3877, "step": 16861 }, { "epoch": 0.7820964749536178, "grad_norm": 7.5568437576293945, "learning_rate": 1.1485223403500118e-06, "loss": 0.2876, "step": 16862 }, { "epoch": 0.7821428571428571, "grad_norm": 7.0911359786987305, "learning_rate": 1.1480530808629996e-06, "loss": 0.3253, "step": 16863 }, { "epoch": 0.7821892393320965, "grad_norm": 7.85189962387085, "learning_rate": 1.147583904826386e-06, "loss": 0.3686, "step": 16864 }, { "epoch": 0.7822356215213359, "grad_norm": 7.032832622528076, "learning_rate": 1.147114812250335e-06, "loss": 0.3192, "step": 16865 }, { "epoch": 0.7822820037105751, "grad_norm": 5.204726696014404, "learning_rate": 1.1466458031450088e-06, "loss": 0.2852, "step": 16866 }, { "epoch": 0.7823283858998145, "grad_norm": 11.195719718933105, "learning_rate": 1.1461768775205712e-06, "loss": 0.269, "step": 16867 }, { "epoch": 0.7823747680890538, "grad_norm": 10.892627716064453, "learning_rate": 1.145708035387177e-06, "loss": 0.3325, "step": 16868 }, { "epoch": 0.7824211502782932, "grad_norm": 6.249592304229736, "learning_rate": 1.1452392767549853e-06, "loss": 0.34, "step": 16869 }, { "epoch": 0.7824675324675324, "grad_norm": 7.07338809967041, "learning_rate": 1.1447706016341513e-06, "loss": 0.3346, "step": 16870 }, { "epoch": 0.7825139146567718, "grad_norm": 8.685198783874512, "learning_rate": 1.144302010034829e-06, "loss": 0.2588, "step": 16871 }, { "epoch": 0.7825602968460111, "grad_norm": 6.004518032073975, "learning_rate": 1.1438335019671715e-06, "loss": 0.2641, "step": 16872 }, { "epoch": 0.7826066790352505, "grad_norm": 9.055937767028809, "learning_rate": 1.1433650774413257e-06, "loss": 0.4218, "step": 16873 }, { "epoch": 0.7826530612244897, "grad_norm": 5.064850807189941, "learning_rate": 1.1428967364674414e-06, "loss": 0.2324, "step": 16874 }, { "epoch": 0.7826994434137291, "grad_norm": 4.358926773071289, "learning_rate": 1.142428479055665e-06, "loss": 0.3224, "step": 16875 }, { "epoch": 0.7827458256029685, "grad_norm": 7.646090507507324, "learning_rate": 1.141960305216141e-06, "loss": 0.3705, "step": 16876 }, { "epoch": 0.7827922077922078, "grad_norm": 6.347657203674316, "learning_rate": 1.1414922149590119e-06, "loss": 0.3322, "step": 16877 }, { "epoch": 0.7828385899814472, "grad_norm": 9.064406394958496, "learning_rate": 1.14102420829442e-06, "loss": 0.2841, "step": 16878 }, { "epoch": 0.7828849721706864, "grad_norm": 5.171232223510742, "learning_rate": 1.140556285232502e-06, "loss": 0.2627, "step": 16879 }, { "epoch": 0.7829313543599258, "grad_norm": 7.966856002807617, "learning_rate": 1.1400884457833966e-06, "loss": 0.3161, "step": 16880 }, { "epoch": 0.7829777365491651, "grad_norm": 4.179614067077637, "learning_rate": 1.1396206899572388e-06, "loss": 0.2725, "step": 16881 }, { "epoch": 0.7830241187384045, "grad_norm": 5.9062933921813965, "learning_rate": 1.139153017764163e-06, "loss": 0.2766, "step": 16882 }, { "epoch": 0.7830705009276437, "grad_norm": 9.173532485961914, "learning_rate": 1.1386854292143019e-06, "loss": 0.3778, "step": 16883 }, { "epoch": 0.7831168831168831, "grad_norm": 6.237879753112793, "learning_rate": 1.1382179243177826e-06, "loss": 0.2393, "step": 16884 }, { "epoch": 0.7831632653061225, "grad_norm": 6.162220478057861, "learning_rate": 1.1377505030847358e-06, "loss": 0.3777, "step": 16885 }, { "epoch": 0.7832096474953618, "grad_norm": 8.076255798339844, "learning_rate": 1.137283165525287e-06, "loss": 0.3271, "step": 16886 }, { "epoch": 0.7832560296846011, "grad_norm": 6.890034198760986, "learning_rate": 1.1368159116495608e-06, "loss": 0.2915, "step": 16887 }, { "epoch": 0.7833024118738404, "grad_norm": 5.959043502807617, "learning_rate": 1.1363487414676805e-06, "loss": 0.2956, "step": 16888 }, { "epoch": 0.7833487940630798, "grad_norm": 8.79442310333252, "learning_rate": 1.1358816549897677e-06, "loss": 0.2186, "step": 16889 }, { "epoch": 0.7833951762523191, "grad_norm": 8.266817092895508, "learning_rate": 1.13541465222594e-06, "loss": 0.3877, "step": 16890 }, { "epoch": 0.7834415584415585, "grad_norm": 7.243247985839844, "learning_rate": 1.134947733186315e-06, "loss": 0.3363, "step": 16891 }, { "epoch": 0.7834879406307977, "grad_norm": 6.154437065124512, "learning_rate": 1.134480897881009e-06, "loss": 0.2679, "step": 16892 }, { "epoch": 0.7835343228200371, "grad_norm": 12.570862770080566, "learning_rate": 1.134014146320136e-06, "loss": 0.3893, "step": 16893 }, { "epoch": 0.7835807050092765, "grad_norm": 8.81849479675293, "learning_rate": 1.1335474785138084e-06, "loss": 0.403, "step": 16894 }, { "epoch": 0.7836270871985158, "grad_norm": 7.975885391235352, "learning_rate": 1.1330808944721338e-06, "loss": 0.2908, "step": 16895 }, { "epoch": 0.7836734693877551, "grad_norm": 6.134786128997803, "learning_rate": 1.132614394205222e-06, "loss": 0.3247, "step": 16896 }, { "epoch": 0.7837198515769944, "grad_norm": 4.46138858795166, "learning_rate": 1.1321479777231798e-06, "loss": 0.2299, "step": 16897 }, { "epoch": 0.7837662337662338, "grad_norm": 8.547270774841309, "learning_rate": 1.1316816450361113e-06, "loss": 0.4627, "step": 16898 }, { "epoch": 0.7838126159554731, "grad_norm": 6.008488178253174, "learning_rate": 1.1312153961541196e-06, "loss": 0.3849, "step": 16899 }, { "epoch": 0.7838589981447124, "grad_norm": 6.395198822021484, "learning_rate": 1.1307492310873075e-06, "loss": 0.3309, "step": 16900 }, { "epoch": 0.7839053803339517, "grad_norm": 6.2965521812438965, "learning_rate": 1.1302831498457706e-06, "loss": 0.3961, "step": 16901 }, { "epoch": 0.7839517625231911, "grad_norm": 6.469820022583008, "learning_rate": 1.129817152439609e-06, "loss": 0.3304, "step": 16902 }, { "epoch": 0.7839981447124305, "grad_norm": 10.452348709106445, "learning_rate": 1.1293512388789168e-06, "loss": 0.4127, "step": 16903 }, { "epoch": 0.7840445269016698, "grad_norm": 8.127401351928711, "learning_rate": 1.128885409173789e-06, "loss": 0.3627, "step": 16904 }, { "epoch": 0.7840909090909091, "grad_norm": 7.7481231689453125, "learning_rate": 1.1284196633343175e-06, "loss": 0.4291, "step": 16905 }, { "epoch": 0.7841372912801484, "grad_norm": 4.436368465423584, "learning_rate": 1.1279540013705915e-06, "loss": 0.2797, "step": 16906 }, { "epoch": 0.7841836734693878, "grad_norm": 8.775248527526855, "learning_rate": 1.127488423292699e-06, "loss": 0.4021, "step": 16907 }, { "epoch": 0.7842300556586271, "grad_norm": 6.108771800994873, "learning_rate": 1.1270229291107276e-06, "loss": 0.2317, "step": 16908 }, { "epoch": 0.7842764378478664, "grad_norm": 7.986642837524414, "learning_rate": 1.1265575188347622e-06, "loss": 0.3747, "step": 16909 }, { "epoch": 0.7843228200371057, "grad_norm": 4.607533931732178, "learning_rate": 1.1260921924748847e-06, "loss": 0.279, "step": 16910 }, { "epoch": 0.7843692022263451, "grad_norm": 8.948751449584961, "learning_rate": 1.1256269500411777e-06, "loss": 0.4673, "step": 16911 }, { "epoch": 0.7844155844155845, "grad_norm": 13.237467765808105, "learning_rate": 1.125161791543718e-06, "loss": 0.413, "step": 16912 }, { "epoch": 0.7844619666048237, "grad_norm": 11.41307258605957, "learning_rate": 1.1246967169925843e-06, "loss": 0.2919, "step": 16913 }, { "epoch": 0.7845083487940631, "grad_norm": 5.228042125701904, "learning_rate": 1.1242317263978525e-06, "loss": 0.2891, "step": 16914 }, { "epoch": 0.7845547309833024, "grad_norm": 8.028655052185059, "learning_rate": 1.123766819769596e-06, "loss": 0.2966, "step": 16915 }, { "epoch": 0.7846011131725418, "grad_norm": 8.646430015563965, "learning_rate": 1.1233019971178882e-06, "loss": 0.3292, "step": 16916 }, { "epoch": 0.784647495361781, "grad_norm": 8.392059326171875, "learning_rate": 1.1228372584527964e-06, "loss": 0.298, "step": 16917 }, { "epoch": 0.7846938775510204, "grad_norm": 6.742813587188721, "learning_rate": 1.1223726037843907e-06, "loss": 0.2182, "step": 16918 }, { "epoch": 0.7847402597402597, "grad_norm": 4.526821613311768, "learning_rate": 1.1219080331227372e-06, "loss": 0.3153, "step": 16919 }, { "epoch": 0.7847866419294991, "grad_norm": 12.173691749572754, "learning_rate": 1.1214435464779006e-06, "loss": 0.4741, "step": 16920 }, { "epoch": 0.7848330241187385, "grad_norm": 4.941897869110107, "learning_rate": 1.1209791438599438e-06, "loss": 0.2228, "step": 16921 }, { "epoch": 0.7848794063079777, "grad_norm": 8.023594856262207, "learning_rate": 1.1205148252789294e-06, "loss": 0.2922, "step": 16922 }, { "epoch": 0.7849257884972171, "grad_norm": 6.3951311111450195, "learning_rate": 1.1200505907449134e-06, "loss": 0.3494, "step": 16923 }, { "epoch": 0.7849721706864564, "grad_norm": 10.641438484191895, "learning_rate": 1.1195864402679553e-06, "loss": 0.4224, "step": 16924 }, { "epoch": 0.7850185528756958, "grad_norm": 6.293806076049805, "learning_rate": 1.1191223738581098e-06, "loss": 0.3189, "step": 16925 }, { "epoch": 0.785064935064935, "grad_norm": 5.830636978149414, "learning_rate": 1.1186583915254312e-06, "loss": 0.3345, "step": 16926 }, { "epoch": 0.7851113172541744, "grad_norm": 4.380958080291748, "learning_rate": 1.1181944932799732e-06, "loss": 0.3069, "step": 16927 }, { "epoch": 0.7851576994434137, "grad_norm": 9.455429077148438, "learning_rate": 1.1177306791317821e-06, "loss": 0.2156, "step": 16928 }, { "epoch": 0.7852040816326531, "grad_norm": 5.982260227203369, "learning_rate": 1.1172669490909088e-06, "loss": 0.3205, "step": 16929 }, { "epoch": 0.7852504638218923, "grad_norm": 7.30472469329834, "learning_rate": 1.1168033031673986e-06, "loss": 0.3518, "step": 16930 }, { "epoch": 0.7852968460111317, "grad_norm": 6.40652322769165, "learning_rate": 1.1163397413712967e-06, "loss": 0.3795, "step": 16931 }, { "epoch": 0.7853432282003711, "grad_norm": 13.502339363098145, "learning_rate": 1.1158762637126463e-06, "loss": 0.3076, "step": 16932 }, { "epoch": 0.7853896103896104, "grad_norm": 3.934401512145996, "learning_rate": 1.1154128702014893e-06, "loss": 0.232, "step": 16933 }, { "epoch": 0.7854359925788498, "grad_norm": 3.4578628540039062, "learning_rate": 1.1149495608478617e-06, "loss": 0.2433, "step": 16934 }, { "epoch": 0.785482374768089, "grad_norm": 6.19326114654541, "learning_rate": 1.114486335661803e-06, "loss": 0.3009, "step": 16935 }, { "epoch": 0.7855287569573284, "grad_norm": 6.557425498962402, "learning_rate": 1.1140231946533486e-06, "loss": 0.2864, "step": 16936 }, { "epoch": 0.7855751391465677, "grad_norm": 8.021220207214355, "learning_rate": 1.1135601378325317e-06, "loss": 0.3149, "step": 16937 }, { "epoch": 0.7856215213358071, "grad_norm": 4.5419697761535645, "learning_rate": 1.1130971652093863e-06, "loss": 0.3063, "step": 16938 }, { "epoch": 0.7856679035250463, "grad_norm": 5.71344518661499, "learning_rate": 1.112634276793939e-06, "loss": 0.3538, "step": 16939 }, { "epoch": 0.7857142857142857, "grad_norm": 5.271859645843506, "learning_rate": 1.1121714725962197e-06, "loss": 0.3376, "step": 16940 }, { "epoch": 0.7857606679035251, "grad_norm": 4.696167945861816, "learning_rate": 1.111708752626255e-06, "loss": 0.3225, "step": 16941 }, { "epoch": 0.7858070500927644, "grad_norm": 7.610599517822266, "learning_rate": 1.1112461168940692e-06, "loss": 0.3835, "step": 16942 }, { "epoch": 0.7858534322820037, "grad_norm": 7.38527774810791, "learning_rate": 1.1107835654096866e-06, "loss": 0.2726, "step": 16943 }, { "epoch": 0.785899814471243, "grad_norm": 4.535241603851318, "learning_rate": 1.1103210981831252e-06, "loss": 0.2527, "step": 16944 }, { "epoch": 0.7859461966604824, "grad_norm": 7.126676082611084, "learning_rate": 1.1098587152244061e-06, "loss": 0.3644, "step": 16945 }, { "epoch": 0.7859925788497217, "grad_norm": 4.876781463623047, "learning_rate": 1.1093964165435455e-06, "loss": 0.2803, "step": 16946 }, { "epoch": 0.7860389610389611, "grad_norm": 10.514708518981934, "learning_rate": 1.10893420215056e-06, "loss": 0.3414, "step": 16947 }, { "epoch": 0.7860853432282003, "grad_norm": 5.431373596191406, "learning_rate": 1.1084720720554637e-06, "loss": 0.3071, "step": 16948 }, { "epoch": 0.7861317254174397, "grad_norm": 9.013734817504883, "learning_rate": 1.1080100262682663e-06, "loss": 0.2793, "step": 16949 }, { "epoch": 0.786178107606679, "grad_norm": 5.921809196472168, "learning_rate": 1.1075480647989783e-06, "loss": 0.3154, "step": 16950 }, { "epoch": 0.7862244897959184, "grad_norm": 7.6287713050842285, "learning_rate": 1.107086187657609e-06, "loss": 0.4088, "step": 16951 }, { "epoch": 0.7862708719851577, "grad_norm": 12.157112121582031, "learning_rate": 1.1066243948541638e-06, "loss": 0.4136, "step": 16952 }, { "epoch": 0.786317254174397, "grad_norm": 7.562524318695068, "learning_rate": 1.1061626863986491e-06, "loss": 0.3852, "step": 16953 }, { "epoch": 0.7863636363636364, "grad_norm": 11.845856666564941, "learning_rate": 1.1057010623010644e-06, "loss": 0.4283, "step": 16954 }, { "epoch": 0.7864100185528757, "grad_norm": 7.739737033843994, "learning_rate": 1.1052395225714125e-06, "loss": 0.3387, "step": 16955 }, { "epoch": 0.786456400742115, "grad_norm": 9.812270164489746, "learning_rate": 1.1047780672196923e-06, "loss": 0.3162, "step": 16956 }, { "epoch": 0.7865027829313543, "grad_norm": 5.494509220123291, "learning_rate": 1.1043166962559005e-06, "loss": 0.1955, "step": 16957 }, { "epoch": 0.7865491651205937, "grad_norm": 10.904314041137695, "learning_rate": 1.103855409690034e-06, "loss": 0.3875, "step": 16958 }, { "epoch": 0.786595547309833, "grad_norm": 7.237059593200684, "learning_rate": 1.103394207532083e-06, "loss": 0.3631, "step": 16959 }, { "epoch": 0.7866419294990723, "grad_norm": 9.618309020996094, "learning_rate": 1.102933089792042e-06, "loss": 0.3903, "step": 16960 }, { "epoch": 0.7866883116883117, "grad_norm": 4.105321407318115, "learning_rate": 1.1024720564799002e-06, "loss": 0.2885, "step": 16961 }, { "epoch": 0.786734693877551, "grad_norm": 8.770333290100098, "learning_rate": 1.1020111076056457e-06, "loss": 0.324, "step": 16962 }, { "epoch": 0.7867810760667904, "grad_norm": 15.63424015045166, "learning_rate": 1.1015502431792652e-06, "loss": 0.424, "step": 16963 }, { "epoch": 0.7868274582560297, "grad_norm": 11.152569770812988, "learning_rate": 1.1010894632107415e-06, "loss": 0.5229, "step": 16964 }, { "epoch": 0.786873840445269, "grad_norm": 7.885020732879639, "learning_rate": 1.1006287677100586e-06, "loss": 0.3884, "step": 16965 }, { "epoch": 0.7869202226345083, "grad_norm": 12.561994552612305, "learning_rate": 1.1001681566871959e-06, "loss": 0.2293, "step": 16966 }, { "epoch": 0.7869666048237477, "grad_norm": 8.98404598236084, "learning_rate": 1.0997076301521337e-06, "loss": 0.4, "step": 16967 }, { "epoch": 0.787012987012987, "grad_norm": 7.601101875305176, "learning_rate": 1.0992471881148497e-06, "loss": 0.3733, "step": 16968 }, { "epoch": 0.7870593692022263, "grad_norm": 13.549925804138184, "learning_rate": 1.0987868305853168e-06, "loss": 0.4617, "step": 16969 }, { "epoch": 0.7871057513914657, "grad_norm": 8.981071472167969, "learning_rate": 1.098326557573509e-06, "loss": 0.3397, "step": 16970 }, { "epoch": 0.787152133580705, "grad_norm": 8.942989349365234, "learning_rate": 1.0978663690893988e-06, "loss": 0.304, "step": 16971 }, { "epoch": 0.7871985157699444, "grad_norm": 7.25160026550293, "learning_rate": 1.0974062651429557e-06, "loss": 0.2336, "step": 16972 }, { "epoch": 0.7872448979591836, "grad_norm": 6.486123085021973, "learning_rate": 1.0969462457441483e-06, "loss": 0.2401, "step": 16973 }, { "epoch": 0.787291280148423, "grad_norm": 16.969816207885742, "learning_rate": 1.096486310902941e-06, "loss": 0.3597, "step": 16974 }, { "epoch": 0.7873376623376623, "grad_norm": 7.007887363433838, "learning_rate": 1.0960264606292987e-06, "loss": 0.2558, "step": 16975 }, { "epoch": 0.7873840445269017, "grad_norm": 5.2199506759643555, "learning_rate": 1.0955666949331839e-06, "loss": 0.196, "step": 16976 }, { "epoch": 0.787430426716141, "grad_norm": 9.956708908081055, "learning_rate": 1.0951070138245574e-06, "loss": 0.2851, "step": 16977 }, { "epoch": 0.7874768089053803, "grad_norm": 5.7548112869262695, "learning_rate": 1.0946474173133791e-06, "loss": 0.3215, "step": 16978 }, { "epoch": 0.7875231910946197, "grad_norm": 8.962109565734863, "learning_rate": 1.0941879054096032e-06, "loss": 0.351, "step": 16979 }, { "epoch": 0.787569573283859, "grad_norm": 8.469538688659668, "learning_rate": 1.0937284781231865e-06, "loss": 0.3829, "step": 16980 }, { "epoch": 0.7876159554730984, "grad_norm": 7.823869705200195, "learning_rate": 1.0932691354640817e-06, "loss": 0.2662, "step": 16981 }, { "epoch": 0.7876623376623376, "grad_norm": 5.382328987121582, "learning_rate": 1.0928098774422409e-06, "loss": 0.3932, "step": 16982 }, { "epoch": 0.787708719851577, "grad_norm": 8.983609199523926, "learning_rate": 1.092350704067614e-06, "loss": 0.3098, "step": 16983 }, { "epoch": 0.7877551020408163, "grad_norm": 4.370254993438721, "learning_rate": 1.091891615350147e-06, "loss": 0.2193, "step": 16984 }, { "epoch": 0.7878014842300557, "grad_norm": 11.486715316772461, "learning_rate": 1.0914326112997868e-06, "loss": 0.4047, "step": 16985 }, { "epoch": 0.7878478664192949, "grad_norm": 7.57204532623291, "learning_rate": 1.090973691926478e-06, "loss": 0.3117, "step": 16986 }, { "epoch": 0.7878942486085343, "grad_norm": 8.911211967468262, "learning_rate": 1.090514857240162e-06, "loss": 0.3194, "step": 16987 }, { "epoch": 0.7879406307977737, "grad_norm": 11.332881927490234, "learning_rate": 1.0900561072507803e-06, "loss": 0.2629, "step": 16988 }, { "epoch": 0.787987012987013, "grad_norm": 8.647578239440918, "learning_rate": 1.0895974419682703e-06, "loss": 0.2986, "step": 16989 }, { "epoch": 0.7880333951762524, "grad_norm": 5.988708019256592, "learning_rate": 1.0891388614025688e-06, "loss": 0.3178, "step": 16990 }, { "epoch": 0.7880797773654916, "grad_norm": 9.88904857635498, "learning_rate": 1.0886803655636114e-06, "loss": 0.4364, "step": 16991 }, { "epoch": 0.788126159554731, "grad_norm": 5.215721607208252, "learning_rate": 1.0882219544613304e-06, "loss": 0.3, "step": 16992 }, { "epoch": 0.7881725417439703, "grad_norm": 8.380533218383789, "learning_rate": 1.087763628105659e-06, "loss": 0.314, "step": 16993 }, { "epoch": 0.7882189239332097, "grad_norm": 8.387413024902344, "learning_rate": 1.0873053865065242e-06, "loss": 0.2434, "step": 16994 }, { "epoch": 0.7882653061224489, "grad_norm": 7.398128509521484, "learning_rate": 1.0868472296738541e-06, "loss": 0.3467, "step": 16995 }, { "epoch": 0.7883116883116883, "grad_norm": 6.265561103820801, "learning_rate": 1.086389157617575e-06, "loss": 0.2744, "step": 16996 }, { "epoch": 0.7883580705009277, "grad_norm": 6.884929180145264, "learning_rate": 1.0859311703476105e-06, "loss": 0.3363, "step": 16997 }, { "epoch": 0.788404452690167, "grad_norm": 8.19482421875, "learning_rate": 1.0854732678738838e-06, "loss": 0.298, "step": 16998 }, { "epoch": 0.7884508348794063, "grad_norm": 6.773850917816162, "learning_rate": 1.0850154502063132e-06, "loss": 0.2847, "step": 16999 }, { "epoch": 0.7884972170686456, "grad_norm": 5.871165752410889, "learning_rate": 1.0845577173548172e-06, "loss": 0.3282, "step": 17000 }, { "epoch": 0.788543599257885, "grad_norm": 7.67881441116333, "learning_rate": 1.0841000693293136e-06, "loss": 0.3217, "step": 17001 }, { "epoch": 0.7885899814471243, "grad_norm": 18.29981231689453, "learning_rate": 1.0836425061397165e-06, "loss": 0.4907, "step": 17002 }, { "epoch": 0.7886363636363637, "grad_norm": 6.073060035705566, "learning_rate": 1.0831850277959387e-06, "loss": 0.3158, "step": 17003 }, { "epoch": 0.7886827458256029, "grad_norm": 6.922381401062012, "learning_rate": 1.0827276343078936e-06, "loss": 0.2225, "step": 17004 }, { "epoch": 0.7887291280148423, "grad_norm": 5.110634803771973, "learning_rate": 1.0822703256854856e-06, "loss": 0.295, "step": 17005 }, { "epoch": 0.7887755102040817, "grad_norm": 5.3767194747924805, "learning_rate": 1.081813101938625e-06, "loss": 0.3017, "step": 17006 }, { "epoch": 0.788821892393321, "grad_norm": 10.38625717163086, "learning_rate": 1.0813559630772174e-06, "loss": 0.2333, "step": 17007 }, { "epoch": 0.7888682745825603, "grad_norm": 6.58328914642334, "learning_rate": 1.0808989091111655e-06, "loss": 0.3573, "step": 17008 }, { "epoch": 0.7889146567717996, "grad_norm": 4.534702301025391, "learning_rate": 1.080441940050373e-06, "loss": 0.251, "step": 17009 }, { "epoch": 0.788961038961039, "grad_norm": 8.975259780883789, "learning_rate": 1.079985055904737e-06, "loss": 0.2636, "step": 17010 }, { "epoch": 0.7890074211502783, "grad_norm": 6.732227325439453, "learning_rate": 1.0795282566841574e-06, "loss": 0.3485, "step": 17011 }, { "epoch": 0.7890538033395176, "grad_norm": 10.371502876281738, "learning_rate": 1.0790715423985303e-06, "loss": 0.3983, "step": 17012 }, { "epoch": 0.7891001855287569, "grad_norm": 9.569063186645508, "learning_rate": 1.0786149130577506e-06, "loss": 0.3534, "step": 17013 }, { "epoch": 0.7891465677179963, "grad_norm": 8.552412986755371, "learning_rate": 1.0781583686717101e-06, "loss": 0.4081, "step": 17014 }, { "epoch": 0.7891929499072357, "grad_norm": 7.774197101593018, "learning_rate": 1.0777019092503011e-06, "loss": 0.2963, "step": 17015 }, { "epoch": 0.7892393320964749, "grad_norm": 6.025627613067627, "learning_rate": 1.07724553480341e-06, "loss": 0.3226, "step": 17016 }, { "epoch": 0.7892857142857143, "grad_norm": 8.318144798278809, "learning_rate": 1.0767892453409257e-06, "loss": 0.2205, "step": 17017 }, { "epoch": 0.7893320964749536, "grad_norm": 5.341972351074219, "learning_rate": 1.076333040872733e-06, "loss": 0.2938, "step": 17018 }, { "epoch": 0.789378478664193, "grad_norm": 9.21557331085205, "learning_rate": 1.0758769214087156e-06, "loss": 0.3094, "step": 17019 }, { "epoch": 0.7894248608534323, "grad_norm": 8.528914451599121, "learning_rate": 1.0754208869587562e-06, "loss": 0.2917, "step": 17020 }, { "epoch": 0.7894712430426716, "grad_norm": 10.739542961120605, "learning_rate": 1.0749649375327321e-06, "loss": 0.3636, "step": 17021 }, { "epoch": 0.7895176252319109, "grad_norm": 9.099578857421875, "learning_rate": 1.0745090731405222e-06, "loss": 0.3594, "step": 17022 }, { "epoch": 0.7895640074211503, "grad_norm": 6.251907825469971, "learning_rate": 1.0740532937920028e-06, "loss": 0.2978, "step": 17023 }, { "epoch": 0.7896103896103897, "grad_norm": 6.868581771850586, "learning_rate": 1.0735975994970477e-06, "loss": 0.2995, "step": 17024 }, { "epoch": 0.7896567717996289, "grad_norm": 7.000436305999756, "learning_rate": 1.0731419902655315e-06, "loss": 0.2329, "step": 17025 }, { "epoch": 0.7897031539888683, "grad_norm": 7.124391078948975, "learning_rate": 1.0726864661073216e-06, "loss": 0.2618, "step": 17026 }, { "epoch": 0.7897495361781076, "grad_norm": 7.749538421630859, "learning_rate": 1.0722310270322877e-06, "loss": 0.3159, "step": 17027 }, { "epoch": 0.789795918367347, "grad_norm": 3.9235639572143555, "learning_rate": 1.071775673050297e-06, "loss": 0.2709, "step": 17028 }, { "epoch": 0.7898423005565862, "grad_norm": 5.78243350982666, "learning_rate": 1.0713204041712145e-06, "loss": 0.3268, "step": 17029 }, { "epoch": 0.7898886827458256, "grad_norm": 14.515228271484375, "learning_rate": 1.070865220404903e-06, "loss": 0.3977, "step": 17030 }, { "epoch": 0.7899350649350649, "grad_norm": 12.48491382598877, "learning_rate": 1.0704101217612257e-06, "loss": 0.3309, "step": 17031 }, { "epoch": 0.7899814471243043, "grad_norm": 16.831409454345703, "learning_rate": 1.0699551082500387e-06, "loss": 0.2908, "step": 17032 }, { "epoch": 0.7900278293135437, "grad_norm": 4.957341194152832, "learning_rate": 1.0695001798812016e-06, "loss": 0.3037, "step": 17033 }, { "epoch": 0.7900742115027829, "grad_norm": 6.171204566955566, "learning_rate": 1.0690453366645704e-06, "loss": 0.1779, "step": 17034 }, { "epoch": 0.7901205936920223, "grad_norm": 5.4864301681518555, "learning_rate": 1.0685905786099981e-06, "loss": 0.2653, "step": 17035 }, { "epoch": 0.7901669758812616, "grad_norm": 10.81390380859375, "learning_rate": 1.0681359057273388e-06, "loss": 0.3353, "step": 17036 }, { "epoch": 0.790213358070501, "grad_norm": 6.754603385925293, "learning_rate": 1.06768131802644e-06, "loss": 0.2868, "step": 17037 }, { "epoch": 0.7902597402597402, "grad_norm": 11.290898323059082, "learning_rate": 1.0672268155171516e-06, "loss": 0.3779, "step": 17038 }, { "epoch": 0.7903061224489796, "grad_norm": 6.935927391052246, "learning_rate": 1.0667723982093198e-06, "loss": 0.1836, "step": 17039 }, { "epoch": 0.7903525046382189, "grad_norm": 4.443467140197754, "learning_rate": 1.0663180661127892e-06, "loss": 0.2973, "step": 17040 }, { "epoch": 0.7903988868274583, "grad_norm": 7.70661735534668, "learning_rate": 1.0658638192374028e-06, "loss": 0.3724, "step": 17041 }, { "epoch": 0.7904452690166975, "grad_norm": 5.522702217102051, "learning_rate": 1.0654096575930035e-06, "loss": 0.2809, "step": 17042 }, { "epoch": 0.7904916512059369, "grad_norm": 7.2543625831604, "learning_rate": 1.064955581189427e-06, "loss": 0.3532, "step": 17043 }, { "epoch": 0.7905380333951763, "grad_norm": 7.2824177742004395, "learning_rate": 1.0645015900365124e-06, "loss": 0.2897, "step": 17044 }, { "epoch": 0.7905844155844156, "grad_norm": 13.478421211242676, "learning_rate": 1.064047684144095e-06, "loss": 0.4525, "step": 17045 }, { "epoch": 0.790630797773655, "grad_norm": 9.58739185333252, "learning_rate": 1.063593863522009e-06, "loss": 0.268, "step": 17046 }, { "epoch": 0.7906771799628942, "grad_norm": 7.369647026062012, "learning_rate": 1.0631401281800863e-06, "loss": 0.3543, "step": 17047 }, { "epoch": 0.7907235621521336, "grad_norm": 6.526613712310791, "learning_rate": 1.0626864781281553e-06, "loss": 0.3125, "step": 17048 }, { "epoch": 0.7907699443413729, "grad_norm": 6.183207035064697, "learning_rate": 1.062232913376045e-06, "loss": 0.31, "step": 17049 }, { "epoch": 0.7908163265306123, "grad_norm": 11.167011260986328, "learning_rate": 1.061779433933582e-06, "loss": 0.2842, "step": 17050 }, { "epoch": 0.7908627087198515, "grad_norm": 6.118356704711914, "learning_rate": 1.0613260398105902e-06, "loss": 0.2456, "step": 17051 }, { "epoch": 0.7909090909090909, "grad_norm": 10.529454231262207, "learning_rate": 1.0608727310168921e-06, "loss": 0.3284, "step": 17052 }, { "epoch": 0.7909554730983303, "grad_norm": 20.058115005493164, "learning_rate": 1.0604195075623098e-06, "loss": 0.3994, "step": 17053 }, { "epoch": 0.7910018552875696, "grad_norm": 13.604068756103516, "learning_rate": 1.0599663694566597e-06, "loss": 0.4315, "step": 17054 }, { "epoch": 0.7910482374768089, "grad_norm": 7.599280834197998, "learning_rate": 1.0595133167097605e-06, "loss": 0.4014, "step": 17055 }, { "epoch": 0.7910946196660482, "grad_norm": 5.177025318145752, "learning_rate": 1.0590603493314266e-06, "loss": 0.2718, "step": 17056 }, { "epoch": 0.7911410018552876, "grad_norm": 10.811457633972168, "learning_rate": 1.0586074673314717e-06, "loss": 0.312, "step": 17057 }, { "epoch": 0.7911873840445269, "grad_norm": 5.233348369598389, "learning_rate": 1.0581546707197082e-06, "loss": 0.2933, "step": 17058 }, { "epoch": 0.7912337662337663, "grad_norm": 4.415653705596924, "learning_rate": 1.0577019595059434e-06, "loss": 0.2415, "step": 17059 }, { "epoch": 0.7912801484230055, "grad_norm": 7.863639831542969, "learning_rate": 1.0572493336999867e-06, "loss": 0.3119, "step": 17060 }, { "epoch": 0.7913265306122449, "grad_norm": 7.654885768890381, "learning_rate": 1.056796793311643e-06, "loss": 0.2907, "step": 17061 }, { "epoch": 0.7913729128014843, "grad_norm": 9.212034225463867, "learning_rate": 1.0563443383507172e-06, "loss": 0.2314, "step": 17062 }, { "epoch": 0.7914192949907236, "grad_norm": 10.625725746154785, "learning_rate": 1.0558919688270114e-06, "loss": 0.5058, "step": 17063 }, { "epoch": 0.7914656771799629, "grad_norm": 8.422205924987793, "learning_rate": 1.0554396847503272e-06, "loss": 0.3058, "step": 17064 }, { "epoch": 0.7915120593692022, "grad_norm": 4.187530517578125, "learning_rate": 1.0549874861304604e-06, "loss": 0.3405, "step": 17065 }, { "epoch": 0.7915584415584416, "grad_norm": 11.186050415039062, "learning_rate": 1.0545353729772084e-06, "loss": 0.3917, "step": 17066 }, { "epoch": 0.7916048237476809, "grad_norm": 8.190337181091309, "learning_rate": 1.0540833453003668e-06, "loss": 0.3658, "step": 17067 }, { "epoch": 0.7916512059369202, "grad_norm": 4.227856636047363, "learning_rate": 1.0536314031097283e-06, "loss": 0.2334, "step": 17068 }, { "epoch": 0.7916975881261595, "grad_norm": 5.076483726501465, "learning_rate": 1.0531795464150856e-06, "loss": 0.314, "step": 17069 }, { "epoch": 0.7917439703153989, "grad_norm": 10.719170570373535, "learning_rate": 1.0527277752262249e-06, "loss": 0.3318, "step": 17070 }, { "epoch": 0.7917903525046383, "grad_norm": 11.416699409484863, "learning_rate": 1.0522760895529343e-06, "loss": 0.3889, "step": 17071 }, { "epoch": 0.7918367346938775, "grad_norm": 8.526559829711914, "learning_rate": 1.0518244894050006e-06, "loss": 0.3797, "step": 17072 }, { "epoch": 0.7918831168831169, "grad_norm": 6.100981712341309, "learning_rate": 1.0513729747922068e-06, "loss": 0.3347, "step": 17073 }, { "epoch": 0.7919294990723562, "grad_norm": 9.963894844055176, "learning_rate": 1.0509215457243348e-06, "loss": 0.42, "step": 17074 }, { "epoch": 0.7919758812615956, "grad_norm": 6.548031330108643, "learning_rate": 1.0504702022111662e-06, "loss": 0.366, "step": 17075 }, { "epoch": 0.7920222634508349, "grad_norm": 6.6904449462890625, "learning_rate": 1.0500189442624758e-06, "loss": 0.3743, "step": 17076 }, { "epoch": 0.7920686456400742, "grad_norm": 10.67760181427002, "learning_rate": 1.0495677718880416e-06, "loss": 0.2641, "step": 17077 }, { "epoch": 0.7921150278293135, "grad_norm": 5.9440789222717285, "learning_rate": 1.049116685097638e-06, "loss": 0.3442, "step": 17078 }, { "epoch": 0.7921614100185529, "grad_norm": 3.9659199714660645, "learning_rate": 1.0486656839010378e-06, "loss": 0.2228, "step": 17079 }, { "epoch": 0.7922077922077922, "grad_norm": 9.85804271697998, "learning_rate": 1.0482147683080125e-06, "loss": 0.2834, "step": 17080 }, { "epoch": 0.7922541743970315, "grad_norm": 5.524001598358154, "learning_rate": 1.0477639383283288e-06, "loss": 0.3565, "step": 17081 }, { "epoch": 0.7923005565862709, "grad_norm": 10.853560447692871, "learning_rate": 1.0473131939717545e-06, "loss": 0.4067, "step": 17082 }, { "epoch": 0.7923469387755102, "grad_norm": 7.672569274902344, "learning_rate": 1.0468625352480555e-06, "loss": 0.404, "step": 17083 }, { "epoch": 0.7923933209647496, "grad_norm": 7.2890777587890625, "learning_rate": 1.0464119621669939e-06, "loss": 0.2673, "step": 17084 }, { "epoch": 0.7924397031539888, "grad_norm": 10.332001686096191, "learning_rate": 1.0459614747383324e-06, "loss": 0.4192, "step": 17085 }, { "epoch": 0.7924860853432282, "grad_norm": 10.542430877685547, "learning_rate": 1.0455110729718309e-06, "loss": 0.3665, "step": 17086 }, { "epoch": 0.7925324675324675, "grad_norm": 5.343838214874268, "learning_rate": 1.0450607568772452e-06, "loss": 0.2805, "step": 17087 }, { "epoch": 0.7925788497217069, "grad_norm": 8.139339447021484, "learning_rate": 1.0446105264643324e-06, "loss": 0.298, "step": 17088 }, { "epoch": 0.7926252319109462, "grad_norm": 7.320741176605225, "learning_rate": 1.0441603817428458e-06, "loss": 0.3491, "step": 17089 }, { "epoch": 0.7926716141001855, "grad_norm": 7.97983455657959, "learning_rate": 1.0437103227225382e-06, "loss": 0.3385, "step": 17090 }, { "epoch": 0.7927179962894249, "grad_norm": 6.3682637214660645, "learning_rate": 1.0432603494131616e-06, "loss": 0.3671, "step": 17091 }, { "epoch": 0.7927643784786642, "grad_norm": 5.572151184082031, "learning_rate": 1.0428104618244605e-06, "loss": 0.3002, "step": 17092 }, { "epoch": 0.7928107606679036, "grad_norm": 7.79615592956543, "learning_rate": 1.0423606599661839e-06, "loss": 0.3184, "step": 17093 }, { "epoch": 0.7928571428571428, "grad_norm": 5.879819869995117, "learning_rate": 1.0419109438480762e-06, "loss": 0.2829, "step": 17094 }, { "epoch": 0.7929035250463822, "grad_norm": 7.127108573913574, "learning_rate": 1.04146131347988e-06, "loss": 0.3645, "step": 17095 }, { "epoch": 0.7929499072356215, "grad_norm": 5.14532470703125, "learning_rate": 1.0410117688713366e-06, "loss": 0.3314, "step": 17096 }, { "epoch": 0.7929962894248609, "grad_norm": 7.581087112426758, "learning_rate": 1.0405623100321865e-06, "loss": 0.3725, "step": 17097 }, { "epoch": 0.7930426716141001, "grad_norm": 5.906010150909424, "learning_rate": 1.040112936972164e-06, "loss": 0.355, "step": 17098 }, { "epoch": 0.7930890538033395, "grad_norm": 8.185426712036133, "learning_rate": 1.0396636497010065e-06, "loss": 0.2657, "step": 17099 }, { "epoch": 0.7931354359925789, "grad_norm": 7.567638397216797, "learning_rate": 1.0392144482284472e-06, "loss": 0.3116, "step": 17100 }, { "epoch": 0.7931818181818182, "grad_norm": 9.776436805725098, "learning_rate": 1.0387653325642173e-06, "loss": 0.2541, "step": 17101 }, { "epoch": 0.7932282003710576, "grad_norm": 10.95791244506836, "learning_rate": 1.0383163027180488e-06, "loss": 0.4004, "step": 17102 }, { "epoch": 0.7932745825602968, "grad_norm": 6.447641849517822, "learning_rate": 1.0378673586996668e-06, "loss": 0.2469, "step": 17103 }, { "epoch": 0.7933209647495362, "grad_norm": 7.348241806030273, "learning_rate": 1.0374185005187981e-06, "loss": 0.193, "step": 17104 }, { "epoch": 0.7933673469387755, "grad_norm": 6.913398265838623, "learning_rate": 1.0369697281851683e-06, "loss": 0.4195, "step": 17105 }, { "epoch": 0.7934137291280149, "grad_norm": 10.885894775390625, "learning_rate": 1.0365210417084987e-06, "loss": 0.3848, "step": 17106 }, { "epoch": 0.7934601113172541, "grad_norm": 6.613504409790039, "learning_rate": 1.0360724410985112e-06, "loss": 0.2432, "step": 17107 }, { "epoch": 0.7935064935064935, "grad_norm": 7.769243240356445, "learning_rate": 1.0356239263649226e-06, "loss": 0.4373, "step": 17108 }, { "epoch": 0.7935528756957329, "grad_norm": 9.74205207824707, "learning_rate": 1.0351754975174505e-06, "loss": 0.3933, "step": 17109 }, { "epoch": 0.7935992578849722, "grad_norm": 5.859803199768066, "learning_rate": 1.03472715456581e-06, "loss": 0.2148, "step": 17110 }, { "epoch": 0.7936456400742115, "grad_norm": 12.211199760437012, "learning_rate": 1.0342788975197144e-06, "loss": 0.3423, "step": 17111 }, { "epoch": 0.7936920222634508, "grad_norm": 28.814456939697266, "learning_rate": 1.0338307263888748e-06, "loss": 0.5044, "step": 17112 }, { "epoch": 0.7937384044526902, "grad_norm": 13.030623435974121, "learning_rate": 1.0333826411830016e-06, "loss": 0.3717, "step": 17113 }, { "epoch": 0.7937847866419295, "grad_norm": 4.772152900695801, "learning_rate": 1.0329346419118003e-06, "loss": 0.3239, "step": 17114 }, { "epoch": 0.7938311688311688, "grad_norm": 5.479245662689209, "learning_rate": 1.0324867285849777e-06, "loss": 0.2989, "step": 17115 }, { "epoch": 0.7938775510204081, "grad_norm": 4.720670700073242, "learning_rate": 1.0320389012122372e-06, "loss": 0.3222, "step": 17116 }, { "epoch": 0.7939239332096475, "grad_norm": 5.676730632781982, "learning_rate": 1.0315911598032807e-06, "loss": 0.2656, "step": 17117 }, { "epoch": 0.7939703153988869, "grad_norm": 13.70114517211914, "learning_rate": 1.0311435043678103e-06, "loss": 0.2844, "step": 17118 }, { "epoch": 0.7940166975881262, "grad_norm": 17.512962341308594, "learning_rate": 1.0306959349155215e-06, "loss": 0.2718, "step": 17119 }, { "epoch": 0.7940630797773655, "grad_norm": 6.739229202270508, "learning_rate": 1.0302484514561111e-06, "loss": 0.3551, "step": 17120 }, { "epoch": 0.7941094619666048, "grad_norm": 7.271311283111572, "learning_rate": 1.0298010539992748e-06, "loss": 0.2347, "step": 17121 }, { "epoch": 0.7941558441558442, "grad_norm": 6.4975738525390625, "learning_rate": 1.029353742554704e-06, "loss": 0.2586, "step": 17122 }, { "epoch": 0.7942022263450835, "grad_norm": 10.206160545349121, "learning_rate": 1.0289065171320906e-06, "loss": 0.3379, "step": 17123 }, { "epoch": 0.7942486085343228, "grad_norm": 4.348689079284668, "learning_rate": 1.0284593777411239e-06, "loss": 0.2672, "step": 17124 }, { "epoch": 0.7942949907235621, "grad_norm": 9.831100463867188, "learning_rate": 1.0280123243914886e-06, "loss": 0.3711, "step": 17125 }, { "epoch": 0.7943413729128015, "grad_norm": 4.626887321472168, "learning_rate": 1.0275653570928718e-06, "loss": 0.3093, "step": 17126 }, { "epoch": 0.7943877551020408, "grad_norm": 5.657885551452637, "learning_rate": 1.0271184758549558e-06, "loss": 0.3337, "step": 17127 }, { "epoch": 0.7944341372912801, "grad_norm": 9.866068840026855, "learning_rate": 1.0266716806874227e-06, "loss": 0.2888, "step": 17128 }, { "epoch": 0.7944805194805195, "grad_norm": 9.203657150268555, "learning_rate": 1.0262249715999533e-06, "loss": 0.3108, "step": 17129 }, { "epoch": 0.7945269016697588, "grad_norm": 8.349576950073242, "learning_rate": 1.0257783486022226e-06, "loss": 0.4116, "step": 17130 }, { "epoch": 0.7945732838589982, "grad_norm": 12.157649040222168, "learning_rate": 1.025331811703908e-06, "loss": 0.471, "step": 17131 }, { "epoch": 0.7946196660482375, "grad_norm": 3.5662453174591064, "learning_rate": 1.024885360914683e-06, "loss": 0.2676, "step": 17132 }, { "epoch": 0.7946660482374768, "grad_norm": 5.507762432098389, "learning_rate": 1.0244389962442198e-06, "loss": 0.3158, "step": 17133 }, { "epoch": 0.7947124304267161, "grad_norm": 12.467644691467285, "learning_rate": 1.023992717702189e-06, "loss": 0.3786, "step": 17134 }, { "epoch": 0.7947588126159555, "grad_norm": 11.943926811218262, "learning_rate": 1.0235465252982608e-06, "loss": 0.3953, "step": 17135 }, { "epoch": 0.7948051948051948, "grad_norm": 14.063823699951172, "learning_rate": 1.0231004190420979e-06, "loss": 0.3707, "step": 17136 }, { "epoch": 0.7948515769944341, "grad_norm": 7.522042751312256, "learning_rate": 1.0226543989433668e-06, "loss": 0.2776, "step": 17137 }, { "epoch": 0.7948979591836735, "grad_norm": 6.633808135986328, "learning_rate": 1.0222084650117304e-06, "loss": 0.3514, "step": 17138 }, { "epoch": 0.7949443413729128, "grad_norm": 10.177411079406738, "learning_rate": 1.0217626172568495e-06, "loss": 0.3674, "step": 17139 }, { "epoch": 0.7949907235621522, "grad_norm": 4.757702827453613, "learning_rate": 1.0213168556883846e-06, "loss": 0.3134, "step": 17140 }, { "epoch": 0.7950371057513914, "grad_norm": 9.072308540344238, "learning_rate": 1.0208711803159898e-06, "loss": 0.254, "step": 17141 }, { "epoch": 0.7950834879406308, "grad_norm": 10.632552146911621, "learning_rate": 1.0204255911493228e-06, "loss": 0.3483, "step": 17142 }, { "epoch": 0.7951298701298701, "grad_norm": 6.597204208374023, "learning_rate": 1.019980088198036e-06, "loss": 0.2712, "step": 17143 }, { "epoch": 0.7951762523191095, "grad_norm": 11.368448257446289, "learning_rate": 1.0195346714717813e-06, "loss": 0.4322, "step": 17144 }, { "epoch": 0.7952226345083488, "grad_norm": 6.510513782501221, "learning_rate": 1.0190893409802083e-06, "loss": 0.2766, "step": 17145 }, { "epoch": 0.7952690166975881, "grad_norm": 10.694299697875977, "learning_rate": 1.0186440967329664e-06, "loss": 0.4271, "step": 17146 }, { "epoch": 0.7953153988868275, "grad_norm": 9.31689739227295, "learning_rate": 1.018198938739699e-06, "loss": 0.3113, "step": 17147 }, { "epoch": 0.7953617810760668, "grad_norm": 6.47206449508667, "learning_rate": 1.0177538670100518e-06, "loss": 0.2223, "step": 17148 }, { "epoch": 0.7954081632653062, "grad_norm": 6.599419116973877, "learning_rate": 1.0173088815536657e-06, "loss": 0.3554, "step": 17149 }, { "epoch": 0.7954545454545454, "grad_norm": 8.101404190063477, "learning_rate": 1.0168639823801829e-06, "loss": 0.3217, "step": 17150 }, { "epoch": 0.7955009276437848, "grad_norm": 6.822236061096191, "learning_rate": 1.016419169499242e-06, "loss": 0.2285, "step": 17151 }, { "epoch": 0.7955473098330241, "grad_norm": 8.684216499328613, "learning_rate": 1.0159744429204776e-06, "loss": 0.3162, "step": 17152 }, { "epoch": 0.7955936920222635, "grad_norm": 5.957218647003174, "learning_rate": 1.0155298026535255e-06, "loss": 0.1955, "step": 17153 }, { "epoch": 0.7956400742115027, "grad_norm": 9.647125244140625, "learning_rate": 1.0150852487080182e-06, "loss": 0.393, "step": 17154 }, { "epoch": 0.7956864564007421, "grad_norm": 6.637772560119629, "learning_rate": 1.0146407810935876e-06, "loss": 0.3975, "step": 17155 }, { "epoch": 0.7957328385899815, "grad_norm": 12.49703311920166, "learning_rate": 1.0141963998198624e-06, "loss": 0.4143, "step": 17156 }, { "epoch": 0.7957792207792208, "grad_norm": 5.349315643310547, "learning_rate": 1.0137521048964711e-06, "loss": 0.3426, "step": 17157 }, { "epoch": 0.7958256029684602, "grad_norm": 6.983086585998535, "learning_rate": 1.0133078963330368e-06, "loss": 0.2763, "step": 17158 }, { "epoch": 0.7958719851576994, "grad_norm": 9.373997688293457, "learning_rate": 1.012863774139184e-06, "loss": 0.3227, "step": 17159 }, { "epoch": 0.7959183673469388, "grad_norm": 11.712661743164062, "learning_rate": 1.0124197383245344e-06, "loss": 0.3149, "step": 17160 }, { "epoch": 0.7959647495361781, "grad_norm": 6.209448337554932, "learning_rate": 1.0119757888987075e-06, "loss": 0.317, "step": 17161 }, { "epoch": 0.7960111317254175, "grad_norm": 6.04071044921875, "learning_rate": 1.011531925871324e-06, "loss": 0.3568, "step": 17162 }, { "epoch": 0.7960575139146567, "grad_norm": 8.630838394165039, "learning_rate": 1.0110881492519957e-06, "loss": 0.2704, "step": 17163 }, { "epoch": 0.7961038961038961, "grad_norm": 6.547440052032471, "learning_rate": 1.0106444590503389e-06, "loss": 0.4439, "step": 17164 }, { "epoch": 0.7961502782931354, "grad_norm": 7.758756637573242, "learning_rate": 1.0102008552759658e-06, "loss": 0.3639, "step": 17165 }, { "epoch": 0.7961966604823748, "grad_norm": 5.135544300079346, "learning_rate": 1.0097573379384867e-06, "loss": 0.2938, "step": 17166 }, { "epoch": 0.796243042671614, "grad_norm": 7.048310279846191, "learning_rate": 1.00931390704751e-06, "loss": 0.4409, "step": 17167 }, { "epoch": 0.7962894248608534, "grad_norm": 5.220143795013428, "learning_rate": 1.0088705626126445e-06, "loss": 0.3712, "step": 17168 }, { "epoch": 0.7963358070500928, "grad_norm": 5.643391132354736, "learning_rate": 1.0084273046434912e-06, "loss": 0.2808, "step": 17169 }, { "epoch": 0.7963821892393321, "grad_norm": 7.008602142333984, "learning_rate": 1.0079841331496553e-06, "loss": 0.3844, "step": 17170 }, { "epoch": 0.7964285714285714, "grad_norm": 6.518875598907471, "learning_rate": 1.0075410481407376e-06, "loss": 0.3325, "step": 17171 }, { "epoch": 0.7964749536178107, "grad_norm": 9.581999778747559, "learning_rate": 1.0070980496263371e-06, "loss": 0.3525, "step": 17172 }, { "epoch": 0.7965213358070501, "grad_norm": 10.834844589233398, "learning_rate": 1.0066551376160528e-06, "loss": 0.5615, "step": 17173 }, { "epoch": 0.7965677179962894, "grad_norm": 4.32781982421875, "learning_rate": 1.0062123121194778e-06, "loss": 0.303, "step": 17174 }, { "epoch": 0.7966141001855288, "grad_norm": 10.308318138122559, "learning_rate": 1.0057695731462057e-06, "loss": 0.4707, "step": 17175 }, { "epoch": 0.796660482374768, "grad_norm": 9.351245880126953, "learning_rate": 1.0053269207058298e-06, "loss": 0.2522, "step": 17176 }, { "epoch": 0.7967068645640074, "grad_norm": 5.6287946701049805, "learning_rate": 1.004884354807939e-06, "loss": 0.3183, "step": 17177 }, { "epoch": 0.7967532467532468, "grad_norm": 16.795679092407227, "learning_rate": 1.004441875462121e-06, "loss": 0.41, "step": 17178 }, { "epoch": 0.7967996289424861, "grad_norm": 7.7186102867126465, "learning_rate": 1.0039994826779642e-06, "loss": 0.2827, "step": 17179 }, { "epoch": 0.7968460111317254, "grad_norm": 9.4598970413208, "learning_rate": 1.0035571764650491e-06, "loss": 0.3751, "step": 17180 }, { "epoch": 0.7968923933209647, "grad_norm": 8.321239471435547, "learning_rate": 1.0031149568329601e-06, "loss": 0.398, "step": 17181 }, { "epoch": 0.7969387755102041, "grad_norm": 6.39967679977417, "learning_rate": 1.0026728237912776e-06, "loss": 0.2704, "step": 17182 }, { "epoch": 0.7969851576994434, "grad_norm": 5.950993537902832, "learning_rate": 1.0022307773495799e-06, "loss": 0.2843, "step": 17183 }, { "epoch": 0.7970315398886827, "grad_norm": 10.524811744689941, "learning_rate": 1.001788817517445e-06, "loss": 0.4268, "step": 17184 }, { "epoch": 0.797077922077922, "grad_norm": 6.080331802368164, "learning_rate": 1.0013469443044455e-06, "loss": 0.2314, "step": 17185 }, { "epoch": 0.7971243042671614, "grad_norm": 9.475345611572266, "learning_rate": 1.000905157720155e-06, "loss": 0.3494, "step": 17186 }, { "epoch": 0.7971706864564008, "grad_norm": 8.245770454406738, "learning_rate": 1.0004634577741456e-06, "loss": 0.3654, "step": 17187 }, { "epoch": 0.7972170686456401, "grad_norm": 5.899071216583252, "learning_rate": 1.000021844475985e-06, "loss": 0.2645, "step": 17188 }, { "epoch": 0.7972634508348794, "grad_norm": 8.017081260681152, "learning_rate": 9.995803178352437e-07, "loss": 0.4677, "step": 17189 }, { "epoch": 0.7973098330241187, "grad_norm": 16.210060119628906, "learning_rate": 9.991388778614825e-07, "loss": 0.4196, "step": 17190 }, { "epoch": 0.7973562152133581, "grad_norm": 6.700494289398193, "learning_rate": 9.986975245642677e-07, "loss": 0.2667, "step": 17191 }, { "epoch": 0.7974025974025974, "grad_norm": 7.743266582489014, "learning_rate": 9.982562579531607e-07, "loss": 0.3456, "step": 17192 }, { "epoch": 0.7974489795918367, "grad_norm": 4.914984226226807, "learning_rate": 9.97815078037721e-07, "loss": 0.3227, "step": 17193 }, { "epoch": 0.797495361781076, "grad_norm": 6.182836055755615, "learning_rate": 9.973739848275066e-07, "loss": 0.3802, "step": 17194 }, { "epoch": 0.7975417439703154, "grad_norm": 8.990178108215332, "learning_rate": 9.969329783320752e-07, "loss": 0.4481, "step": 17195 }, { "epoch": 0.7975881261595548, "grad_norm": 10.894333839416504, "learning_rate": 9.964920585609784e-07, "loss": 0.4223, "step": 17196 }, { "epoch": 0.797634508348794, "grad_norm": 8.507883071899414, "learning_rate": 9.960512255237686e-07, "loss": 0.2917, "step": 17197 }, { "epoch": 0.7976808905380334, "grad_norm": 7.803213596343994, "learning_rate": 9.95610479229998e-07, "loss": 0.3765, "step": 17198 }, { "epoch": 0.7977272727272727, "grad_norm": 9.394173622131348, "learning_rate": 9.95169819689214e-07, "loss": 0.3608, "step": 17199 }, { "epoch": 0.7977736549165121, "grad_norm": 7.2832465171813965, "learning_rate": 9.947292469109649e-07, "loss": 0.3257, "step": 17200 }, { "epoch": 0.7978200371057514, "grad_norm": 5.49573278427124, "learning_rate": 9.942887609047924e-07, "loss": 0.3413, "step": 17201 }, { "epoch": 0.7978664192949907, "grad_norm": 7.264926910400391, "learning_rate": 9.938483616802413e-07, "loss": 0.3043, "step": 17202 }, { "epoch": 0.79791280148423, "grad_norm": 4.792059421539307, "learning_rate": 9.934080492468523e-07, "loss": 0.2645, "step": 17203 }, { "epoch": 0.7979591836734694, "grad_norm": 11.604665756225586, "learning_rate": 9.929678236141649e-07, "loss": 0.3334, "step": 17204 }, { "epoch": 0.7980055658627088, "grad_norm": 6.405464172363281, "learning_rate": 9.925276847917165e-07, "loss": 0.3545, "step": 17205 }, { "epoch": 0.798051948051948, "grad_norm": 8.797530174255371, "learning_rate": 9.920876327890428e-07, "loss": 0.3257, "step": 17206 }, { "epoch": 0.7980983302411874, "grad_norm": 4.594472408294678, "learning_rate": 9.916476676156756e-07, "loss": 0.3209, "step": 17207 }, { "epoch": 0.7981447124304267, "grad_norm": 9.169654846191406, "learning_rate": 9.912077892811473e-07, "loss": 0.4088, "step": 17208 }, { "epoch": 0.7981910946196661, "grad_norm": 5.168645858764648, "learning_rate": 9.907679977949874e-07, "loss": 0.409, "step": 17209 }, { "epoch": 0.7982374768089053, "grad_norm": 13.454126358032227, "learning_rate": 9.903282931667246e-07, "loss": 0.4209, "step": 17210 }, { "epoch": 0.7982838589981447, "grad_norm": 8.463239669799805, "learning_rate": 9.898886754058863e-07, "loss": 0.4228, "step": 17211 }, { "epoch": 0.798330241187384, "grad_norm": 6.205013751983643, "learning_rate": 9.894491445219927e-07, "loss": 0.259, "step": 17212 }, { "epoch": 0.7983766233766234, "grad_norm": 6.15771484375, "learning_rate": 9.89009700524568e-07, "loss": 0.3959, "step": 17213 }, { "epoch": 0.7984230055658627, "grad_norm": 7.6348748207092285, "learning_rate": 9.88570343423133e-07, "loss": 0.408, "step": 17214 }, { "epoch": 0.798469387755102, "grad_norm": 7.371541500091553, "learning_rate": 9.881310732272054e-07, "loss": 0.3193, "step": 17215 }, { "epoch": 0.7985157699443414, "grad_norm": 6.12074613571167, "learning_rate": 9.876918899463022e-07, "loss": 0.2951, "step": 17216 }, { "epoch": 0.7985621521335807, "grad_norm": 6.095059871673584, "learning_rate": 9.872527935899396e-07, "loss": 0.2863, "step": 17217 }, { "epoch": 0.7986085343228201, "grad_norm": 5.809043884277344, "learning_rate": 9.868137841676268e-07, "loss": 0.3523, "step": 17218 }, { "epoch": 0.7986549165120593, "grad_norm": 5.9093017578125, "learning_rate": 9.863748616888768e-07, "loss": 0.2976, "step": 17219 }, { "epoch": 0.7987012987012987, "grad_norm": 8.518426895141602, "learning_rate": 9.859360261631985e-07, "loss": 0.3341, "step": 17220 }, { "epoch": 0.798747680890538, "grad_norm": 15.298476219177246, "learning_rate": 9.854972776000992e-07, "loss": 0.5227, "step": 17221 }, { "epoch": 0.7987940630797774, "grad_norm": 7.014597415924072, "learning_rate": 9.850586160090848e-07, "loss": 0.3884, "step": 17222 }, { "epoch": 0.7988404452690167, "grad_norm": 5.9436259269714355, "learning_rate": 9.84620041399657e-07, "loss": 0.2599, "step": 17223 }, { "epoch": 0.798886827458256, "grad_norm": 4.235498905181885, "learning_rate": 9.841815537813177e-07, "loss": 0.2662, "step": 17224 }, { "epoch": 0.7989332096474954, "grad_norm": 15.331808090209961, "learning_rate": 9.83743153163567e-07, "loss": 0.5468, "step": 17225 }, { "epoch": 0.7989795918367347, "grad_norm": 13.749349594116211, "learning_rate": 9.833048395559031e-07, "loss": 0.4643, "step": 17226 }, { "epoch": 0.799025974025974, "grad_norm": 5.051168918609619, "learning_rate": 9.828666129678204e-07, "loss": 0.2027, "step": 17227 }, { "epoch": 0.7990723562152133, "grad_norm": 9.450389862060547, "learning_rate": 9.824284734088157e-07, "loss": 0.2351, "step": 17228 }, { "epoch": 0.7991187384044527, "grad_norm": 18.291980743408203, "learning_rate": 9.819904208883773e-07, "loss": 0.4276, "step": 17229 }, { "epoch": 0.799165120593692, "grad_norm": 11.342023849487305, "learning_rate": 9.815524554159978e-07, "loss": 0.4177, "step": 17230 }, { "epoch": 0.7992115027829314, "grad_norm": 5.373246192932129, "learning_rate": 9.811145770011643e-07, "loss": 0.2773, "step": 17231 }, { "epoch": 0.7992578849721707, "grad_norm": 6.187115669250488, "learning_rate": 9.80676785653364e-07, "loss": 0.2471, "step": 17232 }, { "epoch": 0.79930426716141, "grad_norm": 11.20549201965332, "learning_rate": 9.802390813820823e-07, "loss": 0.3357, "step": 17233 }, { "epoch": 0.7993506493506494, "grad_norm": 5.464032173156738, "learning_rate": 9.79801464196799e-07, "loss": 0.3267, "step": 17234 }, { "epoch": 0.7993970315398887, "grad_norm": 4.810117721557617, "learning_rate": 9.793639341069971e-07, "loss": 0.2429, "step": 17235 }, { "epoch": 0.799443413729128, "grad_norm": 6.29874324798584, "learning_rate": 9.789264911221546e-07, "loss": 0.2676, "step": 17236 }, { "epoch": 0.7994897959183673, "grad_norm": 7.735645771026611, "learning_rate": 9.784891352517489e-07, "loss": 0.27, "step": 17237 }, { "epoch": 0.7995361781076067, "grad_norm": 4.418246746063232, "learning_rate": 9.780518665052563e-07, "loss": 0.2591, "step": 17238 }, { "epoch": 0.799582560296846, "grad_norm": 9.876273155212402, "learning_rate": 9.776146848921475e-07, "loss": 0.3592, "step": 17239 }, { "epoch": 0.7996289424860853, "grad_norm": 6.18864107131958, "learning_rate": 9.77177590421895e-07, "loss": 0.235, "step": 17240 }, { "epoch": 0.7996753246753247, "grad_norm": 5.722042560577393, "learning_rate": 9.767405831039678e-07, "loss": 0.3502, "step": 17241 }, { "epoch": 0.799721706864564, "grad_norm": 7.640851974487305, "learning_rate": 9.76303662947834e-07, "loss": 0.3271, "step": 17242 }, { "epoch": 0.7997680890538034, "grad_norm": 10.23759937286377, "learning_rate": 9.758668299629603e-07, "loss": 0.3245, "step": 17243 }, { "epoch": 0.7998144712430427, "grad_norm": 5.973913192749023, "learning_rate": 9.754300841588082e-07, "loss": 0.2979, "step": 17244 }, { "epoch": 0.799860853432282, "grad_norm": 6.244579792022705, "learning_rate": 9.749934255448401e-07, "loss": 0.3308, "step": 17245 }, { "epoch": 0.7999072356215213, "grad_norm": 5.6766157150268555, "learning_rate": 9.745568541305173e-07, "loss": 0.3649, "step": 17246 }, { "epoch": 0.7999536178107607, "grad_norm": 7.834208011627197, "learning_rate": 9.74120369925296e-07, "loss": 0.3826, "step": 17247 }, { "epoch": 0.8, "grad_norm": 4.153587818145752, "learning_rate": 9.736839729386355e-07, "loss": 0.2282, "step": 17248 }, { "epoch": 0.8, "eval_loss": 0.32277151942253113, "eval_runtime": 38.047, "eval_samples_per_second": 45.812, "eval_steps_per_second": 5.73, "step": 17248 }, { "epoch": 0.8000463821892393, "grad_norm": 5.938899040222168, "learning_rate": 9.732476631799865e-07, "loss": 0.1862, "step": 17249 }, { "epoch": 0.8000927643784786, "grad_norm": 7.485679626464844, "learning_rate": 9.72811440658803e-07, "loss": 0.3354, "step": 17250 }, { "epoch": 0.800139146567718, "grad_norm": 3.877441883087158, "learning_rate": 9.72375305384536e-07, "loss": 0.3255, "step": 17251 }, { "epoch": 0.8001855287569574, "grad_norm": 3.7864089012145996, "learning_rate": 9.719392573666325e-07, "loss": 0.2372, "step": 17252 }, { "epoch": 0.8002319109461966, "grad_norm": 8.712370872497559, "learning_rate": 9.715032966145428e-07, "loss": 0.3175, "step": 17253 }, { "epoch": 0.800278293135436, "grad_norm": 9.345939636230469, "learning_rate": 9.710674231377076e-07, "loss": 0.3716, "step": 17254 }, { "epoch": 0.8003246753246753, "grad_norm": 6.621462821960449, "learning_rate": 9.706316369455715e-07, "loss": 0.3889, "step": 17255 }, { "epoch": 0.8003710575139147, "grad_norm": 10.347042083740234, "learning_rate": 9.70195938047576e-07, "loss": 0.3168, "step": 17256 }, { "epoch": 0.800417439703154, "grad_norm": 4.368910789489746, "learning_rate": 9.697603264531602e-07, "loss": 0.2898, "step": 17257 }, { "epoch": 0.8004638218923933, "grad_norm": 9.533912658691406, "learning_rate": 9.693248021717621e-07, "loss": 0.3356, "step": 17258 }, { "epoch": 0.8005102040816326, "grad_norm": 5.099451541900635, "learning_rate": 9.688893652128151e-07, "loss": 0.3205, "step": 17259 }, { "epoch": 0.800556586270872, "grad_norm": 7.175496578216553, "learning_rate": 9.684540155857536e-07, "loss": 0.4379, "step": 17260 }, { "epoch": 0.8006029684601114, "grad_norm": 10.007438659667969, "learning_rate": 9.680187533000096e-07, "loss": 0.3984, "step": 17261 }, { "epoch": 0.8006493506493506, "grad_norm": 4.500552654266357, "learning_rate": 9.675835783650128e-07, "loss": 0.3004, "step": 17262 }, { "epoch": 0.80069573283859, "grad_norm": 5.046735763549805, "learning_rate": 9.671484907901917e-07, "loss": 0.2837, "step": 17263 }, { "epoch": 0.8007421150278293, "grad_norm": 5.835992336273193, "learning_rate": 9.667134905849707e-07, "loss": 0.2777, "step": 17264 }, { "epoch": 0.8007884972170687, "grad_norm": 4.36826229095459, "learning_rate": 9.662785777587747e-07, "loss": 0.3015, "step": 17265 }, { "epoch": 0.8008348794063079, "grad_norm": 4.929134845733643, "learning_rate": 9.65843752321025e-07, "loss": 0.2518, "step": 17266 }, { "epoch": 0.8008812615955473, "grad_norm": 5.852059841156006, "learning_rate": 9.654090142811433e-07, "loss": 0.3646, "step": 17267 }, { "epoch": 0.8009276437847866, "grad_norm": 5.330526828765869, "learning_rate": 9.64974363648548e-07, "loss": 0.3228, "step": 17268 }, { "epoch": 0.800974025974026, "grad_norm": 4.702839374542236, "learning_rate": 9.645398004326538e-07, "loss": 0.371, "step": 17269 }, { "epoch": 0.8010204081632653, "grad_norm": 8.643745422363281, "learning_rate": 9.641053246428767e-07, "loss": 0.2302, "step": 17270 }, { "epoch": 0.8010667903525046, "grad_norm": 5.8412981033325195, "learning_rate": 9.636709362886288e-07, "loss": 0.3637, "step": 17271 }, { "epoch": 0.801113172541744, "grad_norm": 11.723788261413574, "learning_rate": 9.63236635379321e-07, "loss": 0.427, "step": 17272 }, { "epoch": 0.8011595547309833, "grad_norm": 7.247495651245117, "learning_rate": 9.628024219243638e-07, "loss": 0.3338, "step": 17273 }, { "epoch": 0.8012059369202227, "grad_norm": 12.007091522216797, "learning_rate": 9.62368295933161e-07, "loss": 0.4159, "step": 17274 }, { "epoch": 0.8012523191094619, "grad_norm": 6.4684953689575195, "learning_rate": 9.619342574151198e-07, "loss": 0.3349, "step": 17275 }, { "epoch": 0.8012987012987013, "grad_norm": 5.993860244750977, "learning_rate": 9.61500306379643e-07, "loss": 0.3637, "step": 17276 }, { "epoch": 0.8013450834879406, "grad_norm": 26.882509231567383, "learning_rate": 9.61066442836132e-07, "loss": 0.4273, "step": 17277 }, { "epoch": 0.80139146567718, "grad_norm": 9.796982765197754, "learning_rate": 9.606326667939874e-07, "loss": 0.3851, "step": 17278 }, { "epoch": 0.8014378478664193, "grad_norm": 6.079845428466797, "learning_rate": 9.601989782626042e-07, "loss": 0.3608, "step": 17279 }, { "epoch": 0.8014842300556586, "grad_norm": 7.222350597381592, "learning_rate": 9.597653772513799e-07, "loss": 0.3066, "step": 17280 }, { "epoch": 0.801530612244898, "grad_norm": 5.946587085723877, "learning_rate": 9.593318637697068e-07, "loss": 0.3623, "step": 17281 }, { "epoch": 0.8015769944341373, "grad_norm": 7.388789176940918, "learning_rate": 9.588984378269784e-07, "loss": 0.3211, "step": 17282 }, { "epoch": 0.8016233766233766, "grad_norm": 6.171773433685303, "learning_rate": 9.58465099432585e-07, "loss": 0.3027, "step": 17283 }, { "epoch": 0.8016697588126159, "grad_norm": 6.102128982543945, "learning_rate": 9.580318485959123e-07, "loss": 0.32, "step": 17284 }, { "epoch": 0.8017161410018553, "grad_norm": 5.752650737762451, "learning_rate": 9.575986853263475e-07, "loss": 0.2652, "step": 17285 }, { "epoch": 0.8017625231910946, "grad_norm": 5.156100273132324, "learning_rate": 9.571656096332749e-07, "loss": 0.2147, "step": 17286 }, { "epoch": 0.801808905380334, "grad_norm": 10.917688369750977, "learning_rate": 9.567326215260769e-07, "loss": 0.2742, "step": 17287 }, { "epoch": 0.8018552875695732, "grad_norm": 13.599146842956543, "learning_rate": 9.562997210141355e-07, "loss": 0.4712, "step": 17288 }, { "epoch": 0.8019016697588126, "grad_norm": 5.662514686584473, "learning_rate": 9.558669081068268e-07, "loss": 0.1941, "step": 17289 }, { "epoch": 0.801948051948052, "grad_norm": 9.948206901550293, "learning_rate": 9.554341828135282e-07, "loss": 0.2888, "step": 17290 }, { "epoch": 0.8019944341372913, "grad_norm": 9.621838569641113, "learning_rate": 9.55001545143615e-07, "loss": 0.3048, "step": 17291 }, { "epoch": 0.8020408163265306, "grad_norm": 8.048980712890625, "learning_rate": 9.545689951064596e-07, "loss": 0.3534, "step": 17292 }, { "epoch": 0.8020871985157699, "grad_norm": 7.440450191497803, "learning_rate": 9.54136532711435e-07, "loss": 0.155, "step": 17293 }, { "epoch": 0.8021335807050093, "grad_norm": 11.751980781555176, "learning_rate": 9.537041579679063e-07, "loss": 0.5202, "step": 17294 }, { "epoch": 0.8021799628942486, "grad_norm": 9.953715324401855, "learning_rate": 9.532718708852434e-07, "loss": 0.2688, "step": 17295 }, { "epoch": 0.8022263450834879, "grad_norm": 5.818460941314697, "learning_rate": 9.528396714728105e-07, "loss": 0.2333, "step": 17296 }, { "epoch": 0.8022727272727272, "grad_norm": 4.925416469573975, "learning_rate": 9.524075597399718e-07, "loss": 0.3365, "step": 17297 }, { "epoch": 0.8023191094619666, "grad_norm": 8.724039077758789, "learning_rate": 9.519755356960885e-07, "loss": 0.3526, "step": 17298 }, { "epoch": 0.802365491651206, "grad_norm": 4.721911430358887, "learning_rate": 9.515435993505212e-07, "loss": 0.2266, "step": 17299 }, { "epoch": 0.8024118738404453, "grad_norm": 10.56493854522705, "learning_rate": 9.511117507126255e-07, "loss": 0.2976, "step": 17300 }, { "epoch": 0.8024582560296846, "grad_norm": 8.670412063598633, "learning_rate": 9.506799897917579e-07, "loss": 0.3244, "step": 17301 }, { "epoch": 0.8025046382189239, "grad_norm": 12.353642463684082, "learning_rate": 9.502483165972725e-07, "loss": 0.4005, "step": 17302 }, { "epoch": 0.8025510204081633, "grad_norm": 8.540196418762207, "learning_rate": 9.498167311385215e-07, "loss": 0.3151, "step": 17303 }, { "epoch": 0.8025974025974026, "grad_norm": 9.524773597717285, "learning_rate": 9.49385233424856e-07, "loss": 0.4312, "step": 17304 }, { "epoch": 0.8026437847866419, "grad_norm": 6.536107540130615, "learning_rate": 9.489538234656215e-07, "loss": 0.2562, "step": 17305 }, { "epoch": 0.8026901669758812, "grad_norm": 7.924431800842285, "learning_rate": 9.48522501270166e-07, "loss": 0.4308, "step": 17306 }, { "epoch": 0.8027365491651206, "grad_norm": 7.177647590637207, "learning_rate": 9.480912668478331e-07, "loss": 0.2426, "step": 17307 }, { "epoch": 0.80278293135436, "grad_norm": 6.9440813064575195, "learning_rate": 9.476601202079661e-07, "loss": 0.2922, "step": 17308 }, { "epoch": 0.8028293135435992, "grad_norm": 6.642992973327637, "learning_rate": 9.47229061359905e-07, "loss": 0.3022, "step": 17309 }, { "epoch": 0.8028756957328386, "grad_norm": 5.679927825927734, "learning_rate": 9.467980903129903e-07, "loss": 0.2583, "step": 17310 }, { "epoch": 0.8029220779220779, "grad_norm": 8.675192832946777, "learning_rate": 9.463672070765556e-07, "loss": 0.3257, "step": 17311 }, { "epoch": 0.8029684601113173, "grad_norm": 6.554900646209717, "learning_rate": 9.459364116599373e-07, "loss": 0.3255, "step": 17312 }, { "epoch": 0.8030148423005566, "grad_norm": 5.864280700683594, "learning_rate": 9.45505704072468e-07, "loss": 0.3126, "step": 17313 }, { "epoch": 0.8030612244897959, "grad_norm": 7.671731472015381, "learning_rate": 9.450750843234797e-07, "loss": 0.356, "step": 17314 }, { "epoch": 0.8031076066790352, "grad_norm": 5.256885051727295, "learning_rate": 9.446445524223019e-07, "loss": 0.3258, "step": 17315 }, { "epoch": 0.8031539888682746, "grad_norm": 4.810371398925781, "learning_rate": 9.442141083782596e-07, "loss": 0.2251, "step": 17316 }, { "epoch": 0.803200371057514, "grad_norm": 7.972212791442871, "learning_rate": 9.437837522006799e-07, "loss": 0.3891, "step": 17317 }, { "epoch": 0.8032467532467532, "grad_norm": 9.982258796691895, "learning_rate": 9.43353483898885e-07, "loss": 0.3267, "step": 17318 }, { "epoch": 0.8032931354359926, "grad_norm": 6.7699875831604, "learning_rate": 9.429233034821977e-07, "loss": 0.2211, "step": 17319 }, { "epoch": 0.8033395176252319, "grad_norm": 6.290922164916992, "learning_rate": 9.424932109599372e-07, "loss": 0.3058, "step": 17320 }, { "epoch": 0.8033858998144713, "grad_norm": 10.873794555664062, "learning_rate": 9.420632063414226e-07, "loss": 0.3365, "step": 17321 }, { "epoch": 0.8034322820037105, "grad_norm": 5.523396015167236, "learning_rate": 9.416332896359665e-07, "loss": 0.3007, "step": 17322 }, { "epoch": 0.8034786641929499, "grad_norm": 7.4132771492004395, "learning_rate": 9.412034608528847e-07, "loss": 0.4364, "step": 17323 }, { "epoch": 0.8035250463821892, "grad_norm": 10.452238082885742, "learning_rate": 9.407737200014893e-07, "loss": 0.3368, "step": 17324 }, { "epoch": 0.8035714285714286, "grad_norm": 4.843759059906006, "learning_rate": 9.403440670910908e-07, "loss": 0.238, "step": 17325 }, { "epoch": 0.8036178107606679, "grad_norm": 7.908158302307129, "learning_rate": 9.399145021309974e-07, "loss": 0.288, "step": 17326 }, { "epoch": 0.8036641929499072, "grad_norm": 6.804934024810791, "learning_rate": 9.394850251305138e-07, "loss": 0.3636, "step": 17327 }, { "epoch": 0.8037105751391466, "grad_norm": 7.642158031463623, "learning_rate": 9.39055636098945e-07, "loss": 0.2664, "step": 17328 }, { "epoch": 0.8037569573283859, "grad_norm": 10.178704261779785, "learning_rate": 9.386263350455943e-07, "loss": 0.2681, "step": 17329 }, { "epoch": 0.8038033395176253, "grad_norm": 7.723759174346924, "learning_rate": 9.38197121979762e-07, "loss": 0.2588, "step": 17330 }, { "epoch": 0.8038497217068645, "grad_norm": 8.199563026428223, "learning_rate": 9.377679969107468e-07, "loss": 0.393, "step": 17331 }, { "epoch": 0.8038961038961039, "grad_norm": 8.728365898132324, "learning_rate": 9.373389598478461e-07, "loss": 0.3322, "step": 17332 }, { "epoch": 0.8039424860853432, "grad_norm": 7.085110187530518, "learning_rate": 9.369100108003531e-07, "loss": 0.3571, "step": 17333 }, { "epoch": 0.8039888682745826, "grad_norm": 5.1385369300842285, "learning_rate": 9.364811497775617e-07, "loss": 0.2836, "step": 17334 }, { "epoch": 0.8040352504638218, "grad_norm": 9.999382019042969, "learning_rate": 9.360523767887624e-07, "loss": 0.3786, "step": 17335 }, { "epoch": 0.8040816326530612, "grad_norm": 8.65779972076416, "learning_rate": 9.356236918432454e-07, "loss": 0.2314, "step": 17336 }, { "epoch": 0.8041280148423006, "grad_norm": 3.9012653827667236, "learning_rate": 9.351950949502986e-07, "loss": 0.2965, "step": 17337 }, { "epoch": 0.8041743970315399, "grad_norm": 8.388531684875488, "learning_rate": 9.347665861192046e-07, "loss": 0.3515, "step": 17338 }, { "epoch": 0.8042207792207792, "grad_norm": 8.446480751037598, "learning_rate": 9.343381653592481e-07, "loss": 0.2901, "step": 17339 }, { "epoch": 0.8042671614100185, "grad_norm": 5.288750648498535, "learning_rate": 9.339098326797114e-07, "loss": 0.2951, "step": 17340 }, { "epoch": 0.8043135435992579, "grad_norm": 12.848221778869629, "learning_rate": 9.334815880898734e-07, "loss": 0.4043, "step": 17341 }, { "epoch": 0.8043599257884972, "grad_norm": 6.794705867767334, "learning_rate": 9.330534315990114e-07, "loss": 0.2929, "step": 17342 }, { "epoch": 0.8044063079777366, "grad_norm": 10.01820182800293, "learning_rate": 9.326253632164034e-07, "loss": 0.37, "step": 17343 }, { "epoch": 0.8044526901669758, "grad_norm": 13.228489875793457, "learning_rate": 9.321973829513204e-07, "loss": 0.3947, "step": 17344 }, { "epoch": 0.8044990723562152, "grad_norm": 9.096773147583008, "learning_rate": 9.317694908130359e-07, "loss": 0.3132, "step": 17345 }, { "epoch": 0.8045454545454546, "grad_norm": 6.701581001281738, "learning_rate": 9.313416868108188e-07, "loss": 0.3153, "step": 17346 }, { "epoch": 0.8045918367346939, "grad_norm": 4.360795021057129, "learning_rate": 9.309139709539389e-07, "loss": 0.2063, "step": 17347 }, { "epoch": 0.8046382189239332, "grad_norm": 6.868668079376221, "learning_rate": 9.30486343251662e-07, "loss": 0.2975, "step": 17348 }, { "epoch": 0.8046846011131725, "grad_norm": 4.072778224945068, "learning_rate": 9.300588037132513e-07, "loss": 0.2217, "step": 17349 }, { "epoch": 0.8047309833024119, "grad_norm": 8.083155632019043, "learning_rate": 9.2963135234797e-07, "loss": 0.3742, "step": 17350 }, { "epoch": 0.8047773654916512, "grad_norm": 6.70799446105957, "learning_rate": 9.292039891650784e-07, "loss": 0.3346, "step": 17351 }, { "epoch": 0.8048237476808905, "grad_norm": 8.24094295501709, "learning_rate": 9.287767141738352e-07, "loss": 0.1691, "step": 17352 }, { "epoch": 0.8048701298701298, "grad_norm": 5.549503803253174, "learning_rate": 9.283495273834986e-07, "loss": 0.312, "step": 17353 }, { "epoch": 0.8049165120593692, "grad_norm": 6.26234245300293, "learning_rate": 9.279224288033201e-07, "loss": 0.2831, "step": 17354 }, { "epoch": 0.8049628942486086, "grad_norm": 5.7670087814331055, "learning_rate": 9.274954184425549e-07, "loss": 0.2933, "step": 17355 }, { "epoch": 0.8050092764378479, "grad_norm": 9.428979873657227, "learning_rate": 9.270684963104532e-07, "loss": 0.2809, "step": 17356 }, { "epoch": 0.8050556586270872, "grad_norm": 8.364578247070312, "learning_rate": 9.266416624162644e-07, "loss": 0.3113, "step": 17357 }, { "epoch": 0.8051020408163265, "grad_norm": 5.2214274406433105, "learning_rate": 9.262149167692359e-07, "loss": 0.2761, "step": 17358 }, { "epoch": 0.8051484230055659, "grad_norm": 16.098295211791992, "learning_rate": 9.257882593786133e-07, "loss": 0.3473, "step": 17359 }, { "epoch": 0.8051948051948052, "grad_norm": 10.686667442321777, "learning_rate": 9.253616902536378e-07, "loss": 0.3916, "step": 17360 }, { "epoch": 0.8052411873840445, "grad_norm": 5.643803596496582, "learning_rate": 9.249352094035524e-07, "loss": 0.3266, "step": 17361 }, { "epoch": 0.8052875695732838, "grad_norm": 8.266072273254395, "learning_rate": 9.245088168375965e-07, "loss": 0.4144, "step": 17362 }, { "epoch": 0.8053339517625232, "grad_norm": 6.07517671585083, "learning_rate": 9.240825125650071e-07, "loss": 0.279, "step": 17363 }, { "epoch": 0.8053803339517626, "grad_norm": 5.982641696929932, "learning_rate": 9.236562965950213e-07, "loss": 0.3045, "step": 17364 }, { "epoch": 0.8054267161410018, "grad_norm": 9.700668334960938, "learning_rate": 9.232301689368711e-07, "loss": 0.4085, "step": 17365 }, { "epoch": 0.8054730983302412, "grad_norm": 6.144565105438232, "learning_rate": 9.228041295997886e-07, "loss": 0.3351, "step": 17366 }, { "epoch": 0.8055194805194805, "grad_norm": 7.001101493835449, "learning_rate": 9.223781785930047e-07, "loss": 0.2437, "step": 17367 }, { "epoch": 0.8055658627087199, "grad_norm": 9.404297828674316, "learning_rate": 9.21952315925746e-07, "loss": 0.2728, "step": 17368 }, { "epoch": 0.8056122448979591, "grad_norm": 7.379103183746338, "learning_rate": 9.2152654160724e-07, "loss": 0.3238, "step": 17369 }, { "epoch": 0.8056586270871985, "grad_norm": 5.268101692199707, "learning_rate": 9.211008556467115e-07, "loss": 0.2537, "step": 17370 }, { "epoch": 0.8057050092764378, "grad_norm": 5.167739391326904, "learning_rate": 9.206752580533801e-07, "loss": 0.3233, "step": 17371 }, { "epoch": 0.8057513914656772, "grad_norm": 6.396167278289795, "learning_rate": 9.202497488364682e-07, "loss": 0.3541, "step": 17372 }, { "epoch": 0.8057977736549166, "grad_norm": 7.020195960998535, "learning_rate": 9.198243280051928e-07, "loss": 0.3484, "step": 17373 }, { "epoch": 0.8058441558441558, "grad_norm": 8.307623863220215, "learning_rate": 9.193989955687715e-07, "loss": 0.3832, "step": 17374 }, { "epoch": 0.8058905380333952, "grad_norm": 8.719186782836914, "learning_rate": 9.189737515364205e-07, "loss": 0.4446, "step": 17375 }, { "epoch": 0.8059369202226345, "grad_norm": 4.959407806396484, "learning_rate": 9.18548595917349e-07, "loss": 0.2528, "step": 17376 }, { "epoch": 0.8059833024118739, "grad_norm": 7.974571704864502, "learning_rate": 9.181235287207696e-07, "loss": 0.2961, "step": 17377 }, { "epoch": 0.8060296846011131, "grad_norm": 4.685114860534668, "learning_rate": 9.176985499558905e-07, "loss": 0.2798, "step": 17378 }, { "epoch": 0.8060760667903525, "grad_norm": 9.241040229797363, "learning_rate": 9.172736596319192e-07, "loss": 0.3949, "step": 17379 }, { "epoch": 0.8061224489795918, "grad_norm": 6.259311676025391, "learning_rate": 9.168488577580614e-07, "loss": 0.3414, "step": 17380 }, { "epoch": 0.8061688311688312, "grad_norm": 8.983227729797363, "learning_rate": 9.164241443435201e-07, "loss": 0.3336, "step": 17381 }, { "epoch": 0.8062152133580704, "grad_norm": 7.2676191329956055, "learning_rate": 9.159995193974946e-07, "loss": 0.3723, "step": 17382 }, { "epoch": 0.8062615955473098, "grad_norm": 6.153368949890137, "learning_rate": 9.155749829291855e-07, "loss": 0.3199, "step": 17383 }, { "epoch": 0.8063079777365492, "grad_norm": 4.73201322555542, "learning_rate": 9.151505349477901e-07, "loss": 0.2963, "step": 17384 }, { "epoch": 0.8063543599257885, "grad_norm": 5.728392601013184, "learning_rate": 9.147261754625037e-07, "loss": 0.323, "step": 17385 }, { "epoch": 0.8064007421150279, "grad_norm": 4.4329681396484375, "learning_rate": 9.143019044825213e-07, "loss": 0.27, "step": 17386 }, { "epoch": 0.8064471243042671, "grad_norm": 8.761359214782715, "learning_rate": 9.13877722017032e-07, "loss": 0.2887, "step": 17387 }, { "epoch": 0.8064935064935065, "grad_norm": 6.459423065185547, "learning_rate": 9.134536280752271e-07, "loss": 0.2666, "step": 17388 }, { "epoch": 0.8065398886827458, "grad_norm": 7.557071208953857, "learning_rate": 9.130296226662932e-07, "loss": 0.2002, "step": 17389 }, { "epoch": 0.8065862708719852, "grad_norm": 9.396852493286133, "learning_rate": 9.12605705799417e-07, "loss": 0.3827, "step": 17390 }, { "epoch": 0.8066326530612244, "grad_norm": 10.418301582336426, "learning_rate": 9.121818774837826e-07, "loss": 0.34, "step": 17391 }, { "epoch": 0.8066790352504638, "grad_norm": 6.415112018585205, "learning_rate": 9.117581377285728e-07, "loss": 0.31, "step": 17392 }, { "epoch": 0.8067254174397032, "grad_norm": 6.753148555755615, "learning_rate": 9.113344865429652e-07, "loss": 0.339, "step": 17393 }, { "epoch": 0.8067717996289425, "grad_norm": 6.804765224456787, "learning_rate": 9.109109239361397e-07, "loss": 0.3103, "step": 17394 }, { "epoch": 0.8068181818181818, "grad_norm": 5.955442428588867, "learning_rate": 9.104874499172717e-07, "loss": 0.3493, "step": 17395 }, { "epoch": 0.8068645640074211, "grad_norm": 5.3433003425598145, "learning_rate": 9.100640644955367e-07, "loss": 0.264, "step": 17396 }, { "epoch": 0.8069109461966605, "grad_norm": 5.002960681915283, "learning_rate": 9.096407676801078e-07, "loss": 0.2274, "step": 17397 }, { "epoch": 0.8069573283858998, "grad_norm": 4.899661540985107, "learning_rate": 9.092175594801528e-07, "loss": 0.2634, "step": 17398 }, { "epoch": 0.8070037105751392, "grad_norm": 4.787222385406494, "learning_rate": 9.087944399048415e-07, "loss": 0.2706, "step": 17399 }, { "epoch": 0.8070500927643784, "grad_norm": 7.799545764923096, "learning_rate": 9.08371408963341e-07, "loss": 0.2807, "step": 17400 }, { "epoch": 0.8070964749536178, "grad_norm": 4.22011137008667, "learning_rate": 9.079484666648158e-07, "loss": 0.2174, "step": 17401 }, { "epoch": 0.8071428571428572, "grad_norm": 6.453035354614258, "learning_rate": 9.075256130184284e-07, "loss": 0.3679, "step": 17402 }, { "epoch": 0.8071892393320965, "grad_norm": 8.478482246398926, "learning_rate": 9.071028480333421e-07, "loss": 0.3255, "step": 17403 }, { "epoch": 0.8072356215213358, "grad_norm": 11.131932258605957, "learning_rate": 9.066801717187118e-07, "loss": 0.3888, "step": 17404 }, { "epoch": 0.8072820037105751, "grad_norm": 7.621732711791992, "learning_rate": 9.062575840836968e-07, "loss": 0.2693, "step": 17405 }, { "epoch": 0.8073283858998145, "grad_norm": 9.429971694946289, "learning_rate": 9.05835085137452e-07, "loss": 0.3699, "step": 17406 }, { "epoch": 0.8073747680890538, "grad_norm": 7.680206775665283, "learning_rate": 9.054126748891307e-07, "loss": 0.2796, "step": 17407 }, { "epoch": 0.8074211502782931, "grad_norm": 4.863913059234619, "learning_rate": 9.049903533478854e-07, "loss": 0.2732, "step": 17408 }, { "epoch": 0.8074675324675324, "grad_norm": 8.056413650512695, "learning_rate": 9.045681205228629e-07, "loss": 0.2015, "step": 17409 }, { "epoch": 0.8075139146567718, "grad_norm": 8.683833122253418, "learning_rate": 9.041459764232125e-07, "loss": 0.3621, "step": 17410 }, { "epoch": 0.8075602968460112, "grad_norm": 7.432155132293701, "learning_rate": 9.037239210580784e-07, "loss": 0.2508, "step": 17411 }, { "epoch": 0.8076066790352505, "grad_norm": 4.03549337387085, "learning_rate": 9.03301954436605e-07, "loss": 0.2382, "step": 17412 }, { "epoch": 0.8076530612244898, "grad_norm": 5.6153364181518555, "learning_rate": 9.028800765679346e-07, "loss": 0.3538, "step": 17413 }, { "epoch": 0.8076994434137291, "grad_norm": 8.569369316101074, "learning_rate": 9.02458287461207e-07, "loss": 0.299, "step": 17414 }, { "epoch": 0.8077458256029685, "grad_norm": 5.112569332122803, "learning_rate": 9.020365871255582e-07, "loss": 0.2431, "step": 17415 }, { "epoch": 0.8077922077922078, "grad_norm": 8.373847007751465, "learning_rate": 9.016149755701259e-07, "loss": 0.2473, "step": 17416 }, { "epoch": 0.8078385899814471, "grad_norm": 11.087044715881348, "learning_rate": 9.011934528040428e-07, "loss": 0.3176, "step": 17417 }, { "epoch": 0.8078849721706864, "grad_norm": 9.84464168548584, "learning_rate": 9.007720188364416e-07, "loss": 0.3969, "step": 17418 }, { "epoch": 0.8079313543599258, "grad_norm": 11.289146423339844, "learning_rate": 9.003506736764545e-07, "loss": 0.3235, "step": 17419 }, { "epoch": 0.8079777365491652, "grad_norm": 8.557840347290039, "learning_rate": 8.999294173332058e-07, "loss": 0.3593, "step": 17420 }, { "epoch": 0.8080241187384044, "grad_norm": 8.392078399658203, "learning_rate": 8.995082498158236e-07, "loss": 0.3126, "step": 17421 }, { "epoch": 0.8080705009276438, "grad_norm": 7.134500026702881, "learning_rate": 8.990871711334331e-07, "loss": 0.3758, "step": 17422 }, { "epoch": 0.8081168831168831, "grad_norm": 8.585371971130371, "learning_rate": 8.986661812951553e-07, "loss": 0.316, "step": 17423 }, { "epoch": 0.8081632653061225, "grad_norm": 32.167964935302734, "learning_rate": 8.982452803101122e-07, "loss": 0.3898, "step": 17424 }, { "epoch": 0.8082096474953617, "grad_norm": 10.507553100585938, "learning_rate": 8.978244681874221e-07, "loss": 0.6555, "step": 17425 }, { "epoch": 0.8082560296846011, "grad_norm": 4.85729455947876, "learning_rate": 8.974037449362005e-07, "loss": 0.2965, "step": 17426 }, { "epoch": 0.8083024118738404, "grad_norm": 7.74229621887207, "learning_rate": 8.969831105655624e-07, "loss": 0.2121, "step": 17427 }, { "epoch": 0.8083487940630798, "grad_norm": 10.463349342346191, "learning_rate": 8.965625650846216e-07, "loss": 0.2924, "step": 17428 }, { "epoch": 0.8083951762523192, "grad_norm": 8.207117080688477, "learning_rate": 8.961421085024885e-07, "loss": 0.2802, "step": 17429 }, { "epoch": 0.8084415584415584, "grad_norm": 7.030591011047363, "learning_rate": 8.957217408282731e-07, "loss": 0.3702, "step": 17430 }, { "epoch": 0.8084879406307978, "grad_norm": 6.7896809577941895, "learning_rate": 8.953014620710799e-07, "loss": 0.357, "step": 17431 }, { "epoch": 0.8085343228200371, "grad_norm": 7.809432506561279, "learning_rate": 8.948812722400157e-07, "loss": 0.3801, "step": 17432 }, { "epoch": 0.8085807050092765, "grad_norm": 11.497654914855957, "learning_rate": 8.944611713441836e-07, "loss": 0.3404, "step": 17433 }, { "epoch": 0.8086270871985157, "grad_norm": 6.617749214172363, "learning_rate": 8.940411593926851e-07, "loss": 0.3566, "step": 17434 }, { "epoch": 0.8086734693877551, "grad_norm": 4.411314964294434, "learning_rate": 8.9362123639462e-07, "loss": 0.344, "step": 17435 }, { "epoch": 0.8087198515769944, "grad_norm": 4.5013933181762695, "learning_rate": 8.93201402359084e-07, "loss": 0.2286, "step": 17436 }, { "epoch": 0.8087662337662338, "grad_norm": 6.960444450378418, "learning_rate": 8.927816572951731e-07, "loss": 0.2676, "step": 17437 }, { "epoch": 0.808812615955473, "grad_norm": 4.577308654785156, "learning_rate": 8.923620012119816e-07, "loss": 0.3041, "step": 17438 }, { "epoch": 0.8088589981447124, "grad_norm": 5.8734283447265625, "learning_rate": 8.919424341186006e-07, "loss": 0.3012, "step": 17439 }, { "epoch": 0.8089053803339518, "grad_norm": 4.244203090667725, "learning_rate": 8.915229560241201e-07, "loss": 0.2614, "step": 17440 }, { "epoch": 0.8089517625231911, "grad_norm": 5.49269962310791, "learning_rate": 8.911035669376289e-07, "loss": 0.3085, "step": 17441 }, { "epoch": 0.8089981447124305, "grad_norm": 6.191681861877441, "learning_rate": 8.906842668682102e-07, "loss": 0.328, "step": 17442 }, { "epoch": 0.8090445269016697, "grad_norm": 9.318071365356445, "learning_rate": 8.902650558249498e-07, "loss": 0.3566, "step": 17443 }, { "epoch": 0.8090909090909091, "grad_norm": 6.748971939086914, "learning_rate": 8.898459338169296e-07, "loss": 0.3027, "step": 17444 }, { "epoch": 0.8091372912801484, "grad_norm": 5.085690498352051, "learning_rate": 8.894269008532292e-07, "loss": 0.3016, "step": 17445 }, { "epoch": 0.8091836734693878, "grad_norm": 7.130293369293213, "learning_rate": 8.890079569429277e-07, "loss": 0.2675, "step": 17446 }, { "epoch": 0.809230055658627, "grad_norm": 6.398576736450195, "learning_rate": 8.885891020950993e-07, "loss": 0.3245, "step": 17447 }, { "epoch": 0.8092764378478664, "grad_norm": 4.789830684661865, "learning_rate": 8.881703363188199e-07, "loss": 0.3176, "step": 17448 }, { "epoch": 0.8093228200371058, "grad_norm": 6.906800746917725, "learning_rate": 8.877516596231612e-07, "loss": 0.3259, "step": 17449 }, { "epoch": 0.8093692022263451, "grad_norm": 14.758316993713379, "learning_rate": 8.873330720171936e-07, "loss": 0.5938, "step": 17450 }, { "epoch": 0.8094155844155844, "grad_norm": 8.179056167602539, "learning_rate": 8.86914573509986e-07, "loss": 0.4081, "step": 17451 }, { "epoch": 0.8094619666048237, "grad_norm": 8.883711814880371, "learning_rate": 8.864961641106063e-07, "loss": 0.2831, "step": 17452 }, { "epoch": 0.8095083487940631, "grad_norm": 13.183582305908203, "learning_rate": 8.860778438281159e-07, "loss": 0.399, "step": 17453 }, { "epoch": 0.8095547309833024, "grad_norm": 7.090452194213867, "learning_rate": 8.856596126715788e-07, "loss": 0.3578, "step": 17454 }, { "epoch": 0.8096011131725418, "grad_norm": 4.103941917419434, "learning_rate": 8.852414706500567e-07, "loss": 0.2198, "step": 17455 }, { "epoch": 0.809647495361781, "grad_norm": 8.11359977722168, "learning_rate": 8.848234177726079e-07, "loss": 0.3863, "step": 17456 }, { "epoch": 0.8096938775510204, "grad_norm": 11.261547088623047, "learning_rate": 8.844054540482899e-07, "loss": 0.4047, "step": 17457 }, { "epoch": 0.8097402597402598, "grad_norm": 8.769006729125977, "learning_rate": 8.839875794861563e-07, "loss": 0.3059, "step": 17458 }, { "epoch": 0.8097866419294991, "grad_norm": 16.619237899780273, "learning_rate": 8.835697940952603e-07, "loss": 0.5013, "step": 17459 }, { "epoch": 0.8098330241187384, "grad_norm": 5.811287879943848, "learning_rate": 8.831520978846542e-07, "loss": 0.3694, "step": 17460 }, { "epoch": 0.8098794063079777, "grad_norm": 7.101649761199951, "learning_rate": 8.827344908633856e-07, "loss": 0.2862, "step": 17461 }, { "epoch": 0.8099257884972171, "grad_norm": 6.463649749755859, "learning_rate": 8.823169730405035e-07, "loss": 0.3436, "step": 17462 }, { "epoch": 0.8099721706864564, "grad_norm": 12.371332168579102, "learning_rate": 8.818995444250528e-07, "loss": 0.2483, "step": 17463 }, { "epoch": 0.8100185528756957, "grad_norm": 8.855049133300781, "learning_rate": 8.814822050260758e-07, "loss": 0.3422, "step": 17464 }, { "epoch": 0.810064935064935, "grad_norm": 4.02310848236084, "learning_rate": 8.81064954852614e-07, "loss": 0.1879, "step": 17465 }, { "epoch": 0.8101113172541744, "grad_norm": 5.797159194946289, "learning_rate": 8.806477939137081e-07, "loss": 0.2393, "step": 17466 }, { "epoch": 0.8101576994434138, "grad_norm": 5.730146884918213, "learning_rate": 8.802307222183942e-07, "loss": 0.3188, "step": 17467 }, { "epoch": 0.810204081632653, "grad_norm": 6.494020938873291, "learning_rate": 8.798137397757107e-07, "loss": 0.3555, "step": 17468 }, { "epoch": 0.8102504638218924, "grad_norm": 7.831191062927246, "learning_rate": 8.793968465946878e-07, "loss": 0.3773, "step": 17469 }, { "epoch": 0.8102968460111317, "grad_norm": 5.593530654907227, "learning_rate": 8.789800426843587e-07, "loss": 0.2956, "step": 17470 }, { "epoch": 0.8103432282003711, "grad_norm": 6.282345771789551, "learning_rate": 8.785633280537537e-07, "loss": 0.2759, "step": 17471 }, { "epoch": 0.8103896103896104, "grad_norm": 11.209712028503418, "learning_rate": 8.781467027119001e-07, "loss": 0.3667, "step": 17472 }, { "epoch": 0.8104359925788497, "grad_norm": 5.302668571472168, "learning_rate": 8.777301666678245e-07, "loss": 0.2891, "step": 17473 }, { "epoch": 0.810482374768089, "grad_norm": 7.4429931640625, "learning_rate": 8.773137199305515e-07, "loss": 0.2702, "step": 17474 }, { "epoch": 0.8105287569573284, "grad_norm": 11.912616729736328, "learning_rate": 8.76897362509101e-07, "loss": 0.4134, "step": 17475 }, { "epoch": 0.8105751391465678, "grad_norm": 8.832561492919922, "learning_rate": 8.764810944124946e-07, "loss": 0.2262, "step": 17476 }, { "epoch": 0.810621521335807, "grad_norm": 6.338589191436768, "learning_rate": 8.760649156497503e-07, "loss": 0.2672, "step": 17477 }, { "epoch": 0.8106679035250464, "grad_norm": 5.824700355529785, "learning_rate": 8.75648826229884e-07, "loss": 0.3408, "step": 17478 }, { "epoch": 0.8107142857142857, "grad_norm": 8.406098365783691, "learning_rate": 8.752328261619125e-07, "loss": 0.331, "step": 17479 }, { "epoch": 0.8107606679035251, "grad_norm": 4.870240688323975, "learning_rate": 8.748169154548448e-07, "loss": 0.2696, "step": 17480 }, { "epoch": 0.8108070500927643, "grad_norm": 7.971610069274902, "learning_rate": 8.744010941176923e-07, "loss": 0.4125, "step": 17481 }, { "epoch": 0.8108534322820037, "grad_norm": 5.074734687805176, "learning_rate": 8.739853621594646e-07, "loss": 0.2524, "step": 17482 }, { "epoch": 0.810899814471243, "grad_norm": 6.609487533569336, "learning_rate": 8.735697195891674e-07, "loss": 0.3734, "step": 17483 }, { "epoch": 0.8109461966604824, "grad_norm": 6.096741676330566, "learning_rate": 8.731541664158061e-07, "loss": 0.2439, "step": 17484 }, { "epoch": 0.8109925788497218, "grad_norm": 5.7035908699035645, "learning_rate": 8.727387026483842e-07, "loss": 0.3227, "step": 17485 }, { "epoch": 0.811038961038961, "grad_norm": 12.350686073303223, "learning_rate": 8.723233282959004e-07, "loss": 0.3686, "step": 17486 }, { "epoch": 0.8110853432282004, "grad_norm": 6.675765037536621, "learning_rate": 8.719080433673544e-07, "loss": 0.2916, "step": 17487 }, { "epoch": 0.8111317254174397, "grad_norm": 5.770143032073975, "learning_rate": 8.714928478717433e-07, "loss": 0.2811, "step": 17488 }, { "epoch": 0.8111781076066791, "grad_norm": 8.311237335205078, "learning_rate": 8.710777418180616e-07, "loss": 0.3336, "step": 17489 }, { "epoch": 0.8112244897959183, "grad_norm": 9.48049545288086, "learning_rate": 8.70662725215305e-07, "loss": 0.379, "step": 17490 }, { "epoch": 0.8112708719851577, "grad_norm": 12.37917709350586, "learning_rate": 8.702477980724605e-07, "loss": 0.3631, "step": 17491 }, { "epoch": 0.811317254174397, "grad_norm": 6.276059627532959, "learning_rate": 8.698329603985195e-07, "loss": 0.2962, "step": 17492 }, { "epoch": 0.8113636363636364, "grad_norm": 11.19497299194336, "learning_rate": 8.694182122024691e-07, "loss": 0.4174, "step": 17493 }, { "epoch": 0.8114100185528756, "grad_norm": 6.947111129760742, "learning_rate": 8.690035534932939e-07, "loss": 0.2907, "step": 17494 }, { "epoch": 0.811456400742115, "grad_norm": 11.387290000915527, "learning_rate": 8.685889842799783e-07, "loss": 0.3543, "step": 17495 }, { "epoch": 0.8115027829313544, "grad_norm": 4.884113311767578, "learning_rate": 8.681745045715045e-07, "loss": 0.3593, "step": 17496 }, { "epoch": 0.8115491651205937, "grad_norm": 5.072912693023682, "learning_rate": 8.6776011437685e-07, "loss": 0.2499, "step": 17497 }, { "epoch": 0.8115955473098331, "grad_norm": 11.513765335083008, "learning_rate": 8.673458137049923e-07, "loss": 0.2915, "step": 17498 }, { "epoch": 0.8116419294990723, "grad_norm": 7.55298376083374, "learning_rate": 8.669316025649083e-07, "loss": 0.2644, "step": 17499 }, { "epoch": 0.8116883116883117, "grad_norm": 15.847319602966309, "learning_rate": 8.665174809655707e-07, "loss": 0.4347, "step": 17500 }, { "epoch": 0.811734693877551, "grad_norm": 5.208160877227783, "learning_rate": 8.661034489159531e-07, "loss": 0.306, "step": 17501 }, { "epoch": 0.8117810760667904, "grad_norm": 7.566695690155029, "learning_rate": 8.656895064250231e-07, "loss": 0.3196, "step": 17502 }, { "epoch": 0.8118274582560296, "grad_norm": 6.644008636474609, "learning_rate": 8.652756535017487e-07, "loss": 0.3214, "step": 17503 }, { "epoch": 0.811873840445269, "grad_norm": 5.682129859924316, "learning_rate": 8.648618901550965e-07, "loss": 0.2389, "step": 17504 }, { "epoch": 0.8119202226345084, "grad_norm": 6.892910480499268, "learning_rate": 8.64448216394031e-07, "loss": 0.2356, "step": 17505 }, { "epoch": 0.8119666048237477, "grad_norm": 4.78298807144165, "learning_rate": 8.640346322275128e-07, "loss": 0.2715, "step": 17506 }, { "epoch": 0.812012987012987, "grad_norm": 7.903651237487793, "learning_rate": 8.636211376645043e-07, "loss": 0.3216, "step": 17507 }, { "epoch": 0.8120593692022263, "grad_norm": 9.485831260681152, "learning_rate": 8.632077327139616e-07, "loss": 0.4402, "step": 17508 }, { "epoch": 0.8121057513914657, "grad_norm": 9.730545043945312, "learning_rate": 8.627944173848407e-07, "loss": 0.3803, "step": 17509 }, { "epoch": 0.812152133580705, "grad_norm": 6.672234058380127, "learning_rate": 8.623811916860963e-07, "loss": 0.282, "step": 17510 }, { "epoch": 0.8121985157699444, "grad_norm": 4.6850714683532715, "learning_rate": 8.619680556266818e-07, "loss": 0.2527, "step": 17511 }, { "epoch": 0.8122448979591836, "grad_norm": 10.184040069580078, "learning_rate": 8.615550092155478e-07, "loss": 0.4002, "step": 17512 }, { "epoch": 0.812291280148423, "grad_norm": 5.679396152496338, "learning_rate": 8.611420524616404e-07, "loss": 0.2828, "step": 17513 }, { "epoch": 0.8123376623376624, "grad_norm": 5.27032470703125, "learning_rate": 8.607291853739075e-07, "loss": 0.2017, "step": 17514 }, { "epoch": 0.8123840445269017, "grad_norm": 4.199392318725586, "learning_rate": 8.60316407961293e-07, "loss": 0.249, "step": 17515 }, { "epoch": 0.812430426716141, "grad_norm": 5.355130195617676, "learning_rate": 8.599037202327409e-07, "loss": 0.2394, "step": 17516 }, { "epoch": 0.8124768089053803, "grad_norm": 7.774855613708496, "learning_rate": 8.594911221971919e-07, "loss": 0.3668, "step": 17517 }, { "epoch": 0.8125231910946197, "grad_norm": 3.797841787338257, "learning_rate": 8.590786138635826e-07, "loss": 0.2888, "step": 17518 }, { "epoch": 0.812569573283859, "grad_norm": 4.274763107299805, "learning_rate": 8.58666195240851e-07, "loss": 0.2973, "step": 17519 }, { "epoch": 0.8126159554730983, "grad_norm": 7.880095481872559, "learning_rate": 8.58253866337932e-07, "loss": 0.3127, "step": 17520 }, { "epoch": 0.8126623376623376, "grad_norm": 9.919681549072266, "learning_rate": 8.578416271637586e-07, "loss": 0.3614, "step": 17521 }, { "epoch": 0.812708719851577, "grad_norm": 8.72498893737793, "learning_rate": 8.574294777272612e-07, "loss": 0.3269, "step": 17522 }, { "epoch": 0.8127551020408164, "grad_norm": 9.683272361755371, "learning_rate": 8.570174180373702e-07, "loss": 0.327, "step": 17523 }, { "epoch": 0.8128014842300556, "grad_norm": 9.373458862304688, "learning_rate": 8.566054481030111e-07, "loss": 0.4547, "step": 17524 }, { "epoch": 0.812847866419295, "grad_norm": 7.214217662811279, "learning_rate": 8.561935679331096e-07, "loss": 0.3952, "step": 17525 }, { "epoch": 0.8128942486085343, "grad_norm": 4.1779351234436035, "learning_rate": 8.557817775365884e-07, "loss": 0.2945, "step": 17526 }, { "epoch": 0.8129406307977737, "grad_norm": 6.097902297973633, "learning_rate": 8.553700769223694e-07, "loss": 0.3072, "step": 17527 }, { "epoch": 0.812987012987013, "grad_norm": 7.3001933097839355, "learning_rate": 8.549584660993726e-07, "loss": 0.3277, "step": 17528 }, { "epoch": 0.8130333951762523, "grad_norm": 7.796271800994873, "learning_rate": 8.545469450765132e-07, "loss": 0.4711, "step": 17529 }, { "epoch": 0.8130797773654916, "grad_norm": 5.382660388946533, "learning_rate": 8.541355138627078e-07, "loss": 0.275, "step": 17530 }, { "epoch": 0.813126159554731, "grad_norm": 4.890957832336426, "learning_rate": 8.537241724668693e-07, "loss": 0.3376, "step": 17531 }, { "epoch": 0.8131725417439704, "grad_norm": 15.452507972717285, "learning_rate": 8.533129208979102e-07, "loss": 0.3307, "step": 17532 }, { "epoch": 0.8132189239332096, "grad_norm": 5.817788600921631, "learning_rate": 8.52901759164741e-07, "loss": 0.3334, "step": 17533 }, { "epoch": 0.813265306122449, "grad_norm": 7.8210768699646, "learning_rate": 8.524906872762661e-07, "loss": 0.3016, "step": 17534 }, { "epoch": 0.8133116883116883, "grad_norm": 6.393888473510742, "learning_rate": 8.520797052413932e-07, "loss": 0.2807, "step": 17535 }, { "epoch": 0.8133580705009277, "grad_norm": 15.023090362548828, "learning_rate": 8.516688130690253e-07, "loss": 0.3326, "step": 17536 }, { "epoch": 0.8134044526901669, "grad_norm": 10.391886711120605, "learning_rate": 8.512580107680645e-07, "loss": 0.3003, "step": 17537 }, { "epoch": 0.8134508348794063, "grad_norm": 9.855489730834961, "learning_rate": 8.508472983474125e-07, "loss": 0.3396, "step": 17538 }, { "epoch": 0.8134972170686456, "grad_norm": 6.959385871887207, "learning_rate": 8.504366758159638e-07, "loss": 0.2213, "step": 17539 }, { "epoch": 0.813543599257885, "grad_norm": 11.14720344543457, "learning_rate": 8.500261431826156e-07, "loss": 0.3753, "step": 17540 }, { "epoch": 0.8135899814471244, "grad_norm": 6.501521110534668, "learning_rate": 8.496157004562627e-07, "loss": 0.2874, "step": 17541 }, { "epoch": 0.8136363636363636, "grad_norm": 6.090621471405029, "learning_rate": 8.492053476457962e-07, "loss": 0.3749, "step": 17542 }, { "epoch": 0.813682745825603, "grad_norm": 7.609965801239014, "learning_rate": 8.487950847601073e-07, "loss": 0.3657, "step": 17543 }, { "epoch": 0.8137291280148423, "grad_norm": 13.186209678649902, "learning_rate": 8.483849118080828e-07, "loss": 0.4605, "step": 17544 }, { "epoch": 0.8137755102040817, "grad_norm": 6.018280506134033, "learning_rate": 8.479748287986095e-07, "loss": 0.319, "step": 17545 }, { "epoch": 0.8138218923933209, "grad_norm": 7.439058780670166, "learning_rate": 8.475648357405708e-07, "loss": 0.3288, "step": 17546 }, { "epoch": 0.8138682745825603, "grad_norm": 4.541872978210449, "learning_rate": 8.471549326428502e-07, "loss": 0.2863, "step": 17547 }, { "epoch": 0.8139146567717996, "grad_norm": 27.826751708984375, "learning_rate": 8.46745119514329e-07, "loss": 0.5787, "step": 17548 }, { "epoch": 0.813961038961039, "grad_norm": 9.10585880279541, "learning_rate": 8.463353963638832e-07, "loss": 0.2991, "step": 17549 }, { "epoch": 0.8140074211502782, "grad_norm": 11.999102592468262, "learning_rate": 8.459257632003898e-07, "loss": 0.5026, "step": 17550 }, { "epoch": 0.8140538033395176, "grad_norm": 5.474348068237305, "learning_rate": 8.455162200327233e-07, "loss": 0.3181, "step": 17551 }, { "epoch": 0.814100185528757, "grad_norm": 6.0045976638793945, "learning_rate": 8.451067668697571e-07, "loss": 0.3327, "step": 17552 }, { "epoch": 0.8141465677179963, "grad_norm": 5.402024745941162, "learning_rate": 8.446974037203626e-07, "loss": 0.3207, "step": 17553 }, { "epoch": 0.8141929499072357, "grad_norm": 8.562943458557129, "learning_rate": 8.442881305934059e-07, "loss": 0.3881, "step": 17554 }, { "epoch": 0.8142393320964749, "grad_norm": 6.653125286102295, "learning_rate": 8.438789474977549e-07, "loss": 0.2315, "step": 17555 }, { "epoch": 0.8142857142857143, "grad_norm": 6.1934943199157715, "learning_rate": 8.434698544422742e-07, "loss": 0.2737, "step": 17556 }, { "epoch": 0.8143320964749536, "grad_norm": 6.299509525299072, "learning_rate": 8.430608514358268e-07, "loss": 0.3409, "step": 17557 }, { "epoch": 0.814378478664193, "grad_norm": 5.783443450927734, "learning_rate": 8.426519384872733e-07, "loss": 0.3079, "step": 17558 }, { "epoch": 0.8144248608534322, "grad_norm": 15.471786499023438, "learning_rate": 8.422431156054745e-07, "loss": 0.3988, "step": 17559 }, { "epoch": 0.8144712430426716, "grad_norm": 8.33517837524414, "learning_rate": 8.418343827992842e-07, "loss": 0.4054, "step": 17560 }, { "epoch": 0.814517625231911, "grad_norm": 5.855793476104736, "learning_rate": 8.414257400775589e-07, "loss": 0.2727, "step": 17561 }, { "epoch": 0.8145640074211503, "grad_norm": 9.443668365478516, "learning_rate": 8.410171874491512e-07, "loss": 0.3673, "step": 17562 }, { "epoch": 0.8146103896103896, "grad_norm": 7.4972662925720215, "learning_rate": 8.406087249229127e-07, "loss": 0.3536, "step": 17563 }, { "epoch": 0.8146567717996289, "grad_norm": 12.37753963470459, "learning_rate": 8.402003525076935e-07, "loss": 0.3149, "step": 17564 }, { "epoch": 0.8147031539888683, "grad_norm": 10.49893569946289, "learning_rate": 8.397920702123386e-07, "loss": 0.2341, "step": 17565 }, { "epoch": 0.8147495361781076, "grad_norm": 8.100067138671875, "learning_rate": 8.393838780456936e-07, "loss": 0.2837, "step": 17566 }, { "epoch": 0.814795918367347, "grad_norm": 6.569957733154297, "learning_rate": 8.389757760166034e-07, "loss": 0.2935, "step": 17567 }, { "epoch": 0.8148423005565862, "grad_norm": 6.585439205169678, "learning_rate": 8.385677641339074e-07, "loss": 0.2719, "step": 17568 }, { "epoch": 0.8148886827458256, "grad_norm": 5.567282676696777, "learning_rate": 8.381598424064475e-07, "loss": 0.3428, "step": 17569 }, { "epoch": 0.814935064935065, "grad_norm": 7.5028910636901855, "learning_rate": 8.377520108430582e-07, "loss": 0.375, "step": 17570 }, { "epoch": 0.8149814471243043, "grad_norm": 5.3412675857543945, "learning_rate": 8.373442694525762e-07, "loss": 0.3623, "step": 17571 }, { "epoch": 0.8150278293135436, "grad_norm": 6.65432071685791, "learning_rate": 8.369366182438349e-07, "loss": 0.4282, "step": 17572 }, { "epoch": 0.8150742115027829, "grad_norm": 9.476006507873535, "learning_rate": 8.365290572256662e-07, "loss": 0.3493, "step": 17573 }, { "epoch": 0.8151205936920223, "grad_norm": 5.7012410163879395, "learning_rate": 8.361215864069005e-07, "loss": 0.3046, "step": 17574 }, { "epoch": 0.8151669758812616, "grad_norm": 8.359672546386719, "learning_rate": 8.357142057963635e-07, "loss": 0.397, "step": 17575 }, { "epoch": 0.8152133580705009, "grad_norm": 6.4655561447143555, "learning_rate": 8.353069154028814e-07, "loss": 0.2667, "step": 17576 }, { "epoch": 0.8152597402597402, "grad_norm": 7.390553951263428, "learning_rate": 8.348997152352784e-07, "loss": 0.3065, "step": 17577 }, { "epoch": 0.8153061224489796, "grad_norm": 6.311489105224609, "learning_rate": 8.344926053023767e-07, "loss": 0.3649, "step": 17578 }, { "epoch": 0.815352504638219, "grad_norm": 7.828622341156006, "learning_rate": 8.340855856129959e-07, "loss": 0.3607, "step": 17579 }, { "epoch": 0.8153988868274582, "grad_norm": 7.051769733428955, "learning_rate": 8.33678656175953e-07, "loss": 0.3056, "step": 17580 }, { "epoch": 0.8154452690166976, "grad_norm": 8.012967109680176, "learning_rate": 8.332718170000648e-07, "loss": 0.3336, "step": 17581 }, { "epoch": 0.8154916512059369, "grad_norm": 16.00693130493164, "learning_rate": 8.328650680941447e-07, "loss": 0.4111, "step": 17582 }, { "epoch": 0.8155380333951763, "grad_norm": 6.240589141845703, "learning_rate": 8.324584094670046e-07, "loss": 0.2835, "step": 17583 }, { "epoch": 0.8155844155844156, "grad_norm": 4.7202229499816895, "learning_rate": 8.320518411274564e-07, "loss": 0.3064, "step": 17584 }, { "epoch": 0.8156307977736549, "grad_norm": 7.897970199584961, "learning_rate": 8.316453630843057e-07, "loss": 0.2792, "step": 17585 }, { "epoch": 0.8156771799628942, "grad_norm": 8.51384162902832, "learning_rate": 8.31238975346359e-07, "loss": 0.3764, "step": 17586 }, { "epoch": 0.8157235621521336, "grad_norm": 22.04192352294922, "learning_rate": 8.308326779224218e-07, "loss": 0.3674, "step": 17587 }, { "epoch": 0.815769944341373, "grad_norm": 4.515523433685303, "learning_rate": 8.304264708212955e-07, "loss": 0.2846, "step": 17588 }, { "epoch": 0.8158163265306122, "grad_norm": 9.01017951965332, "learning_rate": 8.300203540517815e-07, "loss": 0.405, "step": 17589 }, { "epoch": 0.8158627087198516, "grad_norm": 4.976329803466797, "learning_rate": 8.296143276226759e-07, "loss": 0.2981, "step": 17590 }, { "epoch": 0.8159090909090909, "grad_norm": 5.3595476150512695, "learning_rate": 8.292083915427763e-07, "loss": 0.2588, "step": 17591 }, { "epoch": 0.8159554730983303, "grad_norm": 6.7245588302612305, "learning_rate": 8.28802545820877e-07, "loss": 0.3242, "step": 17592 }, { "epoch": 0.8160018552875695, "grad_norm": 9.619362831115723, "learning_rate": 8.283967904657703e-07, "loss": 0.2158, "step": 17593 }, { "epoch": 0.8160482374768089, "grad_norm": 5.070460319519043, "learning_rate": 8.279911254862483e-07, "loss": 0.302, "step": 17594 }, { "epoch": 0.8160946196660482, "grad_norm": 7.368754863739014, "learning_rate": 8.275855508910968e-07, "loss": 0.3979, "step": 17595 }, { "epoch": 0.8161410018552876, "grad_norm": 10.402363777160645, "learning_rate": 8.271800666891039e-07, "loss": 0.4486, "step": 17596 }, { "epoch": 0.816187384044527, "grad_norm": 6.534378528594971, "learning_rate": 8.267746728890535e-07, "loss": 0.3555, "step": 17597 }, { "epoch": 0.8162337662337662, "grad_norm": 5.9013166427612305, "learning_rate": 8.263693694997288e-07, "loss": 0.245, "step": 17598 }, { "epoch": 0.8162801484230056, "grad_norm": 8.71981430053711, "learning_rate": 8.259641565299104e-07, "loss": 0.426, "step": 17599 }, { "epoch": 0.8163265306122449, "grad_norm": 6.529290676116943, "learning_rate": 8.255590339883779e-07, "loss": 0.2894, "step": 17600 }, { "epoch": 0.8163729128014843, "grad_norm": 4.7113518714904785, "learning_rate": 8.25154001883906e-07, "loss": 0.3311, "step": 17601 }, { "epoch": 0.8164192949907235, "grad_norm": 5.332980155944824, "learning_rate": 8.247490602252712e-07, "loss": 0.2879, "step": 17602 }, { "epoch": 0.8164656771799629, "grad_norm": 8.978302001953125, "learning_rate": 8.243442090212455e-07, "loss": 0.3556, "step": 17603 }, { "epoch": 0.8165120593692022, "grad_norm": 6.288593769073486, "learning_rate": 8.239394482805996e-07, "loss": 0.3749, "step": 17604 }, { "epoch": 0.8165584415584416, "grad_norm": 6.533034801483154, "learning_rate": 8.23534778012105e-07, "loss": 0.3497, "step": 17605 }, { "epoch": 0.8166048237476808, "grad_norm": 32.79776382446289, "learning_rate": 8.231301982245249e-07, "loss": 0.336, "step": 17606 }, { "epoch": 0.8166512059369202, "grad_norm": 6.721001625061035, "learning_rate": 8.227257089266255e-07, "loss": 0.3229, "step": 17607 }, { "epoch": 0.8166975881261596, "grad_norm": 12.14633560180664, "learning_rate": 8.223213101271709e-07, "loss": 0.3203, "step": 17608 }, { "epoch": 0.8167439703153989, "grad_norm": 6.092187881469727, "learning_rate": 8.219170018349215e-07, "loss": 0.2535, "step": 17609 }, { "epoch": 0.8167903525046383, "grad_norm": 12.918785095214844, "learning_rate": 8.215127840586379e-07, "loss": 0.4451, "step": 17610 }, { "epoch": 0.8168367346938775, "grad_norm": 6.47118616104126, "learning_rate": 8.211086568070747e-07, "loss": 0.384, "step": 17611 }, { "epoch": 0.8168831168831169, "grad_norm": 11.680197715759277, "learning_rate": 8.207046200889879e-07, "loss": 0.4031, "step": 17612 }, { "epoch": 0.8169294990723562, "grad_norm": 13.553400039672852, "learning_rate": 8.203006739131314e-07, "loss": 0.4245, "step": 17613 }, { "epoch": 0.8169758812615956, "grad_norm": 7.821572780609131, "learning_rate": 8.198968182882566e-07, "loss": 0.2945, "step": 17614 }, { "epoch": 0.8170222634508348, "grad_norm": 8.2980318069458, "learning_rate": 8.194930532231121e-07, "loss": 0.4271, "step": 17615 }, { "epoch": 0.8170686456400742, "grad_norm": 5.8721489906311035, "learning_rate": 8.19089378726447e-07, "loss": 0.3552, "step": 17616 }, { "epoch": 0.8171150278293136, "grad_norm": 13.463052749633789, "learning_rate": 8.186857948070037e-07, "loss": 0.4045, "step": 17617 }, { "epoch": 0.8171614100185529, "grad_norm": 6.20115852355957, "learning_rate": 8.182823014735274e-07, "loss": 0.3095, "step": 17618 }, { "epoch": 0.8172077922077922, "grad_norm": 11.05457592010498, "learning_rate": 8.178788987347597e-07, "loss": 0.3908, "step": 17619 }, { "epoch": 0.8172541743970315, "grad_norm": 4.875563144683838, "learning_rate": 8.174755865994399e-07, "loss": 0.3263, "step": 17620 }, { "epoch": 0.8173005565862709, "grad_norm": 8.794968605041504, "learning_rate": 8.170723650763062e-07, "loss": 0.3483, "step": 17621 }, { "epoch": 0.8173469387755102, "grad_norm": 6.4114227294921875, "learning_rate": 8.166692341740923e-07, "loss": 0.3294, "step": 17622 }, { "epoch": 0.8173933209647495, "grad_norm": 7.380402565002441, "learning_rate": 8.162661939015332e-07, "loss": 0.2751, "step": 17623 }, { "epoch": 0.8174397031539888, "grad_norm": 7.543894290924072, "learning_rate": 8.158632442673603e-07, "loss": 0.3642, "step": 17624 }, { "epoch": 0.8174860853432282, "grad_norm": 8.341096878051758, "learning_rate": 8.154603852803034e-07, "loss": 0.2935, "step": 17625 }, { "epoch": 0.8175324675324676, "grad_norm": 8.361140251159668, "learning_rate": 8.150576169490898e-07, "loss": 0.2796, "step": 17626 }, { "epoch": 0.8175788497217069, "grad_norm": 9.93224048614502, "learning_rate": 8.146549392824471e-07, "loss": 0.3613, "step": 17627 }, { "epoch": 0.8176252319109462, "grad_norm": 10.185015678405762, "learning_rate": 8.142523522890961e-07, "loss": 0.4411, "step": 17628 }, { "epoch": 0.8176716141001855, "grad_norm": 8.982808113098145, "learning_rate": 8.138498559777608e-07, "loss": 0.439, "step": 17629 }, { "epoch": 0.8177179962894249, "grad_norm": 8.420578002929688, "learning_rate": 8.134474503571599e-07, "loss": 0.3193, "step": 17630 }, { "epoch": 0.8177643784786642, "grad_norm": 6.883047580718994, "learning_rate": 8.130451354360114e-07, "loss": 0.235, "step": 17631 }, { "epoch": 0.8178107606679035, "grad_norm": 6.458575248718262, "learning_rate": 8.126429112230333e-07, "loss": 0.3065, "step": 17632 }, { "epoch": 0.8178571428571428, "grad_norm": 6.068408012390137, "learning_rate": 8.12240777726937e-07, "loss": 0.322, "step": 17633 }, { "epoch": 0.8179035250463822, "grad_norm": 10.013967514038086, "learning_rate": 8.11838734956435e-07, "loss": 0.4251, "step": 17634 }, { "epoch": 0.8179499072356216, "grad_norm": 8.588860511779785, "learning_rate": 8.11436782920238e-07, "loss": 0.4456, "step": 17635 }, { "epoch": 0.8179962894248608, "grad_norm": 8.50987434387207, "learning_rate": 8.110349216270541e-07, "loss": 0.3048, "step": 17636 }, { "epoch": 0.8180426716141002, "grad_norm": 9.995835304260254, "learning_rate": 8.106331510855887e-07, "loss": 0.5351, "step": 17637 }, { "epoch": 0.8180890538033395, "grad_norm": 8.729511260986328, "learning_rate": 8.102314713045478e-07, "loss": 0.2836, "step": 17638 }, { "epoch": 0.8181354359925789, "grad_norm": 9.866652488708496, "learning_rate": 8.09829882292631e-07, "loss": 0.4286, "step": 17639 }, { "epoch": 0.8181818181818182, "grad_norm": 8.317923545837402, "learning_rate": 8.094283840585398e-07, "loss": 0.3859, "step": 17640 }, { "epoch": 0.8182282003710575, "grad_norm": 11.114726066589355, "learning_rate": 8.090269766109721e-07, "loss": 0.4468, "step": 17641 }, { "epoch": 0.8182745825602968, "grad_norm": 5.931258201599121, "learning_rate": 8.086256599586245e-07, "loss": 0.2876, "step": 17642 }, { "epoch": 0.8183209647495362, "grad_norm": 10.402873992919922, "learning_rate": 8.08224434110193e-07, "loss": 0.4364, "step": 17643 }, { "epoch": 0.8183673469387756, "grad_norm": 7.471714496612549, "learning_rate": 8.078232990743668e-07, "loss": 0.3704, "step": 17644 }, { "epoch": 0.8184137291280148, "grad_norm": 8.411039352416992, "learning_rate": 8.07422254859837e-07, "loss": 0.3225, "step": 17645 }, { "epoch": 0.8184601113172542, "grad_norm": 7.013874530792236, "learning_rate": 8.070213014752931e-07, "loss": 0.2817, "step": 17646 }, { "epoch": 0.8185064935064935, "grad_norm": 6.163195610046387, "learning_rate": 8.066204389294214e-07, "loss": 0.3843, "step": 17647 }, { "epoch": 0.8185528756957329, "grad_norm": 4.625142574310303, "learning_rate": 8.062196672309058e-07, "loss": 0.2469, "step": 17648 }, { "epoch": 0.8185992578849721, "grad_norm": 7.94792366027832, "learning_rate": 8.058189863884308e-07, "loss": 0.3121, "step": 17649 }, { "epoch": 0.8186456400742115, "grad_norm": 10.298206329345703, "learning_rate": 8.054183964106737e-07, "loss": 0.3639, "step": 17650 }, { "epoch": 0.8186920222634508, "grad_norm": 8.401200294494629, "learning_rate": 8.050178973063144e-07, "loss": 0.3473, "step": 17651 }, { "epoch": 0.8187384044526902, "grad_norm": 7.226358890533447, "learning_rate": 8.046174890840302e-07, "loss": 0.307, "step": 17652 }, { "epoch": 0.8187847866419296, "grad_norm": 7.635320663452148, "learning_rate": 8.04217171752495e-07, "loss": 0.3058, "step": 17653 }, { "epoch": 0.8188311688311688, "grad_norm": 5.725573539733887, "learning_rate": 8.038169453203831e-07, "loss": 0.2594, "step": 17654 }, { "epoch": 0.8188775510204082, "grad_norm": 4.611710071563721, "learning_rate": 8.034168097963624e-07, "loss": 0.3733, "step": 17655 }, { "epoch": 0.8189239332096475, "grad_norm": 8.645358085632324, "learning_rate": 8.03016765189103e-07, "loss": 0.4396, "step": 17656 }, { "epoch": 0.8189703153988869, "grad_norm": 3.79421067237854, "learning_rate": 8.026168115072719e-07, "loss": 0.1891, "step": 17657 }, { "epoch": 0.8190166975881261, "grad_norm": 4.390519618988037, "learning_rate": 8.022169487595338e-07, "loss": 0.3165, "step": 17658 }, { "epoch": 0.8190630797773655, "grad_norm": 10.141322135925293, "learning_rate": 8.018171769545513e-07, "loss": 0.366, "step": 17659 }, { "epoch": 0.8191094619666048, "grad_norm": 5.21924352645874, "learning_rate": 8.014174961009862e-07, "loss": 0.2474, "step": 17660 }, { "epoch": 0.8191558441558442, "grad_norm": 5.093456268310547, "learning_rate": 8.010179062074957e-07, "loss": 0.3097, "step": 17661 }, { "epoch": 0.8192022263450834, "grad_norm": 5.054572582244873, "learning_rate": 8.006184072827372e-07, "loss": 0.3591, "step": 17662 }, { "epoch": 0.8192486085343228, "grad_norm": 10.359734535217285, "learning_rate": 8.002189993353665e-07, "loss": 0.3666, "step": 17663 }, { "epoch": 0.8192949907235622, "grad_norm": 6.661128520965576, "learning_rate": 7.998196823740357e-07, "loss": 0.3846, "step": 17664 }, { "epoch": 0.8193413729128015, "grad_norm": 4.451253414154053, "learning_rate": 7.99420456407397e-07, "loss": 0.262, "step": 17665 }, { "epoch": 0.8193877551020409, "grad_norm": 7.2513427734375, "learning_rate": 7.990213214440978e-07, "loss": 0.2538, "step": 17666 }, { "epoch": 0.8194341372912801, "grad_norm": 10.051239967346191, "learning_rate": 7.98622277492786e-07, "loss": 0.4911, "step": 17667 }, { "epoch": 0.8194805194805195, "grad_norm": 6.875899791717529, "learning_rate": 7.982233245621063e-07, "loss": 0.3317, "step": 17668 }, { "epoch": 0.8195269016697588, "grad_norm": 9.837597846984863, "learning_rate": 7.978244626607023e-07, "loss": 0.3499, "step": 17669 }, { "epoch": 0.8195732838589982, "grad_norm": 9.314714431762695, "learning_rate": 7.974256917972145e-07, "loss": 0.4233, "step": 17670 }, { "epoch": 0.8196196660482374, "grad_norm": 6.990706443786621, "learning_rate": 7.970270119802836e-07, "loss": 0.2772, "step": 17671 }, { "epoch": 0.8196660482374768, "grad_norm": 7.0847554206848145, "learning_rate": 7.966284232185451e-07, "loss": 0.3602, "step": 17672 }, { "epoch": 0.8197124304267162, "grad_norm": 5.862334251403809, "learning_rate": 7.96229925520634e-07, "loss": 0.2952, "step": 17673 }, { "epoch": 0.8197588126159555, "grad_norm": 7.875053405761719, "learning_rate": 7.958315188951848e-07, "loss": 0.3071, "step": 17674 }, { "epoch": 0.8198051948051948, "grad_norm": 7.387192726135254, "learning_rate": 7.954332033508283e-07, "loss": 0.3371, "step": 17675 }, { "epoch": 0.8198515769944341, "grad_norm": 7.886044025421143, "learning_rate": 7.950349788961948e-07, "loss": 0.3622, "step": 17676 }, { "epoch": 0.8198979591836735, "grad_norm": 4.041008472442627, "learning_rate": 7.946368455399095e-07, "loss": 0.279, "step": 17677 }, { "epoch": 0.8199443413729128, "grad_norm": 4.394217014312744, "learning_rate": 7.94238803290599e-07, "loss": 0.2918, "step": 17678 }, { "epoch": 0.8199907235621521, "grad_norm": 12.511848449707031, "learning_rate": 7.938408521568864e-07, "loss": 0.4495, "step": 17679 }, { "epoch": 0.8200371057513914, "grad_norm": 5.104051113128662, "learning_rate": 7.934429921473929e-07, "loss": 0.3002, "step": 17680 }, { "epoch": 0.8200834879406308, "grad_norm": 7.068357944488525, "learning_rate": 7.930452232707386e-07, "loss": 0.2679, "step": 17681 }, { "epoch": 0.8201298701298702, "grad_norm": 6.876693248748779, "learning_rate": 7.926475455355415e-07, "loss": 0.3757, "step": 17682 }, { "epoch": 0.8201762523191095, "grad_norm": 11.757162094116211, "learning_rate": 7.922499589504151e-07, "loss": 0.2984, "step": 17683 }, { "epoch": 0.8202226345083488, "grad_norm": 7.916018009185791, "learning_rate": 7.918524635239739e-07, "loss": 0.3339, "step": 17684 }, { "epoch": 0.8202690166975881, "grad_norm": 4.1764750480651855, "learning_rate": 7.914550592648296e-07, "loss": 0.2818, "step": 17685 }, { "epoch": 0.8203153988868275, "grad_norm": 5.195005893707275, "learning_rate": 7.910577461815915e-07, "loss": 0.207, "step": 17686 }, { "epoch": 0.8203617810760668, "grad_norm": 5.412891864776611, "learning_rate": 7.906605242828691e-07, "loss": 0.2349, "step": 17687 }, { "epoch": 0.8204081632653061, "grad_norm": 6.8983001708984375, "learning_rate": 7.902633935772647e-07, "loss": 0.2973, "step": 17688 }, { "epoch": 0.8204545454545454, "grad_norm": 6.9970855712890625, "learning_rate": 7.898663540733836e-07, "loss": 0.2753, "step": 17689 }, { "epoch": 0.8205009276437848, "grad_norm": 10.315656661987305, "learning_rate": 7.894694057798269e-07, "loss": 0.3214, "step": 17690 }, { "epoch": 0.8205473098330242, "grad_norm": 5.311362266540527, "learning_rate": 7.890725487051953e-07, "loss": 0.3218, "step": 17691 }, { "epoch": 0.8205936920222634, "grad_norm": 7.73381233215332, "learning_rate": 7.886757828580865e-07, "loss": 0.4058, "step": 17692 }, { "epoch": 0.8206400742115028, "grad_norm": 4.762331008911133, "learning_rate": 7.882791082470947e-07, "loss": 0.252, "step": 17693 }, { "epoch": 0.8206864564007421, "grad_norm": 6.613250255584717, "learning_rate": 7.878825248808148e-07, "loss": 0.3258, "step": 17694 }, { "epoch": 0.8207328385899815, "grad_norm": 8.950505256652832, "learning_rate": 7.874860327678379e-07, "loss": 0.2952, "step": 17695 }, { "epoch": 0.8207792207792208, "grad_norm": 13.928476333618164, "learning_rate": 7.870896319167548e-07, "loss": 0.2859, "step": 17696 }, { "epoch": 0.8208256029684601, "grad_norm": 6.418491363525391, "learning_rate": 7.866933223361523e-07, "loss": 0.3389, "step": 17697 }, { "epoch": 0.8208719851576994, "grad_norm": 10.718427658081055, "learning_rate": 7.862971040346179e-07, "loss": 0.3915, "step": 17698 }, { "epoch": 0.8209183673469388, "grad_norm": 6.286076545715332, "learning_rate": 7.859009770207332e-07, "loss": 0.2994, "step": 17699 }, { "epoch": 0.8209647495361782, "grad_norm": 4.518238544464111, "learning_rate": 7.855049413030807e-07, "loss": 0.2468, "step": 17700 }, { "epoch": 0.8210111317254174, "grad_norm": 5.093091011047363, "learning_rate": 7.851089968902414e-07, "loss": 0.3346, "step": 17701 }, { "epoch": 0.8210575139146568, "grad_norm": 11.887401580810547, "learning_rate": 7.847131437907923e-07, "loss": 0.4225, "step": 17702 }, { "epoch": 0.8211038961038961, "grad_norm": 4.998045921325684, "learning_rate": 7.843173820133104e-07, "loss": 0.2688, "step": 17703 }, { "epoch": 0.8211502782931355, "grad_norm": 8.174724578857422, "learning_rate": 7.839217115663683e-07, "loss": 0.3129, "step": 17704 }, { "epoch": 0.8211966604823747, "grad_norm": 7.333927631378174, "learning_rate": 7.83526132458538e-07, "loss": 0.2548, "step": 17705 }, { "epoch": 0.8212430426716141, "grad_norm": 10.234649658203125, "learning_rate": 7.831306446983905e-07, "loss": 0.356, "step": 17706 }, { "epoch": 0.8212894248608534, "grad_norm": 6.019763946533203, "learning_rate": 7.827352482944933e-07, "loss": 0.3999, "step": 17707 }, { "epoch": 0.8213358070500928, "grad_norm": 10.59515380859375, "learning_rate": 7.82339943255413e-07, "loss": 0.3051, "step": 17708 }, { "epoch": 0.8213821892393321, "grad_norm": 12.577461242675781, "learning_rate": 7.819447295897137e-07, "loss": 0.4164, "step": 17709 }, { "epoch": 0.8214285714285714, "grad_norm": 12.658018112182617, "learning_rate": 7.815496073059565e-07, "loss": 0.2757, "step": 17710 }, { "epoch": 0.8214749536178108, "grad_norm": 4.203352451324463, "learning_rate": 7.811545764127016e-07, "loss": 0.2757, "step": 17711 }, { "epoch": 0.8215213358070501, "grad_norm": 4.552214622497559, "learning_rate": 7.807596369185077e-07, "loss": 0.2392, "step": 17712 }, { "epoch": 0.8215677179962895, "grad_norm": 5.385985374450684, "learning_rate": 7.803647888319305e-07, "loss": 0.2787, "step": 17713 }, { "epoch": 0.8216141001855287, "grad_norm": 7.801124572753906, "learning_rate": 7.799700321615261e-07, "loss": 0.3312, "step": 17714 }, { "epoch": 0.8216604823747681, "grad_norm": 4.241288661956787, "learning_rate": 7.79575366915844e-07, "loss": 0.2789, "step": 17715 }, { "epoch": 0.8217068645640074, "grad_norm": 9.750782012939453, "learning_rate": 7.791807931034356e-07, "loss": 0.332, "step": 17716 }, { "epoch": 0.8217532467532468, "grad_norm": 9.224567413330078, "learning_rate": 7.787863107328486e-07, "loss": 0.2692, "step": 17717 }, { "epoch": 0.821799628942486, "grad_norm": 7.013314247131348, "learning_rate": 7.783919198126299e-07, "loss": 0.243, "step": 17718 }, { "epoch": 0.8218460111317254, "grad_norm": 9.25136661529541, "learning_rate": 7.77997620351324e-07, "loss": 0.2885, "step": 17719 }, { "epoch": 0.8218923933209648, "grad_norm": 9.639337539672852, "learning_rate": 7.776034123574738e-07, "loss": 0.2864, "step": 17720 }, { "epoch": 0.8219387755102041, "grad_norm": 8.492086410522461, "learning_rate": 7.772092958396172e-07, "loss": 0.293, "step": 17721 }, { "epoch": 0.8219851576994434, "grad_norm": 11.418362617492676, "learning_rate": 7.768152708062943e-07, "loss": 0.3779, "step": 17722 }, { "epoch": 0.8220315398886827, "grad_norm": 8.273548126220703, "learning_rate": 7.764213372660407e-07, "loss": 0.387, "step": 17723 }, { "epoch": 0.8220779220779221, "grad_norm": 6.101943016052246, "learning_rate": 7.76027495227391e-07, "loss": 0.272, "step": 17724 }, { "epoch": 0.8221243042671614, "grad_norm": 8.90236759185791, "learning_rate": 7.756337446988793e-07, "loss": 0.3856, "step": 17725 }, { "epoch": 0.8221706864564008, "grad_norm": 5.94938325881958, "learning_rate": 7.75240085689033e-07, "loss": 0.3046, "step": 17726 }, { "epoch": 0.82221706864564, "grad_norm": 9.396965026855469, "learning_rate": 7.748465182063819e-07, "loss": 0.3229, "step": 17727 }, { "epoch": 0.8222634508348794, "grad_norm": 7.769534111022949, "learning_rate": 7.744530422594521e-07, "loss": 0.3394, "step": 17728 }, { "epoch": 0.8223098330241188, "grad_norm": 7.6405534744262695, "learning_rate": 7.740596578567689e-07, "loss": 0.3656, "step": 17729 }, { "epoch": 0.8223562152133581, "grad_norm": 10.442855834960938, "learning_rate": 7.736663650068537e-07, "loss": 0.3672, "step": 17730 }, { "epoch": 0.8224025974025974, "grad_norm": 6.387883186340332, "learning_rate": 7.732731637182294e-07, "loss": 0.2864, "step": 17731 }, { "epoch": 0.8224489795918367, "grad_norm": 14.714110374450684, "learning_rate": 7.728800539994113e-07, "loss": 0.4887, "step": 17732 }, { "epoch": 0.8224953617810761, "grad_norm": 8.945558547973633, "learning_rate": 7.72487035858917e-07, "loss": 0.4465, "step": 17733 }, { "epoch": 0.8225417439703154, "grad_norm": 6.837920665740967, "learning_rate": 7.720941093052614e-07, "loss": 0.355, "step": 17734 }, { "epoch": 0.8225881261595547, "grad_norm": 12.334179878234863, "learning_rate": 7.717012743469571e-07, "loss": 0.3171, "step": 17735 }, { "epoch": 0.822634508348794, "grad_norm": 11.786164283752441, "learning_rate": 7.713085309925156e-07, "loss": 0.3608, "step": 17736 }, { "epoch": 0.8226808905380334, "grad_norm": 15.356032371520996, "learning_rate": 7.709158792504434e-07, "loss": 0.4038, "step": 17737 }, { "epoch": 0.8227272727272728, "grad_norm": 5.171529769897461, "learning_rate": 7.705233191292478e-07, "loss": 0.3316, "step": 17738 }, { "epoch": 0.8227736549165121, "grad_norm": 5.328051567077637, "learning_rate": 7.701308506374333e-07, "loss": 0.3042, "step": 17739 }, { "epoch": 0.8228200371057514, "grad_norm": 8.994418144226074, "learning_rate": 7.697384737835034e-07, "loss": 0.3561, "step": 17740 }, { "epoch": 0.8228664192949907, "grad_norm": 3.663208484649658, "learning_rate": 7.693461885759584e-07, "loss": 0.3001, "step": 17741 }, { "epoch": 0.8229128014842301, "grad_norm": 5.1508097648620605, "learning_rate": 7.689539950232977e-07, "loss": 0.3944, "step": 17742 }, { "epoch": 0.8229591836734694, "grad_norm": 6.131763458251953, "learning_rate": 7.685618931340155e-07, "loss": 0.2882, "step": 17743 }, { "epoch": 0.8230055658627087, "grad_norm": 6.669986248016357, "learning_rate": 7.681698829166085e-07, "loss": 0.1975, "step": 17744 }, { "epoch": 0.823051948051948, "grad_norm": 6.415719985961914, "learning_rate": 7.677779643795691e-07, "loss": 0.328, "step": 17745 }, { "epoch": 0.8230983302411874, "grad_norm": 6.184061050415039, "learning_rate": 7.673861375313873e-07, "loss": 0.2882, "step": 17746 }, { "epoch": 0.8231447124304268, "grad_norm": 6.006071090698242, "learning_rate": 7.669944023805536e-07, "loss": 0.2186, "step": 17747 }, { "epoch": 0.823191094619666, "grad_norm": 5.841894626617432, "learning_rate": 7.666027589355529e-07, "loss": 0.3031, "step": 17748 }, { "epoch": 0.8232374768089054, "grad_norm": 9.915569305419922, "learning_rate": 7.662112072048699e-07, "loss": 0.4156, "step": 17749 }, { "epoch": 0.8232838589981447, "grad_norm": 8.864448547363281, "learning_rate": 7.658197471969886e-07, "loss": 0.2918, "step": 17750 }, { "epoch": 0.8233302411873841, "grad_norm": 4.430248737335205, "learning_rate": 7.654283789203887e-07, "loss": 0.2873, "step": 17751 }, { "epoch": 0.8233766233766234, "grad_norm": 8.150652885437012, "learning_rate": 7.650371023835495e-07, "loss": 0.3133, "step": 17752 }, { "epoch": 0.8234230055658627, "grad_norm": 6.137041091918945, "learning_rate": 7.646459175949489e-07, "loss": 0.2645, "step": 17753 }, { "epoch": 0.823469387755102, "grad_norm": 4.848258972167969, "learning_rate": 7.642548245630599e-07, "loss": 0.33, "step": 17754 }, { "epoch": 0.8235157699443414, "grad_norm": 9.362318992614746, "learning_rate": 7.638638232963558e-07, "loss": 0.2841, "step": 17755 }, { "epoch": 0.8235621521335807, "grad_norm": 6.874330520629883, "learning_rate": 7.63472913803307e-07, "loss": 0.3341, "step": 17756 }, { "epoch": 0.82360853432282, "grad_norm": 6.800512313842773, "learning_rate": 7.630820960923835e-07, "loss": 0.2495, "step": 17757 }, { "epoch": 0.8236549165120594, "grad_norm": 5.785367488861084, "learning_rate": 7.626913701720529e-07, "loss": 0.2711, "step": 17758 }, { "epoch": 0.8237012987012987, "grad_norm": 8.785845756530762, "learning_rate": 7.623007360507778e-07, "loss": 0.3258, "step": 17759 }, { "epoch": 0.8237476808905381, "grad_norm": 9.149919509887695, "learning_rate": 7.619101937370216e-07, "loss": 0.3662, "step": 17760 }, { "epoch": 0.8237940630797773, "grad_norm": 14.42922306060791, "learning_rate": 7.615197432392462e-07, "loss": 0.3928, "step": 17761 }, { "epoch": 0.8238404452690167, "grad_norm": 7.39495849609375, "learning_rate": 7.611293845659096e-07, "loss": 0.3614, "step": 17762 }, { "epoch": 0.823886827458256, "grad_norm": 9.697149276733398, "learning_rate": 7.607391177254692e-07, "loss": 0.4071, "step": 17763 }, { "epoch": 0.8239332096474954, "grad_norm": 7.477863311767578, "learning_rate": 7.603489427263811e-07, "loss": 0.268, "step": 17764 }, { "epoch": 0.8239795918367347, "grad_norm": 4.304914474487305, "learning_rate": 7.599588595770957e-07, "loss": 0.2532, "step": 17765 }, { "epoch": 0.824025974025974, "grad_norm": 8.164870262145996, "learning_rate": 7.595688682860652e-07, "loss": 0.3812, "step": 17766 }, { "epoch": 0.8240723562152134, "grad_norm": 7.991227149963379, "learning_rate": 7.59178968861739e-07, "loss": 0.3176, "step": 17767 }, { "epoch": 0.8241187384044527, "grad_norm": 7.206126689910889, "learning_rate": 7.587891613125631e-07, "loss": 0.2523, "step": 17768 }, { "epoch": 0.8241651205936921, "grad_norm": 11.011062622070312, "learning_rate": 7.583994456469845e-07, "loss": 0.4827, "step": 17769 }, { "epoch": 0.8242115027829313, "grad_norm": 5.782947540283203, "learning_rate": 7.580098218734433e-07, "loss": 0.2874, "step": 17770 }, { "epoch": 0.8242578849721707, "grad_norm": 5.302613735198975, "learning_rate": 7.576202900003821e-07, "loss": 0.2816, "step": 17771 }, { "epoch": 0.82430426716141, "grad_norm": 7.62282133102417, "learning_rate": 7.572308500362396e-07, "loss": 0.3288, "step": 17772 }, { "epoch": 0.8243506493506494, "grad_norm": 6.499369144439697, "learning_rate": 7.568415019894532e-07, "loss": 0.3517, "step": 17773 }, { "epoch": 0.8243970315398886, "grad_norm": 9.852099418640137, "learning_rate": 7.564522458684581e-07, "loss": 0.3331, "step": 17774 }, { "epoch": 0.824443413729128, "grad_norm": 4.65464448928833, "learning_rate": 7.56063081681686e-07, "loss": 0.2742, "step": 17775 }, { "epoch": 0.8244897959183674, "grad_norm": 10.482848167419434, "learning_rate": 7.556740094375691e-07, "loss": 0.3602, "step": 17776 }, { "epoch": 0.8245361781076067, "grad_norm": 4.9717841148376465, "learning_rate": 7.552850291445362e-07, "loss": 0.2274, "step": 17777 }, { "epoch": 0.824582560296846, "grad_norm": 4.627678394317627, "learning_rate": 7.54896140811014e-07, "loss": 0.2913, "step": 17778 }, { "epoch": 0.8246289424860853, "grad_norm": 17.82675552368164, "learning_rate": 7.545073444454276e-07, "loss": 0.4736, "step": 17779 }, { "epoch": 0.8246753246753247, "grad_norm": 4.717238426208496, "learning_rate": 7.541186400562018e-07, "loss": 0.2841, "step": 17780 }, { "epoch": 0.824721706864564, "grad_norm": 9.072497367858887, "learning_rate": 7.537300276517551e-07, "loss": 0.2455, "step": 17781 }, { "epoch": 0.8247680890538034, "grad_norm": 11.77412223815918, "learning_rate": 7.533415072405075e-07, "loss": 0.4097, "step": 17782 }, { "epoch": 0.8248144712430426, "grad_norm": 4.694512367248535, "learning_rate": 7.529530788308764e-07, "loss": 0.2301, "step": 17783 }, { "epoch": 0.824860853432282, "grad_norm": 6.296116352081299, "learning_rate": 7.525647424312766e-07, "loss": 0.3019, "step": 17784 }, { "epoch": 0.8249072356215214, "grad_norm": 6.3636369705200195, "learning_rate": 7.521764980501228e-07, "loss": 0.272, "step": 17785 }, { "epoch": 0.8249536178107607, "grad_norm": 7.581340312957764, "learning_rate": 7.517883456958231e-07, "loss": 0.3025, "step": 17786 }, { "epoch": 0.825, "grad_norm": 8.588394165039062, "learning_rate": 7.514002853767887e-07, "loss": 0.3712, "step": 17787 }, { "epoch": 0.8250463821892393, "grad_norm": 7.377432346343994, "learning_rate": 7.510123171014255e-07, "loss": 0.2919, "step": 17788 }, { "epoch": 0.8250927643784787, "grad_norm": 5.973151206970215, "learning_rate": 7.506244408781399e-07, "loss": 0.2726, "step": 17789 }, { "epoch": 0.825139146567718, "grad_norm": 13.317928314208984, "learning_rate": 7.502366567153346e-07, "loss": 0.3712, "step": 17790 }, { "epoch": 0.8251855287569573, "grad_norm": 8.385247230529785, "learning_rate": 7.498489646214113e-07, "loss": 0.2937, "step": 17791 }, { "epoch": 0.8252319109461966, "grad_norm": 7.116456985473633, "learning_rate": 7.494613646047677e-07, "loss": 0.3056, "step": 17792 }, { "epoch": 0.825278293135436, "grad_norm": 6.812318801879883, "learning_rate": 7.490738566738015e-07, "loss": 0.2526, "step": 17793 }, { "epoch": 0.8253246753246753, "grad_norm": 13.863222122192383, "learning_rate": 7.486864408369082e-07, "loss": 0.3756, "step": 17794 }, { "epoch": 0.8253710575139147, "grad_norm": 5.759734153747559, "learning_rate": 7.482991171024806e-07, "loss": 0.2986, "step": 17795 }, { "epoch": 0.825417439703154, "grad_norm": 11.569133758544922, "learning_rate": 7.479118854789114e-07, "loss": 0.3497, "step": 17796 }, { "epoch": 0.8254638218923933, "grad_norm": 7.33140754699707, "learning_rate": 7.475247459745871e-07, "loss": 0.3464, "step": 17797 }, { "epoch": 0.8255102040816327, "grad_norm": 15.847152709960938, "learning_rate": 7.471376985978968e-07, "loss": 0.4682, "step": 17798 }, { "epoch": 0.825556586270872, "grad_norm": 8.308186531066895, "learning_rate": 7.467507433572246e-07, "loss": 0.434, "step": 17799 }, { "epoch": 0.8256029684601113, "grad_norm": 6.045764923095703, "learning_rate": 7.46363880260954e-07, "loss": 0.335, "step": 17800 }, { "epoch": 0.8256493506493506, "grad_norm": 8.089476585388184, "learning_rate": 7.459771093174672e-07, "loss": 0.3193, "step": 17801 }, { "epoch": 0.82569573283859, "grad_norm": 7.4710469245910645, "learning_rate": 7.45590430535143e-07, "loss": 0.3209, "step": 17802 }, { "epoch": 0.8257421150278293, "grad_norm": 4.518257141113281, "learning_rate": 7.452038439223574e-07, "loss": 0.2578, "step": 17803 }, { "epoch": 0.8257884972170686, "grad_norm": 4.701404571533203, "learning_rate": 7.448173494874861e-07, "loss": 0.2804, "step": 17804 }, { "epoch": 0.825834879406308, "grad_norm": 8.780815124511719, "learning_rate": 7.444309472389027e-07, "loss": 0.3964, "step": 17805 }, { "epoch": 0.8258812615955473, "grad_norm": 6.232537746429443, "learning_rate": 7.44044637184978e-07, "loss": 0.3546, "step": 17806 }, { "epoch": 0.8259276437847867, "grad_norm": 6.6543121337890625, "learning_rate": 7.436584193340829e-07, "loss": 0.4422, "step": 17807 }, { "epoch": 0.825974025974026, "grad_norm": 7.81942892074585, "learning_rate": 7.432722936945819e-07, "loss": 0.3299, "step": 17808 }, { "epoch": 0.8260204081632653, "grad_norm": 5.961057662963867, "learning_rate": 7.428862602748416e-07, "loss": 0.3454, "step": 17809 }, { "epoch": 0.8260667903525046, "grad_norm": 16.408918380737305, "learning_rate": 7.425003190832248e-07, "loss": 0.3805, "step": 17810 }, { "epoch": 0.826113172541744, "grad_norm": 8.32008171081543, "learning_rate": 7.42114470128093e-07, "loss": 0.3463, "step": 17811 }, { "epoch": 0.8261595547309833, "grad_norm": 7.633059501647949, "learning_rate": 7.417287134178052e-07, "loss": 0.3011, "step": 17812 }, { "epoch": 0.8262059369202226, "grad_norm": 7.8721842765808105, "learning_rate": 7.413430489607204e-07, "loss": 0.318, "step": 17813 }, { "epoch": 0.826252319109462, "grad_norm": 10.368644714355469, "learning_rate": 7.409574767651911e-07, "loss": 0.3921, "step": 17814 }, { "epoch": 0.8262987012987013, "grad_norm": 14.867666244506836, "learning_rate": 7.405719968395713e-07, "loss": 0.3137, "step": 17815 }, { "epoch": 0.8263450834879407, "grad_norm": 5.017614841461182, "learning_rate": 7.401866091922133e-07, "loss": 0.3549, "step": 17816 }, { "epoch": 0.8263914656771799, "grad_norm": 10.901959419250488, "learning_rate": 7.398013138314647e-07, "loss": 0.3618, "step": 17817 }, { "epoch": 0.8264378478664193, "grad_norm": 8.194865226745605, "learning_rate": 7.394161107656755e-07, "loss": 0.3275, "step": 17818 }, { "epoch": 0.8264842300556586, "grad_norm": 11.260154724121094, "learning_rate": 7.390310000031875e-07, "loss": 0.396, "step": 17819 }, { "epoch": 0.826530612244898, "grad_norm": 6.522393226623535, "learning_rate": 7.386459815523456e-07, "loss": 0.3206, "step": 17820 }, { "epoch": 0.8265769944341373, "grad_norm": 5.266909122467041, "learning_rate": 7.382610554214903e-07, "loss": 0.3467, "step": 17821 }, { "epoch": 0.8266233766233766, "grad_norm": 6.173388957977295, "learning_rate": 7.378762216189622e-07, "loss": 0.3008, "step": 17822 }, { "epoch": 0.826669758812616, "grad_norm": 7.003704071044922, "learning_rate": 7.374914801530974e-07, "loss": 0.2802, "step": 17823 }, { "epoch": 0.8267161410018553, "grad_norm": 6.128594398498535, "learning_rate": 7.371068310322327e-07, "loss": 0.2577, "step": 17824 }, { "epoch": 0.8267625231910947, "grad_norm": 4.2340593338012695, "learning_rate": 7.367222742646984e-07, "loss": 0.3563, "step": 17825 }, { "epoch": 0.8268089053803339, "grad_norm": 8.23748779296875, "learning_rate": 7.363378098588281e-07, "loss": 0.3642, "step": 17826 }, { "epoch": 0.8268552875695733, "grad_norm": 18.430707931518555, "learning_rate": 7.359534378229499e-07, "loss": 0.3611, "step": 17827 }, { "epoch": 0.8269016697588126, "grad_norm": 10.259541511535645, "learning_rate": 7.355691581653918e-07, "loss": 0.3274, "step": 17828 }, { "epoch": 0.826948051948052, "grad_norm": 7.028364658355713, "learning_rate": 7.351849708944792e-07, "loss": 0.3031, "step": 17829 }, { "epoch": 0.8269944341372912, "grad_norm": 7.1603851318359375, "learning_rate": 7.348008760185343e-07, "loss": 0.3112, "step": 17830 }, { "epoch": 0.8270408163265306, "grad_norm": 6.204500198364258, "learning_rate": 7.344168735458779e-07, "loss": 0.3173, "step": 17831 }, { "epoch": 0.82708719851577, "grad_norm": 5.660591125488281, "learning_rate": 7.340329634848309e-07, "loss": 0.2389, "step": 17832 }, { "epoch": 0.8271335807050093, "grad_norm": 6.972646713256836, "learning_rate": 7.336491458437095e-07, "loss": 0.3972, "step": 17833 }, { "epoch": 0.8271799628942486, "grad_norm": 9.562494277954102, "learning_rate": 7.332654206308299e-07, "loss": 0.3712, "step": 17834 }, { "epoch": 0.8272263450834879, "grad_norm": 14.019168853759766, "learning_rate": 7.328817878545036e-07, "loss": 0.4378, "step": 17835 }, { "epoch": 0.8272727272727273, "grad_norm": 7.265182971954346, "learning_rate": 7.324982475230424e-07, "loss": 0.2799, "step": 17836 }, { "epoch": 0.8273191094619666, "grad_norm": 6.2163987159729, "learning_rate": 7.321147996447558e-07, "loss": 0.3061, "step": 17837 }, { "epoch": 0.827365491651206, "grad_norm": 7.892913341522217, "learning_rate": 7.317314442279517e-07, "loss": 0.4135, "step": 17838 }, { "epoch": 0.8274118738404452, "grad_norm": 5.753483772277832, "learning_rate": 7.313481812809347e-07, "loss": 0.3374, "step": 17839 }, { "epoch": 0.8274582560296846, "grad_norm": 9.24880599975586, "learning_rate": 7.309650108120076e-07, "loss": 0.2887, "step": 17840 }, { "epoch": 0.827504638218924, "grad_norm": 5.8532185554504395, "learning_rate": 7.305819328294717e-07, "loss": 0.3495, "step": 17841 }, { "epoch": 0.8275510204081633, "grad_norm": 18.14081382751465, "learning_rate": 7.30198947341626e-07, "loss": 0.4335, "step": 17842 }, { "epoch": 0.8275974025974026, "grad_norm": 5.11757755279541, "learning_rate": 7.298160543567684e-07, "loss": 0.3501, "step": 17843 }, { "epoch": 0.8276437847866419, "grad_norm": 6.561831474304199, "learning_rate": 7.294332538831945e-07, "loss": 0.2111, "step": 17844 }, { "epoch": 0.8276901669758813, "grad_norm": 4.947852611541748, "learning_rate": 7.290505459291958e-07, "loss": 0.2764, "step": 17845 }, { "epoch": 0.8277365491651206, "grad_norm": 7.083569049835205, "learning_rate": 7.286679305030636e-07, "loss": 0.2629, "step": 17846 }, { "epoch": 0.8277829313543599, "grad_norm": 7.3958024978637695, "learning_rate": 7.282854076130886e-07, "loss": 0.2672, "step": 17847 }, { "epoch": 0.8278293135435992, "grad_norm": 6.29288387298584, "learning_rate": 7.279029772675572e-07, "loss": 0.3414, "step": 17848 }, { "epoch": 0.8278756957328386, "grad_norm": 4.171468734741211, "learning_rate": 7.275206394747552e-07, "loss": 0.2533, "step": 17849 }, { "epoch": 0.827922077922078, "grad_norm": 9.707563400268555, "learning_rate": 7.271383942429638e-07, "loss": 0.3339, "step": 17850 }, { "epoch": 0.8279684601113173, "grad_norm": 5.633882522583008, "learning_rate": 7.267562415804658e-07, "loss": 0.3852, "step": 17851 }, { "epoch": 0.8280148423005566, "grad_norm": 6.5339579582214355, "learning_rate": 7.2637418149554e-07, "loss": 0.273, "step": 17852 }, { "epoch": 0.8280612244897959, "grad_norm": 4.58064603805542, "learning_rate": 7.259922139964631e-07, "loss": 0.3064, "step": 17853 }, { "epoch": 0.8281076066790353, "grad_norm": 4.688146591186523, "learning_rate": 7.25610339091512e-07, "loss": 0.2452, "step": 17854 }, { "epoch": 0.8281539888682746, "grad_norm": 8.31233024597168, "learning_rate": 7.252285567889572e-07, "loss": 0.3769, "step": 17855 }, { "epoch": 0.8282003710575139, "grad_norm": 7.728220462799072, "learning_rate": 7.248468670970704e-07, "loss": 0.367, "step": 17856 }, { "epoch": 0.8282467532467532, "grad_norm": 5.073828220367432, "learning_rate": 7.244652700241223e-07, "loss": 0.3085, "step": 17857 }, { "epoch": 0.8282931354359926, "grad_norm": 4.113433837890625, "learning_rate": 7.240837655783783e-07, "loss": 0.2955, "step": 17858 }, { "epoch": 0.828339517625232, "grad_norm": 5.872781753540039, "learning_rate": 7.237023537681059e-07, "loss": 0.3166, "step": 17859 }, { "epoch": 0.8283858998144712, "grad_norm": 5.637554168701172, "learning_rate": 7.233210346015651e-07, "loss": 0.3342, "step": 17860 }, { "epoch": 0.8284322820037106, "grad_norm": 15.565417289733887, "learning_rate": 7.229398080870181e-07, "loss": 0.4267, "step": 17861 }, { "epoch": 0.8284786641929499, "grad_norm": 6.747501850128174, "learning_rate": 7.225586742327245e-07, "loss": 0.3631, "step": 17862 }, { "epoch": 0.8285250463821893, "grad_norm": 7.13162088394165, "learning_rate": 7.22177633046941e-07, "loss": 0.3075, "step": 17863 }, { "epoch": 0.8285714285714286, "grad_norm": 6.851170539855957, "learning_rate": 7.217966845379243e-07, "loss": 0.2313, "step": 17864 }, { "epoch": 0.8286178107606679, "grad_norm": 10.06285285949707, "learning_rate": 7.214158287139245e-07, "loss": 0.302, "step": 17865 }, { "epoch": 0.8286641929499072, "grad_norm": 5.328906059265137, "learning_rate": 7.210350655831938e-07, "loss": 0.2474, "step": 17866 }, { "epoch": 0.8287105751391466, "grad_norm": 12.554162979125977, "learning_rate": 7.206543951539818e-07, "loss": 0.3436, "step": 17867 }, { "epoch": 0.828756957328386, "grad_norm": 9.382302284240723, "learning_rate": 7.202738174345347e-07, "loss": 0.414, "step": 17868 }, { "epoch": 0.8288033395176252, "grad_norm": 8.702299118041992, "learning_rate": 7.198933324330997e-07, "loss": 0.3556, "step": 17869 }, { "epoch": 0.8288497217068646, "grad_norm": 7.613751411437988, "learning_rate": 7.195129401579171e-07, "loss": 0.3891, "step": 17870 }, { "epoch": 0.8288961038961039, "grad_norm": 7.4882025718688965, "learning_rate": 7.191326406172288e-07, "loss": 0.3131, "step": 17871 }, { "epoch": 0.8289424860853433, "grad_norm": 5.002666473388672, "learning_rate": 7.187524338192736e-07, "loss": 0.3197, "step": 17872 }, { "epoch": 0.8289888682745825, "grad_norm": 7.326461315155029, "learning_rate": 7.183723197722892e-07, "loss": 0.3813, "step": 17873 }, { "epoch": 0.8290352504638219, "grad_norm": 6.619463920593262, "learning_rate": 7.179922984845112e-07, "loss": 0.2143, "step": 17874 }, { "epoch": 0.8290816326530612, "grad_norm": 10.369561195373535, "learning_rate": 7.176123699641702e-07, "loss": 0.3234, "step": 17875 }, { "epoch": 0.8291280148423006, "grad_norm": 7.764044284820557, "learning_rate": 7.172325342194986e-07, "loss": 0.374, "step": 17876 }, { "epoch": 0.8291743970315398, "grad_norm": 8.459152221679688, "learning_rate": 7.168527912587253e-07, "loss": 0.2983, "step": 17877 }, { "epoch": 0.8292207792207792, "grad_norm": 5.154586315155029, "learning_rate": 7.164731410900766e-07, "loss": 0.3575, "step": 17878 }, { "epoch": 0.8292671614100185, "grad_norm": 4.44306755065918, "learning_rate": 7.160935837217798e-07, "loss": 0.2116, "step": 17879 }, { "epoch": 0.8293135435992579, "grad_norm": 12.444073677062988, "learning_rate": 7.157141191620548e-07, "loss": 0.4293, "step": 17880 }, { "epoch": 0.8293599257884973, "grad_norm": 4.21958065032959, "learning_rate": 7.153347474191236e-07, "loss": 0.196, "step": 17881 }, { "epoch": 0.8294063079777365, "grad_norm": 6.5722527503967285, "learning_rate": 7.14955468501205e-07, "loss": 0.2828, "step": 17882 }, { "epoch": 0.8294526901669759, "grad_norm": 5.275655269622803, "learning_rate": 7.145762824165159e-07, "loss": 0.2775, "step": 17883 }, { "epoch": 0.8294990723562152, "grad_norm": 9.6260404586792, "learning_rate": 7.141971891732729e-07, "loss": 0.3305, "step": 17884 }, { "epoch": 0.8295454545454546, "grad_norm": 9.396574020385742, "learning_rate": 7.138181887796857e-07, "loss": 0.3578, "step": 17885 }, { "epoch": 0.8295918367346938, "grad_norm": 10.162117004394531, "learning_rate": 7.134392812439672e-07, "loss": 0.4044, "step": 17886 }, { "epoch": 0.8296382189239332, "grad_norm": 10.071173667907715, "learning_rate": 7.130604665743251e-07, "loss": 0.3102, "step": 17887 }, { "epoch": 0.8296846011131725, "grad_norm": 4.506215572357178, "learning_rate": 7.126817447789674e-07, "loss": 0.2915, "step": 17888 }, { "epoch": 0.8297309833024119, "grad_norm": 7.438061714172363, "learning_rate": 7.123031158660993e-07, "loss": 0.2956, "step": 17889 }, { "epoch": 0.8297773654916512, "grad_norm": 7.110324859619141, "learning_rate": 7.119245798439217e-07, "loss": 0.3488, "step": 17890 }, { "epoch": 0.8298237476808905, "grad_norm": 10.217089653015137, "learning_rate": 7.115461367206361e-07, "loss": 0.3767, "step": 17891 }, { "epoch": 0.8298701298701299, "grad_norm": 4.982690811157227, "learning_rate": 7.11167786504442e-07, "loss": 0.2292, "step": 17892 }, { "epoch": 0.8299165120593692, "grad_norm": 11.753165245056152, "learning_rate": 7.107895292035355e-07, "loss": 0.3884, "step": 17893 }, { "epoch": 0.8299628942486086, "grad_norm": 4.760348320007324, "learning_rate": 7.104113648261113e-07, "loss": 0.3524, "step": 17894 }, { "epoch": 0.8300092764378478, "grad_norm": 8.054790496826172, "learning_rate": 7.100332933803633e-07, "loss": 0.3216, "step": 17895 }, { "epoch": 0.8300556586270872, "grad_norm": 7.673480987548828, "learning_rate": 7.096553148744806e-07, "loss": 0.3124, "step": 17896 }, { "epoch": 0.8301020408163265, "grad_norm": 8.820847511291504, "learning_rate": 7.092774293166522e-07, "loss": 0.3319, "step": 17897 }, { "epoch": 0.8301484230055659, "grad_norm": 6.434875011444092, "learning_rate": 7.088996367150658e-07, "loss": 0.4053, "step": 17898 }, { "epoch": 0.8301948051948052, "grad_norm": 6.289393424987793, "learning_rate": 7.085219370779045e-07, "loss": 0.2182, "step": 17899 }, { "epoch": 0.8302411873840445, "grad_norm": 8.184938430786133, "learning_rate": 7.081443304133539e-07, "loss": 0.3264, "step": 17900 }, { "epoch": 0.8302875695732839, "grad_norm": 3.9433984756469727, "learning_rate": 7.077668167295909e-07, "loss": 0.2389, "step": 17901 }, { "epoch": 0.8303339517625232, "grad_norm": 5.807816982269287, "learning_rate": 7.073893960347966e-07, "loss": 0.28, "step": 17902 }, { "epoch": 0.8303803339517625, "grad_norm": 8.07188892364502, "learning_rate": 7.070120683371462e-07, "loss": 0.3398, "step": 17903 }, { "epoch": 0.8304267161410018, "grad_norm": 6.940020561218262, "learning_rate": 7.066348336448153e-07, "loss": 0.2762, "step": 17904 }, { "epoch": 0.8304730983302412, "grad_norm": 6.070308685302734, "learning_rate": 7.062576919659763e-07, "loss": 0.2675, "step": 17905 }, { "epoch": 0.8305194805194805, "grad_norm": 13.386367797851562, "learning_rate": 7.058806433088011e-07, "loss": 0.5011, "step": 17906 }, { "epoch": 0.8305658627087199, "grad_norm": 5.483226299285889, "learning_rate": 7.055036876814553e-07, "loss": 0.2473, "step": 17907 }, { "epoch": 0.8306122448979592, "grad_norm": 5.1202497482299805, "learning_rate": 7.051268250921072e-07, "loss": 0.2784, "step": 17908 }, { "epoch": 0.8306586270871985, "grad_norm": 9.356966972351074, "learning_rate": 7.047500555489212e-07, "loss": 0.2513, "step": 17909 }, { "epoch": 0.8307050092764379, "grad_norm": 5.997819900512695, "learning_rate": 7.043733790600593e-07, "loss": 0.3151, "step": 17910 }, { "epoch": 0.8307513914656772, "grad_norm": 9.357070922851562, "learning_rate": 7.039967956336841e-07, "loss": 0.3108, "step": 17911 }, { "epoch": 0.8307977736549165, "grad_norm": 10.661218643188477, "learning_rate": 7.036203052779506e-07, "loss": 0.3482, "step": 17912 }, { "epoch": 0.8308441558441558, "grad_norm": 6.253955841064453, "learning_rate": 7.032439080010178e-07, "loss": 0.3584, "step": 17913 }, { "epoch": 0.8308905380333952, "grad_norm": 10.947949409484863, "learning_rate": 7.028676038110388e-07, "loss": 0.2994, "step": 17914 }, { "epoch": 0.8309369202226345, "grad_norm": 7.059499263763428, "learning_rate": 7.024913927161675e-07, "loss": 0.3613, "step": 17915 }, { "epoch": 0.8309833024118738, "grad_norm": 4.294378280639648, "learning_rate": 7.02115274724553e-07, "loss": 0.2384, "step": 17916 }, { "epoch": 0.8310296846011131, "grad_norm": 4.589292049407959, "learning_rate": 7.017392498443448e-07, "loss": 0.3066, "step": 17917 }, { "epoch": 0.8310760667903525, "grad_norm": 4.8831257820129395, "learning_rate": 7.013633180836882e-07, "loss": 0.2857, "step": 17918 }, { "epoch": 0.8311224489795919, "grad_norm": 9.845783233642578, "learning_rate": 7.009874794507277e-07, "loss": 0.346, "step": 17919 }, { "epoch": 0.8311688311688312, "grad_norm": 6.478931427001953, "learning_rate": 7.006117339536061e-07, "loss": 0.3035, "step": 17920 }, { "epoch": 0.8312152133580705, "grad_norm": 12.33991813659668, "learning_rate": 7.002360816004639e-07, "loss": 0.437, "step": 17921 }, { "epoch": 0.8312615955473098, "grad_norm": 12.479679107666016, "learning_rate": 6.998605223994398e-07, "loss": 0.4202, "step": 17922 }, { "epoch": 0.8313079777365492, "grad_norm": 7.868828773498535, "learning_rate": 6.994850563586686e-07, "loss": 0.2773, "step": 17923 }, { "epoch": 0.8313543599257885, "grad_norm": 11.11532974243164, "learning_rate": 6.991096834862849e-07, "loss": 0.3706, "step": 17924 }, { "epoch": 0.8314007421150278, "grad_norm": 6.085794448852539, "learning_rate": 6.987344037904214e-07, "loss": 0.2746, "step": 17925 }, { "epoch": 0.8314471243042671, "grad_norm": 10.975001335144043, "learning_rate": 6.983592172792087e-07, "loss": 0.3939, "step": 17926 }, { "epoch": 0.8314935064935065, "grad_norm": 13.621718406677246, "learning_rate": 6.979841239607749e-07, "loss": 0.4679, "step": 17927 }, { "epoch": 0.8315398886827459, "grad_norm": 8.624659538269043, "learning_rate": 6.97609123843247e-07, "loss": 0.2895, "step": 17928 }, { "epoch": 0.8315862708719851, "grad_norm": 18.082284927368164, "learning_rate": 6.972342169347468e-07, "loss": 0.4394, "step": 17929 }, { "epoch": 0.8316326530612245, "grad_norm": 10.711272239685059, "learning_rate": 6.968594032433978e-07, "loss": 0.3918, "step": 17930 }, { "epoch": 0.8316790352504638, "grad_norm": 5.802883148193359, "learning_rate": 6.964846827773203e-07, "loss": 0.2889, "step": 17931 }, { "epoch": 0.8317254174397032, "grad_norm": 5.5849995613098145, "learning_rate": 6.961100555446321e-07, "loss": 0.2996, "step": 17932 }, { "epoch": 0.8317717996289424, "grad_norm": 4.663064479827881, "learning_rate": 6.957355215534505e-07, "loss": 0.1804, "step": 17933 }, { "epoch": 0.8318181818181818, "grad_norm": 11.544116973876953, "learning_rate": 6.953610808118876e-07, "loss": 0.2962, "step": 17934 }, { "epoch": 0.8318645640074211, "grad_norm": 6.234281063079834, "learning_rate": 6.949867333280569e-07, "loss": 0.3336, "step": 17935 }, { "epoch": 0.8319109461966605, "grad_norm": 12.953707695007324, "learning_rate": 6.946124791100672e-07, "loss": 0.3498, "step": 17936 }, { "epoch": 0.8319573283858999, "grad_norm": 9.872013092041016, "learning_rate": 6.942383181660278e-07, "loss": 0.3662, "step": 17937 }, { "epoch": 0.8320037105751391, "grad_norm": 6.3127899169921875, "learning_rate": 6.938642505040449e-07, "loss": 0.2366, "step": 17938 }, { "epoch": 0.8320500927643785, "grad_norm": 7.509467124938965, "learning_rate": 6.93490276132221e-07, "loss": 0.3594, "step": 17939 }, { "epoch": 0.8320964749536178, "grad_norm": 9.761481285095215, "learning_rate": 6.931163950586589e-07, "loss": 0.3015, "step": 17940 }, { "epoch": 0.8321428571428572, "grad_norm": 8.019440650939941, "learning_rate": 6.927426072914583e-07, "loss": 0.2774, "step": 17941 }, { "epoch": 0.8321892393320964, "grad_norm": 4.349834442138672, "learning_rate": 6.923689128387168e-07, "loss": 0.2971, "step": 17942 }, { "epoch": 0.8322356215213358, "grad_norm": 4.565650463104248, "learning_rate": 6.919953117085316e-07, "loss": 0.3174, "step": 17943 }, { "epoch": 0.8322820037105751, "grad_norm": 5.997389793395996, "learning_rate": 6.916218039089961e-07, "loss": 0.2461, "step": 17944 }, { "epoch": 0.8323283858998145, "grad_norm": 4.6242594718933105, "learning_rate": 6.912483894482014e-07, "loss": 0.2129, "step": 17945 }, { "epoch": 0.8323747680890538, "grad_norm": 4.954476833343506, "learning_rate": 6.908750683342369e-07, "loss": 0.2057, "step": 17946 }, { "epoch": 0.8324211502782931, "grad_norm": 34.063133239746094, "learning_rate": 6.905018405751918e-07, "loss": 0.4593, "step": 17947 }, { "epoch": 0.8324675324675325, "grad_norm": 5.757302284240723, "learning_rate": 6.901287061791512e-07, "loss": 0.2906, "step": 17948 }, { "epoch": 0.8325139146567718, "grad_norm": 8.982738494873047, "learning_rate": 6.897556651542003e-07, "loss": 0.376, "step": 17949 }, { "epoch": 0.8325602968460112, "grad_norm": 8.160770416259766, "learning_rate": 6.893827175084178e-07, "loss": 0.3427, "step": 17950 }, { "epoch": 0.8326066790352504, "grad_norm": 9.979362487792969, "learning_rate": 6.890098632498854e-07, "loss": 0.3948, "step": 17951 }, { "epoch": 0.8326530612244898, "grad_norm": 6.86899471282959, "learning_rate": 6.886371023866806e-07, "loss": 0.3306, "step": 17952 }, { "epoch": 0.8326994434137291, "grad_norm": 6.0267534255981445, "learning_rate": 6.882644349268791e-07, "loss": 0.2127, "step": 17953 }, { "epoch": 0.8327458256029685, "grad_norm": 6.491969108581543, "learning_rate": 6.878918608785539e-07, "loss": 0.2186, "step": 17954 }, { "epoch": 0.8327922077922078, "grad_norm": 7.883076190948486, "learning_rate": 6.875193802497787e-07, "loss": 0.298, "step": 17955 }, { "epoch": 0.8328385899814471, "grad_norm": 6.119344711303711, "learning_rate": 6.871469930486202e-07, "loss": 0.2662, "step": 17956 }, { "epoch": 0.8328849721706865, "grad_norm": 6.221107006072998, "learning_rate": 6.867746992831475e-07, "loss": 0.3141, "step": 17957 }, { "epoch": 0.8329313543599258, "grad_norm": 11.514273643493652, "learning_rate": 6.86402498961426e-07, "loss": 0.3226, "step": 17958 }, { "epoch": 0.8329777365491651, "grad_norm": 4.51102352142334, "learning_rate": 6.860303920915191e-07, "loss": 0.425, "step": 17959 }, { "epoch": 0.8330241187384044, "grad_norm": 7.312247276306152, "learning_rate": 6.856583786814891e-07, "loss": 0.364, "step": 17960 }, { "epoch": 0.8330705009276438, "grad_norm": 8.874625205993652, "learning_rate": 6.852864587393937e-07, "loss": 0.3685, "step": 17961 }, { "epoch": 0.8331168831168831, "grad_norm": 4.350678443908691, "learning_rate": 6.849146322732919e-07, "loss": 0.2669, "step": 17962 }, { "epoch": 0.8331632653061225, "grad_norm": 9.185812950134277, "learning_rate": 6.845428992912385e-07, "loss": 0.3579, "step": 17963 }, { "epoch": 0.8332096474953617, "grad_norm": 7.652845859527588, "learning_rate": 6.841712598012867e-07, "loss": 0.3634, "step": 17964 }, { "epoch": 0.8332560296846011, "grad_norm": 6.17025089263916, "learning_rate": 6.837997138114882e-07, "loss": 0.3051, "step": 17965 }, { "epoch": 0.8333024118738405, "grad_norm": 6.327803134918213, "learning_rate": 6.834282613298937e-07, "loss": 0.2914, "step": 17966 }, { "epoch": 0.8333487940630798, "grad_norm": 7.005701541900635, "learning_rate": 6.830569023645479e-07, "loss": 0.1974, "step": 17967 }, { "epoch": 0.8333951762523191, "grad_norm": 7.666424751281738, "learning_rate": 6.826856369234974e-07, "loss": 0.4201, "step": 17968 }, { "epoch": 0.8334415584415584, "grad_norm": 7.857727527618408, "learning_rate": 6.823144650147856e-07, "loss": 0.2452, "step": 17969 }, { "epoch": 0.8334879406307978, "grad_norm": 5.280030250549316, "learning_rate": 6.819433866464531e-07, "loss": 0.2837, "step": 17970 }, { "epoch": 0.8335343228200371, "grad_norm": 6.236131191253662, "learning_rate": 6.815724018265413e-07, "loss": 0.2638, "step": 17971 }, { "epoch": 0.8335807050092764, "grad_norm": 7.395391464233398, "learning_rate": 6.812015105630842e-07, "loss": 0.2471, "step": 17972 }, { "epoch": 0.8336270871985157, "grad_norm": 3.723982095718384, "learning_rate": 6.808307128641184e-07, "loss": 0.2756, "step": 17973 }, { "epoch": 0.8336734693877551, "grad_norm": 16.896242141723633, "learning_rate": 6.804600087376773e-07, "loss": 0.4766, "step": 17974 }, { "epoch": 0.8337198515769945, "grad_norm": 5.681980609893799, "learning_rate": 6.800893981917917e-07, "loss": 0.3336, "step": 17975 }, { "epoch": 0.8337662337662337, "grad_norm": 17.94344139099121, "learning_rate": 6.797188812344907e-07, "loss": 0.6472, "step": 17976 }, { "epoch": 0.8338126159554731, "grad_norm": 6.230223655700684, "learning_rate": 6.79348457873803e-07, "loss": 0.3038, "step": 17977 }, { "epoch": 0.8338589981447124, "grad_norm": 5.252758502960205, "learning_rate": 6.789781281177504e-07, "loss": 0.3146, "step": 17978 }, { "epoch": 0.8339053803339518, "grad_norm": 6.545395851135254, "learning_rate": 6.786078919743578e-07, "loss": 0.3462, "step": 17979 }, { "epoch": 0.8339517625231911, "grad_norm": 4.270483493804932, "learning_rate": 6.782377494516456e-07, "loss": 0.2481, "step": 17980 }, { "epoch": 0.8339981447124304, "grad_norm": 5.516160488128662, "learning_rate": 6.778677005576334e-07, "loss": 0.23, "step": 17981 }, { "epoch": 0.8340445269016697, "grad_norm": 4.435680389404297, "learning_rate": 6.77497745300339e-07, "loss": 0.2594, "step": 17982 }, { "epoch": 0.8340909090909091, "grad_norm": 6.618092060089111, "learning_rate": 6.771278836877748e-07, "loss": 0.4402, "step": 17983 }, { "epoch": 0.8341372912801485, "grad_norm": 6.8019280433654785, "learning_rate": 6.767581157279545e-07, "loss": 0.2816, "step": 17984 }, { "epoch": 0.8341836734693877, "grad_norm": 3.7247540950775146, "learning_rate": 6.763884414288901e-07, "loss": 0.229, "step": 17985 }, { "epoch": 0.8342300556586271, "grad_norm": 4.085665225982666, "learning_rate": 6.76018860798589e-07, "loss": 0.3015, "step": 17986 }, { "epoch": 0.8342764378478664, "grad_norm": 7.9241943359375, "learning_rate": 6.756493738450592e-07, "loss": 0.3871, "step": 17987 }, { "epoch": 0.8343228200371058, "grad_norm": 3.293137550354004, "learning_rate": 6.752799805763055e-07, "loss": 0.2946, "step": 17988 }, { "epoch": 0.834369202226345, "grad_norm": 12.519947052001953, "learning_rate": 6.749106810003292e-07, "loss": 0.4292, "step": 17989 }, { "epoch": 0.8344155844155844, "grad_norm": 8.694463729858398, "learning_rate": 6.745414751251317e-07, "loss": 0.3781, "step": 17990 }, { "epoch": 0.8344619666048237, "grad_norm": 4.867188930511475, "learning_rate": 6.741723629587115e-07, "loss": 0.2038, "step": 17991 }, { "epoch": 0.8345083487940631, "grad_norm": 4.7858991622924805, "learning_rate": 6.738033445090653e-07, "loss": 0.2691, "step": 17992 }, { "epoch": 0.8345547309833025, "grad_norm": 4.932016849517822, "learning_rate": 6.734344197841891e-07, "loss": 0.1627, "step": 17993 }, { "epoch": 0.8346011131725417, "grad_norm": 8.654058456420898, "learning_rate": 6.730655887920734e-07, "loss": 0.3349, "step": 17994 }, { "epoch": 0.8346474953617811, "grad_norm": 8.749173164367676, "learning_rate": 6.726968515407089e-07, "loss": 0.3594, "step": 17995 }, { "epoch": 0.8346938775510204, "grad_norm": 9.25500774383545, "learning_rate": 6.72328208038085e-07, "loss": 0.354, "step": 17996 }, { "epoch": 0.8347402597402598, "grad_norm": 18.147062301635742, "learning_rate": 6.719596582921878e-07, "loss": 0.3901, "step": 17997 }, { "epoch": 0.834786641929499, "grad_norm": 12.016504287719727, "learning_rate": 6.715912023110021e-07, "loss": 0.3652, "step": 17998 }, { "epoch": 0.8348330241187384, "grad_norm": 21.95167350769043, "learning_rate": 6.712228401025106e-07, "loss": 0.3207, "step": 17999 }, { "epoch": 0.8348794063079777, "grad_norm": 5.915618896484375, "learning_rate": 6.708545716746923e-07, "loss": 0.2153, "step": 18000 }, { "epoch": 0.8349257884972171, "grad_norm": 5.703503131866455, "learning_rate": 6.704863970355263e-07, "loss": 0.3551, "step": 18001 }, { "epoch": 0.8349721706864563, "grad_norm": 6.251396656036377, "learning_rate": 6.701183161929886e-07, "loss": 0.1588, "step": 18002 }, { "epoch": 0.8350185528756957, "grad_norm": 7.15571403503418, "learning_rate": 6.697503291550544e-07, "loss": 0.2865, "step": 18003 }, { "epoch": 0.8350649350649351, "grad_norm": 11.088483810424805, "learning_rate": 6.693824359296963e-07, "loss": 0.3711, "step": 18004 }, { "epoch": 0.8351113172541744, "grad_norm": 8.35316276550293, "learning_rate": 6.690146365248823e-07, "loss": 0.3177, "step": 18005 }, { "epoch": 0.8351576994434138, "grad_norm": 10.046974182128906, "learning_rate": 6.686469309485815e-07, "loss": 0.4089, "step": 18006 }, { "epoch": 0.835204081632653, "grad_norm": 12.402606010437012, "learning_rate": 6.682793192087611e-07, "loss": 0.3374, "step": 18007 }, { "epoch": 0.8352504638218924, "grad_norm": 6.202614784240723, "learning_rate": 6.67911801313384e-07, "loss": 0.2258, "step": 18008 }, { "epoch": 0.8352968460111317, "grad_norm": 11.8418607711792, "learning_rate": 6.675443772704126e-07, "loss": 0.4673, "step": 18009 }, { "epoch": 0.8353432282003711, "grad_norm": 16.63954734802246, "learning_rate": 6.671770470878086e-07, "loss": 0.3652, "step": 18010 }, { "epoch": 0.8353896103896103, "grad_norm": 18.392568588256836, "learning_rate": 6.668098107735277e-07, "loss": 0.3429, "step": 18011 }, { "epoch": 0.8354359925788497, "grad_norm": 6.006561279296875, "learning_rate": 6.664426683355263e-07, "loss": 0.2218, "step": 18012 }, { "epoch": 0.8354823747680891, "grad_norm": 7.880908489227295, "learning_rate": 6.660756197817591e-07, "loss": 0.3268, "step": 18013 }, { "epoch": 0.8355287569573284, "grad_norm": 3.9839820861816406, "learning_rate": 6.657086651201772e-07, "loss": 0.268, "step": 18014 }, { "epoch": 0.8355751391465677, "grad_norm": 7.694504261016846, "learning_rate": 6.653418043587323e-07, "loss": 0.2251, "step": 18015 }, { "epoch": 0.835621521335807, "grad_norm": 5.670109748840332, "learning_rate": 6.649750375053699e-07, "loss": 0.2425, "step": 18016 }, { "epoch": 0.8356679035250464, "grad_norm": 4.789888858795166, "learning_rate": 6.646083645680368e-07, "loss": 0.2765, "step": 18017 }, { "epoch": 0.8357142857142857, "grad_norm": 11.376882553100586, "learning_rate": 6.642417855546768e-07, "loss": 0.3299, "step": 18018 }, { "epoch": 0.8357606679035251, "grad_norm": 7.522733688354492, "learning_rate": 6.638753004732318e-07, "loss": 0.3786, "step": 18019 }, { "epoch": 0.8358070500927643, "grad_norm": 7.679347515106201, "learning_rate": 6.635089093316421e-07, "loss": 0.3399, "step": 18020 }, { "epoch": 0.8358534322820037, "grad_norm": 7.429098129272461, "learning_rate": 6.631426121378438e-07, "loss": 0.2813, "step": 18021 }, { "epoch": 0.8358998144712431, "grad_norm": 11.326580047607422, "learning_rate": 6.627764088997735e-07, "loss": 0.3621, "step": 18022 }, { "epoch": 0.8359461966604824, "grad_norm": 5.03225564956665, "learning_rate": 6.624102996253645e-07, "loss": 0.2991, "step": 18023 }, { "epoch": 0.8359925788497217, "grad_norm": 8.345769882202148, "learning_rate": 6.620442843225483e-07, "loss": 0.3207, "step": 18024 }, { "epoch": 0.836038961038961, "grad_norm": 7.937620162963867, "learning_rate": 6.616783629992546e-07, "loss": 0.2167, "step": 18025 }, { "epoch": 0.8360853432282004, "grad_norm": 4.363470077514648, "learning_rate": 6.613125356634126e-07, "loss": 0.2029, "step": 18026 }, { "epoch": 0.8361317254174397, "grad_norm": 6.133827209472656, "learning_rate": 6.609468023229443e-07, "loss": 0.2837, "step": 18027 }, { "epoch": 0.836178107606679, "grad_norm": 7.170175075531006, "learning_rate": 6.605811629857756e-07, "loss": 0.3175, "step": 18028 }, { "epoch": 0.8362244897959183, "grad_norm": 9.624076843261719, "learning_rate": 6.602156176598268e-07, "loss": 0.3628, "step": 18029 }, { "epoch": 0.8362708719851577, "grad_norm": 7.790302753448486, "learning_rate": 6.598501663530177e-07, "loss": 0.2992, "step": 18030 }, { "epoch": 0.8363172541743971, "grad_norm": 5.211810111999512, "learning_rate": 6.594848090732664e-07, "loss": 0.2287, "step": 18031 }, { "epoch": 0.8363636363636363, "grad_norm": 12.29949951171875, "learning_rate": 6.591195458284866e-07, "loss": 0.4197, "step": 18032 }, { "epoch": 0.8364100185528757, "grad_norm": 9.840347290039062, "learning_rate": 6.587543766265919e-07, "loss": 0.3249, "step": 18033 }, { "epoch": 0.836456400742115, "grad_norm": 5.619334697723389, "learning_rate": 6.583893014754944e-07, "loss": 0.2381, "step": 18034 }, { "epoch": 0.8365027829313544, "grad_norm": 6.16554069519043, "learning_rate": 6.580243203831021e-07, "loss": 0.2071, "step": 18035 }, { "epoch": 0.8365491651205937, "grad_norm": 4.802633762359619, "learning_rate": 6.576594333573233e-07, "loss": 0.2744, "step": 18036 }, { "epoch": 0.836595547309833, "grad_norm": 8.30319595336914, "learning_rate": 6.572946404060638e-07, "loss": 0.3918, "step": 18037 }, { "epoch": 0.8366419294990723, "grad_norm": 7.149421691894531, "learning_rate": 6.569299415372238e-07, "loss": 0.2894, "step": 18038 }, { "epoch": 0.8366883116883117, "grad_norm": 5.130675315856934, "learning_rate": 6.565653367587054e-07, "loss": 0.3505, "step": 18039 }, { "epoch": 0.8367346938775511, "grad_norm": 14.464375495910645, "learning_rate": 6.562008260784092e-07, "loss": 0.2899, "step": 18040 }, { "epoch": 0.8367810760667903, "grad_norm": 9.088235855102539, "learning_rate": 6.558364095042302e-07, "loss": 0.258, "step": 18041 }, { "epoch": 0.8368274582560297, "grad_norm": 7.206320285797119, "learning_rate": 6.554720870440656e-07, "loss": 0.3643, "step": 18042 }, { "epoch": 0.836873840445269, "grad_norm": 11.27161693572998, "learning_rate": 6.551078587058051e-07, "loss": 0.4252, "step": 18043 }, { "epoch": 0.8369202226345084, "grad_norm": 4.814363479614258, "learning_rate": 6.547437244973415e-07, "loss": 0.2877, "step": 18044 }, { "epoch": 0.8369666048237476, "grad_norm": 8.675405502319336, "learning_rate": 6.543796844265632e-07, "loss": 0.3011, "step": 18045 }, { "epoch": 0.837012987012987, "grad_norm": 9.12915325164795, "learning_rate": 6.540157385013568e-07, "loss": 0.3514, "step": 18046 }, { "epoch": 0.8370593692022263, "grad_norm": 4.55202579498291, "learning_rate": 6.536518867296077e-07, "loss": 0.2786, "step": 18047 }, { "epoch": 0.8371057513914657, "grad_norm": 6.719723224639893, "learning_rate": 6.532881291191984e-07, "loss": 0.2028, "step": 18048 }, { "epoch": 0.8371521335807051, "grad_norm": 20.600723266601562, "learning_rate": 6.529244656780087e-07, "loss": 0.6223, "step": 18049 }, { "epoch": 0.8371985157699443, "grad_norm": 7.669163703918457, "learning_rate": 6.525608964139174e-07, "loss": 0.3495, "step": 18050 }, { "epoch": 0.8372448979591837, "grad_norm": 8.275084495544434, "learning_rate": 6.521974213348009e-07, "loss": 0.3425, "step": 18051 }, { "epoch": 0.837291280148423, "grad_norm": 17.393003463745117, "learning_rate": 6.518340404485341e-07, "loss": 0.3332, "step": 18052 }, { "epoch": 0.8373376623376624, "grad_norm": 11.860958099365234, "learning_rate": 6.514707537629905e-07, "loss": 0.4138, "step": 18053 }, { "epoch": 0.8373840445269016, "grad_norm": 6.108469009399414, "learning_rate": 6.511075612860385e-07, "loss": 0.3567, "step": 18054 }, { "epoch": 0.837430426716141, "grad_norm": 6.6516032218933105, "learning_rate": 6.507444630255477e-07, "loss": 0.2899, "step": 18055 }, { "epoch": 0.8374768089053803, "grad_norm": 7.597390174865723, "learning_rate": 6.503814589893836e-07, "loss": 0.2887, "step": 18056 }, { "epoch": 0.8375231910946197, "grad_norm": 4.980865478515625, "learning_rate": 6.50018549185411e-07, "loss": 0.2943, "step": 18057 }, { "epoch": 0.837569573283859, "grad_norm": 4.352934837341309, "learning_rate": 6.496557336214926e-07, "loss": 0.2379, "step": 18058 }, { "epoch": 0.8376159554730983, "grad_norm": 12.323463439941406, "learning_rate": 6.492930123054891e-07, "loss": 0.3066, "step": 18059 }, { "epoch": 0.8376623376623377, "grad_norm": 6.745848655700684, "learning_rate": 6.489303852452566e-07, "loss": 0.3484, "step": 18060 }, { "epoch": 0.837708719851577, "grad_norm": 6.70914888381958, "learning_rate": 6.485678524486522e-07, "loss": 0.2592, "step": 18061 }, { "epoch": 0.8377551020408164, "grad_norm": 4.82218074798584, "learning_rate": 6.4820541392353e-07, "loss": 0.2082, "step": 18062 }, { "epoch": 0.8378014842300556, "grad_norm": 7.911758899688721, "learning_rate": 6.478430696777426e-07, "loss": 0.3251, "step": 18063 }, { "epoch": 0.837847866419295, "grad_norm": 10.92705249786377, "learning_rate": 6.474808197191401e-07, "loss": 0.3298, "step": 18064 }, { "epoch": 0.8378942486085343, "grad_norm": 10.198456764221191, "learning_rate": 6.471186640555693e-07, "loss": 0.4241, "step": 18065 }, { "epoch": 0.8379406307977737, "grad_norm": 7.38730525970459, "learning_rate": 6.467566026948768e-07, "loss": 0.3698, "step": 18066 }, { "epoch": 0.837987012987013, "grad_norm": 4.837325572967529, "learning_rate": 6.463946356449063e-07, "loss": 0.3, "step": 18067 }, { "epoch": 0.8380333951762523, "grad_norm": 6.222515106201172, "learning_rate": 6.460327629134994e-07, "loss": 0.2645, "step": 18068 }, { "epoch": 0.8380797773654917, "grad_norm": 4.942663192749023, "learning_rate": 6.456709845084969e-07, "loss": 0.2972, "step": 18069 }, { "epoch": 0.838126159554731, "grad_norm": 8.401880264282227, "learning_rate": 6.453093004377364e-07, "loss": 0.3958, "step": 18070 }, { "epoch": 0.8381725417439703, "grad_norm": 6.640196800231934, "learning_rate": 6.449477107090518e-07, "loss": 0.2887, "step": 18071 }, { "epoch": 0.8382189239332096, "grad_norm": 6.0522990226745605, "learning_rate": 6.445862153302784e-07, "loss": 0.1261, "step": 18072 }, { "epoch": 0.838265306122449, "grad_norm": 9.892692565917969, "learning_rate": 6.442248143092472e-07, "loss": 0.3919, "step": 18073 }, { "epoch": 0.8383116883116883, "grad_norm": 5.183095932006836, "learning_rate": 6.438635076537881e-07, "loss": 0.2418, "step": 18074 }, { "epoch": 0.8383580705009277, "grad_norm": 7.1896138191223145, "learning_rate": 6.435022953717295e-07, "loss": 0.2687, "step": 18075 }, { "epoch": 0.838404452690167, "grad_norm": 7.926759719848633, "learning_rate": 6.431411774708945e-07, "loss": 0.3695, "step": 18076 }, { "epoch": 0.8384508348794063, "grad_norm": 5.092504024505615, "learning_rate": 6.427801539591083e-07, "loss": 0.4016, "step": 18077 }, { "epoch": 0.8384972170686457, "grad_norm": 10.207023620605469, "learning_rate": 6.424192248441918e-07, "loss": 0.3827, "step": 18078 }, { "epoch": 0.838543599257885, "grad_norm": 4.627486228942871, "learning_rate": 6.420583901339644e-07, "loss": 0.3546, "step": 18079 }, { "epoch": 0.8385899814471243, "grad_norm": 7.851893424987793, "learning_rate": 6.416976498362432e-07, "loss": 0.3397, "step": 18080 }, { "epoch": 0.8386363636363636, "grad_norm": 9.373285293579102, "learning_rate": 6.413370039588451e-07, "loss": 0.2927, "step": 18081 }, { "epoch": 0.838682745825603, "grad_norm": 7.79745626449585, "learning_rate": 6.409764525095807e-07, "loss": 0.306, "step": 18082 }, { "epoch": 0.8387291280148423, "grad_norm": 9.305235862731934, "learning_rate": 6.40615995496262e-07, "loss": 0.3362, "step": 18083 }, { "epoch": 0.8387755102040816, "grad_norm": 5.145394802093506, "learning_rate": 6.402556329266984e-07, "loss": 0.2886, "step": 18084 }, { "epoch": 0.838821892393321, "grad_norm": 5.555695056915283, "learning_rate": 6.398953648086975e-07, "loss": 0.2885, "step": 18085 }, { "epoch": 0.8388682745825603, "grad_norm": 12.160107612609863, "learning_rate": 6.395351911500646e-07, "loss": 0.3157, "step": 18086 }, { "epoch": 0.8389146567717997, "grad_norm": 9.576635360717773, "learning_rate": 6.391751119586004e-07, "loss": 0.4245, "step": 18087 }, { "epoch": 0.8389610389610389, "grad_norm": 7.608540058135986, "learning_rate": 6.388151272421078e-07, "loss": 0.2093, "step": 18088 }, { "epoch": 0.8390074211502783, "grad_norm": 7.547390460968018, "learning_rate": 6.384552370083851e-07, "loss": 0.4318, "step": 18089 }, { "epoch": 0.8390538033395176, "grad_norm": 11.414892196655273, "learning_rate": 6.380954412652291e-07, "loss": 0.3599, "step": 18090 }, { "epoch": 0.839100185528757, "grad_norm": 13.190690040588379, "learning_rate": 6.377357400204348e-07, "loss": 0.3475, "step": 18091 }, { "epoch": 0.8391465677179963, "grad_norm": 6.738153457641602, "learning_rate": 6.373761332817963e-07, "loss": 0.3179, "step": 18092 }, { "epoch": 0.8391929499072356, "grad_norm": 5.46569299697876, "learning_rate": 6.370166210571011e-07, "loss": 0.2422, "step": 18093 }, { "epoch": 0.8392393320964749, "grad_norm": 6.311557292938232, "learning_rate": 6.366572033541397e-07, "loss": 0.3044, "step": 18094 }, { "epoch": 0.8392857142857143, "grad_norm": 7.958165645599365, "learning_rate": 6.362978801806985e-07, "loss": 0.2539, "step": 18095 }, { "epoch": 0.8393320964749537, "grad_norm": 7.6698994636535645, "learning_rate": 6.359386515445626e-07, "loss": 0.2848, "step": 18096 }, { "epoch": 0.8393784786641929, "grad_norm": 8.240033149719238, "learning_rate": 6.355795174535145e-07, "loss": 0.2248, "step": 18097 }, { "epoch": 0.8394248608534323, "grad_norm": 9.403075218200684, "learning_rate": 6.352204779153337e-07, "loss": 0.2919, "step": 18098 }, { "epoch": 0.8394712430426716, "grad_norm": 11.35924243927002, "learning_rate": 6.348615329377988e-07, "loss": 0.312, "step": 18099 }, { "epoch": 0.839517625231911, "grad_norm": 12.145648956298828, "learning_rate": 6.34502682528686e-07, "loss": 0.2944, "step": 18100 }, { "epoch": 0.8395640074211502, "grad_norm": 4.466288089752197, "learning_rate": 6.341439266957705e-07, "loss": 0.2634, "step": 18101 }, { "epoch": 0.8396103896103896, "grad_norm": 13.548990249633789, "learning_rate": 6.337852654468252e-07, "loss": 0.4591, "step": 18102 }, { "epoch": 0.8396567717996289, "grad_norm": 6.354928970336914, "learning_rate": 6.334266987896176e-07, "loss": 0.3708, "step": 18103 }, { "epoch": 0.8397031539888683, "grad_norm": 5.739843368530273, "learning_rate": 6.330682267319177e-07, "loss": 0.295, "step": 18104 }, { "epoch": 0.8397495361781077, "grad_norm": 3.701603651046753, "learning_rate": 6.327098492814915e-07, "loss": 0.2563, "step": 18105 }, { "epoch": 0.8397959183673469, "grad_norm": 9.372415542602539, "learning_rate": 6.323515664461033e-07, "loss": 0.4298, "step": 18106 }, { "epoch": 0.8398423005565863, "grad_norm": 10.050650596618652, "learning_rate": 6.319933782335141e-07, "loss": 0.305, "step": 18107 }, { "epoch": 0.8398886827458256, "grad_norm": 13.366157531738281, "learning_rate": 6.316352846514857e-07, "loss": 0.3766, "step": 18108 }, { "epoch": 0.839935064935065, "grad_norm": 6.904337406158447, "learning_rate": 6.312772857077737e-07, "loss": 0.2782, "step": 18109 }, { "epoch": 0.8399814471243042, "grad_norm": 14.496663093566895, "learning_rate": 6.30919381410135e-07, "loss": 0.4974, "step": 18110 }, { "epoch": 0.8400278293135436, "grad_norm": 8.785972595214844, "learning_rate": 6.305615717663233e-07, "loss": 0.411, "step": 18111 }, { "epoch": 0.8400742115027829, "grad_norm": 6.001057147979736, "learning_rate": 6.302038567840912e-07, "loss": 0.3279, "step": 18112 }, { "epoch": 0.8401205936920223, "grad_norm": 9.868647575378418, "learning_rate": 6.298462364711882e-07, "loss": 0.3671, "step": 18113 }, { "epoch": 0.8401669758812615, "grad_norm": 15.704392433166504, "learning_rate": 6.294887108353603e-07, "loss": 0.5541, "step": 18114 }, { "epoch": 0.8402133580705009, "grad_norm": 8.27321720123291, "learning_rate": 6.291312798843546e-07, "loss": 0.3144, "step": 18115 }, { "epoch": 0.8402597402597403, "grad_norm": 3.899200201034546, "learning_rate": 6.287739436259144e-07, "loss": 0.2417, "step": 18116 }, { "epoch": 0.8403061224489796, "grad_norm": 11.803986549377441, "learning_rate": 6.284167020677806e-07, "loss": 0.3842, "step": 18117 }, { "epoch": 0.840352504638219, "grad_norm": 9.875639915466309, "learning_rate": 6.280595552176938e-07, "loss": 0.4578, "step": 18118 }, { "epoch": 0.8403988868274582, "grad_norm": 11.157898902893066, "learning_rate": 6.277025030833917e-07, "loss": 0.4282, "step": 18119 }, { "epoch": 0.8404452690166976, "grad_norm": 6.940671443939209, "learning_rate": 6.273455456726074e-07, "loss": 0.3525, "step": 18120 }, { "epoch": 0.8404916512059369, "grad_norm": 7.908967018127441, "learning_rate": 6.269886829930754e-07, "loss": 0.279, "step": 18121 }, { "epoch": 0.8405380333951763, "grad_norm": 9.61994457244873, "learning_rate": 6.266319150525274e-07, "loss": 0.3182, "step": 18122 }, { "epoch": 0.8405844155844155, "grad_norm": 7.693122386932373, "learning_rate": 6.262752418586921e-07, "loss": 0.2089, "step": 18123 }, { "epoch": 0.8406307977736549, "grad_norm": 3.6841366291046143, "learning_rate": 6.259186634192982e-07, "loss": 0.2495, "step": 18124 }, { "epoch": 0.8406771799628943, "grad_norm": 10.365375518798828, "learning_rate": 6.25562179742068e-07, "loss": 0.3035, "step": 18125 }, { "epoch": 0.8407235621521336, "grad_norm": 7.316362380981445, "learning_rate": 6.252057908347259e-07, "loss": 0.3007, "step": 18126 }, { "epoch": 0.8407699443413729, "grad_norm": 9.02329158782959, "learning_rate": 6.248494967049928e-07, "loss": 0.3242, "step": 18127 }, { "epoch": 0.8408163265306122, "grad_norm": 6.183379173278809, "learning_rate": 6.244932973605877e-07, "loss": 0.2966, "step": 18128 }, { "epoch": 0.8408627087198516, "grad_norm": 6.08021879196167, "learning_rate": 6.241371928092288e-07, "loss": 0.2804, "step": 18129 }, { "epoch": 0.8409090909090909, "grad_norm": 11.020402908325195, "learning_rate": 6.237811830586282e-07, "loss": 0.4926, "step": 18130 }, { "epoch": 0.8409554730983302, "grad_norm": 9.546747207641602, "learning_rate": 6.234252681165004e-07, "loss": 0.3493, "step": 18131 }, { "epoch": 0.8410018552875695, "grad_norm": 7.837448596954346, "learning_rate": 6.230694479905558e-07, "loss": 0.3373, "step": 18132 }, { "epoch": 0.8410482374768089, "grad_norm": 6.4415669441223145, "learning_rate": 6.227137226885027e-07, "loss": 0.3597, "step": 18133 }, { "epoch": 0.8410946196660483, "grad_norm": 12.081469535827637, "learning_rate": 6.223580922180489e-07, "loss": 0.4315, "step": 18134 }, { "epoch": 0.8411410018552876, "grad_norm": 7.6638569831848145, "learning_rate": 6.220025565868976e-07, "loss": 0.2485, "step": 18135 }, { "epoch": 0.8411873840445269, "grad_norm": 5.806511878967285, "learning_rate": 6.216471158027515e-07, "loss": 0.2028, "step": 18136 }, { "epoch": 0.8412337662337662, "grad_norm": 4.918171405792236, "learning_rate": 6.212917698733112e-07, "loss": 0.2384, "step": 18137 }, { "epoch": 0.8412801484230056, "grad_norm": 11.589752197265625, "learning_rate": 6.20936518806275e-07, "loss": 0.2935, "step": 18138 }, { "epoch": 0.8413265306122449, "grad_norm": 10.394988059997559, "learning_rate": 6.20581362609341e-07, "loss": 0.3512, "step": 18139 }, { "epoch": 0.8413729128014842, "grad_norm": 6.133001804351807, "learning_rate": 6.202263012902005e-07, "loss": 0.3195, "step": 18140 }, { "epoch": 0.8414192949907235, "grad_norm": 8.769316673278809, "learning_rate": 6.198713348565471e-07, "loss": 0.2901, "step": 18141 }, { "epoch": 0.8414656771799629, "grad_norm": 6.6705780029296875, "learning_rate": 6.195164633160705e-07, "loss": 0.361, "step": 18142 }, { "epoch": 0.8415120593692023, "grad_norm": 5.340119361877441, "learning_rate": 6.1916168667646e-07, "loss": 0.1949, "step": 18143 }, { "epoch": 0.8415584415584415, "grad_norm": 6.799276828765869, "learning_rate": 6.188070049454014e-07, "loss": 0.3899, "step": 18144 }, { "epoch": 0.8416048237476809, "grad_norm": 7.609010219573975, "learning_rate": 6.184524181305779e-07, "loss": 0.4035, "step": 18145 }, { "epoch": 0.8416512059369202, "grad_norm": 6.307253837585449, "learning_rate": 6.180979262396708e-07, "loss": 0.2897, "step": 18146 }, { "epoch": 0.8416975881261596, "grad_norm": 8.822067260742188, "learning_rate": 6.177435292803618e-07, "loss": 0.3185, "step": 18147 }, { "epoch": 0.8417439703153989, "grad_norm": 7.227260112762451, "learning_rate": 6.173892272603271e-07, "loss": 0.3257, "step": 18148 }, { "epoch": 0.8417903525046382, "grad_norm": 6.480686187744141, "learning_rate": 6.17035020187245e-07, "loss": 0.3798, "step": 18149 }, { "epoch": 0.8418367346938775, "grad_norm": 7.60109281539917, "learning_rate": 6.166809080687858e-07, "loss": 0.2704, "step": 18150 }, { "epoch": 0.8418831168831169, "grad_norm": 8.017114639282227, "learning_rate": 6.163268909126225e-07, "loss": 0.3856, "step": 18151 }, { "epoch": 0.8419294990723563, "grad_norm": 7.958639621734619, "learning_rate": 6.159729687264254e-07, "loss": 0.3058, "step": 18152 }, { "epoch": 0.8419758812615955, "grad_norm": 6.493786811828613, "learning_rate": 6.156191415178619e-07, "loss": 0.2767, "step": 18153 }, { "epoch": 0.8420222634508349, "grad_norm": 8.562329292297363, "learning_rate": 6.152654092945976e-07, "loss": 0.3523, "step": 18154 }, { "epoch": 0.8420686456400742, "grad_norm": 11.591840744018555, "learning_rate": 6.14911772064295e-07, "loss": 0.2846, "step": 18155 }, { "epoch": 0.8421150278293136, "grad_norm": 14.969385147094727, "learning_rate": 6.145582298346153e-07, "loss": 0.4717, "step": 18156 }, { "epoch": 0.8421614100185528, "grad_norm": 8.835990905761719, "learning_rate": 6.142047826132191e-07, "loss": 0.2207, "step": 18157 }, { "epoch": 0.8422077922077922, "grad_norm": 8.766825675964355, "learning_rate": 6.138514304077631e-07, "loss": 0.3523, "step": 18158 }, { "epoch": 0.8422541743970315, "grad_norm": 9.427085876464844, "learning_rate": 6.134981732259027e-07, "loss": 0.4172, "step": 18159 }, { "epoch": 0.8423005565862709, "grad_norm": 4.542849063873291, "learning_rate": 6.131450110752901e-07, "loss": 0.3061, "step": 18160 }, { "epoch": 0.8423469387755103, "grad_norm": 6.217494964599609, "learning_rate": 6.127919439635771e-07, "loss": 0.3087, "step": 18161 }, { "epoch": 0.8423933209647495, "grad_norm": 7.090089797973633, "learning_rate": 6.124389718984131e-07, "loss": 0.2793, "step": 18162 }, { "epoch": 0.8424397031539889, "grad_norm": 5.919843673706055, "learning_rate": 6.120860948874435e-07, "loss": 0.3527, "step": 18163 }, { "epoch": 0.8424860853432282, "grad_norm": 5.787410736083984, "learning_rate": 6.117333129383162e-07, "loss": 0.3262, "step": 18164 }, { "epoch": 0.8425324675324676, "grad_norm": 13.852569580078125, "learning_rate": 6.113806260586708e-07, "loss": 0.3443, "step": 18165 }, { "epoch": 0.8425788497217068, "grad_norm": 9.480474472045898, "learning_rate": 6.110280342561492e-07, "loss": 0.2989, "step": 18166 }, { "epoch": 0.8426252319109462, "grad_norm": 5.839147567749023, "learning_rate": 6.106755375383905e-07, "loss": 0.38, "step": 18167 }, { "epoch": 0.8426716141001855, "grad_norm": 19.15214729309082, "learning_rate": 6.103231359130308e-07, "loss": 0.5092, "step": 18168 }, { "epoch": 0.8427179962894249, "grad_norm": 5.640504837036133, "learning_rate": 6.099708293877066e-07, "loss": 0.2787, "step": 18169 }, { "epoch": 0.8427643784786641, "grad_norm": 9.353792190551758, "learning_rate": 6.096186179700475e-07, "loss": 0.3232, "step": 18170 }, { "epoch": 0.8428107606679035, "grad_norm": 6.493753433227539, "learning_rate": 6.092665016676852e-07, "loss": 0.2397, "step": 18171 }, { "epoch": 0.8428571428571429, "grad_norm": 13.819647789001465, "learning_rate": 6.089144804882485e-07, "loss": 0.313, "step": 18172 }, { "epoch": 0.8429035250463822, "grad_norm": 8.777158737182617, "learning_rate": 6.085625544393636e-07, "loss": 0.3226, "step": 18173 }, { "epoch": 0.8429499072356216, "grad_norm": 9.086645126342773, "learning_rate": 6.08210723528656e-07, "loss": 0.4448, "step": 18174 }, { "epoch": 0.8429962894248608, "grad_norm": 12.617239952087402, "learning_rate": 6.078589877637453e-07, "loss": 0.514, "step": 18175 }, { "epoch": 0.8430426716141002, "grad_norm": 5.693139553070068, "learning_rate": 6.07507347152253e-07, "loss": 0.3008, "step": 18176 }, { "epoch": 0.8430890538033395, "grad_norm": 6.176590442657471, "learning_rate": 6.07155801701797e-07, "loss": 0.3006, "step": 18177 }, { "epoch": 0.8431354359925789, "grad_norm": 8.724878311157227, "learning_rate": 6.068043514199939e-07, "loss": 0.3032, "step": 18178 }, { "epoch": 0.8431818181818181, "grad_norm": 6.676463603973389, "learning_rate": 6.064529963144583e-07, "loss": 0.2995, "step": 18179 }, { "epoch": 0.8432282003710575, "grad_norm": 11.435474395751953, "learning_rate": 6.061017363928002e-07, "loss": 0.4379, "step": 18180 }, { "epoch": 0.8432745825602969, "grad_norm": 10.362525939941406, "learning_rate": 6.057505716626305e-07, "loss": 0.2948, "step": 18181 }, { "epoch": 0.8433209647495362, "grad_norm": 7.646849632263184, "learning_rate": 6.053995021315568e-07, "loss": 0.21, "step": 18182 }, { "epoch": 0.8433673469387755, "grad_norm": 3.7403883934020996, "learning_rate": 6.050485278071849e-07, "loss": 0.1963, "step": 18183 }, { "epoch": 0.8434137291280148, "grad_norm": 5.882821559906006, "learning_rate": 6.046976486971201e-07, "loss": 0.2272, "step": 18184 }, { "epoch": 0.8434601113172542, "grad_norm": 32.891300201416016, "learning_rate": 6.043468648089607e-07, "loss": 0.3883, "step": 18185 }, { "epoch": 0.8435064935064935, "grad_norm": 6.646916389465332, "learning_rate": 6.039961761503088e-07, "loss": 0.2709, "step": 18186 }, { "epoch": 0.8435528756957328, "grad_norm": 6.6355791091918945, "learning_rate": 6.036455827287601e-07, "loss": 0.2308, "step": 18187 }, { "epoch": 0.8435992578849721, "grad_norm": 5.935556888580322, "learning_rate": 6.03295084551912e-07, "loss": 0.255, "step": 18188 }, { "epoch": 0.8436456400742115, "grad_norm": 8.915618896484375, "learning_rate": 6.029446816273565e-07, "loss": 0.4365, "step": 18189 }, { "epoch": 0.8436920222634509, "grad_norm": 7.489847183227539, "learning_rate": 6.025943739626861e-07, "loss": 0.2832, "step": 18190 }, { "epoch": 0.8437384044526902, "grad_norm": 5.234316825866699, "learning_rate": 6.022441615654883e-07, "loss": 0.3568, "step": 18191 }, { "epoch": 0.8437847866419295, "grad_norm": 16.55196762084961, "learning_rate": 6.018940444433513e-07, "loss": 0.4322, "step": 18192 }, { "epoch": 0.8438311688311688, "grad_norm": 8.402789115905762, "learning_rate": 6.0154402260386e-07, "loss": 0.3479, "step": 18193 }, { "epoch": 0.8438775510204082, "grad_norm": 13.410111427307129, "learning_rate": 6.01194096054597e-07, "loss": 0.5237, "step": 18194 }, { "epoch": 0.8439239332096475, "grad_norm": 8.614801406860352, "learning_rate": 6.008442648031454e-07, "loss": 0.3596, "step": 18195 }, { "epoch": 0.8439703153988868, "grad_norm": 4.1733317375183105, "learning_rate": 6.004945288570813e-07, "loss": 0.2888, "step": 18196 }, { "epoch": 0.8440166975881261, "grad_norm": 10.826225280761719, "learning_rate": 6.00144888223983e-07, "loss": 0.3024, "step": 18197 }, { "epoch": 0.8440630797773655, "grad_norm": 4.787935733795166, "learning_rate": 5.997953429114245e-07, "loss": 0.2951, "step": 18198 }, { "epoch": 0.8441094619666049, "grad_norm": 8.964938163757324, "learning_rate": 5.994458929269792e-07, "loss": 0.4526, "step": 18199 }, { "epoch": 0.8441558441558441, "grad_norm": 13.47104549407959, "learning_rate": 5.990965382782177e-07, "loss": 0.3048, "step": 18200 }, { "epoch": 0.8442022263450835, "grad_norm": 7.858605861663818, "learning_rate": 5.987472789727094e-07, "loss": 0.4515, "step": 18201 }, { "epoch": 0.8442486085343228, "grad_norm": 9.450116157531738, "learning_rate": 5.98398115018019e-07, "loss": 0.3833, "step": 18202 }, { "epoch": 0.8442949907235622, "grad_norm": 6.880646705627441, "learning_rate": 5.98049046421712e-07, "loss": 0.3409, "step": 18203 }, { "epoch": 0.8443413729128015, "grad_norm": 9.565747261047363, "learning_rate": 5.977000731913501e-07, "loss": 0.3132, "step": 18204 }, { "epoch": 0.8443877551020408, "grad_norm": 8.484323501586914, "learning_rate": 5.973511953344946e-07, "loss": 0.4147, "step": 18205 }, { "epoch": 0.8444341372912801, "grad_norm": 6.675728797912598, "learning_rate": 5.970024128587037e-07, "loss": 0.2568, "step": 18206 }, { "epoch": 0.8444805194805195, "grad_norm": 10.913190841674805, "learning_rate": 5.966537257715327e-07, "loss": 0.3683, "step": 18207 }, { "epoch": 0.8445269016697589, "grad_norm": 9.32222843170166, "learning_rate": 5.963051340805364e-07, "loss": 0.3926, "step": 18208 }, { "epoch": 0.8445732838589981, "grad_norm": 5.866206645965576, "learning_rate": 5.959566377932663e-07, "loss": 0.332, "step": 18209 }, { "epoch": 0.8446196660482375, "grad_norm": 8.714659690856934, "learning_rate": 5.956082369172728e-07, "loss": 0.3396, "step": 18210 }, { "epoch": 0.8446660482374768, "grad_norm": 8.84158706665039, "learning_rate": 5.952599314601037e-07, "loss": 0.3537, "step": 18211 }, { "epoch": 0.8447124304267162, "grad_norm": 5.6946187019348145, "learning_rate": 5.94911721429306e-07, "loss": 0.2853, "step": 18212 }, { "epoch": 0.8447588126159554, "grad_norm": 13.35785961151123, "learning_rate": 5.945636068324217e-07, "loss": 0.4125, "step": 18213 }, { "epoch": 0.8448051948051948, "grad_norm": 8.590309143066406, "learning_rate": 5.942155876769928e-07, "loss": 0.3109, "step": 18214 }, { "epoch": 0.8448515769944341, "grad_norm": 5.578019618988037, "learning_rate": 5.938676639705598e-07, "loss": 0.1958, "step": 18215 }, { "epoch": 0.8448979591836735, "grad_norm": 5.730958938598633, "learning_rate": 5.935198357206595e-07, "loss": 0.243, "step": 18216 }, { "epoch": 0.8449443413729129, "grad_norm": 13.471172332763672, "learning_rate": 5.931721029348287e-07, "loss": 0.4398, "step": 18217 }, { "epoch": 0.8449907235621521, "grad_norm": 7.483297824859619, "learning_rate": 5.928244656205995e-07, "loss": 0.3374, "step": 18218 }, { "epoch": 0.8450371057513915, "grad_norm": 6.36673641204834, "learning_rate": 5.924769237855033e-07, "loss": 0.3204, "step": 18219 }, { "epoch": 0.8450834879406308, "grad_norm": 7.912754535675049, "learning_rate": 5.921294774370695e-07, "loss": 0.3979, "step": 18220 }, { "epoch": 0.8451298701298702, "grad_norm": 6.323245048522949, "learning_rate": 5.917821265828261e-07, "loss": 0.2974, "step": 18221 }, { "epoch": 0.8451762523191094, "grad_norm": 7.382256984710693, "learning_rate": 5.914348712302981e-07, "loss": 0.2768, "step": 18222 }, { "epoch": 0.8452226345083488, "grad_norm": 8.528276443481445, "learning_rate": 5.91087711387009e-07, "loss": 0.3615, "step": 18223 }, { "epoch": 0.8452690166975881, "grad_norm": 6.006945610046387, "learning_rate": 5.907406470604782e-07, "loss": 0.2752, "step": 18224 }, { "epoch": 0.8453153988868275, "grad_norm": 4.858709812164307, "learning_rate": 5.903936782582253e-07, "loss": 0.3022, "step": 18225 }, { "epoch": 0.8453617810760667, "grad_norm": 6.888482093811035, "learning_rate": 5.900468049877678e-07, "loss": 0.4426, "step": 18226 }, { "epoch": 0.8454081632653061, "grad_norm": 8.321474075317383, "learning_rate": 5.897000272566205e-07, "loss": 0.265, "step": 18227 }, { "epoch": 0.8454545454545455, "grad_norm": 11.50722885131836, "learning_rate": 5.893533450722966e-07, "loss": 0.4224, "step": 18228 }, { "epoch": 0.8455009276437848, "grad_norm": 5.113360404968262, "learning_rate": 5.890067584423054e-07, "loss": 0.308, "step": 18229 }, { "epoch": 0.8455473098330241, "grad_norm": 10.271589279174805, "learning_rate": 5.886602673741559e-07, "loss": 0.371, "step": 18230 }, { "epoch": 0.8455936920222634, "grad_norm": 7.9540019035339355, "learning_rate": 5.88313871875355e-07, "loss": 0.3965, "step": 18231 }, { "epoch": 0.8456400742115028, "grad_norm": 4.660429954528809, "learning_rate": 5.879675719534078e-07, "loss": 0.2784, "step": 18232 }, { "epoch": 0.8456864564007421, "grad_norm": 7.157065391540527, "learning_rate": 5.876213676158155e-07, "loss": 0.3662, "step": 18233 }, { "epoch": 0.8457328385899815, "grad_norm": 6.657443523406982, "learning_rate": 5.872752588700798e-07, "loss": 0.3362, "step": 18234 }, { "epoch": 0.8457792207792207, "grad_norm": 5.233067989349365, "learning_rate": 5.869292457236975e-07, "loss": 0.2519, "step": 18235 }, { "epoch": 0.8458256029684601, "grad_norm": 10.44469165802002, "learning_rate": 5.865833281841654e-07, "loss": 0.3002, "step": 18236 }, { "epoch": 0.8458719851576995, "grad_norm": 11.827747344970703, "learning_rate": 5.862375062589776e-07, "loss": 0.4231, "step": 18237 }, { "epoch": 0.8459183673469388, "grad_norm": 6.563742160797119, "learning_rate": 5.858917799556258e-07, "loss": 0.2762, "step": 18238 }, { "epoch": 0.8459647495361781, "grad_norm": 6.844779014587402, "learning_rate": 5.855461492816017e-07, "loss": 0.2474, "step": 18239 }, { "epoch": 0.8460111317254174, "grad_norm": 6.508057594299316, "learning_rate": 5.852006142443912e-07, "loss": 0.2997, "step": 18240 }, { "epoch": 0.8460575139146568, "grad_norm": 5.544246196746826, "learning_rate": 5.848551748514802e-07, "loss": 0.2647, "step": 18241 }, { "epoch": 0.8461038961038961, "grad_norm": 7.052427768707275, "learning_rate": 5.845098311103536e-07, "loss": 0.3992, "step": 18242 }, { "epoch": 0.8461502782931354, "grad_norm": 8.979080200195312, "learning_rate": 5.841645830284925e-07, "loss": 0.3479, "step": 18243 }, { "epoch": 0.8461966604823747, "grad_norm": 6.743502616882324, "learning_rate": 5.838194306133766e-07, "loss": 0.3084, "step": 18244 }, { "epoch": 0.8462430426716141, "grad_norm": 10.902573585510254, "learning_rate": 5.834743738724846e-07, "loss": 0.5051, "step": 18245 }, { "epoch": 0.8462894248608535, "grad_norm": 4.976858139038086, "learning_rate": 5.831294128132897e-07, "loss": 0.3941, "step": 18246 }, { "epoch": 0.8463358070500928, "grad_norm": 5.536705017089844, "learning_rate": 5.827845474432658e-07, "loss": 0.3053, "step": 18247 }, { "epoch": 0.8463821892393321, "grad_norm": 6.316246509552002, "learning_rate": 5.824397777698859e-07, "loss": 0.304, "step": 18248 }, { "epoch": 0.8464285714285714, "grad_norm": 5.876131534576416, "learning_rate": 5.820951038006173e-07, "loss": 0.2502, "step": 18249 }, { "epoch": 0.8464749536178108, "grad_norm": 9.757967948913574, "learning_rate": 5.817505255429296e-07, "loss": 0.3413, "step": 18250 }, { "epoch": 0.8465213358070501, "grad_norm": 7.263096332550049, "learning_rate": 5.814060430042851e-07, "loss": 0.2286, "step": 18251 }, { "epoch": 0.8465677179962894, "grad_norm": 4.904791831970215, "learning_rate": 5.810616561921484e-07, "loss": 0.2684, "step": 18252 }, { "epoch": 0.8466141001855287, "grad_norm": 6.3448381423950195, "learning_rate": 5.807173651139797e-07, "loss": 0.2648, "step": 18253 }, { "epoch": 0.8466604823747681, "grad_norm": 9.334062576293945, "learning_rate": 5.803731697772391e-07, "loss": 0.2884, "step": 18254 }, { "epoch": 0.8467068645640075, "grad_norm": 13.447524070739746, "learning_rate": 5.800290701893818e-07, "loss": 0.26, "step": 18255 }, { "epoch": 0.8467532467532467, "grad_norm": 10.244352340698242, "learning_rate": 5.796850663578651e-07, "loss": 0.4043, "step": 18256 }, { "epoch": 0.8467996289424861, "grad_norm": 9.461648941040039, "learning_rate": 5.793411582901387e-07, "loss": 0.338, "step": 18257 }, { "epoch": 0.8468460111317254, "grad_norm": 6.69342041015625, "learning_rate": 5.789973459936543e-07, "loss": 0.2399, "step": 18258 }, { "epoch": 0.8468923933209648, "grad_norm": 5.717155456542969, "learning_rate": 5.78653629475861e-07, "loss": 0.3041, "step": 18259 }, { "epoch": 0.8469387755102041, "grad_norm": 7.471679210662842, "learning_rate": 5.783100087442045e-07, "loss": 0.33, "step": 18260 }, { "epoch": 0.8469851576994434, "grad_norm": 15.5929536819458, "learning_rate": 5.779664838061306e-07, "loss": 0.2716, "step": 18261 }, { "epoch": 0.8470315398886827, "grad_norm": 9.024375915527344, "learning_rate": 5.776230546690792e-07, "loss": 0.3415, "step": 18262 }, { "epoch": 0.8470779220779221, "grad_norm": 7.786294460296631, "learning_rate": 5.772797213404918e-07, "loss": 0.3347, "step": 18263 }, { "epoch": 0.8471243042671615, "grad_norm": 5.776583194732666, "learning_rate": 5.769364838278063e-07, "loss": 0.209, "step": 18264 }, { "epoch": 0.8471706864564007, "grad_norm": 10.642034530639648, "learning_rate": 5.765933421384595e-07, "loss": 0.3731, "step": 18265 }, { "epoch": 0.8472170686456401, "grad_norm": 8.476927757263184, "learning_rate": 5.762502962798844e-07, "loss": 0.2935, "step": 18266 }, { "epoch": 0.8472634508348794, "grad_norm": 13.573216438293457, "learning_rate": 5.759073462595144e-07, "loss": 0.4258, "step": 18267 }, { "epoch": 0.8473098330241188, "grad_norm": 5.764735698699951, "learning_rate": 5.755644920847775e-07, "loss": 0.3079, "step": 18268 }, { "epoch": 0.847356215213358, "grad_norm": 9.447965621948242, "learning_rate": 5.752217337631022e-07, "loss": 0.373, "step": 18269 }, { "epoch": 0.8474025974025974, "grad_norm": 4.296761989593506, "learning_rate": 5.748790713019142e-07, "loss": 0.1795, "step": 18270 }, { "epoch": 0.8474489795918367, "grad_norm": 6.475375175476074, "learning_rate": 5.745365047086371e-07, "loss": 0.2482, "step": 18271 }, { "epoch": 0.8474953617810761, "grad_norm": 6.062712669372559, "learning_rate": 5.741940339906932e-07, "loss": 0.3215, "step": 18272 }, { "epoch": 0.8475417439703155, "grad_norm": 9.585762023925781, "learning_rate": 5.738516591555004e-07, "loss": 0.3471, "step": 18273 }, { "epoch": 0.8475881261595547, "grad_norm": 4.073883533477783, "learning_rate": 5.735093802104768e-07, "loss": 0.2475, "step": 18274 }, { "epoch": 0.8476345083487941, "grad_norm": 7.680096626281738, "learning_rate": 5.731671971630376e-07, "loss": 0.3591, "step": 18275 }, { "epoch": 0.8476808905380334, "grad_norm": 6.080018043518066, "learning_rate": 5.728251100205967e-07, "loss": 0.2367, "step": 18276 }, { "epoch": 0.8477272727272728, "grad_norm": 4.307215213775635, "learning_rate": 5.72483118790565e-07, "loss": 0.2808, "step": 18277 }, { "epoch": 0.847773654916512, "grad_norm": 8.909762382507324, "learning_rate": 5.721412234803508e-07, "loss": 0.3186, "step": 18278 }, { "epoch": 0.8478200371057514, "grad_norm": 10.168896675109863, "learning_rate": 5.717994240973612e-07, "loss": 0.3558, "step": 18279 }, { "epoch": 0.8478664192949907, "grad_norm": 6.4999589920043945, "learning_rate": 5.714577206490018e-07, "loss": 0.2726, "step": 18280 }, { "epoch": 0.8479128014842301, "grad_norm": 6.27170991897583, "learning_rate": 5.711161131426746e-07, "loss": 0.3177, "step": 18281 }, { "epoch": 0.8479591836734693, "grad_norm": 6.453110218048096, "learning_rate": 5.707746015857813e-07, "loss": 0.4068, "step": 18282 }, { "epoch": 0.8480055658627087, "grad_norm": 4.680615425109863, "learning_rate": 5.704331859857209e-07, "loss": 0.2313, "step": 18283 }, { "epoch": 0.8480519480519481, "grad_norm": 6.027757167816162, "learning_rate": 5.700918663498878e-07, "loss": 0.301, "step": 18284 }, { "epoch": 0.8480983302411874, "grad_norm": 9.6689453125, "learning_rate": 5.697506426856786e-07, "loss": 0.3729, "step": 18285 }, { "epoch": 0.8481447124304267, "grad_norm": 8.57746410369873, "learning_rate": 5.694095150004847e-07, "loss": 0.3282, "step": 18286 }, { "epoch": 0.848191094619666, "grad_norm": 7.519357681274414, "learning_rate": 5.690684833016963e-07, "loss": 0.2669, "step": 18287 }, { "epoch": 0.8482374768089054, "grad_norm": 7.619199275970459, "learning_rate": 5.687275475967036e-07, "loss": 0.2301, "step": 18288 }, { "epoch": 0.8482838589981447, "grad_norm": 13.597840309143066, "learning_rate": 5.6838670789289e-07, "loss": 0.428, "step": 18289 }, { "epoch": 0.8483302411873841, "grad_norm": 5.485402584075928, "learning_rate": 5.680459641976416e-07, "loss": 0.3798, "step": 18290 }, { "epoch": 0.8483766233766233, "grad_norm": 6.7915449142456055, "learning_rate": 5.677053165183394e-07, "loss": 0.2078, "step": 18291 }, { "epoch": 0.8484230055658627, "grad_norm": 6.727259159088135, "learning_rate": 5.673647648623637e-07, "loss": 0.3231, "step": 18292 }, { "epoch": 0.8484693877551021, "grad_norm": 10.206164360046387, "learning_rate": 5.670243092370925e-07, "loss": 0.3734, "step": 18293 }, { "epoch": 0.8485157699443414, "grad_norm": 10.817099571228027, "learning_rate": 5.666839496499021e-07, "loss": 0.2998, "step": 18294 }, { "epoch": 0.8485621521335807, "grad_norm": 7.300068378448486, "learning_rate": 5.66343686108165e-07, "loss": 0.3334, "step": 18295 }, { "epoch": 0.84860853432282, "grad_norm": 6.649223804473877, "learning_rate": 5.660035186192531e-07, "loss": 0.3094, "step": 18296 }, { "epoch": 0.8486549165120594, "grad_norm": 6.3802642822265625, "learning_rate": 5.656634471905365e-07, "loss": 0.3439, "step": 18297 }, { "epoch": 0.8487012987012987, "grad_norm": 6.326140403747559, "learning_rate": 5.653234718293826e-07, "loss": 0.2746, "step": 18298 }, { "epoch": 0.848747680890538, "grad_norm": 13.023874282836914, "learning_rate": 5.649835925431574e-07, "loss": 0.3783, "step": 18299 }, { "epoch": 0.8487940630797773, "grad_norm": 6.497420787811279, "learning_rate": 5.646438093392226e-07, "loss": 0.3312, "step": 18300 }, { "epoch": 0.8488404452690167, "grad_norm": 11.678730964660645, "learning_rate": 5.643041222249401e-07, "loss": 0.3184, "step": 18301 }, { "epoch": 0.848886827458256, "grad_norm": 7.830857753753662, "learning_rate": 5.639645312076692e-07, "loss": 0.3313, "step": 18302 }, { "epoch": 0.8489332096474954, "grad_norm": 10.990124702453613, "learning_rate": 5.63625036294767e-07, "loss": 0.4219, "step": 18303 }, { "epoch": 0.8489795918367347, "grad_norm": 8.844627380371094, "learning_rate": 5.632856374935885e-07, "loss": 0.3542, "step": 18304 }, { "epoch": 0.849025974025974, "grad_norm": 7.395195007324219, "learning_rate": 5.629463348114877e-07, "loss": 0.3656, "step": 18305 }, { "epoch": 0.8490723562152134, "grad_norm": 7.098472595214844, "learning_rate": 5.626071282558132e-07, "loss": 0.2782, "step": 18306 }, { "epoch": 0.8491187384044527, "grad_norm": 12.354275703430176, "learning_rate": 5.622680178339147e-07, "loss": 0.4466, "step": 18307 }, { "epoch": 0.849165120593692, "grad_norm": 7.982028484344482, "learning_rate": 5.619290035531388e-07, "loss": 0.2605, "step": 18308 }, { "epoch": 0.8492115027829313, "grad_norm": 12.645559310913086, "learning_rate": 5.615900854208306e-07, "loss": 0.5307, "step": 18309 }, { "epoch": 0.8492578849721707, "grad_norm": 4.650846481323242, "learning_rate": 5.612512634443329e-07, "loss": 0.2488, "step": 18310 }, { "epoch": 0.84930426716141, "grad_norm": 8.464544296264648, "learning_rate": 5.609125376309843e-07, "loss": 0.3666, "step": 18311 }, { "epoch": 0.8493506493506493, "grad_norm": 6.119271278381348, "learning_rate": 5.60573907988124e-07, "loss": 0.2893, "step": 18312 }, { "epoch": 0.8493970315398887, "grad_norm": 6.5598626136779785, "learning_rate": 5.602353745230887e-07, "loss": 0.3101, "step": 18313 }, { "epoch": 0.849443413729128, "grad_norm": 6.185927867889404, "learning_rate": 5.598969372432122e-07, "loss": 0.263, "step": 18314 }, { "epoch": 0.8494897959183674, "grad_norm": 4.788577556610107, "learning_rate": 5.59558596155827e-07, "loss": 0.3389, "step": 18315 }, { "epoch": 0.8495361781076067, "grad_norm": 6.310259819030762, "learning_rate": 5.592203512682637e-07, "loss": 0.3222, "step": 18316 }, { "epoch": 0.849582560296846, "grad_norm": 7.220888137817383, "learning_rate": 5.588822025878476e-07, "loss": 0.3163, "step": 18317 }, { "epoch": 0.8496289424860853, "grad_norm": 5.305715560913086, "learning_rate": 5.585441501219069e-07, "loss": 0.2084, "step": 18318 }, { "epoch": 0.8496753246753247, "grad_norm": 7.836612701416016, "learning_rate": 5.582061938777639e-07, "loss": 0.3129, "step": 18319 }, { "epoch": 0.849721706864564, "grad_norm": 15.081857681274414, "learning_rate": 5.578683338627411e-07, "loss": 0.3537, "step": 18320 }, { "epoch": 0.8497680890538033, "grad_norm": 14.36169147491455, "learning_rate": 5.575305700841594e-07, "loss": 0.4794, "step": 18321 }, { "epoch": 0.8498144712430427, "grad_norm": 35.730079650878906, "learning_rate": 5.571929025493328e-07, "loss": 0.4153, "step": 18322 }, { "epoch": 0.849860853432282, "grad_norm": 6.749523162841797, "learning_rate": 5.568553312655794e-07, "loss": 0.3227, "step": 18323 }, { "epoch": 0.8499072356215214, "grad_norm": 5.808701992034912, "learning_rate": 5.565178562402113e-07, "loss": 0.1964, "step": 18324 }, { "epoch": 0.8499536178107606, "grad_norm": 4.735691070556641, "learning_rate": 5.561804774805402e-07, "loss": 0.2963, "step": 18325 }, { "epoch": 0.85, "grad_norm": 5.7433061599731445, "learning_rate": 5.558431949938752e-07, "loss": 0.2786, "step": 18326 }, { "epoch": 0.8500463821892393, "grad_norm": 13.811932563781738, "learning_rate": 5.555060087875246e-07, "loss": 0.2943, "step": 18327 }, { "epoch": 0.8500927643784787, "grad_norm": 15.49772834777832, "learning_rate": 5.551689188687909e-07, "loss": 0.3584, "step": 18328 }, { "epoch": 0.850139146567718, "grad_norm": 6.404010772705078, "learning_rate": 5.548319252449785e-07, "loss": 0.2414, "step": 18329 }, { "epoch": 0.8501855287569573, "grad_norm": 6.3496294021606445, "learning_rate": 5.544950279233874e-07, "loss": 0.3104, "step": 18330 }, { "epoch": 0.8502319109461967, "grad_norm": 10.169453620910645, "learning_rate": 5.541582269113171e-07, "loss": 0.4137, "step": 18331 }, { "epoch": 0.850278293135436, "grad_norm": 8.556035041809082, "learning_rate": 5.538215222160643e-07, "loss": 0.372, "step": 18332 }, { "epoch": 0.8503246753246754, "grad_norm": 8.247163772583008, "learning_rate": 5.534849138449228e-07, "loss": 0.2541, "step": 18333 }, { "epoch": 0.8503710575139146, "grad_norm": 8.480635643005371, "learning_rate": 5.531484018051847e-07, "loss": 0.4409, "step": 18334 }, { "epoch": 0.850417439703154, "grad_norm": 8.836689949035645, "learning_rate": 5.528119861041414e-07, "loss": 0.4213, "step": 18335 }, { "epoch": 0.8504638218923933, "grad_norm": 9.293076515197754, "learning_rate": 5.52475666749081e-07, "loss": 0.3791, "step": 18336 }, { "epoch": 0.8505102040816327, "grad_norm": 7.904923439025879, "learning_rate": 5.521394437472893e-07, "loss": 0.1252, "step": 18337 }, { "epoch": 0.8505565862708719, "grad_norm": 9.203529357910156, "learning_rate": 5.518033171060511e-07, "loss": 0.3838, "step": 18338 }, { "epoch": 0.8506029684601113, "grad_norm": 8.513818740844727, "learning_rate": 5.514672868326471e-07, "loss": 0.3973, "step": 18339 }, { "epoch": 0.8506493506493507, "grad_norm": 8.545140266418457, "learning_rate": 5.511313529343581e-07, "loss": 0.4093, "step": 18340 }, { "epoch": 0.85069573283859, "grad_norm": 6.251953601837158, "learning_rate": 5.507955154184619e-07, "loss": 0.3111, "step": 18341 }, { "epoch": 0.8507421150278293, "grad_norm": 5.01765251159668, "learning_rate": 5.50459774292234e-07, "loss": 0.2364, "step": 18342 }, { "epoch": 0.8507884972170686, "grad_norm": 8.99965763092041, "learning_rate": 5.501241295629494e-07, "loss": 0.3237, "step": 18343 }, { "epoch": 0.850834879406308, "grad_norm": 5.290226936340332, "learning_rate": 5.497885812378772e-07, "loss": 0.2443, "step": 18344 }, { "epoch": 0.8508812615955473, "grad_norm": 16.14948272705078, "learning_rate": 5.494531293242883e-07, "loss": 0.5381, "step": 18345 }, { "epoch": 0.8509276437847867, "grad_norm": 12.140586853027344, "learning_rate": 5.491177738294496e-07, "loss": 0.2444, "step": 18346 }, { "epoch": 0.8509740259740259, "grad_norm": 6.459406852722168, "learning_rate": 5.487825147606274e-07, "loss": 0.2024, "step": 18347 }, { "epoch": 0.8510204081632653, "grad_norm": 6.908514976501465, "learning_rate": 5.484473521250838e-07, "loss": 0.3135, "step": 18348 }, { "epoch": 0.8510667903525047, "grad_norm": 8.24109935760498, "learning_rate": 5.481122859300813e-07, "loss": 0.3486, "step": 18349 }, { "epoch": 0.851113172541744, "grad_norm": 4.700334072113037, "learning_rate": 5.47777316182877e-07, "loss": 0.3244, "step": 18350 }, { "epoch": 0.8511595547309833, "grad_norm": 5.5485711097717285, "learning_rate": 5.474424428907288e-07, "loss": 0.3074, "step": 18351 }, { "epoch": 0.8512059369202226, "grad_norm": 8.669490814208984, "learning_rate": 5.47107666060892e-07, "loss": 0.3289, "step": 18352 }, { "epoch": 0.851252319109462, "grad_norm": 12.584128379821777, "learning_rate": 5.467729857006188e-07, "loss": 0.3996, "step": 18353 }, { "epoch": 0.8512987012987013, "grad_norm": 15.843087196350098, "learning_rate": 5.464384018171609e-07, "loss": 0.5136, "step": 18354 }, { "epoch": 0.8513450834879406, "grad_norm": 8.109761238098145, "learning_rate": 5.461039144177649e-07, "loss": 0.1844, "step": 18355 }, { "epoch": 0.8513914656771799, "grad_norm": 10.078408241271973, "learning_rate": 5.457695235096788e-07, "loss": 0.196, "step": 18356 }, { "epoch": 0.8514378478664193, "grad_norm": 6.232997894287109, "learning_rate": 5.454352291001464e-07, "loss": 0.2977, "step": 18357 }, { "epoch": 0.8514842300556587, "grad_norm": 7.639204025268555, "learning_rate": 5.4510103119641e-07, "loss": 0.396, "step": 18358 }, { "epoch": 0.851530612244898, "grad_norm": 8.236628532409668, "learning_rate": 5.447669298057112e-07, "loss": 0.3552, "step": 18359 }, { "epoch": 0.8515769944341373, "grad_norm": 10.861087799072266, "learning_rate": 5.444329249352859e-07, "loss": 0.4147, "step": 18360 }, { "epoch": 0.8516233766233766, "grad_norm": 7.762228488922119, "learning_rate": 5.440990165923715e-07, "loss": 0.4169, "step": 18361 }, { "epoch": 0.851669758812616, "grad_norm": 8.820830345153809, "learning_rate": 5.437652047842018e-07, "loss": 0.3494, "step": 18362 }, { "epoch": 0.8517161410018553, "grad_norm": 16.07439422607422, "learning_rate": 5.434314895180081e-07, "loss": 0.4555, "step": 18363 }, { "epoch": 0.8517625231910946, "grad_norm": 5.910754203796387, "learning_rate": 5.430978708010204e-07, "loss": 0.2996, "step": 18364 }, { "epoch": 0.8518089053803339, "grad_norm": 9.52951717376709, "learning_rate": 5.427643486404683e-07, "loss": 0.3166, "step": 18365 }, { "epoch": 0.8518552875695733, "grad_norm": 5.070391654968262, "learning_rate": 5.424309230435737e-07, "loss": 0.2118, "step": 18366 }, { "epoch": 0.8519016697588127, "grad_norm": 5.826862812042236, "learning_rate": 5.420975940175627e-07, "loss": 0.2887, "step": 18367 }, { "epoch": 0.8519480519480519, "grad_norm": 6.853618144989014, "learning_rate": 5.417643615696561e-07, "loss": 0.3449, "step": 18368 }, { "epoch": 0.8519944341372913, "grad_norm": 6.101743698120117, "learning_rate": 5.414312257070725e-07, "loss": 0.2949, "step": 18369 }, { "epoch": 0.8520408163265306, "grad_norm": 7.5890936851501465, "learning_rate": 5.410981864370313e-07, "loss": 0.2745, "step": 18370 }, { "epoch": 0.85208719851577, "grad_norm": 13.037495613098145, "learning_rate": 5.407652437667449e-07, "loss": 0.3804, "step": 18371 }, { "epoch": 0.8521335807050093, "grad_norm": 10.495308876037598, "learning_rate": 5.404323977034276e-07, "loss": 0.4672, "step": 18372 }, { "epoch": 0.8521799628942486, "grad_norm": 9.52060317993164, "learning_rate": 5.400996482542903e-07, "loss": 0.4248, "step": 18373 }, { "epoch": 0.8522263450834879, "grad_norm": 9.002856254577637, "learning_rate": 5.397669954265417e-07, "loss": 0.3878, "step": 18374 }, { "epoch": 0.8522727272727273, "grad_norm": 8.098237991333008, "learning_rate": 5.394344392273882e-07, "loss": 0.309, "step": 18375 }, { "epoch": 0.8523191094619667, "grad_norm": 7.562251091003418, "learning_rate": 5.391019796640362e-07, "loss": 0.2465, "step": 18376 }, { "epoch": 0.8523654916512059, "grad_norm": 7.3941545486450195, "learning_rate": 5.387696167436856e-07, "loss": 0.2753, "step": 18377 }, { "epoch": 0.8524118738404453, "grad_norm": 6.232705116271973, "learning_rate": 5.38437350473539e-07, "loss": 0.3192, "step": 18378 }, { "epoch": 0.8524582560296846, "grad_norm": 4.995868682861328, "learning_rate": 5.381051808607935e-07, "loss": 0.2265, "step": 18379 }, { "epoch": 0.852504638218924, "grad_norm": 11.111600875854492, "learning_rate": 5.377731079126458e-07, "loss": 0.3262, "step": 18380 }, { "epoch": 0.8525510204081632, "grad_norm": 5.642270565032959, "learning_rate": 5.37441131636291e-07, "loss": 0.2678, "step": 18381 }, { "epoch": 0.8525974025974026, "grad_norm": 7.186679840087891, "learning_rate": 5.371092520389198e-07, "loss": 0.2595, "step": 18382 }, { "epoch": 0.8526437847866419, "grad_norm": 14.980907440185547, "learning_rate": 5.367774691277222e-07, "loss": 0.3739, "step": 18383 }, { "epoch": 0.8526901669758813, "grad_norm": 6.475696086883545, "learning_rate": 5.364457829098868e-07, "loss": 0.2812, "step": 18384 }, { "epoch": 0.8527365491651205, "grad_norm": 8.780465126037598, "learning_rate": 5.361141933925995e-07, "loss": 0.3095, "step": 18385 }, { "epoch": 0.8527829313543599, "grad_norm": 8.812867164611816, "learning_rate": 5.357827005830435e-07, "loss": 0.3644, "step": 18386 }, { "epoch": 0.8528293135435993, "grad_norm": 7.22334623336792, "learning_rate": 5.354513044884019e-07, "loss": 0.2528, "step": 18387 }, { "epoch": 0.8528756957328386, "grad_norm": 5.46722412109375, "learning_rate": 5.351200051158517e-07, "loss": 0.3529, "step": 18388 }, { "epoch": 0.852922077922078, "grad_norm": 4.6860833168029785, "learning_rate": 5.347888024725717e-07, "loss": 0.2323, "step": 18389 }, { "epoch": 0.8529684601113172, "grad_norm": 10.456435203552246, "learning_rate": 5.344576965657372e-07, "loss": 0.3763, "step": 18390 }, { "epoch": 0.8530148423005566, "grad_norm": 11.594298362731934, "learning_rate": 5.341266874025208e-07, "loss": 0.2302, "step": 18391 }, { "epoch": 0.8530612244897959, "grad_norm": 6.82643461227417, "learning_rate": 5.337957749900958e-07, "loss": 0.2929, "step": 18392 }, { "epoch": 0.8531076066790353, "grad_norm": 9.372970581054688, "learning_rate": 5.334649593356289e-07, "loss": 0.3252, "step": 18393 }, { "epoch": 0.8531539888682745, "grad_norm": 17.390361785888672, "learning_rate": 5.331342404462875e-07, "loss": 0.4736, "step": 18394 }, { "epoch": 0.8532003710575139, "grad_norm": 6.172532558441162, "learning_rate": 5.328036183292367e-07, "loss": 0.2559, "step": 18395 }, { "epoch": 0.8532467532467533, "grad_norm": 7.694795608520508, "learning_rate": 5.324730929916394e-07, "loss": 0.3003, "step": 18396 }, { "epoch": 0.8532931354359926, "grad_norm": 7.046271324157715, "learning_rate": 5.321426644406558e-07, "loss": 0.3187, "step": 18397 }, { "epoch": 0.8533395176252319, "grad_norm": 5.073148250579834, "learning_rate": 5.318123326834462e-07, "loss": 0.3276, "step": 18398 }, { "epoch": 0.8533858998144712, "grad_norm": 6.862986087799072, "learning_rate": 5.314820977271645e-07, "loss": 0.3154, "step": 18399 }, { "epoch": 0.8534322820037106, "grad_norm": 8.141316413879395, "learning_rate": 5.311519595789666e-07, "loss": 0.3323, "step": 18400 }, { "epoch": 0.8534786641929499, "grad_norm": 4.658438205718994, "learning_rate": 5.308219182460045e-07, "loss": 0.233, "step": 18401 }, { "epoch": 0.8535250463821893, "grad_norm": 8.373650550842285, "learning_rate": 5.30491973735428e-07, "loss": 0.3771, "step": 18402 }, { "epoch": 0.8535714285714285, "grad_norm": 5.731122970581055, "learning_rate": 5.301621260543866e-07, "loss": 0.2903, "step": 18403 }, { "epoch": 0.8536178107606679, "grad_norm": 8.833330154418945, "learning_rate": 5.298323752100237e-07, "loss": 0.3867, "step": 18404 }, { "epoch": 0.8536641929499073, "grad_norm": 6.9564924240112305, "learning_rate": 5.29502721209485e-07, "loss": 0.2576, "step": 18405 }, { "epoch": 0.8537105751391466, "grad_norm": 6.776607513427734, "learning_rate": 5.291731640599118e-07, "loss": 0.3458, "step": 18406 }, { "epoch": 0.8537569573283859, "grad_norm": 6.512687683105469, "learning_rate": 5.288437037684441e-07, "loss": 0.3109, "step": 18407 }, { "epoch": 0.8538033395176252, "grad_norm": 7.656123638153076, "learning_rate": 5.285143403422188e-07, "loss": 0.2725, "step": 18408 }, { "epoch": 0.8538497217068646, "grad_norm": 10.734993934631348, "learning_rate": 5.281850737883731e-07, "loss": 0.318, "step": 18409 }, { "epoch": 0.8538961038961039, "grad_norm": 6.5517096519470215, "learning_rate": 5.278559041140386e-07, "loss": 0.3519, "step": 18410 }, { "epoch": 0.8539424860853432, "grad_norm": 5.61361837387085, "learning_rate": 5.275268313263465e-07, "loss": 0.3032, "step": 18411 }, { "epoch": 0.8539888682745825, "grad_norm": 8.389595985412598, "learning_rate": 5.27197855432427e-07, "loss": 0.3164, "step": 18412 }, { "epoch": 0.8540352504638219, "grad_norm": 9.839576721191406, "learning_rate": 5.268689764394064e-07, "loss": 0.2296, "step": 18413 }, { "epoch": 0.8540816326530613, "grad_norm": 6.742861270904541, "learning_rate": 5.265401943544119e-07, "loss": 0.2211, "step": 18414 }, { "epoch": 0.8541280148423006, "grad_norm": 6.755588531494141, "learning_rate": 5.262115091845626e-07, "loss": 0.3676, "step": 18415 }, { "epoch": 0.8541743970315399, "grad_norm": 12.384804725646973, "learning_rate": 5.25882920936982e-07, "loss": 0.4792, "step": 18416 }, { "epoch": 0.8542207792207792, "grad_norm": 11.3392333984375, "learning_rate": 5.255544296187876e-07, "loss": 0.4086, "step": 18417 }, { "epoch": 0.8542671614100186, "grad_norm": 14.488166809082031, "learning_rate": 5.252260352370963e-07, "loss": 0.3616, "step": 18418 }, { "epoch": 0.8543135435992579, "grad_norm": 7.159363269805908, "learning_rate": 5.248977377990244e-07, "loss": 0.3237, "step": 18419 }, { "epoch": 0.8543599257884972, "grad_norm": 9.486291885375977, "learning_rate": 5.245695373116816e-07, "loss": 0.3562, "step": 18420 }, { "epoch": 0.8544063079777365, "grad_norm": 5.988668441772461, "learning_rate": 5.242414337821789e-07, "loss": 0.2795, "step": 18421 }, { "epoch": 0.8544526901669759, "grad_norm": 8.216104507446289, "learning_rate": 5.23913427217625e-07, "loss": 0.3384, "step": 18422 }, { "epoch": 0.8544990723562153, "grad_norm": 4.771698951721191, "learning_rate": 5.235855176251253e-07, "loss": 0.2705, "step": 18423 }, { "epoch": 0.8545454545454545, "grad_norm": 7.7149529457092285, "learning_rate": 5.23257705011786e-07, "loss": 0.363, "step": 18424 }, { "epoch": 0.8545918367346939, "grad_norm": 7.6286725997924805, "learning_rate": 5.229299893847056e-07, "loss": 0.3233, "step": 18425 }, { "epoch": 0.8546382189239332, "grad_norm": 18.63892364501953, "learning_rate": 5.226023707509859e-07, "loss": 0.275, "step": 18426 }, { "epoch": 0.8546846011131726, "grad_norm": 6.11285924911499, "learning_rate": 5.222748491177243e-07, "loss": 0.3312, "step": 18427 }, { "epoch": 0.8547309833024119, "grad_norm": 10.378293991088867, "learning_rate": 5.219474244920164e-07, "loss": 0.2946, "step": 18428 }, { "epoch": 0.8547773654916512, "grad_norm": 16.403554916381836, "learning_rate": 5.216200968809565e-07, "loss": 0.4903, "step": 18429 }, { "epoch": 0.8548237476808905, "grad_norm": 12.275113105773926, "learning_rate": 5.212928662916344e-07, "loss": 0.4771, "step": 18430 }, { "epoch": 0.8548701298701299, "grad_norm": 11.726634979248047, "learning_rate": 5.209657327311396e-07, "loss": 0.4004, "step": 18431 }, { "epoch": 0.8549165120593692, "grad_norm": 6.006113529205322, "learning_rate": 5.206386962065601e-07, "loss": 0.3604, "step": 18432 }, { "epoch": 0.8549628942486085, "grad_norm": 8.53548812866211, "learning_rate": 5.203117567249805e-07, "loss": 0.3199, "step": 18433 }, { "epoch": 0.8550092764378479, "grad_norm": 8.229408264160156, "learning_rate": 5.199849142934849e-07, "loss": 0.3205, "step": 18434 }, { "epoch": 0.8550556586270872, "grad_norm": 4.778797626495361, "learning_rate": 5.196581689191521e-07, "loss": 0.2746, "step": 18435 }, { "epoch": 0.8551020408163266, "grad_norm": 11.143465042114258, "learning_rate": 5.193315206090622e-07, "loss": 0.416, "step": 18436 }, { "epoch": 0.8551484230055658, "grad_norm": 5.494741439819336, "learning_rate": 5.190049693702914e-07, "loss": 0.3309, "step": 18437 }, { "epoch": 0.8551948051948052, "grad_norm": 4.356931686401367, "learning_rate": 5.186785152099145e-07, "loss": 0.1882, "step": 18438 }, { "epoch": 0.8552411873840445, "grad_norm": 7.557770252227783, "learning_rate": 5.183521581350048e-07, "loss": 0.2651, "step": 18439 }, { "epoch": 0.8552875695732839, "grad_norm": 5.340341091156006, "learning_rate": 5.18025898152631e-07, "loss": 0.2796, "step": 18440 }, { "epoch": 0.8553339517625231, "grad_norm": 11.515486717224121, "learning_rate": 5.176997352698621e-07, "loss": 0.4129, "step": 18441 }, { "epoch": 0.8553803339517625, "grad_norm": 10.226642608642578, "learning_rate": 5.173736694937642e-07, "loss": 0.306, "step": 18442 }, { "epoch": 0.8554267161410019, "grad_norm": 5.487394332885742, "learning_rate": 5.170477008314012e-07, "loss": 0.3459, "step": 18443 }, { "epoch": 0.8554730983302412, "grad_norm": 12.069567680358887, "learning_rate": 5.167218292898368e-07, "loss": 0.3734, "step": 18444 }, { "epoch": 0.8555194805194806, "grad_norm": 16.417858123779297, "learning_rate": 5.163960548761276e-07, "loss": 0.5337, "step": 18445 }, { "epoch": 0.8555658627087198, "grad_norm": 10.159534454345703, "learning_rate": 5.160703775973336e-07, "loss": 0.3633, "step": 18446 }, { "epoch": 0.8556122448979592, "grad_norm": 11.639349937438965, "learning_rate": 5.157447974605095e-07, "loss": 0.297, "step": 18447 }, { "epoch": 0.8556586270871985, "grad_norm": 4.760294437408447, "learning_rate": 5.154193144727093e-07, "loss": 0.336, "step": 18448 }, { "epoch": 0.8557050092764379, "grad_norm": 5.414217948913574, "learning_rate": 5.150939286409845e-07, "loss": 0.368, "step": 18449 }, { "epoch": 0.8557513914656771, "grad_norm": 5.979361534118652, "learning_rate": 5.147686399723845e-07, "loss": 0.3215, "step": 18450 }, { "epoch": 0.8557977736549165, "grad_norm": 7.274436950683594, "learning_rate": 5.144434484739558e-07, "loss": 0.285, "step": 18451 }, { "epoch": 0.8558441558441559, "grad_norm": 8.317854881286621, "learning_rate": 5.141183541527439e-07, "loss": 0.3248, "step": 18452 }, { "epoch": 0.8558905380333952, "grad_norm": 4.880577564239502, "learning_rate": 5.137933570157916e-07, "loss": 0.2396, "step": 18453 }, { "epoch": 0.8559369202226345, "grad_norm": 6.06778621673584, "learning_rate": 5.134684570701398e-07, "loss": 0.3186, "step": 18454 }, { "epoch": 0.8559833024118738, "grad_norm": 5.175201892852783, "learning_rate": 5.131436543228291e-07, "loss": 0.3198, "step": 18455 }, { "epoch": 0.8560296846011132, "grad_norm": 11.401025772094727, "learning_rate": 5.128189487808927e-07, "loss": 0.3653, "step": 18456 }, { "epoch": 0.8560760667903525, "grad_norm": 6.840034008026123, "learning_rate": 5.124943404513677e-07, "loss": 0.3634, "step": 18457 }, { "epoch": 0.8561224489795919, "grad_norm": 7.193138599395752, "learning_rate": 5.121698293412857e-07, "loss": 0.307, "step": 18458 }, { "epoch": 0.8561688311688311, "grad_norm": 6.480956554412842, "learning_rate": 5.118454154576774e-07, "loss": 0.3074, "step": 18459 }, { "epoch": 0.8562152133580705, "grad_norm": 15.864925384521484, "learning_rate": 5.11521098807572e-07, "loss": 0.3549, "step": 18460 }, { "epoch": 0.8562615955473099, "grad_norm": 8.839156150817871, "learning_rate": 5.111968793979932e-07, "loss": 0.2922, "step": 18461 }, { "epoch": 0.8563079777365492, "grad_norm": 7.4501423835754395, "learning_rate": 5.108727572359661e-07, "loss": 0.3066, "step": 18462 }, { "epoch": 0.8563543599257885, "grad_norm": 6.138212203979492, "learning_rate": 5.105487323285136e-07, "loss": 0.2853, "step": 18463 }, { "epoch": 0.8564007421150278, "grad_norm": 5.990209579467773, "learning_rate": 5.102248046826546e-07, "loss": 0.2805, "step": 18464 }, { "epoch": 0.8564471243042672, "grad_norm": 8.627599716186523, "learning_rate": 5.099009743054084e-07, "loss": 0.3828, "step": 18465 }, { "epoch": 0.8564935064935065, "grad_norm": 4.784186840057373, "learning_rate": 5.095772412037881e-07, "loss": 0.3921, "step": 18466 }, { "epoch": 0.8565398886827458, "grad_norm": 5.261855125427246, "learning_rate": 5.092536053848085e-07, "loss": 0.2888, "step": 18467 }, { "epoch": 0.8565862708719851, "grad_norm": 25.75579261779785, "learning_rate": 5.089300668554808e-07, "loss": 0.3162, "step": 18468 }, { "epoch": 0.8566326530612245, "grad_norm": 7.368252754211426, "learning_rate": 5.086066256228145e-07, "loss": 0.2676, "step": 18469 }, { "epoch": 0.8566790352504638, "grad_norm": 7.931190013885498, "learning_rate": 5.082832816938177e-07, "loss": 0.31, "step": 18470 }, { "epoch": 0.8567254174397032, "grad_norm": 18.42302131652832, "learning_rate": 5.079600350754932e-07, "loss": 0.4995, "step": 18471 }, { "epoch": 0.8567717996289425, "grad_norm": 4.377967357635498, "learning_rate": 5.076368857748454e-07, "loss": 0.3686, "step": 18472 }, { "epoch": 0.8568181818181818, "grad_norm": 8.059800148010254, "learning_rate": 5.073138337988753e-07, "loss": 0.3357, "step": 18473 }, { "epoch": 0.8568645640074212, "grad_norm": 11.946898460388184, "learning_rate": 5.069908791545808e-07, "loss": 0.3723, "step": 18474 }, { "epoch": 0.8569109461966605, "grad_norm": 7.5225348472595215, "learning_rate": 5.066680218489606e-07, "loss": 0.2162, "step": 18475 }, { "epoch": 0.8569573283858998, "grad_norm": 8.770479202270508, "learning_rate": 5.063452618890064e-07, "loss": 0.3031, "step": 18476 }, { "epoch": 0.8570037105751391, "grad_norm": 7.13585901260376, "learning_rate": 5.06022599281712e-07, "loss": 0.2951, "step": 18477 }, { "epoch": 0.8570500927643785, "grad_norm": 6.681431770324707, "learning_rate": 5.057000340340679e-07, "loss": 0.3801, "step": 18478 }, { "epoch": 0.8570964749536178, "grad_norm": 5.742258548736572, "learning_rate": 5.053775661530619e-07, "loss": 0.264, "step": 18479 }, { "epoch": 0.8571428571428571, "grad_norm": 7.052499771118164, "learning_rate": 5.050551956456812e-07, "loss": 0.3345, "step": 18480 }, { "epoch": 0.8571892393320965, "grad_norm": 7.032045841217041, "learning_rate": 5.04732922518908e-07, "loss": 0.4453, "step": 18481 }, { "epoch": 0.8572356215213358, "grad_norm": 11.510381698608398, "learning_rate": 5.044107467797249e-07, "loss": 0.4625, "step": 18482 }, { "epoch": 0.8572820037105752, "grad_norm": 7.028099536895752, "learning_rate": 5.040886684351116e-07, "loss": 0.3342, "step": 18483 }, { "epoch": 0.8573283858998144, "grad_norm": 5.127054214477539, "learning_rate": 5.03766687492046e-07, "loss": 0.2638, "step": 18484 }, { "epoch": 0.8573747680890538, "grad_norm": 6.081696510314941, "learning_rate": 5.034448039575051e-07, "loss": 0.2876, "step": 18485 }, { "epoch": 0.8574211502782931, "grad_norm": 4.61905574798584, "learning_rate": 5.031230178384594e-07, "loss": 0.251, "step": 18486 }, { "epoch": 0.8574675324675325, "grad_norm": 5.411457061767578, "learning_rate": 5.028013291418815e-07, "loss": 0.3236, "step": 18487 }, { "epoch": 0.8575139146567718, "grad_norm": 4.593607425689697, "learning_rate": 5.024797378747414e-07, "loss": 0.2916, "step": 18488 }, { "epoch": 0.8575602968460111, "grad_norm": 11.478858947753906, "learning_rate": 5.021582440440048e-07, "loss": 0.3575, "step": 18489 }, { "epoch": 0.8576066790352505, "grad_norm": 4.9203691482543945, "learning_rate": 5.018368476566382e-07, "loss": 0.3261, "step": 18490 }, { "epoch": 0.8576530612244898, "grad_norm": 7.703601360321045, "learning_rate": 5.015155487196044e-07, "loss": 0.3041, "step": 18491 }, { "epoch": 0.8576994434137292, "grad_norm": 7.311089038848877, "learning_rate": 5.011943472398628e-07, "loss": 0.3233, "step": 18492 }, { "epoch": 0.8577458256029684, "grad_norm": 9.89511489868164, "learning_rate": 5.008732432243723e-07, "loss": 0.378, "step": 18493 }, { "epoch": 0.8577922077922078, "grad_norm": 7.9587321281433105, "learning_rate": 5.005522366800902e-07, "loss": 0.3026, "step": 18494 }, { "epoch": 0.8578385899814471, "grad_norm": 7.138218879699707, "learning_rate": 5.002313276139709e-07, "loss": 0.3825, "step": 18495 }, { "epoch": 0.8578849721706865, "grad_norm": 10.255215644836426, "learning_rate": 4.99910516032967e-07, "loss": 0.3099, "step": 18496 }, { "epoch": 0.8579313543599257, "grad_norm": 7.9600090980529785, "learning_rate": 4.995898019440277e-07, "loss": 0.2934, "step": 18497 }, { "epoch": 0.8579777365491651, "grad_norm": 6.059796333312988, "learning_rate": 4.992691853541015e-07, "loss": 0.3347, "step": 18498 }, { "epoch": 0.8580241187384045, "grad_norm": 6.085305213928223, "learning_rate": 4.989486662701348e-07, "loss": 0.3135, "step": 18499 }, { "epoch": 0.8580705009276438, "grad_norm": 12.225665092468262, "learning_rate": 4.986282446990708e-07, "loss": 0.3845, "step": 18500 }, { "epoch": 0.8581168831168832, "grad_norm": 8.94582462310791, "learning_rate": 4.983079206478513e-07, "loss": 0.3289, "step": 18501 }, { "epoch": 0.8581632653061224, "grad_norm": 5.647680282592773, "learning_rate": 4.97987694123418e-07, "loss": 0.3037, "step": 18502 }, { "epoch": 0.8582096474953618, "grad_norm": 4.098704814910889, "learning_rate": 4.976675651327056e-07, "loss": 0.2168, "step": 18503 }, { "epoch": 0.8582560296846011, "grad_norm": 11.671497344970703, "learning_rate": 4.973475336826506e-07, "loss": 0.3024, "step": 18504 }, { "epoch": 0.8583024118738405, "grad_norm": 8.313859939575195, "learning_rate": 4.97027599780186e-07, "loss": 0.3797, "step": 18505 }, { "epoch": 0.8583487940630797, "grad_norm": 8.424993515014648, "learning_rate": 4.967077634322437e-07, "loss": 0.3421, "step": 18506 }, { "epoch": 0.8583951762523191, "grad_norm": 16.482866287231445, "learning_rate": 4.963880246457537e-07, "loss": 0.4678, "step": 18507 }, { "epoch": 0.8584415584415584, "grad_norm": 5.168917179107666, "learning_rate": 4.960683834276403e-07, "loss": 0.3318, "step": 18508 }, { "epoch": 0.8584879406307978, "grad_norm": 9.150764465332031, "learning_rate": 4.957488397848303e-07, "loss": 0.3141, "step": 18509 }, { "epoch": 0.858534322820037, "grad_norm": 5.452677249908447, "learning_rate": 4.954293937242455e-07, "loss": 0.3037, "step": 18510 }, { "epoch": 0.8585807050092764, "grad_norm": 13.927545547485352, "learning_rate": 4.951100452528068e-07, "loss": 0.4222, "step": 18511 }, { "epoch": 0.8586270871985158, "grad_norm": 7.659706115722656, "learning_rate": 4.947907943774333e-07, "loss": 0.324, "step": 18512 }, { "epoch": 0.8586734693877551, "grad_norm": 4.661480903625488, "learning_rate": 4.944716411050421e-07, "loss": 0.2243, "step": 18513 }, { "epoch": 0.8587198515769945, "grad_norm": 4.453512668609619, "learning_rate": 4.94152585442545e-07, "loss": 0.2257, "step": 18514 }, { "epoch": 0.8587662337662337, "grad_norm": 5.026004791259766, "learning_rate": 4.938336273968558e-07, "loss": 0.2094, "step": 18515 }, { "epoch": 0.8588126159554731, "grad_norm": 3.899840831756592, "learning_rate": 4.935147669748841e-07, "loss": 0.2491, "step": 18516 }, { "epoch": 0.8588589981447124, "grad_norm": 4.976515293121338, "learning_rate": 4.931960041835382e-07, "loss": 0.2808, "step": 18517 }, { "epoch": 0.8589053803339518, "grad_norm": 5.014834403991699, "learning_rate": 4.928773390297248e-07, "loss": 0.3133, "step": 18518 }, { "epoch": 0.858951762523191, "grad_norm": 12.863348007202148, "learning_rate": 4.925587715203456e-07, "loss": 0.4365, "step": 18519 }, { "epoch": 0.8589981447124304, "grad_norm": 6.454136848449707, "learning_rate": 4.922403016623034e-07, "loss": 0.2718, "step": 18520 }, { "epoch": 0.8590445269016698, "grad_norm": 5.843242168426514, "learning_rate": 4.919219294624972e-07, "loss": 0.237, "step": 18521 }, { "epoch": 0.8590909090909091, "grad_norm": 8.45797348022461, "learning_rate": 4.916036549278246e-07, "loss": 0.2536, "step": 18522 }, { "epoch": 0.8591372912801484, "grad_norm": 5.956584453582764, "learning_rate": 4.912854780651815e-07, "loss": 0.2744, "step": 18523 }, { "epoch": 0.8591836734693877, "grad_norm": 5.868076324462891, "learning_rate": 4.9096739888146e-07, "loss": 0.2985, "step": 18524 }, { "epoch": 0.8592300556586271, "grad_norm": 6.614522457122803, "learning_rate": 4.906494173835514e-07, "loss": 0.2636, "step": 18525 }, { "epoch": 0.8592764378478664, "grad_norm": 10.870216369628906, "learning_rate": 4.903315335783448e-07, "loss": 0.4661, "step": 18526 }, { "epoch": 0.8593228200371058, "grad_norm": 8.164592742919922, "learning_rate": 4.900137474727268e-07, "loss": 0.2907, "step": 18527 }, { "epoch": 0.859369202226345, "grad_norm": 6.678684234619141, "learning_rate": 4.896960590735822e-07, "loss": 0.2616, "step": 18528 }, { "epoch": 0.8594155844155844, "grad_norm": 14.478946685791016, "learning_rate": 4.893784683877945e-07, "loss": 0.5607, "step": 18529 }, { "epoch": 0.8594619666048238, "grad_norm": 12.148015022277832, "learning_rate": 4.890609754222425e-07, "loss": 0.3943, "step": 18530 }, { "epoch": 0.8595083487940631, "grad_norm": 4.497629165649414, "learning_rate": 4.887435801838048e-07, "loss": 0.3347, "step": 18531 }, { "epoch": 0.8595547309833024, "grad_norm": 6.9813361167907715, "learning_rate": 4.884262826793584e-07, "loss": 0.3307, "step": 18532 }, { "epoch": 0.8596011131725417, "grad_norm": 5.527571678161621, "learning_rate": 4.881090829157764e-07, "loss": 0.2877, "step": 18533 }, { "epoch": 0.8596474953617811, "grad_norm": 5.836971759796143, "learning_rate": 4.877919808999326e-07, "loss": 0.2931, "step": 18534 }, { "epoch": 0.8596938775510204, "grad_norm": 4.802074909210205, "learning_rate": 4.874749766386949e-07, "loss": 0.2771, "step": 18535 }, { "epoch": 0.8597402597402597, "grad_norm": 3.642808675765991, "learning_rate": 4.871580701389316e-07, "loss": 0.2029, "step": 18536 }, { "epoch": 0.859786641929499, "grad_norm": 6.283979415893555, "learning_rate": 4.86841261407508e-07, "loss": 0.2409, "step": 18537 }, { "epoch": 0.8598330241187384, "grad_norm": 6.591818809509277, "learning_rate": 4.865245504512883e-07, "loss": 0.3322, "step": 18538 }, { "epoch": 0.8598794063079778, "grad_norm": 9.370684623718262, "learning_rate": 4.862079372771339e-07, "loss": 0.3385, "step": 18539 }, { "epoch": 0.859925788497217, "grad_norm": 6.58712911605835, "learning_rate": 4.858914218919047e-07, "loss": 0.2967, "step": 18540 }, { "epoch": 0.8599721706864564, "grad_norm": 9.640645980834961, "learning_rate": 4.855750043024554e-07, "loss": 0.2716, "step": 18541 }, { "epoch": 0.8600185528756957, "grad_norm": 11.007692337036133, "learning_rate": 4.852586845156431e-07, "loss": 0.3706, "step": 18542 }, { "epoch": 0.8600649350649351, "grad_norm": 7.75853967666626, "learning_rate": 4.849424625383198e-07, "loss": 0.2448, "step": 18543 }, { "epoch": 0.8601113172541744, "grad_norm": 6.70450496673584, "learning_rate": 4.846263383773364e-07, "loss": 0.2955, "step": 18544 }, { "epoch": 0.8601576994434137, "grad_norm": 6.983205318450928, "learning_rate": 4.843103120395432e-07, "loss": 0.3213, "step": 18545 }, { "epoch": 0.860204081632653, "grad_norm": 7.614800453186035, "learning_rate": 4.83994383531784e-07, "loss": 0.3515, "step": 18546 }, { "epoch": 0.8602504638218924, "grad_norm": 5.425804615020752, "learning_rate": 4.836785528609051e-07, "loss": 0.2676, "step": 18547 }, { "epoch": 0.8602968460111318, "grad_norm": 11.074024200439453, "learning_rate": 4.833628200337476e-07, "loss": 0.4031, "step": 18548 }, { "epoch": 0.860343228200371, "grad_norm": 7.232691764831543, "learning_rate": 4.830471850571527e-07, "loss": 0.345, "step": 18549 }, { "epoch": 0.8603896103896104, "grad_norm": 9.393232345581055, "learning_rate": 4.827316479379579e-07, "loss": 0.3918, "step": 18550 }, { "epoch": 0.8604359925788497, "grad_norm": 16.84649658203125, "learning_rate": 4.824162086830003e-07, "loss": 0.2653, "step": 18551 }, { "epoch": 0.8604823747680891, "grad_norm": 3.880385398864746, "learning_rate": 4.821008672991118e-07, "loss": 0.2604, "step": 18552 }, { "epoch": 0.8605287569573283, "grad_norm": 12.417078018188477, "learning_rate": 4.81785623793125e-07, "loss": 0.3583, "step": 18553 }, { "epoch": 0.8605751391465677, "grad_norm": 8.5392484664917, "learning_rate": 4.814704781718699e-07, "loss": 0.4887, "step": 18554 }, { "epoch": 0.860621521335807, "grad_norm": 7.093961715698242, "learning_rate": 4.811554304421734e-07, "loss": 0.4534, "step": 18555 }, { "epoch": 0.8606679035250464, "grad_norm": 6.758187294006348, "learning_rate": 4.808404806108618e-07, "loss": 0.3838, "step": 18556 }, { "epoch": 0.8607142857142858, "grad_norm": 6.4093852043151855, "learning_rate": 4.805256286847571e-07, "loss": 0.2935, "step": 18557 }, { "epoch": 0.860760667903525, "grad_norm": 12.81307315826416, "learning_rate": 4.802108746706802e-07, "loss": 0.42, "step": 18558 }, { "epoch": 0.8608070500927644, "grad_norm": 8.751371383666992, "learning_rate": 4.798962185754513e-07, "loss": 0.3355, "step": 18559 }, { "epoch": 0.8608534322820037, "grad_norm": 10.383955001831055, "learning_rate": 4.795816604058868e-07, "loss": 0.4638, "step": 18560 }, { "epoch": 0.8608998144712431, "grad_norm": 5.089719295501709, "learning_rate": 4.792672001688009e-07, "loss": 0.368, "step": 18561 }, { "epoch": 0.8609461966604823, "grad_norm": 6.884613990783691, "learning_rate": 4.789528378710079e-07, "loss": 0.384, "step": 18562 }, { "epoch": 0.8609925788497217, "grad_norm": 7.539985656738281, "learning_rate": 4.786385735193161e-07, "loss": 0.3861, "step": 18563 }, { "epoch": 0.861038961038961, "grad_norm": 6.144382476806641, "learning_rate": 4.783244071205345e-07, "loss": 0.1841, "step": 18564 }, { "epoch": 0.8610853432282004, "grad_norm": 8.581329345703125, "learning_rate": 4.780103386814699e-07, "loss": 0.35, "step": 18565 }, { "epoch": 0.8611317254174397, "grad_norm": 11.71057415008545, "learning_rate": 4.776963682089264e-07, "loss": 0.3612, "step": 18566 }, { "epoch": 0.861178107606679, "grad_norm": 11.188560485839844, "learning_rate": 4.773824957097068e-07, "loss": 0.3396, "step": 18567 }, { "epoch": 0.8612244897959184, "grad_norm": 6.709659099578857, "learning_rate": 4.770687211906089e-07, "loss": 0.325, "step": 18568 }, { "epoch": 0.8612708719851577, "grad_norm": 5.492083549499512, "learning_rate": 4.767550446584318e-07, "loss": 0.3139, "step": 18569 }, { "epoch": 0.8613172541743971, "grad_norm": 8.338544845581055, "learning_rate": 4.7644146611997064e-07, "loss": 0.3584, "step": 18570 }, { "epoch": 0.8613636363636363, "grad_norm": 8.507783889770508, "learning_rate": 4.761279855820189e-07, "loss": 0.3089, "step": 18571 }, { "epoch": 0.8614100185528757, "grad_norm": 8.075889587402344, "learning_rate": 4.7581460305136883e-07, "loss": 0.261, "step": 18572 }, { "epoch": 0.861456400742115, "grad_norm": 7.952487945556641, "learning_rate": 4.755013185348095e-07, "loss": 0.367, "step": 18573 }, { "epoch": 0.8615027829313544, "grad_norm": 6.698074817657471, "learning_rate": 4.7518813203912716e-07, "loss": 0.2964, "step": 18574 }, { "epoch": 0.8615491651205937, "grad_norm": 7.395402431488037, "learning_rate": 4.7487504357110746e-07, "loss": 0.2977, "step": 18575 }, { "epoch": 0.861595547309833, "grad_norm": 5.555912494659424, "learning_rate": 4.745620531375328e-07, "loss": 0.3191, "step": 18576 }, { "epoch": 0.8616419294990724, "grad_norm": 7.173313617706299, "learning_rate": 4.7424916074518435e-07, "loss": 0.3211, "step": 18577 }, { "epoch": 0.8616883116883117, "grad_norm": 5.737229347229004, "learning_rate": 4.7393636640084227e-07, "loss": 0.2536, "step": 18578 }, { "epoch": 0.861734693877551, "grad_norm": 9.483086585998535, "learning_rate": 4.7362367011128007e-07, "loss": 0.3887, "step": 18579 }, { "epoch": 0.8617810760667903, "grad_norm": 18.434993743896484, "learning_rate": 4.733110718832734e-07, "loss": 0.3698, "step": 18580 }, { "epoch": 0.8618274582560297, "grad_norm": 5.488314628601074, "learning_rate": 4.729985717235952e-07, "loss": 0.2722, "step": 18581 }, { "epoch": 0.861873840445269, "grad_norm": 13.719064712524414, "learning_rate": 4.726861696390156e-07, "loss": 0.3184, "step": 18582 }, { "epoch": 0.8619202226345084, "grad_norm": 8.071697235107422, "learning_rate": 4.723738656363014e-07, "loss": 0.3236, "step": 18583 }, { "epoch": 0.8619666048237477, "grad_norm": 4.67977237701416, "learning_rate": 4.720616597222205e-07, "loss": 0.3683, "step": 18584 }, { "epoch": 0.862012987012987, "grad_norm": 8.037618637084961, "learning_rate": 4.717495519035348e-07, "loss": 0.3177, "step": 18585 }, { "epoch": 0.8620593692022264, "grad_norm": 9.588044166564941, "learning_rate": 4.7143754218700653e-07, "loss": 0.364, "step": 18586 }, { "epoch": 0.8621057513914657, "grad_norm": 4.550803184509277, "learning_rate": 4.711256305793954e-07, "loss": 0.2421, "step": 18587 }, { "epoch": 0.862152133580705, "grad_norm": 9.272378921508789, "learning_rate": 4.708138170874588e-07, "loss": 0.3251, "step": 18588 }, { "epoch": 0.8621985157699443, "grad_norm": 12.785515785217285, "learning_rate": 4.7050210171795284e-07, "loss": 0.2773, "step": 18589 }, { "epoch": 0.8622448979591837, "grad_norm": 8.145048141479492, "learning_rate": 4.701904844776289e-07, "loss": 0.454, "step": 18590 }, { "epoch": 0.862291280148423, "grad_norm": 4.977920055389404, "learning_rate": 4.6987896537323885e-07, "loss": 0.3287, "step": 18591 }, { "epoch": 0.8623376623376623, "grad_norm": 9.06568717956543, "learning_rate": 4.695675444115316e-07, "loss": 0.3832, "step": 18592 }, { "epoch": 0.8623840445269016, "grad_norm": 4.3405866622924805, "learning_rate": 4.692562215992541e-07, "loss": 0.2205, "step": 18593 }, { "epoch": 0.862430426716141, "grad_norm": 12.204343795776367, "learning_rate": 4.689449969431503e-07, "loss": 0.2581, "step": 18594 }, { "epoch": 0.8624768089053804, "grad_norm": 5.7368059158325195, "learning_rate": 4.686338704499649e-07, "loss": 0.2072, "step": 18595 }, { "epoch": 0.8625231910946196, "grad_norm": 11.337273597717285, "learning_rate": 4.683228421264352e-07, "loss": 0.4312, "step": 18596 }, { "epoch": 0.862569573283859, "grad_norm": 4.360483646392822, "learning_rate": 4.680119119793014e-07, "loss": 0.3259, "step": 18597 }, { "epoch": 0.8626159554730983, "grad_norm": 16.916711807250977, "learning_rate": 4.6770108001529867e-07, "loss": 0.3583, "step": 18598 }, { "epoch": 0.8626623376623377, "grad_norm": 4.607222557067871, "learning_rate": 4.6739034624116165e-07, "loss": 0.2659, "step": 18599 }, { "epoch": 0.862708719851577, "grad_norm": 9.430948257446289, "learning_rate": 4.6707971066362324e-07, "loss": 0.3335, "step": 18600 }, { "epoch": 0.8627551020408163, "grad_norm": 6.622167587280273, "learning_rate": 4.6676917328941084e-07, "loss": 0.3331, "step": 18601 }, { "epoch": 0.8628014842300556, "grad_norm": 4.617147445678711, "learning_rate": 4.66458734125253e-07, "loss": 0.2694, "step": 18602 }, { "epoch": 0.862847866419295, "grad_norm": 5.437295436859131, "learning_rate": 4.66148393177876e-07, "loss": 0.3405, "step": 18603 }, { "epoch": 0.8628942486085344, "grad_norm": 6.113260746002197, "learning_rate": 4.6583815045400273e-07, "loss": 0.2135, "step": 18604 }, { "epoch": 0.8629406307977736, "grad_norm": 6.724825382232666, "learning_rate": 4.655280059603551e-07, "loss": 0.3218, "step": 18605 }, { "epoch": 0.862987012987013, "grad_norm": 7.051018714904785, "learning_rate": 4.652179597036505e-07, "loss": 0.3879, "step": 18606 }, { "epoch": 0.8630333951762523, "grad_norm": 6.104307174682617, "learning_rate": 4.649080116906074e-07, "loss": 0.3041, "step": 18607 }, { "epoch": 0.8630797773654917, "grad_norm": 7.219406604766846, "learning_rate": 4.645981619279394e-07, "loss": 0.3056, "step": 18608 }, { "epoch": 0.8631261595547309, "grad_norm": 8.698878288269043, "learning_rate": 4.6428841042236105e-07, "loss": 0.3465, "step": 18609 }, { "epoch": 0.8631725417439703, "grad_norm": 5.767240524291992, "learning_rate": 4.639787571805815e-07, "loss": 0.3222, "step": 18610 }, { "epoch": 0.8632189239332096, "grad_norm": 12.177623748779297, "learning_rate": 4.6366920220931035e-07, "loss": 0.4085, "step": 18611 }, { "epoch": 0.863265306122449, "grad_norm": 4.497117519378662, "learning_rate": 4.6335974551525275e-07, "loss": 0.3135, "step": 18612 }, { "epoch": 0.8633116883116884, "grad_norm": 7.826476097106934, "learning_rate": 4.6305038710511285e-07, "loss": 0.3664, "step": 18613 }, { "epoch": 0.8633580705009276, "grad_norm": 4.873485565185547, "learning_rate": 4.627411269855941e-07, "loss": 0.312, "step": 18614 }, { "epoch": 0.863404452690167, "grad_norm": 5.222153186798096, "learning_rate": 4.6243196516339517e-07, "loss": 0.2212, "step": 18615 }, { "epoch": 0.8634508348794063, "grad_norm": 7.5953264236450195, "learning_rate": 4.6212290164521554e-07, "loss": 0.433, "step": 18616 }, { "epoch": 0.8634972170686457, "grad_norm": 11.876571655273438, "learning_rate": 4.6181393643774887e-07, "loss": 0.3867, "step": 18617 }, { "epoch": 0.8635435992578849, "grad_norm": 6.656670570373535, "learning_rate": 4.6150506954768914e-07, "loss": 0.3553, "step": 18618 }, { "epoch": 0.8635899814471243, "grad_norm": 14.442136764526367, "learning_rate": 4.6119630098172887e-07, "loss": 0.3296, "step": 18619 }, { "epoch": 0.8636363636363636, "grad_norm": 8.44974422454834, "learning_rate": 4.608876307465565e-07, "loss": 0.3997, "step": 18620 }, { "epoch": 0.863682745825603, "grad_norm": 11.113054275512695, "learning_rate": 4.6057905884885956e-07, "loss": 0.2724, "step": 18621 }, { "epoch": 0.8637291280148423, "grad_norm": 8.058323860168457, "learning_rate": 4.6027058529532374e-07, "loss": 0.3902, "step": 18622 }, { "epoch": 0.8637755102040816, "grad_norm": 4.054887294769287, "learning_rate": 4.5996221009263043e-07, "loss": 0.2806, "step": 18623 }, { "epoch": 0.863821892393321, "grad_norm": 9.511916160583496, "learning_rate": 4.5965393324746146e-07, "loss": 0.4259, "step": 18624 }, { "epoch": 0.8638682745825603, "grad_norm": 12.41322135925293, "learning_rate": 4.593457547664948e-07, "loss": 0.4132, "step": 18625 }, { "epoch": 0.8639146567717997, "grad_norm": 8.368363380432129, "learning_rate": 4.590376746564079e-07, "loss": 0.2837, "step": 18626 }, { "epoch": 0.8639610389610389, "grad_norm": 7.602604389190674, "learning_rate": 4.587296929238749e-07, "loss": 0.3129, "step": 18627 }, { "epoch": 0.8640074211502783, "grad_norm": 7.691042423248291, "learning_rate": 4.584218095755677e-07, "loss": 0.2873, "step": 18628 }, { "epoch": 0.8640538033395176, "grad_norm": 21.172061920166016, "learning_rate": 4.581140246181559e-07, "loss": 0.4876, "step": 18629 }, { "epoch": 0.864100185528757, "grad_norm": 6.221107006072998, "learning_rate": 4.5780633805830863e-07, "loss": 0.3393, "step": 18630 }, { "epoch": 0.8641465677179963, "grad_norm": 4.539915084838867, "learning_rate": 4.5749874990269114e-07, "loss": 0.264, "step": 18631 }, { "epoch": 0.8641929499072356, "grad_norm": 10.4244966506958, "learning_rate": 4.5719126015796757e-07, "loss": 0.3769, "step": 18632 }, { "epoch": 0.864239332096475, "grad_norm": 5.594300270080566, "learning_rate": 4.568838688307997e-07, "loss": 0.3496, "step": 18633 }, { "epoch": 0.8642857142857143, "grad_norm": 7.129960536956787, "learning_rate": 4.5657657592784624e-07, "loss": 0.3995, "step": 18634 }, { "epoch": 0.8643320964749536, "grad_norm": 6.394087314605713, "learning_rate": 4.562693814557645e-07, "loss": 0.3465, "step": 18635 }, { "epoch": 0.8643784786641929, "grad_norm": 10.45059871673584, "learning_rate": 4.559622854212098e-07, "loss": 0.305, "step": 18636 }, { "epoch": 0.8644248608534323, "grad_norm": 8.418306350708008, "learning_rate": 4.5565528783083566e-07, "loss": 0.4175, "step": 18637 }, { "epoch": 0.8644712430426716, "grad_norm": 8.5082368850708, "learning_rate": 4.5534838869129407e-07, "loss": 0.2308, "step": 18638 }, { "epoch": 0.8645176252319109, "grad_norm": 12.375811576843262, "learning_rate": 4.5504158800923125e-07, "loss": 0.3723, "step": 18639 }, { "epoch": 0.8645640074211502, "grad_norm": 11.862110137939453, "learning_rate": 4.547348857912953e-07, "loss": 0.4045, "step": 18640 }, { "epoch": 0.8646103896103896, "grad_norm": 6.307793140411377, "learning_rate": 4.5442828204413035e-07, "loss": 0.2444, "step": 18641 }, { "epoch": 0.864656771799629, "grad_norm": 7.924872398376465, "learning_rate": 4.5412177677437994e-07, "loss": 0.3743, "step": 18642 }, { "epoch": 0.8647031539888683, "grad_norm": 6.299421310424805, "learning_rate": 4.538153699886827e-07, "loss": 0.2619, "step": 18643 }, { "epoch": 0.8647495361781076, "grad_norm": 8.0916109085083, "learning_rate": 4.535090616936788e-07, "loss": 0.2598, "step": 18644 }, { "epoch": 0.8647959183673469, "grad_norm": 4.65936279296875, "learning_rate": 4.5320285189600244e-07, "loss": 0.3409, "step": 18645 }, { "epoch": 0.8648423005565863, "grad_norm": 6.131684303283691, "learning_rate": 4.528967406022877e-07, "loss": 0.3952, "step": 18646 }, { "epoch": 0.8648886827458256, "grad_norm": 4.463976860046387, "learning_rate": 4.5259072781916715e-07, "loss": 0.293, "step": 18647 }, { "epoch": 0.8649350649350649, "grad_norm": 7.626500606536865, "learning_rate": 4.522848135532698e-07, "loss": 0.3137, "step": 18648 }, { "epoch": 0.8649814471243042, "grad_norm": 5.004138469696045, "learning_rate": 4.5197899781122433e-07, "loss": 0.2246, "step": 18649 }, { "epoch": 0.8650278293135436, "grad_norm": 8.056326866149902, "learning_rate": 4.5167328059965375e-07, "loss": 0.2912, "step": 18650 }, { "epoch": 0.865074211502783, "grad_norm": 6.585197925567627, "learning_rate": 4.513676619251828e-07, "loss": 0.3063, "step": 18651 }, { "epoch": 0.8651205936920222, "grad_norm": 11.055561065673828, "learning_rate": 4.510621417944322e-07, "loss": 0.4134, "step": 18652 }, { "epoch": 0.8651669758812616, "grad_norm": 5.109767436981201, "learning_rate": 4.5075672021402115e-07, "loss": 0.3357, "step": 18653 }, { "epoch": 0.8652133580705009, "grad_norm": 9.136214256286621, "learning_rate": 4.5045139719056605e-07, "loss": 0.3111, "step": 18654 }, { "epoch": 0.8652597402597403, "grad_norm": 9.582209587097168, "learning_rate": 4.5014617273068273e-07, "loss": 0.2523, "step": 18655 }, { "epoch": 0.8653061224489796, "grad_norm": 5.612105369567871, "learning_rate": 4.4984104684098253e-07, "loss": 0.321, "step": 18656 }, { "epoch": 0.8653525046382189, "grad_norm": 8.060384750366211, "learning_rate": 4.495360195280751e-07, "loss": 0.4178, "step": 18657 }, { "epoch": 0.8653988868274582, "grad_norm": 7.650356769561768, "learning_rate": 4.492310907985703e-07, "loss": 0.3465, "step": 18658 }, { "epoch": 0.8654452690166976, "grad_norm": 12.160462379455566, "learning_rate": 4.489262606590733e-07, "loss": 0.3981, "step": 18659 }, { "epoch": 0.865491651205937, "grad_norm": 5.5366997718811035, "learning_rate": 4.486215291161894e-07, "loss": 0.3073, "step": 18660 }, { "epoch": 0.8655380333951762, "grad_norm": 6.2099456787109375, "learning_rate": 4.4831689617651886e-07, "loss": 0.31, "step": 18661 }, { "epoch": 0.8655844155844156, "grad_norm": 4.936432838439941, "learning_rate": 4.48012361846662e-07, "loss": 0.323, "step": 18662 }, { "epoch": 0.8656307977736549, "grad_norm": 11.516377449035645, "learning_rate": 4.4770792613321623e-07, "loss": 0.4384, "step": 18663 }, { "epoch": 0.8656771799628943, "grad_norm": 7.14979887008667, "learning_rate": 4.474035890427769e-07, "loss": 0.3427, "step": 18664 }, { "epoch": 0.8657235621521335, "grad_norm": 10.030009269714355, "learning_rate": 4.470993505819376e-07, "loss": 0.3128, "step": 18665 }, { "epoch": 0.8657699443413729, "grad_norm": 5.306272506713867, "learning_rate": 4.467952107572909e-07, "loss": 0.3027, "step": 18666 }, { "epoch": 0.8658163265306122, "grad_norm": 8.075591087341309, "learning_rate": 4.464911695754232e-07, "loss": 0.3759, "step": 18667 }, { "epoch": 0.8658627087198516, "grad_norm": 5.098092079162598, "learning_rate": 4.461872270429224e-07, "loss": 0.3113, "step": 18668 }, { "epoch": 0.865909090909091, "grad_norm": 4.543133735656738, "learning_rate": 4.458833831663734e-07, "loss": 0.3104, "step": 18669 }, { "epoch": 0.8659554730983302, "grad_norm": 6.2344841957092285, "learning_rate": 4.4557963795235917e-07, "loss": 0.3947, "step": 18670 }, { "epoch": 0.8660018552875696, "grad_norm": 20.03832244873047, "learning_rate": 4.452759914074606e-07, "loss": 0.3715, "step": 18671 }, { "epoch": 0.8660482374768089, "grad_norm": 8.920258522033691, "learning_rate": 4.449724435382546e-07, "loss": 0.3219, "step": 18672 }, { "epoch": 0.8660946196660483, "grad_norm": 7.845579147338867, "learning_rate": 4.4466899435131774e-07, "loss": 0.3713, "step": 18673 }, { "epoch": 0.8661410018552875, "grad_norm": 5.9747090339660645, "learning_rate": 4.4436564385322457e-07, "loss": 0.3043, "step": 18674 }, { "epoch": 0.8661873840445269, "grad_norm": 4.873035907745361, "learning_rate": 4.4406239205054713e-07, "loss": 0.3166, "step": 18675 }, { "epoch": 0.8662337662337662, "grad_norm": 5.191606044769287, "learning_rate": 4.4375923894985463e-07, "loss": 0.3507, "step": 18676 }, { "epoch": 0.8662801484230056, "grad_norm": 5.705214023590088, "learning_rate": 4.434561845577162e-07, "loss": 0.2579, "step": 18677 }, { "epoch": 0.8663265306122448, "grad_norm": 6.670633316040039, "learning_rate": 4.431532288806956e-07, "loss": 0.3174, "step": 18678 }, { "epoch": 0.8663729128014842, "grad_norm": 6.027347087860107, "learning_rate": 4.428503719253563e-07, "loss": 0.3688, "step": 18679 }, { "epoch": 0.8664192949907236, "grad_norm": 8.320514678955078, "learning_rate": 4.4254761369825984e-07, "loss": 0.3789, "step": 18680 }, { "epoch": 0.8664656771799629, "grad_norm": 5.303335189819336, "learning_rate": 4.422449542059659e-07, "loss": 0.3029, "step": 18681 }, { "epoch": 0.8665120593692023, "grad_norm": 4.796965599060059, "learning_rate": 4.419423934550321e-07, "loss": 0.2685, "step": 18682 }, { "epoch": 0.8665584415584415, "grad_norm": 6.732819080352783, "learning_rate": 4.416399314520109e-07, "loss": 0.3224, "step": 18683 }, { "epoch": 0.8666048237476809, "grad_norm": 11.962465286254883, "learning_rate": 4.4133756820345655e-07, "loss": 0.377, "step": 18684 }, { "epoch": 0.8666512059369202, "grad_norm": 10.228962898254395, "learning_rate": 4.410353037159193e-07, "loss": 0.2816, "step": 18685 }, { "epoch": 0.8666975881261596, "grad_norm": 6.140547752380371, "learning_rate": 4.4073313799594785e-07, "loss": 0.294, "step": 18686 }, { "epoch": 0.8667439703153988, "grad_norm": 8.009468078613281, "learning_rate": 4.4043107105008866e-07, "loss": 0.3591, "step": 18687 }, { "epoch": 0.8667903525046382, "grad_norm": 7.8443827629089355, "learning_rate": 4.4012910288488477e-07, "loss": 0.227, "step": 18688 }, { "epoch": 0.8668367346938776, "grad_norm": 8.296076774597168, "learning_rate": 4.398272335068787e-07, "loss": 0.3905, "step": 18689 }, { "epoch": 0.8668831168831169, "grad_norm": 11.868409156799316, "learning_rate": 4.3952546292261033e-07, "loss": 0.2985, "step": 18690 }, { "epoch": 0.8669294990723562, "grad_norm": 8.25003433227539, "learning_rate": 4.392237911386177e-07, "loss": 0.2631, "step": 18691 }, { "epoch": 0.8669758812615955, "grad_norm": 4.987802505493164, "learning_rate": 4.3892221816143555e-07, "loss": 0.2531, "step": 18692 }, { "epoch": 0.8670222634508349, "grad_norm": 5.785101890563965, "learning_rate": 4.3862074399759924e-07, "loss": 0.3367, "step": 18693 }, { "epoch": 0.8670686456400742, "grad_norm": 9.924334526062012, "learning_rate": 4.3831936865363744e-07, "loss": 0.3726, "step": 18694 }, { "epoch": 0.8671150278293135, "grad_norm": 5.257275104522705, "learning_rate": 4.3801809213608046e-07, "loss": 0.2813, "step": 18695 }, { "epoch": 0.8671614100185528, "grad_norm": 11.210416793823242, "learning_rate": 4.377169144514554e-07, "loss": 0.3738, "step": 18696 }, { "epoch": 0.8672077922077922, "grad_norm": 6.219592571258545, "learning_rate": 4.3741583560628743e-07, "loss": 0.3714, "step": 18697 }, { "epoch": 0.8672541743970316, "grad_norm": 9.316910743713379, "learning_rate": 4.3711485560709923e-07, "loss": 0.4184, "step": 18698 }, { "epoch": 0.8673005565862709, "grad_norm": 7.393042087554932, "learning_rate": 4.3681397446040997e-07, "loss": 0.3679, "step": 18699 }, { "epoch": 0.8673469387755102, "grad_norm": 6.7638139724731445, "learning_rate": 4.3651319217273947e-07, "loss": 0.2469, "step": 18700 }, { "epoch": 0.8673933209647495, "grad_norm": 11.095135688781738, "learning_rate": 4.362125087506036e-07, "loss": 0.388, "step": 18701 }, { "epoch": 0.8674397031539889, "grad_norm": 6.0445098876953125, "learning_rate": 4.3591192420051666e-07, "loss": 0.2686, "step": 18702 }, { "epoch": 0.8674860853432282, "grad_norm": 5.777919292449951, "learning_rate": 4.3561143852899055e-07, "loss": 0.2826, "step": 18703 }, { "epoch": 0.8675324675324675, "grad_norm": 12.108312606811523, "learning_rate": 4.353110517425363e-07, "loss": 0.3867, "step": 18704 }, { "epoch": 0.8675788497217068, "grad_norm": 8.009198188781738, "learning_rate": 4.3501076384765916e-07, "loss": 0.3911, "step": 18705 }, { "epoch": 0.8676252319109462, "grad_norm": 9.365250587463379, "learning_rate": 4.347105748508662e-07, "loss": 0.2903, "step": 18706 }, { "epoch": 0.8676716141001856, "grad_norm": 8.50136947631836, "learning_rate": 4.3441048475866056e-07, "loss": 0.3536, "step": 18707 }, { "epoch": 0.8677179962894248, "grad_norm": 12.959495544433594, "learning_rate": 4.341104935775442e-07, "loss": 0.4468, "step": 18708 }, { "epoch": 0.8677643784786642, "grad_norm": 9.06949520111084, "learning_rate": 4.3381060131401587e-07, "loss": 0.3434, "step": 18709 }, { "epoch": 0.8678107606679035, "grad_norm": 10.924758911132812, "learning_rate": 4.33510807974572e-07, "loss": 0.4162, "step": 18710 }, { "epoch": 0.8678571428571429, "grad_norm": 7.5812249183654785, "learning_rate": 4.33211113565708e-07, "loss": 0.268, "step": 18711 }, { "epoch": 0.8679035250463822, "grad_norm": 5.084270477294922, "learning_rate": 4.329115180939164e-07, "loss": 0.3948, "step": 18712 }, { "epoch": 0.8679499072356215, "grad_norm": 7.9825921058654785, "learning_rate": 4.326120215656876e-07, "loss": 0.2255, "step": 18713 }, { "epoch": 0.8679962894248608, "grad_norm": 8.860173225402832, "learning_rate": 4.3231262398751084e-07, "loss": 0.3297, "step": 18714 }, { "epoch": 0.8680426716141002, "grad_norm": 4.561144828796387, "learning_rate": 4.32013325365872e-07, "loss": 0.2249, "step": 18715 }, { "epoch": 0.8680890538033396, "grad_norm": 6.7258782386779785, "learning_rate": 4.317141257072549e-07, "loss": 0.3019, "step": 18716 }, { "epoch": 0.8681354359925788, "grad_norm": 5.087885856628418, "learning_rate": 4.314150250181415e-07, "loss": 0.2445, "step": 18717 }, { "epoch": 0.8681818181818182, "grad_norm": 6.544617176055908, "learning_rate": 4.311160233050121e-07, "loss": 0.3618, "step": 18718 }, { "epoch": 0.8682282003710575, "grad_norm": 8.09768295288086, "learning_rate": 4.308171205743439e-07, "loss": 0.2631, "step": 18719 }, { "epoch": 0.8682745825602969, "grad_norm": 11.113316535949707, "learning_rate": 4.305183168326138e-07, "loss": 0.4382, "step": 18720 }, { "epoch": 0.8683209647495361, "grad_norm": 7.9831743240356445, "learning_rate": 4.3021961208629335e-07, "loss": 0.2886, "step": 18721 }, { "epoch": 0.8683673469387755, "grad_norm": 7.446096420288086, "learning_rate": 4.2992100634185463e-07, "loss": 0.2623, "step": 18722 }, { "epoch": 0.8684137291280148, "grad_norm": 14.548450469970703, "learning_rate": 4.2962249960576685e-07, "loss": 0.487, "step": 18723 }, { "epoch": 0.8684601113172542, "grad_norm": 10.02744197845459, "learning_rate": 4.293240918844971e-07, "loss": 0.2335, "step": 18724 }, { "epoch": 0.8685064935064936, "grad_norm": 7.275834083557129, "learning_rate": 4.290257831845107e-07, "loss": 0.2915, "step": 18725 }, { "epoch": 0.8685528756957328, "grad_norm": 5.635936737060547, "learning_rate": 4.2872757351226926e-07, "loss": 0.3647, "step": 18726 }, { "epoch": 0.8685992578849722, "grad_norm": 13.375862121582031, "learning_rate": 4.284294628742336e-07, "loss": 0.5178, "step": 18727 }, { "epoch": 0.8686456400742115, "grad_norm": 10.035867691040039, "learning_rate": 4.281314512768625e-07, "loss": 0.4407, "step": 18728 }, { "epoch": 0.8686920222634509, "grad_norm": 6.647315502166748, "learning_rate": 4.278335387266125e-07, "loss": 0.237, "step": 18729 }, { "epoch": 0.8687384044526901, "grad_norm": 4.876944541931152, "learning_rate": 4.275357252299378e-07, "loss": 0.2798, "step": 18730 }, { "epoch": 0.8687847866419295, "grad_norm": 8.928658485412598, "learning_rate": 4.2723801079328885e-07, "loss": 0.3743, "step": 18731 }, { "epoch": 0.8688311688311688, "grad_norm": 8.672281265258789, "learning_rate": 4.269403954231166e-07, "loss": 0.2974, "step": 18732 }, { "epoch": 0.8688775510204082, "grad_norm": 8.524911880493164, "learning_rate": 4.2664287912586865e-07, "loss": 0.3289, "step": 18733 }, { "epoch": 0.8689239332096474, "grad_norm": 9.689705848693848, "learning_rate": 4.263454619079904e-07, "loss": 0.3777, "step": 18734 }, { "epoch": 0.8689703153988868, "grad_norm": 16.690509796142578, "learning_rate": 4.260481437759267e-07, "loss": 0.3931, "step": 18735 }, { "epoch": 0.8690166975881262, "grad_norm": 5.576896667480469, "learning_rate": 4.2575092473611634e-07, "loss": 0.2204, "step": 18736 }, { "epoch": 0.8690630797773655, "grad_norm": 5.722464084625244, "learning_rate": 4.2545380479499963e-07, "loss": 0.3457, "step": 18737 }, { "epoch": 0.8691094619666048, "grad_norm": 5.763752460479736, "learning_rate": 4.2515678395901315e-07, "loss": 0.3521, "step": 18738 }, { "epoch": 0.8691558441558441, "grad_norm": 9.385860443115234, "learning_rate": 4.248598622345923e-07, "loss": 0.3428, "step": 18739 }, { "epoch": 0.8692022263450835, "grad_norm": 11.869563102722168, "learning_rate": 4.245630396281697e-07, "loss": 0.451, "step": 18740 }, { "epoch": 0.8692486085343228, "grad_norm": 11.679308891296387, "learning_rate": 4.242663161461752e-07, "loss": 0.4608, "step": 18741 }, { "epoch": 0.8692949907235622, "grad_norm": 5.471983432769775, "learning_rate": 4.2396969179503756e-07, "loss": 0.3024, "step": 18742 }, { "epoch": 0.8693413729128014, "grad_norm": 12.30123519897461, "learning_rate": 4.2367316658118274e-07, "loss": 0.2996, "step": 18743 }, { "epoch": 0.8693877551020408, "grad_norm": 15.496865272521973, "learning_rate": 4.2337674051103504e-07, "loss": 0.3736, "step": 18744 }, { "epoch": 0.8694341372912802, "grad_norm": 8.043990135192871, "learning_rate": 4.2308041359101715e-07, "loss": 0.4606, "step": 18745 }, { "epoch": 0.8694805194805195, "grad_norm": 3.8288657665252686, "learning_rate": 4.2278418582754724e-07, "loss": 0.2914, "step": 18746 }, { "epoch": 0.8695269016697588, "grad_norm": 7.675008296966553, "learning_rate": 4.2248805722704344e-07, "loss": 0.3327, "step": 18747 }, { "epoch": 0.8695732838589981, "grad_norm": 20.443782806396484, "learning_rate": 4.2219202779592186e-07, "loss": 0.4069, "step": 18748 }, { "epoch": 0.8696196660482375, "grad_norm": 5.480625629425049, "learning_rate": 4.2189609754059555e-07, "loss": 0.2771, "step": 18749 }, { "epoch": 0.8696660482374768, "grad_norm": 5.249680995941162, "learning_rate": 4.216002664674762e-07, "loss": 0.3597, "step": 18750 }, { "epoch": 0.8697124304267161, "grad_norm": 8.519612312316895, "learning_rate": 4.2130453458297136e-07, "loss": 0.3859, "step": 18751 }, { "epoch": 0.8697588126159554, "grad_norm": 7.492365837097168, "learning_rate": 4.2100890189348875e-07, "loss": 0.3727, "step": 18752 }, { "epoch": 0.8698051948051948, "grad_norm": 9.403250694274902, "learning_rate": 4.207133684054332e-07, "loss": 0.3754, "step": 18753 }, { "epoch": 0.8698515769944342, "grad_norm": 14.355734825134277, "learning_rate": 4.204179341252074e-07, "loss": 0.4753, "step": 18754 }, { "epoch": 0.8698979591836735, "grad_norm": 20.90652084350586, "learning_rate": 4.201225990592117e-07, "loss": 0.5278, "step": 18755 }, { "epoch": 0.8699443413729128, "grad_norm": 8.419264793395996, "learning_rate": 4.198273632138439e-07, "loss": 0.3009, "step": 18756 }, { "epoch": 0.8699907235621521, "grad_norm": 6.9826273918151855, "learning_rate": 4.19532226595501e-07, "loss": 0.2258, "step": 18757 }, { "epoch": 0.8700371057513915, "grad_norm": 10.04340934753418, "learning_rate": 4.192371892105757e-07, "loss": 0.3908, "step": 18758 }, { "epoch": 0.8700834879406308, "grad_norm": 15.8184175491333, "learning_rate": 4.189422510654606e-07, "loss": 0.3878, "step": 18759 }, { "epoch": 0.8701298701298701, "grad_norm": 8.778332710266113, "learning_rate": 4.186474121665468e-07, "loss": 0.3355, "step": 18760 }, { "epoch": 0.8701762523191094, "grad_norm": 4.923676490783691, "learning_rate": 4.1835267252021914e-07, "loss": 0.3196, "step": 18761 }, { "epoch": 0.8702226345083488, "grad_norm": 8.028664588928223, "learning_rate": 4.1805803213286425e-07, "loss": 0.2672, "step": 18762 }, { "epoch": 0.8702690166975882, "grad_norm": 7.609158992767334, "learning_rate": 4.1776349101086575e-07, "loss": 0.3207, "step": 18763 }, { "epoch": 0.8703153988868274, "grad_norm": 13.890244483947754, "learning_rate": 4.1746904916060373e-07, "loss": 0.4503, "step": 18764 }, { "epoch": 0.8703617810760668, "grad_norm": 6.72070837020874, "learning_rate": 4.1717470658845903e-07, "loss": 0.3141, "step": 18765 }, { "epoch": 0.8704081632653061, "grad_norm": 5.054398059844971, "learning_rate": 4.168804633008061e-07, "loss": 0.2033, "step": 18766 }, { "epoch": 0.8704545454545455, "grad_norm": 4.796118259429932, "learning_rate": 4.165863193040209e-07, "loss": 0.2932, "step": 18767 }, { "epoch": 0.8705009276437848, "grad_norm": 6.3991312980651855, "learning_rate": 4.162922746044751e-07, "loss": 0.2618, "step": 18768 }, { "epoch": 0.8705473098330241, "grad_norm": 10.741538047790527, "learning_rate": 4.159983292085401e-07, "loss": 0.3616, "step": 18769 }, { "epoch": 0.8705936920222634, "grad_norm": 7.63192892074585, "learning_rate": 4.157044831225837e-07, "loss": 0.2285, "step": 18770 }, { "epoch": 0.8706400742115028, "grad_norm": 10.062932014465332, "learning_rate": 4.1541073635297134e-07, "loss": 0.3143, "step": 18771 }, { "epoch": 0.8706864564007422, "grad_norm": 6.341723918914795, "learning_rate": 4.1511708890606685e-07, "loss": 0.3345, "step": 18772 }, { "epoch": 0.8707328385899814, "grad_norm": 9.025847434997559, "learning_rate": 4.1482354078823285e-07, "loss": 0.3279, "step": 18773 }, { "epoch": 0.8707792207792208, "grad_norm": 4.442477703094482, "learning_rate": 4.145300920058282e-07, "loss": 0.318, "step": 18774 }, { "epoch": 0.8708256029684601, "grad_norm": 6.349441051483154, "learning_rate": 4.142367425652111e-07, "loss": 0.3651, "step": 18775 }, { "epoch": 0.8708719851576995, "grad_norm": 5.661538124084473, "learning_rate": 4.139434924727359e-07, "loss": 0.3341, "step": 18776 }, { "epoch": 0.8709183673469387, "grad_norm": 10.291302680969238, "learning_rate": 4.1365034173475536e-07, "loss": 0.4382, "step": 18777 }, { "epoch": 0.8709647495361781, "grad_norm": 5.093438148498535, "learning_rate": 4.133572903576216e-07, "loss": 0.3095, "step": 18778 }, { "epoch": 0.8710111317254174, "grad_norm": 7.094746112823486, "learning_rate": 4.1306433834768287e-07, "loss": 0.3445, "step": 18779 }, { "epoch": 0.8710575139146568, "grad_norm": 5.705440998077393, "learning_rate": 4.127714857112869e-07, "loss": 0.3003, "step": 18780 }, { "epoch": 0.8711038961038962, "grad_norm": 7.394097328186035, "learning_rate": 4.1247873245477633e-07, "loss": 0.3685, "step": 18781 }, { "epoch": 0.8711502782931354, "grad_norm": 4.913357257843018, "learning_rate": 4.1218607858449387e-07, "loss": 0.351, "step": 18782 }, { "epoch": 0.8711966604823748, "grad_norm": 9.073355674743652, "learning_rate": 4.118935241067806e-07, "loss": 0.4893, "step": 18783 }, { "epoch": 0.8712430426716141, "grad_norm": 4.279465198516846, "learning_rate": 4.1160106902797424e-07, "loss": 0.2463, "step": 18784 }, { "epoch": 0.8712894248608535, "grad_norm": 7.217138290405273, "learning_rate": 4.1130871335441027e-07, "loss": 0.311, "step": 18785 }, { "epoch": 0.8713358070500927, "grad_norm": 12.666459083557129, "learning_rate": 4.1101645709242357e-07, "loss": 0.4425, "step": 18786 }, { "epoch": 0.8713821892393321, "grad_norm": 5.481802463531494, "learning_rate": 4.1072430024834407e-07, "loss": 0.2953, "step": 18787 }, { "epoch": 0.8714285714285714, "grad_norm": 6.786580562591553, "learning_rate": 4.1043224282850234e-07, "loss": 0.3574, "step": 18788 }, { "epoch": 0.8714749536178108, "grad_norm": 11.402501106262207, "learning_rate": 4.1014028483922496e-07, "loss": 0.4026, "step": 18789 }, { "epoch": 0.87152133580705, "grad_norm": 8.147649765014648, "learning_rate": 4.0984842628683796e-07, "loss": 0.2535, "step": 18790 }, { "epoch": 0.8715677179962894, "grad_norm": 9.55446720123291, "learning_rate": 4.0955666717766405e-07, "loss": 0.3791, "step": 18791 }, { "epoch": 0.8716141001855288, "grad_norm": 9.207908630371094, "learning_rate": 4.092650075180232e-07, "loss": 0.3682, "step": 18792 }, { "epoch": 0.8716604823747681, "grad_norm": 9.309991836547852, "learning_rate": 4.089734473142343e-07, "loss": 0.3104, "step": 18793 }, { "epoch": 0.8717068645640074, "grad_norm": 7.4677252769470215, "learning_rate": 4.0868198657261437e-07, "loss": 0.3095, "step": 18794 }, { "epoch": 0.8717532467532467, "grad_norm": 16.174076080322266, "learning_rate": 4.0839062529947735e-07, "loss": 0.5622, "step": 18795 }, { "epoch": 0.8717996289424861, "grad_norm": 8.82092571258545, "learning_rate": 4.08099363501136e-07, "loss": 0.3306, "step": 18796 }, { "epoch": 0.8718460111317254, "grad_norm": 7.152444839477539, "learning_rate": 4.0780820118390074e-07, "loss": 0.2743, "step": 18797 }, { "epoch": 0.8718923933209648, "grad_norm": 6.619584560394287, "learning_rate": 4.075171383540771e-07, "loss": 0.2683, "step": 18798 }, { "epoch": 0.871938775510204, "grad_norm": 9.722553253173828, "learning_rate": 4.072261750179735e-07, "loss": 0.5526, "step": 18799 }, { "epoch": 0.8719851576994434, "grad_norm": 8.776604652404785, "learning_rate": 4.069353111818913e-07, "loss": 0.3531, "step": 18800 }, { "epoch": 0.8720315398886828, "grad_norm": 6.421586036682129, "learning_rate": 4.066445468521335e-07, "loss": 0.281, "step": 18801 }, { "epoch": 0.8720779220779221, "grad_norm": 10.305675506591797, "learning_rate": 4.063538820349999e-07, "loss": 0.3335, "step": 18802 }, { "epoch": 0.8721243042671614, "grad_norm": 12.75704288482666, "learning_rate": 4.0606331673678556e-07, "loss": 0.3984, "step": 18803 }, { "epoch": 0.8721706864564007, "grad_norm": 14.594143867492676, "learning_rate": 4.057728509637865e-07, "loss": 0.3849, "step": 18804 }, { "epoch": 0.8722170686456401, "grad_norm": 8.268240928649902, "learning_rate": 4.054824847222949e-07, "loss": 0.286, "step": 18805 }, { "epoch": 0.8722634508348794, "grad_norm": 6.886017799377441, "learning_rate": 4.051922180186024e-07, "loss": 0.4123, "step": 18806 }, { "epoch": 0.8723098330241187, "grad_norm": 6.822046756744385, "learning_rate": 4.049020508589968e-07, "loss": 0.2627, "step": 18807 }, { "epoch": 0.872356215213358, "grad_norm": 6.974366188049316, "learning_rate": 4.046119832497658e-07, "loss": 0.3416, "step": 18808 }, { "epoch": 0.8724025974025974, "grad_norm": 4.520447254180908, "learning_rate": 4.0432201519719103e-07, "loss": 0.226, "step": 18809 }, { "epoch": 0.8724489795918368, "grad_norm": 6.053245544433594, "learning_rate": 4.0403214670755643e-07, "loss": 0.3912, "step": 18810 }, { "epoch": 0.8724953617810761, "grad_norm": 6.058682918548584, "learning_rate": 4.0374237778714076e-07, "loss": 0.3276, "step": 18811 }, { "epoch": 0.8725417439703154, "grad_norm": 6.22435188293457, "learning_rate": 4.034527084422224e-07, "loss": 0.3052, "step": 18812 }, { "epoch": 0.8725881261595547, "grad_norm": 11.670340538024902, "learning_rate": 4.031631386790774e-07, "loss": 0.2957, "step": 18813 }, { "epoch": 0.8726345083487941, "grad_norm": 13.731695175170898, "learning_rate": 4.028736685039775e-07, "loss": 0.4729, "step": 18814 }, { "epoch": 0.8726808905380334, "grad_norm": 9.595547676086426, "learning_rate": 4.025842979231948e-07, "loss": 0.3684, "step": 18815 }, { "epoch": 0.8727272727272727, "grad_norm": 11.087635040283203, "learning_rate": 4.022950269429987e-07, "loss": 0.4137, "step": 18816 }, { "epoch": 0.872773654916512, "grad_norm": 8.004104614257812, "learning_rate": 4.020058555696554e-07, "loss": 0.35, "step": 18817 }, { "epoch": 0.8728200371057514, "grad_norm": 7.3053483963012695, "learning_rate": 4.0171678380943046e-07, "loss": 0.3943, "step": 18818 }, { "epoch": 0.8728664192949908, "grad_norm": 11.507238388061523, "learning_rate": 4.0142781166858716e-07, "loss": 0.3782, "step": 18819 }, { "epoch": 0.87291280148423, "grad_norm": 7.064423084259033, "learning_rate": 4.011389391533832e-07, "loss": 0.2723, "step": 18820 }, { "epoch": 0.8729591836734694, "grad_norm": 8.468987464904785, "learning_rate": 4.008501662700792e-07, "loss": 0.4666, "step": 18821 }, { "epoch": 0.8730055658627087, "grad_norm": 21.572729110717773, "learning_rate": 4.005614930249302e-07, "loss": 0.413, "step": 18822 }, { "epoch": 0.8730519480519481, "grad_norm": 6.232894420623779, "learning_rate": 4.002729194241906e-07, "loss": 0.2594, "step": 18823 }, { "epoch": 0.8730983302411874, "grad_norm": 18.250375747680664, "learning_rate": 3.9998444547411255e-07, "loss": 0.4108, "step": 18824 }, { "epoch": 0.8731447124304267, "grad_norm": 5.552806854248047, "learning_rate": 3.99696071180945e-07, "loss": 0.2767, "step": 18825 }, { "epoch": 0.873191094619666, "grad_norm": 7.710270404815674, "learning_rate": 3.994077965509352e-07, "loss": 0.3491, "step": 18826 }, { "epoch": 0.8732374768089054, "grad_norm": 7.137539386749268, "learning_rate": 3.9911962159032923e-07, "loss": 0.3065, "step": 18827 }, { "epoch": 0.8732838589981448, "grad_norm": 13.63001537322998, "learning_rate": 3.9883154630536933e-07, "loss": 0.4937, "step": 18828 }, { "epoch": 0.873330241187384, "grad_norm": 9.730123519897461, "learning_rate": 3.985435707022978e-07, "loss": 0.3614, "step": 18829 }, { "epoch": 0.8733766233766234, "grad_norm": 6.345848083496094, "learning_rate": 3.9825569478735335e-07, "loss": 0.3315, "step": 18830 }, { "epoch": 0.8734230055658627, "grad_norm": 6.055706024169922, "learning_rate": 3.9796791856677174e-07, "loss": 0.2887, "step": 18831 }, { "epoch": 0.8734693877551021, "grad_norm": 6.736011505126953, "learning_rate": 3.976802420467873e-07, "loss": 0.2901, "step": 18832 }, { "epoch": 0.8735157699443413, "grad_norm": 4.135486602783203, "learning_rate": 3.9739266523363293e-07, "loss": 0.2466, "step": 18833 }, { "epoch": 0.8735621521335807, "grad_norm": 11.708415031433105, "learning_rate": 3.9710518813353914e-07, "loss": 0.3391, "step": 18834 }, { "epoch": 0.87360853432282, "grad_norm": 4.379217147827148, "learning_rate": 3.968178107527343e-07, "loss": 0.2042, "step": 18835 }, { "epoch": 0.8736549165120594, "grad_norm": 10.904151916503906, "learning_rate": 3.965305330974428e-07, "loss": 0.3381, "step": 18836 }, { "epoch": 0.8737012987012988, "grad_norm": 6.449854373931885, "learning_rate": 3.962433551738898e-07, "loss": 0.3127, "step": 18837 }, { "epoch": 0.873747680890538, "grad_norm": 7.699771881103516, "learning_rate": 3.9595627698829575e-07, "loss": 0.3166, "step": 18838 }, { "epoch": 0.8737940630797774, "grad_norm": 9.309940338134766, "learning_rate": 3.956692985468807e-07, "loss": 0.4192, "step": 18839 }, { "epoch": 0.8738404452690167, "grad_norm": 8.460040092468262, "learning_rate": 3.9538241985586144e-07, "loss": 0.3811, "step": 18840 }, { "epoch": 0.8738868274582561, "grad_norm": 6.212146282196045, "learning_rate": 3.950956409214546e-07, "loss": 0.4642, "step": 18841 }, { "epoch": 0.8739332096474953, "grad_norm": 9.604857444763184, "learning_rate": 3.948089617498707e-07, "loss": 0.4306, "step": 18842 }, { "epoch": 0.8739795918367347, "grad_norm": 5.9060282707214355, "learning_rate": 3.945223823473221e-07, "loss": 0.2965, "step": 18843 }, { "epoch": 0.874025974025974, "grad_norm": 5.037237167358398, "learning_rate": 3.9423590272001657e-07, "loss": 0.277, "step": 18844 }, { "epoch": 0.8740723562152134, "grad_norm": 6.60053825378418, "learning_rate": 3.939495228741613e-07, "loss": 0.2523, "step": 18845 }, { "epoch": 0.8741187384044526, "grad_norm": 5.793927192687988, "learning_rate": 3.936632428159609e-07, "loss": 0.2814, "step": 18846 }, { "epoch": 0.874165120593692, "grad_norm": 7.667346000671387, "learning_rate": 3.933770625516159e-07, "loss": 0.287, "step": 18847 }, { "epoch": 0.8742115027829314, "grad_norm": 8.950236320495605, "learning_rate": 3.9309098208732687e-07, "loss": 0.3825, "step": 18848 }, { "epoch": 0.8742578849721707, "grad_norm": 9.255736351013184, "learning_rate": 3.9280500142929166e-07, "loss": 0.343, "step": 18849 }, { "epoch": 0.87430426716141, "grad_norm": 6.8341965675354, "learning_rate": 3.9251912058370645e-07, "loss": 0.2659, "step": 18850 }, { "epoch": 0.8743506493506493, "grad_norm": 10.855755805969238, "learning_rate": 3.9223333955676513e-07, "loss": 0.3963, "step": 18851 }, { "epoch": 0.8743970315398887, "grad_norm": 5.618594646453857, "learning_rate": 3.919476583546572e-07, "loss": 0.2183, "step": 18852 }, { "epoch": 0.874443413729128, "grad_norm": 12.34374713897705, "learning_rate": 3.916620769835727e-07, "loss": 0.2979, "step": 18853 }, { "epoch": 0.8744897959183674, "grad_norm": 4.629354953765869, "learning_rate": 3.91376595449699e-07, "loss": 0.3088, "step": 18854 }, { "epoch": 0.8745361781076066, "grad_norm": 11.502772331237793, "learning_rate": 3.9109121375922046e-07, "loss": 0.3892, "step": 18855 }, { "epoch": 0.874582560296846, "grad_norm": 5.591917514801025, "learning_rate": 3.908059319183194e-07, "loss": 0.3186, "step": 18856 }, { "epoch": 0.8746289424860854, "grad_norm": 8.501208305358887, "learning_rate": 3.905207499331781e-07, "loss": 0.3914, "step": 18857 }, { "epoch": 0.8746753246753247, "grad_norm": 11.14142894744873, "learning_rate": 3.902356678099722e-07, "loss": 0.4587, "step": 18858 }, { "epoch": 0.874721706864564, "grad_norm": 7.819478511810303, "learning_rate": 3.899506855548796e-07, "loss": 0.3292, "step": 18859 }, { "epoch": 0.8747680890538033, "grad_norm": 5.201135635375977, "learning_rate": 3.896658031740735e-07, "loss": 0.1983, "step": 18860 }, { "epoch": 0.8748144712430427, "grad_norm": 6.50145959854126, "learning_rate": 3.893810206737264e-07, "loss": 0.3477, "step": 18861 }, { "epoch": 0.874860853432282, "grad_norm": 10.108929634094238, "learning_rate": 3.890963380600082e-07, "loss": 0.3064, "step": 18862 }, { "epoch": 0.8749072356215213, "grad_norm": 4.66182804107666, "learning_rate": 3.888117553390852e-07, "loss": 0.3883, "step": 18863 }, { "epoch": 0.8749536178107606, "grad_norm": 7.129664421081543, "learning_rate": 3.88527272517123e-07, "loss": 0.3387, "step": 18864 }, { "epoch": 0.875, "grad_norm": 7.439567565917969, "learning_rate": 3.8824288960028546e-07, "loss": 0.3514, "step": 18865 }, { "epoch": 0.8750463821892394, "grad_norm": 6.478201866149902, "learning_rate": 3.879586065947333e-07, "loss": 0.4392, "step": 18866 }, { "epoch": 0.8750927643784787, "grad_norm": 4.762018203735352, "learning_rate": 3.876744235066254e-07, "loss": 0.3054, "step": 18867 }, { "epoch": 0.875139146567718, "grad_norm": 5.849271774291992, "learning_rate": 3.8739034034211866e-07, "loss": 0.3742, "step": 18868 }, { "epoch": 0.8751855287569573, "grad_norm": 4.403426647186279, "learning_rate": 3.871063571073669e-07, "loss": 0.2317, "step": 18869 }, { "epoch": 0.8752319109461967, "grad_norm": 6.127298831939697, "learning_rate": 3.86822473808523e-07, "loss": 0.32, "step": 18870 }, { "epoch": 0.875278293135436, "grad_norm": 11.44808292388916, "learning_rate": 3.865386904517371e-07, "loss": 0.302, "step": 18871 }, { "epoch": 0.8753246753246753, "grad_norm": 7.48442268371582, "learning_rate": 3.8625500704315645e-07, "loss": 0.316, "step": 18872 }, { "epoch": 0.8753710575139146, "grad_norm": 6.606983184814453, "learning_rate": 3.859714235889289e-07, "loss": 0.3373, "step": 18873 }, { "epoch": 0.875417439703154, "grad_norm": 5.944060325622559, "learning_rate": 3.856879400951957e-07, "loss": 0.3212, "step": 18874 }, { "epoch": 0.8754638218923934, "grad_norm": 14.370742797851562, "learning_rate": 3.854045565680997e-07, "loss": 0.3698, "step": 18875 }, { "epoch": 0.8755102040816326, "grad_norm": 5.015573501586914, "learning_rate": 3.8512127301378044e-07, "loss": 0.3156, "step": 18876 }, { "epoch": 0.875556586270872, "grad_norm": 10.332594871520996, "learning_rate": 3.8483808943837407e-07, "loss": 0.3526, "step": 18877 }, { "epoch": 0.8756029684601113, "grad_norm": 6.086535453796387, "learning_rate": 3.845550058480163e-07, "loss": 0.3531, "step": 18878 }, { "epoch": 0.8756493506493507, "grad_norm": 7.1100921630859375, "learning_rate": 3.842720222488411e-07, "loss": 0.3145, "step": 18879 }, { "epoch": 0.87569573283859, "grad_norm": 10.586236953735352, "learning_rate": 3.839891386469768e-07, "loss": 0.3201, "step": 18880 }, { "epoch": 0.8757421150278293, "grad_norm": 8.485078811645508, "learning_rate": 3.837063550485537e-07, "loss": 0.3935, "step": 18881 }, { "epoch": 0.8757884972170686, "grad_norm": 4.505290508270264, "learning_rate": 3.8342367145969675e-07, "loss": 0.3114, "step": 18882 }, { "epoch": 0.875834879406308, "grad_norm": 8.897723197937012, "learning_rate": 3.831410878865316e-07, "loss": 0.3111, "step": 18883 }, { "epoch": 0.8758812615955474, "grad_norm": 5.369981288909912, "learning_rate": 3.828586043351801e-07, "loss": 0.2813, "step": 18884 }, { "epoch": 0.8759276437847866, "grad_norm": 8.457137107849121, "learning_rate": 3.8257622081176115e-07, "loss": 0.2968, "step": 18885 }, { "epoch": 0.875974025974026, "grad_norm": 4.686326026916504, "learning_rate": 3.822939373223927e-07, "loss": 0.4065, "step": 18886 }, { "epoch": 0.8760204081632653, "grad_norm": 5.565879821777344, "learning_rate": 3.8201175387319046e-07, "loss": 0.3165, "step": 18887 }, { "epoch": 0.8760667903525047, "grad_norm": 9.201180458068848, "learning_rate": 3.8172967047026834e-07, "loss": 0.2205, "step": 18888 }, { "epoch": 0.8761131725417439, "grad_norm": 5.4922003746032715, "learning_rate": 3.8144768711973646e-07, "loss": 0.3553, "step": 18889 }, { "epoch": 0.8761595547309833, "grad_norm": 7.679783821105957, "learning_rate": 3.811658038277055e-07, "loss": 0.2605, "step": 18890 }, { "epoch": 0.8762059369202226, "grad_norm": 7.256038188934326, "learning_rate": 3.8088402060028005e-07, "loss": 0.2378, "step": 18891 }, { "epoch": 0.876252319109462, "grad_norm": 6.105460166931152, "learning_rate": 3.8060233744356634e-07, "loss": 0.3598, "step": 18892 }, { "epoch": 0.8762987012987012, "grad_norm": 10.73832893371582, "learning_rate": 3.8032075436366665e-07, "loss": 0.3049, "step": 18893 }, { "epoch": 0.8763450834879406, "grad_norm": 8.584228515625, "learning_rate": 3.800392713666806e-07, "loss": 0.3561, "step": 18894 }, { "epoch": 0.87639146567718, "grad_norm": 9.132856369018555, "learning_rate": 3.7975788845870833e-07, "loss": 0.3104, "step": 18895 }, { "epoch": 0.8764378478664193, "grad_norm": 4.866355895996094, "learning_rate": 3.7947660564584375e-07, "loss": 0.266, "step": 18896 }, { "epoch": 0.8764842300556587, "grad_norm": 12.441559791564941, "learning_rate": 3.79195422934181e-07, "loss": 0.5254, "step": 18897 }, { "epoch": 0.8765306122448979, "grad_norm": 7.831631183624268, "learning_rate": 3.7891434032981236e-07, "loss": 0.3232, "step": 18898 }, { "epoch": 0.8765769944341373, "grad_norm": 4.842809677124023, "learning_rate": 3.7863335783882684e-07, "loss": 0.2308, "step": 18899 }, { "epoch": 0.8766233766233766, "grad_norm": 4.781900405883789, "learning_rate": 3.7835247546731234e-07, "loss": 0.2077, "step": 18900 }, { "epoch": 0.876669758812616, "grad_norm": 6.309563159942627, "learning_rate": 3.7807169322135517e-07, "loss": 0.2589, "step": 18901 }, { "epoch": 0.8767161410018552, "grad_norm": 8.904319763183594, "learning_rate": 3.7779101110703544e-07, "loss": 0.3729, "step": 18902 }, { "epoch": 0.8767625231910946, "grad_norm": 10.770607948303223, "learning_rate": 3.7751042913043547e-07, "loss": 0.3521, "step": 18903 }, { "epoch": 0.876808905380334, "grad_norm": 8.175434112548828, "learning_rate": 3.7722994729763427e-07, "loss": 0.3613, "step": 18904 }, { "epoch": 0.8768552875695733, "grad_norm": 8.866935729980469, "learning_rate": 3.769495656147082e-07, "loss": 0.3038, "step": 18905 }, { "epoch": 0.8769016697588126, "grad_norm": 9.042028427124023, "learning_rate": 3.7666928408773175e-07, "loss": 0.2674, "step": 18906 }, { "epoch": 0.8769480519480519, "grad_norm": 10.253643989562988, "learning_rate": 3.763891027227762e-07, "loss": 0.2847, "step": 18907 }, { "epoch": 0.8769944341372913, "grad_norm": 8.277488708496094, "learning_rate": 3.761090215259117e-07, "loss": 0.4263, "step": 18908 }, { "epoch": 0.8770408163265306, "grad_norm": 5.332890510559082, "learning_rate": 3.7582904050320667e-07, "loss": 0.2997, "step": 18909 }, { "epoch": 0.87708719851577, "grad_norm": 7.167474746704102, "learning_rate": 3.755491596607264e-07, "loss": 0.29, "step": 18910 }, { "epoch": 0.8771335807050092, "grad_norm": 5.1338934898376465, "learning_rate": 3.752693790045342e-07, "loss": 0.3292, "step": 18911 }, { "epoch": 0.8771799628942486, "grad_norm": 5.10734748840332, "learning_rate": 3.749896985406931e-07, "loss": 0.3042, "step": 18912 }, { "epoch": 0.877226345083488, "grad_norm": 9.132820129394531, "learning_rate": 3.747101182752594e-07, "loss": 0.3441, "step": 18913 }, { "epoch": 0.8772727272727273, "grad_norm": 5.981781482696533, "learning_rate": 3.744306382142915e-07, "loss": 0.3282, "step": 18914 }, { "epoch": 0.8773191094619666, "grad_norm": 8.959200859069824, "learning_rate": 3.74151258363844e-07, "loss": 0.3196, "step": 18915 }, { "epoch": 0.8773654916512059, "grad_norm": 5.196643352508545, "learning_rate": 3.7387197872996995e-07, "loss": 0.2947, "step": 18916 }, { "epoch": 0.8774118738404453, "grad_norm": 6.960920333862305, "learning_rate": 3.7359279931871996e-07, "loss": 0.2176, "step": 18917 }, { "epoch": 0.8774582560296846, "grad_norm": 6.6763153076171875, "learning_rate": 3.7331372013614085e-07, "loss": 0.4273, "step": 18918 }, { "epoch": 0.8775046382189239, "grad_norm": 15.269024848937988, "learning_rate": 3.730347411882801e-07, "loss": 0.4047, "step": 18919 }, { "epoch": 0.8775510204081632, "grad_norm": 9.017170906066895, "learning_rate": 3.7275586248118114e-07, "loss": 0.3697, "step": 18920 }, { "epoch": 0.8775974025974026, "grad_norm": 6.469341278076172, "learning_rate": 3.724770840208852e-07, "loss": 0.2518, "step": 18921 }, { "epoch": 0.877643784786642, "grad_norm": 14.887286186218262, "learning_rate": 3.7219840581343316e-07, "loss": 0.3402, "step": 18922 }, { "epoch": 0.8776901669758813, "grad_norm": 7.961154937744141, "learning_rate": 3.7191982786486227e-07, "loss": 0.382, "step": 18923 }, { "epoch": 0.8777365491651206, "grad_norm": 7.4326887130737305, "learning_rate": 3.7164135018120664e-07, "loss": 0.2643, "step": 18924 }, { "epoch": 0.8777829313543599, "grad_norm": 6.515979290008545, "learning_rate": 3.7136297276849975e-07, "loss": 0.2858, "step": 18925 }, { "epoch": 0.8778293135435993, "grad_norm": 7.788532733917236, "learning_rate": 3.7108469563277236e-07, "loss": 0.3581, "step": 18926 }, { "epoch": 0.8778756957328386, "grad_norm": 4.528388500213623, "learning_rate": 3.708065187800541e-07, "loss": 0.2795, "step": 18927 }, { "epoch": 0.8779220779220779, "grad_norm": 10.235081672668457, "learning_rate": 3.7052844221637185e-07, "loss": 0.2426, "step": 18928 }, { "epoch": 0.8779684601113172, "grad_norm": 3.4381086826324463, "learning_rate": 3.7025046594774793e-07, "loss": 0.2356, "step": 18929 }, { "epoch": 0.8780148423005566, "grad_norm": 7.159936904907227, "learning_rate": 3.699725899802059e-07, "loss": 0.3945, "step": 18930 }, { "epoch": 0.878061224489796, "grad_norm": 4.039767742156982, "learning_rate": 3.696948143197654e-07, "loss": 0.2484, "step": 18931 }, { "epoch": 0.8781076066790352, "grad_norm": 5.0921311378479, "learning_rate": 3.694171389724449e-07, "loss": 0.2784, "step": 18932 }, { "epoch": 0.8781539888682746, "grad_norm": 11.785886764526367, "learning_rate": 3.6913956394425966e-07, "loss": 0.3461, "step": 18933 }, { "epoch": 0.8782003710575139, "grad_norm": 8.935980796813965, "learning_rate": 3.688620892412237e-07, "loss": 0.3151, "step": 18934 }, { "epoch": 0.8782467532467533, "grad_norm": 6.989790439605713, "learning_rate": 3.685847148693472e-07, "loss": 0.2347, "step": 18935 }, { "epoch": 0.8782931354359926, "grad_norm": 18.009445190429688, "learning_rate": 3.683074408346404e-07, "loss": 0.5781, "step": 18936 }, { "epoch": 0.8783395176252319, "grad_norm": 6.57052755355835, "learning_rate": 3.680302671431096e-07, "loss": 0.2031, "step": 18937 }, { "epoch": 0.8783858998144712, "grad_norm": 6.466993808746338, "learning_rate": 3.6775319380076e-07, "loss": 0.3842, "step": 18938 }, { "epoch": 0.8784322820037106, "grad_norm": 7.528553485870361, "learning_rate": 3.674762208135951e-07, "loss": 0.3775, "step": 18939 }, { "epoch": 0.87847866419295, "grad_norm": 7.091826438903809, "learning_rate": 3.671993481876135e-07, "loss": 0.3279, "step": 18940 }, { "epoch": 0.8785250463821892, "grad_norm": 7.123327732086182, "learning_rate": 3.669225759288142e-07, "loss": 0.3987, "step": 18941 }, { "epoch": 0.8785714285714286, "grad_norm": 12.631162643432617, "learning_rate": 3.666459040431941e-07, "loss": 0.4945, "step": 18942 }, { "epoch": 0.8786178107606679, "grad_norm": 4.339746952056885, "learning_rate": 3.6636933253674676e-07, "loss": 0.3007, "step": 18943 }, { "epoch": 0.8786641929499073, "grad_norm": 11.272980690002441, "learning_rate": 3.6609286141546407e-07, "loss": 0.3857, "step": 18944 }, { "epoch": 0.8787105751391465, "grad_norm": 6.6122355461120605, "learning_rate": 3.658164906853351e-07, "loss": 0.3325, "step": 18945 }, { "epoch": 0.8787569573283859, "grad_norm": 6.8473615646362305, "learning_rate": 3.6554022035234726e-07, "loss": 0.3161, "step": 18946 }, { "epoch": 0.8788033395176252, "grad_norm": 15.07893180847168, "learning_rate": 3.652640504224864e-07, "loss": 0.4148, "step": 18947 }, { "epoch": 0.8788497217068646, "grad_norm": 8.904094696044922, "learning_rate": 3.6498798090173536e-07, "loss": 0.3898, "step": 18948 }, { "epoch": 0.8788961038961038, "grad_norm": 5.943090915679932, "learning_rate": 3.647120117960745e-07, "loss": 0.2881, "step": 18949 }, { "epoch": 0.8789424860853432, "grad_norm": 6.091736316680908, "learning_rate": 3.6443614311148456e-07, "loss": 0.2714, "step": 18950 }, { "epoch": 0.8789888682745826, "grad_norm": 10.349596977233887, "learning_rate": 3.6416037485393905e-07, "loss": 0.4403, "step": 18951 }, { "epoch": 0.8790352504638219, "grad_norm": 5.210336208343506, "learning_rate": 3.6388470702941436e-07, "loss": 0.3527, "step": 18952 }, { "epoch": 0.8790816326530613, "grad_norm": 6.483078479766846, "learning_rate": 3.636091396438823e-07, "loss": 0.4209, "step": 18953 }, { "epoch": 0.8791280148423005, "grad_norm": 5.484833240509033, "learning_rate": 3.633336727033121e-07, "loss": 0.293, "step": 18954 }, { "epoch": 0.8791743970315399, "grad_norm": 9.026379585266113, "learning_rate": 3.630583062136739e-07, "loss": 0.3622, "step": 18955 }, { "epoch": 0.8792207792207792, "grad_norm": 7.408839702606201, "learning_rate": 3.6278304018093067e-07, "loss": 0.3865, "step": 18956 }, { "epoch": 0.8792671614100186, "grad_norm": 8.53847885131836, "learning_rate": 3.6250787461104666e-07, "loss": 0.4008, "step": 18957 }, { "epoch": 0.8793135435992578, "grad_norm": 7.622979164123535, "learning_rate": 3.622328095099836e-07, "loss": 0.3821, "step": 18958 }, { "epoch": 0.8793599257884972, "grad_norm": 8.94663143157959, "learning_rate": 3.619578448837008e-07, "loss": 0.364, "step": 18959 }, { "epoch": 0.8794063079777366, "grad_norm": 10.269447326660156, "learning_rate": 3.61682980738155e-07, "loss": 0.3713, "step": 18960 }, { "epoch": 0.8794526901669759, "grad_norm": 5.757224082946777, "learning_rate": 3.614082170793021e-07, "loss": 0.2767, "step": 18961 }, { "epoch": 0.8794990723562152, "grad_norm": 8.106260299682617, "learning_rate": 3.6113355391309223e-07, "loss": 0.4642, "step": 18962 }, { "epoch": 0.8795454545454545, "grad_norm": 7.6154866218566895, "learning_rate": 3.608589912454774e-07, "loss": 0.2709, "step": 18963 }, { "epoch": 0.8795918367346939, "grad_norm": 12.35075569152832, "learning_rate": 3.605845290824056e-07, "loss": 0.3812, "step": 18964 }, { "epoch": 0.8796382189239332, "grad_norm": 6.902254104614258, "learning_rate": 3.6031016742982263e-07, "loss": 0.3689, "step": 18965 }, { "epoch": 0.8796846011131726, "grad_norm": 6.959475994110107, "learning_rate": 3.600359062936737e-07, "loss": 0.3247, "step": 18966 }, { "epoch": 0.8797309833024118, "grad_norm": 3.4905686378479004, "learning_rate": 3.5976174567989907e-07, "loss": 0.177, "step": 18967 }, { "epoch": 0.8797773654916512, "grad_norm": 6.680387496948242, "learning_rate": 3.594876855944385e-07, "loss": 0.3001, "step": 18968 }, { "epoch": 0.8798237476808906, "grad_norm": 5.664477825164795, "learning_rate": 3.5921372604322935e-07, "loss": 0.3623, "step": 18969 }, { "epoch": 0.8798701298701299, "grad_norm": 6.308506965637207, "learning_rate": 3.5893986703220753e-07, "loss": 0.3963, "step": 18970 }, { "epoch": 0.8799165120593692, "grad_norm": 11.457756042480469, "learning_rate": 3.586661085673049e-07, "loss": 0.4298, "step": 18971 }, { "epoch": 0.8799628942486085, "grad_norm": 8.265257835388184, "learning_rate": 3.583924506544545e-07, "loss": 0.4013, "step": 18972 }, { "epoch": 0.8800092764378479, "grad_norm": 14.366652488708496, "learning_rate": 3.5811889329958215e-07, "loss": 0.5357, "step": 18973 }, { "epoch": 0.8800556586270872, "grad_norm": 9.153621673583984, "learning_rate": 3.578454365086159e-07, "loss": 0.4125, "step": 18974 }, { "epoch": 0.8801020408163265, "grad_norm": 4.781355381011963, "learning_rate": 3.5757208028747993e-07, "loss": 0.2954, "step": 18975 }, { "epoch": 0.8801484230055658, "grad_norm": 4.681551456451416, "learning_rate": 3.572988246420961e-07, "loss": 0.2019, "step": 18976 }, { "epoch": 0.8801948051948052, "grad_norm": 9.651744842529297, "learning_rate": 3.5702566957838525e-07, "loss": 0.3822, "step": 18977 }, { "epoch": 0.8802411873840446, "grad_norm": 11.013675689697266, "learning_rate": 3.567526151022632e-07, "loss": 0.303, "step": 18978 }, { "epoch": 0.8802875695732839, "grad_norm": 4.3388872146606445, "learning_rate": 3.564796612196475e-07, "loss": 0.2102, "step": 18979 }, { "epoch": 0.8803339517625232, "grad_norm": 8.04786491394043, "learning_rate": 3.5620680793645003e-07, "loss": 0.3331, "step": 18980 }, { "epoch": 0.8803803339517625, "grad_norm": 6.120434284210205, "learning_rate": 3.559340552585827e-07, "loss": 0.3098, "step": 18981 }, { "epoch": 0.8804267161410019, "grad_norm": 5.548009395599365, "learning_rate": 3.5566140319195477e-07, "loss": 0.3498, "step": 18982 }, { "epoch": 0.8804730983302412, "grad_norm": 9.520615577697754, "learning_rate": 3.5538885174247416e-07, "loss": 0.3024, "step": 18983 }, { "epoch": 0.8805194805194805, "grad_norm": 7.548182487487793, "learning_rate": 3.5511640091604293e-07, "loss": 0.3042, "step": 18984 }, { "epoch": 0.8805658627087198, "grad_norm": 5.091054916381836, "learning_rate": 3.548440507185652e-07, "loss": 0.2447, "step": 18985 }, { "epoch": 0.8806122448979592, "grad_norm": 6.6075592041015625, "learning_rate": 3.545718011559407e-07, "loss": 0.2756, "step": 18986 }, { "epoch": 0.8806586270871986, "grad_norm": 6.839928150177002, "learning_rate": 3.542996522340686e-07, "loss": 0.3275, "step": 18987 }, { "epoch": 0.8807050092764378, "grad_norm": 8.230697631835938, "learning_rate": 3.5402760395884414e-07, "loss": 0.3888, "step": 18988 }, { "epoch": 0.8807513914656772, "grad_norm": 9.266583442687988, "learning_rate": 3.5375565633616104e-07, "loss": 0.2211, "step": 18989 }, { "epoch": 0.8807977736549165, "grad_norm": 8.840266227722168, "learning_rate": 3.5348380937191063e-07, "loss": 0.4817, "step": 18990 }, { "epoch": 0.8808441558441559, "grad_norm": 6.8841423988342285, "learning_rate": 3.5321206307198265e-07, "loss": 0.3308, "step": 18991 }, { "epoch": 0.8808905380333952, "grad_norm": 14.172224044799805, "learning_rate": 3.529404174422646e-07, "loss": 0.6001, "step": 18992 }, { "epoch": 0.8809369202226345, "grad_norm": 10.73984432220459, "learning_rate": 3.526688724886412e-07, "loss": 0.4887, "step": 18993 }, { "epoch": 0.8809833024118738, "grad_norm": 5.609991550445557, "learning_rate": 3.5239742821699676e-07, "loss": 0.3205, "step": 18994 }, { "epoch": 0.8810296846011132, "grad_norm": 5.484348297119141, "learning_rate": 3.5212608463320917e-07, "loss": 0.3083, "step": 18995 }, { "epoch": 0.8810760667903526, "grad_norm": 6.2552080154418945, "learning_rate": 3.5185484174315886e-07, "loss": 0.3737, "step": 18996 }, { "epoch": 0.8811224489795918, "grad_norm": 6.879982948303223, "learning_rate": 3.5158369955272166e-07, "loss": 0.4311, "step": 18997 }, { "epoch": 0.8811688311688312, "grad_norm": 6.990683555603027, "learning_rate": 3.513126580677717e-07, "loss": 0.4352, "step": 18998 }, { "epoch": 0.8812152133580705, "grad_norm": 10.45938491821289, "learning_rate": 3.5104171729418215e-07, "loss": 0.3973, "step": 18999 }, { "epoch": 0.8812615955473099, "grad_norm": 13.825629234313965, "learning_rate": 3.50770877237821e-07, "loss": 0.2961, "step": 19000 }, { "epoch": 0.8813079777365491, "grad_norm": 16.412046432495117, "learning_rate": 3.5050013790455637e-07, "loss": 0.274, "step": 19001 }, { "epoch": 0.8813543599257885, "grad_norm": 6.346879005432129, "learning_rate": 3.502294993002536e-07, "loss": 0.276, "step": 19002 }, { "epoch": 0.8814007421150278, "grad_norm": 6.648754119873047, "learning_rate": 3.499589614307769e-07, "loss": 0.449, "step": 19003 }, { "epoch": 0.8814471243042672, "grad_norm": 4.906705379486084, "learning_rate": 3.49688524301986e-07, "loss": 0.308, "step": 19004 }, { "epoch": 0.8814935064935064, "grad_norm": 4.75518274307251, "learning_rate": 3.494181879197417e-07, "loss": 0.2731, "step": 19005 }, { "epoch": 0.8815398886827458, "grad_norm": 7.010628700256348, "learning_rate": 3.491479522898983e-07, "loss": 0.4159, "step": 19006 }, { "epoch": 0.8815862708719852, "grad_norm": 4.485086917877197, "learning_rate": 3.488778174183116e-07, "loss": 0.268, "step": 19007 }, { "epoch": 0.8816326530612245, "grad_norm": 9.202376365661621, "learning_rate": 3.486077833108342e-07, "loss": 0.2525, "step": 19008 }, { "epoch": 0.8816790352504639, "grad_norm": 6.9486589431762695, "learning_rate": 3.4833784997331533e-07, "loss": 0.2626, "step": 19009 }, { "epoch": 0.8817254174397031, "grad_norm": 7.039684295654297, "learning_rate": 3.4806801741160415e-07, "loss": 0.3085, "step": 19010 }, { "epoch": 0.8817717996289425, "grad_norm": 8.921842575073242, "learning_rate": 3.477982856315454e-07, "loss": 0.2838, "step": 19011 }, { "epoch": 0.8818181818181818, "grad_norm": 8.76330280303955, "learning_rate": 3.475286546389822e-07, "loss": 0.3344, "step": 19012 }, { "epoch": 0.8818645640074212, "grad_norm": 6.279565334320068, "learning_rate": 3.4725912443975775e-07, "loss": 0.342, "step": 19013 }, { "epoch": 0.8819109461966604, "grad_norm": 11.803801536560059, "learning_rate": 3.4698969503970945e-07, "loss": 0.3964, "step": 19014 }, { "epoch": 0.8819573283858998, "grad_norm": 6.249440670013428, "learning_rate": 3.4672036644467665e-07, "loss": 0.3311, "step": 19015 }, { "epoch": 0.8820037105751392, "grad_norm": 11.329174041748047, "learning_rate": 3.4645113866049187e-07, "loss": 0.4004, "step": 19016 }, { "epoch": 0.8820500927643785, "grad_norm": 4.005016803741455, "learning_rate": 3.4618201169298813e-07, "loss": 0.3073, "step": 19017 }, { "epoch": 0.8820964749536178, "grad_norm": 4.617849826812744, "learning_rate": 3.45912985547997e-07, "loss": 0.2221, "step": 19018 }, { "epoch": 0.8821428571428571, "grad_norm": 10.980884552001953, "learning_rate": 3.45644060231346e-07, "loss": 0.383, "step": 19019 }, { "epoch": 0.8821892393320965, "grad_norm": 7.270501136779785, "learning_rate": 3.453752357488627e-07, "loss": 0.3914, "step": 19020 }, { "epoch": 0.8822356215213358, "grad_norm": 7.77459716796875, "learning_rate": 3.451065121063685e-07, "loss": 0.3353, "step": 19021 }, { "epoch": 0.8822820037105752, "grad_norm": 8.906052589416504, "learning_rate": 3.448378893096871e-07, "loss": 0.3691, "step": 19022 }, { "epoch": 0.8823283858998144, "grad_norm": 6.6446213722229, "learning_rate": 3.445693673646372e-07, "loss": 0.2839, "step": 19023 }, { "epoch": 0.8823747680890538, "grad_norm": 10.412731170654297, "learning_rate": 3.443009462770364e-07, "loss": 0.3446, "step": 19024 }, { "epoch": 0.8824211502782932, "grad_norm": 7.756371974945068, "learning_rate": 3.4403262605270105e-07, "loss": 0.3471, "step": 19025 }, { "epoch": 0.8824675324675325, "grad_norm": 4.588643550872803, "learning_rate": 3.4376440669744216e-07, "loss": 0.2335, "step": 19026 }, { "epoch": 0.8825139146567718, "grad_norm": 9.100391387939453, "learning_rate": 3.4349628821707115e-07, "loss": 0.2541, "step": 19027 }, { "epoch": 0.8825602968460111, "grad_norm": 5.38709831237793, "learning_rate": 3.432282706173973e-07, "loss": 0.2982, "step": 19028 }, { "epoch": 0.8826066790352505, "grad_norm": 8.329854965209961, "learning_rate": 3.42960353904227e-07, "loss": 0.3119, "step": 19029 }, { "epoch": 0.8826530612244898, "grad_norm": 4.450051784515381, "learning_rate": 3.4269253808336456e-07, "loss": 0.2503, "step": 19030 }, { "epoch": 0.8826994434137291, "grad_norm": 10.210183143615723, "learning_rate": 3.424248231606114e-07, "loss": 0.4017, "step": 19031 }, { "epoch": 0.8827458256029684, "grad_norm": 4.980948448181152, "learning_rate": 3.42157209141768e-07, "loss": 0.3265, "step": 19032 }, { "epoch": 0.8827922077922078, "grad_norm": 5.331965446472168, "learning_rate": 3.418896960326312e-07, "loss": 0.3027, "step": 19033 }, { "epoch": 0.8828385899814472, "grad_norm": 6.986696243286133, "learning_rate": 3.4162228383899766e-07, "loss": 0.3242, "step": 19034 }, { "epoch": 0.8828849721706865, "grad_norm": 6.620372295379639, "learning_rate": 3.413549725666615e-07, "loss": 0.279, "step": 19035 }, { "epoch": 0.8829313543599258, "grad_norm": 5.644684791564941, "learning_rate": 3.410877622214115e-07, "loss": 0.2911, "step": 19036 }, { "epoch": 0.8829777365491651, "grad_norm": 5.331603050231934, "learning_rate": 3.408206528090374e-07, "loss": 0.3709, "step": 19037 }, { "epoch": 0.8830241187384045, "grad_norm": 4.161937236785889, "learning_rate": 3.405536443353269e-07, "loss": 0.3318, "step": 19038 }, { "epoch": 0.8830705009276438, "grad_norm": 6.596102714538574, "learning_rate": 3.402867368060642e-07, "loss": 0.3188, "step": 19039 }, { "epoch": 0.8831168831168831, "grad_norm": 5.627807140350342, "learning_rate": 3.4001993022703237e-07, "loss": 0.3562, "step": 19040 }, { "epoch": 0.8831632653061224, "grad_norm": 7.6615447998046875, "learning_rate": 3.397532246040097e-07, "loss": 0.3793, "step": 19041 }, { "epoch": 0.8832096474953618, "grad_norm": 9.083090782165527, "learning_rate": 3.394866199427754e-07, "loss": 0.3537, "step": 19042 }, { "epoch": 0.8832560296846012, "grad_norm": 5.686987400054932, "learning_rate": 3.39220116249106e-07, "loss": 0.3193, "step": 19043 }, { "epoch": 0.8833024118738404, "grad_norm": 5.500555515289307, "learning_rate": 3.3895371352877347e-07, "loss": 0.2664, "step": 19044 }, { "epoch": 0.8833487940630798, "grad_norm": 11.490620613098145, "learning_rate": 3.386874117875522e-07, "loss": 0.4215, "step": 19045 }, { "epoch": 0.8833951762523191, "grad_norm": 8.558514595031738, "learning_rate": 3.3842121103120796e-07, "loss": 0.31, "step": 19046 }, { "epoch": 0.8834415584415585, "grad_norm": 5.251785755157471, "learning_rate": 3.381551112655096e-07, "loss": 0.2156, "step": 19047 }, { "epoch": 0.8834879406307977, "grad_norm": 7.728393077850342, "learning_rate": 3.3788911249622194e-07, "loss": 0.3371, "step": 19048 }, { "epoch": 0.8835343228200371, "grad_norm": 6.366450309753418, "learning_rate": 3.376232147291075e-07, "loss": 0.3838, "step": 19049 }, { "epoch": 0.8835807050092764, "grad_norm": 10.118807792663574, "learning_rate": 3.3735741796992795e-07, "loss": 0.1803, "step": 19050 }, { "epoch": 0.8836270871985158, "grad_norm": 7.527598857879639, "learning_rate": 3.370917222244402e-07, "loss": 0.3867, "step": 19051 }, { "epoch": 0.8836734693877552, "grad_norm": 8.156039237976074, "learning_rate": 3.368261274984003e-07, "loss": 0.3814, "step": 19052 }, { "epoch": 0.8837198515769944, "grad_norm": 8.30957317352295, "learning_rate": 3.36560633797563e-07, "loss": 0.2847, "step": 19053 }, { "epoch": 0.8837662337662338, "grad_norm": 15.136310577392578, "learning_rate": 3.3629524112767985e-07, "loss": 0.2453, "step": 19054 }, { "epoch": 0.8838126159554731, "grad_norm": 5.581416130065918, "learning_rate": 3.360299494945013e-07, "loss": 0.2729, "step": 19055 }, { "epoch": 0.8838589981447125, "grad_norm": 5.88159704208374, "learning_rate": 3.3576475890377323e-07, "loss": 0.2858, "step": 19056 }, { "epoch": 0.8839053803339517, "grad_norm": 7.755354404449463, "learning_rate": 3.354996693612411e-07, "loss": 0.2862, "step": 19057 }, { "epoch": 0.8839517625231911, "grad_norm": 10.420625686645508, "learning_rate": 3.3523468087264865e-07, "loss": 0.3576, "step": 19058 }, { "epoch": 0.8839981447124304, "grad_norm": 10.461846351623535, "learning_rate": 3.349697934437368e-07, "loss": 0.4274, "step": 19059 }, { "epoch": 0.8840445269016698, "grad_norm": 7.438536167144775, "learning_rate": 3.347050070802438e-07, "loss": 0.3166, "step": 19060 }, { "epoch": 0.884090909090909, "grad_norm": 8.631233215332031, "learning_rate": 3.344403217879061e-07, "loss": 0.253, "step": 19061 }, { "epoch": 0.8841372912801484, "grad_norm": 9.200566291809082, "learning_rate": 3.3417573757245746e-07, "loss": 0.3475, "step": 19062 }, { "epoch": 0.8841836734693878, "grad_norm": 7.803128242492676, "learning_rate": 3.339112544396306e-07, "loss": 0.3062, "step": 19063 }, { "epoch": 0.8842300556586271, "grad_norm": 6.176479339599609, "learning_rate": 3.336468723951558e-07, "loss": 0.3372, "step": 19064 }, { "epoch": 0.8842764378478665, "grad_norm": 9.667661666870117, "learning_rate": 3.333825914447608e-07, "loss": 0.3779, "step": 19065 }, { "epoch": 0.8843228200371057, "grad_norm": 65.29000091552734, "learning_rate": 3.331184115941694e-07, "loss": 0.4586, "step": 19066 }, { "epoch": 0.8843692022263451, "grad_norm": 5.75570821762085, "learning_rate": 3.328543328491063e-07, "loss": 0.2581, "step": 19067 }, { "epoch": 0.8844155844155844, "grad_norm": 7.674076080322266, "learning_rate": 3.325903552152926e-07, "loss": 0.2692, "step": 19068 }, { "epoch": 0.8844619666048238, "grad_norm": 5.516184329986572, "learning_rate": 3.32326478698447e-07, "loss": 0.3091, "step": 19069 }, { "epoch": 0.884508348794063, "grad_norm": 8.590838432312012, "learning_rate": 3.3206270330428667e-07, "loss": 0.2629, "step": 19070 }, { "epoch": 0.8845547309833024, "grad_norm": 5.346884727478027, "learning_rate": 3.3179902903852535e-07, "loss": 0.3976, "step": 19071 }, { "epoch": 0.8846011131725418, "grad_norm": 10.360421180725098, "learning_rate": 3.315354559068762e-07, "loss": 0.3578, "step": 19072 }, { "epoch": 0.8846474953617811, "grad_norm": 13.57042407989502, "learning_rate": 3.3127198391504855e-07, "loss": 0.519, "step": 19073 }, { "epoch": 0.8846938775510204, "grad_norm": 5.734708786010742, "learning_rate": 3.310086130687512e-07, "loss": 0.354, "step": 19074 }, { "epoch": 0.8847402597402597, "grad_norm": 6.675548076629639, "learning_rate": 3.3074534337368903e-07, "loss": 0.3517, "step": 19075 }, { "epoch": 0.8847866419294991, "grad_norm": 13.37304973602295, "learning_rate": 3.3048217483556743e-07, "loss": 0.2912, "step": 19076 }, { "epoch": 0.8848330241187384, "grad_norm": 6.446572303771973, "learning_rate": 3.302191074600858e-07, "loss": 0.3474, "step": 19077 }, { "epoch": 0.8848794063079778, "grad_norm": 6.848757743835449, "learning_rate": 3.2995614125294453e-07, "loss": 0.3148, "step": 19078 }, { "epoch": 0.884925788497217, "grad_norm": 8.066267967224121, "learning_rate": 3.296932762198396e-07, "loss": 0.321, "step": 19079 }, { "epoch": 0.8849721706864564, "grad_norm": 5.615921974182129, "learning_rate": 3.294305123664665e-07, "loss": 0.2494, "step": 19080 }, { "epoch": 0.8850185528756958, "grad_norm": 8.517269134521484, "learning_rate": 3.29167849698519e-07, "loss": 0.3081, "step": 19081 }, { "epoch": 0.8850649350649351, "grad_norm": 5.656612873077393, "learning_rate": 3.289052882216859e-07, "loss": 0.2576, "step": 19082 }, { "epoch": 0.8851113172541744, "grad_norm": 9.129942893981934, "learning_rate": 3.2864282794165536e-07, "loss": 0.3488, "step": 19083 }, { "epoch": 0.8851576994434137, "grad_norm": 11.54826545715332, "learning_rate": 3.2838046886411457e-07, "loss": 0.3209, "step": 19084 }, { "epoch": 0.8852040816326531, "grad_norm": 5.605896472930908, "learning_rate": 3.281182109947467e-07, "loss": 0.3147, "step": 19085 }, { "epoch": 0.8852504638218924, "grad_norm": 4.74735164642334, "learning_rate": 3.2785605433923395e-07, "loss": 0.2409, "step": 19086 }, { "epoch": 0.8852968460111317, "grad_norm": 4.872551918029785, "learning_rate": 3.275939989032556e-07, "loss": 0.2959, "step": 19087 }, { "epoch": 0.885343228200371, "grad_norm": 6.5536208152771, "learning_rate": 3.2733204469248825e-07, "loss": 0.278, "step": 19088 }, { "epoch": 0.8853896103896104, "grad_norm": 6.390567779541016, "learning_rate": 3.270701917126079e-07, "loss": 0.376, "step": 19089 }, { "epoch": 0.8854359925788498, "grad_norm": 8.23428726196289, "learning_rate": 3.268084399692872e-07, "loss": 0.1729, "step": 19090 }, { "epoch": 0.8854823747680891, "grad_norm": 8.775315284729004, "learning_rate": 3.2654678946819616e-07, "loss": 0.3164, "step": 19091 }, { "epoch": 0.8855287569573284, "grad_norm": 6.249152660369873, "learning_rate": 3.2628524021500516e-07, "loss": 0.3037, "step": 19092 }, { "epoch": 0.8855751391465677, "grad_norm": 7.039142608642578, "learning_rate": 3.260237922153786e-07, "loss": 0.2855, "step": 19093 }, { "epoch": 0.8856215213358071, "grad_norm": 12.12018871307373, "learning_rate": 3.2576244547498135e-07, "loss": 0.3642, "step": 19094 }, { "epoch": 0.8856679035250464, "grad_norm": 8.724205017089844, "learning_rate": 3.2550119999947506e-07, "loss": 0.3418, "step": 19095 }, { "epoch": 0.8857142857142857, "grad_norm": 4.809111595153809, "learning_rate": 3.2524005579452014e-07, "loss": 0.3099, "step": 19096 }, { "epoch": 0.885760667903525, "grad_norm": 10.395047187805176, "learning_rate": 3.2497901286577313e-07, "loss": 0.4059, "step": 19097 }, { "epoch": 0.8858070500927644, "grad_norm": 6.360832691192627, "learning_rate": 3.247180712188913e-07, "loss": 0.236, "step": 19098 }, { "epoch": 0.8858534322820037, "grad_norm": 7.612756252288818, "learning_rate": 3.244572308595251e-07, "loss": 0.3542, "step": 19099 }, { "epoch": 0.885899814471243, "grad_norm": 6.844669818878174, "learning_rate": 3.241964917933277e-07, "loss": 0.4876, "step": 19100 }, { "epoch": 0.8859461966604824, "grad_norm": 6.643264293670654, "learning_rate": 3.239358540259463e-07, "loss": 0.3827, "step": 19101 }, { "epoch": 0.8859925788497217, "grad_norm": 5.305461883544922, "learning_rate": 3.2367531756302863e-07, "loss": 0.2691, "step": 19102 }, { "epoch": 0.8860389610389611, "grad_norm": 10.80340576171875, "learning_rate": 3.234148824102196e-07, "loss": 0.4559, "step": 19103 }, { "epoch": 0.8860853432282003, "grad_norm": 10.304125785827637, "learning_rate": 3.231545485731596e-07, "loss": 0.3374, "step": 19104 }, { "epoch": 0.8861317254174397, "grad_norm": 5.219549179077148, "learning_rate": 3.2289431605748933e-07, "loss": 0.2103, "step": 19105 }, { "epoch": 0.886178107606679, "grad_norm": 5.450366020202637, "learning_rate": 3.2263418486884744e-07, "loss": 0.2617, "step": 19106 }, { "epoch": 0.8862244897959184, "grad_norm": 9.673868179321289, "learning_rate": 3.223741550128684e-07, "loss": 0.3678, "step": 19107 }, { "epoch": 0.8862708719851577, "grad_norm": 5.19019889831543, "learning_rate": 3.221142264951865e-07, "loss": 0.3731, "step": 19108 }, { "epoch": 0.886317254174397, "grad_norm": 5.119810581207275, "learning_rate": 3.218543993214324e-07, "loss": 0.1809, "step": 19109 }, { "epoch": 0.8863636363636364, "grad_norm": 7.43775749206543, "learning_rate": 3.2159467349723525e-07, "loss": 0.347, "step": 19110 }, { "epoch": 0.8864100185528757, "grad_norm": 8.194978713989258, "learning_rate": 3.213350490282213e-07, "loss": 0.3949, "step": 19111 }, { "epoch": 0.8864564007421151, "grad_norm": 10.529717445373535, "learning_rate": 3.2107552592001657e-07, "loss": 0.3711, "step": 19112 }, { "epoch": 0.8865027829313543, "grad_norm": 4.31800651550293, "learning_rate": 3.2081610417824203e-07, "loss": 0.2738, "step": 19113 }, { "epoch": 0.8865491651205937, "grad_norm": 11.789447784423828, "learning_rate": 3.2055678380851993e-07, "loss": 0.5902, "step": 19114 }, { "epoch": 0.886595547309833, "grad_norm": 4.977329254150391, "learning_rate": 3.202975648164658e-07, "loss": 0.2588, "step": 19115 }, { "epoch": 0.8866419294990724, "grad_norm": 8.936110496520996, "learning_rate": 3.2003844720769674e-07, "loss": 0.3842, "step": 19116 }, { "epoch": 0.8866883116883116, "grad_norm": 8.274816513061523, "learning_rate": 3.1977943098782604e-07, "loss": 0.2463, "step": 19117 }, { "epoch": 0.886734693877551, "grad_norm": 9.331727981567383, "learning_rate": 3.195205161624659e-07, "loss": 0.382, "step": 19118 }, { "epoch": 0.8867810760667904, "grad_norm": 6.495521068572998, "learning_rate": 3.1926170273722523e-07, "loss": 0.396, "step": 19119 }, { "epoch": 0.8868274582560297, "grad_norm": 8.187880516052246, "learning_rate": 3.190029907177106e-07, "loss": 0.3647, "step": 19120 }, { "epoch": 0.8868738404452691, "grad_norm": 4.877294540405273, "learning_rate": 3.1874438010952746e-07, "loss": 0.2908, "step": 19121 }, { "epoch": 0.8869202226345083, "grad_norm": 5.832142353057861, "learning_rate": 3.1848587091827757e-07, "loss": 0.3224, "step": 19122 }, { "epoch": 0.8869666048237477, "grad_norm": 5.221039772033691, "learning_rate": 3.1822746314956243e-07, "loss": 0.2924, "step": 19123 }, { "epoch": 0.887012987012987, "grad_norm": 8.642507553100586, "learning_rate": 3.179691568089799e-07, "loss": 0.4072, "step": 19124 }, { "epoch": 0.8870593692022264, "grad_norm": 13.377557754516602, "learning_rate": 3.177109519021271e-07, "loss": 0.4284, "step": 19125 }, { "epoch": 0.8871057513914656, "grad_norm": 4.332822799682617, "learning_rate": 3.174528484345957e-07, "loss": 0.1847, "step": 19126 }, { "epoch": 0.887152133580705, "grad_norm": 5.056390762329102, "learning_rate": 3.1719484641197893e-07, "loss": 0.3068, "step": 19127 }, { "epoch": 0.8871985157699444, "grad_norm": 4.247201919555664, "learning_rate": 3.169369458398652e-07, "loss": 0.236, "step": 19128 }, { "epoch": 0.8872448979591837, "grad_norm": 11.447970390319824, "learning_rate": 3.1667914672384336e-07, "loss": 0.3672, "step": 19129 }, { "epoch": 0.887291280148423, "grad_norm": 5.555309295654297, "learning_rate": 3.1642144906949777e-07, "loss": 0.203, "step": 19130 }, { "epoch": 0.8873376623376623, "grad_norm": 7.783693313598633, "learning_rate": 3.161638528824107e-07, "loss": 0.3217, "step": 19131 }, { "epoch": 0.8873840445269017, "grad_norm": 11.622906684875488, "learning_rate": 3.159063581681632e-07, "loss": 0.3587, "step": 19132 }, { "epoch": 0.887430426716141, "grad_norm": 6.179965972900391, "learning_rate": 3.1564896493233356e-07, "loss": 0.3533, "step": 19133 }, { "epoch": 0.8874768089053804, "grad_norm": 4.609950542449951, "learning_rate": 3.1539167318049843e-07, "loss": 0.3041, "step": 19134 }, { "epoch": 0.8875231910946196, "grad_norm": 5.296597480773926, "learning_rate": 3.1513448291823236e-07, "loss": 0.3345, "step": 19135 }, { "epoch": 0.887569573283859, "grad_norm": 4.98043966293335, "learning_rate": 3.1487739415110686e-07, "loss": 0.3399, "step": 19136 }, { "epoch": 0.8876159554730984, "grad_norm": 9.522878646850586, "learning_rate": 3.1462040688469085e-07, "loss": 0.3824, "step": 19137 }, { "epoch": 0.8876623376623377, "grad_norm": 7.399011135101318, "learning_rate": 3.1436352112455273e-07, "loss": 0.2948, "step": 19138 }, { "epoch": 0.887708719851577, "grad_norm": 5.861657619476318, "learning_rate": 3.1410673687625735e-07, "loss": 0.359, "step": 19139 }, { "epoch": 0.8877551020408163, "grad_norm": 12.528512954711914, "learning_rate": 3.1385005414536817e-07, "loss": 0.4379, "step": 19140 }, { "epoch": 0.8878014842300557, "grad_norm": 8.49496078491211, "learning_rate": 3.1359347293744624e-07, "loss": 0.3006, "step": 19141 }, { "epoch": 0.887847866419295, "grad_norm": 8.682565689086914, "learning_rate": 3.1333699325804935e-07, "loss": 0.1972, "step": 19142 }, { "epoch": 0.8878942486085343, "grad_norm": 4.478822231292725, "learning_rate": 3.130806151127347e-07, "loss": 0.2486, "step": 19143 }, { "epoch": 0.8879406307977736, "grad_norm": 5.242527961730957, "learning_rate": 3.128243385070562e-07, "loss": 0.3549, "step": 19144 }, { "epoch": 0.887987012987013, "grad_norm": 7.922848224639893, "learning_rate": 3.1256816344656605e-07, "loss": 0.3231, "step": 19145 }, { "epoch": 0.8880333951762523, "grad_norm": 3.9162285327911377, "learning_rate": 3.123120899368143e-07, "loss": 0.2304, "step": 19146 }, { "epoch": 0.8880797773654916, "grad_norm": 15.817936897277832, "learning_rate": 3.1205611798334977e-07, "loss": 0.4415, "step": 19147 }, { "epoch": 0.888126159554731, "grad_norm": 8.743024826049805, "learning_rate": 3.118002475917159e-07, "loss": 0.3567, "step": 19148 }, { "epoch": 0.8881725417439703, "grad_norm": 5.403254508972168, "learning_rate": 3.115444787674565e-07, "loss": 0.3351, "step": 19149 }, { "epoch": 0.8882189239332097, "grad_norm": 7.364140510559082, "learning_rate": 3.112888115161133e-07, "loss": 0.3362, "step": 19150 }, { "epoch": 0.888265306122449, "grad_norm": 8.72249984741211, "learning_rate": 3.1103324584322523e-07, "loss": 0.3784, "step": 19151 }, { "epoch": 0.8883116883116883, "grad_norm": 8.508893013000488, "learning_rate": 3.107777817543295e-07, "loss": 0.3795, "step": 19152 }, { "epoch": 0.8883580705009276, "grad_norm": 9.165590286254883, "learning_rate": 3.1052241925495887e-07, "loss": 0.2577, "step": 19153 }, { "epoch": 0.888404452690167, "grad_norm": 7.835709571838379, "learning_rate": 3.102671583506461e-07, "loss": 0.3256, "step": 19154 }, { "epoch": 0.8884508348794063, "grad_norm": 4.474181175231934, "learning_rate": 3.1001199904692246e-07, "loss": 0.3621, "step": 19155 }, { "epoch": 0.8884972170686456, "grad_norm": 5.1066412925720215, "learning_rate": 3.0975694134931513e-07, "loss": 0.3421, "step": 19156 }, { "epoch": 0.888543599257885, "grad_norm": 10.114484786987305, "learning_rate": 3.0950198526334965e-07, "loss": 0.3007, "step": 19157 }, { "epoch": 0.8885899814471243, "grad_norm": 8.304715156555176, "learning_rate": 3.092471307945505e-07, "loss": 0.3125, "step": 19158 }, { "epoch": 0.8886363636363637, "grad_norm": 27.92958641052246, "learning_rate": 3.0899237794843715e-07, "loss": 0.524, "step": 19159 }, { "epoch": 0.8886827458256029, "grad_norm": 5.461001396179199, "learning_rate": 3.087377267305297e-07, "loss": 0.2842, "step": 19160 }, { "epoch": 0.8887291280148423, "grad_norm": 6.121237277984619, "learning_rate": 3.0848317714634533e-07, "loss": 0.3278, "step": 19161 }, { "epoch": 0.8887755102040816, "grad_norm": 8.035333633422852, "learning_rate": 3.08228729201398e-07, "loss": 0.2387, "step": 19162 }, { "epoch": 0.888821892393321, "grad_norm": 30.20435333251953, "learning_rate": 3.0797438290120164e-07, "loss": 0.3122, "step": 19163 }, { "epoch": 0.8888682745825603, "grad_norm": 5.442277908325195, "learning_rate": 3.0772013825126457e-07, "loss": 0.3635, "step": 19164 }, { "epoch": 0.8889146567717996, "grad_norm": 7.855807781219482, "learning_rate": 3.0746599525709574e-07, "loss": 0.37, "step": 19165 }, { "epoch": 0.888961038961039, "grad_norm": 6.543247222900391, "learning_rate": 3.0721195392420135e-07, "loss": 0.2456, "step": 19166 }, { "epoch": 0.8890074211502783, "grad_norm": 4.350855827331543, "learning_rate": 3.069580142580847e-07, "loss": 0.2419, "step": 19167 }, { "epoch": 0.8890538033395177, "grad_norm": 9.695813179016113, "learning_rate": 3.067041762642475e-07, "loss": 0.3682, "step": 19168 }, { "epoch": 0.8891001855287569, "grad_norm": 6.90493106842041, "learning_rate": 3.064504399481893e-07, "loss": 0.2453, "step": 19169 }, { "epoch": 0.8891465677179963, "grad_norm": 10.407866477966309, "learning_rate": 3.061968053154063e-07, "loss": 0.3556, "step": 19170 }, { "epoch": 0.8891929499072356, "grad_norm": 7.294590473175049, "learning_rate": 3.059432723713934e-07, "loss": 0.337, "step": 19171 }, { "epoch": 0.889239332096475, "grad_norm": 5.49031925201416, "learning_rate": 3.0568984112164413e-07, "loss": 0.2702, "step": 19172 }, { "epoch": 0.8892857142857142, "grad_norm": 11.09698486328125, "learning_rate": 3.054365115716479e-07, "loss": 0.3659, "step": 19173 }, { "epoch": 0.8893320964749536, "grad_norm": 8.807616233825684, "learning_rate": 3.0518328372689475e-07, "loss": 0.3533, "step": 19174 }, { "epoch": 0.889378478664193, "grad_norm": 7.031339645385742, "learning_rate": 3.0493015759286817e-07, "loss": 0.2891, "step": 19175 }, { "epoch": 0.8894248608534323, "grad_norm": 8.234001159667969, "learning_rate": 3.0467713317505363e-07, "loss": 0.3089, "step": 19176 }, { "epoch": 0.8894712430426717, "grad_norm": 7.661088943481445, "learning_rate": 3.04424210478933e-07, "loss": 0.3314, "step": 19177 }, { "epoch": 0.8895176252319109, "grad_norm": 17.05921173095703, "learning_rate": 3.041713895099846e-07, "loss": 0.3493, "step": 19178 }, { "epoch": 0.8895640074211503, "grad_norm": 7.81865930557251, "learning_rate": 3.039186702736863e-07, "loss": 0.2725, "step": 19179 }, { "epoch": 0.8896103896103896, "grad_norm": 7.659397125244141, "learning_rate": 3.036660527755142e-07, "loss": 0.1951, "step": 19180 }, { "epoch": 0.889656771799629, "grad_norm": 5.904493808746338, "learning_rate": 3.0341353702093903e-07, "loss": 0.319, "step": 19181 }, { "epoch": 0.8897031539888682, "grad_norm": 18.76578712463379, "learning_rate": 3.0316112301543243e-07, "loss": 0.5203, "step": 19182 }, { "epoch": 0.8897495361781076, "grad_norm": 5.499879360198975, "learning_rate": 3.029088107644623e-07, "loss": 0.3337, "step": 19183 }, { "epoch": 0.889795918367347, "grad_norm": 7.020504474639893, "learning_rate": 3.0265660027349597e-07, "loss": 0.3198, "step": 19184 }, { "epoch": 0.8898423005565863, "grad_norm": 6.113595485687256, "learning_rate": 3.0240449154799733e-07, "loss": 0.3677, "step": 19185 }, { "epoch": 0.8898886827458256, "grad_norm": 5.948111534118652, "learning_rate": 3.0215248459342705e-07, "loss": 0.2387, "step": 19186 }, { "epoch": 0.8899350649350649, "grad_norm": 8.703797340393066, "learning_rate": 3.019005794152452e-07, "loss": 0.3533, "step": 19187 }, { "epoch": 0.8899814471243043, "grad_norm": 6.081148147583008, "learning_rate": 3.0164877601890907e-07, "loss": 0.3077, "step": 19188 }, { "epoch": 0.8900278293135436, "grad_norm": 6.445418834686279, "learning_rate": 3.0139707440987433e-07, "loss": 0.3585, "step": 19189 }, { "epoch": 0.890074211502783, "grad_norm": 6.827121734619141, "learning_rate": 3.0114547459359435e-07, "loss": 0.1561, "step": 19190 }, { "epoch": 0.8901205936920222, "grad_norm": 8.035189628601074, "learning_rate": 3.0089397657551867e-07, "loss": 0.4022, "step": 19191 }, { "epoch": 0.8901669758812616, "grad_norm": 5.224483966827393, "learning_rate": 3.006425803610963e-07, "loss": 0.2567, "step": 19192 }, { "epoch": 0.890213358070501, "grad_norm": 10.27016830444336, "learning_rate": 3.0039128595577393e-07, "loss": 0.3348, "step": 19193 }, { "epoch": 0.8902597402597403, "grad_norm": 8.075830459594727, "learning_rate": 3.001400933649956e-07, "loss": 0.4308, "step": 19194 }, { "epoch": 0.8903061224489796, "grad_norm": 10.994653701782227, "learning_rate": 2.9988900259420304e-07, "loss": 0.3657, "step": 19195 }, { "epoch": 0.8903525046382189, "grad_norm": 8.95305061340332, "learning_rate": 2.996380136488369e-07, "loss": 0.3219, "step": 19196 }, { "epoch": 0.8903988868274583, "grad_norm": 6.022520542144775, "learning_rate": 2.9938712653433346e-07, "loss": 0.283, "step": 19197 }, { "epoch": 0.8904452690166976, "grad_norm": 11.865882873535156, "learning_rate": 2.9913634125612877e-07, "loss": 0.4085, "step": 19198 }, { "epoch": 0.8904916512059369, "grad_norm": 17.071109771728516, "learning_rate": 2.9888565781965586e-07, "loss": 0.3565, "step": 19199 }, { "epoch": 0.8905380333951762, "grad_norm": 5.526235103607178, "learning_rate": 2.986350762303453e-07, "loss": 0.3218, "step": 19200 }, { "epoch": 0.8905844155844156, "grad_norm": 7.481773853302002, "learning_rate": 2.983845964936266e-07, "loss": 0.2885, "step": 19201 }, { "epoch": 0.890630797773655, "grad_norm": 7.127023696899414, "learning_rate": 2.981342186149255e-07, "loss": 0.272, "step": 19202 }, { "epoch": 0.8906771799628942, "grad_norm": 6.804741382598877, "learning_rate": 2.97883942599666e-07, "loss": 0.3648, "step": 19203 }, { "epoch": 0.8907235621521336, "grad_norm": 8.977062225341797, "learning_rate": 2.9763376845327153e-07, "loss": 0.3697, "step": 19204 }, { "epoch": 0.8907699443413729, "grad_norm": 9.100136756896973, "learning_rate": 2.973836961811605e-07, "loss": 0.3867, "step": 19205 }, { "epoch": 0.8908163265306123, "grad_norm": 9.731843948364258, "learning_rate": 2.9713372578875187e-07, "loss": 0.348, "step": 19206 }, { "epoch": 0.8908627087198516, "grad_norm": 6.413138389587402, "learning_rate": 2.968838572814614e-07, "loss": 0.2619, "step": 19207 }, { "epoch": 0.8909090909090909, "grad_norm": 6.607232093811035, "learning_rate": 2.9663409066470025e-07, "loss": 0.2861, "step": 19208 }, { "epoch": 0.8909554730983302, "grad_norm": 9.409683227539062, "learning_rate": 2.9638442594388086e-07, "loss": 0.3162, "step": 19209 }, { "epoch": 0.8910018552875696, "grad_norm": 12.77938461303711, "learning_rate": 2.961348631244121e-07, "loss": 0.3902, "step": 19210 }, { "epoch": 0.891048237476809, "grad_norm": 8.724288940429688, "learning_rate": 2.9588540221170083e-07, "loss": 0.3453, "step": 19211 }, { "epoch": 0.8910946196660482, "grad_norm": 9.201220512390137, "learning_rate": 2.9563604321115156e-07, "loss": 0.2515, "step": 19212 }, { "epoch": 0.8911410018552876, "grad_norm": 5.158440113067627, "learning_rate": 2.953867861281656e-07, "loss": 0.3206, "step": 19213 }, { "epoch": 0.8911873840445269, "grad_norm": 7.238873481750488, "learning_rate": 2.9513763096814305e-07, "loss": 0.3807, "step": 19214 }, { "epoch": 0.8912337662337663, "grad_norm": 9.356178283691406, "learning_rate": 2.948885777364824e-07, "loss": 0.3568, "step": 19215 }, { "epoch": 0.8912801484230055, "grad_norm": 8.7269868850708, "learning_rate": 2.946396264385792e-07, "loss": 0.3595, "step": 19216 }, { "epoch": 0.8913265306122449, "grad_norm": 5.993348598480225, "learning_rate": 2.9439077707982655e-07, "loss": 0.2329, "step": 19217 }, { "epoch": 0.8913729128014842, "grad_norm": 6.5138020515441895, "learning_rate": 2.9414202966561666e-07, "loss": 0.3056, "step": 19218 }, { "epoch": 0.8914192949907236, "grad_norm": 7.916711807250977, "learning_rate": 2.938933842013364e-07, "loss": 0.2622, "step": 19219 }, { "epoch": 0.891465677179963, "grad_norm": 8.218877792358398, "learning_rate": 2.936448406923742e-07, "loss": 0.2896, "step": 19220 }, { "epoch": 0.8915120593692022, "grad_norm": 19.70843505859375, "learning_rate": 2.933963991441141e-07, "loss": 0.3787, "step": 19221 }, { "epoch": 0.8915584415584416, "grad_norm": 8.09133529663086, "learning_rate": 2.931480595619385e-07, "loss": 0.2731, "step": 19222 }, { "epoch": 0.8916048237476809, "grad_norm": 6.455764293670654, "learning_rate": 2.9289982195122803e-07, "loss": 0.3904, "step": 19223 }, { "epoch": 0.8916512059369203, "grad_norm": 7.521719932556152, "learning_rate": 2.9265168631736005e-07, "loss": 0.3802, "step": 19224 }, { "epoch": 0.8916975881261595, "grad_norm": 10.510323524475098, "learning_rate": 2.924036526657098e-07, "loss": 0.3558, "step": 19225 }, { "epoch": 0.8917439703153989, "grad_norm": 8.690117835998535, "learning_rate": 2.921557210016518e-07, "loss": 0.3209, "step": 19226 }, { "epoch": 0.8917903525046382, "grad_norm": 4.497485637664795, "learning_rate": 2.919078913305567e-07, "loss": 0.2305, "step": 19227 }, { "epoch": 0.8918367346938776, "grad_norm": 9.162489891052246, "learning_rate": 2.9166016365779426e-07, "loss": 0.306, "step": 19228 }, { "epoch": 0.8918831168831168, "grad_norm": 7.872913837432861, "learning_rate": 2.914125379887317e-07, "loss": 0.3033, "step": 19229 }, { "epoch": 0.8919294990723562, "grad_norm": 10.83742618560791, "learning_rate": 2.91165014328732e-07, "loss": 0.3921, "step": 19230 }, { "epoch": 0.8919758812615955, "grad_norm": 11.424564361572266, "learning_rate": 2.9091759268315866e-07, "loss": 0.3399, "step": 19231 }, { "epoch": 0.8920222634508349, "grad_norm": 6.59008264541626, "learning_rate": 2.9067027305737185e-07, "loss": 0.2292, "step": 19232 }, { "epoch": 0.8920686456400743, "grad_norm": 8.740699768066406, "learning_rate": 2.9042305545673e-07, "loss": 0.3682, "step": 19233 }, { "epoch": 0.8921150278293135, "grad_norm": 5.18479061126709, "learning_rate": 2.9017593988658886e-07, "loss": 0.2506, "step": 19234 }, { "epoch": 0.8921614100185529, "grad_norm": 10.729759216308594, "learning_rate": 2.899289263523014e-07, "loss": 0.2933, "step": 19235 }, { "epoch": 0.8922077922077922, "grad_norm": 6.3660569190979, "learning_rate": 2.896820148592194e-07, "loss": 0.3782, "step": 19236 }, { "epoch": 0.8922541743970316, "grad_norm": 8.700620651245117, "learning_rate": 2.89435205412692e-07, "loss": 0.3671, "step": 19237 }, { "epoch": 0.8923005565862708, "grad_norm": 8.828102111816406, "learning_rate": 2.891884980180665e-07, "loss": 0.4023, "step": 19238 }, { "epoch": 0.8923469387755102, "grad_norm": 5.463171005249023, "learning_rate": 2.88941892680687e-07, "loss": 0.3764, "step": 19239 }, { "epoch": 0.8923933209647495, "grad_norm": 7.887010097503662, "learning_rate": 2.88695389405898e-07, "loss": 0.4151, "step": 19240 }, { "epoch": 0.8924397031539889, "grad_norm": 18.534643173217773, "learning_rate": 2.884489881990371e-07, "loss": 0.3056, "step": 19241 }, { "epoch": 0.8924860853432282, "grad_norm": 7.744901657104492, "learning_rate": 2.8820268906544437e-07, "loss": 0.2724, "step": 19242 }, { "epoch": 0.8925324675324675, "grad_norm": 6.041736125946045, "learning_rate": 2.8795649201045437e-07, "loss": 0.2521, "step": 19243 }, { "epoch": 0.8925788497217069, "grad_norm": 12.622598648071289, "learning_rate": 2.877103970394024e-07, "loss": 0.4001, "step": 19244 }, { "epoch": 0.8926252319109462, "grad_norm": 6.953920841217041, "learning_rate": 2.8746440415761965e-07, "loss": 0.3517, "step": 19245 }, { "epoch": 0.8926716141001856, "grad_norm": 8.21163272857666, "learning_rate": 2.8721851337043414e-07, "loss": 0.3021, "step": 19246 }, { "epoch": 0.8927179962894248, "grad_norm": 6.250157356262207, "learning_rate": 2.8697272468317386e-07, "loss": 0.2915, "step": 19247 }, { "epoch": 0.8927643784786642, "grad_norm": 5.319054126739502, "learning_rate": 2.867270381011639e-07, "loss": 0.3156, "step": 19248 }, { "epoch": 0.8928107606679035, "grad_norm": 16.129478454589844, "learning_rate": 2.864814536297261e-07, "loss": 0.5467, "step": 19249 }, { "epoch": 0.8928571428571429, "grad_norm": 13.99378490447998, "learning_rate": 2.862359712741819e-07, "loss": 0.3492, "step": 19250 }, { "epoch": 0.8929035250463822, "grad_norm": 7.786013603210449, "learning_rate": 2.8599059103985026e-07, "loss": 0.347, "step": 19251 }, { "epoch": 0.8929499072356215, "grad_norm": 4.848202705383301, "learning_rate": 2.857453129320448e-07, "loss": 0.3207, "step": 19252 }, { "epoch": 0.8929962894248609, "grad_norm": 6.9303202629089355, "learning_rate": 2.855001369560806e-07, "loss": 0.2907, "step": 19253 }, { "epoch": 0.8930426716141002, "grad_norm": 5.044919013977051, "learning_rate": 2.852550631172696e-07, "loss": 0.2713, "step": 19254 }, { "epoch": 0.8930890538033395, "grad_norm": 5.233714580535889, "learning_rate": 2.850100914209203e-07, "loss": 0.3213, "step": 19255 }, { "epoch": 0.8931354359925788, "grad_norm": 10.146220207214355, "learning_rate": 2.8476522187234177e-07, "loss": 0.3335, "step": 19256 }, { "epoch": 0.8931818181818182, "grad_norm": 16.204442977905273, "learning_rate": 2.8452045447683705e-07, "loss": 0.385, "step": 19257 }, { "epoch": 0.8932282003710575, "grad_norm": 5.161683559417725, "learning_rate": 2.8427578923970913e-07, "loss": 0.3094, "step": 19258 }, { "epoch": 0.8932745825602968, "grad_norm": 6.630278587341309, "learning_rate": 2.840312261662592e-07, "loss": 0.345, "step": 19259 }, { "epoch": 0.8933209647495362, "grad_norm": 8.518704414367676, "learning_rate": 2.8378676526178484e-07, "loss": 0.3637, "step": 19260 }, { "epoch": 0.8933673469387755, "grad_norm": 5.170598030090332, "learning_rate": 2.835424065315834e-07, "loss": 0.286, "step": 19261 }, { "epoch": 0.8934137291280149, "grad_norm": 6.006187915802002, "learning_rate": 2.832981499809484e-07, "loss": 0.313, "step": 19262 }, { "epoch": 0.8934601113172542, "grad_norm": 6.766152858734131, "learning_rate": 2.8305399561517067e-07, "loss": 0.3449, "step": 19263 }, { "epoch": 0.8935064935064935, "grad_norm": 4.216551780700684, "learning_rate": 2.8280994343953983e-07, "loss": 0.2916, "step": 19264 }, { "epoch": 0.8935528756957328, "grad_norm": 17.05866241455078, "learning_rate": 2.825659934593433e-07, "loss": 0.373, "step": 19265 }, { "epoch": 0.8935992578849722, "grad_norm": 5.276201248168945, "learning_rate": 2.823221456798669e-07, "loss": 0.2809, "step": 19266 }, { "epoch": 0.8936456400742115, "grad_norm": 4.750185489654541, "learning_rate": 2.8207840010639365e-07, "loss": 0.1833, "step": 19267 }, { "epoch": 0.8936920222634508, "grad_norm": 8.37842845916748, "learning_rate": 2.818347567442026e-07, "loss": 0.3137, "step": 19268 }, { "epoch": 0.8937384044526901, "grad_norm": 4.543163299560547, "learning_rate": 2.815912155985728e-07, "loss": 0.2728, "step": 19269 }, { "epoch": 0.8937847866419295, "grad_norm": 4.797008037567139, "learning_rate": 2.8134777667478074e-07, "loss": 0.1952, "step": 19270 }, { "epoch": 0.8938311688311689, "grad_norm": 10.744144439697266, "learning_rate": 2.8110443997809987e-07, "loss": 0.3475, "step": 19271 }, { "epoch": 0.8938775510204081, "grad_norm": 11.327482223510742, "learning_rate": 2.808612055138038e-07, "loss": 0.2924, "step": 19272 }, { "epoch": 0.8939239332096475, "grad_norm": 10.226133346557617, "learning_rate": 2.806180732871594e-07, "loss": 0.3216, "step": 19273 }, { "epoch": 0.8939703153988868, "grad_norm": 5.766125202178955, "learning_rate": 2.8037504330343525e-07, "loss": 0.3697, "step": 19274 }, { "epoch": 0.8940166975881262, "grad_norm": 5.613391399383545, "learning_rate": 2.801321155678965e-07, "loss": 0.3592, "step": 19275 }, { "epoch": 0.8940630797773655, "grad_norm": 4.872082710266113, "learning_rate": 2.798892900858058e-07, "loss": 0.2929, "step": 19276 }, { "epoch": 0.8941094619666048, "grad_norm": 9.217377662658691, "learning_rate": 2.796465668624243e-07, "loss": 0.2743, "step": 19277 }, { "epoch": 0.8941558441558441, "grad_norm": 7.523751258850098, "learning_rate": 2.7940394590301123e-07, "loss": 0.2796, "step": 19278 }, { "epoch": 0.8942022263450835, "grad_norm": 6.493042945861816, "learning_rate": 2.791614272128207e-07, "loss": 0.2536, "step": 19279 }, { "epoch": 0.8942486085343229, "grad_norm": 6.495639324188232, "learning_rate": 2.7891901079710795e-07, "loss": 0.3136, "step": 19280 }, { "epoch": 0.8942949907235621, "grad_norm": 8.90538501739502, "learning_rate": 2.7867669666112487e-07, "loss": 0.3554, "step": 19281 }, { "epoch": 0.8943413729128015, "grad_norm": 10.523075103759766, "learning_rate": 2.784344848101206e-07, "loss": 0.3718, "step": 19282 }, { "epoch": 0.8943877551020408, "grad_norm": 5.919215202331543, "learning_rate": 2.7819237524934375e-07, "loss": 0.3529, "step": 19283 }, { "epoch": 0.8944341372912802, "grad_norm": 20.630849838256836, "learning_rate": 2.7795036798403786e-07, "loss": 0.3856, "step": 19284 }, { "epoch": 0.8944805194805194, "grad_norm": 12.165095329284668, "learning_rate": 2.7770846301944655e-07, "loss": 0.439, "step": 19285 }, { "epoch": 0.8945269016697588, "grad_norm": 7.350240230560303, "learning_rate": 2.774666603608106e-07, "loss": 0.2462, "step": 19286 }, { "epoch": 0.8945732838589981, "grad_norm": 7.405416488647461, "learning_rate": 2.7722496001336865e-07, "loss": 0.2103, "step": 19287 }, { "epoch": 0.8946196660482375, "grad_norm": 10.207433700561523, "learning_rate": 2.76983361982357e-07, "loss": 0.2721, "step": 19288 }, { "epoch": 0.8946660482374769, "grad_norm": 9.273443222045898, "learning_rate": 2.767418662730104e-07, "loss": 0.2657, "step": 19289 }, { "epoch": 0.8947124304267161, "grad_norm": 7.666621208190918, "learning_rate": 2.7650047289055904e-07, "loss": 0.2918, "step": 19290 }, { "epoch": 0.8947588126159555, "grad_norm": 8.894394874572754, "learning_rate": 2.762591818402338e-07, "loss": 0.2644, "step": 19291 }, { "epoch": 0.8948051948051948, "grad_norm": 8.46041488647461, "learning_rate": 2.7601799312726165e-07, "loss": 0.4248, "step": 19292 }, { "epoch": 0.8948515769944342, "grad_norm": 8.73720932006836, "learning_rate": 2.757769067568683e-07, "loss": 0.2973, "step": 19293 }, { "epoch": 0.8948979591836734, "grad_norm": 7.01710319519043, "learning_rate": 2.755359227342774e-07, "loss": 0.3755, "step": 19294 }, { "epoch": 0.8949443413729128, "grad_norm": 4.146442890167236, "learning_rate": 2.752950410647076e-07, "loss": 0.2752, "step": 19295 }, { "epoch": 0.8949907235621521, "grad_norm": 12.227873802185059, "learning_rate": 2.750542617533791e-07, "loss": 0.3523, "step": 19296 }, { "epoch": 0.8950371057513915, "grad_norm": 7.880155563354492, "learning_rate": 2.748135848055078e-07, "loss": 0.2959, "step": 19297 }, { "epoch": 0.8950834879406308, "grad_norm": 5.371973037719727, "learning_rate": 2.745730102263078e-07, "loss": 0.3383, "step": 19298 }, { "epoch": 0.8951298701298701, "grad_norm": 17.46217155456543, "learning_rate": 2.74332538020991e-07, "loss": 0.5866, "step": 19299 }, { "epoch": 0.8951762523191095, "grad_norm": 4.668810844421387, "learning_rate": 2.7409216819476836e-07, "loss": 0.2877, "step": 19300 }, { "epoch": 0.8952226345083488, "grad_norm": 11.141576766967773, "learning_rate": 2.7385190075284504e-07, "loss": 0.3067, "step": 19301 }, { "epoch": 0.8952690166975881, "grad_norm": 8.211102485656738, "learning_rate": 2.736117357004281e-07, "loss": 0.2469, "step": 19302 }, { "epoch": 0.8953153988868274, "grad_norm": 9.548192977905273, "learning_rate": 2.7337167304271936e-07, "loss": 0.3794, "step": 19303 }, { "epoch": 0.8953617810760668, "grad_norm": 11.165276527404785, "learning_rate": 2.731317127849209e-07, "loss": 0.3432, "step": 19304 }, { "epoch": 0.8954081632653061, "grad_norm": 12.988997459411621, "learning_rate": 2.728918549322307e-07, "loss": 0.3633, "step": 19305 }, { "epoch": 0.8954545454545455, "grad_norm": 17.92234230041504, "learning_rate": 2.726520994898452e-07, "loss": 0.3697, "step": 19306 }, { "epoch": 0.8955009276437847, "grad_norm": 3.902374744415283, "learning_rate": 2.7241244646295795e-07, "loss": 0.2562, "step": 19307 }, { "epoch": 0.8955473098330241, "grad_norm": 5.082980155944824, "learning_rate": 2.721728958567621e-07, "loss": 0.3614, "step": 19308 }, { "epoch": 0.8955936920222635, "grad_norm": 5.43841552734375, "learning_rate": 2.7193344767644627e-07, "loss": 0.3678, "step": 19309 }, { "epoch": 0.8956400742115028, "grad_norm": 37.292640686035156, "learning_rate": 2.716941019271996e-07, "loss": 0.509, "step": 19310 }, { "epoch": 0.8956864564007421, "grad_norm": 4.047848701477051, "learning_rate": 2.714548586142052e-07, "loss": 0.287, "step": 19311 }, { "epoch": 0.8957328385899814, "grad_norm": 7.311829090118408, "learning_rate": 2.712157177426478e-07, "loss": 0.3476, "step": 19312 }, { "epoch": 0.8957792207792208, "grad_norm": 19.311092376708984, "learning_rate": 2.709766793177071e-07, "loss": 0.3877, "step": 19313 }, { "epoch": 0.8958256029684601, "grad_norm": 5.776551246643066, "learning_rate": 2.707377433445624e-07, "loss": 0.37, "step": 19314 }, { "epoch": 0.8958719851576994, "grad_norm": 7.075850486755371, "learning_rate": 2.7049890982839113e-07, "loss": 0.2132, "step": 19315 }, { "epoch": 0.8959183673469387, "grad_norm": 7.212954521179199, "learning_rate": 2.702601787743653e-07, "loss": 0.353, "step": 19316 }, { "epoch": 0.8959647495361781, "grad_norm": 7.290435791015625, "learning_rate": 2.700215501876585e-07, "loss": 0.3988, "step": 19317 }, { "epoch": 0.8960111317254175, "grad_norm": 10.236227989196777, "learning_rate": 2.697830240734395e-07, "loss": 0.346, "step": 19318 }, { "epoch": 0.8960575139146568, "grad_norm": 14.089468955993652, "learning_rate": 2.695446004368768e-07, "loss": 0.274, "step": 19319 }, { "epoch": 0.8961038961038961, "grad_norm": 13.907029151916504, "learning_rate": 2.693062792831358e-07, "loss": 0.5087, "step": 19320 }, { "epoch": 0.8961502782931354, "grad_norm": 6.05491828918457, "learning_rate": 2.6906806061737846e-07, "loss": 0.2281, "step": 19321 }, { "epoch": 0.8961966604823748, "grad_norm": 4.410130500793457, "learning_rate": 2.6882994444476616e-07, "loss": 0.2531, "step": 19322 }, { "epoch": 0.8962430426716141, "grad_norm": 9.388191223144531, "learning_rate": 2.6859193077045763e-07, "loss": 0.3345, "step": 19323 }, { "epoch": 0.8962894248608534, "grad_norm": 7.638976097106934, "learning_rate": 2.683540195996098e-07, "loss": 0.2715, "step": 19324 }, { "epoch": 0.8963358070500927, "grad_norm": 7.879029273986816, "learning_rate": 2.681162109373764e-07, "loss": 0.3043, "step": 19325 }, { "epoch": 0.8963821892393321, "grad_norm": 17.618757247924805, "learning_rate": 2.678785047889093e-07, "loss": 0.4135, "step": 19326 }, { "epoch": 0.8964285714285715, "grad_norm": 6.1152167320251465, "learning_rate": 2.6764090115935836e-07, "loss": 0.3026, "step": 19327 }, { "epoch": 0.8964749536178107, "grad_norm": 6.5943498611450195, "learning_rate": 2.674034000538711e-07, "loss": 0.3867, "step": 19328 }, { "epoch": 0.8965213358070501, "grad_norm": 13.343191146850586, "learning_rate": 2.671660014775934e-07, "loss": 0.4432, "step": 19329 }, { "epoch": 0.8965677179962894, "grad_norm": 7.365484237670898, "learning_rate": 2.669287054356684e-07, "loss": 0.3513, "step": 19330 }, { "epoch": 0.8966141001855288, "grad_norm": 5.622284412384033, "learning_rate": 2.6669151193323585e-07, "loss": 0.3509, "step": 19331 }, { "epoch": 0.8966604823747681, "grad_norm": 11.540936470031738, "learning_rate": 2.6645442097543494e-07, "loss": 0.3196, "step": 19332 }, { "epoch": 0.8967068645640074, "grad_norm": 6.360244274139404, "learning_rate": 2.662174325674027e-07, "loss": 0.283, "step": 19333 }, { "epoch": 0.8967532467532467, "grad_norm": 5.735903739929199, "learning_rate": 2.659805467142729e-07, "loss": 0.2504, "step": 19334 }, { "epoch": 0.8967996289424861, "grad_norm": 6.905209064483643, "learning_rate": 2.657437634211779e-07, "loss": 0.3369, "step": 19335 }, { "epoch": 0.8968460111317255, "grad_norm": 5.8902482986450195, "learning_rate": 2.655070826932471e-07, "loss": 0.2984, "step": 19336 }, { "epoch": 0.8968923933209647, "grad_norm": 9.311081886291504, "learning_rate": 2.6527050453560796e-07, "loss": 0.2556, "step": 19337 }, { "epoch": 0.8969387755102041, "grad_norm": 5.585747718811035, "learning_rate": 2.650340289533859e-07, "loss": 0.382, "step": 19338 }, { "epoch": 0.8969851576994434, "grad_norm": 5.5777974128723145, "learning_rate": 2.6479765595170404e-07, "loss": 0.2907, "step": 19339 }, { "epoch": 0.8970315398886828, "grad_norm": 7.564520359039307, "learning_rate": 2.645613855356838e-07, "loss": 0.458, "step": 19340 }, { "epoch": 0.897077922077922, "grad_norm": 4.711554527282715, "learning_rate": 2.6432521771044385e-07, "loss": 0.3057, "step": 19341 }, { "epoch": 0.8971243042671614, "grad_norm": 10.314647674560547, "learning_rate": 2.6408915248109955e-07, "loss": 0.4657, "step": 19342 }, { "epoch": 0.8971706864564007, "grad_norm": 8.31649112701416, "learning_rate": 2.6385318985276577e-07, "loss": 0.3832, "step": 19343 }, { "epoch": 0.8972170686456401, "grad_norm": 5.025340557098389, "learning_rate": 2.63617329830555e-07, "loss": 0.2384, "step": 19344 }, { "epoch": 0.8972634508348795, "grad_norm": 12.162160873413086, "learning_rate": 2.633815724195765e-07, "loss": 0.331, "step": 19345 }, { "epoch": 0.8973098330241187, "grad_norm": 15.082268714904785, "learning_rate": 2.6314591762493846e-07, "loss": 0.3812, "step": 19346 }, { "epoch": 0.8973562152133581, "grad_norm": 5.847423553466797, "learning_rate": 2.6291036545174455e-07, "loss": 0.3465, "step": 19347 }, { "epoch": 0.8974025974025974, "grad_norm": 6.654534339904785, "learning_rate": 2.6267491590509954e-07, "loss": 0.347, "step": 19348 }, { "epoch": 0.8974489795918368, "grad_norm": 8.345223426818848, "learning_rate": 2.6243956899010383e-07, "loss": 0.3487, "step": 19349 }, { "epoch": 0.897495361781076, "grad_norm": 7.6650166511535645, "learning_rate": 2.622043247118561e-07, "loss": 0.2856, "step": 19350 }, { "epoch": 0.8975417439703154, "grad_norm": 14.337325096130371, "learning_rate": 2.619691830754534e-07, "loss": 0.33, "step": 19351 }, { "epoch": 0.8975881261595547, "grad_norm": 14.470521926879883, "learning_rate": 2.617341440859883e-07, "loss": 0.4673, "step": 19352 }, { "epoch": 0.8976345083487941, "grad_norm": 8.070034980773926, "learning_rate": 2.6149920774855397e-07, "loss": 0.3238, "step": 19353 }, { "epoch": 0.8976808905380333, "grad_norm": 11.33721923828125, "learning_rate": 2.6126437406823966e-07, "loss": 0.3252, "step": 19354 }, { "epoch": 0.8977272727272727, "grad_norm": 6.48372745513916, "learning_rate": 2.610296430501341e-07, "loss": 0.3343, "step": 19355 }, { "epoch": 0.8977736549165121, "grad_norm": 10.832240104675293, "learning_rate": 2.6079501469932154e-07, "loss": 0.3653, "step": 19356 }, { "epoch": 0.8978200371057514, "grad_norm": 7.102280616760254, "learning_rate": 2.6056048902088516e-07, "loss": 0.3548, "step": 19357 }, { "epoch": 0.8978664192949907, "grad_norm": 11.233704566955566, "learning_rate": 2.6032606601990583e-07, "loss": 0.3712, "step": 19358 }, { "epoch": 0.89791280148423, "grad_norm": 8.259747505187988, "learning_rate": 2.600917457014629e-07, "loss": 0.2728, "step": 19359 }, { "epoch": 0.8979591836734694, "grad_norm": 3.645591974258423, "learning_rate": 2.598575280706317e-07, "loss": 0.33, "step": 19360 }, { "epoch": 0.8980055658627087, "grad_norm": 13.171045303344727, "learning_rate": 2.5962341313248764e-07, "loss": 0.4253, "step": 19361 }, { "epoch": 0.8980519480519481, "grad_norm": 11.033924102783203, "learning_rate": 2.5938940089210165e-07, "loss": 0.3927, "step": 19362 }, { "epoch": 0.8980983302411873, "grad_norm": 5.021219730377197, "learning_rate": 2.5915549135454353e-07, "loss": 0.2626, "step": 19363 }, { "epoch": 0.8981447124304267, "grad_norm": 6.152206897735596, "learning_rate": 2.5892168452488155e-07, "loss": 0.3976, "step": 19364 }, { "epoch": 0.8981910946196661, "grad_norm": 7.115890979766846, "learning_rate": 2.5868798040818044e-07, "loss": 0.3116, "step": 19365 }, { "epoch": 0.8982374768089054, "grad_norm": 23.60820960998535, "learning_rate": 2.5845437900950397e-07, "loss": 0.2897, "step": 19366 }, { "epoch": 0.8982838589981447, "grad_norm": 10.710888862609863, "learning_rate": 2.5822088033391204e-07, "loss": 0.3413, "step": 19367 }, { "epoch": 0.898330241187384, "grad_norm": 8.874136924743652, "learning_rate": 2.5798748438646326e-07, "loss": 0.4135, "step": 19368 }, { "epoch": 0.8983766233766234, "grad_norm": 6.476028919219971, "learning_rate": 2.5775419117221477e-07, "loss": 0.3363, "step": 19369 }, { "epoch": 0.8984230055658627, "grad_norm": 5.771538734436035, "learning_rate": 2.5752100069622087e-07, "loss": 0.2836, "step": 19370 }, { "epoch": 0.898469387755102, "grad_norm": 5.74282169342041, "learning_rate": 2.5728791296353306e-07, "loss": 0.2697, "step": 19371 }, { "epoch": 0.8985157699443413, "grad_norm": 5.806394100189209, "learning_rate": 2.570549279792012e-07, "loss": 0.304, "step": 19372 }, { "epoch": 0.8985621521335807, "grad_norm": 12.691488265991211, "learning_rate": 2.5682204574827176e-07, "loss": 0.4245, "step": 19373 }, { "epoch": 0.8986085343228201, "grad_norm": 5.444863319396973, "learning_rate": 2.5658926627579184e-07, "loss": 0.3294, "step": 19374 }, { "epoch": 0.8986549165120594, "grad_norm": 4.989833354949951, "learning_rate": 2.5635658956680296e-07, "loss": 0.2302, "step": 19375 }, { "epoch": 0.8987012987012987, "grad_norm": 10.418224334716797, "learning_rate": 2.5612401562634725e-07, "loss": 0.3643, "step": 19376 }, { "epoch": 0.898747680890538, "grad_norm": 6.52988862991333, "learning_rate": 2.558915444594623e-07, "loss": 0.2818, "step": 19377 }, { "epoch": 0.8987940630797774, "grad_norm": 6.4251604080200195, "learning_rate": 2.556591760711846e-07, "loss": 0.3199, "step": 19378 }, { "epoch": 0.8988404452690167, "grad_norm": 5.222272872924805, "learning_rate": 2.5542691046654857e-07, "loss": 0.257, "step": 19379 }, { "epoch": 0.898886827458256, "grad_norm": 7.958920955657959, "learning_rate": 2.551947476505856e-07, "loss": 0.3478, "step": 19380 }, { "epoch": 0.8989332096474953, "grad_norm": 11.017071723937988, "learning_rate": 2.549626876283262e-07, "loss": 0.3734, "step": 19381 }, { "epoch": 0.8989795918367347, "grad_norm": 5.872453212738037, "learning_rate": 2.54730730404798e-07, "loss": 0.3049, "step": 19382 }, { "epoch": 0.8990259740259741, "grad_norm": 8.196659088134766, "learning_rate": 2.544988759850253e-07, "loss": 0.3343, "step": 19383 }, { "epoch": 0.8990723562152133, "grad_norm": 7.997631549835205, "learning_rate": 2.5426712437403134e-07, "loss": 0.3735, "step": 19384 }, { "epoch": 0.8991187384044527, "grad_norm": 4.455254554748535, "learning_rate": 2.540354755768365e-07, "loss": 0.2762, "step": 19385 }, { "epoch": 0.899165120593692, "grad_norm": 4.950516223907471, "learning_rate": 2.5380392959846014e-07, "loss": 0.246, "step": 19386 }, { "epoch": 0.8992115027829314, "grad_norm": 7.152761459350586, "learning_rate": 2.535724864439193e-07, "loss": 0.268, "step": 19387 }, { "epoch": 0.8992578849721707, "grad_norm": 5.855671405792236, "learning_rate": 2.5334114611822615e-07, "loss": 0.3406, "step": 19388 }, { "epoch": 0.89930426716141, "grad_norm": 8.853610038757324, "learning_rate": 2.531099086263933e-07, "loss": 0.3772, "step": 19389 }, { "epoch": 0.8993506493506493, "grad_norm": 5.37954044342041, "learning_rate": 2.528787739734306e-07, "loss": 0.3721, "step": 19390 }, { "epoch": 0.8993970315398887, "grad_norm": 8.347162246704102, "learning_rate": 2.526477421643453e-07, "loss": 0.2151, "step": 19391 }, { "epoch": 0.8994434137291281, "grad_norm": 9.353631973266602, "learning_rate": 2.5241681320414326e-07, "loss": 0.3337, "step": 19392 }, { "epoch": 0.8994897959183673, "grad_norm": 6.2790207862854, "learning_rate": 2.5218598709782716e-07, "loss": 0.3215, "step": 19393 }, { "epoch": 0.8995361781076067, "grad_norm": 6.54559850692749, "learning_rate": 2.5195526385039637e-07, "loss": 0.3558, "step": 19394 }, { "epoch": 0.899582560296846, "grad_norm": 5.304439544677734, "learning_rate": 2.517246434668513e-07, "loss": 0.2983, "step": 19395 }, { "epoch": 0.8996289424860854, "grad_norm": 8.058765411376953, "learning_rate": 2.514941259521869e-07, "loss": 0.1818, "step": 19396 }, { "epoch": 0.8996753246753246, "grad_norm": 6.55950927734375, "learning_rate": 2.512637113113975e-07, "loss": 0.3783, "step": 19397 }, { "epoch": 0.899721706864564, "grad_norm": 4.953808784484863, "learning_rate": 2.5103339954947624e-07, "loss": 0.3012, "step": 19398 }, { "epoch": 0.8997680890538033, "grad_norm": 7.867869853973389, "learning_rate": 2.5080319067141033e-07, "loss": 0.3404, "step": 19399 }, { "epoch": 0.8998144712430427, "grad_norm": 5.455786228179932, "learning_rate": 2.5057308468218913e-07, "loss": 0.3008, "step": 19400 }, { "epoch": 0.899860853432282, "grad_norm": 6.50028133392334, "learning_rate": 2.503430815867963e-07, "loss": 0.333, "step": 19401 }, { "epoch": 0.8999072356215213, "grad_norm": 5.877671718597412, "learning_rate": 2.501131813902158e-07, "loss": 0.2895, "step": 19402 }, { "epoch": 0.8999536178107607, "grad_norm": 18.4116153717041, "learning_rate": 2.4988338409742743e-07, "loss": 0.3143, "step": 19403 }, { "epoch": 0.9, "grad_norm": 10.764175415039062, "learning_rate": 2.4965368971341107e-07, "loss": 0.2692, "step": 19404 }, { "epoch": 0.9, "eval_loss": 0.31949976086616516, "eval_runtime": 38.03, "eval_samples_per_second": 45.832, "eval_steps_per_second": 5.732, "step": 19404 }, { "epoch": 0.9000463821892394, "grad_norm": 8.18177604675293, "learning_rate": 2.494240982431417e-07, "loss": 0.3566, "step": 19405 }, { "epoch": 0.9000927643784786, "grad_norm": 8.949702262878418, "learning_rate": 2.4919460969159315e-07, "loss": 0.3138, "step": 19406 }, { "epoch": 0.900139146567718, "grad_norm": 4.128973960876465, "learning_rate": 2.4896522406373745e-07, "loss": 0.2768, "step": 19407 }, { "epoch": 0.9001855287569573, "grad_norm": 7.239128589630127, "learning_rate": 2.487359413645446e-07, "loss": 0.3336, "step": 19408 }, { "epoch": 0.9002319109461967, "grad_norm": 4.529285430908203, "learning_rate": 2.4850676159898226e-07, "loss": 0.1538, "step": 19409 }, { "epoch": 0.900278293135436, "grad_norm": 5.091893672943115, "learning_rate": 2.482776847720142e-07, "loss": 0.3712, "step": 19410 }, { "epoch": 0.9003246753246753, "grad_norm": 19.87103271484375, "learning_rate": 2.480487108886032e-07, "loss": 0.5354, "step": 19411 }, { "epoch": 0.9003710575139147, "grad_norm": 6.228236675262451, "learning_rate": 2.478198399537113e-07, "loss": 0.3158, "step": 19412 }, { "epoch": 0.900417439703154, "grad_norm": 9.043685913085938, "learning_rate": 2.4759107197229517e-07, "loss": 0.281, "step": 19413 }, { "epoch": 0.9004638218923933, "grad_norm": 7.576638698577881, "learning_rate": 2.4736240694931245e-07, "loss": 0.265, "step": 19414 }, { "epoch": 0.9005102040816326, "grad_norm": 6.431333541870117, "learning_rate": 2.4713384488971704e-07, "loss": 0.3499, "step": 19415 }, { "epoch": 0.900556586270872, "grad_norm": 8.16408634185791, "learning_rate": 2.4690538579845933e-07, "loss": 0.4572, "step": 19416 }, { "epoch": 0.9006029684601113, "grad_norm": 13.925259590148926, "learning_rate": 2.4667702968048933e-07, "loss": 0.3705, "step": 19417 }, { "epoch": 0.9006493506493507, "grad_norm": 9.8298978805542, "learning_rate": 2.4644877654075414e-07, "loss": 0.4312, "step": 19418 }, { "epoch": 0.90069573283859, "grad_norm": 7.013426780700684, "learning_rate": 2.462206263841993e-07, "loss": 0.2813, "step": 19419 }, { "epoch": 0.9007421150278293, "grad_norm": 3.923676013946533, "learning_rate": 2.45992579215768e-07, "loss": 0.2452, "step": 19420 }, { "epoch": 0.9007884972170687, "grad_norm": 5.982400894165039, "learning_rate": 2.4576463504039916e-07, "loss": 0.3067, "step": 19421 }, { "epoch": 0.900834879406308, "grad_norm": 8.427120208740234, "learning_rate": 2.455367938630321e-07, "loss": 0.3387, "step": 19422 }, { "epoch": 0.9008812615955473, "grad_norm": 5.214511394500732, "learning_rate": 2.453090556886023e-07, "loss": 0.288, "step": 19423 }, { "epoch": 0.9009276437847866, "grad_norm": 9.822385787963867, "learning_rate": 2.450814205220442e-07, "loss": 0.3886, "step": 19424 }, { "epoch": 0.900974025974026, "grad_norm": 11.0631685256958, "learning_rate": 2.448538883682888e-07, "loss": 0.3283, "step": 19425 }, { "epoch": 0.9010204081632653, "grad_norm": 8.20822811126709, "learning_rate": 2.446264592322667e-07, "loss": 0.4643, "step": 19426 }, { "epoch": 0.9010667903525046, "grad_norm": 5.467688083648682, "learning_rate": 2.4439913311890386e-07, "loss": 0.1782, "step": 19427 }, { "epoch": 0.901113172541744, "grad_norm": 7.196452617645264, "learning_rate": 2.441719100331252e-07, "loss": 0.2792, "step": 19428 }, { "epoch": 0.9011595547309833, "grad_norm": 10.132771492004395, "learning_rate": 2.439447899798536e-07, "loss": 0.2094, "step": 19429 }, { "epoch": 0.9012059369202227, "grad_norm": 7.875137805938721, "learning_rate": 2.4371777296401e-07, "loss": 0.3049, "step": 19430 }, { "epoch": 0.901252319109462, "grad_norm": 9.063446044921875, "learning_rate": 2.434908589905122e-07, "loss": 0.4343, "step": 19431 }, { "epoch": 0.9012987012987013, "grad_norm": 6.986227512359619, "learning_rate": 2.432640480642756e-07, "loss": 0.2052, "step": 19432 }, { "epoch": 0.9013450834879406, "grad_norm": 6.584331512451172, "learning_rate": 2.430373401902147e-07, "loss": 0.2115, "step": 19433 }, { "epoch": 0.90139146567718, "grad_norm": 4.954171657562256, "learning_rate": 2.4281073537324116e-07, "loss": 0.1906, "step": 19434 }, { "epoch": 0.9014378478664193, "grad_norm": 4.999621391296387, "learning_rate": 2.425842336182632e-07, "loss": 0.3161, "step": 19435 }, { "epoch": 0.9014842300556586, "grad_norm": 7.836327075958252, "learning_rate": 2.423578349301897e-07, "loss": 0.2853, "step": 19436 }, { "epoch": 0.9015306122448979, "grad_norm": 5.052456378936768, "learning_rate": 2.421315393139234e-07, "loss": 0.2671, "step": 19437 }, { "epoch": 0.9015769944341373, "grad_norm": 6.096677780151367, "learning_rate": 2.419053467743676e-07, "loss": 0.1738, "step": 19438 }, { "epoch": 0.9016233766233767, "grad_norm": 5.112178802490234, "learning_rate": 2.416792573164234e-07, "loss": 0.3266, "step": 19439 }, { "epoch": 0.9016697588126159, "grad_norm": 4.656489849090576, "learning_rate": 2.4145327094498795e-07, "loss": 0.2791, "step": 19440 }, { "epoch": 0.9017161410018553, "grad_norm": 10.02713680267334, "learning_rate": 2.4122738766495733e-07, "loss": 0.357, "step": 19441 }, { "epoch": 0.9017625231910946, "grad_norm": 6.557274341583252, "learning_rate": 2.410016074812266e-07, "loss": 0.3949, "step": 19442 }, { "epoch": 0.901808905380334, "grad_norm": 18.150968551635742, "learning_rate": 2.407759303986845e-07, "loss": 0.3978, "step": 19443 }, { "epoch": 0.9018552875695733, "grad_norm": 13.193853378295898, "learning_rate": 2.4055035642222225e-07, "loss": 0.4829, "step": 19444 }, { "epoch": 0.9019016697588126, "grad_norm": 6.931741714477539, "learning_rate": 2.4032488555672586e-07, "loss": 0.2581, "step": 19445 }, { "epoch": 0.9019480519480519, "grad_norm": 7.676023960113525, "learning_rate": 2.400995178070803e-07, "loss": 0.3256, "step": 19446 }, { "epoch": 0.9019944341372913, "grad_norm": 7.4701642990112305, "learning_rate": 2.398742531781695e-07, "loss": 0.3656, "step": 19447 }, { "epoch": 0.9020408163265307, "grad_norm": 9.387630462646484, "learning_rate": 2.396490916748706e-07, "loss": 0.4038, "step": 19448 }, { "epoch": 0.9020871985157699, "grad_norm": 8.7957124710083, "learning_rate": 2.394240333020642e-07, "loss": 0.3077, "step": 19449 }, { "epoch": 0.9021335807050093, "grad_norm": 10.293909072875977, "learning_rate": 2.391990780646247e-07, "loss": 0.3388, "step": 19450 }, { "epoch": 0.9021799628942486, "grad_norm": 7.92743444442749, "learning_rate": 2.3897422596742603e-07, "loss": 0.3267, "step": 19451 }, { "epoch": 0.902226345083488, "grad_norm": 8.499954223632812, "learning_rate": 2.3874947701533977e-07, "loss": 0.4297, "step": 19452 }, { "epoch": 0.9022727272727272, "grad_norm": 16.553558349609375, "learning_rate": 2.3852483121323546e-07, "loss": 0.4816, "step": 19453 }, { "epoch": 0.9023191094619666, "grad_norm": 9.000968933105469, "learning_rate": 2.3830028856597854e-07, "loss": 0.3005, "step": 19454 }, { "epoch": 0.9023654916512059, "grad_norm": 4.850947380065918, "learning_rate": 2.3807584907843406e-07, "loss": 0.2461, "step": 19455 }, { "epoch": 0.9024118738404453, "grad_norm": 9.01282787322998, "learning_rate": 2.3785151275546536e-07, "loss": 0.2942, "step": 19456 }, { "epoch": 0.9024582560296845, "grad_norm": 8.767152786254883, "learning_rate": 2.3762727960193133e-07, "loss": 0.3627, "step": 19457 }, { "epoch": 0.9025046382189239, "grad_norm": 8.306632995605469, "learning_rate": 2.374031496226914e-07, "loss": 0.2825, "step": 19458 }, { "epoch": 0.9025510204081633, "grad_norm": 10.168416023254395, "learning_rate": 2.371791228225989e-07, "loss": 0.3638, "step": 19459 }, { "epoch": 0.9025974025974026, "grad_norm": 9.037729263305664, "learning_rate": 2.3695519920650944e-07, "loss": 0.2997, "step": 19460 }, { "epoch": 0.902643784786642, "grad_norm": 8.112112045288086, "learning_rate": 2.3673137877927243e-07, "loss": 0.4068, "step": 19461 }, { "epoch": 0.9026901669758812, "grad_norm": 5.546782970428467, "learning_rate": 2.3650766154573846e-07, "loss": 0.2554, "step": 19462 }, { "epoch": 0.9027365491651206, "grad_norm": 4.906731605529785, "learning_rate": 2.3628404751075363e-07, "loss": 0.2749, "step": 19463 }, { "epoch": 0.9027829313543599, "grad_norm": 9.572588920593262, "learning_rate": 2.360605366791624e-07, "loss": 0.3027, "step": 19464 }, { "epoch": 0.9028293135435993, "grad_norm": 5.458088397979736, "learning_rate": 2.3583712905580646e-07, "loss": 0.253, "step": 19465 }, { "epoch": 0.9028756957328385, "grad_norm": 7.520199298858643, "learning_rate": 2.356138246455264e-07, "loss": 0.4093, "step": 19466 }, { "epoch": 0.9029220779220779, "grad_norm": 13.401474952697754, "learning_rate": 2.3539062345315998e-07, "loss": 0.4394, "step": 19467 }, { "epoch": 0.9029684601113173, "grad_norm": 5.770413875579834, "learning_rate": 2.3516752548354226e-07, "loss": 0.2288, "step": 19468 }, { "epoch": 0.9030148423005566, "grad_norm": 3.6138176918029785, "learning_rate": 2.3494453074150768e-07, "loss": 0.3047, "step": 19469 }, { "epoch": 0.9030612244897959, "grad_norm": 10.17443561553955, "learning_rate": 2.347216392318863e-07, "loss": 0.3064, "step": 19470 }, { "epoch": 0.9031076066790352, "grad_norm": 6.112210273742676, "learning_rate": 2.3449885095950643e-07, "loss": 0.2986, "step": 19471 }, { "epoch": 0.9031539888682746, "grad_norm": 17.969881057739258, "learning_rate": 2.3427616592919587e-07, "loss": 0.4742, "step": 19472 }, { "epoch": 0.9032003710575139, "grad_norm": 9.916979789733887, "learning_rate": 2.3405358414577862e-07, "loss": 0.258, "step": 19473 }, { "epoch": 0.9032467532467533, "grad_norm": 5.540518283843994, "learning_rate": 2.3383110561407685e-07, "loss": 0.3243, "step": 19474 }, { "epoch": 0.9032931354359925, "grad_norm": 9.647012710571289, "learning_rate": 2.3360873033891062e-07, "loss": 0.3274, "step": 19475 }, { "epoch": 0.9033395176252319, "grad_norm": 14.717347145080566, "learning_rate": 2.3338645832509666e-07, "loss": 0.4636, "step": 19476 }, { "epoch": 0.9033858998144713, "grad_norm": 11.331686019897461, "learning_rate": 2.3316428957745052e-07, "loss": 0.308, "step": 19477 }, { "epoch": 0.9034322820037106, "grad_norm": 9.72457504272461, "learning_rate": 2.3294222410078616e-07, "loss": 0.3948, "step": 19478 }, { "epoch": 0.9034786641929499, "grad_norm": 13.089457511901855, "learning_rate": 2.3272026189991414e-07, "loss": 0.4885, "step": 19479 }, { "epoch": 0.9035250463821892, "grad_norm": 5.390183925628662, "learning_rate": 2.32498402979644e-07, "loss": 0.3036, "step": 19480 }, { "epoch": 0.9035714285714286, "grad_norm": 4.274788856506348, "learning_rate": 2.3227664734478074e-07, "loss": 0.2858, "step": 19481 }, { "epoch": 0.9036178107606679, "grad_norm": 14.053973197937012, "learning_rate": 2.3205499500012886e-07, "loss": 0.5518, "step": 19482 }, { "epoch": 0.9036641929499072, "grad_norm": 9.896403312683105, "learning_rate": 2.3183344595049117e-07, "loss": 0.353, "step": 19483 }, { "epoch": 0.9037105751391465, "grad_norm": 4.7366557121276855, "learning_rate": 2.3161200020066665e-07, "loss": 0.2608, "step": 19484 }, { "epoch": 0.9037569573283859, "grad_norm": 5.6091203689575195, "learning_rate": 2.313906577554531e-07, "loss": 0.2993, "step": 19485 }, { "epoch": 0.9038033395176253, "grad_norm": 8.688021659851074, "learning_rate": 2.3116941861964614e-07, "loss": 0.3903, "step": 19486 }, { "epoch": 0.9038497217068646, "grad_norm": 14.521947860717773, "learning_rate": 2.3094828279803805e-07, "loss": 0.4055, "step": 19487 }, { "epoch": 0.9038961038961039, "grad_norm": 6.572344779968262, "learning_rate": 2.3072725029541998e-07, "loss": 0.269, "step": 19488 }, { "epoch": 0.9039424860853432, "grad_norm": 7.11224889755249, "learning_rate": 2.3050632111658033e-07, "loss": 0.3279, "step": 19489 }, { "epoch": 0.9039888682745826, "grad_norm": 9.88578987121582, "learning_rate": 2.3028549526630583e-07, "loss": 0.3691, "step": 19490 }, { "epoch": 0.9040352504638219, "grad_norm": 7.398303031921387, "learning_rate": 2.30064772749381e-07, "loss": 0.2724, "step": 19491 }, { "epoch": 0.9040816326530612, "grad_norm": 10.793527603149414, "learning_rate": 2.2984415357058644e-07, "loss": 0.479, "step": 19492 }, { "epoch": 0.9041280148423005, "grad_norm": 5.715691566467285, "learning_rate": 2.296236377347022e-07, "loss": 0.391, "step": 19493 }, { "epoch": 0.9041743970315399, "grad_norm": 5.227909564971924, "learning_rate": 2.294032252465056e-07, "loss": 0.298, "step": 19494 }, { "epoch": 0.9042207792207793, "grad_norm": 11.60692310333252, "learning_rate": 2.291829161107717e-07, "loss": 0.3034, "step": 19495 }, { "epoch": 0.9042671614100185, "grad_norm": 6.29870080947876, "learning_rate": 2.2896271033227392e-07, "loss": 0.2607, "step": 19496 }, { "epoch": 0.9043135435992579, "grad_norm": 8.011722564697266, "learning_rate": 2.2874260791578286e-07, "loss": 0.3864, "step": 19497 }, { "epoch": 0.9043599257884972, "grad_norm": 7.015359878540039, "learning_rate": 2.2852260886606635e-07, "loss": 0.3898, "step": 19498 }, { "epoch": 0.9044063079777366, "grad_norm": 11.124917984008789, "learning_rate": 2.2830271318789067e-07, "loss": 0.383, "step": 19499 }, { "epoch": 0.9044526901669759, "grad_norm": 8.460018157958984, "learning_rate": 2.280829208860197e-07, "loss": 0.3571, "step": 19500 }, { "epoch": 0.9044990723562152, "grad_norm": 11.99688720703125, "learning_rate": 2.2786323196521575e-07, "loss": 0.3742, "step": 19501 }, { "epoch": 0.9045454545454545, "grad_norm": 8.491997718811035, "learning_rate": 2.276436464302384e-07, "loss": 0.424, "step": 19502 }, { "epoch": 0.9045918367346939, "grad_norm": 5.593723773956299, "learning_rate": 2.2742416428584323e-07, "loss": 0.2302, "step": 19503 }, { "epoch": 0.9046382189239333, "grad_norm": 9.041467666625977, "learning_rate": 2.2720478553678703e-07, "loss": 0.2995, "step": 19504 }, { "epoch": 0.9046846011131725, "grad_norm": 4.676681041717529, "learning_rate": 2.26985510187821e-07, "loss": 0.3152, "step": 19505 }, { "epoch": 0.9047309833024119, "grad_norm": 23.94297218322754, "learning_rate": 2.2676633824369687e-07, "loss": 0.4116, "step": 19506 }, { "epoch": 0.9047773654916512, "grad_norm": 9.979165077209473, "learning_rate": 2.2654726970916197e-07, "loss": 0.2068, "step": 19507 }, { "epoch": 0.9048237476808906, "grad_norm": 11.79774284362793, "learning_rate": 2.263283045889636e-07, "loss": 0.3127, "step": 19508 }, { "epoch": 0.9048701298701298, "grad_norm": 11.370567321777344, "learning_rate": 2.261094428878441e-07, "loss": 0.4736, "step": 19509 }, { "epoch": 0.9049165120593692, "grad_norm": 9.55230712890625, "learning_rate": 2.2589068461054576e-07, "loss": 0.2591, "step": 19510 }, { "epoch": 0.9049628942486085, "grad_norm": 9.193130493164062, "learning_rate": 2.2567202976180757e-07, "loss": 0.4456, "step": 19511 }, { "epoch": 0.9050092764378479, "grad_norm": 5.2041754722595215, "learning_rate": 2.2545347834636632e-07, "loss": 0.3266, "step": 19512 }, { "epoch": 0.9050556586270871, "grad_norm": 8.941786766052246, "learning_rate": 2.2523503036895767e-07, "loss": 0.3161, "step": 19513 }, { "epoch": 0.9051020408163265, "grad_norm": 9.315385818481445, "learning_rate": 2.2501668583431335e-07, "loss": 0.3551, "step": 19514 }, { "epoch": 0.9051484230055659, "grad_norm": 7.506890773773193, "learning_rate": 2.2479844474716405e-07, "loss": 0.2341, "step": 19515 }, { "epoch": 0.9051948051948052, "grad_norm": 7.767626762390137, "learning_rate": 2.2458030711223765e-07, "loss": 0.355, "step": 19516 }, { "epoch": 0.9052411873840446, "grad_norm": 10.738639831542969, "learning_rate": 2.243622729342604e-07, "loss": 0.3404, "step": 19517 }, { "epoch": 0.9052875695732838, "grad_norm": 8.498839378356934, "learning_rate": 2.241443422179562e-07, "loss": 0.4192, "step": 19518 }, { "epoch": 0.9053339517625232, "grad_norm": 5.949084758758545, "learning_rate": 2.2392651496804473e-07, "loss": 0.3767, "step": 19519 }, { "epoch": 0.9053803339517625, "grad_norm": 11.423807144165039, "learning_rate": 2.237087911892466e-07, "loss": 0.525, "step": 19520 }, { "epoch": 0.9054267161410019, "grad_norm": 6.995018005371094, "learning_rate": 2.2349117088627803e-07, "loss": 0.2645, "step": 19521 }, { "epoch": 0.9054730983302411, "grad_norm": 9.450698852539062, "learning_rate": 2.2327365406385414e-07, "loss": 0.2784, "step": 19522 }, { "epoch": 0.9055194805194805, "grad_norm": 6.048696517944336, "learning_rate": 2.2305624072668675e-07, "loss": 0.3573, "step": 19523 }, { "epoch": 0.9055658627087199, "grad_norm": 6.525853157043457, "learning_rate": 2.228389308794876e-07, "loss": 0.3129, "step": 19524 }, { "epoch": 0.9056122448979592, "grad_norm": 12.647254943847656, "learning_rate": 2.2262172452696185e-07, "loss": 0.4468, "step": 19525 }, { "epoch": 0.9056586270871985, "grad_norm": 7.059370994567871, "learning_rate": 2.2240462167381737e-07, "loss": 0.219, "step": 19526 }, { "epoch": 0.9057050092764378, "grad_norm": 5.636778354644775, "learning_rate": 2.2218762232475654e-07, "loss": 0.283, "step": 19527 }, { "epoch": 0.9057513914656772, "grad_norm": 7.392487525939941, "learning_rate": 2.219707264844806e-07, "loss": 0.3309, "step": 19528 }, { "epoch": 0.9057977736549165, "grad_norm": 5.743340492248535, "learning_rate": 2.2175393415768965e-07, "loss": 0.2711, "step": 19529 }, { "epoch": 0.9058441558441559, "grad_norm": 10.716257095336914, "learning_rate": 2.2153724534907884e-07, "loss": 0.3973, "step": 19530 }, { "epoch": 0.9058905380333951, "grad_norm": 4.175967216491699, "learning_rate": 2.213206600633433e-07, "loss": 0.2922, "step": 19531 }, { "epoch": 0.9059369202226345, "grad_norm": 8.079794883728027, "learning_rate": 2.2110417830517483e-07, "loss": 0.318, "step": 19532 }, { "epoch": 0.9059833024118739, "grad_norm": 7.802352428436279, "learning_rate": 2.2088780007926413e-07, "loss": 0.2604, "step": 19533 }, { "epoch": 0.9060296846011132, "grad_norm": 7.908313751220703, "learning_rate": 2.2067152539029802e-07, "loss": 0.2839, "step": 19534 }, { "epoch": 0.9060760667903525, "grad_norm": 7.391885757446289, "learning_rate": 2.2045535424296382e-07, "loss": 0.2824, "step": 19535 }, { "epoch": 0.9061224489795918, "grad_norm": 9.577048301696777, "learning_rate": 2.2023928664194229e-07, "loss": 0.4589, "step": 19536 }, { "epoch": 0.9061688311688312, "grad_norm": 6.194066524505615, "learning_rate": 2.2002332259191572e-07, "loss": 0.2867, "step": 19537 }, { "epoch": 0.9062152133580705, "grad_norm": 7.629856109619141, "learning_rate": 2.1980746209756264e-07, "loss": 0.4014, "step": 19538 }, { "epoch": 0.9062615955473098, "grad_norm": 6.7570905685424805, "learning_rate": 2.1959170516355932e-07, "loss": 0.3381, "step": 19539 }, { "epoch": 0.9063079777365491, "grad_norm": 14.103559494018555, "learning_rate": 2.1937605179458143e-07, "loss": 0.5381, "step": 19540 }, { "epoch": 0.9063543599257885, "grad_norm": 9.567789077758789, "learning_rate": 2.191605019952986e-07, "loss": 0.3142, "step": 19541 }, { "epoch": 0.9064007421150279, "grad_norm": 5.227818012237549, "learning_rate": 2.189450557703826e-07, "loss": 0.2235, "step": 19542 }, { "epoch": 0.9064471243042672, "grad_norm": 4.713995456695557, "learning_rate": 2.1872971312449975e-07, "loss": 0.3679, "step": 19543 }, { "epoch": 0.9064935064935065, "grad_norm": 12.236163139343262, "learning_rate": 2.1851447406231573e-07, "loss": 0.3235, "step": 19544 }, { "epoch": 0.9065398886827458, "grad_norm": 8.117179870605469, "learning_rate": 2.1829933858849406e-07, "loss": 0.3336, "step": 19545 }, { "epoch": 0.9065862708719852, "grad_norm": 8.768682479858398, "learning_rate": 2.1808430670769543e-07, "loss": 0.337, "step": 19546 }, { "epoch": 0.9066326530612245, "grad_norm": 5.451052188873291, "learning_rate": 2.1786937842457778e-07, "loss": 0.2857, "step": 19547 }, { "epoch": 0.9066790352504638, "grad_norm": 5.39484167098999, "learning_rate": 2.176545537437974e-07, "loss": 0.299, "step": 19548 }, { "epoch": 0.9067254174397031, "grad_norm": 5.947795867919922, "learning_rate": 2.174398326700089e-07, "loss": 0.2363, "step": 19549 }, { "epoch": 0.9067717996289425, "grad_norm": 8.18301773071289, "learning_rate": 2.172252152078641e-07, "loss": 0.3063, "step": 19550 }, { "epoch": 0.9068181818181819, "grad_norm": 7.772075653076172, "learning_rate": 2.1701070136201263e-07, "loss": 0.3484, "step": 19551 }, { "epoch": 0.9068645640074211, "grad_norm": 12.841376304626465, "learning_rate": 2.167962911371013e-07, "loss": 0.4326, "step": 19552 }, { "epoch": 0.9069109461966605, "grad_norm": 6.712935447692871, "learning_rate": 2.1658198453777478e-07, "loss": 0.2102, "step": 19553 }, { "epoch": 0.9069573283858998, "grad_norm": 14.669705390930176, "learning_rate": 2.163677815686771e-07, "loss": 0.3279, "step": 19554 }, { "epoch": 0.9070037105751392, "grad_norm": 8.86694049835205, "learning_rate": 2.1615368223444843e-07, "loss": 0.2805, "step": 19555 }, { "epoch": 0.9070500927643784, "grad_norm": 5.5020318031311035, "learning_rate": 2.1593968653972673e-07, "loss": 0.2353, "step": 19556 }, { "epoch": 0.9070964749536178, "grad_norm": 5.326880931854248, "learning_rate": 2.1572579448914888e-07, "loss": 0.304, "step": 19557 }, { "epoch": 0.9071428571428571, "grad_norm": 7.839375972747803, "learning_rate": 2.1551200608734778e-07, "loss": 0.3032, "step": 19558 }, { "epoch": 0.9071892393320965, "grad_norm": 5.759556770324707, "learning_rate": 2.152983213389559e-07, "loss": 0.2185, "step": 19559 }, { "epoch": 0.9072356215213359, "grad_norm": 4.748689651489258, "learning_rate": 2.1508474024860171e-07, "loss": 0.28, "step": 19560 }, { "epoch": 0.9072820037105751, "grad_norm": 4.739268779754639, "learning_rate": 2.148712628209132e-07, "loss": 0.3675, "step": 19561 }, { "epoch": 0.9073283858998145, "grad_norm": 7.079565048217773, "learning_rate": 2.1465788906051498e-07, "loss": 0.2704, "step": 19562 }, { "epoch": 0.9073747680890538, "grad_norm": 4.8438191413879395, "learning_rate": 2.144446189720295e-07, "loss": 0.2139, "step": 19563 }, { "epoch": 0.9074211502782932, "grad_norm": 5.371798038482666, "learning_rate": 2.1423145256007693e-07, "loss": 0.2812, "step": 19564 }, { "epoch": 0.9074675324675324, "grad_norm": 10.75493049621582, "learning_rate": 2.1401838982927581e-07, "loss": 0.3769, "step": 19565 }, { "epoch": 0.9075139146567718, "grad_norm": 6.844905853271484, "learning_rate": 2.1380543078424186e-07, "loss": 0.4204, "step": 19566 }, { "epoch": 0.9075602968460111, "grad_norm": 6.70628547668457, "learning_rate": 2.1359257542958922e-07, "loss": 0.2974, "step": 19567 }, { "epoch": 0.9076066790352505, "grad_norm": 8.277153968811035, "learning_rate": 2.1337982376992917e-07, "loss": 0.4059, "step": 19568 }, { "epoch": 0.9076530612244897, "grad_norm": 13.252120971679688, "learning_rate": 2.1316717580986969e-07, "loss": 0.3861, "step": 19569 }, { "epoch": 0.9076994434137291, "grad_norm": 7.160347938537598, "learning_rate": 2.1295463155401874e-07, "loss": 0.2981, "step": 19570 }, { "epoch": 0.9077458256029685, "grad_norm": 12.209911346435547, "learning_rate": 2.1274219100698103e-07, "loss": 0.4419, "step": 19571 }, { "epoch": 0.9077922077922078, "grad_norm": 13.50107479095459, "learning_rate": 2.1252985417335835e-07, "loss": 0.3325, "step": 19572 }, { "epoch": 0.9078385899814472, "grad_norm": 5.2043046951293945, "learning_rate": 2.1231762105775212e-07, "loss": 0.2478, "step": 19573 }, { "epoch": 0.9078849721706864, "grad_norm": 7.482542037963867, "learning_rate": 2.121054916647586e-07, "loss": 0.3167, "step": 19574 }, { "epoch": 0.9079313543599258, "grad_norm": 5.779440402984619, "learning_rate": 2.1189346599897464e-07, "loss": 0.3694, "step": 19575 }, { "epoch": 0.9079777365491651, "grad_norm": 8.621774673461914, "learning_rate": 2.1168154406499275e-07, "loss": 0.2769, "step": 19576 }, { "epoch": 0.9080241187384045, "grad_norm": 6.972278594970703, "learning_rate": 2.1146972586740532e-07, "loss": 0.277, "step": 19577 }, { "epoch": 0.9080705009276437, "grad_norm": 5.563629150390625, "learning_rate": 2.1125801141079982e-07, "loss": 0.1714, "step": 19578 }, { "epoch": 0.9081168831168831, "grad_norm": 8.139565467834473, "learning_rate": 2.110464006997648e-07, "loss": 0.344, "step": 19579 }, { "epoch": 0.9081632653061225, "grad_norm": 5.441208362579346, "learning_rate": 2.1083489373888267e-07, "loss": 0.2792, "step": 19580 }, { "epoch": 0.9082096474953618, "grad_norm": 5.704620361328125, "learning_rate": 2.10623490532737e-07, "loss": 0.244, "step": 19581 }, { "epoch": 0.9082560296846011, "grad_norm": 15.772661209106445, "learning_rate": 2.1041219108590692e-07, "loss": 0.4385, "step": 19582 }, { "epoch": 0.9083024118738404, "grad_norm": 4.847968578338623, "learning_rate": 2.1020099540297044e-07, "loss": 0.1724, "step": 19583 }, { "epoch": 0.9083487940630798, "grad_norm": 9.380014419555664, "learning_rate": 2.099899034885039e-07, "loss": 0.3031, "step": 19584 }, { "epoch": 0.9083951762523191, "grad_norm": 11.978280067443848, "learning_rate": 2.0977891534707916e-07, "loss": 0.2729, "step": 19585 }, { "epoch": 0.9084415584415585, "grad_norm": 4.705217361450195, "learning_rate": 2.0956803098326704e-07, "loss": 0.2846, "step": 19586 }, { "epoch": 0.9084879406307977, "grad_norm": 10.97104549407959, "learning_rate": 2.0935725040163724e-07, "loss": 0.3997, "step": 19587 }, { "epoch": 0.9085343228200371, "grad_norm": 6.117278099060059, "learning_rate": 2.0914657360675606e-07, "loss": 0.2853, "step": 19588 }, { "epoch": 0.9085807050092765, "grad_norm": 7.904671669006348, "learning_rate": 2.0893600060318708e-07, "loss": 0.3316, "step": 19589 }, { "epoch": 0.9086270871985158, "grad_norm": 6.616952896118164, "learning_rate": 2.0872553139549335e-07, "loss": 0.281, "step": 19590 }, { "epoch": 0.9086734693877551, "grad_norm": 7.481876850128174, "learning_rate": 2.0851516598823284e-07, "loss": 0.2651, "step": 19591 }, { "epoch": 0.9087198515769944, "grad_norm": 7.064802646636963, "learning_rate": 2.0830490438596418e-07, "loss": 0.3017, "step": 19592 }, { "epoch": 0.9087662337662338, "grad_norm": 8.630308151245117, "learning_rate": 2.0809474659324257e-07, "loss": 0.3919, "step": 19593 }, { "epoch": 0.9088126159554731, "grad_norm": 9.873713493347168, "learning_rate": 2.0788469261462106e-07, "loss": 0.2179, "step": 19594 }, { "epoch": 0.9088589981447124, "grad_norm": 5.658143997192383, "learning_rate": 2.0767474245465047e-07, "loss": 0.2685, "step": 19595 }, { "epoch": 0.9089053803339517, "grad_norm": 6.105859756469727, "learning_rate": 2.0746489611787825e-07, "loss": 0.3215, "step": 19596 }, { "epoch": 0.9089517625231911, "grad_norm": 5.47035551071167, "learning_rate": 2.072551536088513e-07, "loss": 0.2865, "step": 19597 }, { "epoch": 0.9089981447124305, "grad_norm": 6.446140289306641, "learning_rate": 2.0704551493211378e-07, "loss": 0.246, "step": 19598 }, { "epoch": 0.9090445269016698, "grad_norm": 7.179342746734619, "learning_rate": 2.0683598009220707e-07, "loss": 0.3781, "step": 19599 }, { "epoch": 0.9090909090909091, "grad_norm": 7.151734828948975, "learning_rate": 2.066265490936703e-07, "loss": 0.345, "step": 19600 }, { "epoch": 0.9091372912801484, "grad_norm": 7.183186054229736, "learning_rate": 2.0641722194104209e-07, "loss": 0.396, "step": 19601 }, { "epoch": 0.9091836734693878, "grad_norm": 14.099224090576172, "learning_rate": 2.0620799863885544e-07, "loss": 0.561, "step": 19602 }, { "epoch": 0.9092300556586271, "grad_norm": 5.271327018737793, "learning_rate": 2.0599887919164451e-07, "loss": 0.2239, "step": 19603 }, { "epoch": 0.9092764378478664, "grad_norm": 4.294282913208008, "learning_rate": 2.0578986360393904e-07, "loss": 0.2368, "step": 19604 }, { "epoch": 0.9093228200371057, "grad_norm": 4.794729709625244, "learning_rate": 2.055809518802676e-07, "loss": 0.3306, "step": 19605 }, { "epoch": 0.9093692022263451, "grad_norm": 6.606318473815918, "learning_rate": 2.053721440251566e-07, "loss": 0.3419, "step": 19606 }, { "epoch": 0.9094155844155845, "grad_norm": 6.948400974273682, "learning_rate": 2.0516344004312849e-07, "loss": 0.2656, "step": 19607 }, { "epoch": 0.9094619666048237, "grad_norm": 4.762181758880615, "learning_rate": 2.0495483993870578e-07, "loss": 0.3691, "step": 19608 }, { "epoch": 0.9095083487940631, "grad_norm": 7.906543731689453, "learning_rate": 2.0474634371640713e-07, "loss": 0.3057, "step": 19609 }, { "epoch": 0.9095547309833024, "grad_norm": 8.568394660949707, "learning_rate": 2.045379513807494e-07, "loss": 0.3679, "step": 19610 }, { "epoch": 0.9096011131725418, "grad_norm": 10.30636978149414, "learning_rate": 2.0432966293624846e-07, "loss": 0.3061, "step": 19611 }, { "epoch": 0.909647495361781, "grad_norm": 9.013970375061035, "learning_rate": 2.0412147838741513e-07, "loss": 0.3459, "step": 19612 }, { "epoch": 0.9096938775510204, "grad_norm": 5.678798198699951, "learning_rate": 2.0391339773876085e-07, "loss": 0.2927, "step": 19613 }, { "epoch": 0.9097402597402597, "grad_norm": 8.328110694885254, "learning_rate": 2.0370542099479308e-07, "loss": 0.401, "step": 19614 }, { "epoch": 0.9097866419294991, "grad_norm": 11.901650428771973, "learning_rate": 2.034975481600171e-07, "loss": 0.4612, "step": 19615 }, { "epoch": 0.9098330241187385, "grad_norm": 3.5290944576263428, "learning_rate": 2.0328977923893824e-07, "loss": 0.2004, "step": 19616 }, { "epoch": 0.9098794063079777, "grad_norm": 10.184980392456055, "learning_rate": 2.0308211423605507e-07, "loss": 0.3317, "step": 19617 }, { "epoch": 0.9099257884972171, "grad_norm": 5.843733787536621, "learning_rate": 2.028745531558679e-07, "loss": 0.3125, "step": 19618 }, { "epoch": 0.9099721706864564, "grad_norm": 9.577706336975098, "learning_rate": 2.0266709600287316e-07, "loss": 0.3969, "step": 19619 }, { "epoch": 0.9100185528756958, "grad_norm": 6.532840728759766, "learning_rate": 2.0245974278156555e-07, "loss": 0.3436, "step": 19620 }, { "epoch": 0.910064935064935, "grad_norm": 7.410691261291504, "learning_rate": 2.0225249349643817e-07, "loss": 0.3263, "step": 19621 }, { "epoch": 0.9101113172541744, "grad_norm": 4.7095818519592285, "learning_rate": 2.0204534815197907e-07, "loss": 0.3427, "step": 19622 }, { "epoch": 0.9101576994434137, "grad_norm": 9.002484321594238, "learning_rate": 2.018383067526769e-07, "loss": 0.4165, "step": 19623 }, { "epoch": 0.9102040816326531, "grad_norm": 9.48625659942627, "learning_rate": 2.0163136930301696e-07, "loss": 0.313, "step": 19624 }, { "epoch": 0.9102504638218923, "grad_norm": 8.229307174682617, "learning_rate": 2.0142453580748233e-07, "loss": 0.3564, "step": 19625 }, { "epoch": 0.9102968460111317, "grad_norm": 6.878056526184082, "learning_rate": 2.0121780627055498e-07, "loss": 0.2594, "step": 19626 }, { "epoch": 0.9103432282003711, "grad_norm": 6.598771572113037, "learning_rate": 2.0101118069671244e-07, "loss": 0.3175, "step": 19627 }, { "epoch": 0.9103896103896104, "grad_norm": 11.36152458190918, "learning_rate": 2.0080465909043113e-07, "loss": 0.329, "step": 19628 }, { "epoch": 0.9104359925788498, "grad_norm": 5.10872220993042, "learning_rate": 2.0059824145618577e-07, "loss": 0.1831, "step": 19629 }, { "epoch": 0.910482374768089, "grad_norm": 10.082050323486328, "learning_rate": 2.0039192779844784e-07, "loss": 0.3224, "step": 19630 }, { "epoch": 0.9105287569573284, "grad_norm": 7.152275085449219, "learning_rate": 2.001857181216882e-07, "loss": 0.3479, "step": 19631 }, { "epoch": 0.9105751391465677, "grad_norm": 6.30591344833374, "learning_rate": 1.999796124303721e-07, "loss": 0.3214, "step": 19632 }, { "epoch": 0.9106215213358071, "grad_norm": 8.470830917358398, "learning_rate": 1.9977361072896663e-07, "loss": 0.3096, "step": 19633 }, { "epoch": 0.9106679035250463, "grad_norm": 9.98978328704834, "learning_rate": 1.9956771302193368e-07, "loss": 0.3241, "step": 19634 }, { "epoch": 0.9107142857142857, "grad_norm": 5.491885662078857, "learning_rate": 1.9936191931373415e-07, "loss": 0.2965, "step": 19635 }, { "epoch": 0.9107606679035251, "grad_norm": 8.292304992675781, "learning_rate": 1.9915622960882785e-07, "loss": 0.342, "step": 19636 }, { "epoch": 0.9108070500927644, "grad_norm": 12.072685241699219, "learning_rate": 1.989506439116684e-07, "loss": 0.4924, "step": 19637 }, { "epoch": 0.9108534322820037, "grad_norm": 5.796350002288818, "learning_rate": 1.9874516222671115e-07, "loss": 0.3086, "step": 19638 }, { "epoch": 0.910899814471243, "grad_norm": 8.416830062866211, "learning_rate": 1.985397845584075e-07, "loss": 0.3225, "step": 19639 }, { "epoch": 0.9109461966604824, "grad_norm": 4.653858661651611, "learning_rate": 1.9833451091120727e-07, "loss": 0.3139, "step": 19640 }, { "epoch": 0.9109925788497217, "grad_norm": 5.951242446899414, "learning_rate": 1.981293412895574e-07, "loss": 0.3211, "step": 19641 }, { "epoch": 0.9110389610389611, "grad_norm": 6.001837253570557, "learning_rate": 1.9792427569790217e-07, "loss": 0.3437, "step": 19642 }, { "epoch": 0.9110853432282003, "grad_norm": 6.883037090301514, "learning_rate": 1.9771931414068468e-07, "loss": 0.3674, "step": 19643 }, { "epoch": 0.9111317254174397, "grad_norm": 4.374600410461426, "learning_rate": 1.9751445662234525e-07, "loss": 0.3398, "step": 19644 }, { "epoch": 0.911178107606679, "grad_norm": 6.392457008361816, "learning_rate": 1.97309703147322e-07, "loss": 0.2943, "step": 19645 }, { "epoch": 0.9112244897959184, "grad_norm": 10.051965713500977, "learning_rate": 1.971050537200514e-07, "loss": 0.324, "step": 19646 }, { "epoch": 0.9112708719851577, "grad_norm": 7.951496601104736, "learning_rate": 1.9690050834496654e-07, "loss": 0.3597, "step": 19647 }, { "epoch": 0.911317254174397, "grad_norm": 6.17890739440918, "learning_rate": 1.9669606702649835e-07, "loss": 0.276, "step": 19648 }, { "epoch": 0.9113636363636364, "grad_norm": 6.907683849334717, "learning_rate": 1.964917297690766e-07, "loss": 0.3388, "step": 19649 }, { "epoch": 0.9114100185528757, "grad_norm": 5.875672817230225, "learning_rate": 1.9628749657712777e-07, "loss": 0.3266, "step": 19650 }, { "epoch": 0.911456400742115, "grad_norm": 6.253291606903076, "learning_rate": 1.9608336745507716e-07, "loss": 0.2668, "step": 19651 }, { "epoch": 0.9115027829313543, "grad_norm": 5.7673540115356445, "learning_rate": 1.9587934240734574e-07, "loss": 0.3183, "step": 19652 }, { "epoch": 0.9115491651205937, "grad_norm": 4.311264514923096, "learning_rate": 1.9567542143835493e-07, "loss": 0.2491, "step": 19653 }, { "epoch": 0.911595547309833, "grad_norm": 11.817975044250488, "learning_rate": 1.954716045525218e-07, "loss": 0.3903, "step": 19654 }, { "epoch": 0.9116419294990723, "grad_norm": 10.75210952758789, "learning_rate": 1.9526789175426275e-07, "loss": 0.5015, "step": 19655 }, { "epoch": 0.9116883116883117, "grad_norm": 9.380245208740234, "learning_rate": 1.9506428304799095e-07, "loss": 0.3701, "step": 19656 }, { "epoch": 0.911734693877551, "grad_norm": 7.383646011352539, "learning_rate": 1.9486077843811623e-07, "loss": 0.3489, "step": 19657 }, { "epoch": 0.9117810760667904, "grad_norm": 4.970520496368408, "learning_rate": 1.9465737792904838e-07, "loss": 0.3078, "step": 19658 }, { "epoch": 0.9118274582560297, "grad_norm": 10.748023986816406, "learning_rate": 1.9445408152519384e-07, "loss": 0.4251, "step": 19659 }, { "epoch": 0.911873840445269, "grad_norm": 6.261592864990234, "learning_rate": 1.9425088923095746e-07, "loss": 0.3685, "step": 19660 }, { "epoch": 0.9119202226345083, "grad_norm": 3.7288930416107178, "learning_rate": 1.9404780105074073e-07, "loss": 0.2239, "step": 19661 }, { "epoch": 0.9119666048237477, "grad_norm": 7.429727077484131, "learning_rate": 1.938448169889434e-07, "loss": 0.278, "step": 19662 }, { "epoch": 0.912012987012987, "grad_norm": 6.872367858886719, "learning_rate": 1.9364193704996316e-07, "loss": 0.3537, "step": 19663 }, { "epoch": 0.9120593692022263, "grad_norm": 11.939467430114746, "learning_rate": 1.9343916123819473e-07, "loss": 0.4101, "step": 19664 }, { "epoch": 0.9121057513914657, "grad_norm": 7.65001106262207, "learning_rate": 1.9323648955803187e-07, "loss": 0.3971, "step": 19665 }, { "epoch": 0.912152133580705, "grad_norm": 4.904656887054443, "learning_rate": 1.9303392201386605e-07, "loss": 0.1761, "step": 19666 }, { "epoch": 0.9121985157699444, "grad_norm": 4.706215858459473, "learning_rate": 1.9283145861008436e-07, "loss": 0.2527, "step": 19667 }, { "epoch": 0.9122448979591836, "grad_norm": 5.908932209014893, "learning_rate": 1.926290993510732e-07, "loss": 0.2798, "step": 19668 }, { "epoch": 0.912291280148423, "grad_norm": 6.1865763664245605, "learning_rate": 1.9242684424121693e-07, "loss": 0.3323, "step": 19669 }, { "epoch": 0.9123376623376623, "grad_norm": 15.160822868347168, "learning_rate": 1.9222469328489812e-07, "loss": 0.4333, "step": 19670 }, { "epoch": 0.9123840445269017, "grad_norm": 8.882772445678711, "learning_rate": 1.9202264648649495e-07, "loss": 0.344, "step": 19671 }, { "epoch": 0.912430426716141, "grad_norm": 6.412286281585693, "learning_rate": 1.9182070385038555e-07, "loss": 0.2613, "step": 19672 }, { "epoch": 0.9124768089053803, "grad_norm": 9.334012985229492, "learning_rate": 1.916188653809442e-07, "loss": 0.4114, "step": 19673 }, { "epoch": 0.9125231910946197, "grad_norm": 9.107422828674316, "learning_rate": 1.9141713108254413e-07, "loss": 0.2989, "step": 19674 }, { "epoch": 0.912569573283859, "grad_norm": 6.836134433746338, "learning_rate": 1.912155009595551e-07, "loss": 0.2924, "step": 19675 }, { "epoch": 0.9126159554730984, "grad_norm": 10.62530517578125, "learning_rate": 1.9101397501634645e-07, "loss": 0.3345, "step": 19676 }, { "epoch": 0.9126623376623376, "grad_norm": 8.51271915435791, "learning_rate": 1.9081255325728408e-07, "loss": 0.51, "step": 19677 }, { "epoch": 0.912708719851577, "grad_norm": 7.108584880828857, "learning_rate": 1.9061123568673068e-07, "loss": 0.3107, "step": 19678 }, { "epoch": 0.9127551020408163, "grad_norm": 6.386297225952148, "learning_rate": 1.904100223090477e-07, "loss": 0.4128, "step": 19679 }, { "epoch": 0.9128014842300557, "grad_norm": 7.411037445068359, "learning_rate": 1.9020891312859501e-07, "loss": 0.3178, "step": 19680 }, { "epoch": 0.9128478664192949, "grad_norm": 9.877744674682617, "learning_rate": 1.900079081497297e-07, "loss": 0.3826, "step": 19681 }, { "epoch": 0.9128942486085343, "grad_norm": 5.991984844207764, "learning_rate": 1.8980700737680546e-07, "loss": 0.3394, "step": 19682 }, { "epoch": 0.9129406307977737, "grad_norm": 7.291030406951904, "learning_rate": 1.8960621081417607e-07, "loss": 0.3585, "step": 19683 }, { "epoch": 0.912987012987013, "grad_norm": 9.100242614746094, "learning_rate": 1.8940551846619025e-07, "loss": 0.3227, "step": 19684 }, { "epoch": 0.9130333951762524, "grad_norm": 8.301715850830078, "learning_rate": 1.892049303371968e-07, "loss": 0.2795, "step": 19685 }, { "epoch": 0.9130797773654916, "grad_norm": 9.32807445526123, "learning_rate": 1.8900444643154104e-07, "loss": 0.4433, "step": 19686 }, { "epoch": 0.913126159554731, "grad_norm": 4.7322821617126465, "learning_rate": 1.8880406675356623e-07, "loss": 0.2762, "step": 19687 }, { "epoch": 0.9131725417439703, "grad_norm": 9.095877647399902, "learning_rate": 1.886037913076144e-07, "loss": 0.3258, "step": 19688 }, { "epoch": 0.9132189239332097, "grad_norm": 8.898000717163086, "learning_rate": 1.8840362009802325e-07, "loss": 0.4438, "step": 19689 }, { "epoch": 0.9132653061224489, "grad_norm": 5.822548866271973, "learning_rate": 1.8820355312912986e-07, "loss": 0.2783, "step": 19690 }, { "epoch": 0.9133116883116883, "grad_norm": 5.28857946395874, "learning_rate": 1.8800359040526793e-07, "loss": 0.2747, "step": 19691 }, { "epoch": 0.9133580705009277, "grad_norm": 5.404713153839111, "learning_rate": 1.8780373193077074e-07, "loss": 0.3803, "step": 19692 }, { "epoch": 0.913404452690167, "grad_norm": 4.400422096252441, "learning_rate": 1.8760397770996807e-07, "loss": 0.215, "step": 19693 }, { "epoch": 0.9134508348794063, "grad_norm": 10.147335052490234, "learning_rate": 1.8740432774718597e-07, "loss": 0.2797, "step": 19694 }, { "epoch": 0.9134972170686456, "grad_norm": 5.485233783721924, "learning_rate": 1.8720478204675097e-07, "loss": 0.366, "step": 19695 }, { "epoch": 0.913543599257885, "grad_norm": 5.724396228790283, "learning_rate": 1.870053406129857e-07, "loss": 0.3489, "step": 19696 }, { "epoch": 0.9135899814471243, "grad_norm": 5.029901504516602, "learning_rate": 1.8680600345021172e-07, "loss": 0.2969, "step": 19697 }, { "epoch": 0.9136363636363637, "grad_norm": 5.057767868041992, "learning_rate": 1.8660677056274668e-07, "loss": 0.3041, "step": 19698 }, { "epoch": 0.9136827458256029, "grad_norm": 5.176113605499268, "learning_rate": 1.8640764195490768e-07, "loss": 0.2985, "step": 19699 }, { "epoch": 0.9137291280148423, "grad_norm": 7.51904821395874, "learning_rate": 1.862086176310074e-07, "loss": 0.3207, "step": 19700 }, { "epoch": 0.9137755102040817, "grad_norm": 5.896388530731201, "learning_rate": 1.8600969759535846e-07, "loss": 0.3328, "step": 19701 }, { "epoch": 0.913821892393321, "grad_norm": 10.216009140014648, "learning_rate": 1.8581088185227025e-07, "loss": 0.383, "step": 19702 }, { "epoch": 0.9138682745825603, "grad_norm": 7.5452961921691895, "learning_rate": 1.856121704060504e-07, "loss": 0.358, "step": 19703 }, { "epoch": 0.9139146567717996, "grad_norm": 8.289909362792969, "learning_rate": 1.8541356326100436e-07, "loss": 0.2528, "step": 19704 }, { "epoch": 0.913961038961039, "grad_norm": 6.949429512023926, "learning_rate": 1.852150604214331e-07, "loss": 0.31, "step": 19705 }, { "epoch": 0.9140074211502783, "grad_norm": 23.324092864990234, "learning_rate": 1.8501666189163769e-07, "loss": 0.5168, "step": 19706 }, { "epoch": 0.9140538033395176, "grad_norm": 8.235648155212402, "learning_rate": 1.8481836767591688e-07, "loss": 0.3387, "step": 19707 }, { "epoch": 0.9141001855287569, "grad_norm": 8.080947875976562, "learning_rate": 1.8462017777856667e-07, "loss": 0.3075, "step": 19708 }, { "epoch": 0.9141465677179963, "grad_norm": 15.372736930847168, "learning_rate": 1.844220922038803e-07, "loss": 0.4046, "step": 19709 }, { "epoch": 0.9141929499072357, "grad_norm": 9.38913345336914, "learning_rate": 1.842241109561499e-07, "loss": 0.2917, "step": 19710 }, { "epoch": 0.9142393320964749, "grad_norm": 20.186487197875977, "learning_rate": 1.8402623403966368e-07, "loss": 0.4165, "step": 19711 }, { "epoch": 0.9142857142857143, "grad_norm": 8.539326667785645, "learning_rate": 1.8382846145870881e-07, "loss": 0.2092, "step": 19712 }, { "epoch": 0.9143320964749536, "grad_norm": 9.129359245300293, "learning_rate": 1.8363079321757016e-07, "loss": 0.4261, "step": 19713 }, { "epoch": 0.914378478664193, "grad_norm": 4.444452285766602, "learning_rate": 1.8343322932052988e-07, "loss": 0.2124, "step": 19714 }, { "epoch": 0.9144248608534323, "grad_norm": 7.117430210113525, "learning_rate": 1.832357697718684e-07, "loss": 0.3133, "step": 19715 }, { "epoch": 0.9144712430426716, "grad_norm": 16.53392219543457, "learning_rate": 1.8303841457586347e-07, "loss": 0.4432, "step": 19716 }, { "epoch": 0.9145176252319109, "grad_norm": 7.1835103034973145, "learning_rate": 1.8284116373678994e-07, "loss": 0.2238, "step": 19717 }, { "epoch": 0.9145640074211503, "grad_norm": 10.354047775268555, "learning_rate": 1.8264401725892224e-07, "loss": 0.2653, "step": 19718 }, { "epoch": 0.9146103896103897, "grad_norm": 5.889483451843262, "learning_rate": 1.824469751465313e-07, "loss": 0.2705, "step": 19719 }, { "epoch": 0.9146567717996289, "grad_norm": 8.007494926452637, "learning_rate": 1.8225003740388546e-07, "loss": 0.2587, "step": 19720 }, { "epoch": 0.9147031539888683, "grad_norm": 13.83634090423584, "learning_rate": 1.8205320403525184e-07, "loss": 0.4036, "step": 19721 }, { "epoch": 0.9147495361781076, "grad_norm": 6.79443883895874, "learning_rate": 1.8185647504489369e-07, "loss": 0.3461, "step": 19722 }, { "epoch": 0.914795918367347, "grad_norm": 5.90002965927124, "learning_rate": 1.8165985043707367e-07, "loss": 0.3108, "step": 19723 }, { "epoch": 0.9148423005565862, "grad_norm": 5.498404502868652, "learning_rate": 1.814633302160518e-07, "loss": 0.3064, "step": 19724 }, { "epoch": 0.9148886827458256, "grad_norm": 6.668632984161377, "learning_rate": 1.8126691438608567e-07, "loss": 0.338, "step": 19725 }, { "epoch": 0.9149350649350649, "grad_norm": 12.917522430419922, "learning_rate": 1.8107060295143032e-07, "loss": 0.2923, "step": 19726 }, { "epoch": 0.9149814471243043, "grad_norm": 3.653592348098755, "learning_rate": 1.8087439591633839e-07, "loss": 0.2377, "step": 19727 }, { "epoch": 0.9150278293135437, "grad_norm": 8.215950012207031, "learning_rate": 1.8067829328506036e-07, "loss": 0.2884, "step": 19728 }, { "epoch": 0.9150742115027829, "grad_norm": 7.765566349029541, "learning_rate": 1.8048229506184566e-07, "loss": 0.3226, "step": 19729 }, { "epoch": 0.9151205936920223, "grad_norm": 4.418975353240967, "learning_rate": 1.8028640125093976e-07, "loss": 0.3106, "step": 19730 }, { "epoch": 0.9151669758812616, "grad_norm": 6.598872661590576, "learning_rate": 1.8009061185658704e-07, "loss": 0.2521, "step": 19731 }, { "epoch": 0.915213358070501, "grad_norm": 9.185105323791504, "learning_rate": 1.7989492688302913e-07, "loss": 0.4878, "step": 19732 }, { "epoch": 0.9152597402597402, "grad_norm": 3.7641608715057373, "learning_rate": 1.7969934633450537e-07, "loss": 0.1833, "step": 19733 }, { "epoch": 0.9153061224489796, "grad_norm": 6.406940460205078, "learning_rate": 1.795038702152524e-07, "loss": 0.3339, "step": 19734 }, { "epoch": 0.9153525046382189, "grad_norm": 8.066495895385742, "learning_rate": 1.7930849852950572e-07, "loss": 0.282, "step": 19735 }, { "epoch": 0.9153988868274583, "grad_norm": 6.373678207397461, "learning_rate": 1.791132312814975e-07, "loss": 0.3043, "step": 19736 }, { "epoch": 0.9154452690166975, "grad_norm": 11.614466667175293, "learning_rate": 1.7891806847545878e-07, "loss": 0.3535, "step": 19737 }, { "epoch": 0.9154916512059369, "grad_norm": 6.102109909057617, "learning_rate": 1.787230101156173e-07, "loss": 0.2434, "step": 19738 }, { "epoch": 0.9155380333951763, "grad_norm": 5.004364490509033, "learning_rate": 1.7852805620619862e-07, "loss": 0.3194, "step": 19739 }, { "epoch": 0.9155844155844156, "grad_norm": 4.600893497467041, "learning_rate": 1.783332067514265e-07, "loss": 0.2999, "step": 19740 }, { "epoch": 0.915630797773655, "grad_norm": 13.962514877319336, "learning_rate": 1.7813846175552208e-07, "loss": 0.4554, "step": 19741 }, { "epoch": 0.9156771799628942, "grad_norm": 17.93779945373535, "learning_rate": 1.7794382122270472e-07, "loss": 0.4137, "step": 19742 }, { "epoch": 0.9157235621521336, "grad_norm": 7.385587215423584, "learning_rate": 1.777492851571916e-07, "loss": 0.3531, "step": 19743 }, { "epoch": 0.9157699443413729, "grad_norm": 6.688141822814941, "learning_rate": 1.7755485356319657e-07, "loss": 0.2394, "step": 19744 }, { "epoch": 0.9158163265306123, "grad_norm": 5.591355800628662, "learning_rate": 1.7736052644493185e-07, "loss": 0.2063, "step": 19745 }, { "epoch": 0.9158627087198515, "grad_norm": 5.617980003356934, "learning_rate": 1.7716630380660737e-07, "loss": 0.3069, "step": 19746 }, { "epoch": 0.9159090909090909, "grad_norm": 10.36722183227539, "learning_rate": 1.7697218565243146e-07, "loss": 0.5422, "step": 19747 }, { "epoch": 0.9159554730983303, "grad_norm": 6.167073726654053, "learning_rate": 1.7677817198661017e-07, "loss": 0.2577, "step": 19748 }, { "epoch": 0.9160018552875696, "grad_norm": 6.01322078704834, "learning_rate": 1.7658426281334463e-07, "loss": 0.2619, "step": 19749 }, { "epoch": 0.9160482374768089, "grad_norm": 4.36668586730957, "learning_rate": 1.7639045813683698e-07, "loss": 0.2739, "step": 19750 }, { "epoch": 0.9160946196660482, "grad_norm": 7.264522552490234, "learning_rate": 1.761967579612861e-07, "loss": 0.3585, "step": 19751 }, { "epoch": 0.9161410018552876, "grad_norm": 5.087190628051758, "learning_rate": 1.760031622908881e-07, "loss": 0.312, "step": 19752 }, { "epoch": 0.9161873840445269, "grad_norm": 6.467607498168945, "learning_rate": 1.7580967112983737e-07, "loss": 0.3715, "step": 19753 }, { "epoch": 0.9162337662337663, "grad_norm": 4.329234600067139, "learning_rate": 1.7561628448232616e-07, "loss": 0.2429, "step": 19754 }, { "epoch": 0.9162801484230055, "grad_norm": 9.481118202209473, "learning_rate": 1.7542300235254274e-07, "loss": 0.2656, "step": 19755 }, { "epoch": 0.9163265306122449, "grad_norm": 6.371780872344971, "learning_rate": 1.7522982474467543e-07, "loss": 0.2723, "step": 19756 }, { "epoch": 0.9163729128014843, "grad_norm": 19.083518981933594, "learning_rate": 1.750367516629087e-07, "loss": 0.4916, "step": 19757 }, { "epoch": 0.9164192949907236, "grad_norm": 5.152840614318848, "learning_rate": 1.7484378311142634e-07, "loss": 0.3258, "step": 19758 }, { "epoch": 0.9164656771799629, "grad_norm": 6.888979911804199, "learning_rate": 1.74650919094409e-07, "loss": 0.216, "step": 19759 }, { "epoch": 0.9165120593692022, "grad_norm": 4.981368541717529, "learning_rate": 1.7445815961603386e-07, "loss": 0.2839, "step": 19760 }, { "epoch": 0.9165584415584416, "grad_norm": 13.213658332824707, "learning_rate": 1.7426550468047698e-07, "loss": 0.346, "step": 19761 }, { "epoch": 0.9166048237476809, "grad_norm": 9.235508918762207, "learning_rate": 1.740729542919134e-07, "loss": 0.4112, "step": 19762 }, { "epoch": 0.9166512059369202, "grad_norm": 6.421535968780518, "learning_rate": 1.738805084545131e-07, "loss": 0.286, "step": 19763 }, { "epoch": 0.9166975881261595, "grad_norm": 15.017226219177246, "learning_rate": 1.7368816717244663e-07, "loss": 0.4303, "step": 19764 }, { "epoch": 0.9167439703153989, "grad_norm": 9.932437896728516, "learning_rate": 1.7349593044988066e-07, "loss": 0.4119, "step": 19765 }, { "epoch": 0.9167903525046383, "grad_norm": 5.090264797210693, "learning_rate": 1.733037982909791e-07, "loss": 0.2873, "step": 19766 }, { "epoch": 0.9168367346938775, "grad_norm": 7.441737651824951, "learning_rate": 1.7311177069990526e-07, "loss": 0.2131, "step": 19767 }, { "epoch": 0.9168831168831169, "grad_norm": 8.767280578613281, "learning_rate": 1.729198476808186e-07, "loss": 0.3525, "step": 19768 }, { "epoch": 0.9169294990723562, "grad_norm": 3.7546191215515137, "learning_rate": 1.7272802923787746e-07, "loss": 0.1418, "step": 19769 }, { "epoch": 0.9169758812615956, "grad_norm": 6.517380714416504, "learning_rate": 1.7253631537523796e-07, "loss": 0.263, "step": 19770 }, { "epoch": 0.9170222634508349, "grad_norm": 11.313047409057617, "learning_rate": 1.723447060970529e-07, "loss": 0.3393, "step": 19771 }, { "epoch": 0.9170686456400742, "grad_norm": 5.168606758117676, "learning_rate": 1.7215320140747283e-07, "loss": 0.3292, "step": 19772 }, { "epoch": 0.9171150278293135, "grad_norm": 4.864143371582031, "learning_rate": 1.7196180131064778e-07, "loss": 0.2668, "step": 19773 }, { "epoch": 0.9171614100185529, "grad_norm": 6.069028377532959, "learning_rate": 1.717705058107233e-07, "loss": 0.3348, "step": 19774 }, { "epoch": 0.9172077922077922, "grad_norm": 6.7388811111450195, "learning_rate": 1.7157931491184498e-07, "loss": 0.3407, "step": 19775 }, { "epoch": 0.9172541743970315, "grad_norm": 16.9514102935791, "learning_rate": 1.713882286181534e-07, "loss": 0.4956, "step": 19776 }, { "epoch": 0.9173005565862709, "grad_norm": 3.5006887912750244, "learning_rate": 1.7119724693378915e-07, "loss": 0.2825, "step": 19777 }, { "epoch": 0.9173469387755102, "grad_norm": 6.280893802642822, "learning_rate": 1.7100636986288944e-07, "loss": 0.3212, "step": 19778 }, { "epoch": 0.9173933209647496, "grad_norm": 14.149473190307617, "learning_rate": 1.7081559740958986e-07, "loss": 0.5064, "step": 19779 }, { "epoch": 0.9174397031539888, "grad_norm": 9.464948654174805, "learning_rate": 1.7062492957802324e-07, "loss": 0.2385, "step": 19780 }, { "epoch": 0.9174860853432282, "grad_norm": 5.795586109161377, "learning_rate": 1.7043436637232125e-07, "loss": 0.2737, "step": 19781 }, { "epoch": 0.9175324675324675, "grad_norm": 5.974964141845703, "learning_rate": 1.7024390779661003e-07, "loss": 0.2269, "step": 19782 }, { "epoch": 0.9175788497217069, "grad_norm": 5.719669342041016, "learning_rate": 1.7005355385501744e-07, "loss": 0.2652, "step": 19783 }, { "epoch": 0.9176252319109462, "grad_norm": 5.899400234222412, "learning_rate": 1.6986330455166733e-07, "loss": 0.2796, "step": 19784 }, { "epoch": 0.9176716141001855, "grad_norm": 15.477330207824707, "learning_rate": 1.696731598906809e-07, "loss": 0.427, "step": 19785 }, { "epoch": 0.9177179962894249, "grad_norm": 7.876262664794922, "learning_rate": 1.6948311987617815e-07, "loss": 0.2914, "step": 19786 }, { "epoch": 0.9177643784786642, "grad_norm": 7.773625373840332, "learning_rate": 1.6929318451227527e-07, "loss": 0.3112, "step": 19787 }, { "epoch": 0.9178107606679036, "grad_norm": 6.3869452476501465, "learning_rate": 1.6910335380308783e-07, "loss": 0.2736, "step": 19788 }, { "epoch": 0.9178571428571428, "grad_norm": 6.806680202484131, "learning_rate": 1.6891362775272813e-07, "loss": 0.3244, "step": 19789 }, { "epoch": 0.9179035250463822, "grad_norm": 7.996670246124268, "learning_rate": 1.6872400636530616e-07, "loss": 0.3409, "step": 19790 }, { "epoch": 0.9179499072356215, "grad_norm": 5.906307697296143, "learning_rate": 1.685344896449309e-07, "loss": 0.3416, "step": 19791 }, { "epoch": 0.9179962894248609, "grad_norm": 13.87003231048584, "learning_rate": 1.6834507759570795e-07, "loss": 0.4948, "step": 19792 }, { "epoch": 0.9180426716141001, "grad_norm": 8.355334281921387, "learning_rate": 1.6815577022174012e-07, "loss": 0.3078, "step": 19793 }, { "epoch": 0.9180890538033395, "grad_norm": 5.381136417388916, "learning_rate": 1.679665675271286e-07, "loss": 0.3144, "step": 19794 }, { "epoch": 0.9181354359925789, "grad_norm": 9.197346687316895, "learning_rate": 1.6777746951597283e-07, "loss": 0.3397, "step": 19795 }, { "epoch": 0.9181818181818182, "grad_norm": 6.352916240692139, "learning_rate": 1.675884761923696e-07, "loss": 0.2398, "step": 19796 }, { "epoch": 0.9182282003710576, "grad_norm": 7.233625888824463, "learning_rate": 1.6739958756041342e-07, "loss": 0.4098, "step": 19797 }, { "epoch": 0.9182745825602968, "grad_norm": 9.466470718383789, "learning_rate": 1.6721080362419594e-07, "loss": 0.299, "step": 19798 }, { "epoch": 0.9183209647495362, "grad_norm": 7.242284774780273, "learning_rate": 1.6702212438780784e-07, "loss": 0.3661, "step": 19799 }, { "epoch": 0.9183673469387755, "grad_norm": 7.847938537597656, "learning_rate": 1.6683354985533583e-07, "loss": 0.3844, "step": 19800 }, { "epoch": 0.9184137291280149, "grad_norm": 4.818304538726807, "learning_rate": 1.6664508003086556e-07, "loss": 0.211, "step": 19801 }, { "epoch": 0.9184601113172541, "grad_norm": 7.647064685821533, "learning_rate": 1.6645671491848038e-07, "loss": 0.3663, "step": 19802 }, { "epoch": 0.9185064935064935, "grad_norm": 25.457626342773438, "learning_rate": 1.6626845452226148e-07, "loss": 0.442, "step": 19803 }, { "epoch": 0.9185528756957329, "grad_norm": 7.2085442543029785, "learning_rate": 1.6608029884628675e-07, "loss": 0.2433, "step": 19804 }, { "epoch": 0.9185992578849722, "grad_norm": 7.244084358215332, "learning_rate": 1.6589224789463287e-07, "loss": 0.3584, "step": 19805 }, { "epoch": 0.9186456400742115, "grad_norm": 6.704558849334717, "learning_rate": 1.6570430167137332e-07, "loss": 0.2699, "step": 19806 }, { "epoch": 0.9186920222634508, "grad_norm": 9.541385650634766, "learning_rate": 1.6551646018058032e-07, "loss": 0.3965, "step": 19807 }, { "epoch": 0.9187384044526902, "grad_norm": 4.3881354331970215, "learning_rate": 1.65328723426324e-07, "loss": 0.2829, "step": 19808 }, { "epoch": 0.9187847866419295, "grad_norm": 7.076195240020752, "learning_rate": 1.6514109141267053e-07, "loss": 0.4073, "step": 19809 }, { "epoch": 0.9188311688311688, "grad_norm": 5.769280910491943, "learning_rate": 1.6495356414368446e-07, "loss": 0.3887, "step": 19810 }, { "epoch": 0.9188775510204081, "grad_norm": 4.6527228355407715, "learning_rate": 1.6476614162342974e-07, "loss": 0.3075, "step": 19811 }, { "epoch": 0.9189239332096475, "grad_norm": 8.08266544342041, "learning_rate": 1.6457882385596647e-07, "loss": 0.2986, "step": 19812 }, { "epoch": 0.9189703153988869, "grad_norm": 6.240694522857666, "learning_rate": 1.6439161084535193e-07, "loss": 0.2699, "step": 19813 }, { "epoch": 0.9190166975881262, "grad_norm": 5.730281829833984, "learning_rate": 1.6420450259564347e-07, "loss": 0.3569, "step": 19814 }, { "epoch": 0.9190630797773655, "grad_norm": 16.24205780029297, "learning_rate": 1.6401749911089338e-07, "loss": 0.355, "step": 19815 }, { "epoch": 0.9191094619666048, "grad_norm": 6.997021675109863, "learning_rate": 1.6383060039515343e-07, "loss": 0.3068, "step": 19816 }, { "epoch": 0.9191558441558442, "grad_norm": 9.549182891845703, "learning_rate": 1.6364380645247258e-07, "loss": 0.2772, "step": 19817 }, { "epoch": 0.9192022263450835, "grad_norm": 6.213588237762451, "learning_rate": 1.634571172868976e-07, "loss": 0.321, "step": 19818 }, { "epoch": 0.9192486085343228, "grad_norm": 21.991792678833008, "learning_rate": 1.632705329024742e-07, "loss": 0.6025, "step": 19819 }, { "epoch": 0.9192949907235621, "grad_norm": 10.277779579162598, "learning_rate": 1.6308405330324294e-07, "loss": 0.3544, "step": 19820 }, { "epoch": 0.9193413729128015, "grad_norm": 6.027011871337891, "learning_rate": 1.6289767849324402e-07, "loss": 0.2882, "step": 19821 }, { "epoch": 0.9193877551020408, "grad_norm": 9.126688003540039, "learning_rate": 1.6271140847651578e-07, "loss": 0.2887, "step": 19822 }, { "epoch": 0.9194341372912801, "grad_norm": 7.719520092010498, "learning_rate": 1.625252432570934e-07, "loss": 0.3481, "step": 19823 }, { "epoch": 0.9194805194805195, "grad_norm": 6.581862926483154, "learning_rate": 1.6233918283901028e-07, "loss": 0.3409, "step": 19824 }, { "epoch": 0.9195269016697588, "grad_norm": 8.151820182800293, "learning_rate": 1.6215322722629823e-07, "loss": 0.311, "step": 19825 }, { "epoch": 0.9195732838589982, "grad_norm": 6.96189546585083, "learning_rate": 1.6196737642298342e-07, "loss": 0.3022, "step": 19826 }, { "epoch": 0.9196196660482375, "grad_norm": 7.035042762756348, "learning_rate": 1.6178163043309382e-07, "loss": 0.3252, "step": 19827 }, { "epoch": 0.9196660482374768, "grad_norm": 10.765877723693848, "learning_rate": 1.615959892606539e-07, "loss": 0.3502, "step": 19828 }, { "epoch": 0.9197124304267161, "grad_norm": 8.244159698486328, "learning_rate": 1.614104529096844e-07, "loss": 0.4174, "step": 19829 }, { "epoch": 0.9197588126159555, "grad_norm": 7.906473159790039, "learning_rate": 1.6122502138420537e-07, "loss": 0.2846, "step": 19830 }, { "epoch": 0.9198051948051948, "grad_norm": 11.110353469848633, "learning_rate": 1.6103969468823422e-07, "loss": 0.4185, "step": 19831 }, { "epoch": 0.9198515769944341, "grad_norm": 10.044920921325684, "learning_rate": 1.6085447282578548e-07, "loss": 0.4704, "step": 19832 }, { "epoch": 0.9198979591836735, "grad_norm": 9.298633575439453, "learning_rate": 1.6066935580087263e-07, "loss": 0.3244, "step": 19833 }, { "epoch": 0.9199443413729128, "grad_norm": 7.0438737869262695, "learning_rate": 1.604843436175052e-07, "loss": 0.2232, "step": 19834 }, { "epoch": 0.9199907235621522, "grad_norm": 6.97683048248291, "learning_rate": 1.6029943627969226e-07, "loss": 0.3052, "step": 19835 }, { "epoch": 0.9200371057513914, "grad_norm": 8.154138565063477, "learning_rate": 1.6011463379144e-07, "loss": 0.3559, "step": 19836 }, { "epoch": 0.9200834879406308, "grad_norm": 9.422412872314453, "learning_rate": 1.5992993615675023e-07, "loss": 0.3693, "step": 19837 }, { "epoch": 0.9201298701298701, "grad_norm": 11.484457015991211, "learning_rate": 1.5974534337962588e-07, "loss": 0.4695, "step": 19838 }, { "epoch": 0.9201762523191095, "grad_norm": 4.566390514373779, "learning_rate": 1.5956085546406596e-07, "loss": 0.2679, "step": 19839 }, { "epoch": 0.9202226345083488, "grad_norm": 12.310159683227539, "learning_rate": 1.5937647241406674e-07, "loss": 0.5203, "step": 19840 }, { "epoch": 0.9202690166975881, "grad_norm": 3.9023189544677734, "learning_rate": 1.5919219423362387e-07, "loss": 0.2457, "step": 19841 }, { "epoch": 0.9203153988868275, "grad_norm": 10.713443756103516, "learning_rate": 1.5900802092672806e-07, "loss": 0.2779, "step": 19842 }, { "epoch": 0.9203617810760668, "grad_norm": 7.758133411407471, "learning_rate": 1.5882395249737005e-07, "loss": 0.2606, "step": 19843 }, { "epoch": 0.9204081632653062, "grad_norm": 9.505488395690918, "learning_rate": 1.5863998894953826e-07, "loss": 0.3795, "step": 19844 }, { "epoch": 0.9204545454545454, "grad_norm": 4.2258477210998535, "learning_rate": 1.5845613028721674e-07, "loss": 0.2329, "step": 19845 }, { "epoch": 0.9205009276437848, "grad_norm": 5.7399773597717285, "learning_rate": 1.582723765143901e-07, "loss": 0.2066, "step": 19846 }, { "epoch": 0.9205473098330241, "grad_norm": 8.17392349243164, "learning_rate": 1.5808872763503902e-07, "loss": 0.3435, "step": 19847 }, { "epoch": 0.9205936920222635, "grad_norm": 4.883145809173584, "learning_rate": 1.579051836531409e-07, "loss": 0.3357, "step": 19848 }, { "epoch": 0.9206400742115027, "grad_norm": 7.244090557098389, "learning_rate": 1.5772174457267364e-07, "loss": 0.3847, "step": 19849 }, { "epoch": 0.9206864564007421, "grad_norm": 8.159497261047363, "learning_rate": 1.575384103976102e-07, "loss": 0.3651, "step": 19850 }, { "epoch": 0.9207328385899815, "grad_norm": 7.6764092445373535, "learning_rate": 1.573551811319235e-07, "loss": 0.3563, "step": 19851 }, { "epoch": 0.9207792207792208, "grad_norm": 5.991933345794678, "learning_rate": 1.5717205677958315e-07, "loss": 0.3108, "step": 19852 }, { "epoch": 0.9208256029684602, "grad_norm": 5.260477066040039, "learning_rate": 1.5698903734455485e-07, "loss": 0.2877, "step": 19853 }, { "epoch": 0.9208719851576994, "grad_norm": 5.57321834564209, "learning_rate": 1.5680612283080488e-07, "loss": 0.2989, "step": 19854 }, { "epoch": 0.9209183673469388, "grad_norm": 6.2277116775512695, "learning_rate": 1.5662331324229564e-07, "loss": 0.3117, "step": 19855 }, { "epoch": 0.9209647495361781, "grad_norm": 7.268479347229004, "learning_rate": 1.5644060858298782e-07, "loss": 0.3199, "step": 19856 }, { "epoch": 0.9210111317254175, "grad_norm": 5.956372261047363, "learning_rate": 1.5625800885683995e-07, "loss": 0.187, "step": 19857 }, { "epoch": 0.9210575139146567, "grad_norm": 10.891377449035645, "learning_rate": 1.560755140678072e-07, "loss": 0.3691, "step": 19858 }, { "epoch": 0.9211038961038961, "grad_norm": 8.49924373626709, "learning_rate": 1.5589312421984304e-07, "loss": 0.3983, "step": 19859 }, { "epoch": 0.9211502782931354, "grad_norm": 7.59304141998291, "learning_rate": 1.557108393168999e-07, "loss": 0.2139, "step": 19860 }, { "epoch": 0.9211966604823748, "grad_norm": 4.515729904174805, "learning_rate": 1.5552865936292572e-07, "loss": 0.3447, "step": 19861 }, { "epoch": 0.921243042671614, "grad_norm": 9.691300392150879, "learning_rate": 1.5534658436186846e-07, "loss": 0.2634, "step": 19862 }, { "epoch": 0.9212894248608534, "grad_norm": 36.03956985473633, "learning_rate": 1.5516461431767272e-07, "loss": 0.4992, "step": 19863 }, { "epoch": 0.9213358070500928, "grad_norm": 8.534506797790527, "learning_rate": 1.5498274923427925e-07, "loss": 0.2875, "step": 19864 }, { "epoch": 0.9213821892393321, "grad_norm": 5.167369842529297, "learning_rate": 1.5480098911562937e-07, "loss": 0.2984, "step": 19865 }, { "epoch": 0.9214285714285714, "grad_norm": 10.756681442260742, "learning_rate": 1.5461933396566042e-07, "loss": 0.2646, "step": 19866 }, { "epoch": 0.9214749536178107, "grad_norm": 5.900265693664551, "learning_rate": 1.544377837883082e-07, "loss": 0.3869, "step": 19867 }, { "epoch": 0.9215213358070501, "grad_norm": 7.514347553253174, "learning_rate": 1.5425633858750566e-07, "loss": 0.267, "step": 19868 }, { "epoch": 0.9215677179962894, "grad_norm": 7.008934020996094, "learning_rate": 1.5407499836718354e-07, "loss": 0.3021, "step": 19869 }, { "epoch": 0.9216141001855288, "grad_norm": 10.091106414794922, "learning_rate": 1.5389376313127036e-07, "loss": 0.4269, "step": 19870 }, { "epoch": 0.921660482374768, "grad_norm": 8.562944412231445, "learning_rate": 1.53712632883693e-07, "loss": 0.3184, "step": 19871 }, { "epoch": 0.9217068645640074, "grad_norm": 6.120497226715088, "learning_rate": 1.5353160762837494e-07, "loss": 0.2979, "step": 19872 }, { "epoch": 0.9217532467532468, "grad_norm": 9.481664657592773, "learning_rate": 1.5335068736923864e-07, "loss": 0.2072, "step": 19873 }, { "epoch": 0.9217996289424861, "grad_norm": 8.499496459960938, "learning_rate": 1.5316987211020372e-07, "loss": 0.3723, "step": 19874 }, { "epoch": 0.9218460111317254, "grad_norm": 6.658577919006348, "learning_rate": 1.5298916185518654e-07, "loss": 0.2816, "step": 19875 }, { "epoch": 0.9218923933209647, "grad_norm": 5.183844089508057, "learning_rate": 1.528085566081028e-07, "loss": 0.3026, "step": 19876 }, { "epoch": 0.9219387755102041, "grad_norm": 11.88556957244873, "learning_rate": 1.5262805637286494e-07, "loss": 0.2809, "step": 19877 }, { "epoch": 0.9219851576994434, "grad_norm": 6.190609455108643, "learning_rate": 1.5244766115338372e-07, "loss": 0.3356, "step": 19878 }, { "epoch": 0.9220315398886827, "grad_norm": 10.467355728149414, "learning_rate": 1.5226737095356713e-07, "loss": 0.3659, "step": 19879 }, { "epoch": 0.922077922077922, "grad_norm": 9.712430953979492, "learning_rate": 1.5208718577732096e-07, "loss": 0.369, "step": 19880 }, { "epoch": 0.9221243042671614, "grad_norm": 8.12569522857666, "learning_rate": 1.519071056285487e-07, "loss": 0.3034, "step": 19881 }, { "epoch": 0.9221706864564008, "grad_norm": 4.920129299163818, "learning_rate": 1.517271305111523e-07, "loss": 0.3419, "step": 19882 }, { "epoch": 0.9222170686456401, "grad_norm": 8.971246719360352, "learning_rate": 1.5154726042903023e-07, "loss": 0.3198, "step": 19883 }, { "epoch": 0.9222634508348794, "grad_norm": 16.24184799194336, "learning_rate": 1.5136749538607887e-07, "loss": 0.3536, "step": 19884 }, { "epoch": 0.9223098330241187, "grad_norm": 6.8656325340271, "learning_rate": 1.5118783538619453e-07, "loss": 0.3846, "step": 19885 }, { "epoch": 0.9223562152133581, "grad_norm": 6.071588039398193, "learning_rate": 1.5100828043326742e-07, "loss": 0.2898, "step": 19886 }, { "epoch": 0.9224025974025974, "grad_norm": 9.536011695861816, "learning_rate": 1.5082883053118836e-07, "loss": 0.4078, "step": 19887 }, { "epoch": 0.9224489795918367, "grad_norm": 13.898510932922363, "learning_rate": 1.506494856838453e-07, "loss": 0.4727, "step": 19888 }, { "epoch": 0.922495361781076, "grad_norm": 9.41129207611084, "learning_rate": 1.504702458951235e-07, "loss": 0.4537, "step": 19889 }, { "epoch": 0.9225417439703154, "grad_norm": 5.720437526702881, "learning_rate": 1.5029111116890592e-07, "loss": 0.2366, "step": 19890 }, { "epoch": 0.9225881261595548, "grad_norm": 6.555924892425537, "learning_rate": 1.501120815090734e-07, "loss": 0.3669, "step": 19891 }, { "epoch": 0.922634508348794, "grad_norm": 6.022244930267334, "learning_rate": 1.4993315691950395e-07, "loss": 0.2729, "step": 19892 }, { "epoch": 0.9226808905380334, "grad_norm": 8.920923233032227, "learning_rate": 1.4975433740407496e-07, "loss": 0.3269, "step": 19893 }, { "epoch": 0.9227272727272727, "grad_norm": 5.422237873077393, "learning_rate": 1.4957562296666007e-07, "loss": 0.2416, "step": 19894 }, { "epoch": 0.9227736549165121, "grad_norm": 5.581775188446045, "learning_rate": 1.4939701361113111e-07, "loss": 0.2727, "step": 19895 }, { "epoch": 0.9228200371057514, "grad_norm": 9.468988418579102, "learning_rate": 1.4921850934135785e-07, "loss": 0.314, "step": 19896 }, { "epoch": 0.9228664192949907, "grad_norm": 6.621324062347412, "learning_rate": 1.49040110161206e-07, "loss": 0.3342, "step": 19897 }, { "epoch": 0.92291280148423, "grad_norm": 5.618462562561035, "learning_rate": 1.4886181607454196e-07, "loss": 0.2661, "step": 19898 }, { "epoch": 0.9229591836734694, "grad_norm": 4.188536167144775, "learning_rate": 1.4868362708522822e-07, "loss": 0.2767, "step": 19899 }, { "epoch": 0.9230055658627088, "grad_norm": 11.404852867126465, "learning_rate": 1.485055431971244e-07, "loss": 0.351, "step": 19900 }, { "epoch": 0.923051948051948, "grad_norm": 3.823042392730713, "learning_rate": 1.4832756441409025e-07, "loss": 0.2475, "step": 19901 }, { "epoch": 0.9230983302411874, "grad_norm": 10.63672924041748, "learning_rate": 1.481496907399793e-07, "loss": 0.3898, "step": 19902 }, { "epoch": 0.9231447124304267, "grad_norm": 7.746214389801025, "learning_rate": 1.4797192217864632e-07, "loss": 0.3226, "step": 19903 }, { "epoch": 0.9231910946196661, "grad_norm": 7.556499004364014, "learning_rate": 1.477942587339426e-07, "loss": 0.3614, "step": 19904 }, { "epoch": 0.9232374768089053, "grad_norm": 4.092464923858643, "learning_rate": 1.4761670040971732e-07, "loss": 0.2199, "step": 19905 }, { "epoch": 0.9232838589981447, "grad_norm": 7.2220458984375, "learning_rate": 1.4743924720981683e-07, "loss": 0.3145, "step": 19906 }, { "epoch": 0.923330241187384, "grad_norm": 7.182905197143555, "learning_rate": 1.472618991380853e-07, "loss": 0.2855, "step": 19907 }, { "epoch": 0.9233766233766234, "grad_norm": 3.7233011722564697, "learning_rate": 1.470846561983652e-07, "loss": 0.2442, "step": 19908 }, { "epoch": 0.9234230055658627, "grad_norm": 5.84940242767334, "learning_rate": 1.4690751839449623e-07, "loss": 0.3671, "step": 19909 }, { "epoch": 0.923469387755102, "grad_norm": 11.52173900604248, "learning_rate": 1.4673048573031644e-07, "loss": 0.3853, "step": 19910 }, { "epoch": 0.9235157699443414, "grad_norm": 6.364078044891357, "learning_rate": 1.465535582096611e-07, "loss": 0.3486, "step": 19911 }, { "epoch": 0.9235621521335807, "grad_norm": 5.486988067626953, "learning_rate": 1.463767358363627e-07, "loss": 0.1405, "step": 19912 }, { "epoch": 0.9236085343228201, "grad_norm": 5.825860023498535, "learning_rate": 1.4620001861425203e-07, "loss": 0.2895, "step": 19913 }, { "epoch": 0.9236549165120593, "grad_norm": 8.312952041625977, "learning_rate": 1.4602340654715774e-07, "loss": 0.2279, "step": 19914 }, { "epoch": 0.9237012987012987, "grad_norm": 11.69428539276123, "learning_rate": 1.4584689963890618e-07, "loss": 0.4162, "step": 19915 }, { "epoch": 0.923747680890538, "grad_norm": 14.47835922241211, "learning_rate": 1.4567049789332212e-07, "loss": 0.5766, "step": 19916 }, { "epoch": 0.9237940630797774, "grad_norm": 6.960328102111816, "learning_rate": 1.4549420131422575e-07, "loss": 0.3285, "step": 19917 }, { "epoch": 0.9238404452690167, "grad_norm": 11.730834007263184, "learning_rate": 1.4531800990543689e-07, "loss": 0.3999, "step": 19918 }, { "epoch": 0.923886827458256, "grad_norm": 9.565339088439941, "learning_rate": 1.4514192367077296e-07, "loss": 0.3424, "step": 19919 }, { "epoch": 0.9239332096474954, "grad_norm": 7.367584705352783, "learning_rate": 1.4496594261404817e-07, "loss": 0.4096, "step": 19920 }, { "epoch": 0.9239795918367347, "grad_norm": 7.069614410400391, "learning_rate": 1.4479006673907615e-07, "loss": 0.2994, "step": 19921 }, { "epoch": 0.924025974025974, "grad_norm": 5.683314323425293, "learning_rate": 1.4461429604966603e-07, "loss": 0.2597, "step": 19922 }, { "epoch": 0.9240723562152133, "grad_norm": 5.880598068237305, "learning_rate": 1.444386305496265e-07, "loss": 0.3598, "step": 19923 }, { "epoch": 0.9241187384044527, "grad_norm": 4.974563121795654, "learning_rate": 1.4426307024276275e-07, "loss": 0.2587, "step": 19924 }, { "epoch": 0.924165120593692, "grad_norm": 4.354637622833252, "learning_rate": 1.440876151328785e-07, "loss": 0.1737, "step": 19925 }, { "epoch": 0.9242115027829314, "grad_norm": 6.491020679473877, "learning_rate": 1.439122652237751e-07, "loss": 0.3059, "step": 19926 }, { "epoch": 0.9242578849721707, "grad_norm": 6.526093482971191, "learning_rate": 1.4373702051925066e-07, "loss": 0.3265, "step": 19927 }, { "epoch": 0.92430426716141, "grad_norm": 10.234623908996582, "learning_rate": 1.4356188102310266e-07, "loss": 0.3472, "step": 19928 }, { "epoch": 0.9243506493506494, "grad_norm": 5.071361064910889, "learning_rate": 1.4338684673912472e-07, "loss": 0.2983, "step": 19929 }, { "epoch": 0.9243970315398887, "grad_norm": 6.437931537628174, "learning_rate": 1.4321191767110888e-07, "loss": 0.2953, "step": 19930 }, { "epoch": 0.924443413729128, "grad_norm": 5.807009220123291, "learning_rate": 1.4303709382284648e-07, "loss": 0.3373, "step": 19931 }, { "epoch": 0.9244897959183673, "grad_norm": 6.697851181030273, "learning_rate": 1.4286237519812229e-07, "loss": 0.3153, "step": 19932 }, { "epoch": 0.9245361781076067, "grad_norm": 13.704888343811035, "learning_rate": 1.426877618007233e-07, "loss": 0.3206, "step": 19933 }, { "epoch": 0.924582560296846, "grad_norm": 9.4440336227417, "learning_rate": 1.4251325363443204e-07, "loss": 0.3649, "step": 19934 }, { "epoch": 0.9246289424860853, "grad_norm": 7.483353137969971, "learning_rate": 1.4233885070302877e-07, "loss": 0.3142, "step": 19935 }, { "epoch": 0.9246753246753247, "grad_norm": 6.692939281463623, "learning_rate": 1.4216455301029274e-07, "loss": 0.2403, "step": 19936 }, { "epoch": 0.924721706864564, "grad_norm": 11.925960540771484, "learning_rate": 1.4199036055999926e-07, "loss": 0.3693, "step": 19937 }, { "epoch": 0.9247680890538034, "grad_norm": 6.685267925262451, "learning_rate": 1.4181627335592196e-07, "loss": 0.2663, "step": 19938 }, { "epoch": 0.9248144712430427, "grad_norm": 4.297903537750244, "learning_rate": 1.4164229140183284e-07, "loss": 0.2802, "step": 19939 }, { "epoch": 0.924860853432282, "grad_norm": 6.205088138580322, "learning_rate": 1.4146841470150107e-07, "loss": 0.2554, "step": 19940 }, { "epoch": 0.9249072356215213, "grad_norm": 6.28729248046875, "learning_rate": 1.4129464325869368e-07, "loss": 0.3119, "step": 19941 }, { "epoch": 0.9249536178107607, "grad_norm": 11.090180397033691, "learning_rate": 1.4112097707717487e-07, "loss": 0.403, "step": 19942 }, { "epoch": 0.925, "grad_norm": 8.108369827270508, "learning_rate": 1.4094741616070716e-07, "loss": 0.4263, "step": 19943 }, { "epoch": 0.9250463821892393, "grad_norm": 7.023176670074463, "learning_rate": 1.4077396051305093e-07, "loss": 0.2904, "step": 19944 }, { "epoch": 0.9250927643784786, "grad_norm": 9.035301208496094, "learning_rate": 1.4060061013796366e-07, "loss": 0.4207, "step": 19945 }, { "epoch": 0.925139146567718, "grad_norm": 8.038938522338867, "learning_rate": 1.4042736503920184e-07, "loss": 0.3361, "step": 19946 }, { "epoch": 0.9251855287569574, "grad_norm": 5.752167701721191, "learning_rate": 1.4025422522051746e-07, "loss": 0.3068, "step": 19947 }, { "epoch": 0.9252319109461966, "grad_norm": 9.49870777130127, "learning_rate": 1.4008119068566194e-07, "loss": 0.2959, "step": 19948 }, { "epoch": 0.925278293135436, "grad_norm": 7.317039489746094, "learning_rate": 1.39908261438384e-07, "loss": 0.2486, "step": 19949 }, { "epoch": 0.9253246753246753, "grad_norm": 5.851428508758545, "learning_rate": 1.3973543748243002e-07, "loss": 0.3549, "step": 19950 }, { "epoch": 0.9253710575139147, "grad_norm": 7.79538106918335, "learning_rate": 1.3956271882154538e-07, "loss": 0.2717, "step": 19951 }, { "epoch": 0.925417439703154, "grad_norm": 12.175848960876465, "learning_rate": 1.3939010545946984e-07, "loss": 0.3911, "step": 19952 }, { "epoch": 0.9254638218923933, "grad_norm": 17.880842208862305, "learning_rate": 1.3921759739994378e-07, "loss": 0.5275, "step": 19953 }, { "epoch": 0.9255102040816326, "grad_norm": 10.735565185546875, "learning_rate": 1.390451946467053e-07, "loss": 0.3955, "step": 19954 }, { "epoch": 0.925556586270872, "grad_norm": 9.961502075195312, "learning_rate": 1.3887289720348863e-07, "loss": 0.3031, "step": 19955 }, { "epoch": 0.9256029684601114, "grad_norm": 7.983379364013672, "learning_rate": 1.3870070507402688e-07, "loss": 0.4564, "step": 19956 }, { "epoch": 0.9256493506493506, "grad_norm": 6.3309645652771, "learning_rate": 1.385286182620499e-07, "loss": 0.3111, "step": 19957 }, { "epoch": 0.92569573283859, "grad_norm": 9.323409080505371, "learning_rate": 1.3835663677128631e-07, "loss": 0.4019, "step": 19958 }, { "epoch": 0.9257421150278293, "grad_norm": 7.035310745239258, "learning_rate": 1.381847606054615e-07, "loss": 0.3196, "step": 19959 }, { "epoch": 0.9257884972170687, "grad_norm": 8.954418182373047, "learning_rate": 1.3801298976830025e-07, "loss": 0.2808, "step": 19960 }, { "epoch": 0.9258348794063079, "grad_norm": 3.925469398498535, "learning_rate": 1.3784132426352292e-07, "loss": 0.2272, "step": 19961 }, { "epoch": 0.9258812615955473, "grad_norm": 12.589022636413574, "learning_rate": 1.3766976409484877e-07, "loss": 0.4041, "step": 19962 }, { "epoch": 0.9259276437847866, "grad_norm": 9.412731170654297, "learning_rate": 1.3749830926599427e-07, "loss": 0.3669, "step": 19963 }, { "epoch": 0.925974025974026, "grad_norm": 10.88998031616211, "learning_rate": 1.3732695978067478e-07, "loss": 0.3449, "step": 19964 }, { "epoch": 0.9260204081632653, "grad_norm": 5.326835632324219, "learning_rate": 1.3715571564260121e-07, "loss": 0.2632, "step": 19965 }, { "epoch": 0.9260667903525046, "grad_norm": 7.811648845672607, "learning_rate": 1.3698457685548449e-07, "loss": 0.387, "step": 19966 }, { "epoch": 0.926113172541744, "grad_norm": 7.857231140136719, "learning_rate": 1.3681354342303277e-07, "loss": 0.3327, "step": 19967 }, { "epoch": 0.9261595547309833, "grad_norm": 6.707023620605469, "learning_rate": 1.3664261534894975e-07, "loss": 0.356, "step": 19968 }, { "epoch": 0.9262059369202227, "grad_norm": 4.725252151489258, "learning_rate": 1.364717926369391e-07, "loss": 0.2222, "step": 19969 }, { "epoch": 0.9262523191094619, "grad_norm": 8.107054710388184, "learning_rate": 1.363010752907018e-07, "loss": 0.2775, "step": 19970 }, { "epoch": 0.9262987012987013, "grad_norm": 7.70463228225708, "learning_rate": 1.3613046331393654e-07, "loss": 0.3252, "step": 19971 }, { "epoch": 0.9263450834879406, "grad_norm": 9.119243621826172, "learning_rate": 1.3595995671033978e-07, "loss": 0.3231, "step": 19972 }, { "epoch": 0.92639146567718, "grad_norm": 23.90218734741211, "learning_rate": 1.3578955548360473e-07, "loss": 0.4779, "step": 19973 }, { "epoch": 0.9264378478664193, "grad_norm": 5.954303741455078, "learning_rate": 1.356192596374234e-07, "loss": 0.2222, "step": 19974 }, { "epoch": 0.9264842300556586, "grad_norm": 8.125905990600586, "learning_rate": 1.3544906917548505e-07, "loss": 0.2508, "step": 19975 }, { "epoch": 0.926530612244898, "grad_norm": 5.212984085083008, "learning_rate": 1.3527898410147677e-07, "loss": 0.2709, "step": 19976 }, { "epoch": 0.9265769944341373, "grad_norm": 7.305872917175293, "learning_rate": 1.3510900441908338e-07, "loss": 0.3129, "step": 19977 }, { "epoch": 0.9266233766233766, "grad_norm": 10.611199378967285, "learning_rate": 1.34939130131988e-07, "loss": 0.3313, "step": 19978 }, { "epoch": 0.9266697588126159, "grad_norm": 9.303157806396484, "learning_rate": 1.3476936124386996e-07, "loss": 0.2908, "step": 19979 }, { "epoch": 0.9267161410018553, "grad_norm": 7.510424613952637, "learning_rate": 1.3459969775840743e-07, "loss": 0.3208, "step": 19980 }, { "epoch": 0.9267625231910946, "grad_norm": 8.73433780670166, "learning_rate": 1.3443013967927632e-07, "loss": 0.3082, "step": 19981 }, { "epoch": 0.926808905380334, "grad_norm": 5.466315746307373, "learning_rate": 1.3426068701014983e-07, "loss": 0.217, "step": 19982 }, { "epoch": 0.9268552875695732, "grad_norm": 13.025567054748535, "learning_rate": 1.3409133975469947e-07, "loss": 0.3006, "step": 19983 }, { "epoch": 0.9269016697588126, "grad_norm": 5.8413920402526855, "learning_rate": 1.339220979165934e-07, "loss": 0.245, "step": 19984 }, { "epoch": 0.926948051948052, "grad_norm": 6.740835189819336, "learning_rate": 1.3375296149949867e-07, "loss": 0.3456, "step": 19985 }, { "epoch": 0.9269944341372913, "grad_norm": 7.759884834289551, "learning_rate": 1.3358393050707908e-07, "loss": 0.3188, "step": 19986 }, { "epoch": 0.9270408163265306, "grad_norm": 8.428165435791016, "learning_rate": 1.334150049429972e-07, "loss": 0.378, "step": 19987 }, { "epoch": 0.9270871985157699, "grad_norm": 8.4190673828125, "learning_rate": 1.332461848109118e-07, "loss": 0.3491, "step": 19988 }, { "epoch": 0.9271335807050093, "grad_norm": 11.060848236083984, "learning_rate": 1.330774701144816e-07, "loss": 0.438, "step": 19989 }, { "epoch": 0.9271799628942486, "grad_norm": 7.347683429718018, "learning_rate": 1.3290886085736088e-07, "loss": 0.3459, "step": 19990 }, { "epoch": 0.9272263450834879, "grad_norm": 7.770122528076172, "learning_rate": 1.3274035704320231e-07, "loss": 0.3106, "step": 19991 }, { "epoch": 0.9272727272727272, "grad_norm": 6.719268798828125, "learning_rate": 1.325719586756563e-07, "loss": 0.3717, "step": 19992 }, { "epoch": 0.9273191094619666, "grad_norm": 6.7305779457092285, "learning_rate": 1.3240366575837216e-07, "loss": 0.2724, "step": 19993 }, { "epoch": 0.927365491651206, "grad_norm": 9.045555114746094, "learning_rate": 1.3223547829499527e-07, "loss": 0.3359, "step": 19994 }, { "epoch": 0.9274118738404453, "grad_norm": 9.407811164855957, "learning_rate": 1.3206739628916886e-07, "loss": 0.3858, "step": 19995 }, { "epoch": 0.9274582560296846, "grad_norm": 8.376168251037598, "learning_rate": 1.3189941974453502e-07, "loss": 0.3272, "step": 19996 }, { "epoch": 0.9275046382189239, "grad_norm": 6.676793098449707, "learning_rate": 1.3173154866473248e-07, "loss": 0.3037, "step": 19997 }, { "epoch": 0.9275510204081633, "grad_norm": 12.567474365234375, "learning_rate": 1.3156378305339778e-07, "loss": 0.4331, "step": 19998 }, { "epoch": 0.9275974025974026, "grad_norm": 6.665516376495361, "learning_rate": 1.3139612291416636e-07, "loss": 0.2831, "step": 19999 }, { "epoch": 0.9276437847866419, "grad_norm": 8.46810531616211, "learning_rate": 1.312285682506703e-07, "loss": 0.2071, "step": 20000 }, { "epoch": 0.9276901669758812, "grad_norm": 8.808637619018555, "learning_rate": 1.3106111906653896e-07, "loss": 0.3952, "step": 20001 }, { "epoch": 0.9277365491651206, "grad_norm": 19.728055953979492, "learning_rate": 1.3089377536539994e-07, "loss": 0.5202, "step": 20002 }, { "epoch": 0.92778293135436, "grad_norm": 5.954697132110596, "learning_rate": 1.3072653715087924e-07, "loss": 0.2743, "step": 20003 }, { "epoch": 0.9278293135435992, "grad_norm": 4.861973762512207, "learning_rate": 1.3055940442660008e-07, "loss": 0.2908, "step": 20004 }, { "epoch": 0.9278756957328386, "grad_norm": 13.907085418701172, "learning_rate": 1.3039237719618348e-07, "loss": 0.5121, "step": 20005 }, { "epoch": 0.9279220779220779, "grad_norm": 7.386903762817383, "learning_rate": 1.3022545546324706e-07, "loss": 0.4685, "step": 20006 }, { "epoch": 0.9279684601113173, "grad_norm": 5.551381587982178, "learning_rate": 1.3005863923140795e-07, "loss": 0.2844, "step": 20007 }, { "epoch": 0.9280148423005566, "grad_norm": 5.850954055786133, "learning_rate": 1.2989192850427933e-07, "loss": 0.3776, "step": 20008 }, { "epoch": 0.9280612244897959, "grad_norm": 4.663415908813477, "learning_rate": 1.2972532328547338e-07, "loss": 0.1602, "step": 20009 }, { "epoch": 0.9281076066790352, "grad_norm": 6.631683349609375, "learning_rate": 1.2955882357859994e-07, "loss": 0.2996, "step": 20010 }, { "epoch": 0.9281539888682746, "grad_norm": 6.815840244293213, "learning_rate": 1.293924293872656e-07, "loss": 0.2857, "step": 20011 }, { "epoch": 0.928200371057514, "grad_norm": 12.021012306213379, "learning_rate": 1.2922614071507522e-07, "loss": 0.4217, "step": 20012 }, { "epoch": 0.9282467532467532, "grad_norm": 6.583495140075684, "learning_rate": 1.290599575656315e-07, "loss": 0.3902, "step": 20013 }, { "epoch": 0.9282931354359926, "grad_norm": 16.232194900512695, "learning_rate": 1.2889387994253432e-07, "loss": 0.3114, "step": 20014 }, { "epoch": 0.9283395176252319, "grad_norm": 5.8547539710998535, "learning_rate": 1.2872790784938195e-07, "loss": 0.2171, "step": 20015 }, { "epoch": 0.9283858998144713, "grad_norm": 4.731333255767822, "learning_rate": 1.285620412897709e-07, "loss": 0.3507, "step": 20016 }, { "epoch": 0.9284322820037105, "grad_norm": 6.800314426422119, "learning_rate": 1.2839628026729333e-07, "loss": 0.3713, "step": 20017 }, { "epoch": 0.9284786641929499, "grad_norm": 7.305483818054199, "learning_rate": 1.2823062478554083e-07, "loss": 0.3414, "step": 20018 }, { "epoch": 0.9285250463821892, "grad_norm": 9.28231143951416, "learning_rate": 1.2806507484810215e-07, "loss": 0.3832, "step": 20019 }, { "epoch": 0.9285714285714286, "grad_norm": 10.963528633117676, "learning_rate": 1.278996304585639e-07, "loss": 0.4096, "step": 20020 }, { "epoch": 0.9286178107606679, "grad_norm": 7.8201584815979, "learning_rate": 1.2773429162051097e-07, "loss": 0.2996, "step": 20021 }, { "epoch": 0.9286641929499072, "grad_norm": 5.200565814971924, "learning_rate": 1.2756905833752386e-07, "loss": 0.3248, "step": 20022 }, { "epoch": 0.9287105751391466, "grad_norm": 6.946499824523926, "learning_rate": 1.2740393061318357e-07, "loss": 0.2152, "step": 20023 }, { "epoch": 0.9287569573283859, "grad_norm": 9.47254753112793, "learning_rate": 1.2723890845106723e-07, "loss": 0.3346, "step": 20024 }, { "epoch": 0.9288033395176253, "grad_norm": 12.283960342407227, "learning_rate": 1.2707399185474977e-07, "loss": 0.4154, "step": 20025 }, { "epoch": 0.9288497217068645, "grad_norm": 5.3296709060668945, "learning_rate": 1.269091808278039e-07, "loss": 0.3605, "step": 20026 }, { "epoch": 0.9288961038961039, "grad_norm": 5.950379371643066, "learning_rate": 1.2674447537380063e-07, "loss": 0.2359, "step": 20027 }, { "epoch": 0.9289424860853432, "grad_norm": 10.71282958984375, "learning_rate": 1.2657987549630824e-07, "loss": 0.4251, "step": 20028 }, { "epoch": 0.9289888682745826, "grad_norm": 12.557130813598633, "learning_rate": 1.2641538119889164e-07, "loss": 0.3152, "step": 20029 }, { "epoch": 0.9290352504638218, "grad_norm": 4.983754634857178, "learning_rate": 1.2625099248511518e-07, "loss": 0.2676, "step": 20030 }, { "epoch": 0.9290816326530612, "grad_norm": 9.330854415893555, "learning_rate": 1.2608670935854107e-07, "loss": 0.378, "step": 20031 }, { "epoch": 0.9291280148423006, "grad_norm": 6.054365634918213, "learning_rate": 1.2592253182272752e-07, "loss": 0.2555, "step": 20032 }, { "epoch": 0.9291743970315399, "grad_norm": 8.272232055664062, "learning_rate": 1.2575845988123114e-07, "loss": 0.3894, "step": 20033 }, { "epoch": 0.9292207792207792, "grad_norm": 7.022956848144531, "learning_rate": 1.2559449353760688e-07, "loss": 0.3403, "step": 20034 }, { "epoch": 0.9292671614100185, "grad_norm": 7.967806816101074, "learning_rate": 1.254306327954069e-07, "loss": 0.37, "step": 20035 }, { "epoch": 0.9293135435992579, "grad_norm": 6.060547351837158, "learning_rate": 1.252668776581817e-07, "loss": 0.2975, "step": 20036 }, { "epoch": 0.9293599257884972, "grad_norm": 10.62900161743164, "learning_rate": 1.2510322812947784e-07, "loss": 0.3749, "step": 20037 }, { "epoch": 0.9294063079777366, "grad_norm": 5.313177585601807, "learning_rate": 1.2493968421284196e-07, "loss": 0.2814, "step": 20038 }, { "epoch": 0.9294526901669758, "grad_norm": 9.074952125549316, "learning_rate": 1.247762459118157e-07, "loss": 0.31, "step": 20039 }, { "epoch": 0.9294990723562152, "grad_norm": 4.277820110321045, "learning_rate": 1.2461291322994118e-07, "loss": 0.2013, "step": 20040 }, { "epoch": 0.9295454545454546, "grad_norm": 11.354434967041016, "learning_rate": 1.2444968617075616e-07, "loss": 0.3759, "step": 20041 }, { "epoch": 0.9295918367346939, "grad_norm": 7.774902820587158, "learning_rate": 1.242865647377972e-07, "loss": 0.3664, "step": 20042 }, { "epoch": 0.9296382189239332, "grad_norm": 5.8541483879089355, "learning_rate": 1.2412354893459822e-07, "loss": 0.3153, "step": 20043 }, { "epoch": 0.9296846011131725, "grad_norm": 5.638794898986816, "learning_rate": 1.2396063876469077e-07, "loss": 0.2602, "step": 20044 }, { "epoch": 0.9297309833024119, "grad_norm": 10.626411437988281, "learning_rate": 1.2379783423160429e-07, "loss": 0.2957, "step": 20045 }, { "epoch": 0.9297773654916512, "grad_norm": 9.449468612670898, "learning_rate": 1.236351353388654e-07, "loss": 0.3095, "step": 20046 }, { "epoch": 0.9298237476808905, "grad_norm": 5.443962097167969, "learning_rate": 1.234725420899996e-07, "loss": 0.3216, "step": 20047 }, { "epoch": 0.9298701298701298, "grad_norm": 6.815066814422607, "learning_rate": 1.2331005448852905e-07, "loss": 0.2105, "step": 20048 }, { "epoch": 0.9299165120593692, "grad_norm": 6.507541179656982, "learning_rate": 1.2314767253797433e-07, "loss": 0.3628, "step": 20049 }, { "epoch": 0.9299628942486086, "grad_norm": 8.33092212677002, "learning_rate": 1.2298539624185257e-07, "loss": 0.3825, "step": 20050 }, { "epoch": 0.9300092764378479, "grad_norm": 5.622794151306152, "learning_rate": 1.2282322560367986e-07, "loss": 0.2653, "step": 20051 }, { "epoch": 0.9300556586270872, "grad_norm": 11.969226837158203, "learning_rate": 1.2266116062696954e-07, "loss": 0.4071, "step": 20052 }, { "epoch": 0.9301020408163265, "grad_norm": 9.89486312866211, "learning_rate": 1.2249920131523263e-07, "loss": 0.3657, "step": 20053 }, { "epoch": 0.9301484230055659, "grad_norm": 9.942856788635254, "learning_rate": 1.223373476719786e-07, "loss": 0.4279, "step": 20054 }, { "epoch": 0.9301948051948052, "grad_norm": 8.352927207946777, "learning_rate": 1.2217559970071234e-07, "loss": 0.3466, "step": 20055 }, { "epoch": 0.9302411873840445, "grad_norm": 10.98885440826416, "learning_rate": 1.2201395740493948e-07, "loss": 0.3347, "step": 20056 }, { "epoch": 0.9302875695732838, "grad_norm": 8.27817153930664, "learning_rate": 1.2185242078816107e-07, "loss": 0.3443, "step": 20057 }, { "epoch": 0.9303339517625232, "grad_norm": 5.4810285568237305, "learning_rate": 1.2169098985387707e-07, "loss": 0.2983, "step": 20058 }, { "epoch": 0.9303803339517626, "grad_norm": 5.8861236572265625, "learning_rate": 1.2152966460558467e-07, "loss": 0.3235, "step": 20059 }, { "epoch": 0.9304267161410018, "grad_norm": 6.932093143463135, "learning_rate": 1.2136844504677946e-07, "loss": 0.3288, "step": 20060 }, { "epoch": 0.9304730983302412, "grad_norm": 9.557573318481445, "learning_rate": 1.2120733118095307e-07, "loss": 0.4198, "step": 20061 }, { "epoch": 0.9305194805194805, "grad_norm": 13.240373611450195, "learning_rate": 1.210463230115966e-07, "loss": 0.465, "step": 20062 }, { "epoch": 0.9305658627087199, "grad_norm": 7.638885974884033, "learning_rate": 1.208854205421983e-07, "loss": 0.3203, "step": 20063 }, { "epoch": 0.9306122448979591, "grad_norm": 11.188213348388672, "learning_rate": 1.2072462377624384e-07, "loss": 0.375, "step": 20064 }, { "epoch": 0.9306586270871985, "grad_norm": 4.686590671539307, "learning_rate": 1.20563932717217e-07, "loss": 0.2853, "step": 20065 }, { "epoch": 0.9307050092764378, "grad_norm": 9.24905014038086, "learning_rate": 1.2040334736859894e-07, "loss": 0.3959, "step": 20066 }, { "epoch": 0.9307513914656772, "grad_norm": 8.718779563903809, "learning_rate": 1.2024286773386852e-07, "loss": 0.3446, "step": 20067 }, { "epoch": 0.9307977736549166, "grad_norm": 7.430436611175537, "learning_rate": 1.2008249381650238e-07, "loss": 0.3303, "step": 20068 }, { "epoch": 0.9308441558441558, "grad_norm": 5.911914825439453, "learning_rate": 1.1992222561997502e-07, "loss": 0.3526, "step": 20069 }, { "epoch": 0.9308905380333952, "grad_norm": 6.4688029289245605, "learning_rate": 1.1976206314775918e-07, "loss": 0.3358, "step": 20070 }, { "epoch": 0.9309369202226345, "grad_norm": 9.37753677368164, "learning_rate": 1.196020064033243e-07, "loss": 0.324, "step": 20071 }, { "epoch": 0.9309833024118739, "grad_norm": 9.01705551147461, "learning_rate": 1.1944205539013708e-07, "loss": 0.2689, "step": 20072 }, { "epoch": 0.9310296846011131, "grad_norm": 6.5036444664001465, "learning_rate": 1.1928221011166418e-07, "loss": 0.3242, "step": 20073 }, { "epoch": 0.9310760667903525, "grad_norm": 9.042237281799316, "learning_rate": 1.1912247057136728e-07, "loss": 0.3795, "step": 20074 }, { "epoch": 0.9311224489795918, "grad_norm": 10.046343803405762, "learning_rate": 1.1896283677270803e-07, "loss": 0.308, "step": 20075 }, { "epoch": 0.9311688311688312, "grad_norm": 6.742618083953857, "learning_rate": 1.188033087191448e-07, "loss": 0.3624, "step": 20076 }, { "epoch": 0.9312152133580704, "grad_norm": 9.174818992614746, "learning_rate": 1.1864388641413316e-07, "loss": 0.3733, "step": 20077 }, { "epoch": 0.9312615955473098, "grad_norm": 9.571553230285645, "learning_rate": 1.1848456986112644e-07, "loss": 0.3448, "step": 20078 }, { "epoch": 0.9313079777365492, "grad_norm": 5.848498821258545, "learning_rate": 1.1832535906357689e-07, "loss": 0.2799, "step": 20079 }, { "epoch": 0.9313543599257885, "grad_norm": 13.774306297302246, "learning_rate": 1.1816625402493399e-07, "loss": 0.3003, "step": 20080 }, { "epoch": 0.9314007421150279, "grad_norm": 5.785885334014893, "learning_rate": 1.1800725474864438e-07, "loss": 0.2711, "step": 20081 }, { "epoch": 0.9314471243042671, "grad_norm": 9.267796516418457, "learning_rate": 1.1784836123815257e-07, "loss": 0.4259, "step": 20082 }, { "epoch": 0.9314935064935065, "grad_norm": 6.15006685256958, "learning_rate": 1.176895734969008e-07, "loss": 0.3064, "step": 20083 }, { "epoch": 0.9315398886827458, "grad_norm": 8.296088218688965, "learning_rate": 1.1753089152832964e-07, "loss": 0.365, "step": 20084 }, { "epoch": 0.9315862708719852, "grad_norm": 3.690159320831299, "learning_rate": 1.1737231533587579e-07, "loss": 0.3693, "step": 20085 }, { "epoch": 0.9316326530612244, "grad_norm": 7.9731645584106445, "learning_rate": 1.1721384492297593e-07, "loss": 0.3863, "step": 20086 }, { "epoch": 0.9316790352504638, "grad_norm": 7.592715740203857, "learning_rate": 1.1705548029306291e-07, "loss": 0.3352, "step": 20087 }, { "epoch": 0.9317254174397032, "grad_norm": 10.492802619934082, "learning_rate": 1.1689722144956672e-07, "loss": 0.303, "step": 20088 }, { "epoch": 0.9317717996289425, "grad_norm": 9.701945304870605, "learning_rate": 1.1673906839591686e-07, "loss": 0.4162, "step": 20089 }, { "epoch": 0.9318181818181818, "grad_norm": 5.777694225311279, "learning_rate": 1.1658102113553949e-07, "loss": 0.3378, "step": 20090 }, { "epoch": 0.9318645640074211, "grad_norm": 6.082802772521973, "learning_rate": 1.164230796718585e-07, "loss": 0.2897, "step": 20091 }, { "epoch": 0.9319109461966605, "grad_norm": 5.376104354858398, "learning_rate": 1.162652440082962e-07, "loss": 0.3265, "step": 20092 }, { "epoch": 0.9319573283858998, "grad_norm": 5.003373146057129, "learning_rate": 1.1610751414827148e-07, "loss": 0.3082, "step": 20093 }, { "epoch": 0.9320037105751392, "grad_norm": 5.780828475952148, "learning_rate": 1.1594989009520108e-07, "loss": 0.2965, "step": 20094 }, { "epoch": 0.9320500927643784, "grad_norm": 8.619178771972656, "learning_rate": 1.1579237185250003e-07, "loss": 0.3491, "step": 20095 }, { "epoch": 0.9320964749536178, "grad_norm": 10.046037673950195, "learning_rate": 1.1563495942358117e-07, "loss": 0.3524, "step": 20096 }, { "epoch": 0.9321428571428572, "grad_norm": 5.913456916809082, "learning_rate": 1.1547765281185508e-07, "loss": 0.2851, "step": 20097 }, { "epoch": 0.9321892393320965, "grad_norm": 8.757462501525879, "learning_rate": 1.1532045202072905e-07, "loss": 0.2936, "step": 20098 }, { "epoch": 0.9322356215213358, "grad_norm": 8.358201026916504, "learning_rate": 1.1516335705360925e-07, "loss": 0.3913, "step": 20099 }, { "epoch": 0.9322820037105751, "grad_norm": 9.694836616516113, "learning_rate": 1.1500636791389852e-07, "loss": 0.4357, "step": 20100 }, { "epoch": 0.9323283858998145, "grad_norm": 8.751476287841797, "learning_rate": 1.1484948460499856e-07, "loss": 0.3873, "step": 20101 }, { "epoch": 0.9323747680890538, "grad_norm": 4.620576858520508, "learning_rate": 1.1469270713030722e-07, "loss": 0.1969, "step": 20102 }, { "epoch": 0.9324211502782931, "grad_norm": 4.246755123138428, "learning_rate": 1.145360354932229e-07, "loss": 0.283, "step": 20103 }, { "epoch": 0.9324675324675324, "grad_norm": 12.162527084350586, "learning_rate": 1.1437946969713731e-07, "loss": 0.4239, "step": 20104 }, { "epoch": 0.9325139146567718, "grad_norm": 6.405409812927246, "learning_rate": 1.1422300974544443e-07, "loss": 0.2589, "step": 20105 }, { "epoch": 0.9325602968460112, "grad_norm": 12.753771781921387, "learning_rate": 1.1406665564153263e-07, "loss": 0.5426, "step": 20106 }, { "epoch": 0.9326066790352505, "grad_norm": 8.245820999145508, "learning_rate": 1.1391040738878978e-07, "loss": 0.3694, "step": 20107 }, { "epoch": 0.9326530612244898, "grad_norm": 9.281342506408691, "learning_rate": 1.1375426499060038e-07, "loss": 0.3957, "step": 20108 }, { "epoch": 0.9326994434137291, "grad_norm": 5.269104957580566, "learning_rate": 1.135982284503484e-07, "loss": 0.3114, "step": 20109 }, { "epoch": 0.9327458256029685, "grad_norm": 16.51066780090332, "learning_rate": 1.1344229777141336e-07, "loss": 0.2978, "step": 20110 }, { "epoch": 0.9327922077922078, "grad_norm": 17.38332748413086, "learning_rate": 1.132864729571731e-07, "loss": 0.6386, "step": 20111 }, { "epoch": 0.9328385899814471, "grad_norm": 5.122978687286377, "learning_rate": 1.1313075401100438e-07, "loss": 0.3281, "step": 20112 }, { "epoch": 0.9328849721706864, "grad_norm": 7.699676990509033, "learning_rate": 1.1297514093628004e-07, "loss": 0.3539, "step": 20113 }, { "epoch": 0.9329313543599258, "grad_norm": 6.692469120025635, "learning_rate": 1.1281963373637183e-07, "loss": 0.2221, "step": 20114 }, { "epoch": 0.9329777365491652, "grad_norm": 5.393332481384277, "learning_rate": 1.1266423241464819e-07, "loss": 0.2563, "step": 20115 }, { "epoch": 0.9330241187384044, "grad_norm": 4.840404510498047, "learning_rate": 1.125089369744764e-07, "loss": 0.2738, "step": 20116 }, { "epoch": 0.9330705009276438, "grad_norm": 4.91572904586792, "learning_rate": 1.1235374741922045e-07, "loss": 0.2383, "step": 20117 }, { "epoch": 0.9331168831168831, "grad_norm": 8.643354415893555, "learning_rate": 1.1219866375224265e-07, "loss": 0.3646, "step": 20118 }, { "epoch": 0.9331632653061225, "grad_norm": 5.0270280838012695, "learning_rate": 1.1204368597690252e-07, "loss": 0.3009, "step": 20119 }, { "epoch": 0.9332096474953617, "grad_norm": 7.523974418640137, "learning_rate": 1.1188881409655849e-07, "loss": 0.2979, "step": 20120 }, { "epoch": 0.9332560296846011, "grad_norm": 10.003294944763184, "learning_rate": 1.1173404811456513e-07, "loss": 0.2712, "step": 20121 }, { "epoch": 0.9333024118738404, "grad_norm": 7.8331475257873535, "learning_rate": 1.115793880342747e-07, "loss": 0.2907, "step": 20122 }, { "epoch": 0.9333487940630798, "grad_norm": 10.363909721374512, "learning_rate": 1.1142483385903846e-07, "loss": 0.2793, "step": 20123 }, { "epoch": 0.9333951762523192, "grad_norm": 4.648764133453369, "learning_rate": 1.1127038559220482e-07, "loss": 0.184, "step": 20124 }, { "epoch": 0.9334415584415584, "grad_norm": 5.992159366607666, "learning_rate": 1.1111604323712e-07, "loss": 0.2966, "step": 20125 }, { "epoch": 0.9334879406307978, "grad_norm": 9.526703834533691, "learning_rate": 1.1096180679712743e-07, "loss": 0.3612, "step": 20126 }, { "epoch": 0.9335343228200371, "grad_norm": 7.95621395111084, "learning_rate": 1.1080767627556832e-07, "loss": 0.3323, "step": 20127 }, { "epoch": 0.9335807050092765, "grad_norm": 11.521437644958496, "learning_rate": 1.1065365167578223e-07, "loss": 0.4713, "step": 20128 }, { "epoch": 0.9336270871985157, "grad_norm": 7.150509357452393, "learning_rate": 1.1049973300110595e-07, "loss": 0.3801, "step": 20129 }, { "epoch": 0.9336734693877551, "grad_norm": 5.602062225341797, "learning_rate": 1.10345920254874e-07, "loss": 0.3169, "step": 20130 }, { "epoch": 0.9337198515769944, "grad_norm": 6.207177639007568, "learning_rate": 1.1019221344041931e-07, "loss": 0.2426, "step": 20131 }, { "epoch": 0.9337662337662338, "grad_norm": 4.770633697509766, "learning_rate": 1.1003861256107084e-07, "loss": 0.2591, "step": 20132 }, { "epoch": 0.933812615955473, "grad_norm": 5.2196807861328125, "learning_rate": 1.0988511762015596e-07, "loss": 0.2296, "step": 20133 }, { "epoch": 0.9338589981447124, "grad_norm": 6.747407913208008, "learning_rate": 1.0973172862100145e-07, "loss": 0.4011, "step": 20134 }, { "epoch": 0.9339053803339518, "grad_norm": 4.653003215789795, "learning_rate": 1.095784455669291e-07, "loss": 0.1817, "step": 20135 }, { "epoch": 0.9339517625231911, "grad_norm": 4.826736927032471, "learning_rate": 1.0942526846126122e-07, "loss": 0.2579, "step": 20136 }, { "epoch": 0.9339981447124305, "grad_norm": 6.071587085723877, "learning_rate": 1.0927219730731464e-07, "loss": 0.3336, "step": 20137 }, { "epoch": 0.9340445269016697, "grad_norm": 8.823086738586426, "learning_rate": 1.091192321084067e-07, "loss": 0.2996, "step": 20138 }, { "epoch": 0.9340909090909091, "grad_norm": 10.261247634887695, "learning_rate": 1.0896637286785083e-07, "loss": 0.3836, "step": 20139 }, { "epoch": 0.9341372912801484, "grad_norm": 8.334775924682617, "learning_rate": 1.088136195889583e-07, "loss": 0.3481, "step": 20140 }, { "epoch": 0.9341836734693878, "grad_norm": 8.123248100280762, "learning_rate": 1.0866097227503925e-07, "loss": 0.2946, "step": 20141 }, { "epoch": 0.934230055658627, "grad_norm": 5.709850311279297, "learning_rate": 1.0850843092940045e-07, "loss": 0.274, "step": 20142 }, { "epoch": 0.9342764378478664, "grad_norm": 7.478256702423096, "learning_rate": 1.0835599555534649e-07, "loss": 0.3212, "step": 20143 }, { "epoch": 0.9343228200371058, "grad_norm": 10.564740180969238, "learning_rate": 1.0820366615617972e-07, "loss": 0.3131, "step": 20144 }, { "epoch": 0.9343692022263451, "grad_norm": 5.586427211761475, "learning_rate": 1.080514427352003e-07, "loss": 0.2415, "step": 20145 }, { "epoch": 0.9344155844155844, "grad_norm": 8.386405944824219, "learning_rate": 1.0789932529570613e-07, "loss": 0.4034, "step": 20146 }, { "epoch": 0.9344619666048237, "grad_norm": 9.498014450073242, "learning_rate": 1.077473138409929e-07, "loss": 0.3221, "step": 20147 }, { "epoch": 0.9345083487940631, "grad_norm": 8.790934562683105, "learning_rate": 1.0759540837435356e-07, "loss": 0.3616, "step": 20148 }, { "epoch": 0.9345547309833024, "grad_norm": 5.62404203414917, "learning_rate": 1.0744360889907935e-07, "loss": 0.2629, "step": 20149 }, { "epoch": 0.9346011131725418, "grad_norm": 5.09730863571167, "learning_rate": 1.0729191541845874e-07, "loss": 0.2837, "step": 20150 }, { "epoch": 0.934647495361781, "grad_norm": 6.702115058898926, "learning_rate": 1.07140327935778e-07, "loss": 0.3283, "step": 20151 }, { "epoch": 0.9346938775510204, "grad_norm": 6.919703960418701, "learning_rate": 1.0698884645432117e-07, "loss": 0.2758, "step": 20152 }, { "epoch": 0.9347402597402598, "grad_norm": 11.137984275817871, "learning_rate": 1.0683747097737118e-07, "loss": 0.2909, "step": 20153 }, { "epoch": 0.9347866419294991, "grad_norm": 11.978301048278809, "learning_rate": 1.0668620150820542e-07, "loss": 0.456, "step": 20154 }, { "epoch": 0.9348330241187384, "grad_norm": 17.023557662963867, "learning_rate": 1.0653503805010234e-07, "loss": 0.2261, "step": 20155 }, { "epoch": 0.9348794063079777, "grad_norm": 5.028195381164551, "learning_rate": 1.0638398060633714e-07, "loss": 0.2743, "step": 20156 }, { "epoch": 0.9349257884972171, "grad_norm": 10.600918769836426, "learning_rate": 1.0623302918018108e-07, "loss": 0.3424, "step": 20157 }, { "epoch": 0.9349721706864564, "grad_norm": 8.623461723327637, "learning_rate": 1.0608218377490653e-07, "loss": 0.3358, "step": 20158 }, { "epoch": 0.9350185528756957, "grad_norm": 5.0086565017700195, "learning_rate": 1.0593144439377923e-07, "loss": 0.2041, "step": 20159 }, { "epoch": 0.935064935064935, "grad_norm": 18.840259552001953, "learning_rate": 1.0578081104006598e-07, "loss": 0.3191, "step": 20160 }, { "epoch": 0.9351113172541744, "grad_norm": 7.226073741912842, "learning_rate": 1.0563028371702977e-07, "loss": 0.2706, "step": 20161 }, { "epoch": 0.9351576994434138, "grad_norm": 6.057023048400879, "learning_rate": 1.054798624279324e-07, "loss": 0.3605, "step": 20162 }, { "epoch": 0.935204081632653, "grad_norm": 8.848443031311035, "learning_rate": 1.0532954717603183e-07, "loss": 0.2724, "step": 20163 }, { "epoch": 0.9352504638218924, "grad_norm": 6.51945686340332, "learning_rate": 1.0517933796458546e-07, "loss": 0.2839, "step": 20164 }, { "epoch": 0.9352968460111317, "grad_norm": 4.072207450866699, "learning_rate": 1.050292347968468e-07, "loss": 0.2389, "step": 20165 }, { "epoch": 0.9353432282003711, "grad_norm": 9.880395889282227, "learning_rate": 1.0487923767606767e-07, "loss": 0.4584, "step": 20166 }, { "epoch": 0.9353896103896104, "grad_norm": 6.216925621032715, "learning_rate": 1.0472934660549827e-07, "loss": 0.2895, "step": 20167 }, { "epoch": 0.9354359925788497, "grad_norm": 8.279175758361816, "learning_rate": 1.0457956158838545e-07, "loss": 0.3708, "step": 20168 }, { "epoch": 0.935482374768089, "grad_norm": 4.728773593902588, "learning_rate": 1.0442988262797493e-07, "loss": 0.1999, "step": 20169 }, { "epoch": 0.9355287569573284, "grad_norm": 6.772329330444336, "learning_rate": 1.0428030972750802e-07, "loss": 0.3918, "step": 20170 }, { "epoch": 0.9355751391465678, "grad_norm": 7.361265659332275, "learning_rate": 1.0413084289022602e-07, "loss": 0.3215, "step": 20171 }, { "epoch": 0.935621521335807, "grad_norm": 7.932096004486084, "learning_rate": 1.0398148211936743e-07, "loss": 0.3933, "step": 20172 }, { "epoch": 0.9356679035250464, "grad_norm": 6.932075500488281, "learning_rate": 1.0383222741816745e-07, "loss": 0.3413, "step": 20173 }, { "epoch": 0.9357142857142857, "grad_norm": 5.8089680671691895, "learning_rate": 1.0368307878985962e-07, "loss": 0.3506, "step": 20174 }, { "epoch": 0.9357606679035251, "grad_norm": 4.89240837097168, "learning_rate": 1.0353403623767577e-07, "loss": 0.3323, "step": 20175 }, { "epoch": 0.9358070500927643, "grad_norm": 6.746123313903809, "learning_rate": 1.0338509976484445e-07, "loss": 0.3857, "step": 20176 }, { "epoch": 0.9358534322820037, "grad_norm": 6.494750499725342, "learning_rate": 1.0323626937459196e-07, "loss": 0.3199, "step": 20177 }, { "epoch": 0.935899814471243, "grad_norm": 11.560693740844727, "learning_rate": 1.0308754507014296e-07, "loss": 0.3052, "step": 20178 }, { "epoch": 0.9359461966604824, "grad_norm": 13.792348861694336, "learning_rate": 1.0293892685471928e-07, "loss": 0.3993, "step": 20179 }, { "epoch": 0.9359925788497218, "grad_norm": 8.612082481384277, "learning_rate": 1.0279041473154117e-07, "loss": 0.2515, "step": 20180 }, { "epoch": 0.936038961038961, "grad_norm": 4.635600566864014, "learning_rate": 1.0264200870382546e-07, "loss": 0.2359, "step": 20181 }, { "epoch": 0.9360853432282004, "grad_norm": 7.810612678527832, "learning_rate": 1.0249370877478737e-07, "loss": 0.3139, "step": 20182 }, { "epoch": 0.9361317254174397, "grad_norm": 6.794919490814209, "learning_rate": 1.0234551494764044e-07, "loss": 0.3358, "step": 20183 }, { "epoch": 0.9361781076066791, "grad_norm": 11.483223915100098, "learning_rate": 1.0219742722559433e-07, "loss": 0.3789, "step": 20184 }, { "epoch": 0.9362244897959183, "grad_norm": 9.328453063964844, "learning_rate": 1.0204944561185759e-07, "loss": 0.2594, "step": 20185 }, { "epoch": 0.9362708719851577, "grad_norm": 8.565603256225586, "learning_rate": 1.0190157010963653e-07, "loss": 0.4234, "step": 20186 }, { "epoch": 0.936317254174397, "grad_norm": 9.345259666442871, "learning_rate": 1.0175380072213414e-07, "loss": 0.3559, "step": 20187 }, { "epoch": 0.9363636363636364, "grad_norm": 10.54835033416748, "learning_rate": 1.0160613745255232e-07, "loss": 0.3162, "step": 20188 }, { "epoch": 0.9364100185528756, "grad_norm": 5.870181083679199, "learning_rate": 1.0145858030408961e-07, "loss": 0.3928, "step": 20189 }, { "epoch": 0.936456400742115, "grad_norm": 7.742702484130859, "learning_rate": 1.0131112927994347e-07, "loss": 0.3368, "step": 20190 }, { "epoch": 0.9365027829313544, "grad_norm": 5.773582458496094, "learning_rate": 1.0116378438330798e-07, "loss": 0.3146, "step": 20191 }, { "epoch": 0.9365491651205937, "grad_norm": 9.808333396911621, "learning_rate": 1.0101654561737451e-07, "loss": 0.3191, "step": 20192 }, { "epoch": 0.9365955473098331, "grad_norm": 7.909583568572998, "learning_rate": 1.0086941298533437e-07, "loss": 0.3215, "step": 20193 }, { "epoch": 0.9366419294990723, "grad_norm": 6.1034135818481445, "learning_rate": 1.0072238649037391e-07, "loss": 0.2887, "step": 20194 }, { "epoch": 0.9366883116883117, "grad_norm": 8.355642318725586, "learning_rate": 1.0057546613567892e-07, "loss": 0.3143, "step": 20195 }, { "epoch": 0.936734693877551, "grad_norm": 9.57372760772705, "learning_rate": 1.0042865192443296e-07, "loss": 0.352, "step": 20196 }, { "epoch": 0.9367810760667904, "grad_norm": 9.323302268981934, "learning_rate": 1.0028194385981515e-07, "loss": 0.3458, "step": 20197 }, { "epoch": 0.9368274582560296, "grad_norm": 3.8125417232513428, "learning_rate": 1.0013534194500518e-07, "loss": 0.2309, "step": 20198 }, { "epoch": 0.936873840445269, "grad_norm": 3.6736276149749756, "learning_rate": 9.998884618317828e-08, "loss": 0.2629, "step": 20199 }, { "epoch": 0.9369202226345084, "grad_norm": 14.411700248718262, "learning_rate": 9.984245657750857e-08, "loss": 0.4815, "step": 20200 }, { "epoch": 0.9369666048237477, "grad_norm": 4.806183338165283, "learning_rate": 9.969617313116798e-08, "loss": 0.3143, "step": 20201 }, { "epoch": 0.937012987012987, "grad_norm": 4.40562629699707, "learning_rate": 9.95499958473245e-08, "loss": 0.2661, "step": 20202 }, { "epoch": 0.9370593692022263, "grad_norm": 8.850096702575684, "learning_rate": 9.940392472914561e-08, "loss": 0.4092, "step": 20203 }, { "epoch": 0.9371057513914657, "grad_norm": 14.049675941467285, "learning_rate": 9.925795977979658e-08, "loss": 0.5045, "step": 20204 }, { "epoch": 0.937152133580705, "grad_norm": 5.489485263824463, "learning_rate": 9.911210100243817e-08, "loss": 0.2445, "step": 20205 }, { "epoch": 0.9371985157699444, "grad_norm": 6.020633697509766, "learning_rate": 9.896634840023178e-08, "loss": 0.3087, "step": 20206 }, { "epoch": 0.9372448979591836, "grad_norm": 6.330551624298096, "learning_rate": 9.882070197633431e-08, "loss": 0.2861, "step": 20207 }, { "epoch": 0.937291280148423, "grad_norm": 4.777610778808594, "learning_rate": 9.867516173390102e-08, "loss": 0.2866, "step": 20208 }, { "epoch": 0.9373376623376624, "grad_norm": 17.940893173217773, "learning_rate": 9.852972767608548e-08, "loss": 0.4715, "step": 20209 }, { "epoch": 0.9373840445269017, "grad_norm": 6.267427921295166, "learning_rate": 9.838439980603742e-08, "loss": 0.2757, "step": 20210 }, { "epoch": 0.937430426716141, "grad_norm": 4.1953229904174805, "learning_rate": 9.823917812690709e-08, "loss": 0.2573, "step": 20211 }, { "epoch": 0.9374768089053803, "grad_norm": 3.6135287284851074, "learning_rate": 9.809406264183918e-08, "loss": 0.2234, "step": 20212 }, { "epoch": 0.9375231910946197, "grad_norm": 4.756036281585693, "learning_rate": 9.794905335397731e-08, "loss": 0.2816, "step": 20213 }, { "epoch": 0.937569573283859, "grad_norm": 5.409088611602783, "learning_rate": 9.780415026646395e-08, "loss": 0.3147, "step": 20214 }, { "epoch": 0.9376159554730983, "grad_norm": 9.89773178100586, "learning_rate": 9.765935338243826e-08, "loss": 0.3269, "step": 20215 }, { "epoch": 0.9376623376623376, "grad_norm": 10.345081329345703, "learning_rate": 9.751466270503718e-08, "loss": 0.3329, "step": 20216 }, { "epoch": 0.937708719851577, "grad_norm": 4.536894798278809, "learning_rate": 9.737007823739486e-08, "loss": 0.2368, "step": 20217 }, { "epoch": 0.9377551020408164, "grad_norm": 8.837752342224121, "learning_rate": 9.722559998264381e-08, "loss": 0.3187, "step": 20218 }, { "epoch": 0.9378014842300556, "grad_norm": 20.2659854888916, "learning_rate": 9.708122794391373e-08, "loss": 0.3621, "step": 20219 }, { "epoch": 0.937847866419295, "grad_norm": 7.09570837020874, "learning_rate": 9.693696212433379e-08, "loss": 0.3485, "step": 20220 }, { "epoch": 0.9378942486085343, "grad_norm": 10.082403182983398, "learning_rate": 9.679280252702816e-08, "loss": 0.3752, "step": 20221 }, { "epoch": 0.9379406307977737, "grad_norm": 5.2422895431518555, "learning_rate": 9.66487491551199e-08, "loss": 0.3454, "step": 20222 }, { "epoch": 0.937987012987013, "grad_norm": 5.266085147857666, "learning_rate": 9.65048020117304e-08, "loss": 0.321, "step": 20223 }, { "epoch": 0.9380333951762523, "grad_norm": 5.474512100219727, "learning_rate": 9.636096109997828e-08, "loss": 0.2924, "step": 20224 }, { "epoch": 0.9380797773654916, "grad_norm": 11.361608505249023, "learning_rate": 9.621722642297937e-08, "loss": 0.3116, "step": 20225 }, { "epoch": 0.938126159554731, "grad_norm": 11.339645385742188, "learning_rate": 9.607359798384785e-08, "loss": 0.433, "step": 20226 }, { "epoch": 0.9381725417439704, "grad_norm": 6.580536365509033, "learning_rate": 9.593007578569513e-08, "loss": 0.2896, "step": 20227 }, { "epoch": 0.9382189239332096, "grad_norm": 10.512784004211426, "learning_rate": 9.578665983163094e-08, "loss": 0.357, "step": 20228 }, { "epoch": 0.938265306122449, "grad_norm": 6.575801849365234, "learning_rate": 9.564335012476167e-08, "loss": 0.324, "step": 20229 }, { "epoch": 0.9383116883116883, "grad_norm": 8.268115043640137, "learning_rate": 9.550014666819262e-08, "loss": 0.4421, "step": 20230 }, { "epoch": 0.9383580705009277, "grad_norm": 6.5857391357421875, "learning_rate": 9.535704946502633e-08, "loss": 0.335, "step": 20231 }, { "epoch": 0.9384044526901669, "grad_norm": 5.264142990112305, "learning_rate": 9.521405851836252e-08, "loss": 0.2296, "step": 20232 }, { "epoch": 0.9384508348794063, "grad_norm": 6.131717205047607, "learning_rate": 9.507117383129927e-08, "loss": 0.3509, "step": 20233 }, { "epoch": 0.9384972170686456, "grad_norm": 4.5355916023254395, "learning_rate": 9.492839540693132e-08, "loss": 0.3319, "step": 20234 }, { "epoch": 0.938543599257885, "grad_norm": 11.35513687133789, "learning_rate": 9.478572324835289e-08, "loss": 0.4719, "step": 20235 }, { "epoch": 0.9385899814471244, "grad_norm": 5.503435134887695, "learning_rate": 9.464315735865482e-08, "loss": 0.2869, "step": 20236 }, { "epoch": 0.9386363636363636, "grad_norm": 9.517247200012207, "learning_rate": 9.450069774092519e-08, "loss": 0.3873, "step": 20237 }, { "epoch": 0.938682745825603, "grad_norm": 6.249367713928223, "learning_rate": 9.435834439825043e-08, "loss": 0.3445, "step": 20238 }, { "epoch": 0.9387291280148423, "grad_norm": 7.611532211303711, "learning_rate": 9.421609733371472e-08, "loss": 0.3467, "step": 20239 }, { "epoch": 0.9387755102040817, "grad_norm": 4.175688743591309, "learning_rate": 9.407395655040008e-08, "loss": 0.225, "step": 20240 }, { "epoch": 0.9388218923933209, "grad_norm": 25.78134536743164, "learning_rate": 9.393192205138513e-08, "loss": 0.3333, "step": 20241 }, { "epoch": 0.9388682745825603, "grad_norm": 5.661338806152344, "learning_rate": 9.378999383974796e-08, "loss": 0.2066, "step": 20242 }, { "epoch": 0.9389146567717996, "grad_norm": 6.143605709075928, "learning_rate": 9.364817191856224e-08, "loss": 0.3545, "step": 20243 }, { "epoch": 0.938961038961039, "grad_norm": 5.977725505828857, "learning_rate": 9.350645629090105e-08, "loss": 0.2707, "step": 20244 }, { "epoch": 0.9390074211502782, "grad_norm": 20.940750122070312, "learning_rate": 9.336484695983472e-08, "loss": 0.5044, "step": 20245 }, { "epoch": 0.9390538033395176, "grad_norm": 8.340067863464355, "learning_rate": 9.322334392843136e-08, "loss": 0.391, "step": 20246 }, { "epoch": 0.939100185528757, "grad_norm": 7.585697650909424, "learning_rate": 9.308194719975628e-08, "loss": 0.3142, "step": 20247 }, { "epoch": 0.9391465677179963, "grad_norm": 10.926695823669434, "learning_rate": 9.294065677687202e-08, "loss": 0.3748, "step": 20248 }, { "epoch": 0.9391929499072357, "grad_norm": 7.301551342010498, "learning_rate": 9.279947266284062e-08, "loss": 0.3201, "step": 20249 }, { "epoch": 0.9392393320964749, "grad_norm": 7.948456287384033, "learning_rate": 9.265839486072015e-08, "loss": 0.3036, "step": 20250 }, { "epoch": 0.9392857142857143, "grad_norm": 6.219052314758301, "learning_rate": 9.251742337356707e-08, "loss": 0.3657, "step": 20251 }, { "epoch": 0.9393320964749536, "grad_norm": 3.9015564918518066, "learning_rate": 9.237655820443615e-08, "loss": 0.2124, "step": 20252 }, { "epoch": 0.939378478664193, "grad_norm": 4.603834629058838, "learning_rate": 9.223579935637828e-08, "loss": 0.2676, "step": 20253 }, { "epoch": 0.9394248608534322, "grad_norm": 8.256400108337402, "learning_rate": 9.209514683244325e-08, "loss": 0.3516, "step": 20254 }, { "epoch": 0.9394712430426716, "grad_norm": 4.266859531402588, "learning_rate": 9.195460063567807e-08, "loss": 0.3017, "step": 20255 }, { "epoch": 0.939517625231911, "grad_norm": 13.093878746032715, "learning_rate": 9.181416076912753e-08, "loss": 0.4809, "step": 20256 }, { "epoch": 0.9395640074211503, "grad_norm": 10.692687034606934, "learning_rate": 9.167382723583529e-08, "loss": 0.4122, "step": 20257 }, { "epoch": 0.9396103896103896, "grad_norm": 7.082273006439209, "learning_rate": 9.153360003884004e-08, "loss": 0.2689, "step": 20258 }, { "epoch": 0.9396567717996289, "grad_norm": 6.274008750915527, "learning_rate": 9.139347918118046e-08, "loss": 0.3634, "step": 20259 }, { "epoch": 0.9397031539888683, "grad_norm": 7.855735778808594, "learning_rate": 9.125346466589191e-08, "loss": 0.2717, "step": 20260 }, { "epoch": 0.9397495361781076, "grad_norm": 11.204794883728027, "learning_rate": 9.111355649600806e-08, "loss": 0.4216, "step": 20261 }, { "epoch": 0.939795918367347, "grad_norm": 12.271738052368164, "learning_rate": 9.097375467456038e-08, "loss": 0.3846, "step": 20262 }, { "epoch": 0.9398423005565862, "grad_norm": 7.4952802658081055, "learning_rate": 9.083405920457699e-08, "loss": 0.3187, "step": 20263 }, { "epoch": 0.9398886827458256, "grad_norm": 4.017479419708252, "learning_rate": 9.069447008908383e-08, "loss": 0.3037, "step": 20264 }, { "epoch": 0.939935064935065, "grad_norm": 6.069622039794922, "learning_rate": 9.055498733110568e-08, "loss": 0.2511, "step": 20265 }, { "epoch": 0.9399814471243043, "grad_norm": 8.401515007019043, "learning_rate": 9.041561093366457e-08, "loss": 0.3577, "step": 20266 }, { "epoch": 0.9400278293135436, "grad_norm": 8.16032600402832, "learning_rate": 9.027634089977921e-08, "loss": 0.3355, "step": 20267 }, { "epoch": 0.9400742115027829, "grad_norm": 11.350841522216797, "learning_rate": 9.013717723246829e-08, "loss": 0.4504, "step": 20268 }, { "epoch": 0.9401205936920223, "grad_norm": 6.154332160949707, "learning_rate": 8.999811993474495e-08, "loss": 0.2486, "step": 20269 }, { "epoch": 0.9401669758812616, "grad_norm": 8.881415367126465, "learning_rate": 8.985916900962288e-08, "loss": 0.3092, "step": 20270 }, { "epoch": 0.9402133580705009, "grad_norm": 4.230792045593262, "learning_rate": 8.972032446011192e-08, "loss": 0.2696, "step": 20271 }, { "epoch": 0.9402597402597402, "grad_norm": 7.008635997772217, "learning_rate": 8.95815862892202e-08, "loss": 0.3412, "step": 20272 }, { "epoch": 0.9403061224489796, "grad_norm": 5.341824531555176, "learning_rate": 8.944295449995366e-08, "loss": 0.3066, "step": 20273 }, { "epoch": 0.940352504638219, "grad_norm": 6.4516119956970215, "learning_rate": 8.930442909531545e-08, "loss": 0.297, "step": 20274 }, { "epoch": 0.9403988868274582, "grad_norm": 14.317651748657227, "learning_rate": 8.916601007830649e-08, "loss": 0.4641, "step": 20275 }, { "epoch": 0.9404452690166976, "grad_norm": 7.418057918548584, "learning_rate": 8.902769745192552e-08, "loss": 0.369, "step": 20276 }, { "epoch": 0.9404916512059369, "grad_norm": 5.270272254943848, "learning_rate": 8.888949121916957e-08, "loss": 0.328, "step": 20277 }, { "epoch": 0.9405380333951763, "grad_norm": 5.156106472015381, "learning_rate": 8.875139138303235e-08, "loss": 0.2783, "step": 20278 }, { "epoch": 0.9405844155844156, "grad_norm": 12.880864143371582, "learning_rate": 8.861339794650591e-08, "loss": 0.4452, "step": 20279 }, { "epoch": 0.9406307977736549, "grad_norm": 9.303485870361328, "learning_rate": 8.847551091257956e-08, "loss": 0.4079, "step": 20280 }, { "epoch": 0.9406771799628942, "grad_norm": 4.23931884765625, "learning_rate": 8.833773028424086e-08, "loss": 0.2389, "step": 20281 }, { "epoch": 0.9407235621521336, "grad_norm": 4.689660549163818, "learning_rate": 8.820005606447468e-08, "loss": 0.2548, "step": 20282 }, { "epoch": 0.940769944341373, "grad_norm": 4.998912334442139, "learning_rate": 8.806248825626306e-08, "loss": 0.222, "step": 20283 }, { "epoch": 0.9408163265306122, "grad_norm": 4.862728118896484, "learning_rate": 8.792502686258752e-08, "loss": 0.3165, "step": 20284 }, { "epoch": 0.9408627087198516, "grad_norm": 8.094212532043457, "learning_rate": 8.778767188642512e-08, "loss": 0.3434, "step": 20285 }, { "epoch": 0.9409090909090909, "grad_norm": 9.518081665039062, "learning_rate": 8.76504233307518e-08, "loss": 0.3188, "step": 20286 }, { "epoch": 0.9409554730983303, "grad_norm": 7.574878692626953, "learning_rate": 8.75132811985413e-08, "loss": 0.2916, "step": 20287 }, { "epoch": 0.9410018552875695, "grad_norm": 9.943702697753906, "learning_rate": 8.737624549276403e-08, "loss": 0.4589, "step": 20288 }, { "epoch": 0.9410482374768089, "grad_norm": 8.204896926879883, "learning_rate": 8.723931621638981e-08, "loss": 0.3531, "step": 20289 }, { "epoch": 0.9410946196660482, "grad_norm": 7.775932788848877, "learning_rate": 8.710249337238464e-08, "loss": 0.3301, "step": 20290 }, { "epoch": 0.9411410018552876, "grad_norm": 5.979891300201416, "learning_rate": 8.696577696371222e-08, "loss": 0.2335, "step": 20291 }, { "epoch": 0.941187384044527, "grad_norm": 5.32629919052124, "learning_rate": 8.682916699333521e-08, "loss": 0.335, "step": 20292 }, { "epoch": 0.9412337662337662, "grad_norm": 5.966263294219971, "learning_rate": 8.669266346421235e-08, "loss": 0.2669, "step": 20293 }, { "epoch": 0.9412801484230056, "grad_norm": 5.20102596282959, "learning_rate": 8.655626637930181e-08, "loss": 0.2009, "step": 20294 }, { "epoch": 0.9413265306122449, "grad_norm": 4.984292507171631, "learning_rate": 8.641997574155848e-08, "loss": 0.2301, "step": 20295 }, { "epoch": 0.9413729128014843, "grad_norm": 3.9375874996185303, "learning_rate": 8.62837915539344e-08, "loss": 0.3366, "step": 20296 }, { "epoch": 0.9414192949907235, "grad_norm": 4.0152435302734375, "learning_rate": 8.614771381938002e-08, "loss": 0.2319, "step": 20297 }, { "epoch": 0.9414656771799629, "grad_norm": 7.537150859832764, "learning_rate": 8.60117425408441e-08, "loss": 0.3279, "step": 20298 }, { "epoch": 0.9415120593692022, "grad_norm": 7.956435680389404, "learning_rate": 8.587587772127149e-08, "loss": 0.4099, "step": 20299 }, { "epoch": 0.9415584415584416, "grad_norm": 7.999159812927246, "learning_rate": 8.574011936360704e-08, "loss": 0.2747, "step": 20300 }, { "epoch": 0.9416048237476808, "grad_norm": 7.571996212005615, "learning_rate": 8.56044674707901e-08, "loss": 0.3638, "step": 20301 }, { "epoch": 0.9416512059369202, "grad_norm": 11.23960018157959, "learning_rate": 8.546892204575997e-08, "loss": 0.3767, "step": 20302 }, { "epoch": 0.9416975881261596, "grad_norm": 4.928085803985596, "learning_rate": 8.533348309145373e-08, "loss": 0.209, "step": 20303 }, { "epoch": 0.9417439703153989, "grad_norm": 11.289948463439941, "learning_rate": 8.519815061080572e-08, "loss": 0.2293, "step": 20304 }, { "epoch": 0.9417903525046383, "grad_norm": 10.897461891174316, "learning_rate": 8.506292460674748e-08, "loss": 0.3751, "step": 20305 }, { "epoch": 0.9418367346938775, "grad_norm": 6.451135158538818, "learning_rate": 8.492780508220833e-08, "loss": 0.3324, "step": 20306 }, { "epoch": 0.9418831168831169, "grad_norm": 10.394329071044922, "learning_rate": 8.479279204011593e-08, "loss": 0.2995, "step": 20307 }, { "epoch": 0.9419294990723562, "grad_norm": 13.259757041931152, "learning_rate": 8.465788548339515e-08, "loss": 0.3612, "step": 20308 }, { "epoch": 0.9419758812615956, "grad_norm": 3.9729580879211426, "learning_rate": 8.452308541496868e-08, "loss": 0.2994, "step": 20309 }, { "epoch": 0.9420222634508348, "grad_norm": 4.754558086395264, "learning_rate": 8.438839183775693e-08, "loss": 0.2733, "step": 20310 }, { "epoch": 0.9420686456400742, "grad_norm": 4.905253887176514, "learning_rate": 8.425380475467871e-08, "loss": 0.1818, "step": 20311 }, { "epoch": 0.9421150278293136, "grad_norm": 7.277632713317871, "learning_rate": 8.411932416864832e-08, "loss": 0.2469, "step": 20312 }, { "epoch": 0.9421614100185529, "grad_norm": 14.682181358337402, "learning_rate": 8.398495008257957e-08, "loss": 0.4634, "step": 20313 }, { "epoch": 0.9422077922077922, "grad_norm": 6.489434242248535, "learning_rate": 8.385068249938455e-08, "loss": 0.2018, "step": 20314 }, { "epoch": 0.9422541743970315, "grad_norm": 4.974045753479004, "learning_rate": 8.371652142197095e-08, "loss": 0.2817, "step": 20315 }, { "epoch": 0.9423005565862709, "grad_norm": 3.874480962753296, "learning_rate": 8.358246685324645e-08, "loss": 0.3019, "step": 20316 }, { "epoch": 0.9423469387755102, "grad_norm": 5.065485954284668, "learning_rate": 8.344851879611481e-08, "loss": 0.3198, "step": 20317 }, { "epoch": 0.9423933209647495, "grad_norm": 4.049257755279541, "learning_rate": 8.331467725347708e-08, "loss": 0.1663, "step": 20318 }, { "epoch": 0.9424397031539888, "grad_norm": 13.669536590576172, "learning_rate": 8.31809422282337e-08, "loss": 0.4887, "step": 20319 }, { "epoch": 0.9424860853432282, "grad_norm": 6.933775901794434, "learning_rate": 8.304731372328235e-08, "loss": 0.2532, "step": 20320 }, { "epoch": 0.9425324675324676, "grad_norm": 5.634008884429932, "learning_rate": 8.291379174151682e-08, "loss": 0.2633, "step": 20321 }, { "epoch": 0.9425788497217069, "grad_norm": 6.973871231079102, "learning_rate": 8.278037628583147e-08, "loss": 0.332, "step": 20322 }, { "epoch": 0.9426252319109462, "grad_norm": 11.750751495361328, "learning_rate": 8.264706735911509e-08, "loss": 0.3107, "step": 20323 }, { "epoch": 0.9426716141001855, "grad_norm": 12.166521072387695, "learning_rate": 8.251386496425596e-08, "loss": 0.3626, "step": 20324 }, { "epoch": 0.9427179962894249, "grad_norm": 7.233209133148193, "learning_rate": 8.238076910414061e-08, "loss": 0.3611, "step": 20325 }, { "epoch": 0.9427643784786642, "grad_norm": 5.3203582763671875, "learning_rate": 8.224777978165233e-08, "loss": 0.2779, "step": 20326 }, { "epoch": 0.9428107606679035, "grad_norm": 17.600217819213867, "learning_rate": 8.211489699967212e-08, "loss": 0.3652, "step": 20327 }, { "epoch": 0.9428571428571428, "grad_norm": 7.407966613769531, "learning_rate": 8.198212076107881e-08, "loss": 0.3217, "step": 20328 }, { "epoch": 0.9429035250463822, "grad_norm": 7.011741638183594, "learning_rate": 8.184945106874843e-08, "loss": 0.3273, "step": 20329 }, { "epoch": 0.9429499072356216, "grad_norm": 7.625283718109131, "learning_rate": 8.171688792555588e-08, "loss": 0.3339, "step": 20330 }, { "epoch": 0.9429962894248608, "grad_norm": 5.78278923034668, "learning_rate": 8.158443133437277e-08, "loss": 0.2926, "step": 20331 }, { "epoch": 0.9430426716141002, "grad_norm": 11.506538391113281, "learning_rate": 8.145208129806847e-08, "loss": 0.3695, "step": 20332 }, { "epoch": 0.9430890538033395, "grad_norm": 8.712815284729004, "learning_rate": 8.131983781951125e-08, "loss": 0.479, "step": 20333 }, { "epoch": 0.9431354359925789, "grad_norm": 4.60823392868042, "learning_rate": 8.118770090156491e-08, "loss": 0.248, "step": 20334 }, { "epoch": 0.9431818181818182, "grad_norm": 7.115918159484863, "learning_rate": 8.105567054709273e-08, "loss": 0.3459, "step": 20335 }, { "epoch": 0.9432282003710575, "grad_norm": 4.896000862121582, "learning_rate": 8.092374675895465e-08, "loss": 0.2762, "step": 20336 }, { "epoch": 0.9432745825602968, "grad_norm": 7.836654186248779, "learning_rate": 8.079192954000948e-08, "loss": 0.2756, "step": 20337 }, { "epoch": 0.9433209647495362, "grad_norm": 8.094389915466309, "learning_rate": 8.066021889311271e-08, "loss": 0.2609, "step": 20338 }, { "epoch": 0.9433673469387756, "grad_norm": 5.321132659912109, "learning_rate": 8.052861482111763e-08, "loss": 0.3168, "step": 20339 }, { "epoch": 0.9434137291280148, "grad_norm": 8.4798002243042, "learning_rate": 8.039711732687472e-08, "loss": 0.4151, "step": 20340 }, { "epoch": 0.9434601113172542, "grad_norm": 7.2617268562316895, "learning_rate": 8.026572641323394e-08, "loss": 0.2298, "step": 20341 }, { "epoch": 0.9435064935064935, "grad_norm": 17.781721115112305, "learning_rate": 8.013444208304133e-08, "loss": 0.4191, "step": 20342 }, { "epoch": 0.9435528756957329, "grad_norm": 4.96097469329834, "learning_rate": 8.000326433914074e-08, "loss": 0.2011, "step": 20343 }, { "epoch": 0.9435992578849721, "grad_norm": 5.582667827606201, "learning_rate": 7.987219318437489e-08, "loss": 0.3293, "step": 20344 }, { "epoch": 0.9436456400742115, "grad_norm": 6.624768257141113, "learning_rate": 7.974122862158263e-08, "loss": 0.2555, "step": 20345 }, { "epoch": 0.9436920222634508, "grad_norm": 10.858596801757812, "learning_rate": 7.96103706536011e-08, "loss": 0.4985, "step": 20346 }, { "epoch": 0.9437384044526902, "grad_norm": 9.97893238067627, "learning_rate": 7.947961928326586e-08, "loss": 0.3707, "step": 20347 }, { "epoch": 0.9437847866419296, "grad_norm": 17.563430786132812, "learning_rate": 7.934897451340962e-08, "loss": 0.5968, "step": 20348 }, { "epoch": 0.9438311688311688, "grad_norm": 13.284923553466797, "learning_rate": 7.921843634686233e-08, "loss": 0.3219, "step": 20349 }, { "epoch": 0.9438775510204082, "grad_norm": 6.288384437561035, "learning_rate": 7.908800478645229e-08, "loss": 0.2418, "step": 20350 }, { "epoch": 0.9439239332096475, "grad_norm": 9.493271827697754, "learning_rate": 7.895767983500502e-08, "loss": 0.3896, "step": 20351 }, { "epoch": 0.9439703153988869, "grad_norm": 7.500350475311279, "learning_rate": 7.882746149534382e-08, "loss": 0.3653, "step": 20352 }, { "epoch": 0.9440166975881261, "grad_norm": 5.866880893707275, "learning_rate": 7.869734977029031e-08, "loss": 0.3079, "step": 20353 }, { "epoch": 0.9440630797773655, "grad_norm": 11.593951225280762, "learning_rate": 7.85673446626628e-08, "loss": 0.3075, "step": 20354 }, { "epoch": 0.9441094619666048, "grad_norm": 14.494548797607422, "learning_rate": 7.843744617527849e-08, "loss": 0.3526, "step": 20355 }, { "epoch": 0.9441558441558442, "grad_norm": 9.204059600830078, "learning_rate": 7.830765431095067e-08, "loss": 0.3256, "step": 20356 }, { "epoch": 0.9442022263450834, "grad_norm": 4.583655834197998, "learning_rate": 7.817796907249154e-08, "loss": 0.2597, "step": 20357 }, { "epoch": 0.9442486085343228, "grad_norm": 6.895351409912109, "learning_rate": 7.804839046271051e-08, "loss": 0.397, "step": 20358 }, { "epoch": 0.9442949907235622, "grad_norm": 7.69008731842041, "learning_rate": 7.791891848441535e-08, "loss": 0.2876, "step": 20359 }, { "epoch": 0.9443413729128015, "grad_norm": 6.319377899169922, "learning_rate": 7.778955314041103e-08, "loss": 0.2935, "step": 20360 }, { "epoch": 0.9443877551020409, "grad_norm": 8.31190013885498, "learning_rate": 7.766029443349977e-08, "loss": 0.3496, "step": 20361 }, { "epoch": 0.9444341372912801, "grad_norm": 8.188350677490234, "learning_rate": 7.753114236648152e-08, "loss": 0.3376, "step": 20362 }, { "epoch": 0.9444805194805195, "grad_norm": 13.415631294250488, "learning_rate": 7.740209694215461e-08, "loss": 0.3043, "step": 20363 }, { "epoch": 0.9445269016697588, "grad_norm": 9.268301010131836, "learning_rate": 7.727315816331515e-08, "loss": 0.3437, "step": 20364 }, { "epoch": 0.9445732838589982, "grad_norm": 12.562943458557129, "learning_rate": 7.714432603275646e-08, "loss": 0.4817, "step": 20365 }, { "epoch": 0.9446196660482374, "grad_norm": 10.25926399230957, "learning_rate": 7.701560055326907e-08, "loss": 0.2855, "step": 20366 }, { "epoch": 0.9446660482374768, "grad_norm": 8.021368980407715, "learning_rate": 7.688698172764241e-08, "loss": 0.3705, "step": 20367 }, { "epoch": 0.9447124304267162, "grad_norm": 7.1783647537231445, "learning_rate": 7.675846955866206e-08, "loss": 0.3857, "step": 20368 }, { "epoch": 0.9447588126159555, "grad_norm": 6.282865047454834, "learning_rate": 7.663006404911355e-08, "loss": 0.3335, "step": 20369 }, { "epoch": 0.9448051948051948, "grad_norm": 5.873948097229004, "learning_rate": 7.650176520177744e-08, "loss": 0.2821, "step": 20370 }, { "epoch": 0.9448515769944341, "grad_norm": 7.971660614013672, "learning_rate": 7.637357301943371e-08, "loss": 0.33, "step": 20371 }, { "epoch": 0.9448979591836735, "grad_norm": 4.5457282066345215, "learning_rate": 7.62454875048596e-08, "loss": 0.264, "step": 20372 }, { "epoch": 0.9449443413729128, "grad_norm": 10.579830169677734, "learning_rate": 7.611750866083011e-08, "loss": 0.3721, "step": 20373 }, { "epoch": 0.9449907235621521, "grad_norm": 7.8055033683776855, "learning_rate": 7.598963649011748e-08, "loss": 0.2221, "step": 20374 }, { "epoch": 0.9450371057513914, "grad_norm": 6.043446063995361, "learning_rate": 7.586187099549225e-08, "loss": 0.2337, "step": 20375 }, { "epoch": 0.9450834879406308, "grad_norm": 7.09641170501709, "learning_rate": 7.573421217972222e-08, "loss": 0.338, "step": 20376 }, { "epoch": 0.9451298701298702, "grad_norm": 7.256657600402832, "learning_rate": 7.560666004557405e-08, "loss": 0.2694, "step": 20377 }, { "epoch": 0.9451762523191095, "grad_norm": 9.896405220031738, "learning_rate": 7.547921459580943e-08, "loss": 0.3269, "step": 20378 }, { "epoch": 0.9452226345083488, "grad_norm": 7.449126720428467, "learning_rate": 7.535187583319004e-08, "loss": 0.3522, "step": 20379 }, { "epoch": 0.9452690166975881, "grad_norm": 15.06058406829834, "learning_rate": 7.522464376047534e-08, "loss": 0.5058, "step": 20380 }, { "epoch": 0.9453153988868275, "grad_norm": 10.708404541015625, "learning_rate": 7.50975183804209e-08, "loss": 0.343, "step": 20381 }, { "epoch": 0.9453617810760668, "grad_norm": 5.738960266113281, "learning_rate": 7.497049969578174e-08, "loss": 0.2928, "step": 20382 }, { "epoch": 0.9454081632653061, "grad_norm": 5.853601455688477, "learning_rate": 7.484358770930844e-08, "loss": 0.3366, "step": 20383 }, { "epoch": 0.9454545454545454, "grad_norm": 12.809830665588379, "learning_rate": 7.471678242375102e-08, "loss": 0.3428, "step": 20384 }, { "epoch": 0.9455009276437848, "grad_norm": 14.78485107421875, "learning_rate": 7.459008384185673e-08, "loss": 0.2226, "step": 20385 }, { "epoch": 0.9455473098330242, "grad_norm": 7.405376434326172, "learning_rate": 7.446349196637003e-08, "loss": 0.3078, "step": 20386 }, { "epoch": 0.9455936920222634, "grad_norm": 10.306629180908203, "learning_rate": 7.43370068000343e-08, "loss": 0.3819, "step": 20387 }, { "epoch": 0.9456400742115028, "grad_norm": 5.362119197845459, "learning_rate": 7.421062834558956e-08, "loss": 0.2682, "step": 20388 }, { "epoch": 0.9456864564007421, "grad_norm": 14.003488540649414, "learning_rate": 7.408435660577307e-08, "loss": 0.3963, "step": 20389 }, { "epoch": 0.9457328385899815, "grad_norm": 7.917856216430664, "learning_rate": 7.395819158332096e-08, "loss": 0.325, "step": 20390 }, { "epoch": 0.9457792207792208, "grad_norm": 15.905242919921875, "learning_rate": 7.383213328096661e-08, "loss": 0.5238, "step": 20391 }, { "epoch": 0.9458256029684601, "grad_norm": 12.940031051635742, "learning_rate": 7.370618170144062e-08, "loss": 0.3103, "step": 20392 }, { "epoch": 0.9458719851576994, "grad_norm": 5.299489974975586, "learning_rate": 7.35803368474719e-08, "loss": 0.3189, "step": 20393 }, { "epoch": 0.9459183673469388, "grad_norm": 7.581595420837402, "learning_rate": 7.345459872178662e-08, "loss": 0.2723, "step": 20394 }, { "epoch": 0.9459647495361782, "grad_norm": 12.450214385986328, "learning_rate": 7.332896732710926e-08, "loss": 0.3714, "step": 20395 }, { "epoch": 0.9460111317254174, "grad_norm": 4.962599277496338, "learning_rate": 7.320344266616097e-08, "loss": 0.3386, "step": 20396 }, { "epoch": 0.9460575139146568, "grad_norm": 5.698326110839844, "learning_rate": 7.307802474166126e-08, "loss": 0.3211, "step": 20397 }, { "epoch": 0.9461038961038961, "grad_norm": 7.9569244384765625, "learning_rate": 7.295271355632794e-08, "loss": 0.3606, "step": 20398 }, { "epoch": 0.9461502782931355, "grad_norm": 5.20548152923584, "learning_rate": 7.28275091128755e-08, "loss": 0.3726, "step": 20399 }, { "epoch": 0.9461966604823747, "grad_norm": 14.770354270935059, "learning_rate": 7.270241141401568e-08, "loss": 0.4111, "step": 20400 }, { "epoch": 0.9462430426716141, "grad_norm": 7.990784645080566, "learning_rate": 7.257742046245964e-08, "loss": 0.4093, "step": 20401 }, { "epoch": 0.9462894248608534, "grad_norm": 12.825125694274902, "learning_rate": 7.245253626091465e-08, "loss": 0.385, "step": 20402 }, { "epoch": 0.9463358070500928, "grad_norm": 6.568366527557373, "learning_rate": 7.23277588120863e-08, "loss": 0.2536, "step": 20403 }, { "epoch": 0.9463821892393321, "grad_norm": 10.233271598815918, "learning_rate": 7.22030881186786e-08, "loss": 0.4121, "step": 20404 }, { "epoch": 0.9464285714285714, "grad_norm": 13.622297286987305, "learning_rate": 7.207852418339157e-08, "loss": 0.3728, "step": 20405 }, { "epoch": 0.9464749536178108, "grad_norm": 6.37863826751709, "learning_rate": 7.19540670089236e-08, "loss": 0.268, "step": 20406 }, { "epoch": 0.9465213358070501, "grad_norm": 4.941431999206543, "learning_rate": 7.1829716597972e-08, "loss": 0.3373, "step": 20407 }, { "epoch": 0.9465677179962895, "grad_norm": 10.199057579040527, "learning_rate": 7.170547295323016e-08, "loss": 0.3446, "step": 20408 }, { "epoch": 0.9466141001855287, "grad_norm": 5.512716293334961, "learning_rate": 7.158133607738981e-08, "loss": 0.3383, "step": 20409 }, { "epoch": 0.9466604823747681, "grad_norm": 9.645108222961426, "learning_rate": 7.14573059731405e-08, "loss": 0.3985, "step": 20410 }, { "epoch": 0.9467068645640074, "grad_norm": 10.921110153198242, "learning_rate": 7.133338264316892e-08, "loss": 0.4124, "step": 20411 }, { "epoch": 0.9467532467532468, "grad_norm": 4.817232131958008, "learning_rate": 7.12095660901596e-08, "loss": 0.2612, "step": 20412 }, { "epoch": 0.946799628942486, "grad_norm": 7.116189002990723, "learning_rate": 7.108585631679599e-08, "loss": 0.35, "step": 20413 }, { "epoch": 0.9468460111317254, "grad_norm": 5.883998870849609, "learning_rate": 7.096225332575757e-08, "loss": 0.3599, "step": 20414 }, { "epoch": 0.9468923933209648, "grad_norm": 6.227304458618164, "learning_rate": 7.083875711972221e-08, "loss": 0.3476, "step": 20415 }, { "epoch": 0.9469387755102041, "grad_norm": 8.549393653869629, "learning_rate": 7.0715367701365e-08, "loss": 0.3363, "step": 20416 }, { "epoch": 0.9469851576994434, "grad_norm": 7.368764400482178, "learning_rate": 7.059208507335934e-08, "loss": 0.2816, "step": 20417 }, { "epoch": 0.9470315398886827, "grad_norm": 6.238987922668457, "learning_rate": 7.046890923837645e-08, "loss": 0.1854, "step": 20418 }, { "epoch": 0.9470779220779221, "grad_norm": 5.525425910949707, "learning_rate": 7.034584019908419e-08, "loss": 0.2902, "step": 20419 }, { "epoch": 0.9471243042671614, "grad_norm": 9.226676940917969, "learning_rate": 7.022287795814931e-08, "loss": 0.3028, "step": 20420 }, { "epoch": 0.9471706864564008, "grad_norm": 7.396891117095947, "learning_rate": 7.010002251823633e-08, "loss": 0.3951, "step": 20421 }, { "epoch": 0.94721706864564, "grad_norm": 6.460177898406982, "learning_rate": 6.997727388200537e-08, "loss": 0.3524, "step": 20422 }, { "epoch": 0.9472634508348794, "grad_norm": 4.2610368728637695, "learning_rate": 6.985463205211651e-08, "loss": 0.2982, "step": 20423 }, { "epoch": 0.9473098330241188, "grad_norm": 6.519312381744385, "learning_rate": 6.973209703122652e-08, "loss": 0.3676, "step": 20424 }, { "epoch": 0.9473562152133581, "grad_norm": 3.856491804122925, "learning_rate": 6.96096688219905e-08, "loss": 0.2374, "step": 20425 }, { "epoch": 0.9474025974025974, "grad_norm": 4.930445671081543, "learning_rate": 6.948734742706076e-08, "loss": 0.3402, "step": 20426 }, { "epoch": 0.9474489795918367, "grad_norm": 14.70846939086914, "learning_rate": 6.93651328490863e-08, "loss": 0.3963, "step": 20427 }, { "epoch": 0.9474953617810761, "grad_norm": 5.740431785583496, "learning_rate": 6.924302509071612e-08, "loss": 0.2535, "step": 20428 }, { "epoch": 0.9475417439703154, "grad_norm": 6.953353404998779, "learning_rate": 6.912102415459476e-08, "loss": 0.3654, "step": 20429 }, { "epoch": 0.9475881261595547, "grad_norm": 6.54674768447876, "learning_rate": 6.899913004336622e-08, "loss": 0.3394, "step": 20430 }, { "epoch": 0.947634508348794, "grad_norm": 8.18703842163086, "learning_rate": 6.887734275967006e-08, "loss": 0.3349, "step": 20431 }, { "epoch": 0.9476808905380334, "grad_norm": 5.75509786605835, "learning_rate": 6.875566230614583e-08, "loss": 0.2703, "step": 20432 }, { "epoch": 0.9477272727272728, "grad_norm": 8.295248985290527, "learning_rate": 6.86340886854292e-08, "loss": 0.3512, "step": 20433 }, { "epoch": 0.9477736549165121, "grad_norm": 5.964740753173828, "learning_rate": 6.851262190015361e-08, "loss": 0.281, "step": 20434 }, { "epoch": 0.9478200371057514, "grad_norm": 7.813772678375244, "learning_rate": 6.839126195295143e-08, "loss": 0.3211, "step": 20435 }, { "epoch": 0.9478664192949907, "grad_norm": 8.399338722229004, "learning_rate": 6.827000884645108e-08, "loss": 0.2259, "step": 20436 }, { "epoch": 0.9479128014842301, "grad_norm": 12.146153450012207, "learning_rate": 6.814886258328047e-08, "loss": 0.29, "step": 20437 }, { "epoch": 0.9479591836734694, "grad_norm": 9.833930969238281, "learning_rate": 6.802782316606249e-08, "loss": 0.4586, "step": 20438 }, { "epoch": 0.9480055658627087, "grad_norm": 9.871030807495117, "learning_rate": 6.790689059742118e-08, "loss": 0.3311, "step": 20439 }, { "epoch": 0.948051948051948, "grad_norm": 13.295083045959473, "learning_rate": 6.778606487997496e-08, "loss": 0.3565, "step": 20440 }, { "epoch": 0.9480983302411874, "grad_norm": 9.406783103942871, "learning_rate": 6.766534601634234e-08, "loss": 0.3557, "step": 20441 }, { "epoch": 0.9481447124304268, "grad_norm": 3.920534133911133, "learning_rate": 6.754473400913897e-08, "loss": 0.2422, "step": 20442 }, { "epoch": 0.948191094619666, "grad_norm": 6.322653770446777, "learning_rate": 6.742422886097722e-08, "loss": 0.3077, "step": 20443 }, { "epoch": 0.9482374768089054, "grad_norm": 6.876750469207764, "learning_rate": 6.730383057446776e-08, "loss": 0.3844, "step": 20444 }, { "epoch": 0.9482838589981447, "grad_norm": 6.148815631866455, "learning_rate": 6.718353915221854e-08, "loss": 0.2417, "step": 20445 }, { "epoch": 0.9483302411873841, "grad_norm": 11.67648983001709, "learning_rate": 6.706335459683688e-08, "loss": 0.381, "step": 20446 }, { "epoch": 0.9483766233766234, "grad_norm": 6.948221206665039, "learning_rate": 6.694327691092517e-08, "loss": 0.2443, "step": 20447 }, { "epoch": 0.9484230055658627, "grad_norm": 5.721142768859863, "learning_rate": 6.682330609708632e-08, "loss": 0.3194, "step": 20448 }, { "epoch": 0.948469387755102, "grad_norm": 13.54671859741211, "learning_rate": 6.670344215791769e-08, "loss": 0.3182, "step": 20449 }, { "epoch": 0.9485157699443414, "grad_norm": 20.161100387573242, "learning_rate": 6.658368509601775e-08, "loss": 0.6027, "step": 20450 }, { "epoch": 0.9485621521335807, "grad_norm": 6.503566741943359, "learning_rate": 6.646403491397946e-08, "loss": 0.3796, "step": 20451 }, { "epoch": 0.94860853432282, "grad_norm": 5.9488749504089355, "learning_rate": 6.634449161439571e-08, "loss": 0.2528, "step": 20452 }, { "epoch": 0.9486549165120594, "grad_norm": 11.21744155883789, "learning_rate": 6.622505519985723e-08, "loss": 0.3773, "step": 20453 }, { "epoch": 0.9487012987012987, "grad_norm": 7.753706932067871, "learning_rate": 6.610572567294971e-08, "loss": 0.3278, "step": 20454 }, { "epoch": 0.9487476808905381, "grad_norm": 6.119747638702393, "learning_rate": 6.598650303625942e-08, "loss": 0.2913, "step": 20455 }, { "epoch": 0.9487940630797773, "grad_norm": 5.951274394989014, "learning_rate": 6.58673872923693e-08, "loss": 0.3235, "step": 20456 }, { "epoch": 0.9488404452690167, "grad_norm": 16.30474090576172, "learning_rate": 6.57483784438595e-08, "loss": 0.5508, "step": 20457 }, { "epoch": 0.948886827458256, "grad_norm": 5.578558444976807, "learning_rate": 6.562947649330853e-08, "loss": 0.3022, "step": 20458 }, { "epoch": 0.9489332096474954, "grad_norm": 6.456998348236084, "learning_rate": 6.55106814432932e-08, "loss": 0.3634, "step": 20459 }, { "epoch": 0.9489795918367347, "grad_norm": 6.588503360748291, "learning_rate": 6.539199329638535e-08, "loss": 0.2905, "step": 20460 }, { "epoch": 0.949025974025974, "grad_norm": 7.420226097106934, "learning_rate": 6.527341205515736e-08, "loss": 0.3515, "step": 20461 }, { "epoch": 0.9490723562152134, "grad_norm": 9.779741287231445, "learning_rate": 6.515493772217829e-08, "loss": 0.3078, "step": 20462 }, { "epoch": 0.9491187384044527, "grad_norm": 6.922708988189697, "learning_rate": 6.503657030001443e-08, "loss": 0.3031, "step": 20463 }, { "epoch": 0.9491651205936921, "grad_norm": 18.882753372192383, "learning_rate": 6.491830979123093e-08, "loss": 0.444, "step": 20464 }, { "epoch": 0.9492115027829313, "grad_norm": 6.496006011962891, "learning_rate": 6.480015619838853e-08, "loss": 0.3379, "step": 20465 }, { "epoch": 0.9492578849721707, "grad_norm": 7.662823677062988, "learning_rate": 6.468210952404796e-08, "loss": 0.3228, "step": 20466 }, { "epoch": 0.94930426716141, "grad_norm": 11.077467918395996, "learning_rate": 6.456416977076663e-08, "loss": 0.3615, "step": 20467 }, { "epoch": 0.9493506493506494, "grad_norm": 6.2395339012146, "learning_rate": 6.444633694109914e-08, "loss": 0.2272, "step": 20468 }, { "epoch": 0.9493970315398886, "grad_norm": 7.661913871765137, "learning_rate": 6.432861103759847e-08, "loss": 0.3703, "step": 20469 }, { "epoch": 0.949443413729128, "grad_norm": 8.072281837463379, "learning_rate": 6.421099206281533e-08, "loss": 0.2311, "step": 20470 }, { "epoch": 0.9494897959183674, "grad_norm": 3.6625516414642334, "learning_rate": 6.409348001929771e-08, "loss": 0.2171, "step": 20471 }, { "epoch": 0.9495361781076067, "grad_norm": 7.419840335845947, "learning_rate": 6.397607490959134e-08, "loss": 0.4104, "step": 20472 }, { "epoch": 0.949582560296846, "grad_norm": 15.431608200073242, "learning_rate": 6.385877673623975e-08, "loss": 0.3126, "step": 20473 }, { "epoch": 0.9496289424860853, "grad_norm": 4.631369113922119, "learning_rate": 6.374158550178422e-08, "loss": 0.2451, "step": 20474 }, { "epoch": 0.9496753246753247, "grad_norm": 21.6265926361084, "learning_rate": 6.362450120876385e-08, "loss": 0.5767, "step": 20475 }, { "epoch": 0.949721706864564, "grad_norm": 6.914512634277344, "learning_rate": 6.350752385971493e-08, "loss": 0.3511, "step": 20476 }, { "epoch": 0.9497680890538034, "grad_norm": 11.88672161102295, "learning_rate": 6.339065345717154e-08, "loss": 0.3113, "step": 20477 }, { "epoch": 0.9498144712430426, "grad_norm": 6.2617316246032715, "learning_rate": 6.327389000366613e-08, "loss": 0.1731, "step": 20478 }, { "epoch": 0.949860853432282, "grad_norm": 8.180870056152344, "learning_rate": 6.315723350172775e-08, "loss": 0.3497, "step": 20479 }, { "epoch": 0.9499072356215214, "grad_norm": 9.693158149719238, "learning_rate": 6.304068395388441e-08, "loss": 0.3568, "step": 20480 }, { "epoch": 0.9499536178107607, "grad_norm": 8.430415153503418, "learning_rate": 6.292424136266073e-08, "loss": 0.3617, "step": 20481 }, { "epoch": 0.95, "grad_norm": 9.268555641174316, "learning_rate": 6.28079057305786e-08, "loss": 0.351, "step": 20482 }, { "epoch": 0.9500463821892393, "grad_norm": 7.971709728240967, "learning_rate": 6.269167706015989e-08, "loss": 0.2664, "step": 20483 }, { "epoch": 0.9500927643784787, "grad_norm": 11.926796913146973, "learning_rate": 6.257555535392146e-08, "loss": 0.4844, "step": 20484 }, { "epoch": 0.950139146567718, "grad_norm": 5.3039774894714355, "learning_rate": 6.245954061437909e-08, "loss": 0.1971, "step": 20485 }, { "epoch": 0.9501855287569573, "grad_norm": 5.77393913269043, "learning_rate": 6.234363284404687e-08, "loss": 0.27, "step": 20486 }, { "epoch": 0.9502319109461966, "grad_norm": 10.23828125, "learning_rate": 6.22278320454356e-08, "loss": 0.3845, "step": 20487 }, { "epoch": 0.950278293135436, "grad_norm": 15.870478630065918, "learning_rate": 6.211213822105378e-08, "loss": 0.4689, "step": 20488 }, { "epoch": 0.9503246753246753, "grad_norm": 6.983277320861816, "learning_rate": 6.199655137340776e-08, "loss": 0.2517, "step": 20489 }, { "epoch": 0.9503710575139147, "grad_norm": 6.104804515838623, "learning_rate": 6.188107150500222e-08, "loss": 0.2623, "step": 20490 }, { "epoch": 0.950417439703154, "grad_norm": 6.7060866355896, "learning_rate": 6.176569861833903e-08, "loss": 0.4013, "step": 20491 }, { "epoch": 0.9504638218923933, "grad_norm": 9.492698669433594, "learning_rate": 6.165043271591731e-08, "loss": 0.4117, "step": 20492 }, { "epoch": 0.9505102040816327, "grad_norm": 11.971227645874023, "learning_rate": 6.153527380023338e-08, "loss": 0.3173, "step": 20493 }, { "epoch": 0.950556586270872, "grad_norm": 9.036495208740234, "learning_rate": 6.142022187378361e-08, "loss": 0.3093, "step": 20494 }, { "epoch": 0.9506029684601113, "grad_norm": 8.191197395324707, "learning_rate": 6.130527693905985e-08, "loss": 0.2621, "step": 20495 }, { "epoch": 0.9506493506493506, "grad_norm": 10.903677940368652, "learning_rate": 6.119043899855237e-08, "loss": 0.2678, "step": 20496 }, { "epoch": 0.95069573283859, "grad_norm": 12.438308715820312, "learning_rate": 6.107570805474916e-08, "loss": 0.4922, "step": 20497 }, { "epoch": 0.9507421150278293, "grad_norm": 7.674502849578857, "learning_rate": 6.096108411013546e-08, "loss": 0.3808, "step": 20498 }, { "epoch": 0.9507884972170686, "grad_norm": 6.037466049194336, "learning_rate": 6.084656716719483e-08, "loss": 0.3477, "step": 20499 }, { "epoch": 0.950834879406308, "grad_norm": 4.159564971923828, "learning_rate": 6.073215722840863e-08, "loss": 0.2417, "step": 20500 }, { "epoch": 0.9508812615955473, "grad_norm": 5.255710124969482, "learning_rate": 6.061785429625433e-08, "loss": 0.3099, "step": 20501 }, { "epoch": 0.9509276437847867, "grad_norm": 7.6092681884765625, "learning_rate": 6.050365837320993e-08, "loss": 0.3164, "step": 20502 }, { "epoch": 0.950974025974026, "grad_norm": 5.534379482269287, "learning_rate": 6.03895694617479e-08, "loss": 0.2976, "step": 20503 }, { "epoch": 0.9510204081632653, "grad_norm": 7.603957653045654, "learning_rate": 6.027558756434015e-08, "loss": 0.2654, "step": 20504 }, { "epoch": 0.9510667903525046, "grad_norm": 8.974198341369629, "learning_rate": 6.016171268345694e-08, "loss": 0.3774, "step": 20505 }, { "epoch": 0.951113172541744, "grad_norm": 6.525137424468994, "learning_rate": 6.004794482156406e-08, "loss": 0.2138, "step": 20506 }, { "epoch": 0.9511595547309833, "grad_norm": 20.44442367553711, "learning_rate": 5.993428398112788e-08, "loss": 0.4271, "step": 20507 }, { "epoch": 0.9512059369202226, "grad_norm": 9.052495956420898, "learning_rate": 5.982073016460921e-08, "loss": 0.2547, "step": 20508 }, { "epoch": 0.951252319109462, "grad_norm": 9.021711349487305, "learning_rate": 5.970728337446885e-08, "loss": 0.3882, "step": 20509 }, { "epoch": 0.9512987012987013, "grad_norm": 10.441563606262207, "learning_rate": 5.959394361316429e-08, "loss": 0.3301, "step": 20510 }, { "epoch": 0.9513450834879407, "grad_norm": 6.17609977722168, "learning_rate": 5.948071088315077e-08, "loss": 0.2607, "step": 20511 }, { "epoch": 0.9513914656771799, "grad_norm": 8.898650169372559, "learning_rate": 5.936758518688301e-08, "loss": 0.415, "step": 20512 }, { "epoch": 0.9514378478664193, "grad_norm": 5.991912841796875, "learning_rate": 5.925456652680961e-08, "loss": 0.2752, "step": 20513 }, { "epoch": 0.9514842300556586, "grad_norm": 4.557673454284668, "learning_rate": 5.9141654905380266e-08, "loss": 0.3125, "step": 20514 }, { "epoch": 0.951530612244898, "grad_norm": 6.082046985626221, "learning_rate": 5.90288503250408e-08, "loss": 0.2393, "step": 20515 }, { "epoch": 0.9515769944341373, "grad_norm": 5.901172637939453, "learning_rate": 5.891615278823537e-08, "loss": 0.2412, "step": 20516 }, { "epoch": 0.9516233766233766, "grad_norm": 7.664255142211914, "learning_rate": 5.8803562297405915e-08, "loss": 0.379, "step": 20517 }, { "epoch": 0.951669758812616, "grad_norm": 6.271639823913574, "learning_rate": 5.869107885499048e-08, "loss": 0.3218, "step": 20518 }, { "epoch": 0.9517161410018553, "grad_norm": 6.69985294342041, "learning_rate": 5.8578702463426564e-08, "loss": 0.3115, "step": 20519 }, { "epoch": 0.9517625231910947, "grad_norm": 9.069902420043945, "learning_rate": 5.846643312514888e-08, "loss": 0.3672, "step": 20520 }, { "epoch": 0.9518089053803339, "grad_norm": 5.883639812469482, "learning_rate": 5.835427084258938e-08, "loss": 0.2926, "step": 20521 }, { "epoch": 0.9518552875695733, "grad_norm": 6.865642070770264, "learning_rate": 5.8242215618178335e-08, "loss": 0.3149, "step": 20522 }, { "epoch": 0.9519016697588126, "grad_norm": 6.037710189819336, "learning_rate": 5.813026745434269e-08, "loss": 0.3163, "step": 20523 }, { "epoch": 0.951948051948052, "grad_norm": 5.9127912521362305, "learning_rate": 5.801842635350885e-08, "loss": 0.2885, "step": 20524 }, { "epoch": 0.9519944341372912, "grad_norm": 6.314327239990234, "learning_rate": 5.7906692318098755e-08, "loss": 0.3347, "step": 20525 }, { "epoch": 0.9520408163265306, "grad_norm": 5.215195178985596, "learning_rate": 5.7795065350533805e-08, "loss": 0.3141, "step": 20526 }, { "epoch": 0.95208719851577, "grad_norm": 6.195570468902588, "learning_rate": 5.768354545323207e-08, "loss": 0.3487, "step": 20527 }, { "epoch": 0.9521335807050093, "grad_norm": 11.625889778137207, "learning_rate": 5.757213262860939e-08, "loss": 0.3441, "step": 20528 }, { "epoch": 0.9521799628942486, "grad_norm": 4.858913421630859, "learning_rate": 5.746082687907939e-08, "loss": 0.2598, "step": 20529 }, { "epoch": 0.9522263450834879, "grad_norm": 5.252254009246826, "learning_rate": 5.734962820705403e-08, "loss": 0.3544, "step": 20530 }, { "epoch": 0.9522727272727273, "grad_norm": 6.513791561126709, "learning_rate": 5.7238536614941385e-08, "loss": 0.2546, "step": 20531 }, { "epoch": 0.9523191094619666, "grad_norm": 9.872340202331543, "learning_rate": 5.712755210514953e-08, "loss": 0.3241, "step": 20532 }, { "epoch": 0.952365491651206, "grad_norm": 6.059553623199463, "learning_rate": 5.7016674680082094e-08, "loss": 0.3054, "step": 20533 }, { "epoch": 0.9524118738404452, "grad_norm": 11.799100875854492, "learning_rate": 5.690590434214049e-08, "loss": 0.4268, "step": 20534 }, { "epoch": 0.9524582560296846, "grad_norm": 10.309816360473633, "learning_rate": 5.679524109372614e-08, "loss": 0.3134, "step": 20535 }, { "epoch": 0.952504638218924, "grad_norm": 6.383644104003906, "learning_rate": 5.668468493723489e-08, "loss": 0.1781, "step": 20536 }, { "epoch": 0.9525510204081633, "grad_norm": 6.985434055328369, "learning_rate": 5.6574235875063166e-08, "loss": 0.3025, "step": 20537 }, { "epoch": 0.9525974025974026, "grad_norm": 4.846680641174316, "learning_rate": 5.646389390960294e-08, "loss": 0.2673, "step": 20538 }, { "epoch": 0.9526437847866419, "grad_norm": 7.10805606842041, "learning_rate": 5.635365904324508e-08, "loss": 0.3155, "step": 20539 }, { "epoch": 0.9526901669758813, "grad_norm": 8.333601951599121, "learning_rate": 5.624353127837767e-08, "loss": 0.3225, "step": 20540 }, { "epoch": 0.9527365491651206, "grad_norm": 6.897369861602783, "learning_rate": 5.6133510617386586e-08, "loss": 0.3174, "step": 20541 }, { "epoch": 0.9527829313543599, "grad_norm": 5.157007217407227, "learning_rate": 5.602359706265603e-08, "loss": 0.2532, "step": 20542 }, { "epoch": 0.9528293135435992, "grad_norm": 10.355626106262207, "learning_rate": 5.5913790616565764e-08, "loss": 0.4592, "step": 20543 }, { "epoch": 0.9528756957328386, "grad_norm": 6.03801965713501, "learning_rate": 5.580409128149555e-08, "loss": 0.2863, "step": 20544 }, { "epoch": 0.952922077922078, "grad_norm": 6.17840576171875, "learning_rate": 5.569449905982238e-08, "loss": 0.2937, "step": 20545 }, { "epoch": 0.9529684601113173, "grad_norm": 6.2323479652404785, "learning_rate": 5.558501395391935e-08, "loss": 0.3279, "step": 20546 }, { "epoch": 0.9530148423005566, "grad_norm": 4.727721214294434, "learning_rate": 5.5475635966160124e-08, "loss": 0.2552, "step": 20547 }, { "epoch": 0.9530612244897959, "grad_norm": 8.702970504760742, "learning_rate": 5.536636509891225e-08, "loss": 0.2143, "step": 20548 }, { "epoch": 0.9531076066790353, "grad_norm": 6.576047420501709, "learning_rate": 5.525720135454438e-08, "loss": 0.228, "step": 20549 }, { "epoch": 0.9531539888682746, "grad_norm": 4.461037635803223, "learning_rate": 5.514814473542129e-08, "loss": 0.3252, "step": 20550 }, { "epoch": 0.9532003710575139, "grad_norm": 8.789069175720215, "learning_rate": 5.503919524390555e-08, "loss": 0.4491, "step": 20551 }, { "epoch": 0.9532467532467532, "grad_norm": 8.647970199584961, "learning_rate": 5.4930352882357486e-08, "loss": 0.3506, "step": 20552 }, { "epoch": 0.9532931354359926, "grad_norm": 5.874830722808838, "learning_rate": 5.482161765313465e-08, "loss": 0.3153, "step": 20553 }, { "epoch": 0.953339517625232, "grad_norm": 6.182392120361328, "learning_rate": 5.47129895585935e-08, "loss": 0.2221, "step": 20554 }, { "epoch": 0.9533858998144712, "grad_norm": 13.208622932434082, "learning_rate": 5.4604468601087145e-08, "loss": 0.5403, "step": 20555 }, { "epoch": 0.9534322820037106, "grad_norm": 5.674677848815918, "learning_rate": 5.4496054782965934e-08, "loss": 0.2591, "step": 20556 }, { "epoch": 0.9534786641929499, "grad_norm": 3.9395558834075928, "learning_rate": 5.4387748106580206e-08, "loss": 0.3016, "step": 20557 }, { "epoch": 0.9535250463821893, "grad_norm": 11.178399085998535, "learning_rate": 5.4279548574274754e-08, "loss": 0.3801, "step": 20558 }, { "epoch": 0.9535714285714286, "grad_norm": 6.3370232582092285, "learning_rate": 5.4171456188394364e-08, "loss": 0.3033, "step": 20559 }, { "epoch": 0.9536178107606679, "grad_norm": 5.864827632904053, "learning_rate": 5.4063470951280505e-08, "loss": 0.3043, "step": 20560 }, { "epoch": 0.9536641929499072, "grad_norm": 8.198816299438477, "learning_rate": 5.395559286527297e-08, "loss": 0.3426, "step": 20561 }, { "epoch": 0.9537105751391466, "grad_norm": 8.922114372253418, "learning_rate": 5.384782193270877e-08, "loss": 0.2437, "step": 20562 }, { "epoch": 0.953756957328386, "grad_norm": 5.451231002807617, "learning_rate": 5.374015815592326e-08, "loss": 0.3564, "step": 20563 }, { "epoch": 0.9538033395176252, "grad_norm": 4.528418064117432, "learning_rate": 5.363260153724792e-08, "loss": 0.3166, "step": 20564 }, { "epoch": 0.9538497217068646, "grad_norm": 11.832539558410645, "learning_rate": 5.35251520790131e-08, "loss": 0.4521, "step": 20565 }, { "epoch": 0.9538961038961039, "grad_norm": 4.501529216766357, "learning_rate": 5.341780978354694e-08, "loss": 0.3027, "step": 20566 }, { "epoch": 0.9539424860853433, "grad_norm": 5.517056941986084, "learning_rate": 5.3310574653174796e-08, "loss": 0.2234, "step": 20567 }, { "epoch": 0.9539888682745825, "grad_norm": 16.930700302124023, "learning_rate": 5.3203446690220374e-08, "loss": 0.4568, "step": 20568 }, { "epoch": 0.9540352504638219, "grad_norm": 14.842385292053223, "learning_rate": 5.3096425897004034e-08, "loss": 0.3618, "step": 20569 }, { "epoch": 0.9540816326530612, "grad_norm": 6.290870189666748, "learning_rate": 5.2989512275843926e-08, "loss": 0.3203, "step": 20570 }, { "epoch": 0.9541280148423006, "grad_norm": 7.674457550048828, "learning_rate": 5.288270582905708e-08, "loss": 0.3088, "step": 20571 }, { "epoch": 0.9541743970315398, "grad_norm": 9.509982109069824, "learning_rate": 5.2776006558957205e-08, "loss": 0.2702, "step": 20572 }, { "epoch": 0.9542207792207792, "grad_norm": 6.601216793060303, "learning_rate": 5.266941446785578e-08, "loss": 0.2385, "step": 20573 }, { "epoch": 0.9542671614100185, "grad_norm": 3.581578493118286, "learning_rate": 5.256292955806208e-08, "loss": 0.2424, "step": 20574 }, { "epoch": 0.9543135435992579, "grad_norm": 6.600430965423584, "learning_rate": 5.245655183188258e-08, "loss": 0.2734, "step": 20575 }, { "epoch": 0.9543599257884973, "grad_norm": 8.702652931213379, "learning_rate": 5.235028129162267e-08, "loss": 0.2998, "step": 20576 }, { "epoch": 0.9544063079777365, "grad_norm": 11.91622257232666, "learning_rate": 5.224411793958384e-08, "loss": 0.4317, "step": 20577 }, { "epoch": 0.9544526901669759, "grad_norm": 9.009222984313965, "learning_rate": 5.213806177806702e-08, "loss": 0.3816, "step": 20578 }, { "epoch": 0.9544990723562152, "grad_norm": 4.66124963760376, "learning_rate": 5.2032112809369265e-08, "loss": 0.2479, "step": 20579 }, { "epoch": 0.9545454545454546, "grad_norm": 6.543781280517578, "learning_rate": 5.192627103578596e-08, "loss": 0.2391, "step": 20580 }, { "epoch": 0.9545918367346938, "grad_norm": 7.978161334991455, "learning_rate": 5.182053645961027e-08, "loss": 0.3268, "step": 20581 }, { "epoch": 0.9546382189239332, "grad_norm": 14.41191291809082, "learning_rate": 5.171490908313259e-08, "loss": 0.3441, "step": 20582 }, { "epoch": 0.9546846011131725, "grad_norm": 8.924964904785156, "learning_rate": 5.1609388908641644e-08, "loss": 0.375, "step": 20583 }, { "epoch": 0.9547309833024119, "grad_norm": 7.898785591125488, "learning_rate": 5.1503975938422824e-08, "loss": 0.3559, "step": 20584 }, { "epoch": 0.9547773654916512, "grad_norm": 13.850552558898926, "learning_rate": 5.1398670174760966e-08, "loss": 0.3832, "step": 20585 }, { "epoch": 0.9548237476808905, "grad_norm": 6.3141303062438965, "learning_rate": 5.1293471619936475e-08, "loss": 0.3696, "step": 20586 }, { "epoch": 0.9548701298701299, "grad_norm": 5.23443078994751, "learning_rate": 5.118838027622863e-08, "loss": 0.2843, "step": 20587 }, { "epoch": 0.9549165120593692, "grad_norm": 10.500773429870605, "learning_rate": 5.108339614591451e-08, "loss": 0.3158, "step": 20588 }, { "epoch": 0.9549628942486086, "grad_norm": 7.71872091293335, "learning_rate": 5.0978519231268395e-08, "loss": 0.3317, "step": 20589 }, { "epoch": 0.9550092764378478, "grad_norm": 5.700761318206787, "learning_rate": 5.0873749534562366e-08, "loss": 0.2648, "step": 20590 }, { "epoch": 0.9550556586270872, "grad_norm": 12.06342601776123, "learning_rate": 5.076908705806571e-08, "loss": 0.3754, "step": 20591 }, { "epoch": 0.9551020408163265, "grad_norm": 7.461430072784424, "learning_rate": 5.0664531804046624e-08, "loss": 0.3129, "step": 20592 }, { "epoch": 0.9551484230055659, "grad_norm": 7.604994297027588, "learning_rate": 5.0560083774769953e-08, "loss": 0.3391, "step": 20593 }, { "epoch": 0.9551948051948052, "grad_norm": 6.459047794342041, "learning_rate": 5.0455742972498335e-08, "loss": 0.2682, "step": 20594 }, { "epoch": 0.9552411873840445, "grad_norm": 8.032393455505371, "learning_rate": 5.035150939949274e-08, "loss": 0.3203, "step": 20595 }, { "epoch": 0.9552875695732839, "grad_norm": 5.858903884887695, "learning_rate": 5.0247383058011354e-08, "loss": 0.2457, "step": 20596 }, { "epoch": 0.9553339517625232, "grad_norm": 6.781175136566162, "learning_rate": 5.0143363950309055e-08, "loss": 0.2625, "step": 20597 }, { "epoch": 0.9553803339517625, "grad_norm": 5.36836051940918, "learning_rate": 5.003945207864014e-08, "loss": 0.3381, "step": 20598 }, { "epoch": 0.9554267161410018, "grad_norm": 7.031084060668945, "learning_rate": 4.993564744525559e-08, "loss": 0.2962, "step": 20599 }, { "epoch": 0.9554730983302412, "grad_norm": 7.607354164123535, "learning_rate": 4.983195005240415e-08, "loss": 0.323, "step": 20600 }, { "epoch": 0.9555194805194805, "grad_norm": 14.917753219604492, "learning_rate": 4.972835990233349e-08, "loss": 0.4992, "step": 20601 }, { "epoch": 0.9555658627087199, "grad_norm": 5.265255451202393, "learning_rate": 4.9624876997286245e-08, "loss": 0.2316, "step": 20602 }, { "epoch": 0.9556122448979592, "grad_norm": 4.807863235473633, "learning_rate": 4.952150133950506e-08, "loss": 0.2772, "step": 20603 }, { "epoch": 0.9556586270871985, "grad_norm": 4.762282371520996, "learning_rate": 4.9418232931229274e-08, "loss": 0.3257, "step": 20604 }, { "epoch": 0.9557050092764379, "grad_norm": 6.398914813995361, "learning_rate": 4.931507177469652e-08, "loss": 0.151, "step": 20605 }, { "epoch": 0.9557513914656772, "grad_norm": 5.864566326141357, "learning_rate": 4.921201787214169e-08, "loss": 0.3193, "step": 20606 }, { "epoch": 0.9557977736549165, "grad_norm": 10.96436595916748, "learning_rate": 4.910907122579689e-08, "loss": 0.2553, "step": 20607 }, { "epoch": 0.9558441558441558, "grad_norm": 5.039053440093994, "learning_rate": 4.900623183789255e-08, "loss": 0.4096, "step": 20608 }, { "epoch": 0.9558905380333952, "grad_norm": 6.474203586578369, "learning_rate": 4.890349971065744e-08, "loss": 0.2585, "step": 20609 }, { "epoch": 0.9559369202226345, "grad_norm": 4.258383750915527, "learning_rate": 4.88008748463159e-08, "loss": 0.2788, "step": 20610 }, { "epoch": 0.9559833024118738, "grad_norm": 8.567392349243164, "learning_rate": 4.8698357247092264e-08, "loss": 0.3786, "step": 20611 }, { "epoch": 0.9560296846011131, "grad_norm": 9.212747573852539, "learning_rate": 4.859594691520753e-08, "loss": 0.3254, "step": 20612 }, { "epoch": 0.9560760667903525, "grad_norm": 10.396788597106934, "learning_rate": 4.849364385287936e-08, "loss": 0.5115, "step": 20613 }, { "epoch": 0.9561224489795919, "grad_norm": 12.658614158630371, "learning_rate": 4.839144806232543e-08, "loss": 0.2876, "step": 20614 }, { "epoch": 0.9561688311688312, "grad_norm": 5.061966419219971, "learning_rate": 4.828935954575842e-08, "loss": 0.182, "step": 20615 }, { "epoch": 0.9562152133580705, "grad_norm": 13.472905158996582, "learning_rate": 4.8187378305390994e-08, "loss": 0.393, "step": 20616 }, { "epoch": 0.9562615955473098, "grad_norm": 10.1637544631958, "learning_rate": 4.808550434343251e-08, "loss": 0.4056, "step": 20617 }, { "epoch": 0.9563079777365492, "grad_norm": 9.028366088867188, "learning_rate": 4.7983737662089524e-08, "loss": 0.4175, "step": 20618 }, { "epoch": 0.9563543599257885, "grad_norm": 6.626156330108643, "learning_rate": 4.788207826356639e-08, "loss": 0.3974, "step": 20619 }, { "epoch": 0.9564007421150278, "grad_norm": 7.4032301902771, "learning_rate": 4.778052615006634e-08, "loss": 0.3557, "step": 20620 }, { "epoch": 0.9564471243042671, "grad_norm": 16.112375259399414, "learning_rate": 4.767908132378929e-08, "loss": 0.4168, "step": 20621 }, { "epoch": 0.9564935064935065, "grad_norm": 4.445720195770264, "learning_rate": 4.7577743786932914e-08, "loss": 0.2268, "step": 20622 }, { "epoch": 0.9565398886827459, "grad_norm": 8.210955619812012, "learning_rate": 4.747651354169269e-08, "loss": 0.2161, "step": 20623 }, { "epoch": 0.9565862708719851, "grad_norm": 8.007813453674316, "learning_rate": 4.737539059026186e-08, "loss": 0.2817, "step": 20624 }, { "epoch": 0.9566326530612245, "grad_norm": 6.4398040771484375, "learning_rate": 4.727437493483034e-08, "loss": 0.326, "step": 20625 }, { "epoch": 0.9566790352504638, "grad_norm": 10.86119270324707, "learning_rate": 4.7173466577587477e-08, "loss": 0.3834, "step": 20626 }, { "epoch": 0.9567254174397032, "grad_norm": 7.067407608032227, "learning_rate": 4.707266552071876e-08, "loss": 0.2385, "step": 20627 }, { "epoch": 0.9567717996289424, "grad_norm": 8.769098281860352, "learning_rate": 4.697197176640911e-08, "loss": 0.3139, "step": 20628 }, { "epoch": 0.9568181818181818, "grad_norm": 8.763299942016602, "learning_rate": 4.687138531683899e-08, "loss": 0.3219, "step": 20629 }, { "epoch": 0.9568645640074211, "grad_norm": 8.649679183959961, "learning_rate": 4.677090617418722e-08, "loss": 0.3512, "step": 20630 }, { "epoch": 0.9569109461966605, "grad_norm": 8.475948333740234, "learning_rate": 4.667053434063151e-08, "loss": 0.2895, "step": 20631 }, { "epoch": 0.9569573283858999, "grad_norm": 9.502226829528809, "learning_rate": 4.657026981834623e-08, "loss": 0.4157, "step": 20632 }, { "epoch": 0.9570037105751391, "grad_norm": 6.067277908325195, "learning_rate": 4.6470112609503516e-08, "loss": 0.2995, "step": 20633 }, { "epoch": 0.9570500927643785, "grad_norm": 11.561553955078125, "learning_rate": 4.6370062716272756e-08, "loss": 0.3384, "step": 20634 }, { "epoch": 0.9570964749536178, "grad_norm": 7.119103908538818, "learning_rate": 4.6270120140822214e-08, "loss": 0.2778, "step": 20635 }, { "epoch": 0.9571428571428572, "grad_norm": 10.866964340209961, "learning_rate": 4.6170284885316276e-08, "loss": 0.3811, "step": 20636 }, { "epoch": 0.9571892393320964, "grad_norm": 5.023405075073242, "learning_rate": 4.60705569519182e-08, "loss": 0.2973, "step": 20637 }, { "epoch": 0.9572356215213358, "grad_norm": 8.08059310913086, "learning_rate": 4.597093634278904e-08, "loss": 0.3845, "step": 20638 }, { "epoch": 0.9572820037105751, "grad_norm": 5.8572468757629395, "learning_rate": 4.587142306008652e-08, "loss": 0.2678, "step": 20639 }, { "epoch": 0.9573283858998145, "grad_norm": 10.138700485229492, "learning_rate": 4.577201710596613e-08, "loss": 0.3445, "step": 20640 }, { "epoch": 0.9573747680890538, "grad_norm": 5.13623046875, "learning_rate": 4.5672718482582256e-08, "loss": 0.2228, "step": 20641 }, { "epoch": 0.9574211502782931, "grad_norm": 9.05617618560791, "learning_rate": 4.557352719208596e-08, "loss": 0.5384, "step": 20642 }, { "epoch": 0.9574675324675325, "grad_norm": 8.888697624206543, "learning_rate": 4.547444323662553e-08, "loss": 0.2533, "step": 20643 }, { "epoch": 0.9575139146567718, "grad_norm": 5.325826168060303, "learning_rate": 4.537546661834813e-08, "loss": 0.328, "step": 20644 }, { "epoch": 0.9575602968460112, "grad_norm": 8.235779762268066, "learning_rate": 4.5276597339398707e-08, "loss": 0.3411, "step": 20645 }, { "epoch": 0.9576066790352504, "grad_norm": 11.300625801086426, "learning_rate": 4.517783540191778e-08, "loss": 0.4595, "step": 20646 }, { "epoch": 0.9576530612244898, "grad_norm": 7.639061450958252, "learning_rate": 4.50791808080453e-08, "loss": 0.4091, "step": 20647 }, { "epoch": 0.9576994434137291, "grad_norm": 8.027522087097168, "learning_rate": 4.498063355991955e-08, "loss": 0.2896, "step": 20648 }, { "epoch": 0.9577458256029685, "grad_norm": 5.001104354858398, "learning_rate": 4.488219365967439e-08, "loss": 0.2956, "step": 20649 }, { "epoch": 0.9577922077922078, "grad_norm": 5.949972152709961, "learning_rate": 4.478386110944366e-08, "loss": 0.2929, "step": 20650 }, { "epoch": 0.9578385899814471, "grad_norm": 6.315093994140625, "learning_rate": 4.4685635911356775e-08, "loss": 0.2881, "step": 20651 }, { "epoch": 0.9578849721706865, "grad_norm": 5.571320533752441, "learning_rate": 4.458751806754147e-08, "loss": 0.3229, "step": 20652 }, { "epoch": 0.9579313543599258, "grad_norm": 4.34652042388916, "learning_rate": 4.448950758012438e-08, "loss": 0.2922, "step": 20653 }, { "epoch": 0.9579777365491651, "grad_norm": 4.642821788787842, "learning_rate": 4.43916044512277e-08, "loss": 0.2885, "step": 20654 }, { "epoch": 0.9580241187384044, "grad_norm": 5.951498508453369, "learning_rate": 4.4293808682973615e-08, "loss": 0.3451, "step": 20655 }, { "epoch": 0.9580705009276438, "grad_norm": 8.452951431274414, "learning_rate": 4.4196120277480436e-08, "loss": 0.313, "step": 20656 }, { "epoch": 0.9581168831168831, "grad_norm": 5.01595401763916, "learning_rate": 4.4098539236864246e-08, "loss": 0.2528, "step": 20657 }, { "epoch": 0.9581632653061225, "grad_norm": 9.042346954345703, "learning_rate": 4.400106556323891e-08, "loss": 0.2708, "step": 20658 }, { "epoch": 0.9582096474953617, "grad_norm": 8.512979507446289, "learning_rate": 4.3903699258716626e-08, "loss": 0.3466, "step": 20659 }, { "epoch": 0.9582560296846011, "grad_norm": 9.970521926879883, "learning_rate": 4.380644032540682e-08, "loss": 0.2962, "step": 20660 }, { "epoch": 0.9583024118738405, "grad_norm": 5.384796142578125, "learning_rate": 4.370928876541614e-08, "loss": 0.2919, "step": 20661 }, { "epoch": 0.9583487940630798, "grad_norm": 10.752249717712402, "learning_rate": 4.361224458084956e-08, "loss": 0.3604, "step": 20662 }, { "epoch": 0.9583951762523191, "grad_norm": 9.07646656036377, "learning_rate": 4.351530777380986e-08, "loss": 0.3626, "step": 20663 }, { "epoch": 0.9584415584415584, "grad_norm": 5.5342912673950195, "learning_rate": 4.341847834639645e-08, "loss": 0.218, "step": 20664 }, { "epoch": 0.9584879406307978, "grad_norm": 5.273929119110107, "learning_rate": 4.3321756300707116e-08, "loss": 0.2991, "step": 20665 }, { "epoch": 0.9585343228200371, "grad_norm": 9.894524574279785, "learning_rate": 4.322514163883795e-08, "loss": 0.3815, "step": 20666 }, { "epoch": 0.9585807050092764, "grad_norm": 6.946589469909668, "learning_rate": 4.312863436288173e-08, "loss": 0.3195, "step": 20667 }, { "epoch": 0.9586270871985157, "grad_norm": 5.261448383331299, "learning_rate": 4.303223447492899e-08, "loss": 0.2616, "step": 20668 }, { "epoch": 0.9586734693877551, "grad_norm": 14.271783828735352, "learning_rate": 4.2935941977068075e-08, "loss": 0.3009, "step": 20669 }, { "epoch": 0.9587198515769945, "grad_norm": 6.099551677703857, "learning_rate": 4.283975687138564e-08, "loss": 0.1879, "step": 20670 }, { "epoch": 0.9587662337662337, "grad_norm": 5.015454292297363, "learning_rate": 4.2743679159965576e-08, "loss": 0.3139, "step": 20671 }, { "epoch": 0.9588126159554731, "grad_norm": 6.186201095581055, "learning_rate": 4.2647708844889e-08, "loss": 0.2636, "step": 20672 }, { "epoch": 0.9588589981447124, "grad_norm": 6.12079381942749, "learning_rate": 4.25518459282348e-08, "loss": 0.2964, "step": 20673 }, { "epoch": 0.9589053803339518, "grad_norm": 9.802414894104004, "learning_rate": 4.245609041207965e-08, "loss": 0.3282, "step": 20674 }, { "epoch": 0.9589517625231911, "grad_norm": 12.52647876739502, "learning_rate": 4.2360442298499114e-08, "loss": 0.3622, "step": 20675 }, { "epoch": 0.9589981447124304, "grad_norm": 5.353888988494873, "learning_rate": 4.226490158956431e-08, "loss": 0.2564, "step": 20676 }, { "epoch": 0.9590445269016697, "grad_norm": 6.883162975311279, "learning_rate": 4.2169468287345805e-08, "loss": 0.3535, "step": 20677 }, { "epoch": 0.9590909090909091, "grad_norm": 10.993494987487793, "learning_rate": 4.2074142393910835e-08, "loss": 0.2437, "step": 20678 }, { "epoch": 0.9591372912801485, "grad_norm": 9.136161804199219, "learning_rate": 4.197892391132441e-08, "loss": 0.4211, "step": 20679 }, { "epoch": 0.9591836734693877, "grad_norm": 5.313547134399414, "learning_rate": 4.188381284164933e-08, "loss": 0.3476, "step": 20680 }, { "epoch": 0.9592300556586271, "grad_norm": 25.25889778137207, "learning_rate": 4.178880918694672e-08, "loss": 0.36, "step": 20681 }, { "epoch": 0.9592764378478664, "grad_norm": 12.694971084594727, "learning_rate": 4.169391294927383e-08, "loss": 0.4603, "step": 20682 }, { "epoch": 0.9593228200371058, "grad_norm": 10.818477630615234, "learning_rate": 4.159912413068734e-08, "loss": 0.3586, "step": 20683 }, { "epoch": 0.959369202226345, "grad_norm": 9.944771766662598, "learning_rate": 4.150444273324061e-08, "loss": 0.4475, "step": 20684 }, { "epoch": 0.9594155844155844, "grad_norm": 5.732666492462158, "learning_rate": 4.140986875898478e-08, "loss": 0.2795, "step": 20685 }, { "epoch": 0.9594619666048237, "grad_norm": 8.07691764831543, "learning_rate": 4.1315402209968766e-08, "loss": 0.3241, "step": 20686 }, { "epoch": 0.9595083487940631, "grad_norm": 7.830774307250977, "learning_rate": 4.1221043088239266e-08, "loss": 0.2872, "step": 20687 }, { "epoch": 0.9595547309833025, "grad_norm": 5.919122695922852, "learning_rate": 4.112679139584019e-08, "loss": 0.2936, "step": 20688 }, { "epoch": 0.9596011131725417, "grad_norm": 6.842341899871826, "learning_rate": 4.103264713481381e-08, "loss": 0.212, "step": 20689 }, { "epoch": 0.9596474953617811, "grad_norm": 20.01710319519043, "learning_rate": 4.093861030719903e-08, "loss": 0.269, "step": 20690 }, { "epoch": 0.9596938775510204, "grad_norm": 4.504141807556152, "learning_rate": 4.084468091503424e-08, "loss": 0.2501, "step": 20691 }, { "epoch": 0.9597402597402598, "grad_norm": 8.193333625793457, "learning_rate": 4.0750858960353356e-08, "loss": 0.29, "step": 20692 }, { "epoch": 0.959786641929499, "grad_norm": 8.847640991210938, "learning_rate": 4.0657144445189755e-08, "loss": 0.5006, "step": 20693 }, { "epoch": 0.9598330241187384, "grad_norm": 8.977839469909668, "learning_rate": 4.0563537371572926e-08, "loss": 0.3179, "step": 20694 }, { "epoch": 0.9598794063079777, "grad_norm": 12.524764060974121, "learning_rate": 4.04700377415318e-08, "loss": 0.3369, "step": 20695 }, { "epoch": 0.9599257884972171, "grad_norm": 7.251465797424316, "learning_rate": 4.0376645557090864e-08, "loss": 0.272, "step": 20696 }, { "epoch": 0.9599721706864563, "grad_norm": 8.96326732635498, "learning_rate": 4.0283360820274064e-08, "loss": 0.4162, "step": 20697 }, { "epoch": 0.9600185528756957, "grad_norm": 7.667359352111816, "learning_rate": 4.019018353310256e-08, "loss": 0.2216, "step": 20698 }, { "epoch": 0.9600649350649351, "grad_norm": 11.824808120727539, "learning_rate": 4.009711369759473e-08, "loss": 0.3936, "step": 20699 }, { "epoch": 0.9601113172541744, "grad_norm": 4.881169319152832, "learning_rate": 4.0004151315766184e-08, "loss": 0.3537, "step": 20700 }, { "epoch": 0.9601576994434138, "grad_norm": 15.869710922241211, "learning_rate": 3.991129638963198e-08, "loss": 0.3693, "step": 20701 }, { "epoch": 0.960204081632653, "grad_norm": 9.721315383911133, "learning_rate": 3.981854892120329e-08, "loss": 0.3371, "step": 20702 }, { "epoch": 0.9602504638218924, "grad_norm": 8.715307235717773, "learning_rate": 3.972590891248962e-08, "loss": 0.305, "step": 20703 }, { "epoch": 0.9602968460111317, "grad_norm": 5.914061069488525, "learning_rate": 3.9633376365498246e-08, "loss": 0.4007, "step": 20704 }, { "epoch": 0.9603432282003711, "grad_norm": 6.4066057205200195, "learning_rate": 3.954095128223312e-08, "loss": 0.2386, "step": 20705 }, { "epoch": 0.9603896103896103, "grad_norm": 15.956145286560059, "learning_rate": 3.944863366469709e-08, "loss": 0.4968, "step": 20706 }, { "epoch": 0.9604359925788497, "grad_norm": 8.738748550415039, "learning_rate": 3.935642351488966e-08, "loss": 0.4024, "step": 20707 }, { "epoch": 0.9604823747680891, "grad_norm": 10.372159957885742, "learning_rate": 3.926432083480869e-08, "loss": 0.3638, "step": 20708 }, { "epoch": 0.9605287569573284, "grad_norm": 6.483072757720947, "learning_rate": 3.917232562645035e-08, "loss": 0.3379, "step": 20709 }, { "epoch": 0.9605751391465677, "grad_norm": 6.201140880584717, "learning_rate": 3.9080437891806375e-08, "loss": 0.3044, "step": 20710 }, { "epoch": 0.960621521335807, "grad_norm": 8.13702392578125, "learning_rate": 3.898865763286852e-08, "loss": 0.3692, "step": 20711 }, { "epoch": 0.9606679035250464, "grad_norm": 9.597015380859375, "learning_rate": 3.889698485162463e-08, "loss": 0.36, "step": 20712 }, { "epoch": 0.9607142857142857, "grad_norm": 11.576790809631348, "learning_rate": 3.8805419550060893e-08, "loss": 0.4701, "step": 20713 }, { "epoch": 0.9607606679035251, "grad_norm": 14.417824745178223, "learning_rate": 3.8713961730160733e-08, "loss": 0.4548, "step": 20714 }, { "epoch": 0.9608070500927643, "grad_norm": 6.311306953430176, "learning_rate": 3.862261139390588e-08, "loss": 0.203, "step": 20715 }, { "epoch": 0.9608534322820037, "grad_norm": 10.149831771850586, "learning_rate": 3.853136854327588e-08, "loss": 0.4477, "step": 20716 }, { "epoch": 0.9608998144712431, "grad_norm": 6.704919338226318, "learning_rate": 3.8440233180246365e-08, "loss": 0.3311, "step": 20717 }, { "epoch": 0.9609461966604824, "grad_norm": 6.962728500366211, "learning_rate": 3.834920530679187e-08, "loss": 0.4427, "step": 20718 }, { "epoch": 0.9609925788497217, "grad_norm": 6.785707473754883, "learning_rate": 3.8258284924885256e-08, "loss": 0.3226, "step": 20719 }, { "epoch": 0.961038961038961, "grad_norm": 17.846385955810547, "learning_rate": 3.816747203649607e-08, "loss": 0.451, "step": 20720 }, { "epoch": 0.9610853432282004, "grad_norm": 9.004254341125488, "learning_rate": 3.8076766643591054e-08, "loss": 0.3247, "step": 20721 }, { "epoch": 0.9611317254174397, "grad_norm": 8.339744567871094, "learning_rate": 3.7986168748135876e-08, "loss": 0.3415, "step": 20722 }, { "epoch": 0.961178107606679, "grad_norm": 8.476225852966309, "learning_rate": 3.78956783520934e-08, "loss": 0.2757, "step": 20723 }, { "epoch": 0.9612244897959183, "grad_norm": 9.384917259216309, "learning_rate": 3.780529545742373e-08, "loss": 0.3095, "step": 20724 }, { "epoch": 0.9612708719851577, "grad_norm": 5.490500450134277, "learning_rate": 3.771502006608474e-08, "loss": 0.3097, "step": 20725 }, { "epoch": 0.9613172541743971, "grad_norm": 8.195686340332031, "learning_rate": 3.7624852180032644e-08, "loss": 0.3599, "step": 20726 }, { "epoch": 0.9613636363636363, "grad_norm": 7.325892448425293, "learning_rate": 3.7534791801221436e-08, "loss": 0.2646, "step": 20727 }, { "epoch": 0.9614100185528757, "grad_norm": 6.16310977935791, "learning_rate": 3.744483893160067e-08, "loss": 0.2629, "step": 20728 }, { "epoch": 0.961456400742115, "grad_norm": 6.948771953582764, "learning_rate": 3.7354993573120446e-08, "loss": 0.2478, "step": 20729 }, { "epoch": 0.9615027829313544, "grad_norm": 7.1621527671813965, "learning_rate": 3.726525572772643e-08, "loss": 0.37, "step": 20730 }, { "epoch": 0.9615491651205937, "grad_norm": 7.112705707550049, "learning_rate": 3.717562539736319e-08, "loss": 0.3634, "step": 20731 }, { "epoch": 0.961595547309833, "grad_norm": 9.5070219039917, "learning_rate": 3.70861025839725e-08, "loss": 0.4195, "step": 20732 }, { "epoch": 0.9616419294990723, "grad_norm": 7.808023452758789, "learning_rate": 3.6996687289493905e-08, "loss": 0.2994, "step": 20733 }, { "epoch": 0.9616883116883117, "grad_norm": 9.339184761047363, "learning_rate": 3.690737951586421e-08, "loss": 0.2682, "step": 20734 }, { "epoch": 0.9617346938775511, "grad_norm": 13.163287162780762, "learning_rate": 3.681817926501796e-08, "loss": 0.5218, "step": 20735 }, { "epoch": 0.9617810760667903, "grad_norm": 13.762091636657715, "learning_rate": 3.6729086538888626e-08, "loss": 0.3608, "step": 20736 }, { "epoch": 0.9618274582560297, "grad_norm": 6.86219596862793, "learning_rate": 3.664010133940576e-08, "loss": 0.2676, "step": 20737 }, { "epoch": 0.961873840445269, "grad_norm": 7.351888656616211, "learning_rate": 3.6551223668497284e-08, "loss": 0.2224, "step": 20738 }, { "epoch": 0.9619202226345084, "grad_norm": 9.94797134399414, "learning_rate": 3.6462453528088304e-08, "loss": 0.3772, "step": 20739 }, { "epoch": 0.9619666048237476, "grad_norm": 12.200860023498535, "learning_rate": 3.637379092010174e-08, "loss": 0.3543, "step": 20740 }, { "epoch": 0.962012987012987, "grad_norm": 18.100542068481445, "learning_rate": 3.628523584645993e-08, "loss": 0.3078, "step": 20741 }, { "epoch": 0.9620593692022263, "grad_norm": 6.35139799118042, "learning_rate": 3.619678830907969e-08, "loss": 0.394, "step": 20742 }, { "epoch": 0.9621057513914657, "grad_norm": 4.985945701599121, "learning_rate": 3.610844830987836e-08, "loss": 0.1916, "step": 20743 }, { "epoch": 0.9621521335807051, "grad_norm": 7.847840785980225, "learning_rate": 3.602021585076942e-08, "loss": 0.4011, "step": 20744 }, { "epoch": 0.9621985157699443, "grad_norm": 4.578333377838135, "learning_rate": 3.593209093366357e-08, "loss": 0.323, "step": 20745 }, { "epoch": 0.9622448979591837, "grad_norm": 9.326851844787598, "learning_rate": 3.5844073560471484e-08, "loss": 0.3936, "step": 20746 }, { "epoch": 0.962291280148423, "grad_norm": 8.140274047851562, "learning_rate": 3.575616373309887e-08, "loss": 0.2794, "step": 20747 }, { "epoch": 0.9623376623376624, "grad_norm": 12.028020858764648, "learning_rate": 3.566836145345032e-08, "loss": 0.4135, "step": 20748 }, { "epoch": 0.9623840445269016, "grad_norm": 11.700282096862793, "learning_rate": 3.558066672342875e-08, "loss": 0.3405, "step": 20749 }, { "epoch": 0.962430426716141, "grad_norm": 5.756654262542725, "learning_rate": 3.549307954493375e-08, "loss": 0.3507, "step": 20750 }, { "epoch": 0.9624768089053803, "grad_norm": 13.42049789428711, "learning_rate": 3.5405599919862144e-08, "loss": 0.4848, "step": 20751 }, { "epoch": 0.9625231910946197, "grad_norm": 9.02846908569336, "learning_rate": 3.531822785010963e-08, "loss": 0.3683, "step": 20752 }, { "epoch": 0.962569573283859, "grad_norm": 6.010214805603027, "learning_rate": 3.52309633375697e-08, "loss": 0.3314, "step": 20753 }, { "epoch": 0.9626159554730983, "grad_norm": 9.961421012878418, "learning_rate": 3.5143806384131954e-08, "loss": 0.3452, "step": 20754 }, { "epoch": 0.9626623376623377, "grad_norm": 8.422553062438965, "learning_rate": 3.505675699168487e-08, "loss": 0.3158, "step": 20755 }, { "epoch": 0.962708719851577, "grad_norm": 9.2006254196167, "learning_rate": 3.4969815162114726e-08, "loss": 0.325, "step": 20756 }, { "epoch": 0.9627551020408164, "grad_norm": 29.3167667388916, "learning_rate": 3.488298089730446e-08, "loss": 0.4428, "step": 20757 }, { "epoch": 0.9628014842300556, "grad_norm": 5.2472310066223145, "learning_rate": 3.4796254199135346e-08, "loss": 0.3185, "step": 20758 }, { "epoch": 0.962847866419295, "grad_norm": 8.403454780578613, "learning_rate": 3.4709635069486994e-08, "loss": 0.3033, "step": 20759 }, { "epoch": 0.9628942486085343, "grad_norm": 12.307863235473633, "learning_rate": 3.462312351023567e-08, "loss": 0.3505, "step": 20760 }, { "epoch": 0.9629406307977737, "grad_norm": 11.504242897033691, "learning_rate": 3.453671952325488e-08, "loss": 0.312, "step": 20761 }, { "epoch": 0.962987012987013, "grad_norm": 6.700110912322998, "learning_rate": 3.445042311041702e-08, "loss": 0.337, "step": 20762 }, { "epoch": 0.9630333951762523, "grad_norm": 10.071212768554688, "learning_rate": 3.4364234273592255e-08, "loss": 0.34, "step": 20763 }, { "epoch": 0.9630797773654917, "grad_norm": 12.533191680908203, "learning_rate": 3.427815301464688e-08, "loss": 0.444, "step": 20764 }, { "epoch": 0.963126159554731, "grad_norm": 4.93973970413208, "learning_rate": 3.419217933544605e-08, "loss": 0.232, "step": 20765 }, { "epoch": 0.9631725417439703, "grad_norm": 11.174458503723145, "learning_rate": 3.410631323785274e-08, "loss": 0.3491, "step": 20766 }, { "epoch": 0.9632189239332096, "grad_norm": 9.097123146057129, "learning_rate": 3.4020554723726564e-08, "loss": 0.3837, "step": 20767 }, { "epoch": 0.963265306122449, "grad_norm": 7.800387859344482, "learning_rate": 3.393490379492603e-08, "loss": 0.3871, "step": 20768 }, { "epoch": 0.9633116883116883, "grad_norm": 6.972103118896484, "learning_rate": 3.3849360453306334e-08, "loss": 0.2983, "step": 20769 }, { "epoch": 0.9633580705009277, "grad_norm": 14.354055404663086, "learning_rate": 3.376392470072154e-08, "loss": 0.4695, "step": 20770 }, { "epoch": 0.963404452690167, "grad_norm": 13.29613971710205, "learning_rate": 3.367859653902128e-08, "loss": 0.4432, "step": 20771 }, { "epoch": 0.9634508348794063, "grad_norm": 5.126638889312744, "learning_rate": 3.359337597005463e-08, "loss": 0.3029, "step": 20772 }, { "epoch": 0.9634972170686457, "grad_norm": 11.811086654663086, "learning_rate": 3.350826299566845e-08, "loss": 0.4176, "step": 20773 }, { "epoch": 0.963543599257885, "grad_norm": 6.5333757400512695, "learning_rate": 3.3423257617705705e-08, "loss": 0.2692, "step": 20774 }, { "epoch": 0.9635899814471243, "grad_norm": 7.437466144561768, "learning_rate": 3.333835983800881e-08, "loss": 0.3454, "step": 20775 }, { "epoch": 0.9636363636363636, "grad_norm": 7.48491096496582, "learning_rate": 3.325356965841686e-08, "loss": 0.358, "step": 20776 }, { "epoch": 0.963682745825603, "grad_norm": 8.5173921585083, "learning_rate": 3.316888708076616e-08, "loss": 0.3545, "step": 20777 }, { "epoch": 0.9637291280148423, "grad_norm": 6.572237491607666, "learning_rate": 3.308431210689245e-08, "loss": 0.3699, "step": 20778 }, { "epoch": 0.9637755102040816, "grad_norm": 8.471573829650879, "learning_rate": 3.2999844738626495e-08, "loss": 0.3945, "step": 20779 }, { "epoch": 0.963821892393321, "grad_norm": 6.141284465789795, "learning_rate": 3.291548497779962e-08, "loss": 0.3382, "step": 20780 }, { "epoch": 0.9638682745825603, "grad_norm": 6.14024019241333, "learning_rate": 3.2831232826239234e-08, "loss": 0.354, "step": 20781 }, { "epoch": 0.9639146567717997, "grad_norm": 9.438535690307617, "learning_rate": 3.2747088285769445e-08, "loss": 0.3189, "step": 20782 }, { "epoch": 0.9639610389610389, "grad_norm": 9.49321460723877, "learning_rate": 3.2663051358214904e-08, "loss": 0.3962, "step": 20783 }, { "epoch": 0.9640074211502783, "grad_norm": 5.329889297485352, "learning_rate": 3.257912204539471e-08, "loss": 0.2421, "step": 20784 }, { "epoch": 0.9640538033395176, "grad_norm": 5.419417381286621, "learning_rate": 3.249530034912795e-08, "loss": 0.2186, "step": 20785 }, { "epoch": 0.964100185528757, "grad_norm": 5.97220516204834, "learning_rate": 3.2411586271230425e-08, "loss": 0.2307, "step": 20786 }, { "epoch": 0.9641465677179963, "grad_norm": 13.690037727355957, "learning_rate": 3.2327979813515655e-08, "loss": 0.3215, "step": 20787 }, { "epoch": 0.9641929499072356, "grad_norm": 8.384516716003418, "learning_rate": 3.224448097779553e-08, "loss": 0.3371, "step": 20788 }, { "epoch": 0.9642393320964749, "grad_norm": 8.684306144714355, "learning_rate": 3.216108976587806e-08, "loss": 0.4497, "step": 20789 }, { "epoch": 0.9642857142857143, "grad_norm": 6.692726135253906, "learning_rate": 3.2077806179570127e-08, "loss": 0.2549, "step": 20790 }, { "epoch": 0.9643320964749537, "grad_norm": 5.814746856689453, "learning_rate": 3.199463022067639e-08, "loss": 0.2648, "step": 20791 }, { "epoch": 0.9643784786641929, "grad_norm": 10.566052436828613, "learning_rate": 3.191156189099931e-08, "loss": 0.3372, "step": 20792 }, { "epoch": 0.9644248608534323, "grad_norm": 9.197257041931152, "learning_rate": 3.1828601192336885e-08, "loss": 0.3757, "step": 20793 }, { "epoch": 0.9644712430426716, "grad_norm": 8.018553733825684, "learning_rate": 3.174574812648767e-08, "loss": 0.3429, "step": 20794 }, { "epoch": 0.964517625231911, "grad_norm": 11.331940650939941, "learning_rate": 3.166300269524691e-08, "loss": 0.3855, "step": 20795 }, { "epoch": 0.9645640074211502, "grad_norm": 11.774691581726074, "learning_rate": 3.158036490040595e-08, "loss": 0.5291, "step": 20796 }, { "epoch": 0.9646103896103896, "grad_norm": 10.939828872680664, "learning_rate": 3.1497834743756114e-08, "loss": 0.4131, "step": 20797 }, { "epoch": 0.9646567717996289, "grad_norm": 6.92092227935791, "learning_rate": 3.141541222708489e-08, "loss": 0.2804, "step": 20798 }, { "epoch": 0.9647031539888683, "grad_norm": 7.47806453704834, "learning_rate": 3.13330973521786e-08, "loss": 0.3409, "step": 20799 }, { "epoch": 0.9647495361781077, "grad_norm": 9.32860279083252, "learning_rate": 3.125089012081972e-08, "loss": 0.2863, "step": 20800 }, { "epoch": 0.9647959183673469, "grad_norm": 6.20087194442749, "learning_rate": 3.116879053478961e-08, "loss": 0.2852, "step": 20801 }, { "epoch": 0.9648423005565863, "grad_norm": 11.442883491516113, "learning_rate": 3.108679859586683e-08, "loss": 0.2396, "step": 20802 }, { "epoch": 0.9648886827458256, "grad_norm": 7.02564811706543, "learning_rate": 3.100491430582775e-08, "loss": 0.2934, "step": 20803 }, { "epoch": 0.964935064935065, "grad_norm": 8.509968757629395, "learning_rate": 3.0923137666445945e-08, "loss": 0.4062, "step": 20804 }, { "epoch": 0.9649814471243042, "grad_norm": 6.106583595275879, "learning_rate": 3.084146867949389e-08, "loss": 0.2776, "step": 20805 }, { "epoch": 0.9650278293135436, "grad_norm": 8.152494430541992, "learning_rate": 3.075990734674017e-08, "loss": 0.3405, "step": 20806 }, { "epoch": 0.9650742115027829, "grad_norm": 12.730877876281738, "learning_rate": 3.0678453669952816e-08, "loss": 0.333, "step": 20807 }, { "epoch": 0.9651205936920223, "grad_norm": 8.88603687286377, "learning_rate": 3.0597107650894855e-08, "loss": 0.4428, "step": 20808 }, { "epoch": 0.9651669758812615, "grad_norm": 7.478219032287598, "learning_rate": 3.051586929132988e-08, "loss": 0.3047, "step": 20809 }, { "epoch": 0.9652133580705009, "grad_norm": 10.676974296569824, "learning_rate": 3.043473859301704e-08, "loss": 0.3505, "step": 20810 }, { "epoch": 0.9652597402597403, "grad_norm": 4.9259562492370605, "learning_rate": 3.0353715557714934e-08, "loss": 0.2948, "step": 20811 }, { "epoch": 0.9653061224489796, "grad_norm": 9.126445770263672, "learning_rate": 3.0272800187178265e-08, "loss": 0.3393, "step": 20812 }, { "epoch": 0.965352504638219, "grad_norm": 7.088472843170166, "learning_rate": 3.019199248316007e-08, "loss": 0.2749, "step": 20813 }, { "epoch": 0.9653988868274582, "grad_norm": 6.373454570770264, "learning_rate": 3.011129244741118e-08, "loss": 0.3095, "step": 20814 }, { "epoch": 0.9654452690166976, "grad_norm": 12.154414176940918, "learning_rate": 3.003070008167963e-08, "loss": 0.4022, "step": 20815 }, { "epoch": 0.9654916512059369, "grad_norm": 7.4741997718811035, "learning_rate": 2.995021538771126e-08, "loss": 0.3319, "step": 20816 }, { "epoch": 0.9655380333951763, "grad_norm": 4.693546295166016, "learning_rate": 2.986983836725077e-08, "loss": 0.2137, "step": 20817 }, { "epoch": 0.9655844155844155, "grad_norm": 10.102547645568848, "learning_rate": 2.978956902203789e-08, "loss": 0.3675, "step": 20818 }, { "epoch": 0.9656307977736549, "grad_norm": 7.617784023284912, "learning_rate": 2.970940735381289e-08, "loss": 0.2436, "step": 20819 }, { "epoch": 0.9656771799628943, "grad_norm": 8.122389793395996, "learning_rate": 2.9629353364312163e-08, "loss": 0.313, "step": 20820 }, { "epoch": 0.9657235621521336, "grad_norm": 9.800418853759766, "learning_rate": 2.9549407055269875e-08, "loss": 0.3078, "step": 20821 }, { "epoch": 0.9657699443413729, "grad_norm": 6.957547664642334, "learning_rate": 2.9469568428417972e-08, "loss": 0.2482, "step": 20822 }, { "epoch": 0.9658163265306122, "grad_norm": 7.14169979095459, "learning_rate": 2.9389837485486183e-08, "loss": 0.3726, "step": 20823 }, { "epoch": 0.9658627087198516, "grad_norm": 24.8040828704834, "learning_rate": 2.9310214228202016e-08, "loss": 0.46, "step": 20824 }, { "epoch": 0.9659090909090909, "grad_norm": 6.042917251586914, "learning_rate": 2.9230698658290202e-08, "loss": 0.3397, "step": 20825 }, { "epoch": 0.9659554730983302, "grad_norm": 8.069846153259277, "learning_rate": 2.9151290777473252e-08, "loss": 0.2937, "step": 20826 }, { "epoch": 0.9660018552875695, "grad_norm": 7.015301704406738, "learning_rate": 2.907199058747201e-08, "loss": 0.316, "step": 20827 }, { "epoch": 0.9660482374768089, "grad_norm": 6.951096534729004, "learning_rate": 2.8992798090003993e-08, "loss": 0.3839, "step": 20828 }, { "epoch": 0.9660946196660483, "grad_norm": 8.33042049407959, "learning_rate": 2.8913713286785604e-08, "loss": 0.3417, "step": 20829 }, { "epoch": 0.9661410018552876, "grad_norm": 6.750100135803223, "learning_rate": 2.883473617952881e-08, "loss": 0.3366, "step": 20830 }, { "epoch": 0.9661873840445269, "grad_norm": 13.591875076293945, "learning_rate": 2.875586676994613e-08, "loss": 0.3975, "step": 20831 }, { "epoch": 0.9662337662337662, "grad_norm": 6.7765116691589355, "learning_rate": 2.867710505974508e-08, "loss": 0.3304, "step": 20832 }, { "epoch": 0.9662801484230056, "grad_norm": 5.32949686050415, "learning_rate": 2.8598451050632636e-08, "loss": 0.3166, "step": 20833 }, { "epoch": 0.9663265306122449, "grad_norm": 11.6314058303833, "learning_rate": 2.8519904744312986e-08, "loss": 0.4211, "step": 20834 }, { "epoch": 0.9663729128014842, "grad_norm": 7.380115985870361, "learning_rate": 2.8441466142486995e-08, "loss": 0.3019, "step": 20835 }, { "epoch": 0.9664192949907235, "grad_norm": 11.701997756958008, "learning_rate": 2.8363135246854412e-08, "loss": 0.5342, "step": 20836 }, { "epoch": 0.9664656771799629, "grad_norm": 18.441396713256836, "learning_rate": 2.8284912059112212e-08, "loss": 0.4871, "step": 20837 }, { "epoch": 0.9665120593692023, "grad_norm": 11.991522789001465, "learning_rate": 2.8206796580955153e-08, "loss": 0.5227, "step": 20838 }, { "epoch": 0.9665584415584415, "grad_norm": 5.694684982299805, "learning_rate": 2.812878881407577e-08, "loss": 0.295, "step": 20839 }, { "epoch": 0.9666048237476809, "grad_norm": 11.964130401611328, "learning_rate": 2.8050888760163265e-08, "loss": 0.3538, "step": 20840 }, { "epoch": 0.9666512059369202, "grad_norm": 5.86880350112915, "learning_rate": 2.7973096420905732e-08, "loss": 0.3228, "step": 20841 }, { "epoch": 0.9666975881261596, "grad_norm": 5.056684494018555, "learning_rate": 2.7895411797989602e-08, "loss": 0.192, "step": 20842 }, { "epoch": 0.9667439703153989, "grad_norm": 6.843619346618652, "learning_rate": 2.7817834893095752e-08, "loss": 0.2251, "step": 20843 }, { "epoch": 0.9667903525046382, "grad_norm": 11.223540306091309, "learning_rate": 2.7740365707906723e-08, "loss": 0.302, "step": 20844 }, { "epoch": 0.9668367346938775, "grad_norm": 3.693666458129883, "learning_rate": 2.7663004244099512e-08, "loss": 0.1502, "step": 20845 }, { "epoch": 0.9668831168831169, "grad_norm": 6.793977737426758, "learning_rate": 2.7585750503351105e-08, "loss": 0.331, "step": 20846 }, { "epoch": 0.9669294990723563, "grad_norm": 6.298221111297607, "learning_rate": 2.750860448733461e-08, "loss": 0.2547, "step": 20847 }, { "epoch": 0.9669758812615955, "grad_norm": 7.6640095710754395, "learning_rate": 2.7431566197721473e-08, "loss": 0.3452, "step": 20848 }, { "epoch": 0.9670222634508349, "grad_norm": 8.707559585571289, "learning_rate": 2.735463563618035e-08, "loss": 0.2424, "step": 20849 }, { "epoch": 0.9670686456400742, "grad_norm": 7.837329387664795, "learning_rate": 2.72778128043788e-08, "loss": 0.3306, "step": 20850 }, { "epoch": 0.9671150278293136, "grad_norm": 9.106743812561035, "learning_rate": 2.7201097703980495e-08, "loss": 0.3666, "step": 20851 }, { "epoch": 0.9671614100185528, "grad_norm": 16.256135940551758, "learning_rate": 2.7124490336647434e-08, "loss": 0.3008, "step": 20852 }, { "epoch": 0.9672077922077922, "grad_norm": 6.050878047943115, "learning_rate": 2.7047990704039408e-08, "loss": 0.2887, "step": 20853 }, { "epoch": 0.9672541743970315, "grad_norm": 7.27916955947876, "learning_rate": 2.6971598807813415e-08, "loss": 0.2976, "step": 20854 }, { "epoch": 0.9673005565862709, "grad_norm": 5.08444881439209, "learning_rate": 2.6895314649624803e-08, "loss": 0.2814, "step": 20855 }, { "epoch": 0.9673469387755103, "grad_norm": 3.8583035469055176, "learning_rate": 2.6819138231126695e-08, "loss": 0.2361, "step": 20856 }, { "epoch": 0.9673933209647495, "grad_norm": 7.6656317710876465, "learning_rate": 2.6743069553968328e-08, "loss": 0.3506, "step": 20857 }, { "epoch": 0.9674397031539889, "grad_norm": 5.013314247131348, "learning_rate": 2.6667108619798377e-08, "loss": 0.2648, "step": 20858 }, { "epoch": 0.9674860853432282, "grad_norm": 10.946950912475586, "learning_rate": 2.6591255430262752e-08, "loss": 0.2567, "step": 20859 }, { "epoch": 0.9675324675324676, "grad_norm": 5.789828777313232, "learning_rate": 2.6515509987003474e-08, "loss": 0.2716, "step": 20860 }, { "epoch": 0.9675788497217068, "grad_norm": 9.807819366455078, "learning_rate": 2.6439872291663116e-08, "loss": 0.2867, "step": 20861 }, { "epoch": 0.9676252319109462, "grad_norm": 5.6885457038879395, "learning_rate": 2.6364342345879813e-08, "loss": 0.2471, "step": 20862 }, { "epoch": 0.9676716141001855, "grad_norm": 9.666744232177734, "learning_rate": 2.6288920151289477e-08, "loss": 0.3797, "step": 20863 }, { "epoch": 0.9677179962894249, "grad_norm": 8.089982032775879, "learning_rate": 2.6213605709525803e-08, "loss": 0.3979, "step": 20864 }, { "epoch": 0.9677643784786641, "grad_norm": 8.071017265319824, "learning_rate": 2.6138399022221374e-08, "loss": 0.3085, "step": 20865 }, { "epoch": 0.9678107606679035, "grad_norm": 6.371434211730957, "learning_rate": 2.6063300091005438e-08, "loss": 0.2639, "step": 20866 }, { "epoch": 0.9678571428571429, "grad_norm": 7.126830101013184, "learning_rate": 2.598830891750448e-08, "loss": 0.3486, "step": 20867 }, { "epoch": 0.9679035250463822, "grad_norm": 4.489445209503174, "learning_rate": 2.591342550334275e-08, "loss": 0.3133, "step": 20868 }, { "epoch": 0.9679499072356216, "grad_norm": 6.24819803237915, "learning_rate": 2.5838649850143948e-08, "loss": 0.2203, "step": 20869 }, { "epoch": 0.9679962894248608, "grad_norm": 8.314903259277344, "learning_rate": 2.5763981959526786e-08, "loss": 0.2839, "step": 20870 }, { "epoch": 0.9680426716141002, "grad_norm": 6.996935844421387, "learning_rate": 2.5689421833108852e-08, "loss": 0.2575, "step": 20871 }, { "epoch": 0.9680890538033395, "grad_norm": 5.711012840270996, "learning_rate": 2.5614969472506634e-08, "loss": 0.3128, "step": 20872 }, { "epoch": 0.9681354359925789, "grad_norm": 6.273894309997559, "learning_rate": 2.554062487933162e-08, "loss": 0.3287, "step": 20873 }, { "epoch": 0.9681818181818181, "grad_norm": 15.449868202209473, "learning_rate": 2.5466388055195856e-08, "loss": 0.4292, "step": 20874 }, { "epoch": 0.9682282003710575, "grad_norm": 4.827614784240723, "learning_rate": 2.5392259001706943e-08, "loss": 0.2606, "step": 20875 }, { "epoch": 0.9682745825602969, "grad_norm": 9.18304443359375, "learning_rate": 2.5318237720470818e-08, "loss": 0.391, "step": 20876 }, { "epoch": 0.9683209647495362, "grad_norm": 6.840976238250732, "learning_rate": 2.5244324213091197e-08, "loss": 0.3218, "step": 20877 }, { "epoch": 0.9683673469387755, "grad_norm": 9.498625755310059, "learning_rate": 2.5170518481169582e-08, "loss": 0.4688, "step": 20878 }, { "epoch": 0.9684137291280148, "grad_norm": 5.378225803375244, "learning_rate": 2.5096820526304132e-08, "loss": 0.2997, "step": 20879 }, { "epoch": 0.9684601113172542, "grad_norm": 5.1238603591918945, "learning_rate": 2.5023230350092464e-08, "loss": 0.4012, "step": 20880 }, { "epoch": 0.9685064935064935, "grad_norm": 9.307889938354492, "learning_rate": 2.4949747954128855e-08, "loss": 0.3568, "step": 20881 }, { "epoch": 0.9685528756957328, "grad_norm": 4.570098400115967, "learning_rate": 2.4876373340004256e-08, "loss": 0.2091, "step": 20882 }, { "epoch": 0.9685992578849721, "grad_norm": 10.392220497131348, "learning_rate": 2.480310650930906e-08, "loss": 0.3694, "step": 20883 }, { "epoch": 0.9686456400742115, "grad_norm": 7.595142841339111, "learning_rate": 2.472994746363089e-08, "loss": 0.4174, "step": 20884 }, { "epoch": 0.9686920222634509, "grad_norm": 6.419748783111572, "learning_rate": 2.4656896204554027e-08, "loss": 0.277, "step": 20885 }, { "epoch": 0.9687384044526902, "grad_norm": 7.07935094833374, "learning_rate": 2.45839527336611e-08, "loss": 0.2941, "step": 20886 }, { "epoch": 0.9687847866419295, "grad_norm": 11.431014060974121, "learning_rate": 2.4511117052532508e-08, "loss": 0.4721, "step": 20887 }, { "epoch": 0.9688311688311688, "grad_norm": 9.268915176391602, "learning_rate": 2.4438389162746434e-08, "loss": 0.365, "step": 20888 }, { "epoch": 0.9688775510204082, "grad_norm": 4.803394794464111, "learning_rate": 2.4365769065878286e-08, "loss": 0.3132, "step": 20889 }, { "epoch": 0.9689239332096475, "grad_norm": 7.195235729217529, "learning_rate": 2.4293256763501248e-08, "loss": 0.2801, "step": 20890 }, { "epoch": 0.9689703153988868, "grad_norm": 6.927919864654541, "learning_rate": 2.4220852257186845e-08, "loss": 0.3075, "step": 20891 }, { "epoch": 0.9690166975881261, "grad_norm": 5.680617809295654, "learning_rate": 2.4148555548502707e-08, "loss": 0.2727, "step": 20892 }, { "epoch": 0.9690630797773655, "grad_norm": 4.120731830596924, "learning_rate": 2.4076366639015914e-08, "loss": 0.1906, "step": 20893 }, { "epoch": 0.9691094619666049, "grad_norm": 9.196534156799316, "learning_rate": 2.400428553028966e-08, "loss": 0.3591, "step": 20894 }, { "epoch": 0.9691558441558441, "grad_norm": 7.059598922729492, "learning_rate": 2.393231222388659e-08, "loss": 0.4115, "step": 20895 }, { "epoch": 0.9692022263450835, "grad_norm": 8.696215629577637, "learning_rate": 2.3860446721364892e-08, "loss": 0.2985, "step": 20896 }, { "epoch": 0.9692486085343228, "grad_norm": 5.8376922607421875, "learning_rate": 2.378868902428222e-08, "loss": 0.2488, "step": 20897 }, { "epoch": 0.9692949907235622, "grad_norm": 6.708972930908203, "learning_rate": 2.3717039134192878e-08, "loss": 0.3862, "step": 20898 }, { "epoch": 0.9693413729128015, "grad_norm": 8.999422073364258, "learning_rate": 2.3645497052649514e-08, "loss": 0.2634, "step": 20899 }, { "epoch": 0.9693877551020408, "grad_norm": 5.6475114822387695, "learning_rate": 2.3574062781201447e-08, "loss": 0.234, "step": 20900 }, { "epoch": 0.9694341372912801, "grad_norm": 7.910828590393066, "learning_rate": 2.3502736321396326e-08, "loss": 0.2805, "step": 20901 }, { "epoch": 0.9694805194805195, "grad_norm": 8.167492866516113, "learning_rate": 2.3431517674779582e-08, "loss": 0.3064, "step": 20902 }, { "epoch": 0.9695269016697589, "grad_norm": 5.629528522491455, "learning_rate": 2.3360406842894423e-08, "loss": 0.3137, "step": 20903 }, { "epoch": 0.9695732838589981, "grad_norm": 9.30912971496582, "learning_rate": 2.3289403827281287e-08, "loss": 0.3354, "step": 20904 }, { "epoch": 0.9696196660482375, "grad_norm": 10.753349304199219, "learning_rate": 2.3218508629477832e-08, "loss": 0.4521, "step": 20905 }, { "epoch": 0.9696660482374768, "grad_norm": 6.507328510284424, "learning_rate": 2.3147721251020606e-08, "loss": 0.3455, "step": 20906 }, { "epoch": 0.9697124304267162, "grad_norm": 7.171204566955566, "learning_rate": 2.3077041693442824e-08, "loss": 0.2721, "step": 20907 }, { "epoch": 0.9697588126159554, "grad_norm": 12.898863792419434, "learning_rate": 2.3006469958276046e-08, "loss": 0.4359, "step": 20908 }, { "epoch": 0.9698051948051948, "grad_norm": 4.027760982513428, "learning_rate": 2.2936006047049044e-08, "loss": 0.2399, "step": 20909 }, { "epoch": 0.9698515769944341, "grad_norm": 5.271371364593506, "learning_rate": 2.2865649961288373e-08, "loss": 0.3441, "step": 20910 }, { "epoch": 0.9698979591836735, "grad_norm": 5.108325004577637, "learning_rate": 2.2795401702517816e-08, "loss": 0.3385, "step": 20911 }, { "epoch": 0.9699443413729129, "grad_norm": 7.159687042236328, "learning_rate": 2.2725261272260047e-08, "loss": 0.3339, "step": 20912 }, { "epoch": 0.9699907235621521, "grad_norm": 7.184690475463867, "learning_rate": 2.2655228672034402e-08, "loss": 0.2483, "step": 20913 }, { "epoch": 0.9700371057513915, "grad_norm": 5.894551753997803, "learning_rate": 2.2585303903358002e-08, "loss": 0.2568, "step": 20914 }, { "epoch": 0.9700834879406308, "grad_norm": 10.245515823364258, "learning_rate": 2.2515486967745194e-08, "loss": 0.3764, "step": 20915 }, { "epoch": 0.9701298701298702, "grad_norm": 8.042984962463379, "learning_rate": 2.2445777866709208e-08, "loss": 0.3366, "step": 20916 }, { "epoch": 0.9701762523191094, "grad_norm": 5.491679668426514, "learning_rate": 2.2376176601759947e-08, "loss": 0.1674, "step": 20917 }, { "epoch": 0.9702226345083488, "grad_norm": 8.086982727050781, "learning_rate": 2.2306683174405653e-08, "loss": 0.3597, "step": 20918 }, { "epoch": 0.9702690166975881, "grad_norm": 10.69959831237793, "learning_rate": 2.2237297586151784e-08, "loss": 0.2753, "step": 20919 }, { "epoch": 0.9703153988868275, "grad_norm": 6.16654634475708, "learning_rate": 2.2168019838501032e-08, "loss": 0.3655, "step": 20920 }, { "epoch": 0.9703617810760667, "grad_norm": 11.537935256958008, "learning_rate": 2.2098849932954414e-08, "loss": 0.3694, "step": 20921 }, { "epoch": 0.9704081632653061, "grad_norm": 9.050663948059082, "learning_rate": 2.2029787871010734e-08, "loss": 0.3942, "step": 20922 }, { "epoch": 0.9704545454545455, "grad_norm": 14.616304397583008, "learning_rate": 2.196083365416657e-08, "loss": 0.4232, "step": 20923 }, { "epoch": 0.9705009276437848, "grad_norm": 6.692325115203857, "learning_rate": 2.189198728391517e-08, "loss": 0.3916, "step": 20924 }, { "epoch": 0.9705473098330241, "grad_norm": 13.55812931060791, "learning_rate": 2.1823248761748127e-08, "loss": 0.4131, "step": 20925 }, { "epoch": 0.9705936920222634, "grad_norm": 6.206171035766602, "learning_rate": 2.1754618089154244e-08, "loss": 0.3084, "step": 20926 }, { "epoch": 0.9706400742115028, "grad_norm": 10.834214210510254, "learning_rate": 2.168609526762122e-08, "loss": 0.4027, "step": 20927 }, { "epoch": 0.9706864564007421, "grad_norm": 5.540365695953369, "learning_rate": 2.161768029863287e-08, "loss": 0.3219, "step": 20928 }, { "epoch": 0.9707328385899815, "grad_norm": 5.497671604156494, "learning_rate": 2.1549373183671895e-08, "loss": 0.2714, "step": 20929 }, { "epoch": 0.9707792207792207, "grad_norm": 5.359902381896973, "learning_rate": 2.1481173924218224e-08, "loss": 0.2699, "step": 20930 }, { "epoch": 0.9708256029684601, "grad_norm": 6.197397232055664, "learning_rate": 2.141308252174845e-08, "loss": 0.3151, "step": 20931 }, { "epoch": 0.9708719851576995, "grad_norm": 5.3222880363464355, "learning_rate": 2.134509897773862e-08, "loss": 0.291, "step": 20932 }, { "epoch": 0.9709183673469388, "grad_norm": 9.72180461883545, "learning_rate": 2.1277223293660887e-08, "loss": 0.3391, "step": 20933 }, { "epoch": 0.9709647495361781, "grad_norm": 9.63573932647705, "learning_rate": 2.1209455470986295e-08, "loss": 0.3465, "step": 20934 }, { "epoch": 0.9710111317254174, "grad_norm": 23.6237735748291, "learning_rate": 2.1141795511183116e-08, "loss": 0.5443, "step": 20935 }, { "epoch": 0.9710575139146568, "grad_norm": 8.87782096862793, "learning_rate": 2.1074243415716288e-08, "loss": 0.3274, "step": 20936 }, { "epoch": 0.9711038961038961, "grad_norm": 6.730337619781494, "learning_rate": 2.1006799186050197e-08, "loss": 0.3464, "step": 20937 }, { "epoch": 0.9711502782931354, "grad_norm": 10.785340309143066, "learning_rate": 2.0939462823645896e-08, "loss": 0.3233, "step": 20938 }, { "epoch": 0.9711966604823747, "grad_norm": 6.250717639923096, "learning_rate": 2.087223432996166e-08, "loss": 0.2633, "step": 20939 }, { "epoch": 0.9712430426716141, "grad_norm": 4.7234787940979, "learning_rate": 2.080511370645466e-08, "loss": 0.2398, "step": 20940 }, { "epoch": 0.9712894248608535, "grad_norm": 8.511679649353027, "learning_rate": 2.073810095457818e-08, "loss": 0.3214, "step": 20941 }, { "epoch": 0.9713358070500928, "grad_norm": 8.995850563049316, "learning_rate": 2.0671196075784382e-08, "loss": 0.3965, "step": 20942 }, { "epoch": 0.9713821892393321, "grad_norm": 7.1274518966674805, "learning_rate": 2.060439907152323e-08, "loss": 0.3542, "step": 20943 }, { "epoch": 0.9714285714285714, "grad_norm": 6.849698066711426, "learning_rate": 2.053770994324078e-08, "loss": 0.3324, "step": 20944 }, { "epoch": 0.9714749536178108, "grad_norm": 4.8582329750061035, "learning_rate": 2.04711286923831e-08, "loss": 0.2921, "step": 20945 }, { "epoch": 0.9715213358070501, "grad_norm": 12.702692985534668, "learning_rate": 2.040465532039182e-08, "loss": 0.3417, "step": 20946 }, { "epoch": 0.9715677179962894, "grad_norm": 12.20198917388916, "learning_rate": 2.0338289828706892e-08, "loss": 0.4415, "step": 20947 }, { "epoch": 0.9716141001855287, "grad_norm": 5.293217182159424, "learning_rate": 2.0272032218766612e-08, "loss": 0.3109, "step": 20948 }, { "epoch": 0.9716604823747681, "grad_norm": 9.22332763671875, "learning_rate": 2.02058824920065e-08, "loss": 0.3796, "step": 20949 }, { "epoch": 0.9717068645640075, "grad_norm": 10.22403335571289, "learning_rate": 2.0139840649858744e-08, "loss": 0.3912, "step": 20950 }, { "epoch": 0.9717532467532467, "grad_norm": 4.9183454513549805, "learning_rate": 2.007390669375553e-08, "loss": 0.2954, "step": 20951 }, { "epoch": 0.9717996289424861, "grad_norm": 7.916277885437012, "learning_rate": 2.0008080625124048e-08, "loss": 0.4497, "step": 20952 }, { "epoch": 0.9718460111317254, "grad_norm": 7.191908836364746, "learning_rate": 1.9942362445390385e-08, "loss": 0.3298, "step": 20953 }, { "epoch": 0.9718923933209648, "grad_norm": 6.861752510070801, "learning_rate": 1.9876752155979506e-08, "loss": 0.33, "step": 20954 }, { "epoch": 0.9719387755102041, "grad_norm": 9.985580444335938, "learning_rate": 1.981124975831139e-08, "loss": 0.3385, "step": 20955 }, { "epoch": 0.9719851576994434, "grad_norm": 9.815069198608398, "learning_rate": 1.9745855253806567e-08, "loss": 0.314, "step": 20956 }, { "epoch": 0.9720315398886827, "grad_norm": 9.256895065307617, "learning_rate": 1.9680568643880572e-08, "loss": 0.2871, "step": 20957 }, { "epoch": 0.9720779220779221, "grad_norm": 6.062751770019531, "learning_rate": 1.9615389929947827e-08, "loss": 0.3282, "step": 20958 }, { "epoch": 0.9721243042671615, "grad_norm": 12.070661544799805, "learning_rate": 1.9550319113421645e-08, "loss": 0.4049, "step": 20959 }, { "epoch": 0.9721706864564007, "grad_norm": 6.133963108062744, "learning_rate": 1.9485356195710347e-08, "loss": 0.3176, "step": 20960 }, { "epoch": 0.9722170686456401, "grad_norm": 4.064580917358398, "learning_rate": 1.9420501178221696e-08, "loss": 0.2848, "step": 20961 }, { "epoch": 0.9722634508348794, "grad_norm": 14.593414306640625, "learning_rate": 1.935575406236123e-08, "loss": 0.5325, "step": 20962 }, { "epoch": 0.9723098330241188, "grad_norm": 3.862630844116211, "learning_rate": 1.929111484953117e-08, "loss": 0.3068, "step": 20963 }, { "epoch": 0.972356215213358, "grad_norm": 6.064388751983643, "learning_rate": 1.9226583541132048e-08, "loss": 0.2611, "step": 20964 }, { "epoch": 0.9724025974025974, "grad_norm": 5.8238725662231445, "learning_rate": 1.9162160138561648e-08, "loss": 0.3005, "step": 20965 }, { "epoch": 0.9724489795918367, "grad_norm": 5.406872272491455, "learning_rate": 1.9097844643216624e-08, "loss": 0.2374, "step": 20966 }, { "epoch": 0.9724953617810761, "grad_norm": 5.011817932128906, "learning_rate": 1.9033637056489197e-08, "loss": 0.2672, "step": 20967 }, { "epoch": 0.9725417439703155, "grad_norm": 6.578372955322266, "learning_rate": 1.896953737977103e-08, "loss": 0.3201, "step": 20968 }, { "epoch": 0.9725881261595547, "grad_norm": 8.81258487701416, "learning_rate": 1.890554561445046e-08, "loss": 0.4016, "step": 20969 }, { "epoch": 0.9726345083487941, "grad_norm": 17.24155616760254, "learning_rate": 1.8841661761914155e-08, "loss": 0.2183, "step": 20970 }, { "epoch": 0.9726808905380334, "grad_norm": 5.956677436828613, "learning_rate": 1.877788582354545e-08, "loss": 0.2751, "step": 20971 }, { "epoch": 0.9727272727272728, "grad_norm": 5.023167133331299, "learning_rate": 1.871421780072713e-08, "loss": 0.2827, "step": 20972 }, { "epoch": 0.972773654916512, "grad_norm": 11.875265121459961, "learning_rate": 1.8650657694837537e-08, "loss": 0.3557, "step": 20973 }, { "epoch": 0.9728200371057514, "grad_norm": 7.131392478942871, "learning_rate": 1.8587205507254457e-08, "loss": 0.3544, "step": 20974 }, { "epoch": 0.9728664192949907, "grad_norm": 9.579061508178711, "learning_rate": 1.8523861239351794e-08, "loss": 0.4786, "step": 20975 }, { "epoch": 0.9729128014842301, "grad_norm": 6.229592800140381, "learning_rate": 1.8460624892502333e-08, "loss": 0.3434, "step": 20976 }, { "epoch": 0.9729591836734693, "grad_norm": 5.4467244148254395, "learning_rate": 1.8397496468076093e-08, "loss": 0.2545, "step": 20977 }, { "epoch": 0.9730055658627087, "grad_norm": 5.2937750816345215, "learning_rate": 1.8334475967440312e-08, "loss": 0.3087, "step": 20978 }, { "epoch": 0.9730519480519481, "grad_norm": 4.836601734161377, "learning_rate": 1.8271563391960568e-08, "loss": 0.2968, "step": 20979 }, { "epoch": 0.9730983302411874, "grad_norm": 7.0872273445129395, "learning_rate": 1.820875874300021e-08, "loss": 0.2486, "step": 20980 }, { "epoch": 0.9731447124304267, "grad_norm": 8.139092445373535, "learning_rate": 1.8146062021919263e-08, "loss": 0.2157, "step": 20981 }, { "epoch": 0.973191094619666, "grad_norm": 7.035787582397461, "learning_rate": 1.8083473230076088e-08, "loss": 0.3519, "step": 20982 }, { "epoch": 0.9732374768089054, "grad_norm": 8.191246032714844, "learning_rate": 1.8020992368826816e-08, "loss": 0.3329, "step": 20983 }, { "epoch": 0.9732838589981447, "grad_norm": 5.40209436416626, "learning_rate": 1.7958619439524817e-08, "loss": 0.3004, "step": 20984 }, { "epoch": 0.9733302411873841, "grad_norm": 8.519389152526855, "learning_rate": 1.789635444352178e-08, "loss": 0.3198, "step": 20985 }, { "epoch": 0.9733766233766233, "grad_norm": 4.938971519470215, "learning_rate": 1.783419738216663e-08, "loss": 0.2309, "step": 20986 }, { "epoch": 0.9734230055658627, "grad_norm": 8.461587905883789, "learning_rate": 1.777214825680551e-08, "loss": 0.2975, "step": 20987 }, { "epoch": 0.9734693877551021, "grad_norm": 6.692622661590576, "learning_rate": 1.7710207068782904e-08, "loss": 0.317, "step": 20988 }, { "epoch": 0.9735157699443414, "grad_norm": 6.166937828063965, "learning_rate": 1.7648373819441067e-08, "loss": 0.2774, "step": 20989 }, { "epoch": 0.9735621521335807, "grad_norm": 5.130364418029785, "learning_rate": 1.7586648510119485e-08, "loss": 0.2419, "step": 20990 }, { "epoch": 0.97360853432282, "grad_norm": 8.789448738098145, "learning_rate": 1.752503114215487e-08, "loss": 0.2932, "step": 20991 }, { "epoch": 0.9736549165120594, "grad_norm": 9.962966918945312, "learning_rate": 1.7463521716882258e-08, "loss": 0.3648, "step": 20992 }, { "epoch": 0.9737012987012987, "grad_norm": 7.829069137573242, "learning_rate": 1.740212023563448e-08, "loss": 0.2633, "step": 20993 }, { "epoch": 0.973747680890538, "grad_norm": 4.039783954620361, "learning_rate": 1.734082669974213e-08, "loss": 0.2212, "step": 20994 }, { "epoch": 0.9737940630797773, "grad_norm": 5.1063079833984375, "learning_rate": 1.7279641110532487e-08, "loss": 0.2882, "step": 20995 }, { "epoch": 0.9738404452690167, "grad_norm": 8.921733856201172, "learning_rate": 1.7218563469331152e-08, "loss": 0.2831, "step": 20996 }, { "epoch": 0.973886827458256, "grad_norm": 7.60244083404541, "learning_rate": 1.7157593777461512e-08, "loss": 0.3261, "step": 20997 }, { "epoch": 0.9739332096474954, "grad_norm": 14.4611177444458, "learning_rate": 1.7096732036244733e-08, "loss": 0.4684, "step": 20998 }, { "epoch": 0.9739795918367347, "grad_norm": 10.176630020141602, "learning_rate": 1.7035978246998653e-08, "loss": 0.3974, "step": 20999 }, { "epoch": 0.974025974025974, "grad_norm": 6.71810245513916, "learning_rate": 1.6975332411040547e-08, "loss": 0.1714, "step": 21000 }, { "epoch": 0.9740723562152134, "grad_norm": 8.852054595947266, "learning_rate": 1.69147945296827e-08, "loss": 0.2726, "step": 21001 }, { "epoch": 0.9741187384044527, "grad_norm": 6.033233642578125, "learning_rate": 1.685436460423795e-08, "loss": 0.272, "step": 21002 }, { "epoch": 0.974165120593692, "grad_norm": 6.099914073944092, "learning_rate": 1.679404263601525e-08, "loss": 0.3075, "step": 21003 }, { "epoch": 0.9742115027829313, "grad_norm": 10.345914840698242, "learning_rate": 1.673382862632078e-08, "loss": 0.3214, "step": 21004 }, { "epoch": 0.9742578849721707, "grad_norm": 9.938907623291016, "learning_rate": 1.6673722576460162e-08, "loss": 0.3595, "step": 21005 }, { "epoch": 0.97430426716141, "grad_norm": 5.239159107208252, "learning_rate": 1.661372448773457e-08, "loss": 0.1731, "step": 21006 }, { "epoch": 0.9743506493506493, "grad_norm": 7.407031536102295, "learning_rate": 1.6553834361444087e-08, "loss": 0.253, "step": 21007 }, { "epoch": 0.9743970315398887, "grad_norm": 7.733709812164307, "learning_rate": 1.6494052198886557e-08, "loss": 0.3059, "step": 21008 }, { "epoch": 0.974443413729128, "grad_norm": 5.500222206115723, "learning_rate": 1.6434378001356498e-08, "loss": 0.2032, "step": 21009 }, { "epoch": 0.9744897959183674, "grad_norm": 7.9951701164245605, "learning_rate": 1.6374811770147325e-08, "loss": 0.3831, "step": 21010 }, { "epoch": 0.9745361781076067, "grad_norm": 10.756629943847656, "learning_rate": 1.6315353506549116e-08, "loss": 0.4713, "step": 21011 }, { "epoch": 0.974582560296846, "grad_norm": 6.151190280914307, "learning_rate": 1.625600321185028e-08, "loss": 0.4333, "step": 21012 }, { "epoch": 0.9746289424860853, "grad_norm": 7.343850135803223, "learning_rate": 1.619676088733646e-08, "loss": 0.342, "step": 21013 }, { "epoch": 0.9746753246753247, "grad_norm": 13.361457824707031, "learning_rate": 1.6137626534291074e-08, "loss": 0.2862, "step": 21014 }, { "epoch": 0.974721706864564, "grad_norm": 8.128921508789062, "learning_rate": 1.6078600153995317e-08, "loss": 0.2544, "step": 21015 }, { "epoch": 0.9747680890538033, "grad_norm": 12.367530822753906, "learning_rate": 1.601968174772761e-08, "loss": 0.3174, "step": 21016 }, { "epoch": 0.9748144712430427, "grad_norm": 5.943334579467773, "learning_rate": 1.596087131676527e-08, "loss": 0.2766, "step": 21017 }, { "epoch": 0.974860853432282, "grad_norm": 3.7655692100524902, "learning_rate": 1.5902168862381717e-08, "loss": 0.2533, "step": 21018 }, { "epoch": 0.9749072356215214, "grad_norm": 10.008676528930664, "learning_rate": 1.5843574385848716e-08, "loss": 0.3548, "step": 21019 }, { "epoch": 0.9749536178107606, "grad_norm": 4.884510517120361, "learning_rate": 1.5785087888435802e-08, "loss": 0.2814, "step": 21020 }, { "epoch": 0.975, "grad_norm": 5.821905136108398, "learning_rate": 1.5726709371409742e-08, "loss": 0.2969, "step": 21021 }, { "epoch": 0.9750463821892393, "grad_norm": 15.306052207946777, "learning_rate": 1.5668438836036192e-08, "loss": 0.3204, "step": 21022 }, { "epoch": 0.9750927643784787, "grad_norm": 5.738458633422852, "learning_rate": 1.561027628357692e-08, "loss": 0.3326, "step": 21023 }, { "epoch": 0.975139146567718, "grad_norm": 9.567063331604004, "learning_rate": 1.555222171529147e-08, "loss": 0.3475, "step": 21024 }, { "epoch": 0.9751855287569573, "grad_norm": 7.301161289215088, "learning_rate": 1.549427513243884e-08, "loss": 0.3418, "step": 21025 }, { "epoch": 0.9752319109461967, "grad_norm": 5.093043327331543, "learning_rate": 1.5436436536273026e-08, "loss": 0.2541, "step": 21026 }, { "epoch": 0.975278293135436, "grad_norm": 9.751911163330078, "learning_rate": 1.5378705928048022e-08, "loss": 0.3005, "step": 21027 }, { "epoch": 0.9753246753246754, "grad_norm": 6.981125354766846, "learning_rate": 1.5321083309014496e-08, "loss": 0.2865, "step": 21028 }, { "epoch": 0.9753710575139146, "grad_norm": 7.517286777496338, "learning_rate": 1.5263568680420337e-08, "loss": 0.2337, "step": 21029 }, { "epoch": 0.975417439703154, "grad_norm": 11.38714599609375, "learning_rate": 1.5206162043511218e-08, "loss": 0.3326, "step": 21030 }, { "epoch": 0.9754638218923933, "grad_norm": 6.242430210113525, "learning_rate": 1.5148863399532254e-08, "loss": 0.3498, "step": 21031 }, { "epoch": 0.9755102040816327, "grad_norm": 8.32371997833252, "learning_rate": 1.5091672749723564e-08, "loss": 0.3065, "step": 21032 }, { "epoch": 0.9755565862708719, "grad_norm": 10.80501937866211, "learning_rate": 1.5034590095324708e-08, "loss": 0.469, "step": 21033 }, { "epoch": 0.9756029684601113, "grad_norm": 5.330526351928711, "learning_rate": 1.4977615437571923e-08, "loss": 0.2018, "step": 21034 }, { "epoch": 0.9756493506493507, "grad_norm": 19.43328094482422, "learning_rate": 1.4920748777699778e-08, "loss": 0.3715, "step": 21035 }, { "epoch": 0.97569573283859, "grad_norm": 7.317155361175537, "learning_rate": 1.4863990116940063e-08, "loss": 0.2951, "step": 21036 }, { "epoch": 0.9757421150278293, "grad_norm": 5.7943291664123535, "learning_rate": 1.4807339456522906e-08, "loss": 0.3442, "step": 21037 }, { "epoch": 0.9757884972170686, "grad_norm": 10.716428756713867, "learning_rate": 1.475079679767566e-08, "loss": 0.4532, "step": 21038 }, { "epoch": 0.975834879406308, "grad_norm": 6.96117639541626, "learning_rate": 1.4694362141622342e-08, "loss": 0.3267, "step": 21039 }, { "epoch": 0.9758812615955473, "grad_norm": 6.207998752593994, "learning_rate": 1.4638035489586421e-08, "loss": 0.2745, "step": 21040 }, { "epoch": 0.9759276437847867, "grad_norm": 9.631584167480469, "learning_rate": 1.4581816842788032e-08, "loss": 0.4406, "step": 21041 }, { "epoch": 0.9759740259740259, "grad_norm": 8.892975807189941, "learning_rate": 1.4525706202445089e-08, "loss": 0.3825, "step": 21042 }, { "epoch": 0.9760204081632653, "grad_norm": 4.7669243812561035, "learning_rate": 1.4469703569773287e-08, "loss": 0.2635, "step": 21043 }, { "epoch": 0.9760667903525047, "grad_norm": 5.218352317810059, "learning_rate": 1.441380894598554e-08, "loss": 0.2869, "step": 21044 }, { "epoch": 0.976113172541744, "grad_norm": 4.706157684326172, "learning_rate": 1.4358022332293109e-08, "loss": 0.2843, "step": 21045 }, { "epoch": 0.9761595547309833, "grad_norm": 13.468488693237305, "learning_rate": 1.4302343729903911e-08, "loss": 0.4876, "step": 21046 }, { "epoch": 0.9762059369202226, "grad_norm": 6.18001651763916, "learning_rate": 1.4246773140025317e-08, "loss": 0.3487, "step": 21047 }, { "epoch": 0.976252319109462, "grad_norm": 7.199280261993408, "learning_rate": 1.4191310563860806e-08, "loss": 0.3598, "step": 21048 }, { "epoch": 0.9762987012987013, "grad_norm": 9.746349334716797, "learning_rate": 1.413595600261164e-08, "loss": 0.3678, "step": 21049 }, { "epoch": 0.9763450834879406, "grad_norm": 5.250502586364746, "learning_rate": 1.408070945747686e-08, "loss": 0.3714, "step": 21050 }, { "epoch": 0.9763914656771799, "grad_norm": 5.321678638458252, "learning_rate": 1.4025570929654397e-08, "loss": 0.3446, "step": 21051 }, { "epoch": 0.9764378478664193, "grad_norm": 15.322077751159668, "learning_rate": 1.3970540420337741e-08, "loss": 0.3246, "step": 21052 }, { "epoch": 0.9764842300556587, "grad_norm": 4.595528602600098, "learning_rate": 1.3915617930719272e-08, "loss": 0.3761, "step": 21053 }, { "epoch": 0.976530612244898, "grad_norm": 7.864274978637695, "learning_rate": 1.3860803461989148e-08, "loss": 0.3228, "step": 21054 }, { "epoch": 0.9765769944341373, "grad_norm": 13.54153060913086, "learning_rate": 1.3806097015334751e-08, "loss": 0.4046, "step": 21055 }, { "epoch": 0.9766233766233766, "grad_norm": 4.067446708679199, "learning_rate": 1.3751498591941804e-08, "loss": 0.1935, "step": 21056 }, { "epoch": 0.976669758812616, "grad_norm": 4.164811134338379, "learning_rate": 1.3697008192992134e-08, "loss": 0.3175, "step": 21057 }, { "epoch": 0.9767161410018553, "grad_norm": 4.47682523727417, "learning_rate": 1.3642625819667021e-08, "loss": 0.2525, "step": 21058 }, { "epoch": 0.9767625231910946, "grad_norm": 6.196014881134033, "learning_rate": 1.3588351473144412e-08, "loss": 0.2828, "step": 21059 }, { "epoch": 0.9768089053803339, "grad_norm": 10.013004302978516, "learning_rate": 1.3534185154600032e-08, "loss": 0.3506, "step": 21060 }, { "epoch": 0.9768552875695733, "grad_norm": 7.817138671875, "learning_rate": 1.3480126865207389e-08, "loss": 0.2597, "step": 21061 }, { "epoch": 0.9769016697588127, "grad_norm": 34.12085723876953, "learning_rate": 1.3426176606137763e-08, "loss": 0.7481, "step": 21062 }, { "epoch": 0.9769480519480519, "grad_norm": 8.40394115447998, "learning_rate": 1.337233437855967e-08, "loss": 0.2648, "step": 21063 }, { "epoch": 0.9769944341372913, "grad_norm": 7.055565357208252, "learning_rate": 1.331860018363995e-08, "loss": 0.3136, "step": 21064 }, { "epoch": 0.9770408163265306, "grad_norm": 5.232292652130127, "learning_rate": 1.3264974022542677e-08, "loss": 0.3115, "step": 21065 }, { "epoch": 0.97708719851577, "grad_norm": 10.692645072937012, "learning_rate": 1.3211455896429693e-08, "loss": 0.3287, "step": 21066 }, { "epoch": 0.9771335807050093, "grad_norm": 14.114112854003906, "learning_rate": 1.3158045806460073e-08, "loss": 0.412, "step": 21067 }, { "epoch": 0.9771799628942486, "grad_norm": 7.621822357177734, "learning_rate": 1.3104743753790672e-08, "loss": 0.3043, "step": 21068 }, { "epoch": 0.9772263450834879, "grad_norm": 9.51440715789795, "learning_rate": 1.305154973957723e-08, "loss": 0.2674, "step": 21069 }, { "epoch": 0.9772727272727273, "grad_norm": 8.053050994873047, "learning_rate": 1.2998463764971047e-08, "loss": 0.3497, "step": 21070 }, { "epoch": 0.9773191094619667, "grad_norm": 12.683456420898438, "learning_rate": 1.2945485831123427e-08, "loss": 0.2913, "step": 21071 }, { "epoch": 0.9773654916512059, "grad_norm": 5.844573497772217, "learning_rate": 1.289261593918123e-08, "loss": 0.3012, "step": 21072 }, { "epoch": 0.9774118738404453, "grad_norm": 6.302833557128906, "learning_rate": 1.2839854090290204e-08, "loss": 0.2875, "step": 21073 }, { "epoch": 0.9774582560296846, "grad_norm": 7.988204479217529, "learning_rate": 1.2787200285592772e-08, "loss": 0.34, "step": 21074 }, { "epoch": 0.977504638218924, "grad_norm": 5.131556034088135, "learning_rate": 1.2734654526230794e-08, "loss": 0.3278, "step": 21075 }, { "epoch": 0.9775510204081632, "grad_norm": 4.3957343101501465, "learning_rate": 1.2682216813341697e-08, "loss": 0.2776, "step": 21076 }, { "epoch": 0.9775974025974026, "grad_norm": 6.953841686248779, "learning_rate": 1.2629887148061793e-08, "loss": 0.3199, "step": 21077 }, { "epoch": 0.9776437847866419, "grad_norm": 5.9859795570373535, "learning_rate": 1.2577665531525174e-08, "loss": 0.3494, "step": 21078 }, { "epoch": 0.9776901669758813, "grad_norm": 5.3449506759643555, "learning_rate": 1.2525551964862604e-08, "loss": 0.2937, "step": 21079 }, { "epoch": 0.9777365491651205, "grad_norm": 5.2104363441467285, "learning_rate": 1.2473546449203178e-08, "loss": 0.3212, "step": 21080 }, { "epoch": 0.9777829313543599, "grad_norm": 8.051070213317871, "learning_rate": 1.2421648985673774e-08, "loss": 0.3264, "step": 21081 }, { "epoch": 0.9778293135435993, "grad_norm": 11.139570236206055, "learning_rate": 1.2369859575399046e-08, "loss": 0.3081, "step": 21082 }, { "epoch": 0.9778756957328386, "grad_norm": 6.618514060974121, "learning_rate": 1.231817821950032e-08, "loss": 0.3608, "step": 21083 }, { "epoch": 0.977922077922078, "grad_norm": 5.765528678894043, "learning_rate": 1.2266604919097257e-08, "loss": 0.1943, "step": 21084 }, { "epoch": 0.9779684601113172, "grad_norm": 6.123453140258789, "learning_rate": 1.2215139675307852e-08, "loss": 0.3155, "step": 21085 }, { "epoch": 0.9780148423005566, "grad_norm": 11.089531898498535, "learning_rate": 1.2163782489246213e-08, "loss": 0.2687, "step": 21086 }, { "epoch": 0.9780612244897959, "grad_norm": 10.259756088256836, "learning_rate": 1.2112533362025891e-08, "loss": 0.3112, "step": 21087 }, { "epoch": 0.9781076066790353, "grad_norm": 12.417787551879883, "learning_rate": 1.2061392294756557e-08, "loss": 0.4387, "step": 21088 }, { "epoch": 0.9781539888682745, "grad_norm": 12.007625579833984, "learning_rate": 1.2010359288546214e-08, "loss": 0.418, "step": 21089 }, { "epoch": 0.9782003710575139, "grad_norm": 25.214153289794922, "learning_rate": 1.1959434344500642e-08, "loss": 0.4334, "step": 21090 }, { "epoch": 0.9782467532467533, "grad_norm": 8.829339027404785, "learning_rate": 1.190861746372285e-08, "loss": 0.433, "step": 21091 }, { "epoch": 0.9782931354359926, "grad_norm": 5.04900598526001, "learning_rate": 1.1857908647314175e-08, "loss": 0.3273, "step": 21092 }, { "epoch": 0.9783395176252319, "grad_norm": 7.35595178604126, "learning_rate": 1.1807307896372633e-08, "loss": 0.3229, "step": 21093 }, { "epoch": 0.9783858998144712, "grad_norm": 8.541766166687012, "learning_rate": 1.1756815211995121e-08, "loss": 0.3248, "step": 21094 }, { "epoch": 0.9784322820037106, "grad_norm": 10.589008331298828, "learning_rate": 1.170643059527521e-08, "loss": 0.3348, "step": 21095 }, { "epoch": 0.9784786641929499, "grad_norm": 7.852723598480225, "learning_rate": 1.1656154047303691e-08, "loss": 0.3411, "step": 21096 }, { "epoch": 0.9785250463821893, "grad_norm": 5.738894462585449, "learning_rate": 1.1605985569171363e-08, "loss": 0.2902, "step": 21097 }, { "epoch": 0.9785714285714285, "grad_norm": 7.326605319976807, "learning_rate": 1.155592516196402e-08, "loss": 0.3368, "step": 21098 }, { "epoch": 0.9786178107606679, "grad_norm": 8.67381763458252, "learning_rate": 1.1505972826766354e-08, "loss": 0.3967, "step": 21099 }, { "epoch": 0.9786641929499073, "grad_norm": 8.36696720123291, "learning_rate": 1.1456128564660273e-08, "loss": 0.3051, "step": 21100 }, { "epoch": 0.9787105751391466, "grad_norm": 5.1982421875, "learning_rate": 1.1406392376726583e-08, "loss": 0.281, "step": 21101 }, { "epoch": 0.9787569573283859, "grad_norm": 8.186429023742676, "learning_rate": 1.1356764264042197e-08, "loss": 0.2807, "step": 21102 }, { "epoch": 0.9788033395176252, "grad_norm": 10.83450698852539, "learning_rate": 1.1307244227681813e-08, "loss": 0.3466, "step": 21103 }, { "epoch": 0.9788497217068646, "grad_norm": 10.930503845214844, "learning_rate": 1.1257832268719015e-08, "loss": 0.335, "step": 21104 }, { "epoch": 0.9788961038961039, "grad_norm": 8.450824737548828, "learning_rate": 1.120852838822406e-08, "loss": 0.2964, "step": 21105 }, { "epoch": 0.9789424860853432, "grad_norm": 9.164189338684082, "learning_rate": 1.1159332587264427e-08, "loss": 0.3674, "step": 21106 }, { "epoch": 0.9789888682745825, "grad_norm": 5.424152374267578, "learning_rate": 1.1110244866907038e-08, "loss": 0.3877, "step": 21107 }, { "epoch": 0.9790352504638219, "grad_norm": 8.508601188659668, "learning_rate": 1.1061265228214935e-08, "loss": 0.4017, "step": 21108 }, { "epoch": 0.9790816326530613, "grad_norm": 10.626704216003418, "learning_rate": 1.1012393672248934e-08, "loss": 0.4224, "step": 21109 }, { "epoch": 0.9791280148423006, "grad_norm": 7.905601501464844, "learning_rate": 1.0963630200068187e-08, "loss": 0.3797, "step": 21110 }, { "epoch": 0.9791743970315399, "grad_norm": 8.514042854309082, "learning_rate": 1.0914974812728519e-08, "loss": 0.3273, "step": 21111 }, { "epoch": 0.9792207792207792, "grad_norm": 5.295591831207275, "learning_rate": 1.0866427511285194e-08, "loss": 0.2758, "step": 21112 }, { "epoch": 0.9792671614100186, "grad_norm": 7.348471641540527, "learning_rate": 1.0817988296788484e-08, "loss": 0.3376, "step": 21113 }, { "epoch": 0.9793135435992579, "grad_norm": 6.196115493774414, "learning_rate": 1.0769657170288661e-08, "loss": 0.2481, "step": 21114 }, { "epoch": 0.9793599257884972, "grad_norm": 9.064534187316895, "learning_rate": 1.0721434132833219e-08, "loss": 0.3498, "step": 21115 }, { "epoch": 0.9794063079777365, "grad_norm": 6.278576374053955, "learning_rate": 1.067331918546577e-08, "loss": 0.4398, "step": 21116 }, { "epoch": 0.9794526901669759, "grad_norm": 10.374128341674805, "learning_rate": 1.0625312329229364e-08, "loss": 0.3061, "step": 21117 }, { "epoch": 0.9794990723562153, "grad_norm": 6.7313761711120605, "learning_rate": 1.0577413565164285e-08, "loss": 0.2835, "step": 21118 }, { "epoch": 0.9795454545454545, "grad_norm": 6.527370452880859, "learning_rate": 1.0529622894307478e-08, "loss": 0.2747, "step": 21119 }, { "epoch": 0.9795918367346939, "grad_norm": 8.852499961853027, "learning_rate": 1.0481940317694783e-08, "loss": 0.3881, "step": 21120 }, { "epoch": 0.9796382189239332, "grad_norm": 11.495277404785156, "learning_rate": 1.0434365836359262e-08, "loss": 0.3982, "step": 21121 }, { "epoch": 0.9796846011131726, "grad_norm": 10.889581680297852, "learning_rate": 1.03868994513312e-08, "loss": 0.3771, "step": 21122 }, { "epoch": 0.9797309833024119, "grad_norm": 7.296515941619873, "learning_rate": 1.0339541163639777e-08, "loss": 0.2983, "step": 21123 }, { "epoch": 0.9797773654916512, "grad_norm": 7.719404220581055, "learning_rate": 1.0292290974310282e-08, "loss": 0.3702, "step": 21124 }, { "epoch": 0.9798237476808905, "grad_norm": 8.938847541809082, "learning_rate": 1.024514888436634e-08, "loss": 0.3543, "step": 21125 }, { "epoch": 0.9798701298701299, "grad_norm": 8.831685066223145, "learning_rate": 1.0198114894829358e-08, "loss": 0.4131, "step": 21126 }, { "epoch": 0.9799165120593692, "grad_norm": 8.536332130432129, "learning_rate": 1.0151189006717966e-08, "loss": 0.3374, "step": 21127 }, { "epoch": 0.9799628942486085, "grad_norm": 5.007501125335693, "learning_rate": 1.0104371221050236e-08, "loss": 0.321, "step": 21128 }, { "epoch": 0.9800092764378479, "grad_norm": 8.154511451721191, "learning_rate": 1.0057661538838692e-08, "loss": 0.324, "step": 21129 }, { "epoch": 0.9800556586270872, "grad_norm": 4.1867289543151855, "learning_rate": 1.001105996109586e-08, "loss": 0.3572, "step": 21130 }, { "epoch": 0.9801020408163266, "grad_norm": 6.641178607940674, "learning_rate": 9.964566488832039e-09, "loss": 0.248, "step": 21131 }, { "epoch": 0.9801484230055658, "grad_norm": 4.386514186859131, "learning_rate": 9.918181123053649e-09, "loss": 0.2034, "step": 21132 }, { "epoch": 0.9801948051948052, "grad_norm": 12.201553344726562, "learning_rate": 9.871903864765997e-09, "loss": 0.3779, "step": 21133 }, { "epoch": 0.9802411873840445, "grad_norm": 5.575821399688721, "learning_rate": 9.825734714971059e-09, "loss": 0.3087, "step": 21134 }, { "epoch": 0.9802875695732839, "grad_norm": 7.822694778442383, "learning_rate": 9.779673674670253e-09, "loss": 0.3811, "step": 21135 }, { "epoch": 0.9803339517625231, "grad_norm": 10.603719711303711, "learning_rate": 9.73372074486001e-09, "loss": 0.3979, "step": 21136 }, { "epoch": 0.9803803339517625, "grad_norm": 9.024755477905273, "learning_rate": 9.687875926536749e-09, "loss": 0.3064, "step": 21137 }, { "epoch": 0.9804267161410019, "grad_norm": 7.9995551109313965, "learning_rate": 9.642139220694124e-09, "loss": 0.3391, "step": 21138 }, { "epoch": 0.9804730983302412, "grad_norm": 5.015489101409912, "learning_rate": 9.596510628321898e-09, "loss": 0.3456, "step": 21139 }, { "epoch": 0.9805194805194806, "grad_norm": 4.439427852630615, "learning_rate": 9.550990150409279e-09, "loss": 0.2465, "step": 21140 }, { "epoch": 0.9805658627087198, "grad_norm": 8.926325798034668, "learning_rate": 9.505577787942144e-09, "loss": 0.4229, "step": 21141 }, { "epoch": 0.9806122448979592, "grad_norm": 9.210476875305176, "learning_rate": 9.460273541904708e-09, "loss": 0.3714, "step": 21142 }, { "epoch": 0.9806586270871985, "grad_norm": 8.186897277832031, "learning_rate": 9.415077413278406e-09, "loss": 0.337, "step": 21143 }, { "epoch": 0.9807050092764379, "grad_norm": 16.433090209960938, "learning_rate": 9.369989403041347e-09, "loss": 0.4011, "step": 21144 }, { "epoch": 0.9807513914656771, "grad_norm": 10.775755882263184, "learning_rate": 9.32500951217219e-09, "loss": 0.2511, "step": 21145 }, { "epoch": 0.9807977736549165, "grad_norm": 11.372511863708496, "learning_rate": 9.280137741643492e-09, "loss": 0.3637, "step": 21146 }, { "epoch": 0.9808441558441559, "grad_norm": 7.58662748336792, "learning_rate": 9.235374092428917e-09, "loss": 0.4147, "step": 21147 }, { "epoch": 0.9808905380333952, "grad_norm": 5.638221740722656, "learning_rate": 9.190718565497136e-09, "loss": 0.2423, "step": 21148 }, { "epoch": 0.9809369202226345, "grad_norm": 5.845978736877441, "learning_rate": 9.146171161816264e-09, "loss": 0.2711, "step": 21149 }, { "epoch": 0.9809833024118738, "grad_norm": 6.04339599609375, "learning_rate": 9.101731882351638e-09, "loss": 0.2725, "step": 21150 }, { "epoch": 0.9810296846011132, "grad_norm": 5.916891098022461, "learning_rate": 9.057400728064713e-09, "loss": 0.3328, "step": 21151 }, { "epoch": 0.9810760667903525, "grad_norm": 11.225221633911133, "learning_rate": 9.013177699917497e-09, "loss": 0.4532, "step": 21152 }, { "epoch": 0.9811224489795919, "grad_norm": 8.550050735473633, "learning_rate": 8.969062798867002e-09, "loss": 0.4003, "step": 21153 }, { "epoch": 0.9811688311688311, "grad_norm": 5.674962043762207, "learning_rate": 8.925056025869128e-09, "loss": 0.2983, "step": 21154 }, { "epoch": 0.9812152133580705, "grad_norm": 5.1811113357543945, "learning_rate": 8.881157381877559e-09, "loss": 0.2101, "step": 21155 }, { "epoch": 0.9812615955473099, "grad_norm": 9.435236930847168, "learning_rate": 8.8373668678432e-09, "loss": 0.3204, "step": 21156 }, { "epoch": 0.9813079777365492, "grad_norm": 7.144355297088623, "learning_rate": 8.793684484714737e-09, "loss": 0.3506, "step": 21157 }, { "epoch": 0.9813543599257885, "grad_norm": 5.5956196784973145, "learning_rate": 8.750110233438635e-09, "loss": 0.2302, "step": 21158 }, { "epoch": 0.9814007421150278, "grad_norm": 9.718085289001465, "learning_rate": 8.706644114958584e-09, "loss": 0.2867, "step": 21159 }, { "epoch": 0.9814471243042672, "grad_norm": 5.436256408691406, "learning_rate": 8.663286130216608e-09, "loss": 0.3015, "step": 21160 }, { "epoch": 0.9814935064935065, "grad_norm": 8.743218421936035, "learning_rate": 8.6200362801514e-09, "loss": 0.4647, "step": 21161 }, { "epoch": 0.9815398886827458, "grad_norm": 10.570855140686035, "learning_rate": 8.576894565701099e-09, "loss": 0.2874, "step": 21162 }, { "epoch": 0.9815862708719851, "grad_norm": 4.409969329833984, "learning_rate": 8.533860987798847e-09, "loss": 0.2411, "step": 21163 }, { "epoch": 0.9816326530612245, "grad_norm": 9.629817962646484, "learning_rate": 8.490935547378898e-09, "loss": 0.3303, "step": 21164 }, { "epoch": 0.9816790352504638, "grad_norm": 7.3775129318237305, "learning_rate": 8.448118245369396e-09, "loss": 0.2978, "step": 21165 }, { "epoch": 0.9817254174397032, "grad_norm": 5.519891262054443, "learning_rate": 8.40540908269849e-09, "loss": 0.3018, "step": 21166 }, { "epoch": 0.9817717996289425, "grad_norm": 8.1278657913208, "learning_rate": 8.362808060292105e-09, "loss": 0.2273, "step": 21167 }, { "epoch": 0.9818181818181818, "grad_norm": 4.836318016052246, "learning_rate": 8.32031517907228e-09, "loss": 0.2033, "step": 21168 }, { "epoch": 0.9818645640074212, "grad_norm": 5.672039985656738, "learning_rate": 8.277930439959946e-09, "loss": 0.3641, "step": 21169 }, { "epoch": 0.9819109461966605, "grad_norm": 10.481101036071777, "learning_rate": 8.235653843873814e-09, "loss": 0.3842, "step": 21170 }, { "epoch": 0.9819573283858998, "grad_norm": 10.88998794555664, "learning_rate": 8.193485391728705e-09, "loss": 0.302, "step": 21171 }, { "epoch": 0.9820037105751391, "grad_norm": 4.838504791259766, "learning_rate": 8.151425084439447e-09, "loss": 0.3522, "step": 21172 }, { "epoch": 0.9820500927643785, "grad_norm": 10.528837203979492, "learning_rate": 8.109472922916417e-09, "loss": 0.3971, "step": 21173 }, { "epoch": 0.9820964749536178, "grad_norm": 12.07873821258545, "learning_rate": 8.067628908068891e-09, "loss": 0.4265, "step": 21174 }, { "epoch": 0.9821428571428571, "grad_norm": 4.705382347106934, "learning_rate": 8.025893040802812e-09, "loss": 0.3941, "step": 21175 }, { "epoch": 0.9821892393320965, "grad_norm": 4.789642810821533, "learning_rate": 7.984265322023011e-09, "loss": 0.2665, "step": 21176 }, { "epoch": 0.9822356215213358, "grad_norm": 8.395151138305664, "learning_rate": 7.942745752630986e-09, "loss": 0.2915, "step": 21177 }, { "epoch": 0.9822820037105752, "grad_norm": 7.907787799835205, "learning_rate": 7.901334333526578e-09, "loss": 0.3725, "step": 21178 }, { "epoch": 0.9823283858998144, "grad_norm": 8.582938194274902, "learning_rate": 7.860031065606289e-09, "loss": 0.4353, "step": 21179 }, { "epoch": 0.9823747680890538, "grad_norm": 7.373326778411865, "learning_rate": 7.818835949766069e-09, "loss": 0.311, "step": 21180 }, { "epoch": 0.9824211502782931, "grad_norm": 6.794922828674316, "learning_rate": 7.777748986896871e-09, "loss": 0.347, "step": 21181 }, { "epoch": 0.9824675324675325, "grad_norm": 6.620080947875977, "learning_rate": 7.736770177890208e-09, "loss": 0.2775, "step": 21182 }, { "epoch": 0.9825139146567718, "grad_norm": 5.818026542663574, "learning_rate": 7.695899523633143e-09, "loss": 0.3208, "step": 21183 }, { "epoch": 0.9825602968460111, "grad_norm": 10.878724098205566, "learning_rate": 7.655137025011083e-09, "loss": 0.2122, "step": 21184 }, { "epoch": 0.9826066790352505, "grad_norm": 6.0546088218688965, "learning_rate": 7.614482682907209e-09, "loss": 0.3167, "step": 21185 }, { "epoch": 0.9826530612244898, "grad_norm": 5.352355480194092, "learning_rate": 7.573936498202484e-09, "loss": 0.2384, "step": 21186 }, { "epoch": 0.9826994434137292, "grad_norm": 13.1463623046875, "learning_rate": 7.53349847177509e-09, "loss": 0.3143, "step": 21187 }, { "epoch": 0.9827458256029684, "grad_norm": 7.229176044464111, "learning_rate": 7.493168604501555e-09, "loss": 0.2737, "step": 21188 }, { "epoch": 0.9827922077922078, "grad_norm": 9.99544906616211, "learning_rate": 7.452946897255064e-09, "loss": 0.4223, "step": 21189 }, { "epoch": 0.9828385899814471, "grad_norm": 6.843137264251709, "learning_rate": 7.412833350907145e-09, "loss": 0.2645, "step": 21190 }, { "epoch": 0.9828849721706865, "grad_norm": 4.725599765777588, "learning_rate": 7.372827966326546e-09, "loss": 0.2292, "step": 21191 }, { "epoch": 0.9829313543599257, "grad_norm": 8.819933891296387, "learning_rate": 7.332930744380906e-09, "loss": 0.3509, "step": 21192 }, { "epoch": 0.9829777365491651, "grad_norm": 7.148266792297363, "learning_rate": 7.293141685933425e-09, "loss": 0.2895, "step": 21193 }, { "epoch": 0.9830241187384045, "grad_norm": 18.088314056396484, "learning_rate": 7.253460791846745e-09, "loss": 0.4846, "step": 21194 }, { "epoch": 0.9830705009276438, "grad_norm": 7.160979270935059, "learning_rate": 7.213888062980734e-09, "loss": 0.2776, "step": 21195 }, { "epoch": 0.9831168831168832, "grad_norm": 5.183220386505127, "learning_rate": 7.1744235001924845e-09, "loss": 0.2241, "step": 21196 }, { "epoch": 0.9831632653061224, "grad_norm": 5.517984867095947, "learning_rate": 7.135067104336868e-09, "loss": 0.2229, "step": 21197 }, { "epoch": 0.9832096474953618, "grad_norm": 10.408340454101562, "learning_rate": 7.095818876265981e-09, "loss": 0.3382, "step": 21198 }, { "epoch": 0.9832560296846011, "grad_norm": 8.663399696350098, "learning_rate": 7.056678816831364e-09, "loss": 0.3809, "step": 21199 }, { "epoch": 0.9833024118738405, "grad_norm": 10.456670761108398, "learning_rate": 7.017646926880117e-09, "loss": 0.2848, "step": 21200 }, { "epoch": 0.9833487940630797, "grad_norm": 11.978765487670898, "learning_rate": 6.978723207257676e-09, "loss": 0.3324, "step": 21201 }, { "epoch": 0.9833951762523191, "grad_norm": 5.8742170333862305, "learning_rate": 6.939907658807809e-09, "loss": 0.1936, "step": 21202 }, { "epoch": 0.9834415584415584, "grad_norm": 4.928782939910889, "learning_rate": 6.901200282371512e-09, "loss": 0.3618, "step": 21203 }, { "epoch": 0.9834879406307978, "grad_norm": 6.942122459411621, "learning_rate": 6.862601078787001e-09, "loss": 0.3561, "step": 21204 }, { "epoch": 0.983534322820037, "grad_norm": 4.823665618896484, "learning_rate": 6.824110048890831e-09, "loss": 0.3118, "step": 21205 }, { "epoch": 0.9835807050092764, "grad_norm": 9.264673233032227, "learning_rate": 6.785727193516223e-09, "loss": 0.3424, "step": 21206 }, { "epoch": 0.9836270871985158, "grad_norm": 9.420670509338379, "learning_rate": 6.74745251349529e-09, "loss": 0.3902, "step": 21207 }, { "epoch": 0.9836734693877551, "grad_norm": 10.229938507080078, "learning_rate": 6.709286009657368e-09, "loss": 0.3322, "step": 21208 }, { "epoch": 0.9837198515769945, "grad_norm": 8.243927955627441, "learning_rate": 6.671227682829018e-09, "loss": 0.3262, "step": 21209 }, { "epoch": 0.9837662337662337, "grad_norm": 4.032288074493408, "learning_rate": 6.633277533835136e-09, "loss": 0.1773, "step": 21210 }, { "epoch": 0.9838126159554731, "grad_norm": 9.162192344665527, "learning_rate": 6.5954355634972876e-09, "loss": 0.3922, "step": 21211 }, { "epoch": 0.9838589981447124, "grad_norm": 8.352212905883789, "learning_rate": 6.557701772635372e-09, "loss": 0.2938, "step": 21212 }, { "epoch": 0.9839053803339518, "grad_norm": 9.772424697875977, "learning_rate": 6.520076162067068e-09, "loss": 0.4425, "step": 21213 }, { "epoch": 0.983951762523191, "grad_norm": 6.098966598510742, "learning_rate": 6.482558732607835e-09, "loss": 0.3792, "step": 21214 }, { "epoch": 0.9839981447124304, "grad_norm": 6.8243513107299805, "learning_rate": 6.4451494850703566e-09, "loss": 0.2818, "step": 21215 }, { "epoch": 0.9840445269016698, "grad_norm": 4.572592735290527, "learning_rate": 6.407848420264539e-09, "loss": 0.2965, "step": 21216 }, { "epoch": 0.9840909090909091, "grad_norm": 6.3902058601379395, "learning_rate": 6.370655538998627e-09, "loss": 0.2833, "step": 21217 }, { "epoch": 0.9841372912801484, "grad_norm": 7.432596206665039, "learning_rate": 6.33357084207864e-09, "loss": 0.2305, "step": 21218 }, { "epoch": 0.9841836734693877, "grad_norm": 6.213544845581055, "learning_rate": 6.296594330308381e-09, "loss": 0.3655, "step": 21219 }, { "epoch": 0.9842300556586271, "grad_norm": 6.8292765617370605, "learning_rate": 6.25972600448832e-09, "loss": 0.2805, "step": 21220 }, { "epoch": 0.9842764378478664, "grad_norm": 6.533785343170166, "learning_rate": 6.222965865417263e-09, "loss": 0.3754, "step": 21221 }, { "epoch": 0.9843228200371058, "grad_norm": 6.032523155212402, "learning_rate": 6.186313913891795e-09, "loss": 0.3817, "step": 21222 }, { "epoch": 0.984369202226345, "grad_norm": 7.166086196899414, "learning_rate": 6.149770150705725e-09, "loss": 0.2585, "step": 21223 }, { "epoch": 0.9844155844155844, "grad_norm": 6.599669933319092, "learning_rate": 6.1133345766511975e-09, "loss": 0.4073, "step": 21224 }, { "epoch": 0.9844619666048238, "grad_norm": 6.384170055389404, "learning_rate": 6.077007192517026e-09, "loss": 0.1853, "step": 21225 }, { "epoch": 0.9845083487940631, "grad_norm": 3.912025213241577, "learning_rate": 6.040787999090913e-09, "loss": 0.3055, "step": 21226 }, { "epoch": 0.9845547309833024, "grad_norm": 7.677148342132568, "learning_rate": 6.004676997156678e-09, "loss": 0.3871, "step": 21227 }, { "epoch": 0.9846011131725417, "grad_norm": 8.587881088256836, "learning_rate": 5.968674187497025e-09, "loss": 0.324, "step": 21228 }, { "epoch": 0.9846474953617811, "grad_norm": 10.889917373657227, "learning_rate": 5.932779570891889e-09, "loss": 0.3824, "step": 21229 }, { "epoch": 0.9846938775510204, "grad_norm": 5.286273956298828, "learning_rate": 5.896993148119534e-09, "loss": 0.3062, "step": 21230 }, { "epoch": 0.9847402597402597, "grad_norm": 8.725863456726074, "learning_rate": 5.8613149199537866e-09, "loss": 0.3124, "step": 21231 }, { "epoch": 0.984786641929499, "grad_norm": 4.757845878601074, "learning_rate": 5.825744887169582e-09, "loss": 0.2729, "step": 21232 }, { "epoch": 0.9848330241187384, "grad_norm": 17.743240356445312, "learning_rate": 5.79028305053575e-09, "loss": 0.3873, "step": 21233 }, { "epoch": 0.9848794063079778, "grad_norm": 8.658782958984375, "learning_rate": 5.75492941082112e-09, "loss": 0.2956, "step": 21234 }, { "epoch": 0.984925788497217, "grad_norm": 5.5607380867004395, "learning_rate": 5.7196839687923e-09, "loss": 0.2999, "step": 21235 }, { "epoch": 0.9849721706864564, "grad_norm": 11.809372901916504, "learning_rate": 5.684546725212015e-09, "loss": 0.4402, "step": 21236 }, { "epoch": 0.9850185528756957, "grad_norm": 4.78041934967041, "learning_rate": 5.649517680841876e-09, "loss": 0.2603, "step": 21237 }, { "epoch": 0.9850649350649351, "grad_norm": 6.577226161956787, "learning_rate": 5.614596836440722e-09, "loss": 0.3148, "step": 21238 }, { "epoch": 0.9851113172541744, "grad_norm": 6.311641693115234, "learning_rate": 5.57978419276517e-09, "loss": 0.3326, "step": 21239 }, { "epoch": 0.9851576994434137, "grad_norm": 8.2659273147583, "learning_rate": 5.5450797505690605e-09, "loss": 0.4128, "step": 21240 }, { "epoch": 0.985204081632653, "grad_norm": 12.089702606201172, "learning_rate": 5.510483510605124e-09, "loss": 0.377, "step": 21241 }, { "epoch": 0.9852504638218924, "grad_norm": 11.098199844360352, "learning_rate": 5.4759954736216536e-09, "loss": 0.264, "step": 21242 }, { "epoch": 0.9852968460111318, "grad_norm": 14.60247802734375, "learning_rate": 5.441615640366382e-09, "loss": 0.3011, "step": 21243 }, { "epoch": 0.985343228200371, "grad_norm": 15.638949394226074, "learning_rate": 5.4073440115842704e-09, "loss": 0.3749, "step": 21244 }, { "epoch": 0.9853896103896104, "grad_norm": 6.823471546173096, "learning_rate": 5.373180588017501e-09, "loss": 0.3868, "step": 21245 }, { "epoch": 0.9854359925788497, "grad_norm": 6.646236896514893, "learning_rate": 5.339125370406595e-09, "loss": 0.3533, "step": 21246 }, { "epoch": 0.9854823747680891, "grad_norm": 9.493546485900879, "learning_rate": 5.3051783594892935e-09, "loss": 0.2783, "step": 21247 }, { "epoch": 0.9855287569573283, "grad_norm": 4.770097255706787, "learning_rate": 5.2713395560005654e-09, "loss": 0.3459, "step": 21248 }, { "epoch": 0.9855751391465677, "grad_norm": 5.291244983673096, "learning_rate": 5.2376089606737125e-09, "loss": 0.3709, "step": 21249 }, { "epoch": 0.985621521335807, "grad_norm": 8.350223541259766, "learning_rate": 5.203986574239817e-09, "loss": 0.2907, "step": 21250 }, { "epoch": 0.9856679035250464, "grad_norm": 6.7075605392456055, "learning_rate": 5.170472397427184e-09, "loss": 0.2318, "step": 21251 }, { "epoch": 0.9857142857142858, "grad_norm": 7.721449375152588, "learning_rate": 5.137066430961901e-09, "loss": 0.2348, "step": 21252 }, { "epoch": 0.985760667903525, "grad_norm": 5.795326232910156, "learning_rate": 5.103768675567278e-09, "loss": 0.3528, "step": 21253 }, { "epoch": 0.9858070500927644, "grad_norm": 8.595174789428711, "learning_rate": 5.070579131964959e-09, "loss": 0.3791, "step": 21254 }, { "epoch": 0.9858534322820037, "grad_norm": 6.5018839836120605, "learning_rate": 5.037497800874369e-09, "loss": 0.3876, "step": 21255 }, { "epoch": 0.9858998144712431, "grad_norm": 5.239679336547852, "learning_rate": 5.004524683011048e-09, "loss": 0.3803, "step": 21256 }, { "epoch": 0.9859461966604823, "grad_norm": 6.917906284332275, "learning_rate": 4.971659779091087e-09, "loss": 0.2825, "step": 21257 }, { "epoch": 0.9859925788497217, "grad_norm": 9.912413597106934, "learning_rate": 4.938903089824476e-09, "loss": 0.3191, "step": 21258 }, { "epoch": 0.986038961038961, "grad_norm": 7.471231937408447, "learning_rate": 4.906254615922867e-09, "loss": 0.3064, "step": 21259 }, { "epoch": 0.9860853432282004, "grad_norm": 8.315265655517578, "learning_rate": 4.873714358091808e-09, "loss": 0.3312, "step": 21260 }, { "epoch": 0.9861317254174397, "grad_norm": 7.444189548492432, "learning_rate": 4.841282317037399e-09, "loss": 0.3119, "step": 21261 }, { "epoch": 0.986178107606679, "grad_norm": 6.692695617675781, "learning_rate": 4.808958493462412e-09, "loss": 0.3145, "step": 21262 }, { "epoch": 0.9862244897959184, "grad_norm": 10.99661636352539, "learning_rate": 4.776742888066288e-09, "loss": 0.2856, "step": 21263 }, { "epoch": 0.9862708719851577, "grad_norm": 6.337247371673584, "learning_rate": 4.744635501546801e-09, "loss": 0.3132, "step": 21264 }, { "epoch": 0.9863172541743971, "grad_norm": 6.777916431427002, "learning_rate": 4.712636334600618e-09, "loss": 0.3248, "step": 21265 }, { "epoch": 0.9863636363636363, "grad_norm": 6.685410499572754, "learning_rate": 4.680745387920516e-09, "loss": 0.2986, "step": 21266 }, { "epoch": 0.9864100185528757, "grad_norm": 9.415124893188477, "learning_rate": 4.648962662197054e-09, "loss": 0.3529, "step": 21267 }, { "epoch": 0.986456400742115, "grad_norm": 12.50512409210205, "learning_rate": 4.617288158119126e-09, "loss": 0.4076, "step": 21268 }, { "epoch": 0.9865027829313544, "grad_norm": 5.738868713378906, "learning_rate": 4.585721876372851e-09, "loss": 0.3669, "step": 21269 }, { "epoch": 0.9865491651205937, "grad_norm": 13.864794731140137, "learning_rate": 4.554263817642124e-09, "loss": 0.2874, "step": 21270 }, { "epoch": 0.986595547309833, "grad_norm": 4.979961395263672, "learning_rate": 4.522913982608623e-09, "loss": 0.2498, "step": 21271 }, { "epoch": 0.9866419294990724, "grad_norm": 5.864593982696533, "learning_rate": 4.491672371950695e-09, "loss": 0.2888, "step": 21272 }, { "epoch": 0.9866883116883117, "grad_norm": 9.257222175598145, "learning_rate": 4.4605389863466855e-09, "loss": 0.4021, "step": 21273 }, { "epoch": 0.986734693877551, "grad_norm": 10.570716857910156, "learning_rate": 4.4295138264699445e-09, "loss": 0.3481, "step": 21274 }, { "epoch": 0.9867810760667903, "grad_norm": 7.966065406799316, "learning_rate": 4.398596892992712e-09, "loss": 0.2523, "step": 21275 }, { "epoch": 0.9868274582560297, "grad_norm": 4.422518253326416, "learning_rate": 4.367788186585009e-09, "loss": 0.3042, "step": 21276 }, { "epoch": 0.986873840445269, "grad_norm": 7.515536785125732, "learning_rate": 4.337087707914079e-09, "loss": 0.3167, "step": 21277 }, { "epoch": 0.9869202226345084, "grad_norm": 15.383617401123047, "learning_rate": 4.306495457645499e-09, "loss": 0.3941, "step": 21278 }, { "epoch": 0.9869666048237477, "grad_norm": 4.8958420753479, "learning_rate": 4.276011436441518e-09, "loss": 0.292, "step": 21279 }, { "epoch": 0.987012987012987, "grad_norm": 13.357478141784668, "learning_rate": 4.245635644963275e-09, "loss": 0.395, "step": 21280 }, { "epoch": 0.9870593692022264, "grad_norm": 11.675186157226562, "learning_rate": 4.215368083868021e-09, "loss": 0.3962, "step": 21281 }, { "epoch": 0.9871057513914657, "grad_norm": 8.236886978149414, "learning_rate": 4.1852087538113425e-09, "loss": 0.36, "step": 21282 }, { "epoch": 0.987152133580705, "grad_norm": 9.120816230773926, "learning_rate": 4.155157655447717e-09, "loss": 0.3986, "step": 21283 }, { "epoch": 0.9871985157699443, "grad_norm": 16.64019203186035, "learning_rate": 4.125214789427734e-09, "loss": 0.548, "step": 21284 }, { "epoch": 0.9872448979591837, "grad_norm": 4.834415435791016, "learning_rate": 4.095380156399209e-09, "loss": 0.2485, "step": 21285 }, { "epoch": 0.987291280148423, "grad_norm": 4.155575752258301, "learning_rate": 4.065653757009957e-09, "loss": 0.2949, "step": 21286 }, { "epoch": 0.9873376623376623, "grad_norm": 3.764373302459717, "learning_rate": 4.036035591902798e-09, "loss": 0.2973, "step": 21287 }, { "epoch": 0.9873840445269016, "grad_norm": 11.2294921875, "learning_rate": 4.0065256617199954e-09, "loss": 0.3116, "step": 21288 }, { "epoch": 0.987430426716141, "grad_norm": 10.281654357910156, "learning_rate": 3.977123967100482e-09, "loss": 0.2467, "step": 21289 }, { "epoch": 0.9874768089053804, "grad_norm": 13.111544609069824, "learning_rate": 3.9478305086815275e-09, "loss": 0.4601, "step": 21290 }, { "epoch": 0.9875231910946196, "grad_norm": 5.130692481994629, "learning_rate": 3.918645287097622e-09, "loss": 0.1937, "step": 21291 }, { "epoch": 0.987569573283859, "grad_norm": 6.1263957023620605, "learning_rate": 3.889568302981039e-09, "loss": 0.3354, "step": 21292 }, { "epoch": 0.9876159554730983, "grad_norm": 8.303452491760254, "learning_rate": 3.860599556962385e-09, "loss": 0.3155, "step": 21293 }, { "epoch": 0.9876623376623377, "grad_norm": 11.205835342407227, "learning_rate": 3.831739049667826e-09, "loss": 0.2726, "step": 21294 }, { "epoch": 0.987708719851577, "grad_norm": 5.772978782653809, "learning_rate": 3.802986781724083e-09, "loss": 0.2444, "step": 21295 }, { "epoch": 0.9877551020408163, "grad_norm": 5.995147228240967, "learning_rate": 3.774342753753435e-09, "loss": 0.4091, "step": 21296 }, { "epoch": 0.9878014842300556, "grad_norm": 7.890122890472412, "learning_rate": 3.7458069663759425e-09, "loss": 0.3176, "step": 21297 }, { "epoch": 0.987847866419295, "grad_norm": 9.755311012268066, "learning_rate": 3.717379420210554e-09, "loss": 0.3519, "step": 21298 }, { "epoch": 0.9878942486085344, "grad_norm": 13.405652046203613, "learning_rate": 3.689060115872889e-09, "loss": 0.3418, "step": 21299 }, { "epoch": 0.9879406307977736, "grad_norm": 4.9825663566589355, "learning_rate": 3.6608490539763452e-09, "loss": 0.2962, "step": 21300 }, { "epoch": 0.987987012987013, "grad_norm": 6.564004421234131, "learning_rate": 3.632746235132101e-09, "loss": 0.2953, "step": 21301 }, { "epoch": 0.9880333951762523, "grad_norm": 7.55426025390625, "learning_rate": 3.6047516599491126e-09, "loss": 0.3779, "step": 21302 }, { "epoch": 0.9880797773654917, "grad_norm": 8.757166862487793, "learning_rate": 3.576865329034118e-09, "loss": 0.3239, "step": 21303 }, { "epoch": 0.9881261595547309, "grad_norm": 6.144516944885254, "learning_rate": 3.5490872429910784e-09, "loss": 0.2746, "step": 21304 }, { "epoch": 0.9881725417439703, "grad_norm": 5.268794059753418, "learning_rate": 3.5214174024211788e-09, "loss": 0.3303, "step": 21305 }, { "epoch": 0.9882189239332096, "grad_norm": 11.846318244934082, "learning_rate": 3.4938558079250504e-09, "loss": 0.5051, "step": 21306 }, { "epoch": 0.988265306122449, "grad_norm": 10.188343048095703, "learning_rate": 3.4664024600988834e-09, "loss": 0.4034, "step": 21307 }, { "epoch": 0.9883116883116884, "grad_norm": 5.766073703765869, "learning_rate": 3.4390573595377563e-09, "loss": 0.3738, "step": 21308 }, { "epoch": 0.9883580705009276, "grad_norm": 3.7190744876861572, "learning_rate": 3.411820506833974e-09, "loss": 0.1699, "step": 21309 }, { "epoch": 0.988404452690167, "grad_norm": 4.920460224151611, "learning_rate": 3.3846919025776193e-09, "loss": 0.3277, "step": 21310 }, { "epoch": 0.9884508348794063, "grad_norm": 9.478131294250488, "learning_rate": 3.357671547356556e-09, "loss": 0.4792, "step": 21311 }, { "epoch": 0.9884972170686457, "grad_norm": 6.958929538726807, "learning_rate": 3.330759441755871e-09, "loss": 0.3802, "step": 21312 }, { "epoch": 0.9885435992578849, "grad_norm": 10.656241416931152, "learning_rate": 3.3039555863589865e-09, "loss": 0.4115, "step": 21313 }, { "epoch": 0.9885899814471243, "grad_norm": 5.222715377807617, "learning_rate": 3.27725998174655e-09, "loss": 0.3385, "step": 21314 }, { "epoch": 0.9886363636363636, "grad_norm": 6.207601070404053, "learning_rate": 3.250672628496432e-09, "loss": 0.3056, "step": 21315 }, { "epoch": 0.988682745825603, "grad_norm": 3.8063242435455322, "learning_rate": 3.224193527185393e-09, "loss": 0.2675, "step": 21316 }, { "epoch": 0.9887291280148423, "grad_norm": 6.319674015045166, "learning_rate": 3.1978226783863087e-09, "loss": 0.3667, "step": 21317 }, { "epoch": 0.9887755102040816, "grad_norm": 13.086344718933105, "learning_rate": 3.1715600826714986e-09, "loss": 0.3565, "step": 21318 }, { "epoch": 0.988821892393321, "grad_norm": 9.379526138305664, "learning_rate": 3.1454057406088424e-09, "loss": 0.3841, "step": 21319 }, { "epoch": 0.9888682745825603, "grad_norm": 5.2095818519592285, "learning_rate": 3.119359652765108e-09, "loss": 0.3112, "step": 21320 }, { "epoch": 0.9889146567717997, "grad_norm": 6.767408847808838, "learning_rate": 3.0934218197054e-09, "loss": 0.3741, "step": 21321 }, { "epoch": 0.9889610389610389, "grad_norm": 9.938273429870605, "learning_rate": 3.0675922419903804e-09, "loss": 0.4398, "step": 21322 }, { "epoch": 0.9890074211502783, "grad_norm": 8.549970626831055, "learning_rate": 3.041870920181267e-09, "loss": 0.4244, "step": 21323 }, { "epoch": 0.9890538033395176, "grad_norm": 13.495177268981934, "learning_rate": 3.016257854833726e-09, "loss": 0.5799, "step": 21324 }, { "epoch": 0.989100185528757, "grad_norm": 12.638522148132324, "learning_rate": 2.9907530465034252e-09, "loss": 0.4399, "step": 21325 }, { "epoch": 0.9891465677179963, "grad_norm": 9.308030128479004, "learning_rate": 2.9653564957432544e-09, "loss": 0.2317, "step": 21326 }, { "epoch": 0.9891929499072356, "grad_norm": 4.503260135650635, "learning_rate": 2.940068203102775e-09, "loss": 0.2471, "step": 21327 }, { "epoch": 0.989239332096475, "grad_norm": 6.497586250305176, "learning_rate": 2.9148881691298812e-09, "loss": 0.3811, "step": 21328 }, { "epoch": 0.9892857142857143, "grad_norm": 7.083901882171631, "learning_rate": 2.8898163943702484e-09, "loss": 0.3152, "step": 21329 }, { "epoch": 0.9893320964749536, "grad_norm": 19.72518539428711, "learning_rate": 2.86485287936733e-09, "loss": 0.4568, "step": 21330 }, { "epoch": 0.9893784786641929, "grad_norm": 11.671676635742188, "learning_rate": 2.8399976246612503e-09, "loss": 0.3761, "step": 21331 }, { "epoch": 0.9894248608534323, "grad_norm": 11.8764066696167, "learning_rate": 2.815250630791022e-09, "loss": 0.423, "step": 21332 }, { "epoch": 0.9894712430426716, "grad_norm": 11.36814022064209, "learning_rate": 2.790611898292883e-09, "loss": 0.3854, "step": 21333 }, { "epoch": 0.9895176252319109, "grad_norm": 10.890028953552246, "learning_rate": 2.7660814277002956e-09, "loss": 0.4523, "step": 21334 }, { "epoch": 0.9895640074211502, "grad_norm": 11.570907592773438, "learning_rate": 2.7416592195445015e-09, "loss": 0.378, "step": 21335 }, { "epoch": 0.9896103896103896, "grad_norm": 5.969367504119873, "learning_rate": 2.7173452743550767e-09, "loss": 0.2986, "step": 21336 }, { "epoch": 0.989656771799629, "grad_norm": 6.915856838226318, "learning_rate": 2.693139592658822e-09, "loss": 0.254, "step": 21337 }, { "epoch": 0.9897031539888683, "grad_norm": 10.865076065063477, "learning_rate": 2.6690421749797634e-09, "loss": 0.3774, "step": 21338 }, { "epoch": 0.9897495361781076, "grad_norm": 16.041093826293945, "learning_rate": 2.6450530218397054e-09, "loss": 0.449, "step": 21339 }, { "epoch": 0.9897959183673469, "grad_norm": 2.6086764335632324, "learning_rate": 2.621172133759342e-09, "loss": 0.1373, "step": 21340 }, { "epoch": 0.9898423005565863, "grad_norm": 7.208695411682129, "learning_rate": 2.5973995112549275e-09, "loss": 0.2236, "step": 21341 }, { "epoch": 0.9898886827458256, "grad_norm": 8.716858863830566, "learning_rate": 2.5737351548421606e-09, "loss": 0.2369, "step": 21342 }, { "epoch": 0.9899350649350649, "grad_norm": 5.564108848571777, "learning_rate": 2.5501790650334093e-09, "loss": 0.339, "step": 21343 }, { "epoch": 0.9899814471243042, "grad_norm": 4.756960391998291, "learning_rate": 2.5267312423393753e-09, "loss": 0.3555, "step": 21344 }, { "epoch": 0.9900278293135436, "grad_norm": 10.23171329498291, "learning_rate": 2.5033916872679865e-09, "loss": 0.3852, "step": 21345 }, { "epoch": 0.990074211502783, "grad_norm": 7.126591205596924, "learning_rate": 2.480160400324394e-09, "loss": 0.3808, "step": 21346 }, { "epoch": 0.9901205936920222, "grad_norm": 4.679253101348877, "learning_rate": 2.457037382012639e-09, "loss": 0.2009, "step": 21347 }, { "epoch": 0.9901669758812616, "grad_norm": 6.218633651733398, "learning_rate": 2.4340226328328775e-09, "loss": 0.3396, "step": 21348 }, { "epoch": 0.9902133580705009, "grad_norm": 7.232030868530273, "learning_rate": 2.4111161532841542e-09, "loss": 0.2684, "step": 21349 }, { "epoch": 0.9902597402597403, "grad_norm": 7.418116569519043, "learning_rate": 2.388317943862184e-09, "loss": 0.2811, "step": 21350 }, { "epoch": 0.9903061224489796, "grad_norm": 9.943928718566895, "learning_rate": 2.365628005062126e-09, "loss": 0.4158, "step": 21351 }, { "epoch": 0.9903525046382189, "grad_norm": 9.826797485351562, "learning_rate": 2.343046337374144e-09, "loss": 0.5068, "step": 21352 }, { "epoch": 0.9903988868274582, "grad_norm": 10.08603286743164, "learning_rate": 2.3205729412884015e-09, "loss": 0.2322, "step": 21353 }, { "epoch": 0.9904452690166976, "grad_norm": 7.830486297607422, "learning_rate": 2.298207817291176e-09, "loss": 0.322, "step": 21354 }, { "epoch": 0.990491651205937, "grad_norm": 5.7050652503967285, "learning_rate": 2.275950965867635e-09, "loss": 0.3511, "step": 21355 }, { "epoch": 0.9905380333951762, "grad_norm": 8.812443733215332, "learning_rate": 2.2538023874996153e-09, "loss": 0.3684, "step": 21356 }, { "epoch": 0.9905844155844156, "grad_norm": 7.3627495765686035, "learning_rate": 2.231762082666733e-09, "loss": 0.3942, "step": 21357 }, { "epoch": 0.9906307977736549, "grad_norm": 7.3886494636535645, "learning_rate": 2.2098300518463846e-09, "loss": 0.2051, "step": 21358 }, { "epoch": 0.9906771799628943, "grad_norm": 6.358950138092041, "learning_rate": 2.1880062955143e-09, "loss": 0.3454, "step": 21359 }, { "epoch": 0.9907235621521335, "grad_norm": 9.119235038757324, "learning_rate": 2.1662908141434346e-09, "loss": 0.3976, "step": 21360 }, { "epoch": 0.9907699443413729, "grad_norm": 8.208240509033203, "learning_rate": 2.1446836082034128e-09, "loss": 0.3457, "step": 21361 }, { "epoch": 0.9908163265306122, "grad_norm": 8.750006675720215, "learning_rate": 2.1231846781621936e-09, "loss": 0.2744, "step": 21362 }, { "epoch": 0.9908627087198516, "grad_norm": 11.301490783691406, "learning_rate": 2.1017940244866253e-09, "loss": 0.4421, "step": 21363 }, { "epoch": 0.990909090909091, "grad_norm": 6.9042510986328125, "learning_rate": 2.0805116476396715e-09, "loss": 0.2512, "step": 21364 }, { "epoch": 0.9909554730983302, "grad_norm": 6.342742443084717, "learning_rate": 2.0593375480820743e-09, "loss": 0.285, "step": 21365 }, { "epoch": 0.9910018552875696, "grad_norm": 6.480386257171631, "learning_rate": 2.0382717262729114e-09, "loss": 0.3187, "step": 21366 }, { "epoch": 0.9910482374768089, "grad_norm": 6.304664611816406, "learning_rate": 2.0173141826684838e-09, "loss": 0.3006, "step": 21367 }, { "epoch": 0.9910946196660483, "grad_norm": 6.745005130767822, "learning_rate": 1.9964649177223184e-09, "loss": 0.2976, "step": 21368 }, { "epoch": 0.9911410018552875, "grad_norm": 8.610602378845215, "learning_rate": 1.9757239318873857e-09, "loss": 0.2897, "step": 21369 }, { "epoch": 0.9911873840445269, "grad_norm": 6.189489364624023, "learning_rate": 1.9550912256116605e-09, "loss": 0.3851, "step": 21370 }, { "epoch": 0.9912337662337662, "grad_norm": 4.947093486785889, "learning_rate": 1.934566799342563e-09, "loss": 0.252, "step": 21371 }, { "epoch": 0.9912801484230056, "grad_norm": 9.335334777832031, "learning_rate": 1.914150653525293e-09, "loss": 0.4681, "step": 21372 }, { "epoch": 0.9913265306122448, "grad_norm": 4.2267231941223145, "learning_rate": 1.8938427886011635e-09, "loss": 0.2816, "step": 21373 }, { "epoch": 0.9913729128014842, "grad_norm": 5.63592004776001, "learning_rate": 1.873643205011488e-09, "loss": 0.3315, "step": 21374 }, { "epoch": 0.9914192949907236, "grad_norm": 8.731608390808105, "learning_rate": 1.8535519031925853e-09, "loss": 0.359, "step": 21375 }, { "epoch": 0.9914656771799629, "grad_norm": 5.674435138702393, "learning_rate": 1.8335688835802169e-09, "loss": 0.2538, "step": 21376 }, { "epoch": 0.9915120593692023, "grad_norm": 12.614520072937012, "learning_rate": 1.8136941466073698e-09, "loss": 0.4511, "step": 21377 }, { "epoch": 0.9915584415584415, "grad_norm": 17.63117027282715, "learning_rate": 1.793927692704811e-09, "loss": 0.4895, "step": 21378 }, { "epoch": 0.9916048237476809, "grad_norm": 11.727641105651855, "learning_rate": 1.7742695223005312e-09, "loss": 0.4169, "step": 21379 }, { "epoch": 0.9916512059369202, "grad_norm": 5.384507179260254, "learning_rate": 1.754719635819746e-09, "loss": 0.2991, "step": 21380 }, { "epoch": 0.9916975881261596, "grad_norm": 4.669081211090088, "learning_rate": 1.7352780336871155e-09, "loss": 0.2192, "step": 21381 }, { "epoch": 0.9917439703153988, "grad_norm": 6.823920249938965, "learning_rate": 1.7159447163234145e-09, "loss": 0.3822, "step": 21382 }, { "epoch": 0.9917903525046382, "grad_norm": 7.108926296234131, "learning_rate": 1.6967196841477517e-09, "loss": 0.3042, "step": 21383 }, { "epoch": 0.9918367346938776, "grad_norm": 21.679067611694336, "learning_rate": 1.6776029375759062e-09, "loss": 0.364, "step": 21384 }, { "epoch": 0.9918831168831169, "grad_norm": 5.496960163116455, "learning_rate": 1.6585944770225459e-09, "loss": 0.3182, "step": 21385 }, { "epoch": 0.9919294990723562, "grad_norm": 7.576452255249023, "learning_rate": 1.6396943028995638e-09, "loss": 0.354, "step": 21386 }, { "epoch": 0.9919758812615955, "grad_norm": 12.059981346130371, "learning_rate": 1.6209024156160768e-09, "loss": 0.3915, "step": 21387 }, { "epoch": 0.9920222634508349, "grad_norm": 15.987411499023438, "learning_rate": 1.602218815579537e-09, "loss": 0.3839, "step": 21388 }, { "epoch": 0.9920686456400742, "grad_norm": 6.416614055633545, "learning_rate": 1.5836435031940655e-09, "loss": 0.3567, "step": 21389 }, { "epoch": 0.9921150278293135, "grad_norm": 6.14442777633667, "learning_rate": 1.5651764788632284e-09, "loss": 0.3182, "step": 21390 }, { "epoch": 0.9921614100185528, "grad_norm": 7.1412458419799805, "learning_rate": 1.5468177429861509e-09, "loss": 0.3222, "step": 21391 }, { "epoch": 0.9922077922077922, "grad_norm": 5.823623180389404, "learning_rate": 1.5285672959608477e-09, "loss": 0.3119, "step": 21392 }, { "epoch": 0.9922541743970316, "grad_norm": 8.771088600158691, "learning_rate": 1.5104251381825584e-09, "loss": 0.2992, "step": 21393 }, { "epoch": 0.9923005565862709, "grad_norm": 13.16694164276123, "learning_rate": 1.4923912700443023e-09, "loss": 0.3148, "step": 21394 }, { "epoch": 0.9923469387755102, "grad_norm": 5.875662803649902, "learning_rate": 1.4744656919374324e-09, "loss": 0.184, "step": 21395 }, { "epoch": 0.9923933209647495, "grad_norm": 8.320667266845703, "learning_rate": 1.4566484042494167e-09, "loss": 0.4833, "step": 21396 }, { "epoch": 0.9924397031539889, "grad_norm": 6.231271266937256, "learning_rate": 1.4389394073671681e-09, "loss": 0.2155, "step": 21397 }, { "epoch": 0.9924860853432282, "grad_norm": 4.60943603515625, "learning_rate": 1.4213387016737135e-09, "loss": 0.3111, "step": 21398 }, { "epoch": 0.9925324675324675, "grad_norm": 10.449969291687012, "learning_rate": 1.4038462875504145e-09, "loss": 0.4908, "step": 21399 }, { "epoch": 0.9925788497217068, "grad_norm": 8.846893310546875, "learning_rate": 1.386462165375857e-09, "loss": 0.2926, "step": 21400 }, { "epoch": 0.9926252319109462, "grad_norm": 8.391936302185059, "learning_rate": 1.369186335527517e-09, "loss": 0.2965, "step": 21401 }, { "epoch": 0.9926716141001856, "grad_norm": 7.571803092956543, "learning_rate": 1.3520187983789846e-09, "loss": 0.3252, "step": 21402 }, { "epoch": 0.9927179962894248, "grad_norm": 14.223992347717285, "learning_rate": 1.3349595543027394e-09, "loss": 0.2779, "step": 21403 }, { "epoch": 0.9927643784786642, "grad_norm": 5.109471321105957, "learning_rate": 1.3180086036679306e-09, "loss": 0.3114, "step": 21404 }, { "epoch": 0.9928107606679035, "grad_norm": 7.535673141479492, "learning_rate": 1.3011659468420423e-09, "loss": 0.252, "step": 21405 }, { "epoch": 0.9928571428571429, "grad_norm": 9.810615539550781, "learning_rate": 1.2844315841897826e-09, "loss": 0.4244, "step": 21406 }, { "epoch": 0.9929035250463822, "grad_norm": 19.670528411865234, "learning_rate": 1.267805516073639e-09, "loss": 0.3373, "step": 21407 }, { "epoch": 0.9929499072356215, "grad_norm": 5.662078857421875, "learning_rate": 1.2512877428544346e-09, "loss": 0.2009, "step": 21408 }, { "epoch": 0.9929962894248608, "grad_norm": 6.961945056915283, "learning_rate": 1.2348782648891055e-09, "loss": 0.2884, "step": 21409 }, { "epoch": 0.9930426716141002, "grad_norm": 7.065725803375244, "learning_rate": 1.2185770825334786e-09, "loss": 0.2989, "step": 21410 }, { "epoch": 0.9930890538033396, "grad_norm": 6.7865214347839355, "learning_rate": 1.2023841961411597e-09, "loss": 0.2656, "step": 21411 }, { "epoch": 0.9931354359925788, "grad_norm": 11.728767395019531, "learning_rate": 1.1862996060624243e-09, "loss": 0.3406, "step": 21412 }, { "epoch": 0.9931818181818182, "grad_norm": 9.851666450500488, "learning_rate": 1.1703233126458824e-09, "loss": 0.3717, "step": 21413 }, { "epoch": 0.9932282003710575, "grad_norm": 7.779237270355225, "learning_rate": 1.1544553162373684e-09, "loss": 0.3459, "step": 21414 }, { "epoch": 0.9932745825602969, "grad_norm": 8.460787773132324, "learning_rate": 1.1386956171816066e-09, "loss": 0.3487, "step": 21415 }, { "epoch": 0.9933209647495361, "grad_norm": 11.316225051879883, "learning_rate": 1.1230442158188804e-09, "loss": 0.4, "step": 21416 }, { "epoch": 0.9933673469387755, "grad_norm": 7.487008571624756, "learning_rate": 1.107501112488918e-09, "loss": 0.3694, "step": 21417 }, { "epoch": 0.9934137291280148, "grad_norm": 5.122243404388428, "learning_rate": 1.0920663075281168e-09, "loss": 0.3836, "step": 21418 }, { "epoch": 0.9934601113172542, "grad_norm": 6.766030788421631, "learning_rate": 1.0767398012712094e-09, "loss": 0.279, "step": 21419 }, { "epoch": 0.9935064935064936, "grad_norm": 5.900418758392334, "learning_rate": 1.0615215940501523e-09, "loss": 0.2992, "step": 21420 }, { "epoch": 0.9935528756957328, "grad_norm": 5.45145320892334, "learning_rate": 1.0464116861946815e-09, "loss": 0.3014, "step": 21421 }, { "epoch": 0.9935992578849722, "grad_norm": 5.513630390167236, "learning_rate": 1.0314100780317583e-09, "loss": 0.2511, "step": 21422 }, { "epoch": 0.9936456400742115, "grad_norm": 6.626354217529297, "learning_rate": 1.0165167698866773e-09, "loss": 0.3802, "step": 21423 }, { "epoch": 0.9936920222634509, "grad_norm": 6.007113456726074, "learning_rate": 1.001731762082514e-09, "loss": 0.2723, "step": 21424 }, { "epoch": 0.9937384044526901, "grad_norm": 9.996465682983398, "learning_rate": 9.870550549384572e-10, "loss": 0.3871, "step": 21425 }, { "epoch": 0.9937847866419295, "grad_norm": 10.24880313873291, "learning_rate": 9.72486648773696e-10, "loss": 0.316, "step": 21426 }, { "epoch": 0.9938311688311688, "grad_norm": 7.510653018951416, "learning_rate": 9.58026543902979e-10, "loss": 0.2496, "step": 21427 }, { "epoch": 0.9938775510204082, "grad_norm": 7.204339027404785, "learning_rate": 9.436747406399439e-10, "loss": 0.2444, "step": 21428 }, { "epoch": 0.9939239332096474, "grad_norm": 10.449297904968262, "learning_rate": 9.29431239295453e-10, "loss": 0.3599, "step": 21429 }, { "epoch": 0.9939703153988868, "grad_norm": 6.207732200622559, "learning_rate": 9.152960401781485e-10, "loss": 0.3139, "step": 21430 }, { "epoch": 0.9940166975881262, "grad_norm": 10.215093612670898, "learning_rate": 9.012691435944521e-10, "loss": 0.3975, "step": 21431 }, { "epoch": 0.9940630797773655, "grad_norm": 4.780359268188477, "learning_rate": 8.873505498474544e-10, "loss": 0.295, "step": 21432 }, { "epoch": 0.9941094619666048, "grad_norm": 14.283310890197754, "learning_rate": 8.735402592396913e-10, "loss": 0.4232, "step": 21433 }, { "epoch": 0.9941558441558441, "grad_norm": 7.8424153327941895, "learning_rate": 8.598382720698128e-10, "loss": 0.328, "step": 21434 }, { "epoch": 0.9942022263450835, "grad_norm": 4.738224983215332, "learning_rate": 8.462445886348036e-10, "loss": 0.2939, "step": 21435 }, { "epoch": 0.9942486085343228, "grad_norm": 6.12273645401001, "learning_rate": 8.327592092288728e-10, "loss": 0.3435, "step": 21436 }, { "epoch": 0.9942949907235622, "grad_norm": 13.353134155273438, "learning_rate": 8.19382134144564e-10, "loss": 0.4194, "step": 21437 }, { "epoch": 0.9943413729128014, "grad_norm": 10.782631874084473, "learning_rate": 8.061133636716456e-10, "loss": 0.441, "step": 21438 }, { "epoch": 0.9943877551020408, "grad_norm": 15.765623092651367, "learning_rate": 7.929528980976653e-10, "loss": 0.3957, "step": 21439 }, { "epoch": 0.9944341372912802, "grad_norm": 7.274514675140381, "learning_rate": 7.799007377073953e-10, "loss": 0.3566, "step": 21440 }, { "epoch": 0.9944805194805195, "grad_norm": 9.164639472961426, "learning_rate": 7.669568827839424e-10, "loss": 0.2321, "step": 21441 }, { "epoch": 0.9945269016697588, "grad_norm": 8.365819931030273, "learning_rate": 7.54121333607083e-10, "loss": 0.3691, "step": 21442 }, { "epoch": 0.9945732838589981, "grad_norm": 10.556777000427246, "learning_rate": 7.413940904554828e-10, "loss": 0.3186, "step": 21443 }, { "epoch": 0.9946196660482375, "grad_norm": 10.79609489440918, "learning_rate": 7.287751536050324e-10, "loss": 0.4233, "step": 21444 }, { "epoch": 0.9946660482374768, "grad_norm": 5.648626327514648, "learning_rate": 7.162645233282916e-10, "loss": 0.2653, "step": 21445 }, { "epoch": 0.9947124304267161, "grad_norm": 6.108729839324951, "learning_rate": 7.03862199897265e-10, "loss": 0.197, "step": 21446 }, { "epoch": 0.9947588126159554, "grad_norm": 10.322240829467773, "learning_rate": 6.915681835800714e-10, "loss": 0.4269, "step": 21447 }, { "epoch": 0.9948051948051948, "grad_norm": 6.49552059173584, "learning_rate": 6.793824746437194e-10, "loss": 0.2747, "step": 21448 }, { "epoch": 0.9948515769944342, "grad_norm": 9.585708618164062, "learning_rate": 6.673050733507768e-10, "loss": 0.3046, "step": 21449 }, { "epoch": 0.9948979591836735, "grad_norm": 4.31449556350708, "learning_rate": 6.553359799643666e-10, "loss": 0.2233, "step": 21450 }, { "epoch": 0.9949443413729128, "grad_norm": 4.544888973236084, "learning_rate": 6.434751947431706e-10, "loss": 0.2468, "step": 21451 }, { "epoch": 0.9949907235621521, "grad_norm": 5.61613655090332, "learning_rate": 6.317227179442053e-10, "loss": 0.2154, "step": 21452 }, { "epoch": 0.9950371057513915, "grad_norm": 5.581345081329346, "learning_rate": 6.200785498217121e-10, "loss": 0.221, "step": 21453 }, { "epoch": 0.9950834879406308, "grad_norm": 9.999155044555664, "learning_rate": 6.085426906288216e-10, "loss": 0.3583, "step": 21454 }, { "epoch": 0.9951298701298701, "grad_norm": 5.05189323425293, "learning_rate": 5.97115140614779e-10, "loss": 0.3381, "step": 21455 }, { "epoch": 0.9951762523191094, "grad_norm": 15.35299015045166, "learning_rate": 5.85795900027164e-10, "loss": 0.4597, "step": 21456 }, { "epoch": 0.9952226345083488, "grad_norm": 15.041966438293457, "learning_rate": 5.745849691113359e-10, "loss": 0.4074, "step": 21457 }, { "epoch": 0.9952690166975882, "grad_norm": 7.991485118865967, "learning_rate": 5.634823481104334e-10, "loss": 0.3585, "step": 21458 }, { "epoch": 0.9953153988868274, "grad_norm": 6.302680492401123, "learning_rate": 5.524880372648201e-10, "loss": 0.2237, "step": 21459 }, { "epoch": 0.9953617810760668, "grad_norm": 9.706099510192871, "learning_rate": 5.416020368126384e-10, "loss": 0.3691, "step": 21460 }, { "epoch": 0.9954081632653061, "grad_norm": 5.375993251800537, "learning_rate": 5.30824346989256e-10, "loss": 0.3185, "step": 21461 }, { "epoch": 0.9954545454545455, "grad_norm": 8.064157485961914, "learning_rate": 5.201549680289297e-10, "loss": 0.2393, "step": 21462 }, { "epoch": 0.9955009276437848, "grad_norm": 12.23473834991455, "learning_rate": 5.095939001625861e-10, "loss": 0.2868, "step": 21463 }, { "epoch": 0.9955473098330241, "grad_norm": 8.005157470703125, "learning_rate": 4.991411436189308e-10, "loss": 0.2245, "step": 21464 }, { "epoch": 0.9955936920222634, "grad_norm": 7.6824493408203125, "learning_rate": 4.887966986238946e-10, "loss": 0.2987, "step": 21465 }, { "epoch": 0.9956400742115028, "grad_norm": 7.936574459075928, "learning_rate": 4.785605654028525e-10, "loss": 0.3225, "step": 21466 }, { "epoch": 0.9956864564007422, "grad_norm": 5.709604263305664, "learning_rate": 4.684327441761838e-10, "loss": 0.2435, "step": 21467 }, { "epoch": 0.9957328385899814, "grad_norm": 4.886187553405762, "learning_rate": 4.5841323516426784e-10, "loss": 0.3274, "step": 21468 }, { "epoch": 0.9957792207792208, "grad_norm": 6.154211521148682, "learning_rate": 4.485020385841532e-10, "loss": 0.3227, "step": 21469 }, { "epoch": 0.9958256029684601, "grad_norm": 6.901125907897949, "learning_rate": 4.3869915465011295e-10, "loss": 0.3646, "step": 21470 }, { "epoch": 0.9958719851576995, "grad_norm": 6.379782676696777, "learning_rate": 4.290045835741996e-10, "loss": 0.2833, "step": 21471 }, { "epoch": 0.9959183673469387, "grad_norm": 5.781249046325684, "learning_rate": 4.194183255673556e-10, "loss": 0.4016, "step": 21472 }, { "epoch": 0.9959647495361781, "grad_norm": 6.463577747344971, "learning_rate": 4.099403808366376e-10, "loss": 0.3864, "step": 21473 }, { "epoch": 0.9960111317254174, "grad_norm": 7.905545711517334, "learning_rate": 4.005707495874367e-10, "loss": 0.3377, "step": 21474 }, { "epoch": 0.9960575139146568, "grad_norm": 7.319280624389648, "learning_rate": 3.913094320229238e-10, "loss": 0.3228, "step": 21475 }, { "epoch": 0.9961038961038962, "grad_norm": 11.725369453430176, "learning_rate": 3.821564283434942e-10, "loss": 0.4021, "step": 21476 }, { "epoch": 0.9961502782931354, "grad_norm": 6.326411724090576, "learning_rate": 3.7311173874787775e-10, "loss": 0.2675, "step": 21477 }, { "epoch": 0.9961966604823748, "grad_norm": 8.565958976745605, "learning_rate": 3.6417536343202886e-10, "loss": 0.3073, "step": 21478 }, { "epoch": 0.9962430426716141, "grad_norm": 9.743314743041992, "learning_rate": 3.5534730258912633e-10, "loss": 0.3839, "step": 21479 }, { "epoch": 0.9962894248608535, "grad_norm": 7.937228679656982, "learning_rate": 3.466275564101285e-10, "loss": 0.3129, "step": 21480 }, { "epoch": 0.9963358070500927, "grad_norm": 5.19262170791626, "learning_rate": 3.3801612508488346e-10, "loss": 0.2468, "step": 21481 }, { "epoch": 0.9963821892393321, "grad_norm": 4.919435977935791, "learning_rate": 3.2951300879935365e-10, "loss": 0.3476, "step": 21482 }, { "epoch": 0.9964285714285714, "grad_norm": 9.799683570861816, "learning_rate": 3.211182077378361e-10, "loss": 0.3447, "step": 21483 }, { "epoch": 0.9964749536178108, "grad_norm": 7.05605936050415, "learning_rate": 3.128317220824073e-10, "loss": 0.3244, "step": 21484 }, { "epoch": 0.99652133580705, "grad_norm": 14.182849884033203, "learning_rate": 3.0465355201181324e-10, "loss": 0.3395, "step": 21485 }, { "epoch": 0.9965677179962894, "grad_norm": 6.867716312408447, "learning_rate": 2.965836977047998e-10, "loss": 0.2921, "step": 21486 }, { "epoch": 0.9966141001855288, "grad_norm": 11.0398588180542, "learning_rate": 2.886221593345617e-10, "loss": 0.4627, "step": 21487 }, { "epoch": 0.9966604823747681, "grad_norm": 15.894574165344238, "learning_rate": 2.8076893707429385e-10, "loss": 0.4734, "step": 21488 }, { "epoch": 0.9967068645640074, "grad_norm": 7.473523139953613, "learning_rate": 2.7302403109441544e-10, "loss": 0.3925, "step": 21489 }, { "epoch": 0.9967532467532467, "grad_norm": 4.899521827697754, "learning_rate": 2.65387441562015e-10, "loss": 0.281, "step": 21490 }, { "epoch": 0.9967996289424861, "grad_norm": 4.339685440063477, "learning_rate": 2.578591686430709e-10, "loss": 0.2705, "step": 21491 }, { "epoch": 0.9968460111317254, "grad_norm": 11.533761024475098, "learning_rate": 2.5043921250023086e-10, "loss": 0.4115, "step": 21492 }, { "epoch": 0.9968923933209648, "grad_norm": 10.694098472595215, "learning_rate": 2.4312757329503225e-10, "loss": 0.3634, "step": 21493 }, { "epoch": 0.996938775510204, "grad_norm": 7.4428935050964355, "learning_rate": 2.3592425118512675e-10, "loss": 0.2322, "step": 21494 }, { "epoch": 0.9969851576994434, "grad_norm": 9.81699275970459, "learning_rate": 2.2882924632705583e-10, "loss": 0.4048, "step": 21495 }, { "epoch": 0.9970315398886828, "grad_norm": 7.301259517669678, "learning_rate": 2.2184255887403028e-10, "loss": 0.2993, "step": 21496 }, { "epoch": 0.9970779220779221, "grad_norm": 8.316431045532227, "learning_rate": 2.149641889775955e-10, "loss": 0.3361, "step": 21497 }, { "epoch": 0.9971243042671614, "grad_norm": 7.636383056640625, "learning_rate": 2.0819413678707656e-10, "loss": 0.311, "step": 21498 }, { "epoch": 0.9971706864564007, "grad_norm": 5.100831985473633, "learning_rate": 2.0153240244902283e-10, "loss": 0.3194, "step": 21499 }, { "epoch": 0.9972170686456401, "grad_norm": 6.134580135345459, "learning_rate": 1.9497898610720822e-10, "loss": 0.3442, "step": 21500 }, { "epoch": 0.9972634508348794, "grad_norm": 7.225186824798584, "learning_rate": 1.8853388790429638e-10, "loss": 0.2408, "step": 21501 }, { "epoch": 0.9973098330241187, "grad_norm": 9.444147109985352, "learning_rate": 1.8219710797962032e-10, "loss": 0.2251, "step": 21502 }, { "epoch": 0.997356215213358, "grad_norm": 5.179754257202148, "learning_rate": 1.7596864647084767e-10, "loss": 0.2146, "step": 21503 }, { "epoch": 0.9974025974025974, "grad_norm": 8.224246978759766, "learning_rate": 1.698485035123154e-10, "loss": 0.3282, "step": 21504 }, { "epoch": 0.9974489795918368, "grad_norm": 7.803663730621338, "learning_rate": 1.6383667923669523e-10, "loss": 0.3058, "step": 21505 }, { "epoch": 0.9974953617810761, "grad_norm": 7.0327534675598145, "learning_rate": 1.579331737749934e-10, "loss": 0.3323, "step": 21506 }, { "epoch": 0.9975417439703154, "grad_norm": 12.65414810180664, "learning_rate": 1.5213798725377538e-10, "loss": 0.4217, "step": 21507 }, { "epoch": 0.9975881261595547, "grad_norm": 8.574481964111328, "learning_rate": 1.4645111979960657e-10, "loss": 0.3263, "step": 21508 }, { "epoch": 0.9976345083487941, "grad_norm": 13.219144821166992, "learning_rate": 1.4087257153572176e-10, "loss": 0.4123, "step": 21509 }, { "epoch": 0.9976808905380334, "grad_norm": 7.0817437171936035, "learning_rate": 1.3540234258258011e-10, "loss": 0.2932, "step": 21510 }, { "epoch": 0.9977272727272727, "grad_norm": 8.23332405090332, "learning_rate": 1.300404330584204e-10, "loss": 0.3259, "step": 21511 }, { "epoch": 0.997773654916512, "grad_norm": 7.0968337059021, "learning_rate": 1.2478684308037115e-10, "loss": 0.3408, "step": 21512 }, { "epoch": 0.9978200371057514, "grad_norm": 8.892290115356445, "learning_rate": 1.1964157276112e-10, "loss": 0.2618, "step": 21513 }, { "epoch": 0.9978664192949908, "grad_norm": 13.00003433227539, "learning_rate": 1.1460462221279944e-10, "loss": 0.3745, "step": 21514 }, { "epoch": 0.99791280148423, "grad_norm": 10.563193321228027, "learning_rate": 1.0967599154476649e-10, "loss": 0.427, "step": 21515 }, { "epoch": 0.9979591836734694, "grad_norm": 5.842106342315674, "learning_rate": 1.0485568086304742e-10, "loss": 0.1958, "step": 21516 }, { "epoch": 0.9980055658627087, "grad_norm": 7.558642387390137, "learning_rate": 1.0014369027255833e-10, "loss": 0.3705, "step": 21517 }, { "epoch": 0.9980519480519481, "grad_norm": 5.551349639892578, "learning_rate": 9.55400198754397e-11, "loss": 0.2422, "step": 21518 }, { "epoch": 0.9980983302411874, "grad_norm": 7.546125411987305, "learning_rate": 9.104466977105653e-11, "loss": 0.4081, "step": 21519 }, { "epoch": 0.9981447124304267, "grad_norm": 4.698178768157959, "learning_rate": 8.665764005710842e-11, "loss": 0.3226, "step": 21520 }, { "epoch": 0.998191094619666, "grad_norm": 14.43226432800293, "learning_rate": 8.237893082851944e-11, "loss": 0.3238, "step": 21521 }, { "epoch": 0.9982374768089054, "grad_norm": 7.521625995635986, "learning_rate": 7.820854217799323e-11, "loss": 0.2965, "step": 21522 }, { "epoch": 0.9982838589981448, "grad_norm": 8.340229034423828, "learning_rate": 7.414647419601295e-11, "loss": 0.2929, "step": 21523 }, { "epoch": 0.998330241187384, "grad_norm": 12.450108528137207, "learning_rate": 7.019272697028623e-11, "loss": 0.3887, "step": 21524 }, { "epoch": 0.9983766233766234, "grad_norm": 4.610231876373291, "learning_rate": 6.634730058685535e-11, "loss": 0.2263, "step": 21525 }, { "epoch": 0.9984230055658627, "grad_norm": 9.981742858886719, "learning_rate": 6.261019512898702e-11, "loss": 0.3273, "step": 21526 }, { "epoch": 0.9984693877551021, "grad_norm": 5.030852794647217, "learning_rate": 5.898141067717244e-11, "loss": 0.2916, "step": 21527 }, { "epoch": 0.9985157699443413, "grad_norm": 7.996539115905762, "learning_rate": 5.5460947310237435e-11, "loss": 0.3559, "step": 21528 }, { "epoch": 0.9985621521335807, "grad_norm": 7.231776714324951, "learning_rate": 5.204880510478738e-11, "loss": 0.3993, "step": 21529 }, { "epoch": 0.99860853432282, "grad_norm": 5.864333629608154, "learning_rate": 4.874498413409701e-11, "loss": 0.281, "step": 21530 }, { "epoch": 0.9986549165120594, "grad_norm": 11.890752792358398, "learning_rate": 4.5549484470330805e-11, "loss": 0.3725, "step": 21531 }, { "epoch": 0.9987012987012988, "grad_norm": 4.9649977684021, "learning_rate": 4.2462306182877724e-11, "loss": 0.3135, "step": 21532 }, { "epoch": 0.998747680890538, "grad_norm": 4.438089370727539, "learning_rate": 3.948344933779602e-11, "loss": 0.249, "step": 21533 }, { "epoch": 0.9987940630797774, "grad_norm": 9.01081371307373, "learning_rate": 3.661291400058886e-11, "loss": 0.2946, "step": 21534 }, { "epoch": 0.9988404452690167, "grad_norm": 6.087655067443848, "learning_rate": 3.3850700232873626e-11, "loss": 0.4528, "step": 21535 }, { "epoch": 0.9988868274582561, "grad_norm": 8.495268821716309, "learning_rate": 3.1196808094047235e-11, "loss": 0.3207, "step": 21536 }, { "epoch": 0.9989332096474953, "grad_norm": 13.178572654724121, "learning_rate": 2.8651237642396412e-11, "loss": 0.3463, "step": 21537 }, { "epoch": 0.9989795918367347, "grad_norm": 17.180137634277344, "learning_rate": 2.6213988932877186e-11, "loss": 0.3205, "step": 21538 }, { "epoch": 0.999025974025974, "grad_norm": 4.144109725952148, "learning_rate": 2.3885062018225158e-11, "loss": 0.2787, "step": 21539 }, { "epoch": 0.9990723562152134, "grad_norm": 9.6677885055542, "learning_rate": 2.166445694895547e-11, "loss": 0.3801, "step": 21540 }, { "epoch": 0.9991187384044526, "grad_norm": 8.217793464660645, "learning_rate": 1.9552173773362827e-11, "loss": 0.3119, "step": 21541 }, { "epoch": 0.999165120593692, "grad_norm": 4.70139741897583, "learning_rate": 1.754821253641126e-11, "loss": 0.2843, "step": 21542 }, { "epoch": 0.9992115027829314, "grad_norm": 5.227654457092285, "learning_rate": 1.5652573282509687e-11, "loss": 0.261, "step": 21543 }, { "epoch": 0.9992578849721707, "grad_norm": 12.170806884765625, "learning_rate": 1.3865256052181252e-11, "loss": 0.3209, "step": 21544 }, { "epoch": 0.99930426716141, "grad_norm": 5.738627910614014, "learning_rate": 1.2186260884283763e-11, "loss": 0.3229, "step": 21545 }, { "epoch": 0.9993506493506493, "grad_norm": 10.811928749084473, "learning_rate": 1.0615587814899464e-11, "loss": 0.3625, "step": 21546 }, { "epoch": 0.9993970315398887, "grad_norm": 5.282402992248535, "learning_rate": 9.15323687844527e-12, "loss": 0.2392, "step": 21547 }, { "epoch": 0.999443413729128, "grad_norm": 10.995625495910645, "learning_rate": 7.799208106562539e-12, "loss": 0.2836, "step": 21548 }, { "epoch": 0.9994897959183674, "grad_norm": 8.305371284484863, "learning_rate": 6.553501528117068e-12, "loss": 0.3741, "step": 21549 }, { "epoch": 0.9995361781076066, "grad_norm": 5.4937567710876465, "learning_rate": 5.416117170864432e-12, "loss": 0.3024, "step": 21550 }, { "epoch": 0.999582560296846, "grad_norm": 5.005396366119385, "learning_rate": 4.3870550586744275e-12, "loss": 0.2542, "step": 21551 }, { "epoch": 0.9996289424860854, "grad_norm": 10.933785438537598, "learning_rate": 3.466315214306626e-12, "loss": 0.3699, "step": 21552 }, { "epoch": 0.9996753246753247, "grad_norm": 11.166631698608398, "learning_rate": 2.6538976577450416e-12, "loss": 0.4337, "step": 21553 }, { "epoch": 0.999721706864564, "grad_norm": 13.226829528808594, "learning_rate": 1.949802406753243e-12, "loss": 0.3307, "step": 21554 }, { "epoch": 0.9997680890538033, "grad_norm": 7.078526496887207, "learning_rate": 1.3540294757641292e-12, "loss": 0.2979, "step": 21555 }, { "epoch": 0.9998144712430427, "grad_norm": 9.943278312683105, "learning_rate": 8.665788786554885e-13, "loss": 0.3123, "step": 21556 }, { "epoch": 0.999860853432282, "grad_norm": 6.711916446685791, "learning_rate": 4.874506254193278e-13, "loss": 0.3134, "step": 21557 }, { "epoch": 0.9999072356215213, "grad_norm": 9.801102638244629, "learning_rate": 2.1664472438232e-13, "loss": 0.2478, "step": 21558 }, { "epoch": 0.9999536178107606, "grad_norm": 7.92402458190918, "learning_rate": 5.416118165069151e-14, "loss": 0.345, "step": 21559 }, { "epoch": 1.0, "grad_norm": 7.089865684509277, "learning_rate": 0.0, "loss": 0.2059, "step": 21560 }, { "epoch": 1.0, "eval_loss": 0.3195072114467621, "eval_runtime": 38.0377, "eval_samples_per_second": 45.823, "eval_steps_per_second": 5.731, "step": 21560 }, { "epoch": 1.0, "step": 21560, "total_flos": 3.0341660520166195e+18, "train_loss": 0.3569130549777527, "train_runtime": 21207.8337, "train_samples_per_second": 8.133, "train_steps_per_second": 1.017 } ], "logging_steps": 1, "max_steps": 21560, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 21560, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.0341660520166195e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }