|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9999822547158093, |
|
"eval_steps": 500, |
|
"global_step": 14088, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007098113676290526, |
|
"grad_norm": 7.898312201157063, |
|
"learning_rate": 9.995030526764163e-06, |
|
"loss": 1.7908, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0014196227352581052, |
|
"grad_norm": 5.391248052357186, |
|
"learning_rate": 9.987931279284396e-06, |
|
"loss": 0.7438, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002129434102887158, |
|
"grad_norm": 5.37248606279888, |
|
"learning_rate": 9.980832031804629e-06, |
|
"loss": 0.6486, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0028392454705162104, |
|
"grad_norm": 6.374151691345343, |
|
"learning_rate": 9.973732784324862e-06, |
|
"loss": 0.6267, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.003549056838145263, |
|
"grad_norm": 8.880583324938707, |
|
"learning_rate": 9.966633536845095e-06, |
|
"loss": 0.5867, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.004258868205774316, |
|
"grad_norm": 6.684640112932781, |
|
"learning_rate": 9.959534289365328e-06, |
|
"loss": 0.5473, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.004968679573403368, |
|
"grad_norm": 9.329177151842533, |
|
"learning_rate": 9.952435041885561e-06, |
|
"loss": 0.549, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.005678490941032421, |
|
"grad_norm": 4.2697386372932575, |
|
"learning_rate": 9.945335794405794e-06, |
|
"loss": 0.527, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.006388302308661473, |
|
"grad_norm": 4.981195314782428, |
|
"learning_rate": 9.938236546926027e-06, |
|
"loss": 0.5015, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.007098113676290526, |
|
"grad_norm": 3.5718526890722155, |
|
"learning_rate": 9.931137299446259e-06, |
|
"loss": 0.5148, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.007807925043919578, |
|
"grad_norm": 5.35860863602349, |
|
"learning_rate": 9.924038051966492e-06, |
|
"loss": 0.5116, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.008517736411548632, |
|
"grad_norm": 3.2550821239727434, |
|
"learning_rate": 9.916938804486725e-06, |
|
"loss": 0.4992, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.009227547779177683, |
|
"grad_norm": 3.4354498076448032, |
|
"learning_rate": 9.909839557006958e-06, |
|
"loss": 0.5088, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.009937359146806737, |
|
"grad_norm": 4.331679140736939, |
|
"learning_rate": 9.902740309527191e-06, |
|
"loss": 0.5139, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.010647170514435788, |
|
"grad_norm": 3.4337848966487265, |
|
"learning_rate": 9.895641062047424e-06, |
|
"loss": 0.5041, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.011356981882064842, |
|
"grad_norm": 8.243351710682422, |
|
"learning_rate": 9.888541814567657e-06, |
|
"loss": 0.5142, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.012066793249693893, |
|
"grad_norm": 4.091704188438657, |
|
"learning_rate": 9.88144256708789e-06, |
|
"loss": 0.4807, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.012776604617322947, |
|
"grad_norm": 21.564891334339755, |
|
"learning_rate": 9.874343319608124e-06, |
|
"loss": 0.5092, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.013486415984951998, |
|
"grad_norm": 3.1521060424258973, |
|
"learning_rate": 9.867244072128355e-06, |
|
"loss": 0.4787, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.014196227352581052, |
|
"grad_norm": 3.986481726421801, |
|
"learning_rate": 9.860144824648588e-06, |
|
"loss": 0.4827, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.014906038720210105, |
|
"grad_norm": 4.774263941683351, |
|
"learning_rate": 9.853045577168821e-06, |
|
"loss": 0.4775, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.015615850087839157, |
|
"grad_norm": 7.968327682274227, |
|
"learning_rate": 9.845946329689053e-06, |
|
"loss": 0.4716, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.01632566145546821, |
|
"grad_norm": 10.121205974855524, |
|
"learning_rate": 9.838847082209286e-06, |
|
"loss": 0.4969, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.017035472823097263, |
|
"grad_norm": 7.454679720256471, |
|
"learning_rate": 9.831747834729519e-06, |
|
"loss": 0.4923, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.017745284190726313, |
|
"grad_norm": 17.103084568275037, |
|
"learning_rate": 9.824648587249752e-06, |
|
"loss": 0.4701, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.018455095558355367, |
|
"grad_norm": 4.48293929960256, |
|
"learning_rate": 9.817549339769985e-06, |
|
"loss": 0.4734, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.01916490692598442, |
|
"grad_norm": 5.345114387506581, |
|
"learning_rate": 9.810450092290218e-06, |
|
"loss": 0.4894, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.019874718293613473, |
|
"grad_norm": 19.40561032433512, |
|
"learning_rate": 9.803350844810451e-06, |
|
"loss": 0.4791, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.020584529661242523, |
|
"grad_norm": 14.25299022016476, |
|
"learning_rate": 9.796251597330684e-06, |
|
"loss": 0.4699, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.021294341028871577, |
|
"grad_norm": 8.257072932675099, |
|
"learning_rate": 9.789152349850918e-06, |
|
"loss": 0.4712, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02200415239650063, |
|
"grad_norm": 7.954026403143938, |
|
"learning_rate": 9.782053102371149e-06, |
|
"loss": 0.4703, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.022713963764129683, |
|
"grad_norm": 11.392767049791958, |
|
"learning_rate": 9.774953854891382e-06, |
|
"loss": 0.4991, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.023423775131758737, |
|
"grad_norm": 3.6589701257251392, |
|
"learning_rate": 9.767854607411615e-06, |
|
"loss": 0.48, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.024133586499387787, |
|
"grad_norm": 2.8317614498971095, |
|
"learning_rate": 9.760755359931848e-06, |
|
"loss": 0.473, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.02484339786701684, |
|
"grad_norm": 3.3672884329345467, |
|
"learning_rate": 9.753656112452081e-06, |
|
"loss": 0.4807, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.025553209234645893, |
|
"grad_norm": 2.918860353664653, |
|
"learning_rate": 9.746556864972314e-06, |
|
"loss": 0.474, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.026263020602274947, |
|
"grad_norm": 3.985430160063577, |
|
"learning_rate": 9.739457617492548e-06, |
|
"loss": 0.4606, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.026972831969903997, |
|
"grad_norm": 3.8499162197950216, |
|
"learning_rate": 9.73235837001278e-06, |
|
"loss": 0.474, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.02768264333753305, |
|
"grad_norm": 2.955339700163119, |
|
"learning_rate": 9.725259122533012e-06, |
|
"loss": 0.472, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.028392454705162103, |
|
"grad_norm": 5.589731350821559, |
|
"learning_rate": 9.718159875053245e-06, |
|
"loss": 0.4698, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.029102266072791157, |
|
"grad_norm": 3.9824871173931973, |
|
"learning_rate": 9.711060627573478e-06, |
|
"loss": 0.4581, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.02981207744042021, |
|
"grad_norm": 2.524559409598369, |
|
"learning_rate": 9.70396138009371e-06, |
|
"loss": 0.4478, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.03052188880804926, |
|
"grad_norm": 2.970731368598553, |
|
"learning_rate": 9.696862132613943e-06, |
|
"loss": 0.4508, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.031231700175678313, |
|
"grad_norm": 2.893829595170148, |
|
"learning_rate": 9.689762885134176e-06, |
|
"loss": 0.4379, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.03194151154330736, |
|
"grad_norm": 22.795684932698137, |
|
"learning_rate": 9.682663637654409e-06, |
|
"loss": 0.4482, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.03265132291093642, |
|
"grad_norm": 3.2812945854632236, |
|
"learning_rate": 9.675564390174642e-06, |
|
"loss": 0.4599, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.03336113427856547, |
|
"grad_norm": 11.615453520589618, |
|
"learning_rate": 9.668465142694875e-06, |
|
"loss": 0.4417, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.03407094564619453, |
|
"grad_norm": 7.726986291359829, |
|
"learning_rate": 9.661365895215108e-06, |
|
"loss": 0.4594, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.03478075701382358, |
|
"grad_norm": 4.365039492938302, |
|
"learning_rate": 9.654266647735341e-06, |
|
"loss": 0.4669, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.03549056838145263, |
|
"grad_norm": 6.54988906481092, |
|
"learning_rate": 9.647167400255574e-06, |
|
"loss": 0.4567, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03620037974908168, |
|
"grad_norm": 8.933278546995766, |
|
"learning_rate": 9.640068152775806e-06, |
|
"loss": 0.4519, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.03691019111671073, |
|
"grad_norm": 3.7761657369108907, |
|
"learning_rate": 9.632968905296039e-06, |
|
"loss": 0.4501, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.03762000248433979, |
|
"grad_norm": 3.9418116527565377, |
|
"learning_rate": 9.625869657816272e-06, |
|
"loss": 0.4561, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.03832981385196884, |
|
"grad_norm": 3.5489889583606438, |
|
"learning_rate": 9.618770410336505e-06, |
|
"loss": 0.4598, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.03903962521959789, |
|
"grad_norm": 3.5164230189602548, |
|
"learning_rate": 9.611671162856738e-06, |
|
"loss": 0.4717, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.03974943658722695, |
|
"grad_norm": 2.1822863392109206, |
|
"learning_rate": 9.604571915376971e-06, |
|
"loss": 0.48, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.040459247954856, |
|
"grad_norm": 2.5677413826305413, |
|
"learning_rate": 9.597472667897204e-06, |
|
"loss": 0.4605, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.041169059322485047, |
|
"grad_norm": 3.011759104822335, |
|
"learning_rate": 9.590373420417438e-06, |
|
"loss": 0.4605, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.0418788706901141, |
|
"grad_norm": 2.56502573080614, |
|
"learning_rate": 9.58327417293767e-06, |
|
"loss": 0.4494, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.04258868205774315, |
|
"grad_norm": 3.2396125278123806, |
|
"learning_rate": 9.576174925457902e-06, |
|
"loss": 0.4542, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.04329849342537221, |
|
"grad_norm": 3.480681910714182, |
|
"learning_rate": 9.569075677978135e-06, |
|
"loss": 0.4548, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.04400830479300126, |
|
"grad_norm": 2.623695100630613, |
|
"learning_rate": 9.561976430498368e-06, |
|
"loss": 0.4594, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.04471811616063031, |
|
"grad_norm": 3.042303011325611, |
|
"learning_rate": 9.5548771830186e-06, |
|
"loss": 0.4557, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.04542792752825937, |
|
"grad_norm": 2.8781600946277863, |
|
"learning_rate": 9.547777935538833e-06, |
|
"loss": 0.484, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.04613773889588842, |
|
"grad_norm": 3.3284195205047684, |
|
"learning_rate": 9.540678688059066e-06, |
|
"loss": 0.4481, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.04684755026351747, |
|
"grad_norm": 3.5159109068224987, |
|
"learning_rate": 9.533579440579299e-06, |
|
"loss": 0.4665, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.04755736163114652, |
|
"grad_norm": 6.322136362721481, |
|
"learning_rate": 9.526480193099532e-06, |
|
"loss": 0.4585, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.04826717299877557, |
|
"grad_norm": 21.902103769968996, |
|
"learning_rate": 9.519380945619765e-06, |
|
"loss": 0.4446, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.04897698436640463, |
|
"grad_norm": 3.6046359318609356, |
|
"learning_rate": 9.512281698139998e-06, |
|
"loss": 0.4519, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.04968679573403368, |
|
"grad_norm": 3.039690187186011, |
|
"learning_rate": 9.505182450660231e-06, |
|
"loss": 0.4448, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.05039660710166273, |
|
"grad_norm": 2.608964873836775, |
|
"learning_rate": 9.498083203180465e-06, |
|
"loss": 0.4486, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.05110641846929179, |
|
"grad_norm": 3.368889371027321, |
|
"learning_rate": 9.490983955700696e-06, |
|
"loss": 0.4617, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.05181622983692084, |
|
"grad_norm": 4.094036998235093, |
|
"learning_rate": 9.483884708220929e-06, |
|
"loss": 0.4569, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.05252604120454989, |
|
"grad_norm": 2.979892302450325, |
|
"learning_rate": 9.476785460741162e-06, |
|
"loss": 0.4645, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.05323585257217894, |
|
"grad_norm": 3.676607621277054, |
|
"learning_rate": 9.469686213261395e-06, |
|
"loss": 0.4407, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.05394566393980799, |
|
"grad_norm": 359.9140493382262, |
|
"learning_rate": 9.462586965781628e-06, |
|
"loss": 0.4262, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.05465547530743705, |
|
"grad_norm": 4.447118089247447, |
|
"learning_rate": 9.455487718301861e-06, |
|
"loss": 0.4344, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.0553652866750661, |
|
"grad_norm": 4.569754671227615, |
|
"learning_rate": 9.448388470822095e-06, |
|
"loss": 0.4462, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.05607509804269516, |
|
"grad_norm": 2.3728524211263067, |
|
"learning_rate": 9.441289223342328e-06, |
|
"loss": 0.4386, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.05678490941032421, |
|
"grad_norm": 2.5997362569615903, |
|
"learning_rate": 9.434189975862559e-06, |
|
"loss": 0.4537, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.057494720777953257, |
|
"grad_norm": 4.859327134293274, |
|
"learning_rate": 9.427090728382792e-06, |
|
"loss": 0.4514, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.05820453214558231, |
|
"grad_norm": 2.6304161060559905, |
|
"learning_rate": 9.419991480903025e-06, |
|
"loss": 0.4306, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.05891434351321136, |
|
"grad_norm": 3.504607730078166, |
|
"learning_rate": 9.412892233423258e-06, |
|
"loss": 0.454, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.05962415488084042, |
|
"grad_norm": 3.3227222733710864, |
|
"learning_rate": 9.40579298594349e-06, |
|
"loss": 0.4407, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.06033396624846947, |
|
"grad_norm": 3.328718377292454, |
|
"learning_rate": 9.398693738463723e-06, |
|
"loss": 0.4581, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.06104377761609852, |
|
"grad_norm": 3.4977954338913864, |
|
"learning_rate": 9.391594490983956e-06, |
|
"loss": 0.4284, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.06175358898372758, |
|
"grad_norm": 3.228432256709841, |
|
"learning_rate": 9.384495243504189e-06, |
|
"loss": 0.4373, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.06246340035135663, |
|
"grad_norm": 3.1586832054050964, |
|
"learning_rate": 9.377395996024422e-06, |
|
"loss": 0.4348, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.06317321171898568, |
|
"grad_norm": 13.155465477764636, |
|
"learning_rate": 9.370296748544655e-06, |
|
"loss": 0.4217, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.06388302308661473, |
|
"grad_norm": 15.543581430412525, |
|
"learning_rate": 9.363197501064888e-06, |
|
"loss": 0.4593, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.06459283445424378, |
|
"grad_norm": 8.921864061523843, |
|
"learning_rate": 9.356098253585121e-06, |
|
"loss": 0.4608, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.06530264582187284, |
|
"grad_norm": 5.3983003526617335, |
|
"learning_rate": 9.348999006105353e-06, |
|
"loss": 0.4514, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.06601245718950188, |
|
"grad_norm": 7.595139513838182, |
|
"learning_rate": 9.341899758625586e-06, |
|
"loss": 0.4273, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.06672226855713094, |
|
"grad_norm": 3.2331459925046815, |
|
"learning_rate": 9.334800511145819e-06, |
|
"loss": 0.422, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.06743207992476, |
|
"grad_norm": 3.8699272404865686, |
|
"learning_rate": 9.327701263666052e-06, |
|
"loss": 0.4477, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.06814189129238905, |
|
"grad_norm": 2.68446192265652, |
|
"learning_rate": 9.320602016186285e-06, |
|
"loss": 0.4449, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.0688517026600181, |
|
"grad_norm": 2.637260503772899, |
|
"learning_rate": 9.313502768706518e-06, |
|
"loss": 0.4532, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.06956151402764715, |
|
"grad_norm": 3.9618993923437085, |
|
"learning_rate": 9.306403521226751e-06, |
|
"loss": 0.4534, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.07027132539527621, |
|
"grad_norm": 3.429568261104227, |
|
"learning_rate": 9.299304273746985e-06, |
|
"loss": 0.452, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.07098113676290525, |
|
"grad_norm": 3.663179434126313, |
|
"learning_rate": 9.292205026267218e-06, |
|
"loss": 0.439, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07169094813053431, |
|
"grad_norm": 4.408975026773321, |
|
"learning_rate": 9.285105778787449e-06, |
|
"loss": 0.4184, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.07240075949816337, |
|
"grad_norm": 2.415108601943808, |
|
"learning_rate": 9.278006531307682e-06, |
|
"loss": 0.4342, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.07311057086579241, |
|
"grad_norm": 6.698239896408658, |
|
"learning_rate": 9.270907283827915e-06, |
|
"loss": 0.4535, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.07382038223342147, |
|
"grad_norm": 11.189940656850219, |
|
"learning_rate": 9.263808036348147e-06, |
|
"loss": 0.4192, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.07453019360105052, |
|
"grad_norm": 3.85625217339617, |
|
"learning_rate": 9.25670878886838e-06, |
|
"loss": 0.4278, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.07524000496867958, |
|
"grad_norm": 32.21212360326382, |
|
"learning_rate": 9.249609541388613e-06, |
|
"loss": 0.4509, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.07594981633630862, |
|
"grad_norm": 5.919396215012425, |
|
"learning_rate": 9.242510293908846e-06, |
|
"loss": 0.4525, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.07665962770393768, |
|
"grad_norm": 5.904196801283348, |
|
"learning_rate": 9.235411046429079e-06, |
|
"loss": 0.4422, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.07736943907156674, |
|
"grad_norm": 4.486326467883555, |
|
"learning_rate": 9.228311798949312e-06, |
|
"loss": 0.4685, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.07807925043919578, |
|
"grad_norm": 11.745437972621287, |
|
"learning_rate": 9.221212551469545e-06, |
|
"loss": 0.4646, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.07878906180682484, |
|
"grad_norm": 6.5181010077573145, |
|
"learning_rate": 9.214113303989778e-06, |
|
"loss": 0.443, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.0794988731744539, |
|
"grad_norm": 11.270983163134655, |
|
"learning_rate": 9.207014056510012e-06, |
|
"loss": 0.4605, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.08020868454208294, |
|
"grad_norm": 3.7069012881976975, |
|
"learning_rate": 9.199914809030243e-06, |
|
"loss": 0.4459, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.080918495909712, |
|
"grad_norm": 8.667969696855055, |
|
"learning_rate": 9.192815561550476e-06, |
|
"loss": 0.4556, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.08162830727734105, |
|
"grad_norm": 7.559635091166787, |
|
"learning_rate": 9.185716314070709e-06, |
|
"loss": 0.4357, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.08233811864497009, |
|
"grad_norm": 17.430750080762536, |
|
"learning_rate": 9.178617066590942e-06, |
|
"loss": 0.4301, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.08304793001259915, |
|
"grad_norm": 4.351276343100192, |
|
"learning_rate": 9.171517819111175e-06, |
|
"loss": 0.4184, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.0837577413802282, |
|
"grad_norm": 6.471581804191342, |
|
"learning_rate": 9.164418571631408e-06, |
|
"loss": 0.4516, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.08446755274785726, |
|
"grad_norm": 4.3294841586504855, |
|
"learning_rate": 9.157319324151642e-06, |
|
"loss": 0.4211, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.0851773641154863, |
|
"grad_norm": 4.385208474639979, |
|
"learning_rate": 9.150220076671875e-06, |
|
"loss": 0.4203, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.08588717548311536, |
|
"grad_norm": 5.8972560031050065, |
|
"learning_rate": 9.143120829192106e-06, |
|
"loss": 0.4284, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.08659698685074442, |
|
"grad_norm": 4.604861487503107, |
|
"learning_rate": 9.136021581712339e-06, |
|
"loss": 0.4277, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.08730679821837346, |
|
"grad_norm": 4.321101106082931, |
|
"learning_rate": 9.128922334232572e-06, |
|
"loss": 0.4216, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.08801660958600252, |
|
"grad_norm": 11.04681514927992, |
|
"learning_rate": 9.121823086752805e-06, |
|
"loss": 0.4181, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.08872642095363158, |
|
"grad_norm": 4.31849841935359, |
|
"learning_rate": 9.114723839273037e-06, |
|
"loss": 0.4264, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.08943623232126062, |
|
"grad_norm": 4.674845237449041, |
|
"learning_rate": 9.10762459179327e-06, |
|
"loss": 0.4281, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.09014604368888968, |
|
"grad_norm": 3.447760098274006, |
|
"learning_rate": 9.100525344313503e-06, |
|
"loss": 0.4304, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.09085585505651873, |
|
"grad_norm": 7.189274212443334, |
|
"learning_rate": 9.093426096833736e-06, |
|
"loss": 0.4252, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.09156566642414778, |
|
"grad_norm": 19.69024332171456, |
|
"learning_rate": 9.08632684935397e-06, |
|
"loss": 0.4336, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.09227547779177683, |
|
"grad_norm": 55.22992334000048, |
|
"learning_rate": 9.079227601874202e-06, |
|
"loss": 0.4256, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.09298528915940589, |
|
"grad_norm": 5.066816349007046, |
|
"learning_rate": 9.072128354394435e-06, |
|
"loss": 0.407, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.09369510052703495, |
|
"grad_norm": 6.37711035743208, |
|
"learning_rate": 9.065029106914668e-06, |
|
"loss": 0.4257, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.09440491189466399, |
|
"grad_norm": 4.696335985596692, |
|
"learning_rate": 9.057929859434902e-06, |
|
"loss": 0.4188, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.09511472326229305, |
|
"grad_norm": 15.82313293688476, |
|
"learning_rate": 9.050830611955133e-06, |
|
"loss": 0.433, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.0958245346299221, |
|
"grad_norm": 5.692904308794704, |
|
"learning_rate": 9.043731364475366e-06, |
|
"loss": 0.4269, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.09653434599755115, |
|
"grad_norm": 15.303387309564082, |
|
"learning_rate": 9.0366321169956e-06, |
|
"loss": 0.4174, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.0972441573651802, |
|
"grad_norm": 3.9801928029461666, |
|
"learning_rate": 9.029532869515832e-06, |
|
"loss": 0.4132, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.09795396873280926, |
|
"grad_norm": 3.523690216407914, |
|
"learning_rate": 9.022433622036065e-06, |
|
"loss": 0.4281, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.0986637801004383, |
|
"grad_norm": 7.099888052775042, |
|
"learning_rate": 9.015334374556298e-06, |
|
"loss": 0.4431, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.09937359146806736, |
|
"grad_norm": 6.538985360116972, |
|
"learning_rate": 9.008235127076532e-06, |
|
"loss": 0.4172, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.10008340283569642, |
|
"grad_norm": 7.959800060910741, |
|
"learning_rate": 9.001135879596763e-06, |
|
"loss": 0.4243, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.10079321420332546, |
|
"grad_norm": 8.790445771142394, |
|
"learning_rate": 8.994036632116996e-06, |
|
"loss": 0.4254, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.10150302557095452, |
|
"grad_norm": 4.285966498899181, |
|
"learning_rate": 8.98693738463723e-06, |
|
"loss": 0.4122, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.10221283693858357, |
|
"grad_norm": 6.286806035291326, |
|
"learning_rate": 8.979838137157462e-06, |
|
"loss": 0.433, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.10292264830621263, |
|
"grad_norm": 7.3066834855049345, |
|
"learning_rate": 8.972738889677695e-06, |
|
"loss": 0.4258, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.10363245967384167, |
|
"grad_norm": 6.5695520214785565, |
|
"learning_rate": 8.965639642197927e-06, |
|
"loss": 0.4164, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.10434227104147073, |
|
"grad_norm": 20.93641513291179, |
|
"learning_rate": 8.95854039471816e-06, |
|
"loss": 0.4095, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.10505208240909979, |
|
"grad_norm": 5.657042957398901, |
|
"learning_rate": 8.951441147238393e-06, |
|
"loss": 0.4168, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.10576189377672883, |
|
"grad_norm": 6.076726326140851, |
|
"learning_rate": 8.944341899758626e-06, |
|
"loss": 0.4112, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.10647170514435789, |
|
"grad_norm": 5.092565408624009, |
|
"learning_rate": 8.93724265227886e-06, |
|
"loss": 0.4269, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.10718151651198694, |
|
"grad_norm": 2.894012289515038, |
|
"learning_rate": 8.930143404799092e-06, |
|
"loss": 0.4239, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.10789132787961599, |
|
"grad_norm": 3.7173915295575637, |
|
"learning_rate": 8.923044157319325e-06, |
|
"loss": 0.4288, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.10860113924724504, |
|
"grad_norm": 3.025402596869208, |
|
"learning_rate": 8.915944909839559e-06, |
|
"loss": 0.4421, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.1093109506148741, |
|
"grad_norm": 8.212502187483185, |
|
"learning_rate": 8.90884566235979e-06, |
|
"loss": 0.4241, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.11002076198250314, |
|
"grad_norm": 5.773771344339805, |
|
"learning_rate": 8.901746414880023e-06, |
|
"loss": 0.4355, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.1107305733501322, |
|
"grad_norm": 4.158426885786249, |
|
"learning_rate": 8.894647167400256e-06, |
|
"loss": 0.436, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.11144038471776126, |
|
"grad_norm": 6.56740526603354, |
|
"learning_rate": 8.88754791992049e-06, |
|
"loss": 0.4397, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.11215019608539031, |
|
"grad_norm": 8.263663970839248, |
|
"learning_rate": 8.880448672440722e-06, |
|
"loss": 0.4201, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.11286000745301936, |
|
"grad_norm": 2.424368072981463, |
|
"learning_rate": 8.873349424960955e-06, |
|
"loss": 0.4235, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.11356981882064841, |
|
"grad_norm": 6.489454078474153, |
|
"learning_rate": 8.866250177481189e-06, |
|
"loss": 0.4243, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.11427963018827747, |
|
"grad_norm": 3.541006640864803, |
|
"learning_rate": 8.859150930001422e-06, |
|
"loss": 0.4313, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.11498944155590651, |
|
"grad_norm": 12.323605643567065, |
|
"learning_rate": 8.852051682521653e-06, |
|
"loss": 0.4253, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.11569925292353557, |
|
"grad_norm": 4.600225981753095, |
|
"learning_rate": 8.844952435041886e-06, |
|
"loss": 0.42, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.11640906429116463, |
|
"grad_norm": 8.589796661850784, |
|
"learning_rate": 8.83785318756212e-06, |
|
"loss": 0.4219, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.11711887565879367, |
|
"grad_norm": 10.182911442610934, |
|
"learning_rate": 8.830753940082352e-06, |
|
"loss": 0.4285, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.11782868702642273, |
|
"grad_norm": 5.186284643440543, |
|
"learning_rate": 8.823654692602584e-06, |
|
"loss": 0.4139, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.11853849839405178, |
|
"grad_norm": 5.23154203196852, |
|
"learning_rate": 8.816555445122817e-06, |
|
"loss": 0.4251, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.11924830976168084, |
|
"grad_norm": 6.9839536559537505, |
|
"learning_rate": 8.80945619764305e-06, |
|
"loss": 0.4233, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.11995812112930988, |
|
"grad_norm": 6.376179671333375, |
|
"learning_rate": 8.802356950163283e-06, |
|
"loss": 0.4089, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.12066793249693894, |
|
"grad_norm": 3.824113092644885, |
|
"learning_rate": 8.795257702683516e-06, |
|
"loss": 0.4347, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.121377743864568, |
|
"grad_norm": 11.282936555631686, |
|
"learning_rate": 8.78815845520375e-06, |
|
"loss": 0.423, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.12208755523219704, |
|
"grad_norm": 4.218268240264897, |
|
"learning_rate": 8.781059207723982e-06, |
|
"loss": 0.4188, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.1227973665998261, |
|
"grad_norm": 3.943582749857493, |
|
"learning_rate": 8.773959960244215e-06, |
|
"loss": 0.4276, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.12350717796745515, |
|
"grad_norm": 9.679933576473074, |
|
"learning_rate": 8.766860712764449e-06, |
|
"loss": 0.42, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.1242169893350842, |
|
"grad_norm": 15.414309701859608, |
|
"learning_rate": 8.75976146528468e-06, |
|
"loss": 0.4316, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.12492680070271325, |
|
"grad_norm": 9.429737278511919, |
|
"learning_rate": 8.752662217804913e-06, |
|
"loss": 0.422, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.1256366120703423, |
|
"grad_norm": 23.10494354556988, |
|
"learning_rate": 8.745562970325146e-06, |
|
"loss": 0.4276, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.12634642343797137, |
|
"grad_norm": 13.541923724604345, |
|
"learning_rate": 8.73846372284538e-06, |
|
"loss": 0.4271, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.1270562348056004, |
|
"grad_norm": 2.846694152973873, |
|
"learning_rate": 8.731364475365612e-06, |
|
"loss": 0.4151, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.12776604617322945, |
|
"grad_norm": 6.934597145753292, |
|
"learning_rate": 8.724265227885845e-06, |
|
"loss": 0.4247, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.12847585754085852, |
|
"grad_norm": 3.435112347451886, |
|
"learning_rate": 8.717165980406079e-06, |
|
"loss": 0.4225, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.12918566890848757, |
|
"grad_norm": 3.4829699382867823, |
|
"learning_rate": 8.71006673292631e-06, |
|
"loss": 0.4458, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.1298954802761166, |
|
"grad_norm": 5.077072978235785, |
|
"learning_rate": 8.702967485446543e-06, |
|
"loss": 0.4283, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.13060529164374568, |
|
"grad_norm": 5.917300462616358, |
|
"learning_rate": 8.695868237966776e-06, |
|
"loss": 0.4119, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.13131510301137472, |
|
"grad_norm": 10.693397543481625, |
|
"learning_rate": 8.68876899048701e-06, |
|
"loss": 0.4305, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.13202491437900377, |
|
"grad_norm": 3.6456780546239456, |
|
"learning_rate": 8.681669743007242e-06, |
|
"loss": 0.4391, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.13273472574663284, |
|
"grad_norm": 14.68038430401678, |
|
"learning_rate": 8.674570495527474e-06, |
|
"loss": 0.4111, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.13344453711426188, |
|
"grad_norm": 5.101838800313352, |
|
"learning_rate": 8.667471248047707e-06, |
|
"loss": 0.4323, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.13415434848189095, |
|
"grad_norm": 4.497686869632987, |
|
"learning_rate": 8.66037200056794e-06, |
|
"loss": 0.4154, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.13486415984952, |
|
"grad_norm": 9.511227824879294, |
|
"learning_rate": 8.653272753088173e-06, |
|
"loss": 0.4295, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.13557397121714904, |
|
"grad_norm": 5.344003791146658, |
|
"learning_rate": 8.646173505608406e-06, |
|
"loss": 0.4254, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.1362837825847781, |
|
"grad_norm": 8.10132953922794, |
|
"learning_rate": 8.63907425812864e-06, |
|
"loss": 0.4219, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.13699359395240715, |
|
"grad_norm": 8.840386508572838, |
|
"learning_rate": 8.631975010648872e-06, |
|
"loss": 0.416, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.1377034053200362, |
|
"grad_norm": 5.639143297883941, |
|
"learning_rate": 8.624875763169106e-06, |
|
"loss": 0.4246, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.13841321668766526, |
|
"grad_norm": 5.375177742256173, |
|
"learning_rate": 8.617776515689339e-06, |
|
"loss": 0.4263, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.1391230280552943, |
|
"grad_norm": 13.872628674699765, |
|
"learning_rate": 8.61067726820957e-06, |
|
"loss": 0.4368, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.13983283942292335, |
|
"grad_norm": 6.612051924514802, |
|
"learning_rate": 8.603578020729803e-06, |
|
"loss": 0.4235, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.14054265079055242, |
|
"grad_norm": 7.420592038738273, |
|
"learning_rate": 8.596478773250036e-06, |
|
"loss": 0.4315, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.14125246215818146, |
|
"grad_norm": 3.883491154973528, |
|
"learning_rate": 8.58937952577027e-06, |
|
"loss": 0.4394, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.1419622735258105, |
|
"grad_norm": 4.031594828995353, |
|
"learning_rate": 8.582280278290502e-06, |
|
"loss": 0.4274, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.14267208489343958, |
|
"grad_norm": 6.272786134188022, |
|
"learning_rate": 8.575181030810736e-06, |
|
"loss": 0.42, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.14338189626106862, |
|
"grad_norm": 8.45570312290703, |
|
"learning_rate": 8.568081783330967e-06, |
|
"loss": 0.4336, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.14409170762869766, |
|
"grad_norm": 3.8497660341027693, |
|
"learning_rate": 8.5609825358512e-06, |
|
"loss": 0.4259, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.14480151899632673, |
|
"grad_norm": 10.12069309920438, |
|
"learning_rate": 8.553883288371433e-06, |
|
"loss": 0.4208, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.14551133036395578, |
|
"grad_norm": 5.128975578462212, |
|
"learning_rate": 8.546784040891666e-06, |
|
"loss": 0.4215, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.14622114173158482, |
|
"grad_norm": 4.45602583843403, |
|
"learning_rate": 8.5396847934119e-06, |
|
"loss": 0.4135, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.1469309530992139, |
|
"grad_norm": 5.172069700283945, |
|
"learning_rate": 8.53258554593213e-06, |
|
"loss": 0.4122, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.14764076446684293, |
|
"grad_norm": 7.147216717746435, |
|
"learning_rate": 8.525486298452364e-06, |
|
"loss": 0.4423, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.14835057583447198, |
|
"grad_norm": 14.946527022046613, |
|
"learning_rate": 8.518387050972597e-06, |
|
"loss": 0.4094, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.14906038720210105, |
|
"grad_norm": 8.460267496546166, |
|
"learning_rate": 8.51128780349283e-06, |
|
"loss": 0.4186, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.1497701985697301, |
|
"grad_norm": 8.93023218882671, |
|
"learning_rate": 8.504188556013063e-06, |
|
"loss": 0.4062, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.15048000993735916, |
|
"grad_norm": 3.213343020811049, |
|
"learning_rate": 8.497089308533296e-06, |
|
"loss": 0.3994, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.1511898213049882, |
|
"grad_norm": 8.718801113577726, |
|
"learning_rate": 8.48999006105353e-06, |
|
"loss": 0.4232, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.15189963267261725, |
|
"grad_norm": 2.832643819770658, |
|
"learning_rate": 8.482890813573762e-06, |
|
"loss": 0.4261, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.15260944404024632, |
|
"grad_norm": 3.2673324405839255, |
|
"learning_rate": 8.475791566093996e-06, |
|
"loss": 0.42, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.15331925540787536, |
|
"grad_norm": 3.2621489770969214, |
|
"learning_rate": 8.468692318614227e-06, |
|
"loss": 0.4282, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.1540290667755044, |
|
"grad_norm": 17.34420036770468, |
|
"learning_rate": 8.46159307113446e-06, |
|
"loss": 0.4198, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.15473887814313347, |
|
"grad_norm": 3.6148665582762094, |
|
"learning_rate": 8.454493823654693e-06, |
|
"loss": 0.4157, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.15544868951076252, |
|
"grad_norm": 2.775836768166624, |
|
"learning_rate": 8.447394576174926e-06, |
|
"loss": 0.417, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.15615850087839156, |
|
"grad_norm": 5.052761832862739, |
|
"learning_rate": 8.44029532869516e-06, |
|
"loss": 0.4035, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.15686831224602063, |
|
"grad_norm": 4.778779661514333, |
|
"learning_rate": 8.433196081215393e-06, |
|
"loss": 0.4445, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.15757812361364967, |
|
"grad_norm": 4.6274782338902325, |
|
"learning_rate": 8.426096833735626e-06, |
|
"loss": 0.4147, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.15828793498127872, |
|
"grad_norm": 4.310225523508245, |
|
"learning_rate": 8.418997586255857e-06, |
|
"loss": 0.4167, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.1589977463489078, |
|
"grad_norm": 4.802519845626961, |
|
"learning_rate": 8.41189833877609e-06, |
|
"loss": 0.4052, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.15970755771653683, |
|
"grad_norm": 3.949892413625005, |
|
"learning_rate": 8.404799091296323e-06, |
|
"loss": 0.4263, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.16041736908416587, |
|
"grad_norm": 5.685661053410237, |
|
"learning_rate": 8.397699843816556e-06, |
|
"loss": 0.4148, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.16112718045179494, |
|
"grad_norm": 4.337480471983148, |
|
"learning_rate": 8.39060059633679e-06, |
|
"loss": 0.4101, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.161836991819424, |
|
"grad_norm": 4.809277499740254, |
|
"learning_rate": 8.38350134885702e-06, |
|
"loss": 0.4071, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.16254680318705303, |
|
"grad_norm": 7.364507480899371, |
|
"learning_rate": 8.376402101377254e-06, |
|
"loss": 0.4021, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.1632566145546821, |
|
"grad_norm": 5.408145626972555, |
|
"learning_rate": 8.369302853897487e-06, |
|
"loss": 0.4154, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.16396642592231114, |
|
"grad_norm": 2.9449217220121784, |
|
"learning_rate": 8.36220360641772e-06, |
|
"loss": 0.4296, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.16467623728994019, |
|
"grad_norm": 3.843647555602573, |
|
"learning_rate": 8.355104358937953e-06, |
|
"loss": 0.4197, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.16538604865756926, |
|
"grad_norm": 5.843629733774891, |
|
"learning_rate": 8.348005111458186e-06, |
|
"loss": 0.4052, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.1660958600251983, |
|
"grad_norm": 4.182196885965926, |
|
"learning_rate": 8.34090586397842e-06, |
|
"loss": 0.4304, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.16680567139282734, |
|
"grad_norm": 12.343897765958163, |
|
"learning_rate": 8.333806616498653e-06, |
|
"loss": 0.4057, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.1675154827604564, |
|
"grad_norm": 4.52770872028285, |
|
"learning_rate": 8.326707369018886e-06, |
|
"loss": 0.4234, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.16822529412808546, |
|
"grad_norm": 5.473115632671873, |
|
"learning_rate": 8.319608121539117e-06, |
|
"loss": 0.4127, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.16893510549571453, |
|
"grad_norm": 5.243162829393595, |
|
"learning_rate": 8.31250887405935e-06, |
|
"loss": 0.4148, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.16964491686334357, |
|
"grad_norm": 9.638919529909746, |
|
"learning_rate": 8.305409626579583e-06, |
|
"loss": 0.4244, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.1703547282309726, |
|
"grad_norm": 5.824204497516263, |
|
"learning_rate": 8.298310379099816e-06, |
|
"loss": 0.3991, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.17106453959860168, |
|
"grad_norm": 8.92013550945478, |
|
"learning_rate": 8.29121113162005e-06, |
|
"loss": 0.4107, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.17177435096623073, |
|
"grad_norm": 4.310339052965044, |
|
"learning_rate": 8.284111884140283e-06, |
|
"loss": 0.4198, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.17248416233385977, |
|
"grad_norm": 3.674140188675587, |
|
"learning_rate": 8.277012636660514e-06, |
|
"loss": 0.4066, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.17319397370148884, |
|
"grad_norm": 3.2816580938205986, |
|
"learning_rate": 8.269913389180747e-06, |
|
"loss": 0.3948, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.17390378506911788, |
|
"grad_norm": 3.119520711268051, |
|
"learning_rate": 8.26281414170098e-06, |
|
"loss": 0.4236, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.17461359643674693, |
|
"grad_norm": 3.9529990200341216, |
|
"learning_rate": 8.255714894221213e-06, |
|
"loss": 0.4028, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.175323407804376, |
|
"grad_norm": 6.5624619571577, |
|
"learning_rate": 8.248615646741446e-06, |
|
"loss": 0.4207, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.17603321917200504, |
|
"grad_norm": 6.563862400109423, |
|
"learning_rate": 8.24151639926168e-06, |
|
"loss": 0.4234, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.17674303053963408, |
|
"grad_norm": 4.124646423199101, |
|
"learning_rate": 8.234417151781911e-06, |
|
"loss": 0.421, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.17745284190726315, |
|
"grad_norm": 8.460797246337737, |
|
"learning_rate": 8.227317904302144e-06, |
|
"loss": 0.4169, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.1781626532748922, |
|
"grad_norm": 4.636207121737827, |
|
"learning_rate": 8.220218656822377e-06, |
|
"loss": 0.4154, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.17887246464252124, |
|
"grad_norm": 15.193279765427832, |
|
"learning_rate": 8.21311940934261e-06, |
|
"loss": 0.4, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.1795822760101503, |
|
"grad_norm": 8.394690912531237, |
|
"learning_rate": 8.206020161862843e-06, |
|
"loss": 0.3994, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.18029208737777935, |
|
"grad_norm": 11.829872869588135, |
|
"learning_rate": 8.198920914383076e-06, |
|
"loss": 0.4045, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.1810018987454084, |
|
"grad_norm": 10.598164946336963, |
|
"learning_rate": 8.19182166690331e-06, |
|
"loss": 0.4167, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.18171171011303747, |
|
"grad_norm": 8.644167493937724, |
|
"learning_rate": 8.184722419423543e-06, |
|
"loss": 0.4193, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.1824215214806665, |
|
"grad_norm": 5.532113862418252, |
|
"learning_rate": 8.177623171943776e-06, |
|
"loss": 0.4134, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.18313133284829555, |
|
"grad_norm": 8.962347784457894, |
|
"learning_rate": 8.170523924464007e-06, |
|
"loss": 0.4231, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.18384114421592462, |
|
"grad_norm": 4.789480578365759, |
|
"learning_rate": 8.16342467698424e-06, |
|
"loss": 0.4056, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.18455095558355367, |
|
"grad_norm": 7.463666547462272, |
|
"learning_rate": 8.156325429504473e-06, |
|
"loss": 0.4082, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.1852607669511827, |
|
"grad_norm": 3.543632295285487, |
|
"learning_rate": 8.149226182024706e-06, |
|
"loss": 0.3957, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.18597057831881178, |
|
"grad_norm": 10.128862482609126, |
|
"learning_rate": 8.14212693454494e-06, |
|
"loss": 0.4104, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.18668038968644082, |
|
"grad_norm": 2.279815139257822, |
|
"learning_rate": 8.135027687065171e-06, |
|
"loss": 0.4023, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.1873902010540699, |
|
"grad_norm": 5.651432220535337, |
|
"learning_rate": 8.127928439585404e-06, |
|
"loss": 0.4174, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.18810001242169894, |
|
"grad_norm": 2.764126752423827, |
|
"learning_rate": 8.120829192105637e-06, |
|
"loss": 0.4316, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.18880982378932798, |
|
"grad_norm": 2.2008942019632443, |
|
"learning_rate": 8.11372994462587e-06, |
|
"loss": 0.3998, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.18951963515695705, |
|
"grad_norm": 2.6464894767494194, |
|
"learning_rate": 8.106630697146103e-06, |
|
"loss": 0.4152, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.1902294465245861, |
|
"grad_norm": 2.9891233500309697, |
|
"learning_rate": 8.099531449666336e-06, |
|
"loss": 0.4065, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.19093925789221514, |
|
"grad_norm": 3.2947192783933303, |
|
"learning_rate": 8.092432202186568e-06, |
|
"loss": 0.4096, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.1916490692598442, |
|
"grad_norm": 2.6266501022263093, |
|
"learning_rate": 8.085332954706801e-06, |
|
"loss": 0.4079, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.19235888062747325, |
|
"grad_norm": 2.0600161188196258, |
|
"learning_rate": 8.078233707227034e-06, |
|
"loss": 0.4245, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.1930686919951023, |
|
"grad_norm": 3.4259686474049587, |
|
"learning_rate": 8.071134459747267e-06, |
|
"loss": 0.4168, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.19377850336273136, |
|
"grad_norm": 4.184352662206747, |
|
"learning_rate": 8.0640352122675e-06, |
|
"loss": 0.4265, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.1944883147303604, |
|
"grad_norm": 3.7320888080359174, |
|
"learning_rate": 8.056935964787733e-06, |
|
"loss": 0.4172, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.19519812609798945, |
|
"grad_norm": 3.750448672171502, |
|
"learning_rate": 8.049836717307966e-06, |
|
"loss": 0.4327, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.19590793746561852, |
|
"grad_norm": 3.0158382271152564, |
|
"learning_rate": 8.0427374698282e-06, |
|
"loss": 0.4284, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.19661774883324756, |
|
"grad_norm": 2.438159262347708, |
|
"learning_rate": 8.035638222348433e-06, |
|
"loss": 0.4117, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.1973275602008766, |
|
"grad_norm": 4.795802800628808, |
|
"learning_rate": 8.028538974868664e-06, |
|
"loss": 0.4207, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.19803737156850568, |
|
"grad_norm": 2.5291141301554405, |
|
"learning_rate": 8.021439727388897e-06, |
|
"loss": 0.4146, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.19874718293613472, |
|
"grad_norm": 2.4740979454164727, |
|
"learning_rate": 8.01434047990913e-06, |
|
"loss": 0.3999, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.19945699430376376, |
|
"grad_norm": 3.4467777684569927, |
|
"learning_rate": 8.007241232429363e-06, |
|
"loss": 0.4151, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.20016680567139283, |
|
"grad_norm": 2.741445348023422, |
|
"learning_rate": 8.000141984949596e-06, |
|
"loss": 0.4165, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.20087661703902188, |
|
"grad_norm": 2.977547725757033, |
|
"learning_rate": 7.99304273746983e-06, |
|
"loss": 0.4137, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.20158642840665092, |
|
"grad_norm": 3.493123708582949, |
|
"learning_rate": 7.985943489990061e-06, |
|
"loss": 0.4095, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.20229623977428, |
|
"grad_norm": 9.43644672917822, |
|
"learning_rate": 7.978844242510294e-06, |
|
"loss": 0.4066, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.20300605114190903, |
|
"grad_norm": 4.050870492633986, |
|
"learning_rate": 7.971744995030527e-06, |
|
"loss": 0.4079, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.2037158625095381, |
|
"grad_norm": 7.830134940271083, |
|
"learning_rate": 7.96464574755076e-06, |
|
"loss": 0.3896, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.20442567387716715, |
|
"grad_norm": 7.557535176254197, |
|
"learning_rate": 7.957546500070993e-06, |
|
"loss": 0.4096, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.2051354852447962, |
|
"grad_norm": 4.715465621080843, |
|
"learning_rate": 7.950447252591226e-06, |
|
"loss": 0.3907, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.20584529661242526, |
|
"grad_norm": 30.299863630729803, |
|
"learning_rate": 7.943348005111458e-06, |
|
"loss": 0.4142, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.2065551079800543, |
|
"grad_norm": 13.362349279952854, |
|
"learning_rate": 7.936248757631691e-06, |
|
"loss": 0.4211, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.20726491934768335, |
|
"grad_norm": 7.166470527615742, |
|
"learning_rate": 7.929149510151924e-06, |
|
"loss": 0.4038, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.20797473071531242, |
|
"grad_norm": 218.37559359733393, |
|
"learning_rate": 7.922050262672157e-06, |
|
"loss": 0.3814, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.20868454208294146, |
|
"grad_norm": 4.776318350142146, |
|
"learning_rate": 7.91495101519239e-06, |
|
"loss": 0.4033, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.2093943534505705, |
|
"grad_norm": 6.050705359465637, |
|
"learning_rate": 7.907851767712623e-06, |
|
"loss": 0.4006, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.21010416481819957, |
|
"grad_norm": 7.0609749250244125, |
|
"learning_rate": 7.900752520232857e-06, |
|
"loss": 0.3996, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.21081397618582862, |
|
"grad_norm": 5.2294105499183985, |
|
"learning_rate": 7.89365327275309e-06, |
|
"loss": 0.3906, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.21152378755345766, |
|
"grad_norm": 5.037453517661707, |
|
"learning_rate": 7.886554025273323e-06, |
|
"loss": 0.3925, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.21223359892108673, |
|
"grad_norm": 4.329367488091813, |
|
"learning_rate": 7.879454777793554e-06, |
|
"loss": 0.4005, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.21294341028871577, |
|
"grad_norm": 4.587934783884384, |
|
"learning_rate": 7.872355530313787e-06, |
|
"loss": 0.3949, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.21365322165634482, |
|
"grad_norm": 4.34538375508175, |
|
"learning_rate": 7.86525628283402e-06, |
|
"loss": 0.3963, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.2143630330239739, |
|
"grad_norm": 14.538466945533717, |
|
"learning_rate": 7.858157035354253e-06, |
|
"loss": 0.4145, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.21507284439160293, |
|
"grad_norm": 5.725604081866674, |
|
"learning_rate": 7.851057787874487e-06, |
|
"loss": 0.397, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.21578265575923197, |
|
"grad_norm": 4.100595238075657, |
|
"learning_rate": 7.843958540394718e-06, |
|
"loss": 0.407, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.21649246712686104, |
|
"grad_norm": 3.6102459737641452, |
|
"learning_rate": 7.836859292914951e-06, |
|
"loss": 0.3941, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.2172022784944901, |
|
"grad_norm": 9.48884086833176, |
|
"learning_rate": 7.829760045435184e-06, |
|
"loss": 0.3981, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.21791208986211913, |
|
"grad_norm": 5.265598040684193, |
|
"learning_rate": 7.822660797955417e-06, |
|
"loss": 0.3865, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.2186219012297482, |
|
"grad_norm": 5.853395704700518, |
|
"learning_rate": 7.81556155047565e-06, |
|
"loss": 0.4089, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.21933171259737724, |
|
"grad_norm": 2.867041909768411, |
|
"learning_rate": 7.808462302995883e-06, |
|
"loss": 0.411, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.22004152396500629, |
|
"grad_norm": 6.447556295363806, |
|
"learning_rate": 7.801363055516117e-06, |
|
"loss": 0.4054, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.22075133533263536, |
|
"grad_norm": 6.665403407542621, |
|
"learning_rate": 7.794263808036348e-06, |
|
"loss": 0.4331, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.2214611467002644, |
|
"grad_norm": 3.740543632288075, |
|
"learning_rate": 7.787164560556581e-06, |
|
"loss": 0.4132, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.22217095806789347, |
|
"grad_norm": 19.12212944661018, |
|
"learning_rate": 7.780065313076814e-06, |
|
"loss": 0.4229, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.2228807694355225, |
|
"grad_norm": 5.646216224084272, |
|
"learning_rate": 7.772966065597047e-06, |
|
"loss": 0.4123, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.22359058080315156, |
|
"grad_norm": 12.549975615460761, |
|
"learning_rate": 7.76586681811728e-06, |
|
"loss": 0.4156, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.22430039217078063, |
|
"grad_norm": 5.34509934381609, |
|
"learning_rate": 7.758767570637513e-06, |
|
"loss": 0.3935, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.22501020353840967, |
|
"grad_norm": 4.868356423660982, |
|
"learning_rate": 7.751668323157747e-06, |
|
"loss": 0.4121, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.2257200149060387, |
|
"grad_norm": 3.604594374317723, |
|
"learning_rate": 7.74456907567798e-06, |
|
"loss": 0.3949, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.22642982627366778, |
|
"grad_norm": 2.6762060130385565, |
|
"learning_rate": 7.737469828198211e-06, |
|
"loss": 0.4192, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.22713963764129683, |
|
"grad_norm": 3.7277037964888957, |
|
"learning_rate": 7.730370580718444e-06, |
|
"loss": 0.4063, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.22784944900892587, |
|
"grad_norm": 4.2017308560808395, |
|
"learning_rate": 7.723271333238677e-06, |
|
"loss": 0.3983, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.22855926037655494, |
|
"grad_norm": 6.82717398390433, |
|
"learning_rate": 7.71617208575891e-06, |
|
"loss": 0.4003, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.22926907174418398, |
|
"grad_norm": 3.3720424392184865, |
|
"learning_rate": 7.709072838279143e-06, |
|
"loss": 0.384, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.22997888311181303, |
|
"grad_norm": 15.234041629621501, |
|
"learning_rate": 7.701973590799375e-06, |
|
"loss": 0.3936, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.2306886944794421, |
|
"grad_norm": 6.450291645106787, |
|
"learning_rate": 7.694874343319608e-06, |
|
"loss": 0.4153, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.23139850584707114, |
|
"grad_norm": 5.0596647748479056, |
|
"learning_rate": 7.687775095839841e-06, |
|
"loss": 0.4098, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.23210831721470018, |
|
"grad_norm": 6.351369993733097, |
|
"learning_rate": 7.680675848360074e-06, |
|
"loss": 0.4036, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.23281812858232925, |
|
"grad_norm": 7.706709044787595, |
|
"learning_rate": 7.673576600880307e-06, |
|
"loss": 0.4137, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.2335279399499583, |
|
"grad_norm": 6.111103199878706, |
|
"learning_rate": 7.66647735340054e-06, |
|
"loss": 0.4163, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.23423775131758734, |
|
"grad_norm": 3.182362422678598, |
|
"learning_rate": 7.659378105920773e-06, |
|
"loss": 0.4007, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.2349475626852164, |
|
"grad_norm": 3.929827344563346, |
|
"learning_rate": 7.652278858441005e-06, |
|
"loss": 0.4011, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.23565737405284545, |
|
"grad_norm": 6.606808853169358, |
|
"learning_rate": 7.645179610961238e-06, |
|
"loss": 0.4113, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.2363671854204745, |
|
"grad_norm": 7.983975561443669, |
|
"learning_rate": 7.638080363481471e-06, |
|
"loss": 0.3941, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.23707699678810357, |
|
"grad_norm": 2.551810232754013, |
|
"learning_rate": 7.630981116001704e-06, |
|
"loss": 0.3987, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.2377868081557326, |
|
"grad_norm": 16.325804366695763, |
|
"learning_rate": 7.623881868521937e-06, |
|
"loss": 0.3814, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.23849661952336168, |
|
"grad_norm": 17.86582631307272, |
|
"learning_rate": 7.61678262104217e-06, |
|
"loss": 0.4065, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.23920643089099072, |
|
"grad_norm": 4.439905284094514, |
|
"learning_rate": 7.6096833735624035e-06, |
|
"loss": 0.4079, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.23991624225861977, |
|
"grad_norm": 13.632710588001641, |
|
"learning_rate": 7.602584126082636e-06, |
|
"loss": 0.4075, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.24062605362624884, |
|
"grad_norm": 7.4557485788963405, |
|
"learning_rate": 7.595484878602869e-06, |
|
"loss": 0.399, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.24133586499387788, |
|
"grad_norm": 6.032057911933067, |
|
"learning_rate": 7.588385631123102e-06, |
|
"loss": 0.3892, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.24204567636150692, |
|
"grad_norm": 5.1424876309924, |
|
"learning_rate": 7.581286383643335e-06, |
|
"loss": 0.396, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.242755487729136, |
|
"grad_norm": 3.6691932120100987, |
|
"learning_rate": 7.574187136163567e-06, |
|
"loss": 0.4108, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.24346529909676504, |
|
"grad_norm": 2.8083232656002033, |
|
"learning_rate": 7.5670878886838004e-06, |
|
"loss": 0.3984, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.24417511046439408, |
|
"grad_norm": 13.589049355107566, |
|
"learning_rate": 7.5599886412040335e-06, |
|
"loss": 0.3957, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.24488492183202315, |
|
"grad_norm": 6.813624263530042, |
|
"learning_rate": 7.552889393724265e-06, |
|
"loss": 0.4105, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.2455947331996522, |
|
"grad_norm": 13.609829369379536, |
|
"learning_rate": 7.545790146244498e-06, |
|
"loss": 0.4175, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.24630454456728124, |
|
"grad_norm": 5.1258006881261915, |
|
"learning_rate": 7.538690898764731e-06, |
|
"loss": 0.3966, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.2470143559349103, |
|
"grad_norm": 40.31962236147607, |
|
"learning_rate": 7.531591651284964e-06, |
|
"loss": 0.3839, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.24772416730253935, |
|
"grad_norm": 6.537768993909155, |
|
"learning_rate": 7.524492403805197e-06, |
|
"loss": 0.4122, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.2484339786701684, |
|
"grad_norm": 17.652356012021233, |
|
"learning_rate": 7.51739315632543e-06, |
|
"loss": 0.3948, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.24914379003779746, |
|
"grad_norm": 3.85528406182526, |
|
"learning_rate": 7.510293908845663e-06, |
|
"loss": 0.3938, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.2498536014054265, |
|
"grad_norm": 125.62304184951121, |
|
"learning_rate": 7.503194661365896e-06, |
|
"loss": 0.389, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.25056341277305555, |
|
"grad_norm": 8.558355724038593, |
|
"learning_rate": 7.496095413886129e-06, |
|
"loss": 0.3787, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.2512732241406846, |
|
"grad_norm": 4.216427070872869, |
|
"learning_rate": 7.488996166406361e-06, |
|
"loss": 0.3835, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.2519830355083137, |
|
"grad_norm": 4.314131483032103, |
|
"learning_rate": 7.481896918926594e-06, |
|
"loss": 0.3946, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.25269284687594273, |
|
"grad_norm": 4.159823786853909, |
|
"learning_rate": 7.474797671446827e-06, |
|
"loss": 0.3972, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.2534026582435718, |
|
"grad_norm": 3.4947296702394586, |
|
"learning_rate": 7.4676984239670605e-06, |
|
"loss": 0.4165, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.2541124696112008, |
|
"grad_norm": 4.022241190948728, |
|
"learning_rate": 7.4605991764872936e-06, |
|
"loss": 0.3988, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.25482228097882986, |
|
"grad_norm": 3.4849637281174006, |
|
"learning_rate": 7.453499929007526e-06, |
|
"loss": 0.4106, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.2555320923464589, |
|
"grad_norm": 5.338306458076586, |
|
"learning_rate": 7.446400681527759e-06, |
|
"loss": 0.4082, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.256241903714088, |
|
"grad_norm": 4.970005106695202, |
|
"learning_rate": 7.439301434047992e-06, |
|
"loss": 0.3914, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.25695171508171705, |
|
"grad_norm": 6.355373029038747, |
|
"learning_rate": 7.432202186568225e-06, |
|
"loss": 0.3989, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.2576615264493461, |
|
"grad_norm": 5.996742366501121, |
|
"learning_rate": 7.425102939088457e-06, |
|
"loss": 0.3999, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.25837133781697513, |
|
"grad_norm": 6.966686936423967, |
|
"learning_rate": 7.4180036916086905e-06, |
|
"loss": 0.3831, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.2590811491846042, |
|
"grad_norm": 4.185121399245409, |
|
"learning_rate": 7.410904444128923e-06, |
|
"loss": 0.408, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.2597909605522332, |
|
"grad_norm": 2.2056616209460866, |
|
"learning_rate": 7.403805196649155e-06, |
|
"loss": 0.3931, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.2605007719198623, |
|
"grad_norm": 4.176248780095696, |
|
"learning_rate": 7.396705949169388e-06, |
|
"loss": 0.409, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.26121058328749136, |
|
"grad_norm": 2.47926985794175, |
|
"learning_rate": 7.389606701689621e-06, |
|
"loss": 0.4091, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.2619203946551204, |
|
"grad_norm": 3.02240842448802, |
|
"learning_rate": 7.382507454209854e-06, |
|
"loss": 0.4102, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.26263020602274945, |
|
"grad_norm": 2.0291710541228816, |
|
"learning_rate": 7.3754082067300866e-06, |
|
"loss": 0.382, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.2633400173903785, |
|
"grad_norm": 2.1912303159611084, |
|
"learning_rate": 7.36830895925032e-06, |
|
"loss": 0.3974, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.26404982875800753, |
|
"grad_norm": 2.964541482780821, |
|
"learning_rate": 7.361209711770553e-06, |
|
"loss": 0.4096, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.26475964012563663, |
|
"grad_norm": 5.810099164313448, |
|
"learning_rate": 7.354110464290786e-06, |
|
"loss": 0.4092, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.2654694514932657, |
|
"grad_norm": 4.879409457746285, |
|
"learning_rate": 7.347011216811019e-06, |
|
"loss": 0.4034, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.2661792628608947, |
|
"grad_norm": 2.761287928392515, |
|
"learning_rate": 7.339911969331251e-06, |
|
"loss": 0.3971, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.26688907422852376, |
|
"grad_norm": 14.80879239487425, |
|
"learning_rate": 7.332812721851484e-06, |
|
"loss": 0.4203, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.2675988855961528, |
|
"grad_norm": 2.589550559546521, |
|
"learning_rate": 7.325713474371717e-06, |
|
"loss": 0.4065, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.2683086969637819, |
|
"grad_norm": 2.1908148156089204, |
|
"learning_rate": 7.3186142268919505e-06, |
|
"loss": 0.4001, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.26901850833141094, |
|
"grad_norm": 3.614429975395643, |
|
"learning_rate": 7.311514979412183e-06, |
|
"loss": 0.3949, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.26972831969904, |
|
"grad_norm": 8.199581604131074, |
|
"learning_rate": 7.304415731932416e-06, |
|
"loss": 0.4027, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.27043813106666903, |
|
"grad_norm": 1.9841735875976263, |
|
"learning_rate": 7.297316484452649e-06, |
|
"loss": 0.3803, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.2711479424342981, |
|
"grad_norm": 1.7818490390141006, |
|
"learning_rate": 7.290217236972882e-06, |
|
"loss": 0.3979, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.2718577538019271, |
|
"grad_norm": 2.664420697627613, |
|
"learning_rate": 7.283117989493115e-06, |
|
"loss": 0.4112, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.2725675651695562, |
|
"grad_norm": 7.6015896940216345, |
|
"learning_rate": 7.2760187420133474e-06, |
|
"loss": 0.3978, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.27327737653718526, |
|
"grad_norm": 5.109710356060471, |
|
"learning_rate": 7.2689194945335805e-06, |
|
"loss": 0.3911, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.2739871879048143, |
|
"grad_norm": 1.8719451344781273, |
|
"learning_rate": 7.261820247053813e-06, |
|
"loss": 0.4039, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.27469699927244334, |
|
"grad_norm": 7.834590688589366, |
|
"learning_rate": 7.254720999574045e-06, |
|
"loss": 0.3972, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.2754068106400724, |
|
"grad_norm": 3.4725606354409915, |
|
"learning_rate": 7.247621752094278e-06, |
|
"loss": 0.4106, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.27611662200770143, |
|
"grad_norm": 2.131887069098727, |
|
"learning_rate": 7.240522504614511e-06, |
|
"loss": 0.3921, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.2768264333753305, |
|
"grad_norm": 3.840712773368679, |
|
"learning_rate": 7.233423257134744e-06, |
|
"loss": 0.3963, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.27753624474295957, |
|
"grad_norm": 1.8435607174327202, |
|
"learning_rate": 7.226324009654977e-06, |
|
"loss": 0.4171, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.2782460561105886, |
|
"grad_norm": 2.927315889095762, |
|
"learning_rate": 7.21922476217521e-06, |
|
"loss": 0.4078, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.27895586747821766, |
|
"grad_norm": 2.4533548064235955, |
|
"learning_rate": 7.212125514695443e-06, |
|
"loss": 0.4018, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.2796656788458467, |
|
"grad_norm": 2.6808622987821424, |
|
"learning_rate": 7.205026267215676e-06, |
|
"loss": 0.3952, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.28037549021347574, |
|
"grad_norm": 2.006870713713202, |
|
"learning_rate": 7.197927019735908e-06, |
|
"loss": 0.4041, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.28108530158110484, |
|
"grad_norm": 4.1552921396903955, |
|
"learning_rate": 7.190827772256141e-06, |
|
"loss": 0.3815, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.2817951129487339, |
|
"grad_norm": 3.088912130241367, |
|
"learning_rate": 7.183728524776374e-06, |
|
"loss": 0.4018, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.2825049243163629, |
|
"grad_norm": 2.9619382181530853, |
|
"learning_rate": 7.1766292772966075e-06, |
|
"loss": 0.4071, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.28321473568399197, |
|
"grad_norm": 3.194525382034512, |
|
"learning_rate": 7.1695300298168406e-06, |
|
"loss": 0.3861, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.283924547051621, |
|
"grad_norm": 2.58824315637412, |
|
"learning_rate": 7.162430782337073e-06, |
|
"loss": 0.4022, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2846343584192501, |
|
"grad_norm": 1.6807083864960135, |
|
"learning_rate": 7.155331534857306e-06, |
|
"loss": 0.3953, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.28534416978687915, |
|
"grad_norm": 2.9052226494936706, |
|
"learning_rate": 7.148232287377539e-06, |
|
"loss": 0.3803, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.2860539811545082, |
|
"grad_norm": 1.9518486816171219, |
|
"learning_rate": 7.141133039897772e-06, |
|
"loss": 0.4076, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.28676379252213724, |
|
"grad_norm": 2.223176862483651, |
|
"learning_rate": 7.134033792418004e-06, |
|
"loss": 0.4058, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.2874736038897663, |
|
"grad_norm": 2.2196780309614854, |
|
"learning_rate": 7.1269345449382375e-06, |
|
"loss": 0.3926, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.2881834152573953, |
|
"grad_norm": 6.524368077094248, |
|
"learning_rate": 7.11983529745847e-06, |
|
"loss": 0.4172, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.2888932266250244, |
|
"grad_norm": 5.292339769504148, |
|
"learning_rate": 7.112736049978702e-06, |
|
"loss": 0.3908, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.28960303799265347, |
|
"grad_norm": 2.3067804343233282, |
|
"learning_rate": 7.105636802498935e-06, |
|
"loss": 0.3899, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.2903128493602825, |
|
"grad_norm": 3.23451698379491, |
|
"learning_rate": 7.098537555019168e-06, |
|
"loss": 0.4078, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.29102266072791155, |
|
"grad_norm": 1.9975711149406958, |
|
"learning_rate": 7.091438307539401e-06, |
|
"loss": 0.3892, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.2917324720955406, |
|
"grad_norm": 2.172457996529036, |
|
"learning_rate": 7.084339060059634e-06, |
|
"loss": 0.4024, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.29244228346316964, |
|
"grad_norm": 4.2611345539293985, |
|
"learning_rate": 7.077239812579867e-06, |
|
"loss": 0.4051, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.29315209483079874, |
|
"grad_norm": 4.8499954927547915, |
|
"learning_rate": 7.0701405651001e-06, |
|
"loss": 0.4051, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.2938619061984278, |
|
"grad_norm": 3.133374032170856, |
|
"learning_rate": 7.063041317620333e-06, |
|
"loss": 0.4113, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.2945717175660568, |
|
"grad_norm": 3.0408556337828667, |
|
"learning_rate": 7.055942070140566e-06, |
|
"loss": 0.3918, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.29528152893368587, |
|
"grad_norm": 2.967610716656761, |
|
"learning_rate": 7.048842822660798e-06, |
|
"loss": 0.3935, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.2959913403013149, |
|
"grad_norm": 4.089654504142007, |
|
"learning_rate": 7.041743575181031e-06, |
|
"loss": 0.3812, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.29670115166894395, |
|
"grad_norm": 6.123820735815897, |
|
"learning_rate": 7.0346443277012644e-06, |
|
"loss": 0.3894, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.29741096303657305, |
|
"grad_norm": 9.52031358542494, |
|
"learning_rate": 7.0275450802214975e-06, |
|
"loss": 0.3933, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.2981207744042021, |
|
"grad_norm": 4.241656002923987, |
|
"learning_rate": 7.02044583274173e-06, |
|
"loss": 0.3938, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.29883058577183114, |
|
"grad_norm": 10.364254693083032, |
|
"learning_rate": 7.013346585261963e-06, |
|
"loss": 0.3939, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.2995403971394602, |
|
"grad_norm": 2.493001703497579, |
|
"learning_rate": 7.006247337782196e-06, |
|
"loss": 0.3904, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.3002502085070892, |
|
"grad_norm": 2.372260556132136, |
|
"learning_rate": 6.999148090302429e-06, |
|
"loss": 0.4002, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.3009600198747183, |
|
"grad_norm": 4.447948099801884, |
|
"learning_rate": 6.992048842822662e-06, |
|
"loss": 0.3894, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.30166983124234736, |
|
"grad_norm": 2.4733723007039847, |
|
"learning_rate": 6.9849495953428944e-06, |
|
"loss": 0.3863, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.3023796426099764, |
|
"grad_norm": 11.318740156291982, |
|
"learning_rate": 6.977850347863127e-06, |
|
"loss": 0.3881, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.30308945397760545, |
|
"grad_norm": 3.6328999006662563, |
|
"learning_rate": 6.97075110038336e-06, |
|
"loss": 0.3894, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.3037992653452345, |
|
"grad_norm": 2.0376811180198353, |
|
"learning_rate": 6.963651852903592e-06, |
|
"loss": 0.3993, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.30450907671286354, |
|
"grad_norm": 2.1376755414320625, |
|
"learning_rate": 6.956552605423825e-06, |
|
"loss": 0.3903, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.30521888808049263, |
|
"grad_norm": 2.883515618882684, |
|
"learning_rate": 6.949453357944058e-06, |
|
"loss": 0.4082, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.3059286994481217, |
|
"grad_norm": 2.0964398516334444, |
|
"learning_rate": 6.942354110464291e-06, |
|
"loss": 0.3857, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.3066385108157507, |
|
"grad_norm": 5.410779818418891, |
|
"learning_rate": 6.935254862984524e-06, |
|
"loss": 0.391, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.30734832218337976, |
|
"grad_norm": 4.439425532620099, |
|
"learning_rate": 6.928155615504757e-06, |
|
"loss": 0.4099, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.3080581335510088, |
|
"grad_norm": 12.275643206811255, |
|
"learning_rate": 6.92105636802499e-06, |
|
"loss": 0.3953, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.30876794491863785, |
|
"grad_norm": 5.947992733400443, |
|
"learning_rate": 6.913957120545223e-06, |
|
"loss": 0.3945, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.30947775628626695, |
|
"grad_norm": 3.4397054213510843, |
|
"learning_rate": 6.906857873065456e-06, |
|
"loss": 0.3875, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.310187567653896, |
|
"grad_norm": 41.88563893552131, |
|
"learning_rate": 6.899758625585688e-06, |
|
"loss": 0.3928, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.31089737902152503, |
|
"grad_norm": 3.227989243444744, |
|
"learning_rate": 6.892659378105921e-06, |
|
"loss": 0.3908, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.3116071903891541, |
|
"grad_norm": 22.897381721878148, |
|
"learning_rate": 6.8855601306261545e-06, |
|
"loss": 0.391, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.3123170017567831, |
|
"grad_norm": 3.3630974135990406, |
|
"learning_rate": 6.878460883146388e-06, |
|
"loss": 0.374, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.31302681312441216, |
|
"grad_norm": 4.877401136832981, |
|
"learning_rate": 6.87136163566662e-06, |
|
"loss": 0.3923, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.31373662449204126, |
|
"grad_norm": 6.179682561885886, |
|
"learning_rate": 6.864262388186853e-06, |
|
"loss": 0.3865, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.3144464358596703, |
|
"grad_norm": 4.8910756460648885, |
|
"learning_rate": 6.857163140707086e-06, |
|
"loss": 0.3865, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.31515624722729935, |
|
"grad_norm": 3.260915462621521, |
|
"learning_rate": 6.850063893227319e-06, |
|
"loss": 0.3982, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.3158660585949284, |
|
"grad_norm": 4.599472395508018, |
|
"learning_rate": 6.842964645747551e-06, |
|
"loss": 0.3961, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.31657586996255743, |
|
"grad_norm": 7.776943140920524, |
|
"learning_rate": 6.8358653982677845e-06, |
|
"loss": 0.3873, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.3172856813301865, |
|
"grad_norm": 3.0126570398502723, |
|
"learning_rate": 6.828766150788017e-06, |
|
"loss": 0.3859, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.3179954926978156, |
|
"grad_norm": 1.935360939609241, |
|
"learning_rate": 6.82166690330825e-06, |
|
"loss": 0.3893, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.3187053040654446, |
|
"grad_norm": 2.8545870894952055, |
|
"learning_rate": 6.814567655828482e-06, |
|
"loss": 0.3963, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.31941511543307366, |
|
"grad_norm": 4.70013317139999, |
|
"learning_rate": 6.807468408348715e-06, |
|
"loss": 0.3939, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.3201249268007027, |
|
"grad_norm": 3.264719904276936, |
|
"learning_rate": 6.800369160868948e-06, |
|
"loss": 0.3851, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.32083473816833175, |
|
"grad_norm": 19.735683632874615, |
|
"learning_rate": 6.793269913389181e-06, |
|
"loss": 0.3722, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.32154454953596084, |
|
"grad_norm": 2.501896594333183, |
|
"learning_rate": 6.786170665909414e-06, |
|
"loss": 0.3744, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.3222543609035899, |
|
"grad_norm": 6.776418259400934, |
|
"learning_rate": 6.779071418429647e-06, |
|
"loss": 0.3868, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.32296417227121893, |
|
"grad_norm": 7.759324029832955, |
|
"learning_rate": 6.77197217094988e-06, |
|
"loss": 0.3978, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.323673983638848, |
|
"grad_norm": 5.1020465787210805, |
|
"learning_rate": 6.764872923470113e-06, |
|
"loss": 0.3756, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.324383795006477, |
|
"grad_norm": 4.584721636805871, |
|
"learning_rate": 6.757773675990345e-06, |
|
"loss": 0.3962, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.32509360637410606, |
|
"grad_norm": 5.227400251430727, |
|
"learning_rate": 6.750674428510578e-06, |
|
"loss": 0.3934, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.32580341774173516, |
|
"grad_norm": 6.3055606292098645, |
|
"learning_rate": 6.7435751810308114e-06, |
|
"loss": 0.3921, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.3265132291093642, |
|
"grad_norm": 3.6872617865325914, |
|
"learning_rate": 6.7364759335510445e-06, |
|
"loss": 0.3818, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.32722304047699324, |
|
"grad_norm": 2.007884918336012, |
|
"learning_rate": 6.729376686071278e-06, |
|
"loss": 0.4005, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.3279328518446223, |
|
"grad_norm": 5.042964957635144, |
|
"learning_rate": 6.72227743859151e-06, |
|
"loss": 0.3934, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.32864266321225133, |
|
"grad_norm": 4.122572427444757, |
|
"learning_rate": 6.715178191111743e-06, |
|
"loss": 0.3835, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.32935247457988037, |
|
"grad_norm": 4.528744366296638, |
|
"learning_rate": 6.708078943631976e-06, |
|
"loss": 0.3781, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.33006228594750947, |
|
"grad_norm": 3.0405586193089107, |
|
"learning_rate": 6.700979696152209e-06, |
|
"loss": 0.4013, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.3307720973151385, |
|
"grad_norm": 2.497528895602537, |
|
"learning_rate": 6.6938804486724415e-06, |
|
"loss": 0.4012, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.33148190868276756, |
|
"grad_norm": 3.949569099861772, |
|
"learning_rate": 6.686781201192674e-06, |
|
"loss": 0.3791, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.3321917200503966, |
|
"grad_norm": 2.9026740036563714, |
|
"learning_rate": 6.679681953712907e-06, |
|
"loss": 0.379, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.33290153141802564, |
|
"grad_norm": 4.750694201369016, |
|
"learning_rate": 6.672582706233139e-06, |
|
"loss": 0.3962, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.3336113427856547, |
|
"grad_norm": 4.9647752226572655, |
|
"learning_rate": 6.665483458753372e-06, |
|
"loss": 0.4014, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.3343211541532838, |
|
"grad_norm": 5.007567374826438, |
|
"learning_rate": 6.658384211273605e-06, |
|
"loss": 0.386, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.3350309655209128, |
|
"grad_norm": 24.665793733036637, |
|
"learning_rate": 6.651284963793838e-06, |
|
"loss": 0.3904, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.33574077688854187, |
|
"grad_norm": 8.807448982539153, |
|
"learning_rate": 6.6441857163140715e-06, |
|
"loss": 0.3817, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.3364505882561709, |
|
"grad_norm": 5.649488918187287, |
|
"learning_rate": 6.637086468834304e-06, |
|
"loss": 0.3952, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.33716039962379996, |
|
"grad_norm": 10.030238684862177, |
|
"learning_rate": 6.629987221354537e-06, |
|
"loss": 0.3894, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.33787021099142905, |
|
"grad_norm": 8.229307584465264, |
|
"learning_rate": 6.62288797387477e-06, |
|
"loss": 0.3777, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.3385800223590581, |
|
"grad_norm": 4.702015980686352, |
|
"learning_rate": 6.615788726395003e-06, |
|
"loss": 0.3846, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.33928983372668714, |
|
"grad_norm": 7.609531980298162, |
|
"learning_rate": 6.608689478915235e-06, |
|
"loss": 0.3876, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.3399996450943162, |
|
"grad_norm": 9.359016840144466, |
|
"learning_rate": 6.601590231435468e-06, |
|
"loss": 0.3912, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.3407094564619452, |
|
"grad_norm": 6.921512932106153, |
|
"learning_rate": 6.5944909839557015e-06, |
|
"loss": 0.3808, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.34141926782957427, |
|
"grad_norm": 7.896921462163668, |
|
"learning_rate": 6.587391736475935e-06, |
|
"loss": 0.3822, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.34212907919720337, |
|
"grad_norm": 41.265653283488135, |
|
"learning_rate": 6.580292488996167e-06, |
|
"loss": 0.3704, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.3428388905648324, |
|
"grad_norm": 22.410728414840314, |
|
"learning_rate": 6.5731932415164e-06, |
|
"loss": 0.3879, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.34354870193246145, |
|
"grad_norm": 28.36796548695283, |
|
"learning_rate": 6.566093994036633e-06, |
|
"loss": 0.3819, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.3442585133000905, |
|
"grad_norm": 5.964443376270807, |
|
"learning_rate": 6.558994746556866e-06, |
|
"loss": 0.3793, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.34496832466771954, |
|
"grad_norm": 4.876522423500047, |
|
"learning_rate": 6.551895499077099e-06, |
|
"loss": 0.3882, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.3456781360353486, |
|
"grad_norm": 4.871742533391797, |
|
"learning_rate": 6.544796251597331e-06, |
|
"loss": 0.3896, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.3463879474029777, |
|
"grad_norm": 11.91690423514364, |
|
"learning_rate": 6.537697004117564e-06, |
|
"loss": 0.3736, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.3470977587706067, |
|
"grad_norm": 5.986322327762981, |
|
"learning_rate": 6.530597756637797e-06, |
|
"loss": 0.368, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.34780757013823577, |
|
"grad_norm": 4.671637222361169, |
|
"learning_rate": 6.523498509158029e-06, |
|
"loss": 0.3722, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.3485173815058648, |
|
"grad_norm": 16.438976188514197, |
|
"learning_rate": 6.516399261678262e-06, |
|
"loss": 0.3776, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.34922719287349385, |
|
"grad_norm": 11.76911671905372, |
|
"learning_rate": 6.509300014198495e-06, |
|
"loss": 0.3987, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.3499370042411229, |
|
"grad_norm": 12.380867918847773, |
|
"learning_rate": 6.502200766718728e-06, |
|
"loss": 0.3949, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.350646815608752, |
|
"grad_norm": 8.367704037629133, |
|
"learning_rate": 6.495101519238961e-06, |
|
"loss": 0.3767, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.35135662697638104, |
|
"grad_norm": 74.35690108296033, |
|
"learning_rate": 6.488002271759194e-06, |
|
"loss": 0.3819, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.3520664383440101, |
|
"grad_norm": 16.231219614665278, |
|
"learning_rate": 6.480903024279427e-06, |
|
"loss": 0.3859, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.3527762497116391, |
|
"grad_norm": 9.060846103909238, |
|
"learning_rate": 6.47380377679966e-06, |
|
"loss": 0.394, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.35348606107926817, |
|
"grad_norm": 21.88016531222193, |
|
"learning_rate": 6.466704529319893e-06, |
|
"loss": 0.4167, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.35419587244689726, |
|
"grad_norm": 9.919040843315045, |
|
"learning_rate": 6.459605281840125e-06, |
|
"loss": 0.4192, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.3549056838145263, |
|
"grad_norm": 5.183299722151934, |
|
"learning_rate": 6.4525060343603584e-06, |
|
"loss": 0.4249, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.35561549518215535, |
|
"grad_norm": 8.847185946354221, |
|
"learning_rate": 6.4454067868805915e-06, |
|
"loss": 0.4112, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.3563253065497844, |
|
"grad_norm": 11.864215621262682, |
|
"learning_rate": 6.438307539400825e-06, |
|
"loss": 0.4165, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.35703511791741344, |
|
"grad_norm": 3.3703428369603503, |
|
"learning_rate": 6.431208291921057e-06, |
|
"loss": 0.3978, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.3577449292850425, |
|
"grad_norm": 5.015316577294299, |
|
"learning_rate": 6.42410904444129e-06, |
|
"loss": 0.3872, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.3584547406526716, |
|
"grad_norm": 4.2137919102595305, |
|
"learning_rate": 6.417009796961523e-06, |
|
"loss": 0.3766, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.3591645520203006, |
|
"grad_norm": 3.0372315306510056, |
|
"learning_rate": 6.409910549481756e-06, |
|
"loss": 0.3842, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.35987436338792966, |
|
"grad_norm": 2.7515400586423318, |
|
"learning_rate": 6.4028113020019885e-06, |
|
"loss": 0.3993, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.3605841747555587, |
|
"grad_norm": 9.185207292504243, |
|
"learning_rate": 6.395712054522221e-06, |
|
"loss": 0.3875, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.36129398612318775, |
|
"grad_norm": 19.515842749867563, |
|
"learning_rate": 6.388612807042454e-06, |
|
"loss": 0.4035, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.3620037974908168, |
|
"grad_norm": 12.30636697197178, |
|
"learning_rate": 6.381513559562686e-06, |
|
"loss": 0.4035, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.3627136088584459, |
|
"grad_norm": 6.732979846623905, |
|
"learning_rate": 6.374414312082919e-06, |
|
"loss": 0.4079, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.36342342022607493, |
|
"grad_norm": 6.642326962423095, |
|
"learning_rate": 6.367315064603152e-06, |
|
"loss": 0.3945, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.364133231593704, |
|
"grad_norm": 6.314154234087903, |
|
"learning_rate": 6.360215817123385e-06, |
|
"loss": 0.394, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.364843042961333, |
|
"grad_norm": 4.760512258914551, |
|
"learning_rate": 6.3531165696436185e-06, |
|
"loss": 0.3863, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.36555285432896206, |
|
"grad_norm": 4.048747245175314, |
|
"learning_rate": 6.346017322163851e-06, |
|
"loss": 0.3863, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.3662626656965911, |
|
"grad_norm": 4.190578946223062, |
|
"learning_rate": 6.338918074684084e-06, |
|
"loss": 0.3723, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"grad_norm": 4.175965799380943, |
|
"learning_rate": 6.331818827204317e-06, |
|
"loss": 0.3889, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.36768228843184925, |
|
"grad_norm": 4.807186811656143, |
|
"learning_rate": 6.32471957972455e-06, |
|
"loss": 0.3874, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.3683920997994783, |
|
"grad_norm": 6.659345248185456, |
|
"learning_rate": 6.317620332244782e-06, |
|
"loss": 0.3711, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.36910191116710733, |
|
"grad_norm": 7.2186380945453905, |
|
"learning_rate": 6.310521084765015e-06, |
|
"loss": 0.3827, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.3698117225347364, |
|
"grad_norm": 5.005630183658748, |
|
"learning_rate": 6.3034218372852485e-06, |
|
"loss": 0.3983, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.3705215339023654, |
|
"grad_norm": 3.527405153009429, |
|
"learning_rate": 6.296322589805482e-06, |
|
"loss": 0.367, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.3712313452699945, |
|
"grad_norm": 3.882199465110045, |
|
"learning_rate": 6.289223342325715e-06, |
|
"loss": 0.3883, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.37194115663762356, |
|
"grad_norm": 7.463055050907344, |
|
"learning_rate": 6.282124094845947e-06, |
|
"loss": 0.3823, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.3726509680052526, |
|
"grad_norm": 8.000906237369843, |
|
"learning_rate": 6.27502484736618e-06, |
|
"loss": 0.383, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.37336077937288165, |
|
"grad_norm": 8.362063303535368, |
|
"learning_rate": 6.267925599886413e-06, |
|
"loss": 0.3893, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.3740705907405107, |
|
"grad_norm": 4.721914441661691, |
|
"learning_rate": 6.260826352406646e-06, |
|
"loss": 0.3763, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.3747804021081398, |
|
"grad_norm": 12.175797518430029, |
|
"learning_rate": 6.253727104926878e-06, |
|
"loss": 0.3977, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.37549021347576883, |
|
"grad_norm": 9.814402397906687, |
|
"learning_rate": 6.246627857447111e-06, |
|
"loss": 0.3716, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.3762000248433979, |
|
"grad_norm": 47.1450002499556, |
|
"learning_rate": 6.239528609967344e-06, |
|
"loss": 0.3792, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.3769098362110269, |
|
"grad_norm": 27.513481595283608, |
|
"learning_rate": 6.232429362487576e-06, |
|
"loss": 0.3734, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.37761964757865596, |
|
"grad_norm": 48.09984812385904, |
|
"learning_rate": 6.225330115007809e-06, |
|
"loss": 0.3873, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.378329458946285, |
|
"grad_norm": 5.065884658180426, |
|
"learning_rate": 6.218230867528042e-06, |
|
"loss": 0.39, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.3790392703139141, |
|
"grad_norm": 9.226418902203303, |
|
"learning_rate": 6.2111316200482754e-06, |
|
"loss": 0.3819, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.37974908168154314, |
|
"grad_norm": 6.998201025336219, |
|
"learning_rate": 6.204032372568508e-06, |
|
"loss": 0.3818, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.3804588930491722, |
|
"grad_norm": 4.086309894015096, |
|
"learning_rate": 6.196933125088741e-06, |
|
"loss": 0.3573, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.38116870441680123, |
|
"grad_norm": 8.280993749723958, |
|
"learning_rate": 6.189833877608974e-06, |
|
"loss": 0.3763, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.3818785157844303, |
|
"grad_norm": 4.086208683086361, |
|
"learning_rate": 6.182734630129207e-06, |
|
"loss": 0.3754, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.3825883271520593, |
|
"grad_norm": 5.958244425553627, |
|
"learning_rate": 6.17563538264944e-06, |
|
"loss": 0.3844, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.3832981385196884, |
|
"grad_norm": 3.580000162662889, |
|
"learning_rate": 6.168536135169672e-06, |
|
"loss": 0.382, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.38400794988731746, |
|
"grad_norm": 2.986600327490101, |
|
"learning_rate": 6.1614368876899054e-06, |
|
"loss": 0.3722, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.3847177612549465, |
|
"grad_norm": 3.253411703330411, |
|
"learning_rate": 6.1543376402101386e-06, |
|
"loss": 0.3723, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.38542757262257554, |
|
"grad_norm": 5.02266916683139, |
|
"learning_rate": 6.147238392730372e-06, |
|
"loss": 0.353, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.3861373839902046, |
|
"grad_norm": 6.509810117314743, |
|
"learning_rate": 6.140139145250604e-06, |
|
"loss": 0.3859, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.38684719535783363, |
|
"grad_norm": 3.024955665262126, |
|
"learning_rate": 6.133039897770837e-06, |
|
"loss": 0.3929, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.3875570067254627, |
|
"grad_norm": 3.1517938939602206, |
|
"learning_rate": 6.12594065029107e-06, |
|
"loss": 0.3899, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.38826681809309177, |
|
"grad_norm": 4.545747430477116, |
|
"learning_rate": 6.118841402811303e-06, |
|
"loss": 0.376, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.3889766294607208, |
|
"grad_norm": 4.069699163399179, |
|
"learning_rate": 6.111742155331535e-06, |
|
"loss": 0.3813, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.38968644082834986, |
|
"grad_norm": 3.562062075517251, |
|
"learning_rate": 6.104642907851768e-06, |
|
"loss": 0.383, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.3903962521959789, |
|
"grad_norm": 9.15980720106711, |
|
"learning_rate": 6.097543660372001e-06, |
|
"loss": 0.3921, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.391106063563608, |
|
"grad_norm": 4.449111409231249, |
|
"learning_rate": 6.090444412892234e-06, |
|
"loss": 0.3823, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.39181587493123704, |
|
"grad_norm": 12.724861852641904, |
|
"learning_rate": 6.083345165412466e-06, |
|
"loss": 0.3851, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.3925256862988661, |
|
"grad_norm": 6.615402324691555, |
|
"learning_rate": 6.076245917932699e-06, |
|
"loss": 0.3667, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.3932354976664951, |
|
"grad_norm": 8.817203015753774, |
|
"learning_rate": 6.069146670452932e-06, |
|
"loss": 0.3886, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.39394530903412417, |
|
"grad_norm": 9.192960733910674, |
|
"learning_rate": 6.0620474229731655e-06, |
|
"loss": 0.3794, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.3946551204017532, |
|
"grad_norm": 4.825188565131958, |
|
"learning_rate": 6.054948175493398e-06, |
|
"loss": 0.3786, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.3953649317693823, |
|
"grad_norm": 6.68078822940831, |
|
"learning_rate": 6.047848928013631e-06, |
|
"loss": 0.3835, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.39607474313701135, |
|
"grad_norm": 2.6400726840916175, |
|
"learning_rate": 6.040749680533864e-06, |
|
"loss": 0.381, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.3967845545046404, |
|
"grad_norm": 3.6668671304324967, |
|
"learning_rate": 6.033650433054097e-06, |
|
"loss": 0.3745, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.39749436587226944, |
|
"grad_norm": 2.639833206365908, |
|
"learning_rate": 6.026551185574329e-06, |
|
"loss": 0.3777, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.3982041772398985, |
|
"grad_norm": 3.79888213287165, |
|
"learning_rate": 6.019451938094562e-06, |
|
"loss": 0.3911, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.3989139886075275, |
|
"grad_norm": 5.09183422587413, |
|
"learning_rate": 6.0123526906147955e-06, |
|
"loss": 0.3832, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.3996237999751566, |
|
"grad_norm": 3.3401895175000926, |
|
"learning_rate": 6.005253443135029e-06, |
|
"loss": 0.3862, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.40033361134278567, |
|
"grad_norm": 2.5702329959348726, |
|
"learning_rate": 5.998154195655262e-06, |
|
"loss": 0.3934, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.4010434227104147, |
|
"grad_norm": 3.0044071678975937, |
|
"learning_rate": 5.991054948175494e-06, |
|
"loss": 0.3826, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.40175323407804375, |
|
"grad_norm": 2.412654779599852, |
|
"learning_rate": 5.983955700695727e-06, |
|
"loss": 0.3969, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.4024630454456728, |
|
"grad_norm": 3.0767944703908356, |
|
"learning_rate": 5.97685645321596e-06, |
|
"loss": 0.3961, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.40317285681330184, |
|
"grad_norm": 2.8053230371522124, |
|
"learning_rate": 5.969757205736193e-06, |
|
"loss": 0.3869, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.40388266818093094, |
|
"grad_norm": 7.472643121749521, |
|
"learning_rate": 5.962657958256425e-06, |
|
"loss": 0.3851, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.40459247954856, |
|
"grad_norm": 14.585388143398843, |
|
"learning_rate": 5.955558710776658e-06, |
|
"loss": 0.3905, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.405302290916189, |
|
"grad_norm": 4.416692599365141, |
|
"learning_rate": 5.948459463296891e-06, |
|
"loss": 0.3862, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.40601210228381807, |
|
"grad_norm": 3.4729116521336776, |
|
"learning_rate": 5.941360215817123e-06, |
|
"loss": 0.402, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.4067219136514471, |
|
"grad_norm": 3.5423705326787114, |
|
"learning_rate": 5.934260968337356e-06, |
|
"loss": 0.3884, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.4074317250190762, |
|
"grad_norm": 3.1365000657861497, |
|
"learning_rate": 5.927161720857589e-06, |
|
"loss": 0.3825, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.40814153638670525, |
|
"grad_norm": 5.219488757508086, |
|
"learning_rate": 5.9200624733778224e-06, |
|
"loss": 0.3894, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.4088513477543343, |
|
"grad_norm": 3.596909048940233, |
|
"learning_rate": 5.9129632258980555e-06, |
|
"loss": 0.3831, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.40956115912196334, |
|
"grad_norm": 2.476134224023759, |
|
"learning_rate": 5.905863978418288e-06, |
|
"loss": 0.3825, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.4102709704895924, |
|
"grad_norm": 3.407930958961138, |
|
"learning_rate": 5.898764730938521e-06, |
|
"loss": 0.3714, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.4109807818572214, |
|
"grad_norm": 3.6349280667767636, |
|
"learning_rate": 5.891665483458754e-06, |
|
"loss": 0.3949, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.4116905932248505, |
|
"grad_norm": 10.032880290815127, |
|
"learning_rate": 5.884566235978987e-06, |
|
"loss": 0.3827, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.41240040459247956, |
|
"grad_norm": 4.403552459945297, |
|
"learning_rate": 5.877466988499219e-06, |
|
"loss": 0.3738, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.4131102159601086, |
|
"grad_norm": 3.2630803210797086, |
|
"learning_rate": 5.8703677410194525e-06, |
|
"loss": 0.3947, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.41382002732773765, |
|
"grad_norm": 11.228663057773362, |
|
"learning_rate": 5.8632684935396856e-06, |
|
"loss": 0.3825, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.4145298386953667, |
|
"grad_norm": 18.33844649221444, |
|
"learning_rate": 5.856169246059919e-06, |
|
"loss": 0.381, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.41523965006299574, |
|
"grad_norm": 14.576257048715338, |
|
"learning_rate": 5.849069998580152e-06, |
|
"loss": 0.389, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.41594946143062483, |
|
"grad_norm": 3.3799659706310177, |
|
"learning_rate": 5.841970751100384e-06, |
|
"loss": 0.3687, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.4166592727982539, |
|
"grad_norm": 4.306786145673671, |
|
"learning_rate": 5.834871503620617e-06, |
|
"loss": 0.3846, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.4173690841658829, |
|
"grad_norm": 2.71585444285802, |
|
"learning_rate": 5.82777225614085e-06, |
|
"loss": 0.397, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.41807889553351196, |
|
"grad_norm": 4.530639455269193, |
|
"learning_rate": 5.820673008661082e-06, |
|
"loss": 0.3633, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.418788706901141, |
|
"grad_norm": 5.299365856406392, |
|
"learning_rate": 5.813573761181315e-06, |
|
"loss": 0.3854, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.41949851826877005, |
|
"grad_norm": 3.5533453867575786, |
|
"learning_rate": 5.806474513701548e-06, |
|
"loss": 0.3855, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.42020832963639915, |
|
"grad_norm": 9.388008852057116, |
|
"learning_rate": 5.799375266221781e-06, |
|
"loss": 0.3911, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.4209181410040282, |
|
"grad_norm": 3.378607546141685, |
|
"learning_rate": 5.792276018742013e-06, |
|
"loss": 0.3751, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.42162795237165723, |
|
"grad_norm": 12.222073948575716, |
|
"learning_rate": 5.785176771262246e-06, |
|
"loss": 0.3778, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.4223377637392863, |
|
"grad_norm": 4.297952573306613, |
|
"learning_rate": 5.778077523782479e-06, |
|
"loss": 0.3827, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.4230475751069153, |
|
"grad_norm": 9.764464171752504, |
|
"learning_rate": 5.7709782763027125e-06, |
|
"loss": 0.3893, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.4237573864745444, |
|
"grad_norm": 3.7569225597805658, |
|
"learning_rate": 5.763879028822945e-06, |
|
"loss": 0.3901, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.42446719784217346, |
|
"grad_norm": 3.0005485619903824, |
|
"learning_rate": 5.756779781343178e-06, |
|
"loss": 0.3753, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.4251770092098025, |
|
"grad_norm": 6.457104695432505, |
|
"learning_rate": 5.749680533863411e-06, |
|
"loss": 0.3585, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.42588682057743155, |
|
"grad_norm": 4.252684527352716, |
|
"learning_rate": 5.742581286383644e-06, |
|
"loss": 0.3745, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.4265966319450606, |
|
"grad_norm": 3.3319349737549673, |
|
"learning_rate": 5.735482038903877e-06, |
|
"loss": 0.3836, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.42730644331268963, |
|
"grad_norm": 4.333001859655407, |
|
"learning_rate": 5.728382791424109e-06, |
|
"loss": 0.3698, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.42801625468031873, |
|
"grad_norm": 3.9838864194561343, |
|
"learning_rate": 5.7212835439443425e-06, |
|
"loss": 0.3686, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.4287260660479478, |
|
"grad_norm": 3.206673737162168, |
|
"learning_rate": 5.714184296464576e-06, |
|
"loss": 0.374, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.4294358774155768, |
|
"grad_norm": 7.910008181832549, |
|
"learning_rate": 5.707085048984809e-06, |
|
"loss": 0.3731, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.43014568878320586, |
|
"grad_norm": 11.533279860672804, |
|
"learning_rate": 5.699985801505041e-06, |
|
"loss": 0.3842, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.4308555001508349, |
|
"grad_norm": 4.06817553254219, |
|
"learning_rate": 5.692886554025274e-06, |
|
"loss": 0.3717, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.43156531151846395, |
|
"grad_norm": 12.082596102938004, |
|
"learning_rate": 5.685787306545507e-06, |
|
"loss": 0.3971, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.43227512288609304, |
|
"grad_norm": 2.685455478240202, |
|
"learning_rate": 5.678688059065739e-06, |
|
"loss": 0.3822, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.4329849342537221, |
|
"grad_norm": 3.1399973614222643, |
|
"learning_rate": 5.671588811585972e-06, |
|
"loss": 0.3774, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.43369474562135113, |
|
"grad_norm": 3.518374812592983, |
|
"learning_rate": 5.664489564106205e-06, |
|
"loss": 0.3781, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.4344045569889802, |
|
"grad_norm": 4.803932844471321, |
|
"learning_rate": 5.657390316626438e-06, |
|
"loss": 0.3757, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.4351143683566092, |
|
"grad_norm": 12.690594810777407, |
|
"learning_rate": 5.650291069146671e-06, |
|
"loss": 0.3747, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.43582417972423826, |
|
"grad_norm": 10.80688099347966, |
|
"learning_rate": 5.643191821666903e-06, |
|
"loss": 0.3676, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.43653399109186736, |
|
"grad_norm": 4.232034052682343, |
|
"learning_rate": 5.636092574187136e-06, |
|
"loss": 0.395, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.4372438024594964, |
|
"grad_norm": 3.422739256279243, |
|
"learning_rate": 5.6289933267073694e-06, |
|
"loss": 0.3693, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.43795361382712544, |
|
"grad_norm": 32.06006758689784, |
|
"learning_rate": 5.6218940792276025e-06, |
|
"loss": 0.3782, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.4386634251947545, |
|
"grad_norm": 5.623034465377633, |
|
"learning_rate": 5.614794831747835e-06, |
|
"loss": 0.3813, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.43937323656238353, |
|
"grad_norm": 10.612805886316337, |
|
"learning_rate": 5.607695584268068e-06, |
|
"loss": 0.3702, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.44008304793001257, |
|
"grad_norm": 6.077674805742986, |
|
"learning_rate": 5.600596336788301e-06, |
|
"loss": 0.3643, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.44079285929764167, |
|
"grad_norm": 7.053795971115957, |
|
"learning_rate": 5.593497089308534e-06, |
|
"loss": 0.3911, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.4415026706652707, |
|
"grad_norm": 6.212842792838621, |
|
"learning_rate": 5.586397841828766e-06, |
|
"loss": 0.3774, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.44221248203289976, |
|
"grad_norm": 7.598832178623656, |
|
"learning_rate": 5.5792985943489995e-06, |
|
"loss": 0.3808, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.4429222934005288, |
|
"grad_norm": 14.834315377312098, |
|
"learning_rate": 5.5721993468692326e-06, |
|
"loss": 0.3765, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.44363210476815784, |
|
"grad_norm": 15.459970963070427, |
|
"learning_rate": 5.565100099389466e-06, |
|
"loss": 0.3863, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.44434191613578694, |
|
"grad_norm": 5.002895033502256, |
|
"learning_rate": 5.558000851909699e-06, |
|
"loss": 0.3718, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.445051727503416, |
|
"grad_norm": 4.67592371180372, |
|
"learning_rate": 5.550901604429931e-06, |
|
"loss": 0.3869, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.445761538871045, |
|
"grad_norm": 4.246040554798665, |
|
"learning_rate": 5.543802356950164e-06, |
|
"loss": 0.3673, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.44647135023867407, |
|
"grad_norm": 5.698576828390134, |
|
"learning_rate": 5.536703109470397e-06, |
|
"loss": 0.3733, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.4471811616063031, |
|
"grad_norm": 4.890818923695549, |
|
"learning_rate": 5.529603861990629e-06, |
|
"loss": 0.3917, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.44789097297393216, |
|
"grad_norm": 3.5954099385229, |
|
"learning_rate": 5.522504614510862e-06, |
|
"loss": 0.387, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.44860078434156125, |
|
"grad_norm": 5.819667912733057, |
|
"learning_rate": 5.515405367031095e-06, |
|
"loss": 0.3772, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.4493105957091903, |
|
"grad_norm": 4.924613328068802, |
|
"learning_rate": 5.508306119551328e-06, |
|
"loss": 0.3691, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.45002040707681934, |
|
"grad_norm": 4.077670226838275, |
|
"learning_rate": 5.50120687207156e-06, |
|
"loss": 0.3606, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.4507302184444484, |
|
"grad_norm": 4.7425966011878815, |
|
"learning_rate": 5.494107624591793e-06, |
|
"loss": 0.3712, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.4514400298120774, |
|
"grad_norm": 3.7724063921848, |
|
"learning_rate": 5.487008377112026e-06, |
|
"loss": 0.3707, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.45214984117970647, |
|
"grad_norm": 2.8597041255348183, |
|
"learning_rate": 5.4799091296322595e-06, |
|
"loss": 0.364, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.45285965254733557, |
|
"grad_norm": 5.386440052681094, |
|
"learning_rate": 5.472809882152493e-06, |
|
"loss": 0.3785, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.4535694639149646, |
|
"grad_norm": 4.20147189666546, |
|
"learning_rate": 5.465710634672725e-06, |
|
"loss": 0.384, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.45427927528259365, |
|
"grad_norm": 5.4360613411555185, |
|
"learning_rate": 5.458611387192958e-06, |
|
"loss": 0.3676, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.4549890866502227, |
|
"grad_norm": 7.4543272167324846, |
|
"learning_rate": 5.451512139713191e-06, |
|
"loss": 0.3973, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.45569889801785174, |
|
"grad_norm": 5.302161729787796, |
|
"learning_rate": 5.444412892233424e-06, |
|
"loss": 0.3878, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.4564087093854808, |
|
"grad_norm": 4.774927845954586, |
|
"learning_rate": 5.437313644753656e-06, |
|
"loss": 0.368, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.4571185207531099, |
|
"grad_norm": 4.733108202290537, |
|
"learning_rate": 5.4302143972738895e-06, |
|
"loss": 0.3841, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.4578283321207389, |
|
"grad_norm": 4.581655513075473, |
|
"learning_rate": 5.423115149794123e-06, |
|
"loss": 0.3805, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.45853814348836797, |
|
"grad_norm": 2.4364404744853445, |
|
"learning_rate": 5.416015902314356e-06, |
|
"loss": 0.3587, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.459247954855997, |
|
"grad_norm": 5.16394378928267, |
|
"learning_rate": 5.408916654834588e-06, |
|
"loss": 0.3793, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.45995776622362605, |
|
"grad_norm": 8.232574335670192, |
|
"learning_rate": 5.401817407354821e-06, |
|
"loss": 0.3794, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.46066757759125515, |
|
"grad_norm": 10.509485180483269, |
|
"learning_rate": 5.394718159875054e-06, |
|
"loss": 0.3742, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.4613773889588842, |
|
"grad_norm": 3.418180521754276, |
|
"learning_rate": 5.387618912395286e-06, |
|
"loss": 0.3733, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.46208720032651324, |
|
"grad_norm": 4.2689703556593495, |
|
"learning_rate": 5.380519664915519e-06, |
|
"loss": 0.374, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.4627970116941423, |
|
"grad_norm": 7.896842999549548, |
|
"learning_rate": 5.373420417435752e-06, |
|
"loss": 0.3799, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.4635068230617713, |
|
"grad_norm": 3.4870838077093893, |
|
"learning_rate": 5.366321169955985e-06, |
|
"loss": 0.3712, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.46421663442940037, |
|
"grad_norm": 27.778526824166995, |
|
"learning_rate": 5.359221922476218e-06, |
|
"loss": 0.3655, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.46492644579702946, |
|
"grad_norm": 16.796202092439216, |
|
"learning_rate": 5.35212267499645e-06, |
|
"loss": 0.3846, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.4656362571646585, |
|
"grad_norm": 5.698856930659158, |
|
"learning_rate": 5.345023427516683e-06, |
|
"loss": 0.3877, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.46634606853228755, |
|
"grad_norm": 8.694016798434083, |
|
"learning_rate": 5.3379241800369165e-06, |
|
"loss": 0.3607, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.4670558798999166, |
|
"grad_norm": 3.617969654098083, |
|
"learning_rate": 5.3308249325571496e-06, |
|
"loss": 0.36, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.46776569126754564, |
|
"grad_norm": 7.181014577384461, |
|
"learning_rate": 5.323725685077382e-06, |
|
"loss": 0.3783, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.4684755026351747, |
|
"grad_norm": 9.52331650225055, |
|
"learning_rate": 5.316626437597615e-06, |
|
"loss": 0.3707, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.4691853140028038, |
|
"grad_norm": 5.927560976046885, |
|
"learning_rate": 5.309527190117848e-06, |
|
"loss": 0.3747, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.4698951253704328, |
|
"grad_norm": 33.354649195054265, |
|
"learning_rate": 5.302427942638081e-06, |
|
"loss": 0.3622, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.47060493673806186, |
|
"grad_norm": 5.109478632635811, |
|
"learning_rate": 5.295328695158314e-06, |
|
"loss": 0.3702, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.4713147481056909, |
|
"grad_norm": 62.14127099005149, |
|
"learning_rate": 5.2882294476785465e-06, |
|
"loss": 0.3718, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.47202455947331995, |
|
"grad_norm": 3.9646315343813674, |
|
"learning_rate": 5.2811302001987796e-06, |
|
"loss": 0.3579, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.472734370840949, |
|
"grad_norm": 5.822229945732986, |
|
"learning_rate": 5.274030952719013e-06, |
|
"loss": 0.358, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.4734441822085781, |
|
"grad_norm": 3.0706990453586607, |
|
"learning_rate": 5.266931705239246e-06, |
|
"loss": 0.3712, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.47415399357620713, |
|
"grad_norm": 2.763541771977754, |
|
"learning_rate": 5.259832457759478e-06, |
|
"loss": 0.3862, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.4748638049438362, |
|
"grad_norm": 2.8054880505902746, |
|
"learning_rate": 5.252733210279711e-06, |
|
"loss": 0.3609, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.4755736163114652, |
|
"grad_norm": 3.5455500616555864, |
|
"learning_rate": 5.245633962799943e-06, |
|
"loss": 0.3845, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.47628342767909426, |
|
"grad_norm": 6.871049315984216, |
|
"learning_rate": 5.238534715320176e-06, |
|
"loss": 0.3681, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.47699323904672336, |
|
"grad_norm": 4.626136895991325, |
|
"learning_rate": 5.231435467840409e-06, |
|
"loss": 0.3694, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.4777030504143524, |
|
"grad_norm": 4.1689737774582385, |
|
"learning_rate": 5.224336220360642e-06, |
|
"loss": 0.3722, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.47841286178198145, |
|
"grad_norm": 2.345831388882716, |
|
"learning_rate": 5.217236972880875e-06, |
|
"loss": 0.3778, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.4791226731496105, |
|
"grad_norm": 5.181993551246977, |
|
"learning_rate": 5.210137725401107e-06, |
|
"loss": 0.3649, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.47983248451723953, |
|
"grad_norm": 4.144025528380454, |
|
"learning_rate": 5.20303847792134e-06, |
|
"loss": 0.3854, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.4805422958848686, |
|
"grad_norm": 4.0013049178877536, |
|
"learning_rate": 5.195939230441573e-06, |
|
"loss": 0.3832, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.4812521072524977, |
|
"grad_norm": 4.375334224867565, |
|
"learning_rate": 5.1888399829618065e-06, |
|
"loss": 0.3678, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.4819619186201267, |
|
"grad_norm": 2.8158913555106926, |
|
"learning_rate": 5.18174073548204e-06, |
|
"loss": 0.3735, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.48267172998775576, |
|
"grad_norm": 4.286259213586135, |
|
"learning_rate": 5.174641488002272e-06, |
|
"loss": 0.3824, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.4833815413553848, |
|
"grad_norm": 2.917255310557774, |
|
"learning_rate": 5.167542240522505e-06, |
|
"loss": 0.367, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.48409135272301385, |
|
"grad_norm": 2.9474809991081194, |
|
"learning_rate": 5.160442993042738e-06, |
|
"loss": 0.37, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.4848011640906429, |
|
"grad_norm": 8.0892973566849, |
|
"learning_rate": 5.153343745562971e-06, |
|
"loss": 0.385, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.485510975458272, |
|
"grad_norm": 5.46237208189901, |
|
"learning_rate": 5.1462444980832034e-06, |
|
"loss": 0.3723, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.48622078682590103, |
|
"grad_norm": 4.813397707683654, |
|
"learning_rate": 5.1391452506034365e-06, |
|
"loss": 0.3847, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.4869305981935301, |
|
"grad_norm": 3.839632822272105, |
|
"learning_rate": 5.13204600312367e-06, |
|
"loss": 0.3994, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.4876404095611591, |
|
"grad_norm": 2.731217984269613, |
|
"learning_rate": 5.124946755643903e-06, |
|
"loss": 0.3928, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.48835022092878816, |
|
"grad_norm": 7.062296596699752, |
|
"learning_rate": 5.117847508164136e-06, |
|
"loss": 0.4141, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.4890600322964172, |
|
"grad_norm": 3.0471865890050034, |
|
"learning_rate": 5.110748260684368e-06, |
|
"loss": 0.3712, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.4897698436640463, |
|
"grad_norm": 8.240874357274272, |
|
"learning_rate": 5.103649013204601e-06, |
|
"loss": 0.3828, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.49047965503167534, |
|
"grad_norm": 4.557814239490917, |
|
"learning_rate": 5.0965497657248334e-06, |
|
"loss": 0.3794, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.4911894663993044, |
|
"grad_norm": 6.50934729087624, |
|
"learning_rate": 5.089450518245066e-06, |
|
"loss": 0.3655, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.49189927776693343, |
|
"grad_norm": 2.7892154452796696, |
|
"learning_rate": 5.082351270765299e-06, |
|
"loss": 0.3477, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.4926090891345625, |
|
"grad_norm": 4.296820022815862, |
|
"learning_rate": 5.075252023285532e-06, |
|
"loss": 0.3917, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.4933189005021915, |
|
"grad_norm": 3.7811542108069514, |
|
"learning_rate": 5.068152775805765e-06, |
|
"loss": 0.3846, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.4940287118698206, |
|
"grad_norm": 12.150770506288081, |
|
"learning_rate": 5.061053528325997e-06, |
|
"loss": 0.3991, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.49473852323744966, |
|
"grad_norm": 8.737862487013935, |
|
"learning_rate": 5.05395428084623e-06, |
|
"loss": 0.376, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.4954483346050787, |
|
"grad_norm": 4.705086993153889, |
|
"learning_rate": 5.0468550333664635e-06, |
|
"loss": 0.3774, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.49615814597270774, |
|
"grad_norm": 3.95177864719572, |
|
"learning_rate": 5.0397557858866966e-06, |
|
"loss": 0.3867, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.4968679573403368, |
|
"grad_norm": 4.9228476674024995, |
|
"learning_rate": 5.03265653840693e-06, |
|
"loss": 0.3868, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.4975777687079659, |
|
"grad_norm": 7.598944675436029, |
|
"learning_rate": 5.025557290927162e-06, |
|
"loss": 0.3791, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.4982875800755949, |
|
"grad_norm": 3.948022335506646, |
|
"learning_rate": 5.018458043447395e-06, |
|
"loss": 0.3878, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.49899739144322397, |
|
"grad_norm": 2.97600555704115, |
|
"learning_rate": 5.011358795967628e-06, |
|
"loss": 0.3891, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.499707202810853, |
|
"grad_norm": 7.322058927387839, |
|
"learning_rate": 5.004259548487861e-06, |
|
"loss": 0.3739, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.5004170141784821, |
|
"grad_norm": 4.054563164115399, |
|
"learning_rate": 4.9971603010080935e-06, |
|
"loss": 0.3654, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.5011268255461111, |
|
"grad_norm": 6.433797069878189, |
|
"learning_rate": 4.990061053528326e-06, |
|
"loss": 0.3769, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.5018366369137401, |
|
"grad_norm": 6.244381336548628, |
|
"learning_rate": 4.982961806048559e-06, |
|
"loss": 0.3698, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.5025464482813692, |
|
"grad_norm": 4.649812061123292, |
|
"learning_rate": 4.975862558568792e-06, |
|
"loss": 0.3597, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.5032562596489982, |
|
"grad_norm": 13.131635539716475, |
|
"learning_rate": 4.968763311089025e-06, |
|
"loss": 0.3737, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.5039660710166274, |
|
"grad_norm": 11.654767208116397, |
|
"learning_rate": 4.961664063609258e-06, |
|
"loss": 0.3809, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.5046758823842564, |
|
"grad_norm": 5.54405844933368, |
|
"learning_rate": 4.95456481612949e-06, |
|
"loss": 0.3668, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.5053856937518855, |
|
"grad_norm": 17.63140898183613, |
|
"learning_rate": 4.9474655686497235e-06, |
|
"loss": 0.3751, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.5060955051195145, |
|
"grad_norm": 4.735270750917372, |
|
"learning_rate": 4.940366321169957e-06, |
|
"loss": 0.3759, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.5068053164871436, |
|
"grad_norm": 3.6005983980475214, |
|
"learning_rate": 4.93326707369019e-06, |
|
"loss": 0.3932, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.5075151278547726, |
|
"grad_norm": 5.073652881259414, |
|
"learning_rate": 4.926167826210422e-06, |
|
"loss": 0.3689, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.5082249392224016, |
|
"grad_norm": 6.515311066715168, |
|
"learning_rate": 4.919068578730655e-06, |
|
"loss": 0.3675, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.5089347505900307, |
|
"grad_norm": 12.98913332417653, |
|
"learning_rate": 4.911969331250887e-06, |
|
"loss": 0.3861, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.5096445619576597, |
|
"grad_norm": 5.1500756291258005, |
|
"learning_rate": 4.90487008377112e-06, |
|
"loss": 0.3731, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.5103543733252888, |
|
"grad_norm": 5.833801547579832, |
|
"learning_rate": 4.8977708362913535e-06, |
|
"loss": 0.3831, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.5110641846929178, |
|
"grad_norm": 8.343761477251691, |
|
"learning_rate": 4.890671588811587e-06, |
|
"loss": 0.3716, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.511773996060547, |
|
"grad_norm": 6.740845613760958, |
|
"learning_rate": 4.883572341331819e-06, |
|
"loss": 0.377, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.512483807428176, |
|
"grad_norm": 6.834960096187304, |
|
"learning_rate": 4.876473093852052e-06, |
|
"loss": 0.3774, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.513193618795805, |
|
"grad_norm": 6.333904565562881, |
|
"learning_rate": 4.869373846372285e-06, |
|
"loss": 0.3786, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.5139034301634341, |
|
"grad_norm": 7.380378873059882, |
|
"learning_rate": 4.862274598892518e-06, |
|
"loss": 0.3641, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.5146132415310631, |
|
"grad_norm": 8.15711157363267, |
|
"learning_rate": 4.855175351412751e-06, |
|
"loss": 0.354, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.5153230528986922, |
|
"grad_norm": 5.298194233144714, |
|
"learning_rate": 4.8480761039329835e-06, |
|
"loss": 0.3648, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.5160328642663212, |
|
"grad_norm": 6.169565228174972, |
|
"learning_rate": 4.840976856453216e-06, |
|
"loss": 0.3606, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.5167426756339503, |
|
"grad_norm": 4.633952354333419, |
|
"learning_rate": 4.833877608973449e-06, |
|
"loss": 0.3627, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.5174524870015793, |
|
"grad_norm": 7.754370375548218, |
|
"learning_rate": 4.826778361493682e-06, |
|
"loss": 0.384, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.5181622983692084, |
|
"grad_norm": 4.628647672477682, |
|
"learning_rate": 4.819679114013915e-06, |
|
"loss": 0.3717, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.5188721097368374, |
|
"grad_norm": 4.6108119740619165, |
|
"learning_rate": 4.812579866534147e-06, |
|
"loss": 0.3531, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.5195819211044664, |
|
"grad_norm": 3.777480319775288, |
|
"learning_rate": 4.8054806190543805e-06, |
|
"loss": 0.3735, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.5202917324720956, |
|
"grad_norm": 6.455151414772601, |
|
"learning_rate": 4.7983813715746136e-06, |
|
"loss": 0.3845, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.5210015438397246, |
|
"grad_norm": 5.0016880570007, |
|
"learning_rate": 4.791282124094847e-06, |
|
"loss": 0.3588, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.5217113552073537, |
|
"grad_norm": 3.596195253014758, |
|
"learning_rate": 4.78418287661508e-06, |
|
"loss": 0.3664, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.5224211665749827, |
|
"grad_norm": 4.6111563525428005, |
|
"learning_rate": 4.777083629135312e-06, |
|
"loss": 0.3815, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.5231309779426118, |
|
"grad_norm": 3.81079107236397, |
|
"learning_rate": 4.769984381655544e-06, |
|
"loss": 0.3603, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.5238407893102408, |
|
"grad_norm": 10.081677733455512, |
|
"learning_rate": 4.762885134175777e-06, |
|
"loss": 0.3748, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.5245506006778698, |
|
"grad_norm": 4.011909680570432, |
|
"learning_rate": 4.7557858866960105e-06, |
|
"loss": 0.3736, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.5252604120454989, |
|
"grad_norm": 4.008812937992125, |
|
"learning_rate": 4.7486866392162436e-06, |
|
"loss": 0.3718, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.5259702234131279, |
|
"grad_norm": 8.895014071619777, |
|
"learning_rate": 4.741587391736477e-06, |
|
"loss": 0.3747, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.526680034780757, |
|
"grad_norm": 2.5646865204368394, |
|
"learning_rate": 4.734488144256709e-06, |
|
"loss": 0.3593, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.527389846148386, |
|
"grad_norm": 2.8583907278858147, |
|
"learning_rate": 4.727388896776942e-06, |
|
"loss": 0.387, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.5280996575160151, |
|
"grad_norm": 2.8626323560816296, |
|
"learning_rate": 4.720289649297175e-06, |
|
"loss": 0.3756, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.5288094688836442, |
|
"grad_norm": 7.38191434335366, |
|
"learning_rate": 4.713190401817408e-06, |
|
"loss": 0.3715, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.5295192802512733, |
|
"grad_norm": 3.187699709665762, |
|
"learning_rate": 4.7060911543376405e-06, |
|
"loss": 0.3763, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.5302290916189023, |
|
"grad_norm": 2.2423385405265366, |
|
"learning_rate": 4.698991906857874e-06, |
|
"loss": 0.367, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.5309389029865313, |
|
"grad_norm": 3.5525056364166465, |
|
"learning_rate": 4.691892659378106e-06, |
|
"loss": 0.3639, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.5316487143541604, |
|
"grad_norm": 2.5840538292895405, |
|
"learning_rate": 4.684793411898339e-06, |
|
"loss": 0.3713, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.5323585257217894, |
|
"grad_norm": 3.6015272776951366, |
|
"learning_rate": 4.677694164418572e-06, |
|
"loss": 0.3672, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.5330683370894185, |
|
"grad_norm": 2.958338857599813, |
|
"learning_rate": 4.670594916938805e-06, |
|
"loss": 0.364, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.5337781484570475, |
|
"grad_norm": 2.6780802400700248, |
|
"learning_rate": 4.663495669459037e-06, |
|
"loss": 0.3871, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.5344879598246766, |
|
"grad_norm": 2.141486624042336, |
|
"learning_rate": 4.6563964219792705e-06, |
|
"loss": 0.3918, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.5351977711923056, |
|
"grad_norm": 11.627725180923038, |
|
"learning_rate": 4.649297174499504e-06, |
|
"loss": 0.369, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.5359075825599346, |
|
"grad_norm": 2.164302320101156, |
|
"learning_rate": 4.642197927019737e-06, |
|
"loss": 0.3763, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.5366173939275638, |
|
"grad_norm": 2.5355641201406716, |
|
"learning_rate": 4.63509867953997e-06, |
|
"loss": 0.3709, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.5373272052951928, |
|
"grad_norm": 1.7486780225096559, |
|
"learning_rate": 4.627999432060202e-06, |
|
"loss": 0.3778, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.5380370166628219, |
|
"grad_norm": 2.1996857828607066, |
|
"learning_rate": 4.620900184580434e-06, |
|
"loss": 0.3878, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.5387468280304509, |
|
"grad_norm": 2.2718302971034325, |
|
"learning_rate": 4.613800937100667e-06, |
|
"loss": 0.3691, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.53945663939808, |
|
"grad_norm": 2.247788269458988, |
|
"learning_rate": 4.6067016896209005e-06, |
|
"loss": 0.3764, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.540166450765709, |
|
"grad_norm": 4.951241532022136, |
|
"learning_rate": 4.599602442141134e-06, |
|
"loss": 0.3696, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.5408762621333381, |
|
"grad_norm": 18.87723312065313, |
|
"learning_rate": 4.592503194661366e-06, |
|
"loss": 0.3752, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.5415860735009671, |
|
"grad_norm": 4.839150391451601, |
|
"learning_rate": 4.585403947181599e-06, |
|
"loss": 0.3704, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.5422958848685961, |
|
"grad_norm": 3.252448644894675, |
|
"learning_rate": 4.578304699701832e-06, |
|
"loss": 0.3662, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.5430056962362252, |
|
"grad_norm": 4.636061450249123, |
|
"learning_rate": 4.571205452222065e-06, |
|
"loss": 0.3695, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.5437155076038542, |
|
"grad_norm": 2.217398025384477, |
|
"learning_rate": 4.564106204742298e-06, |
|
"loss": 0.381, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.5444253189714833, |
|
"grad_norm": 3.2864797627789764, |
|
"learning_rate": 4.5570069572625305e-06, |
|
"loss": 0.3766, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.5451351303391124, |
|
"grad_norm": 2.5595280528292346, |
|
"learning_rate": 4.549907709782763e-06, |
|
"loss": 0.3753, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.5458449417067415, |
|
"grad_norm": 3.5869951931087356, |
|
"learning_rate": 4.542808462302996e-06, |
|
"loss": 0.3649, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.5465547530743705, |
|
"grad_norm": 2.878804286325741, |
|
"learning_rate": 4.535709214823229e-06, |
|
"loss": 0.365, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.5472645644419996, |
|
"grad_norm": 3.835428702840037, |
|
"learning_rate": 4.528609967343462e-06, |
|
"loss": 0.388, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.5479743758096286, |
|
"grad_norm": 3.3115804743584225, |
|
"learning_rate": 4.521510719863695e-06, |
|
"loss": 0.3498, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.5486841871772576, |
|
"grad_norm": 2.155325207710473, |
|
"learning_rate": 4.5144114723839275e-06, |
|
"loss": 0.3638, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.5493939985448867, |
|
"grad_norm": 2.9473064158817506, |
|
"learning_rate": 4.5073122249041606e-06, |
|
"loss": 0.3756, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.5501038099125157, |
|
"grad_norm": 3.977038197892431, |
|
"learning_rate": 4.500212977424394e-06, |
|
"loss": 0.3674, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.5508136212801448, |
|
"grad_norm": 5.638630944163406, |
|
"learning_rate": 4.493113729944627e-06, |
|
"loss": 0.3528, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.5515234326477738, |
|
"grad_norm": 2.8534926361264286, |
|
"learning_rate": 4.486014482464859e-06, |
|
"loss": 0.3697, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.5522332440154029, |
|
"grad_norm": 6.069502646886042, |
|
"learning_rate": 4.478915234985092e-06, |
|
"loss": 0.3843, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.552943055383032, |
|
"grad_norm": 4.343605351910854, |
|
"learning_rate": 4.471815987505324e-06, |
|
"loss": 0.3783, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.553652866750661, |
|
"grad_norm": 2.104465858436518, |
|
"learning_rate": 4.4647167400255575e-06, |
|
"loss": 0.3601, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.5543626781182901, |
|
"grad_norm": 3.0902122663518448, |
|
"learning_rate": 4.457617492545791e-06, |
|
"loss": 0.3801, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.5550724894859191, |
|
"grad_norm": 4.573352955842933, |
|
"learning_rate": 4.450518245066024e-06, |
|
"loss": 0.3835, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.5557823008535482, |
|
"grad_norm": 2.9707860507790924, |
|
"learning_rate": 4.443418997586256e-06, |
|
"loss": 0.3709, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.5564921122211772, |
|
"grad_norm": 2.5687241689417806, |
|
"learning_rate": 4.436319750106489e-06, |
|
"loss": 0.3835, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.5572019235888063, |
|
"grad_norm": 3.347322471582433, |
|
"learning_rate": 4.429220502626722e-06, |
|
"loss": 0.3735, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.5579117349564353, |
|
"grad_norm": 6.431823861299619, |
|
"learning_rate": 4.422121255146955e-06, |
|
"loss": 0.3704, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.5586215463240644, |
|
"grad_norm": 3.050115422109329, |
|
"learning_rate": 4.4150220076671875e-06, |
|
"loss": 0.3822, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.5593313576916934, |
|
"grad_norm": 1.7811591664189523, |
|
"learning_rate": 4.407922760187421e-06, |
|
"loss": 0.3658, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.5600411690593224, |
|
"grad_norm": 3.442846796158278, |
|
"learning_rate": 4.400823512707653e-06, |
|
"loss": 0.3621, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.5607509804269515, |
|
"grad_norm": 7.2461896738177, |
|
"learning_rate": 4.393724265227886e-06, |
|
"loss": 0.3526, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.5614607917945806, |
|
"grad_norm": 2.0219408065827875, |
|
"learning_rate": 4.386625017748119e-06, |
|
"loss": 0.3659, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.5621706031622097, |
|
"grad_norm": 4.896944413168855, |
|
"learning_rate": 4.379525770268352e-06, |
|
"loss": 0.3765, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.5628804145298387, |
|
"grad_norm": 2.1094695887191848, |
|
"learning_rate": 4.372426522788584e-06, |
|
"loss": 0.3644, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.5635902258974678, |
|
"grad_norm": 5.596991296221292, |
|
"learning_rate": 4.3653272753088175e-06, |
|
"loss": 0.3835, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.5643000372650968, |
|
"grad_norm": 2.373450501523087, |
|
"learning_rate": 4.358228027829051e-06, |
|
"loss": 0.3756, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.5650098486327259, |
|
"grad_norm": 4.1947432157390026, |
|
"learning_rate": 4.351128780349284e-06, |
|
"loss": 0.3787, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.5657196600003549, |
|
"grad_norm": 2.921985411820113, |
|
"learning_rate": 4.344029532869517e-06, |
|
"loss": 0.3746, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.5664294713679839, |
|
"grad_norm": 13.63904398617421, |
|
"learning_rate": 4.336930285389749e-06, |
|
"loss": 0.3535, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.567139282735613, |
|
"grad_norm": 2.6665592498045037, |
|
"learning_rate": 4.329831037909981e-06, |
|
"loss": 0.3668, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.567849094103242, |
|
"grad_norm": 2.7866449972058795, |
|
"learning_rate": 4.3227317904302144e-06, |
|
"loss": 0.3747, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5685589054708711, |
|
"grad_norm": 2.795372211208224, |
|
"learning_rate": 4.3156325429504475e-06, |
|
"loss": 0.3737, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.5692687168385002, |
|
"grad_norm": 2.829992387736084, |
|
"learning_rate": 4.308533295470681e-06, |
|
"loss": 0.3813, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.5699785282061293, |
|
"grad_norm": 3.8835793195310706, |
|
"learning_rate": 4.301434047990914e-06, |
|
"loss": 0.3934, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.5706883395737583, |
|
"grad_norm": 2.157944880021205, |
|
"learning_rate": 4.294334800511146e-06, |
|
"loss": 0.3619, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.5713981509413874, |
|
"grad_norm": 2.576031100575868, |
|
"learning_rate": 4.287235553031379e-06, |
|
"loss": 0.3654, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.5721079623090164, |
|
"grad_norm": 2.1013120962560445, |
|
"learning_rate": 4.280136305551612e-06, |
|
"loss": 0.3808, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.5728177736766454, |
|
"grad_norm": 8.72915943640877, |
|
"learning_rate": 4.273037058071845e-06, |
|
"loss": 0.3865, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.5735275850442745, |
|
"grad_norm": 3.1373379205439123, |
|
"learning_rate": 4.2659378105920776e-06, |
|
"loss": 0.3631, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.5742373964119035, |
|
"grad_norm": 10.697527972561883, |
|
"learning_rate": 4.258838563112311e-06, |
|
"loss": 0.3597, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.5749472077795326, |
|
"grad_norm": 3.6970932139238095, |
|
"learning_rate": 4.251739315632543e-06, |
|
"loss": 0.3635, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.5756570191471616, |
|
"grad_norm": 2.4203467674630206, |
|
"learning_rate": 4.244640068152776e-06, |
|
"loss": 0.359, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.5763668305147907, |
|
"grad_norm": 2.9395692807103035, |
|
"learning_rate": 4.237540820673009e-06, |
|
"loss": 0.3603, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.5770766418824197, |
|
"grad_norm": 3.012599979258794, |
|
"learning_rate": 4.230441573193242e-06, |
|
"loss": 0.3568, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.5777864532500488, |
|
"grad_norm": 6.667370402568531, |
|
"learning_rate": 4.2233423257134745e-06, |
|
"loss": 0.3629, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.5784962646176779, |
|
"grad_norm": 4.471487834006219, |
|
"learning_rate": 4.2162430782337076e-06, |
|
"loss": 0.3683, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.5792060759853069, |
|
"grad_norm": 3.599804032694662, |
|
"learning_rate": 4.209143830753941e-06, |
|
"loss": 0.3554, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.579915887352936, |
|
"grad_norm": 2.9142466980850985, |
|
"learning_rate": 4.202044583274174e-06, |
|
"loss": 0.3524, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.580625698720565, |
|
"grad_norm": 3.8569199714753295, |
|
"learning_rate": 4.194945335794406e-06, |
|
"loss": 0.3663, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.5813355100881941, |
|
"grad_norm": 2.4068975949006077, |
|
"learning_rate": 4.187846088314639e-06, |
|
"loss": 0.3747, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.5820453214558231, |
|
"grad_norm": 6.174322801188514, |
|
"learning_rate": 4.180746840834871e-06, |
|
"loss": 0.372, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.5827551328234521, |
|
"grad_norm": 2.888969982284499, |
|
"learning_rate": 4.1736475933551045e-06, |
|
"loss": 0.361, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.5834649441910812, |
|
"grad_norm": 4.910093339119916, |
|
"learning_rate": 4.166548345875338e-06, |
|
"loss": 0.3574, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.5841747555587102, |
|
"grad_norm": 5.1058356496999755, |
|
"learning_rate": 4.159449098395571e-06, |
|
"loss": 0.3786, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.5848845669263393, |
|
"grad_norm": 14.081326767892058, |
|
"learning_rate": 4.152349850915803e-06, |
|
"loss": 0.3729, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.5855943782939684, |
|
"grad_norm": 4.958684438886047, |
|
"learning_rate": 4.145250603436036e-06, |
|
"loss": 0.3566, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.5863041896615975, |
|
"grad_norm": 3.9438637049329075, |
|
"learning_rate": 4.138151355956269e-06, |
|
"loss": 0.3861, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.5870140010292265, |
|
"grad_norm": 2.9499712942928107, |
|
"learning_rate": 4.131052108476502e-06, |
|
"loss": 0.3439, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.5877238123968556, |
|
"grad_norm": 3.332966504823502, |
|
"learning_rate": 4.1239528609967345e-06, |
|
"loss": 0.3788, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.5884336237644846, |
|
"grad_norm": 27.970854056782667, |
|
"learning_rate": 4.116853613516968e-06, |
|
"loss": 0.3591, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.5891434351321136, |
|
"grad_norm": 4.487327484061174, |
|
"learning_rate": 4.1097543660372e-06, |
|
"loss": 0.3625, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.5898532464997427, |
|
"grad_norm": 3.8006981727665496, |
|
"learning_rate": 4.102655118557433e-06, |
|
"loss": 0.3709, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.5905630578673717, |
|
"grad_norm": 3.463457513521014, |
|
"learning_rate": 4.095555871077666e-06, |
|
"loss": 0.3641, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.5912728692350008, |
|
"grad_norm": 7.640707242523127, |
|
"learning_rate": 4.088456623597899e-06, |
|
"loss": 0.3648, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.5919826806026298, |
|
"grad_norm": 2.8614936603096295, |
|
"learning_rate": 4.081357376118132e-06, |
|
"loss": 0.3616, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.5926924919702589, |
|
"grad_norm": 3.296737746561609, |
|
"learning_rate": 4.0742581286383645e-06, |
|
"loss": 0.3808, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.5934023033378879, |
|
"grad_norm": 3.2426352432246976, |
|
"learning_rate": 4.067158881158598e-06, |
|
"loss": 0.3583, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.5941121147055171, |
|
"grad_norm": 3.4522007032736806, |
|
"learning_rate": 4.060059633678831e-06, |
|
"loss": 0.365, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.5948219260731461, |
|
"grad_norm": 3.9166457660699145, |
|
"learning_rate": 4.052960386199063e-06, |
|
"loss": 0.3692, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.5955317374407751, |
|
"grad_norm": 2.9039677495535874, |
|
"learning_rate": 4.045861138719296e-06, |
|
"loss": 0.3468, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.5962415488084042, |
|
"grad_norm": 3.187977468656372, |
|
"learning_rate": 4.038761891239529e-06, |
|
"loss": 0.359, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.5969513601760332, |
|
"grad_norm": 4.529576318117622, |
|
"learning_rate": 4.0316626437597614e-06, |
|
"loss": 0.3452, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.5976611715436623, |
|
"grad_norm": 6.601726345536697, |
|
"learning_rate": 4.0245633962799945e-06, |
|
"loss": 0.3713, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.5983709829112913, |
|
"grad_norm": 2.4278158486667576, |
|
"learning_rate": 4.017464148800228e-06, |
|
"loss": 0.3628, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.5990807942789204, |
|
"grad_norm": 2.76630569189727, |
|
"learning_rate": 4.010364901320461e-06, |
|
"loss": 0.3704, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.5997906056465494, |
|
"grad_norm": 6.7843620715556545, |
|
"learning_rate": 4.003265653840693e-06, |
|
"loss": 0.3682, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.6005004170141784, |
|
"grad_norm": 2.9403338895288336, |
|
"learning_rate": 3.996166406360926e-06, |
|
"loss": 0.3608, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.6012102283818075, |
|
"grad_norm": 4.301178222098619, |
|
"learning_rate": 3.989067158881159e-06, |
|
"loss": 0.3595, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.6019200397494366, |
|
"grad_norm": 3.0914199152912696, |
|
"learning_rate": 3.981967911401392e-06, |
|
"loss": 0.3718, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.6026298511170657, |
|
"grad_norm": 2.753384437967004, |
|
"learning_rate": 3.9748686639216246e-06, |
|
"loss": 0.3672, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.6033396624846947, |
|
"grad_norm": 2.576321546323924, |
|
"learning_rate": 3.967769416441858e-06, |
|
"loss": 0.3706, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.6040494738523238, |
|
"grad_norm": 2.617904283815147, |
|
"learning_rate": 3.96067016896209e-06, |
|
"loss": 0.3539, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.6047592852199528, |
|
"grad_norm": 4.862875127190094, |
|
"learning_rate": 3.953570921482323e-06, |
|
"loss": 0.3763, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.6054690965875819, |
|
"grad_norm": 4.741023889550647, |
|
"learning_rate": 3.946471674002556e-06, |
|
"loss": 0.3611, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.6061789079552109, |
|
"grad_norm": 6.394478684199079, |
|
"learning_rate": 3.939372426522789e-06, |
|
"loss": 0.3615, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.6068887193228399, |
|
"grad_norm": 4.045100357410319, |
|
"learning_rate": 3.9322731790430215e-06, |
|
"loss": 0.3648, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.607598530690469, |
|
"grad_norm": 3.756852697194425, |
|
"learning_rate": 3.925173931563255e-06, |
|
"loss": 0.3689, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.608308342058098, |
|
"grad_norm": 4.04897373953826, |
|
"learning_rate": 3.918074684083488e-06, |
|
"loss": 0.3644, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.6090181534257271, |
|
"grad_norm": 4.036663207362448, |
|
"learning_rate": 3.910975436603721e-06, |
|
"loss": 0.366, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.6097279647933561, |
|
"grad_norm": 4.156260594948616, |
|
"learning_rate": 3.903876189123953e-06, |
|
"loss": 0.3554, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.6104377761609853, |
|
"grad_norm": 3.398605568980307, |
|
"learning_rate": 3.896776941644186e-06, |
|
"loss": 0.3717, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.6111475875286143, |
|
"grad_norm": 3.5114677948249065, |
|
"learning_rate": 3.889677694164418e-06, |
|
"loss": 0.3677, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.6118573988962434, |
|
"grad_norm": 4.753605099187553, |
|
"learning_rate": 3.8825784466846515e-06, |
|
"loss": 0.3547, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.6125672102638724, |
|
"grad_norm": 3.4243729659259334, |
|
"learning_rate": 3.875479199204885e-06, |
|
"loss": 0.3762, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.6132770216315014, |
|
"grad_norm": 5.94912381861312, |
|
"learning_rate": 3.868379951725118e-06, |
|
"loss": 0.359, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.6139868329991305, |
|
"grad_norm": 6.590267176028699, |
|
"learning_rate": 3.861280704245351e-06, |
|
"loss": 0.3758, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.6146966443667595, |
|
"grad_norm": 3.3256854782540497, |
|
"learning_rate": 3.854181456765583e-06, |
|
"loss": 0.3562, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.6154064557343886, |
|
"grad_norm": 3.6453120360212816, |
|
"learning_rate": 3.847082209285816e-06, |
|
"loss": 0.3619, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.6161162671020176, |
|
"grad_norm": 13.965716037023453, |
|
"learning_rate": 3.839982961806049e-06, |
|
"loss": 0.3646, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.6168260784696467, |
|
"grad_norm": 7.837860273774759, |
|
"learning_rate": 3.8328837143262815e-06, |
|
"loss": 0.3457, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.6175358898372757, |
|
"grad_norm": 4.729547574214101, |
|
"learning_rate": 3.825784466846515e-06, |
|
"loss": 0.3565, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.6182457012049049, |
|
"grad_norm": 2.5619385732076987, |
|
"learning_rate": 3.818685219366748e-06, |
|
"loss": 0.3676, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.6189555125725339, |
|
"grad_norm": 6.790019325573497, |
|
"learning_rate": 3.8115859718869804e-06, |
|
"loss": 0.3646, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.6196653239401629, |
|
"grad_norm": 3.3195434105048665, |
|
"learning_rate": 3.804486724407213e-06, |
|
"loss": 0.3575, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.620375135307792, |
|
"grad_norm": 3.805294873305076, |
|
"learning_rate": 3.797387476927446e-06, |
|
"loss": 0.3657, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.621084946675421, |
|
"grad_norm": 5.59682650769057, |
|
"learning_rate": 3.790288229447679e-06, |
|
"loss": 0.3609, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.6217947580430501, |
|
"grad_norm": 4.89958212672841, |
|
"learning_rate": 3.783188981967912e-06, |
|
"loss": 0.3669, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.6225045694106791, |
|
"grad_norm": 8.274929479843232, |
|
"learning_rate": 3.7760897344881446e-06, |
|
"loss": 0.3581, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.6232143807783082, |
|
"grad_norm": 3.2978821299433445, |
|
"learning_rate": 3.7689904870083777e-06, |
|
"loss": 0.3679, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.6239241921459372, |
|
"grad_norm": 12.435473632592815, |
|
"learning_rate": 3.76189123952861e-06, |
|
"loss": 0.3677, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.6246340035135662, |
|
"grad_norm": 4.195421567773733, |
|
"learning_rate": 3.754791992048843e-06, |
|
"loss": 0.3492, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.6253438148811953, |
|
"grad_norm": 4.406904963403177, |
|
"learning_rate": 3.7476927445690758e-06, |
|
"loss": 0.3597, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.6260536262488243, |
|
"grad_norm": 4.199730218503971, |
|
"learning_rate": 3.740593497089309e-06, |
|
"loss": 0.3797, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.6267634376164535, |
|
"grad_norm": 3.3446382282646705, |
|
"learning_rate": 3.7334942496095415e-06, |
|
"loss": 0.3638, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.6274732489840825, |
|
"grad_norm": 4.862585068251522, |
|
"learning_rate": 3.7263950021297747e-06, |
|
"loss": 0.3573, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.6281830603517116, |
|
"grad_norm": 8.107090011887513, |
|
"learning_rate": 3.7192957546500073e-06, |
|
"loss": 0.3672, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.6288928717193406, |
|
"grad_norm": 4.3962651782052005, |
|
"learning_rate": 3.7121965071702404e-06, |
|
"loss": 0.3412, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.6296026830869697, |
|
"grad_norm": 4.6424143973536935, |
|
"learning_rate": 3.705097259690473e-06, |
|
"loss": 0.3667, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 0.6303124944545987, |
|
"grad_norm": 3.840268427443435, |
|
"learning_rate": 3.697998012210706e-06, |
|
"loss": 0.3557, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.6310223058222277, |
|
"grad_norm": 3.6388205049600018, |
|
"learning_rate": 3.6908987647309385e-06, |
|
"loss": 0.3631, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 0.6317321171898568, |
|
"grad_norm": 5.233530712843461, |
|
"learning_rate": 3.6837995172511716e-06, |
|
"loss": 0.3648, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.6324419285574858, |
|
"grad_norm": 3.781452701492992, |
|
"learning_rate": 3.6767002697714042e-06, |
|
"loss": 0.3788, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 0.6331517399251149, |
|
"grad_norm": 6.068345043524154, |
|
"learning_rate": 3.6696010222916373e-06, |
|
"loss": 0.3566, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.6338615512927439, |
|
"grad_norm": 5.599734595118006, |
|
"learning_rate": 3.66250177481187e-06, |
|
"loss": 0.349, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 0.634571362660373, |
|
"grad_norm": 10.428150341049763, |
|
"learning_rate": 3.655402527332103e-06, |
|
"loss": 0.3584, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.6352811740280021, |
|
"grad_norm": 17.681698800577582, |
|
"learning_rate": 3.648303279852336e-06, |
|
"loss": 0.3458, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.6359909853956311, |
|
"grad_norm": 6.591627899287575, |
|
"learning_rate": 3.641204032372569e-06, |
|
"loss": 0.3643, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.6367007967632602, |
|
"grad_norm": 31.04186356298661, |
|
"learning_rate": 3.634104784892802e-06, |
|
"loss": 0.3577, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 0.6374106081308892, |
|
"grad_norm": 8.824274787999325, |
|
"learning_rate": 3.6270055374130347e-06, |
|
"loss": 0.3618, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.6381204194985183, |
|
"grad_norm": 4.7185603252655826, |
|
"learning_rate": 3.619906289933267e-06, |
|
"loss": 0.3598, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.6388302308661473, |
|
"grad_norm": 5.394376788444082, |
|
"learning_rate": 3.6128070424535e-06, |
|
"loss": 0.362, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.6395400422337764, |
|
"grad_norm": 7.158347387403476, |
|
"learning_rate": 3.6057077949737327e-06, |
|
"loss": 0.3694, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 0.6402498536014054, |
|
"grad_norm": 8.033101525768098, |
|
"learning_rate": 3.598608547493966e-06, |
|
"loss": 0.3626, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.6409596649690344, |
|
"grad_norm": 2.7105647455701667, |
|
"learning_rate": 3.591509300014199e-06, |
|
"loss": 0.3462, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 0.6416694763366635, |
|
"grad_norm": 6.3548259889750955, |
|
"learning_rate": 3.5844100525344316e-06, |
|
"loss": 0.3632, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.6423792877042925, |
|
"grad_norm": 7.341190059846113, |
|
"learning_rate": 3.5773108050546647e-06, |
|
"loss": 0.3653, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.6430890990719217, |
|
"grad_norm": 3.8869033025489723, |
|
"learning_rate": 3.5702115575748974e-06, |
|
"loss": 0.3412, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.6437989104395507, |
|
"grad_norm": 4.918908181105817, |
|
"learning_rate": 3.5631123100951305e-06, |
|
"loss": 0.3616, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 0.6445087218071798, |
|
"grad_norm": 6.124064792410853, |
|
"learning_rate": 3.556013062615363e-06, |
|
"loss": 0.3585, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.6452185331748088, |
|
"grad_norm": 3.6806357015000764, |
|
"learning_rate": 3.5489138151355963e-06, |
|
"loss": 0.3668, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 0.6459283445424379, |
|
"grad_norm": 5.193254667513745, |
|
"learning_rate": 3.5418145676558285e-06, |
|
"loss": 0.3669, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.6466381559100669, |
|
"grad_norm": 10.978524486328482, |
|
"learning_rate": 3.5347153201760616e-06, |
|
"loss": 0.3597, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 0.647347967277696, |
|
"grad_norm": 4.6611361687349175, |
|
"learning_rate": 3.5276160726962943e-06, |
|
"loss": 0.3695, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.648057778645325, |
|
"grad_norm": 5.205492428214056, |
|
"learning_rate": 3.5205168252165274e-06, |
|
"loss": 0.3663, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 0.648767590012954, |
|
"grad_norm": 5.139991204646184, |
|
"learning_rate": 3.51341757773676e-06, |
|
"loss": 0.3551, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.6494774013805831, |
|
"grad_norm": 16.35255401640736, |
|
"learning_rate": 3.506318330256993e-06, |
|
"loss": 0.3553, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.6501872127482121, |
|
"grad_norm": 10.145378264655722, |
|
"learning_rate": 3.499219082777226e-06, |
|
"loss": 0.3583, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.6508970241158412, |
|
"grad_norm": 24.878144093372033, |
|
"learning_rate": 3.492119835297459e-06, |
|
"loss": 0.3555, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 0.6516068354834703, |
|
"grad_norm": 3.902743241561423, |
|
"learning_rate": 3.4850205878176916e-06, |
|
"loss": 0.3723, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.6523166468510994, |
|
"grad_norm": 4.458085439514939, |
|
"learning_rate": 3.4779213403379247e-06, |
|
"loss": 0.3701, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 0.6530264582187284, |
|
"grad_norm": 4.717552266761064, |
|
"learning_rate": 3.470822092858157e-06, |
|
"loss": 0.3618, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.6537362695863574, |
|
"grad_norm": 4.427364622798698, |
|
"learning_rate": 3.46372284537839e-06, |
|
"loss": 0.3614, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 0.6544460809539865, |
|
"grad_norm": 8.323851654330221, |
|
"learning_rate": 3.4566235978986228e-06, |
|
"loss": 0.3678, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.6551558923216155, |
|
"grad_norm": 4.966094347637934, |
|
"learning_rate": 3.449524350418856e-06, |
|
"loss": 0.3688, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 0.6558657036892446, |
|
"grad_norm": 4.930577227679058, |
|
"learning_rate": 3.4424251029390886e-06, |
|
"loss": 0.3503, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.6565755150568736, |
|
"grad_norm": 5.52399635730182, |
|
"learning_rate": 3.4353258554593217e-06, |
|
"loss": 0.3696, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.6572853264245027, |
|
"grad_norm": 4.590670373221129, |
|
"learning_rate": 3.4282266079795543e-06, |
|
"loss": 0.3685, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.6579951377921317, |
|
"grad_norm": 8.264828163926657, |
|
"learning_rate": 3.4211273604997874e-06, |
|
"loss": 0.3575, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 0.6587049491597607, |
|
"grad_norm": 8.133262914973033, |
|
"learning_rate": 3.4140281130200205e-06, |
|
"loss": 0.3713, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.6594147605273899, |
|
"grad_norm": 5.742760195932282, |
|
"learning_rate": 3.4069288655402532e-06, |
|
"loss": 0.3725, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 0.6601245718950189, |
|
"grad_norm": 8.53035579823295, |
|
"learning_rate": 3.3998296180604855e-06, |
|
"loss": 0.3599, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.660834383262648, |
|
"grad_norm": 4.142002947123207, |
|
"learning_rate": 3.3927303705807186e-06, |
|
"loss": 0.3661, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 0.661544194630277, |
|
"grad_norm": 6.246166093324293, |
|
"learning_rate": 3.3856311231009513e-06, |
|
"loss": 0.351, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.6622540059979061, |
|
"grad_norm": 16.243950855343193, |
|
"learning_rate": 3.3785318756211844e-06, |
|
"loss": 0.3479, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 0.6629638173655351, |
|
"grad_norm": 6.147144910165458, |
|
"learning_rate": 3.3714326281414175e-06, |
|
"loss": 0.3543, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.6636736287331642, |
|
"grad_norm": 4.099934401177817, |
|
"learning_rate": 3.36433338066165e-06, |
|
"loss": 0.3636, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.6643834401007932, |
|
"grad_norm": 4.17019707869721, |
|
"learning_rate": 3.3572341331818832e-06, |
|
"loss": 0.351, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.6650932514684222, |
|
"grad_norm": 4.102146778496878, |
|
"learning_rate": 3.350134885702116e-06, |
|
"loss": 0.3737, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 0.6658030628360513, |
|
"grad_norm": 4.155164161456904, |
|
"learning_rate": 3.343035638222349e-06, |
|
"loss": 0.3505, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.6665128742036803, |
|
"grad_norm": 4.042739251178277, |
|
"learning_rate": 3.3359363907425817e-06, |
|
"loss": 0.3578, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 0.6672226855713094, |
|
"grad_norm": 3.4724621327513057, |
|
"learning_rate": 3.328837143262814e-06, |
|
"loss": 0.3733, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.6679324969389385, |
|
"grad_norm": 3.284294254497063, |
|
"learning_rate": 3.321737895783047e-06, |
|
"loss": 0.361, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 0.6686423083065676, |
|
"grad_norm": 5.224665667041366, |
|
"learning_rate": 3.31463864830328e-06, |
|
"loss": 0.3597, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.6693521196741966, |
|
"grad_norm": 13.317891191179472, |
|
"learning_rate": 3.307539400823513e-06, |
|
"loss": 0.36, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 0.6700619310418257, |
|
"grad_norm": 8.338179465785696, |
|
"learning_rate": 3.300440153343746e-06, |
|
"loss": 0.3708, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.6707717424094547, |
|
"grad_norm": 4.022884248031831, |
|
"learning_rate": 3.2933409058639786e-06, |
|
"loss": 0.357, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.6714815537770837, |
|
"grad_norm": 2.816929350582557, |
|
"learning_rate": 3.2862416583842117e-06, |
|
"loss": 0.3618, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.6721913651447128, |
|
"grad_norm": 3.2609706893982278, |
|
"learning_rate": 3.2791424109044444e-06, |
|
"loss": 0.3566, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 0.6729011765123418, |
|
"grad_norm": 2.0212043627509177, |
|
"learning_rate": 3.2720431634246775e-06, |
|
"loss": 0.3631, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.6736109878799709, |
|
"grad_norm": 3.472359881135022, |
|
"learning_rate": 3.26494391594491e-06, |
|
"loss": 0.3465, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 0.6743207992475999, |
|
"grad_norm": 2.365708920981696, |
|
"learning_rate": 3.257844668465143e-06, |
|
"loss": 0.36, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.675030610615229, |
|
"grad_norm": 6.47059083775482, |
|
"learning_rate": 3.2507454209853755e-06, |
|
"loss": 0.3589, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 0.6757404219828581, |
|
"grad_norm": 2.9761715896390872, |
|
"learning_rate": 3.2436461735056086e-06, |
|
"loss": 0.3737, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.6764502333504872, |
|
"grad_norm": 3.2920710102385375, |
|
"learning_rate": 3.2365469260258413e-06, |
|
"loss": 0.3631, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 0.6771600447181162, |
|
"grad_norm": 2.24517655258034, |
|
"learning_rate": 3.2294476785460744e-06, |
|
"loss": 0.3565, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.6778698560857452, |
|
"grad_norm": 4.585199424065417, |
|
"learning_rate": 3.222348431066307e-06, |
|
"loss": 0.3587, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.6785796674533743, |
|
"grad_norm": 2.616245813772314, |
|
"learning_rate": 3.21524918358654e-06, |
|
"loss": 0.3641, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.6792894788210033, |
|
"grad_norm": 6.790868775160296, |
|
"learning_rate": 3.208149936106773e-06, |
|
"loss": 0.3542, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 0.6799992901886324, |
|
"grad_norm": 4.6720875235574955, |
|
"learning_rate": 3.201050688627006e-06, |
|
"loss": 0.3724, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.6807091015562614, |
|
"grad_norm": 2.929891653919803, |
|
"learning_rate": 3.193951441147239e-06, |
|
"loss": 0.355, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 0.6814189129238905, |
|
"grad_norm": 2.5935885874594935, |
|
"learning_rate": 3.1868521936674717e-06, |
|
"loss": 0.3477, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.6821287242915195, |
|
"grad_norm": 4.16743323358689, |
|
"learning_rate": 3.179752946187704e-06, |
|
"loss": 0.3732, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.6828385356591485, |
|
"grad_norm": 3.119963047712144, |
|
"learning_rate": 3.172653698707937e-06, |
|
"loss": 0.3583, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.6835483470267776, |
|
"grad_norm": 4.025619816942283, |
|
"learning_rate": 3.1655544512281698e-06, |
|
"loss": 0.3814, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 0.6842581583944067, |
|
"grad_norm": 10.60216606667068, |
|
"learning_rate": 3.158455203748403e-06, |
|
"loss": 0.3599, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.6849679697620358, |
|
"grad_norm": 4.461108822226996, |
|
"learning_rate": 3.1513559562686356e-06, |
|
"loss": 0.3619, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.6856777811296648, |
|
"grad_norm": 2.7381838956818596, |
|
"learning_rate": 3.1442567087888687e-06, |
|
"loss": 0.361, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.6863875924972939, |
|
"grad_norm": 3.3932603213636536, |
|
"learning_rate": 3.1371574613091018e-06, |
|
"loss": 0.3722, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 0.6870974038649229, |
|
"grad_norm": 3.0238463961256556, |
|
"learning_rate": 3.1300582138293344e-06, |
|
"loss": 0.3677, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.687807215232552, |
|
"grad_norm": 2.9020326019536236, |
|
"learning_rate": 3.1229589663495675e-06, |
|
"loss": 0.3587, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 0.688517026600181, |
|
"grad_norm": 3.4182793620767313, |
|
"learning_rate": 3.1158597188698002e-06, |
|
"loss": 0.3958, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.68922683796781, |
|
"grad_norm": 2.7346693208831123, |
|
"learning_rate": 3.1087604713900325e-06, |
|
"loss": 0.3746, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 0.6899366493354391, |
|
"grad_norm": 2.7001110030197184, |
|
"learning_rate": 3.1016612239102656e-06, |
|
"loss": 0.3596, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.6906464607030681, |
|
"grad_norm": 3.8786526590857706, |
|
"learning_rate": 3.0945619764304987e-06, |
|
"loss": 0.3677, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 0.6913562720706972, |
|
"grad_norm": 3.601819125137747, |
|
"learning_rate": 3.0874627289507314e-06, |
|
"loss": 0.3599, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.6920660834383263, |
|
"grad_norm": 4.257577712986774, |
|
"learning_rate": 3.0803634814709645e-06, |
|
"loss": 0.3653, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.6927758948059554, |
|
"grad_norm": 16.2562479732823, |
|
"learning_rate": 3.073264233991197e-06, |
|
"loss": 0.3786, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.6934857061735844, |
|
"grad_norm": 2.8308341290836037, |
|
"learning_rate": 3.0661649865114302e-06, |
|
"loss": 0.347, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 0.6941955175412134, |
|
"grad_norm": 2.386467475595729, |
|
"learning_rate": 3.059065739031663e-06, |
|
"loss": 0.3785, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.6949053289088425, |
|
"grad_norm": 3.11594441686047, |
|
"learning_rate": 3.051966491551896e-06, |
|
"loss": 0.3613, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 0.6956151402764715, |
|
"grad_norm": 3.4457140851193677, |
|
"learning_rate": 3.0448672440721287e-06, |
|
"loss": 0.3592, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.6963249516441006, |
|
"grad_norm": 6.7733834909511135, |
|
"learning_rate": 3.0377679965923614e-06, |
|
"loss": 0.3503, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 0.6970347630117296, |
|
"grad_norm": 2.552293405448118, |
|
"learning_rate": 3.030668749112594e-06, |
|
"loss": 0.3565, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.6977445743793587, |
|
"grad_norm": 7.3573968999972985, |
|
"learning_rate": 3.023569501632827e-06, |
|
"loss": 0.3534, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 0.6984543857469877, |
|
"grad_norm": 2.2835556419626286, |
|
"learning_rate": 3.01647025415306e-06, |
|
"loss": 0.3627, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.6991641971146167, |
|
"grad_norm": 4.158935806681915, |
|
"learning_rate": 3.009371006673293e-06, |
|
"loss": 0.3676, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.6998740084822458, |
|
"grad_norm": 3.444386024390724, |
|
"learning_rate": 3.0022717591935256e-06, |
|
"loss": 0.3498, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.700583819849875, |
|
"grad_norm": 76.68033690471103, |
|
"learning_rate": 2.9951725117137587e-06, |
|
"loss": 0.3465, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 0.701293631217504, |
|
"grad_norm": 2.753848553217651, |
|
"learning_rate": 2.9880732642339914e-06, |
|
"loss": 0.3579, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.702003442585133, |
|
"grad_norm": 6.8770901385155465, |
|
"learning_rate": 2.9809740167542245e-06, |
|
"loss": 0.3644, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 0.7027132539527621, |
|
"grad_norm": 8.050770443325867, |
|
"learning_rate": 2.9738747692744576e-06, |
|
"loss": 0.3534, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.7034230653203911, |
|
"grad_norm": 6.2381173840397794, |
|
"learning_rate": 2.96677552179469e-06, |
|
"loss": 0.3799, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 0.7041328766880202, |
|
"grad_norm": 2.527197221067041, |
|
"learning_rate": 2.9596762743149225e-06, |
|
"loss": 0.3702, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.7048426880556492, |
|
"grad_norm": 3.365675129758323, |
|
"learning_rate": 2.9525770268351556e-06, |
|
"loss": 0.3618, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 0.7055524994232782, |
|
"grad_norm": 3.7307831294643323, |
|
"learning_rate": 2.9454777793553883e-06, |
|
"loss": 0.3552, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.7062623107909073, |
|
"grad_norm": 10.13055799757591, |
|
"learning_rate": 2.9383785318756214e-06, |
|
"loss": 0.369, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.7069721221585363, |
|
"grad_norm": 3.79159989826404, |
|
"learning_rate": 2.931279284395854e-06, |
|
"loss": 0.3393, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.7076819335261654, |
|
"grad_norm": 11.361319554472407, |
|
"learning_rate": 2.924180036916087e-06, |
|
"loss": 0.3726, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 0.7083917448937945, |
|
"grad_norm": 2.2727709813242, |
|
"learning_rate": 2.9170807894363203e-06, |
|
"loss": 0.3558, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.7091015562614236, |
|
"grad_norm": 13.54783288221351, |
|
"learning_rate": 2.909981541956553e-06, |
|
"loss": 0.3522, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 0.7098113676290526, |
|
"grad_norm": 3.4738198913190037, |
|
"learning_rate": 2.902882294476786e-06, |
|
"loss": 0.3636, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.7105211789966817, |
|
"grad_norm": 2.599196507580769, |
|
"learning_rate": 2.8957830469970183e-06, |
|
"loss": 0.373, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 0.7112309903643107, |
|
"grad_norm": 4.846340487255633, |
|
"learning_rate": 2.888683799517251e-06, |
|
"loss": 0.364, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.7119408017319397, |
|
"grad_norm": 4.14481835106229, |
|
"learning_rate": 2.881584552037484e-06, |
|
"loss": 0.3565, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 0.7126506130995688, |
|
"grad_norm": 3.12959687042078, |
|
"learning_rate": 2.8744853045577172e-06, |
|
"loss": 0.3597, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.7133604244671978, |
|
"grad_norm": 2.0499607045489157, |
|
"learning_rate": 2.86738605707795e-06, |
|
"loss": 0.3665, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.7140702358348269, |
|
"grad_norm": 3.4345739303394964, |
|
"learning_rate": 2.860286809598183e-06, |
|
"loss": 0.3406, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.7147800472024559, |
|
"grad_norm": 3.2507549549593677, |
|
"learning_rate": 2.8531875621184157e-06, |
|
"loss": 0.3691, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 0.715489858570085, |
|
"grad_norm": 3.088999571380729, |
|
"learning_rate": 2.8460883146386488e-06, |
|
"loss": 0.3512, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.716199669937714, |
|
"grad_norm": 3.992697102415428, |
|
"learning_rate": 2.8389890671588815e-06, |
|
"loss": 0.3584, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 0.7169094813053432, |
|
"grad_norm": 8.327520697203159, |
|
"learning_rate": 2.8318898196791146e-06, |
|
"loss": 0.3604, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.7176192926729722, |
|
"grad_norm": 4.600972082353797, |
|
"learning_rate": 2.824790572199347e-06, |
|
"loss": 0.3641, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 0.7183291040406012, |
|
"grad_norm": 3.6403983429872384, |
|
"learning_rate": 2.81769132471958e-06, |
|
"loss": 0.3496, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.7190389154082303, |
|
"grad_norm": 2.831902492470625, |
|
"learning_rate": 2.8105920772398126e-06, |
|
"loss": 0.3611, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 0.7197487267758593, |
|
"grad_norm": 4.428260390842955, |
|
"learning_rate": 2.8034928297600457e-06, |
|
"loss": 0.3572, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.7204585381434884, |
|
"grad_norm": 5.5528766539260825, |
|
"learning_rate": 2.7963935822802784e-06, |
|
"loss": 0.3605, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.7211683495111174, |
|
"grad_norm": 3.3271150324051124, |
|
"learning_rate": 2.7892943348005115e-06, |
|
"loss": 0.3646, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.7218781608787465, |
|
"grad_norm": 4.353636452465487, |
|
"learning_rate": 2.782195087320744e-06, |
|
"loss": 0.3745, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 0.7225879722463755, |
|
"grad_norm": 4.938483709090633, |
|
"learning_rate": 2.7750958398409773e-06, |
|
"loss": 0.3586, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.7232977836140045, |
|
"grad_norm": 4.667393928494558, |
|
"learning_rate": 2.76799659236121e-06, |
|
"loss": 0.3526, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 0.7240075949816336, |
|
"grad_norm": 5.312814121573459, |
|
"learning_rate": 2.760897344881443e-06, |
|
"loss": 0.3539, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.7247174063492627, |
|
"grad_norm": 3.102848391211554, |
|
"learning_rate": 2.7537980974016757e-06, |
|
"loss": 0.3453, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 0.7254272177168918, |
|
"grad_norm": 3.036840145081599, |
|
"learning_rate": 2.7466988499219084e-06, |
|
"loss": 0.3627, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.7261370290845208, |
|
"grad_norm": 5.647990352632265, |
|
"learning_rate": 2.739599602442141e-06, |
|
"loss": 0.3555, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 0.7268468404521499, |
|
"grad_norm": 4.66342024342857, |
|
"learning_rate": 2.732500354962374e-06, |
|
"loss": 0.3722, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.7275566518197789, |
|
"grad_norm": 3.168307885423117, |
|
"learning_rate": 2.725401107482607e-06, |
|
"loss": 0.3673, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.728266463187408, |
|
"grad_norm": 4.968172759395676, |
|
"learning_rate": 2.71830186000284e-06, |
|
"loss": 0.3556, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.728976274555037, |
|
"grad_norm": 3.5154935991341123, |
|
"learning_rate": 2.7112026125230726e-06, |
|
"loss": 0.3593, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 0.729686085922666, |
|
"grad_norm": 5.0083468168620655, |
|
"learning_rate": 2.7041033650433057e-06, |
|
"loss": 0.3592, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.7303958972902951, |
|
"grad_norm": 3.379094612224907, |
|
"learning_rate": 2.697004117563539e-06, |
|
"loss": 0.3643, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 0.7311057086579241, |
|
"grad_norm": 4.180270451928424, |
|
"learning_rate": 2.6899048700837715e-06, |
|
"loss": 0.3574, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.7318155200255532, |
|
"grad_norm": 4.640198570927561, |
|
"learning_rate": 2.6828056226040046e-06, |
|
"loss": 0.3578, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 0.7325253313931822, |
|
"grad_norm": 10.365125402351024, |
|
"learning_rate": 2.675706375124237e-06, |
|
"loss": 0.3614, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 0.7332351427608114, |
|
"grad_norm": 15.355341780635097, |
|
"learning_rate": 2.6686071276444695e-06, |
|
"loss": 0.3631, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"grad_norm": 6.738981517513828, |
|
"learning_rate": 2.6615078801647026e-06, |
|
"loss": 0.3493, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 0.7346547654960695, |
|
"grad_norm": 7.55570609393924, |
|
"learning_rate": 2.6544086326849357e-06, |
|
"loss": 0.371, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.7353645768636985, |
|
"grad_norm": 2.6482961979611526, |
|
"learning_rate": 2.6473093852051684e-06, |
|
"loss": 0.3591, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 0.7360743882313275, |
|
"grad_norm": 8.054548870993123, |
|
"learning_rate": 2.6402101377254015e-06, |
|
"loss": 0.3577, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 0.7367841995989566, |
|
"grad_norm": 7.370207938746124, |
|
"learning_rate": 2.633110890245634e-06, |
|
"loss": 0.3509, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 0.7374940109665856, |
|
"grad_norm": 8.915363239178143, |
|
"learning_rate": 2.6260116427658673e-06, |
|
"loss": 0.3595, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 0.7382038223342147, |
|
"grad_norm": 6.453539668987391, |
|
"learning_rate": 2.6189123952861e-06, |
|
"loss": 0.3735, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.7389136337018437, |
|
"grad_norm": 13.429374820990935, |
|
"learning_rate": 2.611813147806333e-06, |
|
"loss": 0.343, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 0.7396234450694728, |
|
"grad_norm": 4.019465503184252, |
|
"learning_rate": 2.6047139003265653e-06, |
|
"loss": 0.3619, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 0.7403332564371018, |
|
"grad_norm": 4.77728942914678, |
|
"learning_rate": 2.5976146528467984e-06, |
|
"loss": 0.3602, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 0.7410430678047308, |
|
"grad_norm": 16.82021280745509, |
|
"learning_rate": 2.590515405367031e-06, |
|
"loss": 0.3765, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 0.74175287917236, |
|
"grad_norm": 4.7659520678895735, |
|
"learning_rate": 2.5834161578872642e-06, |
|
"loss": 0.3557, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.742462690539989, |
|
"grad_norm": 5.846901706253607, |
|
"learning_rate": 2.576316910407497e-06, |
|
"loss": 0.3574, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 0.7431725019076181, |
|
"grad_norm": 5.00717365628058, |
|
"learning_rate": 2.56921766292773e-06, |
|
"loss": 0.371, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 0.7438823132752471, |
|
"grad_norm": 12.812616706907704, |
|
"learning_rate": 2.5621184154479627e-06, |
|
"loss": 0.3612, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 0.7445921246428762, |
|
"grad_norm": 2.7312101929568375, |
|
"learning_rate": 2.5550191679681958e-06, |
|
"loss": 0.3551, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 0.7453019360105052, |
|
"grad_norm": 3.0759041075210782, |
|
"learning_rate": 2.5479199204884285e-06, |
|
"loss": 0.3574, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.7460117473781342, |
|
"grad_norm": 7.165278043719281, |
|
"learning_rate": 2.5408206730086616e-06, |
|
"loss": 0.3605, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 0.7467215587457633, |
|
"grad_norm": 4.908665990783306, |
|
"learning_rate": 2.533721425528894e-06, |
|
"loss": 0.3479, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 0.7474313701133923, |
|
"grad_norm": 3.4583261557450227, |
|
"learning_rate": 2.526622178049127e-06, |
|
"loss": 0.3542, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 0.7481411814810214, |
|
"grad_norm": 11.387458565670322, |
|
"learning_rate": 2.5195229305693596e-06, |
|
"loss": 0.3619, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.7488509928486504, |
|
"grad_norm": 10.198798372329442, |
|
"learning_rate": 2.5124236830895927e-06, |
|
"loss": 0.3434, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.7495608042162796, |
|
"grad_norm": 3.893599380410888, |
|
"learning_rate": 2.5053244356098254e-06, |
|
"loss": 0.362, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.7502706155839086, |
|
"grad_norm": 5.107597028464082, |
|
"learning_rate": 2.4982251881300585e-06, |
|
"loss": 0.3688, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 0.7509804269515377, |
|
"grad_norm": 4.219068583835792, |
|
"learning_rate": 2.491125940650291e-06, |
|
"loss": 0.3649, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.7516902383191667, |
|
"grad_norm": 4.535592066198855, |
|
"learning_rate": 2.4840266931705243e-06, |
|
"loss": 0.37, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 0.7524000496867957, |
|
"grad_norm": 3.541264339618074, |
|
"learning_rate": 2.476927445690757e-06, |
|
"loss": 0.3679, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.7531098610544248, |
|
"grad_norm": 4.7884449114332845, |
|
"learning_rate": 2.4698281982109896e-06, |
|
"loss": 0.3472, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 0.7538196724220538, |
|
"grad_norm": 8.667808097909838, |
|
"learning_rate": 2.4627289507312227e-06, |
|
"loss": 0.3704, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 0.7545294837896829, |
|
"grad_norm": 4.925434074834849, |
|
"learning_rate": 2.455629703251456e-06, |
|
"loss": 0.3701, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 0.7552392951573119, |
|
"grad_norm": 3.8594886335750807, |
|
"learning_rate": 2.4485304557716885e-06, |
|
"loss": 0.3662, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 0.755949106524941, |
|
"grad_norm": 4.971536391123703, |
|
"learning_rate": 2.441431208291921e-06, |
|
"loss": 0.35, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.75665891789257, |
|
"grad_norm": 15.055144352578429, |
|
"learning_rate": 2.434331960812154e-06, |
|
"loss": 0.3584, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 0.757368729260199, |
|
"grad_norm": 14.432076661811932, |
|
"learning_rate": 2.427232713332387e-06, |
|
"loss": 0.3621, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 0.7580785406278282, |
|
"grad_norm": 9.810669772230819, |
|
"learning_rate": 2.42013346585262e-06, |
|
"loss": 0.3588, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 0.7587883519954572, |
|
"grad_norm": 5.765479927608821, |
|
"learning_rate": 2.4130342183728527e-06, |
|
"loss": 0.3549, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 0.7594981633630863, |
|
"grad_norm": 13.617197754978974, |
|
"learning_rate": 2.4059349708930854e-06, |
|
"loss": 0.3759, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.7602079747307153, |
|
"grad_norm": 5.614482278416453, |
|
"learning_rate": 2.3988357234133185e-06, |
|
"loss": 0.3376, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 0.7609177860983444, |
|
"grad_norm": 17.701642596831444, |
|
"learning_rate": 2.391736475933551e-06, |
|
"loss": 0.3647, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 0.7616275974659734, |
|
"grad_norm": 4.910333781437824, |
|
"learning_rate": 2.3846372284537843e-06, |
|
"loss": 0.3643, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 0.7623374088336025, |
|
"grad_norm": 3.415309685272355, |
|
"learning_rate": 2.377537980974017e-06, |
|
"loss": 0.3488, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 0.7630472202012315, |
|
"grad_norm": 4.350903829153794, |
|
"learning_rate": 2.3704387334942497e-06, |
|
"loss": 0.3577, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.7637570315688605, |
|
"grad_norm": 3.9361079752185435, |
|
"learning_rate": 2.3633394860144828e-06, |
|
"loss": 0.3591, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 0.7644668429364896, |
|
"grad_norm": 5.913083445040196, |
|
"learning_rate": 2.3562402385347154e-06, |
|
"loss": 0.3486, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 0.7651766543041186, |
|
"grad_norm": 5.982161931863015, |
|
"learning_rate": 2.3491409910549485e-06, |
|
"loss": 0.3714, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 0.7658864656717478, |
|
"grad_norm": 4.5231254195655906, |
|
"learning_rate": 2.3420417435751812e-06, |
|
"loss": 0.3534, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 0.7665962770393768, |
|
"grad_norm": 5.099871081954513, |
|
"learning_rate": 2.334942496095414e-06, |
|
"loss": 0.3509, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.7673060884070059, |
|
"grad_norm": 3.361247181502804, |
|
"learning_rate": 2.327843248615647e-06, |
|
"loss": 0.3692, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 0.7680158997746349, |
|
"grad_norm": 6.553423618292367, |
|
"learning_rate": 2.3207440011358797e-06, |
|
"loss": 0.353, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 0.768725711142264, |
|
"grad_norm": 2.985537513367268, |
|
"learning_rate": 2.3136447536561128e-06, |
|
"loss": 0.3498, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 0.769435522509893, |
|
"grad_norm": 3.0266471519507427, |
|
"learning_rate": 2.3065455061763455e-06, |
|
"loss": 0.3563, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 0.770145333877522, |
|
"grad_norm": 17.644165005698888, |
|
"learning_rate": 2.299446258696578e-06, |
|
"loss": 0.3662, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.7708551452451511, |
|
"grad_norm": 3.1894412768611016, |
|
"learning_rate": 2.2923470112168112e-06, |
|
"loss": 0.3503, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 0.7715649566127801, |
|
"grad_norm": 4.492544324422795, |
|
"learning_rate": 2.285247763737044e-06, |
|
"loss": 0.3436, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 0.7722747679804092, |
|
"grad_norm": 4.173829674998731, |
|
"learning_rate": 2.278148516257277e-06, |
|
"loss": 0.363, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.7729845793480382, |
|
"grad_norm": 3.114718418646357, |
|
"learning_rate": 2.2710492687775097e-06, |
|
"loss": 0.3368, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 0.7736943907156673, |
|
"grad_norm": 2.6323429503484443, |
|
"learning_rate": 2.2639500212977424e-06, |
|
"loss": 0.3489, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.7744042020832964, |
|
"grad_norm": 2.8865277064459223, |
|
"learning_rate": 2.2568507738179755e-06, |
|
"loss": 0.3571, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 0.7751140134509255, |
|
"grad_norm": 8.888602826244627, |
|
"learning_rate": 2.249751526338208e-06, |
|
"loss": 0.3399, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 0.7758238248185545, |
|
"grad_norm": 3.532724353902858, |
|
"learning_rate": 2.2426522788584412e-06, |
|
"loss": 0.3493, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 0.7765336361861835, |
|
"grad_norm": 3.6781547439101883, |
|
"learning_rate": 2.235553031378674e-06, |
|
"loss": 0.3462, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 0.7772434475538126, |
|
"grad_norm": 13.16004359433701, |
|
"learning_rate": 2.2284537838989066e-06, |
|
"loss": 0.3649, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.7779532589214416, |
|
"grad_norm": 9.642968589987298, |
|
"learning_rate": 2.2213545364191397e-06, |
|
"loss": 0.3582, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 0.7786630702890707, |
|
"grad_norm": 6.16050392324128, |
|
"learning_rate": 2.2142552889393724e-06, |
|
"loss": 0.3624, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 0.7793728816566997, |
|
"grad_norm": 4.012346442724565, |
|
"learning_rate": 2.2071560414596055e-06, |
|
"loss": 0.3448, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 0.7800826930243288, |
|
"grad_norm": 2.6066193255622956, |
|
"learning_rate": 2.2000567939798386e-06, |
|
"loss": 0.3644, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 0.7807925043919578, |
|
"grad_norm": 7.331639609512875, |
|
"learning_rate": 2.1929575465000713e-06, |
|
"loss": 0.3515, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.7815023157595868, |
|
"grad_norm": 2.990816174000455, |
|
"learning_rate": 2.185858299020304e-06, |
|
"loss": 0.3505, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 0.782212127127216, |
|
"grad_norm": 3.6112792490950554, |
|
"learning_rate": 2.178759051540537e-06, |
|
"loss": 0.3548, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 0.782921938494845, |
|
"grad_norm": 3.8221043132066286, |
|
"learning_rate": 2.1716598040607697e-06, |
|
"loss": 0.3571, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 0.7836317498624741, |
|
"grad_norm": 7.476265982563856, |
|
"learning_rate": 2.164560556581003e-06, |
|
"loss": 0.3428, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 0.7843415612301031, |
|
"grad_norm": 5.554911455235443, |
|
"learning_rate": 2.1574613091012355e-06, |
|
"loss": 0.354, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.7850513725977322, |
|
"grad_norm": 2.9298081851011117, |
|
"learning_rate": 2.150362061621468e-06, |
|
"loss": 0.3597, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 0.7857611839653612, |
|
"grad_norm": 5.325097733237352, |
|
"learning_rate": 2.1432628141417013e-06, |
|
"loss": 0.3486, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 0.7864709953329903, |
|
"grad_norm": 3.5814394523109114, |
|
"learning_rate": 2.136163566661934e-06, |
|
"loss": 0.3544, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 0.7871808067006193, |
|
"grad_norm": 3.6972554376986, |
|
"learning_rate": 2.129064319182167e-06, |
|
"loss": 0.3546, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 0.7878906180682483, |
|
"grad_norm": 6.754098899246775, |
|
"learning_rate": 2.1219650717023997e-06, |
|
"loss": 0.3537, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.7886004294358774, |
|
"grad_norm": 3.3122898855719876, |
|
"learning_rate": 2.1148658242226324e-06, |
|
"loss": 0.3645, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 0.7893102408035064, |
|
"grad_norm": 2.8223728276754128, |
|
"learning_rate": 2.1077665767428655e-06, |
|
"loss": 0.3599, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 0.7900200521711355, |
|
"grad_norm": 2.5012481292133937, |
|
"learning_rate": 2.100667329263098e-06, |
|
"loss": 0.3486, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 0.7907298635387646, |
|
"grad_norm": 11.033197138630223, |
|
"learning_rate": 2.0935680817833313e-06, |
|
"loss": 0.3467, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 0.7914396749063937, |
|
"grad_norm": 3.730389968284293, |
|
"learning_rate": 2.086468834303564e-06, |
|
"loss": 0.3544, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.7921494862740227, |
|
"grad_norm": 5.898064410181565, |
|
"learning_rate": 2.0793695868237967e-06, |
|
"loss": 0.3477, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.7928592976416518, |
|
"grad_norm": 4.55198088261442, |
|
"learning_rate": 2.0722703393440298e-06, |
|
"loss": 0.3527, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 0.7935691090092808, |
|
"grad_norm": 5.318762071563834, |
|
"learning_rate": 2.0651710918642624e-06, |
|
"loss": 0.3478, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 0.7942789203769098, |
|
"grad_norm": 6.161214607463883, |
|
"learning_rate": 2.0580718443844955e-06, |
|
"loss": 0.3546, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 0.7949887317445389, |
|
"grad_norm": 3.1236830623318537, |
|
"learning_rate": 2.0509725969047282e-06, |
|
"loss": 0.3565, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.7956985431121679, |
|
"grad_norm": 4.197839999078878, |
|
"learning_rate": 2.043873349424961e-06, |
|
"loss": 0.3496, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 0.796408354479797, |
|
"grad_norm": 3.2762330861667515, |
|
"learning_rate": 2.036774101945194e-06, |
|
"loss": 0.348, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 0.797118165847426, |
|
"grad_norm": 5.961140258537488, |
|
"learning_rate": 2.0296748544654267e-06, |
|
"loss": 0.3637, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 0.797827977215055, |
|
"grad_norm": 2.0964322412177263, |
|
"learning_rate": 2.0225756069856598e-06, |
|
"loss": 0.341, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.7985377885826842, |
|
"grad_norm": 11.078753928620895, |
|
"learning_rate": 2.0154763595058925e-06, |
|
"loss": 0.3582, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.7992475999503132, |
|
"grad_norm": 11.615859636107096, |
|
"learning_rate": 2.008377112026125e-06, |
|
"loss": 0.3504, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 0.7999574113179423, |
|
"grad_norm": 9.267486623233392, |
|
"learning_rate": 2.0012778645463582e-06, |
|
"loss": 0.3585, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 0.8006672226855713, |
|
"grad_norm": 3.7638868565818613, |
|
"learning_rate": 1.994178617066591e-06, |
|
"loss": 0.3572, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 0.8013770340532004, |
|
"grad_norm": 4.274096264509613, |
|
"learning_rate": 1.987079369586824e-06, |
|
"loss": 0.352, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 0.8020868454208294, |
|
"grad_norm": 3.0651382288741824, |
|
"learning_rate": 1.979980122107057e-06, |
|
"loss": 0.3487, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.8027966567884585, |
|
"grad_norm": 2.585139354778811, |
|
"learning_rate": 1.9728808746272894e-06, |
|
"loss": 0.3509, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 0.8035064681560875, |
|
"grad_norm": 3.4507245702670013, |
|
"learning_rate": 1.9657816271475225e-06, |
|
"loss": 0.3605, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 0.8042162795237165, |
|
"grad_norm": 2.168473869134373, |
|
"learning_rate": 1.9586823796677556e-06, |
|
"loss": 0.3473, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 0.8049260908913456, |
|
"grad_norm": 3.3138804394827126, |
|
"learning_rate": 1.9515831321879883e-06, |
|
"loss": 0.3451, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 0.8056359022589746, |
|
"grad_norm": 2.9967871033094284, |
|
"learning_rate": 1.9444838847082214e-06, |
|
"loss": 0.3586, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.8063457136266037, |
|
"grad_norm": 2.218098420224771, |
|
"learning_rate": 1.9373846372284536e-06, |
|
"loss": 0.3629, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 0.8070555249942328, |
|
"grad_norm": 4.124703498173868, |
|
"learning_rate": 1.9302853897486867e-06, |
|
"loss": 0.349, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 0.8077653363618619, |
|
"grad_norm": 4.336301638014139, |
|
"learning_rate": 1.92318614226892e-06, |
|
"loss": 0.3474, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 0.8084751477294909, |
|
"grad_norm": 5.67446885361532, |
|
"learning_rate": 1.9160868947891525e-06, |
|
"loss": 0.3577, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 0.80918495909712, |
|
"grad_norm": 5.496735292829206, |
|
"learning_rate": 1.9089876473093856e-06, |
|
"loss": 0.3606, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.809894770464749, |
|
"grad_norm": 2.3181036706188505, |
|
"learning_rate": 1.901888399829618e-06, |
|
"loss": 0.3573, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 0.810604581832378, |
|
"grad_norm": 4.2823563842257695, |
|
"learning_rate": 1.894789152349851e-06, |
|
"loss": 0.3456, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 0.8113143932000071, |
|
"grad_norm": 9.041186743139388, |
|
"learning_rate": 1.8876899048700838e-06, |
|
"loss": 0.3493, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 0.8120242045676361, |
|
"grad_norm": 2.135565041402105, |
|
"learning_rate": 1.8805906573903167e-06, |
|
"loss": 0.3573, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.8127340159352652, |
|
"grad_norm": 4.2654812969837295, |
|
"learning_rate": 1.8734914099105498e-06, |
|
"loss": 0.3462, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.8134438273028942, |
|
"grad_norm": 3.0226693302416465, |
|
"learning_rate": 1.8663921624307823e-06, |
|
"loss": 0.3399, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 0.8141536386705233, |
|
"grad_norm": 5.674429424631266, |
|
"learning_rate": 1.8592929149510152e-06, |
|
"loss": 0.3445, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 0.8148634500381524, |
|
"grad_norm": 5.107735874370569, |
|
"learning_rate": 1.852193667471248e-06, |
|
"loss": 0.3498, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 0.8155732614057815, |
|
"grad_norm": 4.211595369240753, |
|
"learning_rate": 1.8450944199914812e-06, |
|
"loss": 0.3509, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 0.8162830727734105, |
|
"grad_norm": 3.2874196387814485, |
|
"learning_rate": 1.837995172511714e-06, |
|
"loss": 0.352, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.8169928841410395, |
|
"grad_norm": 2.51051446421893, |
|
"learning_rate": 1.8308959250319465e-06, |
|
"loss": 0.3445, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 0.8177026955086686, |
|
"grad_norm": 13.267874952448258, |
|
"learning_rate": 1.8237966775521796e-06, |
|
"loss": 0.354, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.8184125068762976, |
|
"grad_norm": 4.900767095828628, |
|
"learning_rate": 1.8166974300724125e-06, |
|
"loss": 0.3594, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 0.8191223182439267, |
|
"grad_norm": 8.3230418317363, |
|
"learning_rate": 1.8095981825926454e-06, |
|
"loss": 0.3471, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.8198321296115557, |
|
"grad_norm": 2.8346340256917815, |
|
"learning_rate": 1.8024989351128783e-06, |
|
"loss": 0.3695, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.8205419409791848, |
|
"grad_norm": 5.533189262204602, |
|
"learning_rate": 1.795399687633111e-06, |
|
"loss": 0.3728, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 0.8212517523468138, |
|
"grad_norm": 3.187071233846852, |
|
"learning_rate": 1.7883004401533439e-06, |
|
"loss": 0.3464, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 0.8219615637144428, |
|
"grad_norm": 3.9314257894883937, |
|
"learning_rate": 1.7812011926735768e-06, |
|
"loss": 0.3532, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 0.8226713750820719, |
|
"grad_norm": 3.6730541227348277, |
|
"learning_rate": 1.7741019451938097e-06, |
|
"loss": 0.3565, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 0.823381186449701, |
|
"grad_norm": 2.9136274666194306, |
|
"learning_rate": 1.7670026977140426e-06, |
|
"loss": 0.3603, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.8240909978173301, |
|
"grad_norm": 6.106992201577366, |
|
"learning_rate": 1.7599034502342754e-06, |
|
"loss": 0.3484, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 0.8248008091849591, |
|
"grad_norm": 4.230462903274037, |
|
"learning_rate": 1.7528042027545081e-06, |
|
"loss": 0.35, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 0.8255106205525882, |
|
"grad_norm": 3.376064932155992, |
|
"learning_rate": 1.745704955274741e-06, |
|
"loss": 0.35, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 0.8262204319202172, |
|
"grad_norm": 2.8424779046250612, |
|
"learning_rate": 1.738605707794974e-06, |
|
"loss": 0.3552, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 0.8269302432878463, |
|
"grad_norm": 3.6044824322491347, |
|
"learning_rate": 1.7315064603152068e-06, |
|
"loss": 0.3633, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.8276400546554753, |
|
"grad_norm": 3.3041226058016324, |
|
"learning_rate": 1.7244072128354397e-06, |
|
"loss": 0.3453, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 0.8283498660231043, |
|
"grad_norm": 3.461976575510189, |
|
"learning_rate": 1.7173079653556724e-06, |
|
"loss": 0.3607, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 0.8290596773907334, |
|
"grad_norm": 3.96624408516477, |
|
"learning_rate": 1.7102087178759052e-06, |
|
"loss": 0.3431, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 0.8297694887583624, |
|
"grad_norm": 10.446490548963004, |
|
"learning_rate": 1.7031094703961381e-06, |
|
"loss": 0.3518, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 0.8304793001259915, |
|
"grad_norm": 2.4894424633296888, |
|
"learning_rate": 1.696010222916371e-06, |
|
"loss": 0.3618, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.8311891114936206, |
|
"grad_norm": 3.7097939930537494, |
|
"learning_rate": 1.688910975436604e-06, |
|
"loss": 0.3577, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 0.8318989228612497, |
|
"grad_norm": 2.591589818986439, |
|
"learning_rate": 1.6818117279568366e-06, |
|
"loss": 0.3454, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 0.8326087342288787, |
|
"grad_norm": 3.0415000039562816, |
|
"learning_rate": 1.6747124804770695e-06, |
|
"loss": 0.3514, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 0.8333185455965078, |
|
"grad_norm": 3.185465708245909, |
|
"learning_rate": 1.6676132329973024e-06, |
|
"loss": 0.3437, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 0.8340283569641368, |
|
"grad_norm": 8.153250864972724, |
|
"learning_rate": 1.6605139855175353e-06, |
|
"loss": 0.3418, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.8347381683317658, |
|
"grad_norm": 17.15311701699765, |
|
"learning_rate": 1.6534147380377682e-06, |
|
"loss": 0.3533, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 0.8354479796993949, |
|
"grad_norm": 2.956498750624732, |
|
"learning_rate": 1.6463154905580008e-06, |
|
"loss": 0.3539, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 0.8361577910670239, |
|
"grad_norm": 5.182422880739596, |
|
"learning_rate": 1.6392162430782337e-06, |
|
"loss": 0.3543, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.836867602434653, |
|
"grad_norm": 5.245759433932608, |
|
"learning_rate": 1.6321169955984666e-06, |
|
"loss": 0.3506, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 0.837577413802282, |
|
"grad_norm": 2.8777113855306, |
|
"learning_rate": 1.6250177481186997e-06, |
|
"loss": 0.351, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.838287225169911, |
|
"grad_norm": 3.317900354948997, |
|
"learning_rate": 1.6179185006389326e-06, |
|
"loss": 0.3426, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 0.8389970365375401, |
|
"grad_norm": 2.7259998460321295, |
|
"learning_rate": 1.610819253159165e-06, |
|
"loss": 0.3416, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 0.8397068479051693, |
|
"grad_norm": 7.203501395811214, |
|
"learning_rate": 1.603720005679398e-06, |
|
"loss": 0.346, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 0.8404166592727983, |
|
"grad_norm": 3.5281319520469343, |
|
"learning_rate": 1.596620758199631e-06, |
|
"loss": 0.3415, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.8411264706404273, |
|
"grad_norm": 2.8068995456792085, |
|
"learning_rate": 1.589521510719864e-06, |
|
"loss": 0.3506, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.8418362820080564, |
|
"grad_norm": 5.8571413992691, |
|
"learning_rate": 1.5824222632400968e-06, |
|
"loss": 0.3492, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 0.8425460933756854, |
|
"grad_norm": 2.8473277239745625, |
|
"learning_rate": 1.5753230157603295e-06, |
|
"loss": 0.3464, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 0.8432559047433145, |
|
"grad_norm": 2.743001963303042, |
|
"learning_rate": 1.5682237682805624e-06, |
|
"loss": 0.3457, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 0.8439657161109435, |
|
"grad_norm": 10.213481491528695, |
|
"learning_rate": 1.5611245208007953e-06, |
|
"loss": 0.3578, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 0.8446755274785726, |
|
"grad_norm": 3.735755256117381, |
|
"learning_rate": 1.5540252733210282e-06, |
|
"loss": 0.3503, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.8453853388462016, |
|
"grad_norm": 4.459890794830131, |
|
"learning_rate": 1.546926025841261e-06, |
|
"loss": 0.3409, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 0.8460951502138306, |
|
"grad_norm": 4.8029617986261295, |
|
"learning_rate": 1.5398267783614938e-06, |
|
"loss": 0.3538, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 0.8468049615814597, |
|
"grad_norm": 7.056776646894436, |
|
"learning_rate": 1.5327275308817267e-06, |
|
"loss": 0.346, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 0.8475147729490888, |
|
"grad_norm": 7.364554673266408, |
|
"learning_rate": 1.5256282834019595e-06, |
|
"loss": 0.3478, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 0.8482245843167179, |
|
"grad_norm": 3.605377806044163, |
|
"learning_rate": 1.5185290359221924e-06, |
|
"loss": 0.3499, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.8489343956843469, |
|
"grad_norm": 2.452400869581193, |
|
"learning_rate": 1.5114297884424253e-06, |
|
"loss": 0.339, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 0.849644207051976, |
|
"grad_norm": 2.870621078183671, |
|
"learning_rate": 1.504330540962658e-06, |
|
"loss": 0.3441, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 0.850354018419605, |
|
"grad_norm": 4.473314694561015, |
|
"learning_rate": 1.4972312934828909e-06, |
|
"loss": 0.3559, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 0.851063829787234, |
|
"grad_norm": 5.114834992133615, |
|
"learning_rate": 1.4901320460031238e-06, |
|
"loss": 0.3541, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 0.8517736411548631, |
|
"grad_norm": 12.083657543428806, |
|
"learning_rate": 1.4830327985233567e-06, |
|
"loss": 0.358, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.8524834525224921, |
|
"grad_norm": 3.7361409384047923, |
|
"learning_rate": 1.4759335510435896e-06, |
|
"loss": 0.3395, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 0.8531932638901212, |
|
"grad_norm": 3.4424635097779657, |
|
"learning_rate": 1.4688343035638222e-06, |
|
"loss": 0.3593, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 0.8539030752577502, |
|
"grad_norm": 1.9645069008952134, |
|
"learning_rate": 1.4617350560840551e-06, |
|
"loss": 0.3508, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 0.8546128866253793, |
|
"grad_norm": 4.627652849790996, |
|
"learning_rate": 1.454635808604288e-06, |
|
"loss": 0.3408, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 0.8553226979930083, |
|
"grad_norm": 3.831924600437753, |
|
"learning_rate": 1.447536561124521e-06, |
|
"loss": 0.3487, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.8560325093606375, |
|
"grad_norm": 4.570169273747359, |
|
"learning_rate": 1.4404373136447538e-06, |
|
"loss": 0.3415, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 0.8567423207282665, |
|
"grad_norm": 4.6135182738223595, |
|
"learning_rate": 1.4333380661649865e-06, |
|
"loss": 0.3604, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 0.8574521320958955, |
|
"grad_norm": 4.751574062951781, |
|
"learning_rate": 1.4262388186852194e-06, |
|
"loss": 0.3636, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 0.8581619434635246, |
|
"grad_norm": 3.378379003665899, |
|
"learning_rate": 1.4191395712054523e-06, |
|
"loss": 0.3432, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 0.8588717548311536, |
|
"grad_norm": 16.540688675093385, |
|
"learning_rate": 1.4120403237256851e-06, |
|
"loss": 0.3396, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.8595815661987827, |
|
"grad_norm": 4.814104030359969, |
|
"learning_rate": 1.404941076245918e-06, |
|
"loss": 0.3461, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 0.8602913775664117, |
|
"grad_norm": 10.051601410520883, |
|
"learning_rate": 1.3978418287661507e-06, |
|
"loss": 0.3447, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 0.8610011889340408, |
|
"grad_norm": 2.642610961406552, |
|
"learning_rate": 1.3907425812863836e-06, |
|
"loss": 0.3361, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 0.8617110003016698, |
|
"grad_norm": 4.614329866790318, |
|
"learning_rate": 1.3836433338066165e-06, |
|
"loss": 0.3528, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 0.8624208116692988, |
|
"grad_norm": 5.744791519089807, |
|
"learning_rate": 1.3765440863268496e-06, |
|
"loss": 0.3607, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.8631306230369279, |
|
"grad_norm": 3.9315757108747618, |
|
"learning_rate": 1.3694448388470825e-06, |
|
"loss": 0.3598, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 0.8638404344045569, |
|
"grad_norm": 5.812032059514415, |
|
"learning_rate": 1.3623455913673154e-06, |
|
"loss": 0.3406, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 0.8645502457721861, |
|
"grad_norm": 3.1863830261887784, |
|
"learning_rate": 1.3552463438875478e-06, |
|
"loss": 0.3435, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 0.8652600571398151, |
|
"grad_norm": 3.164333810889643, |
|
"learning_rate": 1.348147096407781e-06, |
|
"loss": 0.3477, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 0.8659698685074442, |
|
"grad_norm": 4.132090281780686, |
|
"learning_rate": 1.3410478489280138e-06, |
|
"loss": 0.3476, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.8666796798750732, |
|
"grad_norm": 3.050674443165291, |
|
"learning_rate": 1.3339486014482467e-06, |
|
"loss": 0.3451, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 0.8673894912427023, |
|
"grad_norm": 5.9765372634611476, |
|
"learning_rate": 1.3268493539684796e-06, |
|
"loss": 0.3516, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 0.8680993026103313, |
|
"grad_norm": 10.801904177839997, |
|
"learning_rate": 1.3197501064887123e-06, |
|
"loss": 0.3525, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 0.8688091139779603, |
|
"grad_norm": 10.795290079471496, |
|
"learning_rate": 1.3126508590089452e-06, |
|
"loss": 0.3458, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 0.8695189253455894, |
|
"grad_norm": 5.185082480943749, |
|
"learning_rate": 1.305551611529178e-06, |
|
"loss": 0.3471, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.8702287367132184, |
|
"grad_norm": 5.967453058115287, |
|
"learning_rate": 1.298452364049411e-06, |
|
"loss": 0.3593, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 0.8709385480808475, |
|
"grad_norm": 2.9260514202439807, |
|
"learning_rate": 1.2913531165696439e-06, |
|
"loss": 0.3401, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 0.8716483594484765, |
|
"grad_norm": 3.5904246593138924, |
|
"learning_rate": 1.2842538690898765e-06, |
|
"loss": 0.3407, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 0.8723581708161057, |
|
"grad_norm": 5.983622275696177, |
|
"learning_rate": 1.2771546216101094e-06, |
|
"loss": 0.3453, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 0.8730679821837347, |
|
"grad_norm": 4.330501853746522, |
|
"learning_rate": 1.2700553741303423e-06, |
|
"loss": 0.3494, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.8737777935513638, |
|
"grad_norm": 3.642467957948953, |
|
"learning_rate": 1.2629561266505752e-06, |
|
"loss": 0.3458, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 0.8744876049189928, |
|
"grad_norm": 5.610238111701037, |
|
"learning_rate": 1.255856879170808e-06, |
|
"loss": 0.3533, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 0.8751974162866218, |
|
"grad_norm": 5.47126817738485, |
|
"learning_rate": 1.248757631691041e-06, |
|
"loss": 0.3685, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 0.8759072276542509, |
|
"grad_norm": 2.9438005039273953, |
|
"learning_rate": 1.2416583842112737e-06, |
|
"loss": 0.3325, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 0.8766170390218799, |
|
"grad_norm": 3.7896440417507415, |
|
"learning_rate": 1.2345591367315065e-06, |
|
"loss": 0.3445, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.877326850389509, |
|
"grad_norm": 5.754468251004695, |
|
"learning_rate": 1.2274598892517394e-06, |
|
"loss": 0.3374, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 0.878036661757138, |
|
"grad_norm": 4.267624406753751, |
|
"learning_rate": 1.2203606417719723e-06, |
|
"loss": 0.341, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 0.8787464731247671, |
|
"grad_norm": 3.1963277785921993, |
|
"learning_rate": 1.2132613942922052e-06, |
|
"loss": 0.3381, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 0.8794562844923961, |
|
"grad_norm": 6.653906616284059, |
|
"learning_rate": 1.206162146812438e-06, |
|
"loss": 0.3506, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 0.8801660958600251, |
|
"grad_norm": 3.897977105597471, |
|
"learning_rate": 1.1990628993326708e-06, |
|
"loss": 0.3475, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.8808759072276543, |
|
"grad_norm": 4.962651576299262, |
|
"learning_rate": 1.1919636518529037e-06, |
|
"loss": 0.349, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 0.8815857185952833, |
|
"grad_norm": 5.136741390825168, |
|
"learning_rate": 1.1848644043731366e-06, |
|
"loss": 0.3465, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 0.8822955299629124, |
|
"grad_norm": 4.445543310701251, |
|
"learning_rate": 1.1777651568933695e-06, |
|
"loss": 0.3548, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 0.8830053413305414, |
|
"grad_norm": 20.40372637998409, |
|
"learning_rate": 1.1706659094136021e-06, |
|
"loss": 0.3583, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 0.8837151526981705, |
|
"grad_norm": 3.982374880512643, |
|
"learning_rate": 1.163566661933835e-06, |
|
"loss": 0.3317, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.8844249640657995, |
|
"grad_norm": 32.55413999411799, |
|
"learning_rate": 1.156467414454068e-06, |
|
"loss": 0.3514, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 0.8851347754334286, |
|
"grad_norm": 5.420145750098025, |
|
"learning_rate": 1.1493681669743008e-06, |
|
"loss": 0.3318, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 0.8858445868010576, |
|
"grad_norm": 3.685854173880656, |
|
"learning_rate": 1.1422689194945337e-06, |
|
"loss": 0.3429, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 0.8865543981686866, |
|
"grad_norm": 4.6974765931702605, |
|
"learning_rate": 1.1351696720147664e-06, |
|
"loss": 0.357, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 0.8872642095363157, |
|
"grad_norm": 6.795504660900696, |
|
"learning_rate": 1.1280704245349995e-06, |
|
"loss": 0.3531, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.8879740209039447, |
|
"grad_norm": 4.927867549600845, |
|
"learning_rate": 1.1209711770552324e-06, |
|
"loss": 0.3647, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 0.8886838322715739, |
|
"grad_norm": 70.3319920713418, |
|
"learning_rate": 1.113871929575465e-06, |
|
"loss": 0.3481, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 0.8893936436392029, |
|
"grad_norm": 29.187269789239732, |
|
"learning_rate": 1.106772682095698e-06, |
|
"loss": 0.3487, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 0.890103455006832, |
|
"grad_norm": 2.619165987059257, |
|
"learning_rate": 1.0996734346159308e-06, |
|
"loss": 0.3557, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 0.890813266374461, |
|
"grad_norm": 5.724483375383932, |
|
"learning_rate": 1.0925741871361637e-06, |
|
"loss": 0.3587, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.89152307774209, |
|
"grad_norm": 4.2668973076468, |
|
"learning_rate": 1.0854749396563966e-06, |
|
"loss": 0.3462, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 0.8922328891097191, |
|
"grad_norm": 9.234745768295488, |
|
"learning_rate": 1.0783756921766293e-06, |
|
"loss": 0.3537, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 0.8929427004773481, |
|
"grad_norm": 3.665665785771113, |
|
"learning_rate": 1.0712764446968622e-06, |
|
"loss": 0.3643, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 0.8936525118449772, |
|
"grad_norm": 2.6258893539339656, |
|
"learning_rate": 1.064177197217095e-06, |
|
"loss": 0.3338, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 0.8943623232126062, |
|
"grad_norm": 3.154491930622594, |
|
"learning_rate": 1.057077949737328e-06, |
|
"loss": 0.3444, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.8950721345802353, |
|
"grad_norm": 7.836052713310002, |
|
"learning_rate": 1.0499787022575608e-06, |
|
"loss": 0.3628, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 0.8957819459478643, |
|
"grad_norm": 3.8943175763479996, |
|
"learning_rate": 1.0428794547777935e-06, |
|
"loss": 0.3403, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 0.8964917573154934, |
|
"grad_norm": 15.29553673398478, |
|
"learning_rate": 1.0357802072980264e-06, |
|
"loss": 0.3521, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 0.8972015686831225, |
|
"grad_norm": 4.442650541355824, |
|
"learning_rate": 1.0286809598182595e-06, |
|
"loss": 0.3342, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 0.8979113800507516, |
|
"grad_norm": 3.9047310665092247, |
|
"learning_rate": 1.0215817123384922e-06, |
|
"loss": 0.3427, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.8986211914183806, |
|
"grad_norm": 2.1332446352398544, |
|
"learning_rate": 1.014482464858725e-06, |
|
"loss": 0.349, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 0.8993310027860096, |
|
"grad_norm": 2.8714716164962923, |
|
"learning_rate": 1.0073832173789578e-06, |
|
"loss": 0.357, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 0.9000408141536387, |
|
"grad_norm": 5.513019742153847, |
|
"learning_rate": 1.0002839698991909e-06, |
|
"loss": 0.3404, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 0.9007506255212677, |
|
"grad_norm": 3.940129513886605, |
|
"learning_rate": 9.931847224194237e-07, |
|
"loss": 0.3637, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 0.9014604368888968, |
|
"grad_norm": 3.9515535744587256, |
|
"learning_rate": 9.860854749396564e-07, |
|
"loss": 0.3498, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.9021702482565258, |
|
"grad_norm": 3.0069372274862234, |
|
"learning_rate": 9.789862274598893e-07, |
|
"loss": 0.3398, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 0.9028800596241549, |
|
"grad_norm": 3.5043049442535072, |
|
"learning_rate": 9.718869799801222e-07, |
|
"loss": 0.339, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 0.9035898709917839, |
|
"grad_norm": 4.7818413498969825, |
|
"learning_rate": 9.64787732500355e-07, |
|
"loss": 0.3482, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 0.9042996823594129, |
|
"grad_norm": 2.9143937043517485, |
|
"learning_rate": 9.57688485020588e-07, |
|
"loss": 0.3289, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 0.9050094937270421, |
|
"grad_norm": 3.530470062388488, |
|
"learning_rate": 9.505892375408208e-07, |
|
"loss": 0.3406, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.9057193050946711, |
|
"grad_norm": 3.6289940943514245, |
|
"learning_rate": 9.434899900610537e-07, |
|
"loss": 0.343, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 0.9064291164623002, |
|
"grad_norm": 11.92232636233806, |
|
"learning_rate": 9.363907425812864e-07, |
|
"loss": 0.3538, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 0.9071389278299292, |
|
"grad_norm": 3.3864038291963787, |
|
"learning_rate": 9.292914951015193e-07, |
|
"loss": 0.3361, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 0.9078487391975583, |
|
"grad_norm": 4.345114007441839, |
|
"learning_rate": 9.221922476217522e-07, |
|
"loss": 0.3307, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 0.9085585505651873, |
|
"grad_norm": 3.2046183568204687, |
|
"learning_rate": 9.15093000141985e-07, |
|
"loss": 0.3467, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.9092683619328163, |
|
"grad_norm": 3.030859855481088, |
|
"learning_rate": 9.079937526622179e-07, |
|
"loss": 0.3467, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 0.9099781733004454, |
|
"grad_norm": 4.579582289306875, |
|
"learning_rate": 9.008945051824507e-07, |
|
"loss": 0.3232, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 0.9106879846680744, |
|
"grad_norm": 3.760749336756688, |
|
"learning_rate": 8.937952577026836e-07, |
|
"loss": 0.3467, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 0.9113977960357035, |
|
"grad_norm": 3.179418594295822, |
|
"learning_rate": 8.866960102229165e-07, |
|
"loss": 0.3473, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 0.9121076074033325, |
|
"grad_norm": 3.983021666456075, |
|
"learning_rate": 8.795967627431492e-07, |
|
"loss": 0.3587, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.9128174187709616, |
|
"grad_norm": 2.6025747411648243, |
|
"learning_rate": 8.724975152633821e-07, |
|
"loss": 0.3462, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 0.9135272301385907, |
|
"grad_norm": 4.3088037403974315, |
|
"learning_rate": 8.65398267783615e-07, |
|
"loss": 0.3428, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 0.9142370415062198, |
|
"grad_norm": 3.7771085521562644, |
|
"learning_rate": 8.582990203038478e-07, |
|
"loss": 0.3398, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 0.9149468528738488, |
|
"grad_norm": 2.5115102656996853, |
|
"learning_rate": 8.511997728240808e-07, |
|
"loss": 0.3419, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 0.9156566642414778, |
|
"grad_norm": 2.646423568943871, |
|
"learning_rate": 8.441005253443135e-07, |
|
"loss": 0.3326, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.9163664756091069, |
|
"grad_norm": 4.308215071259538, |
|
"learning_rate": 8.370012778645465e-07, |
|
"loss": 0.3383, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 0.9170762869767359, |
|
"grad_norm": 7.273858221430791, |
|
"learning_rate": 8.299020303847794e-07, |
|
"loss": 0.3411, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 0.917786098344365, |
|
"grad_norm": 3.1600055981634183, |
|
"learning_rate": 8.228027829050122e-07, |
|
"loss": 0.3577, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 0.918495909711994, |
|
"grad_norm": 6.08255963796338, |
|
"learning_rate": 8.15703535425245e-07, |
|
"loss": 0.3589, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 0.9192057210796231, |
|
"grad_norm": 4.397885394689723, |
|
"learning_rate": 8.086042879454778e-07, |
|
"loss": 0.3492, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.9199155324472521, |
|
"grad_norm": 227.99760672787355, |
|
"learning_rate": 8.015050404657107e-07, |
|
"loss": 0.3346, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 0.9206253438148811, |
|
"grad_norm": 2.2307237070418853, |
|
"learning_rate": 7.944057929859436e-07, |
|
"loss": 0.3441, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 0.9213351551825103, |
|
"grad_norm": 5.180228064847272, |
|
"learning_rate": 7.873065455061764e-07, |
|
"loss": 0.3465, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 0.9220449665501393, |
|
"grad_norm": 3.2003044967213836, |
|
"learning_rate": 7.802072980264093e-07, |
|
"loss": 0.3425, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 0.9227547779177684, |
|
"grad_norm": 2.734492726273123, |
|
"learning_rate": 7.731080505466421e-07, |
|
"loss": 0.3403, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.9234645892853974, |
|
"grad_norm": 2.825363146947483, |
|
"learning_rate": 7.66008803066875e-07, |
|
"loss": 0.3644, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 0.9241744006530265, |
|
"grad_norm": 6.94935444401322, |
|
"learning_rate": 7.589095555871078e-07, |
|
"loss": 0.3498, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 0.9248842120206555, |
|
"grad_norm": 2.8121909722558924, |
|
"learning_rate": 7.518103081073406e-07, |
|
"loss": 0.356, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 0.9255940233882846, |
|
"grad_norm": 2.7024231170054946, |
|
"learning_rate": 7.447110606275735e-07, |
|
"loss": 0.3415, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 0.9263038347559136, |
|
"grad_norm": 2.9617596087956195, |
|
"learning_rate": 7.376118131478063e-07, |
|
"loss": 0.3372, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.9270136461235426, |
|
"grad_norm": 42.5976926609076, |
|
"learning_rate": 7.305125656680392e-07, |
|
"loss": 0.3541, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 0.9277234574911717, |
|
"grad_norm": 3.769476187835692, |
|
"learning_rate": 7.234133181882722e-07, |
|
"loss": 0.3594, |
|
"step": 13070 |
|
}, |
|
{ |
|
"epoch": 0.9284332688588007, |
|
"grad_norm": 3.749361674379726, |
|
"learning_rate": 7.163140707085049e-07, |
|
"loss": 0.3348, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 0.9291430802264298, |
|
"grad_norm": 2.5267280447133937, |
|
"learning_rate": 7.092148232287379e-07, |
|
"loss": 0.3579, |
|
"step": 13090 |
|
}, |
|
{ |
|
"epoch": 0.9298528915940589, |
|
"grad_norm": 3.0968195473762097, |
|
"learning_rate": 7.021155757489707e-07, |
|
"loss": 0.3392, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.930562702961688, |
|
"grad_norm": 3.9129176862736674, |
|
"learning_rate": 6.950163282692035e-07, |
|
"loss": 0.3533, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 0.931272514329317, |
|
"grad_norm": 2.7485456874581122, |
|
"learning_rate": 6.879170807894364e-07, |
|
"loss": 0.3399, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 0.9319823256969461, |
|
"grad_norm": 4.769184944849367, |
|
"learning_rate": 6.808178333096692e-07, |
|
"loss": 0.3551, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 0.9326921370645751, |
|
"grad_norm": 2.8275717207772098, |
|
"learning_rate": 6.737185858299021e-07, |
|
"loss": 0.348, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 0.9334019484322041, |
|
"grad_norm": 2.1023857426151595, |
|
"learning_rate": 6.66619338350135e-07, |
|
"loss": 0.3381, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.9341117597998332, |
|
"grad_norm": 2.8745163990655125, |
|
"learning_rate": 6.595200908703678e-07, |
|
"loss": 0.3488, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 0.9348215711674622, |
|
"grad_norm": 3.97821451395574, |
|
"learning_rate": 6.524208433906007e-07, |
|
"loss": 0.349, |
|
"step": 13170 |
|
}, |
|
{ |
|
"epoch": 0.9355313825350913, |
|
"grad_norm": 7.304369226663597, |
|
"learning_rate": 6.453215959108335e-07, |
|
"loss": 0.352, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 0.9362411939027203, |
|
"grad_norm": 4.654909122469299, |
|
"learning_rate": 6.382223484310663e-07, |
|
"loss": 0.3478, |
|
"step": 13190 |
|
}, |
|
{ |
|
"epoch": 0.9369510052703494, |
|
"grad_norm": 3.4074758383445296, |
|
"learning_rate": 6.311231009512992e-07, |
|
"loss": 0.3265, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.9376608166379785, |
|
"grad_norm": 2.8891732151802687, |
|
"learning_rate": 6.24023853471532e-07, |
|
"loss": 0.342, |
|
"step": 13210 |
|
}, |
|
{ |
|
"epoch": 0.9383706280056076, |
|
"grad_norm": 4.315712149288758, |
|
"learning_rate": 6.169246059917649e-07, |
|
"loss": 0.3542, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 0.9390804393732366, |
|
"grad_norm": 4.202849073092827, |
|
"learning_rate": 6.098253585119978e-07, |
|
"loss": 0.3464, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 0.9397902507408656, |
|
"grad_norm": 4.402135376104271, |
|
"learning_rate": 6.027261110322307e-07, |
|
"loss": 0.3493, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 0.9405000621084947, |
|
"grad_norm": 3.3375797449619804, |
|
"learning_rate": 5.956268635524635e-07, |
|
"loss": 0.3431, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.9412098734761237, |
|
"grad_norm": 2.58448811647569, |
|
"learning_rate": 5.885276160726964e-07, |
|
"loss": 0.3516, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 0.9419196848437528, |
|
"grad_norm": 3.1207357827554216, |
|
"learning_rate": 5.814283685929293e-07, |
|
"loss": 0.3469, |
|
"step": 13270 |
|
}, |
|
{ |
|
"epoch": 0.9426294962113818, |
|
"grad_norm": 5.535335579042853, |
|
"learning_rate": 5.74329121113162e-07, |
|
"loss": 0.3411, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 0.9433393075790109, |
|
"grad_norm": 4.157192002051246, |
|
"learning_rate": 5.672298736333949e-07, |
|
"loss": 0.3357, |
|
"step": 13290 |
|
}, |
|
{ |
|
"epoch": 0.9440491189466399, |
|
"grad_norm": 4.609541473632524, |
|
"learning_rate": 5.601306261536277e-07, |
|
"loss": 0.3297, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.9447589303142689, |
|
"grad_norm": 4.556290013887312, |
|
"learning_rate": 5.530313786738606e-07, |
|
"loss": 0.3268, |
|
"step": 13310 |
|
}, |
|
{ |
|
"epoch": 0.945468741681898, |
|
"grad_norm": 4.334131807132338, |
|
"learning_rate": 5.459321311940935e-07, |
|
"loss": 0.3582, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 0.9461785530495271, |
|
"grad_norm": 4.733377355574472, |
|
"learning_rate": 5.388328837143264e-07, |
|
"loss": 0.3366, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 0.9468883644171562, |
|
"grad_norm": 6.762724277887754, |
|
"learning_rate": 5.317336362345592e-07, |
|
"loss": 0.345, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 0.9475981757847852, |
|
"grad_norm": 2.9705397730746634, |
|
"learning_rate": 5.246343887547921e-07, |
|
"loss": 0.3465, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.9483079871524143, |
|
"grad_norm": 3.195893348669726, |
|
"learning_rate": 5.175351412750249e-07, |
|
"loss": 0.3348, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 0.9490177985200433, |
|
"grad_norm": 7.323985518462735, |
|
"learning_rate": 5.104358937952577e-07, |
|
"loss": 0.3543, |
|
"step": 13370 |
|
}, |
|
{ |
|
"epoch": 0.9497276098876724, |
|
"grad_norm": 2.799618403745627, |
|
"learning_rate": 5.033366463154906e-07, |
|
"loss": 0.3431, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 0.9504374212553014, |
|
"grad_norm": 2.7728876598155843, |
|
"learning_rate": 4.962373988357234e-07, |
|
"loss": 0.3249, |
|
"step": 13390 |
|
}, |
|
{ |
|
"epoch": 0.9511472326229304, |
|
"grad_norm": 5.195465798306655, |
|
"learning_rate": 4.891381513559563e-07, |
|
"loss": 0.3413, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.9518570439905595, |
|
"grad_norm": 10.319650407110732, |
|
"learning_rate": 4.820389038761892e-07, |
|
"loss": 0.3289, |
|
"step": 13410 |
|
}, |
|
{ |
|
"epoch": 0.9525668553581885, |
|
"grad_norm": 3.639550539774894, |
|
"learning_rate": 4.74939656396422e-07, |
|
"loss": 0.358, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 0.9532766667258176, |
|
"grad_norm": 3.005922518183922, |
|
"learning_rate": 4.6784040891665486e-07, |
|
"loss": 0.3483, |
|
"step": 13430 |
|
}, |
|
{ |
|
"epoch": 0.9539864780934467, |
|
"grad_norm": 3.658172908229024, |
|
"learning_rate": 4.607411614368877e-07, |
|
"loss": 0.3503, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 0.9546962894610758, |
|
"grad_norm": 3.17836271977541, |
|
"learning_rate": 4.5364191395712053e-07, |
|
"loss": 0.32, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 0.9554061008287048, |
|
"grad_norm": 2.6050315565816513, |
|
"learning_rate": 4.465426664773535e-07, |
|
"loss": 0.336, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 0.9561159121963339, |
|
"grad_norm": 2.516963929561299, |
|
"learning_rate": 4.394434189975863e-07, |
|
"loss": 0.3461, |
|
"step": 13470 |
|
}, |
|
{ |
|
"epoch": 0.9568257235639629, |
|
"grad_norm": 5.182889994348168, |
|
"learning_rate": 4.3234417151781915e-07, |
|
"loss": 0.3453, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 0.9575355349315919, |
|
"grad_norm": 2.2527308195923843, |
|
"learning_rate": 4.25244924038052e-07, |
|
"loss": 0.3394, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 0.958245346299221, |
|
"grad_norm": 5.702042483324615, |
|
"learning_rate": 4.181456765582848e-07, |
|
"loss": 0.3464, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.95895515766685, |
|
"grad_norm": 4.320082944510015, |
|
"learning_rate": 4.110464290785177e-07, |
|
"loss": 0.361, |
|
"step": 13510 |
|
}, |
|
{ |
|
"epoch": 0.9596649690344791, |
|
"grad_norm": 2.7057123674561683, |
|
"learning_rate": 4.0394718159875055e-07, |
|
"loss": 0.3451, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 0.9603747804021081, |
|
"grad_norm": 6.179223629975322, |
|
"learning_rate": 3.968479341189834e-07, |
|
"loss": 0.3371, |
|
"step": 13530 |
|
}, |
|
{ |
|
"epoch": 0.9610845917697372, |
|
"grad_norm": 2.5395758819730267, |
|
"learning_rate": 3.897486866392163e-07, |
|
"loss": 0.3587, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 0.9617944031373662, |
|
"grad_norm": 3.6526335466786835, |
|
"learning_rate": 3.8264943915944917e-07, |
|
"loss": 0.3439, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 0.9625042145049953, |
|
"grad_norm": 6.134974420857256, |
|
"learning_rate": 3.75550191679682e-07, |
|
"loss": 0.3413, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 0.9632140258726244, |
|
"grad_norm": 4.231152248304582, |
|
"learning_rate": 3.6845094419991484e-07, |
|
"loss": 0.3412, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 0.9639238372402534, |
|
"grad_norm": 19.9166049671889, |
|
"learning_rate": 3.613516967201477e-07, |
|
"loss": 0.3457, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 0.9646336486078825, |
|
"grad_norm": 3.0744926751867565, |
|
"learning_rate": 3.542524492403805e-07, |
|
"loss": 0.3501, |
|
"step": 13590 |
|
}, |
|
{ |
|
"epoch": 0.9653434599755115, |
|
"grad_norm": 4.316210901775538, |
|
"learning_rate": 3.471532017606134e-07, |
|
"loss": 0.3391, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.9660532713431406, |
|
"grad_norm": 5.568442813862272, |
|
"learning_rate": 3.400539542808463e-07, |
|
"loss": 0.3571, |
|
"step": 13610 |
|
}, |
|
{ |
|
"epoch": 0.9667630827107696, |
|
"grad_norm": 2.464997647373043, |
|
"learning_rate": 3.3295470680107913e-07, |
|
"loss": 0.3403, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 0.9674728940783986, |
|
"grad_norm": 9.203447351864554, |
|
"learning_rate": 3.2585545932131197e-07, |
|
"loss": 0.3372, |
|
"step": 13630 |
|
}, |
|
{ |
|
"epoch": 0.9681827054460277, |
|
"grad_norm": 4.083574237624433, |
|
"learning_rate": 3.187562118415448e-07, |
|
"loss": 0.3523, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 0.9688925168136567, |
|
"grad_norm": 2.580899686505033, |
|
"learning_rate": 3.1165696436177764e-07, |
|
"loss": 0.3331, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 0.9696023281812858, |
|
"grad_norm": 4.461792584369479, |
|
"learning_rate": 3.0455771688201053e-07, |
|
"loss": 0.3436, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 0.9703121395489148, |
|
"grad_norm": 6.002729090963929, |
|
"learning_rate": 2.9745846940224337e-07, |
|
"loss": 0.3392, |
|
"step": 13670 |
|
}, |
|
{ |
|
"epoch": 0.971021950916544, |
|
"grad_norm": 15.908649085501459, |
|
"learning_rate": 2.9035922192247626e-07, |
|
"loss": 0.3401, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 0.971731762284173, |
|
"grad_norm": 3.2548319133826875, |
|
"learning_rate": 2.832599744427091e-07, |
|
"loss": 0.3466, |
|
"step": 13690 |
|
}, |
|
{ |
|
"epoch": 0.9724415736518021, |
|
"grad_norm": 2.810860141109629, |
|
"learning_rate": 2.76160726962942e-07, |
|
"loss": 0.3445, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.9731513850194311, |
|
"grad_norm": 5.404897398221347, |
|
"learning_rate": 2.690614794831748e-07, |
|
"loss": 0.3464, |
|
"step": 13710 |
|
}, |
|
{ |
|
"epoch": 0.9738611963870601, |
|
"grad_norm": 3.07947902781157, |
|
"learning_rate": 2.6196223200340766e-07, |
|
"loss": 0.3295, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 0.9745710077546892, |
|
"grad_norm": 3.2905796500928814, |
|
"learning_rate": 2.548629845236405e-07, |
|
"loss": 0.3491, |
|
"step": 13730 |
|
}, |
|
{ |
|
"epoch": 0.9752808191223182, |
|
"grad_norm": 4.431073995020802, |
|
"learning_rate": 2.4776373704387334e-07, |
|
"loss": 0.3483, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 0.9759906304899473, |
|
"grad_norm": 3.5179707782287166, |
|
"learning_rate": 2.406644895641062e-07, |
|
"loss": 0.3469, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 0.9767004418575763, |
|
"grad_norm": 4.221356923748856, |
|
"learning_rate": 2.3356524208433906e-07, |
|
"loss": 0.3343, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 0.9774102532252054, |
|
"grad_norm": 286.15418214313974, |
|
"learning_rate": 2.2646599460457195e-07, |
|
"loss": 0.3349, |
|
"step": 13770 |
|
}, |
|
{ |
|
"epoch": 0.9781200645928344, |
|
"grad_norm": 3.4922335144175576, |
|
"learning_rate": 2.193667471248048e-07, |
|
"loss": 0.3485, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 0.9788298759604636, |
|
"grad_norm": 3.944308898398798, |
|
"learning_rate": 2.1226749964503763e-07, |
|
"loss": 0.3288, |
|
"step": 13790 |
|
}, |
|
{ |
|
"epoch": 0.9795396873280926, |
|
"grad_norm": 3.16447581060814, |
|
"learning_rate": 2.0516825216527052e-07, |
|
"loss": 0.3435, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.9802494986957216, |
|
"grad_norm": 7.105988741131366, |
|
"learning_rate": 1.9806900468550335e-07, |
|
"loss": 0.342, |
|
"step": 13810 |
|
}, |
|
{ |
|
"epoch": 0.9809593100633507, |
|
"grad_norm": 3.311616450653751, |
|
"learning_rate": 1.9096975720573622e-07, |
|
"loss": 0.365, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 0.9816691214309797, |
|
"grad_norm": 3.1283492138129128, |
|
"learning_rate": 1.8387050972596905e-07, |
|
"loss": 0.3497, |
|
"step": 13830 |
|
}, |
|
{ |
|
"epoch": 0.9823789327986088, |
|
"grad_norm": 4.720800332800002, |
|
"learning_rate": 1.7677126224620194e-07, |
|
"loss": 0.3356, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 0.9830887441662378, |
|
"grad_norm": 5.755549723756511, |
|
"learning_rate": 1.6967201476643478e-07, |
|
"loss": 0.3534, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 0.9837985555338669, |
|
"grad_norm": 12.413957162417217, |
|
"learning_rate": 1.6257276728666762e-07, |
|
"loss": 0.3514, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 0.9845083669014959, |
|
"grad_norm": 3.7416649036415195, |
|
"learning_rate": 1.5547351980690048e-07, |
|
"loss": 0.3468, |
|
"step": 13870 |
|
}, |
|
{ |
|
"epoch": 0.985218178269125, |
|
"grad_norm": 5.096087166471907, |
|
"learning_rate": 1.4837427232713335e-07, |
|
"loss": 0.3478, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 0.985927989636754, |
|
"grad_norm": 2.8643069595501847, |
|
"learning_rate": 1.4127502484736618e-07, |
|
"loss": 0.3307, |
|
"step": 13890 |
|
}, |
|
{ |
|
"epoch": 0.986637801004383, |
|
"grad_norm": 4.161106542911394, |
|
"learning_rate": 1.3417577736759905e-07, |
|
"loss": 0.3451, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.9873476123720122, |
|
"grad_norm": 3.161705990477656, |
|
"learning_rate": 1.270765298878319e-07, |
|
"loss": 0.3389, |
|
"step": 13910 |
|
}, |
|
{ |
|
"epoch": 0.9880574237396412, |
|
"grad_norm": 3.2196566259908637, |
|
"learning_rate": 1.1997728240806475e-07, |
|
"loss": 0.3508, |
|
"step": 13920 |
|
}, |
|
{ |
|
"epoch": 0.9887672351072703, |
|
"grad_norm": 3.0061617959710403, |
|
"learning_rate": 1.1287803492829761e-07, |
|
"loss": 0.357, |
|
"step": 13930 |
|
}, |
|
{ |
|
"epoch": 0.9894770464748993, |
|
"grad_norm": 7.195163761877952, |
|
"learning_rate": 1.0577878744853047e-07, |
|
"loss": 0.3344, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 0.9901868578425284, |
|
"grad_norm": 4.778295681909435, |
|
"learning_rate": 9.867953996876332e-08, |
|
"loss": 0.3404, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.9908966692101574, |
|
"grad_norm": 3.6751893575330072, |
|
"learning_rate": 9.158029248899617e-08, |
|
"loss": 0.3222, |
|
"step": 13960 |
|
}, |
|
{ |
|
"epoch": 0.9916064805777864, |
|
"grad_norm": 6.066838850034421, |
|
"learning_rate": 8.448104500922902e-08, |
|
"loss": 0.3373, |
|
"step": 13970 |
|
}, |
|
{ |
|
"epoch": 0.9923162919454155, |
|
"grad_norm": 5.8640066255244525, |
|
"learning_rate": 7.738179752946189e-08, |
|
"loss": 0.35, |
|
"step": 13980 |
|
}, |
|
{ |
|
"epoch": 0.9930261033130445, |
|
"grad_norm": 4.063550481932921, |
|
"learning_rate": 7.028255004969474e-08, |
|
"loss": 0.3424, |
|
"step": 13990 |
|
}, |
|
{ |
|
"epoch": 0.9937359146806736, |
|
"grad_norm": 6.923421784576789, |
|
"learning_rate": 6.31833025699276e-08, |
|
"loss": 0.3584, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.9944457260483026, |
|
"grad_norm": 4.621602275306591, |
|
"learning_rate": 5.6084055090160446e-08, |
|
"loss": 0.3381, |
|
"step": 14010 |
|
}, |
|
{ |
|
"epoch": 0.9951555374159318, |
|
"grad_norm": 5.495946912076004, |
|
"learning_rate": 4.89848076103933e-08, |
|
"loss": 0.3557, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 0.9958653487835608, |
|
"grad_norm": 2.261874767912811, |
|
"learning_rate": 4.188556013062616e-08, |
|
"loss": 0.3346, |
|
"step": 14030 |
|
}, |
|
{ |
|
"epoch": 0.9965751601511899, |
|
"grad_norm": 3.528699506394003, |
|
"learning_rate": 3.478631265085901e-08, |
|
"loss": 0.3284, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 0.9972849715188189, |
|
"grad_norm": 3.0483860239618314, |
|
"learning_rate": 2.7687065171091867e-08, |
|
"loss": 0.3341, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 0.9979947828864479, |
|
"grad_norm": 4.681194219809911, |
|
"learning_rate": 2.0587817691324724e-08, |
|
"loss": 0.333, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 0.998704594254077, |
|
"grad_norm": 5.802114485594721, |
|
"learning_rate": 1.3488570211557575e-08, |
|
"loss": 0.3457, |
|
"step": 14070 |
|
}, |
|
{ |
|
"epoch": 0.999414405621706, |
|
"grad_norm": 2.8616716300198775, |
|
"learning_rate": 6.389322731790431e-09, |
|
"loss": 0.3398, |
|
"step": 14080 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 14088, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9975763395674112.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|