OpenCSG-Qwen2.5-3B-GUI / trainer_state.json
yuyijiong's picture
Upload trainer_state.json with huggingface_hub
0154d46 verified
raw
history blame
245 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9999822547158093,
"eval_steps": 500,
"global_step": 14088,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0007098113676290526,
"grad_norm": 7.898312201157063,
"learning_rate": 9.995030526764163e-06,
"loss": 1.7908,
"step": 10
},
{
"epoch": 0.0014196227352581052,
"grad_norm": 5.391248052357186,
"learning_rate": 9.987931279284396e-06,
"loss": 0.7438,
"step": 20
},
{
"epoch": 0.002129434102887158,
"grad_norm": 5.37248606279888,
"learning_rate": 9.980832031804629e-06,
"loss": 0.6486,
"step": 30
},
{
"epoch": 0.0028392454705162104,
"grad_norm": 6.374151691345343,
"learning_rate": 9.973732784324862e-06,
"loss": 0.6267,
"step": 40
},
{
"epoch": 0.003549056838145263,
"grad_norm": 8.880583324938707,
"learning_rate": 9.966633536845095e-06,
"loss": 0.5867,
"step": 50
},
{
"epoch": 0.004258868205774316,
"grad_norm": 6.684640112932781,
"learning_rate": 9.959534289365328e-06,
"loss": 0.5473,
"step": 60
},
{
"epoch": 0.004968679573403368,
"grad_norm": 9.329177151842533,
"learning_rate": 9.952435041885561e-06,
"loss": 0.549,
"step": 70
},
{
"epoch": 0.005678490941032421,
"grad_norm": 4.2697386372932575,
"learning_rate": 9.945335794405794e-06,
"loss": 0.527,
"step": 80
},
{
"epoch": 0.006388302308661473,
"grad_norm": 4.981195314782428,
"learning_rate": 9.938236546926027e-06,
"loss": 0.5015,
"step": 90
},
{
"epoch": 0.007098113676290526,
"grad_norm": 3.5718526890722155,
"learning_rate": 9.931137299446259e-06,
"loss": 0.5148,
"step": 100
},
{
"epoch": 0.007807925043919578,
"grad_norm": 5.35860863602349,
"learning_rate": 9.924038051966492e-06,
"loss": 0.5116,
"step": 110
},
{
"epoch": 0.008517736411548632,
"grad_norm": 3.2550821239727434,
"learning_rate": 9.916938804486725e-06,
"loss": 0.4992,
"step": 120
},
{
"epoch": 0.009227547779177683,
"grad_norm": 3.4354498076448032,
"learning_rate": 9.909839557006958e-06,
"loss": 0.5088,
"step": 130
},
{
"epoch": 0.009937359146806737,
"grad_norm": 4.331679140736939,
"learning_rate": 9.902740309527191e-06,
"loss": 0.5139,
"step": 140
},
{
"epoch": 0.010647170514435788,
"grad_norm": 3.4337848966487265,
"learning_rate": 9.895641062047424e-06,
"loss": 0.5041,
"step": 150
},
{
"epoch": 0.011356981882064842,
"grad_norm": 8.243351710682422,
"learning_rate": 9.888541814567657e-06,
"loss": 0.5142,
"step": 160
},
{
"epoch": 0.012066793249693893,
"grad_norm": 4.091704188438657,
"learning_rate": 9.88144256708789e-06,
"loss": 0.4807,
"step": 170
},
{
"epoch": 0.012776604617322947,
"grad_norm": 21.564891334339755,
"learning_rate": 9.874343319608124e-06,
"loss": 0.5092,
"step": 180
},
{
"epoch": 0.013486415984951998,
"grad_norm": 3.1521060424258973,
"learning_rate": 9.867244072128355e-06,
"loss": 0.4787,
"step": 190
},
{
"epoch": 0.014196227352581052,
"grad_norm": 3.986481726421801,
"learning_rate": 9.860144824648588e-06,
"loss": 0.4827,
"step": 200
},
{
"epoch": 0.014906038720210105,
"grad_norm": 4.774263941683351,
"learning_rate": 9.853045577168821e-06,
"loss": 0.4775,
"step": 210
},
{
"epoch": 0.015615850087839157,
"grad_norm": 7.968327682274227,
"learning_rate": 9.845946329689053e-06,
"loss": 0.4716,
"step": 220
},
{
"epoch": 0.01632566145546821,
"grad_norm": 10.121205974855524,
"learning_rate": 9.838847082209286e-06,
"loss": 0.4969,
"step": 230
},
{
"epoch": 0.017035472823097263,
"grad_norm": 7.454679720256471,
"learning_rate": 9.831747834729519e-06,
"loss": 0.4923,
"step": 240
},
{
"epoch": 0.017745284190726313,
"grad_norm": 17.103084568275037,
"learning_rate": 9.824648587249752e-06,
"loss": 0.4701,
"step": 250
},
{
"epoch": 0.018455095558355367,
"grad_norm": 4.48293929960256,
"learning_rate": 9.817549339769985e-06,
"loss": 0.4734,
"step": 260
},
{
"epoch": 0.01916490692598442,
"grad_norm": 5.345114387506581,
"learning_rate": 9.810450092290218e-06,
"loss": 0.4894,
"step": 270
},
{
"epoch": 0.019874718293613473,
"grad_norm": 19.40561032433512,
"learning_rate": 9.803350844810451e-06,
"loss": 0.4791,
"step": 280
},
{
"epoch": 0.020584529661242523,
"grad_norm": 14.25299022016476,
"learning_rate": 9.796251597330684e-06,
"loss": 0.4699,
"step": 290
},
{
"epoch": 0.021294341028871577,
"grad_norm": 8.257072932675099,
"learning_rate": 9.789152349850918e-06,
"loss": 0.4712,
"step": 300
},
{
"epoch": 0.02200415239650063,
"grad_norm": 7.954026403143938,
"learning_rate": 9.782053102371149e-06,
"loss": 0.4703,
"step": 310
},
{
"epoch": 0.022713963764129683,
"grad_norm": 11.392767049791958,
"learning_rate": 9.774953854891382e-06,
"loss": 0.4991,
"step": 320
},
{
"epoch": 0.023423775131758737,
"grad_norm": 3.6589701257251392,
"learning_rate": 9.767854607411615e-06,
"loss": 0.48,
"step": 330
},
{
"epoch": 0.024133586499387787,
"grad_norm": 2.8317614498971095,
"learning_rate": 9.760755359931848e-06,
"loss": 0.473,
"step": 340
},
{
"epoch": 0.02484339786701684,
"grad_norm": 3.3672884329345467,
"learning_rate": 9.753656112452081e-06,
"loss": 0.4807,
"step": 350
},
{
"epoch": 0.025553209234645893,
"grad_norm": 2.918860353664653,
"learning_rate": 9.746556864972314e-06,
"loss": 0.474,
"step": 360
},
{
"epoch": 0.026263020602274947,
"grad_norm": 3.985430160063577,
"learning_rate": 9.739457617492548e-06,
"loss": 0.4606,
"step": 370
},
{
"epoch": 0.026972831969903997,
"grad_norm": 3.8499162197950216,
"learning_rate": 9.73235837001278e-06,
"loss": 0.474,
"step": 380
},
{
"epoch": 0.02768264333753305,
"grad_norm": 2.955339700163119,
"learning_rate": 9.725259122533012e-06,
"loss": 0.472,
"step": 390
},
{
"epoch": 0.028392454705162103,
"grad_norm": 5.589731350821559,
"learning_rate": 9.718159875053245e-06,
"loss": 0.4698,
"step": 400
},
{
"epoch": 0.029102266072791157,
"grad_norm": 3.9824871173931973,
"learning_rate": 9.711060627573478e-06,
"loss": 0.4581,
"step": 410
},
{
"epoch": 0.02981207744042021,
"grad_norm": 2.524559409598369,
"learning_rate": 9.70396138009371e-06,
"loss": 0.4478,
"step": 420
},
{
"epoch": 0.03052188880804926,
"grad_norm": 2.970731368598553,
"learning_rate": 9.696862132613943e-06,
"loss": 0.4508,
"step": 430
},
{
"epoch": 0.031231700175678313,
"grad_norm": 2.893829595170148,
"learning_rate": 9.689762885134176e-06,
"loss": 0.4379,
"step": 440
},
{
"epoch": 0.03194151154330736,
"grad_norm": 22.795684932698137,
"learning_rate": 9.682663637654409e-06,
"loss": 0.4482,
"step": 450
},
{
"epoch": 0.03265132291093642,
"grad_norm": 3.2812945854632236,
"learning_rate": 9.675564390174642e-06,
"loss": 0.4599,
"step": 460
},
{
"epoch": 0.03336113427856547,
"grad_norm": 11.615453520589618,
"learning_rate": 9.668465142694875e-06,
"loss": 0.4417,
"step": 470
},
{
"epoch": 0.03407094564619453,
"grad_norm": 7.726986291359829,
"learning_rate": 9.661365895215108e-06,
"loss": 0.4594,
"step": 480
},
{
"epoch": 0.03478075701382358,
"grad_norm": 4.365039492938302,
"learning_rate": 9.654266647735341e-06,
"loss": 0.4669,
"step": 490
},
{
"epoch": 0.03549056838145263,
"grad_norm": 6.54988906481092,
"learning_rate": 9.647167400255574e-06,
"loss": 0.4567,
"step": 500
},
{
"epoch": 0.03620037974908168,
"grad_norm": 8.933278546995766,
"learning_rate": 9.640068152775806e-06,
"loss": 0.4519,
"step": 510
},
{
"epoch": 0.03691019111671073,
"grad_norm": 3.7761657369108907,
"learning_rate": 9.632968905296039e-06,
"loss": 0.4501,
"step": 520
},
{
"epoch": 0.03762000248433979,
"grad_norm": 3.9418116527565377,
"learning_rate": 9.625869657816272e-06,
"loss": 0.4561,
"step": 530
},
{
"epoch": 0.03832981385196884,
"grad_norm": 3.5489889583606438,
"learning_rate": 9.618770410336505e-06,
"loss": 0.4598,
"step": 540
},
{
"epoch": 0.03903962521959789,
"grad_norm": 3.5164230189602548,
"learning_rate": 9.611671162856738e-06,
"loss": 0.4717,
"step": 550
},
{
"epoch": 0.03974943658722695,
"grad_norm": 2.1822863392109206,
"learning_rate": 9.604571915376971e-06,
"loss": 0.48,
"step": 560
},
{
"epoch": 0.040459247954856,
"grad_norm": 2.5677413826305413,
"learning_rate": 9.597472667897204e-06,
"loss": 0.4605,
"step": 570
},
{
"epoch": 0.041169059322485047,
"grad_norm": 3.011759104822335,
"learning_rate": 9.590373420417438e-06,
"loss": 0.4605,
"step": 580
},
{
"epoch": 0.0418788706901141,
"grad_norm": 2.56502573080614,
"learning_rate": 9.58327417293767e-06,
"loss": 0.4494,
"step": 590
},
{
"epoch": 0.04258868205774315,
"grad_norm": 3.2396125278123806,
"learning_rate": 9.576174925457902e-06,
"loss": 0.4542,
"step": 600
},
{
"epoch": 0.04329849342537221,
"grad_norm": 3.480681910714182,
"learning_rate": 9.569075677978135e-06,
"loss": 0.4548,
"step": 610
},
{
"epoch": 0.04400830479300126,
"grad_norm": 2.623695100630613,
"learning_rate": 9.561976430498368e-06,
"loss": 0.4594,
"step": 620
},
{
"epoch": 0.04471811616063031,
"grad_norm": 3.042303011325611,
"learning_rate": 9.5548771830186e-06,
"loss": 0.4557,
"step": 630
},
{
"epoch": 0.04542792752825937,
"grad_norm": 2.8781600946277863,
"learning_rate": 9.547777935538833e-06,
"loss": 0.484,
"step": 640
},
{
"epoch": 0.04613773889588842,
"grad_norm": 3.3284195205047684,
"learning_rate": 9.540678688059066e-06,
"loss": 0.4481,
"step": 650
},
{
"epoch": 0.04684755026351747,
"grad_norm": 3.5159109068224987,
"learning_rate": 9.533579440579299e-06,
"loss": 0.4665,
"step": 660
},
{
"epoch": 0.04755736163114652,
"grad_norm": 6.322136362721481,
"learning_rate": 9.526480193099532e-06,
"loss": 0.4585,
"step": 670
},
{
"epoch": 0.04826717299877557,
"grad_norm": 21.902103769968996,
"learning_rate": 9.519380945619765e-06,
"loss": 0.4446,
"step": 680
},
{
"epoch": 0.04897698436640463,
"grad_norm": 3.6046359318609356,
"learning_rate": 9.512281698139998e-06,
"loss": 0.4519,
"step": 690
},
{
"epoch": 0.04968679573403368,
"grad_norm": 3.039690187186011,
"learning_rate": 9.505182450660231e-06,
"loss": 0.4448,
"step": 700
},
{
"epoch": 0.05039660710166273,
"grad_norm": 2.608964873836775,
"learning_rate": 9.498083203180465e-06,
"loss": 0.4486,
"step": 710
},
{
"epoch": 0.05110641846929179,
"grad_norm": 3.368889371027321,
"learning_rate": 9.490983955700696e-06,
"loss": 0.4617,
"step": 720
},
{
"epoch": 0.05181622983692084,
"grad_norm": 4.094036998235093,
"learning_rate": 9.483884708220929e-06,
"loss": 0.4569,
"step": 730
},
{
"epoch": 0.05252604120454989,
"grad_norm": 2.979892302450325,
"learning_rate": 9.476785460741162e-06,
"loss": 0.4645,
"step": 740
},
{
"epoch": 0.05323585257217894,
"grad_norm": 3.676607621277054,
"learning_rate": 9.469686213261395e-06,
"loss": 0.4407,
"step": 750
},
{
"epoch": 0.05394566393980799,
"grad_norm": 359.9140493382262,
"learning_rate": 9.462586965781628e-06,
"loss": 0.4262,
"step": 760
},
{
"epoch": 0.05465547530743705,
"grad_norm": 4.447118089247447,
"learning_rate": 9.455487718301861e-06,
"loss": 0.4344,
"step": 770
},
{
"epoch": 0.0553652866750661,
"grad_norm": 4.569754671227615,
"learning_rate": 9.448388470822095e-06,
"loss": 0.4462,
"step": 780
},
{
"epoch": 0.05607509804269516,
"grad_norm": 2.3728524211263067,
"learning_rate": 9.441289223342328e-06,
"loss": 0.4386,
"step": 790
},
{
"epoch": 0.05678490941032421,
"grad_norm": 2.5997362569615903,
"learning_rate": 9.434189975862559e-06,
"loss": 0.4537,
"step": 800
},
{
"epoch": 0.057494720777953257,
"grad_norm": 4.859327134293274,
"learning_rate": 9.427090728382792e-06,
"loss": 0.4514,
"step": 810
},
{
"epoch": 0.05820453214558231,
"grad_norm": 2.6304161060559905,
"learning_rate": 9.419991480903025e-06,
"loss": 0.4306,
"step": 820
},
{
"epoch": 0.05891434351321136,
"grad_norm": 3.504607730078166,
"learning_rate": 9.412892233423258e-06,
"loss": 0.454,
"step": 830
},
{
"epoch": 0.05962415488084042,
"grad_norm": 3.3227222733710864,
"learning_rate": 9.40579298594349e-06,
"loss": 0.4407,
"step": 840
},
{
"epoch": 0.06033396624846947,
"grad_norm": 3.328718377292454,
"learning_rate": 9.398693738463723e-06,
"loss": 0.4581,
"step": 850
},
{
"epoch": 0.06104377761609852,
"grad_norm": 3.4977954338913864,
"learning_rate": 9.391594490983956e-06,
"loss": 0.4284,
"step": 860
},
{
"epoch": 0.06175358898372758,
"grad_norm": 3.228432256709841,
"learning_rate": 9.384495243504189e-06,
"loss": 0.4373,
"step": 870
},
{
"epoch": 0.06246340035135663,
"grad_norm": 3.1586832054050964,
"learning_rate": 9.377395996024422e-06,
"loss": 0.4348,
"step": 880
},
{
"epoch": 0.06317321171898568,
"grad_norm": 13.155465477764636,
"learning_rate": 9.370296748544655e-06,
"loss": 0.4217,
"step": 890
},
{
"epoch": 0.06388302308661473,
"grad_norm": 15.543581430412525,
"learning_rate": 9.363197501064888e-06,
"loss": 0.4593,
"step": 900
},
{
"epoch": 0.06459283445424378,
"grad_norm": 8.921864061523843,
"learning_rate": 9.356098253585121e-06,
"loss": 0.4608,
"step": 910
},
{
"epoch": 0.06530264582187284,
"grad_norm": 5.3983003526617335,
"learning_rate": 9.348999006105353e-06,
"loss": 0.4514,
"step": 920
},
{
"epoch": 0.06601245718950188,
"grad_norm": 7.595139513838182,
"learning_rate": 9.341899758625586e-06,
"loss": 0.4273,
"step": 930
},
{
"epoch": 0.06672226855713094,
"grad_norm": 3.2331459925046815,
"learning_rate": 9.334800511145819e-06,
"loss": 0.422,
"step": 940
},
{
"epoch": 0.06743207992476,
"grad_norm": 3.8699272404865686,
"learning_rate": 9.327701263666052e-06,
"loss": 0.4477,
"step": 950
},
{
"epoch": 0.06814189129238905,
"grad_norm": 2.68446192265652,
"learning_rate": 9.320602016186285e-06,
"loss": 0.4449,
"step": 960
},
{
"epoch": 0.0688517026600181,
"grad_norm": 2.637260503772899,
"learning_rate": 9.313502768706518e-06,
"loss": 0.4532,
"step": 970
},
{
"epoch": 0.06956151402764715,
"grad_norm": 3.9618993923437085,
"learning_rate": 9.306403521226751e-06,
"loss": 0.4534,
"step": 980
},
{
"epoch": 0.07027132539527621,
"grad_norm": 3.429568261104227,
"learning_rate": 9.299304273746985e-06,
"loss": 0.452,
"step": 990
},
{
"epoch": 0.07098113676290525,
"grad_norm": 3.663179434126313,
"learning_rate": 9.292205026267218e-06,
"loss": 0.439,
"step": 1000
},
{
"epoch": 0.07169094813053431,
"grad_norm": 4.408975026773321,
"learning_rate": 9.285105778787449e-06,
"loss": 0.4184,
"step": 1010
},
{
"epoch": 0.07240075949816337,
"grad_norm": 2.415108601943808,
"learning_rate": 9.278006531307682e-06,
"loss": 0.4342,
"step": 1020
},
{
"epoch": 0.07311057086579241,
"grad_norm": 6.698239896408658,
"learning_rate": 9.270907283827915e-06,
"loss": 0.4535,
"step": 1030
},
{
"epoch": 0.07382038223342147,
"grad_norm": 11.189940656850219,
"learning_rate": 9.263808036348147e-06,
"loss": 0.4192,
"step": 1040
},
{
"epoch": 0.07453019360105052,
"grad_norm": 3.85625217339617,
"learning_rate": 9.25670878886838e-06,
"loss": 0.4278,
"step": 1050
},
{
"epoch": 0.07524000496867958,
"grad_norm": 32.21212360326382,
"learning_rate": 9.249609541388613e-06,
"loss": 0.4509,
"step": 1060
},
{
"epoch": 0.07594981633630862,
"grad_norm": 5.919396215012425,
"learning_rate": 9.242510293908846e-06,
"loss": 0.4525,
"step": 1070
},
{
"epoch": 0.07665962770393768,
"grad_norm": 5.904196801283348,
"learning_rate": 9.235411046429079e-06,
"loss": 0.4422,
"step": 1080
},
{
"epoch": 0.07736943907156674,
"grad_norm": 4.486326467883555,
"learning_rate": 9.228311798949312e-06,
"loss": 0.4685,
"step": 1090
},
{
"epoch": 0.07807925043919578,
"grad_norm": 11.745437972621287,
"learning_rate": 9.221212551469545e-06,
"loss": 0.4646,
"step": 1100
},
{
"epoch": 0.07878906180682484,
"grad_norm": 6.5181010077573145,
"learning_rate": 9.214113303989778e-06,
"loss": 0.443,
"step": 1110
},
{
"epoch": 0.0794988731744539,
"grad_norm": 11.270983163134655,
"learning_rate": 9.207014056510012e-06,
"loss": 0.4605,
"step": 1120
},
{
"epoch": 0.08020868454208294,
"grad_norm": 3.7069012881976975,
"learning_rate": 9.199914809030243e-06,
"loss": 0.4459,
"step": 1130
},
{
"epoch": 0.080918495909712,
"grad_norm": 8.667969696855055,
"learning_rate": 9.192815561550476e-06,
"loss": 0.4556,
"step": 1140
},
{
"epoch": 0.08162830727734105,
"grad_norm": 7.559635091166787,
"learning_rate": 9.185716314070709e-06,
"loss": 0.4357,
"step": 1150
},
{
"epoch": 0.08233811864497009,
"grad_norm": 17.430750080762536,
"learning_rate": 9.178617066590942e-06,
"loss": 0.4301,
"step": 1160
},
{
"epoch": 0.08304793001259915,
"grad_norm": 4.351276343100192,
"learning_rate": 9.171517819111175e-06,
"loss": 0.4184,
"step": 1170
},
{
"epoch": 0.0837577413802282,
"grad_norm": 6.471581804191342,
"learning_rate": 9.164418571631408e-06,
"loss": 0.4516,
"step": 1180
},
{
"epoch": 0.08446755274785726,
"grad_norm": 4.3294841586504855,
"learning_rate": 9.157319324151642e-06,
"loss": 0.4211,
"step": 1190
},
{
"epoch": 0.0851773641154863,
"grad_norm": 4.385208474639979,
"learning_rate": 9.150220076671875e-06,
"loss": 0.4203,
"step": 1200
},
{
"epoch": 0.08588717548311536,
"grad_norm": 5.8972560031050065,
"learning_rate": 9.143120829192106e-06,
"loss": 0.4284,
"step": 1210
},
{
"epoch": 0.08659698685074442,
"grad_norm": 4.604861487503107,
"learning_rate": 9.136021581712339e-06,
"loss": 0.4277,
"step": 1220
},
{
"epoch": 0.08730679821837346,
"grad_norm": 4.321101106082931,
"learning_rate": 9.128922334232572e-06,
"loss": 0.4216,
"step": 1230
},
{
"epoch": 0.08801660958600252,
"grad_norm": 11.04681514927992,
"learning_rate": 9.121823086752805e-06,
"loss": 0.4181,
"step": 1240
},
{
"epoch": 0.08872642095363158,
"grad_norm": 4.31849841935359,
"learning_rate": 9.114723839273037e-06,
"loss": 0.4264,
"step": 1250
},
{
"epoch": 0.08943623232126062,
"grad_norm": 4.674845237449041,
"learning_rate": 9.10762459179327e-06,
"loss": 0.4281,
"step": 1260
},
{
"epoch": 0.09014604368888968,
"grad_norm": 3.447760098274006,
"learning_rate": 9.100525344313503e-06,
"loss": 0.4304,
"step": 1270
},
{
"epoch": 0.09085585505651873,
"grad_norm": 7.189274212443334,
"learning_rate": 9.093426096833736e-06,
"loss": 0.4252,
"step": 1280
},
{
"epoch": 0.09156566642414778,
"grad_norm": 19.69024332171456,
"learning_rate": 9.08632684935397e-06,
"loss": 0.4336,
"step": 1290
},
{
"epoch": 0.09227547779177683,
"grad_norm": 55.22992334000048,
"learning_rate": 9.079227601874202e-06,
"loss": 0.4256,
"step": 1300
},
{
"epoch": 0.09298528915940589,
"grad_norm": 5.066816349007046,
"learning_rate": 9.072128354394435e-06,
"loss": 0.407,
"step": 1310
},
{
"epoch": 0.09369510052703495,
"grad_norm": 6.37711035743208,
"learning_rate": 9.065029106914668e-06,
"loss": 0.4257,
"step": 1320
},
{
"epoch": 0.09440491189466399,
"grad_norm": 4.696335985596692,
"learning_rate": 9.057929859434902e-06,
"loss": 0.4188,
"step": 1330
},
{
"epoch": 0.09511472326229305,
"grad_norm": 15.82313293688476,
"learning_rate": 9.050830611955133e-06,
"loss": 0.433,
"step": 1340
},
{
"epoch": 0.0958245346299221,
"grad_norm": 5.692904308794704,
"learning_rate": 9.043731364475366e-06,
"loss": 0.4269,
"step": 1350
},
{
"epoch": 0.09653434599755115,
"grad_norm": 15.303387309564082,
"learning_rate": 9.0366321169956e-06,
"loss": 0.4174,
"step": 1360
},
{
"epoch": 0.0972441573651802,
"grad_norm": 3.9801928029461666,
"learning_rate": 9.029532869515832e-06,
"loss": 0.4132,
"step": 1370
},
{
"epoch": 0.09795396873280926,
"grad_norm": 3.523690216407914,
"learning_rate": 9.022433622036065e-06,
"loss": 0.4281,
"step": 1380
},
{
"epoch": 0.0986637801004383,
"grad_norm": 7.099888052775042,
"learning_rate": 9.015334374556298e-06,
"loss": 0.4431,
"step": 1390
},
{
"epoch": 0.09937359146806736,
"grad_norm": 6.538985360116972,
"learning_rate": 9.008235127076532e-06,
"loss": 0.4172,
"step": 1400
},
{
"epoch": 0.10008340283569642,
"grad_norm": 7.959800060910741,
"learning_rate": 9.001135879596763e-06,
"loss": 0.4243,
"step": 1410
},
{
"epoch": 0.10079321420332546,
"grad_norm": 8.790445771142394,
"learning_rate": 8.994036632116996e-06,
"loss": 0.4254,
"step": 1420
},
{
"epoch": 0.10150302557095452,
"grad_norm": 4.285966498899181,
"learning_rate": 8.98693738463723e-06,
"loss": 0.4122,
"step": 1430
},
{
"epoch": 0.10221283693858357,
"grad_norm": 6.286806035291326,
"learning_rate": 8.979838137157462e-06,
"loss": 0.433,
"step": 1440
},
{
"epoch": 0.10292264830621263,
"grad_norm": 7.3066834855049345,
"learning_rate": 8.972738889677695e-06,
"loss": 0.4258,
"step": 1450
},
{
"epoch": 0.10363245967384167,
"grad_norm": 6.5695520214785565,
"learning_rate": 8.965639642197927e-06,
"loss": 0.4164,
"step": 1460
},
{
"epoch": 0.10434227104147073,
"grad_norm": 20.93641513291179,
"learning_rate": 8.95854039471816e-06,
"loss": 0.4095,
"step": 1470
},
{
"epoch": 0.10505208240909979,
"grad_norm": 5.657042957398901,
"learning_rate": 8.951441147238393e-06,
"loss": 0.4168,
"step": 1480
},
{
"epoch": 0.10576189377672883,
"grad_norm": 6.076726326140851,
"learning_rate": 8.944341899758626e-06,
"loss": 0.4112,
"step": 1490
},
{
"epoch": 0.10647170514435789,
"grad_norm": 5.092565408624009,
"learning_rate": 8.93724265227886e-06,
"loss": 0.4269,
"step": 1500
},
{
"epoch": 0.10718151651198694,
"grad_norm": 2.894012289515038,
"learning_rate": 8.930143404799092e-06,
"loss": 0.4239,
"step": 1510
},
{
"epoch": 0.10789132787961599,
"grad_norm": 3.7173915295575637,
"learning_rate": 8.923044157319325e-06,
"loss": 0.4288,
"step": 1520
},
{
"epoch": 0.10860113924724504,
"grad_norm": 3.025402596869208,
"learning_rate": 8.915944909839559e-06,
"loss": 0.4421,
"step": 1530
},
{
"epoch": 0.1093109506148741,
"grad_norm": 8.212502187483185,
"learning_rate": 8.90884566235979e-06,
"loss": 0.4241,
"step": 1540
},
{
"epoch": 0.11002076198250314,
"grad_norm": 5.773771344339805,
"learning_rate": 8.901746414880023e-06,
"loss": 0.4355,
"step": 1550
},
{
"epoch": 0.1107305733501322,
"grad_norm": 4.158426885786249,
"learning_rate": 8.894647167400256e-06,
"loss": 0.436,
"step": 1560
},
{
"epoch": 0.11144038471776126,
"grad_norm": 6.56740526603354,
"learning_rate": 8.88754791992049e-06,
"loss": 0.4397,
"step": 1570
},
{
"epoch": 0.11215019608539031,
"grad_norm": 8.263663970839248,
"learning_rate": 8.880448672440722e-06,
"loss": 0.4201,
"step": 1580
},
{
"epoch": 0.11286000745301936,
"grad_norm": 2.424368072981463,
"learning_rate": 8.873349424960955e-06,
"loss": 0.4235,
"step": 1590
},
{
"epoch": 0.11356981882064841,
"grad_norm": 6.489454078474153,
"learning_rate": 8.866250177481189e-06,
"loss": 0.4243,
"step": 1600
},
{
"epoch": 0.11427963018827747,
"grad_norm": 3.541006640864803,
"learning_rate": 8.859150930001422e-06,
"loss": 0.4313,
"step": 1610
},
{
"epoch": 0.11498944155590651,
"grad_norm": 12.323605643567065,
"learning_rate": 8.852051682521653e-06,
"loss": 0.4253,
"step": 1620
},
{
"epoch": 0.11569925292353557,
"grad_norm": 4.600225981753095,
"learning_rate": 8.844952435041886e-06,
"loss": 0.42,
"step": 1630
},
{
"epoch": 0.11640906429116463,
"grad_norm": 8.589796661850784,
"learning_rate": 8.83785318756212e-06,
"loss": 0.4219,
"step": 1640
},
{
"epoch": 0.11711887565879367,
"grad_norm": 10.182911442610934,
"learning_rate": 8.830753940082352e-06,
"loss": 0.4285,
"step": 1650
},
{
"epoch": 0.11782868702642273,
"grad_norm": 5.186284643440543,
"learning_rate": 8.823654692602584e-06,
"loss": 0.4139,
"step": 1660
},
{
"epoch": 0.11853849839405178,
"grad_norm": 5.23154203196852,
"learning_rate": 8.816555445122817e-06,
"loss": 0.4251,
"step": 1670
},
{
"epoch": 0.11924830976168084,
"grad_norm": 6.9839536559537505,
"learning_rate": 8.80945619764305e-06,
"loss": 0.4233,
"step": 1680
},
{
"epoch": 0.11995812112930988,
"grad_norm": 6.376179671333375,
"learning_rate": 8.802356950163283e-06,
"loss": 0.4089,
"step": 1690
},
{
"epoch": 0.12066793249693894,
"grad_norm": 3.824113092644885,
"learning_rate": 8.795257702683516e-06,
"loss": 0.4347,
"step": 1700
},
{
"epoch": 0.121377743864568,
"grad_norm": 11.282936555631686,
"learning_rate": 8.78815845520375e-06,
"loss": 0.423,
"step": 1710
},
{
"epoch": 0.12208755523219704,
"grad_norm": 4.218268240264897,
"learning_rate": 8.781059207723982e-06,
"loss": 0.4188,
"step": 1720
},
{
"epoch": 0.1227973665998261,
"grad_norm": 3.943582749857493,
"learning_rate": 8.773959960244215e-06,
"loss": 0.4276,
"step": 1730
},
{
"epoch": 0.12350717796745515,
"grad_norm": 9.679933576473074,
"learning_rate": 8.766860712764449e-06,
"loss": 0.42,
"step": 1740
},
{
"epoch": 0.1242169893350842,
"grad_norm": 15.414309701859608,
"learning_rate": 8.75976146528468e-06,
"loss": 0.4316,
"step": 1750
},
{
"epoch": 0.12492680070271325,
"grad_norm": 9.429737278511919,
"learning_rate": 8.752662217804913e-06,
"loss": 0.422,
"step": 1760
},
{
"epoch": 0.1256366120703423,
"grad_norm": 23.10494354556988,
"learning_rate": 8.745562970325146e-06,
"loss": 0.4276,
"step": 1770
},
{
"epoch": 0.12634642343797137,
"grad_norm": 13.541923724604345,
"learning_rate": 8.73846372284538e-06,
"loss": 0.4271,
"step": 1780
},
{
"epoch": 0.1270562348056004,
"grad_norm": 2.846694152973873,
"learning_rate": 8.731364475365612e-06,
"loss": 0.4151,
"step": 1790
},
{
"epoch": 0.12776604617322945,
"grad_norm": 6.934597145753292,
"learning_rate": 8.724265227885845e-06,
"loss": 0.4247,
"step": 1800
},
{
"epoch": 0.12847585754085852,
"grad_norm": 3.435112347451886,
"learning_rate": 8.717165980406079e-06,
"loss": 0.4225,
"step": 1810
},
{
"epoch": 0.12918566890848757,
"grad_norm": 3.4829699382867823,
"learning_rate": 8.71006673292631e-06,
"loss": 0.4458,
"step": 1820
},
{
"epoch": 0.1298954802761166,
"grad_norm": 5.077072978235785,
"learning_rate": 8.702967485446543e-06,
"loss": 0.4283,
"step": 1830
},
{
"epoch": 0.13060529164374568,
"grad_norm": 5.917300462616358,
"learning_rate": 8.695868237966776e-06,
"loss": 0.4119,
"step": 1840
},
{
"epoch": 0.13131510301137472,
"grad_norm": 10.693397543481625,
"learning_rate": 8.68876899048701e-06,
"loss": 0.4305,
"step": 1850
},
{
"epoch": 0.13202491437900377,
"grad_norm": 3.6456780546239456,
"learning_rate": 8.681669743007242e-06,
"loss": 0.4391,
"step": 1860
},
{
"epoch": 0.13273472574663284,
"grad_norm": 14.68038430401678,
"learning_rate": 8.674570495527474e-06,
"loss": 0.4111,
"step": 1870
},
{
"epoch": 0.13344453711426188,
"grad_norm": 5.101838800313352,
"learning_rate": 8.667471248047707e-06,
"loss": 0.4323,
"step": 1880
},
{
"epoch": 0.13415434848189095,
"grad_norm": 4.497686869632987,
"learning_rate": 8.66037200056794e-06,
"loss": 0.4154,
"step": 1890
},
{
"epoch": 0.13486415984952,
"grad_norm": 9.511227824879294,
"learning_rate": 8.653272753088173e-06,
"loss": 0.4295,
"step": 1900
},
{
"epoch": 0.13557397121714904,
"grad_norm": 5.344003791146658,
"learning_rate": 8.646173505608406e-06,
"loss": 0.4254,
"step": 1910
},
{
"epoch": 0.1362837825847781,
"grad_norm": 8.10132953922794,
"learning_rate": 8.63907425812864e-06,
"loss": 0.4219,
"step": 1920
},
{
"epoch": 0.13699359395240715,
"grad_norm": 8.840386508572838,
"learning_rate": 8.631975010648872e-06,
"loss": 0.416,
"step": 1930
},
{
"epoch": 0.1377034053200362,
"grad_norm": 5.639143297883941,
"learning_rate": 8.624875763169106e-06,
"loss": 0.4246,
"step": 1940
},
{
"epoch": 0.13841321668766526,
"grad_norm": 5.375177742256173,
"learning_rate": 8.617776515689339e-06,
"loss": 0.4263,
"step": 1950
},
{
"epoch": 0.1391230280552943,
"grad_norm": 13.872628674699765,
"learning_rate": 8.61067726820957e-06,
"loss": 0.4368,
"step": 1960
},
{
"epoch": 0.13983283942292335,
"grad_norm": 6.612051924514802,
"learning_rate": 8.603578020729803e-06,
"loss": 0.4235,
"step": 1970
},
{
"epoch": 0.14054265079055242,
"grad_norm": 7.420592038738273,
"learning_rate": 8.596478773250036e-06,
"loss": 0.4315,
"step": 1980
},
{
"epoch": 0.14125246215818146,
"grad_norm": 3.883491154973528,
"learning_rate": 8.58937952577027e-06,
"loss": 0.4394,
"step": 1990
},
{
"epoch": 0.1419622735258105,
"grad_norm": 4.031594828995353,
"learning_rate": 8.582280278290502e-06,
"loss": 0.4274,
"step": 2000
},
{
"epoch": 0.14267208489343958,
"grad_norm": 6.272786134188022,
"learning_rate": 8.575181030810736e-06,
"loss": 0.42,
"step": 2010
},
{
"epoch": 0.14338189626106862,
"grad_norm": 8.45570312290703,
"learning_rate": 8.568081783330967e-06,
"loss": 0.4336,
"step": 2020
},
{
"epoch": 0.14409170762869766,
"grad_norm": 3.8497660341027693,
"learning_rate": 8.5609825358512e-06,
"loss": 0.4259,
"step": 2030
},
{
"epoch": 0.14480151899632673,
"grad_norm": 10.12069309920438,
"learning_rate": 8.553883288371433e-06,
"loss": 0.4208,
"step": 2040
},
{
"epoch": 0.14551133036395578,
"grad_norm": 5.128975578462212,
"learning_rate": 8.546784040891666e-06,
"loss": 0.4215,
"step": 2050
},
{
"epoch": 0.14622114173158482,
"grad_norm": 4.45602583843403,
"learning_rate": 8.5396847934119e-06,
"loss": 0.4135,
"step": 2060
},
{
"epoch": 0.1469309530992139,
"grad_norm": 5.172069700283945,
"learning_rate": 8.53258554593213e-06,
"loss": 0.4122,
"step": 2070
},
{
"epoch": 0.14764076446684293,
"grad_norm": 7.147216717746435,
"learning_rate": 8.525486298452364e-06,
"loss": 0.4423,
"step": 2080
},
{
"epoch": 0.14835057583447198,
"grad_norm": 14.946527022046613,
"learning_rate": 8.518387050972597e-06,
"loss": 0.4094,
"step": 2090
},
{
"epoch": 0.14906038720210105,
"grad_norm": 8.460267496546166,
"learning_rate": 8.51128780349283e-06,
"loss": 0.4186,
"step": 2100
},
{
"epoch": 0.1497701985697301,
"grad_norm": 8.93023218882671,
"learning_rate": 8.504188556013063e-06,
"loss": 0.4062,
"step": 2110
},
{
"epoch": 0.15048000993735916,
"grad_norm": 3.213343020811049,
"learning_rate": 8.497089308533296e-06,
"loss": 0.3994,
"step": 2120
},
{
"epoch": 0.1511898213049882,
"grad_norm": 8.718801113577726,
"learning_rate": 8.48999006105353e-06,
"loss": 0.4232,
"step": 2130
},
{
"epoch": 0.15189963267261725,
"grad_norm": 2.832643819770658,
"learning_rate": 8.482890813573762e-06,
"loss": 0.4261,
"step": 2140
},
{
"epoch": 0.15260944404024632,
"grad_norm": 3.2673324405839255,
"learning_rate": 8.475791566093996e-06,
"loss": 0.42,
"step": 2150
},
{
"epoch": 0.15331925540787536,
"grad_norm": 3.2621489770969214,
"learning_rate": 8.468692318614227e-06,
"loss": 0.4282,
"step": 2160
},
{
"epoch": 0.1540290667755044,
"grad_norm": 17.34420036770468,
"learning_rate": 8.46159307113446e-06,
"loss": 0.4198,
"step": 2170
},
{
"epoch": 0.15473887814313347,
"grad_norm": 3.6148665582762094,
"learning_rate": 8.454493823654693e-06,
"loss": 0.4157,
"step": 2180
},
{
"epoch": 0.15544868951076252,
"grad_norm": 2.775836768166624,
"learning_rate": 8.447394576174926e-06,
"loss": 0.417,
"step": 2190
},
{
"epoch": 0.15615850087839156,
"grad_norm": 5.052761832862739,
"learning_rate": 8.44029532869516e-06,
"loss": 0.4035,
"step": 2200
},
{
"epoch": 0.15686831224602063,
"grad_norm": 4.778779661514333,
"learning_rate": 8.433196081215393e-06,
"loss": 0.4445,
"step": 2210
},
{
"epoch": 0.15757812361364967,
"grad_norm": 4.6274782338902325,
"learning_rate": 8.426096833735626e-06,
"loss": 0.4147,
"step": 2220
},
{
"epoch": 0.15828793498127872,
"grad_norm": 4.310225523508245,
"learning_rate": 8.418997586255857e-06,
"loss": 0.4167,
"step": 2230
},
{
"epoch": 0.1589977463489078,
"grad_norm": 4.802519845626961,
"learning_rate": 8.41189833877609e-06,
"loss": 0.4052,
"step": 2240
},
{
"epoch": 0.15970755771653683,
"grad_norm": 3.949892413625005,
"learning_rate": 8.404799091296323e-06,
"loss": 0.4263,
"step": 2250
},
{
"epoch": 0.16041736908416587,
"grad_norm": 5.685661053410237,
"learning_rate": 8.397699843816556e-06,
"loss": 0.4148,
"step": 2260
},
{
"epoch": 0.16112718045179494,
"grad_norm": 4.337480471983148,
"learning_rate": 8.39060059633679e-06,
"loss": 0.4101,
"step": 2270
},
{
"epoch": 0.161836991819424,
"grad_norm": 4.809277499740254,
"learning_rate": 8.38350134885702e-06,
"loss": 0.4071,
"step": 2280
},
{
"epoch": 0.16254680318705303,
"grad_norm": 7.364507480899371,
"learning_rate": 8.376402101377254e-06,
"loss": 0.4021,
"step": 2290
},
{
"epoch": 0.1632566145546821,
"grad_norm": 5.408145626972555,
"learning_rate": 8.369302853897487e-06,
"loss": 0.4154,
"step": 2300
},
{
"epoch": 0.16396642592231114,
"grad_norm": 2.9449217220121784,
"learning_rate": 8.36220360641772e-06,
"loss": 0.4296,
"step": 2310
},
{
"epoch": 0.16467623728994019,
"grad_norm": 3.843647555602573,
"learning_rate": 8.355104358937953e-06,
"loss": 0.4197,
"step": 2320
},
{
"epoch": 0.16538604865756926,
"grad_norm": 5.843629733774891,
"learning_rate": 8.348005111458186e-06,
"loss": 0.4052,
"step": 2330
},
{
"epoch": 0.1660958600251983,
"grad_norm": 4.182196885965926,
"learning_rate": 8.34090586397842e-06,
"loss": 0.4304,
"step": 2340
},
{
"epoch": 0.16680567139282734,
"grad_norm": 12.343897765958163,
"learning_rate": 8.333806616498653e-06,
"loss": 0.4057,
"step": 2350
},
{
"epoch": 0.1675154827604564,
"grad_norm": 4.52770872028285,
"learning_rate": 8.326707369018886e-06,
"loss": 0.4234,
"step": 2360
},
{
"epoch": 0.16822529412808546,
"grad_norm": 5.473115632671873,
"learning_rate": 8.319608121539117e-06,
"loss": 0.4127,
"step": 2370
},
{
"epoch": 0.16893510549571453,
"grad_norm": 5.243162829393595,
"learning_rate": 8.31250887405935e-06,
"loss": 0.4148,
"step": 2380
},
{
"epoch": 0.16964491686334357,
"grad_norm": 9.638919529909746,
"learning_rate": 8.305409626579583e-06,
"loss": 0.4244,
"step": 2390
},
{
"epoch": 0.1703547282309726,
"grad_norm": 5.824204497516263,
"learning_rate": 8.298310379099816e-06,
"loss": 0.3991,
"step": 2400
},
{
"epoch": 0.17106453959860168,
"grad_norm": 8.92013550945478,
"learning_rate": 8.29121113162005e-06,
"loss": 0.4107,
"step": 2410
},
{
"epoch": 0.17177435096623073,
"grad_norm": 4.310339052965044,
"learning_rate": 8.284111884140283e-06,
"loss": 0.4198,
"step": 2420
},
{
"epoch": 0.17248416233385977,
"grad_norm": 3.674140188675587,
"learning_rate": 8.277012636660514e-06,
"loss": 0.4066,
"step": 2430
},
{
"epoch": 0.17319397370148884,
"grad_norm": 3.2816580938205986,
"learning_rate": 8.269913389180747e-06,
"loss": 0.3948,
"step": 2440
},
{
"epoch": 0.17390378506911788,
"grad_norm": 3.119520711268051,
"learning_rate": 8.26281414170098e-06,
"loss": 0.4236,
"step": 2450
},
{
"epoch": 0.17461359643674693,
"grad_norm": 3.9529990200341216,
"learning_rate": 8.255714894221213e-06,
"loss": 0.4028,
"step": 2460
},
{
"epoch": 0.175323407804376,
"grad_norm": 6.5624619571577,
"learning_rate": 8.248615646741446e-06,
"loss": 0.4207,
"step": 2470
},
{
"epoch": 0.17603321917200504,
"grad_norm": 6.563862400109423,
"learning_rate": 8.24151639926168e-06,
"loss": 0.4234,
"step": 2480
},
{
"epoch": 0.17674303053963408,
"grad_norm": 4.124646423199101,
"learning_rate": 8.234417151781911e-06,
"loss": 0.421,
"step": 2490
},
{
"epoch": 0.17745284190726315,
"grad_norm": 8.460797246337737,
"learning_rate": 8.227317904302144e-06,
"loss": 0.4169,
"step": 2500
},
{
"epoch": 0.1781626532748922,
"grad_norm": 4.636207121737827,
"learning_rate": 8.220218656822377e-06,
"loss": 0.4154,
"step": 2510
},
{
"epoch": 0.17887246464252124,
"grad_norm": 15.193279765427832,
"learning_rate": 8.21311940934261e-06,
"loss": 0.4,
"step": 2520
},
{
"epoch": 0.1795822760101503,
"grad_norm": 8.394690912531237,
"learning_rate": 8.206020161862843e-06,
"loss": 0.3994,
"step": 2530
},
{
"epoch": 0.18029208737777935,
"grad_norm": 11.829872869588135,
"learning_rate": 8.198920914383076e-06,
"loss": 0.4045,
"step": 2540
},
{
"epoch": 0.1810018987454084,
"grad_norm": 10.598164946336963,
"learning_rate": 8.19182166690331e-06,
"loss": 0.4167,
"step": 2550
},
{
"epoch": 0.18171171011303747,
"grad_norm": 8.644167493937724,
"learning_rate": 8.184722419423543e-06,
"loss": 0.4193,
"step": 2560
},
{
"epoch": 0.1824215214806665,
"grad_norm": 5.532113862418252,
"learning_rate": 8.177623171943776e-06,
"loss": 0.4134,
"step": 2570
},
{
"epoch": 0.18313133284829555,
"grad_norm": 8.962347784457894,
"learning_rate": 8.170523924464007e-06,
"loss": 0.4231,
"step": 2580
},
{
"epoch": 0.18384114421592462,
"grad_norm": 4.789480578365759,
"learning_rate": 8.16342467698424e-06,
"loss": 0.4056,
"step": 2590
},
{
"epoch": 0.18455095558355367,
"grad_norm": 7.463666547462272,
"learning_rate": 8.156325429504473e-06,
"loss": 0.4082,
"step": 2600
},
{
"epoch": 0.1852607669511827,
"grad_norm": 3.543632295285487,
"learning_rate": 8.149226182024706e-06,
"loss": 0.3957,
"step": 2610
},
{
"epoch": 0.18597057831881178,
"grad_norm": 10.128862482609126,
"learning_rate": 8.14212693454494e-06,
"loss": 0.4104,
"step": 2620
},
{
"epoch": 0.18668038968644082,
"grad_norm": 2.279815139257822,
"learning_rate": 8.135027687065171e-06,
"loss": 0.4023,
"step": 2630
},
{
"epoch": 0.1873902010540699,
"grad_norm": 5.651432220535337,
"learning_rate": 8.127928439585404e-06,
"loss": 0.4174,
"step": 2640
},
{
"epoch": 0.18810001242169894,
"grad_norm": 2.764126752423827,
"learning_rate": 8.120829192105637e-06,
"loss": 0.4316,
"step": 2650
},
{
"epoch": 0.18880982378932798,
"grad_norm": 2.2008942019632443,
"learning_rate": 8.11372994462587e-06,
"loss": 0.3998,
"step": 2660
},
{
"epoch": 0.18951963515695705,
"grad_norm": 2.6464894767494194,
"learning_rate": 8.106630697146103e-06,
"loss": 0.4152,
"step": 2670
},
{
"epoch": 0.1902294465245861,
"grad_norm": 2.9891233500309697,
"learning_rate": 8.099531449666336e-06,
"loss": 0.4065,
"step": 2680
},
{
"epoch": 0.19093925789221514,
"grad_norm": 3.2947192783933303,
"learning_rate": 8.092432202186568e-06,
"loss": 0.4096,
"step": 2690
},
{
"epoch": 0.1916490692598442,
"grad_norm": 2.6266501022263093,
"learning_rate": 8.085332954706801e-06,
"loss": 0.4079,
"step": 2700
},
{
"epoch": 0.19235888062747325,
"grad_norm": 2.0600161188196258,
"learning_rate": 8.078233707227034e-06,
"loss": 0.4245,
"step": 2710
},
{
"epoch": 0.1930686919951023,
"grad_norm": 3.4259686474049587,
"learning_rate": 8.071134459747267e-06,
"loss": 0.4168,
"step": 2720
},
{
"epoch": 0.19377850336273136,
"grad_norm": 4.184352662206747,
"learning_rate": 8.0640352122675e-06,
"loss": 0.4265,
"step": 2730
},
{
"epoch": 0.1944883147303604,
"grad_norm": 3.7320888080359174,
"learning_rate": 8.056935964787733e-06,
"loss": 0.4172,
"step": 2740
},
{
"epoch": 0.19519812609798945,
"grad_norm": 3.750448672171502,
"learning_rate": 8.049836717307966e-06,
"loss": 0.4327,
"step": 2750
},
{
"epoch": 0.19590793746561852,
"grad_norm": 3.0158382271152564,
"learning_rate": 8.0427374698282e-06,
"loss": 0.4284,
"step": 2760
},
{
"epoch": 0.19661774883324756,
"grad_norm": 2.438159262347708,
"learning_rate": 8.035638222348433e-06,
"loss": 0.4117,
"step": 2770
},
{
"epoch": 0.1973275602008766,
"grad_norm": 4.795802800628808,
"learning_rate": 8.028538974868664e-06,
"loss": 0.4207,
"step": 2780
},
{
"epoch": 0.19803737156850568,
"grad_norm": 2.5291141301554405,
"learning_rate": 8.021439727388897e-06,
"loss": 0.4146,
"step": 2790
},
{
"epoch": 0.19874718293613472,
"grad_norm": 2.4740979454164727,
"learning_rate": 8.01434047990913e-06,
"loss": 0.3999,
"step": 2800
},
{
"epoch": 0.19945699430376376,
"grad_norm": 3.4467777684569927,
"learning_rate": 8.007241232429363e-06,
"loss": 0.4151,
"step": 2810
},
{
"epoch": 0.20016680567139283,
"grad_norm": 2.741445348023422,
"learning_rate": 8.000141984949596e-06,
"loss": 0.4165,
"step": 2820
},
{
"epoch": 0.20087661703902188,
"grad_norm": 2.977547725757033,
"learning_rate": 7.99304273746983e-06,
"loss": 0.4137,
"step": 2830
},
{
"epoch": 0.20158642840665092,
"grad_norm": 3.493123708582949,
"learning_rate": 7.985943489990061e-06,
"loss": 0.4095,
"step": 2840
},
{
"epoch": 0.20229623977428,
"grad_norm": 9.43644672917822,
"learning_rate": 7.978844242510294e-06,
"loss": 0.4066,
"step": 2850
},
{
"epoch": 0.20300605114190903,
"grad_norm": 4.050870492633986,
"learning_rate": 7.971744995030527e-06,
"loss": 0.4079,
"step": 2860
},
{
"epoch": 0.2037158625095381,
"grad_norm": 7.830134940271083,
"learning_rate": 7.96464574755076e-06,
"loss": 0.3896,
"step": 2870
},
{
"epoch": 0.20442567387716715,
"grad_norm": 7.557535176254197,
"learning_rate": 7.957546500070993e-06,
"loss": 0.4096,
"step": 2880
},
{
"epoch": 0.2051354852447962,
"grad_norm": 4.715465621080843,
"learning_rate": 7.950447252591226e-06,
"loss": 0.3907,
"step": 2890
},
{
"epoch": 0.20584529661242526,
"grad_norm": 30.299863630729803,
"learning_rate": 7.943348005111458e-06,
"loss": 0.4142,
"step": 2900
},
{
"epoch": 0.2065551079800543,
"grad_norm": 13.362349279952854,
"learning_rate": 7.936248757631691e-06,
"loss": 0.4211,
"step": 2910
},
{
"epoch": 0.20726491934768335,
"grad_norm": 7.166470527615742,
"learning_rate": 7.929149510151924e-06,
"loss": 0.4038,
"step": 2920
},
{
"epoch": 0.20797473071531242,
"grad_norm": 218.37559359733393,
"learning_rate": 7.922050262672157e-06,
"loss": 0.3814,
"step": 2930
},
{
"epoch": 0.20868454208294146,
"grad_norm": 4.776318350142146,
"learning_rate": 7.91495101519239e-06,
"loss": 0.4033,
"step": 2940
},
{
"epoch": 0.2093943534505705,
"grad_norm": 6.050705359465637,
"learning_rate": 7.907851767712623e-06,
"loss": 0.4006,
"step": 2950
},
{
"epoch": 0.21010416481819957,
"grad_norm": 7.0609749250244125,
"learning_rate": 7.900752520232857e-06,
"loss": 0.3996,
"step": 2960
},
{
"epoch": 0.21081397618582862,
"grad_norm": 5.2294105499183985,
"learning_rate": 7.89365327275309e-06,
"loss": 0.3906,
"step": 2970
},
{
"epoch": 0.21152378755345766,
"grad_norm": 5.037453517661707,
"learning_rate": 7.886554025273323e-06,
"loss": 0.3925,
"step": 2980
},
{
"epoch": 0.21223359892108673,
"grad_norm": 4.329367488091813,
"learning_rate": 7.879454777793554e-06,
"loss": 0.4005,
"step": 2990
},
{
"epoch": 0.21294341028871577,
"grad_norm": 4.587934783884384,
"learning_rate": 7.872355530313787e-06,
"loss": 0.3949,
"step": 3000
},
{
"epoch": 0.21365322165634482,
"grad_norm": 4.34538375508175,
"learning_rate": 7.86525628283402e-06,
"loss": 0.3963,
"step": 3010
},
{
"epoch": 0.2143630330239739,
"grad_norm": 14.538466945533717,
"learning_rate": 7.858157035354253e-06,
"loss": 0.4145,
"step": 3020
},
{
"epoch": 0.21507284439160293,
"grad_norm": 5.725604081866674,
"learning_rate": 7.851057787874487e-06,
"loss": 0.397,
"step": 3030
},
{
"epoch": 0.21578265575923197,
"grad_norm": 4.100595238075657,
"learning_rate": 7.843958540394718e-06,
"loss": 0.407,
"step": 3040
},
{
"epoch": 0.21649246712686104,
"grad_norm": 3.6102459737641452,
"learning_rate": 7.836859292914951e-06,
"loss": 0.3941,
"step": 3050
},
{
"epoch": 0.2172022784944901,
"grad_norm": 9.48884086833176,
"learning_rate": 7.829760045435184e-06,
"loss": 0.3981,
"step": 3060
},
{
"epoch": 0.21791208986211913,
"grad_norm": 5.265598040684193,
"learning_rate": 7.822660797955417e-06,
"loss": 0.3865,
"step": 3070
},
{
"epoch": 0.2186219012297482,
"grad_norm": 5.853395704700518,
"learning_rate": 7.81556155047565e-06,
"loss": 0.4089,
"step": 3080
},
{
"epoch": 0.21933171259737724,
"grad_norm": 2.867041909768411,
"learning_rate": 7.808462302995883e-06,
"loss": 0.411,
"step": 3090
},
{
"epoch": 0.22004152396500629,
"grad_norm": 6.447556295363806,
"learning_rate": 7.801363055516117e-06,
"loss": 0.4054,
"step": 3100
},
{
"epoch": 0.22075133533263536,
"grad_norm": 6.665403407542621,
"learning_rate": 7.794263808036348e-06,
"loss": 0.4331,
"step": 3110
},
{
"epoch": 0.2214611467002644,
"grad_norm": 3.740543632288075,
"learning_rate": 7.787164560556581e-06,
"loss": 0.4132,
"step": 3120
},
{
"epoch": 0.22217095806789347,
"grad_norm": 19.12212944661018,
"learning_rate": 7.780065313076814e-06,
"loss": 0.4229,
"step": 3130
},
{
"epoch": 0.2228807694355225,
"grad_norm": 5.646216224084272,
"learning_rate": 7.772966065597047e-06,
"loss": 0.4123,
"step": 3140
},
{
"epoch": 0.22359058080315156,
"grad_norm": 12.549975615460761,
"learning_rate": 7.76586681811728e-06,
"loss": 0.4156,
"step": 3150
},
{
"epoch": 0.22430039217078063,
"grad_norm": 5.34509934381609,
"learning_rate": 7.758767570637513e-06,
"loss": 0.3935,
"step": 3160
},
{
"epoch": 0.22501020353840967,
"grad_norm": 4.868356423660982,
"learning_rate": 7.751668323157747e-06,
"loss": 0.4121,
"step": 3170
},
{
"epoch": 0.2257200149060387,
"grad_norm": 3.604594374317723,
"learning_rate": 7.74456907567798e-06,
"loss": 0.3949,
"step": 3180
},
{
"epoch": 0.22642982627366778,
"grad_norm": 2.6762060130385565,
"learning_rate": 7.737469828198211e-06,
"loss": 0.4192,
"step": 3190
},
{
"epoch": 0.22713963764129683,
"grad_norm": 3.7277037964888957,
"learning_rate": 7.730370580718444e-06,
"loss": 0.4063,
"step": 3200
},
{
"epoch": 0.22784944900892587,
"grad_norm": 4.2017308560808395,
"learning_rate": 7.723271333238677e-06,
"loss": 0.3983,
"step": 3210
},
{
"epoch": 0.22855926037655494,
"grad_norm": 6.82717398390433,
"learning_rate": 7.71617208575891e-06,
"loss": 0.4003,
"step": 3220
},
{
"epoch": 0.22926907174418398,
"grad_norm": 3.3720424392184865,
"learning_rate": 7.709072838279143e-06,
"loss": 0.384,
"step": 3230
},
{
"epoch": 0.22997888311181303,
"grad_norm": 15.234041629621501,
"learning_rate": 7.701973590799375e-06,
"loss": 0.3936,
"step": 3240
},
{
"epoch": 0.2306886944794421,
"grad_norm": 6.450291645106787,
"learning_rate": 7.694874343319608e-06,
"loss": 0.4153,
"step": 3250
},
{
"epoch": 0.23139850584707114,
"grad_norm": 5.0596647748479056,
"learning_rate": 7.687775095839841e-06,
"loss": 0.4098,
"step": 3260
},
{
"epoch": 0.23210831721470018,
"grad_norm": 6.351369993733097,
"learning_rate": 7.680675848360074e-06,
"loss": 0.4036,
"step": 3270
},
{
"epoch": 0.23281812858232925,
"grad_norm": 7.706709044787595,
"learning_rate": 7.673576600880307e-06,
"loss": 0.4137,
"step": 3280
},
{
"epoch": 0.2335279399499583,
"grad_norm": 6.111103199878706,
"learning_rate": 7.66647735340054e-06,
"loss": 0.4163,
"step": 3290
},
{
"epoch": 0.23423775131758734,
"grad_norm": 3.182362422678598,
"learning_rate": 7.659378105920773e-06,
"loss": 0.4007,
"step": 3300
},
{
"epoch": 0.2349475626852164,
"grad_norm": 3.929827344563346,
"learning_rate": 7.652278858441005e-06,
"loss": 0.4011,
"step": 3310
},
{
"epoch": 0.23565737405284545,
"grad_norm": 6.606808853169358,
"learning_rate": 7.645179610961238e-06,
"loss": 0.4113,
"step": 3320
},
{
"epoch": 0.2363671854204745,
"grad_norm": 7.983975561443669,
"learning_rate": 7.638080363481471e-06,
"loss": 0.3941,
"step": 3330
},
{
"epoch": 0.23707699678810357,
"grad_norm": 2.551810232754013,
"learning_rate": 7.630981116001704e-06,
"loss": 0.3987,
"step": 3340
},
{
"epoch": 0.2377868081557326,
"grad_norm": 16.325804366695763,
"learning_rate": 7.623881868521937e-06,
"loss": 0.3814,
"step": 3350
},
{
"epoch": 0.23849661952336168,
"grad_norm": 17.86582631307272,
"learning_rate": 7.61678262104217e-06,
"loss": 0.4065,
"step": 3360
},
{
"epoch": 0.23920643089099072,
"grad_norm": 4.439905284094514,
"learning_rate": 7.6096833735624035e-06,
"loss": 0.4079,
"step": 3370
},
{
"epoch": 0.23991624225861977,
"grad_norm": 13.632710588001641,
"learning_rate": 7.602584126082636e-06,
"loss": 0.4075,
"step": 3380
},
{
"epoch": 0.24062605362624884,
"grad_norm": 7.4557485788963405,
"learning_rate": 7.595484878602869e-06,
"loss": 0.399,
"step": 3390
},
{
"epoch": 0.24133586499387788,
"grad_norm": 6.032057911933067,
"learning_rate": 7.588385631123102e-06,
"loss": 0.3892,
"step": 3400
},
{
"epoch": 0.24204567636150692,
"grad_norm": 5.1424876309924,
"learning_rate": 7.581286383643335e-06,
"loss": 0.396,
"step": 3410
},
{
"epoch": 0.242755487729136,
"grad_norm": 3.6691932120100987,
"learning_rate": 7.574187136163567e-06,
"loss": 0.4108,
"step": 3420
},
{
"epoch": 0.24346529909676504,
"grad_norm": 2.8083232656002033,
"learning_rate": 7.5670878886838004e-06,
"loss": 0.3984,
"step": 3430
},
{
"epoch": 0.24417511046439408,
"grad_norm": 13.589049355107566,
"learning_rate": 7.5599886412040335e-06,
"loss": 0.3957,
"step": 3440
},
{
"epoch": 0.24488492183202315,
"grad_norm": 6.813624263530042,
"learning_rate": 7.552889393724265e-06,
"loss": 0.4105,
"step": 3450
},
{
"epoch": 0.2455947331996522,
"grad_norm": 13.609829369379536,
"learning_rate": 7.545790146244498e-06,
"loss": 0.4175,
"step": 3460
},
{
"epoch": 0.24630454456728124,
"grad_norm": 5.1258006881261915,
"learning_rate": 7.538690898764731e-06,
"loss": 0.3966,
"step": 3470
},
{
"epoch": 0.2470143559349103,
"grad_norm": 40.31962236147607,
"learning_rate": 7.531591651284964e-06,
"loss": 0.3839,
"step": 3480
},
{
"epoch": 0.24772416730253935,
"grad_norm": 6.537768993909155,
"learning_rate": 7.524492403805197e-06,
"loss": 0.4122,
"step": 3490
},
{
"epoch": 0.2484339786701684,
"grad_norm": 17.652356012021233,
"learning_rate": 7.51739315632543e-06,
"loss": 0.3948,
"step": 3500
},
{
"epoch": 0.24914379003779746,
"grad_norm": 3.85528406182526,
"learning_rate": 7.510293908845663e-06,
"loss": 0.3938,
"step": 3510
},
{
"epoch": 0.2498536014054265,
"grad_norm": 125.62304184951121,
"learning_rate": 7.503194661365896e-06,
"loss": 0.389,
"step": 3520
},
{
"epoch": 0.25056341277305555,
"grad_norm": 8.558355724038593,
"learning_rate": 7.496095413886129e-06,
"loss": 0.3787,
"step": 3530
},
{
"epoch": 0.2512732241406846,
"grad_norm": 4.216427070872869,
"learning_rate": 7.488996166406361e-06,
"loss": 0.3835,
"step": 3540
},
{
"epoch": 0.2519830355083137,
"grad_norm": 4.314131483032103,
"learning_rate": 7.481896918926594e-06,
"loss": 0.3946,
"step": 3550
},
{
"epoch": 0.25269284687594273,
"grad_norm": 4.159823786853909,
"learning_rate": 7.474797671446827e-06,
"loss": 0.3972,
"step": 3560
},
{
"epoch": 0.2534026582435718,
"grad_norm": 3.4947296702394586,
"learning_rate": 7.4676984239670605e-06,
"loss": 0.4165,
"step": 3570
},
{
"epoch": 0.2541124696112008,
"grad_norm": 4.022241190948728,
"learning_rate": 7.4605991764872936e-06,
"loss": 0.3988,
"step": 3580
},
{
"epoch": 0.25482228097882986,
"grad_norm": 3.4849637281174006,
"learning_rate": 7.453499929007526e-06,
"loss": 0.4106,
"step": 3590
},
{
"epoch": 0.2555320923464589,
"grad_norm": 5.338306458076586,
"learning_rate": 7.446400681527759e-06,
"loss": 0.4082,
"step": 3600
},
{
"epoch": 0.256241903714088,
"grad_norm": 4.970005106695202,
"learning_rate": 7.439301434047992e-06,
"loss": 0.3914,
"step": 3610
},
{
"epoch": 0.25695171508171705,
"grad_norm": 6.355373029038747,
"learning_rate": 7.432202186568225e-06,
"loss": 0.3989,
"step": 3620
},
{
"epoch": 0.2576615264493461,
"grad_norm": 5.996742366501121,
"learning_rate": 7.425102939088457e-06,
"loss": 0.3999,
"step": 3630
},
{
"epoch": 0.25837133781697513,
"grad_norm": 6.966686936423967,
"learning_rate": 7.4180036916086905e-06,
"loss": 0.3831,
"step": 3640
},
{
"epoch": 0.2590811491846042,
"grad_norm": 4.185121399245409,
"learning_rate": 7.410904444128923e-06,
"loss": 0.408,
"step": 3650
},
{
"epoch": 0.2597909605522332,
"grad_norm": 2.2056616209460866,
"learning_rate": 7.403805196649155e-06,
"loss": 0.3931,
"step": 3660
},
{
"epoch": 0.2605007719198623,
"grad_norm": 4.176248780095696,
"learning_rate": 7.396705949169388e-06,
"loss": 0.409,
"step": 3670
},
{
"epoch": 0.26121058328749136,
"grad_norm": 2.47926985794175,
"learning_rate": 7.389606701689621e-06,
"loss": 0.4091,
"step": 3680
},
{
"epoch": 0.2619203946551204,
"grad_norm": 3.02240842448802,
"learning_rate": 7.382507454209854e-06,
"loss": 0.4102,
"step": 3690
},
{
"epoch": 0.26263020602274945,
"grad_norm": 2.0291710541228816,
"learning_rate": 7.3754082067300866e-06,
"loss": 0.382,
"step": 3700
},
{
"epoch": 0.2633400173903785,
"grad_norm": 2.1912303159611084,
"learning_rate": 7.36830895925032e-06,
"loss": 0.3974,
"step": 3710
},
{
"epoch": 0.26404982875800753,
"grad_norm": 2.964541482780821,
"learning_rate": 7.361209711770553e-06,
"loss": 0.4096,
"step": 3720
},
{
"epoch": 0.26475964012563663,
"grad_norm": 5.810099164313448,
"learning_rate": 7.354110464290786e-06,
"loss": 0.4092,
"step": 3730
},
{
"epoch": 0.2654694514932657,
"grad_norm": 4.879409457746285,
"learning_rate": 7.347011216811019e-06,
"loss": 0.4034,
"step": 3740
},
{
"epoch": 0.2661792628608947,
"grad_norm": 2.761287928392515,
"learning_rate": 7.339911969331251e-06,
"loss": 0.3971,
"step": 3750
},
{
"epoch": 0.26688907422852376,
"grad_norm": 14.80879239487425,
"learning_rate": 7.332812721851484e-06,
"loss": 0.4203,
"step": 3760
},
{
"epoch": 0.2675988855961528,
"grad_norm": 2.589550559546521,
"learning_rate": 7.325713474371717e-06,
"loss": 0.4065,
"step": 3770
},
{
"epoch": 0.2683086969637819,
"grad_norm": 2.1908148156089204,
"learning_rate": 7.3186142268919505e-06,
"loss": 0.4001,
"step": 3780
},
{
"epoch": 0.26901850833141094,
"grad_norm": 3.614429975395643,
"learning_rate": 7.311514979412183e-06,
"loss": 0.3949,
"step": 3790
},
{
"epoch": 0.26972831969904,
"grad_norm": 8.199581604131074,
"learning_rate": 7.304415731932416e-06,
"loss": 0.4027,
"step": 3800
},
{
"epoch": 0.27043813106666903,
"grad_norm": 1.9841735875976263,
"learning_rate": 7.297316484452649e-06,
"loss": 0.3803,
"step": 3810
},
{
"epoch": 0.2711479424342981,
"grad_norm": 1.7818490390141006,
"learning_rate": 7.290217236972882e-06,
"loss": 0.3979,
"step": 3820
},
{
"epoch": 0.2718577538019271,
"grad_norm": 2.664420697627613,
"learning_rate": 7.283117989493115e-06,
"loss": 0.4112,
"step": 3830
},
{
"epoch": 0.2725675651695562,
"grad_norm": 7.6015896940216345,
"learning_rate": 7.2760187420133474e-06,
"loss": 0.3978,
"step": 3840
},
{
"epoch": 0.27327737653718526,
"grad_norm": 5.109710356060471,
"learning_rate": 7.2689194945335805e-06,
"loss": 0.3911,
"step": 3850
},
{
"epoch": 0.2739871879048143,
"grad_norm": 1.8719451344781273,
"learning_rate": 7.261820247053813e-06,
"loss": 0.4039,
"step": 3860
},
{
"epoch": 0.27469699927244334,
"grad_norm": 7.834590688589366,
"learning_rate": 7.254720999574045e-06,
"loss": 0.3972,
"step": 3870
},
{
"epoch": 0.2754068106400724,
"grad_norm": 3.4725606354409915,
"learning_rate": 7.247621752094278e-06,
"loss": 0.4106,
"step": 3880
},
{
"epoch": 0.27611662200770143,
"grad_norm": 2.131887069098727,
"learning_rate": 7.240522504614511e-06,
"loss": 0.3921,
"step": 3890
},
{
"epoch": 0.2768264333753305,
"grad_norm": 3.840712773368679,
"learning_rate": 7.233423257134744e-06,
"loss": 0.3963,
"step": 3900
},
{
"epoch": 0.27753624474295957,
"grad_norm": 1.8435607174327202,
"learning_rate": 7.226324009654977e-06,
"loss": 0.4171,
"step": 3910
},
{
"epoch": 0.2782460561105886,
"grad_norm": 2.927315889095762,
"learning_rate": 7.21922476217521e-06,
"loss": 0.4078,
"step": 3920
},
{
"epoch": 0.27895586747821766,
"grad_norm": 2.4533548064235955,
"learning_rate": 7.212125514695443e-06,
"loss": 0.4018,
"step": 3930
},
{
"epoch": 0.2796656788458467,
"grad_norm": 2.6808622987821424,
"learning_rate": 7.205026267215676e-06,
"loss": 0.3952,
"step": 3940
},
{
"epoch": 0.28037549021347574,
"grad_norm": 2.006870713713202,
"learning_rate": 7.197927019735908e-06,
"loss": 0.4041,
"step": 3950
},
{
"epoch": 0.28108530158110484,
"grad_norm": 4.1552921396903955,
"learning_rate": 7.190827772256141e-06,
"loss": 0.3815,
"step": 3960
},
{
"epoch": 0.2817951129487339,
"grad_norm": 3.088912130241367,
"learning_rate": 7.183728524776374e-06,
"loss": 0.4018,
"step": 3970
},
{
"epoch": 0.2825049243163629,
"grad_norm": 2.9619382181530853,
"learning_rate": 7.1766292772966075e-06,
"loss": 0.4071,
"step": 3980
},
{
"epoch": 0.28321473568399197,
"grad_norm": 3.194525382034512,
"learning_rate": 7.1695300298168406e-06,
"loss": 0.3861,
"step": 3990
},
{
"epoch": 0.283924547051621,
"grad_norm": 2.58824315637412,
"learning_rate": 7.162430782337073e-06,
"loss": 0.4022,
"step": 4000
},
{
"epoch": 0.2846343584192501,
"grad_norm": 1.6807083864960135,
"learning_rate": 7.155331534857306e-06,
"loss": 0.3953,
"step": 4010
},
{
"epoch": 0.28534416978687915,
"grad_norm": 2.9052226494936706,
"learning_rate": 7.148232287377539e-06,
"loss": 0.3803,
"step": 4020
},
{
"epoch": 0.2860539811545082,
"grad_norm": 1.9518486816171219,
"learning_rate": 7.141133039897772e-06,
"loss": 0.4076,
"step": 4030
},
{
"epoch": 0.28676379252213724,
"grad_norm": 2.223176862483651,
"learning_rate": 7.134033792418004e-06,
"loss": 0.4058,
"step": 4040
},
{
"epoch": 0.2874736038897663,
"grad_norm": 2.2196780309614854,
"learning_rate": 7.1269345449382375e-06,
"loss": 0.3926,
"step": 4050
},
{
"epoch": 0.2881834152573953,
"grad_norm": 6.524368077094248,
"learning_rate": 7.11983529745847e-06,
"loss": 0.4172,
"step": 4060
},
{
"epoch": 0.2888932266250244,
"grad_norm": 5.292339769504148,
"learning_rate": 7.112736049978702e-06,
"loss": 0.3908,
"step": 4070
},
{
"epoch": 0.28960303799265347,
"grad_norm": 2.3067804343233282,
"learning_rate": 7.105636802498935e-06,
"loss": 0.3899,
"step": 4080
},
{
"epoch": 0.2903128493602825,
"grad_norm": 3.23451698379491,
"learning_rate": 7.098537555019168e-06,
"loss": 0.4078,
"step": 4090
},
{
"epoch": 0.29102266072791155,
"grad_norm": 1.9975711149406958,
"learning_rate": 7.091438307539401e-06,
"loss": 0.3892,
"step": 4100
},
{
"epoch": 0.2917324720955406,
"grad_norm": 2.172457996529036,
"learning_rate": 7.084339060059634e-06,
"loss": 0.4024,
"step": 4110
},
{
"epoch": 0.29244228346316964,
"grad_norm": 4.2611345539293985,
"learning_rate": 7.077239812579867e-06,
"loss": 0.4051,
"step": 4120
},
{
"epoch": 0.29315209483079874,
"grad_norm": 4.8499954927547915,
"learning_rate": 7.0701405651001e-06,
"loss": 0.4051,
"step": 4130
},
{
"epoch": 0.2938619061984278,
"grad_norm": 3.133374032170856,
"learning_rate": 7.063041317620333e-06,
"loss": 0.4113,
"step": 4140
},
{
"epoch": 0.2945717175660568,
"grad_norm": 3.0408556337828667,
"learning_rate": 7.055942070140566e-06,
"loss": 0.3918,
"step": 4150
},
{
"epoch": 0.29528152893368587,
"grad_norm": 2.967610716656761,
"learning_rate": 7.048842822660798e-06,
"loss": 0.3935,
"step": 4160
},
{
"epoch": 0.2959913403013149,
"grad_norm": 4.089654504142007,
"learning_rate": 7.041743575181031e-06,
"loss": 0.3812,
"step": 4170
},
{
"epoch": 0.29670115166894395,
"grad_norm": 6.123820735815897,
"learning_rate": 7.0346443277012644e-06,
"loss": 0.3894,
"step": 4180
},
{
"epoch": 0.29741096303657305,
"grad_norm": 9.52031358542494,
"learning_rate": 7.0275450802214975e-06,
"loss": 0.3933,
"step": 4190
},
{
"epoch": 0.2981207744042021,
"grad_norm": 4.241656002923987,
"learning_rate": 7.02044583274173e-06,
"loss": 0.3938,
"step": 4200
},
{
"epoch": 0.29883058577183114,
"grad_norm": 10.364254693083032,
"learning_rate": 7.013346585261963e-06,
"loss": 0.3939,
"step": 4210
},
{
"epoch": 0.2995403971394602,
"grad_norm": 2.493001703497579,
"learning_rate": 7.006247337782196e-06,
"loss": 0.3904,
"step": 4220
},
{
"epoch": 0.3002502085070892,
"grad_norm": 2.372260556132136,
"learning_rate": 6.999148090302429e-06,
"loss": 0.4002,
"step": 4230
},
{
"epoch": 0.3009600198747183,
"grad_norm": 4.447948099801884,
"learning_rate": 6.992048842822662e-06,
"loss": 0.3894,
"step": 4240
},
{
"epoch": 0.30166983124234736,
"grad_norm": 2.4733723007039847,
"learning_rate": 6.9849495953428944e-06,
"loss": 0.3863,
"step": 4250
},
{
"epoch": 0.3023796426099764,
"grad_norm": 11.318740156291982,
"learning_rate": 6.977850347863127e-06,
"loss": 0.3881,
"step": 4260
},
{
"epoch": 0.30308945397760545,
"grad_norm": 3.6328999006662563,
"learning_rate": 6.97075110038336e-06,
"loss": 0.3894,
"step": 4270
},
{
"epoch": 0.3037992653452345,
"grad_norm": 2.0376811180198353,
"learning_rate": 6.963651852903592e-06,
"loss": 0.3993,
"step": 4280
},
{
"epoch": 0.30450907671286354,
"grad_norm": 2.1376755414320625,
"learning_rate": 6.956552605423825e-06,
"loss": 0.3903,
"step": 4290
},
{
"epoch": 0.30521888808049263,
"grad_norm": 2.883515618882684,
"learning_rate": 6.949453357944058e-06,
"loss": 0.4082,
"step": 4300
},
{
"epoch": 0.3059286994481217,
"grad_norm": 2.0964398516334444,
"learning_rate": 6.942354110464291e-06,
"loss": 0.3857,
"step": 4310
},
{
"epoch": 0.3066385108157507,
"grad_norm": 5.410779818418891,
"learning_rate": 6.935254862984524e-06,
"loss": 0.391,
"step": 4320
},
{
"epoch": 0.30734832218337976,
"grad_norm": 4.439425532620099,
"learning_rate": 6.928155615504757e-06,
"loss": 0.4099,
"step": 4330
},
{
"epoch": 0.3080581335510088,
"grad_norm": 12.275643206811255,
"learning_rate": 6.92105636802499e-06,
"loss": 0.3953,
"step": 4340
},
{
"epoch": 0.30876794491863785,
"grad_norm": 5.947992733400443,
"learning_rate": 6.913957120545223e-06,
"loss": 0.3945,
"step": 4350
},
{
"epoch": 0.30947775628626695,
"grad_norm": 3.4397054213510843,
"learning_rate": 6.906857873065456e-06,
"loss": 0.3875,
"step": 4360
},
{
"epoch": 0.310187567653896,
"grad_norm": 41.88563893552131,
"learning_rate": 6.899758625585688e-06,
"loss": 0.3928,
"step": 4370
},
{
"epoch": 0.31089737902152503,
"grad_norm": 3.227989243444744,
"learning_rate": 6.892659378105921e-06,
"loss": 0.3908,
"step": 4380
},
{
"epoch": 0.3116071903891541,
"grad_norm": 22.897381721878148,
"learning_rate": 6.8855601306261545e-06,
"loss": 0.391,
"step": 4390
},
{
"epoch": 0.3123170017567831,
"grad_norm": 3.3630974135990406,
"learning_rate": 6.878460883146388e-06,
"loss": 0.374,
"step": 4400
},
{
"epoch": 0.31302681312441216,
"grad_norm": 4.877401136832981,
"learning_rate": 6.87136163566662e-06,
"loss": 0.3923,
"step": 4410
},
{
"epoch": 0.31373662449204126,
"grad_norm": 6.179682561885886,
"learning_rate": 6.864262388186853e-06,
"loss": 0.3865,
"step": 4420
},
{
"epoch": 0.3144464358596703,
"grad_norm": 4.8910756460648885,
"learning_rate": 6.857163140707086e-06,
"loss": 0.3865,
"step": 4430
},
{
"epoch": 0.31515624722729935,
"grad_norm": 3.260915462621521,
"learning_rate": 6.850063893227319e-06,
"loss": 0.3982,
"step": 4440
},
{
"epoch": 0.3158660585949284,
"grad_norm": 4.599472395508018,
"learning_rate": 6.842964645747551e-06,
"loss": 0.3961,
"step": 4450
},
{
"epoch": 0.31657586996255743,
"grad_norm": 7.776943140920524,
"learning_rate": 6.8358653982677845e-06,
"loss": 0.3873,
"step": 4460
},
{
"epoch": 0.3172856813301865,
"grad_norm": 3.0126570398502723,
"learning_rate": 6.828766150788017e-06,
"loss": 0.3859,
"step": 4470
},
{
"epoch": 0.3179954926978156,
"grad_norm": 1.935360939609241,
"learning_rate": 6.82166690330825e-06,
"loss": 0.3893,
"step": 4480
},
{
"epoch": 0.3187053040654446,
"grad_norm": 2.8545870894952055,
"learning_rate": 6.814567655828482e-06,
"loss": 0.3963,
"step": 4490
},
{
"epoch": 0.31941511543307366,
"grad_norm": 4.70013317139999,
"learning_rate": 6.807468408348715e-06,
"loss": 0.3939,
"step": 4500
},
{
"epoch": 0.3201249268007027,
"grad_norm": 3.264719904276936,
"learning_rate": 6.800369160868948e-06,
"loss": 0.3851,
"step": 4510
},
{
"epoch": 0.32083473816833175,
"grad_norm": 19.735683632874615,
"learning_rate": 6.793269913389181e-06,
"loss": 0.3722,
"step": 4520
},
{
"epoch": 0.32154454953596084,
"grad_norm": 2.501896594333183,
"learning_rate": 6.786170665909414e-06,
"loss": 0.3744,
"step": 4530
},
{
"epoch": 0.3222543609035899,
"grad_norm": 6.776418259400934,
"learning_rate": 6.779071418429647e-06,
"loss": 0.3868,
"step": 4540
},
{
"epoch": 0.32296417227121893,
"grad_norm": 7.759324029832955,
"learning_rate": 6.77197217094988e-06,
"loss": 0.3978,
"step": 4550
},
{
"epoch": 0.323673983638848,
"grad_norm": 5.1020465787210805,
"learning_rate": 6.764872923470113e-06,
"loss": 0.3756,
"step": 4560
},
{
"epoch": 0.324383795006477,
"grad_norm": 4.584721636805871,
"learning_rate": 6.757773675990345e-06,
"loss": 0.3962,
"step": 4570
},
{
"epoch": 0.32509360637410606,
"grad_norm": 5.227400251430727,
"learning_rate": 6.750674428510578e-06,
"loss": 0.3934,
"step": 4580
},
{
"epoch": 0.32580341774173516,
"grad_norm": 6.3055606292098645,
"learning_rate": 6.7435751810308114e-06,
"loss": 0.3921,
"step": 4590
},
{
"epoch": 0.3265132291093642,
"grad_norm": 3.6872617865325914,
"learning_rate": 6.7364759335510445e-06,
"loss": 0.3818,
"step": 4600
},
{
"epoch": 0.32722304047699324,
"grad_norm": 2.007884918336012,
"learning_rate": 6.729376686071278e-06,
"loss": 0.4005,
"step": 4610
},
{
"epoch": 0.3279328518446223,
"grad_norm": 5.042964957635144,
"learning_rate": 6.72227743859151e-06,
"loss": 0.3934,
"step": 4620
},
{
"epoch": 0.32864266321225133,
"grad_norm": 4.122572427444757,
"learning_rate": 6.715178191111743e-06,
"loss": 0.3835,
"step": 4630
},
{
"epoch": 0.32935247457988037,
"grad_norm": 4.528744366296638,
"learning_rate": 6.708078943631976e-06,
"loss": 0.3781,
"step": 4640
},
{
"epoch": 0.33006228594750947,
"grad_norm": 3.0405586193089107,
"learning_rate": 6.700979696152209e-06,
"loss": 0.4013,
"step": 4650
},
{
"epoch": 0.3307720973151385,
"grad_norm": 2.497528895602537,
"learning_rate": 6.6938804486724415e-06,
"loss": 0.4012,
"step": 4660
},
{
"epoch": 0.33148190868276756,
"grad_norm": 3.949569099861772,
"learning_rate": 6.686781201192674e-06,
"loss": 0.3791,
"step": 4670
},
{
"epoch": 0.3321917200503966,
"grad_norm": 2.9026740036563714,
"learning_rate": 6.679681953712907e-06,
"loss": 0.379,
"step": 4680
},
{
"epoch": 0.33290153141802564,
"grad_norm": 4.750694201369016,
"learning_rate": 6.672582706233139e-06,
"loss": 0.3962,
"step": 4690
},
{
"epoch": 0.3336113427856547,
"grad_norm": 4.9647752226572655,
"learning_rate": 6.665483458753372e-06,
"loss": 0.4014,
"step": 4700
},
{
"epoch": 0.3343211541532838,
"grad_norm": 5.007567374826438,
"learning_rate": 6.658384211273605e-06,
"loss": 0.386,
"step": 4710
},
{
"epoch": 0.3350309655209128,
"grad_norm": 24.665793733036637,
"learning_rate": 6.651284963793838e-06,
"loss": 0.3904,
"step": 4720
},
{
"epoch": 0.33574077688854187,
"grad_norm": 8.807448982539153,
"learning_rate": 6.6441857163140715e-06,
"loss": 0.3817,
"step": 4730
},
{
"epoch": 0.3364505882561709,
"grad_norm": 5.649488918187287,
"learning_rate": 6.637086468834304e-06,
"loss": 0.3952,
"step": 4740
},
{
"epoch": 0.33716039962379996,
"grad_norm": 10.030238684862177,
"learning_rate": 6.629987221354537e-06,
"loss": 0.3894,
"step": 4750
},
{
"epoch": 0.33787021099142905,
"grad_norm": 8.229307584465264,
"learning_rate": 6.62288797387477e-06,
"loss": 0.3777,
"step": 4760
},
{
"epoch": 0.3385800223590581,
"grad_norm": 4.702015980686352,
"learning_rate": 6.615788726395003e-06,
"loss": 0.3846,
"step": 4770
},
{
"epoch": 0.33928983372668714,
"grad_norm": 7.609531980298162,
"learning_rate": 6.608689478915235e-06,
"loss": 0.3876,
"step": 4780
},
{
"epoch": 0.3399996450943162,
"grad_norm": 9.359016840144466,
"learning_rate": 6.601590231435468e-06,
"loss": 0.3912,
"step": 4790
},
{
"epoch": 0.3407094564619452,
"grad_norm": 6.921512932106153,
"learning_rate": 6.5944909839557015e-06,
"loss": 0.3808,
"step": 4800
},
{
"epoch": 0.34141926782957427,
"grad_norm": 7.896921462163668,
"learning_rate": 6.587391736475935e-06,
"loss": 0.3822,
"step": 4810
},
{
"epoch": 0.34212907919720337,
"grad_norm": 41.265653283488135,
"learning_rate": 6.580292488996167e-06,
"loss": 0.3704,
"step": 4820
},
{
"epoch": 0.3428388905648324,
"grad_norm": 22.410728414840314,
"learning_rate": 6.5731932415164e-06,
"loss": 0.3879,
"step": 4830
},
{
"epoch": 0.34354870193246145,
"grad_norm": 28.36796548695283,
"learning_rate": 6.566093994036633e-06,
"loss": 0.3819,
"step": 4840
},
{
"epoch": 0.3442585133000905,
"grad_norm": 5.964443376270807,
"learning_rate": 6.558994746556866e-06,
"loss": 0.3793,
"step": 4850
},
{
"epoch": 0.34496832466771954,
"grad_norm": 4.876522423500047,
"learning_rate": 6.551895499077099e-06,
"loss": 0.3882,
"step": 4860
},
{
"epoch": 0.3456781360353486,
"grad_norm": 4.871742533391797,
"learning_rate": 6.544796251597331e-06,
"loss": 0.3896,
"step": 4870
},
{
"epoch": 0.3463879474029777,
"grad_norm": 11.91690423514364,
"learning_rate": 6.537697004117564e-06,
"loss": 0.3736,
"step": 4880
},
{
"epoch": 0.3470977587706067,
"grad_norm": 5.986322327762981,
"learning_rate": 6.530597756637797e-06,
"loss": 0.368,
"step": 4890
},
{
"epoch": 0.34780757013823577,
"grad_norm": 4.671637222361169,
"learning_rate": 6.523498509158029e-06,
"loss": 0.3722,
"step": 4900
},
{
"epoch": 0.3485173815058648,
"grad_norm": 16.438976188514197,
"learning_rate": 6.516399261678262e-06,
"loss": 0.3776,
"step": 4910
},
{
"epoch": 0.34922719287349385,
"grad_norm": 11.76911671905372,
"learning_rate": 6.509300014198495e-06,
"loss": 0.3987,
"step": 4920
},
{
"epoch": 0.3499370042411229,
"grad_norm": 12.380867918847773,
"learning_rate": 6.502200766718728e-06,
"loss": 0.3949,
"step": 4930
},
{
"epoch": 0.350646815608752,
"grad_norm": 8.367704037629133,
"learning_rate": 6.495101519238961e-06,
"loss": 0.3767,
"step": 4940
},
{
"epoch": 0.35135662697638104,
"grad_norm": 74.35690108296033,
"learning_rate": 6.488002271759194e-06,
"loss": 0.3819,
"step": 4950
},
{
"epoch": 0.3520664383440101,
"grad_norm": 16.231219614665278,
"learning_rate": 6.480903024279427e-06,
"loss": 0.3859,
"step": 4960
},
{
"epoch": 0.3527762497116391,
"grad_norm": 9.060846103909238,
"learning_rate": 6.47380377679966e-06,
"loss": 0.394,
"step": 4970
},
{
"epoch": 0.35348606107926817,
"grad_norm": 21.88016531222193,
"learning_rate": 6.466704529319893e-06,
"loss": 0.4167,
"step": 4980
},
{
"epoch": 0.35419587244689726,
"grad_norm": 9.919040843315045,
"learning_rate": 6.459605281840125e-06,
"loss": 0.4192,
"step": 4990
},
{
"epoch": 0.3549056838145263,
"grad_norm": 5.183299722151934,
"learning_rate": 6.4525060343603584e-06,
"loss": 0.4249,
"step": 5000
},
{
"epoch": 0.35561549518215535,
"grad_norm": 8.847185946354221,
"learning_rate": 6.4454067868805915e-06,
"loss": 0.4112,
"step": 5010
},
{
"epoch": 0.3563253065497844,
"grad_norm": 11.864215621262682,
"learning_rate": 6.438307539400825e-06,
"loss": 0.4165,
"step": 5020
},
{
"epoch": 0.35703511791741344,
"grad_norm": 3.3703428369603503,
"learning_rate": 6.431208291921057e-06,
"loss": 0.3978,
"step": 5030
},
{
"epoch": 0.3577449292850425,
"grad_norm": 5.015316577294299,
"learning_rate": 6.42410904444129e-06,
"loss": 0.3872,
"step": 5040
},
{
"epoch": 0.3584547406526716,
"grad_norm": 4.2137919102595305,
"learning_rate": 6.417009796961523e-06,
"loss": 0.3766,
"step": 5050
},
{
"epoch": 0.3591645520203006,
"grad_norm": 3.0372315306510056,
"learning_rate": 6.409910549481756e-06,
"loss": 0.3842,
"step": 5060
},
{
"epoch": 0.35987436338792966,
"grad_norm": 2.7515400586423318,
"learning_rate": 6.4028113020019885e-06,
"loss": 0.3993,
"step": 5070
},
{
"epoch": 0.3605841747555587,
"grad_norm": 9.185207292504243,
"learning_rate": 6.395712054522221e-06,
"loss": 0.3875,
"step": 5080
},
{
"epoch": 0.36129398612318775,
"grad_norm": 19.515842749867563,
"learning_rate": 6.388612807042454e-06,
"loss": 0.4035,
"step": 5090
},
{
"epoch": 0.3620037974908168,
"grad_norm": 12.30636697197178,
"learning_rate": 6.381513559562686e-06,
"loss": 0.4035,
"step": 5100
},
{
"epoch": 0.3627136088584459,
"grad_norm": 6.732979846623905,
"learning_rate": 6.374414312082919e-06,
"loss": 0.4079,
"step": 5110
},
{
"epoch": 0.36342342022607493,
"grad_norm": 6.642326962423095,
"learning_rate": 6.367315064603152e-06,
"loss": 0.3945,
"step": 5120
},
{
"epoch": 0.364133231593704,
"grad_norm": 6.314154234087903,
"learning_rate": 6.360215817123385e-06,
"loss": 0.394,
"step": 5130
},
{
"epoch": 0.364843042961333,
"grad_norm": 4.760512258914551,
"learning_rate": 6.3531165696436185e-06,
"loss": 0.3863,
"step": 5140
},
{
"epoch": 0.36555285432896206,
"grad_norm": 4.048747245175314,
"learning_rate": 6.346017322163851e-06,
"loss": 0.3863,
"step": 5150
},
{
"epoch": 0.3662626656965911,
"grad_norm": 4.190578946223062,
"learning_rate": 6.338918074684084e-06,
"loss": 0.3723,
"step": 5160
},
{
"epoch": 0.3669724770642202,
"grad_norm": 4.175965799380943,
"learning_rate": 6.331818827204317e-06,
"loss": 0.3889,
"step": 5170
},
{
"epoch": 0.36768228843184925,
"grad_norm": 4.807186811656143,
"learning_rate": 6.32471957972455e-06,
"loss": 0.3874,
"step": 5180
},
{
"epoch": 0.3683920997994783,
"grad_norm": 6.659345248185456,
"learning_rate": 6.317620332244782e-06,
"loss": 0.3711,
"step": 5190
},
{
"epoch": 0.36910191116710733,
"grad_norm": 7.2186380945453905,
"learning_rate": 6.310521084765015e-06,
"loss": 0.3827,
"step": 5200
},
{
"epoch": 0.3698117225347364,
"grad_norm": 5.005630183658748,
"learning_rate": 6.3034218372852485e-06,
"loss": 0.3983,
"step": 5210
},
{
"epoch": 0.3705215339023654,
"grad_norm": 3.527405153009429,
"learning_rate": 6.296322589805482e-06,
"loss": 0.367,
"step": 5220
},
{
"epoch": 0.3712313452699945,
"grad_norm": 3.882199465110045,
"learning_rate": 6.289223342325715e-06,
"loss": 0.3883,
"step": 5230
},
{
"epoch": 0.37194115663762356,
"grad_norm": 7.463055050907344,
"learning_rate": 6.282124094845947e-06,
"loss": 0.3823,
"step": 5240
},
{
"epoch": 0.3726509680052526,
"grad_norm": 8.000906237369843,
"learning_rate": 6.27502484736618e-06,
"loss": 0.383,
"step": 5250
},
{
"epoch": 0.37336077937288165,
"grad_norm": 8.362063303535368,
"learning_rate": 6.267925599886413e-06,
"loss": 0.3893,
"step": 5260
},
{
"epoch": 0.3740705907405107,
"grad_norm": 4.721914441661691,
"learning_rate": 6.260826352406646e-06,
"loss": 0.3763,
"step": 5270
},
{
"epoch": 0.3747804021081398,
"grad_norm": 12.175797518430029,
"learning_rate": 6.253727104926878e-06,
"loss": 0.3977,
"step": 5280
},
{
"epoch": 0.37549021347576883,
"grad_norm": 9.814402397906687,
"learning_rate": 6.246627857447111e-06,
"loss": 0.3716,
"step": 5290
},
{
"epoch": 0.3762000248433979,
"grad_norm": 47.1450002499556,
"learning_rate": 6.239528609967344e-06,
"loss": 0.3792,
"step": 5300
},
{
"epoch": 0.3769098362110269,
"grad_norm": 27.513481595283608,
"learning_rate": 6.232429362487576e-06,
"loss": 0.3734,
"step": 5310
},
{
"epoch": 0.37761964757865596,
"grad_norm": 48.09984812385904,
"learning_rate": 6.225330115007809e-06,
"loss": 0.3873,
"step": 5320
},
{
"epoch": 0.378329458946285,
"grad_norm": 5.065884658180426,
"learning_rate": 6.218230867528042e-06,
"loss": 0.39,
"step": 5330
},
{
"epoch": 0.3790392703139141,
"grad_norm": 9.226418902203303,
"learning_rate": 6.2111316200482754e-06,
"loss": 0.3819,
"step": 5340
},
{
"epoch": 0.37974908168154314,
"grad_norm": 6.998201025336219,
"learning_rate": 6.204032372568508e-06,
"loss": 0.3818,
"step": 5350
},
{
"epoch": 0.3804588930491722,
"grad_norm": 4.086309894015096,
"learning_rate": 6.196933125088741e-06,
"loss": 0.3573,
"step": 5360
},
{
"epoch": 0.38116870441680123,
"grad_norm": 8.280993749723958,
"learning_rate": 6.189833877608974e-06,
"loss": 0.3763,
"step": 5370
},
{
"epoch": 0.3818785157844303,
"grad_norm": 4.086208683086361,
"learning_rate": 6.182734630129207e-06,
"loss": 0.3754,
"step": 5380
},
{
"epoch": 0.3825883271520593,
"grad_norm": 5.958244425553627,
"learning_rate": 6.17563538264944e-06,
"loss": 0.3844,
"step": 5390
},
{
"epoch": 0.3832981385196884,
"grad_norm": 3.580000162662889,
"learning_rate": 6.168536135169672e-06,
"loss": 0.382,
"step": 5400
},
{
"epoch": 0.38400794988731746,
"grad_norm": 2.986600327490101,
"learning_rate": 6.1614368876899054e-06,
"loss": 0.3722,
"step": 5410
},
{
"epoch": 0.3847177612549465,
"grad_norm": 3.253411703330411,
"learning_rate": 6.1543376402101386e-06,
"loss": 0.3723,
"step": 5420
},
{
"epoch": 0.38542757262257554,
"grad_norm": 5.02266916683139,
"learning_rate": 6.147238392730372e-06,
"loss": 0.353,
"step": 5430
},
{
"epoch": 0.3861373839902046,
"grad_norm": 6.509810117314743,
"learning_rate": 6.140139145250604e-06,
"loss": 0.3859,
"step": 5440
},
{
"epoch": 0.38684719535783363,
"grad_norm": 3.024955665262126,
"learning_rate": 6.133039897770837e-06,
"loss": 0.3929,
"step": 5450
},
{
"epoch": 0.3875570067254627,
"grad_norm": 3.1517938939602206,
"learning_rate": 6.12594065029107e-06,
"loss": 0.3899,
"step": 5460
},
{
"epoch": 0.38826681809309177,
"grad_norm": 4.545747430477116,
"learning_rate": 6.118841402811303e-06,
"loss": 0.376,
"step": 5470
},
{
"epoch": 0.3889766294607208,
"grad_norm": 4.069699163399179,
"learning_rate": 6.111742155331535e-06,
"loss": 0.3813,
"step": 5480
},
{
"epoch": 0.38968644082834986,
"grad_norm": 3.562062075517251,
"learning_rate": 6.104642907851768e-06,
"loss": 0.383,
"step": 5490
},
{
"epoch": 0.3903962521959789,
"grad_norm": 9.15980720106711,
"learning_rate": 6.097543660372001e-06,
"loss": 0.3921,
"step": 5500
},
{
"epoch": 0.391106063563608,
"grad_norm": 4.449111409231249,
"learning_rate": 6.090444412892234e-06,
"loss": 0.3823,
"step": 5510
},
{
"epoch": 0.39181587493123704,
"grad_norm": 12.724861852641904,
"learning_rate": 6.083345165412466e-06,
"loss": 0.3851,
"step": 5520
},
{
"epoch": 0.3925256862988661,
"grad_norm": 6.615402324691555,
"learning_rate": 6.076245917932699e-06,
"loss": 0.3667,
"step": 5530
},
{
"epoch": 0.3932354976664951,
"grad_norm": 8.817203015753774,
"learning_rate": 6.069146670452932e-06,
"loss": 0.3886,
"step": 5540
},
{
"epoch": 0.39394530903412417,
"grad_norm": 9.192960733910674,
"learning_rate": 6.0620474229731655e-06,
"loss": 0.3794,
"step": 5550
},
{
"epoch": 0.3946551204017532,
"grad_norm": 4.825188565131958,
"learning_rate": 6.054948175493398e-06,
"loss": 0.3786,
"step": 5560
},
{
"epoch": 0.3953649317693823,
"grad_norm": 6.68078822940831,
"learning_rate": 6.047848928013631e-06,
"loss": 0.3835,
"step": 5570
},
{
"epoch": 0.39607474313701135,
"grad_norm": 2.6400726840916175,
"learning_rate": 6.040749680533864e-06,
"loss": 0.381,
"step": 5580
},
{
"epoch": 0.3967845545046404,
"grad_norm": 3.6668671304324967,
"learning_rate": 6.033650433054097e-06,
"loss": 0.3745,
"step": 5590
},
{
"epoch": 0.39749436587226944,
"grad_norm": 2.639833206365908,
"learning_rate": 6.026551185574329e-06,
"loss": 0.3777,
"step": 5600
},
{
"epoch": 0.3982041772398985,
"grad_norm": 3.79888213287165,
"learning_rate": 6.019451938094562e-06,
"loss": 0.3911,
"step": 5610
},
{
"epoch": 0.3989139886075275,
"grad_norm": 5.09183422587413,
"learning_rate": 6.0123526906147955e-06,
"loss": 0.3832,
"step": 5620
},
{
"epoch": 0.3996237999751566,
"grad_norm": 3.3401895175000926,
"learning_rate": 6.005253443135029e-06,
"loss": 0.3862,
"step": 5630
},
{
"epoch": 0.40033361134278567,
"grad_norm": 2.5702329959348726,
"learning_rate": 5.998154195655262e-06,
"loss": 0.3934,
"step": 5640
},
{
"epoch": 0.4010434227104147,
"grad_norm": 3.0044071678975937,
"learning_rate": 5.991054948175494e-06,
"loss": 0.3826,
"step": 5650
},
{
"epoch": 0.40175323407804375,
"grad_norm": 2.412654779599852,
"learning_rate": 5.983955700695727e-06,
"loss": 0.3969,
"step": 5660
},
{
"epoch": 0.4024630454456728,
"grad_norm": 3.0767944703908356,
"learning_rate": 5.97685645321596e-06,
"loss": 0.3961,
"step": 5670
},
{
"epoch": 0.40317285681330184,
"grad_norm": 2.8053230371522124,
"learning_rate": 5.969757205736193e-06,
"loss": 0.3869,
"step": 5680
},
{
"epoch": 0.40388266818093094,
"grad_norm": 7.472643121749521,
"learning_rate": 5.962657958256425e-06,
"loss": 0.3851,
"step": 5690
},
{
"epoch": 0.40459247954856,
"grad_norm": 14.585388143398843,
"learning_rate": 5.955558710776658e-06,
"loss": 0.3905,
"step": 5700
},
{
"epoch": 0.405302290916189,
"grad_norm": 4.416692599365141,
"learning_rate": 5.948459463296891e-06,
"loss": 0.3862,
"step": 5710
},
{
"epoch": 0.40601210228381807,
"grad_norm": 3.4729116521336776,
"learning_rate": 5.941360215817123e-06,
"loss": 0.402,
"step": 5720
},
{
"epoch": 0.4067219136514471,
"grad_norm": 3.5423705326787114,
"learning_rate": 5.934260968337356e-06,
"loss": 0.3884,
"step": 5730
},
{
"epoch": 0.4074317250190762,
"grad_norm": 3.1365000657861497,
"learning_rate": 5.927161720857589e-06,
"loss": 0.3825,
"step": 5740
},
{
"epoch": 0.40814153638670525,
"grad_norm": 5.219488757508086,
"learning_rate": 5.9200624733778224e-06,
"loss": 0.3894,
"step": 5750
},
{
"epoch": 0.4088513477543343,
"grad_norm": 3.596909048940233,
"learning_rate": 5.9129632258980555e-06,
"loss": 0.3831,
"step": 5760
},
{
"epoch": 0.40956115912196334,
"grad_norm": 2.476134224023759,
"learning_rate": 5.905863978418288e-06,
"loss": 0.3825,
"step": 5770
},
{
"epoch": 0.4102709704895924,
"grad_norm": 3.407930958961138,
"learning_rate": 5.898764730938521e-06,
"loss": 0.3714,
"step": 5780
},
{
"epoch": 0.4109807818572214,
"grad_norm": 3.6349280667767636,
"learning_rate": 5.891665483458754e-06,
"loss": 0.3949,
"step": 5790
},
{
"epoch": 0.4116905932248505,
"grad_norm": 10.032880290815127,
"learning_rate": 5.884566235978987e-06,
"loss": 0.3827,
"step": 5800
},
{
"epoch": 0.41240040459247956,
"grad_norm": 4.403552459945297,
"learning_rate": 5.877466988499219e-06,
"loss": 0.3738,
"step": 5810
},
{
"epoch": 0.4131102159601086,
"grad_norm": 3.2630803210797086,
"learning_rate": 5.8703677410194525e-06,
"loss": 0.3947,
"step": 5820
},
{
"epoch": 0.41382002732773765,
"grad_norm": 11.228663057773362,
"learning_rate": 5.8632684935396856e-06,
"loss": 0.3825,
"step": 5830
},
{
"epoch": 0.4145298386953667,
"grad_norm": 18.33844649221444,
"learning_rate": 5.856169246059919e-06,
"loss": 0.381,
"step": 5840
},
{
"epoch": 0.41523965006299574,
"grad_norm": 14.576257048715338,
"learning_rate": 5.849069998580152e-06,
"loss": 0.389,
"step": 5850
},
{
"epoch": 0.41594946143062483,
"grad_norm": 3.3799659706310177,
"learning_rate": 5.841970751100384e-06,
"loss": 0.3687,
"step": 5860
},
{
"epoch": 0.4166592727982539,
"grad_norm": 4.306786145673671,
"learning_rate": 5.834871503620617e-06,
"loss": 0.3846,
"step": 5870
},
{
"epoch": 0.4173690841658829,
"grad_norm": 2.71585444285802,
"learning_rate": 5.82777225614085e-06,
"loss": 0.397,
"step": 5880
},
{
"epoch": 0.41807889553351196,
"grad_norm": 4.530639455269193,
"learning_rate": 5.820673008661082e-06,
"loss": 0.3633,
"step": 5890
},
{
"epoch": 0.418788706901141,
"grad_norm": 5.299365856406392,
"learning_rate": 5.813573761181315e-06,
"loss": 0.3854,
"step": 5900
},
{
"epoch": 0.41949851826877005,
"grad_norm": 3.5533453867575786,
"learning_rate": 5.806474513701548e-06,
"loss": 0.3855,
"step": 5910
},
{
"epoch": 0.42020832963639915,
"grad_norm": 9.388008852057116,
"learning_rate": 5.799375266221781e-06,
"loss": 0.3911,
"step": 5920
},
{
"epoch": 0.4209181410040282,
"grad_norm": 3.378607546141685,
"learning_rate": 5.792276018742013e-06,
"loss": 0.3751,
"step": 5930
},
{
"epoch": 0.42162795237165723,
"grad_norm": 12.222073948575716,
"learning_rate": 5.785176771262246e-06,
"loss": 0.3778,
"step": 5940
},
{
"epoch": 0.4223377637392863,
"grad_norm": 4.297952573306613,
"learning_rate": 5.778077523782479e-06,
"loss": 0.3827,
"step": 5950
},
{
"epoch": 0.4230475751069153,
"grad_norm": 9.764464171752504,
"learning_rate": 5.7709782763027125e-06,
"loss": 0.3893,
"step": 5960
},
{
"epoch": 0.4237573864745444,
"grad_norm": 3.7569225597805658,
"learning_rate": 5.763879028822945e-06,
"loss": 0.3901,
"step": 5970
},
{
"epoch": 0.42446719784217346,
"grad_norm": 3.0005485619903824,
"learning_rate": 5.756779781343178e-06,
"loss": 0.3753,
"step": 5980
},
{
"epoch": 0.4251770092098025,
"grad_norm": 6.457104695432505,
"learning_rate": 5.749680533863411e-06,
"loss": 0.3585,
"step": 5990
},
{
"epoch": 0.42588682057743155,
"grad_norm": 4.252684527352716,
"learning_rate": 5.742581286383644e-06,
"loss": 0.3745,
"step": 6000
},
{
"epoch": 0.4265966319450606,
"grad_norm": 3.3319349737549673,
"learning_rate": 5.735482038903877e-06,
"loss": 0.3836,
"step": 6010
},
{
"epoch": 0.42730644331268963,
"grad_norm": 4.333001859655407,
"learning_rate": 5.728382791424109e-06,
"loss": 0.3698,
"step": 6020
},
{
"epoch": 0.42801625468031873,
"grad_norm": 3.9838864194561343,
"learning_rate": 5.7212835439443425e-06,
"loss": 0.3686,
"step": 6030
},
{
"epoch": 0.4287260660479478,
"grad_norm": 3.206673737162168,
"learning_rate": 5.714184296464576e-06,
"loss": 0.374,
"step": 6040
},
{
"epoch": 0.4294358774155768,
"grad_norm": 7.910008181832549,
"learning_rate": 5.707085048984809e-06,
"loss": 0.3731,
"step": 6050
},
{
"epoch": 0.43014568878320586,
"grad_norm": 11.533279860672804,
"learning_rate": 5.699985801505041e-06,
"loss": 0.3842,
"step": 6060
},
{
"epoch": 0.4308555001508349,
"grad_norm": 4.06817553254219,
"learning_rate": 5.692886554025274e-06,
"loss": 0.3717,
"step": 6070
},
{
"epoch": 0.43156531151846395,
"grad_norm": 12.082596102938004,
"learning_rate": 5.685787306545507e-06,
"loss": 0.3971,
"step": 6080
},
{
"epoch": 0.43227512288609304,
"grad_norm": 2.685455478240202,
"learning_rate": 5.678688059065739e-06,
"loss": 0.3822,
"step": 6090
},
{
"epoch": 0.4329849342537221,
"grad_norm": 3.1399973614222643,
"learning_rate": 5.671588811585972e-06,
"loss": 0.3774,
"step": 6100
},
{
"epoch": 0.43369474562135113,
"grad_norm": 3.518374812592983,
"learning_rate": 5.664489564106205e-06,
"loss": 0.3781,
"step": 6110
},
{
"epoch": 0.4344045569889802,
"grad_norm": 4.803932844471321,
"learning_rate": 5.657390316626438e-06,
"loss": 0.3757,
"step": 6120
},
{
"epoch": 0.4351143683566092,
"grad_norm": 12.690594810777407,
"learning_rate": 5.650291069146671e-06,
"loss": 0.3747,
"step": 6130
},
{
"epoch": 0.43582417972423826,
"grad_norm": 10.80688099347966,
"learning_rate": 5.643191821666903e-06,
"loss": 0.3676,
"step": 6140
},
{
"epoch": 0.43653399109186736,
"grad_norm": 4.232034052682343,
"learning_rate": 5.636092574187136e-06,
"loss": 0.395,
"step": 6150
},
{
"epoch": 0.4372438024594964,
"grad_norm": 3.422739256279243,
"learning_rate": 5.6289933267073694e-06,
"loss": 0.3693,
"step": 6160
},
{
"epoch": 0.43795361382712544,
"grad_norm": 32.06006758689784,
"learning_rate": 5.6218940792276025e-06,
"loss": 0.3782,
"step": 6170
},
{
"epoch": 0.4386634251947545,
"grad_norm": 5.623034465377633,
"learning_rate": 5.614794831747835e-06,
"loss": 0.3813,
"step": 6180
},
{
"epoch": 0.43937323656238353,
"grad_norm": 10.612805886316337,
"learning_rate": 5.607695584268068e-06,
"loss": 0.3702,
"step": 6190
},
{
"epoch": 0.44008304793001257,
"grad_norm": 6.077674805742986,
"learning_rate": 5.600596336788301e-06,
"loss": 0.3643,
"step": 6200
},
{
"epoch": 0.44079285929764167,
"grad_norm": 7.053795971115957,
"learning_rate": 5.593497089308534e-06,
"loss": 0.3911,
"step": 6210
},
{
"epoch": 0.4415026706652707,
"grad_norm": 6.212842792838621,
"learning_rate": 5.586397841828766e-06,
"loss": 0.3774,
"step": 6220
},
{
"epoch": 0.44221248203289976,
"grad_norm": 7.598832178623656,
"learning_rate": 5.5792985943489995e-06,
"loss": 0.3808,
"step": 6230
},
{
"epoch": 0.4429222934005288,
"grad_norm": 14.834315377312098,
"learning_rate": 5.5721993468692326e-06,
"loss": 0.3765,
"step": 6240
},
{
"epoch": 0.44363210476815784,
"grad_norm": 15.459970963070427,
"learning_rate": 5.565100099389466e-06,
"loss": 0.3863,
"step": 6250
},
{
"epoch": 0.44434191613578694,
"grad_norm": 5.002895033502256,
"learning_rate": 5.558000851909699e-06,
"loss": 0.3718,
"step": 6260
},
{
"epoch": 0.445051727503416,
"grad_norm": 4.67592371180372,
"learning_rate": 5.550901604429931e-06,
"loss": 0.3869,
"step": 6270
},
{
"epoch": 0.445761538871045,
"grad_norm": 4.246040554798665,
"learning_rate": 5.543802356950164e-06,
"loss": 0.3673,
"step": 6280
},
{
"epoch": 0.44647135023867407,
"grad_norm": 5.698576828390134,
"learning_rate": 5.536703109470397e-06,
"loss": 0.3733,
"step": 6290
},
{
"epoch": 0.4471811616063031,
"grad_norm": 4.890818923695549,
"learning_rate": 5.529603861990629e-06,
"loss": 0.3917,
"step": 6300
},
{
"epoch": 0.44789097297393216,
"grad_norm": 3.5954099385229,
"learning_rate": 5.522504614510862e-06,
"loss": 0.387,
"step": 6310
},
{
"epoch": 0.44860078434156125,
"grad_norm": 5.819667912733057,
"learning_rate": 5.515405367031095e-06,
"loss": 0.3772,
"step": 6320
},
{
"epoch": 0.4493105957091903,
"grad_norm": 4.924613328068802,
"learning_rate": 5.508306119551328e-06,
"loss": 0.3691,
"step": 6330
},
{
"epoch": 0.45002040707681934,
"grad_norm": 4.077670226838275,
"learning_rate": 5.50120687207156e-06,
"loss": 0.3606,
"step": 6340
},
{
"epoch": 0.4507302184444484,
"grad_norm": 4.7425966011878815,
"learning_rate": 5.494107624591793e-06,
"loss": 0.3712,
"step": 6350
},
{
"epoch": 0.4514400298120774,
"grad_norm": 3.7724063921848,
"learning_rate": 5.487008377112026e-06,
"loss": 0.3707,
"step": 6360
},
{
"epoch": 0.45214984117970647,
"grad_norm": 2.8597041255348183,
"learning_rate": 5.4799091296322595e-06,
"loss": 0.364,
"step": 6370
},
{
"epoch": 0.45285965254733557,
"grad_norm": 5.386440052681094,
"learning_rate": 5.472809882152493e-06,
"loss": 0.3785,
"step": 6380
},
{
"epoch": 0.4535694639149646,
"grad_norm": 4.20147189666546,
"learning_rate": 5.465710634672725e-06,
"loss": 0.384,
"step": 6390
},
{
"epoch": 0.45427927528259365,
"grad_norm": 5.4360613411555185,
"learning_rate": 5.458611387192958e-06,
"loss": 0.3676,
"step": 6400
},
{
"epoch": 0.4549890866502227,
"grad_norm": 7.4543272167324846,
"learning_rate": 5.451512139713191e-06,
"loss": 0.3973,
"step": 6410
},
{
"epoch": 0.45569889801785174,
"grad_norm": 5.302161729787796,
"learning_rate": 5.444412892233424e-06,
"loss": 0.3878,
"step": 6420
},
{
"epoch": 0.4564087093854808,
"grad_norm": 4.774927845954586,
"learning_rate": 5.437313644753656e-06,
"loss": 0.368,
"step": 6430
},
{
"epoch": 0.4571185207531099,
"grad_norm": 4.733108202290537,
"learning_rate": 5.4302143972738895e-06,
"loss": 0.3841,
"step": 6440
},
{
"epoch": 0.4578283321207389,
"grad_norm": 4.581655513075473,
"learning_rate": 5.423115149794123e-06,
"loss": 0.3805,
"step": 6450
},
{
"epoch": 0.45853814348836797,
"grad_norm": 2.4364404744853445,
"learning_rate": 5.416015902314356e-06,
"loss": 0.3587,
"step": 6460
},
{
"epoch": 0.459247954855997,
"grad_norm": 5.16394378928267,
"learning_rate": 5.408916654834588e-06,
"loss": 0.3793,
"step": 6470
},
{
"epoch": 0.45995776622362605,
"grad_norm": 8.232574335670192,
"learning_rate": 5.401817407354821e-06,
"loss": 0.3794,
"step": 6480
},
{
"epoch": 0.46066757759125515,
"grad_norm": 10.509485180483269,
"learning_rate": 5.394718159875054e-06,
"loss": 0.3742,
"step": 6490
},
{
"epoch": 0.4613773889588842,
"grad_norm": 3.418180521754276,
"learning_rate": 5.387618912395286e-06,
"loss": 0.3733,
"step": 6500
},
{
"epoch": 0.46208720032651324,
"grad_norm": 4.2689703556593495,
"learning_rate": 5.380519664915519e-06,
"loss": 0.374,
"step": 6510
},
{
"epoch": 0.4627970116941423,
"grad_norm": 7.896842999549548,
"learning_rate": 5.373420417435752e-06,
"loss": 0.3799,
"step": 6520
},
{
"epoch": 0.4635068230617713,
"grad_norm": 3.4870838077093893,
"learning_rate": 5.366321169955985e-06,
"loss": 0.3712,
"step": 6530
},
{
"epoch": 0.46421663442940037,
"grad_norm": 27.778526824166995,
"learning_rate": 5.359221922476218e-06,
"loss": 0.3655,
"step": 6540
},
{
"epoch": 0.46492644579702946,
"grad_norm": 16.796202092439216,
"learning_rate": 5.35212267499645e-06,
"loss": 0.3846,
"step": 6550
},
{
"epoch": 0.4656362571646585,
"grad_norm": 5.698856930659158,
"learning_rate": 5.345023427516683e-06,
"loss": 0.3877,
"step": 6560
},
{
"epoch": 0.46634606853228755,
"grad_norm": 8.694016798434083,
"learning_rate": 5.3379241800369165e-06,
"loss": 0.3607,
"step": 6570
},
{
"epoch": 0.4670558798999166,
"grad_norm": 3.617969654098083,
"learning_rate": 5.3308249325571496e-06,
"loss": 0.36,
"step": 6580
},
{
"epoch": 0.46776569126754564,
"grad_norm": 7.181014577384461,
"learning_rate": 5.323725685077382e-06,
"loss": 0.3783,
"step": 6590
},
{
"epoch": 0.4684755026351747,
"grad_norm": 9.52331650225055,
"learning_rate": 5.316626437597615e-06,
"loss": 0.3707,
"step": 6600
},
{
"epoch": 0.4691853140028038,
"grad_norm": 5.927560976046885,
"learning_rate": 5.309527190117848e-06,
"loss": 0.3747,
"step": 6610
},
{
"epoch": 0.4698951253704328,
"grad_norm": 33.354649195054265,
"learning_rate": 5.302427942638081e-06,
"loss": 0.3622,
"step": 6620
},
{
"epoch": 0.47060493673806186,
"grad_norm": 5.109478632635811,
"learning_rate": 5.295328695158314e-06,
"loss": 0.3702,
"step": 6630
},
{
"epoch": 0.4713147481056909,
"grad_norm": 62.14127099005149,
"learning_rate": 5.2882294476785465e-06,
"loss": 0.3718,
"step": 6640
},
{
"epoch": 0.47202455947331995,
"grad_norm": 3.9646315343813674,
"learning_rate": 5.2811302001987796e-06,
"loss": 0.3579,
"step": 6650
},
{
"epoch": 0.472734370840949,
"grad_norm": 5.822229945732986,
"learning_rate": 5.274030952719013e-06,
"loss": 0.358,
"step": 6660
},
{
"epoch": 0.4734441822085781,
"grad_norm": 3.0706990453586607,
"learning_rate": 5.266931705239246e-06,
"loss": 0.3712,
"step": 6670
},
{
"epoch": 0.47415399357620713,
"grad_norm": 2.763541771977754,
"learning_rate": 5.259832457759478e-06,
"loss": 0.3862,
"step": 6680
},
{
"epoch": 0.4748638049438362,
"grad_norm": 2.8054880505902746,
"learning_rate": 5.252733210279711e-06,
"loss": 0.3609,
"step": 6690
},
{
"epoch": 0.4755736163114652,
"grad_norm": 3.5455500616555864,
"learning_rate": 5.245633962799943e-06,
"loss": 0.3845,
"step": 6700
},
{
"epoch": 0.47628342767909426,
"grad_norm": 6.871049315984216,
"learning_rate": 5.238534715320176e-06,
"loss": 0.3681,
"step": 6710
},
{
"epoch": 0.47699323904672336,
"grad_norm": 4.626136895991325,
"learning_rate": 5.231435467840409e-06,
"loss": 0.3694,
"step": 6720
},
{
"epoch": 0.4777030504143524,
"grad_norm": 4.1689737774582385,
"learning_rate": 5.224336220360642e-06,
"loss": 0.3722,
"step": 6730
},
{
"epoch": 0.47841286178198145,
"grad_norm": 2.345831388882716,
"learning_rate": 5.217236972880875e-06,
"loss": 0.3778,
"step": 6740
},
{
"epoch": 0.4791226731496105,
"grad_norm": 5.181993551246977,
"learning_rate": 5.210137725401107e-06,
"loss": 0.3649,
"step": 6750
},
{
"epoch": 0.47983248451723953,
"grad_norm": 4.144025528380454,
"learning_rate": 5.20303847792134e-06,
"loss": 0.3854,
"step": 6760
},
{
"epoch": 0.4805422958848686,
"grad_norm": 4.0013049178877536,
"learning_rate": 5.195939230441573e-06,
"loss": 0.3832,
"step": 6770
},
{
"epoch": 0.4812521072524977,
"grad_norm": 4.375334224867565,
"learning_rate": 5.1888399829618065e-06,
"loss": 0.3678,
"step": 6780
},
{
"epoch": 0.4819619186201267,
"grad_norm": 2.8158913555106926,
"learning_rate": 5.18174073548204e-06,
"loss": 0.3735,
"step": 6790
},
{
"epoch": 0.48267172998775576,
"grad_norm": 4.286259213586135,
"learning_rate": 5.174641488002272e-06,
"loss": 0.3824,
"step": 6800
},
{
"epoch": 0.4833815413553848,
"grad_norm": 2.917255310557774,
"learning_rate": 5.167542240522505e-06,
"loss": 0.367,
"step": 6810
},
{
"epoch": 0.48409135272301385,
"grad_norm": 2.9474809991081194,
"learning_rate": 5.160442993042738e-06,
"loss": 0.37,
"step": 6820
},
{
"epoch": 0.4848011640906429,
"grad_norm": 8.0892973566849,
"learning_rate": 5.153343745562971e-06,
"loss": 0.385,
"step": 6830
},
{
"epoch": 0.485510975458272,
"grad_norm": 5.46237208189901,
"learning_rate": 5.1462444980832034e-06,
"loss": 0.3723,
"step": 6840
},
{
"epoch": 0.48622078682590103,
"grad_norm": 4.813397707683654,
"learning_rate": 5.1391452506034365e-06,
"loss": 0.3847,
"step": 6850
},
{
"epoch": 0.4869305981935301,
"grad_norm": 3.839632822272105,
"learning_rate": 5.13204600312367e-06,
"loss": 0.3994,
"step": 6860
},
{
"epoch": 0.4876404095611591,
"grad_norm": 2.731217984269613,
"learning_rate": 5.124946755643903e-06,
"loss": 0.3928,
"step": 6870
},
{
"epoch": 0.48835022092878816,
"grad_norm": 7.062296596699752,
"learning_rate": 5.117847508164136e-06,
"loss": 0.4141,
"step": 6880
},
{
"epoch": 0.4890600322964172,
"grad_norm": 3.0471865890050034,
"learning_rate": 5.110748260684368e-06,
"loss": 0.3712,
"step": 6890
},
{
"epoch": 0.4897698436640463,
"grad_norm": 8.240874357274272,
"learning_rate": 5.103649013204601e-06,
"loss": 0.3828,
"step": 6900
},
{
"epoch": 0.49047965503167534,
"grad_norm": 4.557814239490917,
"learning_rate": 5.0965497657248334e-06,
"loss": 0.3794,
"step": 6910
},
{
"epoch": 0.4911894663993044,
"grad_norm": 6.50934729087624,
"learning_rate": 5.089450518245066e-06,
"loss": 0.3655,
"step": 6920
},
{
"epoch": 0.49189927776693343,
"grad_norm": 2.7892154452796696,
"learning_rate": 5.082351270765299e-06,
"loss": 0.3477,
"step": 6930
},
{
"epoch": 0.4926090891345625,
"grad_norm": 4.296820022815862,
"learning_rate": 5.075252023285532e-06,
"loss": 0.3917,
"step": 6940
},
{
"epoch": 0.4933189005021915,
"grad_norm": 3.7811542108069514,
"learning_rate": 5.068152775805765e-06,
"loss": 0.3846,
"step": 6950
},
{
"epoch": 0.4940287118698206,
"grad_norm": 12.150770506288081,
"learning_rate": 5.061053528325997e-06,
"loss": 0.3991,
"step": 6960
},
{
"epoch": 0.49473852323744966,
"grad_norm": 8.737862487013935,
"learning_rate": 5.05395428084623e-06,
"loss": 0.376,
"step": 6970
},
{
"epoch": 0.4954483346050787,
"grad_norm": 4.705086993153889,
"learning_rate": 5.0468550333664635e-06,
"loss": 0.3774,
"step": 6980
},
{
"epoch": 0.49615814597270774,
"grad_norm": 3.95177864719572,
"learning_rate": 5.0397557858866966e-06,
"loss": 0.3867,
"step": 6990
},
{
"epoch": 0.4968679573403368,
"grad_norm": 4.9228476674024995,
"learning_rate": 5.03265653840693e-06,
"loss": 0.3868,
"step": 7000
},
{
"epoch": 0.4975777687079659,
"grad_norm": 7.598944675436029,
"learning_rate": 5.025557290927162e-06,
"loss": 0.3791,
"step": 7010
},
{
"epoch": 0.4982875800755949,
"grad_norm": 3.948022335506646,
"learning_rate": 5.018458043447395e-06,
"loss": 0.3878,
"step": 7020
},
{
"epoch": 0.49899739144322397,
"grad_norm": 2.97600555704115,
"learning_rate": 5.011358795967628e-06,
"loss": 0.3891,
"step": 7030
},
{
"epoch": 0.499707202810853,
"grad_norm": 7.322058927387839,
"learning_rate": 5.004259548487861e-06,
"loss": 0.3739,
"step": 7040
},
{
"epoch": 0.5004170141784821,
"grad_norm": 4.054563164115399,
"learning_rate": 4.9971603010080935e-06,
"loss": 0.3654,
"step": 7050
},
{
"epoch": 0.5011268255461111,
"grad_norm": 6.433797069878189,
"learning_rate": 4.990061053528326e-06,
"loss": 0.3769,
"step": 7060
},
{
"epoch": 0.5018366369137401,
"grad_norm": 6.244381336548628,
"learning_rate": 4.982961806048559e-06,
"loss": 0.3698,
"step": 7070
},
{
"epoch": 0.5025464482813692,
"grad_norm": 4.649812061123292,
"learning_rate": 4.975862558568792e-06,
"loss": 0.3597,
"step": 7080
},
{
"epoch": 0.5032562596489982,
"grad_norm": 13.131635539716475,
"learning_rate": 4.968763311089025e-06,
"loss": 0.3737,
"step": 7090
},
{
"epoch": 0.5039660710166274,
"grad_norm": 11.654767208116397,
"learning_rate": 4.961664063609258e-06,
"loss": 0.3809,
"step": 7100
},
{
"epoch": 0.5046758823842564,
"grad_norm": 5.54405844933368,
"learning_rate": 4.95456481612949e-06,
"loss": 0.3668,
"step": 7110
},
{
"epoch": 0.5053856937518855,
"grad_norm": 17.63140898183613,
"learning_rate": 4.9474655686497235e-06,
"loss": 0.3751,
"step": 7120
},
{
"epoch": 0.5060955051195145,
"grad_norm": 4.735270750917372,
"learning_rate": 4.940366321169957e-06,
"loss": 0.3759,
"step": 7130
},
{
"epoch": 0.5068053164871436,
"grad_norm": 3.6005983980475214,
"learning_rate": 4.93326707369019e-06,
"loss": 0.3932,
"step": 7140
},
{
"epoch": 0.5075151278547726,
"grad_norm": 5.073652881259414,
"learning_rate": 4.926167826210422e-06,
"loss": 0.3689,
"step": 7150
},
{
"epoch": 0.5082249392224016,
"grad_norm": 6.515311066715168,
"learning_rate": 4.919068578730655e-06,
"loss": 0.3675,
"step": 7160
},
{
"epoch": 0.5089347505900307,
"grad_norm": 12.98913332417653,
"learning_rate": 4.911969331250887e-06,
"loss": 0.3861,
"step": 7170
},
{
"epoch": 0.5096445619576597,
"grad_norm": 5.1500756291258005,
"learning_rate": 4.90487008377112e-06,
"loss": 0.3731,
"step": 7180
},
{
"epoch": 0.5103543733252888,
"grad_norm": 5.833801547579832,
"learning_rate": 4.8977708362913535e-06,
"loss": 0.3831,
"step": 7190
},
{
"epoch": 0.5110641846929178,
"grad_norm": 8.343761477251691,
"learning_rate": 4.890671588811587e-06,
"loss": 0.3716,
"step": 7200
},
{
"epoch": 0.511773996060547,
"grad_norm": 6.740845613760958,
"learning_rate": 4.883572341331819e-06,
"loss": 0.377,
"step": 7210
},
{
"epoch": 0.512483807428176,
"grad_norm": 6.834960096187304,
"learning_rate": 4.876473093852052e-06,
"loss": 0.3774,
"step": 7220
},
{
"epoch": 0.513193618795805,
"grad_norm": 6.333904565562881,
"learning_rate": 4.869373846372285e-06,
"loss": 0.3786,
"step": 7230
},
{
"epoch": 0.5139034301634341,
"grad_norm": 7.380378873059882,
"learning_rate": 4.862274598892518e-06,
"loss": 0.3641,
"step": 7240
},
{
"epoch": 0.5146132415310631,
"grad_norm": 8.15711157363267,
"learning_rate": 4.855175351412751e-06,
"loss": 0.354,
"step": 7250
},
{
"epoch": 0.5153230528986922,
"grad_norm": 5.298194233144714,
"learning_rate": 4.8480761039329835e-06,
"loss": 0.3648,
"step": 7260
},
{
"epoch": 0.5160328642663212,
"grad_norm": 6.169565228174972,
"learning_rate": 4.840976856453216e-06,
"loss": 0.3606,
"step": 7270
},
{
"epoch": 0.5167426756339503,
"grad_norm": 4.633952354333419,
"learning_rate": 4.833877608973449e-06,
"loss": 0.3627,
"step": 7280
},
{
"epoch": 0.5174524870015793,
"grad_norm": 7.754370375548218,
"learning_rate": 4.826778361493682e-06,
"loss": 0.384,
"step": 7290
},
{
"epoch": 0.5181622983692084,
"grad_norm": 4.628647672477682,
"learning_rate": 4.819679114013915e-06,
"loss": 0.3717,
"step": 7300
},
{
"epoch": 0.5188721097368374,
"grad_norm": 4.6108119740619165,
"learning_rate": 4.812579866534147e-06,
"loss": 0.3531,
"step": 7310
},
{
"epoch": 0.5195819211044664,
"grad_norm": 3.777480319775288,
"learning_rate": 4.8054806190543805e-06,
"loss": 0.3735,
"step": 7320
},
{
"epoch": 0.5202917324720956,
"grad_norm": 6.455151414772601,
"learning_rate": 4.7983813715746136e-06,
"loss": 0.3845,
"step": 7330
},
{
"epoch": 0.5210015438397246,
"grad_norm": 5.0016880570007,
"learning_rate": 4.791282124094847e-06,
"loss": 0.3588,
"step": 7340
},
{
"epoch": 0.5217113552073537,
"grad_norm": 3.596195253014758,
"learning_rate": 4.78418287661508e-06,
"loss": 0.3664,
"step": 7350
},
{
"epoch": 0.5224211665749827,
"grad_norm": 4.6111563525428005,
"learning_rate": 4.777083629135312e-06,
"loss": 0.3815,
"step": 7360
},
{
"epoch": 0.5231309779426118,
"grad_norm": 3.81079107236397,
"learning_rate": 4.769984381655544e-06,
"loss": 0.3603,
"step": 7370
},
{
"epoch": 0.5238407893102408,
"grad_norm": 10.081677733455512,
"learning_rate": 4.762885134175777e-06,
"loss": 0.3748,
"step": 7380
},
{
"epoch": 0.5245506006778698,
"grad_norm": 4.011909680570432,
"learning_rate": 4.7557858866960105e-06,
"loss": 0.3736,
"step": 7390
},
{
"epoch": 0.5252604120454989,
"grad_norm": 4.008812937992125,
"learning_rate": 4.7486866392162436e-06,
"loss": 0.3718,
"step": 7400
},
{
"epoch": 0.5259702234131279,
"grad_norm": 8.895014071619777,
"learning_rate": 4.741587391736477e-06,
"loss": 0.3747,
"step": 7410
},
{
"epoch": 0.526680034780757,
"grad_norm": 2.5646865204368394,
"learning_rate": 4.734488144256709e-06,
"loss": 0.3593,
"step": 7420
},
{
"epoch": 0.527389846148386,
"grad_norm": 2.8583907278858147,
"learning_rate": 4.727388896776942e-06,
"loss": 0.387,
"step": 7430
},
{
"epoch": 0.5280996575160151,
"grad_norm": 2.8626323560816296,
"learning_rate": 4.720289649297175e-06,
"loss": 0.3756,
"step": 7440
},
{
"epoch": 0.5288094688836442,
"grad_norm": 7.38191434335366,
"learning_rate": 4.713190401817408e-06,
"loss": 0.3715,
"step": 7450
},
{
"epoch": 0.5295192802512733,
"grad_norm": 3.187699709665762,
"learning_rate": 4.7060911543376405e-06,
"loss": 0.3763,
"step": 7460
},
{
"epoch": 0.5302290916189023,
"grad_norm": 2.2423385405265366,
"learning_rate": 4.698991906857874e-06,
"loss": 0.367,
"step": 7470
},
{
"epoch": 0.5309389029865313,
"grad_norm": 3.5525056364166465,
"learning_rate": 4.691892659378106e-06,
"loss": 0.3639,
"step": 7480
},
{
"epoch": 0.5316487143541604,
"grad_norm": 2.5840538292895405,
"learning_rate": 4.684793411898339e-06,
"loss": 0.3713,
"step": 7490
},
{
"epoch": 0.5323585257217894,
"grad_norm": 3.6015272776951366,
"learning_rate": 4.677694164418572e-06,
"loss": 0.3672,
"step": 7500
},
{
"epoch": 0.5330683370894185,
"grad_norm": 2.958338857599813,
"learning_rate": 4.670594916938805e-06,
"loss": 0.364,
"step": 7510
},
{
"epoch": 0.5337781484570475,
"grad_norm": 2.6780802400700248,
"learning_rate": 4.663495669459037e-06,
"loss": 0.3871,
"step": 7520
},
{
"epoch": 0.5344879598246766,
"grad_norm": 2.141486624042336,
"learning_rate": 4.6563964219792705e-06,
"loss": 0.3918,
"step": 7530
},
{
"epoch": 0.5351977711923056,
"grad_norm": 11.627725180923038,
"learning_rate": 4.649297174499504e-06,
"loss": 0.369,
"step": 7540
},
{
"epoch": 0.5359075825599346,
"grad_norm": 2.164302320101156,
"learning_rate": 4.642197927019737e-06,
"loss": 0.3763,
"step": 7550
},
{
"epoch": 0.5366173939275638,
"grad_norm": 2.5355641201406716,
"learning_rate": 4.63509867953997e-06,
"loss": 0.3709,
"step": 7560
},
{
"epoch": 0.5373272052951928,
"grad_norm": 1.7486780225096559,
"learning_rate": 4.627999432060202e-06,
"loss": 0.3778,
"step": 7570
},
{
"epoch": 0.5380370166628219,
"grad_norm": 2.1996857828607066,
"learning_rate": 4.620900184580434e-06,
"loss": 0.3878,
"step": 7580
},
{
"epoch": 0.5387468280304509,
"grad_norm": 2.2718302971034325,
"learning_rate": 4.613800937100667e-06,
"loss": 0.3691,
"step": 7590
},
{
"epoch": 0.53945663939808,
"grad_norm": 2.247788269458988,
"learning_rate": 4.6067016896209005e-06,
"loss": 0.3764,
"step": 7600
},
{
"epoch": 0.540166450765709,
"grad_norm": 4.951241532022136,
"learning_rate": 4.599602442141134e-06,
"loss": 0.3696,
"step": 7610
},
{
"epoch": 0.5408762621333381,
"grad_norm": 18.87723312065313,
"learning_rate": 4.592503194661366e-06,
"loss": 0.3752,
"step": 7620
},
{
"epoch": 0.5415860735009671,
"grad_norm": 4.839150391451601,
"learning_rate": 4.585403947181599e-06,
"loss": 0.3704,
"step": 7630
},
{
"epoch": 0.5422958848685961,
"grad_norm": 3.252448644894675,
"learning_rate": 4.578304699701832e-06,
"loss": 0.3662,
"step": 7640
},
{
"epoch": 0.5430056962362252,
"grad_norm": 4.636061450249123,
"learning_rate": 4.571205452222065e-06,
"loss": 0.3695,
"step": 7650
},
{
"epoch": 0.5437155076038542,
"grad_norm": 2.217398025384477,
"learning_rate": 4.564106204742298e-06,
"loss": 0.381,
"step": 7660
},
{
"epoch": 0.5444253189714833,
"grad_norm": 3.2864797627789764,
"learning_rate": 4.5570069572625305e-06,
"loss": 0.3766,
"step": 7670
},
{
"epoch": 0.5451351303391124,
"grad_norm": 2.5595280528292346,
"learning_rate": 4.549907709782763e-06,
"loss": 0.3753,
"step": 7680
},
{
"epoch": 0.5458449417067415,
"grad_norm": 3.5869951931087356,
"learning_rate": 4.542808462302996e-06,
"loss": 0.3649,
"step": 7690
},
{
"epoch": 0.5465547530743705,
"grad_norm": 2.878804286325741,
"learning_rate": 4.535709214823229e-06,
"loss": 0.365,
"step": 7700
},
{
"epoch": 0.5472645644419996,
"grad_norm": 3.835428702840037,
"learning_rate": 4.528609967343462e-06,
"loss": 0.388,
"step": 7710
},
{
"epoch": 0.5479743758096286,
"grad_norm": 3.3115804743584225,
"learning_rate": 4.521510719863695e-06,
"loss": 0.3498,
"step": 7720
},
{
"epoch": 0.5486841871772576,
"grad_norm": 2.155325207710473,
"learning_rate": 4.5144114723839275e-06,
"loss": 0.3638,
"step": 7730
},
{
"epoch": 0.5493939985448867,
"grad_norm": 2.9473064158817506,
"learning_rate": 4.5073122249041606e-06,
"loss": 0.3756,
"step": 7740
},
{
"epoch": 0.5501038099125157,
"grad_norm": 3.977038197892431,
"learning_rate": 4.500212977424394e-06,
"loss": 0.3674,
"step": 7750
},
{
"epoch": 0.5508136212801448,
"grad_norm": 5.638630944163406,
"learning_rate": 4.493113729944627e-06,
"loss": 0.3528,
"step": 7760
},
{
"epoch": 0.5515234326477738,
"grad_norm": 2.8534926361264286,
"learning_rate": 4.486014482464859e-06,
"loss": 0.3697,
"step": 7770
},
{
"epoch": 0.5522332440154029,
"grad_norm": 6.069502646886042,
"learning_rate": 4.478915234985092e-06,
"loss": 0.3843,
"step": 7780
},
{
"epoch": 0.552943055383032,
"grad_norm": 4.343605351910854,
"learning_rate": 4.471815987505324e-06,
"loss": 0.3783,
"step": 7790
},
{
"epoch": 0.553652866750661,
"grad_norm": 2.104465858436518,
"learning_rate": 4.4647167400255575e-06,
"loss": 0.3601,
"step": 7800
},
{
"epoch": 0.5543626781182901,
"grad_norm": 3.0902122663518448,
"learning_rate": 4.457617492545791e-06,
"loss": 0.3801,
"step": 7810
},
{
"epoch": 0.5550724894859191,
"grad_norm": 4.573352955842933,
"learning_rate": 4.450518245066024e-06,
"loss": 0.3835,
"step": 7820
},
{
"epoch": 0.5557823008535482,
"grad_norm": 2.9707860507790924,
"learning_rate": 4.443418997586256e-06,
"loss": 0.3709,
"step": 7830
},
{
"epoch": 0.5564921122211772,
"grad_norm": 2.5687241689417806,
"learning_rate": 4.436319750106489e-06,
"loss": 0.3835,
"step": 7840
},
{
"epoch": 0.5572019235888063,
"grad_norm": 3.347322471582433,
"learning_rate": 4.429220502626722e-06,
"loss": 0.3735,
"step": 7850
},
{
"epoch": 0.5579117349564353,
"grad_norm": 6.431823861299619,
"learning_rate": 4.422121255146955e-06,
"loss": 0.3704,
"step": 7860
},
{
"epoch": 0.5586215463240644,
"grad_norm": 3.050115422109329,
"learning_rate": 4.4150220076671875e-06,
"loss": 0.3822,
"step": 7870
},
{
"epoch": 0.5593313576916934,
"grad_norm": 1.7811591664189523,
"learning_rate": 4.407922760187421e-06,
"loss": 0.3658,
"step": 7880
},
{
"epoch": 0.5600411690593224,
"grad_norm": 3.442846796158278,
"learning_rate": 4.400823512707653e-06,
"loss": 0.3621,
"step": 7890
},
{
"epoch": 0.5607509804269515,
"grad_norm": 7.2461896738177,
"learning_rate": 4.393724265227886e-06,
"loss": 0.3526,
"step": 7900
},
{
"epoch": 0.5614607917945806,
"grad_norm": 2.0219408065827875,
"learning_rate": 4.386625017748119e-06,
"loss": 0.3659,
"step": 7910
},
{
"epoch": 0.5621706031622097,
"grad_norm": 4.896944413168855,
"learning_rate": 4.379525770268352e-06,
"loss": 0.3765,
"step": 7920
},
{
"epoch": 0.5628804145298387,
"grad_norm": 2.1094695887191848,
"learning_rate": 4.372426522788584e-06,
"loss": 0.3644,
"step": 7930
},
{
"epoch": 0.5635902258974678,
"grad_norm": 5.596991296221292,
"learning_rate": 4.3653272753088175e-06,
"loss": 0.3835,
"step": 7940
},
{
"epoch": 0.5643000372650968,
"grad_norm": 2.373450501523087,
"learning_rate": 4.358228027829051e-06,
"loss": 0.3756,
"step": 7950
},
{
"epoch": 0.5650098486327259,
"grad_norm": 4.1947432157390026,
"learning_rate": 4.351128780349284e-06,
"loss": 0.3787,
"step": 7960
},
{
"epoch": 0.5657196600003549,
"grad_norm": 2.921985411820113,
"learning_rate": 4.344029532869517e-06,
"loss": 0.3746,
"step": 7970
},
{
"epoch": 0.5664294713679839,
"grad_norm": 13.63904398617421,
"learning_rate": 4.336930285389749e-06,
"loss": 0.3535,
"step": 7980
},
{
"epoch": 0.567139282735613,
"grad_norm": 2.6665592498045037,
"learning_rate": 4.329831037909981e-06,
"loss": 0.3668,
"step": 7990
},
{
"epoch": 0.567849094103242,
"grad_norm": 2.7866449972058795,
"learning_rate": 4.3227317904302144e-06,
"loss": 0.3747,
"step": 8000
},
{
"epoch": 0.5685589054708711,
"grad_norm": 2.795372211208224,
"learning_rate": 4.3156325429504475e-06,
"loss": 0.3737,
"step": 8010
},
{
"epoch": 0.5692687168385002,
"grad_norm": 2.829992387736084,
"learning_rate": 4.308533295470681e-06,
"loss": 0.3813,
"step": 8020
},
{
"epoch": 0.5699785282061293,
"grad_norm": 3.8835793195310706,
"learning_rate": 4.301434047990914e-06,
"loss": 0.3934,
"step": 8030
},
{
"epoch": 0.5706883395737583,
"grad_norm": 2.157944880021205,
"learning_rate": 4.294334800511146e-06,
"loss": 0.3619,
"step": 8040
},
{
"epoch": 0.5713981509413874,
"grad_norm": 2.576031100575868,
"learning_rate": 4.287235553031379e-06,
"loss": 0.3654,
"step": 8050
},
{
"epoch": 0.5721079623090164,
"grad_norm": 2.1013120962560445,
"learning_rate": 4.280136305551612e-06,
"loss": 0.3808,
"step": 8060
},
{
"epoch": 0.5728177736766454,
"grad_norm": 8.72915943640877,
"learning_rate": 4.273037058071845e-06,
"loss": 0.3865,
"step": 8070
},
{
"epoch": 0.5735275850442745,
"grad_norm": 3.1373379205439123,
"learning_rate": 4.2659378105920776e-06,
"loss": 0.3631,
"step": 8080
},
{
"epoch": 0.5742373964119035,
"grad_norm": 10.697527972561883,
"learning_rate": 4.258838563112311e-06,
"loss": 0.3597,
"step": 8090
},
{
"epoch": 0.5749472077795326,
"grad_norm": 3.6970932139238095,
"learning_rate": 4.251739315632543e-06,
"loss": 0.3635,
"step": 8100
},
{
"epoch": 0.5756570191471616,
"grad_norm": 2.4203467674630206,
"learning_rate": 4.244640068152776e-06,
"loss": 0.359,
"step": 8110
},
{
"epoch": 0.5763668305147907,
"grad_norm": 2.9395692807103035,
"learning_rate": 4.237540820673009e-06,
"loss": 0.3603,
"step": 8120
},
{
"epoch": 0.5770766418824197,
"grad_norm": 3.012599979258794,
"learning_rate": 4.230441573193242e-06,
"loss": 0.3568,
"step": 8130
},
{
"epoch": 0.5777864532500488,
"grad_norm": 6.667370402568531,
"learning_rate": 4.2233423257134745e-06,
"loss": 0.3629,
"step": 8140
},
{
"epoch": 0.5784962646176779,
"grad_norm": 4.471487834006219,
"learning_rate": 4.2162430782337076e-06,
"loss": 0.3683,
"step": 8150
},
{
"epoch": 0.5792060759853069,
"grad_norm": 3.599804032694662,
"learning_rate": 4.209143830753941e-06,
"loss": 0.3554,
"step": 8160
},
{
"epoch": 0.579915887352936,
"grad_norm": 2.9142466980850985,
"learning_rate": 4.202044583274174e-06,
"loss": 0.3524,
"step": 8170
},
{
"epoch": 0.580625698720565,
"grad_norm": 3.8569199714753295,
"learning_rate": 4.194945335794406e-06,
"loss": 0.3663,
"step": 8180
},
{
"epoch": 0.5813355100881941,
"grad_norm": 2.4068975949006077,
"learning_rate": 4.187846088314639e-06,
"loss": 0.3747,
"step": 8190
},
{
"epoch": 0.5820453214558231,
"grad_norm": 6.174322801188514,
"learning_rate": 4.180746840834871e-06,
"loss": 0.372,
"step": 8200
},
{
"epoch": 0.5827551328234521,
"grad_norm": 2.888969982284499,
"learning_rate": 4.1736475933551045e-06,
"loss": 0.361,
"step": 8210
},
{
"epoch": 0.5834649441910812,
"grad_norm": 4.910093339119916,
"learning_rate": 4.166548345875338e-06,
"loss": 0.3574,
"step": 8220
},
{
"epoch": 0.5841747555587102,
"grad_norm": 5.1058356496999755,
"learning_rate": 4.159449098395571e-06,
"loss": 0.3786,
"step": 8230
},
{
"epoch": 0.5848845669263393,
"grad_norm": 14.081326767892058,
"learning_rate": 4.152349850915803e-06,
"loss": 0.3729,
"step": 8240
},
{
"epoch": 0.5855943782939684,
"grad_norm": 4.958684438886047,
"learning_rate": 4.145250603436036e-06,
"loss": 0.3566,
"step": 8250
},
{
"epoch": 0.5863041896615975,
"grad_norm": 3.9438637049329075,
"learning_rate": 4.138151355956269e-06,
"loss": 0.3861,
"step": 8260
},
{
"epoch": 0.5870140010292265,
"grad_norm": 2.9499712942928107,
"learning_rate": 4.131052108476502e-06,
"loss": 0.3439,
"step": 8270
},
{
"epoch": 0.5877238123968556,
"grad_norm": 3.332966504823502,
"learning_rate": 4.1239528609967345e-06,
"loss": 0.3788,
"step": 8280
},
{
"epoch": 0.5884336237644846,
"grad_norm": 27.970854056782667,
"learning_rate": 4.116853613516968e-06,
"loss": 0.3591,
"step": 8290
},
{
"epoch": 0.5891434351321136,
"grad_norm": 4.487327484061174,
"learning_rate": 4.1097543660372e-06,
"loss": 0.3625,
"step": 8300
},
{
"epoch": 0.5898532464997427,
"grad_norm": 3.8006981727665496,
"learning_rate": 4.102655118557433e-06,
"loss": 0.3709,
"step": 8310
},
{
"epoch": 0.5905630578673717,
"grad_norm": 3.463457513521014,
"learning_rate": 4.095555871077666e-06,
"loss": 0.3641,
"step": 8320
},
{
"epoch": 0.5912728692350008,
"grad_norm": 7.640707242523127,
"learning_rate": 4.088456623597899e-06,
"loss": 0.3648,
"step": 8330
},
{
"epoch": 0.5919826806026298,
"grad_norm": 2.8614936603096295,
"learning_rate": 4.081357376118132e-06,
"loss": 0.3616,
"step": 8340
},
{
"epoch": 0.5926924919702589,
"grad_norm": 3.296737746561609,
"learning_rate": 4.0742581286383645e-06,
"loss": 0.3808,
"step": 8350
},
{
"epoch": 0.5934023033378879,
"grad_norm": 3.2426352432246976,
"learning_rate": 4.067158881158598e-06,
"loss": 0.3583,
"step": 8360
},
{
"epoch": 0.5941121147055171,
"grad_norm": 3.4522007032736806,
"learning_rate": 4.060059633678831e-06,
"loss": 0.365,
"step": 8370
},
{
"epoch": 0.5948219260731461,
"grad_norm": 3.9166457660699145,
"learning_rate": 4.052960386199063e-06,
"loss": 0.3692,
"step": 8380
},
{
"epoch": 0.5955317374407751,
"grad_norm": 2.9039677495535874,
"learning_rate": 4.045861138719296e-06,
"loss": 0.3468,
"step": 8390
},
{
"epoch": 0.5962415488084042,
"grad_norm": 3.187977468656372,
"learning_rate": 4.038761891239529e-06,
"loss": 0.359,
"step": 8400
},
{
"epoch": 0.5969513601760332,
"grad_norm": 4.529576318117622,
"learning_rate": 4.0316626437597614e-06,
"loss": 0.3452,
"step": 8410
},
{
"epoch": 0.5976611715436623,
"grad_norm": 6.601726345536697,
"learning_rate": 4.0245633962799945e-06,
"loss": 0.3713,
"step": 8420
},
{
"epoch": 0.5983709829112913,
"grad_norm": 2.4278158486667576,
"learning_rate": 4.017464148800228e-06,
"loss": 0.3628,
"step": 8430
},
{
"epoch": 0.5990807942789204,
"grad_norm": 2.76630569189727,
"learning_rate": 4.010364901320461e-06,
"loss": 0.3704,
"step": 8440
},
{
"epoch": 0.5997906056465494,
"grad_norm": 6.7843620715556545,
"learning_rate": 4.003265653840693e-06,
"loss": 0.3682,
"step": 8450
},
{
"epoch": 0.6005004170141784,
"grad_norm": 2.9403338895288336,
"learning_rate": 3.996166406360926e-06,
"loss": 0.3608,
"step": 8460
},
{
"epoch": 0.6012102283818075,
"grad_norm": 4.301178222098619,
"learning_rate": 3.989067158881159e-06,
"loss": 0.3595,
"step": 8470
},
{
"epoch": 0.6019200397494366,
"grad_norm": 3.0914199152912696,
"learning_rate": 3.981967911401392e-06,
"loss": 0.3718,
"step": 8480
},
{
"epoch": 0.6026298511170657,
"grad_norm": 2.753384437967004,
"learning_rate": 3.9748686639216246e-06,
"loss": 0.3672,
"step": 8490
},
{
"epoch": 0.6033396624846947,
"grad_norm": 2.576321546323924,
"learning_rate": 3.967769416441858e-06,
"loss": 0.3706,
"step": 8500
},
{
"epoch": 0.6040494738523238,
"grad_norm": 2.617904283815147,
"learning_rate": 3.96067016896209e-06,
"loss": 0.3539,
"step": 8510
},
{
"epoch": 0.6047592852199528,
"grad_norm": 4.862875127190094,
"learning_rate": 3.953570921482323e-06,
"loss": 0.3763,
"step": 8520
},
{
"epoch": 0.6054690965875819,
"grad_norm": 4.741023889550647,
"learning_rate": 3.946471674002556e-06,
"loss": 0.3611,
"step": 8530
},
{
"epoch": 0.6061789079552109,
"grad_norm": 6.394478684199079,
"learning_rate": 3.939372426522789e-06,
"loss": 0.3615,
"step": 8540
},
{
"epoch": 0.6068887193228399,
"grad_norm": 4.045100357410319,
"learning_rate": 3.9322731790430215e-06,
"loss": 0.3648,
"step": 8550
},
{
"epoch": 0.607598530690469,
"grad_norm": 3.756852697194425,
"learning_rate": 3.925173931563255e-06,
"loss": 0.3689,
"step": 8560
},
{
"epoch": 0.608308342058098,
"grad_norm": 4.04897373953826,
"learning_rate": 3.918074684083488e-06,
"loss": 0.3644,
"step": 8570
},
{
"epoch": 0.6090181534257271,
"grad_norm": 4.036663207362448,
"learning_rate": 3.910975436603721e-06,
"loss": 0.366,
"step": 8580
},
{
"epoch": 0.6097279647933561,
"grad_norm": 4.156260594948616,
"learning_rate": 3.903876189123953e-06,
"loss": 0.3554,
"step": 8590
},
{
"epoch": 0.6104377761609853,
"grad_norm": 3.398605568980307,
"learning_rate": 3.896776941644186e-06,
"loss": 0.3717,
"step": 8600
},
{
"epoch": 0.6111475875286143,
"grad_norm": 3.5114677948249065,
"learning_rate": 3.889677694164418e-06,
"loss": 0.3677,
"step": 8610
},
{
"epoch": 0.6118573988962434,
"grad_norm": 4.753605099187553,
"learning_rate": 3.8825784466846515e-06,
"loss": 0.3547,
"step": 8620
},
{
"epoch": 0.6125672102638724,
"grad_norm": 3.4243729659259334,
"learning_rate": 3.875479199204885e-06,
"loss": 0.3762,
"step": 8630
},
{
"epoch": 0.6132770216315014,
"grad_norm": 5.94912381861312,
"learning_rate": 3.868379951725118e-06,
"loss": 0.359,
"step": 8640
},
{
"epoch": 0.6139868329991305,
"grad_norm": 6.590267176028699,
"learning_rate": 3.861280704245351e-06,
"loss": 0.3758,
"step": 8650
},
{
"epoch": 0.6146966443667595,
"grad_norm": 3.3256854782540497,
"learning_rate": 3.854181456765583e-06,
"loss": 0.3562,
"step": 8660
},
{
"epoch": 0.6154064557343886,
"grad_norm": 3.6453120360212816,
"learning_rate": 3.847082209285816e-06,
"loss": 0.3619,
"step": 8670
},
{
"epoch": 0.6161162671020176,
"grad_norm": 13.965716037023453,
"learning_rate": 3.839982961806049e-06,
"loss": 0.3646,
"step": 8680
},
{
"epoch": 0.6168260784696467,
"grad_norm": 7.837860273774759,
"learning_rate": 3.8328837143262815e-06,
"loss": 0.3457,
"step": 8690
},
{
"epoch": 0.6175358898372757,
"grad_norm": 4.729547574214101,
"learning_rate": 3.825784466846515e-06,
"loss": 0.3565,
"step": 8700
},
{
"epoch": 0.6182457012049049,
"grad_norm": 2.5619385732076987,
"learning_rate": 3.818685219366748e-06,
"loss": 0.3676,
"step": 8710
},
{
"epoch": 0.6189555125725339,
"grad_norm": 6.790019325573497,
"learning_rate": 3.8115859718869804e-06,
"loss": 0.3646,
"step": 8720
},
{
"epoch": 0.6196653239401629,
"grad_norm": 3.3195434105048665,
"learning_rate": 3.804486724407213e-06,
"loss": 0.3575,
"step": 8730
},
{
"epoch": 0.620375135307792,
"grad_norm": 3.805294873305076,
"learning_rate": 3.797387476927446e-06,
"loss": 0.3657,
"step": 8740
},
{
"epoch": 0.621084946675421,
"grad_norm": 5.59682650769057,
"learning_rate": 3.790288229447679e-06,
"loss": 0.3609,
"step": 8750
},
{
"epoch": 0.6217947580430501,
"grad_norm": 4.89958212672841,
"learning_rate": 3.783188981967912e-06,
"loss": 0.3669,
"step": 8760
},
{
"epoch": 0.6225045694106791,
"grad_norm": 8.274929479843232,
"learning_rate": 3.7760897344881446e-06,
"loss": 0.3581,
"step": 8770
},
{
"epoch": 0.6232143807783082,
"grad_norm": 3.2978821299433445,
"learning_rate": 3.7689904870083777e-06,
"loss": 0.3679,
"step": 8780
},
{
"epoch": 0.6239241921459372,
"grad_norm": 12.435473632592815,
"learning_rate": 3.76189123952861e-06,
"loss": 0.3677,
"step": 8790
},
{
"epoch": 0.6246340035135662,
"grad_norm": 4.195421567773733,
"learning_rate": 3.754791992048843e-06,
"loss": 0.3492,
"step": 8800
},
{
"epoch": 0.6253438148811953,
"grad_norm": 4.406904963403177,
"learning_rate": 3.7476927445690758e-06,
"loss": 0.3597,
"step": 8810
},
{
"epoch": 0.6260536262488243,
"grad_norm": 4.199730218503971,
"learning_rate": 3.740593497089309e-06,
"loss": 0.3797,
"step": 8820
},
{
"epoch": 0.6267634376164535,
"grad_norm": 3.3446382282646705,
"learning_rate": 3.7334942496095415e-06,
"loss": 0.3638,
"step": 8830
},
{
"epoch": 0.6274732489840825,
"grad_norm": 4.862585068251522,
"learning_rate": 3.7263950021297747e-06,
"loss": 0.3573,
"step": 8840
},
{
"epoch": 0.6281830603517116,
"grad_norm": 8.107090011887513,
"learning_rate": 3.7192957546500073e-06,
"loss": 0.3672,
"step": 8850
},
{
"epoch": 0.6288928717193406,
"grad_norm": 4.3962651782052005,
"learning_rate": 3.7121965071702404e-06,
"loss": 0.3412,
"step": 8860
},
{
"epoch": 0.6296026830869697,
"grad_norm": 4.6424143973536935,
"learning_rate": 3.705097259690473e-06,
"loss": 0.3667,
"step": 8870
},
{
"epoch": 0.6303124944545987,
"grad_norm": 3.840268427443435,
"learning_rate": 3.697998012210706e-06,
"loss": 0.3557,
"step": 8880
},
{
"epoch": 0.6310223058222277,
"grad_norm": 3.6388205049600018,
"learning_rate": 3.6908987647309385e-06,
"loss": 0.3631,
"step": 8890
},
{
"epoch": 0.6317321171898568,
"grad_norm": 5.233530712843461,
"learning_rate": 3.6837995172511716e-06,
"loss": 0.3648,
"step": 8900
},
{
"epoch": 0.6324419285574858,
"grad_norm": 3.781452701492992,
"learning_rate": 3.6767002697714042e-06,
"loss": 0.3788,
"step": 8910
},
{
"epoch": 0.6331517399251149,
"grad_norm": 6.068345043524154,
"learning_rate": 3.6696010222916373e-06,
"loss": 0.3566,
"step": 8920
},
{
"epoch": 0.6338615512927439,
"grad_norm": 5.599734595118006,
"learning_rate": 3.66250177481187e-06,
"loss": 0.349,
"step": 8930
},
{
"epoch": 0.634571362660373,
"grad_norm": 10.428150341049763,
"learning_rate": 3.655402527332103e-06,
"loss": 0.3584,
"step": 8940
},
{
"epoch": 0.6352811740280021,
"grad_norm": 17.681698800577582,
"learning_rate": 3.648303279852336e-06,
"loss": 0.3458,
"step": 8950
},
{
"epoch": 0.6359909853956311,
"grad_norm": 6.591627899287575,
"learning_rate": 3.641204032372569e-06,
"loss": 0.3643,
"step": 8960
},
{
"epoch": 0.6367007967632602,
"grad_norm": 31.04186356298661,
"learning_rate": 3.634104784892802e-06,
"loss": 0.3577,
"step": 8970
},
{
"epoch": 0.6374106081308892,
"grad_norm": 8.824274787999325,
"learning_rate": 3.6270055374130347e-06,
"loss": 0.3618,
"step": 8980
},
{
"epoch": 0.6381204194985183,
"grad_norm": 4.7185603252655826,
"learning_rate": 3.619906289933267e-06,
"loss": 0.3598,
"step": 8990
},
{
"epoch": 0.6388302308661473,
"grad_norm": 5.394376788444082,
"learning_rate": 3.6128070424535e-06,
"loss": 0.362,
"step": 9000
},
{
"epoch": 0.6395400422337764,
"grad_norm": 7.158347387403476,
"learning_rate": 3.6057077949737327e-06,
"loss": 0.3694,
"step": 9010
},
{
"epoch": 0.6402498536014054,
"grad_norm": 8.033101525768098,
"learning_rate": 3.598608547493966e-06,
"loss": 0.3626,
"step": 9020
},
{
"epoch": 0.6409596649690344,
"grad_norm": 2.7105647455701667,
"learning_rate": 3.591509300014199e-06,
"loss": 0.3462,
"step": 9030
},
{
"epoch": 0.6416694763366635,
"grad_norm": 6.3548259889750955,
"learning_rate": 3.5844100525344316e-06,
"loss": 0.3632,
"step": 9040
},
{
"epoch": 0.6423792877042925,
"grad_norm": 7.341190059846113,
"learning_rate": 3.5773108050546647e-06,
"loss": 0.3653,
"step": 9050
},
{
"epoch": 0.6430890990719217,
"grad_norm": 3.8869033025489723,
"learning_rate": 3.5702115575748974e-06,
"loss": 0.3412,
"step": 9060
},
{
"epoch": 0.6437989104395507,
"grad_norm": 4.918908181105817,
"learning_rate": 3.5631123100951305e-06,
"loss": 0.3616,
"step": 9070
},
{
"epoch": 0.6445087218071798,
"grad_norm": 6.124064792410853,
"learning_rate": 3.556013062615363e-06,
"loss": 0.3585,
"step": 9080
},
{
"epoch": 0.6452185331748088,
"grad_norm": 3.6806357015000764,
"learning_rate": 3.5489138151355963e-06,
"loss": 0.3668,
"step": 9090
},
{
"epoch": 0.6459283445424379,
"grad_norm": 5.193254667513745,
"learning_rate": 3.5418145676558285e-06,
"loss": 0.3669,
"step": 9100
},
{
"epoch": 0.6466381559100669,
"grad_norm": 10.978524486328482,
"learning_rate": 3.5347153201760616e-06,
"loss": 0.3597,
"step": 9110
},
{
"epoch": 0.647347967277696,
"grad_norm": 4.6611361687349175,
"learning_rate": 3.5276160726962943e-06,
"loss": 0.3695,
"step": 9120
},
{
"epoch": 0.648057778645325,
"grad_norm": 5.205492428214056,
"learning_rate": 3.5205168252165274e-06,
"loss": 0.3663,
"step": 9130
},
{
"epoch": 0.648767590012954,
"grad_norm": 5.139991204646184,
"learning_rate": 3.51341757773676e-06,
"loss": 0.3551,
"step": 9140
},
{
"epoch": 0.6494774013805831,
"grad_norm": 16.35255401640736,
"learning_rate": 3.506318330256993e-06,
"loss": 0.3553,
"step": 9150
},
{
"epoch": 0.6501872127482121,
"grad_norm": 10.145378264655722,
"learning_rate": 3.499219082777226e-06,
"loss": 0.3583,
"step": 9160
},
{
"epoch": 0.6508970241158412,
"grad_norm": 24.878144093372033,
"learning_rate": 3.492119835297459e-06,
"loss": 0.3555,
"step": 9170
},
{
"epoch": 0.6516068354834703,
"grad_norm": 3.902743241561423,
"learning_rate": 3.4850205878176916e-06,
"loss": 0.3723,
"step": 9180
},
{
"epoch": 0.6523166468510994,
"grad_norm": 4.458085439514939,
"learning_rate": 3.4779213403379247e-06,
"loss": 0.3701,
"step": 9190
},
{
"epoch": 0.6530264582187284,
"grad_norm": 4.717552266761064,
"learning_rate": 3.470822092858157e-06,
"loss": 0.3618,
"step": 9200
},
{
"epoch": 0.6537362695863574,
"grad_norm": 4.427364622798698,
"learning_rate": 3.46372284537839e-06,
"loss": 0.3614,
"step": 9210
},
{
"epoch": 0.6544460809539865,
"grad_norm": 8.323851654330221,
"learning_rate": 3.4566235978986228e-06,
"loss": 0.3678,
"step": 9220
},
{
"epoch": 0.6551558923216155,
"grad_norm": 4.966094347637934,
"learning_rate": 3.449524350418856e-06,
"loss": 0.3688,
"step": 9230
},
{
"epoch": 0.6558657036892446,
"grad_norm": 4.930577227679058,
"learning_rate": 3.4424251029390886e-06,
"loss": 0.3503,
"step": 9240
},
{
"epoch": 0.6565755150568736,
"grad_norm": 5.52399635730182,
"learning_rate": 3.4353258554593217e-06,
"loss": 0.3696,
"step": 9250
},
{
"epoch": 0.6572853264245027,
"grad_norm": 4.590670373221129,
"learning_rate": 3.4282266079795543e-06,
"loss": 0.3685,
"step": 9260
},
{
"epoch": 0.6579951377921317,
"grad_norm": 8.264828163926657,
"learning_rate": 3.4211273604997874e-06,
"loss": 0.3575,
"step": 9270
},
{
"epoch": 0.6587049491597607,
"grad_norm": 8.133262914973033,
"learning_rate": 3.4140281130200205e-06,
"loss": 0.3713,
"step": 9280
},
{
"epoch": 0.6594147605273899,
"grad_norm": 5.742760195932282,
"learning_rate": 3.4069288655402532e-06,
"loss": 0.3725,
"step": 9290
},
{
"epoch": 0.6601245718950189,
"grad_norm": 8.53035579823295,
"learning_rate": 3.3998296180604855e-06,
"loss": 0.3599,
"step": 9300
},
{
"epoch": 0.660834383262648,
"grad_norm": 4.142002947123207,
"learning_rate": 3.3927303705807186e-06,
"loss": 0.3661,
"step": 9310
},
{
"epoch": 0.661544194630277,
"grad_norm": 6.246166093324293,
"learning_rate": 3.3856311231009513e-06,
"loss": 0.351,
"step": 9320
},
{
"epoch": 0.6622540059979061,
"grad_norm": 16.243950855343193,
"learning_rate": 3.3785318756211844e-06,
"loss": 0.3479,
"step": 9330
},
{
"epoch": 0.6629638173655351,
"grad_norm": 6.147144910165458,
"learning_rate": 3.3714326281414175e-06,
"loss": 0.3543,
"step": 9340
},
{
"epoch": 0.6636736287331642,
"grad_norm": 4.099934401177817,
"learning_rate": 3.36433338066165e-06,
"loss": 0.3636,
"step": 9350
},
{
"epoch": 0.6643834401007932,
"grad_norm": 4.17019707869721,
"learning_rate": 3.3572341331818832e-06,
"loss": 0.351,
"step": 9360
},
{
"epoch": 0.6650932514684222,
"grad_norm": 4.102146778496878,
"learning_rate": 3.350134885702116e-06,
"loss": 0.3737,
"step": 9370
},
{
"epoch": 0.6658030628360513,
"grad_norm": 4.155164161456904,
"learning_rate": 3.343035638222349e-06,
"loss": 0.3505,
"step": 9380
},
{
"epoch": 0.6665128742036803,
"grad_norm": 4.042739251178277,
"learning_rate": 3.3359363907425817e-06,
"loss": 0.3578,
"step": 9390
},
{
"epoch": 0.6672226855713094,
"grad_norm": 3.4724621327513057,
"learning_rate": 3.328837143262814e-06,
"loss": 0.3733,
"step": 9400
},
{
"epoch": 0.6679324969389385,
"grad_norm": 3.284294254497063,
"learning_rate": 3.321737895783047e-06,
"loss": 0.361,
"step": 9410
},
{
"epoch": 0.6686423083065676,
"grad_norm": 5.224665667041366,
"learning_rate": 3.31463864830328e-06,
"loss": 0.3597,
"step": 9420
},
{
"epoch": 0.6693521196741966,
"grad_norm": 13.317891191179472,
"learning_rate": 3.307539400823513e-06,
"loss": 0.36,
"step": 9430
},
{
"epoch": 0.6700619310418257,
"grad_norm": 8.338179465785696,
"learning_rate": 3.300440153343746e-06,
"loss": 0.3708,
"step": 9440
},
{
"epoch": 0.6707717424094547,
"grad_norm": 4.022884248031831,
"learning_rate": 3.2933409058639786e-06,
"loss": 0.357,
"step": 9450
},
{
"epoch": 0.6714815537770837,
"grad_norm": 2.816929350582557,
"learning_rate": 3.2862416583842117e-06,
"loss": 0.3618,
"step": 9460
},
{
"epoch": 0.6721913651447128,
"grad_norm": 3.2609706893982278,
"learning_rate": 3.2791424109044444e-06,
"loss": 0.3566,
"step": 9470
},
{
"epoch": 0.6729011765123418,
"grad_norm": 2.0212043627509177,
"learning_rate": 3.2720431634246775e-06,
"loss": 0.3631,
"step": 9480
},
{
"epoch": 0.6736109878799709,
"grad_norm": 3.472359881135022,
"learning_rate": 3.26494391594491e-06,
"loss": 0.3465,
"step": 9490
},
{
"epoch": 0.6743207992475999,
"grad_norm": 2.365708920981696,
"learning_rate": 3.257844668465143e-06,
"loss": 0.36,
"step": 9500
},
{
"epoch": 0.675030610615229,
"grad_norm": 6.47059083775482,
"learning_rate": 3.2507454209853755e-06,
"loss": 0.3589,
"step": 9510
},
{
"epoch": 0.6757404219828581,
"grad_norm": 2.9761715896390872,
"learning_rate": 3.2436461735056086e-06,
"loss": 0.3737,
"step": 9520
},
{
"epoch": 0.6764502333504872,
"grad_norm": 3.2920710102385375,
"learning_rate": 3.2365469260258413e-06,
"loss": 0.3631,
"step": 9530
},
{
"epoch": 0.6771600447181162,
"grad_norm": 2.24517655258034,
"learning_rate": 3.2294476785460744e-06,
"loss": 0.3565,
"step": 9540
},
{
"epoch": 0.6778698560857452,
"grad_norm": 4.585199424065417,
"learning_rate": 3.222348431066307e-06,
"loss": 0.3587,
"step": 9550
},
{
"epoch": 0.6785796674533743,
"grad_norm": 2.616245813772314,
"learning_rate": 3.21524918358654e-06,
"loss": 0.3641,
"step": 9560
},
{
"epoch": 0.6792894788210033,
"grad_norm": 6.790868775160296,
"learning_rate": 3.208149936106773e-06,
"loss": 0.3542,
"step": 9570
},
{
"epoch": 0.6799992901886324,
"grad_norm": 4.6720875235574955,
"learning_rate": 3.201050688627006e-06,
"loss": 0.3724,
"step": 9580
},
{
"epoch": 0.6807091015562614,
"grad_norm": 2.929891653919803,
"learning_rate": 3.193951441147239e-06,
"loss": 0.355,
"step": 9590
},
{
"epoch": 0.6814189129238905,
"grad_norm": 2.5935885874594935,
"learning_rate": 3.1868521936674717e-06,
"loss": 0.3477,
"step": 9600
},
{
"epoch": 0.6821287242915195,
"grad_norm": 4.16743323358689,
"learning_rate": 3.179752946187704e-06,
"loss": 0.3732,
"step": 9610
},
{
"epoch": 0.6828385356591485,
"grad_norm": 3.119963047712144,
"learning_rate": 3.172653698707937e-06,
"loss": 0.3583,
"step": 9620
},
{
"epoch": 0.6835483470267776,
"grad_norm": 4.025619816942283,
"learning_rate": 3.1655544512281698e-06,
"loss": 0.3814,
"step": 9630
},
{
"epoch": 0.6842581583944067,
"grad_norm": 10.60216606667068,
"learning_rate": 3.158455203748403e-06,
"loss": 0.3599,
"step": 9640
},
{
"epoch": 0.6849679697620358,
"grad_norm": 4.461108822226996,
"learning_rate": 3.1513559562686356e-06,
"loss": 0.3619,
"step": 9650
},
{
"epoch": 0.6856777811296648,
"grad_norm": 2.7381838956818596,
"learning_rate": 3.1442567087888687e-06,
"loss": 0.361,
"step": 9660
},
{
"epoch": 0.6863875924972939,
"grad_norm": 3.3932603213636536,
"learning_rate": 3.1371574613091018e-06,
"loss": 0.3722,
"step": 9670
},
{
"epoch": 0.6870974038649229,
"grad_norm": 3.0238463961256556,
"learning_rate": 3.1300582138293344e-06,
"loss": 0.3677,
"step": 9680
},
{
"epoch": 0.687807215232552,
"grad_norm": 2.9020326019536236,
"learning_rate": 3.1229589663495675e-06,
"loss": 0.3587,
"step": 9690
},
{
"epoch": 0.688517026600181,
"grad_norm": 3.4182793620767313,
"learning_rate": 3.1158597188698002e-06,
"loss": 0.3958,
"step": 9700
},
{
"epoch": 0.68922683796781,
"grad_norm": 2.7346693208831123,
"learning_rate": 3.1087604713900325e-06,
"loss": 0.3746,
"step": 9710
},
{
"epoch": 0.6899366493354391,
"grad_norm": 2.7001110030197184,
"learning_rate": 3.1016612239102656e-06,
"loss": 0.3596,
"step": 9720
},
{
"epoch": 0.6906464607030681,
"grad_norm": 3.8786526590857706,
"learning_rate": 3.0945619764304987e-06,
"loss": 0.3677,
"step": 9730
},
{
"epoch": 0.6913562720706972,
"grad_norm": 3.601819125137747,
"learning_rate": 3.0874627289507314e-06,
"loss": 0.3599,
"step": 9740
},
{
"epoch": 0.6920660834383263,
"grad_norm": 4.257577712986774,
"learning_rate": 3.0803634814709645e-06,
"loss": 0.3653,
"step": 9750
},
{
"epoch": 0.6927758948059554,
"grad_norm": 16.2562479732823,
"learning_rate": 3.073264233991197e-06,
"loss": 0.3786,
"step": 9760
},
{
"epoch": 0.6934857061735844,
"grad_norm": 2.8308341290836037,
"learning_rate": 3.0661649865114302e-06,
"loss": 0.347,
"step": 9770
},
{
"epoch": 0.6941955175412134,
"grad_norm": 2.386467475595729,
"learning_rate": 3.059065739031663e-06,
"loss": 0.3785,
"step": 9780
},
{
"epoch": 0.6949053289088425,
"grad_norm": 3.11594441686047,
"learning_rate": 3.051966491551896e-06,
"loss": 0.3613,
"step": 9790
},
{
"epoch": 0.6956151402764715,
"grad_norm": 3.4457140851193677,
"learning_rate": 3.0448672440721287e-06,
"loss": 0.3592,
"step": 9800
},
{
"epoch": 0.6963249516441006,
"grad_norm": 6.7733834909511135,
"learning_rate": 3.0377679965923614e-06,
"loss": 0.3503,
"step": 9810
},
{
"epoch": 0.6970347630117296,
"grad_norm": 2.552293405448118,
"learning_rate": 3.030668749112594e-06,
"loss": 0.3565,
"step": 9820
},
{
"epoch": 0.6977445743793587,
"grad_norm": 7.3573968999972985,
"learning_rate": 3.023569501632827e-06,
"loss": 0.3534,
"step": 9830
},
{
"epoch": 0.6984543857469877,
"grad_norm": 2.2835556419626286,
"learning_rate": 3.01647025415306e-06,
"loss": 0.3627,
"step": 9840
},
{
"epoch": 0.6991641971146167,
"grad_norm": 4.158935806681915,
"learning_rate": 3.009371006673293e-06,
"loss": 0.3676,
"step": 9850
},
{
"epoch": 0.6998740084822458,
"grad_norm": 3.444386024390724,
"learning_rate": 3.0022717591935256e-06,
"loss": 0.3498,
"step": 9860
},
{
"epoch": 0.700583819849875,
"grad_norm": 76.68033690471103,
"learning_rate": 2.9951725117137587e-06,
"loss": 0.3465,
"step": 9870
},
{
"epoch": 0.701293631217504,
"grad_norm": 2.753848553217651,
"learning_rate": 2.9880732642339914e-06,
"loss": 0.3579,
"step": 9880
},
{
"epoch": 0.702003442585133,
"grad_norm": 6.8770901385155465,
"learning_rate": 2.9809740167542245e-06,
"loss": 0.3644,
"step": 9890
},
{
"epoch": 0.7027132539527621,
"grad_norm": 8.050770443325867,
"learning_rate": 2.9738747692744576e-06,
"loss": 0.3534,
"step": 9900
},
{
"epoch": 0.7034230653203911,
"grad_norm": 6.2381173840397794,
"learning_rate": 2.96677552179469e-06,
"loss": 0.3799,
"step": 9910
},
{
"epoch": 0.7041328766880202,
"grad_norm": 2.527197221067041,
"learning_rate": 2.9596762743149225e-06,
"loss": 0.3702,
"step": 9920
},
{
"epoch": 0.7048426880556492,
"grad_norm": 3.365675129758323,
"learning_rate": 2.9525770268351556e-06,
"loss": 0.3618,
"step": 9930
},
{
"epoch": 0.7055524994232782,
"grad_norm": 3.7307831294643323,
"learning_rate": 2.9454777793553883e-06,
"loss": 0.3552,
"step": 9940
},
{
"epoch": 0.7062623107909073,
"grad_norm": 10.13055799757591,
"learning_rate": 2.9383785318756214e-06,
"loss": 0.369,
"step": 9950
},
{
"epoch": 0.7069721221585363,
"grad_norm": 3.79159989826404,
"learning_rate": 2.931279284395854e-06,
"loss": 0.3393,
"step": 9960
},
{
"epoch": 0.7076819335261654,
"grad_norm": 11.361319554472407,
"learning_rate": 2.924180036916087e-06,
"loss": 0.3726,
"step": 9970
},
{
"epoch": 0.7083917448937945,
"grad_norm": 2.2727709813242,
"learning_rate": 2.9170807894363203e-06,
"loss": 0.3558,
"step": 9980
},
{
"epoch": 0.7091015562614236,
"grad_norm": 13.54783288221351,
"learning_rate": 2.909981541956553e-06,
"loss": 0.3522,
"step": 9990
},
{
"epoch": 0.7098113676290526,
"grad_norm": 3.4738198913190037,
"learning_rate": 2.902882294476786e-06,
"loss": 0.3636,
"step": 10000
},
{
"epoch": 0.7105211789966817,
"grad_norm": 2.599196507580769,
"learning_rate": 2.8957830469970183e-06,
"loss": 0.373,
"step": 10010
},
{
"epoch": 0.7112309903643107,
"grad_norm": 4.846340487255633,
"learning_rate": 2.888683799517251e-06,
"loss": 0.364,
"step": 10020
},
{
"epoch": 0.7119408017319397,
"grad_norm": 4.14481835106229,
"learning_rate": 2.881584552037484e-06,
"loss": 0.3565,
"step": 10030
},
{
"epoch": 0.7126506130995688,
"grad_norm": 3.12959687042078,
"learning_rate": 2.8744853045577172e-06,
"loss": 0.3597,
"step": 10040
},
{
"epoch": 0.7133604244671978,
"grad_norm": 2.0499607045489157,
"learning_rate": 2.86738605707795e-06,
"loss": 0.3665,
"step": 10050
},
{
"epoch": 0.7140702358348269,
"grad_norm": 3.4345739303394964,
"learning_rate": 2.860286809598183e-06,
"loss": 0.3406,
"step": 10060
},
{
"epoch": 0.7147800472024559,
"grad_norm": 3.2507549549593677,
"learning_rate": 2.8531875621184157e-06,
"loss": 0.3691,
"step": 10070
},
{
"epoch": 0.715489858570085,
"grad_norm": 3.088999571380729,
"learning_rate": 2.8460883146386488e-06,
"loss": 0.3512,
"step": 10080
},
{
"epoch": 0.716199669937714,
"grad_norm": 3.992697102415428,
"learning_rate": 2.8389890671588815e-06,
"loss": 0.3584,
"step": 10090
},
{
"epoch": 0.7169094813053432,
"grad_norm": 8.327520697203159,
"learning_rate": 2.8318898196791146e-06,
"loss": 0.3604,
"step": 10100
},
{
"epoch": 0.7176192926729722,
"grad_norm": 4.600972082353797,
"learning_rate": 2.824790572199347e-06,
"loss": 0.3641,
"step": 10110
},
{
"epoch": 0.7183291040406012,
"grad_norm": 3.6403983429872384,
"learning_rate": 2.81769132471958e-06,
"loss": 0.3496,
"step": 10120
},
{
"epoch": 0.7190389154082303,
"grad_norm": 2.831902492470625,
"learning_rate": 2.8105920772398126e-06,
"loss": 0.3611,
"step": 10130
},
{
"epoch": 0.7197487267758593,
"grad_norm": 4.428260390842955,
"learning_rate": 2.8034928297600457e-06,
"loss": 0.3572,
"step": 10140
},
{
"epoch": 0.7204585381434884,
"grad_norm": 5.5528766539260825,
"learning_rate": 2.7963935822802784e-06,
"loss": 0.3605,
"step": 10150
},
{
"epoch": 0.7211683495111174,
"grad_norm": 3.3271150324051124,
"learning_rate": 2.7892943348005115e-06,
"loss": 0.3646,
"step": 10160
},
{
"epoch": 0.7218781608787465,
"grad_norm": 4.353636452465487,
"learning_rate": 2.782195087320744e-06,
"loss": 0.3745,
"step": 10170
},
{
"epoch": 0.7225879722463755,
"grad_norm": 4.938483709090633,
"learning_rate": 2.7750958398409773e-06,
"loss": 0.3586,
"step": 10180
},
{
"epoch": 0.7232977836140045,
"grad_norm": 4.667393928494558,
"learning_rate": 2.76799659236121e-06,
"loss": 0.3526,
"step": 10190
},
{
"epoch": 0.7240075949816336,
"grad_norm": 5.312814121573459,
"learning_rate": 2.760897344881443e-06,
"loss": 0.3539,
"step": 10200
},
{
"epoch": 0.7247174063492627,
"grad_norm": 3.102848391211554,
"learning_rate": 2.7537980974016757e-06,
"loss": 0.3453,
"step": 10210
},
{
"epoch": 0.7254272177168918,
"grad_norm": 3.036840145081599,
"learning_rate": 2.7466988499219084e-06,
"loss": 0.3627,
"step": 10220
},
{
"epoch": 0.7261370290845208,
"grad_norm": 5.647990352632265,
"learning_rate": 2.739599602442141e-06,
"loss": 0.3555,
"step": 10230
},
{
"epoch": 0.7268468404521499,
"grad_norm": 4.66342024342857,
"learning_rate": 2.732500354962374e-06,
"loss": 0.3722,
"step": 10240
},
{
"epoch": 0.7275566518197789,
"grad_norm": 3.168307885423117,
"learning_rate": 2.725401107482607e-06,
"loss": 0.3673,
"step": 10250
},
{
"epoch": 0.728266463187408,
"grad_norm": 4.968172759395676,
"learning_rate": 2.71830186000284e-06,
"loss": 0.3556,
"step": 10260
},
{
"epoch": 0.728976274555037,
"grad_norm": 3.5154935991341123,
"learning_rate": 2.7112026125230726e-06,
"loss": 0.3593,
"step": 10270
},
{
"epoch": 0.729686085922666,
"grad_norm": 5.0083468168620655,
"learning_rate": 2.7041033650433057e-06,
"loss": 0.3592,
"step": 10280
},
{
"epoch": 0.7303958972902951,
"grad_norm": 3.379094612224907,
"learning_rate": 2.697004117563539e-06,
"loss": 0.3643,
"step": 10290
},
{
"epoch": 0.7311057086579241,
"grad_norm": 4.180270451928424,
"learning_rate": 2.6899048700837715e-06,
"loss": 0.3574,
"step": 10300
},
{
"epoch": 0.7318155200255532,
"grad_norm": 4.640198570927561,
"learning_rate": 2.6828056226040046e-06,
"loss": 0.3578,
"step": 10310
},
{
"epoch": 0.7325253313931822,
"grad_norm": 10.365125402351024,
"learning_rate": 2.675706375124237e-06,
"loss": 0.3614,
"step": 10320
},
{
"epoch": 0.7332351427608114,
"grad_norm": 15.355341780635097,
"learning_rate": 2.6686071276444695e-06,
"loss": 0.3631,
"step": 10330
},
{
"epoch": 0.7339449541284404,
"grad_norm": 6.738981517513828,
"learning_rate": 2.6615078801647026e-06,
"loss": 0.3493,
"step": 10340
},
{
"epoch": 0.7346547654960695,
"grad_norm": 7.55570609393924,
"learning_rate": 2.6544086326849357e-06,
"loss": 0.371,
"step": 10350
},
{
"epoch": 0.7353645768636985,
"grad_norm": 2.6482961979611526,
"learning_rate": 2.6473093852051684e-06,
"loss": 0.3591,
"step": 10360
},
{
"epoch": 0.7360743882313275,
"grad_norm": 8.054548870993123,
"learning_rate": 2.6402101377254015e-06,
"loss": 0.3577,
"step": 10370
},
{
"epoch": 0.7367841995989566,
"grad_norm": 7.370207938746124,
"learning_rate": 2.633110890245634e-06,
"loss": 0.3509,
"step": 10380
},
{
"epoch": 0.7374940109665856,
"grad_norm": 8.915363239178143,
"learning_rate": 2.6260116427658673e-06,
"loss": 0.3595,
"step": 10390
},
{
"epoch": 0.7382038223342147,
"grad_norm": 6.453539668987391,
"learning_rate": 2.6189123952861e-06,
"loss": 0.3735,
"step": 10400
},
{
"epoch": 0.7389136337018437,
"grad_norm": 13.429374820990935,
"learning_rate": 2.611813147806333e-06,
"loss": 0.343,
"step": 10410
},
{
"epoch": 0.7396234450694728,
"grad_norm": 4.019465503184252,
"learning_rate": 2.6047139003265653e-06,
"loss": 0.3619,
"step": 10420
},
{
"epoch": 0.7403332564371018,
"grad_norm": 4.77728942914678,
"learning_rate": 2.5976146528467984e-06,
"loss": 0.3602,
"step": 10430
},
{
"epoch": 0.7410430678047308,
"grad_norm": 16.82021280745509,
"learning_rate": 2.590515405367031e-06,
"loss": 0.3765,
"step": 10440
},
{
"epoch": 0.74175287917236,
"grad_norm": 4.7659520678895735,
"learning_rate": 2.5834161578872642e-06,
"loss": 0.3557,
"step": 10450
},
{
"epoch": 0.742462690539989,
"grad_norm": 5.846901706253607,
"learning_rate": 2.576316910407497e-06,
"loss": 0.3574,
"step": 10460
},
{
"epoch": 0.7431725019076181,
"grad_norm": 5.00717365628058,
"learning_rate": 2.56921766292773e-06,
"loss": 0.371,
"step": 10470
},
{
"epoch": 0.7438823132752471,
"grad_norm": 12.812616706907704,
"learning_rate": 2.5621184154479627e-06,
"loss": 0.3612,
"step": 10480
},
{
"epoch": 0.7445921246428762,
"grad_norm": 2.7312101929568375,
"learning_rate": 2.5550191679681958e-06,
"loss": 0.3551,
"step": 10490
},
{
"epoch": 0.7453019360105052,
"grad_norm": 3.0759041075210782,
"learning_rate": 2.5479199204884285e-06,
"loss": 0.3574,
"step": 10500
},
{
"epoch": 0.7460117473781342,
"grad_norm": 7.165278043719281,
"learning_rate": 2.5408206730086616e-06,
"loss": 0.3605,
"step": 10510
},
{
"epoch": 0.7467215587457633,
"grad_norm": 4.908665990783306,
"learning_rate": 2.533721425528894e-06,
"loss": 0.3479,
"step": 10520
},
{
"epoch": 0.7474313701133923,
"grad_norm": 3.4583261557450227,
"learning_rate": 2.526622178049127e-06,
"loss": 0.3542,
"step": 10530
},
{
"epoch": 0.7481411814810214,
"grad_norm": 11.387458565670322,
"learning_rate": 2.5195229305693596e-06,
"loss": 0.3619,
"step": 10540
},
{
"epoch": 0.7488509928486504,
"grad_norm": 10.198798372329442,
"learning_rate": 2.5124236830895927e-06,
"loss": 0.3434,
"step": 10550
},
{
"epoch": 0.7495608042162796,
"grad_norm": 3.893599380410888,
"learning_rate": 2.5053244356098254e-06,
"loss": 0.362,
"step": 10560
},
{
"epoch": 0.7502706155839086,
"grad_norm": 5.107597028464082,
"learning_rate": 2.4982251881300585e-06,
"loss": 0.3688,
"step": 10570
},
{
"epoch": 0.7509804269515377,
"grad_norm": 4.219068583835792,
"learning_rate": 2.491125940650291e-06,
"loss": 0.3649,
"step": 10580
},
{
"epoch": 0.7516902383191667,
"grad_norm": 4.535592066198855,
"learning_rate": 2.4840266931705243e-06,
"loss": 0.37,
"step": 10590
},
{
"epoch": 0.7524000496867957,
"grad_norm": 3.541264339618074,
"learning_rate": 2.476927445690757e-06,
"loss": 0.3679,
"step": 10600
},
{
"epoch": 0.7531098610544248,
"grad_norm": 4.7884449114332845,
"learning_rate": 2.4698281982109896e-06,
"loss": 0.3472,
"step": 10610
},
{
"epoch": 0.7538196724220538,
"grad_norm": 8.667808097909838,
"learning_rate": 2.4627289507312227e-06,
"loss": 0.3704,
"step": 10620
},
{
"epoch": 0.7545294837896829,
"grad_norm": 4.925434074834849,
"learning_rate": 2.455629703251456e-06,
"loss": 0.3701,
"step": 10630
},
{
"epoch": 0.7552392951573119,
"grad_norm": 3.8594886335750807,
"learning_rate": 2.4485304557716885e-06,
"loss": 0.3662,
"step": 10640
},
{
"epoch": 0.755949106524941,
"grad_norm": 4.971536391123703,
"learning_rate": 2.441431208291921e-06,
"loss": 0.35,
"step": 10650
},
{
"epoch": 0.75665891789257,
"grad_norm": 15.055144352578429,
"learning_rate": 2.434331960812154e-06,
"loss": 0.3584,
"step": 10660
},
{
"epoch": 0.757368729260199,
"grad_norm": 14.432076661811932,
"learning_rate": 2.427232713332387e-06,
"loss": 0.3621,
"step": 10670
},
{
"epoch": 0.7580785406278282,
"grad_norm": 9.810669772230819,
"learning_rate": 2.42013346585262e-06,
"loss": 0.3588,
"step": 10680
},
{
"epoch": 0.7587883519954572,
"grad_norm": 5.765479927608821,
"learning_rate": 2.4130342183728527e-06,
"loss": 0.3549,
"step": 10690
},
{
"epoch": 0.7594981633630863,
"grad_norm": 13.617197754978974,
"learning_rate": 2.4059349708930854e-06,
"loss": 0.3759,
"step": 10700
},
{
"epoch": 0.7602079747307153,
"grad_norm": 5.614482278416453,
"learning_rate": 2.3988357234133185e-06,
"loss": 0.3376,
"step": 10710
},
{
"epoch": 0.7609177860983444,
"grad_norm": 17.701642596831444,
"learning_rate": 2.391736475933551e-06,
"loss": 0.3647,
"step": 10720
},
{
"epoch": 0.7616275974659734,
"grad_norm": 4.910333781437824,
"learning_rate": 2.3846372284537843e-06,
"loss": 0.3643,
"step": 10730
},
{
"epoch": 0.7623374088336025,
"grad_norm": 3.415309685272355,
"learning_rate": 2.377537980974017e-06,
"loss": 0.3488,
"step": 10740
},
{
"epoch": 0.7630472202012315,
"grad_norm": 4.350903829153794,
"learning_rate": 2.3704387334942497e-06,
"loss": 0.3577,
"step": 10750
},
{
"epoch": 0.7637570315688605,
"grad_norm": 3.9361079752185435,
"learning_rate": 2.3633394860144828e-06,
"loss": 0.3591,
"step": 10760
},
{
"epoch": 0.7644668429364896,
"grad_norm": 5.913083445040196,
"learning_rate": 2.3562402385347154e-06,
"loss": 0.3486,
"step": 10770
},
{
"epoch": 0.7651766543041186,
"grad_norm": 5.982161931863015,
"learning_rate": 2.3491409910549485e-06,
"loss": 0.3714,
"step": 10780
},
{
"epoch": 0.7658864656717478,
"grad_norm": 4.5231254195655906,
"learning_rate": 2.3420417435751812e-06,
"loss": 0.3534,
"step": 10790
},
{
"epoch": 0.7665962770393768,
"grad_norm": 5.099871081954513,
"learning_rate": 2.334942496095414e-06,
"loss": 0.3509,
"step": 10800
},
{
"epoch": 0.7673060884070059,
"grad_norm": 3.361247181502804,
"learning_rate": 2.327843248615647e-06,
"loss": 0.3692,
"step": 10810
},
{
"epoch": 0.7680158997746349,
"grad_norm": 6.553423618292367,
"learning_rate": 2.3207440011358797e-06,
"loss": 0.353,
"step": 10820
},
{
"epoch": 0.768725711142264,
"grad_norm": 2.985537513367268,
"learning_rate": 2.3136447536561128e-06,
"loss": 0.3498,
"step": 10830
},
{
"epoch": 0.769435522509893,
"grad_norm": 3.0266471519507427,
"learning_rate": 2.3065455061763455e-06,
"loss": 0.3563,
"step": 10840
},
{
"epoch": 0.770145333877522,
"grad_norm": 17.644165005698888,
"learning_rate": 2.299446258696578e-06,
"loss": 0.3662,
"step": 10850
},
{
"epoch": 0.7708551452451511,
"grad_norm": 3.1894412768611016,
"learning_rate": 2.2923470112168112e-06,
"loss": 0.3503,
"step": 10860
},
{
"epoch": 0.7715649566127801,
"grad_norm": 4.492544324422795,
"learning_rate": 2.285247763737044e-06,
"loss": 0.3436,
"step": 10870
},
{
"epoch": 0.7722747679804092,
"grad_norm": 4.173829674998731,
"learning_rate": 2.278148516257277e-06,
"loss": 0.363,
"step": 10880
},
{
"epoch": 0.7729845793480382,
"grad_norm": 3.114718418646357,
"learning_rate": 2.2710492687775097e-06,
"loss": 0.3368,
"step": 10890
},
{
"epoch": 0.7736943907156673,
"grad_norm": 2.6323429503484443,
"learning_rate": 2.2639500212977424e-06,
"loss": 0.3489,
"step": 10900
},
{
"epoch": 0.7744042020832964,
"grad_norm": 2.8865277064459223,
"learning_rate": 2.2568507738179755e-06,
"loss": 0.3571,
"step": 10910
},
{
"epoch": 0.7751140134509255,
"grad_norm": 8.888602826244627,
"learning_rate": 2.249751526338208e-06,
"loss": 0.3399,
"step": 10920
},
{
"epoch": 0.7758238248185545,
"grad_norm": 3.532724353902858,
"learning_rate": 2.2426522788584412e-06,
"loss": 0.3493,
"step": 10930
},
{
"epoch": 0.7765336361861835,
"grad_norm": 3.6781547439101883,
"learning_rate": 2.235553031378674e-06,
"loss": 0.3462,
"step": 10940
},
{
"epoch": 0.7772434475538126,
"grad_norm": 13.16004359433701,
"learning_rate": 2.2284537838989066e-06,
"loss": 0.3649,
"step": 10950
},
{
"epoch": 0.7779532589214416,
"grad_norm": 9.642968589987298,
"learning_rate": 2.2213545364191397e-06,
"loss": 0.3582,
"step": 10960
},
{
"epoch": 0.7786630702890707,
"grad_norm": 6.16050392324128,
"learning_rate": 2.2142552889393724e-06,
"loss": 0.3624,
"step": 10970
},
{
"epoch": 0.7793728816566997,
"grad_norm": 4.012346442724565,
"learning_rate": 2.2071560414596055e-06,
"loss": 0.3448,
"step": 10980
},
{
"epoch": 0.7800826930243288,
"grad_norm": 2.6066193255622956,
"learning_rate": 2.2000567939798386e-06,
"loss": 0.3644,
"step": 10990
},
{
"epoch": 0.7807925043919578,
"grad_norm": 7.331639609512875,
"learning_rate": 2.1929575465000713e-06,
"loss": 0.3515,
"step": 11000
},
{
"epoch": 0.7815023157595868,
"grad_norm": 2.990816174000455,
"learning_rate": 2.185858299020304e-06,
"loss": 0.3505,
"step": 11010
},
{
"epoch": 0.782212127127216,
"grad_norm": 3.6112792490950554,
"learning_rate": 2.178759051540537e-06,
"loss": 0.3548,
"step": 11020
},
{
"epoch": 0.782921938494845,
"grad_norm": 3.8221043132066286,
"learning_rate": 2.1716598040607697e-06,
"loss": 0.3571,
"step": 11030
},
{
"epoch": 0.7836317498624741,
"grad_norm": 7.476265982563856,
"learning_rate": 2.164560556581003e-06,
"loss": 0.3428,
"step": 11040
},
{
"epoch": 0.7843415612301031,
"grad_norm": 5.554911455235443,
"learning_rate": 2.1574613091012355e-06,
"loss": 0.354,
"step": 11050
},
{
"epoch": 0.7850513725977322,
"grad_norm": 2.9298081851011117,
"learning_rate": 2.150362061621468e-06,
"loss": 0.3597,
"step": 11060
},
{
"epoch": 0.7857611839653612,
"grad_norm": 5.325097733237352,
"learning_rate": 2.1432628141417013e-06,
"loss": 0.3486,
"step": 11070
},
{
"epoch": 0.7864709953329903,
"grad_norm": 3.5814394523109114,
"learning_rate": 2.136163566661934e-06,
"loss": 0.3544,
"step": 11080
},
{
"epoch": 0.7871808067006193,
"grad_norm": 3.6972554376986,
"learning_rate": 2.129064319182167e-06,
"loss": 0.3546,
"step": 11090
},
{
"epoch": 0.7878906180682483,
"grad_norm": 6.754098899246775,
"learning_rate": 2.1219650717023997e-06,
"loss": 0.3537,
"step": 11100
},
{
"epoch": 0.7886004294358774,
"grad_norm": 3.3122898855719876,
"learning_rate": 2.1148658242226324e-06,
"loss": 0.3645,
"step": 11110
},
{
"epoch": 0.7893102408035064,
"grad_norm": 2.8223728276754128,
"learning_rate": 2.1077665767428655e-06,
"loss": 0.3599,
"step": 11120
},
{
"epoch": 0.7900200521711355,
"grad_norm": 2.5012481292133937,
"learning_rate": 2.100667329263098e-06,
"loss": 0.3486,
"step": 11130
},
{
"epoch": 0.7907298635387646,
"grad_norm": 11.033197138630223,
"learning_rate": 2.0935680817833313e-06,
"loss": 0.3467,
"step": 11140
},
{
"epoch": 0.7914396749063937,
"grad_norm": 3.730389968284293,
"learning_rate": 2.086468834303564e-06,
"loss": 0.3544,
"step": 11150
},
{
"epoch": 0.7921494862740227,
"grad_norm": 5.898064410181565,
"learning_rate": 2.0793695868237967e-06,
"loss": 0.3477,
"step": 11160
},
{
"epoch": 0.7928592976416518,
"grad_norm": 4.55198088261442,
"learning_rate": 2.0722703393440298e-06,
"loss": 0.3527,
"step": 11170
},
{
"epoch": 0.7935691090092808,
"grad_norm": 5.318762071563834,
"learning_rate": 2.0651710918642624e-06,
"loss": 0.3478,
"step": 11180
},
{
"epoch": 0.7942789203769098,
"grad_norm": 6.161214607463883,
"learning_rate": 2.0580718443844955e-06,
"loss": 0.3546,
"step": 11190
},
{
"epoch": 0.7949887317445389,
"grad_norm": 3.1236830623318537,
"learning_rate": 2.0509725969047282e-06,
"loss": 0.3565,
"step": 11200
},
{
"epoch": 0.7956985431121679,
"grad_norm": 4.197839999078878,
"learning_rate": 2.043873349424961e-06,
"loss": 0.3496,
"step": 11210
},
{
"epoch": 0.796408354479797,
"grad_norm": 3.2762330861667515,
"learning_rate": 2.036774101945194e-06,
"loss": 0.348,
"step": 11220
},
{
"epoch": 0.797118165847426,
"grad_norm": 5.961140258537488,
"learning_rate": 2.0296748544654267e-06,
"loss": 0.3637,
"step": 11230
},
{
"epoch": 0.797827977215055,
"grad_norm": 2.0964322412177263,
"learning_rate": 2.0225756069856598e-06,
"loss": 0.341,
"step": 11240
},
{
"epoch": 0.7985377885826842,
"grad_norm": 11.078753928620895,
"learning_rate": 2.0154763595058925e-06,
"loss": 0.3582,
"step": 11250
},
{
"epoch": 0.7992475999503132,
"grad_norm": 11.615859636107096,
"learning_rate": 2.008377112026125e-06,
"loss": 0.3504,
"step": 11260
},
{
"epoch": 0.7999574113179423,
"grad_norm": 9.267486623233392,
"learning_rate": 2.0012778645463582e-06,
"loss": 0.3585,
"step": 11270
},
{
"epoch": 0.8006672226855713,
"grad_norm": 3.7638868565818613,
"learning_rate": 1.994178617066591e-06,
"loss": 0.3572,
"step": 11280
},
{
"epoch": 0.8013770340532004,
"grad_norm": 4.274096264509613,
"learning_rate": 1.987079369586824e-06,
"loss": 0.352,
"step": 11290
},
{
"epoch": 0.8020868454208294,
"grad_norm": 3.0651382288741824,
"learning_rate": 1.979980122107057e-06,
"loss": 0.3487,
"step": 11300
},
{
"epoch": 0.8027966567884585,
"grad_norm": 2.585139354778811,
"learning_rate": 1.9728808746272894e-06,
"loss": 0.3509,
"step": 11310
},
{
"epoch": 0.8035064681560875,
"grad_norm": 3.4507245702670013,
"learning_rate": 1.9657816271475225e-06,
"loss": 0.3605,
"step": 11320
},
{
"epoch": 0.8042162795237165,
"grad_norm": 2.168473869134373,
"learning_rate": 1.9586823796677556e-06,
"loss": 0.3473,
"step": 11330
},
{
"epoch": 0.8049260908913456,
"grad_norm": 3.3138804394827126,
"learning_rate": 1.9515831321879883e-06,
"loss": 0.3451,
"step": 11340
},
{
"epoch": 0.8056359022589746,
"grad_norm": 2.9967871033094284,
"learning_rate": 1.9444838847082214e-06,
"loss": 0.3586,
"step": 11350
},
{
"epoch": 0.8063457136266037,
"grad_norm": 2.218098420224771,
"learning_rate": 1.9373846372284536e-06,
"loss": 0.3629,
"step": 11360
},
{
"epoch": 0.8070555249942328,
"grad_norm": 4.124703498173868,
"learning_rate": 1.9302853897486867e-06,
"loss": 0.349,
"step": 11370
},
{
"epoch": 0.8077653363618619,
"grad_norm": 4.336301638014139,
"learning_rate": 1.92318614226892e-06,
"loss": 0.3474,
"step": 11380
},
{
"epoch": 0.8084751477294909,
"grad_norm": 5.67446885361532,
"learning_rate": 1.9160868947891525e-06,
"loss": 0.3577,
"step": 11390
},
{
"epoch": 0.80918495909712,
"grad_norm": 5.496735292829206,
"learning_rate": 1.9089876473093856e-06,
"loss": 0.3606,
"step": 11400
},
{
"epoch": 0.809894770464749,
"grad_norm": 2.3181036706188505,
"learning_rate": 1.901888399829618e-06,
"loss": 0.3573,
"step": 11410
},
{
"epoch": 0.810604581832378,
"grad_norm": 4.2823563842257695,
"learning_rate": 1.894789152349851e-06,
"loss": 0.3456,
"step": 11420
},
{
"epoch": 0.8113143932000071,
"grad_norm": 9.041186743139388,
"learning_rate": 1.8876899048700838e-06,
"loss": 0.3493,
"step": 11430
},
{
"epoch": 0.8120242045676361,
"grad_norm": 2.135565041402105,
"learning_rate": 1.8805906573903167e-06,
"loss": 0.3573,
"step": 11440
},
{
"epoch": 0.8127340159352652,
"grad_norm": 4.2654812969837295,
"learning_rate": 1.8734914099105498e-06,
"loss": 0.3462,
"step": 11450
},
{
"epoch": 0.8134438273028942,
"grad_norm": 3.0226693302416465,
"learning_rate": 1.8663921624307823e-06,
"loss": 0.3399,
"step": 11460
},
{
"epoch": 0.8141536386705233,
"grad_norm": 5.674429424631266,
"learning_rate": 1.8592929149510152e-06,
"loss": 0.3445,
"step": 11470
},
{
"epoch": 0.8148634500381524,
"grad_norm": 5.107735874370569,
"learning_rate": 1.852193667471248e-06,
"loss": 0.3498,
"step": 11480
},
{
"epoch": 0.8155732614057815,
"grad_norm": 4.211595369240753,
"learning_rate": 1.8450944199914812e-06,
"loss": 0.3509,
"step": 11490
},
{
"epoch": 0.8162830727734105,
"grad_norm": 3.2874196387814485,
"learning_rate": 1.837995172511714e-06,
"loss": 0.352,
"step": 11500
},
{
"epoch": 0.8169928841410395,
"grad_norm": 2.51051446421893,
"learning_rate": 1.8308959250319465e-06,
"loss": 0.3445,
"step": 11510
},
{
"epoch": 0.8177026955086686,
"grad_norm": 13.267874952448258,
"learning_rate": 1.8237966775521796e-06,
"loss": 0.354,
"step": 11520
},
{
"epoch": 0.8184125068762976,
"grad_norm": 4.900767095828628,
"learning_rate": 1.8166974300724125e-06,
"loss": 0.3594,
"step": 11530
},
{
"epoch": 0.8191223182439267,
"grad_norm": 8.3230418317363,
"learning_rate": 1.8095981825926454e-06,
"loss": 0.3471,
"step": 11540
},
{
"epoch": 0.8198321296115557,
"grad_norm": 2.8346340256917815,
"learning_rate": 1.8024989351128783e-06,
"loss": 0.3695,
"step": 11550
},
{
"epoch": 0.8205419409791848,
"grad_norm": 5.533189262204602,
"learning_rate": 1.795399687633111e-06,
"loss": 0.3728,
"step": 11560
},
{
"epoch": 0.8212517523468138,
"grad_norm": 3.187071233846852,
"learning_rate": 1.7883004401533439e-06,
"loss": 0.3464,
"step": 11570
},
{
"epoch": 0.8219615637144428,
"grad_norm": 3.9314257894883937,
"learning_rate": 1.7812011926735768e-06,
"loss": 0.3532,
"step": 11580
},
{
"epoch": 0.8226713750820719,
"grad_norm": 3.6730541227348277,
"learning_rate": 1.7741019451938097e-06,
"loss": 0.3565,
"step": 11590
},
{
"epoch": 0.823381186449701,
"grad_norm": 2.9136274666194306,
"learning_rate": 1.7670026977140426e-06,
"loss": 0.3603,
"step": 11600
},
{
"epoch": 0.8240909978173301,
"grad_norm": 6.106992201577366,
"learning_rate": 1.7599034502342754e-06,
"loss": 0.3484,
"step": 11610
},
{
"epoch": 0.8248008091849591,
"grad_norm": 4.230462903274037,
"learning_rate": 1.7528042027545081e-06,
"loss": 0.35,
"step": 11620
},
{
"epoch": 0.8255106205525882,
"grad_norm": 3.376064932155992,
"learning_rate": 1.745704955274741e-06,
"loss": 0.35,
"step": 11630
},
{
"epoch": 0.8262204319202172,
"grad_norm": 2.8424779046250612,
"learning_rate": 1.738605707794974e-06,
"loss": 0.3552,
"step": 11640
},
{
"epoch": 0.8269302432878463,
"grad_norm": 3.6044824322491347,
"learning_rate": 1.7315064603152068e-06,
"loss": 0.3633,
"step": 11650
},
{
"epoch": 0.8276400546554753,
"grad_norm": 3.3041226058016324,
"learning_rate": 1.7244072128354397e-06,
"loss": 0.3453,
"step": 11660
},
{
"epoch": 0.8283498660231043,
"grad_norm": 3.461976575510189,
"learning_rate": 1.7173079653556724e-06,
"loss": 0.3607,
"step": 11670
},
{
"epoch": 0.8290596773907334,
"grad_norm": 3.96624408516477,
"learning_rate": 1.7102087178759052e-06,
"loss": 0.3431,
"step": 11680
},
{
"epoch": 0.8297694887583624,
"grad_norm": 10.446490548963004,
"learning_rate": 1.7031094703961381e-06,
"loss": 0.3518,
"step": 11690
},
{
"epoch": 0.8304793001259915,
"grad_norm": 2.4894424633296888,
"learning_rate": 1.696010222916371e-06,
"loss": 0.3618,
"step": 11700
},
{
"epoch": 0.8311891114936206,
"grad_norm": 3.7097939930537494,
"learning_rate": 1.688910975436604e-06,
"loss": 0.3577,
"step": 11710
},
{
"epoch": 0.8318989228612497,
"grad_norm": 2.591589818986439,
"learning_rate": 1.6818117279568366e-06,
"loss": 0.3454,
"step": 11720
},
{
"epoch": 0.8326087342288787,
"grad_norm": 3.0415000039562816,
"learning_rate": 1.6747124804770695e-06,
"loss": 0.3514,
"step": 11730
},
{
"epoch": 0.8333185455965078,
"grad_norm": 3.185465708245909,
"learning_rate": 1.6676132329973024e-06,
"loss": 0.3437,
"step": 11740
},
{
"epoch": 0.8340283569641368,
"grad_norm": 8.153250864972724,
"learning_rate": 1.6605139855175353e-06,
"loss": 0.3418,
"step": 11750
},
{
"epoch": 0.8347381683317658,
"grad_norm": 17.15311701699765,
"learning_rate": 1.6534147380377682e-06,
"loss": 0.3533,
"step": 11760
},
{
"epoch": 0.8354479796993949,
"grad_norm": 2.956498750624732,
"learning_rate": 1.6463154905580008e-06,
"loss": 0.3539,
"step": 11770
},
{
"epoch": 0.8361577910670239,
"grad_norm": 5.182422880739596,
"learning_rate": 1.6392162430782337e-06,
"loss": 0.3543,
"step": 11780
},
{
"epoch": 0.836867602434653,
"grad_norm": 5.245759433932608,
"learning_rate": 1.6321169955984666e-06,
"loss": 0.3506,
"step": 11790
},
{
"epoch": 0.837577413802282,
"grad_norm": 2.8777113855306,
"learning_rate": 1.6250177481186997e-06,
"loss": 0.351,
"step": 11800
},
{
"epoch": 0.838287225169911,
"grad_norm": 3.317900354948997,
"learning_rate": 1.6179185006389326e-06,
"loss": 0.3426,
"step": 11810
},
{
"epoch": 0.8389970365375401,
"grad_norm": 2.7259998460321295,
"learning_rate": 1.610819253159165e-06,
"loss": 0.3416,
"step": 11820
},
{
"epoch": 0.8397068479051693,
"grad_norm": 7.203501395811214,
"learning_rate": 1.603720005679398e-06,
"loss": 0.346,
"step": 11830
},
{
"epoch": 0.8404166592727983,
"grad_norm": 3.5281319520469343,
"learning_rate": 1.596620758199631e-06,
"loss": 0.3415,
"step": 11840
},
{
"epoch": 0.8411264706404273,
"grad_norm": 2.8068995456792085,
"learning_rate": 1.589521510719864e-06,
"loss": 0.3506,
"step": 11850
},
{
"epoch": 0.8418362820080564,
"grad_norm": 5.8571413992691,
"learning_rate": 1.5824222632400968e-06,
"loss": 0.3492,
"step": 11860
},
{
"epoch": 0.8425460933756854,
"grad_norm": 2.8473277239745625,
"learning_rate": 1.5753230157603295e-06,
"loss": 0.3464,
"step": 11870
},
{
"epoch": 0.8432559047433145,
"grad_norm": 2.743001963303042,
"learning_rate": 1.5682237682805624e-06,
"loss": 0.3457,
"step": 11880
},
{
"epoch": 0.8439657161109435,
"grad_norm": 10.213481491528695,
"learning_rate": 1.5611245208007953e-06,
"loss": 0.3578,
"step": 11890
},
{
"epoch": 0.8446755274785726,
"grad_norm": 3.735755256117381,
"learning_rate": 1.5540252733210282e-06,
"loss": 0.3503,
"step": 11900
},
{
"epoch": 0.8453853388462016,
"grad_norm": 4.459890794830131,
"learning_rate": 1.546926025841261e-06,
"loss": 0.3409,
"step": 11910
},
{
"epoch": 0.8460951502138306,
"grad_norm": 4.8029617986261295,
"learning_rate": 1.5398267783614938e-06,
"loss": 0.3538,
"step": 11920
},
{
"epoch": 0.8468049615814597,
"grad_norm": 7.056776646894436,
"learning_rate": 1.5327275308817267e-06,
"loss": 0.346,
"step": 11930
},
{
"epoch": 0.8475147729490888,
"grad_norm": 7.364554673266408,
"learning_rate": 1.5256282834019595e-06,
"loss": 0.3478,
"step": 11940
},
{
"epoch": 0.8482245843167179,
"grad_norm": 3.605377806044163,
"learning_rate": 1.5185290359221924e-06,
"loss": 0.3499,
"step": 11950
},
{
"epoch": 0.8489343956843469,
"grad_norm": 2.452400869581193,
"learning_rate": 1.5114297884424253e-06,
"loss": 0.339,
"step": 11960
},
{
"epoch": 0.849644207051976,
"grad_norm": 2.870621078183671,
"learning_rate": 1.504330540962658e-06,
"loss": 0.3441,
"step": 11970
},
{
"epoch": 0.850354018419605,
"grad_norm": 4.473314694561015,
"learning_rate": 1.4972312934828909e-06,
"loss": 0.3559,
"step": 11980
},
{
"epoch": 0.851063829787234,
"grad_norm": 5.114834992133615,
"learning_rate": 1.4901320460031238e-06,
"loss": 0.3541,
"step": 11990
},
{
"epoch": 0.8517736411548631,
"grad_norm": 12.083657543428806,
"learning_rate": 1.4830327985233567e-06,
"loss": 0.358,
"step": 12000
},
{
"epoch": 0.8524834525224921,
"grad_norm": 3.7361409384047923,
"learning_rate": 1.4759335510435896e-06,
"loss": 0.3395,
"step": 12010
},
{
"epoch": 0.8531932638901212,
"grad_norm": 3.4424635097779657,
"learning_rate": 1.4688343035638222e-06,
"loss": 0.3593,
"step": 12020
},
{
"epoch": 0.8539030752577502,
"grad_norm": 1.9645069008952134,
"learning_rate": 1.4617350560840551e-06,
"loss": 0.3508,
"step": 12030
},
{
"epoch": 0.8546128866253793,
"grad_norm": 4.627652849790996,
"learning_rate": 1.454635808604288e-06,
"loss": 0.3408,
"step": 12040
},
{
"epoch": 0.8553226979930083,
"grad_norm": 3.831924600437753,
"learning_rate": 1.447536561124521e-06,
"loss": 0.3487,
"step": 12050
},
{
"epoch": 0.8560325093606375,
"grad_norm": 4.570169273747359,
"learning_rate": 1.4404373136447538e-06,
"loss": 0.3415,
"step": 12060
},
{
"epoch": 0.8567423207282665,
"grad_norm": 4.6135182738223595,
"learning_rate": 1.4333380661649865e-06,
"loss": 0.3604,
"step": 12070
},
{
"epoch": 0.8574521320958955,
"grad_norm": 4.751574062951781,
"learning_rate": 1.4262388186852194e-06,
"loss": 0.3636,
"step": 12080
},
{
"epoch": 0.8581619434635246,
"grad_norm": 3.378379003665899,
"learning_rate": 1.4191395712054523e-06,
"loss": 0.3432,
"step": 12090
},
{
"epoch": 0.8588717548311536,
"grad_norm": 16.540688675093385,
"learning_rate": 1.4120403237256851e-06,
"loss": 0.3396,
"step": 12100
},
{
"epoch": 0.8595815661987827,
"grad_norm": 4.814104030359969,
"learning_rate": 1.404941076245918e-06,
"loss": 0.3461,
"step": 12110
},
{
"epoch": 0.8602913775664117,
"grad_norm": 10.051601410520883,
"learning_rate": 1.3978418287661507e-06,
"loss": 0.3447,
"step": 12120
},
{
"epoch": 0.8610011889340408,
"grad_norm": 2.642610961406552,
"learning_rate": 1.3907425812863836e-06,
"loss": 0.3361,
"step": 12130
},
{
"epoch": 0.8617110003016698,
"grad_norm": 4.614329866790318,
"learning_rate": 1.3836433338066165e-06,
"loss": 0.3528,
"step": 12140
},
{
"epoch": 0.8624208116692988,
"grad_norm": 5.744791519089807,
"learning_rate": 1.3765440863268496e-06,
"loss": 0.3607,
"step": 12150
},
{
"epoch": 0.8631306230369279,
"grad_norm": 3.9315757108747618,
"learning_rate": 1.3694448388470825e-06,
"loss": 0.3598,
"step": 12160
},
{
"epoch": 0.8638404344045569,
"grad_norm": 5.812032059514415,
"learning_rate": 1.3623455913673154e-06,
"loss": 0.3406,
"step": 12170
},
{
"epoch": 0.8645502457721861,
"grad_norm": 3.1863830261887784,
"learning_rate": 1.3552463438875478e-06,
"loss": 0.3435,
"step": 12180
},
{
"epoch": 0.8652600571398151,
"grad_norm": 3.164333810889643,
"learning_rate": 1.348147096407781e-06,
"loss": 0.3477,
"step": 12190
},
{
"epoch": 0.8659698685074442,
"grad_norm": 4.132090281780686,
"learning_rate": 1.3410478489280138e-06,
"loss": 0.3476,
"step": 12200
},
{
"epoch": 0.8666796798750732,
"grad_norm": 3.050674443165291,
"learning_rate": 1.3339486014482467e-06,
"loss": 0.3451,
"step": 12210
},
{
"epoch": 0.8673894912427023,
"grad_norm": 5.9765372634611476,
"learning_rate": 1.3268493539684796e-06,
"loss": 0.3516,
"step": 12220
},
{
"epoch": 0.8680993026103313,
"grad_norm": 10.801904177839997,
"learning_rate": 1.3197501064887123e-06,
"loss": 0.3525,
"step": 12230
},
{
"epoch": 0.8688091139779603,
"grad_norm": 10.795290079471496,
"learning_rate": 1.3126508590089452e-06,
"loss": 0.3458,
"step": 12240
},
{
"epoch": 0.8695189253455894,
"grad_norm": 5.185082480943749,
"learning_rate": 1.305551611529178e-06,
"loss": 0.3471,
"step": 12250
},
{
"epoch": 0.8702287367132184,
"grad_norm": 5.967453058115287,
"learning_rate": 1.298452364049411e-06,
"loss": 0.3593,
"step": 12260
},
{
"epoch": 0.8709385480808475,
"grad_norm": 2.9260514202439807,
"learning_rate": 1.2913531165696439e-06,
"loss": 0.3401,
"step": 12270
},
{
"epoch": 0.8716483594484765,
"grad_norm": 3.5904246593138924,
"learning_rate": 1.2842538690898765e-06,
"loss": 0.3407,
"step": 12280
},
{
"epoch": 0.8723581708161057,
"grad_norm": 5.983622275696177,
"learning_rate": 1.2771546216101094e-06,
"loss": 0.3453,
"step": 12290
},
{
"epoch": 0.8730679821837347,
"grad_norm": 4.330501853746522,
"learning_rate": 1.2700553741303423e-06,
"loss": 0.3494,
"step": 12300
},
{
"epoch": 0.8737777935513638,
"grad_norm": 3.642467957948953,
"learning_rate": 1.2629561266505752e-06,
"loss": 0.3458,
"step": 12310
},
{
"epoch": 0.8744876049189928,
"grad_norm": 5.610238111701037,
"learning_rate": 1.255856879170808e-06,
"loss": 0.3533,
"step": 12320
},
{
"epoch": 0.8751974162866218,
"grad_norm": 5.47126817738485,
"learning_rate": 1.248757631691041e-06,
"loss": 0.3685,
"step": 12330
},
{
"epoch": 0.8759072276542509,
"grad_norm": 2.9438005039273953,
"learning_rate": 1.2416583842112737e-06,
"loss": 0.3325,
"step": 12340
},
{
"epoch": 0.8766170390218799,
"grad_norm": 3.7896440417507415,
"learning_rate": 1.2345591367315065e-06,
"loss": 0.3445,
"step": 12350
},
{
"epoch": 0.877326850389509,
"grad_norm": 5.754468251004695,
"learning_rate": 1.2274598892517394e-06,
"loss": 0.3374,
"step": 12360
},
{
"epoch": 0.878036661757138,
"grad_norm": 4.267624406753751,
"learning_rate": 1.2203606417719723e-06,
"loss": 0.341,
"step": 12370
},
{
"epoch": 0.8787464731247671,
"grad_norm": 3.1963277785921993,
"learning_rate": 1.2132613942922052e-06,
"loss": 0.3381,
"step": 12380
},
{
"epoch": 0.8794562844923961,
"grad_norm": 6.653906616284059,
"learning_rate": 1.206162146812438e-06,
"loss": 0.3506,
"step": 12390
},
{
"epoch": 0.8801660958600251,
"grad_norm": 3.897977105597471,
"learning_rate": 1.1990628993326708e-06,
"loss": 0.3475,
"step": 12400
},
{
"epoch": 0.8808759072276543,
"grad_norm": 4.962651576299262,
"learning_rate": 1.1919636518529037e-06,
"loss": 0.349,
"step": 12410
},
{
"epoch": 0.8815857185952833,
"grad_norm": 5.136741390825168,
"learning_rate": 1.1848644043731366e-06,
"loss": 0.3465,
"step": 12420
},
{
"epoch": 0.8822955299629124,
"grad_norm": 4.445543310701251,
"learning_rate": 1.1777651568933695e-06,
"loss": 0.3548,
"step": 12430
},
{
"epoch": 0.8830053413305414,
"grad_norm": 20.40372637998409,
"learning_rate": 1.1706659094136021e-06,
"loss": 0.3583,
"step": 12440
},
{
"epoch": 0.8837151526981705,
"grad_norm": 3.982374880512643,
"learning_rate": 1.163566661933835e-06,
"loss": 0.3317,
"step": 12450
},
{
"epoch": 0.8844249640657995,
"grad_norm": 32.55413999411799,
"learning_rate": 1.156467414454068e-06,
"loss": 0.3514,
"step": 12460
},
{
"epoch": 0.8851347754334286,
"grad_norm": 5.420145750098025,
"learning_rate": 1.1493681669743008e-06,
"loss": 0.3318,
"step": 12470
},
{
"epoch": 0.8858445868010576,
"grad_norm": 3.685854173880656,
"learning_rate": 1.1422689194945337e-06,
"loss": 0.3429,
"step": 12480
},
{
"epoch": 0.8865543981686866,
"grad_norm": 4.6974765931702605,
"learning_rate": 1.1351696720147664e-06,
"loss": 0.357,
"step": 12490
},
{
"epoch": 0.8872642095363157,
"grad_norm": 6.795504660900696,
"learning_rate": 1.1280704245349995e-06,
"loss": 0.3531,
"step": 12500
},
{
"epoch": 0.8879740209039447,
"grad_norm": 4.927867549600845,
"learning_rate": 1.1209711770552324e-06,
"loss": 0.3647,
"step": 12510
},
{
"epoch": 0.8886838322715739,
"grad_norm": 70.3319920713418,
"learning_rate": 1.113871929575465e-06,
"loss": 0.3481,
"step": 12520
},
{
"epoch": 0.8893936436392029,
"grad_norm": 29.187269789239732,
"learning_rate": 1.106772682095698e-06,
"loss": 0.3487,
"step": 12530
},
{
"epoch": 0.890103455006832,
"grad_norm": 2.619165987059257,
"learning_rate": 1.0996734346159308e-06,
"loss": 0.3557,
"step": 12540
},
{
"epoch": 0.890813266374461,
"grad_norm": 5.724483375383932,
"learning_rate": 1.0925741871361637e-06,
"loss": 0.3587,
"step": 12550
},
{
"epoch": 0.89152307774209,
"grad_norm": 4.2668973076468,
"learning_rate": 1.0854749396563966e-06,
"loss": 0.3462,
"step": 12560
},
{
"epoch": 0.8922328891097191,
"grad_norm": 9.234745768295488,
"learning_rate": 1.0783756921766293e-06,
"loss": 0.3537,
"step": 12570
},
{
"epoch": 0.8929427004773481,
"grad_norm": 3.665665785771113,
"learning_rate": 1.0712764446968622e-06,
"loss": 0.3643,
"step": 12580
},
{
"epoch": 0.8936525118449772,
"grad_norm": 2.6258893539339656,
"learning_rate": 1.064177197217095e-06,
"loss": 0.3338,
"step": 12590
},
{
"epoch": 0.8943623232126062,
"grad_norm": 3.154491930622594,
"learning_rate": 1.057077949737328e-06,
"loss": 0.3444,
"step": 12600
},
{
"epoch": 0.8950721345802353,
"grad_norm": 7.836052713310002,
"learning_rate": 1.0499787022575608e-06,
"loss": 0.3628,
"step": 12610
},
{
"epoch": 0.8957819459478643,
"grad_norm": 3.8943175763479996,
"learning_rate": 1.0428794547777935e-06,
"loss": 0.3403,
"step": 12620
},
{
"epoch": 0.8964917573154934,
"grad_norm": 15.29553673398478,
"learning_rate": 1.0357802072980264e-06,
"loss": 0.3521,
"step": 12630
},
{
"epoch": 0.8972015686831225,
"grad_norm": 4.442650541355824,
"learning_rate": 1.0286809598182595e-06,
"loss": 0.3342,
"step": 12640
},
{
"epoch": 0.8979113800507516,
"grad_norm": 3.9047310665092247,
"learning_rate": 1.0215817123384922e-06,
"loss": 0.3427,
"step": 12650
},
{
"epoch": 0.8986211914183806,
"grad_norm": 2.1332446352398544,
"learning_rate": 1.014482464858725e-06,
"loss": 0.349,
"step": 12660
},
{
"epoch": 0.8993310027860096,
"grad_norm": 2.8714716164962923,
"learning_rate": 1.0073832173789578e-06,
"loss": 0.357,
"step": 12670
},
{
"epoch": 0.9000408141536387,
"grad_norm": 5.513019742153847,
"learning_rate": 1.0002839698991909e-06,
"loss": 0.3404,
"step": 12680
},
{
"epoch": 0.9007506255212677,
"grad_norm": 3.940129513886605,
"learning_rate": 9.931847224194237e-07,
"loss": 0.3637,
"step": 12690
},
{
"epoch": 0.9014604368888968,
"grad_norm": 3.9515535744587256,
"learning_rate": 9.860854749396564e-07,
"loss": 0.3498,
"step": 12700
},
{
"epoch": 0.9021702482565258,
"grad_norm": 3.0069372274862234,
"learning_rate": 9.789862274598893e-07,
"loss": 0.3398,
"step": 12710
},
{
"epoch": 0.9028800596241549,
"grad_norm": 3.5043049442535072,
"learning_rate": 9.718869799801222e-07,
"loss": 0.339,
"step": 12720
},
{
"epoch": 0.9035898709917839,
"grad_norm": 4.7818413498969825,
"learning_rate": 9.64787732500355e-07,
"loss": 0.3482,
"step": 12730
},
{
"epoch": 0.9042996823594129,
"grad_norm": 2.9143937043517485,
"learning_rate": 9.57688485020588e-07,
"loss": 0.3289,
"step": 12740
},
{
"epoch": 0.9050094937270421,
"grad_norm": 3.530470062388488,
"learning_rate": 9.505892375408208e-07,
"loss": 0.3406,
"step": 12750
},
{
"epoch": 0.9057193050946711,
"grad_norm": 3.6289940943514245,
"learning_rate": 9.434899900610537e-07,
"loss": 0.343,
"step": 12760
},
{
"epoch": 0.9064291164623002,
"grad_norm": 11.92232636233806,
"learning_rate": 9.363907425812864e-07,
"loss": 0.3538,
"step": 12770
},
{
"epoch": 0.9071389278299292,
"grad_norm": 3.3864038291963787,
"learning_rate": 9.292914951015193e-07,
"loss": 0.3361,
"step": 12780
},
{
"epoch": 0.9078487391975583,
"grad_norm": 4.345114007441839,
"learning_rate": 9.221922476217522e-07,
"loss": 0.3307,
"step": 12790
},
{
"epoch": 0.9085585505651873,
"grad_norm": 3.2046183568204687,
"learning_rate": 9.15093000141985e-07,
"loss": 0.3467,
"step": 12800
},
{
"epoch": 0.9092683619328163,
"grad_norm": 3.030859855481088,
"learning_rate": 9.079937526622179e-07,
"loss": 0.3467,
"step": 12810
},
{
"epoch": 0.9099781733004454,
"grad_norm": 4.579582289306875,
"learning_rate": 9.008945051824507e-07,
"loss": 0.3232,
"step": 12820
},
{
"epoch": 0.9106879846680744,
"grad_norm": 3.760749336756688,
"learning_rate": 8.937952577026836e-07,
"loss": 0.3467,
"step": 12830
},
{
"epoch": 0.9113977960357035,
"grad_norm": 3.179418594295822,
"learning_rate": 8.866960102229165e-07,
"loss": 0.3473,
"step": 12840
},
{
"epoch": 0.9121076074033325,
"grad_norm": 3.983021666456075,
"learning_rate": 8.795967627431492e-07,
"loss": 0.3587,
"step": 12850
},
{
"epoch": 0.9128174187709616,
"grad_norm": 2.6025747411648243,
"learning_rate": 8.724975152633821e-07,
"loss": 0.3462,
"step": 12860
},
{
"epoch": 0.9135272301385907,
"grad_norm": 4.3088037403974315,
"learning_rate": 8.65398267783615e-07,
"loss": 0.3428,
"step": 12870
},
{
"epoch": 0.9142370415062198,
"grad_norm": 3.7771085521562644,
"learning_rate": 8.582990203038478e-07,
"loss": 0.3398,
"step": 12880
},
{
"epoch": 0.9149468528738488,
"grad_norm": 2.5115102656996853,
"learning_rate": 8.511997728240808e-07,
"loss": 0.3419,
"step": 12890
},
{
"epoch": 0.9156566642414778,
"grad_norm": 2.646423568943871,
"learning_rate": 8.441005253443135e-07,
"loss": 0.3326,
"step": 12900
},
{
"epoch": 0.9163664756091069,
"grad_norm": 4.308215071259538,
"learning_rate": 8.370012778645465e-07,
"loss": 0.3383,
"step": 12910
},
{
"epoch": 0.9170762869767359,
"grad_norm": 7.273858221430791,
"learning_rate": 8.299020303847794e-07,
"loss": 0.3411,
"step": 12920
},
{
"epoch": 0.917786098344365,
"grad_norm": 3.1600055981634183,
"learning_rate": 8.228027829050122e-07,
"loss": 0.3577,
"step": 12930
},
{
"epoch": 0.918495909711994,
"grad_norm": 6.08255963796338,
"learning_rate": 8.15703535425245e-07,
"loss": 0.3589,
"step": 12940
},
{
"epoch": 0.9192057210796231,
"grad_norm": 4.397885394689723,
"learning_rate": 8.086042879454778e-07,
"loss": 0.3492,
"step": 12950
},
{
"epoch": 0.9199155324472521,
"grad_norm": 227.99760672787355,
"learning_rate": 8.015050404657107e-07,
"loss": 0.3346,
"step": 12960
},
{
"epoch": 0.9206253438148811,
"grad_norm": 2.2307237070418853,
"learning_rate": 7.944057929859436e-07,
"loss": 0.3441,
"step": 12970
},
{
"epoch": 0.9213351551825103,
"grad_norm": 5.180228064847272,
"learning_rate": 7.873065455061764e-07,
"loss": 0.3465,
"step": 12980
},
{
"epoch": 0.9220449665501393,
"grad_norm": 3.2003044967213836,
"learning_rate": 7.802072980264093e-07,
"loss": 0.3425,
"step": 12990
},
{
"epoch": 0.9227547779177684,
"grad_norm": 2.734492726273123,
"learning_rate": 7.731080505466421e-07,
"loss": 0.3403,
"step": 13000
},
{
"epoch": 0.9234645892853974,
"grad_norm": 2.825363146947483,
"learning_rate": 7.66008803066875e-07,
"loss": 0.3644,
"step": 13010
},
{
"epoch": 0.9241744006530265,
"grad_norm": 6.94935444401322,
"learning_rate": 7.589095555871078e-07,
"loss": 0.3498,
"step": 13020
},
{
"epoch": 0.9248842120206555,
"grad_norm": 2.8121909722558924,
"learning_rate": 7.518103081073406e-07,
"loss": 0.356,
"step": 13030
},
{
"epoch": 0.9255940233882846,
"grad_norm": 2.7024231170054946,
"learning_rate": 7.447110606275735e-07,
"loss": 0.3415,
"step": 13040
},
{
"epoch": 0.9263038347559136,
"grad_norm": 2.9617596087956195,
"learning_rate": 7.376118131478063e-07,
"loss": 0.3372,
"step": 13050
},
{
"epoch": 0.9270136461235426,
"grad_norm": 42.5976926609076,
"learning_rate": 7.305125656680392e-07,
"loss": 0.3541,
"step": 13060
},
{
"epoch": 0.9277234574911717,
"grad_norm": 3.769476187835692,
"learning_rate": 7.234133181882722e-07,
"loss": 0.3594,
"step": 13070
},
{
"epoch": 0.9284332688588007,
"grad_norm": 3.749361674379726,
"learning_rate": 7.163140707085049e-07,
"loss": 0.3348,
"step": 13080
},
{
"epoch": 0.9291430802264298,
"grad_norm": 2.5267280447133937,
"learning_rate": 7.092148232287379e-07,
"loss": 0.3579,
"step": 13090
},
{
"epoch": 0.9298528915940589,
"grad_norm": 3.0968195473762097,
"learning_rate": 7.021155757489707e-07,
"loss": 0.3392,
"step": 13100
},
{
"epoch": 0.930562702961688,
"grad_norm": 3.9129176862736674,
"learning_rate": 6.950163282692035e-07,
"loss": 0.3533,
"step": 13110
},
{
"epoch": 0.931272514329317,
"grad_norm": 2.7485456874581122,
"learning_rate": 6.879170807894364e-07,
"loss": 0.3399,
"step": 13120
},
{
"epoch": 0.9319823256969461,
"grad_norm": 4.769184944849367,
"learning_rate": 6.808178333096692e-07,
"loss": 0.3551,
"step": 13130
},
{
"epoch": 0.9326921370645751,
"grad_norm": 2.8275717207772098,
"learning_rate": 6.737185858299021e-07,
"loss": 0.348,
"step": 13140
},
{
"epoch": 0.9334019484322041,
"grad_norm": 2.1023857426151595,
"learning_rate": 6.66619338350135e-07,
"loss": 0.3381,
"step": 13150
},
{
"epoch": 0.9341117597998332,
"grad_norm": 2.8745163990655125,
"learning_rate": 6.595200908703678e-07,
"loss": 0.3488,
"step": 13160
},
{
"epoch": 0.9348215711674622,
"grad_norm": 3.97821451395574,
"learning_rate": 6.524208433906007e-07,
"loss": 0.349,
"step": 13170
},
{
"epoch": 0.9355313825350913,
"grad_norm": 7.304369226663597,
"learning_rate": 6.453215959108335e-07,
"loss": 0.352,
"step": 13180
},
{
"epoch": 0.9362411939027203,
"grad_norm": 4.654909122469299,
"learning_rate": 6.382223484310663e-07,
"loss": 0.3478,
"step": 13190
},
{
"epoch": 0.9369510052703494,
"grad_norm": 3.4074758383445296,
"learning_rate": 6.311231009512992e-07,
"loss": 0.3265,
"step": 13200
},
{
"epoch": 0.9376608166379785,
"grad_norm": 2.8891732151802687,
"learning_rate": 6.24023853471532e-07,
"loss": 0.342,
"step": 13210
},
{
"epoch": 0.9383706280056076,
"grad_norm": 4.315712149288758,
"learning_rate": 6.169246059917649e-07,
"loss": 0.3542,
"step": 13220
},
{
"epoch": 0.9390804393732366,
"grad_norm": 4.202849073092827,
"learning_rate": 6.098253585119978e-07,
"loss": 0.3464,
"step": 13230
},
{
"epoch": 0.9397902507408656,
"grad_norm": 4.402135376104271,
"learning_rate": 6.027261110322307e-07,
"loss": 0.3493,
"step": 13240
},
{
"epoch": 0.9405000621084947,
"grad_norm": 3.3375797449619804,
"learning_rate": 5.956268635524635e-07,
"loss": 0.3431,
"step": 13250
},
{
"epoch": 0.9412098734761237,
"grad_norm": 2.58448811647569,
"learning_rate": 5.885276160726964e-07,
"loss": 0.3516,
"step": 13260
},
{
"epoch": 0.9419196848437528,
"grad_norm": 3.1207357827554216,
"learning_rate": 5.814283685929293e-07,
"loss": 0.3469,
"step": 13270
},
{
"epoch": 0.9426294962113818,
"grad_norm": 5.535335579042853,
"learning_rate": 5.74329121113162e-07,
"loss": 0.3411,
"step": 13280
},
{
"epoch": 0.9433393075790109,
"grad_norm": 4.157192002051246,
"learning_rate": 5.672298736333949e-07,
"loss": 0.3357,
"step": 13290
},
{
"epoch": 0.9440491189466399,
"grad_norm": 4.609541473632524,
"learning_rate": 5.601306261536277e-07,
"loss": 0.3297,
"step": 13300
},
{
"epoch": 0.9447589303142689,
"grad_norm": 4.556290013887312,
"learning_rate": 5.530313786738606e-07,
"loss": 0.3268,
"step": 13310
},
{
"epoch": 0.945468741681898,
"grad_norm": 4.334131807132338,
"learning_rate": 5.459321311940935e-07,
"loss": 0.3582,
"step": 13320
},
{
"epoch": 0.9461785530495271,
"grad_norm": 4.733377355574472,
"learning_rate": 5.388328837143264e-07,
"loss": 0.3366,
"step": 13330
},
{
"epoch": 0.9468883644171562,
"grad_norm": 6.762724277887754,
"learning_rate": 5.317336362345592e-07,
"loss": 0.345,
"step": 13340
},
{
"epoch": 0.9475981757847852,
"grad_norm": 2.9705397730746634,
"learning_rate": 5.246343887547921e-07,
"loss": 0.3465,
"step": 13350
},
{
"epoch": 0.9483079871524143,
"grad_norm": 3.195893348669726,
"learning_rate": 5.175351412750249e-07,
"loss": 0.3348,
"step": 13360
},
{
"epoch": 0.9490177985200433,
"grad_norm": 7.323985518462735,
"learning_rate": 5.104358937952577e-07,
"loss": 0.3543,
"step": 13370
},
{
"epoch": 0.9497276098876724,
"grad_norm": 2.799618403745627,
"learning_rate": 5.033366463154906e-07,
"loss": 0.3431,
"step": 13380
},
{
"epoch": 0.9504374212553014,
"grad_norm": 2.7728876598155843,
"learning_rate": 4.962373988357234e-07,
"loss": 0.3249,
"step": 13390
},
{
"epoch": 0.9511472326229304,
"grad_norm": 5.195465798306655,
"learning_rate": 4.891381513559563e-07,
"loss": 0.3413,
"step": 13400
},
{
"epoch": 0.9518570439905595,
"grad_norm": 10.319650407110732,
"learning_rate": 4.820389038761892e-07,
"loss": 0.3289,
"step": 13410
},
{
"epoch": 0.9525668553581885,
"grad_norm": 3.639550539774894,
"learning_rate": 4.74939656396422e-07,
"loss": 0.358,
"step": 13420
},
{
"epoch": 0.9532766667258176,
"grad_norm": 3.005922518183922,
"learning_rate": 4.6784040891665486e-07,
"loss": 0.3483,
"step": 13430
},
{
"epoch": 0.9539864780934467,
"grad_norm": 3.658172908229024,
"learning_rate": 4.607411614368877e-07,
"loss": 0.3503,
"step": 13440
},
{
"epoch": 0.9546962894610758,
"grad_norm": 3.17836271977541,
"learning_rate": 4.5364191395712053e-07,
"loss": 0.32,
"step": 13450
},
{
"epoch": 0.9554061008287048,
"grad_norm": 2.6050315565816513,
"learning_rate": 4.465426664773535e-07,
"loss": 0.336,
"step": 13460
},
{
"epoch": 0.9561159121963339,
"grad_norm": 2.516963929561299,
"learning_rate": 4.394434189975863e-07,
"loss": 0.3461,
"step": 13470
},
{
"epoch": 0.9568257235639629,
"grad_norm": 5.182889994348168,
"learning_rate": 4.3234417151781915e-07,
"loss": 0.3453,
"step": 13480
},
{
"epoch": 0.9575355349315919,
"grad_norm": 2.2527308195923843,
"learning_rate": 4.25244924038052e-07,
"loss": 0.3394,
"step": 13490
},
{
"epoch": 0.958245346299221,
"grad_norm": 5.702042483324615,
"learning_rate": 4.181456765582848e-07,
"loss": 0.3464,
"step": 13500
},
{
"epoch": 0.95895515766685,
"grad_norm": 4.320082944510015,
"learning_rate": 4.110464290785177e-07,
"loss": 0.361,
"step": 13510
},
{
"epoch": 0.9596649690344791,
"grad_norm": 2.7057123674561683,
"learning_rate": 4.0394718159875055e-07,
"loss": 0.3451,
"step": 13520
},
{
"epoch": 0.9603747804021081,
"grad_norm": 6.179223629975322,
"learning_rate": 3.968479341189834e-07,
"loss": 0.3371,
"step": 13530
},
{
"epoch": 0.9610845917697372,
"grad_norm": 2.5395758819730267,
"learning_rate": 3.897486866392163e-07,
"loss": 0.3587,
"step": 13540
},
{
"epoch": 0.9617944031373662,
"grad_norm": 3.6526335466786835,
"learning_rate": 3.8264943915944917e-07,
"loss": 0.3439,
"step": 13550
},
{
"epoch": 0.9625042145049953,
"grad_norm": 6.134974420857256,
"learning_rate": 3.75550191679682e-07,
"loss": 0.3413,
"step": 13560
},
{
"epoch": 0.9632140258726244,
"grad_norm": 4.231152248304582,
"learning_rate": 3.6845094419991484e-07,
"loss": 0.3412,
"step": 13570
},
{
"epoch": 0.9639238372402534,
"grad_norm": 19.9166049671889,
"learning_rate": 3.613516967201477e-07,
"loss": 0.3457,
"step": 13580
},
{
"epoch": 0.9646336486078825,
"grad_norm": 3.0744926751867565,
"learning_rate": 3.542524492403805e-07,
"loss": 0.3501,
"step": 13590
},
{
"epoch": 0.9653434599755115,
"grad_norm": 4.316210901775538,
"learning_rate": 3.471532017606134e-07,
"loss": 0.3391,
"step": 13600
},
{
"epoch": 0.9660532713431406,
"grad_norm": 5.568442813862272,
"learning_rate": 3.400539542808463e-07,
"loss": 0.3571,
"step": 13610
},
{
"epoch": 0.9667630827107696,
"grad_norm": 2.464997647373043,
"learning_rate": 3.3295470680107913e-07,
"loss": 0.3403,
"step": 13620
},
{
"epoch": 0.9674728940783986,
"grad_norm": 9.203447351864554,
"learning_rate": 3.2585545932131197e-07,
"loss": 0.3372,
"step": 13630
},
{
"epoch": 0.9681827054460277,
"grad_norm": 4.083574237624433,
"learning_rate": 3.187562118415448e-07,
"loss": 0.3523,
"step": 13640
},
{
"epoch": 0.9688925168136567,
"grad_norm": 2.580899686505033,
"learning_rate": 3.1165696436177764e-07,
"loss": 0.3331,
"step": 13650
},
{
"epoch": 0.9696023281812858,
"grad_norm": 4.461792584369479,
"learning_rate": 3.0455771688201053e-07,
"loss": 0.3436,
"step": 13660
},
{
"epoch": 0.9703121395489148,
"grad_norm": 6.002729090963929,
"learning_rate": 2.9745846940224337e-07,
"loss": 0.3392,
"step": 13670
},
{
"epoch": 0.971021950916544,
"grad_norm": 15.908649085501459,
"learning_rate": 2.9035922192247626e-07,
"loss": 0.3401,
"step": 13680
},
{
"epoch": 0.971731762284173,
"grad_norm": 3.2548319133826875,
"learning_rate": 2.832599744427091e-07,
"loss": 0.3466,
"step": 13690
},
{
"epoch": 0.9724415736518021,
"grad_norm": 2.810860141109629,
"learning_rate": 2.76160726962942e-07,
"loss": 0.3445,
"step": 13700
},
{
"epoch": 0.9731513850194311,
"grad_norm": 5.404897398221347,
"learning_rate": 2.690614794831748e-07,
"loss": 0.3464,
"step": 13710
},
{
"epoch": 0.9738611963870601,
"grad_norm": 3.07947902781157,
"learning_rate": 2.6196223200340766e-07,
"loss": 0.3295,
"step": 13720
},
{
"epoch": 0.9745710077546892,
"grad_norm": 3.2905796500928814,
"learning_rate": 2.548629845236405e-07,
"loss": 0.3491,
"step": 13730
},
{
"epoch": 0.9752808191223182,
"grad_norm": 4.431073995020802,
"learning_rate": 2.4776373704387334e-07,
"loss": 0.3483,
"step": 13740
},
{
"epoch": 0.9759906304899473,
"grad_norm": 3.5179707782287166,
"learning_rate": 2.406644895641062e-07,
"loss": 0.3469,
"step": 13750
},
{
"epoch": 0.9767004418575763,
"grad_norm": 4.221356923748856,
"learning_rate": 2.3356524208433906e-07,
"loss": 0.3343,
"step": 13760
},
{
"epoch": 0.9774102532252054,
"grad_norm": 286.15418214313974,
"learning_rate": 2.2646599460457195e-07,
"loss": 0.3349,
"step": 13770
},
{
"epoch": 0.9781200645928344,
"grad_norm": 3.4922335144175576,
"learning_rate": 2.193667471248048e-07,
"loss": 0.3485,
"step": 13780
},
{
"epoch": 0.9788298759604636,
"grad_norm": 3.944308898398798,
"learning_rate": 2.1226749964503763e-07,
"loss": 0.3288,
"step": 13790
},
{
"epoch": 0.9795396873280926,
"grad_norm": 3.16447581060814,
"learning_rate": 2.0516825216527052e-07,
"loss": 0.3435,
"step": 13800
},
{
"epoch": 0.9802494986957216,
"grad_norm": 7.105988741131366,
"learning_rate": 1.9806900468550335e-07,
"loss": 0.342,
"step": 13810
},
{
"epoch": 0.9809593100633507,
"grad_norm": 3.311616450653751,
"learning_rate": 1.9096975720573622e-07,
"loss": 0.365,
"step": 13820
},
{
"epoch": 0.9816691214309797,
"grad_norm": 3.1283492138129128,
"learning_rate": 1.8387050972596905e-07,
"loss": 0.3497,
"step": 13830
},
{
"epoch": 0.9823789327986088,
"grad_norm": 4.720800332800002,
"learning_rate": 1.7677126224620194e-07,
"loss": 0.3356,
"step": 13840
},
{
"epoch": 0.9830887441662378,
"grad_norm": 5.755549723756511,
"learning_rate": 1.6967201476643478e-07,
"loss": 0.3534,
"step": 13850
},
{
"epoch": 0.9837985555338669,
"grad_norm": 12.413957162417217,
"learning_rate": 1.6257276728666762e-07,
"loss": 0.3514,
"step": 13860
},
{
"epoch": 0.9845083669014959,
"grad_norm": 3.7416649036415195,
"learning_rate": 1.5547351980690048e-07,
"loss": 0.3468,
"step": 13870
},
{
"epoch": 0.985218178269125,
"grad_norm": 5.096087166471907,
"learning_rate": 1.4837427232713335e-07,
"loss": 0.3478,
"step": 13880
},
{
"epoch": 0.985927989636754,
"grad_norm": 2.8643069595501847,
"learning_rate": 1.4127502484736618e-07,
"loss": 0.3307,
"step": 13890
},
{
"epoch": 0.986637801004383,
"grad_norm": 4.161106542911394,
"learning_rate": 1.3417577736759905e-07,
"loss": 0.3451,
"step": 13900
},
{
"epoch": 0.9873476123720122,
"grad_norm": 3.161705990477656,
"learning_rate": 1.270765298878319e-07,
"loss": 0.3389,
"step": 13910
},
{
"epoch": 0.9880574237396412,
"grad_norm": 3.2196566259908637,
"learning_rate": 1.1997728240806475e-07,
"loss": 0.3508,
"step": 13920
},
{
"epoch": 0.9887672351072703,
"grad_norm": 3.0061617959710403,
"learning_rate": 1.1287803492829761e-07,
"loss": 0.357,
"step": 13930
},
{
"epoch": 0.9894770464748993,
"grad_norm": 7.195163761877952,
"learning_rate": 1.0577878744853047e-07,
"loss": 0.3344,
"step": 13940
},
{
"epoch": 0.9901868578425284,
"grad_norm": 4.778295681909435,
"learning_rate": 9.867953996876332e-08,
"loss": 0.3404,
"step": 13950
},
{
"epoch": 0.9908966692101574,
"grad_norm": 3.6751893575330072,
"learning_rate": 9.158029248899617e-08,
"loss": 0.3222,
"step": 13960
},
{
"epoch": 0.9916064805777864,
"grad_norm": 6.066838850034421,
"learning_rate": 8.448104500922902e-08,
"loss": 0.3373,
"step": 13970
},
{
"epoch": 0.9923162919454155,
"grad_norm": 5.8640066255244525,
"learning_rate": 7.738179752946189e-08,
"loss": 0.35,
"step": 13980
},
{
"epoch": 0.9930261033130445,
"grad_norm": 4.063550481932921,
"learning_rate": 7.028255004969474e-08,
"loss": 0.3424,
"step": 13990
},
{
"epoch": 0.9937359146806736,
"grad_norm": 6.923421784576789,
"learning_rate": 6.31833025699276e-08,
"loss": 0.3584,
"step": 14000
},
{
"epoch": 0.9944457260483026,
"grad_norm": 4.621602275306591,
"learning_rate": 5.6084055090160446e-08,
"loss": 0.3381,
"step": 14010
},
{
"epoch": 0.9951555374159318,
"grad_norm": 5.495946912076004,
"learning_rate": 4.89848076103933e-08,
"loss": 0.3557,
"step": 14020
},
{
"epoch": 0.9958653487835608,
"grad_norm": 2.261874767912811,
"learning_rate": 4.188556013062616e-08,
"loss": 0.3346,
"step": 14030
},
{
"epoch": 0.9965751601511899,
"grad_norm": 3.528699506394003,
"learning_rate": 3.478631265085901e-08,
"loss": 0.3284,
"step": 14040
},
{
"epoch": 0.9972849715188189,
"grad_norm": 3.0483860239618314,
"learning_rate": 2.7687065171091867e-08,
"loss": 0.3341,
"step": 14050
},
{
"epoch": 0.9979947828864479,
"grad_norm": 4.681194219809911,
"learning_rate": 2.0587817691324724e-08,
"loss": 0.333,
"step": 14060
},
{
"epoch": 0.998704594254077,
"grad_norm": 5.802114485594721,
"learning_rate": 1.3488570211557575e-08,
"loss": 0.3457,
"step": 14070
},
{
"epoch": 0.999414405621706,
"grad_norm": 2.8616716300198775,
"learning_rate": 6.389322731790431e-09,
"loss": 0.3398,
"step": 14080
}
],
"logging_steps": 10,
"max_steps": 14088,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9975763395674112.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}